diff options
Diffstat (limited to 'tools')
1137 files changed, 41820 insertions, 11390 deletions
diff --git a/tools/arch/arm64/include/asm/brk-imm.h b/tools/arch/arm64/include/asm/brk-imm.h new file mode 100644 index 000000000000..beb42c62b6ac --- /dev/null +++ b/tools/arch/arm64/include/asm/brk-imm.h @@ -0,0 +1,42 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2012 ARM Ltd. + */ + +#ifndef __ASM_BRK_IMM_H +#define __ASM_BRK_IMM_H + +/* + * #imm16 values used for BRK instruction generation + * 0x004: for installing kprobes + * 0x005: for installing uprobes + * 0x006: for kprobe software single-step + * 0x007: for kretprobe return + * Allowed values for kgdb are 0x400 - 0x7ff + * 0x100: for triggering a fault on purpose (reserved) + * 0x400: for dynamic BRK instruction + * 0x401: for compile time BRK instruction + * 0x800: kernel-mode BUG() and WARN() traps + * 0x9xx: tag-based KASAN trap (allowed values 0x900 - 0x9ff) + * 0x55xx: Undefined Behavior Sanitizer traps ('U' << 8) + * 0x8xxx: Control-Flow Integrity traps + */ +#define KPROBES_BRK_IMM 0x004 +#define UPROBES_BRK_IMM 0x005 +#define KPROBES_BRK_SS_IMM 0x006 +#define KRETPROBES_BRK_IMM 0x007 +#define FAULT_BRK_IMM 0x100 +#define KGDB_DYN_DBG_BRK_IMM 0x400 +#define KGDB_COMPILED_DBG_BRK_IMM 0x401 +#define BUG_BRK_IMM 0x800 +#define KASAN_BRK_IMM 0x900 +#define KASAN_BRK_MASK 0x0ff +#define UBSAN_BRK_IMM 0x5500 +#define UBSAN_BRK_MASK 0x00ff + +#define CFI_BRK_IMM_TARGET GENMASK(4, 0) +#define CFI_BRK_IMM_TYPE GENMASK(9, 5) +#define CFI_BRK_IMM_BASE 0x8000 +#define CFI_BRK_IMM_MASK (CFI_BRK_IMM_TARGET | CFI_BRK_IMM_TYPE) + +#endif diff --git a/tools/arch/arm64/include/asm/cputype.h b/tools/arch/arm64/include/asm/cputype.h index 5fd7caea4419..488f8e751349 100644 --- a/tools/arch/arm64/include/asm/cputype.h +++ b/tools/arch/arm64/include/asm/cputype.h @@ -94,6 +94,7 @@ #define ARM_CPU_PART_NEOVERSE_V3 0xD84 #define ARM_CPU_PART_CORTEX_X925 0xD85 #define ARM_CPU_PART_CORTEX_A725 0xD87 +#define ARM_CPU_PART_NEOVERSE_N3 0xD8E #define APM_CPU_PART_XGENE 0x000 #define APM_CPU_VAR_POTENZA 0x00 @@ -143,6 +144,7 @@ #define APPLE_CPU_PART_M2_AVALANCHE_MAX 0x039 #define AMPERE_CPU_PART_AMPERE1 0xAC3 +#define AMPERE_CPU_PART_AMPERE1A 0xAC4 #define MICROSOFT_CPU_PART_AZURE_COBALT_100 0xD49 /* Based on r0p0 of ARM Neoverse N2 */ @@ -175,6 +177,7 @@ #define MIDR_NEOVERSE_V3 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_V3) #define MIDR_CORTEX_X925 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X925) #define MIDR_CORTEX_A725 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A725) +#define MIDR_NEOVERSE_N3 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_N3) #define MIDR_THUNDERX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX) #define MIDR_THUNDERX_81XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_81XX) #define MIDR_THUNDERX_83XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_83XX) @@ -212,6 +215,7 @@ #define MIDR_APPLE_M2_BLIZZARD_MAX MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M2_BLIZZARD_MAX) #define MIDR_APPLE_M2_AVALANCHE_MAX MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M2_AVALANCHE_MAX) #define MIDR_AMPERE1 MIDR_CPU_MODEL(ARM_CPU_IMP_AMPERE, AMPERE_CPU_PART_AMPERE1) +#define MIDR_AMPERE1A MIDR_CPU_MODEL(ARM_CPU_IMP_AMPERE, AMPERE_CPU_PART_AMPERE1A) #define MIDR_MICROSOFT_AZURE_COBALT_100 MIDR_CPU_MODEL(ARM_CPU_IMP_MICROSOFT, MICROSOFT_CPU_PART_AZURE_COBALT_100) /* Fujitsu Erratum 010001 affects A64FX 1.0 and 1.1, (v0r0 and v1r0) */ diff --git a/tools/arch/arm64/include/asm/esr.h b/tools/arch/arm64/include/asm/esr.h new file mode 100644 index 000000000000..bd592ca81571 --- /dev/null +++ b/tools/arch/arm64/include/asm/esr.h @@ -0,0 +1,455 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2013 - ARM Ltd + * Author: Marc Zyngier <marc.zyngier@arm.com> + */ + +#ifndef __ASM_ESR_H +#define __ASM_ESR_H + +#include <asm/sysreg.h> + +#define ESR_ELx_EC_UNKNOWN UL(0x00) +#define ESR_ELx_EC_WFx UL(0x01) +/* Unallocated EC: 0x02 */ +#define ESR_ELx_EC_CP15_32 UL(0x03) +#define ESR_ELx_EC_CP15_64 UL(0x04) +#define ESR_ELx_EC_CP14_MR UL(0x05) +#define ESR_ELx_EC_CP14_LS UL(0x06) +#define ESR_ELx_EC_FP_ASIMD UL(0x07) +#define ESR_ELx_EC_CP10_ID UL(0x08) /* EL2 only */ +#define ESR_ELx_EC_PAC UL(0x09) /* EL2 and above */ +/* Unallocated EC: 0x0A - 0x0B */ +#define ESR_ELx_EC_CP14_64 UL(0x0C) +#define ESR_ELx_EC_BTI UL(0x0D) +#define ESR_ELx_EC_ILL UL(0x0E) +/* Unallocated EC: 0x0F - 0x10 */ +#define ESR_ELx_EC_SVC32 UL(0x11) +#define ESR_ELx_EC_HVC32 UL(0x12) /* EL2 only */ +#define ESR_ELx_EC_SMC32 UL(0x13) /* EL2 and above */ +/* Unallocated EC: 0x14 */ +#define ESR_ELx_EC_SVC64 UL(0x15) +#define ESR_ELx_EC_HVC64 UL(0x16) /* EL2 and above */ +#define ESR_ELx_EC_SMC64 UL(0x17) /* EL2 and above */ +#define ESR_ELx_EC_SYS64 UL(0x18) +#define ESR_ELx_EC_SVE UL(0x19) +#define ESR_ELx_EC_ERET UL(0x1a) /* EL2 only */ +/* Unallocated EC: 0x1B */ +#define ESR_ELx_EC_FPAC UL(0x1C) /* EL1 and above */ +#define ESR_ELx_EC_SME UL(0x1D) +/* Unallocated EC: 0x1E */ +#define ESR_ELx_EC_IMP_DEF UL(0x1f) /* EL3 only */ +#define ESR_ELx_EC_IABT_LOW UL(0x20) +#define ESR_ELx_EC_IABT_CUR UL(0x21) +#define ESR_ELx_EC_PC_ALIGN UL(0x22) +/* Unallocated EC: 0x23 */ +#define ESR_ELx_EC_DABT_LOW UL(0x24) +#define ESR_ELx_EC_DABT_CUR UL(0x25) +#define ESR_ELx_EC_SP_ALIGN UL(0x26) +#define ESR_ELx_EC_MOPS UL(0x27) +#define ESR_ELx_EC_FP_EXC32 UL(0x28) +/* Unallocated EC: 0x29 - 0x2B */ +#define ESR_ELx_EC_FP_EXC64 UL(0x2C) +/* Unallocated EC: 0x2D - 0x2E */ +#define ESR_ELx_EC_SERROR UL(0x2F) +#define ESR_ELx_EC_BREAKPT_LOW UL(0x30) +#define ESR_ELx_EC_BREAKPT_CUR UL(0x31) +#define ESR_ELx_EC_SOFTSTP_LOW UL(0x32) +#define ESR_ELx_EC_SOFTSTP_CUR UL(0x33) +#define ESR_ELx_EC_WATCHPT_LOW UL(0x34) +#define ESR_ELx_EC_WATCHPT_CUR UL(0x35) +/* Unallocated EC: 0x36 - 0x37 */ +#define ESR_ELx_EC_BKPT32 UL(0x38) +/* Unallocated EC: 0x39 */ +#define ESR_ELx_EC_VECTOR32 UL(0x3A) /* EL2 only */ +/* Unallocated EC: 0x3B */ +#define ESR_ELx_EC_BRK64 UL(0x3C) +/* Unallocated EC: 0x3D - 0x3F */ +#define ESR_ELx_EC_MAX UL(0x3F) + +#define ESR_ELx_EC_SHIFT (26) +#define ESR_ELx_EC_WIDTH (6) +#define ESR_ELx_EC_MASK (UL(0x3F) << ESR_ELx_EC_SHIFT) +#define ESR_ELx_EC(esr) (((esr) & ESR_ELx_EC_MASK) >> ESR_ELx_EC_SHIFT) + +#define ESR_ELx_IL_SHIFT (25) +#define ESR_ELx_IL (UL(1) << ESR_ELx_IL_SHIFT) +#define ESR_ELx_ISS_MASK (GENMASK(24, 0)) +#define ESR_ELx_ISS(esr) ((esr) & ESR_ELx_ISS_MASK) +#define ESR_ELx_ISS2_SHIFT (32) +#define ESR_ELx_ISS2_MASK (GENMASK_ULL(55, 32)) +#define ESR_ELx_ISS2(esr) (((esr) & ESR_ELx_ISS2_MASK) >> ESR_ELx_ISS2_SHIFT) + +/* ISS field definitions shared by different classes */ +#define ESR_ELx_WNR_SHIFT (6) +#define ESR_ELx_WNR (UL(1) << ESR_ELx_WNR_SHIFT) + +/* Asynchronous Error Type */ +#define ESR_ELx_IDS_SHIFT (24) +#define ESR_ELx_IDS (UL(1) << ESR_ELx_IDS_SHIFT) +#define ESR_ELx_AET_SHIFT (10) +#define ESR_ELx_AET (UL(0x7) << ESR_ELx_AET_SHIFT) + +#define ESR_ELx_AET_UC (UL(0) << ESR_ELx_AET_SHIFT) +#define ESR_ELx_AET_UEU (UL(1) << ESR_ELx_AET_SHIFT) +#define ESR_ELx_AET_UEO (UL(2) << ESR_ELx_AET_SHIFT) +#define ESR_ELx_AET_UER (UL(3) << ESR_ELx_AET_SHIFT) +#define ESR_ELx_AET_CE (UL(6) << ESR_ELx_AET_SHIFT) + +/* Shared ISS field definitions for Data/Instruction aborts */ +#define ESR_ELx_SET_SHIFT (11) +#define ESR_ELx_SET_MASK (UL(3) << ESR_ELx_SET_SHIFT) +#define ESR_ELx_FnV_SHIFT (10) +#define ESR_ELx_FnV (UL(1) << ESR_ELx_FnV_SHIFT) +#define ESR_ELx_EA_SHIFT (9) +#define ESR_ELx_EA (UL(1) << ESR_ELx_EA_SHIFT) +#define ESR_ELx_S1PTW_SHIFT (7) +#define ESR_ELx_S1PTW (UL(1) << ESR_ELx_S1PTW_SHIFT) + +/* Shared ISS fault status code(IFSC/DFSC) for Data/Instruction aborts */ +#define ESR_ELx_FSC (0x3F) +#define ESR_ELx_FSC_TYPE (0x3C) +#define ESR_ELx_FSC_LEVEL (0x03) +#define ESR_ELx_FSC_EXTABT (0x10) +#define ESR_ELx_FSC_MTE (0x11) +#define ESR_ELx_FSC_SERROR (0x11) +#define ESR_ELx_FSC_ACCESS (0x08) +#define ESR_ELx_FSC_FAULT (0x04) +#define ESR_ELx_FSC_PERM (0x0C) +#define ESR_ELx_FSC_SEA_TTW(n) (0x14 + (n)) +#define ESR_ELx_FSC_SECC (0x18) +#define ESR_ELx_FSC_SECC_TTW(n) (0x1c + (n)) + +/* Status codes for individual page table levels */ +#define ESR_ELx_FSC_ACCESS_L(n) (ESR_ELx_FSC_ACCESS + (n)) +#define ESR_ELx_FSC_PERM_L(n) (ESR_ELx_FSC_PERM + (n)) + +#define ESR_ELx_FSC_FAULT_nL (0x2C) +#define ESR_ELx_FSC_FAULT_L(n) (((n) < 0 ? ESR_ELx_FSC_FAULT_nL : \ + ESR_ELx_FSC_FAULT) + (n)) + +/* ISS field definitions for Data Aborts */ +#define ESR_ELx_ISV_SHIFT (24) +#define ESR_ELx_ISV (UL(1) << ESR_ELx_ISV_SHIFT) +#define ESR_ELx_SAS_SHIFT (22) +#define ESR_ELx_SAS (UL(3) << ESR_ELx_SAS_SHIFT) +#define ESR_ELx_SSE_SHIFT (21) +#define ESR_ELx_SSE (UL(1) << ESR_ELx_SSE_SHIFT) +#define ESR_ELx_SRT_SHIFT (16) +#define ESR_ELx_SRT_MASK (UL(0x1F) << ESR_ELx_SRT_SHIFT) +#define ESR_ELx_SF_SHIFT (15) +#define ESR_ELx_SF (UL(1) << ESR_ELx_SF_SHIFT) +#define ESR_ELx_AR_SHIFT (14) +#define ESR_ELx_AR (UL(1) << ESR_ELx_AR_SHIFT) +#define ESR_ELx_CM_SHIFT (8) +#define ESR_ELx_CM (UL(1) << ESR_ELx_CM_SHIFT) + +/* ISS2 field definitions for Data Aborts */ +#define ESR_ELx_TnD_SHIFT (10) +#define ESR_ELx_TnD (UL(1) << ESR_ELx_TnD_SHIFT) +#define ESR_ELx_TagAccess_SHIFT (9) +#define ESR_ELx_TagAccess (UL(1) << ESR_ELx_TagAccess_SHIFT) +#define ESR_ELx_GCS_SHIFT (8) +#define ESR_ELx_GCS (UL(1) << ESR_ELx_GCS_SHIFT) +#define ESR_ELx_Overlay_SHIFT (6) +#define ESR_ELx_Overlay (UL(1) << ESR_ELx_Overlay_SHIFT) +#define ESR_ELx_DirtyBit_SHIFT (5) +#define ESR_ELx_DirtyBit (UL(1) << ESR_ELx_DirtyBit_SHIFT) +#define ESR_ELx_Xs_SHIFT (0) +#define ESR_ELx_Xs_MASK (GENMASK_ULL(4, 0)) + +/* ISS field definitions for exceptions taken in to Hyp */ +#define ESR_ELx_FSC_ADDRSZ (0x00) +#define ESR_ELx_FSC_ADDRSZ_L(n) (ESR_ELx_FSC_ADDRSZ + (n)) +#define ESR_ELx_CV (UL(1) << 24) +#define ESR_ELx_COND_SHIFT (20) +#define ESR_ELx_COND_MASK (UL(0xF) << ESR_ELx_COND_SHIFT) +#define ESR_ELx_WFx_ISS_RN (UL(0x1F) << 5) +#define ESR_ELx_WFx_ISS_RV (UL(1) << 2) +#define ESR_ELx_WFx_ISS_TI (UL(3) << 0) +#define ESR_ELx_WFx_ISS_WFxT (UL(2) << 0) +#define ESR_ELx_WFx_ISS_WFI (UL(0) << 0) +#define ESR_ELx_WFx_ISS_WFE (UL(1) << 0) +#define ESR_ELx_xVC_IMM_MASK ((UL(1) << 16) - 1) + +#define DISR_EL1_IDS (UL(1) << 24) +/* + * DISR_EL1 and ESR_ELx share the bottom 13 bits, but the RES0 bits may mean + * different things in the future... + */ +#define DISR_EL1_ESR_MASK (ESR_ELx_AET | ESR_ELx_EA | ESR_ELx_FSC) + +/* ESR value templates for specific events */ +#define ESR_ELx_WFx_MASK (ESR_ELx_EC_MASK | \ + (ESR_ELx_WFx_ISS_TI & ~ESR_ELx_WFx_ISS_WFxT)) +#define ESR_ELx_WFx_WFI_VAL ((ESR_ELx_EC_WFx << ESR_ELx_EC_SHIFT) | \ + ESR_ELx_WFx_ISS_WFI) + +/* BRK instruction trap from AArch64 state */ +#define ESR_ELx_BRK64_ISS_COMMENT_MASK 0xffff + +/* ISS field definitions for System instruction traps */ +#define ESR_ELx_SYS64_ISS_RES0_SHIFT 22 +#define ESR_ELx_SYS64_ISS_RES0_MASK (UL(0x7) << ESR_ELx_SYS64_ISS_RES0_SHIFT) +#define ESR_ELx_SYS64_ISS_DIR_MASK 0x1 +#define ESR_ELx_SYS64_ISS_DIR_READ 0x1 +#define ESR_ELx_SYS64_ISS_DIR_WRITE 0x0 + +#define ESR_ELx_SYS64_ISS_RT_SHIFT 5 +#define ESR_ELx_SYS64_ISS_RT_MASK (UL(0x1f) << ESR_ELx_SYS64_ISS_RT_SHIFT) +#define ESR_ELx_SYS64_ISS_CRM_SHIFT 1 +#define ESR_ELx_SYS64_ISS_CRM_MASK (UL(0xf) << ESR_ELx_SYS64_ISS_CRM_SHIFT) +#define ESR_ELx_SYS64_ISS_CRN_SHIFT 10 +#define ESR_ELx_SYS64_ISS_CRN_MASK (UL(0xf) << ESR_ELx_SYS64_ISS_CRN_SHIFT) +#define ESR_ELx_SYS64_ISS_OP1_SHIFT 14 +#define ESR_ELx_SYS64_ISS_OP1_MASK (UL(0x7) << ESR_ELx_SYS64_ISS_OP1_SHIFT) +#define ESR_ELx_SYS64_ISS_OP2_SHIFT 17 +#define ESR_ELx_SYS64_ISS_OP2_MASK (UL(0x7) << ESR_ELx_SYS64_ISS_OP2_SHIFT) +#define ESR_ELx_SYS64_ISS_OP0_SHIFT 20 +#define ESR_ELx_SYS64_ISS_OP0_MASK (UL(0x3) << ESR_ELx_SYS64_ISS_OP0_SHIFT) +#define ESR_ELx_SYS64_ISS_SYS_MASK (ESR_ELx_SYS64_ISS_OP0_MASK | \ + ESR_ELx_SYS64_ISS_OP1_MASK | \ + ESR_ELx_SYS64_ISS_OP2_MASK | \ + ESR_ELx_SYS64_ISS_CRN_MASK | \ + ESR_ELx_SYS64_ISS_CRM_MASK) +#define ESR_ELx_SYS64_ISS_SYS_VAL(op0, op1, op2, crn, crm) \ + (((op0) << ESR_ELx_SYS64_ISS_OP0_SHIFT) | \ + ((op1) << ESR_ELx_SYS64_ISS_OP1_SHIFT) | \ + ((op2) << ESR_ELx_SYS64_ISS_OP2_SHIFT) | \ + ((crn) << ESR_ELx_SYS64_ISS_CRN_SHIFT) | \ + ((crm) << ESR_ELx_SYS64_ISS_CRM_SHIFT)) + +#define ESR_ELx_SYS64_ISS_SYS_OP_MASK (ESR_ELx_SYS64_ISS_SYS_MASK | \ + ESR_ELx_SYS64_ISS_DIR_MASK) +#define ESR_ELx_SYS64_ISS_RT(esr) \ + (((esr) & ESR_ELx_SYS64_ISS_RT_MASK) >> ESR_ELx_SYS64_ISS_RT_SHIFT) +/* + * User space cache operations have the following sysreg encoding + * in System instructions. + * op0=1, op1=3, op2=1, crn=7, crm={ 5, 10, 11, 12, 13, 14 }, WRITE (L=0) + */ +#define ESR_ELx_SYS64_ISS_CRM_DC_CIVAC 14 +#define ESR_ELx_SYS64_ISS_CRM_DC_CVADP 13 +#define ESR_ELx_SYS64_ISS_CRM_DC_CVAP 12 +#define ESR_ELx_SYS64_ISS_CRM_DC_CVAU 11 +#define ESR_ELx_SYS64_ISS_CRM_DC_CVAC 10 +#define ESR_ELx_SYS64_ISS_CRM_IC_IVAU 5 + +#define ESR_ELx_SYS64_ISS_EL0_CACHE_OP_MASK (ESR_ELx_SYS64_ISS_OP0_MASK | \ + ESR_ELx_SYS64_ISS_OP1_MASK | \ + ESR_ELx_SYS64_ISS_OP2_MASK | \ + ESR_ELx_SYS64_ISS_CRN_MASK | \ + ESR_ELx_SYS64_ISS_DIR_MASK) +#define ESR_ELx_SYS64_ISS_EL0_CACHE_OP_VAL \ + (ESR_ELx_SYS64_ISS_SYS_VAL(1, 3, 1, 7, 0) | \ + ESR_ELx_SYS64_ISS_DIR_WRITE) +/* + * User space MRS operations which are supported for emulation + * have the following sysreg encoding in System instructions. + * op0 = 3, op1= 0, crn = 0, {crm = 0, 4-7}, READ (L = 1) + */ +#define ESR_ELx_SYS64_ISS_SYS_MRS_OP_MASK (ESR_ELx_SYS64_ISS_OP0_MASK | \ + ESR_ELx_SYS64_ISS_OP1_MASK | \ + ESR_ELx_SYS64_ISS_CRN_MASK | \ + ESR_ELx_SYS64_ISS_DIR_MASK) +#define ESR_ELx_SYS64_ISS_SYS_MRS_OP_VAL \ + (ESR_ELx_SYS64_ISS_SYS_VAL(3, 0, 0, 0, 0) | \ + ESR_ELx_SYS64_ISS_DIR_READ) + +#define ESR_ELx_SYS64_ISS_SYS_CTR ESR_ELx_SYS64_ISS_SYS_VAL(3, 3, 1, 0, 0) +#define ESR_ELx_SYS64_ISS_SYS_CTR_READ (ESR_ELx_SYS64_ISS_SYS_CTR | \ + ESR_ELx_SYS64_ISS_DIR_READ) + +#define ESR_ELx_SYS64_ISS_SYS_CNTVCT (ESR_ELx_SYS64_ISS_SYS_VAL(3, 3, 2, 14, 0) | \ + ESR_ELx_SYS64_ISS_DIR_READ) + +#define ESR_ELx_SYS64_ISS_SYS_CNTVCTSS (ESR_ELx_SYS64_ISS_SYS_VAL(3, 3, 6, 14, 0) | \ + ESR_ELx_SYS64_ISS_DIR_READ) + +#define ESR_ELx_SYS64_ISS_SYS_CNTFRQ (ESR_ELx_SYS64_ISS_SYS_VAL(3, 3, 0, 14, 0) | \ + ESR_ELx_SYS64_ISS_DIR_READ) + +#define esr_sys64_to_sysreg(e) \ + sys_reg((((e) & ESR_ELx_SYS64_ISS_OP0_MASK) >> \ + ESR_ELx_SYS64_ISS_OP0_SHIFT), \ + (((e) & ESR_ELx_SYS64_ISS_OP1_MASK) >> \ + ESR_ELx_SYS64_ISS_OP1_SHIFT), \ + (((e) & ESR_ELx_SYS64_ISS_CRN_MASK) >> \ + ESR_ELx_SYS64_ISS_CRN_SHIFT), \ + (((e) & ESR_ELx_SYS64_ISS_CRM_MASK) >> \ + ESR_ELx_SYS64_ISS_CRM_SHIFT), \ + (((e) & ESR_ELx_SYS64_ISS_OP2_MASK) >> \ + ESR_ELx_SYS64_ISS_OP2_SHIFT)) + +#define esr_cp15_to_sysreg(e) \ + sys_reg(3, \ + (((e) & ESR_ELx_SYS64_ISS_OP1_MASK) >> \ + ESR_ELx_SYS64_ISS_OP1_SHIFT), \ + (((e) & ESR_ELx_SYS64_ISS_CRN_MASK) >> \ + ESR_ELx_SYS64_ISS_CRN_SHIFT), \ + (((e) & ESR_ELx_SYS64_ISS_CRM_MASK) >> \ + ESR_ELx_SYS64_ISS_CRM_SHIFT), \ + (((e) & ESR_ELx_SYS64_ISS_OP2_MASK) >> \ + ESR_ELx_SYS64_ISS_OP2_SHIFT)) + +/* ISS field definitions for ERET/ERETAA/ERETAB trapping */ +#define ESR_ELx_ERET_ISS_ERET 0x2 +#define ESR_ELx_ERET_ISS_ERETA 0x1 + +/* + * ISS field definitions for floating-point exception traps + * (FP_EXC_32/FP_EXC_64). + * + * (The FPEXC_* constants are used instead for common bits.) + */ + +#define ESR_ELx_FP_EXC_TFV (UL(1) << 23) + +/* + * ISS field definitions for CP15 accesses + */ +#define ESR_ELx_CP15_32_ISS_DIR_MASK 0x1 +#define ESR_ELx_CP15_32_ISS_DIR_READ 0x1 +#define ESR_ELx_CP15_32_ISS_DIR_WRITE 0x0 + +#define ESR_ELx_CP15_32_ISS_RT_SHIFT 5 +#define ESR_ELx_CP15_32_ISS_RT_MASK (UL(0x1f) << ESR_ELx_CP15_32_ISS_RT_SHIFT) +#define ESR_ELx_CP15_32_ISS_CRM_SHIFT 1 +#define ESR_ELx_CP15_32_ISS_CRM_MASK (UL(0xf) << ESR_ELx_CP15_32_ISS_CRM_SHIFT) +#define ESR_ELx_CP15_32_ISS_CRN_SHIFT 10 +#define ESR_ELx_CP15_32_ISS_CRN_MASK (UL(0xf) << ESR_ELx_CP15_32_ISS_CRN_SHIFT) +#define ESR_ELx_CP15_32_ISS_OP1_SHIFT 14 +#define ESR_ELx_CP15_32_ISS_OP1_MASK (UL(0x7) << ESR_ELx_CP15_32_ISS_OP1_SHIFT) +#define ESR_ELx_CP15_32_ISS_OP2_SHIFT 17 +#define ESR_ELx_CP15_32_ISS_OP2_MASK (UL(0x7) << ESR_ELx_CP15_32_ISS_OP2_SHIFT) + +#define ESR_ELx_CP15_32_ISS_SYS_MASK (ESR_ELx_CP15_32_ISS_OP1_MASK | \ + ESR_ELx_CP15_32_ISS_OP2_MASK | \ + ESR_ELx_CP15_32_ISS_CRN_MASK | \ + ESR_ELx_CP15_32_ISS_CRM_MASK | \ + ESR_ELx_CP15_32_ISS_DIR_MASK) +#define ESR_ELx_CP15_32_ISS_SYS_VAL(op1, op2, crn, crm) \ + (((op1) << ESR_ELx_CP15_32_ISS_OP1_SHIFT) | \ + ((op2) << ESR_ELx_CP15_32_ISS_OP2_SHIFT) | \ + ((crn) << ESR_ELx_CP15_32_ISS_CRN_SHIFT) | \ + ((crm) << ESR_ELx_CP15_32_ISS_CRM_SHIFT)) + +#define ESR_ELx_CP15_64_ISS_DIR_MASK 0x1 +#define ESR_ELx_CP15_64_ISS_DIR_READ 0x1 +#define ESR_ELx_CP15_64_ISS_DIR_WRITE 0x0 + +#define ESR_ELx_CP15_64_ISS_RT_SHIFT 5 +#define ESR_ELx_CP15_64_ISS_RT_MASK (UL(0x1f) << ESR_ELx_CP15_64_ISS_RT_SHIFT) + +#define ESR_ELx_CP15_64_ISS_RT2_SHIFT 10 +#define ESR_ELx_CP15_64_ISS_RT2_MASK (UL(0x1f) << ESR_ELx_CP15_64_ISS_RT2_SHIFT) + +#define ESR_ELx_CP15_64_ISS_OP1_SHIFT 16 +#define ESR_ELx_CP15_64_ISS_OP1_MASK (UL(0xf) << ESR_ELx_CP15_64_ISS_OP1_SHIFT) +#define ESR_ELx_CP15_64_ISS_CRM_SHIFT 1 +#define ESR_ELx_CP15_64_ISS_CRM_MASK (UL(0xf) << ESR_ELx_CP15_64_ISS_CRM_SHIFT) + +#define ESR_ELx_CP15_64_ISS_SYS_VAL(op1, crm) \ + (((op1) << ESR_ELx_CP15_64_ISS_OP1_SHIFT) | \ + ((crm) << ESR_ELx_CP15_64_ISS_CRM_SHIFT)) + +#define ESR_ELx_CP15_64_ISS_SYS_MASK (ESR_ELx_CP15_64_ISS_OP1_MASK | \ + ESR_ELx_CP15_64_ISS_CRM_MASK | \ + ESR_ELx_CP15_64_ISS_DIR_MASK) + +#define ESR_ELx_CP15_64_ISS_SYS_CNTVCT (ESR_ELx_CP15_64_ISS_SYS_VAL(1, 14) | \ + ESR_ELx_CP15_64_ISS_DIR_READ) + +#define ESR_ELx_CP15_64_ISS_SYS_CNTVCTSS (ESR_ELx_CP15_64_ISS_SYS_VAL(9, 14) | \ + ESR_ELx_CP15_64_ISS_DIR_READ) + +#define ESR_ELx_CP15_32_ISS_SYS_CNTFRQ (ESR_ELx_CP15_32_ISS_SYS_VAL(0, 0, 14, 0) |\ + ESR_ELx_CP15_32_ISS_DIR_READ) + +/* + * ISS values for SME traps + */ + +#define ESR_ELx_SME_ISS_SME_DISABLED 0 +#define ESR_ELx_SME_ISS_ILL 1 +#define ESR_ELx_SME_ISS_SM_DISABLED 2 +#define ESR_ELx_SME_ISS_ZA_DISABLED 3 +#define ESR_ELx_SME_ISS_ZT_DISABLED 4 + +/* ISS field definitions for MOPS exceptions */ +#define ESR_ELx_MOPS_ISS_MEM_INST (UL(1) << 24) +#define ESR_ELx_MOPS_ISS_FROM_EPILOGUE (UL(1) << 18) +#define ESR_ELx_MOPS_ISS_WRONG_OPTION (UL(1) << 17) +#define ESR_ELx_MOPS_ISS_OPTION_A (UL(1) << 16) +#define ESR_ELx_MOPS_ISS_DESTREG(esr) (((esr) & (UL(0x1f) << 10)) >> 10) +#define ESR_ELx_MOPS_ISS_SRCREG(esr) (((esr) & (UL(0x1f) << 5)) >> 5) +#define ESR_ELx_MOPS_ISS_SIZEREG(esr) (((esr) & (UL(0x1f) << 0)) >> 0) + +#ifndef __ASSEMBLY__ +#include <asm/types.h> + +static inline unsigned long esr_brk_comment(unsigned long esr) +{ + return esr & ESR_ELx_BRK64_ISS_COMMENT_MASK; +} + +static inline bool esr_is_data_abort(unsigned long esr) +{ + const unsigned long ec = ESR_ELx_EC(esr); + + return ec == ESR_ELx_EC_DABT_LOW || ec == ESR_ELx_EC_DABT_CUR; +} + +static inline bool esr_is_cfi_brk(unsigned long esr) +{ + return ESR_ELx_EC(esr) == ESR_ELx_EC_BRK64 && + (esr_brk_comment(esr) & ~CFI_BRK_IMM_MASK) == CFI_BRK_IMM_BASE; +} + +static inline bool esr_fsc_is_translation_fault(unsigned long esr) +{ + esr = esr & ESR_ELx_FSC; + + return (esr == ESR_ELx_FSC_FAULT_L(3)) || + (esr == ESR_ELx_FSC_FAULT_L(2)) || + (esr == ESR_ELx_FSC_FAULT_L(1)) || + (esr == ESR_ELx_FSC_FAULT_L(0)) || + (esr == ESR_ELx_FSC_FAULT_L(-1)); +} + +static inline bool esr_fsc_is_permission_fault(unsigned long esr) +{ + esr = esr & ESR_ELx_FSC; + + return (esr == ESR_ELx_FSC_PERM_L(3)) || + (esr == ESR_ELx_FSC_PERM_L(2)) || + (esr == ESR_ELx_FSC_PERM_L(1)) || + (esr == ESR_ELx_FSC_PERM_L(0)); +} + +static inline bool esr_fsc_is_access_flag_fault(unsigned long esr) +{ + esr = esr & ESR_ELx_FSC; + + return (esr == ESR_ELx_FSC_ACCESS_L(3)) || + (esr == ESR_ELx_FSC_ACCESS_L(2)) || + (esr == ESR_ELx_FSC_ACCESS_L(1)) || + (esr == ESR_ELx_FSC_ACCESS_L(0)); +} + +/* Indicate whether ESR.EC==0x1A is for an ERETAx instruction */ +static inline bool esr_iss_is_eretax(unsigned long esr) +{ + return esr & ESR_ELx_ERET_ISS_ERET; +} + +/* Indicate which key is used for ERETAx (false: A-Key, true: B-Key) */ +static inline bool esr_iss_is_eretab(unsigned long esr) +{ + return esr & ESR_ELx_ERET_ISS_ERETA; +} + +const char *esr_get_class_string(unsigned long esr); +#endif /* __ASSEMBLY */ + +#endif /* __ASM_ESR_H */ diff --git a/tools/arch/arm64/include/uapi/asm/kvm.h b/tools/arch/arm64/include/uapi/asm/kvm.h index 964df31da975..66736ff04011 100644 --- a/tools/arch/arm64/include/uapi/asm/kvm.h +++ b/tools/arch/arm64/include/uapi/asm/kvm.h @@ -484,6 +484,12 @@ enum { */ #define KVM_SYSTEM_EVENT_RESET_FLAG_PSCI_RESET2 (1ULL << 0) +/* + * Shutdown caused by a PSCI v1.3 SYSTEM_OFF2 call. + * Valid only when the system event has a type of KVM_SYSTEM_EVENT_SHUTDOWN. + */ +#define KVM_SYSTEM_EVENT_SHUTDOWN_FLAG_PSCI_OFF2 (1ULL << 0) + /* run->fail_entry.hardware_entry_failure_reason codes. */ #define KVM_EXIT_FAIL_ENTRY_CPU_UNSUPPORTED (1ULL << 0) diff --git a/tools/arch/arm64/vdso b/tools/arch/arm64/vdso deleted file mode 120000 index 233c7a26f6e5..000000000000 --- a/tools/arch/arm64/vdso +++ /dev/null @@ -1 +0,0 @@ -../../../arch/arm64/kernel/vdso
\ No newline at end of file diff --git a/tools/arch/loongarch/vdso b/tools/arch/loongarch/vdso deleted file mode 120000 index ebda43a82db7..000000000000 --- a/tools/arch/loongarch/vdso +++ /dev/null @@ -1 +0,0 @@ -../../../arch/loongarch/vdso
\ No newline at end of file diff --git a/tools/arch/powerpc/vdso b/tools/arch/powerpc/vdso deleted file mode 120000 index 4e676d1d1cb4..000000000000 --- a/tools/arch/powerpc/vdso +++ /dev/null @@ -1 +0,0 @@ -../../../arch/powerpc/kernel/vdso
\ No newline at end of file diff --git a/tools/arch/s390/include/uapi/asm/kvm.h b/tools/arch/s390/include/uapi/asm/kvm.h index 05eaf6db3ad4..60345dd2cba2 100644 --- a/tools/arch/s390/include/uapi/asm/kvm.h +++ b/tools/arch/s390/include/uapi/asm/kvm.h @@ -469,7 +469,8 @@ struct kvm_s390_vm_cpu_subfunc { __u8 kdsa[16]; /* with MSA9 */ __u8 sortl[32]; /* with STFLE.150 */ __u8 dfltcc[32]; /* with STFLE.151 */ - __u8 reserved[1728]; + __u8 pfcr[16]; /* with STFLE.201 */ + __u8 reserved[1712]; }; #define KVM_S390_VM_CPU_PROCESSOR_UV_FEAT_GUEST 6 diff --git a/tools/arch/s390/vdso b/tools/arch/s390/vdso deleted file mode 120000 index 6cf4c1cebdcd..000000000000 --- a/tools/arch/s390/vdso +++ /dev/null @@ -1 +0,0 @@ -../../../arch/s390/kernel/vdso64
\ No newline at end of file diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h index dd4682857c12..17b6590748c0 100644 --- a/tools/arch/x86/include/asm/cpufeatures.h +++ b/tools/arch/x86/include/asm/cpufeatures.h @@ -215,7 +215,7 @@ #define X86_FEATURE_SPEC_STORE_BYPASS_DISABLE ( 7*32+23) /* Disable Speculative Store Bypass. */ #define X86_FEATURE_LS_CFG_SSBD ( 7*32+24) /* AMD SSBD implementation via LS_CFG MSR */ #define X86_FEATURE_IBRS ( 7*32+25) /* "ibrs" Indirect Branch Restricted Speculation */ -#define X86_FEATURE_IBPB ( 7*32+26) /* "ibpb" Indirect Branch Prediction Barrier */ +#define X86_FEATURE_IBPB ( 7*32+26) /* "ibpb" Indirect Branch Prediction Barrier without a guaranteed RSB flush */ #define X86_FEATURE_STIBP ( 7*32+27) /* "stibp" Single Thread Indirect Branch Predictors */ #define X86_FEATURE_ZEN ( 7*32+28) /* Generic flag for all Zen and newer */ #define X86_FEATURE_L1TF_PTEINV ( 7*32+29) /* L1TF workaround PTE inversion */ @@ -317,6 +317,9 @@ #define X86_FEATURE_ZEN1 (11*32+31) /* CPU based on Zen1 microarchitecture */ /* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */ +#define X86_FEATURE_SHA512 (12*32+ 0) /* SHA512 instructions */ +#define X86_FEATURE_SM3 (12*32+ 1) /* SM3 instructions */ +#define X86_FEATURE_SM4 (12*32+ 2) /* SM4 instructions */ #define X86_FEATURE_AVX_VNNI (12*32+ 4) /* "avx_vnni" AVX VNNI instructions */ #define X86_FEATURE_AVX512_BF16 (12*32+ 5) /* "avx512_bf16" AVX512 BFLOAT16 instructions */ #define X86_FEATURE_CMPCCXADD (12*32+ 7) /* CMPccXADD instructions */ @@ -348,6 +351,7 @@ #define X86_FEATURE_CPPC (13*32+27) /* "cppc" Collaborative Processor Performance Control */ #define X86_FEATURE_AMD_PSFD (13*32+28) /* Predictive Store Forwarding Disable */ #define X86_FEATURE_BTC_NO (13*32+29) /* Not vulnerable to Branch Type Confusion */ +#define X86_FEATURE_AMD_IBPB_RET (13*32+30) /* IBPB clears return address predictor */ #define X86_FEATURE_BRS (13*32+31) /* "brs" Branch Sampling available */ /* Thermal and Power Management Leaf, CPUID level 0x00000006 (EAX), word 14 */ @@ -472,7 +476,9 @@ #define X86_FEATURE_BHI_CTRL (21*32+ 2) /* BHI_DIS_S HW control available */ #define X86_FEATURE_CLEAR_BHB_HW (21*32+ 3) /* BHI_DIS_S HW control enabled */ #define X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT (21*32+ 4) /* Clear branch history at vmexit using SW loop */ -#define X86_FEATURE_FAST_CPPC (21*32 + 5) /* AMD Fast CPPC */ +#define X86_FEATURE_AMD_FAST_CPPC (21*32 + 5) /* Fast CPPC */ +#define X86_FEATURE_AMD_HETEROGENEOUS_CORES (21*32 + 6) /* Heterogeneous Core Topology */ +#define X86_FEATURE_AMD_WORKLOAD_CLASS (21*32 + 7) /* Workload Classification */ /* * BUG word(s) @@ -523,4 +529,5 @@ #define X86_BUG_DIV0 X86_BUG(1*32 + 1) /* "div0" AMD DIV0 speculation bug */ #define X86_BUG_RFDS X86_BUG(1*32 + 2) /* "rfds" CPU is vulnerable to Register File Data Sampling */ #define X86_BUG_BHI X86_BUG(1*32 + 3) /* "bhi" CPU is affected by Branch History Injection */ +#define X86_BUG_IBPB_NO_RET X86_BUG(1*32 + 4) /* "ibpb_no_ret" IBPB omits return target predictions */ #endif /* _ASM_X86_CPUFEATURES_H */ diff --git a/tools/arch/x86/include/asm/msr-index.h b/tools/arch/x86/include/asm/msr-index.h index 82c6a4d350e0..3ae84c3b8e6d 100644 --- a/tools/arch/x86/include/asm/msr-index.h +++ b/tools/arch/x86/include/asm/msr-index.h @@ -36,6 +36,20 @@ #define EFER_FFXSR (1<<_EFER_FFXSR) #define EFER_AUTOIBRS (1<<_EFER_AUTOIBRS) +/* + * Architectural memory types that are common to MTRRs, PAT, VMX MSRs, etc. + * Most MSRs support/allow only a subset of memory types, but the values + * themselves are common across all relevant MSRs. + */ +#define X86_MEMTYPE_UC 0ull /* Uncacheable, a.k.a. Strong Uncacheable */ +#define X86_MEMTYPE_WC 1ull /* Write Combining */ +/* RESERVED 2 */ +/* RESERVED 3 */ +#define X86_MEMTYPE_WT 4ull /* Write Through */ +#define X86_MEMTYPE_WP 5ull /* Write Protected */ +#define X86_MEMTYPE_WB 6ull /* Write Back */ +#define X86_MEMTYPE_UC_MINUS 7ull /* Weak Uncacheabled (PAT only) */ + /* FRED MSRs */ #define MSR_IA32_FRED_RSP0 0x1cc /* Level 0 stack pointer */ #define MSR_IA32_FRED_RSP1 0x1cd /* Level 1 stack pointer */ @@ -247,6 +261,8 @@ #define MSR_INTEGRITY_CAPS_ARRAY_BIST BIT(MSR_INTEGRITY_CAPS_ARRAY_BIST_BIT) #define MSR_INTEGRITY_CAPS_PERIODIC_BIST_BIT 4 #define MSR_INTEGRITY_CAPS_PERIODIC_BIST BIT(MSR_INTEGRITY_CAPS_PERIODIC_BIST_BIT) +#define MSR_INTEGRITY_CAPS_SBAF_BIT 8 +#define MSR_INTEGRITY_CAPS_SBAF BIT(MSR_INTEGRITY_CAPS_SBAF_BIT) #define MSR_INTEGRITY_CAPS_SAF_GEN_MASK GENMASK_ULL(10, 9) #define MSR_LBR_NHM_FROM 0x00000680 @@ -363,6 +379,12 @@ #define MSR_IA32_CR_PAT 0x00000277 +#define PAT_VALUE(p0, p1, p2, p3, p4, p5, p6, p7) \ + ((X86_MEMTYPE_ ## p0) | (X86_MEMTYPE_ ## p1 << 8) | \ + (X86_MEMTYPE_ ## p2 << 16) | (X86_MEMTYPE_ ## p3 << 24) | \ + (X86_MEMTYPE_ ## p4 << 32) | (X86_MEMTYPE_ ## p5 << 40) | \ + (X86_MEMTYPE_ ## p6 << 48) | (X86_MEMTYPE_ ## p7 << 56)) + #define MSR_IA32_DEBUGCTLMSR 0x000001d9 #define MSR_IA32_LASTBRANCHFROMIP 0x000001db #define MSR_IA32_LASTBRANCHTOIP 0x000001dc @@ -1157,15 +1179,6 @@ #define MSR_IA32_VMX_VMFUNC 0x00000491 #define MSR_IA32_VMX_PROCBASED_CTLS3 0x00000492 -/* VMX_BASIC bits and bitmasks */ -#define VMX_BASIC_VMCS_SIZE_SHIFT 32 -#define VMX_BASIC_TRUE_CTLS (1ULL << 55) -#define VMX_BASIC_64 0x0001000000000000LLU -#define VMX_BASIC_MEM_TYPE_SHIFT 50 -#define VMX_BASIC_MEM_TYPE_MASK 0x003c000000000000LLU -#define VMX_BASIC_MEM_TYPE_WB 6LLU -#define VMX_BASIC_INOUT 0x0040000000000000LLU - /* Resctrl MSRs: */ /* - Intel: */ #define MSR_IA32_L3_QOS_CFG 0xc81 @@ -1183,11 +1196,6 @@ #define MSR_IA32_SMBA_BW_BASE 0xc0000280 #define MSR_IA32_EVT_CFG_BASE 0xc0000400 -/* MSR_IA32_VMX_MISC bits */ -#define MSR_IA32_VMX_MISC_INTEL_PT (1ULL << 14) -#define MSR_IA32_VMX_MISC_VMWRITE_SHADOW_RO_FIELDS (1ULL << 29) -#define MSR_IA32_VMX_MISC_PREEMPTION_TIMER_SCALE 0x1F - /* AMD-V MSRs */ #define MSR_VM_CR 0xc0010114 #define MSR_VM_IGNNE 0xc0010115 diff --git a/tools/arch/x86/include/uapi/asm/kvm.h b/tools/arch/x86/include/uapi/asm/kvm.h index bf57a824f722..88585c1de416 100644 --- a/tools/arch/x86/include/uapi/asm/kvm.h +++ b/tools/arch/x86/include/uapi/asm/kvm.h @@ -439,6 +439,8 @@ struct kvm_sync_regs { #define KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT (1 << 4) #define KVM_X86_QUIRK_FIX_HYPERCALL_INSN (1 << 5) #define KVM_X86_QUIRK_MWAIT_NEVER_UD_FAULTS (1 << 6) +#define KVM_X86_QUIRK_SLOT_ZAP_ALL (1 << 7) +#define KVM_X86_QUIRK_STUFF_FEATURE_MSRS (1 << 8) #define KVM_STATE_NESTED_FORMAT_VMX 0 #define KVM_STATE_NESTED_FORMAT_SVM 1 diff --git a/tools/arch/x86/include/uapi/asm/unistd_32.h b/tools/arch/x86/include/uapi/asm/unistd_32.h index 9de35df1afc3..63182a023e9d 100644 --- a/tools/arch/x86/include/uapi/asm/unistd_32.h +++ b/tools/arch/x86/include/uapi/asm/unistd_32.h @@ -11,6 +11,9 @@ #ifndef __NR_getpgid #define __NR_getpgid 132 #endif +#ifndef __NR_capget +#define __NR_capget 184 +#endif #ifndef __NR_gettid #define __NR_gettid 224 #endif diff --git a/tools/arch/x86/include/uapi/asm/unistd_64.h b/tools/arch/x86/include/uapi/asm/unistd_64.h index d0f2043d7132..77311e8d1b5d 100644 --- a/tools/arch/x86/include/uapi/asm/unistd_64.h +++ b/tools/arch/x86/include/uapi/asm/unistd_64.h @@ -11,6 +11,9 @@ #ifndef __NR_getpgid #define __NR_getpgid 121 #endif +#ifndef __NR_capget +#define __NR_capget 125 +#endif #ifndef __NR_gettid #define __NR_gettid 186 #endif diff --git a/tools/arch/x86/vdso b/tools/arch/x86/vdso deleted file mode 120000 index 7eb962fd3454..000000000000 --- a/tools/arch/x86/vdso +++ /dev/null @@ -1 +0,0 @@ -../../../arch/x86/entry/vdso/
\ No newline at end of file diff --git a/tools/bpf/bpf_jit_disasm.c b/tools/bpf/bpf_jit_disasm.c index a90a5d110f92..1baee9e2aba9 100644 --- a/tools/bpf/bpf_jit_disasm.c +++ b/tools/bpf/bpf_jit_disasm.c @@ -210,7 +210,7 @@ static uint8_t *get_last_jit_image(char *haystack, size_t hlen, return NULL; } if (proglen > 1000000) { - printf("proglen of %d too big, stopping\n", proglen); + printf("proglen of %u too big, stopping\n", proglen); return NULL; } diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile index ba927379eb20..a4263dfb5e03 100644 --- a/tools/bpf/bpftool/Makefile +++ b/tools/bpf/bpftool/Makefile @@ -147,7 +147,11 @@ ifeq ($(feature-llvm),1) # If LLVM is available, use it for JIT disassembly CFLAGS += -DHAVE_LLVM_SUPPORT LLVM_CONFIG_LIB_COMPONENTS := mcdisassembler all-targets - CFLAGS += $(shell $(LLVM_CONFIG) --cflags) + # llvm-config always adds -D_GNU_SOURCE, however, it may already be in CFLAGS + # (e.g. when bpftool build is called from selftests build as selftests + # Makefile includes lib.mk which sets -D_GNU_SOURCE) which would cause + # compilation error due to redefinition. Let's filter it out here. + CFLAGS += $(filter-out -D_GNU_SOURCE,$(shell $(LLVM_CONFIG) --cflags)) LIBS += $(shell $(LLVM_CONFIG) --libs $(LLVM_CONFIG_LIB_COMPONENTS)) ifeq ($(shell $(LLVM_CONFIG) --shared-mode),static) LIBS += $(shell $(LLVM_CONFIG) --system-libs $(LLVM_CONFIG_LIB_COMPONENTS)) diff --git a/tools/bpf/bpftool/btf.c b/tools/bpf/bpftool/btf.c index 7d2af1ff3c8d..d005e4fd6128 100644 --- a/tools/bpf/bpftool/btf.c +++ b/tools/bpf/bpftool/btf.c @@ -1,11 +1,15 @@ // SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) /* Copyright (C) 2019 Facebook */ +#ifndef _GNU_SOURCE +#define _GNU_SOURCE +#endif #include <errno.h> #include <fcntl.h> #include <linux/err.h> #include <stdbool.h> #include <stdio.h> +#include <stdlib.h> #include <string.h> #include <unistd.h> #include <linux/btf.h> @@ -21,6 +25,7 @@ #include "main.h" #define KFUNC_DECL_TAG "bpf_kfunc" +#define FASTCALL_DECL_TAG "bpf_fastcall" static const char * const btf_kind_str[NR_BTF_KINDS] = { [BTF_KIND_UNKN] = "UNKNOWN", @@ -284,7 +289,7 @@ static int dump_btf_type(const struct btf *btf, __u32 id, } else { if (btf_kflag(t)) printf("\n\t'%s' val=%lldLL", name, - (unsigned long long)val); + (long long)val); else printf("\n\t'%s' val=%lluULL", name, (unsigned long long)val); @@ -464,19 +469,59 @@ static int dump_btf_raw(const struct btf *btf, return 0; } +struct ptr_array { + __u32 cnt; + __u32 cap; + const void **elems; +}; + +static int ptr_array_push(const void *ptr, struct ptr_array *arr) +{ + __u32 new_cap; + void *tmp; + + if (arr->cnt == arr->cap) { + new_cap = (arr->cap ?: 16) * 2; + tmp = realloc(arr->elems, sizeof(*arr->elems) * new_cap); + if (!tmp) + return -ENOMEM; + arr->elems = tmp; + arr->cap = new_cap; + } + arr->elems[arr->cnt++] = ptr; + return 0; +} + +static void ptr_array_free(struct ptr_array *arr) +{ + free(arr->elems); +} + +static int cmp_kfuncs(const void *pa, const void *pb, void *ctx) +{ + struct btf *btf = ctx; + const struct btf_type *a = *(void **)pa; + const struct btf_type *b = *(void **)pb; + + return strcmp(btf__str_by_offset(btf, a->name_off), + btf__str_by_offset(btf, b->name_off)); +} + static int dump_btf_kfuncs(struct btf_dump *d, const struct btf *btf) { LIBBPF_OPTS(btf_dump_emit_type_decl_opts, opts); - int cnt = btf__type_cnt(btf); - int i; + __u32 cnt = btf__type_cnt(btf), i, j; + struct ptr_array fastcalls = {}; + struct ptr_array kfuncs = {}; + int err = 0; printf("\n/* BPF kfuncs */\n"); printf("#ifndef BPF_NO_KFUNC_PROTOTYPES\n"); for (i = 1; i < cnt; i++) { const struct btf_type *t = btf__type_by_id(btf, i); + const struct btf_type *ft; const char *name; - int err; if (!btf_is_decl_tag(t)) continue; @@ -484,27 +529,53 @@ static int dump_btf_kfuncs(struct btf_dump *d, const struct btf *btf) if (btf_decl_tag(t)->component_idx != -1) continue; - name = btf__name_by_offset(btf, t->name_off); - if (strncmp(name, KFUNC_DECL_TAG, sizeof(KFUNC_DECL_TAG))) + ft = btf__type_by_id(btf, t->type); + if (!btf_is_func(ft)) continue; - t = btf__type_by_id(btf, t->type); - if (!btf_is_func(t)) - continue; + name = btf__name_by_offset(btf, t->name_off); + if (strncmp(name, KFUNC_DECL_TAG, sizeof(KFUNC_DECL_TAG)) == 0) { + err = ptr_array_push(ft, &kfuncs); + if (err) + goto out; + } + + if (strncmp(name, FASTCALL_DECL_TAG, sizeof(FASTCALL_DECL_TAG)) == 0) { + err = ptr_array_push(ft, &fastcalls); + if (err) + goto out; + } + } + + /* Sort kfuncs by name for improved vmlinux.h stability */ + qsort_r(kfuncs.elems, kfuncs.cnt, sizeof(*kfuncs.elems), cmp_kfuncs, (void *)btf); + for (i = 0; i < kfuncs.cnt; i++) { + const struct btf_type *t = kfuncs.elems[i]; printf("extern "); + /* Assume small amount of fastcall kfuncs */ + for (j = 0; j < fastcalls.cnt; j++) { + if (fastcalls.elems[j] == t) { + printf("__bpf_fastcall "); + break; + } + } + opts.field_name = btf__name_by_offset(btf, t->name_off); err = btf_dump__emit_type_decl(d, t->type, &opts); if (err) - return err; + goto out; printf(" __weak __ksym;\n"); } printf("#endif\n\n"); - return 0; +out: + ptr_array_free(&fastcalls); + ptr_array_free(&kfuncs); + return err; } static void __printf(2, 0) btf_dump_printf(void *ctx, @@ -718,6 +789,13 @@ static int dump_btf_c(const struct btf *btf, printf("#ifndef __weak\n"); printf("#define __weak __attribute__((weak))\n"); printf("#endif\n\n"); + printf("#ifndef __bpf_fastcall\n"); + printf("#if __has_attribute(bpf_fastcall)\n"); + printf("#define __bpf_fastcall __attribute__((bpf_fastcall))\n"); + printf("#else\n"); + printf("#define __bpf_fastcall\n"); + printf("#endif\n"); + printf("#endif\n\n"); if (root_type_cnt) { for (i = 0; i < root_type_cnt; i++) { diff --git a/tools/bpf/bpftool/jit_disasm.c b/tools/bpf/bpftool/jit_disasm.c index 7b8d9ec89ebd..c032d2c6ab6d 100644 --- a/tools/bpf/bpftool/jit_disasm.c +++ b/tools/bpf/bpftool/jit_disasm.c @@ -80,7 +80,8 @@ symbol_lookup_callback(__maybe_unused void *disasm_info, static int init_context(disasm_ctx_t *ctx, const char *arch, __maybe_unused const char *disassembler_options, - __maybe_unused unsigned char *image, __maybe_unused ssize_t len) + __maybe_unused unsigned char *image, __maybe_unused ssize_t len, + __maybe_unused __u64 func_ksym) { char *triple; @@ -109,12 +110,13 @@ static void destroy_context(disasm_ctx_t *ctx) } static int -disassemble_insn(disasm_ctx_t *ctx, unsigned char *image, ssize_t len, int pc) +disassemble_insn(disasm_ctx_t *ctx, unsigned char *image, ssize_t len, int pc, + __u64 func_ksym) { char buf[256]; int count; - count = LLVMDisasmInstruction(*ctx, image + pc, len - pc, pc, + count = LLVMDisasmInstruction(*ctx, image + pc, len - pc, func_ksym + pc, buf, sizeof(buf)); if (json_output) printf_json(buf); @@ -136,8 +138,21 @@ int disasm_init(void) #ifdef HAVE_LIBBFD_SUPPORT #define DISASM_SPACER "\t" +struct disasm_info { + struct disassemble_info info; + __u64 func_ksym; +}; + +static void disasm_print_addr(bfd_vma addr, struct disassemble_info *info) +{ + struct disasm_info *dinfo = container_of(info, struct disasm_info, info); + + addr += dinfo->func_ksym; + generic_print_address(addr, info); +} + typedef struct { - struct disassemble_info *info; + struct disasm_info *info; disassembler_ftype disassemble; bfd *bfdf; } disasm_ctx_t; @@ -215,7 +230,7 @@ static int fprintf_json_styled(void *out, static int init_context(disasm_ctx_t *ctx, const char *arch, const char *disassembler_options, - unsigned char *image, ssize_t len) + unsigned char *image, ssize_t len, __u64 func_ksym) { struct disassemble_info *info; char tpath[PATH_MAX]; @@ -238,12 +253,13 @@ static int init_context(disasm_ctx_t *ctx, const char *arch, } bfdf = ctx->bfdf; - ctx->info = malloc(sizeof(struct disassemble_info)); + ctx->info = malloc(sizeof(struct disasm_info)); if (!ctx->info) { p_err("mem alloc failed"); goto err_close; } - info = ctx->info; + ctx->info->func_ksym = func_ksym; + info = &ctx->info->info; if (json_output) init_disassemble_info_compat(info, stdout, @@ -272,6 +288,7 @@ static int init_context(disasm_ctx_t *ctx, const char *arch, info->disassembler_options = disassembler_options; info->buffer = image; info->buffer_length = len; + info->print_address_func = disasm_print_addr; disassemble_init_for_target(info); @@ -304,9 +321,10 @@ static void destroy_context(disasm_ctx_t *ctx) static int disassemble_insn(disasm_ctx_t *ctx, __maybe_unused unsigned char *image, - __maybe_unused ssize_t len, int pc) + __maybe_unused ssize_t len, int pc, + __maybe_unused __u64 func_ksym) { - return ctx->disassemble(pc, ctx->info); + return ctx->disassemble(pc, &ctx->info->info); } int disasm_init(void) @@ -331,7 +349,7 @@ int disasm_print_insn(unsigned char *image, ssize_t len, int opcodes, if (!len) return -1; - if (init_context(&ctx, arch, disassembler_options, image, len)) + if (init_context(&ctx, arch, disassembler_options, image, len, func_ksym)) return -1; if (json_output) @@ -360,7 +378,7 @@ int disasm_print_insn(unsigned char *image, ssize_t len, int opcodes, printf("%4x:" DISASM_SPACER, pc); } - count = disassemble_insn(&ctx, image, len, pc); + count = disassemble_insn(&ctx, image, len, pc, func_ksym); if (json_output) { /* Operand array, was started in fprintf_json. Before diff --git a/tools/bpf/bpftool/pids.c b/tools/bpf/bpftool/pids.c index 9b898571b49e..23f488cf1740 100644 --- a/tools/bpf/bpftool/pids.c +++ b/tools/bpf/bpftool/pids.c @@ -54,6 +54,7 @@ static void add_ref(struct hashmap *map, struct pid_iter_entry *e) ref = &refs->refs[refs->ref_cnt]; ref->pid = e->pid; memcpy(ref->comm, e->comm, sizeof(ref->comm)); + ref->comm[sizeof(ref->comm) - 1] = '\0'; refs->ref_cnt++; return; @@ -77,6 +78,7 @@ static void add_ref(struct hashmap *map, struct pid_iter_entry *e) ref = &refs->refs[0]; ref->pid = e->pid; memcpy(ref->comm, e->comm, sizeof(ref->comm)); + ref->comm[sizeof(ref->comm) - 1] = '\0'; refs->ref_cnt = 1; refs->has_bpf_cookie = e->has_bpf_cookie; refs->bpf_cookie = e->bpf_cookie; diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c index 2ff949ea82fa..e71be67f1d86 100644 --- a/tools/bpf/bpftool/prog.c +++ b/tools/bpf/bpftool/prog.c @@ -822,11 +822,18 @@ prog_dump(struct bpf_prog_info *info, enum dump_mode mode, printf("%s:\n", sym_name); } - if (disasm_print_insn(img, lens[i], opcodes, - name, disasm_opt, btf, - prog_linfo, ksyms[i], i, - linum)) - goto exit_free; + if (ksyms) { + if (disasm_print_insn(img, lens[i], opcodes, + name, disasm_opt, btf, + prog_linfo, ksyms[i], i, + linum)) + goto exit_free; + } else { + if (disasm_print_insn(img, lens[i], opcodes, + name, disasm_opt, btf, + NULL, 0, 0, false)) + goto exit_free; + } img += lens[i]; diff --git a/tools/bpf/resolve_btfids/main.c b/tools/bpf/resolve_btfids/main.c index d54aaa0619df..bd9f960bce3d 100644 --- a/tools/bpf/resolve_btfids/main.c +++ b/tools/bpf/resolve_btfids/main.c @@ -679,8 +679,8 @@ static int sets_patch(struct object *obj) next = rb_first(&obj->sets); while (next) { - struct btf_id_set8 *set8; - struct btf_id_set *set; + struct btf_id_set8 *set8 = NULL; + struct btf_id_set *set = NULL; unsigned long addr, off; struct btf_id *id; diff --git a/tools/bpf/runqslower/runqslower.bpf.c b/tools/bpf/runqslower/runqslower.bpf.c index 9a5c1f008fe6..fced54a3adf6 100644 --- a/tools/bpf/runqslower/runqslower.bpf.c +++ b/tools/bpf/runqslower/runqslower.bpf.c @@ -70,7 +70,6 @@ int handle__sched_switch(u64 *ctx) struct task_struct *next = (struct task_struct *)ctx[2]; struct runq_event event = {}; u64 *tsp, delta_us; - long state; u32 pid; /* ivcsw: treat like an enqueue event and store timestamp */ diff --git a/tools/build/Makefile.feature b/tools/build/Makefile.feature index ffd117135094..bca47d136f05 100644 --- a/tools/build/Makefile.feature +++ b/tools/build/Makefile.feature @@ -30,9 +30,7 @@ endef # FEATURE_TESTS_BASIC := \ backtrace \ - dwarf \ - dwarf_getlocations \ - dwarf_getcfi \ + libdw \ eventfd \ fortify-source \ get_current_dir_name \ @@ -53,6 +51,7 @@ FEATURE_TESTS_BASIC := \ libslang-include-subdir \ libtraceevent \ libtracefs \ + libcpupower \ libcrypto \ libunwind \ pthread-attr-setaffinity-np \ @@ -60,7 +59,6 @@ FEATURE_TESTS_BASIC := \ reallocarray \ stackprotector-all \ timerfd \ - libdw-dwarf-unwind \ zlib \ lzma \ get_cpuid \ @@ -120,8 +118,7 @@ ifeq ($(FEATURE_TESTS),all) endif FEATURE_DISPLAY ?= \ - dwarf \ - dwarf_getlocations \ + libdw \ glibc \ libbfd \ libbfd-buildid \ @@ -133,7 +130,6 @@ FEATURE_DISPLAY ?= \ libpython \ libcrypto \ libunwind \ - libdw-dwarf-unwind \ libcapstone \ llvm-perf \ zlib \ @@ -233,7 +229,7 @@ endef # # generates feature value assignment for name, like: -# $(call feature_assign,dwarf) == feature-dwarf=1 +# $(call feature_assign,libdw) == feature-libdw=1 # feature_assign = feature-$(1)=$(feature-$(1)) diff --git a/tools/build/feature/Makefile b/tools/build/feature/Makefile index 5938cf799dc6..043dfd00fce7 100644 --- a/tools/build/feature/Makefile +++ b/tools/build/feature/Makefile @@ -5,9 +5,7 @@ FILES= \ test-all.bin \ test-backtrace.bin \ test-bionic.bin \ - test-dwarf.bin \ - test-dwarf_getlocations.bin \ - test-dwarf_getcfi.bin \ + test-libdw.bin \ test-eventfd.bin \ test-fortify-source.bin \ test-get_current_dir_name.bin \ @@ -38,6 +36,7 @@ FILES= \ test-libslang.bin \ test-libslang-include-subdir.bin \ test-libtraceevent.bin \ + test-libcpupower.bin \ test-libtracefs.bin \ test-libcrypto.bin \ test-libunwind.bin \ @@ -52,7 +51,6 @@ FILES= \ test-pthread-barrier.bin \ test-stackprotector-all.bin \ test-timerfd.bin \ - test-libdw-dwarf-unwind.bin \ test-libbabeltrace.bin \ test-libcapstone.bin \ test-compile-32.bin \ @@ -168,29 +166,26 @@ $(OUTPUT)test-libopencsd.bin: $(BUILD) # -lopencsd_c_api -lopencsd provided by # $(FEATURE_CHECK_LDFLAGS-libopencsd) -DWARFLIBS := -ldw +DWLIBS := -ldw ifeq ($(findstring -static,${LDFLAGS}),-static) - DWARFLIBS += -lelf -lz -llzma -lbz2 -lzstd + DWLIBS += -lelf -lz -llzma -lbz2 -lzstd - LIBDW_VERSION := $(shell $(PKG_CONFIG) --modversion libdw) + LIBDW_VERSION := $(shell $(PKG_CONFIG) --modversion libdw).0.0 LIBDW_VERSION_1 := $(word 1, $(subst ., ,$(LIBDW_VERSION))) LIBDW_VERSION_2 := $(word 2, $(subst ., ,$(LIBDW_VERSION))) # Elfutils merged libebl.a into libdw.a starting from version 0.177, # Link libebl.a only if libdw is older than this version. ifeq ($(shell test $(LIBDW_VERSION_2) -lt 177; echo $$?),0) - DWARFLIBS += -lebl + DWLIBS += -lebl endif -endif - -$(OUTPUT)test-dwarf.bin: - $(BUILD) $(DWARFLIBS) -$(OUTPUT)test-dwarf_getlocations.bin: - $(BUILD) $(DWARFLIBS) + # Must put -ldl after -lebl for dependency + DWARFLIBS += -ldl +endif -$(OUTPUT)test-dwarf_getcfi.bin: - $(BUILD) $(DWARFLIBS) +$(OUTPUT)test-libdw.bin: + $(BUILD) $(DWLIBS) $(OUTPUT)test-libelf-getphdrnum.bin: $(BUILD) -lelf @@ -245,6 +240,9 @@ $(OUTPUT)test-libslang-include-subdir.bin: $(OUTPUT)test-libtraceevent.bin: $(BUILD) -ltraceevent +$(OUTPUT)test-libcpupower.bin: + $(BUILD) -lcpupower + $(OUTPUT)test-libtracefs.bin: $(BUILD) $(shell $(PKG_CONFIG) --cflags libtracefs 2>/dev/null) -ltracefs @@ -314,9 +312,6 @@ $(OUTPUT)test-backtrace.bin: $(OUTPUT)test-timerfd.bin: $(BUILD) -$(OUTPUT)test-libdw-dwarf-unwind.bin: - $(BUILD) # -ldw provided by $(FEATURE_CHECK_LDFLAGS-libdw-dwarf-unwind) - $(OUTPUT)test-libbabeltrace.bin: $(BUILD) # -lbabeltrace provided by $(FEATURE_CHECK_LDFLAGS-libbabeltrace) diff --git a/tools/build/feature/test-all.c b/tools/build/feature/test-all.c index 6f4bf386a3b5..59ef3d7fe6a4 100644 --- a/tools/build/feature/test-all.c +++ b/tools/build/feature/test-all.c @@ -38,12 +38,8 @@ # include "test-glibc.c" #undef main -#define main main_test_dwarf -# include "test-dwarf.c" -#undef main - -#define main main_test_dwarf_getlocations -# include "test-dwarf_getlocations.c" +#define main main_test_libdw +# include "test-libdw.c" #undef main #define main main_test_eventfd @@ -98,10 +94,6 @@ # include "test-stackprotector-all.c" #undef main -#define main main_test_libdw_dwarf_unwind -# include "test-libdw-dwarf-unwind.c" -#undef main - #define main main_test_zlib # include "test-zlib.c" #undef main @@ -187,8 +179,7 @@ int main(int argc, char *argv[]) main_test_get_current_dir_name(); main_test_gettid(); main_test_glibc(); - main_test_dwarf(); - main_test_dwarf_getlocations(); + main_test_libdw(); main_test_eventfd(); main_test_libelf_getphdrnum(); main_test_libelf_gelf_getnote(); @@ -202,7 +193,6 @@ int main(int argc, char *argv[]) main_test_numa_num_possible_cpus(); main_test_timerfd(); main_test_stackprotector_all(); - main_test_libdw_dwarf_unwind(); main_test_zlib(); main_test_pthread_attr_setaffinity_np(); main_test_pthread_barrier(); diff --git a/tools/build/feature/test-dwarf.c b/tools/build/feature/test-dwarf.c deleted file mode 100644 index 8d474bd7371b..000000000000 --- a/tools/build/feature/test-dwarf.c +++ /dev/null @@ -1,11 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include <dwarf.h> -#include <elfutils/libdw.h> -#include <elfutils/version.h> - -int main(void) -{ - Dwarf *dbg = dwarf_begin(0, DWARF_C_READ); - - return (long)dbg; -} diff --git a/tools/build/feature/test-dwarf_getcfi.c b/tools/build/feature/test-dwarf_getcfi.c deleted file mode 100644 index 50e7d7cb7bdf..000000000000 --- a/tools/build/feature/test-dwarf_getcfi.c +++ /dev/null @@ -1,9 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include <stdio.h> -#include <elfutils/libdw.h> - -int main(void) -{ - Dwarf *dwarf = NULL; - return dwarf_getcfi(dwarf) == NULL; -} diff --git a/tools/build/feature/test-dwarf_getlocations.c b/tools/build/feature/test-dwarf_getlocations.c deleted file mode 100644 index 78fb4a1fa68c..000000000000 --- a/tools/build/feature/test-dwarf_getlocations.c +++ /dev/null @@ -1,13 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include <stdlib.h> -#include <elfutils/libdw.h> - -int main(void) -{ - Dwarf_Addr base, start, end; - Dwarf_Attribute attr; - Dwarf_Op *op; - size_t nops; - ptrdiff_t offset = 0; - return (int)dwarf_getlocations(&attr, offset, &base, &start, &end, &op, &nops); -} diff --git a/tools/build/feature/test-libcpupower.c b/tools/build/feature/test-libcpupower.c new file mode 100644 index 000000000000..a346aa332a71 --- /dev/null +++ b/tools/build/feature/test-libcpupower.c @@ -0,0 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <cpuidle.h> + +int main(void) +{ + int rv = cpuidle_state_count(0); + return rv; +} diff --git a/tools/build/feature/test-libdw-dwarf-unwind.c b/tools/build/feature/test-libdw-dwarf-unwind.c deleted file mode 100644 index ed03d9505609..000000000000 --- a/tools/build/feature/test-libdw-dwarf-unwind.c +++ /dev/null @@ -1,14 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 - -#include <elfutils/libdwfl.h> - -int main(void) -{ - /* - * This function is guarded via: __nonnull_attribute__ (1, 2). - * Passing '1' as arguments value. This code is never executed, - * only compiled. - */ - dwfl_thread_getframes((void *) 1, (void *) 1, NULL); - return 0; -} diff --git a/tools/build/feature/test-libdw.c b/tools/build/feature/test-libdw.c new file mode 100644 index 000000000000..2fb59479ab77 --- /dev/null +++ b/tools/build/feature/test-libdw.c @@ -0,0 +1,56 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <stdlib.h> +#include <dwarf.h> +#include <elfutils/libdw.h> +#include <elfutils/libdwfl.h> +#include <elfutils/version.h> + +int test_libdw(void) +{ + Dwarf *dbg = dwarf_begin(0, DWARF_C_READ); + + return (long)dbg; +} + +int test_libdw_unwind(void) +{ + /* + * This function is guarded via: __nonnull_attribute__ (1, 2). + * Passing '1' as arguments value. This code is never executed, + * only compiled. + */ + dwfl_thread_getframes((void *) 1, (void *) 1, NULL); + return 0; +} + +int test_libdw_getlocations(void) +{ + Dwarf_Addr base, start, end; + Dwarf_Attribute attr; + Dwarf_Op *op; + size_t nops; + ptrdiff_t offset = 0; + + return (int)dwarf_getlocations(&attr, offset, &base, &start, &end, &op, &nops); +} + +int test_libdw_getcfi(void) +{ + Dwarf *dwarf = NULL; + + return dwarf_getcfi(dwarf) == NULL; +} + +int test_elfutils(void) +{ + Dwarf_CFI *cfi = NULL; + + dwarf_cfi_end(cfi); + return 0; +} + +int main(void) +{ + return test_libdw() + test_libdw_unwind() + test_libdw_getlocations() + + test_libdw_getcfi() + test_elfutils(); +} diff --git a/tools/build/feature/test-libtraceevent.c b/tools/build/feature/test-libtraceevent.c index 416b11ffd4b4..804ad80dbbd9 100644 --- a/tools/build/feature/test-libtraceevent.c +++ b/tools/build/feature/test-libtraceevent.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -#include <traceevent/trace-seq.h> +#include <trace-seq.h> int main(void) { diff --git a/tools/firewire/decode-fcp.c b/tools/firewire/decode-fcp.c index b67ebc88434d..f115a3be8d1e 100644 --- a/tools/firewire/decode-fcp.c +++ b/tools/firewire/decode-fcp.c @@ -160,7 +160,7 @@ decode_avc(struct link_transaction *t) name = info->name; } - printf("av/c %s, subunit_type=%s, subunit_id=%d, opcode=%s", + printf("av/c %s, subunit_type=%s, subunit_id=%u, opcode=%s", ctype_names[frame->ctype], subunit_type_names[frame->subunit_type], frame->subunit_id, name); diff --git a/tools/firewire/nosy-dump.c b/tools/firewire/nosy-dump.c index 156e0356e814..9a906de3a9ef 100644 --- a/tools/firewire/nosy-dump.c +++ b/tools/firewire/nosy-dump.c @@ -771,7 +771,7 @@ print_packet(uint32_t *data, size_t length) if (pp->phy_config.set_root) printf(" set_root_id=%02x", pp->phy_config.root_id); if (pp->phy_config.set_gap_count) - printf(" set_gap_count=%d", pp->phy_config.gap_count); + printf(" set_gap_count=%u", pp->phy_config.gap_count); } break; @@ -781,13 +781,13 @@ print_packet(uint32_t *data, size_t length) case PHY_PACKET_SELF_ID: if (pp->self_id.extended) { - printf("extended self id: phy_id=%02x, seq=%d", + printf("extended self id: phy_id=%02x, seq=%u", pp->ext_self_id.phy_id, pp->ext_self_id.sequence); } else { static const char * const speed_names[] = { "S100", "S200", "S400", "BETA" }; - printf("self id: phy_id=%02x, link %s, gap_count=%d, speed=%s%s%s", + printf("self id: phy_id=%02x, link %s, gap_count=%u speed=%s%s%s", pp->self_id.phy_id, (pp->self_id.link_active ? "active" : "not active"), pp->self_id.gap_count, diff --git a/tools/gpio/gpio-event-mon.c b/tools/gpio/gpio-event-mon.c index 5dee2b98ab60..b70813b0bf8e 100644 --- a/tools/gpio/gpio-event-mon.c +++ b/tools/gpio/gpio-event-mon.c @@ -69,14 +69,14 @@ int monitor_device(const char *device_name, } if (num_lines == 1) { - fprintf(stdout, "Monitoring line %d on %s\n", lines[0], device_name); + fprintf(stdout, "Monitoring line %u on %s\n", lines[0], device_name); fprintf(stdout, "Initial line value: %d\n", gpiotools_test_bit(values.bits, 0)); } else { - fprintf(stdout, "Monitoring lines %d", lines[0]); + fprintf(stdout, "Monitoring lines %u", lines[0]); for (i = 1; i < num_lines - 1; i++) - fprintf(stdout, ", %d", lines[i]); - fprintf(stdout, " and %d on %s\n", lines[i], device_name); + fprintf(stdout, ", %u", lines[i]); + fprintf(stdout, " and %u on %s\n", lines[i], device_name); fprintf(stdout, "Initial line values: %d", gpiotools_test_bit(values.bits, 0)); for (i = 1; i < num_lines - 1; i++) diff --git a/tools/gpio/gpio-sloppy-logic-analyzer.sh b/tools/gpio/gpio-sloppy-logic-analyzer.sh index ed21a110df5e..3ef2278e49f9 100755 --- a/tools/gpio/gpio-sloppy-logic-analyzer.sh +++ b/tools/gpio/gpio-sloppy-logic-analyzer.sh @@ -113,7 +113,7 @@ init_cpu() taskset -p "$newmask" "$p" || continue done 2>/dev/null >/dev/null - # Big hammer! Working with 'rcu_momentary_dyntick_idle()' for a more fine-grained solution + # Big hammer! Working with 'rcu_momentary_eqs()' for a more fine-grained solution # still printed warnings. Same for re-enabling the stall detector after sampling. echo 1 > /sys/module/rcupdate/parameters/rcu_cpu_stall_suppress diff --git a/tools/hv/.gitignore b/tools/hv/.gitignore new file mode 100644 index 000000000000..0c5bc15d602f --- /dev/null +++ b/tools/hv/.gitignore @@ -0,0 +1,3 @@ +hv_fcopy_uio_daemon +hv_kvp_daemon +hv_vss_daemon diff --git a/tools/hv/hv_fcopy_uio_daemon.c b/tools/hv/hv_fcopy_uio_daemon.c index 7a00f3066a98..0198321d14a2 100644 --- a/tools/hv/hv_fcopy_uio_daemon.c +++ b/tools/hv/hv_fcopy_uio_daemon.c @@ -35,8 +35,6 @@ #define WIN8_SRV_MINOR 1 #define WIN8_SRV_VERSION (WIN8_SRV_MAJOR << 16 | WIN8_SRV_MINOR) -#define MAX_FOLDER_NAME 15 -#define MAX_PATH_LEN 15 #define FCOPY_UIO "/sys/bus/vmbus/devices/eb765408-105f-49b6-b4aa-c123b64d17d4/uio" #define FCOPY_VER_COUNT 1 @@ -51,7 +49,7 @@ static const int fw_versions[] = { #define HV_RING_SIZE 0x4000 /* 16KB ring buffer size */ -unsigned char desc[HV_RING_SIZE]; +static unsigned char desc[HV_RING_SIZE]; static int target_fd; static char target_fname[PATH_MAX]; @@ -409,8 +407,8 @@ int main(int argc, char *argv[]) struct vmbus_br txbr, rxbr; void *ring; uint32_t len = HV_RING_SIZE; - char uio_name[MAX_FOLDER_NAME] = {0}; - char uio_dev_path[MAX_PATH_LEN] = {0}; + char uio_name[NAME_MAX] = {0}; + char uio_dev_path[PATH_MAX] = {0}; static struct option long_options[] = { {"help", no_argument, 0, 'h' }, @@ -468,8 +466,10 @@ int main(int argc, char *argv[]) */ ret = pread(fcopy_fd, &tmp, sizeof(int), 0); if (ret < 0) { + if (errno == EINTR || errno == EAGAIN) + continue; syslog(LOG_ERR, "pread failed: %s", strerror(errno)); - continue; + goto close; } len = HV_RING_SIZE; diff --git a/tools/hv/hv_get_dns_info.sh b/tools/hv/hv_get_dns_info.sh index 058c17b46ffc..268521234d4b 100755 --- a/tools/hv/hv_get_dns_info.sh +++ b/tools/hv/hv_get_dns_info.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/bin/sh # This example script parses /etc/resolv.conf to retrive DNS information. # In the interest of keeping the KVP daemon code free of distro specific @@ -10,4 +10,4 @@ # this script can be based on the Network Manager APIs for retrieving DNS # entries. -cat /etc/resolv.conf 2>/dev/null | awk '/^nameserver/ { print $2 }' +exec awk '/^nameserver/ { print $2 }' /etc/resolv.conf 2>/dev/null diff --git a/tools/hv/hv_kvp_daemon.c b/tools/hv/hv_kvp_daemon.c index ae57bf69ad4a..04ba035d67e9 100644 --- a/tools/hv/hv_kvp_daemon.c +++ b/tools/hv/hv_kvp_daemon.c @@ -725,7 +725,7 @@ static void kvp_get_ipconfig_info(char *if_name, * . */ - sprintf(cmd, KVP_SCRIPTS_PATH "%s", "hv_get_dns_info"); + sprintf(cmd, "exec %s %s", KVP_SCRIPTS_PATH "hv_get_dns_info", if_name); /* * Execute the command to gather DNS info. @@ -742,7 +742,7 @@ static void kvp_get_ipconfig_info(char *if_name, * Enabled: DHCP enabled. */ - sprintf(cmd, KVP_SCRIPTS_PATH "%s %s", "hv_get_dhcp_info", if_name); + sprintf(cmd, "exec %s %s", KVP_SCRIPTS_PATH "hv_get_dhcp_info", if_name); file = popen(cmd, "r"); if (file == NULL) @@ -1606,8 +1606,9 @@ static int kvp_set_ip_info(char *if_name, struct hv_kvp_ipaddr_value *new_val) * invoke the external script to do its magic. */ - str_len = snprintf(cmd, sizeof(cmd), KVP_SCRIPTS_PATH "%s %s %s", - "hv_set_ifconfig", if_filename, nm_filename); + str_len = snprintf(cmd, sizeof(cmd), "exec %s %s %s", + KVP_SCRIPTS_PATH "hv_set_ifconfig", + if_filename, nm_filename); /* * This is a little overcautious, but it's necessary to suppress some * false warnings from gcc 8.0.1. diff --git a/tools/hv/hv_set_ifconfig.sh b/tools/hv/hv_set_ifconfig.sh index 440a91b35823..2f8baed2b8f7 100755 --- a/tools/hv/hv_set_ifconfig.sh +++ b/tools/hv/hv_set_ifconfig.sh @@ -81,7 +81,7 @@ echo "ONBOOT=yes" >> $1 cp $1 /etc/sysconfig/network-scripts/ -chmod 600 $2 +umask 0177 interface=$(echo $2 | awk -F - '{ print $2 }') filename="${2##*/}" diff --git a/tools/iio/iio_event_monitor.c b/tools/iio/iio_event_monitor.c index 8073c9e4fe46..cccf62ea2b8f 100644 --- a/tools/iio/iio_event_monitor.c +++ b/tools/iio/iio_event_monitor.c @@ -63,6 +63,7 @@ static const char * const iio_chan_type_name_spec[] = { [IIO_DELTA_VELOCITY] = "deltavelocity", [IIO_COLORTEMP] = "colortemp", [IIO_CHROMATICITY] = "chromaticity", + [IIO_ATTENTION] = "attention", }; static const char * const iio_ev_type_text[] = { @@ -183,6 +184,7 @@ static bool event_is_known(struct iio_event_data *event) case IIO_DELTA_VELOCITY: case IIO_COLORTEMP: case IIO_CHROMATICITY: + case IIO_ATTENTION: break; default: return false; @@ -449,6 +451,7 @@ error_free_chrdev_name: enable_events(dev_dir_name, 0); free(chrdev_name); + free(dev_dir_name); return ret; } diff --git a/tools/include/linux/bits.h b/tools/include/linux/bits.h index 0eb24d21aac2..60044b608817 100644 --- a/tools/include/linux/bits.h +++ b/tools/include/linux/bits.h @@ -36,4 +36,19 @@ #define GENMASK_ULL(h, l) \ (GENMASK_INPUT_CHECK(h, l) + __GENMASK_ULL(h, l)) +#if !defined(__ASSEMBLY__) +/* + * Missing asm support + * + * __GENMASK_U128() depends on _BIT128() which would not work + * in the asm code, as it shifts an 'unsigned __init128' data + * type instead of direct representation of 128 bit constants + * such as long and unsigned long. The fundamental problem is + * that a 128 bit constant will get silently truncated by the + * gcc compiler. + */ +#define GENMASK_U128(h, l) \ + (GENMASK_INPUT_CHECK(h, l) + __GENMASK_U128(h, l)) +#endif + #endif /* __LINUX_BITS_H */ diff --git a/tools/include/linux/compiler-gcc.h b/tools/include/linux/compiler-gcc.h index 62e7c901ac28..e20f98e14e81 100644 --- a/tools/include/linux/compiler-gcc.h +++ b/tools/include/linux/compiler-gcc.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _TOOLS_LINUX_COMPILER_H_ -#error "Please don't include <linux/compiler-gcc.h> directly, include <linux/compiler.h> instead." +#error "Please do not include <linux/compiler-gcc.h> directly, include <linux/compiler.h> instead." #endif /* diff --git a/tools/include/linux/unaligned.h b/tools/include/linux/unaligned.h index bc0633bc4650..395a4464fe73 100644 --- a/tools/include/linux/unaligned.h +++ b/tools/include/linux/unaligned.h @@ -9,16 +9,7 @@ #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wpacked" #pragma GCC diagnostic ignored "-Wattributes" - -#define __get_unaligned_t(type, ptr) ({ \ - const struct { type x; } __packed *__pptr = (typeof(__pptr))(ptr); \ - __pptr->x; \ -}) - -#define __put_unaligned_t(type, val, ptr) do { \ - struct { type x; } __packed *__pptr = (typeof(__pptr))(ptr); \ - __pptr->x = (val); \ -} while (0) +#include <vdso/unaligned.h> #define get_unaligned(ptr) __get_unaligned_t(typeof(*(ptr)), (ptr)) #define put_unaligned(val, ptr) __put_unaligned_t(typeof(*(ptr)), (val), (ptr)) diff --git a/tools/include/nolibc/arch-s390.h b/tools/include/nolibc/arch-s390.h index 2ec13d8b9a2d..f9ab83a219b8 100644 --- a/tools/include/nolibc/arch-s390.h +++ b/tools/include/nolibc/arch-s390.h @@ -10,6 +10,7 @@ #include "compiler.h" #include "crt.h" +#include "std.h" /* Syscalls for s390: * - registers are 64-bit diff --git a/tools/include/nolibc/compiler.h b/tools/include/nolibc/compiler.h index 9bc6a706a332..fa1f547e7f13 100644 --- a/tools/include/nolibc/compiler.h +++ b/tools/include/nolibc/compiler.h @@ -32,4 +32,10 @@ # define __no_stack_protector __attribute__((__optimize__("-fno-stack-protector"))) #endif /* __nolibc_has_attribute(no_stack_protector) */ +#if __nolibc_has_attribute(fallthrough) +# define __nolibc_fallthrough do { } while (0); __attribute__((fallthrough)) +#else +# define __nolibc_fallthrough do { } while (0) +#endif /* __nolibc_has_attribute(fallthrough) */ + #endif /* _NOLIBC_COMPILER_H */ diff --git a/tools/include/nolibc/stdio.h b/tools/include/nolibc/stdio.h index c968dbbc4ef8..3892034198dd 100644 --- a/tools/include/nolibc/stdio.h +++ b/tools/include/nolibc/stdio.h @@ -15,6 +15,7 @@ #include "stdarg.h" #include "stdlib.h" #include "string.h" +#include "compiler.h" #ifndef EOF #define EOF (-1) @@ -264,7 +265,7 @@ int vfprintf(FILE *stream, const char *fmt, va_list args) case 'p': *(out++) = '0'; *(out++) = 'x'; - /* fall through */ + __nolibc_fallthrough; default: /* 'x' and 'p' above */ u64toh_r(v, out); break; diff --git a/tools/include/uapi/asm-generic/mman-common.h b/tools/include/uapi/asm-generic/mman-common.h index 6ce1f1ceb432..1ea2c4c33b86 100644 --- a/tools/include/uapi/asm-generic/mman-common.h +++ b/tools/include/uapi/asm-generic/mman-common.h @@ -79,6 +79,9 @@ #define MADV_COLLAPSE 25 /* Synchronous hugepage collapse */ +#define MADV_GUARD_INSTALL 102 /* fatal signal on access to range */ +#define MADV_GUARD_REMOVE 103 /* unguard range */ + /* compatibility flags */ #define MAP_FILE 0 diff --git a/tools/include/uapi/asm-generic/mman.h b/tools/include/uapi/asm-generic/mman.h index 406f7718f9ad..51d2556af54a 100644 --- a/tools/include/uapi/asm-generic/mman.h +++ b/tools/include/uapi/asm-generic/mman.h @@ -19,4 +19,8 @@ #define MCL_FUTURE 2 /* lock all future mappings */ #define MCL_ONFAULT 4 /* lock all pages that are faulted in */ +#define SHADOW_STACK_SET_TOKEN (1ULL << 0) /* Set up a restore token in the shadow stack */ +#define SHADOW_STACK_SET_MARKER (1ULL << 1) /* Set up a top of stack marker in the shadow stack */ + + #endif /* __ASM_GENERIC_MMAN_H */ diff --git a/tools/include/uapi/asm-generic/socket.h b/tools/include/uapi/asm-generic/socket.h index 54d9c8bf7c55..281df9139d2b 100644 --- a/tools/include/uapi/asm-generic/socket.h +++ b/tools/include/uapi/asm-generic/socket.h @@ -124,6 +124,8 @@ #define SO_PASSPIDFD 76 #define SO_PEERPIDFD 77 +#define SCM_TS_OPT_ID 78 + #if !defined(__KERNEL__) #if __BITS_PER_LONG == 64 || (defined(__x86_64__) && defined(__ILP32__)) diff --git a/tools/include/uapi/asm-generic/unistd.h b/tools/include/uapi/asm-generic/unistd.h index 5bf6148cac2b..88dc393c2bca 100644 --- a/tools/include/uapi/asm-generic/unistd.h +++ b/tools/include/uapi/asm-generic/unistd.h @@ -841,8 +841,17 @@ __SYSCALL(__NR_lsm_list_modules, sys_lsm_list_modules) #define __NR_mseal 462 __SYSCALL(__NR_mseal, sys_mseal) +#define __NR_setxattrat 463 +__SYSCALL(__NR_setxattrat, sys_setxattrat) +#define __NR_getxattrat 464 +__SYSCALL(__NR_getxattrat, sys_getxattrat) +#define __NR_listxattrat 465 +__SYSCALL(__NR_listxattrat, sys_listxattrat) +#define __NR_removexattrat 466 +__SYSCALL(__NR_removexattrat, sys_removexattrat) + #undef __NR_syscalls -#define __NR_syscalls 463 +#define __NR_syscalls 467 /* * 32 bit systems traditionally used different diff --git a/tools/include/uapi/drm/drm.h b/tools/include/uapi/drm/drm.h index 16122819edfe..7fba37b94401 100644 --- a/tools/include/uapi/drm/drm.h +++ b/tools/include/uapi/drm/drm.h @@ -1024,6 +1024,13 @@ struct drm_crtc_queue_sequence { __u64 user_data; /* user data passed to event */ }; +#define DRM_CLIENT_NAME_MAX_LEN 64 +struct drm_set_client_name { + __u64 name_len; + __u64 name; +}; + + #if defined(__cplusplus) } #endif @@ -1288,6 +1295,16 @@ extern "C" { */ #define DRM_IOCTL_MODE_CLOSEFB DRM_IOWR(0xD0, struct drm_mode_closefb) +/** + * DRM_IOCTL_SET_CLIENT_NAME - Attach a name to a drm_file + * + * Having a name allows for easier tracking and debugging. + * The length of the name (without null ending char) must be + * <= DRM_CLIENT_NAME_MAX_LEN. + * The call will fail if the name contains whitespaces or non-printable chars. + */ +#define DRM_IOCTL_SET_CLIENT_NAME DRM_IOWR(0xD1, struct drm_set_client_name) + /* * Device specific ioctls should only be in their respective headers * The device specific ioctl range is from 0x40 to 0x9f. diff --git a/tools/include/uapi/linux/bits.h b/tools/include/uapi/linux/bits.h index 3c2a101986a3..5ee30f882736 100644 --- a/tools/include/uapi/linux/bits.h +++ b/tools/include/uapi/linux/bits.h @@ -12,4 +12,7 @@ (((~_ULL(0)) - (_ULL(1) << (l)) + 1) & \ (~_ULL(0) >> (__BITS_PER_LONG_LONG - 1 - (h)))) +#define __GENMASK_U128(h, l) \ + ((_BIT128((h)) << 1) - (_BIT128(l))) + #endif /* _UAPI_LINUX_BITS_H */ diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 1fb3cb2636e6..4162afc6b5d0 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -1116,11 +1116,15 @@ enum bpf_attach_type { BPF_NETKIT_PRIMARY, BPF_NETKIT_PEER, BPF_TRACE_KPROBE_SESSION, + BPF_TRACE_UPROBE_SESSION, __MAX_BPF_ATTACH_TYPE }; #define MAX_BPF_ATTACH_TYPE __MAX_BPF_ATTACH_TYPE +/* Add BPF_LINK_TYPE(type, name) in bpf_types.h to keep bpf_link_type_strs[] + * in sync with the definitions below. + */ enum bpf_link_type { BPF_LINK_TYPE_UNSPEC = 0, BPF_LINK_TYPE_RAW_TRACEPOINT = 1, @@ -1970,6 +1974,8 @@ union bpf_attr { * program. * Return * The SMP id of the processor running the program. + * Attributes + * __bpf_fastcall * * long bpf_skb_store_bytes(struct sk_buff *skb, u32 offset, const void *from, u32 len, u64 flags) * Description @@ -3101,10 +3107,6 @@ union bpf_attr { * with the **CONFIG_BPF_KPROBE_OVERRIDE** configuration * option, and in this case it only works on functions tagged with * **ALLOW_ERROR_INJECTION** in the kernel code. - * - * Also, the helper is only available for the architectures having - * the CONFIG_FUNCTION_ERROR_INJECTION option. As of this writing, - * x86 architecture is the only one to support this feature. * Return * 0 * @@ -5369,7 +5371,7 @@ union bpf_attr { * Currently, the **flags** must be 0. Currently, nr_loops is * limited to 1 << 23 (~8 million) loops. * - * long (\*callback_fn)(u32 index, void \*ctx); + * long (\*callback_fn)(u64 index, void \*ctx); * * where **index** is the current index in the loop. The index * is zero-indexed. @@ -5519,11 +5521,12 @@ union bpf_attr { * **-EOPNOTSUPP** if the hash calculation failed or **-EINVAL** if * invalid arguments are passed. * - * void *bpf_kptr_xchg(void *map_value, void *ptr) + * void *bpf_kptr_xchg(void *dst, void *ptr) * Description - * Exchange kptr at pointer *map_value* with *ptr*, and return the - * old value. *ptr* can be NULL, otherwise it must be a referenced - * pointer which will be released when this helper is called. + * Exchange kptr at pointer *dst* with *ptr*, and return the old value. + * *dst* can be map value or local kptr. *ptr* can be NULL, otherwise + * it must be a referenced pointer which will be released when this helper + * is called. * Return * The old value of kptr (which can be NULL). The returned pointer * if not NULL, is a reference which must be released using its @@ -6046,11 +6049,6 @@ enum { BPF_F_MARK_ENFORCE = (1ULL << 6), }; -/* BPF_FUNC_clone_redirect and BPF_FUNC_redirect flags. */ -enum { - BPF_F_INGRESS = (1ULL << 0), -}; - /* BPF_FUNC_skb_set_tunnel_key and BPF_FUNC_skb_get_tunnel_key flags. */ enum { BPF_F_TUNINFO_IPV6 = (1ULL << 0), @@ -6197,10 +6195,12 @@ enum { BPF_F_BPRM_SECUREEXEC = (1ULL << 0), }; -/* Flags for bpf_redirect_map helper */ +/* Flags for bpf_redirect and bpf_redirect_map helpers */ enum { - BPF_F_BROADCAST = (1ULL << 3), - BPF_F_EXCLUDE_INGRESS = (1ULL << 4), + BPF_F_INGRESS = (1ULL << 0), /* used for skb path */ + BPF_F_BROADCAST = (1ULL << 3), /* used for XDP path */ + BPF_F_EXCLUDE_INGRESS = (1ULL << 4), /* used for XDP path */ +#define BPF_F_REDIRECT_FLAGS (BPF_F_INGRESS | BPF_F_BROADCAST | BPF_F_EXCLUDE_INGRESS) }; #define __bpf_md_ptr(type, name) \ diff --git a/tools/include/uapi/linux/const.h b/tools/include/uapi/linux/const.h index a429381e7ca5..e16be0d37746 100644 --- a/tools/include/uapi/linux/const.h +++ b/tools/include/uapi/linux/const.h @@ -28,6 +28,23 @@ #define _BITUL(x) (_UL(1) << (x)) #define _BITULL(x) (_ULL(1) << (x)) +#if !defined(__ASSEMBLY__) +/* + * Missing asm support + * + * __BIT128() would not work in the asm code, as it shifts an + * 'unsigned __init128' data type as direct representation of + * 128 bit constants is not supported in the gcc compiler, as + * they get silently truncated. + * + * TODO: Please revisit this implementation when gcc compiler + * starts representing 128 bit constants directly like long + * and unsigned long etc. Subsequently drop the comment for + * GENMASK_U128() which would then start supporting asm code. + */ +#define _BIT128(x) ((unsigned __int128)(1) << (x)) +#endif + #define __ALIGN_KERNEL(x, a) __ALIGN_KERNEL_MASK(x, (__typeof__(x))(a) - 1) #define __ALIGN_KERNEL_MASK(x, mask) (((x) + (mask)) & ~(mask)) diff --git a/tools/include/uapi/linux/if_link.h b/tools/include/uapi/linux/if_link.h index f0d71b2a3f1e..8516c1ccd57a 100644 --- a/tools/include/uapi/linux/if_link.h +++ b/tools/include/uapi/linux/if_link.h @@ -377,6 +377,7 @@ enum { IFLA_GSO_IPV4_MAX_SIZE, IFLA_GRO_IPV4_MAX_SIZE, IFLA_DPLL_PIN, + IFLA_MAX_PACING_OFFLOAD_HORIZON, __IFLA_MAX }; @@ -461,6 +462,286 @@ enum in6_addr_gen_mode { /* Bridge section */ +/** + * DOC: Bridge enum definition + * + * Please *note* that the timer values in the following section are expected + * in clock_t format, which is seconds multiplied by USER_HZ (generally + * defined as 100). + * + * @IFLA_BR_FORWARD_DELAY + * The bridge forwarding delay is the time spent in LISTENING state + * (before moving to LEARNING) and in LEARNING state (before moving + * to FORWARDING). Only relevant if STP is enabled. + * + * The valid values are between (2 * USER_HZ) and (30 * USER_HZ). + * The default value is (15 * USER_HZ). + * + * @IFLA_BR_HELLO_TIME + * The time between hello packets sent by the bridge, when it is a root + * bridge or a designated bridge. Only relevant if STP is enabled. + * + * The valid values are between (1 * USER_HZ) and (10 * USER_HZ). + * The default value is (2 * USER_HZ). + * + * @IFLA_BR_MAX_AGE + * The hello packet timeout is the time until another bridge in the + * spanning tree is assumed to be dead, after reception of its last hello + * message. Only relevant if STP is enabled. + * + * The valid values are between (6 * USER_HZ) and (40 * USER_HZ). + * The default value is (20 * USER_HZ). + * + * @IFLA_BR_AGEING_TIME + * Configure the bridge's FDB entries aging time. It is the time a MAC + * address will be kept in the FDB after a packet has been received from + * that address. After this time has passed, entries are cleaned up. + * Allow values outside the 802.1 standard specification for special cases: + * + * * 0 - entry never ages (all permanent) + * * 1 - entry disappears (no persistence) + * + * The default value is (300 * USER_HZ). + * + * @IFLA_BR_STP_STATE + * Turn spanning tree protocol on (*IFLA_BR_STP_STATE* > 0) or off + * (*IFLA_BR_STP_STATE* == 0) for this bridge. + * + * The default value is 0 (disabled). + * + * @IFLA_BR_PRIORITY + * Set this bridge's spanning tree priority, used during STP root bridge + * election. + * + * The valid values are between 0 and 65535. + * + * @IFLA_BR_VLAN_FILTERING + * Turn VLAN filtering on (*IFLA_BR_VLAN_FILTERING* > 0) or off + * (*IFLA_BR_VLAN_FILTERING* == 0). When disabled, the bridge will not + * consider the VLAN tag when handling packets. + * + * The default value is 0 (disabled). + * + * @IFLA_BR_VLAN_PROTOCOL + * Set the protocol used for VLAN filtering. + * + * The valid values are 0x8100(802.1Q) or 0x88A8(802.1AD). The default value + * is 0x8100(802.1Q). + * + * @IFLA_BR_GROUP_FWD_MASK + * The group forwarding mask. This is the bitmask that is applied to + * decide whether to forward incoming frames destined to link-local + * addresses (of the form 01:80:C2:00:00:0X). + * + * The default value is 0, which means the bridge does not forward any + * link-local frames coming on this port. + * + * @IFLA_BR_ROOT_ID + * The bridge root id, read only. + * + * @IFLA_BR_BRIDGE_ID + * The bridge id, read only. + * + * @IFLA_BR_ROOT_PORT + * The bridge root port, read only. + * + * @IFLA_BR_ROOT_PATH_COST + * The bridge root path cost, read only. + * + * @IFLA_BR_TOPOLOGY_CHANGE + * The bridge topology change, read only. + * + * @IFLA_BR_TOPOLOGY_CHANGE_DETECTED + * The bridge topology change detected, read only. + * + * @IFLA_BR_HELLO_TIMER + * The bridge hello timer, read only. + * + * @IFLA_BR_TCN_TIMER + * The bridge tcn timer, read only. + * + * @IFLA_BR_TOPOLOGY_CHANGE_TIMER + * The bridge topology change timer, read only. + * + * @IFLA_BR_GC_TIMER + * The bridge gc timer, read only. + * + * @IFLA_BR_GROUP_ADDR + * Set the MAC address of the multicast group this bridge uses for STP. + * The address must be a link-local address in standard Ethernet MAC address + * format. It is an address of the form 01:80:C2:00:00:0X, with X in [0, 4..f]. + * + * The default value is 0. + * + * @IFLA_BR_FDB_FLUSH + * Flush bridge's fdb dynamic entries. + * + * @IFLA_BR_MCAST_ROUTER + * Set bridge's multicast router if IGMP snooping is enabled. + * The valid values are: + * + * * 0 - disabled. + * * 1 - automatic (queried). + * * 2 - permanently enabled. + * + * The default value is 1. + * + * @IFLA_BR_MCAST_SNOOPING + * Turn multicast snooping on (*IFLA_BR_MCAST_SNOOPING* > 0) or off + * (*IFLA_BR_MCAST_SNOOPING* == 0). + * + * The default value is 1. + * + * @IFLA_BR_MCAST_QUERY_USE_IFADDR + * If enabled use the bridge's own IP address as source address for IGMP + * queries (*IFLA_BR_MCAST_QUERY_USE_IFADDR* > 0) or the default of 0.0.0.0 + * (*IFLA_BR_MCAST_QUERY_USE_IFADDR* == 0). + * + * The default value is 0 (disabled). + * + * @IFLA_BR_MCAST_QUERIER + * Enable (*IFLA_BR_MULTICAST_QUERIER* > 0) or disable + * (*IFLA_BR_MULTICAST_QUERIER* == 0) IGMP querier, ie sending of multicast + * queries by the bridge. + * + * The default value is 0 (disabled). + * + * @IFLA_BR_MCAST_HASH_ELASTICITY + * Set multicast database hash elasticity, It is the maximum chain length in + * the multicast hash table. This attribute is *deprecated* and the value + * is always 16. + * + * @IFLA_BR_MCAST_HASH_MAX + * Set maximum size of the multicast hash table + * + * The default value is 4096, the value must be a power of 2. + * + * @IFLA_BR_MCAST_LAST_MEMBER_CNT + * The Last Member Query Count is the number of Group-Specific Queries + * sent before the router assumes there are no local members. The Last + * Member Query Count is also the number of Group-and-Source-Specific + * Queries sent before the router assumes there are no listeners for a + * particular source. + * + * The default value is 2. + * + * @IFLA_BR_MCAST_STARTUP_QUERY_CNT + * The Startup Query Count is the number of Queries sent out on startup, + * separated by the Startup Query Interval. + * + * The default value is 2. + * + * @IFLA_BR_MCAST_LAST_MEMBER_INTVL + * The Last Member Query Interval is the Max Response Time inserted into + * Group-Specific Queries sent in response to Leave Group messages, and + * is also the amount of time between Group-Specific Query messages. + * + * The default value is (1 * USER_HZ). + * + * @IFLA_BR_MCAST_MEMBERSHIP_INTVL + * The interval after which the bridge will leave a group, if no membership + * reports for this group are received. + * + * The default value is (260 * USER_HZ). + * + * @IFLA_BR_MCAST_QUERIER_INTVL + * The interval between queries sent by other routers. if no queries are + * seen after this delay has passed, the bridge will start to send its own + * queries (as if *IFLA_BR_MCAST_QUERIER_INTVL* was enabled). + * + * The default value is (255 * USER_HZ). + * + * @IFLA_BR_MCAST_QUERY_INTVL + * The Query Interval is the interval between General Queries sent by + * the Querier. + * + * The default value is (125 * USER_HZ). The minimum value is (1 * USER_HZ). + * + * @IFLA_BR_MCAST_QUERY_RESPONSE_INTVL + * The Max Response Time used to calculate the Max Resp Code inserted + * into the periodic General Queries. + * + * The default value is (10 * USER_HZ). + * + * @IFLA_BR_MCAST_STARTUP_QUERY_INTVL + * The interval between queries in the startup phase. + * + * The default value is (125 * USER_HZ) / 4. The minimum value is (1 * USER_HZ). + * + * @IFLA_BR_NF_CALL_IPTABLES + * Enable (*NF_CALL_IPTABLES* > 0) or disable (*NF_CALL_IPTABLES* == 0) + * iptables hooks on the bridge. + * + * The default value is 0 (disabled). + * + * @IFLA_BR_NF_CALL_IP6TABLES + * Enable (*NF_CALL_IP6TABLES* > 0) or disable (*NF_CALL_IP6TABLES* == 0) + * ip6tables hooks on the bridge. + * + * The default value is 0 (disabled). + * + * @IFLA_BR_NF_CALL_ARPTABLES + * Enable (*NF_CALL_ARPTABLES* > 0) or disable (*NF_CALL_ARPTABLES* == 0) + * arptables hooks on the bridge. + * + * The default value is 0 (disabled). + * + * @IFLA_BR_VLAN_DEFAULT_PVID + * VLAN ID applied to untagged and priority-tagged incoming packets. + * + * The default value is 1. Setting to the special value 0 makes all ports of + * this bridge not have a PVID by default, which means that they will + * not accept VLAN-untagged traffic. + * + * @IFLA_BR_PAD + * Bridge attribute padding type for netlink message. + * + * @IFLA_BR_VLAN_STATS_ENABLED + * Enable (*IFLA_BR_VLAN_STATS_ENABLED* == 1) or disable + * (*IFLA_BR_VLAN_STATS_ENABLED* == 0) per-VLAN stats accounting. + * + * The default value is 0 (disabled). + * + * @IFLA_BR_MCAST_STATS_ENABLED + * Enable (*IFLA_BR_MCAST_STATS_ENABLED* > 0) or disable + * (*IFLA_BR_MCAST_STATS_ENABLED* == 0) multicast (IGMP/MLD) stats + * accounting. + * + * The default value is 0 (disabled). + * + * @IFLA_BR_MCAST_IGMP_VERSION + * Set the IGMP version. + * + * The valid values are 2 and 3. The default value is 2. + * + * @IFLA_BR_MCAST_MLD_VERSION + * Set the MLD version. + * + * The valid values are 1 and 2. The default value is 1. + * + * @IFLA_BR_VLAN_STATS_PER_PORT + * Enable (*IFLA_BR_VLAN_STATS_PER_PORT* == 1) or disable + * (*IFLA_BR_VLAN_STATS_PER_PORT* == 0) per-VLAN per-port stats accounting. + * Can be changed only when there are no port VLANs configured. + * + * The default value is 0 (disabled). + * + * @IFLA_BR_MULTI_BOOLOPT + * The multi_boolopt is used to control new boolean options to avoid adding + * new netlink attributes. You can look at ``enum br_boolopt_id`` for those + * options. + * + * @IFLA_BR_MCAST_QUERIER_STATE + * Bridge mcast querier states, read only. + * + * @IFLA_BR_FDB_N_LEARNED + * The number of dynamically learned FDB entries for the current bridge, + * read only. + * + * @IFLA_BR_FDB_MAX_LEARNED + * Set the number of max dynamically learned FDB entries for the current + * bridge. + */ enum { IFLA_BR_UNSPEC, IFLA_BR_FORWARD_DELAY, @@ -510,6 +791,8 @@ enum { IFLA_BR_VLAN_STATS_PER_PORT, IFLA_BR_MULTI_BOOLOPT, IFLA_BR_MCAST_QUERIER_STATE, + IFLA_BR_FDB_N_LEARNED, + IFLA_BR_FDB_MAX_LEARNED, __IFLA_BR_MAX, }; @@ -520,11 +803,252 @@ struct ifla_bridge_id { __u8 addr[6]; /* ETH_ALEN */ }; +/** + * DOC: Bridge mode enum definition + * + * @BRIDGE_MODE_HAIRPIN + * Controls whether traffic may be sent back out of the port on which it + * was received. This option is also called reflective relay mode, and is + * used to support basic VEPA (Virtual Ethernet Port Aggregator) + * capabilities. By default, this flag is turned off and the bridge will + * not forward traffic back out of the receiving port. + */ enum { BRIDGE_MODE_UNSPEC, BRIDGE_MODE_HAIRPIN, }; +/** + * DOC: Bridge port enum definition + * + * @IFLA_BRPORT_STATE + * The operation state of the port. Here are the valid values. + * + * * 0 - port is in STP *DISABLED* state. Make this port completely + * inactive for STP. This is also called BPDU filter and could be used + * to disable STP on an untrusted port, like a leaf virtual device. + * The traffic forwarding is also stopped on this port. + * * 1 - port is in STP *LISTENING* state. Only valid if STP is enabled + * on the bridge. In this state the port listens for STP BPDUs and + * drops all other traffic frames. + * * 2 - port is in STP *LEARNING* state. Only valid if STP is enabled on + * the bridge. In this state the port will accept traffic only for the + * purpose of updating MAC address tables. + * * 3 - port is in STP *FORWARDING* state. Port is fully active. + * * 4 - port is in STP *BLOCKING* state. Only valid if STP is enabled on + * the bridge. This state is used during the STP election process. + * In this state, port will only process STP BPDUs. + * + * @IFLA_BRPORT_PRIORITY + * The STP port priority. The valid values are between 0 and 255. + * + * @IFLA_BRPORT_COST + * The STP path cost of the port. The valid values are between 1 and 65535. + * + * @IFLA_BRPORT_MODE + * Set the bridge port mode. See *BRIDGE_MODE_HAIRPIN* for more details. + * + * @IFLA_BRPORT_GUARD + * Controls whether STP BPDUs will be processed by the bridge port. By + * default, the flag is turned off to allow BPDU processing. Turning this + * flag on will disable the bridge port if a STP BPDU packet is received. + * + * If the bridge has Spanning Tree enabled, hostile devices on the network + * may send BPDU on a port and cause network failure. Setting *guard on* + * will detect and stop this by disabling the port. The port will be + * restarted if the link is brought down, or removed and reattached. + * + * @IFLA_BRPORT_PROTECT + * Controls whether a given port is allowed to become a root port or not. + * Only used when STP is enabled on the bridge. By default the flag is off. + * + * This feature is also called root port guard. If BPDU is received from a + * leaf (edge) port, it should not be elected as root port. This could + * be used if using STP on a bridge and the downstream bridges are not fully + * trusted; this prevents a hostile guest from rerouting traffic. + * + * @IFLA_BRPORT_FAST_LEAVE + * This flag allows the bridge to immediately stop multicast traffic + * forwarding on a port that receives an IGMP Leave message. It is only used + * when IGMP snooping is enabled on the bridge. By default the flag is off. + * + * @IFLA_BRPORT_LEARNING + * Controls whether a given port will learn *source* MAC addresses from + * received traffic or not. Also controls whether dynamic FDB entries + * (which can also be added by software) will be refreshed by incoming + * traffic. By default this flag is on. + * + * @IFLA_BRPORT_UNICAST_FLOOD + * Controls whether unicast traffic for which there is no FDB entry will + * be flooded towards this port. By default this flag is on. + * + * @IFLA_BRPORT_PROXYARP + * Enable proxy ARP on this port. + * + * @IFLA_BRPORT_LEARNING_SYNC + * Controls whether a given port will sync MAC addresses learned on device + * port to bridge FDB. + * + * @IFLA_BRPORT_PROXYARP_WIFI + * Enable proxy ARP on this port which meets extended requirements by + * IEEE 802.11 and Hotspot 2.0 specifications. + * + * @IFLA_BRPORT_ROOT_ID + * + * @IFLA_BRPORT_BRIDGE_ID + * + * @IFLA_BRPORT_DESIGNATED_PORT + * + * @IFLA_BRPORT_DESIGNATED_COST + * + * @IFLA_BRPORT_ID + * + * @IFLA_BRPORT_NO + * + * @IFLA_BRPORT_TOPOLOGY_CHANGE_ACK + * + * @IFLA_BRPORT_CONFIG_PENDING + * + * @IFLA_BRPORT_MESSAGE_AGE_TIMER + * + * @IFLA_BRPORT_FORWARD_DELAY_TIMER + * + * @IFLA_BRPORT_HOLD_TIMER + * + * @IFLA_BRPORT_FLUSH + * Flush bridge ports' fdb dynamic entries. + * + * @IFLA_BRPORT_MULTICAST_ROUTER + * Configure the port's multicast router presence. A port with + * a multicast router will receive all multicast traffic. + * The valid values are: + * + * * 0 disable multicast routers on this port + * * 1 let the system detect the presence of routers (default) + * * 2 permanently enable multicast traffic forwarding on this port + * * 3 enable multicast routers temporarily on this port, not depending + * on incoming queries. + * + * @IFLA_BRPORT_PAD + * + * @IFLA_BRPORT_MCAST_FLOOD + * Controls whether a given port will flood multicast traffic for which + * there is no MDB entry. By default this flag is on. + * + * @IFLA_BRPORT_MCAST_TO_UCAST + * Controls whether a given port will replicate packets using unicast + * instead of multicast. By default this flag is off. + * + * This is done by copying the packet per host and changing the multicast + * destination MAC to a unicast one accordingly. + * + * *mcast_to_unicast* works on top of the multicast snooping feature of the + * bridge. Which means unicast copies are only delivered to hosts which + * are interested in unicast and signaled this via IGMP/MLD reports previously. + * + * This feature is intended for interface types which have a more reliable + * and/or efficient way to deliver unicast packets than broadcast ones + * (e.g. WiFi). + * + * However, it should only be enabled on interfaces where no IGMPv2/MLDv1 + * report suppression takes place. IGMP/MLD report suppression issue is + * usually overcome by the network daemon (supplicant) enabling AP isolation + * and by that separating all STAs. + * + * Delivery of STA-to-STA IP multicast is made possible again by enabling + * and utilizing the bridge hairpin mode, which considers the incoming port + * as a potential outgoing port, too (see *BRIDGE_MODE_HAIRPIN* option). + * Hairpin mode is performed after multicast snooping, therefore leading + * to only deliver reports to STAs running a multicast router. + * + * @IFLA_BRPORT_VLAN_TUNNEL + * Controls whether vlan to tunnel mapping is enabled on the port. + * By default this flag is off. + * + * @IFLA_BRPORT_BCAST_FLOOD + * Controls flooding of broadcast traffic on the given port. By default + * this flag is on. + * + * @IFLA_BRPORT_GROUP_FWD_MASK + * Set the group forward mask. This is a bitmask that is applied to + * decide whether to forward incoming frames destined to link-local + * addresses. The addresses of the form are 01:80:C2:00:00:0X (defaults + * to 0, which means the bridge does not forward any link-local frames + * coming on this port). + * + * @IFLA_BRPORT_NEIGH_SUPPRESS + * Controls whether neighbor discovery (arp and nd) proxy and suppression + * is enabled on the port. By default this flag is off. + * + * @IFLA_BRPORT_ISOLATED + * Controls whether a given port will be isolated, which means it will be + * able to communicate with non-isolated ports only. By default this + * flag is off. + * + * @IFLA_BRPORT_BACKUP_PORT + * Set a backup port. If the port loses carrier all traffic will be + * redirected to the configured backup port. Set the value to 0 to disable + * it. + * + * @IFLA_BRPORT_MRP_RING_OPEN + * + * @IFLA_BRPORT_MRP_IN_OPEN + * + * @IFLA_BRPORT_MCAST_EHT_HOSTS_LIMIT + * The number of per-port EHT hosts limit. The default value is 512. + * Setting to 0 is not allowed. + * + * @IFLA_BRPORT_MCAST_EHT_HOSTS_CNT + * The current number of tracked hosts, read only. + * + * @IFLA_BRPORT_LOCKED + * Controls whether a port will be locked, meaning that hosts behind the + * port will not be able to communicate through the port unless an FDB + * entry with the unit's MAC address is in the FDB. The common use case is + * that hosts are allowed access through authentication with the IEEE 802.1X + * protocol or based on whitelists. By default this flag is off. + * + * Please note that secure 802.1X deployments should always use the + * *BR_BOOLOPT_NO_LL_LEARN* flag, to not permit the bridge to populate its + * FDB based on link-local (EAPOL) traffic received on the port. + * + * @IFLA_BRPORT_MAB + * Controls whether a port will use MAC Authentication Bypass (MAB), a + * technique through which select MAC addresses may be allowed on a locked + * port, without using 802.1X authentication. Packets with an unknown source + * MAC address generates a "locked" FDB entry on the incoming bridge port. + * The common use case is for user space to react to these bridge FDB + * notifications and optionally replace the locked FDB entry with a normal + * one, allowing traffic to pass for whitelisted MAC addresses. + * + * Setting this flag also requires *IFLA_BRPORT_LOCKED* and + * *IFLA_BRPORT_LEARNING*. *IFLA_BRPORT_LOCKED* ensures that unauthorized + * data packets are dropped, and *IFLA_BRPORT_LEARNING* allows the dynamic + * FDB entries installed by user space (as replacements for the locked FDB + * entries) to be refreshed and/or aged out. + * + * @IFLA_BRPORT_MCAST_N_GROUPS + * + * @IFLA_BRPORT_MCAST_MAX_GROUPS + * Sets the maximum number of MDB entries that can be registered for a + * given port. Attempts to register more MDB entries at the port than this + * limit allows will be rejected, whether they are done through netlink + * (e.g. the bridge tool), or IGMP or MLD membership reports. Setting a + * limit of 0 disables the limit. The default value is 0. + * + * @IFLA_BRPORT_NEIGH_VLAN_SUPPRESS + * Controls whether neighbor discovery (arp and nd) proxy and suppression is + * enabled for a given port. By default this flag is off. + * + * Note that this option only takes effect when *IFLA_BRPORT_NEIGH_SUPPRESS* + * is enabled for a given port. + * + * @IFLA_BRPORT_BACKUP_NHID + * The FDB nexthop object ID to attach to packets being redirected to a + * backup port that has VLAN tunnel mapping enabled (via the + * *IFLA_BRPORT_VLAN_TUNNEL* option). Setting a value of 0 (default) has + * the effect of not attaching any ID. + */ enum { IFLA_BRPORT_UNSPEC, IFLA_BRPORT_STATE, /* Spanning tree state */ @@ -769,6 +1293,19 @@ enum netkit_mode { NETKIT_L3, }; +/* NETKIT_SCRUB_NONE leaves clearing skb->{mark,priority} up to + * the BPF program if attached. This also means the latter can + * consume the two fields if they were populated earlier. + * + * NETKIT_SCRUB_DEFAULT zeroes skb->{mark,priority} fields before + * invoking the attached BPF program when the peer device resides + * in a different network namespace. This is the default behavior. + */ +enum netkit_scrub { + NETKIT_SCRUB_NONE, + NETKIT_SCRUB_DEFAULT, +}; + enum { IFLA_NETKIT_UNSPEC, IFLA_NETKIT_PEER_INFO, @@ -776,6 +1313,8 @@ enum { IFLA_NETKIT_POLICY, IFLA_NETKIT_PEER_POLICY, IFLA_NETKIT_MODE, + IFLA_NETKIT_SCRUB, + IFLA_NETKIT_PEER_SCRUB, __IFLA_NETKIT_MAX, }; #define IFLA_NETKIT_MAX (__IFLA_NETKIT_MAX - 1) @@ -854,6 +1393,7 @@ enum { IFLA_VXLAN_DF, IFLA_VXLAN_VNIFILTER, /* only applicable with COLLECT_METADATA mode */ IFLA_VXLAN_LOCALBYPASS, + IFLA_VXLAN_LABEL_POLICY, /* IPv6 flow label policy; ifla_vxlan_label_policy */ __IFLA_VXLAN_MAX }; #define IFLA_VXLAN_MAX (__IFLA_VXLAN_MAX - 1) @@ -871,6 +1411,13 @@ enum ifla_vxlan_df { VXLAN_DF_MAX = __VXLAN_DF_END - 1, }; +enum ifla_vxlan_label_policy { + VXLAN_LABEL_FIXED = 0, + VXLAN_LABEL_INHERIT = 1, + __VXLAN_LABEL_END, + VXLAN_LABEL_MAX = __VXLAN_LABEL_END - 1, +}; + /* GENEVE section */ enum { IFLA_GENEVE_UNSPEC, @@ -935,6 +1482,8 @@ enum { IFLA_GTP_ROLE, IFLA_GTP_CREATE_SOCKETS, IFLA_GTP_RESTART_COUNT, + IFLA_GTP_LOCAL, + IFLA_GTP_LOCAL6, __IFLA_GTP_MAX, }; #define IFLA_GTP_MAX (__IFLA_GTP_MAX - 1) @@ -1240,6 +1789,7 @@ enum { IFLA_HSR_PROTOCOL, /* Indicate different protocol than * HSR. For example PRP. */ + IFLA_HSR_INTERLINK, /* HSR interlink network device */ __IFLA_HSR_MAX, }; @@ -1417,7 +1967,9 @@ enum { enum { IFLA_DSA_UNSPEC, - IFLA_DSA_MASTER, + IFLA_DSA_CONDUIT, + /* Deprecated, use IFLA_DSA_CONDUIT instead */ + IFLA_DSA_MASTER = IFLA_DSA_CONDUIT, __IFLA_DSA_MAX, }; diff --git a/tools/include/uapi/linux/in.h b/tools/include/uapi/linux/in.h index d358add1611c..5d32d53508d9 100644 --- a/tools/include/uapi/linux/in.h +++ b/tools/include/uapi/linux/in.h @@ -141,7 +141,7 @@ struct in_addr { */ #define IP_PMTUDISC_INTERFACE 4 /* weaker version of IP_PMTUDISC_INTERFACE, which allows packets to get - * fragmented if they exeed the interface mtu + * fragmented if they exceed the interface mtu */ #define IP_PMTUDISC_OMIT 5 diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h index 637efc055145..502ea63b5d2e 100644 --- a/tools/include/uapi/linux/kvm.h +++ b/tools/include/uapi/linux/kvm.h @@ -1158,7 +1158,15 @@ enum kvm_device_type { #define KVM_DEV_TYPE_ARM_PV_TIME KVM_DEV_TYPE_ARM_PV_TIME KVM_DEV_TYPE_RISCV_AIA, #define KVM_DEV_TYPE_RISCV_AIA KVM_DEV_TYPE_RISCV_AIA + KVM_DEV_TYPE_LOONGARCH_IPI, +#define KVM_DEV_TYPE_LOONGARCH_IPI KVM_DEV_TYPE_LOONGARCH_IPI + KVM_DEV_TYPE_LOONGARCH_EIOINTC, +#define KVM_DEV_TYPE_LOONGARCH_EIOINTC KVM_DEV_TYPE_LOONGARCH_EIOINTC + KVM_DEV_TYPE_LOONGARCH_PCHPIC, +#define KVM_DEV_TYPE_LOONGARCH_PCHPIC KVM_DEV_TYPE_LOONGARCH_PCHPIC + KVM_DEV_TYPE_MAX, + }; struct kvm_vfio_spapr_tce { diff --git a/tools/include/uapi/linux/netdev.h b/tools/include/uapi/linux/netdev.h index 7c308f04e7a0..e4be227d3ad6 100644 --- a/tools/include/uapi/linux/netdev.h +++ b/tools/include/uapi/linux/netdev.h @@ -122,6 +122,9 @@ enum { NETDEV_A_NAPI_ID, NETDEV_A_NAPI_IRQ, NETDEV_A_NAPI_PID, + NETDEV_A_NAPI_DEFER_HARD_IRQS, + NETDEV_A_NAPI_GRO_FLUSH_TIMEOUT, + NETDEV_A_NAPI_IRQ_SUSPEND_TIMEOUT, __NETDEV_A_NAPI_MAX, NETDEV_A_NAPI_MAX = (__NETDEV_A_NAPI_MAX - 1) @@ -199,6 +202,7 @@ enum { NETDEV_CMD_NAPI_GET, NETDEV_CMD_QSTATS_GET, NETDEV_CMD_BIND_RX, + NETDEV_CMD_NAPI_SET, __NETDEV_CMD_MAX, NETDEV_CMD_MAX = (__NETDEV_CMD_MAX - 1) diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h index 4842c36fdf80..0524d541d4e3 100644 --- a/tools/include/uapi/linux/perf_event.h +++ b/tools/include/uapi/linux/perf_event.h @@ -511,7 +511,16 @@ struct perf_event_attr { __u16 sample_max_stack; __u16 __reserved_2; __u32 aux_sample_size; - __u32 __reserved_3; + + union { + __u32 aux_action; + struct { + __u32 aux_start_paused : 1, /* start AUX area tracing paused */ + aux_pause : 1, /* on overflow, pause AUX area tracing */ + aux_resume : 1, /* on overflow, resume AUX area tracing */ + __reserved_3 : 29; + }; + }; /* * User provided data if sigtrap=1, passed back to user via diff --git a/tools/include/vdso/unaligned.h b/tools/include/vdso/unaligned.h new file mode 100644 index 000000000000..eee3d2a4dbe4 --- /dev/null +++ b/tools/include/vdso/unaligned.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __VDSO_UNALIGNED_H +#define __VDSO_UNALIGNED_H + +#define __get_unaligned_t(type, ptr) ({ \ + const struct { type x; } __packed *__pptr = (typeof(__pptr))(ptr); \ + __pptr->x; \ +}) + +#define __put_unaligned_t(type, val, ptr) do { \ + struct { type x; } __packed *__pptr = (typeof(__pptr))(ptr); \ + __pptr->x = (val); \ +} while (0) + +#endif /* __VDSO_UNALIGNED_H */ diff --git a/tools/lib/api/io.h b/tools/lib/api/io.h index d3eb04d1bc89..1731996b2c32 100644 --- a/tools/lib/api/io.h +++ b/tools/lib/api/io.h @@ -189,6 +189,7 @@ static inline ssize_t io__getdelim(struct io *io, char **line_out, size_t *line_ err_out: free(line); *line_out = NULL; + *line_len_out = 0; return -ENOMEM; } diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile index 1b22f0f37288..857a5f7b413d 100644 --- a/tools/lib/bpf/Makefile +++ b/tools/lib/bpf/Makefile @@ -61,7 +61,8 @@ ifndef VERBOSE endif INCLUDES = -I$(or $(OUTPUT),.) \ - -I$(srctree)/tools/include -I$(srctree)/tools/include/uapi + -I$(srctree)/tools/include -I$(srctree)/tools/include/uapi \ + -I$(srctree)/tools/arch/$(SRCARCH)/include export prefix libdir src obj diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index 2a4c71501a17..becdfa701c75 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -776,6 +776,7 @@ int bpf_link_create(int prog_fd, int target_fd, return libbpf_err(-EINVAL); break; case BPF_TRACE_UPROBE_MULTI: + case BPF_TRACE_UPROBE_SESSION: attr.link_create.uprobe_multi.flags = OPTS_GET(opts, uprobe_multi.flags, 0); attr.link_create.uprobe_multi.cnt = OPTS_GET(opts, uprobe_multi.cnt, 0); attr.link_create.uprobe_multi.path = ptr_to_u64(OPTS_GET(opts, uprobe_multi.path, 0)); diff --git a/tools/lib/bpf/bpf_gen_internal.h b/tools/lib/bpf/bpf_gen_internal.h index fdf44403ff36..6ff963a491d9 100644 --- a/tools/lib/bpf/bpf_gen_internal.h +++ b/tools/lib/bpf/bpf_gen_internal.h @@ -34,6 +34,7 @@ struct bpf_gen { void *data_cur; void *insn_start; void *insn_cur; + bool swapped_endian; ssize_t cleanup_label; __u32 nr_progs; __u32 nr_maps; diff --git a/tools/lib/bpf/bpf_helpers.h b/tools/lib/bpf/bpf_helpers.h index 80bc0242e8dc..686824b8b413 100644 --- a/tools/lib/bpf/bpf_helpers.h +++ b/tools/lib/bpf/bpf_helpers.h @@ -185,6 +185,7 @@ enum libbpf_tristate { #define __kptr_untrusted __attribute__((btf_type_tag("kptr_untrusted"))) #define __kptr __attribute__((btf_type_tag("kptr"))) #define __percpu_kptr __attribute__((btf_type_tag("percpu_kptr"))) +#define __uptr __attribute__((btf_type_tag("uptr"))) #if defined (__clang__) #define bpf_ksym_exists(sym) ({ \ diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index 3c131039c523..12468ae0d573 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -22,6 +22,7 @@ #include "libbpf_internal.h" #include "hashmap.h" #include "strset.h" +#include "str_error.h" #define BTF_MAX_NR_TYPES 0x7fffffffU #define BTF_MAX_STR_OFFSET 0x7fffffffU @@ -1179,7 +1180,7 @@ static struct btf *btf_parse_elf(const char *path, struct btf *base_btf, fd = open(path, O_RDONLY | O_CLOEXEC); if (fd < 0) { err = -errno; - pr_warn("failed to open %s: %s\n", path, strerror(errno)); + pr_warn("failed to open %s: %s\n", path, errstr(err)); return ERR_PTR(err); } @@ -1445,7 +1446,7 @@ retry_load: goto retry_load; err = -errno; - pr_warn("BTF loading error: %d\n", err); + pr_warn("BTF loading error: %s\n", errstr(err)); /* don't print out contents of custom log_buf */ if (!log_buf && buf[0]) pr_warn("-- BEGIN BTF LOAD LOG ---\n%s\n-- END BTF LOAD LOG --\n", buf); @@ -2885,7 +2886,7 @@ int btf__add_decl_tag(struct btf *btf, const char *value, int ref_type_id, return btf_commit_type(btf, sz); } -struct btf_ext_sec_setup_param { +struct btf_ext_sec_info_param { __u32 off; __u32 len; __u32 min_rec_size; @@ -2893,14 +2894,20 @@ struct btf_ext_sec_setup_param { const char *desc; }; -static int btf_ext_setup_info(struct btf_ext *btf_ext, - struct btf_ext_sec_setup_param *ext_sec) +/* + * Parse a single info subsection of the BTF.ext info data: + * - validate subsection structure and elements + * - save info subsection start and sizing details in struct btf_ext + * - endian-independent operation, for calling before byte-swapping + */ +static int btf_ext_parse_sec_info(struct btf_ext *btf_ext, + struct btf_ext_sec_info_param *ext_sec, + bool is_native) { const struct btf_ext_info_sec *sinfo; struct btf_ext_info *ext_info; __u32 info_left, record_size; size_t sec_cnt = 0; - /* The start of the info sec (including the __u32 record_size). */ void *info; if (ext_sec->len == 0) @@ -2912,6 +2919,7 @@ static int btf_ext_setup_info(struct btf_ext *btf_ext, return -EINVAL; } + /* The start of the info sec (including the __u32 record_size). */ info = btf_ext->data + btf_ext->hdr->hdr_len + ext_sec->off; info_left = ext_sec->len; @@ -2927,9 +2935,13 @@ static int btf_ext_setup_info(struct btf_ext *btf_ext, return -EINVAL; } - /* The record size needs to meet the minimum standard */ - record_size = *(__u32 *)info; + /* The record size needs to meet either the minimum standard or, when + * handling non-native endianness data, the exact standard so as + * to allow safe byte-swapping. + */ + record_size = is_native ? *(__u32 *)info : bswap_32(*(__u32 *)info); if (record_size < ext_sec->min_rec_size || + (!is_native && record_size != ext_sec->min_rec_size) || record_size & 0x03) { pr_debug("%s section in .BTF.ext has invalid record size %u\n", ext_sec->desc, record_size); @@ -2941,7 +2953,7 @@ static int btf_ext_setup_info(struct btf_ext *btf_ext, /* If no records, return failure now so .BTF.ext won't be used. */ if (!info_left) { - pr_debug("%s section in .BTF.ext has no records", ext_sec->desc); + pr_debug("%s section in .BTF.ext has no records\n", ext_sec->desc); return -EINVAL; } @@ -2956,7 +2968,7 @@ static int btf_ext_setup_info(struct btf_ext *btf_ext, return -EINVAL; } - num_records = sinfo->num_info; + num_records = is_native ? sinfo->num_info : bswap_32(sinfo->num_info); if (num_records == 0) { pr_debug("%s section has incorrect num_records in .BTF.ext\n", ext_sec->desc); @@ -2984,64 +2996,157 @@ static int btf_ext_setup_info(struct btf_ext *btf_ext, return 0; } -static int btf_ext_setup_func_info(struct btf_ext *btf_ext) +/* Parse all info secs in the BTF.ext info data */ +static int btf_ext_parse_info(struct btf_ext *btf_ext, bool is_native) { - struct btf_ext_sec_setup_param param = { + struct btf_ext_sec_info_param func_info = { .off = btf_ext->hdr->func_info_off, .len = btf_ext->hdr->func_info_len, .min_rec_size = sizeof(struct bpf_func_info_min), .ext_info = &btf_ext->func_info, .desc = "func_info" }; - - return btf_ext_setup_info(btf_ext, ¶m); -} - -static int btf_ext_setup_line_info(struct btf_ext *btf_ext) -{ - struct btf_ext_sec_setup_param param = { + struct btf_ext_sec_info_param line_info = { .off = btf_ext->hdr->line_info_off, .len = btf_ext->hdr->line_info_len, .min_rec_size = sizeof(struct bpf_line_info_min), .ext_info = &btf_ext->line_info, .desc = "line_info", }; - - return btf_ext_setup_info(btf_ext, ¶m); -} - -static int btf_ext_setup_core_relos(struct btf_ext *btf_ext) -{ - struct btf_ext_sec_setup_param param = { + struct btf_ext_sec_info_param core_relo = { .off = btf_ext->hdr->core_relo_off, .len = btf_ext->hdr->core_relo_len, .min_rec_size = sizeof(struct bpf_core_relo), .ext_info = &btf_ext->core_relo_info, .desc = "core_relo", }; + int err; + + err = btf_ext_parse_sec_info(btf_ext, &func_info, is_native); + if (err) + return err; + + err = btf_ext_parse_sec_info(btf_ext, &line_info, is_native); + if (err) + return err; + + if (btf_ext->hdr->hdr_len < offsetofend(struct btf_ext_header, core_relo_len)) + return 0; /* skip core relos parsing */ + + err = btf_ext_parse_sec_info(btf_ext, &core_relo, is_native); + if (err) + return err; + + return 0; +} + +/* Swap byte-order of BTF.ext header with any endianness */ +static void btf_ext_bswap_hdr(struct btf_ext_header *h) +{ + bool is_native = h->magic == BTF_MAGIC; + __u32 hdr_len; + + hdr_len = is_native ? h->hdr_len : bswap_32(h->hdr_len); + + h->magic = bswap_16(h->magic); + h->hdr_len = bswap_32(h->hdr_len); + h->func_info_off = bswap_32(h->func_info_off); + h->func_info_len = bswap_32(h->func_info_len); + h->line_info_off = bswap_32(h->line_info_off); + h->line_info_len = bswap_32(h->line_info_len); + + if (hdr_len < offsetofend(struct btf_ext_header, core_relo_len)) + return; + + h->core_relo_off = bswap_32(h->core_relo_off); + h->core_relo_len = bswap_32(h->core_relo_len); +} + +/* Swap byte-order of generic info subsection */ +static void btf_ext_bswap_info_sec(void *info, __u32 len, bool is_native, + info_rec_bswap_fn bswap_fn) +{ + struct btf_ext_info_sec *sec; + __u32 info_left, rec_size, *rs; + + if (len == 0) + return; + + rs = info; /* info record size */ + rec_size = is_native ? *rs : bswap_32(*rs); + *rs = bswap_32(*rs); + + sec = info + sizeof(__u32); /* info sec #1 */ + info_left = len - sizeof(__u32); + while (info_left) { + unsigned int sec_hdrlen = sizeof(struct btf_ext_info_sec); + __u32 i, num_recs; + void *p; + + num_recs = is_native ? sec->num_info : bswap_32(sec->num_info); + sec->sec_name_off = bswap_32(sec->sec_name_off); + sec->num_info = bswap_32(sec->num_info); + p = sec->data; /* info rec #1 */ + for (i = 0; i < num_recs; i++, p += rec_size) + bswap_fn(p); + sec = p; + info_left -= sec_hdrlen + (__u64)rec_size * num_recs; + } +} + +/* + * Swap byte-order of all info data in a BTF.ext section + * - requires BTF.ext hdr in native endianness + */ +static void btf_ext_bswap_info(struct btf_ext *btf_ext, void *data) +{ + const bool is_native = btf_ext->swapped_endian; + const struct btf_ext_header *h = data; + void *info; + + /* Swap func_info subsection byte-order */ + info = data + h->hdr_len + h->func_info_off; + btf_ext_bswap_info_sec(info, h->func_info_len, is_native, + (info_rec_bswap_fn)bpf_func_info_bswap); + + /* Swap line_info subsection byte-order */ + info = data + h->hdr_len + h->line_info_off; + btf_ext_bswap_info_sec(info, h->line_info_len, is_native, + (info_rec_bswap_fn)bpf_line_info_bswap); + + /* Swap core_relo subsection byte-order (if present) */ + if (h->hdr_len < offsetofend(struct btf_ext_header, core_relo_len)) + return; - return btf_ext_setup_info(btf_ext, ¶m); + info = data + h->hdr_len + h->core_relo_off; + btf_ext_bswap_info_sec(info, h->core_relo_len, is_native, + (info_rec_bswap_fn)bpf_core_relo_bswap); } -static int btf_ext_parse_hdr(__u8 *data, __u32 data_size) +/* Parse hdr data and info sections: check and convert to native endianness */ +static int btf_ext_parse(struct btf_ext *btf_ext) { - const struct btf_ext_header *hdr = (struct btf_ext_header *)data; + __u32 hdr_len, data_size = btf_ext->data_size; + struct btf_ext_header *hdr = btf_ext->hdr; + bool swapped_endian = false; + int err; - if (data_size < offsetofend(struct btf_ext_header, hdr_len) || - data_size < hdr->hdr_len) { - pr_debug("BTF.ext header not found"); + if (data_size < offsetofend(struct btf_ext_header, hdr_len)) { + pr_debug("BTF.ext header too short\n"); return -EINVAL; } + hdr_len = hdr->hdr_len; if (hdr->magic == bswap_16(BTF_MAGIC)) { - pr_warn("BTF.ext in non-native endianness is not supported\n"); - return -ENOTSUP; + swapped_endian = true; + hdr_len = bswap_32(hdr_len); } else if (hdr->magic != BTF_MAGIC) { pr_debug("Invalid BTF.ext magic:%x\n", hdr->magic); return -EINVAL; } - if (hdr->version != BTF_VERSION) { + /* Ensure known version of structs, current BTF_VERSION == 1 */ + if (hdr->version != 1) { pr_debug("Unsupported BTF.ext version:%u\n", hdr->version); return -ENOTSUP; } @@ -3051,11 +3156,39 @@ static int btf_ext_parse_hdr(__u8 *data, __u32 data_size) return -ENOTSUP; } - if (data_size == hdr->hdr_len) { + if (data_size < hdr_len) { + pr_debug("BTF.ext header not found\n"); + return -EINVAL; + } else if (data_size == hdr_len) { pr_debug("BTF.ext has no data\n"); return -EINVAL; } + /* Verify mandatory hdr info details present */ + if (hdr_len < offsetofend(struct btf_ext_header, line_info_len)) { + pr_warn("BTF.ext header missing func_info, line_info\n"); + return -EINVAL; + } + + /* Keep hdr native byte-order in memory for introspection */ + if (swapped_endian) + btf_ext_bswap_hdr(btf_ext->hdr); + + /* Validate info subsections and cache key metadata */ + err = btf_ext_parse_info(btf_ext, !swapped_endian); + if (err) + return err; + + /* Keep infos native byte-order in memory for introspection */ + if (swapped_endian) + btf_ext_bswap_info(btf_ext, btf_ext->data); + + /* + * Set btf_ext->swapped_endian only after all header and info data has + * been swapped, helping bswap functions determine if their data are + * in native byte-order when called. + */ + btf_ext->swapped_endian = swapped_endian; return 0; } @@ -3067,6 +3200,7 @@ void btf_ext__free(struct btf_ext *btf_ext) free(btf_ext->line_info.sec_idxs); free(btf_ext->core_relo_info.sec_idxs); free(btf_ext->data); + free(btf_ext->data_swapped); free(btf_ext); } @@ -3087,29 +3221,7 @@ struct btf_ext *btf_ext__new(const __u8 *data, __u32 size) } memcpy(btf_ext->data, data, size); - err = btf_ext_parse_hdr(btf_ext->data, size); - if (err) - goto done; - - if (btf_ext->hdr->hdr_len < offsetofend(struct btf_ext_header, line_info_len)) { - err = -EINVAL; - goto done; - } - - err = btf_ext_setup_func_info(btf_ext); - if (err) - goto done; - - err = btf_ext_setup_line_info(btf_ext); - if (err) - goto done; - - if (btf_ext->hdr->hdr_len < offsetofend(struct btf_ext_header, core_relo_len)) - goto done; /* skip core relos parsing */ - - err = btf_ext_setup_core_relos(btf_ext); - if (err) - goto done; + err = btf_ext_parse(btf_ext); done: if (err) { @@ -3120,15 +3232,66 @@ done: return btf_ext; } +static void *btf_ext_raw_data(const struct btf_ext *btf_ext_ro, bool swap_endian) +{ + struct btf_ext *btf_ext = (struct btf_ext *)btf_ext_ro; + const __u32 data_sz = btf_ext->data_size; + void *data; + + /* Return native data (always present) or swapped data if present */ + if (!swap_endian) + return btf_ext->data; + else if (btf_ext->data_swapped) + return btf_ext->data_swapped; + + /* Recreate missing swapped data, then cache and return */ + data = calloc(1, data_sz); + if (!data) + return NULL; + memcpy(data, btf_ext->data, data_sz); + + btf_ext_bswap_info(btf_ext, data); + btf_ext_bswap_hdr(data); + btf_ext->data_swapped = data; + return data; +} + const void *btf_ext__raw_data(const struct btf_ext *btf_ext, __u32 *size) { + void *data; + + data = btf_ext_raw_data(btf_ext, btf_ext->swapped_endian); + if (!data) + return errno = ENOMEM, NULL; + *size = btf_ext->data_size; - return btf_ext->data; + return data; } __attribute__((alias("btf_ext__raw_data"))) const void *btf_ext__get_raw_data(const struct btf_ext *btf_ext, __u32 *size); +enum btf_endianness btf_ext__endianness(const struct btf_ext *btf_ext) +{ + if (is_host_big_endian()) + return btf_ext->swapped_endian ? BTF_LITTLE_ENDIAN : BTF_BIG_ENDIAN; + else + return btf_ext->swapped_endian ? BTF_BIG_ENDIAN : BTF_LITTLE_ENDIAN; +} + +int btf_ext__set_endianness(struct btf_ext *btf_ext, enum btf_endianness endian) +{ + if (endian != BTF_LITTLE_ENDIAN && endian != BTF_BIG_ENDIAN) + return libbpf_err(-EINVAL); + + btf_ext->swapped_endian = is_host_big_endian() != (endian == BTF_BIG_ENDIAN); + + if (!btf_ext->swapped_endian) { + free(btf_ext->data_swapped); + btf_ext->data_swapped = NULL; + } + return 0; +} struct btf_dedup; @@ -3291,7 +3454,7 @@ int btf__dedup(struct btf *btf, const struct btf_dedup_opts *opts) d = btf_dedup_new(btf, opts); if (IS_ERR(d)) { - pr_debug("btf_dedup_new failed: %ld", PTR_ERR(d)); + pr_debug("btf_dedup_new failed: %ld\n", PTR_ERR(d)); return libbpf_err(-EINVAL); } @@ -3302,42 +3465,42 @@ int btf__dedup(struct btf *btf, const struct btf_dedup_opts *opts) err = btf_dedup_prep(d); if (err) { - pr_debug("btf_dedup_prep failed:%d\n", err); + pr_debug("btf_dedup_prep failed: %s\n", errstr(err)); goto done; } err = btf_dedup_strings(d); if (err < 0) { - pr_debug("btf_dedup_strings failed:%d\n", err); + pr_debug("btf_dedup_strings failed: %s\n", errstr(err)); goto done; } err = btf_dedup_prim_types(d); if (err < 0) { - pr_debug("btf_dedup_prim_types failed:%d\n", err); + pr_debug("btf_dedup_prim_types failed: %s\n", errstr(err)); goto done; } err = btf_dedup_struct_types(d); if (err < 0) { - pr_debug("btf_dedup_struct_types failed:%d\n", err); + pr_debug("btf_dedup_struct_types failed: %s\n", errstr(err)); goto done; } err = btf_dedup_resolve_fwds(d); if (err < 0) { - pr_debug("btf_dedup_resolve_fwds failed:%d\n", err); + pr_debug("btf_dedup_resolve_fwds failed: %s\n", errstr(err)); goto done; } err = btf_dedup_ref_types(d); if (err < 0) { - pr_debug("btf_dedup_ref_types failed:%d\n", err); + pr_debug("btf_dedup_ref_types failed: %s\n", errstr(err)); goto done; } err = btf_dedup_compact_types(d); if (err < 0) { - pr_debug("btf_dedup_compact_types failed:%d\n", err); + pr_debug("btf_dedup_compact_types failed: %s\n", errstr(err)); goto done; } err = btf_dedup_remap_types(d); if (err < 0) { - pr_debug("btf_dedup_remap_types failed:%d\n", err); + pr_debug("btf_dedup_remap_types failed: %s\n", errstr(err)); goto done; } @@ -3385,7 +3548,7 @@ struct btf_dedup { struct strset *strs_set; }; -static long hash_combine(long h, long value) +static unsigned long hash_combine(unsigned long h, unsigned long value) { return h * 31 + value; } @@ -5056,7 +5219,8 @@ struct btf *btf__load_vmlinux_btf(void) btf = btf__parse(sysfs_btf_path, NULL); if (!btf) { err = -errno; - pr_warn("failed to read kernel BTF from '%s': %d\n", sysfs_btf_path, err); + pr_warn("failed to read kernel BTF from '%s': %s\n", + sysfs_btf_path, errstr(err)); return libbpf_err_ptr(err); } pr_debug("loaded kernel BTF from '%s'\n", sysfs_btf_path); @@ -5073,7 +5237,7 @@ struct btf *btf__load_vmlinux_btf(void) btf = btf__parse(path, NULL); err = libbpf_get_error(btf); - pr_debug("loading kernel BTF '%s': %d\n", path, err); + pr_debug("loading kernel BTF '%s': %s\n", path, errstr(err)); if (err) continue; diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h index 4e349ad79ee6..47ee8f6ac489 100644 --- a/tools/lib/bpf/btf.h +++ b/tools/lib/bpf/btf.h @@ -167,6 +167,9 @@ LIBBPF_API const char *btf__str_by_offset(const struct btf *btf, __u32 offset); LIBBPF_API struct btf_ext *btf_ext__new(const __u8 *data, __u32 size); LIBBPF_API void btf_ext__free(struct btf_ext *btf_ext); LIBBPF_API const void *btf_ext__raw_data(const struct btf_ext *btf_ext, __u32 *size); +LIBBPF_API enum btf_endianness btf_ext__endianness(const struct btf_ext *btf_ext); +LIBBPF_API int btf_ext__set_endianness(struct btf_ext *btf_ext, + enum btf_endianness endian); LIBBPF_API int btf__find_str(struct btf *btf, const char *s); LIBBPF_API int btf__add_str(struct btf *btf, const char *s); diff --git a/tools/lib/bpf/btf_dump.c b/tools/lib/bpf/btf_dump.c index 0a7327541c17..a3fc6908f6c9 100644 --- a/tools/lib/bpf/btf_dump.c +++ b/tools/lib/bpf/btf_dump.c @@ -21,6 +21,7 @@ #include "hashmap.h" #include "libbpf.h" #include "libbpf_internal.h" +#include "str_error.h" static const char PREFIXES[] = "\t\t\t\t\t\t\t\t\t\t\t\t\t"; static const size_t PREFIX_CNT = sizeof(PREFIXES) - 1; @@ -867,8 +868,8 @@ static void btf_dump_emit_bit_padding(const struct btf_dump *d, } pads[] = { {"long", d->ptr_sz * 8}, {"int", 32}, {"short", 16}, {"char", 8} }; - int new_off, pad_bits, bits, i; - const char *pad_type; + int new_off = 0, pad_bits = 0, bits, i; + const char *pad_type = NULL; if (cur_off >= next_off) return; /* no gap */ @@ -1304,7 +1305,7 @@ static void btf_dump_emit_type_decl(struct btf_dump *d, __u32 id, * chain, restore stack, emit warning, and try to * proceed nevertheless */ - pr_warn("not enough memory for decl stack:%d", err); + pr_warn("not enough memory for decl stack: %s\n", errstr(err)); d->decl_stack_cnt = stack_start; return; } diff --git a/tools/lib/bpf/btf_relocate.c b/tools/lib/bpf/btf_relocate.c index 4f7399d85eab..b72f83e15156 100644 --- a/tools/lib/bpf/btf_relocate.c +++ b/tools/lib/bpf/btf_relocate.c @@ -428,7 +428,7 @@ static int btf_relocate_rewrite_strs(struct btf_relocate *r, __u32 i) } else { off = r->str_map[*str_off]; if (!off) { - pr_warn("string '%s' [offset %u] is not mapped to base BTF", + pr_warn("string '%s' [offset %u] is not mapped to base BTF\n", btf__str_by_offset(r->btf, off), *str_off); return -ENOENT; } diff --git a/tools/lib/bpf/elf.c b/tools/lib/bpf/elf.c index b5ab1cb13e5e..823f83ad819c 100644 --- a/tools/lib/bpf/elf.c +++ b/tools/lib/bpf/elf.c @@ -24,7 +24,6 @@ int elf_open(const char *binary_path, struct elf_fd *elf_fd) { - char errmsg[STRERR_BUFSIZE]; int fd, ret; Elf *elf; @@ -38,8 +37,7 @@ int elf_open(const char *binary_path, struct elf_fd *elf_fd) fd = open(binary_path, O_RDONLY | O_CLOEXEC); if (fd < 0) { ret = -errno; - pr_warn("elf: failed to open %s: %s\n", binary_path, - libbpf_strerror_r(ret, errmsg, sizeof(errmsg))); + pr_warn("elf: failed to open %s: %s\n", binary_path, errstr(ret)); return ret; } elf = elf_begin(fd, ELF_C_READ_MMAP, NULL); diff --git a/tools/lib/bpf/features.c b/tools/lib/bpf/features.c index 50befe125ddc..760657f5224c 100644 --- a/tools/lib/bpf/features.c +++ b/tools/lib/bpf/features.c @@ -47,7 +47,6 @@ static int probe_kern_prog_name(int token_fd) static int probe_kern_global_data(int token_fd) { - char *cp, errmsg[STRERR_BUFSIZE]; struct bpf_insn insns[] = { BPF_LD_MAP_VALUE(BPF_REG_1, 0, 16), BPF_ST_MEM(BPF_DW, BPF_REG_1, 0, 42), @@ -67,9 +66,8 @@ static int probe_kern_global_data(int token_fd) map = bpf_map_create(BPF_MAP_TYPE_ARRAY, "libbpf_global", sizeof(int), 32, 1, &map_opts); if (map < 0) { ret = -errno; - cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg)); - pr_warn("Error in %s():%s(%d). Couldn't create simple array map.\n", - __func__, cp, -ret); + pr_warn("Error in %s(): %s. Couldn't create simple array map.\n", + __func__, errstr(ret)); return ret; } @@ -267,7 +265,6 @@ static int probe_kern_probe_read_kernel(int token_fd) static int probe_prog_bind_map(int token_fd) { - char *cp, errmsg[STRERR_BUFSIZE]; struct bpf_insn insns[] = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), @@ -285,9 +282,8 @@ static int probe_prog_bind_map(int token_fd) map = bpf_map_create(BPF_MAP_TYPE_ARRAY, "libbpf_det_bind", sizeof(int), 32, 1, &map_opts); if (map < 0) { ret = -errno; - cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg)); - pr_warn("Error in %s():%s(%d). Couldn't create simple array map.\n", - __func__, cp, -ret); + pr_warn("Error in %s(): %s. Couldn't create simple array map.\n", + __func__, errstr(ret)); return ret; } @@ -604,7 +600,8 @@ bool feat_supported(struct kern_feature_cache *cache, enum kern_feature_id feat_ } else if (ret == 0) { WRITE_ONCE(cache->res[feat_id], FEAT_MISSING); } else { - pr_warn("Detection of kernel %s support failed: %d\n", feat->desc, ret); + pr_warn("Detection of kernel %s support failed: %s\n", + feat->desc, errstr(ret)); WRITE_ONCE(cache->res[feat_id], FEAT_MISSING); } } diff --git a/tools/lib/bpf/gen_loader.c b/tools/lib/bpf/gen_loader.c index cf3323fd47b8..113ae4abd345 100644 --- a/tools/lib/bpf/gen_loader.c +++ b/tools/lib/bpf/gen_loader.c @@ -14,6 +14,7 @@ #include "bpf_gen_internal.h" #include "skel_internal.h" #include <asm/byteorder.h> +#include "str_error.h" #define MAX_USED_MAPS 64 #define MAX_USED_PROGS 32 @@ -393,7 +394,7 @@ int bpf_gen__finish(struct bpf_gen *gen, int nr_progs, int nr_maps) blob_fd_array_off(gen, i)); emit(gen, BPF_MOV64_IMM(BPF_REG_0, 0)); emit(gen, BPF_EXIT_INSN()); - pr_debug("gen: finish %d\n", gen->error); + pr_debug("gen: finish %s\n", errstr(gen->error)); if (!gen->error) { struct gen_loader_opts *opts = gen->opts; @@ -401,6 +402,15 @@ int bpf_gen__finish(struct bpf_gen *gen, int nr_progs, int nr_maps) opts->insns_sz = gen->insn_cur - gen->insn_start; opts->data = gen->data_start; opts->data_sz = gen->data_cur - gen->data_start; + + /* use target endianness for embedded loader */ + if (gen->swapped_endian) { + struct bpf_insn *insn = (struct bpf_insn *)opts->insns; + int insn_cnt = opts->insns_sz / sizeof(struct bpf_insn); + + for (i = 0; i < insn_cnt; i++) + bpf_insn_bswap(insn++); + } } return gen->error; } @@ -414,6 +424,28 @@ void bpf_gen__free(struct bpf_gen *gen) free(gen); } +/* + * Fields of bpf_attr are set to values in native byte-order before being + * written to the target-bound data blob, and may need endian conversion. + * This macro allows providing the correct value in situ more simply than + * writing a separate converter for *all fields* of *all records* included + * in union bpf_attr. Note that sizeof(rval) should match the assignment + * target to avoid runtime problems. + */ +#define tgt_endian(rval) ({ \ + typeof(rval) _val = (rval); \ + if (gen->swapped_endian) { \ + switch (sizeof(_val)) { \ + case 1: break; \ + case 2: _val = bswap_16(_val); break; \ + case 4: _val = bswap_32(_val); break; \ + case 8: _val = bswap_64(_val); break; \ + default: pr_warn("unsupported bswap size!\n"); \ + } \ + } \ + _val; \ +}) + void bpf_gen__load_btf(struct bpf_gen *gen, const void *btf_raw_data, __u32 btf_raw_size) { @@ -422,11 +454,12 @@ void bpf_gen__load_btf(struct bpf_gen *gen, const void *btf_raw_data, union bpf_attr attr; memset(&attr, 0, attr_size); - pr_debug("gen: load_btf: size %d\n", btf_raw_size); btf_data = add_data(gen, btf_raw_data, btf_raw_size); - attr.btf_size = btf_raw_size; + attr.btf_size = tgt_endian(btf_raw_size); btf_load_attr = add_data(gen, &attr, attr_size); + pr_debug("gen: load_btf: off %d size %d, attr: off %d size %d\n", + btf_data, btf_raw_size, btf_load_attr, attr_size); /* populate union bpf_attr with user provided log details */ move_ctx2blob(gen, attr_field(btf_load_attr, btf_log_level), 4, @@ -457,28 +490,29 @@ void bpf_gen__map_create(struct bpf_gen *gen, union bpf_attr attr; memset(&attr, 0, attr_size); - attr.map_type = map_type; - attr.key_size = key_size; - attr.value_size = value_size; - attr.map_flags = map_attr->map_flags; - attr.map_extra = map_attr->map_extra; + attr.map_type = tgt_endian(map_type); + attr.key_size = tgt_endian(key_size); + attr.value_size = tgt_endian(value_size); + attr.map_flags = tgt_endian(map_attr->map_flags); + attr.map_extra = tgt_endian(map_attr->map_extra); if (map_name) libbpf_strlcpy(attr.map_name, map_name, sizeof(attr.map_name)); - attr.numa_node = map_attr->numa_node; - attr.map_ifindex = map_attr->map_ifindex; - attr.max_entries = max_entries; - attr.btf_key_type_id = map_attr->btf_key_type_id; - attr.btf_value_type_id = map_attr->btf_value_type_id; - - pr_debug("gen: map_create: %s idx %d type %d value_type_id %d\n", - attr.map_name, map_idx, map_type, attr.btf_value_type_id); + attr.numa_node = tgt_endian(map_attr->numa_node); + attr.map_ifindex = tgt_endian(map_attr->map_ifindex); + attr.max_entries = tgt_endian(max_entries); + attr.btf_key_type_id = tgt_endian(map_attr->btf_key_type_id); + attr.btf_value_type_id = tgt_endian(map_attr->btf_value_type_id); map_create_attr = add_data(gen, &attr, attr_size); - if (attr.btf_value_type_id) + pr_debug("gen: map_create: %s idx %d type %d value_type_id %d, attr: off %d size %d\n", + map_name, map_idx, map_type, map_attr->btf_value_type_id, + map_create_attr, attr_size); + + if (map_attr->btf_value_type_id) /* populate union bpf_attr with btf_fd saved in the stack earlier */ move_stack2blob(gen, attr_field(map_create_attr, btf_fd), 4, stack_off(btf_fd)); - switch (attr.map_type) { + switch (map_type) { case BPF_MAP_TYPE_ARRAY_OF_MAPS: case BPF_MAP_TYPE_HASH_OF_MAPS: move_stack2blob(gen, attr_field(map_create_attr, inner_map_fd), 4, @@ -498,8 +532,8 @@ void bpf_gen__map_create(struct bpf_gen *gen, /* emit MAP_CREATE command */ emit_sys_bpf(gen, BPF_MAP_CREATE, map_create_attr, attr_size); debug_ret(gen, "map_create %s idx %d type %d value_size %d value_btf_id %d", - attr.map_name, map_idx, map_type, value_size, - attr.btf_value_type_id); + map_name, map_idx, map_type, value_size, + map_attr->btf_value_type_id); emit_check_err(gen); /* remember map_fd in the stack, if successful */ if (map_idx < 0) { @@ -784,12 +818,12 @@ log: emit_ksym_relo_log(gen, relo, kdesc->ref); } -static __u32 src_reg_mask(void) +static __u32 src_reg_mask(struct bpf_gen *gen) { -#if defined(__LITTLE_ENDIAN_BITFIELD) - return 0x0f; /* src_reg,dst_reg,... */ -#elif defined(__BIG_ENDIAN_BITFIELD) - return 0xf0; /* dst_reg,src_reg,... */ +#if defined(__LITTLE_ENDIAN_BITFIELD) /* src_reg,dst_reg,... */ + return gen->swapped_endian ? 0xf0 : 0x0f; +#elif defined(__BIG_ENDIAN_BITFIELD) /* dst_reg,src_reg,... */ + return gen->swapped_endian ? 0x0f : 0xf0; #else #error "Unsupported bit endianness, cannot proceed" #endif @@ -840,7 +874,7 @@ static void emit_relo_ksym_btf(struct bpf_gen *gen, struct ksym_relo_desc *relo, emit(gen, BPF_JMP_IMM(BPF_JA, 0, 0, 3)); clear_src_reg: /* clear bpf_object__relocate_data's src_reg assignment, otherwise we get a verifier failure */ - reg_mask = src_reg_mask(); + reg_mask = src_reg_mask(gen); emit(gen, BPF_LDX_MEM(BPF_B, BPF_REG_9, BPF_REG_8, offsetofend(struct bpf_insn, code))); emit(gen, BPF_ALU32_IMM(BPF_AND, BPF_REG_9, reg_mask)); emit(gen, BPF_STX_MEM(BPF_B, BPF_REG_8, BPF_REG_9, offsetofend(struct bpf_insn, code))); @@ -931,48 +965,94 @@ static void cleanup_relos(struct bpf_gen *gen, int insns) cleanup_core_relo(gen); } +/* Convert func, line, and core relo info blobs to target endianness */ +static void info_blob_bswap(struct bpf_gen *gen, int func_info, int line_info, + int core_relos, struct bpf_prog_load_opts *load_attr) +{ + struct bpf_func_info *fi = gen->data_start + func_info; + struct bpf_line_info *li = gen->data_start + line_info; + struct bpf_core_relo *cr = gen->data_start + core_relos; + int i; + + for (i = 0; i < load_attr->func_info_cnt; i++) + bpf_func_info_bswap(fi++); + + for (i = 0; i < load_attr->line_info_cnt; i++) + bpf_line_info_bswap(li++); + + for (i = 0; i < gen->core_relo_cnt; i++) + bpf_core_relo_bswap(cr++); +} + void bpf_gen__prog_load(struct bpf_gen *gen, enum bpf_prog_type prog_type, const char *prog_name, const char *license, struct bpf_insn *insns, size_t insn_cnt, struct bpf_prog_load_opts *load_attr, int prog_idx) { + int func_info_tot_sz = load_attr->func_info_cnt * + load_attr->func_info_rec_size; + int line_info_tot_sz = load_attr->line_info_cnt * + load_attr->line_info_rec_size; + int core_relo_tot_sz = gen->core_relo_cnt * + sizeof(struct bpf_core_relo); int prog_load_attr, license_off, insns_off, func_info, line_info, core_relos; int attr_size = offsetofend(union bpf_attr, core_relo_rec_size); union bpf_attr attr; memset(&attr, 0, attr_size); - pr_debug("gen: prog_load: type %d insns_cnt %zd progi_idx %d\n", - prog_type, insn_cnt, prog_idx); /* add license string to blob of bytes */ license_off = add_data(gen, license, strlen(license) + 1); /* add insns to blob of bytes */ insns_off = add_data(gen, insns, insn_cnt * sizeof(struct bpf_insn)); + pr_debug("gen: prog_load: prog_idx %d type %d insn off %d insns_cnt %zd license off %d\n", + prog_idx, prog_type, insns_off, insn_cnt, license_off); - attr.prog_type = prog_type; - attr.expected_attach_type = load_attr->expected_attach_type; - attr.attach_btf_id = load_attr->attach_btf_id; - attr.prog_ifindex = load_attr->prog_ifindex; - attr.kern_version = 0; - attr.insn_cnt = (__u32)insn_cnt; - attr.prog_flags = load_attr->prog_flags; - - attr.func_info_rec_size = load_attr->func_info_rec_size; - attr.func_info_cnt = load_attr->func_info_cnt; - func_info = add_data(gen, load_attr->func_info, - attr.func_info_cnt * attr.func_info_rec_size); + /* convert blob insns to target endianness */ + if (gen->swapped_endian) { + struct bpf_insn *insn = gen->data_start + insns_off; + int i; - attr.line_info_rec_size = load_attr->line_info_rec_size; - attr.line_info_cnt = load_attr->line_info_cnt; - line_info = add_data(gen, load_attr->line_info, - attr.line_info_cnt * attr.line_info_rec_size); + for (i = 0; i < insn_cnt; i++, insn++) + bpf_insn_bswap(insn); + } - attr.core_relo_rec_size = sizeof(struct bpf_core_relo); - attr.core_relo_cnt = gen->core_relo_cnt; - core_relos = add_data(gen, gen->core_relos, - attr.core_relo_cnt * attr.core_relo_rec_size); + attr.prog_type = tgt_endian(prog_type); + attr.expected_attach_type = tgt_endian(load_attr->expected_attach_type); + attr.attach_btf_id = tgt_endian(load_attr->attach_btf_id); + attr.prog_ifindex = tgt_endian(load_attr->prog_ifindex); + attr.kern_version = 0; + attr.insn_cnt = tgt_endian((__u32)insn_cnt); + attr.prog_flags = tgt_endian(load_attr->prog_flags); + + attr.func_info_rec_size = tgt_endian(load_attr->func_info_rec_size); + attr.func_info_cnt = tgt_endian(load_attr->func_info_cnt); + func_info = add_data(gen, load_attr->func_info, func_info_tot_sz); + pr_debug("gen: prog_load: func_info: off %d cnt %d rec size %d\n", + func_info, load_attr->func_info_cnt, + load_attr->func_info_rec_size); + + attr.line_info_rec_size = tgt_endian(load_attr->line_info_rec_size); + attr.line_info_cnt = tgt_endian(load_attr->line_info_cnt); + line_info = add_data(gen, load_attr->line_info, line_info_tot_sz); + pr_debug("gen: prog_load: line_info: off %d cnt %d rec size %d\n", + line_info, load_attr->line_info_cnt, + load_attr->line_info_rec_size); + + attr.core_relo_rec_size = tgt_endian((__u32)sizeof(struct bpf_core_relo)); + attr.core_relo_cnt = tgt_endian(gen->core_relo_cnt); + core_relos = add_data(gen, gen->core_relos, core_relo_tot_sz); + pr_debug("gen: prog_load: core_relos: off %d cnt %d rec size %zd\n", + core_relos, gen->core_relo_cnt, + sizeof(struct bpf_core_relo)); + + /* convert all info blobs to target endianness */ + if (gen->swapped_endian) + info_blob_bswap(gen, func_info, line_info, core_relos, load_attr); libbpf_strlcpy(attr.prog_name, prog_name, sizeof(attr.prog_name)); prog_load_attr = add_data(gen, &attr, attr_size); + pr_debug("gen: prog_load: attr: off %d size %d\n", + prog_load_attr, attr_size); /* populate union bpf_attr with a pointer to license */ emit_rel_store(gen, attr_field(prog_load_attr, license), license_off); @@ -1040,7 +1120,6 @@ void bpf_gen__map_update_elem(struct bpf_gen *gen, int map_idx, void *pvalue, int zero = 0; memset(&attr, 0, attr_size); - pr_debug("gen: map_update_elem: idx %d\n", map_idx); value = add_data(gen, pvalue, value_size); key = add_data(gen, &zero, sizeof(zero)); @@ -1068,6 +1147,8 @@ void bpf_gen__map_update_elem(struct bpf_gen *gen, int map_idx, void *pvalue, emit(gen, BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel)); map_update_attr = add_data(gen, &attr, attr_size); + pr_debug("gen: map_update_elem: idx %d, value: off %d size %d, attr: off %d size %d\n", + map_idx, value, value_size, map_update_attr, attr_size); move_blob2blob(gen, attr_field(map_update_attr, map_fd), 4, blob_fd_array_off(gen, map_idx)); emit_rel_store(gen, attr_field(map_update_attr, key), key); @@ -1084,14 +1165,16 @@ void bpf_gen__populate_outer_map(struct bpf_gen *gen, int outer_map_idx, int slo int attr_size = offsetofend(union bpf_attr, flags); int map_update_attr, key; union bpf_attr attr; + int tgt_slot; memset(&attr, 0, attr_size); - pr_debug("gen: populate_outer_map: outer %d key %d inner %d\n", - outer_map_idx, slot, inner_map_idx); - key = add_data(gen, &slot, sizeof(slot)); + tgt_slot = tgt_endian(slot); + key = add_data(gen, &tgt_slot, sizeof(tgt_slot)); map_update_attr = add_data(gen, &attr, attr_size); + pr_debug("gen: populate_outer_map: outer %d key %d inner %d, attr: off %d size %d\n", + outer_map_idx, slot, inner_map_idx, map_update_attr, attr_size); move_blob2blob(gen, attr_field(map_update_attr, map_fd), 4, blob_fd_array_off(gen, outer_map_idx)); emit_rel_store(gen, attr_field(map_update_attr, key), key); @@ -1112,8 +1195,9 @@ void bpf_gen__map_freeze(struct bpf_gen *gen, int map_idx) union bpf_attr attr; memset(&attr, 0, attr_size); - pr_debug("gen: map_freeze: idx %d\n", map_idx); map_freeze_attr = add_data(gen, &attr, attr_size); + pr_debug("gen: map_freeze: idx %d, attr: off %d size %d\n", + map_idx, map_freeze_attr, attr_size); move_blob2blob(gen, attr_field(map_freeze_attr, map_fd), 4, blob_fd_array_off(gen, map_idx)); /* emit MAP_FREEZE command */ diff --git a/tools/lib/bpf/hashmap.h b/tools/lib/bpf/hashmap.h index c12f8320e668..0c4f155e8eb7 100644 --- a/tools/lib/bpf/hashmap.h +++ b/tools/lib/bpf/hashmap.h @@ -166,8 +166,8 @@ bool hashmap_find(const struct hashmap *map, long key, long *value); * @bkt: integer used as a bucket loop cursor */ #define hashmap__for_each_entry(map, cur, bkt) \ - for (bkt = 0; bkt < map->cap; bkt++) \ - for (cur = map->buckets[bkt]; cur; cur = cur->next) + for (bkt = 0; bkt < (map)->cap; bkt++) \ + for (cur = (map)->buckets[bkt]; cur; cur = cur->next) /* * hashmap__for_each_entry_safe - iterate over all entries in hashmap, safe @@ -178,8 +178,8 @@ bool hashmap_find(const struct hashmap *map, long key, long *value); * @bkt: integer used as a bucket loop cursor */ #define hashmap__for_each_entry_safe(map, cur, tmp, bkt) \ - for (bkt = 0; bkt < map->cap; bkt++) \ - for (cur = map->buckets[bkt]; \ + for (bkt = 0; bkt < (map)->cap; bkt++) \ + for (cur = (map)->buckets[bkt]; \ cur && ({tmp = cur->next; true; }); \ cur = tmp) @@ -190,19 +190,19 @@ bool hashmap_find(const struct hashmap *map, long key, long *value); * @key: key to iterate entries for */ #define hashmap__for_each_key_entry(map, cur, _key) \ - for (cur = map->buckets \ - ? map->buckets[hash_bits(map->hash_fn((_key), map->ctx), map->cap_bits)] \ + for (cur = (map)->buckets \ + ? (map)->buckets[hash_bits((map)->hash_fn((_key), (map)->ctx), (map)->cap_bits)] \ : NULL; \ cur; \ cur = cur->next) \ - if (map->equal_fn(cur->key, (_key), map->ctx)) + if ((map)->equal_fn(cur->key, (_key), (map)->ctx)) #define hashmap__for_each_key_entry_safe(map, cur, tmp, _key) \ - for (cur = map->buckets \ - ? map->buckets[hash_bits(map->hash_fn((_key), map->ctx), map->cap_bits)] \ + for (cur = (map)->buckets \ + ? (map)->buckets[hash_bits((map)->hash_fn((_key), (map)->ctx), (map)->cap_bits)] \ : NULL; \ cur && ({ tmp = cur->next; true; }); \ cur = tmp) \ - if (map->equal_fn(cur->key, (_key), map->ctx)) + if ((map)->equal_fn(cur->key, (_key), (map)->ctx)) #endif /* __LIBBPF_HASHMAP_H */ diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 219facd0e66e..66173ddb5a2d 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -133,6 +133,7 @@ static const char * const attach_type_name[] = { [BPF_NETKIT_PRIMARY] = "netkit_primary", [BPF_NETKIT_PEER] = "netkit_peer", [BPF_TRACE_KPROBE_SESSION] = "trace_kprobe_session", + [BPF_TRACE_UPROBE_SESSION] = "trace_uprobe_session", }; static const char * const link_type_name[] = { @@ -694,6 +695,8 @@ struct bpf_object { /* Information when doing ELF related work. Only valid if efile.elf is not NULL */ struct elf_state efile; + unsigned char byteorder; + struct btf *btf; struct btf_ext *btf_ext; @@ -940,6 +943,20 @@ bpf_object__add_programs(struct bpf_object *obj, Elf_Data *sec_data, return 0; } +static void bpf_object_bswap_progs(struct bpf_object *obj) +{ + struct bpf_program *prog = obj->programs; + struct bpf_insn *insn; + int p, i; + + for (p = 0; p < obj->nr_programs; p++, prog++) { + insn = prog->insns; + for (i = 0; i < prog->insns_cnt; i++, insn++) + bpf_insn_bswap(insn); + } + pr_debug("converted %zu BPF programs to native byte order\n", obj->nr_programs); +} + static const struct btf_member * find_member_by_offset(const struct btf_type *t, __u32 bit_offset) { @@ -1506,6 +1523,7 @@ static void bpf_object__elf_finish(struct bpf_object *obj) elf_end(obj->efile.elf); obj->efile.elf = NULL; + obj->efile.ehdr = NULL; obj->efile.symbols = NULL; obj->efile.arena_data = NULL; @@ -1533,11 +1551,8 @@ static int bpf_object__elf_init(struct bpf_object *obj) } else { obj->efile.fd = open(obj->path, O_RDONLY | O_CLOEXEC); if (obj->efile.fd < 0) { - char errmsg[STRERR_BUFSIZE], *cp; - err = -errno; - cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg)); - pr_warn("elf: failed to open %s: %s\n", obj->path, cp); + pr_warn("elf: failed to open %s: %s\n", obj->path, errstr(err)); return err; } @@ -1571,6 +1586,16 @@ static int bpf_object__elf_init(struct bpf_object *obj) goto errout; } + /* Validate ELF object endianness... */ + if (ehdr->e_ident[EI_DATA] != ELFDATA2LSB && + ehdr->e_ident[EI_DATA] != ELFDATA2MSB) { + err = -LIBBPF_ERRNO__ENDIAN; + pr_warn("elf: '%s' has unknown byte order\n", obj->path); + goto errout; + } + /* and save after bpf_object_open() frees ELF data */ + obj->byteorder = ehdr->e_ident[EI_DATA]; + if (elf_getshdrstrndx(elf, &obj->efile.shstrndx)) { pr_warn("elf: failed to get section names section index for %s: %s\n", obj->path, elf_errmsg(-1)); @@ -1599,19 +1624,15 @@ errout: return err; } -static int bpf_object__check_endianness(struct bpf_object *obj) +static bool is_native_endianness(struct bpf_object *obj) { #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ - if (obj->efile.ehdr->e_ident[EI_DATA] == ELFDATA2LSB) - return 0; + return obj->byteorder == ELFDATA2LSB; #elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ - if (obj->efile.ehdr->e_ident[EI_DATA] == ELFDATA2MSB) - return 0; + return obj->byteorder == ELFDATA2MSB; #else # error "Unrecognized __BYTE_ORDER__" #endif - pr_warn("elf: endianness mismatch in %s.\n", obj->path); - return -LIBBPF_ERRNO__ENDIAN; } static int @@ -1937,8 +1958,7 @@ bpf_object__init_internal_map(struct bpf_object *obj, enum libbpf_map_type type, if (map->mmaped == MAP_FAILED) { err = -errno; map->mmaped = NULL; - pr_warn("failed to alloc map '%s' content buffer: %d\n", - map->name, err); + pr_warn("failed to alloc map '%s' content buffer: %s\n", map->name, errstr(err)); zfree(&map->real_name); zfree(&map->name); return err; @@ -2102,7 +2122,7 @@ static int parse_u64(const char *value, __u64 *res) *res = strtoull(value, &value_end, 0); if (errno) { err = -errno; - pr_warn("failed to parse '%s' as integer: %d\n", value, err); + pr_warn("failed to parse '%s': %s\n", value, errstr(err)); return err; } if (*value_end) { @@ -2268,8 +2288,8 @@ static int bpf_object__read_kconfig_file(struct bpf_object *obj, void *data) while (gzgets(file, buf, sizeof(buf))) { err = bpf_object__process_kconfig_line(obj, buf, data); if (err) { - pr_warn("error parsing system Kconfig line '%s': %d\n", - buf, err); + pr_warn("error parsing system Kconfig line '%s': %s\n", + buf, errstr(err)); goto out; } } @@ -2289,15 +2309,15 @@ static int bpf_object__read_kconfig_mem(struct bpf_object *obj, file = fmemopen((void *)config, strlen(config), "r"); if (!file) { err = -errno; - pr_warn("failed to open in-memory Kconfig: %d\n", err); + pr_warn("failed to open in-memory Kconfig: %s\n", errstr(err)); return err; } while (fgets(buf, sizeof(buf), file)) { err = bpf_object__process_kconfig_line(obj, buf, data); if (err) { - pr_warn("error parsing in-memory Kconfig line '%s': %d\n", - buf, err); + pr_warn("error parsing in-memory Kconfig line '%s': %s\n", + buf, errstr(err)); break; } } @@ -3212,7 +3232,7 @@ static int bpf_object__init_btf(struct bpf_object *obj, err = libbpf_get_error(obj->btf); if (err) { obj->btf = NULL; - pr_warn("Error loading ELF section %s: %d.\n", BTF_ELF_SEC, err); + pr_warn("Error loading ELF section %s: %s.\n", BTF_ELF_SEC, errstr(err)); goto out; } /* enforce 8-byte pointers for BPF-targeted BTFs */ @@ -3230,8 +3250,8 @@ static int bpf_object__init_btf(struct bpf_object *obj, obj->btf_ext = btf_ext__new(btf_ext_data->d_buf, btf_ext_data->d_size); err = libbpf_get_error(obj->btf_ext); if (err) { - pr_warn("Error loading ELF section %s: %d. Ignored and continue.\n", - BTF_EXT_ELF_SEC, err); + pr_warn("Error loading ELF section %s: %s. Ignored and continue.\n", + BTF_EXT_ELF_SEC, errstr(err)); obj->btf_ext = NULL; goto out; } @@ -3323,8 +3343,8 @@ static int btf_fixup_datasec(struct bpf_object *obj, struct btf *btf, if (t->size == 0) { err = find_elf_sec_sz(obj, sec_name, &size); if (err || !size) { - pr_debug("sec '%s': failed to determine size from ELF: size %u, err %d\n", - sec_name, size, err); + pr_debug("sec '%s': failed to determine size from ELF: size %u, err %s\n", + sec_name, size, errstr(err)); return -ENOENT; } @@ -3478,7 +3498,7 @@ static int bpf_object__load_vmlinux_btf(struct bpf_object *obj, bool force) obj->btf_vmlinux = btf__load_vmlinux_btf(); err = libbpf_get_error(obj->btf_vmlinux); if (err) { - pr_warn("Error loading vmlinux BTF: %d\n", err); + pr_warn("Error loading vmlinux BTF: %s\n", errstr(err)); obj->btf_vmlinux = NULL; return err; } @@ -3581,11 +3601,14 @@ static int bpf_object__sanitize_and_load_btf(struct bpf_object *obj) report: if (err) { btf_mandatory = kernel_needs_btf(obj); - pr_warn("Error loading .BTF into kernel: %d. %s\n", err, - btf_mandatory ? "BTF is mandatory, can't proceed." - : "BTF is optional, ignoring."); - if (!btf_mandatory) + if (btf_mandatory) { + pr_warn("Error loading .BTF into kernel: %s. BTF is mandatory, can't proceed.\n", + errstr(err)); + } else { + pr_info("Error loading .BTF into kernel: %s. BTF is optional, ignoring.\n", + errstr(err)); err = 0; + } } return err; } @@ -3953,6 +3976,10 @@ static int bpf_object__elf_collect(struct bpf_object *obj) return -LIBBPF_ERRNO__FORMAT; } + /* change BPF program insns to native endianness for introspection */ + if (!is_native_endianness(obj)) + bpf_object_bswap_progs(obj); + /* sort BPF programs by section name and in-section instruction offset * for faster search */ @@ -3985,7 +4012,7 @@ static bool sym_is_subprog(const Elf64_Sym *sym, int text_shndx) return true; /* global function */ - return bind == STB_GLOBAL && type == STT_FUNC; + return (bind == STB_GLOBAL || bind == STB_WEAK) && type == STT_FUNC; } static int find_extern_btf_id(const struct btf *btf, const char *ext_name) @@ -4389,7 +4416,7 @@ static int bpf_object__collect_externs(struct bpf_object *obj) static bool prog_is_subprog(const struct bpf_object *obj, const struct bpf_program *prog) { - return prog->sec_idx == obj->efile.text_shndx && obj->nr_programs > 1; + return prog->sec_idx == obj->efile.text_shndx; } struct bpf_program * @@ -4783,8 +4810,8 @@ static int bpf_get_map_info_from_fdinfo(int fd, struct bpf_map_info *info) fp = fopen(file, "re"); if (!fp) { err = -errno; - pr_warn("failed to open %s: %d. No procfs support?\n", file, - err); + pr_warn("failed to open %s: %s. No procfs support?\n", file, + errstr(err)); return err; } @@ -4939,8 +4966,8 @@ static int bpf_object_prepare_token(struct bpf_object *obj) bpffs_fd = open(bpffs_path, O_DIRECTORY, O_RDWR); if (bpffs_fd < 0) { err = -errno; - __pr(level, "object '%s': failed (%d) to open BPF FS mount at '%s'%s\n", - obj->name, err, bpffs_path, + __pr(level, "object '%s': failed (%s) to open BPF FS mount at '%s'%s\n", + obj->name, errstr(err), bpffs_path, mandatory ? "" : ", skipping optional step..."); return mandatory ? err : 0; } @@ -4974,7 +5001,6 @@ static int bpf_object_prepare_token(struct bpf_object *obj) static int bpf_object__probe_loading(struct bpf_object *obj) { - char *cp, errmsg[STRERR_BUFSIZE]; struct bpf_insn insns[] = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), @@ -4990,7 +5016,8 @@ bpf_object__probe_loading(struct bpf_object *obj) ret = bump_rlimit_memlock(); if (ret) - pr_warn("Failed to bump RLIMIT_MEMLOCK (err = %d), you might need to do it explicitly!\n", ret); + pr_warn("Failed to bump RLIMIT_MEMLOCK (err = %s), you might need to do it explicitly!\n", + errstr(ret)); /* make sure basic loading works */ ret = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", insns, insn_cnt, &opts); @@ -4998,11 +5025,8 @@ bpf_object__probe_loading(struct bpf_object *obj) ret = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL", insns, insn_cnt, &opts); if (ret < 0) { ret = errno; - cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg)); - pr_warn("Error in %s():%s(%d). Couldn't load trivial BPF " - "program. Make sure your kernel supports BPF " - "(CONFIG_BPF_SYSCALL=y) and/or that RLIMIT_MEMLOCK is " - "set to big enough value.\n", __func__, cp, ret); + pr_warn("Error in %s(): %s. Couldn't load trivial BPF program. Make sure your kernel supports BPF (CONFIG_BPF_SYSCALL=y) and/or that RLIMIT_MEMLOCK is set to big enough value.\n", + __func__, errstr(ret)); return -ret; } close(ret); @@ -5027,7 +5051,6 @@ bool kernel_supports(const struct bpf_object *obj, enum kern_feature_id feat_id) static bool map_is_reuse_compat(const struct bpf_map *map, int map_fd) { struct bpf_map_info map_info; - char msg[STRERR_BUFSIZE]; __u32 map_info_len = sizeof(map_info); int err; @@ -5037,7 +5060,7 @@ static bool map_is_reuse_compat(const struct bpf_map *map, int map_fd) err = bpf_get_map_info_from_fdinfo(map_fd, &map_info); if (err) { pr_warn("failed to get map info for map FD %d: %s\n", map_fd, - libbpf_strerror_r(errno, msg, sizeof(msg))); + errstr(err)); return false; } @@ -5052,7 +5075,6 @@ static bool map_is_reuse_compat(const struct bpf_map *map, int map_fd) static int bpf_object__reuse_map(struct bpf_map *map) { - char *cp, errmsg[STRERR_BUFSIZE]; int err, pin_fd; pin_fd = bpf_obj_get(map->pin_path); @@ -5064,9 +5086,8 @@ bpf_object__reuse_map(struct bpf_map *map) return 0; } - cp = libbpf_strerror_r(-err, errmsg, sizeof(errmsg)); pr_warn("couldn't retrieve pinned map '%s': %s\n", - map->pin_path, cp); + map->pin_path, errstr(err)); return err; } @@ -5092,8 +5113,8 @@ static int bpf_object__populate_internal_map(struct bpf_object *obj, struct bpf_map *map) { enum libbpf_map_type map_type = map->libbpf_type; - char *cp, errmsg[STRERR_BUFSIZE]; int err, zero = 0; + size_t mmap_sz; if (obj->gen_loader) { bpf_gen__map_update_elem(obj->gen_loader, map - obj->maps, @@ -5106,9 +5127,8 @@ bpf_object__populate_internal_map(struct bpf_object *obj, struct bpf_map *map) err = bpf_map_update_elem(map->fd, &zero, map->mmaped, 0); if (err) { err = -errno; - cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg)); - pr_warn("Error setting initial map(%s) contents: %s\n", - map->name, cp); + pr_warn("map '%s': failed to set initial contents: %s\n", + bpf_map__name(map), errstr(err)); return err; } @@ -5117,12 +5137,43 @@ bpf_object__populate_internal_map(struct bpf_object *obj, struct bpf_map *map) err = bpf_map_freeze(map->fd); if (err) { err = -errno; - cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg)); - pr_warn("Error freezing map(%s) as read-only: %s\n", - map->name, cp); + pr_warn("map '%s': failed to freeze as read-only: %s\n", + bpf_map__name(map), errstr(err)); return err; } } + + /* Remap anonymous mmap()-ed "map initialization image" as + * a BPF map-backed mmap()-ed memory, but preserving the same + * memory address. This will cause kernel to change process' + * page table to point to a different piece of kernel memory, + * but from userspace point of view memory address (and its + * contents, being identical at this point) will stay the + * same. This mapping will be released by bpf_object__close() + * as per normal clean up procedure. + */ + mmap_sz = bpf_map_mmap_sz(map); + if (map->def.map_flags & BPF_F_MMAPABLE) { + void *mmaped; + int prot; + + if (map->def.map_flags & BPF_F_RDONLY_PROG) + prot = PROT_READ; + else + prot = PROT_READ | PROT_WRITE; + mmaped = mmap(map->mmaped, mmap_sz, prot, MAP_SHARED | MAP_FIXED, map->fd, 0); + if (mmaped == MAP_FAILED) { + err = -errno; + pr_warn("map '%s': failed to re-mmap() contents: %s\n", + bpf_map__name(map), errstr(err)); + return err; + } + map->mmaped = mmaped; + } else if (map->mmaped) { + munmap(map->mmaped, mmap_sz); + map->mmaped = NULL; + } + return 0; } @@ -5171,8 +5222,8 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, b return err; err = bpf_object__create_map(obj, map->inner_map, true); if (err) { - pr_warn("map '%s': failed to create inner map: %d\n", - map->name, err); + pr_warn("map '%s': failed to create inner map: %s\n", + map->name, errstr(err)); return err; } map->inner_map_fd = map->inner_map->fd; @@ -5226,12 +5277,9 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, b def->max_entries, &create_attr); } if (map_fd < 0 && (create_attr.btf_key_type_id || create_attr.btf_value_type_id)) { - char *cp, errmsg[STRERR_BUFSIZE]; - err = -errno; - cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg)); - pr_warn("Error in bpf_create_map_xattr(%s):%s(%d). Retrying without BTF.\n", - map->name, cp, err); + pr_warn("Error in bpf_create_map_xattr(%s): %s. Retrying without BTF.\n", + map->name, errstr(err)); create_attr.btf_fd = 0; create_attr.btf_key_type_id = 0; create_attr.btf_value_type_id = 0; @@ -5286,8 +5334,8 @@ static int init_map_in_map_slots(struct bpf_object *obj, struct bpf_map *map) } if (err) { err = -errno; - pr_warn("map '%s': failed to initialize slot [%d] to map '%s' fd=%d: %d\n", - map->name, i, targ_map->name, fd, err); + pr_warn("map '%s': failed to initialize slot [%d] to map '%s' fd=%d: %s\n", + map->name, i, targ_map->name, fd, errstr(err)); return err; } pr_debug("map '%s': slot [%d] set to map '%s' fd=%d\n", @@ -5319,8 +5367,8 @@ static int init_prog_array_slots(struct bpf_object *obj, struct bpf_map *map) err = bpf_map_update_elem(map->fd, &i, &fd, 0); if (err) { err = -errno; - pr_warn("map '%s': failed to initialize slot [%d] to prog '%s' fd=%d: %d\n", - map->name, i, targ_prog->name, fd, err); + pr_warn("map '%s': failed to initialize slot [%d] to prog '%s' fd=%d: %s\n", + map->name, i, targ_prog->name, fd, errstr(err)); return err; } pr_debug("map '%s': slot [%d] set to prog '%s' fd=%d\n", @@ -5373,7 +5421,6 @@ static int bpf_object__create_maps(struct bpf_object *obj) { struct bpf_map *map; - char *cp, errmsg[STRERR_BUFSIZE]; unsigned int i, j; int err; bool retried; @@ -5439,8 +5486,7 @@ retry: err = bpf_object__populate_internal_map(obj, map); if (err < 0) goto err_out; - } - if (map->def.type == BPF_MAP_TYPE_ARENA) { + } else if (map->def.type == BPF_MAP_TYPE_ARENA) { map->mmaped = mmap((void *)(long)map->map_extra, bpf_map_mmap_sz(map), PROT_READ | PROT_WRITE, map->map_extra ? MAP_SHARED | MAP_FIXED : MAP_SHARED, @@ -5448,8 +5494,8 @@ retry: if (map->mmaped == MAP_FAILED) { err = -errno; map->mmaped = NULL; - pr_warn("map '%s': failed to mmap arena: %d\n", - map->name, err); + pr_warn("map '%s': failed to mmap arena: %s\n", + map->name, errstr(err)); return err; } if (obj->arena_data) { @@ -5471,8 +5517,8 @@ retry: retried = true; goto retry; } - pr_warn("map '%s': failed to auto-pin at '%s': %d\n", - map->name, map->pin_path, err); + pr_warn("map '%s': failed to auto-pin at '%s': %s\n", + map->name, map->pin_path, errstr(err)); goto err_out; } } @@ -5481,8 +5527,7 @@ retry: return 0; err_out: - cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg)); - pr_warn("map '%s': failed to create: %s(%d)\n", map->name, cp, err); + pr_warn("map '%s': failed to create: %s\n", map->name, errstr(err)); pr_perm_msg(err); for (j = 0; j < i; j++) zclose(obj->maps[j].fd); @@ -5606,7 +5651,7 @@ static int load_module_btfs(struct bpf_object *obj) } if (err) { err = -errno; - pr_warn("failed to iterate BTF objects: %d\n", err); + pr_warn("failed to iterate BTF objects: %s\n", errstr(err)); return err; } @@ -5615,7 +5660,7 @@ static int load_module_btfs(struct bpf_object *obj) if (errno == ENOENT) continue; /* expected race: BTF was unloaded */ err = -errno; - pr_warn("failed to get BTF object #%d FD: %d\n", id, err); + pr_warn("failed to get BTF object #%d FD: %s\n", id, errstr(err)); return err; } @@ -5627,7 +5672,7 @@ static int load_module_btfs(struct bpf_object *obj) err = bpf_btf_get_info_by_fd(fd, &info, &len); if (err) { err = -errno; - pr_warn("failed to get BTF object #%d info: %d\n", id, err); + pr_warn("failed to get BTF object #%d info: %s\n", id, errstr(err)); goto err_out; } @@ -5640,8 +5685,8 @@ static int load_module_btfs(struct bpf_object *obj) btf = btf_get_from_fd(fd, obj->btf_vmlinux); err = libbpf_get_error(btf); if (err) { - pr_warn("failed to load module [%s]'s BTF object #%d: %d\n", - name, id, err); + pr_warn("failed to load module [%s]'s BTF object #%d: %s\n", + name, id, errstr(err)); goto err_out; } @@ -5870,7 +5915,7 @@ bpf_object__relocate_core(struct bpf_object *obj, const char *targ_btf_path) obj->btf_vmlinux_override = btf__parse(targ_btf_path, NULL); err = libbpf_get_error(obj->btf_vmlinux_override); if (err) { - pr_warn("failed to parse target BTF: %d\n", err); + pr_warn("failed to parse target BTF: %s\n", errstr(err)); return err; } } @@ -5930,8 +5975,8 @@ bpf_object__relocate_core(struct bpf_object *obj, const char *targ_btf_path) err = record_relo_core(prog, rec, insn_idx); if (err) { - pr_warn("prog '%s': relo #%d: failed to record relocation: %d\n", - prog->name, i, err); + pr_warn("prog '%s': relo #%d: failed to record relocation: %s\n", + prog->name, i, errstr(err)); goto out; } @@ -5940,15 +5985,15 @@ bpf_object__relocate_core(struct bpf_object *obj, const char *targ_btf_path) err = bpf_core_resolve_relo(prog, rec, i, obj->btf, cand_cache, &targ_res); if (err) { - pr_warn("prog '%s': relo #%d: failed to relocate: %d\n", - prog->name, i, err); + pr_warn("prog '%s': relo #%d: failed to relocate: %s\n", + prog->name, i, errstr(err)); goto out; } err = bpf_core_patch_insn(prog->name, insn, insn_idx, rec, i, &targ_res); if (err) { - pr_warn("prog '%s': relo #%d: failed to patch insn #%u: %d\n", - prog->name, i, insn_idx, err); + pr_warn("prog '%s': relo #%d: failed to patch insn #%u: %s\n", + prog->name, i, insn_idx, errstr(err)); goto out; } } @@ -6216,8 +6261,8 @@ reloc_prog_func_and_line_info(const struct bpf_object *obj, &main_prog->func_info_rec_size); if (err) { if (err != -ENOENT) { - pr_warn("prog '%s': error relocating .BTF.ext function info: %d\n", - prog->name, err); + pr_warn("prog '%s': error relocating .BTF.ext function info: %s\n", + prog->name, errstr(err)); return err; } if (main_prog->func_info) { @@ -6244,8 +6289,8 @@ line_info: &main_prog->line_info_rec_size); if (err) { if (err != -ENOENT) { - pr_warn("prog '%s': error relocating .BTF.ext line info: %d\n", - prog->name, err); + pr_warn("prog '%s': error relocating .BTF.ext line info: %s\n", + prog->name, errstr(err)); return err; } if (main_prog->line_info) { @@ -7009,8 +7054,8 @@ static int bpf_object__relocate(struct bpf_object *obj, const char *targ_btf_pat if (obj->btf_ext) { err = bpf_object__relocate_core(obj, targ_btf_path); if (err) { - pr_warn("failed to perform CO-RE relocations: %d\n", - err); + pr_warn("failed to perform CO-RE relocations: %s\n", + errstr(err)); return err; } bpf_object__sort_relos(obj); @@ -7054,8 +7099,8 @@ static int bpf_object__relocate(struct bpf_object *obj, const char *targ_btf_pat err = bpf_object__relocate_calls(obj, prog); if (err) { - pr_warn("prog '%s': failed to relocate calls: %d\n", - prog->name, err); + pr_warn("prog '%s': failed to relocate calls: %s\n", + prog->name, errstr(err)); return err; } @@ -7091,16 +7136,16 @@ static int bpf_object__relocate(struct bpf_object *obj, const char *targ_btf_pat /* Process data relos for main programs */ err = bpf_object__relocate_data(obj, prog); if (err) { - pr_warn("prog '%s': failed to relocate data references: %d\n", - prog->name, err); + pr_warn("prog '%s': failed to relocate data references: %s\n", + prog->name, errstr(err)); return err; } /* Fix up .BTF.ext information, if necessary */ err = bpf_program_fixup_func_info(obj, prog); if (err) { - pr_warn("prog '%s': failed to perform .BTF.ext fix ups: %d\n", - prog->name, err); + pr_warn("prog '%s': failed to perform .BTF.ext fix ups: %s\n", + prog->name, errstr(err)); return err; } } @@ -7352,8 +7397,14 @@ static int libbpf_prepare_prog_load(struct bpf_program *prog, opts->prog_flags |= BPF_F_XDP_HAS_FRAGS; /* special check for usdt to use uprobe_multi link */ - if ((def & SEC_USDT) && kernel_supports(prog->obj, FEAT_UPROBE_MULTI_LINK)) + if ((def & SEC_USDT) && kernel_supports(prog->obj, FEAT_UPROBE_MULTI_LINK)) { + /* for BPF_TRACE_UPROBE_MULTI, user might want to query expected_attach_type + * in prog, and expected_attach_type we set in kernel is from opts, so we + * update both. + */ prog->expected_attach_type = BPF_TRACE_UPROBE_MULTI; + opts->expected_attach_type = BPF_TRACE_UPROBE_MULTI; + } if ((def & SEC_ATTACH_BTF) && !prog->attach_btf_id) { int btf_obj_fd = 0, btf_type_id = 0, err; @@ -7403,7 +7454,6 @@ static int bpf_object_load_prog(struct bpf_object *obj, struct bpf_program *prog { LIBBPF_OPTS(bpf_prog_load_opts, load_attr); const char *prog_name = NULL; - char *cp, errmsg[STRERR_BUFSIZE]; size_t log_buf_size = 0; char *log_buf = NULL, *tmp; bool own_log_buf = true; @@ -7443,6 +7493,7 @@ static int bpf_object_load_prog(struct bpf_object *obj, struct bpf_program *prog load_attr.attach_btf_id = prog->attach_btf_id; load_attr.kern_version = kern_version; load_attr.prog_ifindex = prog->prog_ifindex; + load_attr.expected_attach_type = prog->expected_attach_type; /* specify func_info/line_info only if kernel supports them */ if (obj->btf && btf__fd(obj->btf) >= 0 && kernel_supports(obj, FEAT_BTF_FUNC)) { @@ -7466,17 +7517,14 @@ static int bpf_object_load_prog(struct bpf_object *obj, struct bpf_program *prog if (prog->sec_def && prog->sec_def->prog_prepare_load_fn) { err = prog->sec_def->prog_prepare_load_fn(prog, &load_attr, prog->sec_def->cookie); if (err < 0) { - pr_warn("prog '%s': failed to prepare load attributes: %d\n", - prog->name, err); + pr_warn("prog '%s': failed to prepare load attributes: %s\n", + prog->name, errstr(err)); return err; } insns = prog->insns; insns_cnt = prog->insns_cnt; } - /* allow prog_prepare_load_fn to change expected_attach_type */ - load_attr.expected_attach_type = prog->expected_attach_type; - if (obj->gen_loader) { bpf_gen__prog_load(obj->gen_loader, prog->type, prog->name, license, insns, insns_cnt, &load_attr, @@ -7534,9 +7582,8 @@ retry_load: continue; if (bpf_prog_bind_map(ret, map->fd, NULL)) { - cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg)); pr_warn("prog '%s': failed to bind map '%s': %s\n", - prog->name, map->real_name, cp); + prog->name, map->real_name, errstr(errno)); /* Don't fail hard if can't bind rodata. */ } } @@ -7566,8 +7613,7 @@ retry_load: /* post-process verifier log to improve error descriptions */ fixup_verifier_log(prog, log_buf, log_buf_size); - cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg)); - pr_warn("prog '%s': BPF program load failed: %s\n", prog->name, cp); + pr_warn("prog '%s': BPF program load failed: %s\n", prog->name, errstr(errno)); pr_perm_msg(ret); if (own_log_buf && log_buf && log_buf[0] != '\0') { @@ -7860,7 +7906,7 @@ bpf_object__load_progs(struct bpf_object *obj, int log_level) err = bpf_object_load_prog(obj, prog, prog->insns, prog->insns_cnt, obj->license, obj->kern_version, &prog->fd); if (err) { - pr_warn("prog '%s': failed to load: %d\n", prog->name, err); + pr_warn("prog '%s': failed to load: %s\n", prog->name, errstr(err)); return err; } } @@ -7894,8 +7940,8 @@ static int bpf_object_init_progs(struct bpf_object *obj, const struct bpf_object if (prog->sec_def->prog_setup_fn) { err = prog->sec_def->prog_setup_fn(prog, prog->sec_def->cookie); if (err < 0) { - pr_warn("prog '%s': failed to initialize: %d\n", - prog->name, err); + pr_warn("prog '%s': failed to initialize: %s\n", + prog->name, errstr(err)); return err; } } @@ -7992,7 +8038,6 @@ static struct bpf_object *bpf_object_open(const char *path, const void *obj_buf, } err = bpf_object__elf_init(obj); - err = err ? : bpf_object__check_endianness(obj); err = err ? : bpf_object__elf_collect(obj); err = err ? : bpf_object__collect_externs(obj); err = err ? : bpf_object_fixup_btf(obj); @@ -8085,7 +8130,7 @@ static int libbpf_kallsyms_parse(kallsyms_cb_t cb, void *ctx) f = fopen("/proc/kallsyms", "re"); if (!f) { err = -errno; - pr_warn("failed to open /proc/kallsyms: %d\n", err); + pr_warn("failed to open /proc/kallsyms: %s\n", errstr(err)); return err; } @@ -8498,8 +8543,15 @@ static int bpf_object_load(struct bpf_object *obj, int extra_log_level, const ch return libbpf_err(-EINVAL); } - if (obj->gen_loader) + /* Disallow kernel loading programs of non-native endianness but + * permit cross-endian creation of "light skeleton". + */ + if (obj->gen_loader) { bpf_gen__init(obj->gen_loader, extra_log_level, obj->nr_programs, obj->nr_maps); + } else if (!is_native_endianness(obj)) { + pr_warn("object '%s': loading non-native endianness is unsupported\n", obj->name); + return libbpf_err(-LIBBPF_ERRNO__ENDIAN); + } err = bpf_object_prepare_token(obj); err = err ? : bpf_object__probe_loading(obj); @@ -8562,7 +8614,6 @@ int bpf_object__load(struct bpf_object *obj) static int make_parent_dir(const char *path) { - char *cp, errmsg[STRERR_BUFSIZE]; char *dname, *dir; int err = 0; @@ -8576,15 +8627,13 @@ static int make_parent_dir(const char *path) free(dname); if (err) { - cp = libbpf_strerror_r(-err, errmsg, sizeof(errmsg)); - pr_warn("failed to mkdir %s: %s\n", path, cp); + pr_warn("failed to mkdir %s: %s\n", path, errstr(err)); } return err; } static int check_path(const char *path) { - char *cp, errmsg[STRERR_BUFSIZE]; struct statfs st_fs; char *dname, *dir; int err = 0; @@ -8598,8 +8647,7 @@ static int check_path(const char *path) dir = dirname(dname); if (statfs(dir, &st_fs)) { - cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg)); - pr_warn("failed to statfs %s: %s\n", dir, cp); + pr_warn("failed to statfs %s: %s\n", dir, errstr(errno)); err = -errno; } free(dname); @@ -8614,7 +8662,6 @@ static int check_path(const char *path) int bpf_program__pin(struct bpf_program *prog, const char *path) { - char *cp, errmsg[STRERR_BUFSIZE]; int err; if (prog->fd < 0) { @@ -8632,8 +8679,7 @@ int bpf_program__pin(struct bpf_program *prog, const char *path) if (bpf_obj_pin(prog->fd, path)) { err = -errno; - cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg)); - pr_warn("prog '%s': failed to pin at '%s': %s\n", prog->name, path, cp); + pr_warn("prog '%s': failed to pin at '%s': %s\n", prog->name, path, errstr(err)); return libbpf_err(err); } @@ -8664,7 +8710,6 @@ int bpf_program__unpin(struct bpf_program *prog, const char *path) int bpf_map__pin(struct bpf_map *map, const char *path) { - char *cp, errmsg[STRERR_BUFSIZE]; int err; if (map == NULL) { @@ -8723,8 +8768,7 @@ int bpf_map__pin(struct bpf_map *map, const char *path) return 0; out_err: - cp = libbpf_strerror_r(-err, errmsg, sizeof(errmsg)); - pr_warn("failed to pin map: %s\n", cp); + pr_warn("failed to pin map: %s\n", errstr(err)); return libbpf_err(err); } @@ -9096,6 +9140,7 @@ int bpf_object__gen_loader(struct bpf_object *obj, struct gen_loader_opts *opts) if (!gen) return -ENOMEM; gen->opts = opts; + gen->swapped_endian = !is_native_endianness(obj); obj->gen_loader = gen; return 0; } @@ -9370,8 +9415,10 @@ static const struct bpf_sec_def section_defs[] = { SEC_DEF("kprobe.session+", KPROBE, BPF_TRACE_KPROBE_SESSION, SEC_NONE, attach_kprobe_session), SEC_DEF("uprobe.multi+", KPROBE, BPF_TRACE_UPROBE_MULTI, SEC_NONE, attach_uprobe_multi), SEC_DEF("uretprobe.multi+", KPROBE, BPF_TRACE_UPROBE_MULTI, SEC_NONE, attach_uprobe_multi), + SEC_DEF("uprobe.session+", KPROBE, BPF_TRACE_UPROBE_SESSION, SEC_NONE, attach_uprobe_multi), SEC_DEF("uprobe.multi.s+", KPROBE, BPF_TRACE_UPROBE_MULTI, SEC_SLEEPABLE, attach_uprobe_multi), SEC_DEF("uretprobe.multi.s+", KPROBE, BPF_TRACE_UPROBE_MULTI, SEC_SLEEPABLE, attach_uprobe_multi), + SEC_DEF("uprobe.session.s+", KPROBE, BPF_TRACE_UPROBE_SESSION, SEC_SLEEPABLE, attach_uprobe_multi), SEC_DEF("ksyscall+", KPROBE, 0, SEC_NONE, attach_ksyscall), SEC_DEF("kretsyscall+", KPROBE, 0, SEC_NONE, attach_ksyscall), SEC_DEF("usdt+", KPROBE, 0, SEC_USDT, attach_usdt), @@ -9910,8 +9957,8 @@ static int libbpf_find_prog_btf_id(const char *name, __u32 attach_prog_fd) memset(&info, 0, info_len); err = bpf_prog_get_info_by_fd(attach_prog_fd, &info, &info_len); if (err) { - pr_warn("failed bpf_prog_get_info_by_fd for FD %d: %d\n", - attach_prog_fd, err); + pr_warn("failed bpf_prog_get_info_by_fd for FD %d: %s\n", + attach_prog_fd, errstr(err)); return err; } @@ -9923,7 +9970,7 @@ static int libbpf_find_prog_btf_id(const char *name, __u32 attach_prog_fd) btf = btf__load_from_kernel_by_id(info.btf_id); err = libbpf_get_error(btf); if (err) { - pr_warn("Failed to get BTF %d of the program: %d\n", info.btf_id, err); + pr_warn("Failed to get BTF %d of the program: %s\n", info.btf_id, errstr(err)); goto out; } err = btf__find_by_name_kind(btf, name, BTF_KIND_FUNC); @@ -10005,8 +10052,8 @@ static int libbpf_find_attach_btf_id(struct bpf_program *prog, const char *attac } err = libbpf_find_prog_btf_id(attach_name, attach_prog_fd); if (err < 0) { - pr_warn("prog '%s': failed to find BPF program (FD %d) BTF ID for '%s': %d\n", - prog->name, attach_prog_fd, attach_name, err); + pr_warn("prog '%s': failed to find BPF program (FD %d) BTF ID for '%s': %s\n", + prog->name, attach_prog_fd, attach_name, errstr(err)); return err; } *btf_obj_fd = 0; @@ -10025,8 +10072,8 @@ static int libbpf_find_attach_btf_id(struct bpf_program *prog, const char *attac btf_type_id); } if (err) { - pr_warn("prog '%s': failed to find kernel BTF type ID of '%s': %d\n", - prog->name, attach_name, err); + pr_warn("prog '%s': failed to find kernel BTF type ID of '%s': %s\n", + prog->name, attach_name, errstr(err)); return err; } return 0; @@ -10254,14 +10301,14 @@ int bpf_map__set_value_size(struct bpf_map *map, __u32 size) mmap_new_sz = array_map_mmap_sz(size, map->def.max_entries); err = bpf_map_mmap_resize(map, mmap_old_sz, mmap_new_sz); if (err) { - pr_warn("map '%s': failed to resize memory-mapped region: %d\n", - bpf_map__name(map), err); + pr_warn("map '%s': failed to resize memory-mapped region: %s\n", + bpf_map__name(map), errstr(err)); return err; } err = map_btf_datasec_resize(map, size); if (err && err != -ENOENT) { - pr_warn("map '%s': failed to adjust resized BTF, clearing BTF key/value info: %d\n", - bpf_map__name(map), err); + pr_warn("map '%s': failed to adjust resized BTF, clearing BTF key/value info: %s\n", + bpf_map__name(map), errstr(err)); map->btf_value_type_id = 0; map->btf_key_type_id = 0; } @@ -10752,7 +10799,6 @@ static void bpf_link_perf_dealloc(struct bpf_link *link) struct bpf_link *bpf_program__attach_perf_event_opts(const struct bpf_program *prog, int pfd, const struct bpf_perf_event_opts *opts) { - char errmsg[STRERR_BUFSIZE]; struct bpf_link_perf *link; int prog_fd, link_fd = -1, err; bool force_ioctl_attach; @@ -10787,9 +10833,8 @@ struct bpf_link *bpf_program__attach_perf_event_opts(const struct bpf_program *p link_fd = bpf_link_create(prog_fd, pfd, BPF_PERF_EVENT, &link_opts); if (link_fd < 0) { err = -errno; - pr_warn("prog '%s': failed to create BPF link for perf_event FD %d: %d (%s)\n", - prog->name, pfd, - err, libbpf_strerror_r(err, errmsg, sizeof(errmsg))); + pr_warn("prog '%s': failed to create BPF link for perf_event FD %d: %s\n", + prog->name, pfd, errstr(err)); goto err_out; } link->link.fd = link_fd; @@ -10803,7 +10848,7 @@ struct bpf_link *bpf_program__attach_perf_event_opts(const struct bpf_program *p if (ioctl(pfd, PERF_EVENT_IOC_SET_BPF, prog_fd) < 0) { err = -errno; pr_warn("prog '%s': failed to attach to perf_event FD %d: %s\n", - prog->name, pfd, libbpf_strerror_r(err, errmsg, sizeof(errmsg))); + prog->name, pfd, errstr(err)); if (err == -EPROTO) pr_warn("prog '%s': try add PERF_SAMPLE_CALLCHAIN to or remove exclude_callchain_[kernel|user] from pfd %d\n", prog->name, pfd); @@ -10814,7 +10859,7 @@ struct bpf_link *bpf_program__attach_perf_event_opts(const struct bpf_program *p if (ioctl(pfd, PERF_EVENT_IOC_ENABLE, 0) < 0) { err = -errno; pr_warn("prog '%s': failed to enable perf_event FD %d: %s\n", - prog->name, pfd, libbpf_strerror_r(err, errmsg, sizeof(errmsg))); + prog->name, pfd, errstr(err)); goto err_out; } @@ -10838,22 +10883,19 @@ struct bpf_link *bpf_program__attach_perf_event(const struct bpf_program *prog, */ static int parse_uint_from_file(const char *file, const char *fmt) { - char buf[STRERR_BUFSIZE]; int err, ret; FILE *f; f = fopen(file, "re"); if (!f) { err = -errno; - pr_debug("failed to open '%s': %s\n", file, - libbpf_strerror_r(err, buf, sizeof(buf))); + pr_debug("failed to open '%s': %s\n", file, errstr(err)); return err; } err = fscanf(f, fmt, &ret); if (err != 1) { err = err == EOF ? -EIO : -errno; - pr_debug("failed to parse '%s': %s\n", file, - libbpf_strerror_r(err, buf, sizeof(buf))); + pr_debug("failed to parse '%s': %s\n", file, errstr(err)); fclose(f); return err; } @@ -10897,7 +10939,6 @@ static int perf_event_open_probe(bool uprobe, bool retprobe, const char *name, { const size_t attr_sz = sizeof(struct perf_event_attr); struct perf_event_attr attr; - char errmsg[STRERR_BUFSIZE]; int type, pfd; if ((__u64)ref_ctr_off >= (1ULL << PERF_UPROBE_REF_CTR_OFFSET_BITS)) @@ -10910,7 +10951,7 @@ static int perf_event_open_probe(bool uprobe, bool retprobe, const char *name, if (type < 0) { pr_warn("failed to determine %s perf type: %s\n", uprobe ? "uprobe" : "kprobe", - libbpf_strerror_r(type, errmsg, sizeof(errmsg))); + errstr(type)); return type; } if (retprobe) { @@ -10920,7 +10961,7 @@ static int perf_event_open_probe(bool uprobe, bool retprobe, const char *name, if (bit < 0) { pr_warn("failed to determine %s retprobe bit: %s\n", uprobe ? "uprobe" : "kprobe", - libbpf_strerror_r(bit, errmsg, sizeof(errmsg))); + errstr(bit)); return bit; } attr.config |= 1 << bit; @@ -11049,14 +11090,13 @@ static int perf_event_kprobe_open_legacy(const char *probe_name, bool retprobe, { const size_t attr_sz = sizeof(struct perf_event_attr); struct perf_event_attr attr; - char errmsg[STRERR_BUFSIZE]; int type, pfd, err; err = add_kprobe_event_legacy(probe_name, retprobe, kfunc_name, offset); if (err < 0) { pr_warn("failed to add legacy kprobe event for '%s+0x%zx': %s\n", kfunc_name, offset, - libbpf_strerror_r(err, errmsg, sizeof(errmsg))); + errstr(err)); return err; } type = determine_kprobe_perf_type_legacy(probe_name, retprobe); @@ -11064,7 +11104,7 @@ static int perf_event_kprobe_open_legacy(const char *probe_name, bool retprobe, err = type; pr_warn("failed to determine legacy kprobe event id for '%s+0x%zx': %s\n", kfunc_name, offset, - libbpf_strerror_r(err, errmsg, sizeof(errmsg))); + errstr(err)); goto err_clean_legacy; } @@ -11080,7 +11120,7 @@ static int perf_event_kprobe_open_legacy(const char *probe_name, bool retprobe, if (pfd < 0) { err = -errno; pr_warn("legacy kprobe perf_event_open() failed: %s\n", - libbpf_strerror_r(err, errmsg, sizeof(errmsg))); + errstr(err)); goto err_clean_legacy; } return pfd; @@ -11156,7 +11196,6 @@ bpf_program__attach_kprobe_opts(const struct bpf_program *prog, { DECLARE_LIBBPF_OPTS(bpf_perf_event_opts, pe_opts); enum probe_attach_mode attach_mode; - char errmsg[STRERR_BUFSIZE]; char *legacy_probe = NULL; struct bpf_link *link; size_t offset; @@ -11214,7 +11253,7 @@ bpf_program__attach_kprobe_opts(const struct bpf_program *prog, pr_warn("prog '%s': failed to create %s '%s+0x%zx' perf event: %s\n", prog->name, retprobe ? "kretprobe" : "kprobe", func_name, offset, - libbpf_strerror_r(err, errmsg, sizeof(errmsg))); + errstr(err)); goto err_out; } link = bpf_program__attach_perf_event_opts(prog, pfd, &pe_opts); @@ -11224,7 +11263,7 @@ bpf_program__attach_kprobe_opts(const struct bpf_program *prog, pr_warn("prog '%s': failed to attach to %s '%s+0x%zx': %s\n", prog->name, retprobe ? "kretprobe" : "kprobe", func_name, offset, - libbpf_strerror_r(err, errmsg, sizeof(errmsg))); + errstr(err)); goto err_clean_legacy; } if (legacy) { @@ -11360,7 +11399,7 @@ static int libbpf_available_kallsyms_parse(struct kprobe_multi_resolve *res) f = fopen(available_functions_file, "re"); if (!f) { err = -errno; - pr_warn("failed to open %s: %d\n", available_functions_file, err); + pr_warn("failed to open %s: %s\n", available_functions_file, errstr(err)); return err; } @@ -11435,7 +11474,7 @@ static int libbpf_available_kprobes_parse(struct kprobe_multi_resolve *res) f = fopen(available_path, "re"); if (!f) { err = -errno; - pr_warn("failed to open %s: %d\n", available_path, err); + pr_warn("failed to open %s: %s\n", available_path, errstr(err)); return err; } @@ -11481,7 +11520,6 @@ bpf_program__attach_kprobe_multi_opts(const struct bpf_program *prog, }; enum bpf_attach_type attach_type; struct bpf_link *link = NULL; - char errmsg[STRERR_BUFSIZE]; const unsigned long *addrs; int err, link_fd, prog_fd; bool retprobe, session; @@ -11549,7 +11587,7 @@ bpf_program__attach_kprobe_multi_opts(const struct bpf_program *prog, if (link_fd < 0) { err = -errno; pr_warn("prog '%s': failed to attach: %s\n", - prog->name, libbpf_strerror_r(err, errmsg, sizeof(errmsg))); + prog->name, errstr(err)); goto error; } link->fd = link_fd; @@ -11693,7 +11731,9 @@ static int attach_uprobe_multi(const struct bpf_program *prog, long cookie, stru ret = 0; break; case 3: + opts.session = str_has_pfx(probe_type, "uprobe.session"); opts.retprobe = str_has_pfx(probe_type, "uretprobe.multi"); + *link = bpf_program__attach_uprobe_multi(prog, -1, binary_path, func_name, &opts); ret = libbpf_get_error(*link); break; @@ -11756,15 +11796,15 @@ static int perf_event_uprobe_open_legacy(const char *probe_name, bool retprobe, err = add_uprobe_event_legacy(probe_name, retprobe, binary_path, offset); if (err < 0) { - pr_warn("failed to add legacy uprobe event for %s:0x%zx: %d\n", - binary_path, (size_t)offset, err); + pr_warn("failed to add legacy uprobe event for %s:0x%zx: %s\n", + binary_path, (size_t)offset, errstr(err)); return err; } type = determine_uprobe_perf_type_legacy(probe_name, retprobe); if (type < 0) { err = type; - pr_warn("failed to determine legacy uprobe event id for %s:0x%zx: %d\n", - binary_path, offset, err); + pr_warn("failed to determine legacy uprobe event id for %s:0x%zx: %s\n", + binary_path, offset, errstr(err)); goto err_clean_legacy; } @@ -11779,7 +11819,7 @@ static int perf_event_uprobe_open_legacy(const char *probe_name, bool retprobe, -1 /* group_fd */, PERF_FLAG_FD_CLOEXEC); if (pfd < 0) { err = -errno; - pr_warn("legacy uprobe perf_event_open() failed: %d\n", err); + pr_warn("legacy uprobe perf_event_open() failed: %s\n", errstr(err)); goto err_clean_legacy; } return pfd; @@ -11942,10 +11982,11 @@ bpf_program__attach_uprobe_multi(const struct bpf_program *prog, const unsigned long *ref_ctr_offsets = NULL, *offsets = NULL; LIBBPF_OPTS(bpf_link_create_opts, lopts); unsigned long *resolved_offsets = NULL; + enum bpf_attach_type attach_type; int err = 0, link_fd, prog_fd; struct bpf_link *link = NULL; - char errmsg[STRERR_BUFSIZE]; char full_path[PATH_MAX]; + bool retprobe, session; const __u64 *cookies; const char **syms; size_t cnt; @@ -11965,6 +12006,8 @@ bpf_program__attach_uprobe_multi(const struct bpf_program *prog, ref_ctr_offsets = OPTS_GET(opts, ref_ctr_offsets, NULL); cookies = OPTS_GET(opts, cookies, NULL); cnt = OPTS_GET(opts, cnt, 0); + retprobe = OPTS_GET(opts, retprobe, false); + session = OPTS_GET(opts, session, false); /* * User can specify 2 mutually exclusive set of inputs: @@ -11993,12 +12036,15 @@ bpf_program__attach_uprobe_multi(const struct bpf_program *prog, return libbpf_err_ptr(-EINVAL); } + if (retprobe && session) + return libbpf_err_ptr(-EINVAL); + if (func_pattern) { if (!strchr(path, '/')) { err = resolve_full_path(path, full_path, sizeof(full_path)); if (err) { - pr_warn("prog '%s': failed to resolve full path for '%s': %d\n", - prog->name, path, err); + pr_warn("prog '%s': failed to resolve full path for '%s': %s\n", + prog->name, path, errstr(err)); return libbpf_err_ptr(err); } path = full_path; @@ -12016,12 +12062,14 @@ bpf_program__attach_uprobe_multi(const struct bpf_program *prog, offsets = resolved_offsets; } + attach_type = session ? BPF_TRACE_UPROBE_SESSION : BPF_TRACE_UPROBE_MULTI; + lopts.uprobe_multi.path = path; lopts.uprobe_multi.offsets = offsets; lopts.uprobe_multi.ref_ctr_offsets = ref_ctr_offsets; lopts.uprobe_multi.cookies = cookies; lopts.uprobe_multi.cnt = cnt; - lopts.uprobe_multi.flags = OPTS_GET(opts, retprobe, false) ? BPF_F_UPROBE_MULTI_RETURN : 0; + lopts.uprobe_multi.flags = retprobe ? BPF_F_UPROBE_MULTI_RETURN : 0; if (pid == 0) pid = getpid(); @@ -12035,11 +12083,11 @@ bpf_program__attach_uprobe_multi(const struct bpf_program *prog, } link->detach = &bpf_link__detach_fd; - link_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_UPROBE_MULTI, &lopts); + link_fd = bpf_link_create(prog_fd, 0, attach_type, &lopts); if (link_fd < 0) { err = -errno; pr_warn("prog '%s': failed to attach multi-uprobe: %s\n", - prog->name, libbpf_strerror_r(err, errmsg, sizeof(errmsg))); + prog->name, errstr(err)); goto error; } link->fd = link_fd; @@ -12058,7 +12106,7 @@ bpf_program__attach_uprobe_opts(const struct bpf_program *prog, pid_t pid, const struct bpf_uprobe_opts *opts) { const char *archive_path = NULL, *archive_sep = NULL; - char errmsg[STRERR_BUFSIZE], *legacy_probe = NULL; + char *legacy_probe = NULL; DECLARE_LIBBPF_OPTS(bpf_perf_event_opts, pe_opts); enum probe_attach_mode attach_mode; char full_path[PATH_MAX]; @@ -12090,8 +12138,8 @@ bpf_program__attach_uprobe_opts(const struct bpf_program *prog, pid_t pid, } else if (!strchr(binary_path, '/')) { err = resolve_full_path(binary_path, full_path, sizeof(full_path)); if (err) { - pr_warn("prog '%s': failed to resolve full path for '%s': %d\n", - prog->name, binary_path, err); + pr_warn("prog '%s': failed to resolve full path for '%s': %s\n", + prog->name, binary_path, errstr(err)); return libbpf_err_ptr(err); } binary_path = full_path; @@ -12157,7 +12205,7 @@ bpf_program__attach_uprobe_opts(const struct bpf_program *prog, pid_t pid, pr_warn("prog '%s': failed to create %s '%s:0x%zx' perf event: %s\n", prog->name, retprobe ? "uretprobe" : "uprobe", binary_path, func_offset, - libbpf_strerror_r(err, errmsg, sizeof(errmsg))); + errstr(err)); goto err_out; } @@ -12168,7 +12216,7 @@ bpf_program__attach_uprobe_opts(const struct bpf_program *prog, pid_t pid, pr_warn("prog '%s': failed to attach to %s '%s:0x%zx': %s\n", prog->name, retprobe ? "uretprobe" : "uprobe", binary_path, func_offset, - libbpf_strerror_r(err, errmsg, sizeof(errmsg))); + errstr(err)); goto err_clean_legacy; } if (legacy) { @@ -12289,8 +12337,8 @@ struct bpf_link *bpf_program__attach_usdt(const struct bpf_program *prog, if (!strchr(binary_path, '/')) { err = resolve_full_path(binary_path, resolved_path, sizeof(resolved_path)); if (err) { - pr_warn("prog '%s': failed to resolve full path for '%s': %d\n", - prog->name, binary_path, err); + pr_warn("prog '%s': failed to resolve full path for '%s': %s\n", + prog->name, binary_path, errstr(err)); return libbpf_err_ptr(err); } binary_path = resolved_path; @@ -12368,14 +12416,13 @@ static int perf_event_open_tracepoint(const char *tp_category, { const size_t attr_sz = sizeof(struct perf_event_attr); struct perf_event_attr attr; - char errmsg[STRERR_BUFSIZE]; int tp_id, pfd, err; tp_id = determine_tracepoint_id(tp_category, tp_name); if (tp_id < 0) { pr_warn("failed to determine tracepoint '%s/%s' perf event ID: %s\n", tp_category, tp_name, - libbpf_strerror_r(tp_id, errmsg, sizeof(errmsg))); + errstr(tp_id)); return tp_id; } @@ -12390,7 +12437,7 @@ static int perf_event_open_tracepoint(const char *tp_category, err = -errno; pr_warn("tracepoint '%s/%s' perf_event_open() failed: %s\n", tp_category, tp_name, - libbpf_strerror_r(err, errmsg, sizeof(errmsg))); + errstr(err)); return err; } return pfd; @@ -12402,7 +12449,6 @@ struct bpf_link *bpf_program__attach_tracepoint_opts(const struct bpf_program *p const struct bpf_tracepoint_opts *opts) { DECLARE_LIBBPF_OPTS(bpf_perf_event_opts, pe_opts); - char errmsg[STRERR_BUFSIZE]; struct bpf_link *link; int pfd, err; @@ -12415,7 +12461,7 @@ struct bpf_link *bpf_program__attach_tracepoint_opts(const struct bpf_program *p if (pfd < 0) { pr_warn("prog '%s': failed to create tracepoint '%s/%s' perf event: %s\n", prog->name, tp_category, tp_name, - libbpf_strerror_r(pfd, errmsg, sizeof(errmsg))); + errstr(pfd)); return libbpf_err_ptr(pfd); } link = bpf_program__attach_perf_event_opts(prog, pfd, &pe_opts); @@ -12424,7 +12470,7 @@ struct bpf_link *bpf_program__attach_tracepoint_opts(const struct bpf_program *p close(pfd); pr_warn("prog '%s': failed to attach to tracepoint '%s/%s': %s\n", prog->name, tp_category, tp_name, - libbpf_strerror_r(err, errmsg, sizeof(errmsg))); + errstr(err)); return libbpf_err_ptr(err); } return link; @@ -12475,7 +12521,6 @@ bpf_program__attach_raw_tracepoint_opts(const struct bpf_program *prog, struct bpf_raw_tracepoint_opts *opts) { LIBBPF_OPTS(bpf_raw_tp_opts, raw_opts); - char errmsg[STRERR_BUFSIZE]; struct bpf_link *link; int prog_fd, pfd; @@ -12500,7 +12545,7 @@ bpf_program__attach_raw_tracepoint_opts(const struct bpf_program *prog, pfd = -errno; free(link); pr_warn("prog '%s': failed to attach to raw tracepoint '%s': %s\n", - prog->name, tp_name, libbpf_strerror_r(pfd, errmsg, sizeof(errmsg))); + prog->name, tp_name, errstr(pfd)); return libbpf_err_ptr(pfd); } link->fd = pfd; @@ -12559,7 +12604,6 @@ static struct bpf_link *bpf_program__attach_btf_id(const struct bpf_program *pro const struct bpf_trace_opts *opts) { LIBBPF_OPTS(bpf_link_create_opts, link_opts); - char errmsg[STRERR_BUFSIZE]; struct bpf_link *link; int prog_fd, pfd; @@ -12584,7 +12628,7 @@ static struct bpf_link *bpf_program__attach_btf_id(const struct bpf_program *pro pfd = -errno; free(link); pr_warn("prog '%s': failed to attach: %s\n", - prog->name, libbpf_strerror_r(pfd, errmsg, sizeof(errmsg))); + prog->name, errstr(pfd)); return libbpf_err_ptr(pfd); } link->fd = pfd; @@ -12625,7 +12669,6 @@ bpf_program_attach_fd(const struct bpf_program *prog, const struct bpf_link_create_opts *opts) { enum bpf_attach_type attach_type; - char errmsg[STRERR_BUFSIZE]; struct bpf_link *link; int prog_fd, link_fd; @@ -12647,7 +12690,7 @@ bpf_program_attach_fd(const struct bpf_program *prog, free(link); pr_warn("prog '%s': failed to attach to %s: %s\n", prog->name, target_name, - libbpf_strerror_r(link_fd, errmsg, sizeof(errmsg))); + errstr(link_fd)); return libbpf_err_ptr(link_fd); } link->fd = link_fd; @@ -12760,7 +12803,7 @@ struct bpf_link *bpf_program__attach_freplace(const struct bpf_program *prog, } if (prog->type != BPF_PROG_TYPE_EXT) { - pr_warn("prog '%s': only BPF_PROG_TYPE_EXT can attach as freplace", + pr_warn("prog '%s': only BPF_PROG_TYPE_EXT can attach as freplace\n", prog->name); return libbpf_err_ptr(-EINVAL); } @@ -12789,7 +12832,6 @@ bpf_program__attach_iter(const struct bpf_program *prog, const struct bpf_iter_attach_opts *opts) { DECLARE_LIBBPF_OPTS(bpf_link_create_opts, link_create_opts); - char errmsg[STRERR_BUFSIZE]; struct bpf_link *link; int prog_fd, link_fd; __u32 target_fd = 0; @@ -12817,7 +12859,7 @@ bpf_program__attach_iter(const struct bpf_program *prog, link_fd = -errno; free(link); pr_warn("prog '%s': failed to attach to iterator: %s\n", - prog->name, libbpf_strerror_r(link_fd, errmsg, sizeof(errmsg))); + prog->name, errstr(link_fd)); return libbpf_err_ptr(link_fd); } link->fd = link_fd; @@ -12859,12 +12901,10 @@ struct bpf_link *bpf_program__attach_netfilter(const struct bpf_program *prog, link_fd = bpf_link_create(prog_fd, 0, BPF_NETFILTER, &lopts); if (link_fd < 0) { - char errmsg[STRERR_BUFSIZE]; - link_fd = -errno; free(link); pr_warn("prog '%s': failed to attach to netfilter: %s\n", - prog->name, libbpf_strerror_r(link_fd, errmsg, sizeof(errmsg))); + prog->name, errstr(link_fd)); return libbpf_err_ptr(link_fd); } link->fd = link_fd; @@ -13149,7 +13189,6 @@ perf_buffer__open_cpu_buf(struct perf_buffer *pb, struct perf_event_attr *attr, int cpu, int map_key) { struct perf_cpu_buf *cpu_buf; - char msg[STRERR_BUFSIZE]; int err; cpu_buf = calloc(1, sizeof(*cpu_buf)); @@ -13165,7 +13204,7 @@ perf_buffer__open_cpu_buf(struct perf_buffer *pb, struct perf_event_attr *attr, if (cpu_buf->fd < 0) { err = -errno; pr_warn("failed to open perf buffer event on cpu #%d: %s\n", - cpu, libbpf_strerror_r(err, msg, sizeof(msg))); + cpu, errstr(err)); goto error; } @@ -13176,14 +13215,14 @@ perf_buffer__open_cpu_buf(struct perf_buffer *pb, struct perf_event_attr *attr, cpu_buf->base = NULL; err = -errno; pr_warn("failed to mmap perf buffer on cpu #%d: %s\n", - cpu, libbpf_strerror_r(err, msg, sizeof(msg))); + cpu, errstr(err)); goto error; } if (ioctl(cpu_buf->fd, PERF_EVENT_IOC_ENABLE, 0) < 0) { err = -errno; pr_warn("failed to enable perf buffer event on cpu #%d: %s\n", - cpu, libbpf_strerror_r(err, msg, sizeof(msg))); + cpu, errstr(err)); goto error; } @@ -13259,7 +13298,6 @@ static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt, { const char *online_cpus_file = "/sys/devices/system/cpu/online"; struct bpf_map_info map; - char msg[STRERR_BUFSIZE]; struct perf_buffer *pb; bool *online = NULL; __u32 map_info_len; @@ -13282,7 +13320,7 @@ static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt, */ if (err != -EINVAL) { pr_warn("failed to get map info for map FD %d: %s\n", - map_fd, libbpf_strerror_r(err, msg, sizeof(msg))); + map_fd, errstr(err)); return ERR_PTR(err); } pr_debug("failed to get map info for FD %d; API not supported? Ignoring...\n", @@ -13312,7 +13350,7 @@ static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt, if (pb->epoll_fd < 0) { err = -errno; pr_warn("failed to create epoll instance: %s\n", - libbpf_strerror_r(err, msg, sizeof(msg))); + errstr(err)); goto error; } @@ -13343,7 +13381,7 @@ static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt, err = parse_cpu_mask_file(online_cpus_file, &online, &n); if (err) { - pr_warn("failed to get online CPU mask: %d\n", err); + pr_warn("failed to get online CPU mask: %s\n", errstr(err)); goto error; } @@ -13374,7 +13412,7 @@ static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt, err = -errno; pr_warn("failed to set cpu #%d, key %d -> perf FD %d: %s\n", cpu, map_key, cpu_buf->fd, - libbpf_strerror_r(err, msg, sizeof(msg))); + errstr(err)); goto error; } @@ -13385,7 +13423,7 @@ static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt, err = -errno; pr_warn("failed to epoll_ctl cpu #%d perf FD %d: %s\n", cpu, cpu_buf->fd, - libbpf_strerror_r(err, msg, sizeof(msg))); + errstr(err)); goto error; } j++; @@ -13480,7 +13518,7 @@ int perf_buffer__poll(struct perf_buffer *pb, int timeout_ms) err = perf_buffer__process_records(pb, cpu_buf); if (err) { - pr_warn("error while processing records: %d\n", err); + pr_warn("error while processing records: %s\n", errstr(err)); return libbpf_err(err); } } @@ -13564,7 +13602,8 @@ int perf_buffer__consume(struct perf_buffer *pb) err = perf_buffer__process_records(pb, cpu_buf); if (err) { - pr_warn("perf_buffer: failed to process records in buffer #%d: %d\n", i, err); + pr_warn("perf_buffer: failed to process records in buffer #%d: %s\n", + i, errstr(err)); return libbpf_err(err); } } @@ -13675,14 +13714,14 @@ int parse_cpu_mask_file(const char *fcpu, bool **mask, int *mask_sz) fd = open(fcpu, O_RDONLY | O_CLOEXEC); if (fd < 0) { err = -errno; - pr_warn("Failed to open cpu mask file %s: %d\n", fcpu, err); + pr_warn("Failed to open cpu mask file %s: %s\n", fcpu, errstr(err)); return err; } len = read(fd, buf, sizeof(buf)); close(fd); if (len <= 0) { err = len ? -errno : -EINVAL; - pr_warn("Failed to read cpu mask from %s: %d\n", fcpu, err); + pr_warn("Failed to read cpu mask from %s: %s\n", fcpu, errstr(err)); return err; } if (len >= sizeof(buf)) { @@ -13774,20 +13813,21 @@ int bpf_object__open_skeleton(struct bpf_object_skeleton *s, obj = bpf_object_open(NULL, s->data, s->data_sz, s->name, opts); if (IS_ERR(obj)) { err = PTR_ERR(obj); - pr_warn("failed to initialize skeleton BPF object '%s': %d\n", s->name, err); + pr_warn("failed to initialize skeleton BPF object '%s': %s\n", + s->name, errstr(err)); return libbpf_err(err); } *s->obj = obj; err = populate_skeleton_maps(obj, s->maps, s->map_cnt, s->map_skel_sz); if (err) { - pr_warn("failed to populate skeleton maps for '%s': %d\n", s->name, err); + pr_warn("failed to populate skeleton maps for '%s': %s\n", s->name, errstr(err)); return libbpf_err(err); } err = populate_skeleton_progs(obj, s->progs, s->prog_cnt, s->prog_skel_sz); if (err) { - pr_warn("failed to populate skeleton progs for '%s': %d\n", s->name, err); + pr_warn("failed to populate skeleton progs for '%s': %s\n", s->name, errstr(err)); return libbpf_err(err); } @@ -13817,13 +13857,13 @@ int bpf_object__open_subskeleton(struct bpf_object_subskeleton *s) err = populate_skeleton_maps(s->obj, s->maps, s->map_cnt, s->map_skel_sz); if (err) { - pr_warn("failed to populate subskeleton maps: %d\n", err); + pr_warn("failed to populate subskeleton maps: %s\n", errstr(err)); return libbpf_err(err); } err = populate_skeleton_progs(s->obj, s->progs, s->prog_cnt, s->prog_skel_sz); if (err) { - pr_warn("failed to populate subskeleton maps: %d\n", err); + pr_warn("failed to populate subskeleton maps: %s\n", errstr(err)); return libbpf_err(err); } @@ -13834,7 +13874,7 @@ int bpf_object__open_subskeleton(struct bpf_object_subskeleton *s) map_type = btf__type_by_id(btf, map_type_id); if (!btf_is_datasec(map_type)) { - pr_warn("type for map '%1$s' is not a datasec: %2$s", + pr_warn("type for map '%1$s' is not a datasec: %2$s\n", bpf_map__name(map), __btf_kind_str(btf_kind(map_type))); return libbpf_err(-EINVAL); @@ -13870,53 +13910,18 @@ int bpf_object__load_skeleton(struct bpf_object_skeleton *s) err = bpf_object__load(*s->obj); if (err) { - pr_warn("failed to load BPF skeleton '%s': %d\n", s->name, err); + pr_warn("failed to load BPF skeleton '%s': %s\n", s->name, errstr(err)); return libbpf_err(err); } for (i = 0; i < s->map_cnt; i++) { struct bpf_map_skeleton *map_skel = (void *)s->maps + i * s->map_skel_sz; struct bpf_map *map = *map_skel->map; - size_t mmap_sz = bpf_map_mmap_sz(map); - int prot, map_fd = map->fd; - void **mmaped = map_skel->mmaped; - if (!mmaped) + if (!map_skel->mmaped) continue; - if (!(map->def.map_flags & BPF_F_MMAPABLE)) { - *mmaped = NULL; - continue; - } - - if (map->def.type == BPF_MAP_TYPE_ARENA) { - *mmaped = map->mmaped; - continue; - } - - if (map->def.map_flags & BPF_F_RDONLY_PROG) - prot = PROT_READ; - else - prot = PROT_READ | PROT_WRITE; - - /* Remap anonymous mmap()-ed "map initialization image" as - * a BPF map-backed mmap()-ed memory, but preserving the same - * memory address. This will cause kernel to change process' - * page table to point to a different piece of kernel memory, - * but from userspace point of view memory address (and its - * contents, being identical at this point) will stay the - * same. This mapping will be released by bpf_object__close() - * as per normal clean up procedure, so we don't need to worry - * about it from skeleton's clean up perspective. - */ - *mmaped = mmap(map->mmaped, mmap_sz, prot, MAP_SHARED | MAP_FIXED, map_fd, 0); - if (*mmaped == MAP_FAILED) { - err = -errno; - *mmaped = NULL; - pr_warn("failed to re-mmap() map '%s': %d\n", - bpf_map__name(map), err); - return libbpf_err(err); - } + *map_skel->mmaped = map->mmaped; } return 0; @@ -13944,8 +13949,8 @@ int bpf_object__attach_skeleton(struct bpf_object_skeleton *s) err = prog->sec_def->prog_attach_fn(prog, prog->sec_def->cookie, link); if (err) { - pr_warn("prog '%s': failed to auto-attach: %d\n", - bpf_program__name(prog), err); + pr_warn("prog '%s': failed to auto-attach: %s\n", + bpf_program__name(prog), errstr(err)); return libbpf_err(err); } @@ -13988,7 +13993,8 @@ int bpf_object__attach_skeleton(struct bpf_object_skeleton *s) *link = bpf_map__attach_struct_ops(map); if (!*link) { err = -errno; - pr_warn("map '%s': failed to auto-attach: %d\n", bpf_map__name(map), err); + pr_warn("map '%s': failed to auto-attach: %s\n", + bpf_map__name(map), errstr(err)); return libbpf_err(err); } } diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index 91484303849c..b2ce3a72b11d 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -577,10 +577,12 @@ struct bpf_uprobe_multi_opts { size_t cnt; /* create return uprobes */ bool retprobe; + /* create session kprobes */ + bool session; size_t :0; }; -#define bpf_uprobe_multi_opts__last_field retprobe +#define bpf_uprobe_multi_opts__last_field session /** * @brief **bpf_program__attach_uprobe_multi()** attaches a BPF program diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map index 0096e483f7eb..54b6f312cfa8 100644 --- a/tools/lib/bpf/libbpf.map +++ b/tools/lib/bpf/libbpf.map @@ -421,6 +421,8 @@ LIBBPF_1.5.0 { global: btf__distill_base; btf__relocate; + btf_ext__endianness; + btf_ext__set_endianness; bpf_map__autoattach; bpf_map__set_autoattach; bpf_object__token_fd; @@ -428,3 +430,6 @@ LIBBPF_1.5.0 { ring__consume_n; ring_buffer__consume_n; } LIBBPF_1.4.0; + +LIBBPF_1.6.0 { +} LIBBPF_1.5.0; diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h index 408df59e0771..de498e2dd6b0 100644 --- a/tools/lib/bpf/libbpf_internal.h +++ b/tools/lib/bpf/libbpf_internal.h @@ -10,6 +10,7 @@ #define __LIBBPF_LIBBPF_INTERNAL_H #include <stdlib.h> +#include <byteswap.h> #include <limits.h> #include <errno.h> #include <linux/err.h> @@ -448,11 +449,11 @@ struct btf_ext_info { * * The func_info subsection layout: * record size for struct bpf_func_info in the func_info subsection - * struct btf_sec_func_info for section #1 + * struct btf_ext_info_sec for section #1 * a list of bpf_func_info records for section #1 * where struct bpf_func_info mimics one in include/uapi/linux/bpf.h * but may not be identical - * struct btf_sec_func_info for section #2 + * struct btf_ext_info_sec for section #2 * a list of bpf_func_info records for section #2 * ...... * @@ -484,6 +485,8 @@ struct btf_ext { struct btf_ext_header *hdr; void *data; }; + void *data_swapped; + bool swapped_endian; struct btf_ext_info func_info; struct btf_ext_info line_info; struct btf_ext_info core_relo_info; @@ -511,6 +514,32 @@ struct bpf_line_info_min { __u32 line_col; }; +/* Functions to byte-swap info records */ + +typedef void (*info_rec_bswap_fn)(void *); + +static inline void bpf_func_info_bswap(struct bpf_func_info *i) +{ + i->insn_off = bswap_32(i->insn_off); + i->type_id = bswap_32(i->type_id); +} + +static inline void bpf_line_info_bswap(struct bpf_line_info *i) +{ + i->insn_off = bswap_32(i->insn_off); + i->file_name_off = bswap_32(i->file_name_off); + i->line_off = bswap_32(i->line_off); + i->line_col = bswap_32(i->line_col); +} + +static inline void bpf_core_relo_bswap(struct bpf_core_relo *i) +{ + i->insn_off = bswap_32(i->insn_off); + i->type_id = bswap_32(i->type_id); + i->access_str_off = bswap_32(i->access_str_off); + i->kind = bswap_32(i->kind); +} + enum btf_field_iter_kind { BTF_FIELD_ITER_IDS, BTF_FIELD_ITER_STRS, @@ -588,6 +617,16 @@ static inline bool is_ldimm64_insn(struct bpf_insn *insn) return insn->code == (BPF_LD | BPF_IMM | BPF_DW); } +static inline void bpf_insn_bswap(struct bpf_insn *insn) +{ + __u8 tmp_reg = insn->dst_reg; + + insn->dst_reg = insn->src_reg; + insn->src_reg = tmp_reg; + insn->off = bswap_16(insn->off); + insn->imm = bswap_32(insn->imm); +} + /* Unconditionally dup FD, ensuring it doesn't use [0, 2] range. * Original FD is not closed or altered in any other way. * Preserves original FD value, if it's invalid (negative). diff --git a/tools/lib/bpf/libbpf_version.h b/tools/lib/bpf/libbpf_version.h index d6e5eff967cb..28c58fb17250 100644 --- a/tools/lib/bpf/libbpf_version.h +++ b/tools/lib/bpf/libbpf_version.h @@ -4,6 +4,6 @@ #define __LIBBPF_VERSION_H #define LIBBPF_MAJOR_VERSION 1 -#define LIBBPF_MINOR_VERSION 5 +#define LIBBPF_MINOR_VERSION 6 #endif /* __LIBBPF_VERSION_H */ diff --git a/tools/lib/bpf/linker.c b/tools/lib/bpf/linker.c index e0005c6ade88..cf71d149fe26 100644 --- a/tools/lib/bpf/linker.c +++ b/tools/lib/bpf/linker.c @@ -20,6 +20,7 @@ #include "btf.h" #include "libbpf_internal.h" #include "strset.h" +#include "str_error.h" #define BTF_EXTERN_SEC ".extern" @@ -135,6 +136,7 @@ struct bpf_linker { int fd; Elf *elf; Elf64_Ehdr *elf_hdr; + bool swapped_endian; /* Output sections metadata */ struct dst_sec *secs; @@ -305,7 +307,7 @@ static int init_output_elf(struct bpf_linker *linker, const char *file) linker->fd = open(file, O_WRONLY | O_CREAT | O_TRUNC | O_CLOEXEC, 0644); if (linker->fd < 0) { err = -errno; - pr_warn("failed to create '%s': %d\n", file, err); + pr_warn("failed to create '%s': %s\n", file, errstr(err)); return err; } @@ -324,13 +326,8 @@ static int init_output_elf(struct bpf_linker *linker, const char *file) linker->elf_hdr->e_machine = EM_BPF; linker->elf_hdr->e_type = ET_REL; -#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ - linker->elf_hdr->e_ident[EI_DATA] = ELFDATA2LSB; -#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ - linker->elf_hdr->e_ident[EI_DATA] = ELFDATA2MSB; -#else -#error "Unknown __BYTE_ORDER__" -#endif + /* Set unknown ELF endianness, assign later from input files */ + linker->elf_hdr->e_ident[EI_DATA] = ELFDATANONE; /* STRTAB */ /* initialize strset with an empty string to conform to ELF */ @@ -396,6 +393,8 @@ static int init_output_elf(struct bpf_linker *linker, const char *file) pr_warn_elf("failed to create SYMTAB data"); return -EINVAL; } + /* Ensure libelf translates byte-order of symbol records */ + sec->data->d_type = ELF_T_SYM; str_off = strset__add_str(linker->strtab_strs, sec->sec_name); if (str_off < 0) @@ -539,19 +538,21 @@ static int linker_load_obj_file(struct bpf_linker *linker, const char *filename, const struct bpf_linker_file_opts *opts, struct src_obj *obj) { -#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ - const int host_endianness = ELFDATA2LSB; -#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ - const int host_endianness = ELFDATA2MSB; -#else -#error "Unknown __BYTE_ORDER__" -#endif int err = 0; Elf_Scn *scn; Elf_Data *data; Elf64_Ehdr *ehdr; Elf64_Shdr *shdr; struct src_sec *sec; + unsigned char obj_byteorder; + unsigned char link_byteorder = linker->elf_hdr->e_ident[EI_DATA]; +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + const unsigned char host_byteorder = ELFDATA2LSB; +#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + const unsigned char host_byteorder = ELFDATA2MSB; +#else +#error "Unknown __BYTE_ORDER__" +#endif pr_debug("linker: adding object file '%s'...\n", filename); @@ -560,7 +561,7 @@ static int linker_load_obj_file(struct bpf_linker *linker, const char *filename, obj->fd = open(filename, O_RDONLY | O_CLOEXEC); if (obj->fd < 0) { err = -errno; - pr_warn("failed to open file '%s': %d\n", filename, err); + pr_warn("failed to open file '%s': %s\n", filename, errstr(err)); return err; } obj->elf = elf_begin(obj->fd, ELF_C_READ_MMAP, NULL); @@ -577,11 +578,25 @@ static int linker_load_obj_file(struct bpf_linker *linker, const char *filename, pr_warn_elf("failed to get ELF header for %s", filename); return err; } - if (ehdr->e_ident[EI_DATA] != host_endianness) { + + /* Linker output endianness set by first input object */ + obj_byteorder = ehdr->e_ident[EI_DATA]; + if (obj_byteorder != ELFDATA2LSB && obj_byteorder != ELFDATA2MSB) { err = -EOPNOTSUPP; - pr_warn_elf("unsupported byte order of ELF file %s", filename); + pr_warn("unknown byte order of ELF file %s\n", filename); return err; } + if (link_byteorder == ELFDATANONE) { + linker->elf_hdr->e_ident[EI_DATA] = obj_byteorder; + linker->swapped_endian = obj_byteorder != host_byteorder; + pr_debug("linker: set %s-endian output byte order\n", + obj_byteorder == ELFDATA2MSB ? "big" : "little"); + } else if (link_byteorder != obj_byteorder) { + err = -EOPNOTSUPP; + pr_warn("byte order mismatch with ELF file %s\n", filename); + return err; + } + if (ehdr->e_type != ET_REL || ehdr->e_machine != EM_BPF || ehdr->e_ident[EI_CLASS] != ELFCLASS64) { @@ -656,7 +671,8 @@ static int linker_load_obj_file(struct bpf_linker *linker, const char *filename, obj->btf = btf__new(data->d_buf, shdr->sh_size); err = libbpf_get_error(obj->btf); if (err) { - pr_warn("failed to parse .BTF from %s: %d\n", filename, err); + pr_warn("failed to parse .BTF from %s: %s\n", + filename, errstr(err)); return err; } sec->skipped = true; @@ -666,7 +682,8 @@ static int linker_load_obj_file(struct bpf_linker *linker, const char *filename, obj->btf_ext = btf_ext__new(data->d_buf, shdr->sh_size); err = libbpf_get_error(obj->btf_ext); if (err) { - pr_warn("failed to parse .BTF.ext from '%s': %d\n", filename, err); + pr_warn("failed to parse .BTF.ext from '%s': %s\n", + filename, errstr(err)); return err; } sec->skipped = true; @@ -1109,6 +1126,24 @@ static bool sec_content_is_same(struct dst_sec *dst_sec, struct src_sec *src_sec return true; } +static bool is_exec_sec(struct dst_sec *sec) +{ + if (!sec || sec->ephemeral) + return false; + return (sec->shdr->sh_type == SHT_PROGBITS) && + (sec->shdr->sh_flags & SHF_EXECINSTR); +} + +static void exec_sec_bswap(void *raw_data, int size) +{ + const int insn_cnt = size / sizeof(struct bpf_insn); + struct bpf_insn *insn = raw_data; + int i; + + for (i = 0; i < insn_cnt; i++, insn++) + bpf_insn_bswap(insn); +} + static int extend_sec(struct bpf_linker *linker, struct dst_sec *dst, struct src_sec *src) { void *tmp; @@ -1168,6 +1203,10 @@ static int extend_sec(struct bpf_linker *linker, struct dst_sec *dst, struct src memset(dst->raw_data + dst->sec_sz, 0, dst_align_sz - dst->sec_sz); /* now copy src data at a properly aligned offset */ memcpy(dst->raw_data + dst_align_sz, src->data->d_buf, src->shdr->sh_size); + + /* convert added bpf insns to native byte-order */ + if (linker->swapped_endian && is_exec_sec(dst)) + exec_sec_bswap(dst->raw_data + dst_align_sz, src->shdr->sh_size); } dst->sec_sz = dst_final_sz; @@ -2415,6 +2454,10 @@ static int linker_append_btf(struct bpf_linker *linker, struct src_obj *obj) if (glob_sym && glob_sym->var_idx >= 0) { __s64 sz; + /* FUNCs don't have size, nothing to update */ + if (btf_is_func(t)) + continue; + dst_var = &dst_sec->sec_vars[glob_sym->var_idx]; /* Because underlying BTF type might have * changed, so might its size have changed, so @@ -2628,6 +2671,10 @@ int bpf_linker__finalize(struct bpf_linker *linker) if (!sec->scn) continue; + /* restore sections with bpf insns to target byte-order */ + if (linker->swapped_endian && is_exec_sec(sec)) + exec_sec_bswap(sec->raw_data, sec->sec_sz); + sec->data->d_buf = sec->raw_data; } @@ -2696,6 +2743,7 @@ static int emit_elf_data_sec(struct bpf_linker *linker, const char *sec_name, static int finalize_btf(struct bpf_linker *linker) { + enum btf_endianness link_endianness; LIBBPF_OPTS(btf_dedup_opts, opts); struct btf *btf = linker->btf; const void *raw_data; @@ -2729,17 +2777,24 @@ static int finalize_btf(struct bpf_linker *linker) err = finalize_btf_ext(linker); if (err) { - pr_warn(".BTF.ext generation failed: %d\n", err); + pr_warn(".BTF.ext generation failed: %s\n", errstr(err)); return err; } opts.btf_ext = linker->btf_ext; err = btf__dedup(linker->btf, &opts); if (err) { - pr_warn("BTF dedup failed: %d\n", err); + pr_warn("BTF dedup failed: %s\n", errstr(err)); return err; } + /* Set .BTF and .BTF.ext output byte order */ + link_endianness = linker->elf_hdr->e_ident[EI_DATA] == ELFDATA2MSB ? + BTF_BIG_ENDIAN : BTF_LITTLE_ENDIAN; + btf__set_endianness(linker->btf, link_endianness); + if (linker->btf_ext) + btf_ext__set_endianness(linker->btf_ext, link_endianness); + /* Emit .BTF section */ raw_data = btf__raw_data(linker->btf, &raw_sz); if (!raw_data) @@ -2747,7 +2802,7 @@ static int finalize_btf(struct bpf_linker *linker) err = emit_elf_data_sec(linker, BTF_ELF_SEC, 8, raw_data, raw_sz); if (err) { - pr_warn("failed to write out .BTF ELF section: %d\n", err); + pr_warn("failed to write out .BTF ELF section: %s\n", errstr(err)); return err; } @@ -2759,7 +2814,7 @@ static int finalize_btf(struct bpf_linker *linker) err = emit_elf_data_sec(linker, BTF_EXT_ELF_SEC, 8, raw_data, raw_sz); if (err) { - pr_warn("failed to write out .BTF.ext ELF section: %d\n", err); + pr_warn("failed to write out .BTF.ext ELF section: %s\n", errstr(err)); return err; } } @@ -2935,7 +2990,7 @@ static int finalize_btf_ext(struct bpf_linker *linker) err = libbpf_get_error(linker->btf_ext); if (err) { linker->btf_ext = NULL; - pr_warn("failed to parse final .BTF.ext data: %d\n", err); + pr_warn("failed to parse final .BTF.ext data: %s\n", errstr(err)); goto out; } diff --git a/tools/lib/bpf/relo_core.c b/tools/lib/bpf/relo_core.c index 63a4d5ad12d1..7632e9d41827 100644 --- a/tools/lib/bpf/relo_core.c +++ b/tools/lib/bpf/relo_core.c @@ -1339,7 +1339,7 @@ int bpf_core_calc_relo_insn(const char *prog_name, cands->cands[i].id, cand_spec); if (err < 0) { bpf_core_format_spec(spec_buf, sizeof(spec_buf), cand_spec); - pr_warn("prog '%s': relo #%d: error matching candidate #%d %s: %d\n ", + pr_warn("prog '%s': relo #%d: error matching candidate #%d %s: %d\n", prog_name, relo_idx, i, spec_buf, err); return err; } diff --git a/tools/lib/bpf/ringbuf.c b/tools/lib/bpf/ringbuf.c index bfd8dac4c0cc..9702b70da444 100644 --- a/tools/lib/bpf/ringbuf.c +++ b/tools/lib/bpf/ringbuf.c @@ -21,6 +21,7 @@ #include "libbpf.h" #include "libbpf_internal.h" #include "bpf.h" +#include "str_error.h" struct ring { ring_buffer_sample_fn sample_cb; @@ -88,8 +89,8 @@ int ring_buffer__add(struct ring_buffer *rb, int map_fd, err = bpf_map_get_info_by_fd(map_fd, &info, &len); if (err) { err = -errno; - pr_warn("ringbuf: failed to get map info for fd=%d: %d\n", - map_fd, err); + pr_warn("ringbuf: failed to get map info for fd=%d: %s\n", + map_fd, errstr(err)); return libbpf_err(err); } @@ -123,8 +124,8 @@ int ring_buffer__add(struct ring_buffer *rb, int map_fd, tmp = mmap(NULL, rb->page_size, PROT_READ | PROT_WRITE, MAP_SHARED, map_fd, 0); if (tmp == MAP_FAILED) { err = -errno; - pr_warn("ringbuf: failed to mmap consumer page for map fd=%d: %d\n", - map_fd, err); + pr_warn("ringbuf: failed to mmap consumer page for map fd=%d: %s\n", + map_fd, errstr(err)); goto err_out; } r->consumer_pos = tmp; @@ -142,8 +143,8 @@ int ring_buffer__add(struct ring_buffer *rb, int map_fd, tmp = mmap(NULL, (size_t)mmap_sz, PROT_READ, MAP_SHARED, map_fd, rb->page_size); if (tmp == MAP_FAILED) { err = -errno; - pr_warn("ringbuf: failed to mmap data pages for map fd=%d: %d\n", - map_fd, err); + pr_warn("ringbuf: failed to mmap data pages for map fd=%d: %s\n", + map_fd, errstr(err)); goto err_out; } r->producer_pos = tmp; @@ -156,8 +157,8 @@ int ring_buffer__add(struct ring_buffer *rb, int map_fd, e->data.fd = rb->ring_cnt; if (epoll_ctl(rb->epoll_fd, EPOLL_CTL_ADD, map_fd, e) < 0) { err = -errno; - pr_warn("ringbuf: failed to epoll add map fd=%d: %d\n", - map_fd, err); + pr_warn("ringbuf: failed to epoll add map fd=%d: %s\n", + map_fd, errstr(err)); goto err_out; } @@ -205,7 +206,7 @@ ring_buffer__new(int map_fd, ring_buffer_sample_fn sample_cb, void *ctx, rb->epoll_fd = epoll_create1(EPOLL_CLOEXEC); if (rb->epoll_fd < 0) { err = -errno; - pr_warn("ringbuf: failed to create epoll instance: %d\n", err); + pr_warn("ringbuf: failed to create epoll instance: %s\n", errstr(err)); goto err_out; } @@ -458,7 +459,8 @@ static int user_ringbuf_map(struct user_ring_buffer *rb, int map_fd) err = bpf_map_get_info_by_fd(map_fd, &info, &len); if (err) { err = -errno; - pr_warn("user ringbuf: failed to get map info for fd=%d: %d\n", map_fd, err); + pr_warn("user ringbuf: failed to get map info for fd=%d: %s\n", + map_fd, errstr(err)); return err; } @@ -474,8 +476,8 @@ static int user_ringbuf_map(struct user_ring_buffer *rb, int map_fd) tmp = mmap(NULL, rb->page_size, PROT_READ, MAP_SHARED, map_fd, 0); if (tmp == MAP_FAILED) { err = -errno; - pr_warn("user ringbuf: failed to mmap consumer page for map fd=%d: %d\n", - map_fd, err); + pr_warn("user ringbuf: failed to mmap consumer page for map fd=%d: %s\n", + map_fd, errstr(err)); return err; } rb->consumer_pos = tmp; @@ -494,8 +496,8 @@ static int user_ringbuf_map(struct user_ring_buffer *rb, int map_fd) map_fd, rb->page_size); if (tmp == MAP_FAILED) { err = -errno; - pr_warn("user ringbuf: failed to mmap data pages for map fd=%d: %d\n", - map_fd, err); + pr_warn("user ringbuf: failed to mmap data pages for map fd=%d: %s\n", + map_fd, errstr(err)); return err; } @@ -506,7 +508,7 @@ static int user_ringbuf_map(struct user_ring_buffer *rb, int map_fd) rb_epoll->events = EPOLLOUT; if (epoll_ctl(rb->epoll_fd, EPOLL_CTL_ADD, map_fd, rb_epoll) < 0) { err = -errno; - pr_warn("user ringbuf: failed to epoll add map fd=%d: %d\n", map_fd, err); + pr_warn("user ringbuf: failed to epoll add map fd=%d: %s\n", map_fd, errstr(err)); return err; } @@ -531,7 +533,7 @@ user_ring_buffer__new(int map_fd, const struct user_ring_buffer_opts *opts) rb->epoll_fd = epoll_create1(EPOLL_CLOEXEC); if (rb->epoll_fd < 0) { err = -errno; - pr_warn("user ringbuf: failed to create epoll instance: %d\n", err); + pr_warn("user ringbuf: failed to create epoll instance: %s\n", errstr(err)); goto err_out; } diff --git a/tools/lib/bpf/skel_internal.h b/tools/lib/bpf/skel_internal.h index 0875452521e9..4d5fa079b5d6 100644 --- a/tools/lib/bpf/skel_internal.h +++ b/tools/lib/bpf/skel_internal.h @@ -351,10 +351,11 @@ static inline int bpf_load_and_run(struct bpf_load_and_run_opts *opts) attr.test.ctx_size_in = opts->ctx->sz; err = skel_sys_bpf(BPF_PROG_RUN, &attr, test_run_attr_sz); if (err < 0 || (int)attr.test.retval < 0) { - opts->errstr = "failed to execute loader prog"; if (err < 0) { + opts->errstr = "failed to execute loader prog"; set_err; } else { + opts->errstr = "error returned by loader prog"; err = (int)attr.test.retval; #ifndef __KERNEL__ errno = -err; diff --git a/tools/lib/bpf/str_error.c b/tools/lib/bpf/str_error.c index 5e6a1e27ddf9..8743049e32b7 100644 --- a/tools/lib/bpf/str_error.c +++ b/tools/lib/bpf/str_error.c @@ -5,6 +5,10 @@ #include <errno.h> #include "str_error.h" +#ifndef ENOTSUPP +#define ENOTSUPP 524 +#endif + /* make sure libbpf doesn't use kernel-only integer typedefs */ #pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64 @@ -31,3 +35,70 @@ char *libbpf_strerror_r(int err, char *dst, int len) } return dst; } + +const char *errstr(int err) +{ + static __thread char buf[12]; + + if (err > 0) + err = -err; + + switch (err) { + case -E2BIG: return "-E2BIG"; + case -EACCES: return "-EACCES"; + case -EADDRINUSE: return "-EADDRINUSE"; + case -EADDRNOTAVAIL: return "-EADDRNOTAVAIL"; + case -EAGAIN: return "-EAGAIN"; + case -EALREADY: return "-EALREADY"; + case -EBADF: return "-EBADF"; + case -EBADFD: return "-EBADFD"; + case -EBUSY: return "-EBUSY"; + case -ECANCELED: return "-ECANCELED"; + case -ECHILD: return "-ECHILD"; + case -EDEADLK: return "-EDEADLK"; + case -EDOM: return "-EDOM"; + case -EEXIST: return "-EEXIST"; + case -EFAULT: return "-EFAULT"; + case -EFBIG: return "-EFBIG"; + case -EILSEQ: return "-EILSEQ"; + case -EINPROGRESS: return "-EINPROGRESS"; + case -EINTR: return "-EINTR"; + case -EINVAL: return "-EINVAL"; + case -EIO: return "-EIO"; + case -EISDIR: return "-EISDIR"; + case -ELOOP: return "-ELOOP"; + case -EMFILE: return "-EMFILE"; + case -EMLINK: return "-EMLINK"; + case -EMSGSIZE: return "-EMSGSIZE"; + case -ENAMETOOLONG: return "-ENAMETOOLONG"; + case -ENFILE: return "-ENFILE"; + case -ENODATA: return "-ENODATA"; + case -ENODEV: return "-ENODEV"; + case -ENOENT: return "-ENOENT"; + case -ENOEXEC: return "-ENOEXEC"; + case -ENOLINK: return "-ENOLINK"; + case -ENOMEM: return "-ENOMEM"; + case -ENOSPC: return "-ENOSPC"; + case -ENOTBLK: return "-ENOTBLK"; + case -ENOTDIR: return "-ENOTDIR"; + case -ENOTSUPP: return "-ENOTSUPP"; + case -ENOTTY: return "-ENOTTY"; + case -ENXIO: return "-ENXIO"; + case -EOPNOTSUPP: return "-EOPNOTSUPP"; + case -EOVERFLOW: return "-EOVERFLOW"; + case -EPERM: return "-EPERM"; + case -EPIPE: return "-EPIPE"; + case -EPROTO: return "-EPROTO"; + case -EPROTONOSUPPORT: return "-EPROTONOSUPPORT"; + case -ERANGE: return "-ERANGE"; + case -EROFS: return "-EROFS"; + case -ESPIPE: return "-ESPIPE"; + case -ESRCH: return "-ESRCH"; + case -ETXTBSY: return "-ETXTBSY"; + case -EUCLEAN: return "-EUCLEAN"; + case -EXDEV: return "-EXDEV"; + default: + snprintf(buf, sizeof(buf), "%d", err); + return buf; + } +} diff --git a/tools/lib/bpf/str_error.h b/tools/lib/bpf/str_error.h index 626d7ffb03d6..66ffebde0684 100644 --- a/tools/lib/bpf/str_error.h +++ b/tools/lib/bpf/str_error.h @@ -6,4 +6,11 @@ char *libbpf_strerror_r(int err, char *dst, int len); +/** + * @brief **errstr()** returns string corresponding to numeric errno + * @param err negative numeric errno + * @return pointer to string representation of the errno, that is invalidated + * upon the next call. + */ +const char *errstr(int err); #endif /* __LIBBPF_STR_ERROR_H */ diff --git a/tools/lib/bpf/usdt.c b/tools/lib/bpf/usdt.c index 93794f01bb67..5f085736c6c4 100644 --- a/tools/lib/bpf/usdt.c +++ b/tools/lib/bpf/usdt.c @@ -20,6 +20,7 @@ #include "libbpf_common.h" #include "libbpf_internal.h" #include "hashmap.h" +#include "str_error.h" /* libbpf's USDT support consists of BPF-side state/code and user-space * state/code working together in concert. BPF-side parts are defined in @@ -465,8 +466,8 @@ static int parse_vma_segs(int pid, const char *lib_path, struct elf_seg **segs, goto proceed; if (!realpath(lib_path, path)) { - pr_warn("usdt: failed to get absolute path of '%s' (err %d), using path as is...\n", - lib_path, -errno); + pr_warn("usdt: failed to get absolute path of '%s' (err %s), using path as is...\n", + lib_path, errstr(-errno)); libbpf_strlcpy(path, lib_path, sizeof(path)); } @@ -475,8 +476,8 @@ proceed: f = fopen(line, "re"); if (!f) { err = -errno; - pr_warn("usdt: failed to open '%s' to get base addr of '%s': %d\n", - line, lib_path, err); + pr_warn("usdt: failed to open '%s' to get base addr of '%s': %s\n", + line, lib_path, errstr(err)); return err; } @@ -606,7 +607,8 @@ static int collect_usdt_targets(struct usdt_manager *man, Elf *elf, const char * err = parse_elf_segs(elf, path, &segs, &seg_cnt); if (err) { - pr_warn("usdt: failed to process ELF program segments for '%s': %d\n", path, err); + pr_warn("usdt: failed to process ELF program segments for '%s': %s\n", + path, errstr(err)); goto err_out; } @@ -708,8 +710,8 @@ static int collect_usdt_targets(struct usdt_manager *man, Elf *elf, const char * if (vma_seg_cnt == 0) { err = parse_vma_segs(pid, path, &vma_segs, &vma_seg_cnt); if (err) { - pr_warn("usdt: failed to get memory segments in PID %d for shared library '%s': %d\n", - pid, path, err); + pr_warn("usdt: failed to get memory segments in PID %d for shared library '%s': %s\n", + pid, path, errstr(err)); goto err_out; } } @@ -1047,8 +1049,8 @@ struct bpf_link *usdt_manager_attach_usdt(struct usdt_manager *man, const struct if (is_new && bpf_map_update_elem(spec_map_fd, &spec_id, &target->spec, BPF_ANY)) { err = -errno; - pr_warn("usdt: failed to set USDT spec #%d for '%s:%s' in '%s': %d\n", - spec_id, usdt_provider, usdt_name, path, err); + pr_warn("usdt: failed to set USDT spec #%d for '%s:%s' in '%s': %s\n", + spec_id, usdt_provider, usdt_name, path, errstr(err)); goto err_out; } if (!man->has_bpf_cookie && @@ -1058,9 +1060,9 @@ struct bpf_link *usdt_manager_attach_usdt(struct usdt_manager *man, const struct pr_warn("usdt: IP collision detected for spec #%d for '%s:%s' in '%s'\n", spec_id, usdt_provider, usdt_name, path); } else { - pr_warn("usdt: failed to map IP 0x%lx to spec #%d for '%s:%s' in '%s': %d\n", + pr_warn("usdt: failed to map IP 0x%lx to spec #%d for '%s:%s' in '%s': %s\n", target->abs_ip, spec_id, usdt_provider, usdt_name, - path, err); + path, errstr(err)); } goto err_out; } @@ -1076,8 +1078,8 @@ struct bpf_link *usdt_manager_attach_usdt(struct usdt_manager *man, const struct target->rel_ip, &opts); err = libbpf_get_error(uprobe_link); if (err) { - pr_warn("usdt: failed to attach uprobe #%d for '%s:%s' in '%s': %d\n", - i, usdt_provider, usdt_name, path, err); + pr_warn("usdt: failed to attach uprobe #%d for '%s:%s' in '%s': %s\n", + i, usdt_provider, usdt_name, path, errstr(err)); goto err_out; } @@ -1099,8 +1101,8 @@ struct bpf_link *usdt_manager_attach_usdt(struct usdt_manager *man, const struct NULL, &opts_multi); if (!link->multi_link) { err = -errno; - pr_warn("usdt: failed to attach uprobe multi for '%s:%s' in '%s': %d\n", - usdt_provider, usdt_name, path, err); + pr_warn("usdt: failed to attach uprobe multi for '%s:%s' in '%s': %s\n", + usdt_provider, usdt_name, path, errstr(err)); goto err_out; } diff --git a/tools/lib/bpf/zip.c b/tools/lib/bpf/zip.c index 3f26d629b2b4..88c376a8348d 100644 --- a/tools/lib/bpf/zip.c +++ b/tools/lib/bpf/zip.c @@ -223,7 +223,7 @@ struct zip_archive *zip_archive_open(const char *path) if (!archive) { munmap(data, size); return ERR_PTR(-ENOMEM); - }; + } archive->data = data; archive->size = size; diff --git a/tools/lib/list_sort.c b/tools/lib/list_sort.c index 69affa251fa7..bb99e493dcd1 100644 --- a/tools/lib/list_sort.c +++ b/tools/lib/list_sort.c @@ -1,8 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 -#include <linux/kernel.h> #include <linux/compiler.h> #include <linux/export.h> -#include <linux/string.h> #include <linux/list_sort.h> #include <linux/list.h> diff --git a/tools/lib/perf/Documentation/Makefile b/tools/lib/perf/Documentation/Makefile index 972754082a85..573ca5b27556 100644 --- a/tools/lib/perf/Documentation/Makefile +++ b/tools/lib/perf/Documentation/Makefile @@ -121,7 +121,7 @@ install-man: all $(INSTALL) -d -m 755 $(DESTDIR)$(man7dir); \ $(INSTALL) -m 644 $(MAN_7) $(DESTDIR)$(man7dir); -install-html: +install-html: $(MAN_HTML) $(call QUIET_INSTALL, html) \ $(INSTALL) -d -m 755 $(DESTDIR)$(htmldir); \ $(INSTALL) -m 644 $(MAN_HTML) $(DESTDIR)$(htmldir); \ diff --git a/tools/lib/perf/evlist.c b/tools/lib/perf/evlist.c index c6d67fc9e57e..83c43dc13313 100644 --- a/tools/lib/perf/evlist.c +++ b/tools/lib/perf/evlist.c @@ -47,6 +47,20 @@ static void __perf_evlist__propagate_maps(struct perf_evlist *evlist, */ perf_cpu_map__put(evsel->cpus); evsel->cpus = perf_cpu_map__intersect(evlist->user_requested_cpus, evsel->own_cpus); + + /* + * Empty cpu lists would eventually get opened as "any" so remove + * genuinely empty ones before they're opened in the wrong place. + */ + if (perf_cpu_map__is_empty(evsel->cpus)) { + struct perf_evsel *next = perf_evlist__next(evlist, evsel); + + perf_evlist__remove(evlist, evsel); + /* Keep idx contiguous */ + if (next) + list_for_each_entry_from(next, &evlist->entries, node) + next->idx--; + } } else if (!evsel->own_cpus || evlist->has_user_cpus || (!evsel->requires_cpu && perf_cpu_map__has_any_cpu(evlist->user_requested_cpus))) { /* @@ -80,11 +94,11 @@ static void __perf_evlist__propagate_maps(struct perf_evlist *evlist, static void perf_evlist__propagate_maps(struct perf_evlist *evlist) { - struct perf_evsel *evsel; + struct perf_evsel *evsel, *n; evlist->needs_map_propagation = true; - perf_evlist__for_each_evsel(evlist, evsel) + list_for_each_entry_safe(evsel, n, &evlist->entries, node) __perf_evlist__propagate_maps(evlist, evsel); } diff --git a/tools/lib/perf/evsel.c b/tools/lib/perf/evsel.c index c07160953224..c475319e2e41 100644 --- a/tools/lib/perf/evsel.c +++ b/tools/lib/perf/evsel.c @@ -5,6 +5,7 @@ #include <perf/evsel.h> #include <perf/cpumap.h> #include <perf/threadmap.h> +#include <linux/hash.h> #include <linux/list.h> #include <internal/evsel.h> #include <linux/zalloc.h> @@ -23,6 +24,7 @@ void perf_evsel__init(struct perf_evsel *evsel, struct perf_event_attr *attr, int idx) { INIT_LIST_HEAD(&evsel->node); + INIT_LIST_HEAD(&evsel->per_stream_periods); evsel->attr = *attr; evsel->idx = idx; evsel->leader = evsel; @@ -531,10 +533,56 @@ int perf_evsel__alloc_id(struct perf_evsel *evsel, int ncpus, int nthreads) void perf_evsel__free_id(struct perf_evsel *evsel) { + struct perf_sample_id_period *pos, *n; + xyarray__delete(evsel->sample_id); evsel->sample_id = NULL; zfree(&evsel->id); evsel->ids = 0; + + perf_evsel_for_each_per_thread_period_safe(evsel, n, pos) { + list_del_init(&pos->node); + free(pos); + } +} + +bool perf_evsel__attr_has_per_thread_sample_period(struct perf_evsel *evsel) +{ + return (evsel->attr.sample_type & PERF_SAMPLE_READ) && + (evsel->attr.sample_type & PERF_SAMPLE_TID) && + evsel->attr.inherit; +} + +u64 *perf_sample_id__get_period_storage(struct perf_sample_id *sid, u32 tid, bool per_thread) +{ + struct hlist_head *head; + struct perf_sample_id_period *res; + int hash; + + if (!per_thread) + return &sid->period; + + hash = hash_32(tid, PERF_SAMPLE_ID__HLIST_BITS); + head = &sid->periods[hash]; + + hlist_for_each_entry(res, head, hnode) + if (res->tid == tid) + return &res->period; + + if (sid->evsel == NULL) + return NULL; + + res = zalloc(sizeof(struct perf_sample_id_period)); + if (res == NULL) + return NULL; + + INIT_LIST_HEAD(&res->node); + res->tid = tid; + + list_add_tail(&res->node, &sid->evsel->per_stream_periods); + hlist_add_head(&res->hnode, &sid->periods[hash]); + + return &res->period; } void perf_counts_values__scale(struct perf_counts_values *count, diff --git a/tools/lib/perf/include/internal/evsel.h b/tools/lib/perf/include/internal/evsel.h index 5cd220a61962..ea78defa77d0 100644 --- a/tools/lib/perf/include/internal/evsel.h +++ b/tools/lib/perf/include/internal/evsel.h @@ -11,6 +11,32 @@ struct perf_thread_map; struct xyarray; +/** + * The per-thread accumulated period storage node. + */ +struct perf_sample_id_period { + struct list_head node; + struct hlist_node hnode; + /* Holds total ID period value for PERF_SAMPLE_READ processing. */ + u64 period; + /* The TID that the values belongs to */ + u32 tid; +}; + +/** + * perf_evsel_for_each_per_thread_period_safe - safely iterate thru all the + * per_stream_periods + * @evlist:perf_evsel instance to iterate + * @item: struct perf_sample_id_period iterator + * @tmp: struct perf_sample_id_period temp iterator + */ +#define perf_evsel_for_each_per_thread_period_safe(evsel, tmp, item) \ + list_for_each_entry_safe(item, tmp, &(evsel)->per_stream_periods, node) + + +#define PERF_SAMPLE_ID__HLIST_BITS 4 +#define PERF_SAMPLE_ID__HLIST_SIZE (1 << PERF_SAMPLE_ID__HLIST_BITS) + /* * Per fd, to map back from PERF_SAMPLE_ID to evsel, only used when there are * more than one entry in the evlist. @@ -34,8 +60,32 @@ struct perf_sample_id { pid_t machine_pid; struct perf_cpu vcpu; - /* Holds total ID period value for PERF_SAMPLE_READ processing. */ - u64 period; + /* + * Per-thread, and global event counts are mutually exclusive: + * Whilst it is possible to combine events into a group with differing + * values of PERF_SAMPLE_READ, it is not valid to have inconsistent + * values for `inherit`. Therefore it is not possible to have a + * situation where a per-thread event is sampled as a global event; + * all !inherit groups are global, and all groups where the sampling + * event is inherit + PERF_SAMPLE_READ will be per-thread. Any event + * that is part of such a group that is inherit but not PERF_SAMPLE_READ + * will be read as per-thread. If such an event can also trigger a + * sample (such as with sample_period > 0) then it will not cause + * `read_format` to be included in its PERF_RECORD_SAMPLE, and + * therefore will not expose the per-thread group members as global. + */ + union { + /* + * Holds total ID period value for PERF_SAMPLE_READ processing + * (when period is not per-thread). + */ + u64 period; + /* + * Holds total ID period value for PERF_SAMPLE_READ processing + * (when period is per-thread). + */ + struct hlist_head periods[PERF_SAMPLE_ID__HLIST_SIZE]; + }; }; struct perf_evsel { @@ -58,6 +108,10 @@ struct perf_evsel { u32 ids; struct perf_evsel *leader; + /* For events where the read_format value is per-thread rather than + * global, stores the per-thread cumulative period */ + struct list_head per_stream_periods; + /* parse modifier helper */ int nr_members; /* @@ -88,4 +142,9 @@ int perf_evsel__apply_filter(struct perf_evsel *evsel, const char *filter); int perf_evsel__alloc_id(struct perf_evsel *evsel, int ncpus, int nthreads); void perf_evsel__free_id(struct perf_evsel *evsel); +bool perf_evsel__attr_has_per_thread_sample_period(struct perf_evsel *evsel); + +u64 *perf_sample_id__get_period_storage(struct perf_sample_id *sid, u32 tid, + bool per_thread); + #endif /* __LIBPERF_INTERNAL_EVSEL_H */ diff --git a/tools/lib/subcmd/parse-options.c b/tools/lib/subcmd/parse-options.c index eb896d30545b..555d617c1f50 100644 --- a/tools/lib/subcmd/parse-options.c +++ b/tools/lib/subcmd/parse-options.c @@ -807,7 +807,7 @@ static int option__cmp(const void *va, const void *vb) static struct option *options__order(const struct option *opts) { int nr_opts = 0, nr_group = 0, nr_parent = 0, len; - const struct option *o, *p = opts; + const struct option *o = NULL, *p = opts; struct option *opt, *ordered = NULL, *group; /* flatten the options that have parents */ diff --git a/tools/lib/subcmd/run-command.c b/tools/lib/subcmd/run-command.c index 4e3a557a2f37..0a764c25c384 100644 --- a/tools/lib/subcmd/run-command.c +++ b/tools/lib/subcmd/run-command.c @@ -2,6 +2,7 @@ #include <unistd.h> #include <sys/types.h> #include <sys/stat.h> +#include <ctype.h> #include <fcntl.h> #include <string.h> #include <linux/string.h> @@ -217,8 +218,40 @@ static int wait_or_whine(struct child_process *cmd, bool block) int check_if_command_finished(struct child_process *cmd) { +#ifdef __linux__ + char filename[FILENAME_MAX + 12]; + char status_line[256]; + FILE *status_file; + + /* + * Check by reading /proc/<pid>/status as calling waitpid causes + * stdout/stderr to be closed and data lost. + */ + sprintf(filename, "/proc/%d/status", cmd->pid); + status_file = fopen(filename, "r"); + if (status_file == NULL) { + /* Open failed assume finish_command was called. */ + return true; + } + while (fgets(status_line, sizeof(status_line), status_file) != NULL) { + char *p; + + if (strncmp(status_line, "State:", 6)) + continue; + + fclose(status_file); + p = status_line + 6; + while (isspace(*p)) + p++; + return *p == 'Z' ? 1 : 0; + } + /* Read failed assume finish_command was called. */ + fclose(status_file); + return 1; +#else wait_or_whine(cmd, /*block=*/false); return cmd->finished; +#endif } int finish_command(struct child_process *cmd) diff --git a/tools/lib/subcmd/subcmd-util.h b/tools/lib/subcmd/subcmd-util.h index dfac76e35ac7..c742b08815dc 100644 --- a/tools/lib/subcmd/subcmd-util.h +++ b/tools/lib/subcmd/subcmd-util.h @@ -20,8 +20,8 @@ static __noreturn inline void die(const char *err, ...) va_start(params, err); report(" Fatal: ", err, params); - exit(128); va_end(params); + exit(128); } #define zfree(ptr) ({ free(*ptr); *ptr = NULL; }) diff --git a/tools/lib/thermal/Makefile b/tools/lib/thermal/Makefile index 2d0d255fd0e1..8890fd57b110 100644 --- a/tools/lib/thermal/Makefile +++ b/tools/lib/thermal/Makefile @@ -121,7 +121,9 @@ all: fixdep clean: $(call QUIET_CLEAN, libthermal) $(RM) $(LIBTHERMAL_A) \ - *.o *~ *.a *.so *.so.$(VERSION) *.so.$(LIBTHERMAL_VERSION) .*.d .*.cmd LIBTHERMAL-CFLAGS $(LIBTHERMAL_PC) + *.o *~ *.a *.so *.so.$(VERSION) *.so.$(LIBTHERMAL_VERSION) \ + .*.d .*.cmd LIBTHERMAL-CFLAGS $(LIBTHERMAL_PC) \ + $(srctree)/tools/$(THERMAL_UAPI) $(LIBTHERMAL_PC): $(QUIET_GEN)sed -e "s|@PREFIX@|$(prefix)|" \ diff --git a/tools/lib/thermal/commands.c b/tools/lib/thermal/commands.c index 73d4d4e8d6ec..4998cec793ed 100644 --- a/tools/lib/thermal/commands.c +++ b/tools/lib/thermal/commands.c @@ -5,6 +5,7 @@ #include <stdio.h> #include <stdlib.h> #include <unistd.h> +#include <limits.h> #include <thermal.h> #include "thermal_nl.h" @@ -33,6 +34,11 @@ static struct nla_policy thermal_genl_policy[THERMAL_GENL_ATTR_MAX + 1] = { [THERMAL_GENL_ATTR_CDEV_CUR_STATE] = { .type = NLA_U32 }, [THERMAL_GENL_ATTR_CDEV_MAX_STATE] = { .type = NLA_U32 }, [THERMAL_GENL_ATTR_CDEV_NAME] = { .type = NLA_STRING }, + + /* Thresholds */ + [THERMAL_GENL_ATTR_THRESHOLD] = { .type = NLA_NESTED }, + [THERMAL_GENL_ATTR_THRESHOLD_TEMP] = { .type = NLA_U32 }, + [THERMAL_GENL_ATTR_THRESHOLD_DIRECTION] = { .type = NLA_U32 }, }; static int parse_tz_get(struct genl_info *info, struct thermal_zone **tz) @@ -182,6 +188,48 @@ static int parse_tz_get_gov(struct genl_info *info, struct thermal_zone *tz) return THERMAL_SUCCESS; } +static int parse_threshold_get(struct genl_info *info, struct thermal_zone *tz) +{ + struct nlattr *attr; + struct thermal_threshold *__tt = NULL; + size_t size = 0; + int rem; + + /* + * The size contains the size of the array and we want to + * access the last element, size - 1. + * + * The variable size is initialized to zero but it will be + * then incremented by the first if() statement. The message + * attributes are ordered, so the first if() statement will be + * always called before the second one. If it happens that is + * not the case, then it is a kernel bug. + */ + nla_for_each_nested(attr, info->attrs[THERMAL_GENL_ATTR_THRESHOLD], rem) { + + if (nla_type(attr) == THERMAL_GENL_ATTR_THRESHOLD_TEMP) { + + size++; + + __tt = realloc(__tt, sizeof(*__tt) * (size + 2)); + if (!__tt) + return THERMAL_ERROR; + + __tt[size - 1].temperature = nla_get_u32(attr); + } + + if (nla_type(attr) == THERMAL_GENL_ATTR_THRESHOLD_DIRECTION) + __tt[size - 1].direction = nla_get_u32(attr); + } + + if (__tt) + __tt[size].temperature = INT_MAX; + + tz->thresholds = __tt; + + return THERMAL_SUCCESS; +} + static int handle_netlink(struct nl_cache_ops *unused, struct genl_cmd *cmd, struct genl_info *info, void *arg) @@ -210,6 +258,10 @@ static int handle_netlink(struct nl_cache_ops *unused, ret = parse_tz_get_gov(info, arg); break; + case THERMAL_GENL_CMD_THRESHOLD_GET: + ret = parse_threshold_get(info, arg); + break; + default: return THERMAL_ERROR; } @@ -253,6 +305,34 @@ static struct genl_cmd thermal_cmds[] = { .c_maxattr = THERMAL_GENL_ATTR_MAX, .c_attr_policy = thermal_genl_policy, }, + { + .c_id = THERMAL_GENL_CMD_THRESHOLD_GET, + .c_name = (char *)"Get thresholds list", + .c_msg_parser = handle_netlink, + .c_maxattr = THERMAL_GENL_ATTR_MAX, + .c_attr_policy = thermal_genl_policy, + }, + { + .c_id = THERMAL_GENL_CMD_THRESHOLD_ADD, + .c_name = (char *)"Add a threshold", + .c_msg_parser = handle_netlink, + .c_maxattr = THERMAL_GENL_ATTR_MAX, + .c_attr_policy = thermal_genl_policy, + }, + { + .c_id = THERMAL_GENL_CMD_THRESHOLD_DELETE, + .c_name = (char *)"Delete a threshold", + .c_msg_parser = handle_netlink, + .c_maxattr = THERMAL_GENL_ATTR_MAX, + .c_attr_policy = thermal_genl_policy, + }, + { + .c_id = THERMAL_GENL_CMD_THRESHOLD_FLUSH, + .c_name = (char *)"Flush the thresholds", + .c_msg_parser = handle_netlink, + .c_maxattr = THERMAL_GENL_ATTR_MAX, + .c_attr_policy = thermal_genl_policy, + }, }; static struct genl_ops thermal_cmd_ops = { @@ -261,9 +341,41 @@ static struct genl_ops thermal_cmd_ops = { .o_ncmds = ARRAY_SIZE(thermal_cmds), }; -static thermal_error_t thermal_genl_auto(struct thermal_handler *th, int id, int cmd, - int flags, void *arg) +struct cmd_param { + int tz_id; + int temp; + int direction; +}; + +typedef int (*cmd_cb_t)(struct nl_msg *, struct cmd_param *); + +static int thermal_genl_tz_id_encode(struct nl_msg *msg, struct cmd_param *p) { + if (nla_put_u32(msg, THERMAL_GENL_ATTR_TZ_ID, p->tz_id)) + return -1; + + return 0; +} + +static int thermal_genl_threshold_encode(struct nl_msg *msg, struct cmd_param *p) +{ + if (thermal_genl_tz_id_encode(msg, p)) + return -1; + + if (nla_put_u32(msg, THERMAL_GENL_ATTR_THRESHOLD_TEMP, p->temp)) + return -1; + + if (nla_put_u32(msg, THERMAL_GENL_ATTR_THRESHOLD_DIRECTION, p->direction)) + return -1; + + return 0; +} + +static thermal_error_t thermal_genl_auto(struct thermal_handler *th, cmd_cb_t cmd_cb, + struct cmd_param *param, + int cmd, int flags, void *arg) +{ + thermal_error_t ret = THERMAL_ERROR; struct nl_msg *msg; void *hdr; @@ -274,45 +386,95 @@ static thermal_error_t thermal_genl_auto(struct thermal_handler *th, int id, int hdr = genlmsg_put(msg, NL_AUTO_PORT, NL_AUTO_SEQ, thermal_cmd_ops.o_id, 0, flags, cmd, THERMAL_GENL_VERSION); if (!hdr) - return THERMAL_ERROR; + goto out; - if (id >= 0 && nla_put_u32(msg, THERMAL_GENL_ATTR_TZ_ID, id)) - return THERMAL_ERROR; + if (cmd_cb && cmd_cb(msg, param)) + goto out; if (nl_send_msg(th->sk_cmd, th->cb_cmd, msg, genl_handle_msg, arg)) - return THERMAL_ERROR; + goto out; + ret = THERMAL_SUCCESS; +out: nlmsg_free(msg); - return THERMAL_SUCCESS; + return ret; } thermal_error_t thermal_cmd_get_tz(struct thermal_handler *th, struct thermal_zone **tz) { - return thermal_genl_auto(th, -1, THERMAL_GENL_CMD_TZ_GET_ID, + return thermal_genl_auto(th, NULL, NULL, THERMAL_GENL_CMD_TZ_GET_ID, NLM_F_DUMP | NLM_F_ACK, tz); } thermal_error_t thermal_cmd_get_cdev(struct thermal_handler *th, struct thermal_cdev **tc) { - return thermal_genl_auto(th, -1, THERMAL_GENL_CMD_CDEV_GET, + return thermal_genl_auto(th, NULL, NULL, THERMAL_GENL_CMD_CDEV_GET, NLM_F_DUMP | NLM_F_ACK, tc); } thermal_error_t thermal_cmd_get_trip(struct thermal_handler *th, struct thermal_zone *tz) { - return thermal_genl_auto(th, tz->id, THERMAL_GENL_CMD_TZ_GET_TRIP, - 0, tz); + struct cmd_param p = { .tz_id = tz->id }; + + return thermal_genl_auto(th, thermal_genl_tz_id_encode, &p, + THERMAL_GENL_CMD_TZ_GET_TRIP, 0, tz); } thermal_error_t thermal_cmd_get_governor(struct thermal_handler *th, struct thermal_zone *tz) { - return thermal_genl_auto(th, tz->id, THERMAL_GENL_CMD_TZ_GET_GOV, 0, tz); + struct cmd_param p = { .tz_id = tz->id }; + + return thermal_genl_auto(th, thermal_genl_tz_id_encode, &p, + THERMAL_GENL_CMD_TZ_GET_GOV, 0, tz); } thermal_error_t thermal_cmd_get_temp(struct thermal_handler *th, struct thermal_zone *tz) { - return thermal_genl_auto(th, tz->id, THERMAL_GENL_CMD_TZ_GET_TEMP, 0, tz); + struct cmd_param p = { .tz_id = tz->id }; + + return thermal_genl_auto(th, thermal_genl_tz_id_encode, &p, + THERMAL_GENL_CMD_TZ_GET_TEMP, 0, tz); +} + +thermal_error_t thermal_cmd_threshold_get(struct thermal_handler *th, + struct thermal_zone *tz) +{ + struct cmd_param p = { .tz_id = tz->id }; + + return thermal_genl_auto(th, thermal_genl_tz_id_encode, &p, + THERMAL_GENL_CMD_THRESHOLD_GET, 0, tz); +} + +thermal_error_t thermal_cmd_threshold_add(struct thermal_handler *th, + struct thermal_zone *tz, + int temperature, + int direction) +{ + struct cmd_param p = { .tz_id = tz->id, .temp = temperature, .direction = direction }; + + return thermal_genl_auto(th, thermal_genl_threshold_encode, &p, + THERMAL_GENL_CMD_THRESHOLD_ADD, 0, tz); +} + +thermal_error_t thermal_cmd_threshold_delete(struct thermal_handler *th, + struct thermal_zone *tz, + int temperature, + int direction) +{ + struct cmd_param p = { .tz_id = tz->id, .temp = temperature, .direction = direction }; + + return thermal_genl_auto(th, thermal_genl_threshold_encode, &p, + THERMAL_GENL_CMD_THRESHOLD_DELETE, 0, tz); +} + +thermal_error_t thermal_cmd_threshold_flush(struct thermal_handler *th, + struct thermal_zone *tz) +{ + struct cmd_param p = { .tz_id = tz->id }; + + return thermal_genl_auto(th, thermal_genl_tz_id_encode, &p, + THERMAL_GENL_CMD_THRESHOLD_FLUSH, 0, tz); } thermal_error_t thermal_cmd_exit(struct thermal_handler *th) diff --git a/tools/lib/thermal/events.c b/tools/lib/thermal/events.c index a7a55d1a0c4c..bd851c869029 100644 --- a/tools/lib/thermal/events.c +++ b/tools/lib/thermal/events.c @@ -94,6 +94,30 @@ static int handle_thermal_event(struct nl_msg *n, void *arg) case THERMAL_GENL_EVENT_TZ_GOV_CHANGE: return ops->gov_change(nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_ID]), nla_get_string(attrs[THERMAL_GENL_ATTR_GOV_NAME]), arg); + + case THERMAL_GENL_EVENT_THRESHOLD_ADD: + return ops->threshold_add(nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_ID]), + nla_get_u32(attrs[THERMAL_GENL_ATTR_THRESHOLD_TEMP]), + nla_get_u32(attrs[THERMAL_GENL_ATTR_THRESHOLD_DIRECTION]), arg); + + case THERMAL_GENL_EVENT_THRESHOLD_DELETE: + return ops->threshold_delete(nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_ID]), + nla_get_u32(attrs[THERMAL_GENL_ATTR_THRESHOLD_TEMP]), + nla_get_u32(attrs[THERMAL_GENL_ATTR_THRESHOLD_DIRECTION]), arg); + + case THERMAL_GENL_EVENT_THRESHOLD_FLUSH: + return ops->threshold_flush(nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_ID]), arg); + + case THERMAL_GENL_EVENT_THRESHOLD_UP: + return ops->threshold_up(nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_ID]), + nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_TEMP]), + nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_PREV_TEMP]), arg); + + case THERMAL_GENL_EVENT_THRESHOLD_DOWN: + return ops->threshold_down(nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_ID]), + nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_TEMP]), + nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_PREV_TEMP]), arg); + default: return -1; } @@ -101,19 +125,24 @@ static int handle_thermal_event(struct nl_msg *n, void *arg) static void thermal_events_ops_init(struct thermal_events_ops *ops) { - enabled_ops[THERMAL_GENL_EVENT_TZ_CREATE] = !!ops->tz_create; - enabled_ops[THERMAL_GENL_EVENT_TZ_DELETE] = !!ops->tz_delete; - enabled_ops[THERMAL_GENL_EVENT_TZ_DISABLE] = !!ops->tz_disable; - enabled_ops[THERMAL_GENL_EVENT_TZ_ENABLE] = !!ops->tz_enable; - enabled_ops[THERMAL_GENL_EVENT_TZ_TRIP_UP] = !!ops->trip_high; - enabled_ops[THERMAL_GENL_EVENT_TZ_TRIP_DOWN] = !!ops->trip_low; - enabled_ops[THERMAL_GENL_EVENT_TZ_TRIP_CHANGE] = !!ops->trip_change; - enabled_ops[THERMAL_GENL_EVENT_TZ_TRIP_ADD] = !!ops->trip_add; - enabled_ops[THERMAL_GENL_EVENT_TZ_TRIP_DELETE] = !!ops->trip_delete; - enabled_ops[THERMAL_GENL_EVENT_CDEV_ADD] = !!ops->cdev_add; - enabled_ops[THERMAL_GENL_EVENT_CDEV_DELETE] = !!ops->cdev_delete; - enabled_ops[THERMAL_GENL_EVENT_CDEV_STATE_UPDATE] = !!ops->cdev_update; - enabled_ops[THERMAL_GENL_EVENT_TZ_GOV_CHANGE] = !!ops->gov_change; + enabled_ops[THERMAL_GENL_EVENT_TZ_CREATE] = !!ops->tz_create; + enabled_ops[THERMAL_GENL_EVENT_TZ_DELETE] = !!ops->tz_delete; + enabled_ops[THERMAL_GENL_EVENT_TZ_DISABLE] = !!ops->tz_disable; + enabled_ops[THERMAL_GENL_EVENT_TZ_ENABLE] = !!ops->tz_enable; + enabled_ops[THERMAL_GENL_EVENT_TZ_TRIP_UP] = !!ops->trip_high; + enabled_ops[THERMAL_GENL_EVENT_TZ_TRIP_DOWN] = !!ops->trip_low; + enabled_ops[THERMAL_GENL_EVENT_TZ_TRIP_CHANGE] = !!ops->trip_change; + enabled_ops[THERMAL_GENL_EVENT_TZ_TRIP_ADD] = !!ops->trip_add; + enabled_ops[THERMAL_GENL_EVENT_TZ_TRIP_DELETE] = !!ops->trip_delete; + enabled_ops[THERMAL_GENL_EVENT_CDEV_ADD] = !!ops->cdev_add; + enabled_ops[THERMAL_GENL_EVENT_CDEV_DELETE] = !!ops->cdev_delete; + enabled_ops[THERMAL_GENL_EVENT_CDEV_STATE_UPDATE] = !!ops->cdev_update; + enabled_ops[THERMAL_GENL_EVENT_TZ_GOV_CHANGE] = !!ops->gov_change; + enabled_ops[THERMAL_GENL_EVENT_THRESHOLD_ADD] = !!ops->threshold_add; + enabled_ops[THERMAL_GENL_EVENT_THRESHOLD_DELETE] = !!ops->threshold_delete; + enabled_ops[THERMAL_GENL_EVENT_THRESHOLD_FLUSH] = !!ops->threshold_flush; + enabled_ops[THERMAL_GENL_EVENT_THRESHOLD_UP] = !!ops->threshold_up; + enabled_ops[THERMAL_GENL_EVENT_THRESHOLD_DOWN] = !!ops->threshold_down; } thermal_error_t thermal_events_handle(struct thermal_handler *th, void *arg) diff --git a/tools/lib/thermal/include/thermal.h b/tools/lib/thermal/include/thermal.h index 1abc560602cf..818ecdfb46e5 100644 --- a/tools/lib/thermal/include/thermal.h +++ b/tools/lib/thermal/include/thermal.h @@ -4,11 +4,20 @@ #define __LIBTHERMAL_H #include <linux/thermal.h> +#include <sys/types.h> #ifndef LIBTHERMAL_API #define LIBTHERMAL_API __attribute__((visibility("default"))) #endif +#ifndef THERMAL_THRESHOLD_WAY_UP +#define THERMAL_THRESHOLD_WAY_UP 0x1 +#endif + +#ifndef THERMAL_THRESHOLD_WAY_DOWN +#define THERMAL_THRESHOLD_WAY_DOWN 0x2 +#endif + #ifdef __cplusplus extern "C" { #endif @@ -31,6 +40,11 @@ struct thermal_events_ops { int (*cdev_delete)(int cdev_id, void *arg); int (*cdev_update)(int cdev_id, int cur_state, void *arg); int (*gov_change)(int tz_id, const char *gov_name, void *arg); + int (*threshold_add)(int tz_id, int temperature, int direction, void *arg); + int (*threshold_delete)(int tz_id, int temperature, int direction, void *arg); + int (*threshold_flush)(int tz_id, void *arg); + int (*threshold_up)(int tz_id, int temp, int prev_temp, void *arg); + int (*threshold_down)(int tz_id, int temp, int prev_temp, void *arg); }; struct thermal_ops { @@ -45,12 +59,18 @@ struct thermal_trip { int hyst; }; +struct thermal_threshold { + int temperature; + int direction; +}; + struct thermal_zone { int id; int temp; char name[THERMAL_NAME_LENGTH]; char governor[THERMAL_NAME_LENGTH]; struct thermal_trip *trip; + struct thermal_threshold *thresholds; }; struct thermal_cdev { @@ -74,12 +94,16 @@ typedef int (*cb_tt_t)(struct thermal_trip *, void *); typedef int (*cb_tc_t)(struct thermal_cdev *, void *); +typedef int (*cb_th_t)(struct thermal_threshold *, void *); + LIBTHERMAL_API int for_each_thermal_zone(struct thermal_zone *tz, cb_tz_t cb, void *arg); LIBTHERMAL_API int for_each_thermal_trip(struct thermal_trip *tt, cb_tt_t cb, void *arg); LIBTHERMAL_API int for_each_thermal_cdev(struct thermal_cdev *cdev, cb_tc_t cb, void *arg); +LIBTHERMAL_API int for_each_thermal_threshold(struct thermal_threshold *th, cb_th_t cb, void *arg); + LIBTHERMAL_API struct thermal_zone *thermal_zone_find_by_name(struct thermal_zone *tz, const char *name); @@ -124,6 +148,22 @@ LIBTHERMAL_API thermal_error_t thermal_cmd_get_governor(struct thermal_handler * LIBTHERMAL_API thermal_error_t thermal_cmd_get_temp(struct thermal_handler *th, struct thermal_zone *tz); +LIBTHERMAL_API thermal_error_t thermal_cmd_threshold_get(struct thermal_handler *th, + struct thermal_zone *tz); + +LIBTHERMAL_API thermal_error_t thermal_cmd_threshold_add(struct thermal_handler *th, + struct thermal_zone *tz, + int temperature, + int direction); + +LIBTHERMAL_API thermal_error_t thermal_cmd_threshold_delete(struct thermal_handler *th, + struct thermal_zone *tz, + int temperature, + int direction); + +LIBTHERMAL_API thermal_error_t thermal_cmd_threshold_flush(struct thermal_handler *th, + struct thermal_zone *tz); + /* * Netlink thermal samples */ diff --git a/tools/lib/thermal/libthermal.map b/tools/lib/thermal/libthermal.map index d5e77738c7a4..d657176aa47f 100644 --- a/tools/lib/thermal/libthermal.map +++ b/tools/lib/thermal/libthermal.map @@ -4,6 +4,7 @@ LIBTHERMAL_0.0.1 { for_each_thermal_zone; for_each_thermal_trip; for_each_thermal_cdev; + for_each_thermal_threshold; thermal_zone_find_by_name; thermal_zone_find_by_id; thermal_zone_discover; @@ -17,6 +18,10 @@ LIBTHERMAL_0.0.1 { thermal_cmd_get_trip; thermal_cmd_get_governor; thermal_cmd_get_temp; + thermal_cmd_threshold_get; + thermal_cmd_threshold_add; + thermal_cmd_threshold_delete; + thermal_cmd_threshold_flush; thermal_sampling_init; thermal_sampling_handle; thermal_sampling_fd; diff --git a/tools/lib/thermal/sampling.c b/tools/lib/thermal/sampling.c index 70577423a9f0..f67c1f9ea1d7 100644 --- a/tools/lib/thermal/sampling.c +++ b/tools/lib/thermal/sampling.c @@ -16,6 +16,8 @@ static int handle_thermal_sample(struct nl_msg *n, void *arg) struct thermal_handler_param *thp = arg; struct thermal_handler *th = thp->th; + arg = thp->arg; + genlmsg_parse(nlh, 0, attrs, THERMAL_GENL_ATTR_MAX, NULL); switch (genlhdr->cmd) { diff --git a/tools/lib/thermal/thermal.c b/tools/lib/thermal/thermal.c index 72a76dc205bc..6f02e3539159 100644 --- a/tools/lib/thermal/thermal.c +++ b/tools/lib/thermal/thermal.c @@ -1,10 +1,24 @@ // SPDX-License-Identifier: LGPL-2.1+ // Copyright (C) 2022, Linaro Ltd - Daniel Lezcano <daniel.lezcano@linaro.org> #include <stdio.h> +#include <limits.h> #include <thermal.h> #include "thermal_nl.h" +int for_each_thermal_threshold(struct thermal_threshold *th, cb_th_t cb, void *arg) +{ + int i, ret = 0; + + if (!th) + return 0; + + for (i = 0; th[i].temperature != INT_MAX; i++) + ret |= cb(&th[i], arg); + + return ret; +} + int for_each_thermal_cdev(struct thermal_cdev *cdev, cb_tc_t cb, void *arg) { int i, ret = 0; @@ -80,6 +94,9 @@ static int __thermal_zone_discover(struct thermal_zone *tz, void *th) if (thermal_cmd_get_trip(th, tz) < 0) return -1; + if (thermal_cmd_threshold_get(th, tz)) + return -1; + if (thermal_cmd_get_governor(th, tz)) return -1; diff --git a/tools/mm/page-types.c b/tools/mm/page-types.c index fa050d5a48cd..bcac7ebfb51f 100644 --- a/tools/mm/page-types.c +++ b/tools/mm/page-types.c @@ -22,6 +22,7 @@ #include <time.h> #include <setjmp.h> #include <signal.h> +#include <inttypes.h> #include <sys/types.h> #include <sys/errno.h> #include <sys/fcntl.h> @@ -391,9 +392,9 @@ static void show_page_range(unsigned long voffset, unsigned long offset, if (opt_file) printf("%lx\t", voff); if (opt_list_cgroup) - printf("@%llu\t", (unsigned long long)cgroup0); + printf("@%" PRIu64 "\t", cgroup0); if (opt_list_mapcnt) - printf("%lu\t", mapcnt0); + printf("%" PRIu64 "\t", mapcnt0); printf("%lx\t%lx\t%s\n", index, count, page_flag_name(flags0)); } @@ -419,9 +420,9 @@ static void show_page(unsigned long voffset, unsigned long offset, if (opt_file) printf("%lx\t", voffset); if (opt_list_cgroup) - printf("@%llu\t", (unsigned long long)cgroup); + printf("@%" PRIu64 "\t", cgroup); if (opt_list_mapcnt) - printf("%lu\t", mapcnt); + printf("%" PRIu64 "\t", mapcnt); printf("%lx\t%s\n", offset, page_flag_name(flags)); } diff --git a/tools/mm/page_owner_sort.c b/tools/mm/page_owner_sort.c index e1f264444342..880e36df0c11 100644 --- a/tools/mm/page_owner_sort.c +++ b/tools/mm/page_owner_sort.c @@ -377,6 +377,7 @@ static char *get_comm(char *buf) if (errno != 0) { if (debug_on) fprintf(stderr, "wrong comm in follow buf:\n%s\n", buf); + free(comm_str); return NULL; } diff --git a/tools/mm/slabinfo.c b/tools/mm/slabinfo.c index cfaeaea71042..1433eff99feb 100644 --- a/tools/mm/slabinfo.c +++ b/tools/mm/slabinfo.c @@ -21,7 +21,7 @@ #include <regex.h> #include <errno.h> -#define MAX_SLABS 500 +#define MAX_SLABS 2000 #define MAX_ALIASES 500 #define MAX_NODES 1024 @@ -1228,6 +1228,8 @@ static void read_slab_dir(void) continue; switch (de->d_type) { case DT_LNK: + if (alias - aliasinfo == MAX_ALIASES) + fatal("Too many aliases\n"); alias->name = strdup(de->d_name); count = readlink(de->d_name, buffer, sizeof(buffer)-1); @@ -1242,6 +1244,8 @@ static void read_slab_dir(void) alias++; break; case DT_DIR: + if (slab - slabinfo == MAX_SLABS) + fatal("Too many slabs\n"); if (chdir(de->d_name)) fatal("Unable to access slab %s\n", slab->name); slab->name = strdup(de->d_name); @@ -1297,7 +1301,9 @@ static void read_slab_dir(void) slab->cpu_partial_free = get_obj("cpu_partial_free"); slab->alloc_node_mismatch = get_obj("alloc_node_mismatch"); slab->deactivate_bypass = get_obj("deactivate_bypass"); - chdir(".."); + if (chdir("..")) + fatal("Unable to chdir from slab ../%s\n", + slab->name); if (slab->name[0] == ':') alias_targets++; slab++; @@ -1310,10 +1316,6 @@ static void read_slab_dir(void) slabs = slab - slabinfo; actual_slabs = slabs; aliases = alias - aliasinfo; - if (slabs > MAX_SLABS) - fatal("Too many slabs\n"); - if (aliases > MAX_ALIASES) - fatal("Too many aliases\n"); } static void output_slabs(void) diff --git a/tools/net/sunrpc/extract.sh b/tools/net/sunrpc/extract.sh new file mode 100755 index 000000000000..f944066f25bc --- /dev/null +++ b/tools/net/sunrpc/extract.sh @@ -0,0 +1,11 @@ +#! /bin/sh +# SPDX-License-Identifier: GPL-2.0 +# +# Extract an RPC protocol specification from an RFC document. +# The version of this script comes from RFC 8166. +# +# Usage: +# $ extract.sh < rfcNNNN.txt > protocol.x +# + +grep '^ *///' | sed 's?^ */// ??' | sed 's?^ *///$??' diff --git a/tools/net/sunrpc/xdrgen/README b/tools/net/sunrpc/xdrgen/README index 92f7738ad50c..27218a78ab40 100644 --- a/tools/net/sunrpc/xdrgen/README +++ b/tools/net/sunrpc/xdrgen/README @@ -150,6 +150,23 @@ Pragma directives specify exceptions to the normal generation of encoding and decoding functions. Currently one directive is implemented: "public". +Pragma big_endian +------ ---------- + + pragma big_endian <enum> ; + +For variables that might contain only a small number values, it +is more efficient to avoid the byte-swap when encoding or decoding +on little-endian machines. Such is often the case with error status +codes. For example: + + pragma big_endian nfsstat3; + +In this case, when generating an XDR struct or union containing a +field of type "nfsstat3", xdrgen will make the type of that field +"__be32" instead of "enum nfsstat3". XDR unions then switch on the +non-byte-swapped value of that field. + Pragma exclude ------ ------- diff --git a/tools/net/sunrpc/xdrgen/generators/__init__.py b/tools/net/sunrpc/xdrgen/generators/__init__.py index fd2457461274..b98574a36a4a 100644 --- a/tools/net/sunrpc/xdrgen/generators/__init__.py +++ b/tools/net/sunrpc/xdrgen/generators/__init__.py @@ -111,3 +111,7 @@ class SourceGenerator: def emit_encoder(self, node: _XdrAst) -> None: """Emit one encoder function for this XDR type""" raise NotImplementedError("Encoder generation not supported") + + def emit_maxsize(self, node: _XdrAst) -> None: + """Emit one maxsize macro for this XDR type""" + raise NotImplementedError("Maxsize macro generation not supported") diff --git a/tools/net/sunrpc/xdrgen/generators/enum.py b/tools/net/sunrpc/xdrgen/generators/enum.py index 855e43f4ae38..e62f715d3996 100644 --- a/tools/net/sunrpc/xdrgen/generators/enum.py +++ b/tools/net/sunrpc/xdrgen/generators/enum.py @@ -4,7 +4,7 @@ """Generate code to handle XDR enum types""" from generators import SourceGenerator, create_jinja2_environment -from xdr_ast import _XdrEnum, public_apis +from xdr_ast import _XdrEnum, public_apis, big_endian, get_header_name class XdrEnumGenerator(SourceGenerator): @@ -18,7 +18,7 @@ class XdrEnumGenerator(SourceGenerator): def emit_declaration(self, node: _XdrEnum) -> None: """Emit one declaration pair for an XDR enum type""" if node.name in public_apis: - template = self.environment.get_template("declaration/close.j2") + template = self.environment.get_template("declaration/enum.j2") print(template.render(name=node.name)) def emit_definition(self, node: _XdrEnum) -> None: @@ -30,15 +30,35 @@ class XdrEnumGenerator(SourceGenerator): for enumerator in node.enumerators: print(template.render(name=enumerator.name, value=enumerator.value)) - template = self.environment.get_template("definition/close.j2") + if node.name in big_endian: + template = self.environment.get_template("definition/close_be.j2") + else: + template = self.environment.get_template("definition/close.j2") print(template.render(name=node.name)) def emit_decoder(self, node: _XdrEnum) -> None: """Emit one decoder function for an XDR enum type""" - template = self.environment.get_template("decoder/enum.j2") + if node.name in big_endian: + template = self.environment.get_template("decoder/enum_be.j2") + else: + template = self.environment.get_template("decoder/enum.j2") print(template.render(name=node.name)) def emit_encoder(self, node: _XdrEnum) -> None: """Emit one encoder function for an XDR enum type""" - template = self.environment.get_template("encoder/enum.j2") + if node.name in big_endian: + template = self.environment.get_template("encoder/enum_be.j2") + else: + template = self.environment.get_template("encoder/enum.j2") print(template.render(name=node.name)) + + def emit_maxsize(self, node: _XdrEnum) -> None: + """Emit one maxsize macro for an XDR enum type""" + macro_name = get_header_name().upper() + "_" + node.name + "_sz" + template = self.environment.get_template("maxsize/enum.j2") + print( + template.render( + macro=macro_name, + width=" + ".join(node.symbolic_width()), + ) + ) diff --git a/tools/net/sunrpc/xdrgen/generators/pointer.py b/tools/net/sunrpc/xdrgen/generators/pointer.py index b0b27f1819c8..6dbda60ad2db 100644 --- a/tools/net/sunrpc/xdrgen/generators/pointer.py +++ b/tools/net/sunrpc/xdrgen/generators/pointer.py @@ -8,11 +8,11 @@ from jinja2 import Environment from generators import SourceGenerator, kernel_c_type from generators import create_jinja2_environment, get_jinja2_template -from xdr_ast import _XdrBasic, _XdrVariableLengthString +from xdr_ast import _XdrBasic, _XdrString from xdr_ast import _XdrFixedLengthOpaque, _XdrVariableLengthOpaque from xdr_ast import _XdrFixedLengthArray, _XdrVariableLengthArray from xdr_ast import _XdrOptionalData, _XdrPointer, _XdrDeclaration -from xdr_ast import public_apis +from xdr_ast import public_apis, get_header_name def emit_pointer_declaration(environment: Environment, node: _XdrPointer) -> None: @@ -46,7 +46,7 @@ def emit_pointer_member_definition( elif isinstance(field, _XdrVariableLengthOpaque): template = get_jinja2_template(environment, "definition", field.template) print(template.render(name=field.name)) - elif isinstance(field, _XdrVariableLengthString): + elif isinstance(field, _XdrString): template = get_jinja2_template(environment, "definition", field.template) print(template.render(name=field.name)) elif isinstance(field, _XdrFixedLengthArray): @@ -119,7 +119,7 @@ def emit_pointer_member_decoder( maxsize=field.maxsize, ) ) - elif isinstance(field, _XdrVariableLengthString): + elif isinstance(field, _XdrString): template = get_jinja2_template(environment, "decoder", field.template) print( template.render( @@ -198,7 +198,7 @@ def emit_pointer_member_encoder( maxsize=field.maxsize, ) ) - elif isinstance(field, _XdrVariableLengthString): + elif isinstance(field, _XdrString): template = get_jinja2_template(environment, "encoder", field.template) print( template.render( @@ -247,6 +247,18 @@ def emit_pointer_encoder(environment: Environment, node: _XdrPointer) -> None: print(template.render()) +def emit_pointer_maxsize(environment: Environment, node: _XdrPointer) -> None: + """Emit one maxsize macro for an XDR pointer type""" + macro_name = get_header_name().upper() + "_" + node.name + "_sz" + template = get_jinja2_template(environment, "maxsize", "pointer") + print( + template.render( + macro=macro_name, + width=" + ".join(node.symbolic_width()), + ) + ) + + class XdrPointerGenerator(SourceGenerator): """Generate source code for XDR pointer""" @@ -270,3 +282,7 @@ class XdrPointerGenerator(SourceGenerator): def emit_encoder(self, node: _XdrPointer) -> None: """Emit one encoder function for an XDR pointer type""" emit_pointer_encoder(self.environment, node) + + def emit_maxsize(self, node: _XdrPointer) -> None: + """Emit one maxsize macro for an XDR pointer type""" + emit_pointer_maxsize(self.environment, node) diff --git a/tools/net/sunrpc/xdrgen/generators/struct.py b/tools/net/sunrpc/xdrgen/generators/struct.py index b694cd470829..64911de46f62 100644 --- a/tools/net/sunrpc/xdrgen/generators/struct.py +++ b/tools/net/sunrpc/xdrgen/generators/struct.py @@ -8,11 +8,11 @@ from jinja2 import Environment from generators import SourceGenerator, kernel_c_type from generators import create_jinja2_environment, get_jinja2_template -from xdr_ast import _XdrBasic, _XdrVariableLengthString +from xdr_ast import _XdrBasic, _XdrString from xdr_ast import _XdrFixedLengthOpaque, _XdrVariableLengthOpaque from xdr_ast import _XdrFixedLengthArray, _XdrVariableLengthArray from xdr_ast import _XdrOptionalData, _XdrStruct, _XdrDeclaration -from xdr_ast import public_apis +from xdr_ast import public_apis, get_header_name def emit_struct_declaration(environment: Environment, node: _XdrStruct) -> None: @@ -46,7 +46,7 @@ def emit_struct_member_definition( elif isinstance(field, _XdrVariableLengthOpaque): template = get_jinja2_template(environment, "definition", field.template) print(template.render(name=field.name)) - elif isinstance(field, _XdrVariableLengthString): + elif isinstance(field, _XdrString): template = get_jinja2_template(environment, "definition", field.template) print(template.render(name=field.name)) elif isinstance(field, _XdrFixedLengthArray): @@ -119,7 +119,7 @@ def emit_struct_member_decoder( maxsize=field.maxsize, ) ) - elif isinstance(field, _XdrVariableLengthString): + elif isinstance(field, _XdrString): template = get_jinja2_template(environment, "decoder", field.template) print( template.render( @@ -198,7 +198,7 @@ def emit_struct_member_encoder( maxsize=field.maxsize, ) ) - elif isinstance(field, _XdrVariableLengthString): + elif isinstance(field, _XdrString): template = get_jinja2_template(environment, "encoder", field.template) print( template.render( @@ -247,6 +247,18 @@ def emit_struct_encoder(environment: Environment, node: _XdrStruct) -> None: print(template.render()) +def emit_struct_maxsize(environment: Environment, node: _XdrStruct) -> None: + """Emit one maxsize macro for an XDR struct type""" + macro_name = get_header_name().upper() + "_" + node.name + "_sz" + template = get_jinja2_template(environment, "maxsize", "struct") + print( + template.render( + macro=macro_name, + width=" + ".join(node.symbolic_width()), + ) + ) + + class XdrStructGenerator(SourceGenerator): """Generate source code for XDR structs""" @@ -270,3 +282,7 @@ class XdrStructGenerator(SourceGenerator): def emit_encoder(self, node: _XdrStruct) -> None: """Emit one encoder function for an XDR struct type""" emit_struct_encoder(self.environment, node) + + def emit_maxsize(self, node: _XdrStruct) -> None: + """Emit one maxsize macro for an XDR struct type""" + emit_struct_maxsize(self.environment, node) diff --git a/tools/net/sunrpc/xdrgen/generators/typedef.py b/tools/net/sunrpc/xdrgen/generators/typedef.py index 85a1b2303333..fab72e9d6915 100644 --- a/tools/net/sunrpc/xdrgen/generators/typedef.py +++ b/tools/net/sunrpc/xdrgen/generators/typedef.py @@ -8,11 +8,11 @@ from jinja2 import Environment from generators import SourceGenerator, kernel_c_type from generators import create_jinja2_environment, get_jinja2_template -from xdr_ast import _XdrBasic, _XdrTypedef, _XdrVariableLengthString +from xdr_ast import _XdrBasic, _XdrTypedef, _XdrString from xdr_ast import _XdrFixedLengthOpaque, _XdrVariableLengthOpaque from xdr_ast import _XdrFixedLengthArray, _XdrVariableLengthArray from xdr_ast import _XdrOptionalData, _XdrVoid, _XdrDeclaration -from xdr_ast import public_apis +from xdr_ast import public_apis, get_header_name def emit_typedef_declaration(environment: Environment, node: _XdrDeclaration) -> None: @@ -28,7 +28,7 @@ def emit_typedef_declaration(environment: Environment, node: _XdrDeclaration) -> classifier=node.spec.c_classifier, ) ) - elif isinstance(node, _XdrVariableLengthString): + elif isinstance(node, _XdrString): template = get_jinja2_template(environment, "declaration", node.template) print(template.render(name=node.name)) elif isinstance(node, _XdrFixedLengthOpaque): @@ -74,7 +74,7 @@ def emit_type_definition(environment: Environment, node: _XdrDeclaration) -> Non classifier=node.spec.c_classifier, ) ) - elif isinstance(node, _XdrVariableLengthString): + elif isinstance(node, _XdrString): template = get_jinja2_template(environment, "definition", node.template) print(template.render(name=node.name)) elif isinstance(node, _XdrFixedLengthOpaque): @@ -119,7 +119,7 @@ def emit_typedef_decoder(environment: Environment, node: _XdrDeclaration) -> Non type=node.spec.type_name, ) ) - elif isinstance(node, _XdrVariableLengthString): + elif isinstance(node, _XdrString): template = get_jinja2_template(environment, "decoder", node.template) print( template.render( @@ -180,7 +180,7 @@ def emit_typedef_encoder(environment: Environment, node: _XdrDeclaration) -> Non type=node.spec.type_name, ) ) - elif isinstance(node, _XdrVariableLengthString): + elif isinstance(node, _XdrString): template = get_jinja2_template(environment, "encoder", node.template) print( template.render( @@ -230,6 +230,18 @@ def emit_typedef_encoder(environment: Environment, node: _XdrDeclaration) -> Non raise NotImplementedError("typedef: type not recognized") +def emit_typedef_maxsize(environment: Environment, node: _XdrDeclaration) -> None: + """Emit a maxsize macro for an XDR typedef""" + macro_name = get_header_name().upper() + "_" + node.name + "_sz" + template = get_jinja2_template(environment, "maxsize", node.template) + print( + template.render( + macro=macro_name, + width=" + ".join(node.symbolic_width()), + ) + ) + + class XdrTypedefGenerator(SourceGenerator): """Generate source code for XDR typedefs""" @@ -253,3 +265,7 @@ class XdrTypedefGenerator(SourceGenerator): def emit_encoder(self, node: _XdrTypedef) -> None: """Emit one encoder function for an XDR typedef""" emit_typedef_encoder(self.environment, node.declaration) + + def emit_maxsize(self, node: _XdrTypedef) -> None: + """Emit one maxsize macro for an XDR typedef""" + emit_typedef_maxsize(self.environment, node.declaration) diff --git a/tools/net/sunrpc/xdrgen/generators/union.py b/tools/net/sunrpc/xdrgen/generators/union.py index 7974967bbb9f..2cca00e279cd 100644 --- a/tools/net/sunrpc/xdrgen/generators/union.py +++ b/tools/net/sunrpc/xdrgen/generators/union.py @@ -8,8 +8,8 @@ from jinja2 import Environment from generators import SourceGenerator from generators import create_jinja2_environment, get_jinja2_template -from xdr_ast import _XdrBasic, _XdrUnion, _XdrVoid -from xdr_ast import _XdrDeclaration, _XdrCaseSpec, public_apis +from xdr_ast import _XdrBasic, _XdrUnion, _XdrVoid, get_header_name +from xdr_ast import _XdrDeclaration, _XdrCaseSpec, public_apis, big_endian def emit_union_declaration(environment: Environment, node: _XdrUnion) -> None: @@ -77,13 +77,18 @@ def emit_union_switch_spec_decoder( print(template.render(name=node.name, type=node.spec.type_name)) -def emit_union_case_spec_decoder(environment: Environment, node: _XdrCaseSpec) -> None: +def emit_union_case_spec_decoder( + environment: Environment, node: _XdrCaseSpec, big_endian_discriminant: bool +) -> None: """Emit decoder functions for an XDR union's case arm""" if isinstance(node.arm, _XdrVoid): return - template = get_jinja2_template(environment, "decoder", "case_spec") + if big_endian_discriminant: + template = get_jinja2_template(environment, "decoder", "case_spec_be") + else: + template = get_jinja2_template(environment, "decoder", "case_spec") for case in node.values: print(template.render(case=case)) @@ -136,7 +141,11 @@ def emit_union_decoder(environment: Environment, node: _XdrUnion) -> None: emit_union_switch_spec_decoder(environment, node.discriminant) for case in node.cases: - emit_union_case_spec_decoder(environment, case) + emit_union_case_spec_decoder( + environment, + case, + node.discriminant.spec.type_name in big_endian, + ) emit_union_default_spec_decoder(environment, node) @@ -153,17 +162,21 @@ def emit_union_switch_spec_encoder( print(template.render(name=node.name, type=node.spec.type_name)) -def emit_union_case_spec_encoder(environment: Environment, node: _XdrCaseSpec) -> None: +def emit_union_case_spec_encoder( + environment: Environment, node: _XdrCaseSpec, big_endian_discriminant: bool +) -> None: """Emit encoder functions for an XDR union's case arm""" if isinstance(node.arm, _XdrVoid): return - template = get_jinja2_template(environment, "encoder", "case_spec") + if big_endian_discriminant: + template = get_jinja2_template(environment, "encoder", "case_spec_be") + else: + template = get_jinja2_template(environment, "encoder", "case_spec") for case in node.values: print(template.render(case=case)) - assert isinstance(node.arm, _XdrBasic) template = get_jinja2_template(environment, "encoder", node.arm.template) print( template.render( @@ -192,7 +205,6 @@ def emit_union_default_spec_encoder(environment: Environment, node: _XdrUnion) - print(template.render()) return - assert isinstance(default_case.arm, _XdrBasic) template = get_jinja2_template(environment, "encoder", default_case.arm.template) print( template.render( @@ -210,7 +222,11 @@ def emit_union_encoder(environment, node: _XdrUnion) -> None: emit_union_switch_spec_encoder(environment, node.discriminant) for case in node.cases: - emit_union_case_spec_encoder(environment, case) + emit_union_case_spec_encoder( + environment, + case, + node.discriminant.spec.type_name in big_endian, + ) emit_union_default_spec_encoder(environment, node) @@ -218,6 +234,18 @@ def emit_union_encoder(environment, node: _XdrUnion) -> None: print(template.render()) +def emit_union_maxsize(environment: Environment, node: _XdrUnion) -> None: + """Emit one maxsize macro for an XDR union type""" + macro_name = get_header_name().upper() + "_" + node.name + "_sz" + template = get_jinja2_template(environment, "maxsize", "union") + print( + template.render( + macro=macro_name, + width=" + ".join(node.symbolic_width()), + ) + ) + + class XdrUnionGenerator(SourceGenerator): """Generate source code for XDR unions""" @@ -241,3 +269,7 @@ class XdrUnionGenerator(SourceGenerator): def emit_encoder(self, node: _XdrUnion) -> None: """Emit one encoder function for an XDR union""" emit_union_encoder(self.environment, node) + + def emit_maxsize(self, node: _XdrUnion) -> None: + """Emit one maxsize macro for an XDR union""" + emit_union_maxsize(self.environment, node) diff --git a/tools/net/sunrpc/xdrgen/grammars/xdr.lark b/tools/net/sunrpc/xdrgen/grammars/xdr.lark index f3c4552e548d..7c2c1b8c86d1 100644 --- a/tools/net/sunrpc/xdrgen/grammars/xdr.lark +++ b/tools/net/sunrpc/xdrgen/grammars/xdr.lark @@ -3,7 +3,7 @@ declaration : "opaque" identifier "[" value "]" -> fixed_length_opaque | "opaque" identifier "<" [ value ] ">" -> variable_length_opaque - | "string" identifier "<" [ value ] ">" -> variable_length_string + | "string" identifier "<" [ value ] ">" -> string | type_specifier identifier "[" value "]" -> fixed_length_array | type_specifier identifier "<" [ value ] ">" -> variable_length_array | type_specifier "*" identifier -> optional_data @@ -87,12 +87,14 @@ procedure_def : type_specifier identifier "(" type_specifier ")" "=" c pragma_def : "pragma" directive identifier [ identifier ] ";" -directive : exclude_directive +directive : big_endian_directive + | exclude_directive | header_directive | pages_directive | public_directive | skip_directive +big_endian_directive : "big_endian" exclude_directive : "exclude" header_directive : "header" pages_directive : "pages" diff --git a/tools/net/sunrpc/xdrgen/subcmds/definitions.py b/tools/net/sunrpc/xdrgen/subcmds/definitions.py index 5cd13d53221f..c956e27f37c0 100644 --- a/tools/net/sunrpc/xdrgen/subcmds/definitions.py +++ b/tools/net/sunrpc/xdrgen/subcmds/definitions.py @@ -28,9 +28,7 @@ from xdr_parse import xdr_parser, set_xdr_annotate logger.setLevel(logging.INFO) -def emit_header_definitions( - root: Specification, language: str, peer: str -) -> None: +def emit_header_definitions(root: Specification, language: str, peer: str) -> None: """Emit header definitions""" for definition in root.definitions: if isinstance(definition.value, _XdrConstant): @@ -52,6 +50,25 @@ def emit_header_definitions( gen.emit_definition(definition.value) +def emit_header_maxsize(root: Specification, language: str, peer: str) -> None: + """Emit header maxsize macros""" + print("") + for definition in root.definitions: + if isinstance(definition.value, _XdrEnum): + gen = XdrEnumGenerator(language, peer) + elif isinstance(definition.value, _XdrPointer): + gen = XdrPointerGenerator(language, peer) + elif isinstance(definition.value, _XdrTypedef): + gen = XdrTypedefGenerator(language, peer) + elif isinstance(definition.value, _XdrStruct): + gen = XdrStructGenerator(language, peer) + elif isinstance(definition.value, _XdrUnion): + gen = XdrUnionGenerator(language, peer) + else: + continue + gen.emit_maxsize(definition.value) + + def handle_parse_error(e: UnexpectedInput) -> bool: """Simple parse error reporting, no recovery attempted""" print(e) @@ -71,6 +88,7 @@ def subcmd(args: Namespace) -> int: gen.emit_definition(args.filename, ast) emit_header_definitions(ast, args.language, args.peer) + emit_header_maxsize(ast, args.language, args.peer) gen = XdrHeaderBottomGenerator(args.language, args.peer) gen.emit_definition(args.filename, ast) diff --git a/tools/net/sunrpc/xdrgen/subcmds/source.py b/tools/net/sunrpc/xdrgen/subcmds/source.py index 00c04ad15b89..2024954748f0 100644 --- a/tools/net/sunrpc/xdrgen/subcmds/source.py +++ b/tools/net/sunrpc/xdrgen/subcmds/source.py @@ -83,8 +83,7 @@ def generate_client_source(filename: str, root: Specification, language: str) -> gen = XdrSourceTopGenerator(language, "client") gen.emit_source(filename, root) - # cel: todo: client needs XDR size macros - + print("") for definition in root.definitions: emit_source_encoder(definition.value, language, "client") for definition in root.definitions: diff --git a/tools/net/sunrpc/xdrgen/templates/C/enum/declaration/close.j2 b/tools/net/sunrpc/xdrgen/templates/C/enum/declaration/close.j2 deleted file mode 100644 index ab1e576c9531..000000000000 --- a/tools/net/sunrpc/xdrgen/templates/C/enum/declaration/close.j2 +++ /dev/null @@ -1,4 +0,0 @@ -{# SPDX-License-Identifier: GPL-2.0 #} - -bool xdrgen_decode_{{ name }}(struct xdr_stream *xdr, enum {{ name }} *ptr); -bool xdrgen_encode_{{ name }}(struct xdr_stream *xdr, enum {{ name }} value); diff --git a/tools/net/sunrpc/xdrgen/templates/C/enum/declaration/enum.j2 b/tools/net/sunrpc/xdrgen/templates/C/enum/declaration/enum.j2 new file mode 100644 index 000000000000..d1405c7c5354 --- /dev/null +++ b/tools/net/sunrpc/xdrgen/templates/C/enum/declaration/enum.j2 @@ -0,0 +1,4 @@ +{# SPDX-License-Identifier: GPL-2.0 #} + +bool xdrgen_decode_{{ name }}(struct xdr_stream *xdr, {{ name }} *ptr); +bool xdrgen_encode_{{ name }}(struct xdr_stream *xdr, {{ name }} value); diff --git a/tools/net/sunrpc/xdrgen/templates/C/enum/decoder/enum.j2 b/tools/net/sunrpc/xdrgen/templates/C/enum/decoder/enum.j2 index 341d829afeda..6482984f1cb7 100644 --- a/tools/net/sunrpc/xdrgen/templates/C/enum/decoder/enum.j2 +++ b/tools/net/sunrpc/xdrgen/templates/C/enum/decoder/enum.j2 @@ -8,7 +8,7 @@ bool {% else %} static bool __maybe_unused {% endif %} -xdrgen_decode_{{ name }}(struct xdr_stream *xdr, enum {{ name }} *ptr) +xdrgen_decode_{{ name }}(struct xdr_stream *xdr, {{ name }} *ptr) { u32 val; diff --git a/tools/net/sunrpc/xdrgen/templates/C/enum/decoder/enum_be.j2 b/tools/net/sunrpc/xdrgen/templates/C/enum/decoder/enum_be.j2 new file mode 100644 index 000000000000..44c391c10b42 --- /dev/null +++ b/tools/net/sunrpc/xdrgen/templates/C/enum/decoder/enum_be.j2 @@ -0,0 +1,14 @@ +{# SPDX-License-Identifier: GPL-2.0 #} + +{% if annotate %} +/* enum {{ name }} (big-endian) */ +{% endif %} +{% if name in public_apis %} +bool +{% else %} +static bool __maybe_unused +{% endif %} +xdrgen_decode_{{ name }}(struct xdr_stream *xdr, {{ name }} *ptr) +{ + return xdr_stream_decode_be32(xdr, ptr) == 0; +} diff --git a/tools/net/sunrpc/xdrgen/templates/C/enum/definition/close.j2 b/tools/net/sunrpc/xdrgen/templates/C/enum/definition/close.j2 index 9e62344a976a..a07586cbee17 100644 --- a/tools/net/sunrpc/xdrgen/templates/C/enum/definition/close.j2 +++ b/tools/net/sunrpc/xdrgen/templates/C/enum/definition/close.j2 @@ -1,2 +1,3 @@ {# SPDX-License-Identifier: GPL-2.0 #} }; +typedef enum {{ name }} {{ name }}; diff --git a/tools/net/sunrpc/xdrgen/templates/C/enum/definition/close_be.j2 b/tools/net/sunrpc/xdrgen/templates/C/enum/definition/close_be.j2 new file mode 100644 index 000000000000..2c18948bddf7 --- /dev/null +++ b/tools/net/sunrpc/xdrgen/templates/C/enum/definition/close_be.j2 @@ -0,0 +1,3 @@ +{# SPDX-License-Identifier: GPL-2.0 #} +}; +typedef __be32 {{ name }}; diff --git a/tools/net/sunrpc/xdrgen/templates/C/enum/encoder/enum.j2 b/tools/net/sunrpc/xdrgen/templates/C/enum/encoder/enum.j2 index bd0a770e50f2..67245b9a914d 100644 --- a/tools/net/sunrpc/xdrgen/templates/C/enum/encoder/enum.j2 +++ b/tools/net/sunrpc/xdrgen/templates/C/enum/encoder/enum.j2 @@ -8,7 +8,7 @@ bool {% else %} static bool __maybe_unused {% endif %} -xdrgen_encode_{{ name }}(struct xdr_stream *xdr, enum {{ name }} value) +xdrgen_encode_{{ name }}(struct xdr_stream *xdr, {{ name }} value) { return xdr_stream_encode_u32(xdr, value) == XDR_UNIT; } diff --git a/tools/net/sunrpc/xdrgen/templates/C/enum/encoder/enum_be.j2 b/tools/net/sunrpc/xdrgen/templates/C/enum/encoder/enum_be.j2 new file mode 100644 index 000000000000..fbbcc45948d6 --- /dev/null +++ b/tools/net/sunrpc/xdrgen/templates/C/enum/encoder/enum_be.j2 @@ -0,0 +1,14 @@ +{# SPDX-License-Identifier: GPL-2.0 #} + +{% if annotate %} +/* enum {{ name }} (big-endian) */ +{% endif %} +{% if name in public_apis %} +bool +{% else %} +static bool __maybe_unused +{% endif %} +xdrgen_encode_{{ name }}(struct xdr_stream *xdr, {{ name }} value) +{ + return xdr_stream_encode_be32(xdr, value) == XDR_UNIT; +} diff --git a/tools/net/sunrpc/xdrgen/templates/C/enum/maxsize/enum.j2 b/tools/net/sunrpc/xdrgen/templates/C/enum/maxsize/enum.j2 new file mode 100644 index 000000000000..45c1d4c21b22 --- /dev/null +++ b/tools/net/sunrpc/xdrgen/templates/C/enum/maxsize/enum.j2 @@ -0,0 +1,2 @@ +{# SPDX-License-Identifier: GPL-2.0 #} +#define {{ '{:<31}'.format(macro) }} ({{ width }}) diff --git a/tools/net/sunrpc/xdrgen/templates/C/pointer/decoder/variable_length_string.j2 b/tools/net/sunrpc/xdrgen/templates/C/pointer/decoder/string.j2 index 12d20b143b43..12d20b143b43 100644 --- a/tools/net/sunrpc/xdrgen/templates/C/pointer/decoder/variable_length_string.j2 +++ b/tools/net/sunrpc/xdrgen/templates/C/pointer/decoder/string.j2 diff --git a/tools/net/sunrpc/xdrgen/templates/C/pointer/definition/variable_length_string.j2 b/tools/net/sunrpc/xdrgen/templates/C/pointer/definition/string.j2 index 2de2feec77db..2de2feec77db 100644 --- a/tools/net/sunrpc/xdrgen/templates/C/pointer/definition/variable_length_string.j2 +++ b/tools/net/sunrpc/xdrgen/templates/C/pointer/definition/string.j2 diff --git a/tools/net/sunrpc/xdrgen/templates/C/pointer/encoder/variable_length_string.j2 b/tools/net/sunrpc/xdrgen/templates/C/pointer/encoder/string.j2 index cf65b71eaef3..cf65b71eaef3 100644 --- a/tools/net/sunrpc/xdrgen/templates/C/pointer/encoder/variable_length_string.j2 +++ b/tools/net/sunrpc/xdrgen/templates/C/pointer/encoder/string.j2 diff --git a/tools/net/sunrpc/xdrgen/templates/C/pointer/maxsize/pointer.j2 b/tools/net/sunrpc/xdrgen/templates/C/pointer/maxsize/pointer.j2 new file mode 100644 index 000000000000..9f3bfb47d2f4 --- /dev/null +++ b/tools/net/sunrpc/xdrgen/templates/C/pointer/maxsize/pointer.j2 @@ -0,0 +1,3 @@ +{# SPDX-License-Identifier: GPL-2.0 #} +#define {{ '{:<31}'.format(macro) }} \ + ({{ width }}) diff --git a/tools/net/sunrpc/xdrgen/templates/C/program/decoder/result.j2 b/tools/net/sunrpc/xdrgen/templates/C/program/decoder/result.j2 index d304eccb5c40..aa9940e322db 100644 --- a/tools/net/sunrpc/xdrgen/templates/C/program/decoder/result.j2 +++ b/tools/net/sunrpc/xdrgen/templates/C/program/decoder/result.j2 @@ -13,10 +13,6 @@ static int {{ program }}_xdr_dec_{{ result }}(struct rpc_rqst *req, if (!xdrgen_decode_{{ result }}(xdr, result)) return -EIO; - if (result->stat != nfs_ok) { - trace_nfs_xdr_status(xdr, (int)result->stat); - return {{ program }}_stat_to_errno(result->stat); - } {% endif %} return 0; } diff --git a/tools/net/sunrpc/xdrgen/templates/C/source_top/client.j2 b/tools/net/sunrpc/xdrgen/templates/C/source_top/client.j2 index e3a802cbc4d7..c5518c519854 100644 --- a/tools/net/sunrpc/xdrgen/templates/C/source_top/client.j2 +++ b/tools/net/sunrpc/xdrgen/templates/C/source_top/client.j2 @@ -3,6 +3,11 @@ // XDR specification file: {{ filename }} // XDR specification modification time: {{ mtime }} -#include <linux/sunrpc/xprt.h> +#include <linux/types.h> -#include "{{ program }}xdr_gen.h" +#include <linux/sunrpc/xdr.h> +#include <linux/sunrpc/xdrgen/_defs.h> +#include <linux/sunrpc/xdrgen/_builtins.h> +#include <linux/sunrpc/xdrgen/nlm4.h> + +#include <linux/sunrpc/clnt.h> diff --git a/tools/net/sunrpc/xdrgen/templates/C/struct/decoder/variable_length_string.j2 b/tools/net/sunrpc/xdrgen/templates/C/struct/decoder/string.j2 index 12d20b143b43..12d20b143b43 100644 --- a/tools/net/sunrpc/xdrgen/templates/C/struct/decoder/variable_length_string.j2 +++ b/tools/net/sunrpc/xdrgen/templates/C/struct/decoder/string.j2 diff --git a/tools/net/sunrpc/xdrgen/templates/C/struct/definition/variable_length_string.j2 b/tools/net/sunrpc/xdrgen/templates/C/struct/definition/string.j2 index 2de2feec77db..2de2feec77db 100644 --- a/tools/net/sunrpc/xdrgen/templates/C/struct/definition/variable_length_string.j2 +++ b/tools/net/sunrpc/xdrgen/templates/C/struct/definition/string.j2 diff --git a/tools/net/sunrpc/xdrgen/templates/C/struct/encoder/variable_length_string.j2 b/tools/net/sunrpc/xdrgen/templates/C/struct/encoder/string.j2 index cf65b71eaef3..cf65b71eaef3 100644 --- a/tools/net/sunrpc/xdrgen/templates/C/struct/encoder/variable_length_string.j2 +++ b/tools/net/sunrpc/xdrgen/templates/C/struct/encoder/string.j2 diff --git a/tools/net/sunrpc/xdrgen/templates/C/struct/maxsize/struct.j2 b/tools/net/sunrpc/xdrgen/templates/C/struct/maxsize/struct.j2 new file mode 100644 index 000000000000..9f3bfb47d2f4 --- /dev/null +++ b/tools/net/sunrpc/xdrgen/templates/C/struct/maxsize/struct.j2 @@ -0,0 +1,3 @@ +{# SPDX-License-Identifier: GPL-2.0 #} +#define {{ '{:<31}'.format(macro) }} \ + ({{ width }}) diff --git a/tools/net/sunrpc/xdrgen/templates/C/typedef/declaration/variable_length_string.j2 b/tools/net/sunrpc/xdrgen/templates/C/typedef/declaration/string.j2 index 3fe3ddd9f359..3fe3ddd9f359 100644 --- a/tools/net/sunrpc/xdrgen/templates/C/typedef/declaration/variable_length_string.j2 +++ b/tools/net/sunrpc/xdrgen/templates/C/typedef/declaration/string.j2 diff --git a/tools/net/sunrpc/xdrgen/templates/C/typedef/decoder/variable_length_string.j2 b/tools/net/sunrpc/xdrgen/templates/C/typedef/decoder/string.j2 index 56c5a17d6a70..56c5a17d6a70 100644 --- a/tools/net/sunrpc/xdrgen/templates/C/typedef/decoder/variable_length_string.j2 +++ b/tools/net/sunrpc/xdrgen/templates/C/typedef/decoder/string.j2 diff --git a/tools/net/sunrpc/xdrgen/templates/C/typedef/definition/variable_length_string.j2 b/tools/net/sunrpc/xdrgen/templates/C/typedef/definition/string.j2 index c03c2df8e625..c03c2df8e625 100644 --- a/tools/net/sunrpc/xdrgen/templates/C/typedef/definition/variable_length_string.j2 +++ b/tools/net/sunrpc/xdrgen/templates/C/typedef/definition/string.j2 diff --git a/tools/net/sunrpc/xdrgen/templates/C/typedef/encoder/variable_length_string.j2 b/tools/net/sunrpc/xdrgen/templates/C/typedef/encoder/string.j2 index 3d490ff180d0..3d490ff180d0 100644 --- a/tools/net/sunrpc/xdrgen/templates/C/typedef/encoder/variable_length_string.j2 +++ b/tools/net/sunrpc/xdrgen/templates/C/typedef/encoder/string.j2 diff --git a/tools/net/sunrpc/xdrgen/templates/C/typedef/maxsize/basic.j2 b/tools/net/sunrpc/xdrgen/templates/C/typedef/maxsize/basic.j2 new file mode 100644 index 000000000000..9f3bfb47d2f4 --- /dev/null +++ b/tools/net/sunrpc/xdrgen/templates/C/typedef/maxsize/basic.j2 @@ -0,0 +1,3 @@ +{# SPDX-License-Identifier: GPL-2.0 #} +#define {{ '{:<31}'.format(macro) }} \ + ({{ width }}) diff --git a/tools/net/sunrpc/xdrgen/templates/C/typedef/maxsize/fixed_length_opaque.j2 b/tools/net/sunrpc/xdrgen/templates/C/typedef/maxsize/fixed_length_opaque.j2 new file mode 100644 index 000000000000..45c1d4c21b22 --- /dev/null +++ b/tools/net/sunrpc/xdrgen/templates/C/typedef/maxsize/fixed_length_opaque.j2 @@ -0,0 +1,2 @@ +{# SPDX-License-Identifier: GPL-2.0 #} +#define {{ '{:<31}'.format(macro) }} ({{ width }}) diff --git a/tools/net/sunrpc/xdrgen/templates/C/typedef/maxsize/string.j2 b/tools/net/sunrpc/xdrgen/templates/C/typedef/maxsize/string.j2 new file mode 100644 index 000000000000..45c1d4c21b22 --- /dev/null +++ b/tools/net/sunrpc/xdrgen/templates/C/typedef/maxsize/string.j2 @@ -0,0 +1,2 @@ +{# SPDX-License-Identifier: GPL-2.0 #} +#define {{ '{:<31}'.format(macro) }} ({{ width }}) diff --git a/tools/net/sunrpc/xdrgen/templates/C/typedef/maxsize/variable_length_array.j2 b/tools/net/sunrpc/xdrgen/templates/C/typedef/maxsize/variable_length_array.j2 new file mode 100644 index 000000000000..45c1d4c21b22 --- /dev/null +++ b/tools/net/sunrpc/xdrgen/templates/C/typedef/maxsize/variable_length_array.j2 @@ -0,0 +1,2 @@ +{# SPDX-License-Identifier: GPL-2.0 #} +#define {{ '{:<31}'.format(macro) }} ({{ width }}) diff --git a/tools/net/sunrpc/xdrgen/templates/C/typedef/maxsize/variable_length_opaque.j2 b/tools/net/sunrpc/xdrgen/templates/C/typedef/maxsize/variable_length_opaque.j2 new file mode 100644 index 000000000000..45c1d4c21b22 --- /dev/null +++ b/tools/net/sunrpc/xdrgen/templates/C/typedef/maxsize/variable_length_opaque.j2 @@ -0,0 +1,2 @@ +{# SPDX-License-Identifier: GPL-2.0 #} +#define {{ '{:<31}'.format(macro) }} ({{ width }}) diff --git a/tools/net/sunrpc/xdrgen/templates/C/union/decoder/case_spec_be.j2 b/tools/net/sunrpc/xdrgen/templates/C/union/decoder/case_spec_be.j2 new file mode 100644 index 000000000000..917f3a1c4588 --- /dev/null +++ b/tools/net/sunrpc/xdrgen/templates/C/union/decoder/case_spec_be.j2 @@ -0,0 +1,2 @@ +{# SPDX-License-Identifier: GPL-2.0 #} + case __constant_cpu_to_be32({{ case }}): diff --git a/tools/net/sunrpc/xdrgen/templates/C/union/decoder/variable_length_string.j2 b/tools/net/sunrpc/xdrgen/templates/C/union/decoder/string.j2 index 83b6e5a14e7f..83b6e5a14e7f 100644 --- a/tools/net/sunrpc/xdrgen/templates/C/union/decoder/variable_length_string.j2 +++ b/tools/net/sunrpc/xdrgen/templates/C/union/decoder/string.j2 diff --git a/tools/net/sunrpc/xdrgen/templates/C/union/encoder/case_spec_be.j2 b/tools/net/sunrpc/xdrgen/templates/C/union/encoder/case_spec_be.j2 new file mode 100644 index 000000000000..917f3a1c4588 --- /dev/null +++ b/tools/net/sunrpc/xdrgen/templates/C/union/encoder/case_spec_be.j2 @@ -0,0 +1,2 @@ +{# SPDX-License-Identifier: GPL-2.0 #} + case __constant_cpu_to_be32({{ case }}): diff --git a/tools/net/sunrpc/xdrgen/templates/C/union/maxsize/union.j2 b/tools/net/sunrpc/xdrgen/templates/C/union/maxsize/union.j2 new file mode 100644 index 000000000000..9f3bfb47d2f4 --- /dev/null +++ b/tools/net/sunrpc/xdrgen/templates/C/union/maxsize/union.j2 @@ -0,0 +1,3 @@ +{# SPDX-License-Identifier: GPL-2.0 #} +#define {{ '{:<31}'.format(macro) }} \ + ({{ width }}) diff --git a/tools/net/sunrpc/xdrgen/xdr_ast.py b/tools/net/sunrpc/xdrgen/xdr_ast.py index dbd3fcf9c957..5233e73c7046 100644 --- a/tools/net/sunrpc/xdrgen/xdr_ast.py +++ b/tools/net/sunrpc/xdrgen/xdr_ast.py @@ -12,13 +12,50 @@ from lark.tree import Meta this_module = sys.modules[__name__] +big_endian = [] excluded_apis = [] header_name = "none" public_apis = [] -enums = set() structs = set() pass_by_reference = set() +constants = {} + + +def xdr_quadlen(val: str) -> int: + """Return integer XDR width of an XDR type""" + if val in constants: + octets = constants[val] + else: + octets = int(val) + return int((octets + 3) / 4) + + +symbolic_widths = { + "void": ["XDR_void"], + "bool": ["XDR_bool"], + "int": ["XDR_int"], + "unsigned_int": ["XDR_unsigned_int"], + "long": ["XDR_long"], + "unsigned_long": ["XDR_unsigned_long"], + "hyper": ["XDR_hyper"], + "unsigned_hyper": ["XDR_unsigned_hyper"], +} + +# Numeric XDR widths are tracked in a dictionary that is keyed +# by type_name because sometimes a caller has nothing more than +# the type_name to use to figure out the numeric width. +max_widths = { + "void": 0, + "bool": 1, + "int": 1, + "unsigned_int": 1, + "long": 1, + "unsigned_long": 1, + "hyper": 2, + "unsigned_hyper": 2, +} + @dataclass class _XdrAst(ast_utils.Ast): @@ -51,18 +88,31 @@ class _XdrTypeSpecifier(_XdrAst): """Corresponds to 'type_specifier' in the XDR language grammar""" type_name: str - c_classifier: str + c_classifier: str = "" @dataclass class _XdrDefinedType(_XdrTypeSpecifier): """Corresponds to a type defined by the input specification""" + def symbolic_width(self) -> List: + """Return list containing XDR width of type's components""" + return [get_header_name().upper() + "_" + self.type_name + "_sz"] + + def __post_init__(self): + if self.type_name in structs: + self.c_classifier = "struct " + symbolic_widths[self.type_name] = self.symbolic_width() + @dataclass class _XdrBuiltInType(_XdrTypeSpecifier): """Corresponds to a built-in XDR type""" + def symbolic_width(self) -> List: + """Return list containing XDR width of type's components""" + return symbolic_widths[self.type_name] + @dataclass class _XdrDeclaration(_XdrAst): @@ -77,6 +127,18 @@ class _XdrFixedLengthOpaque(_XdrDeclaration): size: str template: str = "fixed_length_opaque" + def max_width(self) -> int: + """Return width of type in XDR_UNITS""" + return xdr_quadlen(self.size) + + def symbolic_width(self) -> List: + """Return list containing XDR width of type's components""" + return ["XDR_QUADLEN(" + self.size + ")"] + + def __post_init__(self): + max_widths[self.name] = self.max_width() + symbolic_widths[self.name] = self.symbolic_width() + @dataclass class _XdrVariableLengthOpaque(_XdrDeclaration): @@ -86,14 +148,44 @@ class _XdrVariableLengthOpaque(_XdrDeclaration): maxsize: str template: str = "variable_length_opaque" + def max_width(self) -> int: + """Return width of type in XDR_UNITS""" + return 1 + xdr_quadlen(self.maxsize) + + def symbolic_width(self) -> List: + """Return list containing XDR width of type's components""" + widths = ["XDR_unsigned_int"] + if self.maxsize != "0": + widths.append("XDR_QUADLEN(" + self.maxsize + ")") + return widths + + def __post_init__(self): + max_widths[self.name] = self.max_width() + symbolic_widths[self.name] = self.symbolic_width() + @dataclass -class _XdrVariableLengthString(_XdrDeclaration): +class _XdrString(_XdrDeclaration): """A (NUL-terminated) variable-length string declaration""" name: str maxsize: str - template: str = "variable_length_string" + template: str = "string" + + def max_width(self) -> int: + """Return width of type in XDR_UNITS""" + return 1 + xdr_quadlen(self.maxsize) + + def symbolic_width(self) -> List: + """Return list containing XDR width of type's components""" + widths = ["XDR_unsigned_int"] + if self.maxsize != "0": + widths.append("XDR_QUADLEN(" + self.maxsize + ")") + return widths + + def __post_init__(self): + max_widths[self.name] = self.max_width() + symbolic_widths[self.name] = self.symbolic_width() @dataclass @@ -105,6 +197,19 @@ class _XdrFixedLengthArray(_XdrDeclaration): size: str template: str = "fixed_length_array" + def max_width(self) -> int: + """Return width of type in XDR_UNITS""" + return xdr_quadlen(self.size) * max_widths[self.spec.type_name] + + def symbolic_width(self) -> List: + """Return list containing XDR width of type's components""" + item_width = " + ".join(symbolic_widths[self.spec.type_name]) + return ["(" + self.size + " * (" + item_width + "))"] + + def __post_init__(self): + max_widths[self.name] = self.max_width() + symbolic_widths[self.name] = self.symbolic_width() + @dataclass class _XdrVariableLengthArray(_XdrDeclaration): @@ -115,6 +220,22 @@ class _XdrVariableLengthArray(_XdrDeclaration): maxsize: str template: str = "variable_length_array" + def max_width(self) -> int: + """Return width of type in XDR_UNITS""" + return 1 + (xdr_quadlen(self.maxsize) * max_widths[self.spec.type_name]) + + def symbolic_width(self) -> List: + """Return list containing XDR width of type's components""" + widths = ["XDR_unsigned_int"] + if self.maxsize != "0": + item_width = " + ".join(symbolic_widths[self.spec.type_name]) + widths.append("(" + self.maxsize + " * (" + item_width + "))") + return widths + + def __post_init__(self): + max_widths[self.name] = self.max_width() + symbolic_widths[self.name] = self.symbolic_width() + @dataclass class _XdrOptionalData(_XdrDeclaration): @@ -124,6 +245,20 @@ class _XdrOptionalData(_XdrDeclaration): spec: _XdrTypeSpecifier template: str = "optional_data" + def max_width(self) -> int: + """Return width of type in XDR_UNITS""" + return 1 + + def symbolic_width(self) -> List: + """Return list containing XDR width of type's components""" + return ["XDR_bool"] + + def __post_init__(self): + structs.add(self.name) + pass_by_reference.add(self.name) + max_widths[self.name] = self.max_width() + symbolic_widths[self.name] = self.symbolic_width() + @dataclass class _XdrBasic(_XdrDeclaration): @@ -133,13 +268,34 @@ class _XdrBasic(_XdrDeclaration): spec: _XdrTypeSpecifier template: str = "basic" + def max_width(self) -> int: + """Return width of type in XDR_UNITS""" + return max_widths[self.spec.type_name] + + def symbolic_width(self) -> List: + """Return list containing XDR width of type's components""" + return symbolic_widths[self.spec.type_name] + + def __post_init__(self): + max_widths[self.name] = self.max_width() + symbolic_widths[self.name] = self.symbolic_width() + @dataclass class _XdrVoid(_XdrDeclaration): """A void declaration""" + name: str = "void" template: str = "void" + def max_width(self) -> int: + """Return width of type in XDR_UNITS""" + return 0 + + def symbolic_width(self) -> List: + """Return list containing XDR width of type's components""" + return [] + @dataclass class _XdrConstant(_XdrAst): @@ -148,6 +304,10 @@ class _XdrConstant(_XdrAst): name: str value: str + def __post_init__(self): + if self.value not in constants: + constants[self.name] = int(self.value, 0) + @dataclass class _XdrEnumerator(_XdrAst): @@ -156,6 +316,10 @@ class _XdrEnumerator(_XdrAst): name: str value: str + def __post_init__(self): + if self.value not in constants: + constants[self.name] = int(self.value, 0) + @dataclass class _XdrEnum(_XdrAst): @@ -166,6 +330,18 @@ class _XdrEnum(_XdrAst): maximum: int enumerators: List[_XdrEnumerator] + def max_width(self) -> int: + """Return width of type in XDR_UNITS""" + return 1 + + def symbolic_width(self) -> List: + """Return list containing XDR width of type's components""" + return ["XDR_int"] + + def __post_init__(self): + max_widths[self.name] = self.max_width() + symbolic_widths[self.name] = self.symbolic_width() + @dataclass class _XdrStruct(_XdrAst): @@ -174,6 +350,26 @@ class _XdrStruct(_XdrAst): name: str fields: List[_XdrDeclaration] + def max_width(self) -> int: + """Return width of type in XDR_UNITS""" + width = 0 + for field in self.fields: + width += field.max_width() + return width + + def symbolic_width(self) -> List: + """Return list containing XDR width of type's components""" + widths = [] + for field in self.fields: + widths += field.symbolic_width() + return widths + + def __post_init__(self): + structs.add(self.name) + pass_by_reference.add(self.name) + max_widths[self.name] = self.max_width() + symbolic_widths[self.name] = self.symbolic_width() + @dataclass class _XdrPointer(_XdrAst): @@ -182,6 +378,27 @@ class _XdrPointer(_XdrAst): name: str fields: List[_XdrDeclaration] + def max_width(self) -> int: + """Return width of type in XDR_UNITS""" + width = 1 + for field in self.fields[0:-1]: + width += field.max_width() + return width + + def symbolic_width(self) -> List: + """Return list containing XDR width of type's components""" + widths = [] + widths += ["XDR_bool"] + for field in self.fields[0:-1]: + widths += field.symbolic_width() + return widths + + def __post_init__(self): + structs.add(self.name) + pass_by_reference.add(self.name) + max_widths[self.name] = self.max_width() + symbolic_widths[self.name] = self.symbolic_width() + @dataclass class _XdrTypedef(_XdrAst): @@ -189,6 +406,23 @@ class _XdrTypedef(_XdrAst): declaration: _XdrDeclaration + def max_width(self) -> int: + """Return width of type in XDR_UNITS""" + return self.declaration.max_width() + + def symbolic_width(self) -> List: + """Return list containing XDR width of type's components""" + return self.declaration.symbolic_width() + + def __post_init__(self): + if isinstance(self.declaration, _XdrBasic): + new_type = self.declaration + if isinstance(new_type.spec, _XdrDefinedType): + if new_type.spec.type_name in pass_by_reference: + pass_by_reference.add(new_type.name) + max_widths[new_type.name] = self.max_width() + symbolic_widths[new_type.name] = self.symbolic_width() + @dataclass class _XdrCaseSpec(_XdrAst): @@ -216,6 +450,36 @@ class _XdrUnion(_XdrAst): cases: List[_XdrCaseSpec] default: _XdrDeclaration + def max_width(self) -> int: + """Return width of type in XDR_UNITS""" + max_width = 0 + for case in self.cases: + if case.arm.max_width() > max_width: + max_width = case.arm.max_width() + if self.default: + if self.default.arm.max_width() > max_width: + max_width = self.default.arm.max_width() + return 1 + max_width + + def symbolic_width(self) -> List: + """Return list containing XDR width of type's components""" + max_width = 0 + for case in self.cases: + if case.arm.max_width() > max_width: + max_width = case.arm.max_width() + width = case.arm.symbolic_width() + if self.default: + if self.default.arm.max_width() > max_width: + max_width = self.default.arm.max_width() + width = self.default.arm.symbolic_width() + return symbolic_widths[self.discriminant.name] + width + + def __post_init__(self): + structs.add(self.name) + pass_by_reference.add(self.name) + max_widths[self.name] = self.max_width() + symbolic_widths[self.name] = self.symbolic_width() + @dataclass class _RpcProcedure(_XdrAst): @@ -290,24 +554,13 @@ class ParseToAst(Transformer): return _XdrConstantValue(value) def type_specifier(self, children): - """Instantiate one type_specifier object""" - c_classifier = "" + """Instantiate one _XdrTypeSpecifier object""" if isinstance(children[0], _XdrIdentifier): name = children[0].symbol - if name in enums: - c_classifier = "enum " - if name in structs: - c_classifier = "struct " - return _XdrDefinedType( - type_name=name, - c_classifier=c_classifier, - ) - - token = children[0].data - return _XdrBuiltInType( - type_name=token.value, - c_classifier=c_classifier, - ) + return _XdrDefinedType(type_name=name) + + name = children[0].data.value + return _XdrBuiltInType(type_name=name) def constant_def(self, children): """Instantiate one _XdrConstant object""" @@ -320,7 +573,6 @@ class ParseToAst(Transformer): def enum(self, children): """Instantiate one _XdrEnum object""" enum_name = children[0].symbol - enums.add(enum_name) i = 0 enumerators = [] @@ -350,15 +602,15 @@ class ParseToAst(Transformer): return _XdrVariableLengthOpaque(name, maxsize) - def variable_length_string(self, children): - """Instantiate one _XdrVariableLengthString declaration object""" + def string(self, children): + """Instantiate one _XdrString declaration object""" name = children[0].symbol if children[1] is not None: maxsize = children[1].value else: maxsize = "0" - return _XdrVariableLengthString(name, maxsize) + return _XdrString(name, maxsize) def fixed_length_array(self, children): """Instantiate one _XdrFixedLengthArray declaration object""" @@ -383,8 +635,6 @@ class ParseToAst(Transformer): """Instantiate one _XdrOptionalData declaration object""" spec = children[0] name = children[1].symbol - structs.add(name) - pass_by_reference.add(name) return _XdrOptionalData(name, spec) @@ -403,8 +653,6 @@ class ParseToAst(Transformer): def struct(self, children): """Instantiate one _XdrStruct object""" name = children[0].symbol - structs.add(name) - pass_by_reference.add(name) fields = children[1].children last_field = fields[-1] @@ -419,11 +667,6 @@ class ParseToAst(Transformer): def typedef(self, children): """Instantiate one _XdrTypedef object""" new_type = children[0] - if isinstance(new_type, _XdrBasic) and isinstance( - new_type.spec, _XdrDefinedType - ): - if new_type.spec.type_name in pass_by_reference: - pass_by_reference.add(new_type.name) return _XdrTypedef(new_type) @@ -445,8 +688,6 @@ class ParseToAst(Transformer): def union(self, children): """Instantiate one _XdrUnion object""" name = children[0].symbol - structs.add(name) - pass_by_reference.add(name) body = children[1] discriminant = body.children[0].children[0] @@ -484,6 +725,8 @@ class ParseToAst(Transformer): """Instantiate one _Pragma object""" directive = children[0].children[0].data match directive: + case "big_endian_directive": + big_endian.append(children[1].symbol) case "exclude_directive": excluded_apis.append(children[1].symbol) case "header_directive": diff --git a/tools/net/sunrpc/xdrgen/xdrgen b/tools/net/sunrpc/xdrgen/xdrgen index 95f303b2861b..43762be39252 100755 --- a/tools/net/sunrpc/xdrgen/xdrgen +++ b/tools/net/sunrpc/xdrgen/xdrgen @@ -128,5 +128,7 @@ There is NO WARRANTY, to the extent permitted by law.""", try: if __name__ == "__main__": sys.exit(main()) -except (SystemExit, KeyboardInterrupt, BrokenPipeError): +except SystemExit: + sys.exit(0) +except (KeyboardInterrupt, BrokenPipeError): sys.exit(1) diff --git a/tools/net/ynl/cli.py b/tools/net/ynl/cli.py index b8481f401376..41d9fa5c818d 100755 --- a/tools/net/ynl/cli.py +++ b/tools/net/ynl/cli.py @@ -3,9 +3,11 @@ import argparse import json +import pathlib import pprint -import time +import sys +sys.path.append(pathlib.Path(__file__).resolve().parent.as_posix()) from lib import YnlFamily, Netlink, NlError @@ -43,7 +45,10 @@ def main(): group.add_argument('--list-ops', action='store_true') group.add_argument('--list-msgs', action='store_true') - parser.add_argument('--sleep', dest='sleep', type=int) + parser.add_argument('--duration', dest='duration', type=int, + help='when subscribed, watch for DURATION seconds') + parser.add_argument('--sleep', dest='duration', type=int, + help='alias for duration') parser.add_argument('--subscribe', dest='ntf', type=str) parser.add_argument('--replace', dest='flags', action='append_const', const=Netlink.NLM_F_REPLACE) @@ -80,9 +85,6 @@ def main(): if args.ntf: ynl.ntf_subscribe(args.ntf) - if args.sleep: - time.sleep(args.sleep) - if args.list_ops: for op_name, op in ynl.ops.items(): print(op_name, " [", ", ".join(op.modes), "]") @@ -106,8 +108,11 @@ def main(): exit(1) if args.ntf: - ynl.check_ntf() - output(ynl.async_msg_queue) + try: + for msg in ynl.poll_ntf(duration=args.duration): + output(msg) + except KeyboardInterrupt: + pass if __name__ == "__main__": diff --git a/tools/net/ynl/ethtool.py b/tools/net/ynl/ethtool.py index 63c471f075ab..ebb0a11f67bf 100755 --- a/tools/net/ynl/ethtool.py +++ b/tools/net/ynl/ethtool.py @@ -3,11 +3,13 @@ import argparse import json +import pathlib import pprint import sys import re import os +sys.path.append(pathlib.Path(__file__).resolve().parent.as_posix()) from lib import YnlFamily def args_to_req(ynl, op_name, args, req): diff --git a/tools/net/ynl/generated/Makefile b/tools/net/ynl/generated/Makefile index 713f5fb9cc2d..7db5240de58a 100644 --- a/tools/net/ynl/generated/Makefile +++ b/tools/net/ynl/generated/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 CC=gcc -CFLAGS=-std=gnu11 -O2 -W -Wall -Wextra -Wno-unused-parameter -Wshadow \ +CFLAGS += -std=gnu11 -O2 -W -Wall -Wextra -Wno-unused-parameter -Wshadow \ -I../lib/ -idirafter $(UAPI_PATH) ifeq ("$(DEBUG)","1") CFLAGS += -g -fsanitize=address -fsanitize=leak -static-libasan diff --git a/tools/net/ynl/lib/Makefile b/tools/net/ynl/lib/Makefile index 2887cc5de530..94c49cca3dca 100644 --- a/tools/net/ynl/lib/Makefile +++ b/tools/net/ynl/lib/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 CC=gcc -CFLAGS=-std=gnu11 -O2 -W -Wall -Wextra -Wno-unused-parameter -Wshadow +CFLAGS += -std=gnu11 -O2 -W -Wall -Wextra -Wno-unused-parameter -Wshadow ifeq ("$(DEBUG)","1") CFLAGS += -g -fsanitize=address -fsanitize=leak -static-libasan endif diff --git a/tools/net/ynl/lib/nlspec.py b/tools/net/ynl/lib/nlspec.py index b6d6f8aef423..a745739655ad 100644 --- a/tools/net/ynl/lib/nlspec.py +++ b/tools/net/ynl/lib/nlspec.py @@ -131,6 +131,9 @@ class SpecEnumSet(SpecElement): def has_doc(self): if 'doc' in self.yaml: return True + return self.has_entry_doc() + + def has_entry_doc(self): for entry in self.entries.values(): if entry.has_doc(): return True diff --git a/tools/net/ynl/lib/ynl.py b/tools/net/ynl/lib/ynl.py index c22c22bf2cb7..eea29359a899 100644 --- a/tools/net/ynl/lib/ynl.py +++ b/tools/net/ynl/lib/ynl.py @@ -12,6 +12,9 @@ import sys import yaml import ipaddress import uuid +import queue +import selectors +import time from .nlspec import SpecFamily @@ -489,7 +492,7 @@ class YnlFamily(SpecFamily): self.sock.setsockopt(Netlink.SOL_NETLINK, Netlink.NETLINK_GET_STRICT_CHK, 1) self.async_msg_ids = set() - self.async_msg_queue = [] + self.async_msg_queue = queue.Queue() for msg in self.msgs.values(): if msg.is_async: @@ -553,10 +556,10 @@ class YnlFamily(SpecFamily): if attr["type"] == 'nest': nl_type |= Netlink.NLA_F_NESTED attr_payload = b'' - sub_attrs = SpaceAttrs(self.attr_sets[space], value, search_attrs) + sub_space = attr['nested-attributes'] + sub_attrs = SpaceAttrs(self.attr_sets[sub_space], value, search_attrs) for subname, subvalue in value.items(): - attr_payload += self._add_attr(attr['nested-attributes'], - subname, subvalue, sub_attrs) + attr_payload += self._add_attr(sub_space, subname, subvalue, sub_attrs) elif attr["type"] == 'flag': if not value: # If value is absent or false then skip attribute creation. @@ -903,7 +906,7 @@ class YnlFamily(SpecFamily): msg['name'] = op['name'] msg['msg'] = attrs - self.async_msg_queue.append(msg) + self.async_msg_queue.put(msg) def check_ntf(self): while True: @@ -925,11 +928,30 @@ class YnlFamily(SpecFamily): decoded = self.nlproto.decode(self, nl_msg, None) if decoded.cmd() not in self.async_msg_ids: - print("Unexpected msg id done while checking for ntf", decoded) + print("Unexpected msg id while checking for ntf", decoded) continue self.handle_ntf(decoded) + def poll_ntf(self, duration=None): + start_time = time.time() + selector = selectors.DefaultSelector() + selector.register(self.sock, selectors.EVENT_READ) + + while True: + try: + yield self.async_msg_queue.get_nowait() + except queue.Empty: + if duration is not None: + timeout = start_time + duration - time.time() + if timeout <= 0: + return + else: + timeout = None + events = selector.select(timeout) + if events: + self.check_ntf() + def operation_do_attributes(self, name): """ For a given operation name, find and return a supported diff --git a/tools/net/ynl/samples/Makefile b/tools/net/ynl/samples/Makefile index e194a7565861..c9494a564da4 100644 --- a/tools/net/ynl/samples/Makefile +++ b/tools/net/ynl/samples/Makefile @@ -3,7 +3,7 @@ include ../Makefile.deps CC=gcc -CFLAGS=-std=gnu11 -O2 -W -Wall -Wextra -Wno-unused-parameter -Wshadow \ +CFLAGS += -std=gnu11 -O2 -W -Wall -Wextra -Wno-unused-parameter -Wshadow \ -I../lib/ -I../generated/ -idirafter $(UAPI_PATH) ifeq ("$(DEBUG)","1") CFLAGS += -g -fsanitize=address -fsanitize=leak -static-libasan diff --git a/tools/net/ynl/samples/page-pool.c b/tools/net/ynl/samples/page-pool.c index 332f281ee5cb..e5d521320fbf 100644 --- a/tools/net/ynl/samples/page-pool.c +++ b/tools/net/ynl/samples/page-pool.c @@ -118,7 +118,7 @@ int main(int argc, char **argv) name = if_indextoname(s->ifc, ifname); if (name) printf("%8s", name); - printf("[%d]\t", s->ifc); + printf("[%u]\t", s->ifc); } printf("page pools: %u (zombies: %u)\n", diff --git a/tools/net/ynl/ynl-gen-c.py b/tools/net/ynl/ynl-gen-c.py index 717530bc9c52..d8201c4b1520 100755 --- a/tools/net/ynl/ynl-gen-c.py +++ b/tools/net/ynl/ynl-gen-c.py @@ -4,12 +4,15 @@ import argparse import collections import filecmp +import pathlib import os import re import shutil +import sys import tempfile import yaml +sys.path.append(pathlib.Path(__file__).resolve().parent.as_posix()) from lib import SpecFamily, SpecAttrSet, SpecAttr, SpecOperation, SpecEnumSet, SpecEnumEntry @@ -80,11 +83,21 @@ class Type(SpecAttr): value = self.checks.get(limit, default) if value is None: return value - elif value in self.family.consts: + if isinstance(value, int): + return value + if value in self.family.consts: + raise Exception("Resolving family constants not implemented, yet") + return limit_to_number(value) + + def get_limit_str(self, limit, default=None, suffix=''): + value = self.checks.get(limit, default) + if value is None: + return '' + if isinstance(value, int): + return str(value) + suffix + if value in self.family.consts: return c_upper(f"{self.family['name']}-{value}") - if not isinstance(value, int): - value = limit_to_number(value) - return value + return c_upper(value) def resolve(self): if 'name-prefix' in self.attr: @@ -157,7 +170,10 @@ class Type(SpecAttr): return '{ .type = ' + policy + ', }' def attr_policy(self, cw): - policy = c_upper('nla-' + self.attr['type']) + policy = f'NLA_{c_upper(self.type)}' + if self.attr.get('byte-order') == 'big-endian': + if self.type in {'u16', 'u32'}: + policy = f'NLA_BE{self.type[1:]}' spec = self._attr_policy(policy) cw.p(f"\t[{self.enum_name}] = {spec},") @@ -358,11 +374,11 @@ class TypeScalar(Type): elif 'full-range' in self.checks: return f"NLA_POLICY_FULL_RANGE({policy}, &{c_lower(self.enum_name)}_range)" elif 'range' in self.checks: - return f"NLA_POLICY_RANGE({policy}, {self.get_limit('min')}, {self.get_limit('max')})" + return f"NLA_POLICY_RANGE({policy}, {self.get_limit_str('min')}, {self.get_limit_str('max')})" elif 'min' in self.checks: - return f"NLA_POLICY_MIN({policy}, {self.get_limit('min')})" + return f"NLA_POLICY_MIN({policy}, {self.get_limit_str('min')})" elif 'max' in self.checks: - return f"NLA_POLICY_MAX({policy}, {self.get_limit('max')})" + return f"NLA_POLICY_MAX({policy}, {self.get_limit_str('max')})" return super()._attr_policy(policy) def _attr_typol(self): @@ -413,11 +429,11 @@ class TypeString(Type): def _attr_policy(self, policy): if 'exact-len' in self.checks: - mem = 'NLA_POLICY_EXACT_LEN(' + str(self.get_limit('exact-len')) + ')' + mem = 'NLA_POLICY_EXACT_LEN(' + self.get_limit_str('exact-len') + ')' else: mem = '{ .type = ' + policy if 'max-len' in self.checks: - mem += ', .len = ' + str(self.get_limit('max-len')) + mem += ', .len = ' + self.get_limit_str('max-len') mem += ', }' return mem @@ -464,17 +480,24 @@ class TypeBinary(Type): return f'.type = YNL_PT_BINARY,' def _attr_policy(self, policy): - if 'exact-len' in self.checks: - mem = 'NLA_POLICY_EXACT_LEN(' + str(self.get_limit('exact-len')) + ')' + if len(self.checks) == 0: + pass + elif len(self.checks) == 1: + check_name = list(self.checks)[0] + if check_name not in {'exact-len', 'min-len', 'max-len'}: + raise Exception('Unsupported check for binary type: ' + check_name) else: - mem = '{ ' - if len(self.checks) == 1 and 'min-len' in self.checks: - mem += '.len = ' + str(self.get_limit('min-len')) - elif len(self.checks) == 0: - mem += '.type = NLA_BINARY' - else: - raise Exception('One or more of binary type checks not implemented, yet') - mem += ', }' + raise Exception('More than one check for binary type not implemented, yet') + + if len(self.checks) == 0: + mem = '{ .type = NLA_BINARY, }' + elif 'exact-len' in self.checks: + mem = 'NLA_POLICY_EXACT_LEN(' + self.get_limit_str('exact-len') + ')' + elif 'min-len' in self.checks: + mem = '{ .len = ' + self.get_limit_str('min-len') + ', }' + elif 'max-len' in self.checks: + mem = 'NLA_POLICY_MAX_LEN(' + self.get_limit_str('max-len') + ')' + return mem def attr_put(self, ri, var): @@ -2161,9 +2184,9 @@ def print_kernel_policy_ranges(family, cw): cw.block_start(line=f'static const struct netlink_range_validation{sign} {c_lower(attr.enum_name)}_range =') members = [] if 'min' in attr.checks: - members.append(('min', str(attr.get_limit('min')) + suffix)) + members.append(('min', attr.get_limit_str('min', suffix=suffix))) if 'max' in attr.checks: - members.append(('max', str(attr.get_limit('max')) + suffix)) + members.append(('max', attr.get_limit_str('max', suffix=suffix))) cw.write_struct_init(members) cw.block_end(line=';') cw.nl() @@ -2396,6 +2419,7 @@ def uapi_enum_start(family, cw, obj, ckey='', enum_name='enum-name'): def render_uapi(family, cw): hdr_prot = f"_UAPI_LINUX_{c_upper(family.uapi_header_name)}_H" + hdr_prot = hdr_prot.replace('/', '_') cw.p('#ifndef ' + hdr_prot) cw.p('#define ' + hdr_prot) cw.nl() @@ -2417,11 +2441,15 @@ def render_uapi(family, cw): enum = family.consts[const['name']] if enum.has_doc(): - cw.p('/**') - doc = '' - if 'doc' in enum: - doc = ' - ' + enum['doc'] - cw.write_doc_line(enum.enum_name + doc) + if enum.has_entry_doc(): + cw.p('/**') + doc = '' + if 'doc' in enum: + doc = ' - ' + enum['doc'] + cw.write_doc_line(enum.enum_name + doc) + else: + cw.p('/*') + cw.write_doc_line(enum['doc'], indent=False) for entry in enum.entries.values(): if entry.has_doc(): doc = '@' + entry.c_name + ': ' + entry['doc'] diff --git a/tools/objtool/Makefile b/tools/objtool/Makefile index bf7f7f84ac62..f56e27727534 100644 --- a/tools/objtool/Makefile +++ b/tools/objtool/Makefile @@ -24,6 +24,7 @@ LIBELF_LIBS := $(shell $(HOSTPKG_CONFIG) libelf --libs 2>/dev/null || echo -lel all: $(OBJTOOL) INCLUDES := -I$(srctree)/tools/include \ + -I$(srctree)/tools/include/uapi \ -I$(srctree)/tools/arch/$(HOSTARCH)/include/uapi \ -I$(srctree)/tools/arch/$(SRCARCH)/include \ -I$(srctree)/tools/objtool/include \ diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c index ed6bff0e01dc..fe1362c34564 100644 --- a/tools/objtool/arch/x86/decode.c +++ b/tools/objtool/arch/x86/decode.c @@ -456,10 +456,6 @@ int arch_decode_instruction(struct objtool_file *file, const struct section *sec if (!rex_w) break; - /* skip RIP relative displacement */ - if (is_RIP()) - break; - /* skip nontrivial SIB */ if (have_SIB()) { modrm_rm = sib_base; @@ -467,6 +463,12 @@ int arch_decode_instruction(struct objtool_file *file, const struct section *sec break; } + /* lea disp(%rip), %dst */ + if (is_RIP()) { + insn->type = INSN_LEA_RIP; + break; + } + /* lea disp(%src), %dst */ ADD_OP(op) { op->src.offset = ins.displacement.value; @@ -737,7 +739,10 @@ int arch_decode_instruction(struct objtool_file *file, const struct section *sec break; } - insn->immediate = ins.immediate.nbytes ? ins.immediate.value : 0; + if (ins.immediate.nbytes) + insn->immediate = ins.immediate.value; + else if (ins.displacement.nbytes) + insn->immediate = ins.displacement.value; return 0; } diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 6604f5d038aa..76060da755b5 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -3820,9 +3820,12 @@ static int validate_branch(struct objtool_file *file, struct symbol *func, break; case INSN_CONTEXT_SWITCH: - if (func && (!next_insn || !next_insn->hint)) { - WARN_INSN(insn, "unsupported instruction in callable function"); - return 1; + if (func) { + if (!next_insn || !next_insn->hint) { + WARN_INSN(insn, "unsupported instruction in callable function"); + return 1; + } + break; } return 0; @@ -4392,6 +4395,51 @@ static bool noendbr_range(struct objtool_file *file, struct instruction *insn) return insn->offset == sym->offset + sym->len; } +static int __validate_ibt_insn(struct objtool_file *file, struct instruction *insn, + struct instruction *dest) +{ + if (dest->type == INSN_ENDBR) { + mark_endbr_used(dest); + return 0; + } + + if (insn_func(dest) && insn_func(insn) && + insn_func(dest)->pfunc == insn_func(insn)->pfunc) { + /* + * Anything from->to self is either _THIS_IP_ or + * IRET-to-self. + * + * There is no sane way to annotate _THIS_IP_ since the + * compiler treats the relocation as a constant and is + * happy to fold in offsets, skewing any annotation we + * do, leading to vast amounts of false-positives. + * + * There's also compiler generated _THIS_IP_ through + * KCOV and such which we have no hope of annotating. + * + * As such, blanket accept self-references without + * issue. + */ + return 0; + } + + /* + * Accept anything ANNOTATE_NOENDBR. + */ + if (dest->noendbr) + return 0; + + /* + * Accept if this is the instruction after a symbol + * that is (no)endbr -- typical code-range usage. + */ + if (noendbr_range(file, dest)) + return 0; + + WARN_INSN(insn, "relocation to !ENDBR: %s", offstr(dest->sec, dest->offset)); + return 1; +} + static int validate_ibt_insn(struct objtool_file *file, struct instruction *insn) { struct instruction *dest; @@ -4404,6 +4452,7 @@ static int validate_ibt_insn(struct objtool_file *file, struct instruction *insn * direct/indirect branches: */ switch (insn->type) { + case INSN_CALL: case INSN_CALL_DYNAMIC: case INSN_JUMP_CONDITIONAL: @@ -4413,6 +4462,23 @@ static int validate_ibt_insn(struct objtool_file *file, struct instruction *insn case INSN_RETURN: case INSN_NOP: return 0; + + case INSN_LEA_RIP: + if (!insn_reloc(file, insn)) { + /* local function pointer reference without reloc */ + + off = arch_jump_destination(insn); + + dest = find_insn(file, insn->sec, off); + if (!dest) { + WARN_INSN(insn, "corrupt function pointer reference"); + return 1; + } + + return __validate_ibt_insn(file, insn, dest); + } + break; + default: break; } @@ -4423,13 +4489,6 @@ static int validate_ibt_insn(struct objtool_file *file, struct instruction *insn reloc_offset(reloc) + 1, (insn->offset + insn->len) - (reloc_offset(reloc) + 1))) { - /* - * static_call_update() references the trampoline, which - * doesn't have (or need) ENDBR. Skip warning in that case. - */ - if (reloc->sym->static_call_tramp) - continue; - off = reloc->sym->offset; if (reloc_type(reloc) == R_X86_64_PC32 || reloc_type(reloc) == R_X86_64_PLT32) @@ -4441,47 +4500,7 @@ static int validate_ibt_insn(struct objtool_file *file, struct instruction *insn if (!dest) continue; - if (dest->type == INSN_ENDBR) { - mark_endbr_used(dest); - continue; - } - - if (insn_func(dest) && insn_func(insn) && - insn_func(dest)->pfunc == insn_func(insn)->pfunc) { - /* - * Anything from->to self is either _THIS_IP_ or - * IRET-to-self. - * - * There is no sane way to annotate _THIS_IP_ since the - * compiler treats the relocation as a constant and is - * happy to fold in offsets, skewing any annotation we - * do, leading to vast amounts of false-positives. - * - * There's also compiler generated _THIS_IP_ through - * KCOV and such which we have no hope of annotating. - * - * As such, blanket accept self-references without - * issue. - */ - continue; - } - - /* - * Accept anything ANNOTATE_NOENDBR. - */ - if (dest->noendbr) - continue; - - /* - * Accept if this is the instruction after a symbol - * that is (no)endbr -- typical code-range usage. - */ - if (noendbr_range(file, dest)) - continue; - - WARN_INSN(insn, "relocation to !ENDBR: %s", offstr(dest->sec, dest->offset)); - - warnings++; + warnings += __validate_ibt_insn(file, insn, dest); } return warnings; @@ -4557,6 +4576,9 @@ static int validate_ibt(struct objtool_file *file) !strcmp(sec->name, "__jump_table") || !strcmp(sec->name, "__mcount_loc") || !strcmp(sec->name, ".kcfi_traps") || + !strcmp(sec->name, ".llvm.call-graph-profile") || + !strcmp(sec->name, ".llvm_bb_addr_map") || + !strcmp(sec->name, "__tracepoints") || strstr(sec->name, "__patchable_function_entries")) continue; diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c index 3d27983dc908..6f64d611faea 100644 --- a/tools/objtool/elf.c +++ b/tools/objtool/elf.c @@ -224,12 +224,17 @@ int find_symbol_hole_containing(const struct section *sec, unsigned long offset) if (n) return 0; /* not a hole */ - /* didn't find a symbol for which @offset is after it */ - if (!hole.sym) - return 0; /* not a hole */ + /* + * @offset >= sym->offset + sym->len, find symbol after it. + * When hole.sym is empty, use the first node to compute the hole. + * If there is no symbol in the section, the first node will be NULL, + * in which case, -1 is returned to skip the whole section. + */ + if (hole.sym) + n = rb_next(&hole.sym->node); + else + n = rb_first_cached(&sec->symbol_tree); - /* @offset >= sym->offset + sym->len, find symbol after it */ - n = rb_next(&hole.sym->node); if (!n) return -1; /* until end of address space */ diff --git a/tools/objtool/include/objtool/arch.h b/tools/objtool/include/objtool/arch.h index 0b303eba660e..d63b46a19f39 100644 --- a/tools/objtool/include/objtool/arch.h +++ b/tools/objtool/include/objtool/arch.h @@ -28,6 +28,7 @@ enum insn_type { INSN_CLD, INSN_TRAP, INSN_ENDBR, + INSN_LEA_RIP, INSN_OTHER, }; diff --git a/tools/objtool/noreturns.h b/tools/objtool/noreturns.h index e7da92489167..f37614cc2c1b 100644 --- a/tools/objtool/noreturns.h +++ b/tools/objtool/noreturns.h @@ -11,7 +11,6 @@ NORETURN(__ia32_sys_exit) NORETURN(__ia32_sys_exit_group) NORETURN(__kunit_abort) NORETURN(__module_put_and_kthread_exit) -NORETURN(__reiserfs_panic) NORETURN(__stack_chk_fail) NORETURN(__tdx_hypercall_failed) NORETURN(__ubsan_handle_builtin_unreachable) diff --git a/tools/pci/pcitest.c b/tools/pci/pcitest.c index 470258009ddc..7b530d838d40 100644 --- a/tools/pci/pcitest.c +++ b/tools/pci/pcitest.c @@ -95,7 +95,7 @@ static int run_test(struct pci_test *test) if (test->msinum > 0 && test->msinum <= 32) { ret = ioctl(fd, PCITEST_MSI, test->msinum); - fprintf(stdout, "MSI%d:\t\t", test->msinum); + fprintf(stdout, "MSI%u:\t\t", test->msinum); if (ret < 0) fprintf(stdout, "TEST FAILED\n"); else @@ -104,7 +104,7 @@ static int run_test(struct pci_test *test) if (test->msixnum > 0 && test->msixnum <= 2048) { ret = ioctl(fd, PCITEST_MSIX, test->msixnum); - fprintf(stdout, "MSI-X%d:\t\t", test->msixnum); + fprintf(stdout, "MSI-X%u:\t\t", test->msixnum); if (ret < 0) fprintf(stdout, "TEST FAILED\n"); else @@ -116,7 +116,7 @@ static int run_test(struct pci_test *test) if (test->use_dma) param.flags = PCITEST_FLAGS_USE_DMA; ret = ioctl(fd, PCITEST_WRITE, ¶m); - fprintf(stdout, "WRITE (%7ld bytes):\t\t", test->size); + fprintf(stdout, "WRITE (%7lu bytes):\t\t", test->size); if (ret < 0) fprintf(stdout, "TEST FAILED\n"); else @@ -128,7 +128,7 @@ static int run_test(struct pci_test *test) if (test->use_dma) param.flags = PCITEST_FLAGS_USE_DMA; ret = ioctl(fd, PCITEST_READ, ¶m); - fprintf(stdout, "READ (%7ld bytes):\t\t", test->size); + fprintf(stdout, "READ (%7lu bytes):\t\t", test->size); if (ret < 0) fprintf(stdout, "TEST FAILED\n"); else @@ -140,7 +140,7 @@ static int run_test(struct pci_test *test) if (test->use_dma) param.flags = PCITEST_FLAGS_USE_DMA; ret = ioctl(fd, PCITEST_COPY, ¶m); - fprintf(stdout, "COPY (%7ld bytes):\t\t", test->size); + fprintf(stdout, "COPY (%7lu bytes):\t\t", test->size); if (ret < 0) fprintf(stdout, "TEST FAILED\n"); else diff --git a/tools/perf/.gitignore b/tools/perf/.gitignore index f5b81d439387..5aaf73df6700 100644 --- a/tools/perf/.gitignore +++ b/tools/perf/.gitignore @@ -39,9 +39,9 @@ trace/beauty/generated/ pmu-events/pmu-events.c pmu-events/jevents pmu-events/metric_test.log -tests/shell/*.shellcheck_log -tests/shell/coresight/*.shellcheck_log -tests/shell/lib/*.shellcheck_log +pmu-events/empty-pmu-events.log +pmu-events/test-empty-pmu-events.c +*.shellcheck_log feature/ libapi/ libbpf/ diff --git a/tools/perf/Documentation/itrace.txt b/tools/perf/Documentation/itrace.txt index 19cc179be9a7..40476b227f8d 100644 --- a/tools/perf/Documentation/itrace.txt +++ b/tools/perf/Documentation/itrace.txt @@ -1,6 +1,6 @@ i synthesize instructions events y synthesize cycles events - b synthesize branches events (branch misses for Arm SPE) + b synthesize branches events c synthesize branches events (calls only) r synthesize branches events (returns only) x synthesize transactions events diff --git a/tools/perf/Documentation/perf-arm-spe.txt b/tools/perf/Documentation/perf-arm-spe.txt index 0a3eda482307..de2b0b479249 100644 --- a/tools/perf/Documentation/perf-arm-spe.txt +++ b/tools/perf/Documentation/perf-arm-spe.txt @@ -187,7 +187,7 @@ groups: 7 llc-access 2 tlb-miss 1K tlb-access - 36 branch-miss + 36 branch 0 remote-access 900 memory diff --git a/tools/perf/Documentation/perf-check.txt b/tools/perf/Documentation/perf-check.txt index 10f69fb6850b..31741499e786 100644 --- a/tools/perf/Documentation/perf-check.txt +++ b/tools/perf/Documentation/perf-check.txt @@ -47,15 +47,15 @@ feature:: bpf / HAVE_LIBBPF_SUPPORT bpf_skeletons / HAVE_BPF_SKEL debuginfod / HAVE_DEBUGINFOD_SUPPORT - dwarf / HAVE_DWARF_SUPPORT - dwarf_getlocations / HAVE_DWARF_GETLOCATIONS_SUPPORT + dwarf / HAVE_LIBDW_SUPPORT + dwarf_getlocations / HAVE_LIBDW_SUPPORT dwarf-unwind / HAVE_DWARF_UNWIND_SUPPORT auxtrace / HAVE_AUXTRACE_SUPPORT libaudit / HAVE_LIBAUDIT_SUPPORT libbfd / HAVE_LIBBFD_SUPPORT libcapstone / HAVE_LIBCAPSTONE_SUPPORT libcrypto / HAVE_LIBCRYPTO_SUPPORT - libdw-dwarf-unwind / HAVE_DWARF_SUPPORT + libdw-dwarf-unwind / HAVE_LIBDW_SUPPORT libelf / HAVE_LIBELF_SUPPORT libnuma / HAVE_LIBNUMA_SUPPORT libopencsd / HAVE_CSTRACE_SUPPORT diff --git a/tools/perf/Documentation/perf-config.txt b/tools/perf/Documentation/perf-config.txt index 379f9d7a8ab1..1f668d4724e3 100644 --- a/tools/perf/Documentation/perf-config.txt +++ b/tools/perf/Documentation/perf-config.txt @@ -247,6 +247,19 @@ annotate.*:: These are in control of addresses, jump function, source code in lines of assembly code from a specific program. + annotate.disassemblers:: + Choose the disassembler to use: "objdump", "llvm", "capstone", + if not specified it will first try, if available, the "llvm" one, + then, if it fails, "capstone", and finally the original "objdump" + based one. + + Choosing a different one is useful when handling some feature that + is known to be best support at some point by one of the options, + to compare the output when in doubt about some bug, etc. + + This can be a list, in order of preference, the first one that works + finishes the process. + annotate.addr2line:: addr2line binary to use for file names and line numbers. diff --git a/tools/perf/Documentation/perf-list.txt b/tools/perf/Documentation/perf-list.txt index dea005410ec0..d0c65fad419a 100644 --- a/tools/perf/Documentation/perf-list.txt +++ b/tools/perf/Documentation/perf-list.txt @@ -8,7 +8,7 @@ perf-list - List all symbolic event types SYNOPSIS -------- [verse] -'perf list' [--no-desc] [--long-desc] +'perf list' [<options>] [hw|sw|cache|tracepoint|pmu|sdt|metric|metricgroup|event_glob] DESCRIPTION @@ -243,6 +243,21 @@ For accessing trace point events perf needs to have read access to /sys/kernel/tracing, even when perf_event_paranoid is in a relaxed setting. +TOOL/HWMON EVENTS +----------------- + +Some events don't have an associated PMU instead reading values +available to software without perf_event_open. As these events don't +support sampling they can only really be read by tools like perf stat. + +Tool events provide times and certain system parameters. Examples +include duration_time, user_time, system_time and num_cpus_online. + +Hwmon events provide easy access to hwmon sysfs data typically in +/sys/class/hwmon. This information includes temperatures, fan speeds +and energy usage. + + TRACING ------- diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt index 7c66d81ab978..87f864519406 100644 --- a/tools/perf/Documentation/perf-report.txt +++ b/tools/perf/Documentation/perf-report.txt @@ -391,6 +391,14 @@ OPTIONS This allows to examine the path the program took to each sample. The data collection must have used -b (or -j) and -g. + Also show with some branch flags that can be: + - Predicted: display the average percentage of predicated branches. + (predicated number / total number) + - Abort: display the number of tsx aborted branches. + - Cycles: cycles in basic block. + + - iterations: display the average number of iterations in callchain list. + --addr2line=<path>:: Path to addr2line binary. diff --git a/tools/perf/Documentation/perf-sched.txt b/tools/perf/Documentation/perf-sched.txt index 3db64954a267..6dbbddb6464d 100644 --- a/tools/perf/Documentation/perf-sched.txt +++ b/tools/perf/Documentation/perf-sched.txt @@ -221,6 +221,14 @@ OPTIONS for 'perf sched timehist' priorities are specified with -: 120-129. A combination of both can also be provided: 0,120-129. +-P:: +--pre-migrations:: + Show pre-migration wait time. pre-migration wait time is the time spent + by a task waiting on a runqueue but not getting the chance to run there + and is migrated to a different runqueue where it is finally run. This + time between sched_wakeup and migrate_task is the pre-migration wait + time. + OPTIONS for 'perf sched replay' ------------------------------ diff --git a/tools/perf/Documentation/perf-script-python.txt b/tools/perf/Documentation/perf-script-python.txt index 13e37e9385ee..27a1cac6fe76 100644 --- a/tools/perf/Documentation/perf-script-python.txt +++ b/tools/perf/Documentation/perf-script-python.txt @@ -624,7 +624,7 @@ as perf_trace_context.perf_script_context . perf_set_itrace_options(context, itrace_options) - set --itrace options if they have not been set already perf_sample_srcline(context) - returns source_file_name, line_number perf_sample_srccode(context) - returns source_file_name, line_number, source_line - + perf_config_get(config_name) - returns the value of the named config item, or None if unset Util.py Module ~~~~~~~~~~~~~~ diff --git a/tools/perf/Documentation/perf-test.txt b/tools/perf/Documentation/perf-test.txt index 9acb8d1f6588..efcdec528a8f 100644 --- a/tools/perf/Documentation/perf-test.txt +++ b/tools/perf/Documentation/perf-test.txt @@ -48,3 +48,20 @@ OPTIONS --dso:: Specify a DSO for the "Symbols" test. + +-w:: +--workload=:: + Run a built-in workload, to list them use '--list-workloads', current ones include: + noploop, thloop, leafloop, sqrtloop, brstack, datasym and landlock. + + Used with the shell script regression tests. + + Some accept an extra parameter: + + seconds: leafloop, noploop, sqrtloop, thloop + nrloops: brstack + + The datasym and landlock workloads don't accept any. + +--list-workloads:: + List the available workloads to use with -w/--workload. diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index 4dcf7a0fd235..2916d59c88cd 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -31,7 +31,7 @@ $(call detected_var,SRCARCH) ifneq ($(NO_SYSCALL_TABLE),1) NO_SYSCALL_TABLE := 1 - ifeq ($(SRCARCH),$(filter $(SRCARCH),x86 powerpc arm64 s390 mips loongarch)) + ifeq ($(SRCARCH),$(filter $(SRCARCH),x86 powerpc arm64 s390 mips loongarch riscv)) NO_SYSCALL_TABLE := 0 endif @@ -83,6 +83,10 @@ ifeq ($(ARCH),mips) LIBUNWIND_LIBS = -lunwind -lunwind-mips endif +ifeq ($(ARCH),riscv) + CFLAGS += -I$(OUTPUT)arch/riscv/include/generated +endif + # So far there's only x86 and arm libdw unwind support merged in perf. # Disable it on all other architectures in case libdw unwind # support is detected in system. Add supported architectures @@ -91,6 +95,10 @@ ifneq ($(SRCARCH),$(filter $(SRCARCH),x86 arm arm64 powerpc s390 csky riscv loon NO_LIBDW_DWARF_UNWIND := 1 endif +ifneq ($(LIBUNWIND),1) + NO_LIBUNWIND := 1 +endif + ifeq ($(LIBUNWIND_LIBS),) NO_LIBUNWIND := 1 endif @@ -147,9 +155,9 @@ ifdef LIBDW_DIR endif DWARFLIBS := -ldw ifeq ($(findstring -static,${LDFLAGS}),-static) - DWARFLIBS += -lelf -ldl -lz -llzma -lbz2 -lzstd + DWARFLIBS += -lelf -lz -llzma -lbz2 -lzstd - LIBDW_VERSION := $(shell $(PKG_CONFIG) --modversion libdw) + LIBDW_VERSION := $(shell $(PKG_CONFIG) --modversion libdw).0.0 LIBDW_VERSION_1 := $(word 1, $(subst ., ,$(LIBDW_VERSION))) LIBDW_VERSION_2 := $(word 2, $(subst ., ,$(LIBDW_VERSION))) @@ -158,9 +166,12 @@ ifeq ($(findstring -static,${LDFLAGS}),-static) ifeq ($(shell test $(LIBDW_VERSION_2) -lt 177; echo $$?),0) DWARFLIBS += -lebl endif + + # Must put -ldl after -lebl for dependency + DWARFLIBS += -ldl endif -FEATURE_CHECK_CFLAGS-libdw-dwarf-unwind := $(LIBDW_CFLAGS) -FEATURE_CHECK_LDFLAGS-libdw-dwarf-unwind := $(LIBDW_LDFLAGS) $(DWARFLIBS) +FEATURE_CHECK_CFLAGS-libdw := $(LIBDW_CFLAGS) +FEATURE_CHECK_LDFLAGS-libdw := $(LIBDW_LDFLAGS) $(DWARFLIBS) # for linking with debug library, run like: # make DEBUG=1 LIBBABELTRACE_DIR=/opt/libbabeltrace/ @@ -200,10 +211,6 @@ FEATURE_CHECK_CFLAGS-bpf = -I. -I$(srctree)/tools/include -I$(srctree)/tools/arc # include ARCH specific config -include $(src-perf)/arch/$(SRCARCH)/Makefile -ifdef PERF_HAVE_ARCH_REGS_QUERY_REGISTER_OFFSET - CFLAGS += -DHAVE_ARCH_REGS_QUERY_REGISTER_OFFSET -endif - include $(srctree)/tools/scripts/utilities.mak ifeq ($(call get-executable,$(FLEX)),) @@ -423,7 +430,7 @@ ifeq ($(feature-file-handle), 1) endif ifdef NO_LIBELF - NO_DWARF := 1 + NO_LIBDW := 1 NO_LIBUNWIND := 1 NO_LIBDW_DWARF_UNWIND := 1 NO_LIBBPF := 1 @@ -458,28 +465,11 @@ else endif endif else - ifndef NO_LIBDW_DWARF_UNWIND - ifneq ($(feature-libdw-dwarf-unwind),1) - NO_LIBDW_DWARF_UNWIND := 1 - $(warning No libdw DWARF unwind found, Please install elfutils-devel/libdw-dev >= 0.158 and/or set LIBDW_DIR) + ifneq ($(feature-libdw), 1) + ifndef NO_LIBDW + $(warning No libdw.h found or old libdw.h found or elfutils is older than 0.157, disables dwarf support. Please install new elfutils-devel/libdw-dev) + NO_LIBDW := 1 endif - endif - ifneq ($(feature-dwarf), 1) - ifndef NO_DWARF - $(warning No libdw.h found or old libdw.h found or elfutils is older than 0.138, disables dwarf support. Please install new elfutils-devel/libdw-dev) - NO_DWARF := 1 - endif - else - ifneq ($(feature-dwarf_getlocations), 1) - $(warning Old libdw.h, finding variables at given 'perf probe' point will not work, install elfutils-devel/libdw-dev >= 0.157) - else - CFLAGS += -DHAVE_DWARF_GETLOCATIONS_SUPPORT - endif # dwarf_getlocations - ifneq ($(feature-dwarf_getcfi), 1) - $(warning Old libdw.h, finding variables at given 'perf probe' point will not work, install elfutils-devel/libdw-dev >= 0.142) - else - CFLAGS += -DHAVE_DWARF_CFI_SUPPORT - endif # dwarf_getcfi endif # Dwarf support endif # libelf support endif # NO_LIBELF @@ -490,7 +480,7 @@ ifeq ($(feature-libaio), 1) endif endif -ifdef NO_DWARF +ifdef NO_LIBDW NO_LIBDW_DWARF_UNWIND := 1 endif @@ -568,17 +558,12 @@ ifndef NO_LIBELF endif endif - ifndef NO_DWARF - ifeq ($(origin PERF_HAVE_DWARF_REGS), undefined) - $(warning DWARF register mappings have not been defined for architecture $(SRCARCH), DWARF support disabled) - NO_DWARF := 1 - else - CFLAGS += -DHAVE_DWARF_SUPPORT $(LIBDW_CFLAGS) - LDFLAGS += $(LIBDW_LDFLAGS) - EXTLIBS += ${DWARFLIBS} - $(call detected,CONFIG_DWARF) - endif # PERF_HAVE_DWARF_REGS - endif # NO_DWARF + ifndef NO_LIBDW + CFLAGS += -DHAVE_LIBDW_SUPPORT $(LIBDW_CFLAGS) + LDFLAGS += $(LIBDW_LDFLAGS) + EXTLIBS += ${DWARFLIBS} + $(call detected,CONFIG_LIBDW) + endif # NO_LIBDW ifndef NO_LIBBPF ifeq ($(feature-bpf), 1) @@ -627,7 +612,7 @@ ifdef PERF_HAVE_JITDUMP endif ifeq ($(SRCARCH),powerpc) - ifndef NO_DWARF + ifndef NO_LIBDW CFLAGS += -DHAVE_SKIP_CALLCHAIN_IDX endif endif @@ -701,8 +686,8 @@ ifeq ($(BUILD_BPF_SKEL),1) BUILD_BPF_SKEL := 0 else CLANG_VERSION := $(shell $(CLANG) --version | head -1 | sed 's/.*clang version \([[:digit:]]\+.[[:digit:]]\+.[[:digit:]]\+\).*/\1/g') - ifeq ($(call version-lt3,$(CLANG_VERSION),16.0.6),1) - $(warning Warning: Disabled BPF skeletons as at least $(CLANG) version 16.0.6 is reported to be a working setup with the current of BPF based perf features) + ifeq ($(call version-lt3,$(CLANG_VERSION),12.0.1),1) + $(warning Warning: Disabled BPF skeletons as reliable BTF generation needs at least $(CLANG) version 12.0.1) BUILD_BPF_SKEL := 0 endif endif @@ -747,8 +732,6 @@ endif ifeq ($(dwarf-post-unwind),1) CFLAGS += -DHAVE_DWARF_UNWIND_SUPPORT $(call detected,CONFIG_DWARF_UNWIND) -else - NO_DWARF_UNWIND := 1 endif ifndef NO_LOCAL_LIBUNWIND @@ -1191,7 +1174,7 @@ endif ifneq ($(NO_LIBTRACEEVENT),1) $(call feature_check,libtraceevent) ifeq ($(feature-libtraceevent), 1) - CFLAGS += -DHAVE_LIBTRACEEVENT + CFLAGS += -DHAVE_LIBTRACEEVENT $(shell $(PKG_CONFIG) --cflags libtraceevent) LDFLAGS += $(shell $(PKG_CONFIG) --libs-only-L libtraceevent) EXTLIBS += $(shell $(PKG_CONFIG) --libs-only-l libtraceevent) LIBTRACEEVENT_VERSION := $(shell $(PKG_CONFIG) --modversion libtraceevent).0.0 diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 9dd2e8d3f3c9..d74241a15131 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -40,7 +40,7 @@ include ../scripts/utilities.mak # # Define EXTRA_PERFLIBS to pass extra libraries to PERFLIBS. # -# Define NO_DWARF if you do not want debug-info analysis feature at all. +# Define NO_LIBDW if you do not want debug-info analysis feature at all. # # Define WERROR=0 to disable treating any warnings as errors. # @@ -52,7 +52,7 @@ include ../scripts/utilities.mak # # Define NO_LIBELF if you do not want libelf dependency (e.g. cross-builds) # -# Define NO_LIBUNWIND if you do not want libunwind dependency for dwarf +# Define LIBUNWIND if you do not want libunwind dependency for dwarf # backtrace post unwind. # # Define NO_BACKTRACE if you do not want stack backtrace debug feature @@ -1128,12 +1128,11 @@ endif install-tests: all install-gtk $(call QUIET_INSTALL, tests) \ $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests'; \ - $(INSTALL) tests/attr.py -m 644 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests'; \ $(INSTALL) tests/pe-file.exe* '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests'; \ - $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/attr'; \ - $(INSTALL) tests/attr/* -m 644 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/attr'; \ $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/shell'; \ $(INSTALL) tests/shell/*.sh '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/shell'; \ + $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/shell/attr'; \ + $(INSTALL) tests/shell/attr/* -m 644 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/shell/attr'; \ $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/shell/lib'; \ $(INSTALL) tests/shell/lib/*.sh -m 644 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/shell/lib'; \ $(INSTALL) tests/shell/lib/*.py -m 644 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/shell/lib'; \ diff --git a/tools/perf/arch/arc/annotate/instructions.c b/tools/perf/arch/arc/annotate/instructions.c index 2f00e995c7e3..e5619770a1af 100644 --- a/tools/perf/arch/arc/annotate/instructions.c +++ b/tools/perf/arch/arc/annotate/instructions.c @@ -5,5 +5,7 @@ static int arc__annotate_init(struct arch *arch, char *cpuid __maybe_unused) { arch->initialized = true; arch->objdump.comment_char = ';'; + arch->e_machine = EM_ARC; + arch->e_flags = 0; return 0; } diff --git a/tools/perf/arch/arm/Makefile b/tools/perf/arch/arm/Makefile index 1d88fdab13bf..8b59ce8efb89 100644 --- a/tools/perf/arch/arm/Makefile +++ b/tools/perf/arch/arm/Makefile @@ -1,5 +1,2 @@ # SPDX-License-Identifier: GPL-2.0-only -ifndef NO_DWARF -PERF_HAVE_DWARF_REGS := 1 -endif PERF_HAVE_JITDUMP := 1 diff --git a/tools/perf/arch/arm/annotate/instructions.c b/tools/perf/arch/arm/annotate/instructions.c index 2ff6cedeb9c5..cf91a43362b0 100644 --- a/tools/perf/arch/arm/annotate/instructions.c +++ b/tools/perf/arch/arm/annotate/instructions.c @@ -53,6 +53,8 @@ static int arm__annotate_init(struct arch *arch, char *cpuid __maybe_unused) arch->associate_instruction_ops = arm__associate_instruction_ops; arch->objdump.comment_char = ';'; arch->objdump.skip_functions_char = '+'; + arch->e_machine = EM_ARM; + arch->e_flags = 0; return 0; out_free_call: diff --git a/tools/perf/arch/arm/util/Build b/tools/perf/arch/arm/util/Build index e6dd7cd79ebd..f7a8b37d1c68 100644 --- a/tools/perf/arch/arm/util/Build +++ b/tools/perf/arch/arm/util/Build @@ -1,7 +1,5 @@ perf-util-y += perf_regs.o -perf-util-$(CONFIG_DWARF) += dwarf-regs.o - perf-util-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind.o perf-util-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o diff --git a/tools/perf/arch/arm/util/dwarf-regs.c b/tools/perf/arch/arm/util/dwarf-regs.c deleted file mode 100644 index fc5f71c91802..000000000000 --- a/tools/perf/arch/arm/util/dwarf-regs.c +++ /dev/null @@ -1,61 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Mapping of DWARF debug register numbers into register names. - * - * Copyright (C) 2010 Will Deacon, ARM Ltd. - */ - -#include <stddef.h> -#include <linux/stringify.h> -#include <dwarf-regs.h> - -struct pt_regs_dwarfnum { - const char *name; - unsigned int dwarfnum; -}; - -#define REG_DWARFNUM_NAME(r, num) {.name = r, .dwarfnum = num} -#define GPR_DWARFNUM_NAME(num) \ - {.name = __stringify(%r##num), .dwarfnum = num} -#define REG_DWARFNUM_END {.name = NULL, .dwarfnum = 0} - -/* - * Reference: - * http://infocenter.arm.com/help/topic/com.arm.doc.ihi0040a/IHI0040A_aadwarf.pdf - */ -static const struct pt_regs_dwarfnum regdwarfnum_table[] = { - GPR_DWARFNUM_NAME(0), - GPR_DWARFNUM_NAME(1), - GPR_DWARFNUM_NAME(2), - GPR_DWARFNUM_NAME(3), - GPR_DWARFNUM_NAME(4), - GPR_DWARFNUM_NAME(5), - GPR_DWARFNUM_NAME(6), - GPR_DWARFNUM_NAME(7), - GPR_DWARFNUM_NAME(8), - GPR_DWARFNUM_NAME(9), - GPR_DWARFNUM_NAME(10), - REG_DWARFNUM_NAME("%fp", 11), - REG_DWARFNUM_NAME("%ip", 12), - REG_DWARFNUM_NAME("%sp", 13), - REG_DWARFNUM_NAME("%lr", 14), - REG_DWARFNUM_NAME("%pc", 15), - REG_DWARFNUM_END, -}; - -/** - * get_arch_regstr() - lookup register name from it's DWARF register number - * @n: the DWARF register number - * - * get_arch_regstr() returns the name of the register in struct - * regdwarfnum_table from it's DWARF register number. If the register is not - * found in the table, this returns NULL; - */ -const char *get_arch_regstr(unsigned int n) -{ - const struct pt_regs_dwarfnum *roff; - for (roff = regdwarfnum_table; roff->name != NULL; roff++) - if (roff->dwarfnum == n) - return roff->name; - return NULL; -} diff --git a/tools/perf/arch/arm64/Makefile b/tools/perf/arch/arm64/Makefile index 5735ed4479bb..91570d5d428e 100644 --- a/tools/perf/arch/arm64/Makefile +++ b/tools/perf/arch/arm64/Makefile @@ -1,9 +1,5 @@ # SPDX-License-Identifier: GPL-2.0 -ifndef NO_DWARF -PERF_HAVE_DWARF_REGS := 1 -endif PERF_HAVE_JITDUMP := 1 -PERF_HAVE_ARCH_REGS_QUERY_REGISTER_OFFSET := 1 HAVE_KVM_STAT_SUPPORT := 1 # diff --git a/tools/perf/arch/arm64/annotate/instructions.c b/tools/perf/arch/arm64/annotate/instructions.c index f86d9f4798bd..d465d093e7eb 100644 --- a/tools/perf/arch/arm64/annotate/instructions.c +++ b/tools/perf/arch/arm64/annotate/instructions.c @@ -113,6 +113,8 @@ static int arm64__annotate_init(struct arch *arch, char *cpuid __maybe_unused) arch->associate_instruction_ops = arm64__associate_instruction_ops; arch->objdump.comment_char = '/'; arch->objdump.skip_functions_char = '+'; + arch->e_machine = EM_AARCH64; + arch->e_flags = 0; return 0; out_free_call: diff --git a/tools/perf/arch/arm64/util/Build b/tools/perf/arch/arm64/util/Build index 343ef7589a77..a74521b79eaa 100644 --- a/tools/perf/arch/arm64/util/Build +++ b/tools/perf/arch/arm64/util/Build @@ -4,7 +4,6 @@ perf-util-y += perf_regs.o perf-util-y += tsc.o perf-util-y += pmu.o perf-util-$(CONFIG_LIBTRACEEVENT) += kvm-stat.o -perf-util-$(CONFIG_DWARF) += dwarf-regs.o perf-util-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind.o perf-util-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o diff --git a/tools/perf/arch/arm64/util/arm-spe.c b/tools/perf/arch/arm64/util/arm-spe.c index 2be99fdf997d..22b19dcc6beb 100644 --- a/tools/perf/arch/arm64/util/arm-spe.c +++ b/tools/perf/arch/arm64/util/arm-spe.c @@ -23,9 +23,12 @@ #include "../../../util/debug.h" #include "../../../util/auxtrace.h" #include "../../../util/record.h" +#include "../../../util/header.h" #include "../../../util/arm-spe.h" #include <tools/libc_compat.h> // reallocarray +#define ARM_SPE_CPU_MAGIC 0x1010101010101010ULL + #define KiB(x) ((x) * 1024) #define MiB(x) ((x) * 1024 * 1024) @@ -37,11 +40,84 @@ struct arm_spe_recording { bool *wrapped; }; +/* + * arm_spe_find_cpus() returns a new cpu map, and the caller should invoke + * perf_cpu_map__put() to release the map after use. + */ +static struct perf_cpu_map *arm_spe_find_cpus(struct evlist *evlist) +{ + struct perf_cpu_map *event_cpus = evlist->core.user_requested_cpus; + struct perf_cpu_map *online_cpus = perf_cpu_map__new_online_cpus(); + struct perf_cpu_map *intersect_cpus; + + /* cpu map is not "any" CPU , we have specific CPUs to work with */ + if (!perf_cpu_map__has_any_cpu(event_cpus)) { + intersect_cpus = perf_cpu_map__intersect(event_cpus, online_cpus); + perf_cpu_map__put(online_cpus); + /* Event can be "any" CPU so count all CPUs. */ + } else { + intersect_cpus = online_cpus; + } + + return intersect_cpus; +} + static size_t arm_spe_info_priv_size(struct auxtrace_record *itr __maybe_unused, - struct evlist *evlist __maybe_unused) + struct evlist *evlist) +{ + struct perf_cpu_map *cpu_map = arm_spe_find_cpus(evlist); + size_t size; + + if (!cpu_map) + return 0; + + size = ARM_SPE_AUXTRACE_PRIV_MAX + + ARM_SPE_CPU_PRIV_MAX * perf_cpu_map__nr(cpu_map); + size *= sizeof(u64); + + perf_cpu_map__put(cpu_map); + return size; +} + +static int arm_spe_save_cpu_header(struct auxtrace_record *itr, + struct perf_cpu cpu, __u64 data[]) { - return ARM_SPE_AUXTRACE_PRIV_SIZE; + struct arm_spe_recording *sper = + container_of(itr, struct arm_spe_recording, itr); + struct perf_pmu *pmu = NULL; + char *cpuid = NULL; + u64 val; + + /* Read CPU MIDR */ + cpuid = get_cpuid_allow_env_override(cpu); + if (!cpuid) + return -ENOMEM; + val = strtol(cpuid, NULL, 16); + + data[ARM_SPE_MAGIC] = ARM_SPE_CPU_MAGIC; + data[ARM_SPE_CPU] = cpu.cpu; + data[ARM_SPE_CPU_NR_PARAMS] = ARM_SPE_CPU_PRIV_MAX - ARM_SPE_CPU_MIDR; + data[ARM_SPE_CPU_MIDR] = val; + + /* Find the associate Arm SPE PMU for the CPU */ + if (perf_cpu_map__has(sper->arm_spe_pmu->cpus, cpu)) + pmu = sper->arm_spe_pmu; + + if (!pmu) { + /* No Arm SPE PMU is found */ + data[ARM_SPE_CPU_PMU_TYPE] = ULLONG_MAX; + data[ARM_SPE_CAP_MIN_IVAL] = 0; + } else { + data[ARM_SPE_CPU_PMU_TYPE] = pmu->type; + + if (perf_pmu__scan_file(pmu, "caps/min_interval", "%lu", &val) != 1) + val = 0; + data[ARM_SPE_CAP_MIN_IVAL] = val; + } + + free(cpuid); + return ARM_SPE_CPU_PRIV_MAX; } static int arm_spe_info_fill(struct auxtrace_record *itr, @@ -49,20 +125,46 @@ static int arm_spe_info_fill(struct auxtrace_record *itr, struct perf_record_auxtrace_info *auxtrace_info, size_t priv_size) { + int i, ret; + size_t offset; struct arm_spe_recording *sper = container_of(itr, struct arm_spe_recording, itr); struct perf_pmu *arm_spe_pmu = sper->arm_spe_pmu; + struct perf_cpu_map *cpu_map; + struct perf_cpu cpu; + __u64 *data; - if (priv_size != ARM_SPE_AUXTRACE_PRIV_SIZE) + if (priv_size != arm_spe_info_priv_size(itr, session->evlist)) return -EINVAL; if (!session->evlist->core.nr_mmaps) return -EINVAL; + cpu_map = arm_spe_find_cpus(session->evlist); + if (!cpu_map) + return -EINVAL; + auxtrace_info->type = PERF_AUXTRACE_ARM_SPE; - auxtrace_info->priv[ARM_SPE_PMU_TYPE] = arm_spe_pmu->type; + auxtrace_info->priv[ARM_SPE_HEADER_VERSION] = ARM_SPE_HEADER_CURRENT_VERSION; + auxtrace_info->priv[ARM_SPE_HEADER_SIZE] = + ARM_SPE_AUXTRACE_PRIV_MAX - ARM_SPE_HEADER_VERSION; + auxtrace_info->priv[ARM_SPE_PMU_TYPE_V2] = arm_spe_pmu->type; + auxtrace_info->priv[ARM_SPE_CPUS_NUM] = perf_cpu_map__nr(cpu_map); + + offset = ARM_SPE_AUXTRACE_PRIV_MAX; + perf_cpu_map__for_each_cpu(cpu, i, cpu_map) { + assert(offset < priv_size); + data = &auxtrace_info->priv[offset]; + ret = arm_spe_save_cpu_header(itr, cpu, data); + if (ret < 0) + goto out; + offset += ret; + } - return 0; + ret = 0; +out: + perf_cpu_map__put(cpu_map); + return ret; } static void @@ -188,9 +290,9 @@ static int arm_spe_recording_options(struct auxtrace_record *itr, evlist__for_each_entry(evlist, evsel) { if (evsel__is_aux_event(evsel)) { - if (!strstarts(evsel->pmu_name, ARM_SPE_PMU_NAME)) { + if (!strstarts(evsel->pmu->name, ARM_SPE_PMU_NAME)) { pr_err("Found unexpected auxtrace event: %s\n", - evsel->pmu_name); + evsel->pmu->name); return -EINVAL; } opts->full_auxtrace = true; diff --git a/tools/perf/arch/arm64/util/dwarf-regs.c b/tools/perf/arch/arm64/util/dwarf-regs.c deleted file mode 100644 index 917b97d7c5d3..000000000000 --- a/tools/perf/arch/arm64/util/dwarf-regs.c +++ /dev/null @@ -1,92 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Mapping of DWARF debug register numbers into register names. - * - * Copyright (C) 2010 Will Deacon, ARM Ltd. - */ - -#include <errno.h> -#include <stddef.h> -#include <string.h> -#include <dwarf-regs.h> -#include <linux/ptrace.h> /* for struct user_pt_regs */ -#include <linux/stringify.h> - -struct pt_regs_dwarfnum { - const char *name; - unsigned int dwarfnum; -}; - -#define REG_DWARFNUM_NAME(r, num) {.name = r, .dwarfnum = num} -#define GPR_DWARFNUM_NAME(num) \ - {.name = __stringify(%x##num), .dwarfnum = num} -#define REG_DWARFNUM_END {.name = NULL, .dwarfnum = 0} -#define DWARFNUM2OFFSET(index) \ - (index * sizeof((struct user_pt_regs *)0)->regs[0]) - -/* - * Reference: - * http://infocenter.arm.com/help/topic/com.arm.doc.ihi0057b/IHI0057B_aadwarf64.pdf - */ -static const struct pt_regs_dwarfnum regdwarfnum_table[] = { - GPR_DWARFNUM_NAME(0), - GPR_DWARFNUM_NAME(1), - GPR_DWARFNUM_NAME(2), - GPR_DWARFNUM_NAME(3), - GPR_DWARFNUM_NAME(4), - GPR_DWARFNUM_NAME(5), - GPR_DWARFNUM_NAME(6), - GPR_DWARFNUM_NAME(7), - GPR_DWARFNUM_NAME(8), - GPR_DWARFNUM_NAME(9), - GPR_DWARFNUM_NAME(10), - GPR_DWARFNUM_NAME(11), - GPR_DWARFNUM_NAME(12), - GPR_DWARFNUM_NAME(13), - GPR_DWARFNUM_NAME(14), - GPR_DWARFNUM_NAME(15), - GPR_DWARFNUM_NAME(16), - GPR_DWARFNUM_NAME(17), - GPR_DWARFNUM_NAME(18), - GPR_DWARFNUM_NAME(19), - GPR_DWARFNUM_NAME(20), - GPR_DWARFNUM_NAME(21), - GPR_DWARFNUM_NAME(22), - GPR_DWARFNUM_NAME(23), - GPR_DWARFNUM_NAME(24), - GPR_DWARFNUM_NAME(25), - GPR_DWARFNUM_NAME(26), - GPR_DWARFNUM_NAME(27), - GPR_DWARFNUM_NAME(28), - GPR_DWARFNUM_NAME(29), - REG_DWARFNUM_NAME("%lr", 30), - REG_DWARFNUM_NAME("%sp", 31), - REG_DWARFNUM_END, -}; - -/** - * get_arch_regstr() - lookup register name from it's DWARF register number - * @n: the DWARF register number - * - * get_arch_regstr() returns the name of the register in struct - * regdwarfnum_table from it's DWARF register number. If the register is not - * found in the table, this returns NULL; - */ -const char *get_arch_regstr(unsigned int n) -{ - const struct pt_regs_dwarfnum *roff; - for (roff = regdwarfnum_table; roff->name != NULL; roff++) - if (roff->dwarfnum == n) - return roff->name; - return NULL; -} - -int regs_query_register_offset(const char *name) -{ - const struct pt_regs_dwarfnum *roff; - - for (roff = regdwarfnum_table; roff->name != NULL; roff++) - if (!strcmp(roff->name, name)) - return DWARFNUM2OFFSET(roff->dwarfnum); - return -EINVAL; -} diff --git a/tools/perf/arch/arm64/util/header.c b/tools/perf/arch/arm64/util/header.c index 741df3614a09..f445a2dd6293 100644 --- a/tools/perf/arch/arm64/util/header.c +++ b/tools/perf/arch/arm64/util/header.c @@ -14,73 +14,66 @@ #define MIDR_REVISION_MASK GENMASK(3, 0) #define MIDR_VARIANT_MASK GENMASK(23, 20) -static int _get_cpuid(char *buf, size_t sz, struct perf_cpu_map *cpus) +static int _get_cpuid(char *buf, size_t sz, struct perf_cpu cpu) { + char path[PATH_MAX]; + FILE *file; const char *sysfs = sysfs__mountpoint(); - struct perf_cpu cpu; - int idx, ret = EINVAL; + assert(cpu.cpu != -1); if (!sysfs || sz < MIDR_SIZE) return EINVAL; - perf_cpu_map__for_each_cpu(cpu, idx, cpus) { - char path[PATH_MAX]; - FILE *file; - - scnprintf(path, PATH_MAX, "%s/devices/system/cpu/cpu%d" MIDR, - sysfs, cpu.cpu); - - file = fopen(path, "r"); - if (!file) { - pr_debug("fopen failed for file %s\n", path); - continue; - } - - if (!fgets(buf, MIDR_SIZE, file)) { - fclose(file); - continue; - } - fclose(file); + scnprintf(path, PATH_MAX, "%s/devices/system/cpu/cpu%d" MIDR, sysfs, cpu.cpu); - /* got midr break loop */ - ret = 0; - break; + file = fopen(path, "r"); + if (!file) { + pr_debug("fopen failed for file %s\n", path); + return EINVAL; } - return ret; + if (!fgets(buf, MIDR_SIZE, file)) { + pr_debug("Failed to read file %s\n", path); + fclose(file); + return EINVAL; + } + fclose(file); + return 0; } -int get_cpuid(char *buf, size_t sz) +int get_cpuid(char *buf, size_t sz, struct perf_cpu cpu) { - struct perf_cpu_map *cpus = perf_cpu_map__new_online_cpus(); - int ret; + struct perf_cpu_map *cpus; + int idx; + if (cpu.cpu != -1) + return _get_cpuid(buf, sz, cpu); + + cpus = perf_cpu_map__new_online_cpus(); if (!cpus) return EINVAL; - ret = _get_cpuid(buf, sz, cpus); - - perf_cpu_map__put(cpus); + perf_cpu_map__for_each_cpu(cpu, idx, cpus) { + int ret = _get_cpuid(buf, sz, cpu); - return ret; + if (ret == 0) + return 0; + } + return EINVAL; } -char *get_cpuid_str(struct perf_pmu *pmu) +char *get_cpuid_str(struct perf_cpu cpu) { - char *buf = NULL; + char *buf = malloc(MIDR_SIZE); int res; - if (!pmu || !pmu->cpus) - return NULL; - - buf = malloc(MIDR_SIZE); if (!buf) return NULL; /* read midr from list of cpus mapped to this pmu */ - res = _get_cpuid(buf, MIDR_SIZE, pmu->cpus); + res = get_cpuid(buf, MIDR_SIZE, cpu); if (res) { - pr_err("failed to get cpuid string for PMU %s\n", pmu->name); + pr_err("failed to get cpuid string for CPU %d\n", cpu.cpu); free(buf); buf = NULL; } diff --git a/tools/perf/arch/arm64/util/pmu.c b/tools/perf/arch/arm64/util/pmu.c index 2a4eab2d160e..895fb0d0610c 100644 --- a/tools/perf/arch/arm64/util/pmu.c +++ b/tools/perf/arch/arm64/util/pmu.c @@ -1,30 +1,11 @@ // SPDX-License-Identifier: GPL-2.0 -#include <internal/cpumap.h> -#include "../../../util/cpumap.h" -#include "../../../util/header.h" #include "../../../util/pmu.h" #include "../../../util/pmus.h" +#include "../../../util/tool_pmu.h" #include <api/fs/fs.h> -#include <math.h> -const struct pmu_metrics_table *pmu_metrics_table__find(void) -{ - struct perf_pmu *pmu; - - /* Metrics aren't currently supported on heterogeneous Arm systems */ - if (perf_pmus__num_core_pmus() > 1) - return NULL; - - /* Doesn't matter which one here because they'll all be the same */ - pmu = perf_pmus__find_core_pmu(); - if (pmu) - return perf_pmu__find_metrics_table(pmu); - - return NULL; -} - -double perf_pmu__cpu_slots_per_cycle(void) +u64 tool_pmu__cpu_slots_per_cycle(void) { char path[PATH_MAX]; unsigned long long slots = 0; @@ -41,5 +22,5 @@ double perf_pmu__cpu_slots_per_cycle(void) filename__read_ull(path, &slots); } - return slots ? (double)slots : NAN; + return slots; } diff --git a/tools/perf/arch/csky/Makefile b/tools/perf/arch/csky/Makefile deleted file mode 100644 index 88c08eed9c7b..000000000000 --- a/tools/perf/arch/csky/Makefile +++ /dev/null @@ -1,4 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0-only -ifndef NO_DWARF -PERF_HAVE_DWARF_REGS := 1 -endif diff --git a/tools/perf/arch/csky/annotate/instructions.c b/tools/perf/arch/csky/annotate/instructions.c index 5337bfb7d5fc..14270311d215 100644 --- a/tools/perf/arch/csky/annotate/instructions.c +++ b/tools/perf/arch/csky/annotate/instructions.c @@ -43,6 +43,11 @@ static int csky__annotate_init(struct arch *arch, char *cpuid __maybe_unused) arch->initialized = true; arch->objdump.comment_char = '/'; arch->associate_instruction_ops = csky__associate_ins_ops; - + arch->e_machine = EM_CSKY; +#if defined(__CSKYABIV2__) + arch->e_flags = EF_CSKY_ABIV2; +#else + arch->e_flags = EF_CSKY_ABIV1; +#endif return 0; } diff --git a/tools/perf/arch/csky/util/Build b/tools/perf/arch/csky/util/Build index 99d83f41bf43..5e6ea82c4202 100644 --- a/tools/perf/arch/csky/util/Build +++ b/tools/perf/arch/csky/util/Build @@ -1,4 +1,3 @@ perf-util-y += perf_regs.o -perf-util-$(CONFIG_DWARF) += dwarf-regs.o perf-util-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o diff --git a/tools/perf/arch/loongarch/Makefile b/tools/perf/arch/loongarch/Makefile index c89d6bb6b184..52544d59245b 100644 --- a/tools/perf/arch/loongarch/Makefile +++ b/tools/perf/arch/loongarch/Makefile @@ -1,8 +1,4 @@ # SPDX-License-Identifier: GPL-2.0 -ifndef NO_DWARF -PERF_HAVE_DWARF_REGS := 1 -endif -PERF_HAVE_ARCH_REGS_QUERY_REGISTER_OFFSET := 1 PERF_HAVE_JITDUMP := 1 HAVE_KVM_STAT_SUPPORT := 1 diff --git a/tools/perf/arch/loongarch/annotate/instructions.c b/tools/perf/arch/loongarch/annotate/instructions.c index ab43b1ab51e3..70262d5f1444 100644 --- a/tools/perf/arch/loongarch/annotate/instructions.c +++ b/tools/perf/arch/loongarch/annotate/instructions.c @@ -131,6 +131,8 @@ int loongarch__annotate_init(struct arch *arch, char *cpuid __maybe_unused) arch->associate_instruction_ops = loongarch__associate_ins_ops; arch->initialized = true; arch->objdump.comment_char = '#'; + arch->e_machine = EM_LOONGARCH; + arch->e_flags = 0; } return 0; diff --git a/tools/perf/arch/loongarch/util/Build b/tools/perf/arch/loongarch/util/Build index b6b97de48233..0aa31986ecb5 100644 --- a/tools/perf/arch/loongarch/util/Build +++ b/tools/perf/arch/loongarch/util/Build @@ -1,7 +1,6 @@ perf-util-y += header.o perf-util-y += perf_regs.o -perf-util-$(CONFIG_DWARF) += dwarf-regs.o perf-util-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind.o perf-util-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o perf-util-$(CONFIG_LIBTRACEEVENT) += kvm-stat.o diff --git a/tools/perf/arch/loongarch/util/dwarf-regs.c b/tools/perf/arch/loongarch/util/dwarf-regs.c deleted file mode 100644 index 0f6ebc387463..000000000000 --- a/tools/perf/arch/loongarch/util/dwarf-regs.c +++ /dev/null @@ -1,44 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * dwarf-regs.c : Mapping of DWARF debug register numbers into register names. - * - * Copyright (C) 2020-2023 Loongson Technology Corporation Limited - */ - -#include <stdio.h> -#include <errno.h> /* for EINVAL */ -#include <string.h> /* for strcmp */ -#include <dwarf-regs.h> - -struct pt_regs_dwarfnum { - const char *name; - unsigned int dwarfnum; -}; - -static struct pt_regs_dwarfnum loongarch_gpr_table[] = { - {"%r0", 0}, {"%r1", 1}, {"%r2", 2}, {"%r3", 3}, - {"%r4", 4}, {"%r5", 5}, {"%r6", 6}, {"%r7", 7}, - {"%r8", 8}, {"%r9", 9}, {"%r10", 10}, {"%r11", 11}, - {"%r12", 12}, {"%r13", 13}, {"%r14", 14}, {"%r15", 15}, - {"%r16", 16}, {"%r17", 17}, {"%r18", 18}, {"%r19", 19}, - {"%r20", 20}, {"%r21", 21}, {"%r22", 22}, {"%r23", 23}, - {"%r24", 24}, {"%r25", 25}, {"%r26", 26}, {"%r27", 27}, - {"%r28", 28}, {"%r29", 29}, {"%r30", 30}, {"%r31", 31}, - {NULL, 0} -}; - -const char *get_arch_regstr(unsigned int n) -{ - n %= 32; - return loongarch_gpr_table[n].name; -} - -int regs_query_register_offset(const char *name) -{ - const struct pt_regs_dwarfnum *roff; - - for (roff = loongarch_gpr_table; roff->name != NULL; roff++) - if (!strcmp(roff->name, name)) - return roff->dwarfnum; - return -EINVAL; -} diff --git a/tools/perf/arch/loongarch/util/header.c b/tools/perf/arch/loongarch/util/header.c index d962dff55512..0c6d823334a2 100644 --- a/tools/perf/arch/loongarch/util/header.c +++ b/tools/perf/arch/loongarch/util/header.c @@ -70,7 +70,7 @@ out_free: return cpuid; } -int get_cpuid(char *buffer, size_t sz) +int get_cpuid(char *buffer, size_t sz, struct perf_cpu cpu __maybe_unused) { int ret = 0; char *cpuid = _get_cpuid(); @@ -90,7 +90,7 @@ out_free: return ret; } -char *get_cpuid_str(struct perf_pmu *pmu __maybe_unused) +char *get_cpuid_str(struct perf_cpu cpu __maybe_unused) { return _get_cpuid(); } diff --git a/tools/perf/arch/mips/Makefile b/tools/perf/arch/mips/Makefile index cd0b011b3be5..827168f1077a 100644 --- a/tools/perf/arch/mips/Makefile +++ b/tools/perf/arch/mips/Makefile @@ -1,8 +1,4 @@ # SPDX-License-Identifier: GPL-2.0 -ifndef NO_DWARF -PERF_HAVE_DWARF_REGS := 1 -endif - # Syscall table generation for perf out := $(OUTPUT)arch/mips/include/generated/asm header := $(out)/syscalls_n64.c diff --git a/tools/perf/arch/mips/annotate/instructions.c b/tools/perf/arch/mips/annotate/instructions.c index 340993f2a897..b50b46c613d6 100644 --- a/tools/perf/arch/mips/annotate/instructions.c +++ b/tools/perf/arch/mips/annotate/instructions.c @@ -40,6 +40,8 @@ int mips__annotate_init(struct arch *arch, char *cpuid __maybe_unused) arch->associate_instruction_ops = mips__associate_ins_ops; arch->initialized = true; arch->objdump.comment_char = '#'; + arch->e_machine = EM_MIPS; + arch->e_flags = 0; } return 0; diff --git a/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl b/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl index 1464c6be6eb3..c844cd5cda62 100644 --- a/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl +++ b/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl @@ -377,3 +377,7 @@ 460 n64 lsm_set_self_attr sys_lsm_set_self_attr 461 n64 lsm_list_modules sys_lsm_list_modules 462 n64 mseal sys_mseal +463 n64 setxattrat sys_setxattrat +464 n64 getxattrat sys_getxattrat +465 n64 listxattrat sys_listxattrat +466 n64 removexattrat sys_removexattrat diff --git a/tools/perf/arch/mips/util/Build b/tools/perf/arch/mips/util/Build index e4644f1e68a0..691fa2051958 100644 --- a/tools/perf/arch/mips/util/Build +++ b/tools/perf/arch/mips/util/Build @@ -1,3 +1,2 @@ perf-util-y += perf_regs.o -perf-util-$(CONFIG_DWARF) += dwarf-regs.o perf-util-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind.o diff --git a/tools/perf/arch/mips/util/dwarf-regs.c b/tools/perf/arch/mips/util/dwarf-regs.c deleted file mode 100644 index 25c13a91c2a7..000000000000 --- a/tools/perf/arch/mips/util/dwarf-regs.c +++ /dev/null @@ -1,38 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * dwarf-regs.c : Mapping of DWARF debug register numbers into register names. - * - * Copyright (C) 2013 Cavium, Inc. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - */ - -#include <stdio.h> -#include <dwarf-regs.h> - -static const char *mips_gpr_names[32] = { - "$0", "$1", "$2", "$3", "$4", "$5", "$6", "$7", "$8", "$9", - "$10", "$11", "$12", "$13", "$14", "$15", "$16", "$17", "$18", "$19", - "$20", "$21", "$22", "$23", "$24", "$25", "$26", "$27", "$28", "$29", - "$30", "$31" -}; - -const char *get_arch_regstr(unsigned int n) -{ - if (n < 32) - return mips_gpr_names[n]; - if (n == 64) - return "hi"; - if (n == 65) - return "lo"; - return NULL; -} diff --git a/tools/perf/arch/powerpc/Makefile b/tools/perf/arch/powerpc/Makefile index bf6d323574f6..dc8f4fb8e324 100644 --- a/tools/perf/arch/powerpc/Makefile +++ b/tools/perf/arch/powerpc/Makefile @@ -1,10 +1,5 @@ # SPDX-License-Identifier: GPL-2.0 -ifndef NO_DWARF -PERF_HAVE_DWARF_REGS := 1 -endif - HAVE_KVM_STAT_SUPPORT := 1 -PERF_HAVE_ARCH_REGS_QUERY_REGISTER_OFFSET := 1 PERF_HAVE_JITDUMP := 1 # diff --git a/tools/perf/arch/powerpc/annotate/instructions.c b/tools/perf/arch/powerpc/annotate/instructions.c index ede9eeade0ab..ca567cfdcbdb 100644 --- a/tools/perf/arch/powerpc/annotate/instructions.c +++ b/tools/perf/arch/powerpc/annotate/instructions.c @@ -255,7 +255,7 @@ static struct ins_ops *check_ppc_insn(struct disasm_line *dl) * is moved to r31. update_insn_state_powerpc tracks these state * changes */ -#ifdef HAVE_DWARF_SUPPORT +#ifdef HAVE_LIBDW_SUPPORT static void update_insn_state_powerpc(struct type_state *state, struct data_loc_info *dloc, Dwarf_Die * cu_die __maybe_unused, struct disasm_line *dl) @@ -300,7 +300,7 @@ static void update_insn_state_powerpc(struct type_state *state, insn_offset, src->reg1, dst->reg1); pr_debug_type_name(&tsr->type, tsr->kind); } -#endif /* HAVE_DWARF_SUPPORT */ +#endif /* HAVE_LIBDW_SUPPORT */ static int powerpc__annotate_init(struct arch *arch, char *cpuid __maybe_unused) { @@ -309,6 +309,8 @@ static int powerpc__annotate_init(struct arch *arch, char *cpuid __maybe_unused) arch->associate_instruction_ops = powerpc__associate_instruction_ops; arch->objdump.comment_char = '#'; annotate_opts.show_asm_raw = true; + arch->e_machine = EM_PPC; + arch->e_flags = 0; } return 0; diff --git a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl index ebae8415dfbb..d8b4ab78bef0 100644 --- a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl +++ b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl @@ -553,3 +553,7 @@ 460 common lsm_set_self_attr sys_lsm_set_self_attr 461 common lsm_list_modules sys_lsm_list_modules 462 common mseal sys_mseal +463 common setxattrat sys_setxattrat +464 common getxattrat sys_getxattrat +465 common listxattrat sys_listxattrat +466 common removexattrat sys_removexattrat diff --git a/tools/perf/arch/powerpc/util/Build b/tools/perf/arch/powerpc/util/Build index 6c588ecdf3bd..ed82715080f9 100644 --- a/tools/perf/arch/powerpc/util/Build +++ b/tools/perf/arch/powerpc/util/Build @@ -7,8 +7,7 @@ perf-util-y += sym-handling.o perf-util-y += evsel.o perf-util-y += event.o -perf-util-$(CONFIG_DWARF) += dwarf-regs.o -perf-util-$(CONFIG_DWARF) += skip-callchain-idx.o +perf-util-$(CONFIG_LIBDW) += skip-callchain-idx.o perf-util-$(CONFIG_LIBUNWIND) += unwind-libunwind.o perf-util-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o diff --git a/tools/perf/arch/powerpc/util/dwarf-regs.c b/tools/perf/arch/powerpc/util/dwarf-regs.c deleted file mode 100644 index 104c7ae5c433..000000000000 --- a/tools/perf/arch/powerpc/util/dwarf-regs.c +++ /dev/null @@ -1,153 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Mapping of DWARF debug register numbers into register names. - * - * Copyright (C) 2010 Ian Munsie, IBM Corporation. - */ - -#include <stddef.h> -#include <errno.h> -#include <string.h> -#include <dwarf-regs.h> -#include <linux/ptrace.h> -#include <linux/kernel.h> -#include <linux/stringify.h> - -struct pt_regs_dwarfnum { - const char *name; - unsigned int dwarfnum; - unsigned int ptregs_offset; -}; - -#define REG_DWARFNUM_NAME(r, num) \ - {.name = __stringify(%)__stringify(r), .dwarfnum = num, \ - .ptregs_offset = offsetof(struct pt_regs, r)} -#define GPR_DWARFNUM_NAME(num) \ - {.name = __stringify(%gpr##num), .dwarfnum = num, \ - .ptregs_offset = offsetof(struct pt_regs, gpr[num])} -#define REG_DWARFNUM_END {.name = NULL, .dwarfnum = 0, .ptregs_offset = 0} - -/* - * Reference: - * http://refspecs.linuxfoundation.org/ELF/ppc64/PPC-elf64abi-1.9.html - */ -static const struct pt_regs_dwarfnum regdwarfnum_table[] = { - GPR_DWARFNUM_NAME(0), - GPR_DWARFNUM_NAME(1), - GPR_DWARFNUM_NAME(2), - GPR_DWARFNUM_NAME(3), - GPR_DWARFNUM_NAME(4), - GPR_DWARFNUM_NAME(5), - GPR_DWARFNUM_NAME(6), - GPR_DWARFNUM_NAME(7), - GPR_DWARFNUM_NAME(8), - GPR_DWARFNUM_NAME(9), - GPR_DWARFNUM_NAME(10), - GPR_DWARFNUM_NAME(11), - GPR_DWARFNUM_NAME(12), - GPR_DWARFNUM_NAME(13), - GPR_DWARFNUM_NAME(14), - GPR_DWARFNUM_NAME(15), - GPR_DWARFNUM_NAME(16), - GPR_DWARFNUM_NAME(17), - GPR_DWARFNUM_NAME(18), - GPR_DWARFNUM_NAME(19), - GPR_DWARFNUM_NAME(20), - GPR_DWARFNUM_NAME(21), - GPR_DWARFNUM_NAME(22), - GPR_DWARFNUM_NAME(23), - GPR_DWARFNUM_NAME(24), - GPR_DWARFNUM_NAME(25), - GPR_DWARFNUM_NAME(26), - GPR_DWARFNUM_NAME(27), - GPR_DWARFNUM_NAME(28), - GPR_DWARFNUM_NAME(29), - GPR_DWARFNUM_NAME(30), - GPR_DWARFNUM_NAME(31), - REG_DWARFNUM_NAME(msr, 66), - REG_DWARFNUM_NAME(ctr, 109), - REG_DWARFNUM_NAME(link, 108), - REG_DWARFNUM_NAME(xer, 101), - REG_DWARFNUM_NAME(dar, 119), - REG_DWARFNUM_NAME(dsisr, 118), - REG_DWARFNUM_END, -}; - -/** - * get_arch_regstr() - lookup register name from it's DWARF register number - * @n: the DWARF register number - * - * get_arch_regstr() returns the name of the register in struct - * regdwarfnum_table from it's DWARF register number. If the register is not - * found in the table, this returns NULL; - */ -const char *get_arch_regstr(unsigned int n) -{ - const struct pt_regs_dwarfnum *roff; - for (roff = regdwarfnum_table; roff->name != NULL; roff++) - if (roff->dwarfnum == n) - return roff->name; - return NULL; -} - -int regs_query_register_offset(const char *name) -{ - const struct pt_regs_dwarfnum *roff; - for (roff = regdwarfnum_table; roff->name != NULL; roff++) - if (!strcmp(roff->name, name)) - return roff->ptregs_offset; - return -EINVAL; -} - -#define PPC_OP(op) (((op) >> 26) & 0x3F) -#define PPC_RA(a) (((a) >> 16) & 0x1f) -#define PPC_RT(t) (((t) >> 21) & 0x1f) -#define PPC_RB(b) (((b) >> 11) & 0x1f) -#define PPC_D(D) ((D) & 0xfffe) -#define PPC_DS(DS) ((DS) & 0xfffc) -#define OP_LD 58 -#define OP_STD 62 - -static int get_source_reg(u32 raw_insn) -{ - return PPC_RA(raw_insn); -} - -static int get_target_reg(u32 raw_insn) -{ - return PPC_RT(raw_insn); -} - -static int get_offset_opcode(u32 raw_insn) -{ - int opcode = PPC_OP(raw_insn); - - /* DS- form */ - if ((opcode == OP_LD) || (opcode == OP_STD)) - return PPC_DS(raw_insn); - else - return PPC_D(raw_insn); -} - -/* - * Fills the required fields for op_loc depending on if it - * is a source or target. - * D form: ins RT,D(RA) -> src_reg1 = RA, offset = D, dst_reg1 = RT - * DS form: ins RT,DS(RA) -> src_reg1 = RA, offset = DS, dst_reg1 = RT - * X form: ins RT,RA,RB -> src_reg1 = RA, src_reg2 = RB, dst_reg1 = RT - */ -void get_powerpc_regs(u32 raw_insn, int is_source, - struct annotated_op_loc *op_loc) -{ - if (is_source) - op_loc->reg1 = get_source_reg(raw_insn); - else - op_loc->reg1 = get_target_reg(raw_insn); - - if (op_loc->multi_regs) - op_loc->reg2 = PPC_RB(raw_insn); - - /* TODO: Implement offset handling for X Form */ - if ((op_loc->mem_ref) && (PPC_OP(raw_insn) != 31)) - op_loc->offset = get_offset_opcode(raw_insn); -} diff --git a/tools/perf/arch/powerpc/util/header.c b/tools/perf/arch/powerpc/util/header.c index 6b00efd53638..c7df534dbf8f 100644 --- a/tools/perf/arch/powerpc/util/header.c +++ b/tools/perf/arch/powerpc/util/header.c @@ -10,9 +10,21 @@ #include "utils_header.h" #include "metricgroup.h" #include <api/fs/fs.h> +#include <sys/auxv.h> + +static bool is_compat_mode(void) +{ + u64 base_platform = getauxval(AT_BASE_PLATFORM); + u64 platform = getauxval(AT_PLATFORM); + + if (!strcmp((char *)platform, (char *)base_platform)) + return false; + + return true; +} int -get_cpuid(char *buffer, size_t sz) +get_cpuid(char *buffer, size_t sz, struct perf_cpu cpu __maybe_unused) { unsigned long pvr; int nb; @@ -30,11 +42,29 @@ get_cpuid(char *buffer, size_t sz) } char * -get_cpuid_str(struct perf_pmu *pmu __maybe_unused) +get_cpuid_str(struct perf_cpu cpu __maybe_unused) { char *bufp; + unsigned long pvr; + + /* + * IBM Power System supports compatible mode. That is + * Nth generation platform can support previous generation + * OS in a mode called compatibile mode. For ex. LPAR can be + * booted in a Power9 mode when the system is a Power10. + * + * In the compatible mode, care must be taken when generating + * PVR value. When read, PVR will be of the AT_BASE_PLATFORM + * To support generic events, return 0x00ffffff as pvr when + * booted in compat mode. Based on this pvr value, json will + * pick events from pmu-events/arch/powerpc/compat + */ + if (!is_compat_mode()) + pvr = mfspr(SPRN_PVR); + else + pvr = 0x00ffffff; - if (asprintf(&bufp, "0x%.8lx", mfspr(SPRN_PVR)) < 0) + if (asprintf(&bufp, "0x%.8lx", pvr) < 0) bufp = NULL; return bufp; diff --git a/tools/perf/arch/riscv/Makefile b/tools/perf/arch/riscv/Makefile index 90c3c476a242..18ad078000e2 100644 --- a/tools/perf/arch/riscv/Makefile +++ b/tools/perf/arch/riscv/Makefile @@ -1,6 +1,25 @@ -ifndef NO_DWARF -PERF_HAVE_DWARF_REGS := 1 -endif -PERF_HAVE_ARCH_REGS_QUERY_REGISTER_OFFSET := 1 +# SPDX-License-Identifier: GPL-2.0 PERF_HAVE_JITDUMP := 1 HAVE_KVM_STAT_SUPPORT := 1 + +# +# Syscall table generation for perf +# + +out := $(OUTPUT)arch/riscv/include/generated/asm +header := $(out)/syscalls.c +incpath := $(srctree)/tools +sysdef := $(srctree)/tools/arch/riscv/include/uapi/asm/unistd.h +sysprf := $(srctree)/tools/perf/arch/riscv/entry/syscalls/ +systbl := $(sysprf)/mksyscalltbl + +# Create output directory if not already present +$(shell [ -d '$(out)' ] || mkdir -p '$(out)') + +$(header): $(sysdef) $(systbl) + $(Q)$(SHELL) '$(systbl)' '$(CC)' '$(HOSTCC)' $(incpath) $(sysdef) > $@ + +clean:: + $(call QUIET_CLEAN, riscv) $(RM) $(header) + +archheaders: $(header) diff --git a/tools/perf/arch/riscv/entry/syscalls/mksyscalltbl b/tools/perf/arch/riscv/entry/syscalls/mksyscalltbl new file mode 100755 index 000000000000..c59f5e852b97 --- /dev/null +++ b/tools/perf/arch/riscv/entry/syscalls/mksyscalltbl @@ -0,0 +1,47 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 +# +# Generate system call table for perf. Derived from +# powerpc script. +# +# Copyright IBM Corp. 2017 +# Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com> +# Changed by: Ravi Bangoria <ravi.bangoria@linux.vnet.ibm.com> +# Changed by: Kim Phillips <kim.phillips@arm.com> +# Changed by: Björn Töpel <bjorn@rivosinc.com> + +gcc=$1 +hostcc=$2 +incpath=$3 +input=$4 + +if ! test -r $input; then + echo "Could not read input file" >&2 + exit 1 +fi + +create_sc_table() +{ + local sc nr max_nr + + while read sc nr; do + printf "%s\n" " [$nr] = \"$sc\"," + max_nr=$nr + done + + echo "#define SYSCALLTBL_RISCV_MAX_ID $max_nr" +} + +create_table() +{ + echo "#include \"$input\"" + echo "static const char *const syscalltbl_riscv[] = {" + create_sc_table + echo "};" +} + +$gcc -E -dM -x c -I $incpath/include/uapi $input \ + |awk '$2 ~ "__NR" && $3 !~ "__NR3264_" { + sub("^#define __NR(3264)?_", ""); + print | "sort -k2 -n"}' \ + |create_table diff --git a/tools/perf/arch/riscv/include/dwarf-regs-table.h b/tools/perf/arch/riscv/include/dwarf-regs-table.h new file mode 100644 index 000000000000..a45b63a6d5a8 --- /dev/null +++ b/tools/perf/arch/riscv/include/dwarf-regs-table.h @@ -0,0 +1,42 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifdef DEFINE_DWARF_REGSTR_TABLE +/* This is included in perf/util/dwarf-regs.c */ + +#define REG_DWARFNUM_NAME(reg, idx) [idx] = "%" #reg + +static const char * const riscv_regstr_tbl[] = { + REG_DWARFNUM_NAME("%zero", 0), + REG_DWARFNUM_NAME("%ra", 1), + REG_DWARFNUM_NAME("%sp", 2), + REG_DWARFNUM_NAME("%gp", 3), + REG_DWARFNUM_NAME("%tp", 4), + REG_DWARFNUM_NAME("%t0", 5), + REG_DWARFNUM_NAME("%t1", 6), + REG_DWARFNUM_NAME("%t2", 7), + REG_DWARFNUM_NAME("%s0", 8), + REG_DWARFNUM_NAME("%s1", 9), + REG_DWARFNUM_NAME("%a0", 10), + REG_DWARFNUM_NAME("%a1", 11), + REG_DWARFNUM_NAME("%a2", 12), + REG_DWARFNUM_NAME("%a3", 13), + REG_DWARFNUM_NAME("%a4", 14), + REG_DWARFNUM_NAME("%a5", 15), + REG_DWARFNUM_NAME("%a6", 16), + REG_DWARFNUM_NAME("%a7", 17), + REG_DWARFNUM_NAME("%s2", 18), + REG_DWARFNUM_NAME("%s3", 19), + REG_DWARFNUM_NAME("%s4", 20), + REG_DWARFNUM_NAME("%s5", 21), + REG_DWARFNUM_NAME("%s6", 22), + REG_DWARFNUM_NAME("%s7", 23), + REG_DWARFNUM_NAME("%s8", 24), + REG_DWARFNUM_NAME("%s9", 25), + REG_DWARFNUM_NAME("%s10", 26), + REG_DWARFNUM_NAME("%s11", 27), + REG_DWARFNUM_NAME("%t3", 28), + REG_DWARFNUM_NAME("%t4", 29), + REG_DWARFNUM_NAME("%t5", 30), + REG_DWARFNUM_NAME("%t6", 31), +}; + +#endif diff --git a/tools/perf/arch/riscv/util/Build b/tools/perf/arch/riscv/util/Build index f865cb0489ec..58a672246024 100644 --- a/tools/perf/arch/riscv/util/Build +++ b/tools/perf/arch/riscv/util/Build @@ -2,5 +2,4 @@ perf-util-y += perf_regs.o perf-util-y += header.o perf-util-$(CONFIG_LIBTRACEEVENT) += kvm-stat.o -perf-util-$(CONFIG_DWARF) += dwarf-regs.o perf-util-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o diff --git a/tools/perf/arch/riscv/util/dwarf-regs.c b/tools/perf/arch/riscv/util/dwarf-regs.c deleted file mode 100644 index cd0504c02e2e..000000000000 --- a/tools/perf/arch/riscv/util/dwarf-regs.c +++ /dev/null @@ -1,72 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Copyright (C) 2019 Hangzhou C-SKY Microsystems co.,ltd. - * Mapping of DWARF debug register numbers into register names. - */ - -#include <stddef.h> -#include <errno.h> /* for EINVAL */ -#include <string.h> /* for strcmp */ -#include <dwarf-regs.h> - -struct pt_regs_dwarfnum { - const char *name; - unsigned int dwarfnum; -}; - -#define REG_DWARFNUM_NAME(r, num) {.name = r, .dwarfnum = num} -#define REG_DWARFNUM_END {.name = NULL, .dwarfnum = 0} - -struct pt_regs_dwarfnum riscv_dwarf_regs_table[] = { - REG_DWARFNUM_NAME("%zero", 0), - REG_DWARFNUM_NAME("%ra", 1), - REG_DWARFNUM_NAME("%sp", 2), - REG_DWARFNUM_NAME("%gp", 3), - REG_DWARFNUM_NAME("%tp", 4), - REG_DWARFNUM_NAME("%t0", 5), - REG_DWARFNUM_NAME("%t1", 6), - REG_DWARFNUM_NAME("%t2", 7), - REG_DWARFNUM_NAME("%s0", 8), - REG_DWARFNUM_NAME("%s1", 9), - REG_DWARFNUM_NAME("%a0", 10), - REG_DWARFNUM_NAME("%a1", 11), - REG_DWARFNUM_NAME("%a2", 12), - REG_DWARFNUM_NAME("%a3", 13), - REG_DWARFNUM_NAME("%a4", 14), - REG_DWARFNUM_NAME("%a5", 15), - REG_DWARFNUM_NAME("%a6", 16), - REG_DWARFNUM_NAME("%a7", 17), - REG_DWARFNUM_NAME("%s2", 18), - REG_DWARFNUM_NAME("%s3", 19), - REG_DWARFNUM_NAME("%s4", 20), - REG_DWARFNUM_NAME("%s5", 21), - REG_DWARFNUM_NAME("%s6", 22), - REG_DWARFNUM_NAME("%s7", 23), - REG_DWARFNUM_NAME("%s8", 24), - REG_DWARFNUM_NAME("%s9", 25), - REG_DWARFNUM_NAME("%s10", 26), - REG_DWARFNUM_NAME("%s11", 27), - REG_DWARFNUM_NAME("%t3", 28), - REG_DWARFNUM_NAME("%t4", 29), - REG_DWARFNUM_NAME("%t5", 30), - REG_DWARFNUM_NAME("%t6", 31), - REG_DWARFNUM_END, -}; - -#define RISCV_MAX_REGS ((sizeof(riscv_dwarf_regs_table) / \ - sizeof(riscv_dwarf_regs_table[0])) - 1) - -const char *get_arch_regstr(unsigned int n) -{ - return (n < RISCV_MAX_REGS) ? riscv_dwarf_regs_table[n].name : NULL; -} - -int regs_query_register_offset(const char *name) -{ - const struct pt_regs_dwarfnum *roff; - - for (roff = riscv_dwarf_regs_table; roff->name; roff++) - if (!strcmp(roff->name, name)) - return roff->dwarfnum; - return -EINVAL; -} diff --git a/tools/perf/arch/riscv/util/header.c b/tools/perf/arch/riscv/util/header.c index 1b29030021ee..4b839203d4a5 100644 --- a/tools/perf/arch/riscv/util/header.c +++ b/tools/perf/arch/riscv/util/header.c @@ -81,7 +81,7 @@ free: return cpuid; } -int get_cpuid(char *buffer, size_t sz) +int get_cpuid(char *buffer, size_t sz, struct perf_cpu cpu __maybe_unused) { char *cpuid = _get_cpuid(); int ret = 0; @@ -98,7 +98,7 @@ free: } char * -get_cpuid_str(struct perf_pmu *pmu __maybe_unused) +get_cpuid_str(struct perf_cpu cpu __maybe_unused) { return _get_cpuid(); } diff --git a/tools/perf/arch/riscv64/annotate/instructions.c b/tools/perf/arch/riscv64/annotate/instructions.c index 869a0eb28953..55cf911633f8 100644 --- a/tools/perf/arch/riscv64/annotate/instructions.c +++ b/tools/perf/arch/riscv64/annotate/instructions.c @@ -28,6 +28,8 @@ int riscv64__annotate_init(struct arch *arch, char *cpuid __maybe_unused) arch->associate_instruction_ops = riscv64__associate_ins_ops; arch->initialized = true; arch->objdump.comment_char = '#'; + arch->e_machine = EM_RISCV; + arch->e_flags = 0; } return 0; diff --git a/tools/perf/arch/s390/Makefile b/tools/perf/arch/s390/Makefile index 56994e63b43a..c431c21b11ef 100644 --- a/tools/perf/arch/s390/Makefile +++ b/tools/perf/arch/s390/Makefile @@ -1,9 +1,5 @@ # SPDX-License-Identifier: GPL-2.0-only -ifndef NO_DWARF -PERF_HAVE_DWARF_REGS := 1 -endif HAVE_KVM_STAT_SUPPORT := 1 -PERF_HAVE_ARCH_REGS_QUERY_REGISTER_OFFSET := 1 PERF_HAVE_JITDUMP := 1 # diff --git a/tools/perf/arch/s390/annotate/instructions.c b/tools/perf/arch/s390/annotate/instructions.c index eeac25cca699..c61193f1e096 100644 --- a/tools/perf/arch/s390/annotate/instructions.c +++ b/tools/perf/arch/s390/annotate/instructions.c @@ -166,6 +166,8 @@ static int s390__annotate_init(struct arch *arch, char *cpuid __maybe_unused) if (s390__cpuid_parse(arch, cpuid)) err = SYMBOL_ANNOTATE_ERRNO__ARCH_INIT_CPUID_PARSING; } + arch->e_machine = EM_S390; + arch->e_flags = 0; } return err; diff --git a/tools/perf/arch/s390/entry/syscalls/syscall.tbl b/tools/perf/arch/s390/entry/syscalls/syscall.tbl index 01071182763e..e9115b4d8b63 100644 --- a/tools/perf/arch/s390/entry/syscalls/syscall.tbl +++ b/tools/perf/arch/s390/entry/syscalls/syscall.tbl @@ -465,3 +465,7 @@ 460 common lsm_set_self_attr sys_lsm_set_self_attr sys_lsm_set_self_attr 461 common lsm_list_modules sys_lsm_list_modules sys_lsm_list_modules 462 common mseal sys_mseal sys_mseal +463 common setxattrat sys_setxattrat sys_setxattrat +464 common getxattrat sys_getxattrat sys_getxattrat +465 common listxattrat sys_listxattrat sys_listxattrat +466 common removexattrat sys_removexattrat sys_removexattrat diff --git a/tools/perf/arch/s390/util/Build b/tools/perf/arch/s390/util/Build index 1ac830030ff3..736c0ad09194 100644 --- a/tools/perf/arch/s390/util/Build +++ b/tools/perf/arch/s390/util/Build @@ -2,7 +2,6 @@ perf-util-y += header.o perf-util-$(CONFIG_LIBTRACEEVENT) += kvm-stat.o perf-util-y += perf_regs.o -perf-util-$(CONFIG_DWARF) += dwarf-regs.o perf-util-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o perf-util-y += machine.o diff --git a/tools/perf/arch/s390/util/dwarf-regs.c b/tools/perf/arch/s390/util/dwarf-regs.c deleted file mode 100644 index dfddb3099bfa..000000000000 --- a/tools/perf/arch/s390/util/dwarf-regs.c +++ /dev/null @@ -1,43 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Mapping of DWARF debug register numbers into register names. - * - * Copyright IBM Corp. 2010, 2017 - * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com> - * - */ - -#include <errno.h> -#include <stddef.h> -#include <stdlib.h> -#include <linux/kernel.h> -#include <asm/ptrace.h> -#include <string.h> -#include <dwarf-regs.h> -#include "dwarf-regs-table.h" - -const char *get_arch_regstr(unsigned int n) -{ - return (n >= ARRAY_SIZE(s390_dwarf_regs)) ? NULL : s390_dwarf_regs[n]; -} - -/* - * Convert the register name into an offset to struct pt_regs (kernel). - * This is required by the BPF prologue generator. The BPF - * program is called in the BPF overflow handler in the perf - * core. - */ -int regs_query_register_offset(const char *name) -{ - unsigned long gpr; - - if (!name || strncmp(name, "%r", 2)) - return -EINVAL; - - errno = 0; - gpr = strtoul(name + 2, NULL, 10); - if (errno || gpr >= 16) - return -EINVAL; - - return offsetof(user_pt_regs, gprs) + 8 * gpr; -} diff --git a/tools/perf/arch/s390/util/header.c b/tools/perf/arch/s390/util/header.c index 7933f6871c81..db54677a17d2 100644 --- a/tools/perf/arch/s390/util/header.c +++ b/tools/perf/arch/s390/util/header.c @@ -27,7 +27,7 @@ #define SYSINFO "/proc/sysinfo" #define SRVLVL "/proc/service_levels" -int get_cpuid(char *buffer, size_t sz) +int get_cpuid(char *buffer, size_t sz, struct perf_cpu cpu __maybe_unused) { char *cp, *line = NULL, *line2; char type[8], model[33], version[8], manufacturer[32], authorization[8]; @@ -137,11 +137,11 @@ skip_sysinfo: return (nbytes >= sz) ? ENOBUFS : 0; } -char *get_cpuid_str(struct perf_pmu *pmu __maybe_unused) +char *get_cpuid_str(struct perf_cpu cpu) { char *buf = malloc(128); - if (buf && get_cpuid(buf, 128)) + if (buf && get_cpuid(buf, 128, cpu)) zfree(&buf); return buf; } diff --git a/tools/perf/arch/sh/Build b/tools/perf/arch/sh/Build deleted file mode 100644 index e63eabc2c8f4..000000000000 --- a/tools/perf/arch/sh/Build +++ /dev/null @@ -1 +0,0 @@ -perf-util-y += util/ diff --git a/tools/perf/arch/sh/Makefile b/tools/perf/arch/sh/Makefile deleted file mode 100644 index 88c08eed9c7b..000000000000 --- a/tools/perf/arch/sh/Makefile +++ /dev/null @@ -1,4 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0-only -ifndef NO_DWARF -PERF_HAVE_DWARF_REGS := 1 -endif diff --git a/tools/perf/arch/sh/util/Build b/tools/perf/arch/sh/util/Build deleted file mode 100644 index 32f44fc4ab98..000000000000 --- a/tools/perf/arch/sh/util/Build +++ /dev/null @@ -1 +0,0 @@ -perf-util-$(CONFIG_DWARF) += dwarf-regs.o diff --git a/tools/perf/arch/sh/util/dwarf-regs.c b/tools/perf/arch/sh/util/dwarf-regs.c deleted file mode 100644 index 4b17fc86c73b..000000000000 --- a/tools/perf/arch/sh/util/dwarf-regs.c +++ /dev/null @@ -1,41 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Mapping of DWARF debug register numbers into register names. - * - * Copyright (C) 2010 Matt Fleming <matt@console-pimps.org> - */ - -#include <stddef.h> -#include <dwarf-regs.h> - -/* - * Generic dwarf analysis helpers - */ - -#define SH_MAX_REGS 18 -const char *sh_regs_table[SH_MAX_REGS] = { - "r0", - "r1", - "r2", - "r3", - "r4", - "r5", - "r6", - "r7", - "r8", - "r9", - "r10", - "r11", - "r12", - "r13", - "r14", - "r15", - "pc", - "pr", -}; - -/* Return architecture dependent register string (for kprobe-tracer) */ -const char *get_arch_regstr(unsigned int n) -{ - return (n < SH_MAX_REGS) ? sh_regs_table[n] : NULL; -} diff --git a/tools/perf/arch/sparc/Build b/tools/perf/arch/sparc/Build deleted file mode 100644 index e63eabc2c8f4..000000000000 --- a/tools/perf/arch/sparc/Build +++ /dev/null @@ -1 +0,0 @@ -perf-util-y += util/ diff --git a/tools/perf/arch/sparc/Makefile b/tools/perf/arch/sparc/Makefile index 4031db72ba71..8b59ce8efb89 100644 --- a/tools/perf/arch/sparc/Makefile +++ b/tools/perf/arch/sparc/Makefile @@ -1,6 +1,2 @@ # SPDX-License-Identifier: GPL-2.0-only -ifndef NO_DWARF -PERF_HAVE_DWARF_REGS := 1 -endif - PERF_HAVE_JITDUMP := 1 diff --git a/tools/perf/arch/sparc/annotate/instructions.c b/tools/perf/arch/sparc/annotate/instructions.c index 2614c010c235..68c31580ccfc 100644 --- a/tools/perf/arch/sparc/annotate/instructions.c +++ b/tools/perf/arch/sparc/annotate/instructions.c @@ -163,6 +163,8 @@ static int sparc__annotate_init(struct arch *arch, char *cpuid __maybe_unused) arch->initialized = true; arch->associate_instruction_ops = sparc__associate_instruction_ops; arch->objdump.comment_char = '#'; + arch->e_machine = EM_SPARC; + arch->e_flags = 0; } return 0; diff --git a/tools/perf/arch/sparc/util/Build b/tools/perf/arch/sparc/util/Build deleted file mode 100644 index 32f44fc4ab98..000000000000 --- a/tools/perf/arch/sparc/util/Build +++ /dev/null @@ -1 +0,0 @@ -perf-util-$(CONFIG_DWARF) += dwarf-regs.o diff --git a/tools/perf/arch/sparc/util/dwarf-regs.c b/tools/perf/arch/sparc/util/dwarf-regs.c deleted file mode 100644 index 1282cb2dc7bd..000000000000 --- a/tools/perf/arch/sparc/util/dwarf-regs.c +++ /dev/null @@ -1,39 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Mapping of DWARF debug register numbers into register names. - * - * Copyright (C) 2010 David S. Miller <davem@davemloft.net> - */ - -#include <stddef.h> -#include <dwarf-regs.h> - -#define SPARC_MAX_REGS 96 - -const char *sparc_regs_table[SPARC_MAX_REGS] = { - "%g0", "%g1", "%g2", "%g3", "%g4", "%g5", "%g6", "%g7", - "%o0", "%o1", "%o2", "%o3", "%o4", "%o5", "%sp", "%o7", - "%l0", "%l1", "%l2", "%l3", "%l4", "%l5", "%l6", "%l7", - "%i0", "%i1", "%i2", "%i3", "%i4", "%i5", "%fp", "%i7", - "%f0", "%f1", "%f2", "%f3", "%f4", "%f5", "%f6", "%f7", - "%f8", "%f9", "%f10", "%f11", "%f12", "%f13", "%f14", "%f15", - "%f16", "%f17", "%f18", "%f19", "%f20", "%f21", "%f22", "%f23", - "%f24", "%f25", "%f26", "%f27", "%f28", "%f29", "%f30", "%f31", - "%f32", "%f33", "%f34", "%f35", "%f36", "%f37", "%f38", "%f39", - "%f40", "%f41", "%f42", "%f43", "%f44", "%f45", "%f46", "%f47", - "%f48", "%f49", "%f50", "%f51", "%f52", "%f53", "%f54", "%f55", - "%f56", "%f57", "%f58", "%f59", "%f60", "%f61", "%f62", "%f63", -}; - -/** - * get_arch_regstr() - lookup register name from it's DWARF register number - * @n: the DWARF register number - * - * get_arch_regstr() returns the name of the register in struct - * regdwarfnum_table from it's DWARF register number. If the register is not - * found in the table, this returns NULL; - */ -const char *get_arch_regstr(unsigned int n) -{ - return (n < SPARC_MAX_REGS) ? sparc_regs_table[n] : NULL; -} diff --git a/tools/perf/arch/x86/Makefile b/tools/perf/arch/x86/Makefile index 67b4969a6738..a6b6e0a9308a 100644 --- a/tools/perf/arch/x86/Makefile +++ b/tools/perf/arch/x86/Makefile @@ -1,9 +1,5 @@ # SPDX-License-Identifier: GPL-2.0 -ifndef NO_DWARF -PERF_HAVE_DWARF_REGS := 1 -endif HAVE_KVM_STAT_SUPPORT := 1 -PERF_HAVE_ARCH_REGS_QUERY_REGISTER_OFFSET := 1 PERF_HAVE_JITDUMP := 1 ### diff --git a/tools/perf/arch/x86/annotate/instructions.c b/tools/perf/arch/x86/annotate/instructions.c index 5caf5a17f03d..ae94b1f0b9cc 100644 --- a/tools/perf/arch/x86/annotate/instructions.c +++ b/tools/perf/arch/x86/annotate/instructions.c @@ -202,12 +202,13 @@ static int x86__annotate_init(struct arch *arch, char *cpuid) if (x86__cpuid_parse(arch, cpuid)) err = SYMBOL_ANNOTATE_ERRNO__ARCH_INIT_CPUID_PARSING; } - + arch->e_machine = EM_X86_64; + arch->e_flags = 0; arch->initialized = true; return err; } -#ifdef HAVE_DWARF_SUPPORT +#ifdef HAVE_LIBDW_SUPPORT static void update_insn_state_x86(struct type_state *state, struct data_loc_info *dloc, Dwarf_Die *cu_die, struct disasm_line *dl) diff --git a/tools/perf/arch/x86/entry/syscalls/syscall_32.tbl b/tools/perf/arch/x86/entry/syscalls/syscall_32.tbl index 534c74b14fab..4d0fb2fba7e2 100644 --- a/tools/perf/arch/x86/entry/syscalls/syscall_32.tbl +++ b/tools/perf/arch/x86/entry/syscalls/syscall_32.tbl @@ -468,3 +468,7 @@ 460 i386 lsm_set_self_attr sys_lsm_set_self_attr 461 i386 lsm_list_modules sys_lsm_list_modules 462 i386 mseal sys_mseal +463 i386 setxattrat sys_setxattrat +464 i386 getxattrat sys_getxattrat +465 i386 listxattrat sys_listxattrat +466 i386 removexattrat sys_removexattrat diff --git a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl index 7093ee21c0d1..5eb708bff1c7 100644 --- a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl +++ b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl @@ -386,6 +386,10 @@ 460 common lsm_set_self_attr sys_lsm_set_self_attr 461 common lsm_list_modules sys_lsm_list_modules 462 common mseal sys_mseal +463 common setxattrat sys_setxattrat +464 common getxattrat sys_getxattrat +465 common listxattrat sys_listxattrat +466 common removexattrat sys_removexattrat # # Due to a historical design error, certain syscalls are numbered differently diff --git a/tools/perf/arch/x86/tests/intel-cqm.c b/tools/perf/arch/x86/tests/intel-cqm.c deleted file mode 100644 index 360a082fc928..000000000000 --- a/tools/perf/arch/x86/tests/intel-cqm.c +++ /dev/null @@ -1,128 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include "tests/tests.h" -#include "cloexec.h" -#include "debug.h" -#include "evlist.h" -#include "evsel.h" -#include "arch-tests.h" -#include <internal/lib.h> // page_size - -#include <signal.h> -#include <sys/mman.h> -#include <sys/wait.h> -#include <errno.h> -#include <string.h> - -static pid_t spawn(void) -{ - pid_t pid; - - pid = fork(); - if (pid) - return pid; - - while(1) - sleep(5); - return 0; -} - -/* - * Create an event group that contains both a sampled hardware - * (cpu-cycles) and software (intel_cqm/llc_occupancy/) event. We then - * wait for the hardware perf counter to overflow and generate a PMI, - * which triggers an event read for both of the events in the group. - * - * Since reading Intel CQM event counters requires sending SMP IPIs, the - * CQM pmu needs to handle the above situation gracefully, and return - * the last read counter value to avoid triggering a WARN_ON_ONCE() in - * smp_call_function_many() caused by sending IPIs from NMI context. - */ -int test__intel_cqm_count_nmi_context(struct test_suite *test __maybe_unused, int subtest __maybe_unused) -{ - struct evlist *evlist = NULL; - struct evsel *evsel = NULL; - struct perf_event_attr pe; - int i, fd[2], flag, ret; - size_t mmap_len; - void *event; - pid_t pid; - int err = TEST_FAIL; - - flag = perf_event_open_cloexec_flag(); - - evlist = evlist__new(); - if (!evlist) { - pr_debug("evlist__new failed\n"); - return TEST_FAIL; - } - - ret = parse_event(evlist, "intel_cqm/llc_occupancy/"); - if (ret) { - pr_debug("parse_events failed, is \"intel_cqm/llc_occupancy/\" available?\n"); - err = TEST_SKIP; - goto out; - } - - evsel = evlist__first(evlist); - if (!evsel) { - pr_debug("evlist__first failed\n"); - goto out; - } - - memset(&pe, 0, sizeof(pe)); - pe.size = sizeof(pe); - - pe.type = PERF_TYPE_HARDWARE; - pe.config = PERF_COUNT_HW_CPU_CYCLES; - pe.read_format = PERF_FORMAT_GROUP; - - pe.sample_period = 128; - pe.sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_READ; - - pid = spawn(); - - fd[0] = sys_perf_event_open(&pe, pid, -1, -1, flag); - if (fd[0] < 0) { - pr_debug("failed to open event\n"); - goto out; - } - - memset(&pe, 0, sizeof(pe)); - pe.size = sizeof(pe); - - pe.type = evsel->attr.type; - pe.config = evsel->attr.config; - - fd[1] = sys_perf_event_open(&pe, pid, -1, fd[0], flag); - if (fd[1] < 0) { - pr_debug("failed to open event\n"); - goto out; - } - - /* - * Pick a power-of-two number of pages + 1 for the meta-data - * page (struct perf_event_mmap_page). See tools/perf/design.txt. - */ - mmap_len = page_size * 65; - - event = mmap(NULL, mmap_len, PROT_READ, MAP_SHARED, fd[0], 0); - if (event == (void *)(-1)) { - pr_debug("failed to mmap %d\n", errno); - goto out; - } - - sleep(1); - - err = TEST_OK; - - munmap(event, mmap_len); - - for (i = 0; i < 2; i++) - close(fd[i]); - - kill(pid, SIGKILL); - wait(NULL); -out: - evlist__delete(evlist); - return err; -} diff --git a/tools/perf/arch/x86/tests/intel-pt-test.c b/tools/perf/arch/x86/tests/intel-pt-test.c index 09d61fa736e3..b217ed67cd4e 100644 --- a/tools/perf/arch/x86/tests/intel-pt-test.c +++ b/tools/perf/arch/x86/tests/intel-pt-test.c @@ -375,7 +375,7 @@ static int get_pt_caps(int cpu, struct pt_caps *caps) return 0; } -static bool is_hydrid(void) +static bool is_hybrid(void) { unsigned int eax, ebx, ecx, edx = 0; bool result; @@ -441,7 +441,7 @@ int test__intel_pt_hybrid_compat(struct test_suite *test, int subtest) int ret = TEST_OK; int cpu; - if (!is_hydrid()) { + if (!is_hybrid()) { test->test_cases[subtest].skip_reason = "not hybrid"; return TEST_SKIP; } diff --git a/tools/perf/arch/x86/util/Build b/tools/perf/arch/x86/util/Build index 2607ed5c4296..848327378694 100644 --- a/tools/perf/arch/x86/util/Build +++ b/tools/perf/arch/x86/util/Build @@ -10,10 +10,6 @@ perf-util-y += evlist.o perf-util-y += mem-events.o perf-util-y += evsel.o perf-util-y += iostat.o -perf-util-y += env.o - -perf-util-$(CONFIG_DWARF) += dwarf-regs.o -perf-util-$(CONFIG_BPF_PROLOGUE) += dwarf-regs.o perf-util-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind.o perf-util-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o diff --git a/tools/perf/arch/x86/util/auxtrace.c b/tools/perf/arch/x86/util/auxtrace.c index 354780ff1605..ecbf61a7eb3a 100644 --- a/tools/perf/arch/x86/util/auxtrace.c +++ b/tools/perf/arch/x86/util/auxtrace.c @@ -55,11 +55,12 @@ struct auxtrace_record *auxtrace_record__init(struct evlist *evlist, int *err) { char buffer[64]; + struct perf_cpu cpu = perf_cpu_map__min(evlist->core.all_cpus); int ret; *err = 0; - ret = get_cpuid(buffer, sizeof(buffer)); + ret = get_cpuid(buffer, sizeof(buffer), cpu); if (ret) { *err = ret; return NULL; diff --git a/tools/perf/arch/x86/util/dwarf-regs.c b/tools/perf/arch/x86/util/dwarf-regs.c deleted file mode 100644 index 399c4a0a29d8..000000000000 --- a/tools/perf/arch/x86/util/dwarf-regs.c +++ /dev/null @@ -1,153 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * dwarf-regs.c : Mapping of DWARF debug register numbers into register names. - * Extracted from probe-finder.c - * - * Written by Masami Hiramatsu <mhiramat@redhat.com> - */ - -#include <stddef.h> -#include <errno.h> /* for EINVAL */ -#include <string.h> /* for strcmp */ -#include <linux/ptrace.h> /* for struct pt_regs */ -#include <linux/kernel.h> /* for offsetof */ -#include <dwarf-regs.h> - -/* - * See arch/x86/kernel/ptrace.c. - * Different from it: - * - * - Since struct pt_regs is defined differently for user and kernel, - * but we want to use 'ax, bx' instead of 'rax, rbx' (which is struct - * field name of user's pt_regs), we make REG_OFFSET_NAME to accept - * both string name and reg field name. - * - * - Since accessing x86_32's pt_regs from x86_64 building is difficult - * and vise versa, we simply fill offset with -1, so - * get_arch_regstr() still works but regs_query_register_offset() - * returns error. - * The only inconvenience caused by it now is that we are not allowed - * to generate BPF prologue for a x86_64 kernel if perf is built for - * x86_32. This is really a rare usecase. - * - * - Order is different from kernel's ptrace.c for get_arch_regstr(). Use - * the order defined by dwarf. - */ - -struct pt_regs_offset { - const char *name; - int offset; -}; - -#define REG_OFFSET_END {.name = NULL, .offset = 0} - -#ifdef __x86_64__ -# define REG_OFFSET_NAME_64(n, r) {.name = n, .offset = offsetof(struct pt_regs, r)} -# define REG_OFFSET_NAME_32(n, r) {.name = n, .offset = -1} -#else -# define REG_OFFSET_NAME_64(n, r) {.name = n, .offset = -1} -# define REG_OFFSET_NAME_32(n, r) {.name = n, .offset = offsetof(struct pt_regs, r)} -#endif - -/* TODO: switching by dwarf address size */ -#ifndef __x86_64__ -static const struct pt_regs_offset x86_32_regoffset_table[] = { - REG_OFFSET_NAME_32("%ax", eax), - REG_OFFSET_NAME_32("%cx", ecx), - REG_OFFSET_NAME_32("%dx", edx), - REG_OFFSET_NAME_32("%bx", ebx), - REG_OFFSET_NAME_32("$stack", esp), /* Stack address instead of %sp */ - REG_OFFSET_NAME_32("%bp", ebp), - REG_OFFSET_NAME_32("%si", esi), - REG_OFFSET_NAME_32("%di", edi), - REG_OFFSET_END, -}; - -#define regoffset_table x86_32_regoffset_table -#else -static const struct pt_regs_offset x86_64_regoffset_table[] = { - REG_OFFSET_NAME_64("%ax", rax), - REG_OFFSET_NAME_64("%dx", rdx), - REG_OFFSET_NAME_64("%cx", rcx), - REG_OFFSET_NAME_64("%bx", rbx), - REG_OFFSET_NAME_64("%si", rsi), - REG_OFFSET_NAME_64("%di", rdi), - REG_OFFSET_NAME_64("%bp", rbp), - REG_OFFSET_NAME_64("%sp", rsp), - REG_OFFSET_NAME_64("%r8", r8), - REG_OFFSET_NAME_64("%r9", r9), - REG_OFFSET_NAME_64("%r10", r10), - REG_OFFSET_NAME_64("%r11", r11), - REG_OFFSET_NAME_64("%r12", r12), - REG_OFFSET_NAME_64("%r13", r13), - REG_OFFSET_NAME_64("%r14", r14), - REG_OFFSET_NAME_64("%r15", r15), - REG_OFFSET_END, -}; - -#define regoffset_table x86_64_regoffset_table -#endif - -/* Minus 1 for the ending REG_OFFSET_END */ -#define ARCH_MAX_REGS ((sizeof(regoffset_table) / sizeof(regoffset_table[0])) - 1) - -/* Return architecture dependent register string (for kprobe-tracer) */ -const char *get_arch_regstr(unsigned int n) -{ - return (n < ARCH_MAX_REGS) ? regoffset_table[n].name : NULL; -} - -/* Reuse code from arch/x86/kernel/ptrace.c */ -/** - * regs_query_register_offset() - query register offset from its name - * @name: the name of a register - * - * regs_query_register_offset() returns the offset of a register in struct - * pt_regs from its name. If the name is invalid, this returns -EINVAL; - */ -int regs_query_register_offset(const char *name) -{ - const struct pt_regs_offset *roff; - for (roff = regoffset_table; roff->name != NULL; roff++) - if (!strcmp(roff->name, name)) - return roff->offset; - return -EINVAL; -} - -struct dwarf_regs_idx { - const char *name; - int idx; -}; - -static const struct dwarf_regs_idx x86_regidx_table[] = { - { "rax", 0 }, { "eax", 0 }, { "ax", 0 }, { "al", 0 }, - { "rdx", 1 }, { "edx", 1 }, { "dx", 1 }, { "dl", 1 }, - { "rcx", 2 }, { "ecx", 2 }, { "cx", 2 }, { "cl", 2 }, - { "rbx", 3 }, { "edx", 3 }, { "bx", 3 }, { "bl", 3 }, - { "rsi", 4 }, { "esi", 4 }, { "si", 4 }, { "sil", 4 }, - { "rdi", 5 }, { "edi", 5 }, { "di", 5 }, { "dil", 5 }, - { "rbp", 6 }, { "ebp", 6 }, { "bp", 6 }, { "bpl", 6 }, - { "rsp", 7 }, { "esp", 7 }, { "sp", 7 }, { "spl", 7 }, - { "r8", 8 }, { "r8d", 8 }, { "r8w", 8 }, { "r8b", 8 }, - { "r9", 9 }, { "r9d", 9 }, { "r9w", 9 }, { "r9b", 9 }, - { "r10", 10 }, { "r10d", 10 }, { "r10w", 10 }, { "r10b", 10 }, - { "r11", 11 }, { "r11d", 11 }, { "r11w", 11 }, { "r11b", 11 }, - { "r12", 12 }, { "r12d", 12 }, { "r12w", 12 }, { "r12b", 12 }, - { "r13", 13 }, { "r13d", 13 }, { "r13w", 13 }, { "r13b", 13 }, - { "r14", 14 }, { "r14d", 14 }, { "r14w", 14 }, { "r14b", 14 }, - { "r15", 15 }, { "r15d", 15 }, { "r15w", 15 }, { "r15b", 15 }, - { "rip", DWARF_REG_PC }, -}; - -int get_arch_regnum(const char *name) -{ - unsigned int i; - - if (*name != '%') - return -EINVAL; - - for (i = 0; i < ARRAY_SIZE(x86_regidx_table); i++) - if (!strcmp(x86_regidx_table[i].name, name + 1)) - return x86_regidx_table[i].idx; - return -ENOENT; -} diff --git a/tools/perf/arch/x86/util/env.c b/tools/perf/arch/x86/util/env.c deleted file mode 100644 index 3e537ffb1353..000000000000 --- a/tools/perf/arch/x86/util/env.c +++ /dev/null @@ -1,19 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include "linux/string.h" -#include "util/env.h" -#include "env.h" - -bool x86__is_amd_cpu(void) -{ - struct perf_env env = { .total_mem = 0, }; - static int is_amd; /* 0: Uninitialized, 1: Yes, -1: No */ - - if (is_amd) - goto ret; - - perf_env__cpuid(&env); - is_amd = env.cpuid && strstarts(env.cpuid, "AuthenticAMD") ? 1 : -1; - perf_env__exit(&env); -ret: - return is_amd >= 1 ? true : false; -} diff --git a/tools/perf/arch/x86/util/env.h b/tools/perf/arch/x86/util/env.h deleted file mode 100644 index d78f080b6b3f..000000000000 --- a/tools/perf/arch/x86/util/env.h +++ /dev/null @@ -1,7 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _X86_ENV_H -#define _X86_ENV_H - -bool x86__is_amd_cpu(void); - -#endif /* _X86_ENV_H */ diff --git a/tools/perf/arch/x86/util/evlist.c b/tools/perf/arch/x86/util/evlist.c index cebdd483149e..447a734e591c 100644 --- a/tools/perf/arch/x86/util/evlist.c +++ b/tools/perf/arch/x86/util/evlist.c @@ -1,91 +1,86 @@ // SPDX-License-Identifier: GPL-2.0 -#include <stdio.h> -#include "util/pmu.h" -#include "util/pmus.h" -#include "util/evlist.h" -#include "util/parse-events.h" -#include "util/event.h" +#include <string.h> +#include "../../../util/evlist.h" +#include "../../../util/evsel.h" #include "topdown.h" #include "evsel.h" -static int ___evlist__add_default_attrs(struct evlist *evlist, - struct perf_event_attr *attrs, - size_t nr_attrs) -{ - LIST_HEAD(head); - size_t i = 0; - - for (i = 0; i < nr_attrs; i++) - event_attr_init(attrs + i); - - if (perf_pmus__num_core_pmus() == 1) - return evlist__add_attrs(evlist, attrs, nr_attrs); - - for (i = 0; i < nr_attrs; i++) { - struct perf_pmu *pmu = NULL; - - if (attrs[i].type == PERF_TYPE_SOFTWARE) { - struct evsel *evsel = evsel__new(attrs + i); - - if (evsel == NULL) - goto out_delete_partial_list; - list_add_tail(&evsel->core.node, &head); - continue; - } - - while ((pmu = perf_pmus__scan_core(pmu)) != NULL) { - struct perf_cpu_map *cpus; - struct evsel *evsel; - - evsel = evsel__new(attrs + i); - if (evsel == NULL) - goto out_delete_partial_list; - evsel->core.attr.config |= (__u64)pmu->type << PERF_PMU_TYPE_SHIFT; - cpus = perf_cpu_map__get(pmu->cpus); - evsel->core.cpus = cpus; - evsel->core.own_cpus = perf_cpu_map__get(cpus); - evsel->pmu_name = strdup(pmu->name); - list_add_tail(&evsel->core.node, &head); - } - } - - evlist__splice_list_tail(evlist, &head); - - return 0; - -out_delete_partial_list: - { - struct evsel *evsel, *n; - - __evlist__for_each_entry_safe(&head, n, evsel) - evsel__delete(evsel); - } - return -1; -} - -int arch_evlist__add_default_attrs(struct evlist *evlist, - struct perf_event_attr *attrs, - size_t nr_attrs) -{ - if (!nr_attrs) - return 0; - - return ___evlist__add_default_attrs(evlist, attrs, nr_attrs); -} - int arch_evlist__cmp(const struct evsel *lhs, const struct evsel *rhs) { + /* + * Currently the following topdown events sequence are supported to + * move and regroup correctly. + * + * a. all events in a group + * perf stat -e "{instructions,topdown-retiring,slots}" -C0 sleep 1 + * WARNING: events were regrouped to match PMUs + * Performance counter stats for 'CPU(s) 0': + * 15,066,240 slots + * 1,899,760 instructions + * 2,126,998 topdown-retiring + * b. all events not in a group + * perf stat -e "instructions,topdown-retiring,slots" -C0 sleep 1 + * WARNING: events were regrouped to match PMUs + * Performance counter stats for 'CPU(s) 0': + * 2,045,561 instructions + * 17,108,370 slots + * 2,281,116 topdown-retiring + * c. slots event in a group but topdown metrics events outside the group + * perf stat -e "{instructions,slots},topdown-retiring" -C0 sleep 1 + * WARNING: events were regrouped to match PMUs + * Performance counter stats for 'CPU(s) 0': + * 20,323,878 slots + * 2,634,884 instructions + * 3,028,656 topdown-retiring + * d. slots event and topdown metrics events in two groups + * perf stat -e "{instructions,slots},{topdown-retiring}" -C0 sleep 1 + * WARNING: events were regrouped to match PMUs + * Performance counter stats for 'CPU(s) 0': + * 26,319,024 slots + * 2,427,791 instructions + * 2,683,508 topdown-retiring + * + * If slots event and topdown metrics events are not in same group, the + * topdown metrics events must be first event after the slots event group, + * otherwise topdown metrics events can't be regrouped correctly, e.g. + * + * a. perf stat -e "{instructions,slots},cycles,topdown-retiring" -C0 sleep 1 + * WARNING: events were regrouped to match PMUs + * Performance counter stats for 'CPU(s) 0': + * 17,923,134 slots + * 2,154,855 instructions + * 3,015,058 cycles + * <not supported> topdown-retiring + * + * If slots event and topdown metrics events are in two groups, the group which + * has topdown metrics events must contain only the topdown metrics event, + * otherwise topdown metrics event can't be regrouped correctly as well, e.g. + * + * a. perf stat -e "{instructions,slots},{topdown-retiring,cycles}" -C0 sleep 1 + * WARNING: events were regrouped to match PMUs + * Error: + * The sys_perf_event_open() syscall returned with 22 (Invalid argument) for + * event (topdown-retiring) + */ if (topdown_sys_has_perf_metrics() && (arch_evsel__must_be_in_group(lhs) || arch_evsel__must_be_in_group(rhs))) { /* Ensure the topdown slots comes first. */ - if (strcasestr(lhs->name, "slots") && !strcasestr(lhs->name, "uops_retired.slots")) + if (arch_is_topdown_slots(lhs)) return -1; - if (strcasestr(rhs->name, "slots") && !strcasestr(rhs->name, "uops_retired.slots")) + if (arch_is_topdown_slots(rhs)) return 1; - /* Followed by topdown events. */ - if (strcasestr(lhs->name, "topdown") && !strcasestr(rhs->name, "topdown")) + + /* + * Move topdown metrics events forward only when topdown metrics + * events are not in same group with previous slots event. If + * topdown metrics events are already in same group with slots + * event, do nothing. + */ + if (arch_is_topdown_metrics(lhs) && !arch_is_topdown_metrics(rhs) && + lhs->core.leader != rhs->core.leader) return -1; - if (!strcasestr(lhs->name, "topdown") && strcasestr(rhs->name, "topdown")) + if (!arch_is_topdown_metrics(lhs) && arch_is_topdown_metrics(rhs) && + lhs->core.leader != rhs->core.leader) return 1; } diff --git a/tools/perf/arch/x86/util/evsel.c b/tools/perf/arch/x86/util/evsel.c index 090d0f371891..3dd29ba2c23b 100644 --- a/tools/perf/arch/x86/util/evsel.c +++ b/tools/perf/arch/x86/util/evsel.c @@ -6,6 +6,7 @@ #include "util/pmu.h" #include "util/pmus.h" #include "linux/string.h" +#include "topdown.h" #include "evsel.h" #include "util/debug.h" #include "env.h" @@ -21,7 +22,8 @@ void arch_evsel__set_sample_weight(struct evsel *evsel) /* Check whether the evsel's PMU supports the perf metrics */ bool evsel__sys_has_perf_metrics(const struct evsel *evsel) { - const char *pmu_name = evsel->pmu_name ? evsel->pmu_name : "cpu"; + struct perf_pmu *pmu; + u32 type = evsel->core.attr.type; /* * The PERF_TYPE_RAW type is the core PMU type, e.g., "cpu" PMU @@ -31,11 +33,31 @@ bool evsel__sys_has_perf_metrics(const struct evsel *evsel) * Checking both the PERF_TYPE_RAW type and the slots event * should be good enough to detect the perf metrics feature. */ - if ((evsel->core.attr.type == PERF_TYPE_RAW) && - perf_pmus__have_event(pmu_name, "slots")) - return true; +again: + switch (type) { + case PERF_TYPE_HARDWARE: + case PERF_TYPE_HW_CACHE: + type = evsel->core.attr.config >> PERF_PMU_TYPE_SHIFT; + if (type) + goto again; + break; + case PERF_TYPE_RAW: + break; + default: + return false; + } + + pmu = evsel->pmu; + if (pmu && perf_pmu__is_fake(pmu)) + pmu = NULL; - return false; + if (!pmu) { + while ((pmu = perf_pmus__scan_core(pmu)) != NULL) { + if (pmu->type == PERF_TYPE_RAW) + break; + } + } + return pmu && perf_pmu__have_event(pmu, "slots"); } bool arch_evsel__must_be_in_group(const struct evsel *evsel) @@ -44,7 +66,7 @@ bool arch_evsel__must_be_in_group(const struct evsel *evsel) strcasestr(evsel->name, "uops_retired.slots")) return false; - return strcasestr(evsel->name, "topdown") || strcasestr(evsel->name, "slots"); + return arch_is_topdown_metrics(evsel) || arch_is_topdown_slots(evsel); } int arch_evsel__hw_name(struct evsel *evsel, char *bf, size_t size) @@ -63,7 +85,7 @@ int arch_evsel__hw_name(struct evsel *evsel, char *bf, size_t size) return scnprintf(bf, size, "%s", event_name); return scnprintf(bf, size, "%s/%s/", - evsel->pmu_name ? evsel->pmu_name : "cpu", + evsel->pmu ? evsel->pmu->name : "cpu", event_name); } @@ -108,7 +130,7 @@ int arch_evsel__open_strerror(struct evsel *evsel, char *msg, size_t size) return 0; if (!evsel->core.attr.precise_ip && - !(evsel->pmu_name && !strncmp(evsel->pmu_name, "ibs", 3))) + !(evsel->pmu && !strncmp(evsel->pmu->name, "ibs", 3))) return 0; /* More verbose IBS errors. */ diff --git a/tools/perf/arch/x86/util/header.c b/tools/perf/arch/x86/util/header.c index a51444a77a5f..412977f8aa83 100644 --- a/tools/perf/arch/x86/util/header.c +++ b/tools/perf/arch/x86/util/header.c @@ -58,13 +58,12 @@ __get_cpuid(char *buffer, size_t sz, const char *fmt) } int -get_cpuid(char *buffer, size_t sz) +get_cpuid(char *buffer, size_t sz, struct perf_cpu cpu __maybe_unused) { return __get_cpuid(buffer, sz, "%s,%u,%u,%u$"); } -char * -get_cpuid_str(struct perf_pmu *pmu __maybe_unused) +char *get_cpuid_str(struct perf_cpu cpu __maybe_unused) { char *buf = malloc(128); diff --git a/tools/perf/arch/x86/util/intel-pt.c b/tools/perf/arch/x86/util/intel-pt.c index ea510a7486b1..8f235d8b67b6 100644 --- a/tools/perf/arch/x86/util/intel-pt.c +++ b/tools/perf/arch/x86/util/intel-pt.c @@ -75,7 +75,8 @@ static int intel_pt_parse_terms_with_default(const struct perf_pmu *pmu, goto out_free; attr.config = *config; - err = perf_pmu__config_terms(pmu, &attr, &terms, /*zero=*/true, /*err=*/NULL); + err = perf_pmu__config_terms(pmu, &attr, &terms, /*zero=*/true, /*apply_hardcoded=*/false, + /*err=*/NULL); if (err) goto out_free; diff --git a/tools/perf/arch/x86/util/iostat.c b/tools/perf/arch/x86/util/iostat.c index df7b5dfcc26a..366b44d0bb7e 100644 --- a/tools/perf/arch/x86/util/iostat.c +++ b/tools/perf/arch/x86/util/iostat.c @@ -444,7 +444,7 @@ void iostat_print_metric(struct perf_stat_config *config, struct evsel *evsel, iostat_value = (count->val - prev_count_val) / ((double) count->run / count->ena); } - out->print_metric(config, out->ctx, NULL, "%8.0f", iostat_metric, + out->print_metric(config, out->ctx, METRIC_THRESHOLD_UNKNOWN, "%8.0f", iostat_metric, iostat_value / (256 * 1024)); } diff --git a/tools/perf/arch/x86/util/pmu.c b/tools/perf/arch/x86/util/pmu.c index c3d89d6ba1bf..e0060dac2a9f 100644 --- a/tools/perf/arch/x86/util/pmu.c +++ b/tools/perf/arch/x86/util/pmu.c @@ -16,7 +16,7 @@ #include "../../../util/fncache.h" #include "../../../util/pmus.h" #include "mem-events.h" -#include "env.h" +#include "util/env.h" void perf_pmu__arch_init(struct perf_pmu *pmu __maybe_unused) { diff --git a/tools/perf/arch/x86/util/topdown.c b/tools/perf/arch/x86/util/topdown.c index 3f9a267d4501..f63747d0abdf 100644 --- a/tools/perf/arch/x86/util/topdown.c +++ b/tools/perf/arch/x86/util/topdown.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include "api/fs/fs.h" #include "util/evsel.h" +#include "util/evlist.h" #include "util/pmu.h" #include "util/pmus.h" #include "util/topdown.h" @@ -32,6 +33,31 @@ bool topdown_sys_has_perf_metrics(void) } #define TOPDOWN_SLOTS 0x0400 +bool arch_is_topdown_slots(const struct evsel *evsel) +{ + if (evsel->core.attr.config == TOPDOWN_SLOTS) + return true; + + return false; +} + +bool arch_is_topdown_metrics(const struct evsel *evsel) +{ + int config = evsel->core.attr.config; + const char *name_from_config; + struct perf_pmu *pmu; + + /* All topdown events have an event code of 0. */ + if ((config & 0xFF) != 0) + return false; + + pmu = evsel__find_pmu(evsel); + if (!pmu || !pmu->is_core) + return false; + + name_from_config = perf_pmu__name_from_config(pmu, config); + return name_from_config && strcasestr(name_from_config, "topdown"); +} /* * Check whether a topdown group supports sample-read. @@ -41,11 +67,24 @@ bool topdown_sys_has_perf_metrics(void) */ bool arch_topdown_sample_read(struct evsel *leader) { + struct evsel *evsel; + if (!evsel__sys_has_perf_metrics(leader)) return false; - if (leader->core.attr.config == TOPDOWN_SLOTS) - return true; + if (!arch_is_topdown_slots(leader)) + return false; + + /* + * If slots event as leader event but no topdown metric events + * in group, slots event should still sample as leader. + */ + evlist__for_each_entry(leader->evlist, evsel) { + if (evsel->core.leader != leader->core.leader) + return false; + if (evsel != leader && arch_is_topdown_metrics(evsel)) + return true; + } return false; } diff --git a/tools/perf/arch/x86/util/topdown.h b/tools/perf/arch/x86/util/topdown.h index 46bf9273e572..1bae9b1822d7 100644 --- a/tools/perf/arch/x86/util/topdown.h +++ b/tools/perf/arch/x86/util/topdown.h @@ -3,5 +3,7 @@ #define _TOPDOWN_H 1 bool topdown_sys_has_perf_metrics(void); +bool arch_is_topdown_slots(const struct evsel *evsel); +bool arch_is_topdown_metrics(const struct evsel *evsel); #endif diff --git a/tools/perf/arch/x86/util/tsc.c b/tools/perf/arch/x86/util/tsc.c index e2d6cfe21057..3a439e4b12d2 100644 --- a/tools/perf/arch/x86/util/tsc.c +++ b/tools/perf/arch/x86/util/tsc.c @@ -24,9 +24,9 @@ u64 rdtsc(void) * ... * will return 3000000000. */ -static double cpuinfo_tsc_freq(void) +static u64 cpuinfo_tsc_freq(void) { - double result = 0; + u64 result = 0; FILE *cpuinfo; char *line = NULL; size_t len = 0; @@ -34,20 +34,22 @@ static double cpuinfo_tsc_freq(void) cpuinfo = fopen("/proc/cpuinfo", "r"); if (!cpuinfo) { pr_err("Failed to read /proc/cpuinfo for TSC frequency\n"); - return NAN; + return 0; } while (getline(&line, &len, cpuinfo) > 0) { if (!strncmp(line, "model name", 10)) { char *pos = strstr(line + 11, " @ "); + double float_result; - if (pos && sscanf(pos, " @ %lfGHz", &result) == 1) { - result *= 1000000000; + if (pos && sscanf(pos, " @ %lfGHz", &float_result) == 1) { + float_result *= 1000000000; + result = (u64)float_result; goto out; } } } out: - if (fpclassify(result) == FP_ZERO) + if (result == 0) pr_err("Failed to find TSC frequency in /proc/cpuinfo\n"); free(line); @@ -55,7 +57,7 @@ out: return result; } -double arch_get_tsc_freq(void) +u64 arch_get_tsc_freq(void) { unsigned int a, b, c, d, lvl; static bool cached; @@ -86,6 +88,6 @@ double arch_get_tsc_freq(void) return tsc; } - tsc = (double)c * (double)b / (double)a; + tsc = (u64)c * (u64)b / (u64)a; return tsc; } diff --git a/tools/perf/arch/xtensa/Build b/tools/perf/arch/xtensa/Build deleted file mode 100644 index e63eabc2c8f4..000000000000 --- a/tools/perf/arch/xtensa/Build +++ /dev/null @@ -1 +0,0 @@ -perf-util-y += util/ diff --git a/tools/perf/arch/xtensa/Makefile b/tools/perf/arch/xtensa/Makefile deleted file mode 100644 index 88c08eed9c7b..000000000000 --- a/tools/perf/arch/xtensa/Makefile +++ /dev/null @@ -1,4 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0-only -ifndef NO_DWARF -PERF_HAVE_DWARF_REGS := 1 -endif diff --git a/tools/perf/arch/xtensa/util/Build b/tools/perf/arch/xtensa/util/Build deleted file mode 100644 index e813e618954b..000000000000 --- a/tools/perf/arch/xtensa/util/Build +++ /dev/null @@ -1 +0,0 @@ -perf-$(CONFIG_DWARF) += dwarf-regs.o diff --git a/tools/perf/arch/xtensa/util/dwarf-regs.c b/tools/perf/arch/xtensa/util/dwarf-regs.c deleted file mode 100644 index 12f5457300f5..000000000000 --- a/tools/perf/arch/xtensa/util/dwarf-regs.c +++ /dev/null @@ -1,21 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Mapping of DWARF debug register numbers into register names. - * - * Copyright (c) 2015 Cadence Design Systems Inc. - */ - -#include <stddef.h> -#include <dwarf-regs.h> - -#define XTENSA_MAX_REGS 16 - -const char *xtensa_regs_table[XTENSA_MAX_REGS] = { - "a0", "a1", "a2", "a3", "a4", "a5", "a6", "a7", - "a8", "a9", "a10", "a11", "a12", "a13", "a14", "a15", -}; - -const char *get_arch_regstr(unsigned int n) -{ - return n < XTENSA_MAX_REGS ? xtensa_regs_table[n] : NULL; -} diff --git a/tools/perf/bench/numa.c b/tools/perf/bench/numa.c index 1fbd7c947abc..19be2aaf4dc0 100644 --- a/tools/perf/bench/numa.c +++ b/tools/perf/bench/numa.c @@ -27,6 +27,7 @@ #include <sys/resource.h> #include <sys/wait.h> #include <sys/prctl.h> +#include <sys/stat.h> #include <sys/types.h> #include <linux/kernel.h> #include <linux/time64.h> @@ -35,6 +36,7 @@ #include "../util/header.h" #include "../util/mutex.h" +#include <api/fs/fs.h> #include <numa.h> #include <numaif.h> @@ -533,6 +535,57 @@ static int parse_cpu_list(const char *arg) return 0; } +/* + * Check whether a CPU is online + * + * Returns: + * 1 -> if CPU is online + * 0 -> if CPU is offline + * -1 -> error case + */ +static int is_cpu_online(unsigned int cpu) +{ + char *str; + size_t strlen; + char buf[256]; + int status = -1; + struct stat statbuf; + + snprintf(buf, sizeof(buf), + "/sys/devices/system/cpu/cpu%d", cpu); + if (stat(buf, &statbuf) != 0) + return 0; + + /* + * Check if /sys/devices/system/cpu/cpux/online file + * exists. Some cases cpu0 won't have online file since + * it is not expected to be turned off generally. + * In kernels without CONFIG_HOTPLUG_CPU, this + * file won't exist + */ + snprintf(buf, sizeof(buf), + "/sys/devices/system/cpu/cpu%d/online", cpu); + if (stat(buf, &statbuf) != 0) + return 1; + + /* + * Read online file using sysfs__read_str. + * If read or open fails, return -1. + * If read succeeds, return value from file + * which gets stored in "str" + */ + snprintf(buf, sizeof(buf), + "devices/system/cpu/cpu%d/online", cpu); + + if (sysfs__read_str(buf, &str, &strlen) < 0) + return status; + + status = atoi(str); + + free(str); + return status; +} + static int parse_setup_cpu_list(void) { struct thread_data *td; diff --git a/tools/perf/bench/sched-pipe.c b/tools/perf/bench/sched-pipe.c index 3af6d3c55aba..e2562677df96 100644 --- a/tools/perf/bench/sched-pipe.c +++ b/tools/perf/bench/sched-pipe.c @@ -23,6 +23,7 @@ #include <errno.h> #include <fcntl.h> #include <assert.h> +#include <sys/epoll.h> #include <sys/time.h> #include <sys/types.h> #include <sys/syscall.h> @@ -34,6 +35,8 @@ struct thread_data { int nr; int pipe_read; int pipe_write; + struct epoll_event epoll_ev; + int epoll_fd; bool cgroup_failed; pthread_t pthread; }; @@ -44,6 +47,7 @@ static int loops = LOOPS_DEFAULT; /* Use processes by default: */ static bool threaded; +static bool nonblocking; static char *cgrp_names[2]; static struct cgroup *cgrps[2]; @@ -81,6 +85,7 @@ out: } static const struct option options[] = { + OPT_BOOLEAN('n', "nonblocking", &nonblocking, "Use non-blocking operations"), OPT_INTEGER('l', "loop", &loops, "Specify number of loops"), OPT_BOOLEAN('T', "threaded", &threaded, "Specify threads/process based task setup"), OPT_CALLBACK('G', "cgroups", NULL, "SEND,RECV", @@ -165,11 +170,25 @@ static void exit_cgroup(int nr) free(cgrp_names[nr]); } +static inline int read_pipe(struct thread_data *td) +{ + int ret, m; +retry: + if (nonblocking) { + ret = epoll_wait(td->epoll_fd, &td->epoll_ev, 1, -1); + if (ret < 0) + return ret; + } + ret = read(td->pipe_read, &m, sizeof(int)); + if (nonblocking && ret < 0 && errno == EWOULDBLOCK) + goto retry; + return ret; +} + static void *worker_thread(void *__tdata) { struct thread_data *td = __tdata; - int m = 0, i; - int ret; + int i, ret, m = 0; ret = enter_cgroup(td->nr); if (ret < 0) { @@ -177,16 +196,23 @@ static void *worker_thread(void *__tdata) return NULL; } + if (nonblocking) { + td->epoll_ev.events = EPOLLIN; + td->epoll_fd = epoll_create(1); + BUG_ON(td->epoll_fd < 0); + BUG_ON(epoll_ctl(td->epoll_fd, EPOLL_CTL_ADD, td->pipe_read, &td->epoll_ev) < 0); + } + for (i = 0; i < loops; i++) { if (!td->nr) { - ret = read(td->pipe_read, &m, sizeof(int)); + ret = read_pipe(td); BUG_ON(ret != sizeof(int)); ret = write(td->pipe_write, &m, sizeof(int)); BUG_ON(ret != sizeof(int)); } else { ret = write(td->pipe_write, &m, sizeof(int)); BUG_ON(ret != sizeof(int)); - ret = read(td->pipe_read, &m, sizeof(int)); + ret = read_pipe(td); BUG_ON(ret != sizeof(int)); } } @@ -209,13 +235,16 @@ int bench_sched_pipe(int argc, const char **argv) * discarding returned value of read(), write() * causes error in building environment for perf */ - int __maybe_unused ret, wait_stat; + int __maybe_unused ret, wait_stat, flags = 0; pid_t pid, retpid __maybe_unused; argc = parse_options(argc, argv, options, bench_sched_pipe_usage, 0); - BUG_ON(pipe(pipe_1)); - BUG_ON(pipe(pipe_2)); + if (nonblocking) + flags |= O_NONBLOCK; + + BUG_ON(pipe2(pipe_1, flags)); + BUG_ON(pipe2(pipe_2, flags)); gettimeofday(&start, NULL); diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index 3dc6197ef3fa..bb87e6e7687d 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -840,7 +840,7 @@ int cmd_annotate(int argc, const char **argv) } #endif -#ifndef HAVE_DWARF_GETLOCATIONS_SUPPORT +#ifndef HAVE_LIBDW_SUPPORT if (annotate.data_type) { pr_err("Error: Data type profiling is disabled due to missing DWARF support\n"); return -ENOTSUP; diff --git a/tools/perf/builtin-check.c b/tools/perf/builtin-check.c index 0b76b6e42b78..2346536a5ee1 100644 --- a/tools/perf/builtin-check.c +++ b/tools/perf/builtin-check.c @@ -27,15 +27,15 @@ struct feature_status supported_features[] = { FEATURE_STATUS("bpf", HAVE_LIBBPF_SUPPORT), FEATURE_STATUS("bpf_skeletons", HAVE_BPF_SKEL), FEATURE_STATUS("debuginfod", HAVE_DEBUGINFOD_SUPPORT), - FEATURE_STATUS("dwarf", HAVE_DWARF_SUPPORT), - FEATURE_STATUS("dwarf_getlocations", HAVE_DWARF_GETLOCATIONS_SUPPORT), + FEATURE_STATUS("dwarf", HAVE_LIBDW_SUPPORT), + FEATURE_STATUS("dwarf_getlocations", HAVE_LIBDW_SUPPORT), FEATURE_STATUS("dwarf-unwind", HAVE_DWARF_UNWIND_SUPPORT), FEATURE_STATUS("auxtrace", HAVE_AUXTRACE_SUPPORT), FEATURE_STATUS("libaudit", HAVE_LIBAUDIT_SUPPORT), FEATURE_STATUS("libbfd", HAVE_LIBBFD_SUPPORT), FEATURE_STATUS("libcapstone", HAVE_LIBCAPSTONE_SUPPORT), FEATURE_STATUS("libcrypto", HAVE_LIBCRYPTO_SUPPORT), - FEATURE_STATUS("libdw-dwarf-unwind", HAVE_DWARF_SUPPORT), + FEATURE_STATUS("libdw-dwarf-unwind", HAVE_LIBDW_SUPPORT), FEATURE_STATUS("libelf", HAVE_LIBELF_SUPPORT), FEATURE_STATUS("libnuma", HAVE_LIBNUMA_SUPPORT), FEATURE_STATUS("libopencsd", HAVE_CSTRACE_SUPPORT), diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c index 23326dd20333..82fb7773e03e 100644 --- a/tools/perf/builtin-diff.c +++ b/tools/perf/builtin-diff.c @@ -469,13 +469,13 @@ out: static struct perf_diff pdiff; -static struct evsel *evsel_match(struct evsel *evsel, - struct evlist *evlist) +static struct evsel *evsel_match(struct evsel *evsel, struct evlist *evlist) { struct evsel *e; evlist__for_each_entry(evlist, e) { - if (evsel__match2(evsel, e)) + if ((evsel->core.attr.type == e->core.attr.type) && + (evsel->core.attr.config == e->core.attr.config)) return e; } diff --git a/tools/perf/builtin-ftrace.c b/tools/perf/builtin-ftrace.c index abcdc49b7a98..a56cf8b0a7d4 100644 --- a/tools/perf/builtin-ftrace.c +++ b/tools/perf/builtin-ftrace.c @@ -815,7 +815,7 @@ static void display_histogram(int buckets[], bool use_nsec) bar_len = buckets[0] * bar_total / total; printf(" %4d - %-4d %s | %10d | %.*s%*s |\n", - 0, 1, "us", buckets[0], bar_len, bar, bar_total - bar_len, ""); + 0, 1, use_nsec ? "ns" : "us", buckets[0], bar_len, bar, bar_total - bar_len, ""); for (i = 1; i < NUM_BUCKET - 1; i++) { int start = (1 << (i - 1)); @@ -1151,8 +1151,9 @@ static int cmp_profile_data(const void *a, const void *b) if (v1 > v2) return -1; - else + if (v1 < v2) return 1; + return 0; } static void print_profile_result(struct perf_ftrace *ftrace) diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c index a756147e2eec..4d8d94146f8d 100644 --- a/tools/perf/builtin-kmem.c +++ b/tools/perf/builtin-kmem.c @@ -36,7 +36,7 @@ #include <regex.h> #include <linux/ctype.h> -#include <traceevent/event-parse.h> +#include <event-parse.h> static int kmem_slab; static int kmem_page; diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c index 55ea17c5ff02..274568d712d1 100644 --- a/tools/perf/builtin-kvm.c +++ b/tools/perf/builtin-kvm.c @@ -1226,7 +1226,9 @@ static int cpu_isa_config(struct perf_kvm_stat *kvm) int err; if (kvm->live) { - err = get_cpuid(buf, sizeof(buf)); + struct perf_cpu cpu = {-1}; + + err = get_cpuid(buf, sizeof(buf), cpu); if (err != 0) { pr_err("Failed to look up CPU type: %s\n", str_error_r(err, buf, sizeof(buf))); @@ -2147,6 +2149,7 @@ int cmd_kvm(int argc, const char **argv) "buildid-list", "stat", NULL }; const char *kvm_usage[] = { NULL, NULL }; + exclude_GH_default = true; perf_host = 0; perf_guest = 1; diff --git a/tools/perf/builtin-kwork.c b/tools/perf/builtin-kwork.c index c1daf82c9b92..8234410cba4c 100644 --- a/tools/perf/builtin-kwork.c +++ b/tools/perf/builtin-kwork.c @@ -23,7 +23,7 @@ #include <subcmd/pager.h> #include <subcmd/parse-options.h> -#include <traceevent/event-parse.h> +#include <event-parse.h> #include <errno.h> #include <inttypes.h> diff --git a/tools/perf/builtin-list.c b/tools/perf/builtin-list.c index 65b8cba324be..9e7fdfcdd7ff 100644 --- a/tools/perf/builtin-list.c +++ b/tools/perf/builtin-list.c @@ -19,6 +19,7 @@ #include "util/string2.h" #include "util/strlist.h" #include "util/strbuf.h" +#include "util/tool_pmu.h" #include <subcmd/pager.h> #include <subcmd/parse-options.h> #include <linux/zalloc.h> @@ -112,7 +113,7 @@ static void wordwrap(FILE *fp, const char *s, int start, int max, int corr) } } -static void default_print_event(void *ps, const char *pmu_name, const char *topic, +static void default_print_event(void *ps, const char *topic, const char *pmu_name, const char *event_name, const char *event_alias, const char *scale_unit __maybe_unused, bool deprecated, const char *event_type_desc, @@ -353,7 +354,7 @@ static void fix_escape_fprintf(FILE *fp, struct strbuf *buf, const char *fmt, .. fputs(buf->buf, fp); } -static void json_print_event(void *ps, const char *pmu_name, const char *topic, +static void json_print_event(void *ps, const char *topic, const char *pmu_name, const char *event_name, const char *event_alias, const char *scale_unit, bool deprecated, const char *event_type_desc, @@ -614,9 +615,18 @@ int cmd_list(int argc, const char **argv) event_symbols_hw, PERF_COUNT_HW_MAX); else if (strcmp(argv[i], "sw") == 0 || strcmp(argv[i], "software") == 0) { + char *old_pmu_glob = default_ps.pmu_glob; + print_symbol_events(&print_cb, ps, PERF_TYPE_SOFTWARE, event_symbols_sw, PERF_COUNT_SW_MAX); - print_tool_events(&print_cb, ps); + default_ps.pmu_glob = strdup("tool"); + if (!default_ps.pmu_glob) { + ret = -1; + goto out; + } + perf_pmus__print_pmu_events(&print_cb, ps); + zfree(&default_ps.pmu_glob); + default_ps.pmu_glob = old_pmu_glob; } else if (strcmp(argv[i], "cache") == 0 || strcmp(argv[i], "hwcache") == 0) print_hwcache_events(&print_cb, ps); @@ -664,7 +674,6 @@ int cmd_list(int argc, const char **argv) event_symbols_hw, PERF_COUNT_HW_MAX); print_symbol_events(&print_cb, ps, PERF_TYPE_SOFTWARE, event_symbols_sw, PERF_COUNT_SW_MAX); - print_tool_events(&print_cb, ps); print_hwcache_events(&print_cb, ps); perf_pmus__print_pmu_events(&print_cb, ps); print_tracepoint_events(&print_cb, ps); diff --git a/tools/perf/builtin-probe.c b/tools/perf/builtin-probe.c index 003a3bcebfdf..69800e4d9530 100644 --- a/tools/perf/builtin-probe.c +++ b/tools/perf/builtin-probe.c @@ -229,7 +229,7 @@ static int opt_set_target_ns(const struct option *opt __maybe_unused, /* Command option callbacks */ -#ifdef HAVE_DWARF_SUPPORT +#ifdef HAVE_LIBDW_SUPPORT static int opt_show_lines(const struct option *opt, const char *str, int unset __maybe_unused) { @@ -505,7 +505,7 @@ out: return ret; } -#ifdef HAVE_DWARF_SUPPORT +#ifdef HAVE_LIBDW_SUPPORT #define PROBEDEF_STR \ "[EVENT=]FUNC[@SRC][+OFF|%return|:RL|;PT]|SRC:AL|SRC;PT [[NAME=]ARG ...]" #else @@ -521,7 +521,7 @@ __cmd_probe(int argc, const char **argv) "perf probe [<options>] --add 'PROBEDEF' [--add 'PROBEDEF' ...]", "perf probe [<options>] --del '[GROUP:]EVENT' ...", "perf probe --list [GROUP:]EVENT ...", -#ifdef HAVE_DWARF_SUPPORT +#ifdef HAVE_LIBDW_SUPPORT "perf probe [<options>] --line 'LINEDESC'", "perf probe [<options>] --vars 'PROBEPOINT'", #endif @@ -545,7 +545,7 @@ __cmd_probe(int argc, const char **argv) "\t\tFUNC:\tFunction name\n" "\t\tOFF:\tOffset from function entry (in byte)\n" "\t\t%return:\tPut the probe at function return\n" -#ifdef HAVE_DWARF_SUPPORT +#ifdef HAVE_LIBDW_SUPPORT "\t\tSRC:\tSource code path\n" "\t\tRL:\tRelative line number from function entry.\n" "\t\tAL:\tAbsolute line number in file.\n" @@ -612,11 +612,11 @@ __cmd_probe(int argc, const char **argv) set_option_flag(options, 'd', "del", PARSE_OPT_EXCLUSIVE); set_option_flag(options, 'D', "definition", PARSE_OPT_EXCLUSIVE); set_option_flag(options, 'l', "list", PARSE_OPT_EXCLUSIVE); -#ifdef HAVE_DWARF_SUPPORT +#ifdef HAVE_LIBDW_SUPPORT set_option_flag(options, 'L', "line", PARSE_OPT_EXCLUSIVE); set_option_flag(options, 'V', "vars", PARSE_OPT_EXCLUSIVE); #else -# define set_nobuild(s, l, c) set_option_nobuild(options, s, l, "NO_DWARF=1", c) +# define set_nobuild(s, l, c) set_option_nobuild(options, s, l, "NO_LIBDW=1", c) set_nobuild('L', "line", false); set_nobuild('V', "vars", false); set_nobuild('\0', "externs", false); @@ -694,7 +694,7 @@ __cmd_probe(int argc, const char **argv) if (ret < 0) pr_err_with_code(" Error: Failed to show functions.", ret); return ret; -#ifdef HAVE_DWARF_SUPPORT +#ifdef HAVE_LIBDW_SUPPORT case 'L': ret = show_line_range(¶ms->line_range, params->target, params->nsi, params->uprobes); diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index adbaf80b398c..f83252472921 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -4157,9 +4157,7 @@ int cmd_record(int argc, const char **argv) record.opts.tail_synthesize = true; if (rec->evlist->core.nr_entries == 0) { - bool can_profile_kernel = perf_event_paranoid_check(1); - - err = parse_event(rec->evlist, can_profile_kernel ? "cycles:P" : "cycles:Pu"); + err = parse_event(rec->evlist, "cycles:P"); if (err) goto out; } diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 5dc17ffee27a..048c91960ba9 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -70,7 +70,7 @@ #include <linux/mman.h> #ifdef HAVE_LIBTRACEEVENT -#include <traceevent/event-parse.h> +#include <event-parse.h> #endif struct report { @@ -455,7 +455,7 @@ static int report__setup_sample_type(struct report *rep) if (!(evlist__combined_branch_type(session->evlist) & PERF_SAMPLE_BRANCH_ANY)) rep->nonany_branch_mode = true; -#if !defined(HAVE_LIBUNWIND_SUPPORT) && !defined(HAVE_DWARF_SUPPORT) +#if !defined(HAVE_LIBUNWIND_SUPPORT) && !defined(HAVE_LIBDW_SUPPORT) if (dwarf_callchain_users) { ui__warning("Please install libunwind or libdw " "development packages during the perf build.\n"); @@ -1271,6 +1271,10 @@ static int process_attr(const struct perf_tool *tool __maybe_unused, return 0; } +#define CALLCHAIN_BRANCH_SORT_ORDER \ + "srcline,symbol,dso,callchain_branch_predicted," \ + "callchain_branch_abort,callchain_branch_cycles" + int cmd_report(int argc, const char **argv) { struct perf_session *session; @@ -1639,7 +1643,7 @@ repeat: symbol_conf.use_callchain = true; callchain_register_param(&callchain_param); if (sort_order == NULL) - sort_order = "srcline,symbol,dso"; + sort_order = CALLCHAIN_BRANCH_SORT_ORDER; } if (report.mem_mode) { @@ -1701,7 +1705,7 @@ repeat: report.data_type = true; annotate_opts.annotate_src = false; -#ifndef HAVE_DWARF_GETLOCATIONS_SUPPORT +#ifndef HAVE_LIBDW_SUPPORT pr_err("Error: Data type profiling is disabled due to missing DWARF support\n"); goto error; #endif diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index 5981cc51abc8..7049c60ebf77 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -68,7 +68,6 @@ struct task_desc { struct sched_atom **atoms; pthread_t thread; - sem_t sleep_sem; sem_t ready_for_work; sem_t work_done_sem; @@ -80,12 +79,10 @@ enum sched_event_type { SCHED_EVENT_RUN, SCHED_EVENT_SLEEP, SCHED_EVENT_WAKEUP, - SCHED_EVENT_MIGRATION, }; struct sched_atom { enum sched_event_type type; - int specific_wait; u64 timestamp; u64 duration; unsigned long nr; @@ -228,6 +225,7 @@ struct perf_sched { bool show_wakeups; bool show_next; bool show_migrations; + bool pre_migrations; bool show_state; bool show_prio; u64 skipped_samples; @@ -247,7 +245,9 @@ struct thread_runtime { u64 dt_iowait; /* time between CPU access by iowait (off cpu) */ u64 dt_preempt; /* time between CPU access by preempt (off cpu) */ u64 dt_delay; /* time between wakeup and sched-in */ + u64 dt_pre_mig; /* time between migration and wakeup */ u64 ready_to_run; /* time of wakeup */ + u64 migrated; /* time when a thread is migrated */ struct stats run_stats; u64 total_run_time; @@ -255,6 +255,7 @@ struct thread_runtime { u64 total_iowait_time; u64 total_preempt_time; u64 total_delay_time; + u64 total_pre_mig_time; char last_state; @@ -421,14 +422,13 @@ static void add_sched_event_wakeup(struct perf_sched *sched, struct task_desc *t wakee_event->wait_sem = zalloc(sizeof(*wakee_event->wait_sem)); sem_init(wakee_event->wait_sem, 0, 0); - wakee_event->specific_wait = 1; event->wait_sem = wakee_event->wait_sem; sched->nr_wakeup_events++; } static void add_sched_event_sleep(struct perf_sched *sched, struct task_desc *task, - u64 timestamp, const char task_state __maybe_unused) + u64 timestamp) { struct sched_atom *event = get_new_event(task, timestamp); @@ -468,7 +468,7 @@ static struct task_desc *register_pid(struct perf_sched *sched, * every task starts in sleeping state - this gets ignored * if there's no wakeup pointing to this sleep state: */ - add_sched_event_sleep(sched, task, 0, 0); + add_sched_event_sleep(sched, task, 0); sched->pid_to_task[pid] = task; sched->nr_tasks++; @@ -529,8 +529,6 @@ static void perf_sched__process_event(struct perf_sched *sched, ret = sem_post(atom->wait_sem); BUG_ON(ret); break; - case SCHED_EVENT_MIGRATION: - break; default: BUG_ON(1); } @@ -673,7 +671,6 @@ static void create_tasks(struct perf_sched *sched) parms->task = task = sched->tasks[i]; parms->sched = sched; parms->fd = self_open_counters(sched, i); - sem_init(&task->sleep_sem, 0, 0); sem_init(&task->ready_for_work, 0, 0); sem_init(&task->work_done_sem, 0, 0); task->curr_event = 0; @@ -697,7 +694,6 @@ static void destroy_tasks(struct perf_sched *sched) task = sched->tasks[i]; err = pthread_join(task->thread, NULL); BUG_ON(err); - sem_destroy(&task->sleep_sem); sem_destroy(&task->ready_for_work); sem_destroy(&task->work_done_sem); } @@ -751,7 +747,6 @@ static void wait_for_tasks(struct perf_sched *sched) for (i = 0; i < sched->nr_tasks; i++) { task = sched->tasks[i]; - sem_init(&task->sleep_sem, 0, 0); task->curr_event = 0; } } @@ -852,7 +847,6 @@ static int replay_switch_event(struct perf_sched *sched, *next_comm = evsel__strval(evsel, sample, "next_comm"); const u32 prev_pid = evsel__intval(evsel, sample, "prev_pid"), next_pid = evsel__intval(evsel, sample, "next_pid"); - const char prev_state = evsel__taskstate(evsel, sample, "prev_state"); struct task_desc *prev, __maybe_unused *next; u64 timestamp0, timestamp = sample->time; int cpu = sample->cpu; @@ -884,7 +878,7 @@ static int replay_switch_event(struct perf_sched *sched, sched->cpu_last_switched[cpu] = timestamp; add_sched_event_run(sched, prev, timestamp, delta); - add_sched_event_sleep(sched, prev, timestamp, prev_state); + add_sched_event_sleep(sched, prev, timestamp); return 0; } @@ -1749,7 +1743,7 @@ static int map_switch_event(struct perf_sched *sched, struct evsel *evsel, } if (sched->map.comp && new_cpu) - color_fprintf(stdout, color, " (CPU %d)", this_cpu); + color_fprintf(stdout, color, " (CPU %d)", this_cpu.cpu); if (proceed != 1) { color_fprintf(stdout, color, "\n"); @@ -2083,14 +2077,15 @@ static void timehist_header(struct perf_sched *sched) printf(" "); } - if (sched->show_prio) { - printf(" %-*s %-*s %9s %9s %9s", - comm_width, "task name", MAX_PRIO_STR_LEN, "prio", - "wait time", "sch delay", "run time"); - } else { - printf(" %-*s %9s %9s %9s", comm_width, - "task name", "wait time", "sch delay", "run time"); - } + printf(" %-*s", comm_width, "task name"); + + if (sched->show_prio) + printf(" %-*s", MAX_PRIO_STR_LEN, "prio"); + + printf(" %9s %9s %9s", "wait time", "sch delay", "run time"); + + if (sched->pre_migrations) + printf(" %9s", "pre-mig time"); if (sched->show_state) printf(" %s", "state"); @@ -2105,17 +2100,15 @@ static void timehist_header(struct perf_sched *sched) if (sched->show_cpu_visual) printf(" %*s ", ncpus, ""); - if (sched->show_prio) { - printf(" %-*s %-*s %9s %9s %9s", - comm_width, "[tid/pid]", MAX_PRIO_STR_LEN, "", - "(msec)", "(msec)", "(msec)"); - } else { - printf(" %-*s %9s %9s %9s", comm_width, - "[tid/pid]", "(msec)", "(msec)", "(msec)"); - } + printf(" %-*s", comm_width, "[tid/pid]"); - if (sched->show_state) - printf(" %5s", ""); + if (sched->show_prio) + printf(" %-*s", MAX_PRIO_STR_LEN, ""); + + printf(" %9s %9s %9s", "(msec)", "(msec)", "(msec)"); + + if (sched->pre_migrations) + printf(" %9s", "(msec)"); printf("\n"); @@ -2127,15 +2120,15 @@ static void timehist_header(struct perf_sched *sched) if (sched->show_cpu_visual) printf(" %.*s ", ncpus, graph_dotted_line); - if (sched->show_prio) { - printf(" %.*s %.*s %.9s %.9s %.9s", - comm_width, graph_dotted_line, MAX_PRIO_STR_LEN, graph_dotted_line, - graph_dotted_line, graph_dotted_line, graph_dotted_line); - } else { - printf(" %.*s %.9s %.9s %.9s", comm_width, - graph_dotted_line, graph_dotted_line, graph_dotted_line, - graph_dotted_line); - } + printf(" %.*s", comm_width, graph_dotted_line); + + if (sched->show_prio) + printf(" %.*s", MAX_PRIO_STR_LEN, graph_dotted_line); + + printf(" %.9s %.9s %.9s", graph_dotted_line, graph_dotted_line, graph_dotted_line); + + if (sched->pre_migrations) + printf(" %.9s", graph_dotted_line); if (sched->show_state) printf(" %.5s", graph_dotted_line); @@ -2190,6 +2183,8 @@ static void timehist_print_sample(struct perf_sched *sched, print_sched_time(tr->dt_delay, 6); print_sched_time(tr->dt_run, 6); + if (sched->pre_migrations) + print_sched_time(tr->dt_pre_mig, 6); if (sched->show_state) printf(" %5c ", thread__tid(thread) == 0 ? 'I' : state); @@ -2227,18 +2222,21 @@ out: * last_time = time of last sched change event for current task * (i.e, time process was last scheduled out) * ready_to_run = time of wakeup for current task + * migrated = time of task migration to another CPU * - * -----|------------|------------|------------|------ - * last ready tprev t + * -----|-------------|-------------|-------------|-------------|----- + * last ready migrated tprev t * time to run * - * |-------- dt_wait --------| - * |- dt_delay -|-- dt_run --| + * |---------------- dt_wait ----------------| + * |--------- dt_delay ---------|-- dt_run --| + * |- dt_pre_mig -| * - * dt_run = run time of current task - * dt_wait = time between last schedule out event for task and tprev - * represents time spent off the cpu - * dt_delay = time between wakeup and schedule-in of task + * dt_run = run time of current task + * dt_wait = time between last schedule out event for task and tprev + * represents time spent off the cpu + * dt_delay = time between wakeup and schedule-in of task + * dt_pre_mig = time between wakeup and migration to another CPU */ static void timehist_update_runtime_stats(struct thread_runtime *r, @@ -2249,6 +2247,7 @@ static void timehist_update_runtime_stats(struct thread_runtime *r, r->dt_iowait = 0; r->dt_preempt = 0; r->dt_run = 0; + r->dt_pre_mig = 0; if (tprev) { r->dt_run = t - tprev; @@ -2257,6 +2256,9 @@ static void timehist_update_runtime_stats(struct thread_runtime *r, pr_debug("time travel: wakeup time for task > previous sched_switch event\n"); else r->dt_delay = tprev - r->ready_to_run; + + if ((r->migrated > r->ready_to_run) && (r->migrated < tprev)) + r->dt_pre_mig = r->migrated - r->ready_to_run; } if (r->last_time > tprev) @@ -2280,6 +2282,7 @@ static void timehist_update_runtime_stats(struct thread_runtime *r, r->total_sleep_time += r->dt_sleep; r->total_iowait_time += r->dt_iowait; r->total_preempt_time += r->dt_preempt; + r->total_pre_mig_time += r->dt_pre_mig; } static bool is_idle_sample(struct perf_sample *sample, @@ -2693,9 +2696,13 @@ static int timehist_migrate_task_event(const struct perf_tool *tool, return -1; tr->migrations++; + tr->migrated = sample->time; /* show migrations if requested */ - timehist_print_migration_event(sched, evsel, sample, machine, thread); + if (sched->show_migrations) { + timehist_print_migration_event(sched, evsel, sample, + machine, thread); + } return 0; } @@ -2846,11 +2853,13 @@ out: /* last state is used to determine where to account wait time */ tr->last_state = state; - /* sched out event for task so reset ready to run time */ + /* sched out event for task so reset ready to run time and migrated time */ if (state == 'R') tr->ready_to_run = t; else tr->ready_to_run = 0; + + tr->migrated = 0; } evsel__save_time(evsel, sample->time, sample->cpu); @@ -3290,8 +3299,8 @@ static int perf_sched__timehist(struct perf_sched *sched) goto out; } - if (sched->show_migrations && - perf_session__set_tracepoints_handlers(session, migrate_handlers)) + if ((sched->show_migrations || sched->pre_migrations) && + perf_session__set_tracepoints_handlers(session, migrate_handlers)) goto out; /* pre-allocate struct for per-CPU idle stats */ @@ -3833,6 +3842,7 @@ int cmd_sched(int argc, const char **argv) OPT_BOOLEAN(0, "show-prio", &sched.show_prio, "Show task priority"), OPT_STRING(0, "prio", &sched.prio_str, "prio", "analyze events only for given task priority(ies)"), + OPT_BOOLEAN('P', "pre-migrations", &sched.pre_migrations, "Show pre-migration wait time"), OPT_PARENT(sched_options) }; diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index a644787fa9e1..9e47905f75a6 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -67,7 +67,7 @@ #include <linux/ctype.h> #ifdef HAVE_LIBTRACEEVENT -#include <traceevent/event-parse.h> +#include <event-parse.h> #endif static char const *script_name; @@ -1728,6 +1728,7 @@ static struct { {PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_TRACE_END, "tr end"}, {PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL | PERF_IP_FLAG_VMENTRY, "vmentry"}, {PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL | PERF_IP_FLAG_VMEXIT, "vmexit"}, + {PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_BRANCH_MISS, "br miss"}, {0, NULL} }; @@ -2136,11 +2137,11 @@ struct metric_ctx { }; static void script_print_metric(struct perf_stat_config *config __maybe_unused, - void *ctx, const char *color, - const char *fmt, - const char *unit, double val) + void *ctx, enum metric_threshold_classify thresh, + const char *fmt, const char *unit, double val) { struct metric_ctx *mctx = ctx; + const char *color = metric_threshold_classify__color(thresh); if (!fmt) return; diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 689a3d43c258..fdf5172646a5 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -46,6 +46,7 @@ #include "util/parse-events.h" #include "util/pmus.h" #include "util/pmu.h" +#include "util/tool_pmu.h" #include "util/event.h" #include "util/evlist.h" #include "util/evsel.h" @@ -294,14 +295,14 @@ static int read_single_counter(struct evsel *counter, int cpu_map_idx, int threa * terminates. Use the wait4 values in that case. */ if (err && cpu_map_idx == 0 && - (evsel__tool_event(counter) == PERF_TOOL_USER_TIME || - evsel__tool_event(counter) == PERF_TOOL_SYSTEM_TIME)) { + (evsel__tool_event(counter) == TOOL_PMU__EVENT_USER_TIME || + evsel__tool_event(counter) == TOOL_PMU__EVENT_SYSTEM_TIME)) { u64 val, *start_time; struct perf_counts_values *count = perf_counts(counter->counts, cpu_map_idx, thread); start_time = xyarray__entry(counter->start_times, cpu_map_idx, thread); - if (evsel__tool_event(counter) == PERF_TOOL_USER_TIME) + if (evsel__tool_event(counter) == TOOL_PMU__EVENT_USER_TIME) val = ru_stats.ru_utime_usec_stat.mean; else val = ru_stats.ru_stime_usec_stat.mean; @@ -639,8 +640,7 @@ static enum counter_recovery stat_handle_error(struct evsel *counter) * (behavior changed with commit b0a873e). */ if (errno == EINVAL || errno == ENOSYS || - errno == ENOENT || errno == EOPNOTSUPP || - errno == ENXIO) { + errno == ENOENT || errno == ENXIO) { if (verbose > 0) ui__warning("%s event is not supported by the kernel.\n", evsel__name(counter)); @@ -658,7 +658,7 @@ static enum counter_recovery stat_handle_error(struct evsel *counter) if (verbose > 0) ui__warning("%s\n", msg); return COUNTER_RETRY; - } else if (target__has_per_thread(&target) && + } else if (target__has_per_thread(&target) && errno != EOPNOTSUPP && evsel_list->core.threads && evsel_list->core.threads->err_thread != -1) { /* @@ -679,6 +679,19 @@ static enum counter_recovery stat_handle_error(struct evsel *counter) return COUNTER_SKIP; } + if (errno == EOPNOTSUPP) { + if (verbose > 0) { + ui__warning("%s event is not supported by the kernel.\n", + evsel__name(counter)); + } + counter->supported = false; + counter->errored = true; + + if ((evsel__leader(counter) != counter) || + !(counter->core.leader->nr_members > 1)) + return COUNTER_SKIP; + } + evsel__open_strerror(counter, &target, errno, msg, sizeof(msg)); ui__error("%s\n", msg); @@ -716,15 +729,19 @@ static int __run_perf_stat(int argc, const char **argv, int run_idx) } if (!cpu_map__is_dummy(evsel_list->core.user_requested_cpus)) { - if (affinity__setup(&saved_affinity) < 0) - return -1; + if (affinity__setup(&saved_affinity) < 0) { + err = -1; + goto err_out; + } affinity = &saved_affinity; } evlist__for_each_entry(evsel_list, counter) { counter->reset_group = false; - if (bpf_counter__load(counter, &target)) - return -1; + if (bpf_counter__load(counter, &target)) { + err = -1; + goto err_out; + } if (!(evsel__is_bperf(counter))) all_counters_use_bpf = false; } @@ -767,7 +784,8 @@ try_again: switch (stat_handle_error(counter)) { case COUNTER_FATAL: - return -1; + err = -1; + goto err_out; case COUNTER_RETRY: goto try_again; case COUNTER_SKIP: @@ -808,7 +826,8 @@ try_again_reset: switch (stat_handle_error(counter)) { case COUNTER_FATAL: - return -1; + err = -1; + goto err_out; case COUNTER_RETRY: goto try_again_reset; case COUNTER_SKIP: @@ -821,6 +840,7 @@ try_again_reset: } } affinity__cleanup(affinity); + affinity = NULL; evlist__for_each_entry(evsel_list, counter) { if (!counter->supported) { @@ -833,8 +853,10 @@ try_again_reset: stat_config.unit_width = l; if (evsel__should_store_id(counter) && - evsel__store_ids(counter, evsel_list)) - return -1; + evsel__store_ids(counter, evsel_list)) { + err = -1; + goto err_out; + } } if (evlist__apply_filters(evsel_list, &counter, &target)) { @@ -855,20 +877,23 @@ try_again_reset: } if (err < 0) - return err; + goto err_out; err = perf_event__synthesize_stat_events(&stat_config, NULL, evsel_list, process_synthesized_event, is_pipe); if (err < 0) - return err; + goto err_out; + } if (target.initial_delay) { pr_info(EVLIST_DISABLED_MSG); } else { err = enable_counters(); - if (err) - return -1; + if (err) { + err = -1; + goto err_out; + } } /* Exec the command, if any */ @@ -878,8 +903,10 @@ try_again_reset: if (target.initial_delay > 0) { usleep(target.initial_delay * USEC_PER_MSEC); err = enable_counters(); - if (err) - return -1; + if (err) { + err = -1; + goto err_out; + } pr_info(EVLIST_ENABLED_MSG); } @@ -899,7 +926,8 @@ try_again_reset: if (workload_exec_errno) { const char *emsg = str_error_r(workload_exec_errno, msg, sizeof(msg)); pr_err("Workload failed: %s\n", emsg); - return -1; + err = -1; + goto err_out; } if (WIFSIGNALED(status)) @@ -946,8 +974,23 @@ try_again_reset: evlist__close(evsel_list); return WEXITSTATUS(status); + +err_out: + if (forks) + evlist__cancel_workload(evsel_list); + + affinity__cleanup(affinity); + return err; } +/* + * Returns -1 for fatal errors which signifies to not continue + * when in repeat mode. + * + * Returns < -1 error codes when stat record is used. These + * result in the stat information being displayed, but writing + * to the file fails and is non fatal. + */ static int run_perf_stat(int argc, const char **argv, int run_idx) { int ret; @@ -1814,130 +1857,25 @@ static int perf_stat_init_aggr_mode_file(struct perf_stat *st) } /* - * Add default attributes, if there were no attributes specified or + * Add default events, if there were no attributes specified or * if -d/--detailed, -d -d or -d -d -d is used: */ -static int add_default_attributes(void) +static int add_default_events(void) { - struct perf_event_attr default_attrs0[] = { - - { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK }, - { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CONTEXT_SWITCHES }, - { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CPU_MIGRATIONS }, - { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_PAGE_FAULTS }, - - { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES }, -}; - struct perf_event_attr frontend_attrs[] = { - { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES_FRONTEND }, -}; - struct perf_event_attr backend_attrs[] = { - { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES_BACKEND }, -}; - struct perf_event_attr default_attrs1[] = { - { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_INSTRUCTIONS }, - { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS }, - { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_MISSES }, - -}; - -/* - * Detailed stats (-d), covering the L1 and last level data caches: - */ - struct perf_event_attr detailed_attrs[] = { - - { .type = PERF_TYPE_HW_CACHE, - .config = - PERF_COUNT_HW_CACHE_L1D << 0 | - (PERF_COUNT_HW_CACHE_OP_READ << 8) | - (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) }, - - { .type = PERF_TYPE_HW_CACHE, - .config = - PERF_COUNT_HW_CACHE_L1D << 0 | - (PERF_COUNT_HW_CACHE_OP_READ << 8) | - (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) }, - - { .type = PERF_TYPE_HW_CACHE, - .config = - PERF_COUNT_HW_CACHE_LL << 0 | - (PERF_COUNT_HW_CACHE_OP_READ << 8) | - (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) }, - - { .type = PERF_TYPE_HW_CACHE, - .config = - PERF_COUNT_HW_CACHE_LL << 0 | - (PERF_COUNT_HW_CACHE_OP_READ << 8) | - (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) }, -}; - -/* - * Very detailed stats (-d -d), covering the instruction cache and the TLB caches: - */ - struct perf_event_attr very_detailed_attrs[] = { - - { .type = PERF_TYPE_HW_CACHE, - .config = - PERF_COUNT_HW_CACHE_L1I << 0 | - (PERF_COUNT_HW_CACHE_OP_READ << 8) | - (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) }, - - { .type = PERF_TYPE_HW_CACHE, - .config = - PERF_COUNT_HW_CACHE_L1I << 0 | - (PERF_COUNT_HW_CACHE_OP_READ << 8) | - (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) }, - - { .type = PERF_TYPE_HW_CACHE, - .config = - PERF_COUNT_HW_CACHE_DTLB << 0 | - (PERF_COUNT_HW_CACHE_OP_READ << 8) | - (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) }, - - { .type = PERF_TYPE_HW_CACHE, - .config = - PERF_COUNT_HW_CACHE_DTLB << 0 | - (PERF_COUNT_HW_CACHE_OP_READ << 8) | - (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) }, - - { .type = PERF_TYPE_HW_CACHE, - .config = - PERF_COUNT_HW_CACHE_ITLB << 0 | - (PERF_COUNT_HW_CACHE_OP_READ << 8) | - (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) }, - - { .type = PERF_TYPE_HW_CACHE, - .config = - PERF_COUNT_HW_CACHE_ITLB << 0 | - (PERF_COUNT_HW_CACHE_OP_READ << 8) | - (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) }, - -}; + const char *pmu = parse_events_option_args.pmu_filter ?: "all"; + struct parse_events_error err; + struct evlist *evlist = evlist__new(); + struct evsel *evsel; + int ret = 0; -/* - * Very, very detailed stats (-d -d -d), adding prefetch events: - */ - struct perf_event_attr very_very_detailed_attrs[] = { - - { .type = PERF_TYPE_HW_CACHE, - .config = - PERF_COUNT_HW_CACHE_L1D << 0 | - (PERF_COUNT_HW_CACHE_OP_PREFETCH << 8) | - (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) }, - - { .type = PERF_TYPE_HW_CACHE, - .config = - PERF_COUNT_HW_CACHE_L1D << 0 | - (PERF_COUNT_HW_CACHE_OP_PREFETCH << 8) | - (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) }, -}; + if (!evlist) + return -ENOMEM; - struct perf_event_attr default_null_attrs[] = {}; - const char *pmu = parse_events_option_args.pmu_filter ?: "all"; + parse_events_error__init(&err); /* Set attrs if no event is selected and !null_run: */ if (stat_config.null_run) - return 0; + goto out; if (transaction_run) { /* Handle -T as -M transaction. Once platform specific metrics @@ -1947,9 +1885,10 @@ static int add_default_attributes(void) */ if (!metricgroup__has_metric(pmu, "transaction")) { pr_err("Missing transaction metrics\n"); - return -1; + ret = -1; + goto out; } - return metricgroup__parse_groups(evsel_list, pmu, "transaction", + ret = metricgroup__parse_groups(evlist, pmu, "transaction", stat_config.metric_no_group, stat_config.metric_no_merge, stat_config.metric_no_threshold, @@ -1957,6 +1896,7 @@ static int add_default_attributes(void) stat_config.system_wide, stat_config.hardware_aware_grouping, &stat_config.metric_events); + goto out; } if (smi_cost) { @@ -1964,26 +1904,29 @@ static int add_default_attributes(void) if (sysfs__read_int(FREEZE_ON_SMI_PATH, &smi) < 0) { pr_err("freeze_on_smi is not supported.\n"); - return -1; + ret = -1; + goto out; } if (!smi) { if (sysfs__write_int(FREEZE_ON_SMI_PATH, 1) < 0) { - fprintf(stderr, "Failed to set freeze_on_smi.\n"); - return -1; + pr_err("Failed to set freeze_on_smi.\n"); + ret = -1; + goto out; } smi_reset = true; } if (!metricgroup__has_metric(pmu, "smi")) { pr_err("Missing smi metrics\n"); - return -1; + ret = -1; + goto out; } if (!force_metric_only) stat_config.metric_only = true; - return metricgroup__parse_groups(evsel_list, pmu, "smi", + ret = metricgroup__parse_groups(evlist, pmu, "smi", stat_config.metric_no_group, stat_config.metric_no_merge, stat_config.metric_no_threshold, @@ -1991,6 +1934,7 @@ static int add_default_attributes(void) stat_config.system_wide, stat_config.hardware_aware_grouping, &stat_config.metric_events); + goto out; } if (topdown_run) { @@ -2003,21 +1947,23 @@ static int add_default_attributes(void) if (!max_level) { pr_err("Topdown requested but the topdown metric groups aren't present.\n" "(See perf list the metric groups have names like TopdownL1)\n"); - return -1; + ret = -1; + goto out; } if (stat_config.topdown_level > max_level) { pr_err("Invalid top-down metrics level. The max level is %u.\n", max_level); - return -1; - } else if (!stat_config.topdown_level) + ret = -1; + goto out; + } else if (!stat_config.topdown_level) { stat_config.topdown_level = 1; - + } if (!stat_config.interval && !stat_config.metric_only) { fprintf(stat_config.output, "Topdown accuracy may decrease when measuring long periods.\n" "Please print the result regularly, e.g. -I1000\n"); } str[8] = stat_config.topdown_level + '0'; - if (metricgroup__parse_groups(evsel_list, + if (metricgroup__parse_groups(evlist, pmu, str, /*metric_no_group=*/false, /*metric_no_merge=*/false, @@ -2025,41 +1971,49 @@ static int add_default_attributes(void) stat_config.user_requested_cpu_list, stat_config.system_wide, stat_config.hardware_aware_grouping, - &stat_config.metric_events) < 0) - return -1; + &stat_config.metric_events) < 0) { + ret = -1; + goto out; + } } if (!stat_config.topdown_level) stat_config.topdown_level = 1; - if (!evsel_list->core.nr_entries) { + if (!evlist->core.nr_entries && !evsel_list->core.nr_entries) { /* No events so add defaults. */ if (target__has_cpu(&target)) - default_attrs0[0].config = PERF_COUNT_SW_CPU_CLOCK; + ret = parse_events(evlist, "cpu-clock", &err); + else + ret = parse_events(evlist, "task-clock", &err); + if (ret) + goto out; + + ret = parse_events(evlist, + "context-switches," + "cpu-migrations," + "page-faults," + "instructions," + "cycles," + "stalled-cycles-frontend," + "stalled-cycles-backend," + "branches," + "branch-misses", + &err); + if (ret) + goto out; - if (evlist__add_default_attrs(evsel_list, default_attrs0) < 0) - return -1; - if (perf_pmus__have_event("cpu", "stalled-cycles-frontend")) { - if (evlist__add_default_attrs(evsel_list, frontend_attrs) < 0) - return -1; - } - if (perf_pmus__have_event("cpu", "stalled-cycles-backend")) { - if (evlist__add_default_attrs(evsel_list, backend_attrs) < 0) - return -1; - } - if (evlist__add_default_attrs(evsel_list, default_attrs1) < 0) - return -1; /* * Add TopdownL1 metrics if they exist. To minimize * multiplexing, don't request threshold computation. */ if (metricgroup__has_metric(pmu, "Default")) { struct evlist *metric_evlist = evlist__new(); - struct evsel *metric_evsel; - - if (!metric_evlist) - return -1; + if (!metric_evlist) { + ret = -ENOMEM; + goto out; + } if (metricgroup__parse_groups(metric_evlist, pmu, "Default", /*metric_no_group=*/false, /*metric_no_merge=*/false, @@ -2067,43 +2021,71 @@ static int add_default_attributes(void) stat_config.user_requested_cpu_list, stat_config.system_wide, stat_config.hardware_aware_grouping, - &stat_config.metric_events) < 0) - return -1; - - evlist__for_each_entry(metric_evlist, metric_evsel) { - metric_evsel->skippable = true; - metric_evsel->default_metricgroup = true; + &stat_config.metric_events) < 0) { + ret = -1; + goto out; } - evlist__splice_list_tail(evsel_list, &metric_evlist->core.entries); + + evlist__for_each_entry(metric_evlist, evsel) + evsel->default_metricgroup = true; + + evlist__splice_list_tail(evlist, &metric_evlist->core.entries); evlist__delete(metric_evlist); } - - /* Platform specific attrs */ - if (evlist__add_default_attrs(evsel_list, default_null_attrs) < 0) - return -1; } /* Detailed events get appended to the event list: */ - if (detailed_run < 1) - return 0; - - /* Append detailed run extra attributes: */ - if (evlist__add_default_attrs(evsel_list, detailed_attrs) < 0) - return -1; - - if (detailed_run < 2) - return 0; - - /* Append very detailed run extra attributes: */ - if (evlist__add_default_attrs(evsel_list, very_detailed_attrs) < 0) - return -1; - - if (detailed_run < 3) - return 0; - - /* Append very, very detailed run extra attributes: */ - return evlist__add_default_attrs(evsel_list, very_very_detailed_attrs); + if (!ret && detailed_run >= 1) { + /* + * Detailed stats (-d), covering the L1 and last level data + * caches: + */ + ret = parse_events(evlist, + "L1-dcache-loads," + "L1-dcache-load-misses," + "LLC-loads," + "LLC-load-misses", + &err); + } + if (!ret && detailed_run >= 2) { + /* + * Very detailed stats (-d -d), covering the instruction cache + * and the TLB caches: + */ + ret = parse_events(evlist, + "L1-icache-loads," + "L1-icache-load-misses," + "dTLB-loads," + "dTLB-load-misses," + "iTLB-loads," + "iTLB-load-misses", + &err); + } + if (!ret && detailed_run >= 3) { + /* + * Very, very detailed stats (-d -d -d), adding prefetch events: + */ + ret = parse_events(evlist, + "L1-dcache-prefetches," + "L1-dcache-prefetch-misses", + &err); + } +out: + if (!ret) { + evlist__for_each_entry(evlist, evsel) { + /* + * Make at least one event non-skippable so fatal errors are visible. + * 'cycles' always used to be default and non-skippable, so use that. + */ + if (strcmp("cycles", evsel__name(evsel))) + evsel->skippable = true; + } + } + parse_events_error__exit(&err); + evlist__splice_list_tail(evsel_list, &evlist->core.entries); + evlist__delete(evlist); + return ret; } static const char * const stat_record_usage[] = { @@ -2591,6 +2573,14 @@ int cmd_stat(int argc, const char **argv) goto out; } + if (stat_config.csv_output || (stat_config.metric_only && stat_config.json_output)) { + /* + * Current CSV and metric-only JSON output doesn't display the + * metric threshold so don't compute it. + */ + stat_config.metric_no_threshold = true; + } + if (stat_config.walltime_run_table && stat_config.run_count <= 1) { fprintf(stderr, "--table is only supported with -r\n"); parse_options_usage(stat_usage, stat_options, "r", 1); @@ -2651,6 +2641,7 @@ int cmd_stat(int argc, const char **argv) } else if (big_num_opt == 0) /* User passed --no-big-num */ stat_config.big_num = false; + target.inherit = !stat_config.no_inherit; err = target__validate(&target); if (err) { target__strerror(&target, err, errbuf, BUFSIZ); @@ -2760,7 +2751,7 @@ int cmd_stat(int argc, const char **argv) } } - if (add_default_attributes()) + if (add_default_events()) goto out; if (stat_config.cgroup_list) { @@ -2879,7 +2870,10 @@ int cmd_stat(int argc, const char **argv) evlist__reset_prev_raw_counts(evsel_list); status = run_perf_stat(argc, argv, run_idx); - if (forever && status != -1 && !interval) { + if (status == -1) + break; + + if (forever && !interval) { print_counters(NULL, argc, argv); perf_stat__reset_stats(); } diff --git a/tools/perf/builtin-timechart.c b/tools/perf/builtin-timechart.c index 218c8b44d7be..068d297aaf44 100644 --- a/tools/perf/builtin-timechart.c +++ b/tools/perf/builtin-timechart.c @@ -38,7 +38,7 @@ #include "util/tracepoint.h" #include "util/util.h" #include <linux/err.h> -#include <traceevent/event-parse.h> +#include <event-parse.h> #ifdef LACKS_OPEN_MEMSTREAM_PROTOTYPE FILE *open_memstream(char **ptr, size_t *sizeloc); @@ -1158,7 +1158,6 @@ static void draw_io_bars(struct timechart *tchart) } svg_box(Y, c->start_time, c->end_time, "process3"); - sample = c->io_samples; for (sample = c->io_samples; sample; sample = sample->next) { double h = (double)sample->bytes / c->max_bytes; diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index f6e847529073..6a1a128fe645 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -88,7 +88,7 @@ #include <perf/mmap.h> #ifdef HAVE_LIBTRACEEVENT -#include <traceevent/event-parse.h> +#include <event-parse.h> #endif #ifndef O_CLOEXEC @@ -1399,7 +1399,7 @@ static const struct syscall_fmt syscall_fmts[] = { .arg = { [2] = { .scnprintf = SCA_WAITID_OPTIONS, /* options */ }, }, }, { .name = "waitid", .errpid = true, .arg = { [3] = { .scnprintf = SCA_WAITID_OPTIONS, /* options */ }, }, }, - { .name = "write", .errpid = true, + { .name = "write", .arg = { [1] = { .scnprintf = SCA_BUF /* buf */, .from_user = true, }, }, }, }; @@ -1873,7 +1873,7 @@ static int trace__process_event(struct trace *trace, struct machine *machine, switch (event->header.type) { case PERF_RECORD_LOST: color_fprintf(trace->output, PERF_COLOR_RED, - "LOST %" PRIu64 " events!\n", event->lost.lost); + "LOST %" PRIu64 " events!\n", (u64)event->lost.lost); ret = machine__process_lost_event(machine, event, sample); break; default: @@ -2702,6 +2702,7 @@ static int trace__fprintf_sys_enter(struct trace *trace, struct evsel *evsel, char msg[1024]; void *args, *augmented_args = NULL; int augmented_args_size; + size_t printed = 0; if (sc == NULL) return -1; @@ -2717,8 +2718,8 @@ static int trace__fprintf_sys_enter(struct trace *trace, struct evsel *evsel, args = perf_evsel__sc_tp_ptr(evsel, args, sample); augmented_args = syscall__augmented_args(sc, sample, &augmented_args_size, trace->raw_augmented_syscalls_args_size); - syscall__scnprintf_args(sc, msg, sizeof(msg), args, augmented_args, augmented_args_size, trace, thread); - fprintf(trace->output, "%s", msg); + printed += syscall__scnprintf_args(sc, msg, sizeof(msg), args, augmented_args, augmented_args_size, trace, thread); + fprintf(trace->output, "%.*s", (int)printed, msg); err = 0; out_put: thread__put(thread); @@ -3087,7 +3088,7 @@ static size_t trace__fprintf_tp_fields(struct trace *trace, struct evsel *evsel, printed += syscall_arg_fmt__scnprintf_val(arg, bf + printed, size - printed, &syscall_arg, val); } - return printed + fprintf(trace->output, "%s", bf); + return printed + fprintf(trace->output, "%.*s", (int)printed, bf); } static int trace__event_handler(struct trace *trace, struct evsel *evsel, @@ -3096,13 +3097,8 @@ static int trace__event_handler(struct trace *trace, struct evsel *evsel, { struct thread *thread; int callchain_ret = 0; - /* - * Check if we called perf_evsel__disable(evsel) due to, for instance, - * this event's max_events having been hit and this is an entry coming - * from the ring buffer that we should discard, since the max events - * have already been considered/printed. - */ - if (evsel->disabled) + + if (evsel->nr_events_printed >= evsel->max_events) return 0; thread = machine__findnew_thread(trace->host, sample->pid, sample->tid); @@ -4326,6 +4322,9 @@ static int trace__run(struct trace *trace, int argc, const char **argv) sizeof(__u32), BPF_ANY); } } + + if (trace->skel) + trace->filter_pids.map = trace->skel->maps.pids_filtered; #endif err = trace__set_filter_pids(trace); if (err < 0) @@ -5449,6 +5448,10 @@ init_augmented_syscall_tp: if (trace.summary_only) trace.summary = trace.summary_only; + /* Keep exited threads, otherwise information might be lost for summary */ + if (trace.summary) + symbol_conf.keep_exited_threads = true; + if (output_name != NULL) { err = trace__open_output(&trace, output_name); if (err < 0) { diff --git a/tools/perf/check-header_ignore_hunks/lib/list_sort.c b/tools/perf/check-header_ignore_hunks/lib/list_sort.c new file mode 100644 index 000000000000..b7316d29857d --- /dev/null +++ b/tools/perf/check-header_ignore_hunks/lib/list_sort.c @@ -0,0 +1,24 @@ +@@ -50,6 +50,7 @@ + struct list_head *a, struct list_head *b) + { + struct list_head *tail = head; ++ u8 count = 0; + + for (;;) { + /* if equal, take 'a' -- important for sort stability */ +@@ -75,6 +76,15 @@ + /* Finish linking remainder of list b on to tail */ + tail->next = b; + do { ++ /* ++ * If the merge is highly unbalanced (e.g. the input is ++ * already sorted), this loop may run many iterations. ++ * Continue callbacks to the client even though no ++ * element comparison is needed, so the client's cmp() ++ * routine can invoke cond_resched() periodically. ++ */ ++ if (unlikely(!++count)) ++ cmp(priv, b, b); + b->prev = tail; + tail = b; + b = b->next; diff --git a/tools/perf/check-headers.sh b/tools/perf/check-headers.sh index 5d4f74b30102..a05c1c105c51 100755 --- a/tools/perf/check-headers.sh +++ b/tools/perf/check-headers.sh @@ -22,6 +22,7 @@ FILES=( "include/vdso/bits.h" "include/linux/const.h" "include/vdso/const.h" + "include/vdso/unaligned.h" "include/linux/hash.h" "include/linux/list-sort.h" "include/uapi/linux/hw_breakpoint.h" @@ -136,6 +137,30 @@ beauty_check () { check_2 "tools/perf/trace/beauty/$file" "$file" "$@" } +check_ignore_some_hunks () { + orig_file="$1" + tools_file="tools/$orig_file" + hunks_to_ignore="tools/perf/check-header_ignore_hunks/$orig_file" + + if [ ! -f "$hunks_to_ignore" ]; then + echo "$hunks_to_ignore not found. Skipping $orig_file check." + FAILURES+=( + "$tools_file $orig_file" + ) + return + fi + + cmd="diff -u \"$tools_file\" \"$orig_file\" | grep -vf \"$hunks_to_ignore\" | wc -l | grep -qw 0" + + if [ -f "$orig_file" ] && ! eval "$cmd" + then + FAILURES+=( + "$tools_file $orig_file" + ) + fi +} + + # Check if we have the kernel headers (tools/perf/../../include), else # we're probably on a detached tarball, so no point in trying to check # differences. @@ -169,7 +194,6 @@ check include/uapi/linux/mman.h '-I "^#include <\(uapi/\)*asm/mman.h>"' check include/linux/build_bug.h '-I "^#\(ifndef\|endif\)\( \/\/\)* static_assert$"' check include/linux/ctype.h '-I "isdigit("' check lib/ctype.c '-I "^EXPORT_SYMBOL" -I "^#include <linux/export.h>" -B' -check lib/list_sort.c '-I "^#include <linux/bug.h>"' # diff non-symmetric files check_2 tools/perf/arch/x86/entry/syscalls/syscall_32.tbl arch/x86/entry/syscalls/syscall_32.tbl @@ -187,6 +211,10 @@ done check_2 tools/perf/util/hashmap.h tools/lib/bpf/hashmap.h check_2 tools/perf/util/hashmap.c tools/lib/bpf/hashmap.c +# Files with larger differences + +check_ignore_some_hunks lib/list_sort.c + cd tools/perf || exit if [ ${#FAILURES[@]} -gt 0 ] diff --git a/tools/perf/dlfilters/dlfilter-test-api-v0.c b/tools/perf/dlfilters/dlfilter-test-api-v0.c index 4083b1abeaab..4ca2d7b2ea6c 100644 --- a/tools/perf/dlfilters/dlfilter-test-api-v0.c +++ b/tools/perf/dlfilters/dlfilter-test-api-v0.c @@ -220,7 +220,7 @@ static int check_sample(struct filter_data *d, const struct perf_dlfilter_sample CHECK_SAMPLE(raw_callchain_nr); CHECK(!sample->raw_callchain); -#define EVENT_NAME "branches:" +#define EVENT_NAME "branches" CHECK(!strncmp(sample->event, EVENT_NAME, strlen(EVENT_NAME))); return 0; diff --git a/tools/perf/dlfilters/dlfilter-test-api-v2.c b/tools/perf/dlfilters/dlfilter-test-api-v2.c index 32ff619e881c..00d73a16c4fd 100644 --- a/tools/perf/dlfilters/dlfilter-test-api-v2.c +++ b/tools/perf/dlfilters/dlfilter-test-api-v2.c @@ -235,7 +235,7 @@ static int check_sample(struct filter_data *d, const struct perf_dlfilter_sample CHECK_SAMPLE(raw_callchain_nr); CHECK(!sample->raw_callchain); -#define EVENT_NAME "branches:" +#define EVENT_NAME "branches" CHECK(!strncmp(sample->event, EVENT_NAME, strlen(EVENT_NAME))); return 0; diff --git a/tools/perf/perf.c b/tools/perf/perf.c index 4def800f4089..a2987f2cfe1a 100644 --- a/tools/perf/perf.c +++ b/tools/perf/perf.c @@ -542,8 +542,6 @@ int main(int argc, const char **argv) } cmd = argv[0]; - test_attr__init(); - /* * We use PATH to find perf commands, but we prepend some higher * precedence paths: the "--exec-path" option, the PERF_EXEC_PATH diff --git a/tools/perf/pmu-events/arch/arm64/freescale/imx91/sys/ddrc.json b/tools/perf/pmu-events/arch/arm64/freescale/imx91/sys/ddrc.json new file mode 100644 index 000000000000..74ac12660a29 --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/freescale/imx91/sys/ddrc.json @@ -0,0 +1,9 @@ +[ + { + "BriefDescription": "ddr cycles event", + "EventCode": "0x00", + "EventName": "imx91_ddr.cycles", + "Unit": "imx9_ddr", + "Compat": "imx91" + } +] diff --git a/tools/perf/pmu-events/arch/arm64/freescale/imx91/sys/metrics.json b/tools/perf/pmu-events/arch/arm64/freescale/imx91/sys/metrics.json new file mode 100644 index 000000000000..f0c5911eb2d0 --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/freescale/imx91/sys/metrics.json @@ -0,0 +1,26 @@ +[ + { + "BriefDescription": "bandwidth usage for lpddr4 evk board", + "MetricName": "imx91_bandwidth_usage.lpddr4", + "MetricExpr": "(((( imx9_ddr0@ddrc_pm_0@ ) * 2 * 8 ) + (( imx9_ddr0@ddrc_pm_3@ + imx9_ddr0@ddrc_pm_5@ + imx9_ddr0@ddrc_pm_7@ + imx9_ddr0@ddrc_pm_9@ - imx9_ddr0@ddrc_pm_2@ - imx9_ddr0@ddrc_pm_4@ - imx9_ddr0@ddrc_pm_6@ - imx9_ddr0@ddrc_pm_8@ ) * 32 )) / duration_time) / (2400 * 1000000 * 2)", + "ScaleUnit": "1e2%", + "Unit": "imx9_ddr", + "Compat": "imx91" + }, + { + "BriefDescription": "bytes all masters read from ddr", + "MetricName": "imx91_ddr_read.all", + "MetricExpr": "( imx9_ddr0@ddrc_pm_0@ ) * 2 * 8", + "ScaleUnit": "9.765625e-4KB", + "Unit": "imx9_ddr", + "Compat": "imx91" + }, + { + "BriefDescription": "bytes all masters write to ddr", + "MetricName": "imx91_ddr_write.all", + "MetricExpr": "( imx9_ddr0@ddrc_pm_3@ + imx9_ddr0@ddrc_pm_5@ + imx9_ddr0@ddrc_pm_7@ + imx9_ddr0@ddrc_pm_9@ - imx9_ddr0@ddrc_pm_2@ - imx9_ddr0@ddrc_pm_4@ - imx9_ddr0@ddrc_pm_6@ - imx9_ddr0@ddrc_pm_8@ ) * 32", + "ScaleUnit": "9.765625e-4KB", + "Unit": "imx9_ddr", + "Compat": "imx91" + } +] diff --git a/tools/perf/pmu-events/arch/arm64/freescale/imx95/sys/metrics.json b/tools/perf/pmu-events/arch/arm64/freescale/imx95/sys/metrics.json index 126ce980f6f2..45a0d51dfb63 100644 --- a/tools/perf/pmu-events/arch/arm64/freescale/imx95/sys/metrics.json +++ b/tools/perf/pmu-events/arch/arm64/freescale/imx95/sys/metrics.json @@ -8,6 +8,14 @@ "Compat": "imx95" }, { + "BriefDescription": "bandwidth usage for lpddr4x evk board", + "MetricName": "imx95_bandwidth_usage.lpddr4x", + "MetricExpr": "(( imx9_ddr0@eddrtq_pm_rd_beat_filt0\\,axi_mask\\=0x000\\,axi_id\\=0x000@ + imx9_ddr0@eddrtq_pm_wr_beat_filt\\,axi_mask\\=0x000\\,axi_id\\=0x000@ ) * 32 / duration_time) / (4000 * 1000000 * 4)", + "ScaleUnit": "1e2%", + "Unit": "imx9_ddr", + "Compat": "imx95" + }, + { "BriefDescription": "bytes of all masters read from ddr", "MetricName": "imx95_ddr_read.all", "MetricExpr": "( imx9_ddr0@eddrtq_pm_rd_beat_filt0\\,axi_mask\\=0x000\\,axi_id\\=0x000@ ) * 32", diff --git a/tools/perf/pmu-events/arch/arm64/hisilicon/hip08/metrics.json b/tools/perf/pmu-events/arch/arm64/hisilicon/hip08/metrics.json index 6463531b9941..b6a0d2de8534 100644 --- a/tools/perf/pmu-events/arch/arm64/hisilicon/hip08/metrics.json +++ b/tools/perf/pmu-events/arch/arm64/hisilicon/hip08/metrics.json @@ -3,235 +3,235 @@ "MetricExpr": "FETCH_BUBBLE / (4 * CPU_CYCLES)", "PublicDescription": "Frontend bound L1 topdown metric", "BriefDescription": "Frontend bound L1 topdown metric", - "DefaultMetricgroupName": "TopDownL1", - "MetricGroup": "Default;TopDownL1", + "DefaultMetricgroupName": "TopdownL1", + "MetricGroup": "Default;TopdownL1", "MetricName": "frontend_bound" }, { "MetricExpr": "(INST_SPEC - INST_RETIRED) / (4 * CPU_CYCLES)", "PublicDescription": "Bad Speculation L1 topdown metric", "BriefDescription": "Bad Speculation L1 topdown metric", - "DefaultMetricgroupName": "TopDownL1", - "MetricGroup": "Default;TopDownL1", + "DefaultMetricgroupName": "TopdownL1", + "MetricGroup": "Default;TopdownL1", "MetricName": "bad_speculation" }, { "MetricExpr": "INST_RETIRED / (CPU_CYCLES * 4)", "PublicDescription": "Retiring L1 topdown metric", "BriefDescription": "Retiring L1 topdown metric", - "DefaultMetricgroupName": "TopDownL1", - "MetricGroup": "Default;TopDownL1", + "DefaultMetricgroupName": "TopdownL1", + "MetricGroup": "Default;TopdownL1", "MetricName": "retiring" }, { "MetricExpr": "1 - (frontend_bound + bad_speculation + retiring)", "PublicDescription": "Backend Bound L1 topdown metric", "BriefDescription": "Backend Bound L1 topdown metric", - "DefaultMetricgroupName": "TopDownL1", - "MetricGroup": "Default;TopDownL1", + "DefaultMetricgroupName": "TopdownL1", + "MetricGroup": "Default;TopdownL1", "MetricName": "backend_bound" }, { "MetricExpr": "armv8_pmuv3_0@event\\=0x201d@ / CPU_CYCLES", "PublicDescription": "Fetch latency bound L2 topdown metric", "BriefDescription": "Fetch latency bound L2 topdown metric", - "MetricGroup": "TopDownL2", + "MetricGroup": "TopdownL2", "MetricName": "fetch_latency_bound" }, { "MetricExpr": "frontend_bound - fetch_latency_bound", "PublicDescription": "Fetch bandwidth bound L2 topdown metric", "BriefDescription": "Fetch bandwidth bound L2 topdown metric", - "MetricGroup": "TopDownL2", + "MetricGroup": "TopdownL2", "MetricName": "fetch_bandwidth_bound" }, { "MetricExpr": "(bad_speculation * BR_MIS_PRED) / (BR_MIS_PRED + armv8_pmuv3_0@event\\=0x2013@)", "PublicDescription": "Branch mispredicts L2 topdown metric", "BriefDescription": "Branch mispredicts L2 topdown metric", - "MetricGroup": "TopDownL2", + "MetricGroup": "TopdownL2", "MetricName": "branch_mispredicts" }, { "MetricExpr": "bad_speculation - branch_mispredicts", "PublicDescription": "Machine clears L2 topdown metric", "BriefDescription": "Machine clears L2 topdown metric", - "MetricGroup": "TopDownL2", + "MetricGroup": "TopdownL2", "MetricName": "machine_clears" }, { "MetricExpr": "(EXE_STALL_CYCLE - (MEM_STALL_ANYLOAD + armv8_pmuv3_0@event\\=0x7005@)) / CPU_CYCLES", "PublicDescription": "Core bound L2 topdown metric", "BriefDescription": "Core bound L2 topdown metric", - "MetricGroup": "TopDownL2", + "MetricGroup": "TopdownL2", "MetricName": "core_bound" }, { "MetricExpr": "(MEM_STALL_ANYLOAD + armv8_pmuv3_0@event\\=0x7005@) / CPU_CYCLES", "PublicDescription": "Memory bound L2 topdown metric", "BriefDescription": "Memory bound L2 topdown metric", - "MetricGroup": "TopDownL2", + "MetricGroup": "TopdownL2", "MetricName": "memory_bound" }, { "MetricExpr": "(((L2I_TLB - L2I_TLB_REFILL) * 15) + (L2I_TLB_REFILL * 100)) / CPU_CYCLES", "PublicDescription": "Idle by itlb miss L3 topdown metric", "BriefDescription": "Idle by itlb miss L3 topdown metric", - "MetricGroup": "TopDownL3", + "MetricGroup": "TopdownL3", "MetricName": "idle_by_itlb_miss" }, { "MetricExpr": "(((L2I_CACHE - L2I_CACHE_REFILL) * 15) + (L2I_CACHE_REFILL * 100)) / CPU_CYCLES", "PublicDescription": "Idle by icache miss L3 topdown metric", "BriefDescription": "Idle by icache miss L3 topdown metric", - "MetricGroup": "TopDownL3", + "MetricGroup": "TopdownL3", "MetricName": "idle_by_icache_miss" }, { "MetricExpr": "(BR_MIS_PRED * 5) / CPU_CYCLES", "PublicDescription": "BP misp flush L3 topdown metric", "BriefDescription": "BP misp flush L3 topdown metric", - "MetricGroup": "TopDownL3", + "MetricGroup": "TopdownL3", "MetricName": "bp_misp_flush" }, { "MetricExpr": "(armv8_pmuv3_0@event\\=0x2013@ * 5) / CPU_CYCLES", "PublicDescription": "OOO flush L3 topdown metric", "BriefDescription": "OOO flush L3 topdown metric", - "MetricGroup": "TopDownL3", + "MetricGroup": "TopdownL3", "MetricName": "ooo_flush" }, { "MetricExpr": "(armv8_pmuv3_0@event\\=0x1001@ * 5) / CPU_CYCLES", "PublicDescription": "Static predictor flush L3 topdown metric", "BriefDescription": "Static predictor flush L3 topdown metric", - "MetricGroup": "TopDownL3", + "MetricGroup": "TopdownL3", "MetricName": "sp_flush" }, { "MetricExpr": "armv8_pmuv3_0@event\\=0x1010@ / BR_MIS_PRED", "PublicDescription": "Indirect branch L3 topdown metric", "BriefDescription": "Indirect branch L3 topdown metric", - "MetricGroup": "TopDownL3", + "MetricGroup": "TopdownL3", "MetricName": "indirect_branch" }, { "MetricExpr": "(armv8_pmuv3_0@event\\=0x1013@ + armv8_pmuv3_0@event\\=0x1016@) / BR_MIS_PRED", "PublicDescription": "Push branch L3 topdown metric", "BriefDescription": "Push branch L3 topdown metric", - "MetricGroup": "TopDownL3", + "MetricGroup": "TopdownL3", "MetricName": "push_branch" }, { "MetricExpr": "armv8_pmuv3_0@event\\=0x100d@ / BR_MIS_PRED", "PublicDescription": "Pop branch L3 topdown metric", "BriefDescription": "Pop branch L3 topdown metric", - "MetricGroup": "TopDownL3", + "MetricGroup": "TopdownL3", "MetricName": "pop_branch" }, { "MetricExpr": "(BR_MIS_PRED - armv8_pmuv3_0@event\\=0x1010@ - armv8_pmuv3_0@event\\=0x1013@ - armv8_pmuv3_0@event\\=0x1016@ - armv8_pmuv3_0@event\\=0x100d@) / BR_MIS_PRED", "PublicDescription": "Other branch L3 topdown metric", "BriefDescription": "Other branch L3 topdown metric", - "MetricGroup": "TopDownL3", + "MetricGroup": "TopdownL3", "MetricName": "other_branch" }, { "MetricExpr": "armv8_pmuv3_0@event\\=0x2012@ / armv8_pmuv3_0@event\\=0x2013@", "PublicDescription": "Nuke flush L3 topdown metric", "BriefDescription": "Nuke flush L3 topdown metric", - "MetricGroup": "TopDownL3", + "MetricGroup": "TopdownL3", "MetricName": "nuke_flush" }, { "MetricExpr": "1 - nuke_flush", "PublicDescription": "Other flush L3 topdown metric", "BriefDescription": "Other flush L3 topdown metric", - "MetricGroup": "TopDownL3", + "MetricGroup": "TopdownL3", "MetricName": "other_flush" }, { "MetricExpr": "armv8_pmuv3_0@event\\=0x2010@ / CPU_CYCLES", "PublicDescription": "Sync stall L3 topdown metric", "BriefDescription": "Sync stall L3 topdown metric", - "MetricGroup": "TopDownL3", + "MetricGroup": "TopdownL3", "MetricName": "sync_stall" }, { "MetricExpr": "armv8_pmuv3_0@event\\=0x2004@ / CPU_CYCLES", "PublicDescription": "Rob stall L3 topdown metric", "BriefDescription": "Rob stall L3 topdown metric", - "MetricGroup": "TopDownL3", + "MetricGroup": "TopdownL3", "MetricName": "rob_stall" }, { "MetricExpr": "(armv8_pmuv3_0@event\\=0x2006@ + armv8_pmuv3_0@event\\=0x2007@ + armv8_pmuv3_0@event\\=0x2008@) / CPU_CYCLES", "PublicDescription": "Ptag stall L3 topdown metric", "BriefDescription": "Ptag stall L3 topdown metric", - "MetricGroup": "TopDownL3", + "MetricGroup": "TopdownL3", "MetricName": "ptag_stall" }, { "MetricExpr": "armv8_pmuv3_0@event\\=0x201e@ / CPU_CYCLES", "PublicDescription": "SaveOpQ stall L3 topdown metric", "BriefDescription": "SaveOpQ stall L3 topdown metric", - "MetricGroup": "TopDownL3", + "MetricGroup": "TopdownL3", "MetricName": "saveopq_stall" }, { "MetricExpr": "armv8_pmuv3_0@event\\=0x2005@ / CPU_CYCLES", "PublicDescription": "PC buffer stall L3 topdown metric", "BriefDescription": "PC buffer stall L3 topdown metric", - "MetricGroup": "TopDownL3", + "MetricGroup": "TopdownL3", "MetricName": "pc_buffer_stall" }, { "MetricExpr": "armv8_pmuv3_0@event\\=0x7002@ / CPU_CYCLES", "PublicDescription": "Divider L3 topdown metric", "BriefDescription": "Divider L3 topdown metric", - "MetricGroup": "TopDownL3", + "MetricGroup": "TopdownL3", "MetricName": "divider" }, { "MetricExpr": "armv8_pmuv3_0@event\\=0x7003@ / CPU_CYCLES", "PublicDescription": "FSU stall L3 topdown metric", "BriefDescription": "FSU stall L3 topdown metric", - "MetricGroup": "TopDownL3", + "MetricGroup": "TopdownL3", "MetricName": "fsu_stall" }, { "MetricExpr": "core_bound - divider - fsu_stall", "PublicDescription": "EXE ports util L3 topdown metric", "BriefDescription": "EXE ports util L3 topdown metric", - "MetricGroup": "TopDownL3", + "MetricGroup": "TopdownL3", "MetricName": "exe_ports_util" }, { "MetricExpr": "(MEM_STALL_ANYLOAD - MEM_STALL_L1MISS) / CPU_CYCLES", "PublicDescription": "L1 bound L3 topdown metric", "BriefDescription": "L1 bound L3 topdown metric", - "MetricGroup": "TopDownL3", + "MetricGroup": "TopdownL3", "MetricName": "l1_bound" }, { "MetricExpr": "(MEM_STALL_L1MISS - MEM_STALL_L2MISS) / CPU_CYCLES", "PublicDescription": "L2 bound L3 topdown metric", "BriefDescription": "L2 bound L3 topdown metric", - "MetricGroup": "TopDownL3", + "MetricGroup": "TopdownL3", "MetricName": "l2_bound" }, { "MetricExpr": "MEM_STALL_L2MISS / CPU_CYCLES", "PublicDescription": "Mem bound L3 topdown metric", "BriefDescription": "Mem bound L3 topdown metric", - "MetricGroup": "TopDownL3", + "MetricGroup": "TopdownL3", "MetricName": "mem_bound" }, { "MetricExpr": "armv8_pmuv3_0@event\\=0x7005@ / CPU_CYCLES", "PublicDescription": "Store bound L3 topdown metric", "BriefDescription": "Store bound L3 topdown metric", - "MetricGroup": "TopDownL3", + "MetricGroup": "TopdownL3", "MetricName": "store_bound" } ] diff --git a/tools/perf/pmu-events/arch/common/common/tool.json b/tools/perf/pmu-events/arch/common/common/tool.json new file mode 100644 index 000000000000..12f2ef1813a6 --- /dev/null +++ b/tools/perf/pmu-events/arch/common/common/tool.json @@ -0,0 +1,74 @@ +[ + { + "Unit": "tool", + "EventName": "duration_time", + "BriefDescription": "Wall clock interval time in nanoseconds", + "ConfigCode": "1" + }, + { + "Unit": "tool", + "EventName": "user_time", + "BriefDescription": "User (non-kernel) time in nanoseconds", + "ConfigCode": "2" + }, + { + "Unit": "tool", + "EventName": "system_time", + "BriefDescription": "System/kernel time in nanoseconds", + "ConfigCode": "3" + }, + { + "Unit": "tool", + "EventName": "has_pmem", + "BriefDescription": "1 if persistent memory installed otherwise 0", + "ConfigCode": "4" + }, + { + "Unit": "tool", + "EventName": "num_cores", + "BriefDescription": "Number of cores. A core consists of 1 or more thread, with each thread being associated with a logical Linux CPU", + "ConfigCode": "5" + }, + { + "Unit": "tool", + "EventName": "num_cpus", + "BriefDescription": "Number of logical Linux CPUs. There may be multiple such CPUs on a core", + "ConfigCode": "6" + }, + { + "Unit": "tool", + "EventName": "num_cpus_online", + "BriefDescription": "Number of online logical Linux CPUs. There may be multiple such CPUs on a core", + "ConfigCode": "7" + }, + { + "Unit": "tool", + "EventName": "num_dies", + "BriefDescription": "Number of dies. Each die has 1 or more cores", + "ConfigCode": "8" + }, + { + "Unit": "tool", + "EventName": "num_packages", + "BriefDescription": "Number of packages. Each package has 1 or more die", + "ConfigCode": "9" + }, + { + "Unit": "tool", + "EventName": "slots", + "BriefDescription": "Number of functional units that in parallel can execute parts of an instruction", + "ConfigCode": "10" + }, + { + "Unit": "tool", + "EventName": "smt_on", + "BriefDescription": "1 if simultaneous multithreading (aka hyperthreading) is enable otherwise 0", + "ConfigCode": "11" + }, + { + "Unit": "tool", + "EventName": "system_tsc_freq", + "BriefDescription": "The amount a Time Stamp Counter (TSC) increases per second", + "ConfigCode": "12" + } +] diff --git a/tools/perf/pmu-events/arch/powerpc/compat/generic-events.json b/tools/perf/pmu-events/arch/powerpc/compat/generic-events.json new file mode 100644 index 000000000000..6f5e8efcb098 --- /dev/null +++ b/tools/perf/pmu-events/arch/powerpc/compat/generic-events.json @@ -0,0 +1,117 @@ +[ + { + "EventCode": "0x600F4", + "EventName": "PM_CYC", + "BriefDescription": "Processor cycles." + }, + { + "EventCode": "0x100F2", + "EventName": "PM_CYC_INST_CMPL", + "BriefDescription": "1 or more ppc insts finished" + }, + { + "EventCode": "0x100f4", + "EventName": "PM_FLOP_CMPL", + "BriefDescription": "Floating Point Operations Finished." + }, + { + "EventCode": "0x100F6", + "EventName": "PM_L1_ITLB_MISS", + "BriefDescription": "Number of I-ERAT reloads." + }, + { + "EventCode": "0x100F8", + "EventName": "PM_NO_INST_AVAIL", + "BriefDescription": "Number of cycles the ICT has no itags assigned to this thread." + }, + { + "EventCode": "0x100fc", + "EventName": "PM_LD_CMPL", + "BriefDescription": "Load instruction completed." + }, + { + "EventCode": "0x200F0", + "EventName": "PM_ST_CMPL", + "BriefDescription": "Stores completed from S2Q (2nd-level store queue)." + }, + { + "EventCode": "0x200F2", + "EventName": "PM_INST_DISP", + "BriefDescription": "PowerPC instruction dispatched." + }, + { + "EventCode": "0x200F4", + "EventName": "PM_RUN_CYC", + "BriefDescription": "Processor cycles gated by the run latch." + }, + { + "EventCode": "0x200F6", + "EventName": "PM_L1_DTLB_RELOAD", + "BriefDescription": "DERAT Reloaded due to a DERAT miss." + }, + { + "EventCode": "0x200FA", + "EventName": "PM_BR_TAKEN_CMPL", + "BriefDescription": "Branch Taken instruction completed." + }, + { + "EventCode": "0x200FC", + "EventName": "PM_L1_ICACHE_MISS", + "BriefDescription": "Demand instruction cache miss." + }, + { + "EventCode": "0x200FE", + "EventName": "PM_L1_RELOAD_FROM_MEM", + "BriefDescription": "L1 Dcache reload from memory" + }, + { + "EventCode": "0x300F0", + "EventName": "PM_ST_MISS_L1", + "BriefDescription": "Store Missed L1" + }, + { + "EventCode": "0x300FC", + "EventName": "PM_DTLB_MISS", + "BriefDescription": "Data PTEG reload" + }, + { + "EventCode": "0x300FE", + "EventName": "PM_DATA_FROM_L3MISS", + "BriefDescription": "Demand LD - L3 Miss (not L2 hit and not L3 hit)" + }, + { + "EventCode": "0x400F0", + "EventName": "PM_LD_MISS_L1", + "BriefDescription": "L1 Dcache load miss" + }, + { + "EventCode": "0x400F2", + "EventName": "PM_CYC_INST_DISP", + "BriefDescription": "Cycle when instruction(s) dispatched." + }, + { + "EventCode": "0x400F6", + "EventName": "PM_BR_MPRED_CMPL", + "BriefDescription": "A mispredicted branch completed. Includes direction and target." + }, + { + "EventCode": "0x400FA", + "EventName": "PM_RUN_INST_CMPL", + "BriefDescription": "PowerPC instruction completed while the run latch is set." + }, + { + "EventCode": "0x400FC", + "EventName": "PM_ITLB_MISS", + "BriefDescription": "Instruction TLB reload (after a miss), all page sizes. Includes only demand misses." + }, + { + "EventCode": "0x400fe", + "EventName": "PM_LD_NOT_CACHED", + "BriefDescription": "Load data not cached." + }, + { + "EventCode": "0x500fa", + "EventName": "PM_INST_CMPL", + "BriefDescription": "Instructions." + } +] diff --git a/tools/perf/pmu-events/arch/powerpc/mapfile.csv b/tools/perf/pmu-events/arch/powerpc/mapfile.csv index 4d5e9138d4cc..cbd3cb443784 100644 --- a/tools/perf/pmu-events/arch/powerpc/mapfile.csv +++ b/tools/perf/pmu-events/arch/powerpc/mapfile.csv @@ -16,3 +16,4 @@ 0x004e[[:xdigit:]]{4},1,power9,core 0x0080[[:xdigit:]]{4},1,power10,core 0x0082[[:xdigit:]]{4},1,power10,core +0x00ffffff,1,compat,core diff --git a/tools/perf/pmu-events/arch/x86/amdzen5/data-fabric.json b/tools/perf/pmu-events/arch/x86/amdzen5/data-fabric.json new file mode 100644 index 000000000000..fa06569d881d --- /dev/null +++ b/tools/perf/pmu-events/arch/x86/amdzen5/data-fabric.json @@ -0,0 +1,1634 @@ +[ + { + "EventName": "local_or_remote_socket_read_data_beats_dram_0", + "PublicDescription": "Read data beats (64 bytes) for transactions between local or remote socket and DRAM Channel 0.", + "EventCode": "0x1f", + "UMask": "0xffe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_read_data_beats_dram_1", + "PublicDescription": "Read data beats (64 bytes) for transactions between local or remote socket and DRAM Channel 1.", + "EventCode": "0x5f", + "UMask": "0xffe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_read_data_beats_dram_2", + "PublicDescription": "Read data beats (64 bytes) for transactions between local or remote socket and DRAM Channel 2.", + "EventCode": "0x9f", + "UMask": "0xffe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_read_data_beats_dram_3", + "PublicDescription": "Read data beats (64 bytes) for transactions between local or remote socket and DRAM Channel 3.", + "EventCode": "0xdf", + "UMask": "0xffe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_read_data_beats_dram_4", + "PublicDescription": "Read data beats (64 bytes) for transactions between local or remote socket and DRAM Channel 4.", + "EventCode": "0x11f", + "UMask": "0xffe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_read_data_beats_dram_5", + "PublicDescription": "Read data beats (64 bytes) for transactions between local or remote socket and DRAM Channel 5.", + "EventCode": "0x15f", + "UMask": "0xffe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_read_data_beats_dram_6", + "PublicDescription": "Read data beats (64 bytes) for transactions between local or remote socket and DRAM Channel 6.", + "EventCode": "0x19f", + "UMask": "0xffe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_read_data_beats_dram_7", + "PublicDescription": "Read data beats (64 bytes) for transactions between local or remote socket and DRAM Channel 7.", + "EventCode": "0x1df", + "UMask": "0xffe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_read_data_beats_dram_8", + "PublicDescription": "Read data beats (64 bytes) for transactions between local or remote socket and DRAM Channel 8.", + "EventCode": "0x21f", + "UMask": "0xffe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_read_data_beats_dram_9", + "PublicDescription": "Read data beats (64 bytes) for transactions between local or remote socket and DRAM Channel 9.", + "EventCode": "0x25f", + "UMask": "0xffe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_read_data_beats_dram_10", + "PublicDescription": "Read data beats (64 bytes) for transactions between local or remote socket and DRAM Channel 10.", + "EventCode": "0x29f", + "UMask": "0xffe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_read_data_beats_dram_11", + "PublicDescription": "Read data beats (64 bytes) for transactions between local or remote socket and DRAM Channel 11.", + "EventCode": "0x2df", + "UMask": "0xffe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_write_data_beats_dram_0", + "PublicDescription": "Write data beats (64 bytes) for transactions between local socket and DRAM Channel 0.", + "EventCode": "0x1f", + "UMask": "0x7ff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_write_data_beats_dram_1", + "PublicDescription": "Write data beats (64 bytes) for transactions between local socket and DRAM Channel 1.", + "EventCode": "0x5f", + "UMask": "0x7ff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_write_data_beats_dram_2", + "PublicDescription": "Write data beats (64 bytes) for transactions between local socket and DRAM Channel 2.", + "EventCode": "0x9f", + "UMask": "0x7ff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_write_data_beats_dram_3", + "PublicDescription": "Write data beats (64 bytes) for transactions between local socket and DRAM Channel 3.", + "EventCode": "0xdf", + "UMask": "0x7ff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_write_data_beats_dram_4", + "PublicDescription": "Write data beats (64 bytes) for transactions between local socket and DRAM Channel 4.", + "EventCode": "0x11f", + "UMask": "0x7ff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_write_data_beats_dram_5", + "PublicDescription": "Write data beats (64 bytes) for transactions between local socket and DRAM Channel 5.", + "EventCode": "0x15f", + "UMask": "0x7ff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_write_data_beats_dram_6", + "PublicDescription": "Write data beats (64 bytes) for transactions between local socket and DRAM Channel 6.", + "EventCode": "0x19f", + "UMask": "0x7ff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_write_data_beats_dram_7", + "PublicDescription": "Write data beats (64 bytes) for transactions between local socket and DRAM Channel 7.", + "EventCode": "0x1df", + "UMask": "0x7ff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_write_data_beats_dram_8", + "PublicDescription": "Write data beats (64 bytes) for transactions between local socket and DRAM Channel 8.", + "EventCode": "0x21f", + "UMask": "0x7ff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_write_data_beats_dram_9", + "PublicDescription": "Write data beats (64 bytes) for transactions between local socket and DRAM Channel 9.", + "EventCode": "0x25f", + "UMask": "0x7ff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_write_data_beats_dram_10", + "PublicDescription": "Write data beats (64 bytes) for transactions between local socket and DRAM Channel 10.", + "EventCode": "0x29f", + "UMask": "0x7ff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_write_data_beats_dram_11", + "PublicDescription": "Write data beats (64 bytes) for transactions between local socket and DRAM Channel 11.", + "EventCode": "0x2df", + "UMask": "0x7ff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_socket_write_data_beats_dram_0", + "PublicDescription": "Write data beats (64 bytes) for transactions between remote socket and DRAM Channel 0.", + "EventCode": "0x1f", + "UMask": "0xbff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_socket_write_data_beats_dram_1", + "PublicDescription": "Write data beats (64 bytes) for transactions between remote socket and DRAM Channel 1.", + "EventCode": "0x5f", + "UMask": "0xbff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_socket_write_data_beats_dram_2", + "PublicDescription": "Write data beats (64 bytes) for transactions between remote socket and DRAM Channel 2.", + "EventCode": "0x9f", + "UMask": "0xbff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_socket_write_data_beats_dram_3", + "PublicDescription": "Write data beats (64 bytes) for transactions between remote socket and DRAM Channel 3.", + "EventCode": "0xdf", + "UMask": "0xbff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_socket_write_data_beats_dram_4", + "PublicDescription": "Write data beats (64 bytes) for transactions between remote socket and DRAM Channel 4.", + "EventCode": "0x11f", + "UMask": "0xbff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_socket_write_data_beats_dram_5", + "PublicDescription": "Write data beats (64 bytes) for transactions between remote socket and DRAM Channel 5.", + "EventCode": "0x15f", + "UMask": "0xbff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_socket_write_data_beats_dram_6", + "PublicDescription": "Write data beats (64 bytes) for transactions between remote socket and DRAM Channel 6.", + "EventCode": "0x19f", + "UMask": "0xbff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_socket_write_data_beats_dram_7", + "PublicDescription": "Write data beats (64 bytes) for transactions between remote socket and DRAM Channel 7.", + "EventCode": "0x1df", + "UMask": "0xbff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_socket_write_data_beats_dram_8", + "PublicDescription": "Write data beats (64 bytes) for transactions between remote socket and DRAM Channel 8.", + "EventCode": "0x21f", + "UMask": "0xbff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_socket_write_data_beats_dram_9", + "PublicDescription": "Write data beats (64 bytes) for transactions between remote socket and DRAM Channel 9.", + "EventCode": "0x25f", + "UMask": "0xbff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_socket_write_data_beats_dram_10", + "PublicDescription": "Write data beats (64 bytes) for transactions between remote socket and DRAM Channel 10.", + "EventCode": "0x29f", + "UMask": "0xbff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_socket_write_data_beats_dram_11", + "PublicDescription": "Write data beats (64 bytes) for transactions between remote socket and DRAM Channel 11.", + "EventCode": "0x2df", + "UMask": "0xbff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_write_data_beats_dram_0", + "PublicDescription": "Write data beats (64 bytes) for transactions between local or remote socket and DRAM Channel 0.", + "EventCode": "0x1f", + "UMask": "0xfff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_write_data_beats_dram_1", + "PublicDescription": "Write data beats (64 bytes) for transactions between local or remote socket and DRAM Channel 1.", + "EventCode": "0x5f", + "UMask": "0xfff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_write_data_beats_dram_2", + "PublicDescription": "Write data beats (64 bytes) for transactions between local or remote socket and DRAM Channel 2.", + "EventCode": "0x9f", + "UMask": "0xfff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_write_data_beats_dram_3", + "PublicDescription": "Write data beats (64 bytes) for transactions between local or remote socket and DRAM Channel 3.", + "EventCode": "0xdf", + "UMask": "0xfff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_write_data_beats_dram_4", + "PublicDescription": "Write data beats (64 bytes) for transactions between local or remote socket and DRAM Channel 4.", + "EventCode": "0x11f", + "UMask": "0xfff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_write_data_beats_dram_5", + "PublicDescription": "Write data beats (64 bytes) for transactions between local or remote socket and DRAM Channel 5.", + "EventCode": "0x15f", + "UMask": "0xfff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_write_data_beats_dram_6", + "PublicDescription": "Write data beats (64 bytes) for transactions between local or remote socket and DRAM Channel 6.", + "EventCode": "0x19f", + "UMask": "0xfff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_write_data_beats_dram_7", + "PublicDescription": "Write data beats (64 bytes) for transactions between local or remote socket and DRAM Channel 7.", + "EventCode": "0x1df", + "UMask": "0xfff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_write_data_beats_dram_8", + "PublicDescription": "Write data beats (64 bytes) for transactions between local or remote socket and DRAM Channel 8.", + "EventCode": "0x21f", + "UMask": "0xfff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_write_data_beats_dram_9", + "PublicDescription": "Write data beats (64 bytes) for transactions between local or remote socket and DRAM Channel 9.", + "EventCode": "0x25f", + "UMask": "0xfff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_write_data_beats_dram_10", + "PublicDescription": "Write data beats (64 bytes) for transactions between local or remote socket and DRAM Channel 10.", + "EventCode": "0x29f", + "UMask": "0xfff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_write_data_beats_dram_11", + "PublicDescription": "Write data beats (64 bytes) for transactions between local or remote socket and DRAM Channel 11.", + "EventCode": "0x2df", + "UMask": "0xfff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_upstream_read_data_beats_io_0", + "PublicDescription": "Upstream DMA read data beats (64 bytes) for transactions between local socket and IO Root Complex 0.", + "EventCode": "0x81f", + "UMask": "0x7fe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_upstream_read_data_beats_io_1", + "PublicDescription": "Upstream DMA read data beats (64 bytes) for transactions between local socket and IO Root Complex 1.", + "EventCode": "0x85f", + "UMask": "0x7fe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_upstream_read_data_beats_io_2", + "PublicDescription": "Upstream DMA read data beats (64 bytes) for transactions between local socket and IO Root Complex 2.", + "EventCode": "0x89f", + "UMask": "0x7fe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_upstream_read_data_beats_io_3", + "PublicDescription": "Upstream DMA read data beats (64 bytes) for transactions between local socket and IO Root Complex 3.", + "EventCode": "0x8df", + "UMask": "0x7fe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_upstream_read_data_beats_io_4", + "PublicDescription": "Upstream DMA read data beats (64 bytes) for transactions between local socket and IO Root Complex 4.", + "EventCode": "0x91f", + "UMask": "0x7fe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_upstream_read_data_beats_io_5", + "PublicDescription": "Upstream DMA read data beats (64 bytes) for transactions between local socket and IO Root Complex 5.", + "EventCode": "0x95f", + "UMask": "0x7fe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_upstream_read_data_beats_io_6", + "PublicDescription": "Upstream DMA read data beats (64 bytes) for transactions between local socket and IO Root Complex 6.", + "EventCode": "0x99f", + "UMask": "0x7fe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_upstream_read_data_beats_io_7", + "PublicDescription": "Upstream DMA read data beats (64 bytes) for transactions between local socket and IO Root Complex 7.", + "EventCode": "0x9df", + "UMask": "0x7fe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_upstream_write_data_beats_io_0", + "PublicDescription": "Upstream DMA write data beats (64 bytes) for transactions between local socket and IO Root Complex 0.", + "EventCode": "0x81f", + "UMask": "0x7ff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_upstream_write_data_beats_io_1", + "PublicDescription": "Upstream DMA write data beats (64 bytes) for transactions between local socket and IO Root Complex 1.", + "EventCode": "0x85f", + "UMask": "0x7ff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_upstream_write_data_beats_io_2", + "PublicDescription": "Upstream DMA write data beats (64 bytes) for transactions between local socket and IO Root Complex 2.", + "EventCode": "0x89f", + "UMask": "0x7ff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_upstream_write_data_beats_io_3", + "PublicDescription": "Upstream DMA write data beats (64 bytes) for transactions between local socket and IO Root Complex 3.", + "EventCode": "0x8df", + "UMask": "0x7ff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_upstream_write_data_beats_io_4", + "PublicDescription": "Upstream DMA write data beats (64 bytes) for transactions between local socket and IO Root Complex 4.", + "EventCode": "0x91f", + "UMask": "0x7ff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_upstream_write_data_beats_io_5", + "PublicDescription": "Upstream DMA write data beats (64 bytes) for transactions between local socket and IO Root Complex 5.", + "EventCode": "0x95f", + "UMask": "0x7ff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_upstream_write_data_beats_io_6", + "PublicDescription": "Upstream DMA write data beats (64 bytes) for transactions between local socket and IO Root Complex 6.", + "EventCode": "0x99f", + "UMask": "0x7ff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_upstream_write_data_beats_io_7", + "PublicDescription": "Upstream DMA write data beats (64 bytes) for transactions between local socket and IO Root Complex 7.", + "EventCode": "0x9df", + "UMask": "0x7ff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_socket_upstream_read_data_beats_io_0", + "PublicDescription": "Upstream DMA read data beats (64 bytes) for transactions between remote socket and IO Root Complex 0.", + "EventCode": "0x81f", + "UMask": "0xbfe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_socket_upstream_read_data_beats_io_1", + "PublicDescription": "Upstream DMA read data beats (64 bytes) for transactions between remote socket and IO Root Complex 1.", + "EventCode": "0x85f", + "UMask": "0xbfe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_socket_upstream_read_data_beats_io_2", + "PublicDescription": "Upstream DMA read data beats (64 bytes) for transactions between remote socket and IO Root Complex 2.", + "EventCode": "0x89f", + "UMask": "0xbfe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_socket_upstream_read_data_beats_io_3", + "PublicDescription": "Upstream DMA read data beats (64 bytes) for transactions between remote socket and IO Root Complex 3.", + "EventCode": "0x8df", + "UMask": "0xbfe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_socket_upstream_read_data_beats_io_4", + "PublicDescription": "Upstream DMA read data beats (64 bytes) for transactions between remote socket and IO Root Complex 4.", + "EventCode": "0x91f", + "UMask": "0xbfe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_socket_upstream_read_data_beats_io_5", + "PublicDescription": "Upstream DMA read data beats (64 bytes) for transactions between remote socket and IO Root Complex 5.", + "EventCode": "0x95f", + "UMask": "0xbfe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_socket_upstream_read_data_beats_io_6", + "PublicDescription": "Upstream DMA read data beats (64 bytes) for transactions between remote socket and IO Root Complex 6.", + "EventCode": "0x99f", + "UMask": "0xbfe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_socket_upstream_read_data_beats_io_7", + "PublicDescription": "Upstream DMA read data beats (64 bytes) for transactions between remote socket and IO Root Complex 7.", + "EventCode": "0x9df", + "UMask": "0xbfe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_socket_upstream_write_data_beats_io_0", + "PublicDescription": "Upstream DMA write data beats (64 bytes) for transactions between remote socket and IO Root Complex 0.", + "EventCode": "0x81f", + "UMask": "0xbff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_socket_upstream_write_data_beats_io_1", + "PublicDescription": "Upstream DMA write data beats (64 bytes) for transactions between remote socket and IO Root Complex 1.", + "EventCode": "0x85f", + "UMask": "0xbff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_socket_upstream_write_data_beats_io_2", + "PublicDescription": "Upstream DMA write data beats (64 bytes) for transactions between remote socket and IO Root Complex 2.", + "EventCode": "0x89f", + "UMask": "0xbff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_socket_upstream_write_data_beats_io_3", + "PublicDescription": "Upstream DMA write data beats (64 bytes) for transactions between remote socket and IO Root Complex 3.", + "EventCode": "0x8df", + "UMask": "0xbff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_socket_upstream_write_data_beats_io_4", + "PublicDescription": "Upstream DMA write data beats (64 bytes) for transactions between remote socket and IO Root Complex 4.", + "EventCode": "0x91f", + "UMask": "0xbff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_socket_upstream_write_data_beats_io_5", + "PublicDescription": "Upstream DMA write data beats (64 bytes) for transactions between remote socket and IO Root Complex 5.", + "EventCode": "0x95f", + "UMask": "0xbff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_socket_upstream_write_data_beats_io_6", + "PublicDescription": "Upstream DMA write data beats (64 bytes) for transactions between remote socket and IO Root Complex 6.", + "EventCode": "0x99f", + "UMask": "0xbff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_socket_upstream_write_data_beats_io_7", + "PublicDescription": "Upstream DMA write data beats (64 bytes) for transactions between remote socket and IO Root Complex 7.", + "EventCode": "0x9df", + "UMask": "0xbff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_upstream_read_data_beats_io_0", + "PublicDescription": "Upstream DMA read data beats (64 bytes) for transactions between local or remote socket and IO Root Complex 0.", + "EventCode": "0x81f", + "UMask": "0xffe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_upstream_read_data_beats_io_1", + "PublicDescription": "Upstream DMA read data beats (64 bytes) for transactions between local or remote socket and IO Root Complex 1.", + "EventCode": "0x85f", + "UMask": "0xffe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_upstream_read_data_beats_io_2", + "PublicDescription": "Upstream DMA read data beats (64 bytes) for transactions between local or remote socket and IO Root Complex 2.", + "EventCode": "0x89f", + "UMask": "0xffe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_upstream_read_data_beats_io_3", + "PublicDescription": "Upstream DMA read data beats (64 bytes) for transactions between local or remote socket and IO Root Complex 3.", + "EventCode": "0x8df", + "UMask": "0xffe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_upstream_read_data_beats_io_4", + "PublicDescription": "Upstream DMA read data beats (64 bytes) for transactions between local or remote socket and IO Root Complex 4.", + "EventCode": "0x91f", + "UMask": "0xffe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_upstream_read_data_beats_io_5", + "PublicDescription": "Upstream DMA read data beats (64 bytes) for transactions between local or remote socket and IO Root Complex 5.", + "EventCode": "0x95f", + "UMask": "0xffe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_upstream_read_data_beats_io_6", + "PublicDescription": "Upstream DMA read data beats (64 bytes) for transactions between local or remote socket and IO Root Complex 6.", + "EventCode": "0x99f", + "UMask": "0xffe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_upstream_read_data_beats_io_7", + "PublicDescription": "Upstream DMA read data beats (64 bytes) for transactions between local or remote socket and IO Root Complex 7.", + "EventCode": "0x9df", + "UMask": "0xffe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_upstream_write_data_beats_io_0", + "PublicDescription": "Upstream DMA write data beats (64 bytes) for transactions between local or remote socket and IO Root Complex 0.", + "EventCode": "0x81f", + "UMask": "0xfff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_upstream_write_data_beats_io_1", + "PublicDescription": "Upstream DMA write data beats (64 bytes) for transactions between local or remote socket and IO Root Complex 1.", + "EventCode": "0x85f", + "UMask": "0xfff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_upstream_write_data_beats_io_2", + "PublicDescription": "Upstream DMA write data beats (64 bytes) for transactions between local or remote socket and IO Root Complex 2.", + "EventCode": "0x89f", + "UMask": "0xfff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_upstream_write_data_beats_io_3", + "PublicDescription": "Upstream DMA write data beats (64 bytes) for transactions between local or remote socket and IO Root Complex 3.", + "EventCode": "0x8df", + "UMask": "0xfff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_upstream_write_data_beats_io_4", + "PublicDescription": "Upstream DMA write data beats (64 bytes) for transactions between local or remote socket and IO Root Complex 4.", + "EventCode": "0x91f", + "UMask": "0xfff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_upstream_write_data_beats_io_5", + "PublicDescription": "Upstream DMA write data beats (64 bytes) for transactions between local or remote socket and IO Root Complex 5.", + "EventCode": "0x95f", + "UMask": "0xfff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_upstream_write_data_beats_io_6", + "PublicDescription": "Upstream DMA write data beats (64 bytes) for transactions between local or remote socket and IO Root Complex 6.", + "EventCode": "0x99f", + "UMask": "0xfff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_upstream_write_data_beats_io_7", + "PublicDescription": "Upstream DMA write data beats (64 bytes) for transactions between local or remote socket and IO Root Complex 7.", + "EventCode": "0x9df", + "UMask": "0xfff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_inbound_data_beats_cfi_0", + "PublicDescription": "Inbound data beats (32 bytes) for transactions between local socket and Core-to-Fabric Interface 0.", + "EventCode": "0x41e", + "UMask": "0x7fe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_inbound_data_beats_cfi_1", + "PublicDescription": "Inbound data beats (32 bytes) for transactions between local socket and Core-to-Fabric Interface 1.", + "EventCode": "0x45e", + "UMask": "0x7fe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_inbound_data_beats_cfi_2", + "PublicDescription": "Inbound data beats (32 bytes) for transactions between local socket and Core-to-Fabric Interface 2.", + "EventCode": "0x49e", + "UMask": "0x7fe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_inbound_data_beats_cfi_3", + "PublicDescription": "Inbound data beats (32 bytes) for transactions between local socket and Core-to-Fabric Interface 3.", + "EventCode": "0x4de", + "UMask": "0x7fe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_inbound_data_beats_cfi_4", + "PublicDescription": "Inbound data beats (32 bytes) for transactions between local socket and Core-to-Fabric Interface 4.", + "EventCode": "0x51e", + "UMask": "0x7fe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_inbound_data_beats_cfi_5", + "PublicDescription": "Inbound data beats (32 bytes) for transactions between local socket and Core-to-Fabric Interface 5.", + "EventCode": "0x55e", + "UMask": "0x7fe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_inbound_data_beats_cfi_6", + "PublicDescription": "Inbound data beats (32 bytes) for transactions between local socket and Core-to-Fabric Interface 6.", + "EventCode": "0x59e", + "UMask": "0x7fe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_inbound_data_beats_cfi_7", + "PublicDescription": "Inbound data beats (32 bytes) for transactions between local socket and Core-to-Fabric Interface 7.", + "EventCode": "0x5de", + "UMask": "0x7fe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_inbound_data_beats_cfi_8", + "PublicDescription": "Inbound data beats (32 bytes) for transactions between local socket and Core-to-Fabric Interface 8.", + "EventCode": "0x41f", + "UMask": "0x7fe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_inbound_data_beats_cfi_9", + "PublicDescription": "Inbound data beats (32 bytes) for transactions between local socket and Core-to-Fabric Interface 9.", + "EventCode": "0x45f", + "UMask": "0x7fe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_inbound_data_beats_cfi_10", + "PublicDescription": "Inbound data beats (32 bytes) for transactions between local socket and Core-to-Fabric Interface 10.", + "EventCode": "0x49f", + "UMask": "0x7fe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_inbound_data_beats_cfi_11", + "PublicDescription": "Inbound data beats (32 bytes) for transactions between local socket and Core-to-Fabric Interface 11.", + "EventCode": "0x4df", + "UMask": "0x7fe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_inbound_data_beats_cfi_12", + "PublicDescription": "Inbound data beats (32 bytes) for transactions between local socket and Core-to-Fabric Interface 12.", + "EventCode": "0x51f", + "UMask": "0x7fe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_inbound_data_beats_cfi_13", + "PublicDescription": "Inbound data beats (32 bytes) for transactions between local socket and Core-to-Fabric Interface 13.", + "EventCode": "0x55f", + "UMask": "0x7fe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_inbound_data_beats_cfi_14", + "PublicDescription": "Inbound data beats (32 bytes) for transactions between local socket and Core-to-Fabric Interface 14.", + "EventCode": "0x59f", + "UMask": "0x7fe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_inbound_data_beats_cfi_15", + "PublicDescription": "Inbound data beats (32 bytes) for transactions between local socket and Core-to-Fabric Interface 15.", + "EventCode": "0x5df", + "UMask": "0x7fe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_outbound_data_beats_cfi_0", + "PublicDescription": "Outbound data beats (64 bytes) for transactions between local socket and Core-to-Fabric Interface 0.", + "EventCode": "0x41e", + "UMask": "0x7ff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_outbound_data_beats_cfi_1", + "PublicDescription": "Outbound data beats (64 bytes) for transactions between local socket and Core-to-Fabric Interface 1.", + "EventCode": "0x45e", + "UMask": "0x7ff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_outbound_data_beats_cfi_2", + "PublicDescription": "Outbound data beats (64 bytes) for transactions between local socket and Core-to-Fabric Interface 2.", + "EventCode": "0x49e", + "UMask": "0x7ff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_outbound_data_beats_cfi_3", + "PublicDescription": "Outbound data beats (64 bytes) for transactions between local socket and Core-to-Fabric Interface 3.", + "EventCode": "0x4de", + "UMask": "0x7ff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_outbound_data_beats_cfi_4", + "PublicDescription": "Outbound data beats (64 bytes) for transactions between local socket and Core-to-Fabric Interface 4.", + "EventCode": "0x51e", + "UMask": "0x7ff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_outbound_data_beats_cfi_5", + "PublicDescription": "Outbound data beats (64 bytes) for transactions between local socket and Core-to-Fabric Interface 5.", + "EventCode": "0x55e", + "UMask": "0x7ff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_outbound_data_beats_cfi_6", + "PublicDescription": "Outbound data beats (64 bytes) for transactions between local socket and Core-to-Fabric Interface 6.", + "EventCode": "0x59e", + "UMask": "0x7ff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_outbound_data_beats_cfi_7", + "PublicDescription": "Outbound data beats (64 bytes) for transactions between local socket and Core-to-Fabric Interface 7.", + "EventCode": "0x5de", + "UMask": "0x7ff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_outbound_data_beats_cfi_8", + "PublicDescription": "Outbound data beats (64 bytes) for transactions between local socket and Core-to-Fabric Interface 8.", + "EventCode": "0x41f", + "UMask": "0x7ff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_outbound_data_beats_cfi_9", + "PublicDescription": "Outbound data beats (64 bytes) for transactions between local socket and Core-to-Fabric Interface 9.", + "EventCode": "0x45f", + "UMask": "0x7ff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_outbound_data_beats_cfi_10", + "PublicDescription": "Outbound data beats (64 bytes) for transactions between local socket and Core-to-Fabric Interface 10.", + "EventCode": "0x49f", + "UMask": "0x7ff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_outbound_data_beats_cfi_11", + "PublicDescription": "Outbound data beats (64 bytes) for transactions between local socket and Core-to-Fabric Interface 11.", + "EventCode": "0x4df", + "UMask": "0x7ff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_outbound_data_beats_cfi_12", + "PublicDescription": "Outbound data beats (64 bytes) for transactions between local socket and Core-to-Fabric Interface 12.", + "EventCode": "0x51f", + "UMask": "0x7ff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_outbound_data_beats_cfi_13", + "PublicDescription": "Outbound data beats (64 bytes) for transactions between local socket and Core-to-Fabric Interface 13.", + "EventCode": "0x55f", + "UMask": "0x7ff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_outbound_data_beats_cfi_14", + "PublicDescription": "Outbound data beats (64 bytes) for transactions between local socket and Core-to-Fabric Interface 14.", + "EventCode": "0x59f", + "UMask": "0x7ff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_outbound_data_beats_cfi_15", + "PublicDescription": "Outbound data beats (64 bytes) for transactions between local socket and Core-to-Fabric Interface 15.", + "EventCode": "0x5df", + "UMask": "0x7ff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_socket_inbound_data_beats_cfi_0", + "PublicDescription": "Inbound data beats (32 bytes) for transactions between remote socket and Core-to-Fabric Interface 0.", + "EventCode": "0x41e", + "UMask": "0xbfe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_socket_inbound_data_beats_cfi_1", + "PublicDescription": "Inbound data beats (32 bytes) for transactions between remote socket and Core-to-Fabric Interface 1.", + "EventCode": "0x45e", + "UMask": "0xbfe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_socket_inbound_data_beats_cfi_2", + "PublicDescription": "Inbound data beats (32 bytes) for transactions between remote socket and Core-to-Fabric Interface 2.", + "EventCode": "0x49e", + "UMask": "0xbfe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_socket_inbound_data_beats_cfi_3", + "PublicDescription": "Inbound data beats (32 bytes) for transactions between remote socket and Core-to-Fabric Interface 3.", + "EventCode": "0x4de", + "UMask": "0xbfe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_socket_inbound_data_beats_cfi_4", + "PublicDescription": "Inbound data beats (32 bytes) for transactions between remote socket and Core-to-Fabric Interface 4.", + "EventCode": "0x51e", + "UMask": "0xbfe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_socket_inbound_data_beats_cfi_5", + "PublicDescription": "Inbound data beats (32 bytes) for transactions between remote socket and Core-to-Fabric Interface 5.", + "EventCode": "0x55e", + "UMask": "0xbfe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_socket_inbound_data_beats_cfi_6", + "PublicDescription": "Inbound data beats (32 bytes) for transactions between remote socket and Core-to-Fabric Interface 6.", + "EventCode": "0x59e", + "UMask": "0xbfe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_socket_inbound_data_beats_cfi_7", + "PublicDescription": "Inbound data beats (32 bytes) for transactions between remote socket and Core-to-Fabric Interface 7.", + "EventCode": "0x5de", + "UMask": "0xbfe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_socket_inbound_data_beats_cfi_8", + "PublicDescription": "Inbound data beats (32 bytes) for transactions between remote socket and Core-to-Fabric Interface 8.", + "EventCode": "0x41f", + "UMask": "0xbfe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_socket_inbound_data_beats_cfi_9", + "PublicDescription": "Inbound data beats (32 bytes) for transactions between remote socket and Core-to-Fabric Interface 9.", + "EventCode": "0x45f", + "UMask": "0xbfe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_socket_inbound_data_beats_cfi_10", + "PublicDescription": "Inbound data beats (32 bytes) for transactions between remote socket and Core-to-Fabric Interface 10.", + "EventCode": "0x49f", + "UMask": "0xbfe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_socket_inbound_data_beats_cfi_11", + "PublicDescription": "Inbound data beats (32 bytes) for transactions between remote socket and Core-to-Fabric Interface 11.", + "EventCode": "0x4df", + "UMask": "0xbfe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_socket_inbound_data_beats_cfi_12", + "PublicDescription": "Inbound data beats (32 bytes) for transactions between remote socket and Core-to-Fabric Interface 12.", + "EventCode": "0x51f", + "UMask": "0xbfe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_socket_inbound_data_beats_cfi_13", + "PublicDescription": "Inbound data beats (32 bytes) for transactions between remote socket and Core-to-Fabric Interface 13.", + "EventCode": "0x55f", + "UMask": "0xbfe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_socket_inbound_data_beats_cfi_14", + "PublicDescription": "Inbound data beats (32 bytes) for transactions between remote socket and Core-to-Fabric Interface 14.", + "EventCode": "0x59f", + "UMask": "0xbfe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_socket_inbound_data_beats_cfi_15", + "PublicDescription": "Inbound data beats (32 bytes) for transactions between remote socket and Core-to-Fabric Interface 15.", + "EventCode": "0x5df", + "UMask": "0xbfe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_socket_outbound_data_beats_cfi_0", + "PublicDescription": "Outbound data beats (64 bytes) for transactions between remote socket and Core-to-Fabric Interface 0.", + "EventCode": "0x41e", + "UMask": "0xbff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_socket_outbound_data_beats_cfi_1", + "PublicDescription": "Outbound data beats (64 bytes) for transactions between remote socket and Core-to-Fabric Interface 1.", + "EventCode": "0x45e", + "UMask": "0xbff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_socket_outbound_data_beats_cfi_2", + "PublicDescription": "Outbound data beats (64 bytes) for transactions between remote socket and Core-to-Fabric Interface 2.", + "EventCode": "0x49e", + "UMask": "0xbff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_socket_outbound_data_beats_cfi_3", + "PublicDescription": "Outbound data beats (64 bytes) for transactions between remote socket and Core-to-Fabric Interface 3.", + "EventCode": "0x4de", + "UMask": "0xbff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_socket_outbound_data_beats_cfi_4", + "PublicDescription": "Outbound data beats (64 bytes) for transactions between remote socket and Core-to-Fabric Interface 4.", + "EventCode": "0x51e", + "UMask": "0xbff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_socket_outbound_data_beats_cfi_5", + "PublicDescription": "Outbound data beats (64 bytes) for transactions between remote socket and Core-to-Fabric Interface 5.", + "EventCode": "0x55e", + "UMask": "0xbff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_socket_outbound_data_beats_cfi_6", + "PublicDescription": "Outbound data beats (64 bytes) for transactions between remote socket and Core-to-Fabric Interface 6.", + "EventCode": "0x59e", + "UMask": "0xbff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_socket_outbound_data_beats_cfi_7", + "PublicDescription": "Outbound data beats (64 bytes) for transactions between remote socket and Core-to-Fabric Interface 7.", + "EventCode": "0x5de", + "UMask": "0xbff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_socket_outbound_data_beats_cfi_8", + "PublicDescription": "Outbound data beats (64 bytes) for transactions between remote socket and Core-to-Fabric Interface 8.", + "EventCode": "0x41f", + "UMask": "0xbff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_socket_outbound_data_beats_cfi_9", + "PublicDescription": "Outbound data beats (64 bytes) for transactions between remote socket and Core-to-Fabric Interface 9.", + "EventCode": "0x45f", + "UMask": "0xbff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_socket_outbound_data_beats_cfi_10", + "PublicDescription": "Outbound data beats (64 bytes) for transactions between remote socket and Core-to-Fabric Interface 10.", + "EventCode": "0x49f", + "UMask": "0xbff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_socket_outbound_data_beats_cfi_11", + "PublicDescription": "Outbound data beats (64 bytes) for transactions between remote socket and Core-to-Fabric Interface 11.", + "EventCode": "0x4df", + "UMask": "0xbff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_socket_outbound_data_beats_cfi_12", + "PublicDescription": "Outbound data beats (64 bytes) for transactions between remote socket and Core-to-Fabric Interface 12.", + "EventCode": "0x51f", + "UMask": "0xbff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_socket_outbound_data_beats_cfi_13", + "PublicDescription": "Outbound data beats (64 bytes) for transactions between remote socket and Core-to-Fabric Interface 13.", + "EventCode": "0x55f", + "UMask": "0xbff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_socket_outbound_data_beats_cfi_14", + "PublicDescription": "Outbound data beats (64 bytes) for transactions between remote socket and Core-to-Fabric Interface 14.", + "EventCode": "0x59f", + "UMask": "0xbff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_socket_outbound_data_beats_cfi_15", + "PublicDescription": "Outbound data beats (64 bytes) for transactions between remote socket and Core-to-Fabric Interface 15.", + "EventCode": "0x5df", + "UMask": "0xbff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_inbound_data_beats_cfi_0", + "PublicDescription": "Inbound data beats (32 bytes) for transactions between local or remote socket and Core-to-Fabric Interface 0.", + "EventCode": "0x41e", + "UMask": "0xffe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_inbound_data_beats_cfi_1", + "PublicDescription": "Inbound data beats (32 bytes) for transactions between local or remote socket and Core-to-Fabric Interface 1.", + "EventCode": "0x45e", + "UMask": "0xffe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_inbound_data_beats_cfi_2", + "PublicDescription": "Inbound data beats (32 bytes) for transactions between local or remote socket and Core-to-Fabric Interface 2.", + "EventCode": "0x49e", + "UMask": "0xffe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_inbound_data_beats_cfi_3", + "PublicDescription": "Inbound data beats (32 bytes) for transactions between local or remote socket and Core-to-Fabric Interface 3.", + "EventCode": "0x4de", + "UMask": "0xffe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_inbound_data_beats_cfi_4", + "PublicDescription": "Inbound data beats (32 bytes) for transactions between local or remote socket and Core-to-Fabric Interface 4.", + "EventCode": "0x51e", + "UMask": "0xffe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_inbound_data_beats_cfi_5", + "PublicDescription": "Inbound data beats (32 bytes) for transactions between local or remote socket and Core-to-Fabric Interface 5.", + "EventCode": "0x55e", + "UMask": "0xffe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_inbound_data_beats_cfi_6", + "PublicDescription": "Inbound data beats (32 bytes) for transactions between local or remote socket and Core-to-Fabric Interface 6.", + "EventCode": "0x59e", + "UMask": "0xffe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_inbound_data_beats_cfi_7", + "PublicDescription": "Inbound data beats (32 bytes) for transactions between local or remote socket and Core-to-Fabric Interface 7.", + "EventCode": "0x5de", + "UMask": "0xffe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_inbound_data_beats_cfi_8", + "PublicDescription": "Inbound data beats (32 bytes) for transactions between local or remote socket and Core-to-Fabric Interface 8.", + "EventCode": "0x41f", + "UMask": "0xffe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_inbound_data_beats_cfi_9", + "PublicDescription": "Inbound data beats (32 bytes) for transactions between local or remote socket and Core-to-Fabric Interface 9.", + "EventCode": "0x45f", + "UMask": "0xffe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_inbound_data_beats_cfi_10", + "PublicDescription": "Inbound data beats (32 bytes) for transactions between local or remote socket and Core-to-Fabric Interface 10.", + "EventCode": "0x49f", + "UMask": "0xffe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_inbound_data_beats_cfi_11", + "PublicDescription": "Inbound data beats (32 bytes) for transactions between local or remote socket and Core-to-Fabric Interface 11.", + "EventCode": "0x4df", + "UMask": "0xffe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_inbound_data_beats_cfi_12", + "PublicDescription": "Inbound data beats (32 bytes) for transactions between local or remote socket and Core-to-Fabric Interface 12.", + "EventCode": "0x51f", + "UMask": "0xffe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_inbound_data_beats_cfi_13", + "PublicDescription": "Inbound data beats (32 bytes) for transactions between local or remote socket and Core-to-Fabric Interface 13.", + "EventCode": "0x55f", + "UMask": "0xffe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_inbound_data_beats_cfi_14", + "PublicDescription": "Inbound data beats (32 bytes) for transactions between local or remote socket and Core-to-Fabric Interface 14.", + "EventCode": "0x59f", + "UMask": "0xffe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_inbound_data_beats_cfi_15", + "PublicDescription": "Inbound data beats (32 bytes) for transactions between local or remote socket and Core-to-Fabric Interface 15.", + "EventCode": "0x5df", + "UMask": "0xffe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_outbound_data_beats_cfi_0", + "PublicDescription": "Outbound data beats (64 bytes) for transactions between local or remote socket and Core-to-Fabric Interface 0.", + "EventCode": "0x41e", + "UMask": "0xfff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_outbound_data_beats_cfi_1", + "PublicDescription": "Outbound data beats (64 bytes) for transactions between local or remote socket and Core-to-Fabric Interface 1.", + "EventCode": "0x45e", + "UMask": "0xfff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_outbound_data_beats_cfi_2", + "PublicDescription": "Outbound data beats (64 bytes) for transactions between local or remote socket and Core-to-Fabric Interface 2.", + "EventCode": "0x49e", + "UMask": "0xfff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_outbound_data_beats_cfi_3", + "PublicDescription": "Outbound data beats (64 bytes) for transactions between local or remote socket and Core-to-Fabric Interface 3.", + "EventCode": "0x4de", + "UMask": "0xfff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_outbound_data_beats_cfi_4", + "PublicDescription": "Outbound data beats (64 bytes) for transactions between local or remote socket and Core-to-Fabric Interface 4.", + "EventCode": "0x51e", + "UMask": "0xfff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_outbound_data_beats_cfi_5", + "PublicDescription": "Outbound data beats (64 bytes) for transactions between local or remote socket and Core-to-Fabric Interface 5.", + "EventCode": "0x55e", + "UMask": "0xfff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_outbound_data_beats_cfi_6", + "PublicDescription": "Outbound data beats (64 bytes) for transactions between local or remote socket and Core-to-Fabric Interface 6.", + "EventCode": "0x59e", + "UMask": "0xfff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_outbound_data_beats_cfi_7", + "PublicDescription": "Outbound data beats (64 bytes) for transactions between local or remote socket and Core-to-Fabric Interface 7.", + "EventCode": "0x5de", + "UMask": "0xfff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_outbound_data_beats_cfi_8", + "PublicDescription": "Outbound data beats (64 bytes) for transactions between local or remote socket and Core-to-Fabric Interface 8.", + "EventCode": "0x41f", + "UMask": "0xfff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_outbound_data_beats_cfi_9", + "PublicDescription": "Outbound data beats (64 bytes) for transactions between local or remote socket and Core-to-Fabric Interface 9.", + "EventCode": "0x45f", + "UMask": "0xfff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_outbound_data_beats_cfi_10", + "PublicDescription": "Outbound data beats (64 bytes) for transactions between local or remote socket and Core-to-Fabric Interface 10.", + "EventCode": "0x49f", + "UMask": "0xfff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_outbound_data_beats_cfi_11", + "PublicDescription": "Outbound data beats (64 bytes) for transactions between local or remote socket and Core-to-Fabric Interface 11.", + "EventCode": "0x4df", + "UMask": "0xfff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_outbound_data_beats_cfi_12", + "PublicDescription": "Outbound data beats (64 bytes) for transactions between local or remote socket and Core-to-Fabric Interface 12.", + "EventCode": "0x51f", + "UMask": "0xfff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_outbound_data_beats_cfi_13", + "PublicDescription": "Outbound data beats (64 bytes) for transactions between local or remote socket and Core-to-Fabric Interface 13.", + "EventCode": "0x55f", + "UMask": "0xfff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_outbound_data_beats_cfi_14", + "PublicDescription": "Outbound data beats (64 bytes) for transactions between local or remote socket and Core-to-Fabric Interface 14.", + "EventCode": "0x59f", + "UMask": "0xfff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_outbound_data_beats_cfi_15", + "PublicDescription": "Outbound data beats (64 bytes) for transactions between local or remote socket and Core-to-Fabric Interface 15.", + "EventCode": "0x5df", + "UMask": "0xfff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_inbound_data_beats_link_0", + "PublicDescription": "Inbound data beats (64 bytes) for transactions between local socket and remote socket over Cross-socket Link 0.", + "EventCode": "0xd5f", + "UMask": "0xf3f", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_inbound_data_beats_link_1", + "PublicDescription": "Inbound data beats (64 bytes) for transactions between local socket and remote socket over Cross-socket Link 1.", + "EventCode": "0xd9f", + "UMask": "0xf3f", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_inbound_data_beats_link_2", + "PublicDescription": "Inbound data beats (64 bytes) for transactions between local socket and remote socket over Cross-socket Link 2.", + "EventCode": "0xddf", + "UMask": "0xf3f", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_inbound_data_beats_link_3", + "PublicDescription": "Inbound data beats (64 bytes) for transactions between local socket and remote socket over Cross-socket Link 3.", + "EventCode": "0xe1f", + "UMask": "0xf3f", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_inbound_data_beats_link_4", + "PublicDescription": "Inbound data beats (64 bytes) for transactions between local socket and remote socket over Cross-socket Link 4.", + "EventCode": "0xe5f", + "UMask": "0xf3f", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_inbound_data_beats_link_5", + "PublicDescription": "Inbound data beats (64 bytes) for transactions between local socket and remote socket over Cross-socket Link 5.", + "EventCode": "0xe9f", + "UMask": "0xf3f", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_outbound_data_beats_link_0", + "PublicDescription": "Outbound data beats (64 bytes) for transactions between local socket and remote socket over Cross-socket Link 0.", + "EventCode": "0xd5f", + "UMask": "0xf3e", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_outbound_data_beats_link_1", + "PublicDescription": "Outbound data beats (64 bytes) for transactions between local socket and remote socket over Cross-socket Link 1.", + "EventCode": "0xd9f", + "UMask": "0xf3e", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_outbound_data_beats_link_2", + "PublicDescription": "Outbound data beats (64 bytes) for transactions between local socket and remote socket over Cross-socket Link 2.", + "EventCode": "0xddf", + "UMask": "0xf3e", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_outbound_data_beats_link_3", + "PublicDescription": "Outbound data beats (64 bytes) for transactions between local socket and remote socket over Cross-socket Link 3.", + "EventCode": "0xe1f", + "UMask": "0xf3e", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_outbound_data_beats_link_4", + "PublicDescription": "Outbound data beats (64 bytes) for transactions between local socket and remote socket over Cross-socket Link 4.", + "EventCode": "0xe5f", + "UMask": "0xf3e", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_outbound_data_beats_link_5", + "PublicDescription": "Outbound data beats (64 bytes) for transactions between local socket and remote socket over Cross-socket Link 5.", + "EventCode": "0xe9f", + "UMask": "0xf3e", + "PerPkg": "1", + "Unit": "DFPMC" + } +] diff --git a/tools/perf/pmu-events/arch/x86/amdzen5/load-store.json b/tools/perf/pmu-events/arch/x86/amdzen5/load-store.json index af2fdf1f55d6..ff6627a77805 100644 --- a/tools/perf/pmu-events/arch/x86/amdzen5/load-store.json +++ b/tools/perf/pmu-events/arch/x86/amdzen5/load-store.json @@ -97,6 +97,12 @@ "UMask": "0x02" }, { + "EventName": "ls_dmnd_fills_from_sys.local_all", + "EventCode": "0x43", + "BriefDescription": "Demand data cache fills from local L2 cache, L3 cache or different L2 cache in the same CCX.", + "UMask": "0x03" + }, + { "EventName": "ls_dmnd_fills_from_sys.near_cache", "EventCode": "0x43", "BriefDescription": "Demand data cache fills from cache of another CCX when the address was in the same NUMA node.", @@ -115,12 +121,30 @@ "UMask": "0x10" }, { + "EventName": "ls_dmnd_fills_from_sys.remote_cache", + "EventCode": "0x43", + "BriefDescription": "Demand data cache fills from cache of another CCX when the address was in the same or a different NUMA node.", + "UMask": "0x14" + }, + { "EventName": "ls_dmnd_fills_from_sys.dram_io_far", "EventCode": "0x43", "BriefDescription": "Demand data cache fills from either DRAM or MMIO in a different NUMA node (same or different socket).", "UMask": "0x40" }, { + "EventName": "ls_dmnd_fills_from_sys.dram_io_all", + "EventCode": "0x43", + "BriefDescription": "Demand data cache fills from either DRAM or MMIO in the same or a different NUMA node (same or different socket).", + "UMask": "0x48" + }, + { + "EventName": "ls_dmnd_fills_from_sys.far_all", + "EventCode": "0x43", + "BriefDescription": "Demand data cache fills from either cache of another CCX, DRAM or MMIO when the address was in a different NUMA node (same or different socket).", + "UMask": "0x50" + }, + { "EventName": "ls_dmnd_fills_from_sys.alternate_memories", "EventCode": "0x43", "BriefDescription": "Demand data cache fills from extension memory.", @@ -193,12 +217,6 @@ "UMask": "0x50" }, { - "EventName": "ls_any_fills_from_sys.all_dram_io", - "EventCode": "0x44", - "BriefDescription": "Any data cache fills from either DRAM or MMIO in any NUMA node (same or different socket).", - "UMask": "0x48" - }, - { "EventName": "ls_any_fills_from_sys.alternate_memories", "EventCode": "0x44", "BriefDescription": "Any data cache fills from extension memory.", @@ -343,6 +361,12 @@ "UMask": "0x02" }, { + "EventName": "ls_sw_pf_dc_fills.local_all", + "EventCode": "0x59", + "BriefDescription": "Software prefetch data cache fills from local L2 cache, L3 cache or different L2 cache in the same CCX.", + "UMask": "0x03" + }, + { "EventName": "ls_sw_pf_dc_fills.near_cache", "EventCode": "0x59", "BriefDescription": "Software prefetch data cache fills from cache of another CCX in the same NUMA node.", @@ -361,12 +385,30 @@ "UMask": "0x10" }, { + "EventName": "ls_sw_pf_dc_fills.remote_cache", + "EventCode": "0x59", + "BriefDescription": "Software prefetch data cache fills from cache of another CCX when the address was in the same or a different NUMA node.", + "UMask": "0x14" + }, + { "EventName": "ls_sw_pf_dc_fills.dram_io_far", "EventCode": "0x59", "BriefDescription": "Software prefetch data cache fills from either DRAM or MMIO in a different NUMA node (same or different socket).", "UMask": "0x40" }, { + "EventName": "ls_sw_pf_dc_fills.dram_io_all", + "EventCode": "0x59", + "BriefDescription": "Software prefetch data cache fills from either DRAM or MMIO in the same or a different NUMA node (same or different socket).", + "UMask": "0x48" + }, + { + "EventName": "ls_sw_pf_dc_fills.far_all", + "EventCode": "0x59", + "BriefDescription": "Software prefetch data cache fills from either cache of another CCX, DRAM or MMIO when the address was in a different NUMA node (same or different socket).", + "UMask": "0x50" + }, + { "EventName": "ls_sw_pf_dc_fills.alternate_memories", "EventCode": "0x59", "BriefDescription": "Software prefetch data cache fills from extension memory.", @@ -391,6 +433,12 @@ "UMask": "0x02" }, { + "EventName": "ls_hw_pf_dc_fills.local_all", + "EventCode": "0x5a", + "BriefDescription": "Hardware prefetch data cache fills from local L2 cache, L3 cache or different L2 cache in the same CCX.", + "UMask": "0x03" + }, + { "EventName": "ls_hw_pf_dc_fills.near_cache", "EventCode": "0x5a", "BriefDescription": "Hardware prefetch data cache fills from cache of another CCX when the address was in the same NUMA node.", @@ -409,12 +457,30 @@ "UMask": "0x10" }, { + "EventName": "ls_hw_pf_dc_fills.remote_cache", + "EventCode": "0x5a", + "BriefDescription": "Hardware prefetch data cache fills from cache of another CCX when the address was in the same or a different NUMA node.", + "UMask": "0x14" + }, + { "EventName": "ls_hw_pf_dc_fills.dram_io_far", "EventCode": "0x5a", "BriefDescription": "Hardware prefetch data cache fills from either DRAM or MMIO in a different NUMA node (same or different socket).", "UMask": "0x40" }, { + "EventName": "ls_hw_pf_dc_fills.dram_io_all", + "EventCode": "0x5a", + "BriefDescription": "Hardware prefetch data cache fills from either DRAM or MMIO in the same or a different NUMA node (same or different socket).", + "UMask": "0x48" + }, + { + "EventName": "ls_hw_pf_dc_fills.far_all", + "EventCode": "0x5a", + "BriefDescription": "Hardware prefetch data cache fills from either cache of another CCX, DRAM or MMIO when the address was in a different NUMA node (same or different socket).", + "UMask": "0x50" + }, + { "EventName": "ls_hw_pf_dc_fills.alternate_memories", "EventCode": "0x5a", "BriefDescription": "Hardware prefetch data cache fills from extension memory.", diff --git a/tools/perf/pmu-events/arch/x86/amdzen5/recommended.json b/tools/perf/pmu-events/arch/x86/amdzen5/recommended.json index c97874039c1e..635d57e3bc15 100644 --- a/tools/perf/pmu-events/arch/x86/amdzen5/recommended.json +++ b/tools/perf/pmu-events/arch/x86/amdzen5/recommended.json @@ -341,5 +341,117 @@ "MetricGroup": "memory_controller", "PerPkg": "1", "ScaleUnit": "1per_memclk" + }, + { + "MetricName": "dram_read_bandwidth_for_local_or_remote_socket", + "BriefDescription": "DRAM read data bandwidth for accesses in local or remote socket.", + "MetricExpr": "(local_or_remote_socket_read_data_beats_dram_0 + local_or_remote_socket_read_data_beats_dram_1 + local_or_remote_socket_read_data_beats_dram_2 + local_or_remote_socket_read_data_beats_dram_3 + local_or_remote_socket_read_data_beats_dram_4 + local_or_remote_socket_read_data_beats_dram_5 + local_or_remote_socket_read_data_beats_dram_6 + local_or_remote_socket_read_data_beats_dram_7 + local_or_remote_socket_read_data_beats_dram_8 + local_or_remote_socket_read_data_beats_dram_9 + local_or_remote_socket_read_data_beats_dram_10 + local_or_remote_socket_read_data_beats_dram_11) / duration_time", + "MetricGroup": "data_fabric", + "PerPkg": "1", + "ScaleUnit": "6.4e-5MB/s" + }, + { + "MetricName": "dram_write_bandwidth_for_local_socket", + "BriefDescription": "DRAM write data bandwidth for accesses in local socket.", + "MetricExpr": "(local_socket_write_data_beats_dram_0 + local_socket_write_data_beats_dram_1 + local_socket_write_data_beats_dram_2 + local_socket_write_data_beats_dram_3 + local_socket_write_data_beats_dram_4 + local_socket_write_data_beats_dram_5 + local_socket_write_data_beats_dram_6 + local_socket_write_data_beats_dram_7 + local_socket_write_data_beats_dram_8 + local_socket_write_data_beats_dram_9 + local_socket_write_data_beats_dram_10 + local_socket_write_data_beats_dram_11) / duration_time", + "MetricGroup": "data_fabric", + "PerPkg": "1", + "ScaleUnit": "6.4e-5MB/s" + }, + { + "MetricName": "dram_write_bandwidth_for_remote_socket", + "BriefDescription": "DRAM write data bandwidth for accesses in remote socket.", + "MetricExpr": "(remote_socket_write_data_beats_dram_0 + remote_socket_write_data_beats_dram_1 + remote_socket_write_data_beats_dram_2 + remote_socket_write_data_beats_dram_3 + remote_socket_write_data_beats_dram_4 + remote_socket_write_data_beats_dram_5 + remote_socket_write_data_beats_dram_6 + remote_socket_write_data_beats_dram_7 + remote_socket_write_data_beats_dram_8 + remote_socket_write_data_beats_dram_9 + remote_socket_write_data_beats_dram_10 + remote_socket_write_data_beats_dram_11) / duration_time", + "MetricGroup": "data_fabric", + "PerPkg": "1", + "ScaleUnit": "6.4e-5MB/s" + }, + { + "MetricName": "dram_write_bandwidth_for_local_or_remote_socket", + "BriefDescription": "DRAM write data bandwidth for accesses in local or remote socket.", + "MetricExpr": "(local_or_remote_socket_write_data_beats_dram_0 + local_or_remote_socket_write_data_beats_dram_1 + local_or_remote_socket_write_data_beats_dram_2 + local_or_remote_socket_write_data_beats_dram_3 + local_or_remote_socket_write_data_beats_dram_4 + local_or_remote_socket_write_data_beats_dram_5 + local_or_remote_socket_write_data_beats_dram_6 + local_or_remote_socket_write_data_beats_dram_7 + local_or_remote_socket_write_data_beats_dram_8 + local_or_remote_socket_write_data_beats_dram_9 + local_or_remote_socket_write_data_beats_dram_10 + local_or_remote_socket_write_data_beats_dram_11) / duration_time", + "MetricGroup": "data_fabric", + "PerPkg": "1", + "ScaleUnit": "6.4e-5MB/s" + }, + { + "MetricName": "upstream_dma_read_bandwidth_for_local_socket", + "BriefDescription": "Upstream DMA read data bandwidth for accesses in local socket.", + "MetricExpr": "(local_socket_upstream_read_data_beats_io_0 + local_socket_upstream_read_data_beats_io_1 + local_socket_upstream_read_data_beats_io_2 + local_socket_upstream_read_data_beats_io_3 + local_socket_upstream_read_data_beats_io_4 + local_socket_upstream_read_data_beats_io_5 + local_socket_upstream_read_data_beats_io_6 + local_socket_upstream_read_data_beats_io_7) / duration_time", + "MetricGroup": "data_fabric", + "PerPkg": "1", + "ScaleUnit": "6.4e-5MB/s" + }, + { + "MetricName": "upstream_dma_write_bandwidth_for_local_socket", + "BriefDescription": "Upstream DMA write data bandwidth for accesses in local socket.", + "MetricExpr": "(local_socket_upstream_write_data_beats_io_0 + local_socket_upstream_write_data_beats_io_1 + local_socket_upstream_write_data_beats_io_2 + local_socket_upstream_write_data_beats_io_3 + local_socket_upstream_write_data_beats_io_4 + local_socket_upstream_write_data_beats_io_5 + local_socket_upstream_write_data_beats_io_6 + local_socket_upstream_write_data_beats_io_7) / duration_time", + "MetricGroup": "data_fabric", + "PerPkg": "1", + "ScaleUnit": "6.4e-5MB/s" + }, + { + "MetricName": "upstream_dma_read_bandwidth_for_remote_socket", + "BriefDescription": "Upstream DMA read data bandwidth for accesses in remote socket.", + "MetricExpr": "(remote_socket_upstream_read_data_beats_io_0 + remote_socket_upstream_read_data_beats_io_1 + remote_socket_upstream_read_data_beats_io_2 + remote_socket_upstream_read_data_beats_io_3 + remote_socket_upstream_read_data_beats_io_4 + remote_socket_upstream_read_data_beats_io_5 + remote_socket_upstream_read_data_beats_io_6 + remote_socket_upstream_read_data_beats_io_7) / duration_time", + "MetricGroup": "data_fabric", + "PerPkg": "1", + "ScaleUnit": "6.4e-5MB/s" + }, + { + "MetricName": "upstream_dma_write_bandwidth_for_remote_socket", + "BriefDescription": "Upstream DMA write data bandwidth for accesses in remote socket.", + "MetricExpr": "(remote_socket_upstream_write_data_beats_io_0 + remote_socket_upstream_write_data_beats_io_1 + remote_socket_upstream_write_data_beats_io_2 + remote_socket_upstream_write_data_beats_io_3 + remote_socket_upstream_write_data_beats_io_4 + remote_socket_upstream_write_data_beats_io_5 + remote_socket_upstream_write_data_beats_io_6 + remote_socket_upstream_write_data_beats_io_7) / duration_time", + "MetricGroup": "data_fabric", + "PerPkg": "1", + "ScaleUnit": "6.4e-5MB/s" + }, + { + "MetricName": "core_inbound_data_bandwidth_for_local_socket", + "BriefDescription": "Core inbound data bandwidth for accesses in local socket.", + "MetricExpr": "(local_socket_inbound_data_beats_cfi_0 + local_socket_inbound_data_beats_cfi_1 + local_socket_inbound_data_beats_cfi_2 + local_socket_inbound_data_beats_cfi_3 + local_socket_inbound_data_beats_cfi_4 + local_socket_inbound_data_beats_cfi_5 + local_socket_inbound_data_beats_cfi_6 + local_socket_inbound_data_beats_cfi_7 + local_socket_inbound_data_beats_cfi_8 + local_socket_inbound_data_beats_cfi_9 + local_socket_inbound_data_beats_cfi_10 + local_socket_inbound_data_beats_cfi_11 + local_socket_inbound_data_beats_cfi_12 + local_socket_inbound_data_beats_cfi_13 + local_socket_inbound_data_beats_cfi_14 + local_socket_inbound_data_beats_cfi_15) / duration_time", + "MetricGroup": "data_fabric", + "PerPkg": "1", + "ScaleUnit": "3.2e-5MB/s" + }, + { + "MetricName": "core_outbound_data_bandwidth_for_local_socket", + "BriefDescription": "Core outbound data bandwidth for accesses in local socket.", + "MetricExpr": "(local_socket_outbound_data_beats_cfi_0 + local_socket_outbound_data_beats_cfi_1 + local_socket_outbound_data_beats_cfi_2 + local_socket_outbound_data_beats_cfi_3 + local_socket_outbound_data_beats_cfi_4 + local_socket_outbound_data_beats_cfi_5 + local_socket_outbound_data_beats_cfi_6 + local_socket_outbound_data_beats_cfi_7 + local_socket_outbound_data_beats_cfi_8 + local_socket_outbound_data_beats_cfi_9 + local_socket_outbound_data_beats_cfi_10 + local_socket_outbound_data_beats_cfi_11 + local_socket_outbound_data_beats_cfi_12 + local_socket_outbound_data_beats_cfi_13 + local_socket_outbound_data_beats_cfi_14 + local_socket_outbound_data_beats_cfi_15) / duration_time", + "MetricGroup": "data_fabric", + "PerPkg": "1", + "ScaleUnit": "6.4e-5MB/s" + }, + { + "MetricName": "core_inbound_data_bandwidth_for_remote_socket", + "BriefDescription": "Core inbound data bandwidth for accesses in remote socket.", + "MetricExpr": "(remote_socket_inbound_data_beats_cfi_0 + remote_socket_inbound_data_beats_cfi_1 + remote_socket_inbound_data_beats_cfi_2 + remote_socket_inbound_data_beats_cfi_3 + remote_socket_inbound_data_beats_cfi_4 + remote_socket_inbound_data_beats_cfi_5 + remote_socket_inbound_data_beats_cfi_6 + remote_socket_inbound_data_beats_cfi_7 + remote_socket_inbound_data_beats_cfi_8 + remote_socket_inbound_data_beats_cfi_9 + remote_socket_inbound_data_beats_cfi_10 + remote_socket_inbound_data_beats_cfi_11 + remote_socket_inbound_data_beats_cfi_12 + remote_socket_inbound_data_beats_cfi_13 + remote_socket_inbound_data_beats_cfi_14 + remote_socket_inbound_data_beats_cfi_15) / duration_time", + "MetricGroup": "data_fabric", + "PerPkg": "1", + "ScaleUnit": "3.2e-5MB/s" + }, + { + "MetricName": "core_outbound_data_bandwidth_for_remote_socket", + "BriefDescription": "Core outbound data bandwidth for accesses in remote socket.", + "MetricExpr": "(remote_socket_outbound_data_beats_cfi_0 + remote_socket_outbound_data_beats_cfi_1 + remote_socket_outbound_data_beats_cfi_2 + remote_socket_outbound_data_beats_cfi_3 + remote_socket_outbound_data_beats_cfi_4 + remote_socket_outbound_data_beats_cfi_5 + remote_socket_outbound_data_beats_cfi_6 + remote_socket_outbound_data_beats_cfi_7 + remote_socket_outbound_data_beats_cfi_8 + remote_socket_outbound_data_beats_cfi_9 + remote_socket_outbound_data_beats_cfi_10 + remote_socket_outbound_data_beats_cfi_11 + remote_socket_outbound_data_beats_cfi_12 + remote_socket_outbound_data_beats_cfi_13 + remote_socket_outbound_data_beats_cfi_14 + remote_socket_outbound_data_beats_cfi_15) / duration_time", + "MetricGroup": "data_fabric", + "PerPkg": "1", + "ScaleUnit": "6.4e-5MB/s" + }, + { + "MetricName": "cross_socket_inbound_data_bandwidth_for_local_socket", + "BriefDescription": "Inbound data bandwidth for accesses between local socket and remote socket.", + "MetricExpr": "(local_socket_inbound_data_beats_link_0 + local_socket_inbound_data_beats_link_1 + local_socket_inbound_data_beats_link_2 + local_socket_inbound_data_beats_link_3 + local_socket_inbound_data_beats_link_4 + local_socket_inbound_data_beats_link_5) / duration_time", + "MetricGroup": "data_fabric", + "PerPkg": "1", + "ScaleUnit": "6.4e-5MB/s" + }, + { + "MetricName": "cross_socket_outbound_data_bandwidth_for_local_socket", + "BriefDescription": "Outbound data bandwidth for accesses between local socket and remote socket.", + "MetricExpr": "(local_socket_outbound_data_beats_link_0 + local_socket_outbound_data_beats_link_1 + local_socket_outbound_data_beats_link_2 + local_socket_outbound_data_beats_link_3 + local_socket_outbound_data_beats_link_4 + local_socket_outbound_data_beats_link_5) / duration_time", + "MetricGroup": "data_fabric", + "PerPkg": "1", + "ScaleUnit": "6.4e-5MB/s" } ] diff --git a/tools/perf/pmu-events/empty-pmu-events.c b/tools/perf/pmu-events/empty-pmu-events.c index c592079982fb..1c7a2cfa321f 100644 --- a/tools/perf/pmu-events/empty-pmu-events.c +++ b/tools/perf/pmu-events/empty-pmu-events.c @@ -19,72 +19,109 @@ struct pmu_table_entry { }; static const char *const big_c_string = -/* offset=0 */ "default_core\000" -/* offset=13 */ "bp_l1_btb_correct\000branch\000L1 BTB Correction\000event=0x8a\000\00000\000\000" -/* offset=72 */ "bp_l2_btb_correct\000branch\000L2 BTB Correction\000event=0x8b\000\00000\000\000" -/* offset=131 */ "l3_cache_rd\000cache\000L3 cache access, read\000event=0x40\000\00000\000Attributable Level 3 cache access, read\000" -/* offset=226 */ "segment_reg_loads.any\000other\000Number of segment register loads\000event=6,period=200000,umask=0x80\000\00000\000\000" -/* offset=325 */ "dispatch_blocked.any\000other\000Memory cluster signals to block micro-op dispatch for any reason\000event=9,period=200000,umask=0x20\000\00000\000\000" -/* offset=455 */ "eist_trans\000other\000Number of Enhanced Intel SpeedStep(R) Technology (EIST) transitions\000event=0x3a,period=200000\000\00000\000\000" -/* offset=570 */ "hisi_sccl,ddrc\000" -/* offset=585 */ "uncore_hisi_ddrc.flux_wcmd\000uncore\000DDRC write commands\000event=2\000\00000\000DDRC write commands\000" -/* offset=671 */ "uncore_cbox\000" -/* offset=683 */ "unc_cbo_xsnp_response.miss_eviction\000uncore\000A cross-core snoop resulted from L3 Eviction which misses in some processor core\000event=0x22,umask=0x81\000\00000\000A cross-core snoop resulted from L3 Eviction which misses in some processor core\000" -/* offset=914 */ "event-hyphen\000uncore\000UNC_CBO_HYPHEN\000event=0xe0\000\00000\000UNC_CBO_HYPHEN\000" -/* offset=979 */ "event-two-hyph\000uncore\000UNC_CBO_TWO_HYPH\000event=0xc0\000\00000\000UNC_CBO_TWO_HYPH\000" -/* offset=1050 */ "hisi_sccl,l3c\000" -/* offset=1064 */ "uncore_hisi_l3c.rd_hit_cpipe\000uncore\000Total read hits\000event=7\000\00000\000Total read hits\000" -/* offset=1144 */ "uncore_imc_free_running\000" -/* offset=1168 */ "uncore_imc_free_running.cache_miss\000uncore\000Total cache misses\000event=0x12\000\00000\000Total cache misses\000" -/* offset=1263 */ "uncore_imc\000" -/* offset=1274 */ "uncore_imc.cache_hits\000uncore\000Total cache hits\000event=0x34\000\00000\000Total cache hits\000" -/* offset=1352 */ "uncore_sys_ddr_pmu\000" -/* offset=1371 */ "sys_ddr_pmu.write_cycles\000uncore\000ddr write-cycles event\000event=0x2b\000v8\00000\000\000" -/* offset=1444 */ "uncore_sys_ccn_pmu\000" -/* offset=1463 */ "sys_ccn_pmu.read_cycles\000uncore\000ccn read-cycles event\000config=0x2c\0000x01\00000\000\000" -/* offset=1537 */ "uncore_sys_cmn_pmu\000" -/* offset=1556 */ "sys_cmn_pmu.hnf_cache_miss\000uncore\000Counts total cache misses in first lookup result (high priority)\000eventid=1,type=5\000(434|436|43c|43a).*\00000\000\000" -/* offset=1696 */ "CPI\000\0001 / IPC\000\000\000\000\000\000\000\00000" -/* offset=1718 */ "IPC\000group1\000inst_retired.any / cpu_clk_unhalted.thread\000\000\000\000\000\000\000\00000" -/* offset=1781 */ "Frontend_Bound_SMT\000\000idq_uops_not_delivered.core / (4 * (cpu_clk_unhalted.thread / 2 * (1 + cpu_clk_unhalted.one_thread_active / cpu_clk_unhalted.ref_xclk)))\000\000\000\000\000\000\000\00000" -/* offset=1947 */ "dcache_miss_cpi\000\000l1d\\-loads\\-misses / inst_retired.any\000\000\000\000\000\000\000\00000" -/* offset=2011 */ "icache_miss_cycles\000\000l1i\\-loads\\-misses / inst_retired.any\000\000\000\000\000\000\000\00000" -/* offset=2078 */ "cache_miss_cycles\000group1\000dcache_miss_cpi + icache_miss_cycles\000\000\000\000\000\000\000\00000" -/* offset=2149 */ "DCache_L2_All_Hits\000\000l2_rqsts.demand_data_rd_hit + l2_rqsts.pf_hit + l2_rqsts.rfo_hit\000\000\000\000\000\000\000\00000" -/* offset=2243 */ "DCache_L2_All_Miss\000\000max(l2_rqsts.all_demand_data_rd - l2_rqsts.demand_data_rd_hit, 0) + l2_rqsts.pf_miss + l2_rqsts.rfo_miss\000\000\000\000\000\000\000\00000" -/* offset=2377 */ "DCache_L2_All\000\000DCache_L2_All_Hits + DCache_L2_All_Miss\000\000\000\000\000\000\000\00000" -/* offset=2441 */ "DCache_L2_Hits\000\000d_ratio(DCache_L2_All_Hits, DCache_L2_All)\000\000\000\000\000\000\000\00000" -/* offset=2509 */ "DCache_L2_Misses\000\000d_ratio(DCache_L2_All_Miss, DCache_L2_All)\000\000\000\000\000\000\000\00000" -/* offset=2579 */ "M1\000\000ipc + M2\000\000\000\000\000\000\000\00000" -/* offset=2601 */ "M2\000\000ipc + M1\000\000\000\000\000\000\000\00000" -/* offset=2623 */ "M3\000\0001 / M3\000\000\000\000\000\000\000\00000" -/* offset=2643 */ "L1D_Cache_Fill_BW\000\00064 * l1d.replacement / 1e9 / duration_time\000\000\000\000\000\000\000\00000" +/* offset=0 */ "tool\000" +/* offset=5 */ "duration_time\000tool\000Wall clock interval time in nanoseconds\000config=1\000\00000\000\000" +/* offset=78 */ "user_time\000tool\000User (non-kernel) time in nanoseconds\000config=2\000\00000\000\000" +/* offset=145 */ "system_time\000tool\000System/kernel time in nanoseconds\000config=3\000\00000\000\000" +/* offset=210 */ "has_pmem\000tool\0001 if persistent memory installed otherwise 0\000config=4\000\00000\000\000" +/* offset=283 */ "num_cores\000tool\000Number of cores. A core consists of 1 or more thread, with each thread being associated with a logical Linux CPU\000config=5\000\00000\000\000" +/* offset=425 */ "num_cpus\000tool\000Number of logical Linux CPUs. There may be multiple such CPUs on a core\000config=6\000\00000\000\000" +/* offset=525 */ "num_cpus_online\000tool\000Number of online logical Linux CPUs. There may be multiple such CPUs on a core\000config=7\000\00000\000\000" +/* offset=639 */ "num_dies\000tool\000Number of dies. Each die has 1 or more cores\000config=8\000\00000\000\000" +/* offset=712 */ "num_packages\000tool\000Number of packages. Each package has 1 or more die\000config=9\000\00000\000\000" +/* offset=795 */ "slots\000tool\000Number of functional units that in parallel can execute parts of an instruction\000config=0xa\000\00000\000\000" +/* offset=902 */ "smt_on\000tool\0001 if simultaneous multithreading (aka hyperthreading) is enable otherwise 0\000config=0xb\000\00000\000\000" +/* offset=1006 */ "system_tsc_freq\000tool\000The amount a Time Stamp Counter (TSC) increases per second\000config=0xc\000\00000\000\000" +/* offset=1102 */ "default_core\000" +/* offset=1115 */ "bp_l1_btb_correct\000branch\000L1 BTB Correction\000event=0x8a\000\00000\000\000" +/* offset=1174 */ "bp_l2_btb_correct\000branch\000L2 BTB Correction\000event=0x8b\000\00000\000\000" +/* offset=1233 */ "l3_cache_rd\000cache\000L3 cache access, read\000event=0x40\000\00000\000Attributable Level 3 cache access, read\000" +/* offset=1328 */ "segment_reg_loads.any\000other\000Number of segment register loads\000event=6,period=200000,umask=0x80\000\00000\000\000" +/* offset=1427 */ "dispatch_blocked.any\000other\000Memory cluster signals to block micro-op dispatch for any reason\000event=9,period=200000,umask=0x20\000\00000\000\000" +/* offset=1557 */ "eist_trans\000other\000Number of Enhanced Intel SpeedStep(R) Technology (EIST) transitions\000event=0x3a,period=200000\000\00000\000\000" +/* offset=1672 */ "hisi_sccl,ddrc\000" +/* offset=1687 */ "uncore_hisi_ddrc.flux_wcmd\000uncore\000DDRC write commands\000event=2\000\00000\000DDRC write commands\000" +/* offset=1773 */ "uncore_cbox\000" +/* offset=1785 */ "unc_cbo_xsnp_response.miss_eviction\000uncore\000A cross-core snoop resulted from L3 Eviction which misses in some processor core\000event=0x22,umask=0x81\000\00000\000A cross-core snoop resulted from L3 Eviction which misses in some processor core\000" +/* offset=2016 */ "event-hyphen\000uncore\000UNC_CBO_HYPHEN\000event=0xe0\000\00000\000UNC_CBO_HYPHEN\000" +/* offset=2081 */ "event-two-hyph\000uncore\000UNC_CBO_TWO_HYPH\000event=0xc0\000\00000\000UNC_CBO_TWO_HYPH\000" +/* offset=2152 */ "hisi_sccl,l3c\000" +/* offset=2166 */ "uncore_hisi_l3c.rd_hit_cpipe\000uncore\000Total read hits\000event=7\000\00000\000Total read hits\000" +/* offset=2246 */ "uncore_imc_free_running\000" +/* offset=2270 */ "uncore_imc_free_running.cache_miss\000uncore\000Total cache misses\000event=0x12\000\00000\000Total cache misses\000" +/* offset=2365 */ "uncore_imc\000" +/* offset=2376 */ "uncore_imc.cache_hits\000uncore\000Total cache hits\000event=0x34\000\00000\000Total cache hits\000" +/* offset=2454 */ "uncore_sys_ddr_pmu\000" +/* offset=2473 */ "sys_ddr_pmu.write_cycles\000uncore\000ddr write-cycles event\000event=0x2b\000v8\00000\000\000" +/* offset=2546 */ "uncore_sys_ccn_pmu\000" +/* offset=2565 */ "sys_ccn_pmu.read_cycles\000uncore\000ccn read-cycles event\000config=0x2c\0000x01\00000\000\000" +/* offset=2639 */ "uncore_sys_cmn_pmu\000" +/* offset=2658 */ "sys_cmn_pmu.hnf_cache_miss\000uncore\000Counts total cache misses in first lookup result (high priority)\000eventid=1,type=5\000(434|436|43c|43a).*\00000\000\000" +/* offset=2798 */ "CPI\000\0001 / IPC\000\000\000\000\000\000\000\00000" +/* offset=2820 */ "IPC\000group1\000inst_retired.any / cpu_clk_unhalted.thread\000\000\000\000\000\000\000\00000" +/* offset=2883 */ "Frontend_Bound_SMT\000\000idq_uops_not_delivered.core / (4 * (cpu_clk_unhalted.thread / 2 * (1 + cpu_clk_unhalted.one_thread_active / cpu_clk_unhalted.ref_xclk)))\000\000\000\000\000\000\000\00000" +/* offset=3049 */ "dcache_miss_cpi\000\000l1d\\-loads\\-misses / inst_retired.any\000\000\000\000\000\000\000\00000" +/* offset=3113 */ "icache_miss_cycles\000\000l1i\\-loads\\-misses / inst_retired.any\000\000\000\000\000\000\000\00000" +/* offset=3180 */ "cache_miss_cycles\000group1\000dcache_miss_cpi + icache_miss_cycles\000\000\000\000\000\000\000\00000" +/* offset=3251 */ "DCache_L2_All_Hits\000\000l2_rqsts.demand_data_rd_hit + l2_rqsts.pf_hit + l2_rqsts.rfo_hit\000\000\000\000\000\000\000\00000" +/* offset=3345 */ "DCache_L2_All_Miss\000\000max(l2_rqsts.all_demand_data_rd - l2_rqsts.demand_data_rd_hit, 0) + l2_rqsts.pf_miss + l2_rqsts.rfo_miss\000\000\000\000\000\000\000\00000" +/* offset=3479 */ "DCache_L2_All\000\000DCache_L2_All_Hits + DCache_L2_All_Miss\000\000\000\000\000\000\000\00000" +/* offset=3543 */ "DCache_L2_Hits\000\000d_ratio(DCache_L2_All_Hits, DCache_L2_All)\000\000\000\000\000\000\000\00000" +/* offset=3611 */ "DCache_L2_Misses\000\000d_ratio(DCache_L2_All_Miss, DCache_L2_All)\000\000\000\000\000\000\000\00000" +/* offset=3681 */ "M1\000\000ipc + M2\000\000\000\000\000\000\000\00000" +/* offset=3703 */ "M2\000\000ipc + M1\000\000\000\000\000\000\000\00000" +/* offset=3725 */ "M3\000\0001 / M3\000\000\000\000\000\000\000\00000" +/* offset=3745 */ "L1D_Cache_Fill_BW\000\00064 * l1d.replacement / 1e9 / duration_time\000\000\000\000\000\000\000\00000" ; +static const struct compact_pmu_event pmu_events__common_tool[] = { +{ 5 }, /* duration_time\000tool\000Wall clock interval time in nanoseconds\000config=1\000\00000\000\000 */ +{ 210 }, /* has_pmem\000tool\0001 if persistent memory installed otherwise 0\000config=4\000\00000\000\000 */ +{ 283 }, /* num_cores\000tool\000Number of cores. A core consists of 1 or more thread, with each thread being associated with a logical Linux CPU\000config=5\000\00000\000\000 */ +{ 425 }, /* num_cpus\000tool\000Number of logical Linux CPUs. There may be multiple such CPUs on a core\000config=6\000\00000\000\000 */ +{ 525 }, /* num_cpus_online\000tool\000Number of online logical Linux CPUs. There may be multiple such CPUs on a core\000config=7\000\00000\000\000 */ +{ 639 }, /* num_dies\000tool\000Number of dies. Each die has 1 or more cores\000config=8\000\00000\000\000 */ +{ 712 }, /* num_packages\000tool\000Number of packages. Each package has 1 or more die\000config=9\000\00000\000\000 */ +{ 795 }, /* slots\000tool\000Number of functional units that in parallel can execute parts of an instruction\000config=0xa\000\00000\000\000 */ +{ 902 }, /* smt_on\000tool\0001 if simultaneous multithreading (aka hyperthreading) is enable otherwise 0\000config=0xb\000\00000\000\000 */ +{ 145 }, /* system_time\000tool\000System/kernel time in nanoseconds\000config=3\000\00000\000\000 */ +{ 1006 }, /* system_tsc_freq\000tool\000The amount a Time Stamp Counter (TSC) increases per second\000config=0xc\000\00000\000\000 */ +{ 78 }, /* user_time\000tool\000User (non-kernel) time in nanoseconds\000config=2\000\00000\000\000 */ + +}; + +const struct pmu_table_entry pmu_events__common[] = { +{ + .entries = pmu_events__common_tool, + .num_entries = ARRAY_SIZE(pmu_events__common_tool), + .pmu_name = { 0 /* tool\000 */ }, +}, +}; + static const struct compact_pmu_event pmu_events__test_soc_cpu_default_core[] = { -{ 13 }, /* bp_l1_btb_correct\000branch\000L1 BTB Correction\000event=0x8a\000\00000\000\000 */ -{ 72 }, /* bp_l2_btb_correct\000branch\000L2 BTB Correction\000event=0x8b\000\00000\000\000 */ -{ 325 }, /* dispatch_blocked.any\000other\000Memory cluster signals to block micro-op dispatch for any reason\000event=9,period=200000,umask=0x20\000\00000\000\000 */ -{ 455 }, /* eist_trans\000other\000Number of Enhanced Intel SpeedStep(R) Technology (EIST) transitions\000event=0x3a,period=200000\000\00000\000\000 */ -{ 131 }, /* l3_cache_rd\000cache\000L3 cache access, read\000event=0x40\000\00000\000Attributable Level 3 cache access, read\000 */ -{ 226 }, /* segment_reg_loads.any\000other\000Number of segment register loads\000event=6,period=200000,umask=0x80\000\00000\000\000 */ +{ 1115 }, /* bp_l1_btb_correct\000branch\000L1 BTB Correction\000event=0x8a\000\00000\000\000 */ +{ 1174 }, /* bp_l2_btb_correct\000branch\000L2 BTB Correction\000event=0x8b\000\00000\000\000 */ +{ 1427 }, /* dispatch_blocked.any\000other\000Memory cluster signals to block micro-op dispatch for any reason\000event=9,period=200000,umask=0x20\000\00000\000\000 */ +{ 1557 }, /* eist_trans\000other\000Number of Enhanced Intel SpeedStep(R) Technology (EIST) transitions\000event=0x3a,period=200000\000\00000\000\000 */ +{ 1233 }, /* l3_cache_rd\000cache\000L3 cache access, read\000event=0x40\000\00000\000Attributable Level 3 cache access, read\000 */ +{ 1328 }, /* segment_reg_loads.any\000other\000Number of segment register loads\000event=6,period=200000,umask=0x80\000\00000\000\000 */ }; static const struct compact_pmu_event pmu_events__test_soc_cpu_hisi_sccl_ddrc[] = { -{ 585 }, /* uncore_hisi_ddrc.flux_wcmd\000uncore\000DDRC write commands\000event=2\000\00000\000DDRC write commands\000 */ +{ 1687 }, /* uncore_hisi_ddrc.flux_wcmd\000uncore\000DDRC write commands\000event=2\000\00000\000DDRC write commands\000 */ }; static const struct compact_pmu_event pmu_events__test_soc_cpu_hisi_sccl_l3c[] = { -{ 1064 }, /* uncore_hisi_l3c.rd_hit_cpipe\000uncore\000Total read hits\000event=7\000\00000\000Total read hits\000 */ +{ 2166 }, /* uncore_hisi_l3c.rd_hit_cpipe\000uncore\000Total read hits\000event=7\000\00000\000Total read hits\000 */ }; static const struct compact_pmu_event pmu_events__test_soc_cpu_uncore_cbox[] = { -{ 914 }, /* event-hyphen\000uncore\000UNC_CBO_HYPHEN\000event=0xe0\000\00000\000UNC_CBO_HYPHEN\000 */ -{ 979 }, /* event-two-hyph\000uncore\000UNC_CBO_TWO_HYPH\000event=0xc0\000\00000\000UNC_CBO_TWO_HYPH\000 */ -{ 683 }, /* unc_cbo_xsnp_response.miss_eviction\000uncore\000A cross-core snoop resulted from L3 Eviction which misses in some processor core\000event=0x22,umask=0x81\000\00000\000A cross-core snoop resulted from L3 Eviction which misses in some processor core\000 */ +{ 2016 }, /* event-hyphen\000uncore\000UNC_CBO_HYPHEN\000event=0xe0\000\00000\000UNC_CBO_HYPHEN\000 */ +{ 2081 }, /* event-two-hyph\000uncore\000UNC_CBO_TWO_HYPH\000event=0xc0\000\00000\000UNC_CBO_TWO_HYPH\000 */ +{ 1785 }, /* unc_cbo_xsnp_response.miss_eviction\000uncore\000A cross-core snoop resulted from L3 Eviction which misses in some processor core\000event=0x22,umask=0x81\000\00000\000A cross-core snoop resulted from L3 Eviction which misses in some processor core\000 */ }; static const struct compact_pmu_event pmu_events__test_soc_cpu_uncore_imc[] = { -{ 1274 }, /* uncore_imc.cache_hits\000uncore\000Total cache hits\000event=0x34\000\00000\000Total cache hits\000 */ +{ 2376 }, /* uncore_imc.cache_hits\000uncore\000Total cache hits\000event=0x34\000\00000\000Total cache hits\000 */ }; static const struct compact_pmu_event pmu_events__test_soc_cpu_uncore_imc_free_running[] = { -{ 1168 }, /* uncore_imc_free_running.cache_miss\000uncore\000Total cache misses\000event=0x12\000\00000\000Total cache misses\000 */ +{ 2270 }, /* uncore_imc_free_running.cache_miss\000uncore\000Total cache misses\000event=0x12\000\00000\000Total cache misses\000 */ }; @@ -92,51 +129,51 @@ const struct pmu_table_entry pmu_events__test_soc_cpu[] = { { .entries = pmu_events__test_soc_cpu_default_core, .num_entries = ARRAY_SIZE(pmu_events__test_soc_cpu_default_core), - .pmu_name = { 0 /* default_core\000 */ }, + .pmu_name = { 1102 /* default_core\000 */ }, }, { .entries = pmu_events__test_soc_cpu_hisi_sccl_ddrc, .num_entries = ARRAY_SIZE(pmu_events__test_soc_cpu_hisi_sccl_ddrc), - .pmu_name = { 570 /* hisi_sccl,ddrc\000 */ }, + .pmu_name = { 1672 /* hisi_sccl,ddrc\000 */ }, }, { .entries = pmu_events__test_soc_cpu_hisi_sccl_l3c, .num_entries = ARRAY_SIZE(pmu_events__test_soc_cpu_hisi_sccl_l3c), - .pmu_name = { 1050 /* hisi_sccl,l3c\000 */ }, + .pmu_name = { 2152 /* hisi_sccl,l3c\000 */ }, }, { .entries = pmu_events__test_soc_cpu_uncore_cbox, .num_entries = ARRAY_SIZE(pmu_events__test_soc_cpu_uncore_cbox), - .pmu_name = { 671 /* uncore_cbox\000 */ }, + .pmu_name = { 1773 /* uncore_cbox\000 */ }, }, { .entries = pmu_events__test_soc_cpu_uncore_imc, .num_entries = ARRAY_SIZE(pmu_events__test_soc_cpu_uncore_imc), - .pmu_name = { 1263 /* uncore_imc\000 */ }, + .pmu_name = { 2365 /* uncore_imc\000 */ }, }, { .entries = pmu_events__test_soc_cpu_uncore_imc_free_running, .num_entries = ARRAY_SIZE(pmu_events__test_soc_cpu_uncore_imc_free_running), - .pmu_name = { 1144 /* uncore_imc_free_running\000 */ }, + .pmu_name = { 2246 /* uncore_imc_free_running\000 */ }, }, }; static const struct compact_pmu_event pmu_metrics__test_soc_cpu_default_core[] = { -{ 1696 }, /* CPI\000\0001 / IPC\000\000\000\000\000\000\000\00000 */ -{ 2377 }, /* DCache_L2_All\000\000DCache_L2_All_Hits + DCache_L2_All_Miss\000\000\000\000\000\000\000\00000 */ -{ 2149 }, /* DCache_L2_All_Hits\000\000l2_rqsts.demand_data_rd_hit + l2_rqsts.pf_hit + l2_rqsts.rfo_hit\000\000\000\000\000\000\000\00000 */ -{ 2243 }, /* DCache_L2_All_Miss\000\000max(l2_rqsts.all_demand_data_rd - l2_rqsts.demand_data_rd_hit, 0) + l2_rqsts.pf_miss + l2_rqsts.rfo_miss\000\000\000\000\000\000\000\00000 */ -{ 2441 }, /* DCache_L2_Hits\000\000d_ratio(DCache_L2_All_Hits, DCache_L2_All)\000\000\000\000\000\000\000\00000 */ -{ 2509 }, /* DCache_L2_Misses\000\000d_ratio(DCache_L2_All_Miss, DCache_L2_All)\000\000\000\000\000\000\000\00000 */ -{ 1781 }, /* Frontend_Bound_SMT\000\000idq_uops_not_delivered.core / (4 * (cpu_clk_unhalted.thread / 2 * (1 + cpu_clk_unhalted.one_thread_active / cpu_clk_unhalted.ref_xclk)))\000\000\000\000\000\000\000\00000 */ -{ 1718 }, /* IPC\000group1\000inst_retired.any / cpu_clk_unhalted.thread\000\000\000\000\000\000\000\00000 */ -{ 2643 }, /* L1D_Cache_Fill_BW\000\00064 * l1d.replacement / 1e9 / duration_time\000\000\000\000\000\000\000\00000 */ -{ 2579 }, /* M1\000\000ipc + M2\000\000\000\000\000\000\000\00000 */ -{ 2601 }, /* M2\000\000ipc + M1\000\000\000\000\000\000\000\00000 */ -{ 2623 }, /* M3\000\0001 / M3\000\000\000\000\000\000\000\00000 */ -{ 2078 }, /* cache_miss_cycles\000group1\000dcache_miss_cpi + icache_miss_cycles\000\000\000\000\000\000\000\00000 */ -{ 1947 }, /* dcache_miss_cpi\000\000l1d\\-loads\\-misses / inst_retired.any\000\000\000\000\000\000\000\00000 */ -{ 2011 }, /* icache_miss_cycles\000\000l1i\\-loads\\-misses / inst_retired.any\000\000\000\000\000\000\000\00000 */ +{ 2798 }, /* CPI\000\0001 / IPC\000\000\000\000\000\000\000\00000 */ +{ 3479 }, /* DCache_L2_All\000\000DCache_L2_All_Hits + DCache_L2_All_Miss\000\000\000\000\000\000\000\00000 */ +{ 3251 }, /* DCache_L2_All_Hits\000\000l2_rqsts.demand_data_rd_hit + l2_rqsts.pf_hit + l2_rqsts.rfo_hit\000\000\000\000\000\000\000\00000 */ +{ 3345 }, /* DCache_L2_All_Miss\000\000max(l2_rqsts.all_demand_data_rd - l2_rqsts.demand_data_rd_hit, 0) + l2_rqsts.pf_miss + l2_rqsts.rfo_miss\000\000\000\000\000\000\000\00000 */ +{ 3543 }, /* DCache_L2_Hits\000\000d_ratio(DCache_L2_All_Hits, DCache_L2_All)\000\000\000\000\000\000\000\00000 */ +{ 3611 }, /* DCache_L2_Misses\000\000d_ratio(DCache_L2_All_Miss, DCache_L2_All)\000\000\000\000\000\000\000\00000 */ +{ 2883 }, /* Frontend_Bound_SMT\000\000idq_uops_not_delivered.core / (4 * (cpu_clk_unhalted.thread / 2 * (1 + cpu_clk_unhalted.one_thread_active / cpu_clk_unhalted.ref_xclk)))\000\000\000\000\000\000\000\00000 */ +{ 2820 }, /* IPC\000group1\000inst_retired.any / cpu_clk_unhalted.thread\000\000\000\000\000\000\000\00000 */ +{ 3745 }, /* L1D_Cache_Fill_BW\000\00064 * l1d.replacement / 1e9 / duration_time\000\000\000\000\000\000\000\00000 */ +{ 3681 }, /* M1\000\000ipc + M2\000\000\000\000\000\000\000\00000 */ +{ 3703 }, /* M2\000\000ipc + M1\000\000\000\000\000\000\000\00000 */ +{ 3725 }, /* M3\000\0001 / M3\000\000\000\000\000\000\000\00000 */ +{ 3180 }, /* cache_miss_cycles\000group1\000dcache_miss_cpi + icache_miss_cycles\000\000\000\000\000\000\000\00000 */ +{ 3049 }, /* dcache_miss_cpi\000\000l1d\\-loads\\-misses / inst_retired.any\000\000\000\000\000\000\000\00000 */ +{ 3113 }, /* icache_miss_cycles\000\000l1i\\-loads\\-misses / inst_retired.any\000\000\000\000\000\000\000\00000 */ }; @@ -144,18 +181,18 @@ const struct pmu_table_entry pmu_metrics__test_soc_cpu[] = { { .entries = pmu_metrics__test_soc_cpu_default_core, .num_entries = ARRAY_SIZE(pmu_metrics__test_soc_cpu_default_core), - .pmu_name = { 0 /* default_core\000 */ }, + .pmu_name = { 1102 /* default_core\000 */ }, }, }; static const struct compact_pmu_event pmu_events__test_soc_sys_uncore_sys_ccn_pmu[] = { -{ 1463 }, /* sys_ccn_pmu.read_cycles\000uncore\000ccn read-cycles event\000config=0x2c\0000x01\00000\000\000 */ +{ 2565 }, /* sys_ccn_pmu.read_cycles\000uncore\000ccn read-cycles event\000config=0x2c\0000x01\00000\000\000 */ }; static const struct compact_pmu_event pmu_events__test_soc_sys_uncore_sys_cmn_pmu[] = { -{ 1556 }, /* sys_cmn_pmu.hnf_cache_miss\000uncore\000Counts total cache misses in first lookup result (high priority)\000eventid=1,type=5\000(434|436|43c|43a).*\00000\000\000 */ +{ 2658 }, /* sys_cmn_pmu.hnf_cache_miss\000uncore\000Counts total cache misses in first lookup result (high priority)\000eventid=1,type=5\000(434|436|43c|43a).*\00000\000\000 */ }; static const struct compact_pmu_event pmu_events__test_soc_sys_uncore_sys_ddr_pmu[] = { -{ 1371 }, /* sys_ddr_pmu.write_cycles\000uncore\000ddr write-cycles event\000event=0x2b\000v8\00000\000\000 */ +{ 2473 }, /* sys_ddr_pmu.write_cycles\000uncore\000ddr write-cycles event\000event=0x2b\000v8\00000\000\000 */ }; @@ -163,17 +200,17 @@ const struct pmu_table_entry pmu_events__test_soc_sys[] = { { .entries = pmu_events__test_soc_sys_uncore_sys_ccn_pmu, .num_entries = ARRAY_SIZE(pmu_events__test_soc_sys_uncore_sys_ccn_pmu), - .pmu_name = { 1444 /* uncore_sys_ccn_pmu\000 */ }, + .pmu_name = { 2546 /* uncore_sys_ccn_pmu\000 */ }, }, { .entries = pmu_events__test_soc_sys_uncore_sys_cmn_pmu, .num_entries = ARRAY_SIZE(pmu_events__test_soc_sys_uncore_sys_cmn_pmu), - .pmu_name = { 1537 /* uncore_sys_cmn_pmu\000 */ }, + .pmu_name = { 2639 /* uncore_sys_cmn_pmu\000 */ }, }, { .entries = pmu_events__test_soc_sys_uncore_sys_ddr_pmu, .num_entries = ARRAY_SIZE(pmu_events__test_soc_sys_uncore_sys_ddr_pmu), - .pmu_name = { 1352 /* uncore_sys_ddr_pmu\000 */ }, + .pmu_name = { 2454 /* uncore_sys_ddr_pmu\000 */ }, }, }; @@ -211,6 +248,15 @@ struct pmu_events_map { */ const struct pmu_events_map pmu_events_map[] = { { + .arch = "common", + .cpuid = "common", + .event_table = { + .pmus = pmu_events__common, + .num_pmus = ARRAY_SIZE(pmu_events__common), + }, + .metric_table = {}, +}, +{ .arch = "testarch", .cpuid = "testcpu", .event_table = { @@ -380,7 +426,7 @@ int pmu_events_table__for_each_event(const struct pmu_events_table *table, continue; ret = pmu_events_table__for_each_event_pmu(table, table_pmu, fn, data); - if (pmu || ret) + if (ret) return ret; } return 0; @@ -457,11 +503,11 @@ int pmu_metrics_table__for_each_metric(const struct pmu_metrics_table *table, return 0; } -static const struct pmu_events_map *map_for_pmu(struct perf_pmu *pmu) +static const struct pmu_events_map *map_for_cpu(struct perf_cpu cpu) { static struct { const struct pmu_events_map *map; - struct perf_pmu *pmu; + struct perf_cpu cpu; } last_result; static struct { const struct pmu_events_map *map; @@ -472,10 +518,10 @@ static const struct pmu_events_map *map_for_pmu(struct perf_pmu *pmu) char *cpuid = NULL; size_t i; - if (has_last_result && last_result.pmu == pmu) + if (has_last_result && last_result.cpu.cpu == cpu.cpu) return last_result.map; - cpuid = perf_pmu__getcpuid(pmu); + cpuid = get_cpuid_allow_env_override(cpu); /* * On some platforms which uses cpus map, cpuid can be NULL for @@ -506,12 +552,21 @@ static const struct pmu_events_map *map_for_pmu(struct perf_pmu *pmu) has_last_map_search = true; } out_update_last_result: - last_result.pmu = pmu; + last_result.cpu = cpu; last_result.map = map; has_last_result = true; return map; } +static const struct pmu_events_map *map_for_pmu(struct perf_pmu *pmu) +{ + struct perf_cpu cpu = {-1}; + + if (pmu) + cpu = perf_cpu_map__min(pmu->cpus); + return map_for_cpu(cpu); +} + const struct pmu_events_table *perf_pmu__find_events_table(struct perf_pmu *pmu) { const struct pmu_events_map *map = map_for_pmu(pmu); @@ -532,24 +587,12 @@ const struct pmu_events_table *perf_pmu__find_events_table(struct perf_pmu *pmu) return NULL; } -const struct pmu_metrics_table *perf_pmu__find_metrics_table(struct perf_pmu *pmu) +const struct pmu_metrics_table *pmu_metrics_table__find(void) { - const struct pmu_events_map *map = map_for_pmu(pmu); - - if (!map) - return NULL; + struct perf_cpu cpu = {-1}; + const struct pmu_events_map *map = map_for_cpu(cpu); - if (!pmu) - return &map->metric_table; - - for (size_t i = 0; i < map->metric_table.num_pmus; i++) { - const struct pmu_table_entry *table_pmu = &map->metric_table.pmus[i]; - const char *pmu_name = &big_c_string[table_pmu->pmu_name.offset]; - - if (pmu__name_match(pmu, pmu_name)) - return &map->metric_table; - } - return NULL; + return map ? &map->metric_table : NULL; } const struct pmu_events_table *find_core_events_table(const char *arch, const char *cpuid) diff --git a/tools/perf/pmu-events/jevents.py b/tools/perf/pmu-events/jevents.py index bb0a5d92df4a..d781a377757a 100755 --- a/tools/perf/pmu-events/jevents.py +++ b/tools/perf/pmu-events/jevents.py @@ -292,6 +292,7 @@ class JsonEvent: 'cpu_atom': 'cpu_atom', 'ali_drw': 'ali_drw', 'arm_cmn': 'arm_cmn', + 'tool': 'tool', } return table[unit] if unit in table else f'uncore_{unit.lower()}' @@ -722,6 +723,17 @@ const struct pmu_events_map pmu_events_map[] = { \t} }, """) + elif arch == 'common': + _args.output_file.write("""{ +\t.arch = "common", +\t.cpuid = "common", +\t.event_table = { +\t\t.pmus = pmu_events__common, +\t\t.num_pmus = ARRAY_SIZE(pmu_events__common), +\t}, +\t.metric_table = {}, +}, +""") else: with open(f'{_args.starting_dir}/{arch}/mapfile.csv') as csvfile: table = csv.reader(csvfile) @@ -930,7 +942,7 @@ int pmu_events_table__for_each_event(const struct pmu_events_table *table, continue; ret = pmu_events_table__for_each_event_pmu(table, table_pmu, fn, data); - if (pmu || ret) + if (ret) return ret; } return 0; @@ -1007,11 +1019,11 @@ int pmu_metrics_table__for_each_metric(const struct pmu_metrics_table *table, return 0; } -static const struct pmu_events_map *map_for_pmu(struct perf_pmu *pmu) +static const struct pmu_events_map *map_for_cpu(struct perf_cpu cpu) { static struct { const struct pmu_events_map *map; - struct perf_pmu *pmu; + struct perf_cpu cpu; } last_result; static struct { const struct pmu_events_map *map; @@ -1022,10 +1034,10 @@ static const struct pmu_events_map *map_for_pmu(struct perf_pmu *pmu) char *cpuid = NULL; size_t i; - if (has_last_result && last_result.pmu == pmu) + if (has_last_result && last_result.cpu.cpu == cpu.cpu) return last_result.map; - cpuid = perf_pmu__getcpuid(pmu); + cpuid = get_cpuid_allow_env_override(cpu); /* * On some platforms which uses cpus map, cpuid can be NULL for @@ -1056,12 +1068,21 @@ static const struct pmu_events_map *map_for_pmu(struct perf_pmu *pmu) has_last_map_search = true; } out_update_last_result: - last_result.pmu = pmu; + last_result.cpu = cpu; last_result.map = map; has_last_result = true; return map; } +static const struct pmu_events_map *map_for_pmu(struct perf_pmu *pmu) +{ + struct perf_cpu cpu = {-1}; + + if (pmu) + cpu = perf_cpu_map__min(pmu->cpus); + return map_for_cpu(cpu); +} + const struct pmu_events_table *perf_pmu__find_events_table(struct perf_pmu *pmu) { const struct pmu_events_map *map = map_for_pmu(pmu); @@ -1082,24 +1103,12 @@ const struct pmu_events_table *perf_pmu__find_events_table(struct perf_pmu *pmu) return NULL; } -const struct pmu_metrics_table *perf_pmu__find_metrics_table(struct perf_pmu *pmu) +const struct pmu_metrics_table *pmu_metrics_table__find(void) { - const struct pmu_events_map *map = map_for_pmu(pmu); - - if (!map) - return NULL; - - if (!pmu) - return &map->metric_table; + struct perf_cpu cpu = {-1}; + const struct pmu_events_map *map = map_for_cpu(cpu); - for (size_t i = 0; i < map->metric_table.num_pmus; i++) { - const struct pmu_table_entry *table_pmu = &map->metric_table.pmus[i]; - const char *pmu_name = &big_c_string[table_pmu->pmu_name.offset]; - - if (pmu__name_match(pmu, pmu_name)) - return &map->metric_table; - } - return NULL; + return map ? &map->metric_table : NULL; } const struct pmu_events_table *find_core_events_table(const char *arch, const char *cpuid) @@ -1241,7 +1250,7 @@ def main() -> None: if len(parents) == _args.model.split(',')[0].count('/'): # We're testing the correct directory. item_path = '/'.join(parents) + ('/' if len(parents) > 0 else '') + item.name - if 'test' not in item_path and item_path not in _args.model.split(','): + if 'test' not in item_path and 'common' not in item_path and item_path not in _args.model.split(','): continue action(parents, item) if item.is_dir(): @@ -1289,7 +1298,7 @@ struct pmu_table_entry { for item in os.scandir(_args.starting_dir): if not item.is_dir(): continue - if item.name == _args.arch or _args.arch == 'all' or item.name == 'test': + if item.name == _args.arch or _args.arch == 'all' or item.name == 'test' or item.name == 'common': archs.append(item.name) if len(archs) < 2 and _args.arch != 'none': diff --git a/tools/perf/pmu-events/pmu-events.h b/tools/perf/pmu-events/pmu-events.h index 5435ad92180c..675562e6f770 100644 --- a/tools/perf/pmu-events/pmu-events.h +++ b/tools/perf/pmu-events/pmu-events.h @@ -103,7 +103,7 @@ int pmu_metrics_table__for_each_metric(const struct pmu_metrics_table *table, pm void *data); const struct pmu_events_table *perf_pmu__find_events_table(struct perf_pmu *pmu); -const struct pmu_metrics_table *perf_pmu__find_metrics_table(struct perf_pmu *pmu); +const struct pmu_metrics_table *pmu_metrics_table__find(void); const struct pmu_events_table *find_core_events_table(const char *arch, const char *cpuid); const struct pmu_metrics_table *find_core_metrics_table(const char *arch, const char *cpuid); int pmu_for_each_core_event(pmu_event_iter_fn fn, void *data); diff --git a/tools/perf/scripts/python/Perf-Trace-Util/Context.c b/tools/perf/scripts/python/Perf-Trace-Util/Context.c index 3954bd1587ce..01f54d6724a5 100644 --- a/tools/perf/scripts/python/Perf-Trace-Util/Context.c +++ b/tools/perf/scripts/python/Perf-Trace-Util/Context.c @@ -12,6 +12,7 @@ #define PY_SSIZE_T_CLEAN #include <Python.h> +#include "../../../util/config.h" #include "../../../util/trace-event.h" #include "../../../util/event.h" #include "../../../util/symbol.h" @@ -182,6 +183,15 @@ static PyObject *perf_sample_srccode(PyObject *obj, PyObject *args) return perf_sample_src(obj, args, true); } +static PyObject *__perf_config_get(PyObject *obj, PyObject *args) +{ + const char *config_name; + + if (!PyArg_ParseTuple(args, "s", &config_name)) + return NULL; + return Py_BuildValue("s", perf_config_get(config_name)); +} + static PyMethodDef ContextMethods[] = { #ifdef HAVE_LIBTRACEEVENT { "common_pc", perf_trace_context_common_pc, METH_VARARGS, @@ -199,6 +209,7 @@ static PyMethodDef ContextMethods[] = { METH_VARARGS, "Get source file name and line number."}, { "perf_sample_srccode", perf_sample_srccode, METH_VARARGS, "Get source file name, line number and line."}, + { "perf_config_get", __perf_config_get, METH_VARARGS, "Get perf config entry"}, { NULL, NULL, 0, NULL} }; diff --git a/tools/perf/scripts/python/arm-cs-trace-disasm.py b/tools/perf/scripts/python/arm-cs-trace-disasm.py index 7aff02d84ffb..ba208c90d631 100755 --- a/tools/perf/scripts/python/arm-cs-trace-disasm.py +++ b/tools/perf/scripts/python/arm-cs-trace-disasm.py @@ -11,36 +11,74 @@ import os from os import path import re from subprocess import * -from optparse import OptionParser, make_option +import argparse +import platform -from perf_trace_context import perf_set_itrace_options, \ - perf_sample_insn, perf_sample_srccode +from perf_trace_context import perf_sample_srccode, perf_config_get # Below are some example commands for using this script. +# Note a --kcore recording is required for accurate decode +# due to the alternatives patching mechanism. However this +# script only supports reading vmlinux for disassembly dump, +# meaning that any patched instructions will appear +# as unpatched, but the instruction ranges themselves will +# be correct. In addition to this, source line info comes +# from Perf, and when using kcore there is no debug info. The +# following lists the supported features in each mode: +# +# +-----------+-----------------+------------------+------------------+ +# | Recording | Accurate decode | Source line dump | Disassembly dump | +# +-----------+-----------------+------------------+------------------+ +# | --kcore | yes | no | yes | +# | normal | no | yes | yes | +# +-----------+-----------------+------------------+------------------+ +# +# Output disassembly with objdump and auto detect vmlinux +# (when running on same machine.) +# perf script -s scripts/python/arm-cs-trace-disasm.py -d # -# Output disassembly with objdump: -# perf script -s scripts/python/arm-cs-trace-disasm.py \ -# -- -d objdump -k path/to/vmlinux # Output disassembly with llvm-objdump: # perf script -s scripts/python/arm-cs-trace-disasm.py \ # -- -d llvm-objdump-11 -k path/to/vmlinux +# # Output only source line and symbols: # perf script -s scripts/python/arm-cs-trace-disasm.py -# Command line parsing. -option_list = [ - # formatting options for the bottom entry of the stack - make_option("-k", "--vmlinux", dest="vmlinux_name", - help="Set path to vmlinux file"), - make_option("-d", "--objdump", dest="objdump_name", - help="Set path to objdump executable file"), - make_option("-v", "--verbose", dest="verbose", - action="store_true", default=False, - help="Enable debugging log") -] +def default_objdump(): + config = perf_config_get("annotate.objdump") + return config if config else "objdump" -parser = OptionParser(option_list=option_list) -(options, args) = parser.parse_args() +# Command line parsing. +def int_arg(v): + v = int(v) + if v < 0: + raise argparse.ArgumentTypeError("Argument must be a positive integer") + return v + +args = argparse.ArgumentParser() +args.add_argument("-k", "--vmlinux", + help="Set path to vmlinux file. Omit to autodetect if running on same machine") +args.add_argument("-d", "--objdump", nargs="?", const=default_objdump(), + help="Show disassembly. Can also be used to change the objdump path"), +args.add_argument("-v", "--verbose", action="store_true", help="Enable debugging log") +args.add_argument("--start-time", type=int_arg, help="Monotonic clock time of sample to start from. " + "See 'time' field on samples in -v mode.") +args.add_argument("--stop-time", type=int_arg, help="Monotonic clock time of sample to stop at. " + "See 'time' field on samples in -v mode.") +args.add_argument("--start-sample", type=int_arg, help="Index of sample to start from. " + "See 'index' field on samples in -v mode.") +args.add_argument("--stop-sample", type=int_arg, help="Index of sample to stop at. " + "See 'index' field on samples in -v mode.") + +options = args.parse_args() +if (options.start_time and options.stop_time and + options.start_time >= options.stop_time): + print("--start-time must less than --stop-time") + exit(2) +if (options.start_sample and options.stop_sample and + options.start_sample >= options.stop_sample): + print("--start-sample must less than --stop-sample") + exit(2) # Initialize global dicts and regular expression disasm_cache = dict() @@ -48,11 +86,23 @@ cpu_data = dict() disasm_re = re.compile(r"^\s*([0-9a-fA-F]+):") disasm_func_re = re.compile(r"^\s*([0-9a-fA-F]+)\s.*:") cache_size = 64*1024 +sample_idx = -1 glb_source_file_name = None glb_line_number = None glb_dso = None +kver = platform.release() +vmlinux_paths = [ + f"/usr/lib/debug/boot/vmlinux-{kver}.debug", + f"/usr/lib/debug/lib/modules/{kver}/vmlinux", + f"/lib/modules/{kver}/build/vmlinux", + f"/usr/lib/debug/boot/vmlinux-{kver}", + f"/boot/vmlinux-{kver}", + f"/boot/vmlinux", + f"vmlinux" +] + def get_optional(perf_dict, field): if field in perf_dict: return perf_dict[field] @@ -63,12 +113,25 @@ def get_offset(perf_dict, field): return "+%#x" % perf_dict[field] return "" +def find_vmlinux(): + if hasattr(find_vmlinux, "path"): + return find_vmlinux.path + + for v in vmlinux_paths: + if os.access(v, os.R_OK): + find_vmlinux.path = v + break + else: + find_vmlinux.path = None + + return find_vmlinux.path + def get_dso_file_path(dso_name, dso_build_id): if (dso_name == "[kernel.kallsyms]" or dso_name == "vmlinux"): - if (options.vmlinux_name): - return options.vmlinux_name; + if (options.vmlinux): + return options.vmlinux; else: - return dso_name + return find_vmlinux() if find_vmlinux() else dso_name if (dso_name == "[vdso]") : append = "/vdso" @@ -92,7 +155,7 @@ def read_disam(dso_fname, dso_start, start_addr, stop_addr): else: start_addr = start_addr - dso_start; stop_addr = stop_addr - dso_start; - disasm = [ options.objdump_name, "-d", "-z", + disasm = [ options.objdump, "-d", "-z", "--start-address="+format(start_addr,"#x"), "--stop-address="+format(stop_addr,"#x") ] disasm += [ dso_fname ] @@ -112,10 +175,10 @@ def print_disam(dso_fname, dso_start, start_addr, stop_addr): def print_sample(sample): print("Sample = { cpu: %04d addr: 0x%016x phys_addr: 0x%016x ip: 0x%016x " \ - "pid: %d tid: %d period: %d time: %d }" % \ + "pid: %d tid: %d period: %d time: %d index: %d}" % \ (sample['cpu'], sample['addr'], sample['phys_addr'], \ sample['ip'], sample['pid'], sample['tid'], \ - sample['period'], sample['time'])) + sample['period'], sample['time'], sample_idx)) def trace_begin(): print('ARM CoreSight Trace Data Assembler Dump') @@ -177,6 +240,7 @@ def print_srccode(comm, param_dict, sample, symbol, dso): def process_event(param_dict): global cache_size global options + global sample_idx sample = param_dict["sample"] comm = param_dict["comm"] @@ -187,11 +251,26 @@ def process_event(param_dict): dso_start = get_optional(param_dict, "dso_map_start") dso_end = get_optional(param_dict, "dso_map_end") symbol = get_optional(param_dict, "symbol") + map_pgoff = get_optional(param_dict, "map_pgoff") + # check for valid map offset + if (str(map_pgoff) == '[unknown]'): + map_pgoff = 0 cpu = sample["cpu"] ip = sample["ip"] addr = sample["addr"] + sample_idx += 1 + + if (options.start_time and sample["time"] < options.start_time): + return + if (options.stop_time and sample["time"] > options.stop_time): + exit(0) + if (options.start_sample and sample_idx < options.start_sample): + return + if (options.stop_sample and sample_idx > options.stop_sample): + exit(0) + if (options.verbose == True): print("Event type: %s" % name) print_sample(sample) @@ -243,9 +322,10 @@ def process_event(param_dict): # Record for previous sample packet cpu_data[str(cpu) + 'addr'] = addr - # Handle CS_ETM_TRACE_ON packet if start_addr=0 and stop_addr=4 - if (start_addr == 0 and stop_addr == 4): - print("CPU%d: CS_ETM_TRACE_ON packet is inserted" % cpu) + # Filter out zero start_address. Optionally identify CS_ETM_TRACE_ON packet + if (start_addr == 0): + if ((stop_addr == 4) and (options.verbose == True)): + print("CPU%d: CS_ETM_TRACE_ON packet is inserted" % cpu) return if (start_addr < int(dso_start) or start_addr > int(dso_end)): @@ -256,19 +336,20 @@ def process_event(param_dict): print("Stop address 0x%x is out of range [ 0x%x .. 0x%x ] for dso %s" % (stop_addr, int(dso_start), int(dso_end), dso)) return - if (options.objdump_name != None): + if (options.objdump != None): # It doesn't need to decrease virtual memory offset for disassembly # for kernel dso and executable file dso, so in this case we set # vm_start to zero. if (dso == "[kernel.kallsyms]" or dso_start == 0x400000): dso_vm_start = 0 + map_pgoff = 0 else: dso_vm_start = int(dso_start) dso_fname = get_dso_file_path(dso, dso_bid) if path.exists(dso_fname): - print_disam(dso_fname, dso_vm_start, start_addr, stop_addr) + print_disam(dso_fname, dso_vm_start, start_addr + map_pgoff, stop_addr + map_pgoff) else: - print("Failed to find dso %s for address range [ 0x%x .. 0x%x ]" % (dso, start_addr, stop_addr)) + print("Failed to find dso %s for address range [ 0x%x .. 0x%x ]" % (dso, start_addr + map_pgoff, stop_addr + map_pgoff)) print_srccode(comm, param_dict, sample, symbol, dso) diff --git a/tools/perf/tests/Build b/tools/perf/tests/Build index 5671ee530019..ec4e1f034742 100644 --- a/tools/perf/tests/Build +++ b/tools/perf/tests/Build @@ -4,7 +4,6 @@ perf-test-y += builtin-test.o perf-test-y += tests-scripts.o perf-test-y += parse-events.o perf-test-y += dso-data.o -perf-test-y += attr.o perf-test-y += vmlinux-kallsyms.o perf-test-$(CONFIG_LIBTRACEEVENT) += openat-syscall.o perf-test-$(CONFIG_LIBTRACEEVENT) += openat-syscall-all-cpus.o @@ -67,12 +66,13 @@ perf-test-y += sigtrap.o perf-test-y += event_groups.o perf-test-y += symbols.o perf-test-y += util.o +perf-test-y += hwmon_pmu.o +perf-test-y += tool_pmu.o ifeq ($(SRCARCH),$(filter $(SRCARCH),x86 arm arm64 powerpc)) perf-test-$(CONFIG_DWARF_UNWIND) += dwarf-unwind.o endif -CFLAGS_attr.o += -DBINDIR="BUILD_STR($(bindir_SQ))" -DPYTHON="BUILD_STR($(PYTHON_WORD))" CFLAGS_python-use.o += -DPYTHONPATH="BUILD_STR($(OUTPUT)python)" -DPYTHON="BUILD_STR($(PYTHON_WORD))" CFLAGS_dwarf-unwind.o += -fno-optimize-sibling-calls diff --git a/tools/perf/tests/attr.c b/tools/perf/tests/attr.c deleted file mode 100644 index 97e1bdd6ec0e..000000000000 --- a/tools/perf/tests/attr.c +++ /dev/null @@ -1,218 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * The struct perf_event_attr test support. - * - * This test is embedded inside into perf directly and is governed - * by the PERF_TEST_ATTR environment variable and hook inside - * sys_perf_event_open function. - * - * The general idea is to store 'struct perf_event_attr' details for - * each event created within single perf command. Each event details - * are stored into separate text file. Once perf command is finished - * these files can be checked for values we expect for command. - * - * Besides 'struct perf_event_attr' values we also store 'fd' and - * 'group_fd' values to allow checking for groups created. - * - * This all is triggered by setting PERF_TEST_ATTR environment variable. - * It must contain name of existing directory with access and write - * permissions. All the event text files are stored there. - */ - -#include <debug.h> -#include <errno.h> -#include <inttypes.h> -#include <stdlib.h> -#include <stdio.h> -#include <linux/types.h> -#include <linux/kernel.h> -#include <sys/param.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <unistd.h> -#include <subcmd/exec-cmd.h> -#include "event.h" -#include "util.h" -#include "tests.h" -#include "pmus.h" - -#define ENV "PERF_TEST_ATTR" - -static char *dir; -static bool ready; - -void test_attr__init(void) -{ - dir = getenv(ENV); - test_attr__enabled = (dir != NULL); -} - -#define BUFSIZE 1024 - -#define __WRITE_ASS(str, fmt, data) \ -do { \ - char buf[BUFSIZE]; \ - size_t size; \ - \ - size = snprintf(buf, BUFSIZE, #str "=%"fmt "\n", data); \ - if (1 != fwrite(buf, size, 1, file)) { \ - perror("test attr - failed to write event file"); \ - fclose(file); \ - return -1; \ - } \ - \ -} while (0) - -#define WRITE_ASS(field, fmt) __WRITE_ASS(field, fmt, attr->field) - -static int store_event(struct perf_event_attr *attr, pid_t pid, struct perf_cpu cpu, - int fd, int group_fd, unsigned long flags) -{ - FILE *file; - char path[PATH_MAX]; - - if (!ready) - return 0; - - snprintf(path, PATH_MAX, "%s/event-%d-%llu-%d", dir, - attr->type, attr->config, fd); - - file = fopen(path, "w+"); - if (!file) { - perror("test attr - failed to open event file"); - return -1; - } - - if (fprintf(file, "[event-%d-%llu-%d]\n", - attr->type, attr->config, fd) < 0) { - perror("test attr - failed to write event file"); - fclose(file); - return -1; - } - - /* syscall arguments */ - __WRITE_ASS(fd, "d", fd); - __WRITE_ASS(group_fd, "d", group_fd); - __WRITE_ASS(cpu, "d", cpu.cpu); - __WRITE_ASS(pid, "d", pid); - __WRITE_ASS(flags, "lu", flags); - - /* struct perf_event_attr */ - WRITE_ASS(type, PRIu32); - WRITE_ASS(size, PRIu32); - WRITE_ASS(config, "llu"); - WRITE_ASS(sample_period, "llu"); - WRITE_ASS(sample_type, "llu"); - WRITE_ASS(read_format, "llu"); - WRITE_ASS(disabled, "d"); - WRITE_ASS(inherit, "d"); - WRITE_ASS(pinned, "d"); - WRITE_ASS(exclusive, "d"); - WRITE_ASS(exclude_user, "d"); - WRITE_ASS(exclude_kernel, "d"); - WRITE_ASS(exclude_hv, "d"); - WRITE_ASS(exclude_idle, "d"); - WRITE_ASS(mmap, "d"); - WRITE_ASS(comm, "d"); - WRITE_ASS(freq, "d"); - WRITE_ASS(inherit_stat, "d"); - WRITE_ASS(enable_on_exec, "d"); - WRITE_ASS(task, "d"); - WRITE_ASS(watermark, "d"); - WRITE_ASS(precise_ip, "d"); - WRITE_ASS(mmap_data, "d"); - WRITE_ASS(sample_id_all, "d"); - WRITE_ASS(exclude_host, "d"); - WRITE_ASS(exclude_guest, "d"); - WRITE_ASS(exclude_callchain_kernel, "d"); - WRITE_ASS(exclude_callchain_user, "d"); - WRITE_ASS(mmap2, "d"); - WRITE_ASS(comm_exec, "d"); - WRITE_ASS(context_switch, "d"); - WRITE_ASS(write_backward, "d"); - WRITE_ASS(namespaces, "d"); - WRITE_ASS(use_clockid, "d"); - WRITE_ASS(wakeup_events, PRIu32); - WRITE_ASS(bp_type, PRIu32); - WRITE_ASS(config1, "llu"); - WRITE_ASS(config2, "llu"); - WRITE_ASS(branch_sample_type, "llu"); - WRITE_ASS(sample_regs_user, "llu"); - WRITE_ASS(sample_stack_user, PRIu32); - - fclose(file); - return 0; -} - -void test_attr__open(struct perf_event_attr *attr, pid_t pid, struct perf_cpu cpu, - int fd, int group_fd, unsigned long flags) -{ - int errno_saved = errno; - - if ((fd != -1) && store_event(attr, pid, cpu, fd, group_fd, flags)) { - pr_err("test attr FAILED"); - exit(128); - } - - errno = errno_saved; -} - -void test_attr__ready(void) -{ - if (unlikely(test_attr__enabled) && !ready) - ready = true; -} - -static int run_dir(const char *d, const char *perf) -{ - char v[] = "-vvvvv"; - int vcnt = min(verbose, (int) sizeof(v) - 1); - char cmd[3*PATH_MAX]; - - if (verbose > 0) - vcnt++; - - scnprintf(cmd, 3*PATH_MAX, PYTHON " %s/attr.py -d %s/attr/ -p %s %.*s", - d, d, perf, vcnt, v); - - return system(cmd) ? TEST_FAIL : TEST_OK; -} - -static int test__attr(struct test_suite *test __maybe_unused, int subtest __maybe_unused) -{ - struct stat st; - char path_perf[PATH_MAX]; - char path_dir[PATH_MAX]; - char *exec_path; - - if (perf_pmus__num_core_pmus() > 1) { - /* - * TODO: Attribute tests hard code the PMU type. If there are >1 - * core PMU then each PMU will have a different type which - * requires additional support. - */ - pr_debug("Skip test on hybrid systems"); - return TEST_SKIP; - } - - /* First try development tree tests. */ - if (!lstat("./tests", &st)) - return run_dir("./tests", "./perf"); - - exec_path = get_argv_exec_path(); - if (exec_path == NULL) - return -1; - - /* Then installed path. */ - snprintf(path_dir, PATH_MAX, "%s/tests", exec_path); - snprintf(path_perf, PATH_MAX, "%s/perf", BINDIR); - free(exec_path); - - if (!lstat(path_dir, &st) && - !lstat(path_perf, &st)) - return run_dir(path_dir, path_perf); - - return TEST_SKIP; -} - -DEFINE_SUITE("Setup struct perf_event_attr", attr); diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c index 470a9709427d..4751dd3c6f67 100644 --- a/tools/perf/tests/builtin-test.c +++ b/tools/perf/tests/builtin-test.c @@ -8,6 +8,7 @@ #include <errno.h> #include <poll.h> #include <unistd.h> +#include <setjmp.h> #include <string.h> #include <stdlib.h> #include <sys/types.h> @@ -39,11 +40,8 @@ * making them easier to debug. */ static bool dont_fork; -/* Don't fork the tests in parallel and wait for their completion. */ -static bool sequential = true; -/* Do it in parallel, lacks infrastructure to avoid running tests that clash for resources, - * So leave it as the developers choice to enable while working on the needed infra */ -static bool parallel; +/* Fork the tests in parallel and wait for their completion. */ +static bool sequential; const char *dso_to_test; const char *test_objdump_path = "objdump"; @@ -73,13 +71,14 @@ static struct test_suite *generic_tests[] = { &suite__PERF_RECORD, &suite__pmu, &suite__pmu_events, + &suite__hwmon_pmu, + &suite__tool_pmu, &suite__dso_data, &suite__perf_evsel__roundtrip_name_test, #ifdef HAVE_LIBTRACEEVENT &suite__perf_evsel__tp_sched_test, &suite__syscall_openat_tp_fields, #endif - &suite__attr, &suite__hists_link, &suite__python_use, &suite__bp_signal, @@ -139,12 +138,6 @@ static struct test_suite *generic_tests[] = { NULL, }; -static struct test_suite **tests[] = { - generic_tests, - arch_tests, - NULL, /* shell tests created at runtime. */ -}; - static struct test_workload *workloads[] = { &workload__noploop, &workload__thloop, @@ -155,6 +148,9 @@ static struct test_workload *workloads[] = { &workload__landlock, }; +#define workloads__for_each(workload) \ + for (unsigned i = 0; i < ARRAY_SIZE(workloads) && ({ workload = workloads[i]; 1; }); i++) + static int num_subtests(const struct test_suite *t) { int num; @@ -198,6 +194,14 @@ static test_fnptr test_function(const struct test_suite *t, int subtest) return t->test_cases[subtest].run_case; } +static bool test_exclusive(const struct test_suite *t, int subtest) +{ + if (subtest <= 0) + return t->test_cases[0].exclusive; + + return t->test_cases[subtest].exclusive; +} + static bool perf_test__matches(const char *desc, int curr, int argc, const char *argv[]) { int i; @@ -229,20 +233,47 @@ struct child_test { int subtest; }; +static jmp_buf run_test_jmp_buf; + +static void child_test_sig_handler(int sig) +{ + siglongjmp(run_test_jmp_buf, sig); +} + static int run_test_child(struct child_process *process) { + const int signals[] = { + SIGABRT, SIGBUS, SIGFPE, SIGILL, SIGINT, SIGPIPE, SIGQUIT, SIGSEGV, SIGTERM, + }; struct child_test *child = container_of(process, struct child_test, process); int err; + err = sigsetjmp(run_test_jmp_buf, 1); + if (err) { + fprintf(stderr, "\n---- unexpected signal (%d) ----\n", err); + err = err > 0 ? -err : -1; + goto err_out; + } + + for (size_t i = 0; i < ARRAY_SIZE(signals); i++) + signal(signals[i], child_test_sig_handler); + pr_debug("--- start ---\n"); pr_debug("test child forked, pid %d\n", getpid()); err = test_function(child->test, child->subtest)(child->test, child->subtest); pr_debug("---- end(%d) ----\n", err); + +err_out: fflush(NULL); + for (size_t i = 0; i < ARRAY_SIZE(signals); i++) + signal(signals[i], SIG_DFL); return -err; } -static int print_test_result(struct test_suite *t, int i, int subtest, int result, int width) +#define TEST_RUNNING -3 + +static int print_test_result(struct test_suite *t, int i, int subtest, int result, int width, + int running) { if (has_subtests(t)) { int subw = width > 2 ? width - 2 : width; @@ -252,6 +283,9 @@ static int print_test_result(struct test_suite *t, int i, int subtest, int resul pr_info("%3d: %-*s:", i + 1, width, test_description(t, subtest)); switch (result) { + case TEST_RUNNING: + color_fprintf(stderr, PERF_COLOR_YELLOW, " Running (%d active)\n", running); + break; case TEST_OK: pr_info(" Ok\n"); break; @@ -273,16 +307,25 @@ static int print_test_result(struct test_suite *t, int i, int subtest, int resul return 0; } -static int finish_test(struct child_test *child_test, int width) +static void finish_test(struct child_test **child_tests, int running_test, int child_test_num, + int width) { - struct test_suite *t = child_test->test; - int i = child_test->test_num; - int subi = child_test->subtest; - int err = child_test->process.err; - bool err_done = err <= 0; + struct child_test *child_test = child_tests[running_test]; + struct test_suite *t; + int i, subi, err; + bool err_done = false; struct strbuf err_output = STRBUF_INIT; + int last_running = -1; int ret; + if (child_test == NULL) { + /* Test wasn't started. */ + return; + } + t = child_test->test; + i = child_test->test_num; + subi = child_test->subtest; + err = child_test->process.err; /* * For test suites with subtests, display the suite name ahead of the * sub test names. @@ -294,7 +337,7 @@ static int finish_test(struct child_test *child_test, int width) * Busy loop reading from the child's stdout/stderr that are set to be * non-blocking until EOF. */ - if (!err_done) + if (err > 0) fcntl(err, F_SETFL, O_NONBLOCK); if (verbose > 1) { if (has_subtests(t)) @@ -308,57 +351,90 @@ static int finish_test(struct child_test *child_test, int width) .events = POLLIN | POLLERR | POLLHUP | POLLNVAL, }, }; - char buf[512]; - ssize_t len; + if (perf_use_color_default) { + int running = 0; - /* Poll to avoid excessive spinning, timeout set for 100ms. */ - poll(pfds, ARRAY_SIZE(pfds), /*timeout=*/100); - if (!err_done && pfds[0].revents) { - errno = 0; - len = read(err, buf, sizeof(buf) - 1); + for (int y = running_test; y < child_test_num; y++) { + if (child_tests[y] == NULL) + continue; + if (check_if_command_finished(&child_tests[y]->process) == 0) + running++; + } + if (running != last_running) { + if (last_running != -1) { + /* + * Erase "Running (.. active)" line + * printed before poll/sleep. + */ + fprintf(debug_file(), PERF_COLOR_DELETE_LINE); + } + print_test_result(t, i, subi, TEST_RUNNING, width, running); + last_running = running; + } + } - if (len <= 0) { - err_done = errno != EAGAIN; - } else { - buf[len] = '\0'; - if (verbose > 1) - fprintf(stdout, "%s", buf); - else + err_done = true; + if (err <= 0) { + /* No child stderr to poll, sleep for 10ms for child to complete. */ + usleep(10 * 1000); + } else { + /* Poll to avoid excessive spinning, timeout set for 100ms. */ + poll(pfds, ARRAY_SIZE(pfds), /*timeout=*/100); + if (pfds[0].revents) { + char buf[512]; + ssize_t len; + + len = read(err, buf, sizeof(buf) - 1); + + if (len > 0) { + err_done = false; + buf[len] = '\0'; strbuf_addstr(&err_output, buf); + } } } + if (err_done) + err_done = check_if_command_finished(&child_test->process); + } + if (perf_use_color_default && last_running != -1) { + /* Erase "Running (.. active)" line printed before poll/sleep. */ + fprintf(debug_file(), PERF_COLOR_DELETE_LINE); } /* Clean up child process. */ ret = finish_command(&child_test->process); - if (verbose == 1 && ret == TEST_FAIL) { - /* Add header for test that was skipped above. */ - if (has_subtests(t)) - pr_info("%3d.%1d: %s:\n", i + 1, subi + 1, test_description(t, subi)); - else - pr_info("%3d: %s:\n", i + 1, test_description(t, -1)); + if (verbose > 1 || (verbose == 1 && ret == TEST_FAIL)) fprintf(stderr, "%s", err_output.buf); - } + strbuf_release(&err_output); - print_test_result(t, i, subi, ret, width); + print_test_result(t, i, subi, ret, width, /*running=*/0); if (err > 0) close(err); - return 0; + zfree(&child_tests[running_test]); } static int start_test(struct test_suite *test, int i, int subi, struct child_test **child, - int width) + int width, int pass) { int err; *child = NULL; if (dont_fork) { - pr_debug("--- start ---\n"); - err = test_function(test, subi)(test, subi); - pr_debug("---- end ----\n"); - print_test_result(test, i, subi, err, width); + if (pass == 1) { + pr_debug("--- start ---\n"); + err = test_function(test, subi)(test, subi); + pr_debug("---- end ----\n"); + print_test_result(test, i, subi, err, width, /*running=*/0); + } + return 0; + } + if (pass == 1 && !sequential && test_exclusive(test, subi)) { + /* When parallel, skip exclusive tests on the first pass. */ + return 0; + } + if (pass != 1 && (sequential || !test_exclusive(test, subi))) { + /* Sequential and non-exclusive tests were run on the first pass. */ return 0; } - *child = zalloc(sizeof(**child)); if (!*child) return -ENOMEM; @@ -377,35 +453,42 @@ static int start_test(struct test_suite *test, int i, int subi, struct child_tes (*child)->process.err = -1; } (*child)->process.no_exec_cmd = run_test_child; - err = start_command(&(*child)->process); - if (err || !sequential) - return err; - return finish_test(*child, width); + if (sequential || pass == 2) { + err = start_command(&(*child)->process); + if (err) + return err; + finish_test(child, /*running_test=*/0, /*child_test_num=*/1, width); + return 0; + } + return start_command(&(*child)->process); } -#define for_each_test(j, k, t) \ - for (j = 0, k = 0; j < ARRAY_SIZE(tests); j++, k = 0) \ - while ((t = tests[j][k++]) != NULL) +/* State outside of __cmd_test for the sake of the signal handler. */ + +static size_t num_tests; +static struct child_test **child_tests; +static jmp_buf cmd_test_jmp_buf; -static int __cmd_test(int argc, const char *argv[], struct intlist *skiplist) +static void cmd_test_sig_handler(int sig) { - struct test_suite *t; - unsigned int j, k; - int i = 0; - int width = 0; - size_t num_tests = 0; - struct child_test **child_tests; - int child_test_num = 0; + siglongjmp(cmd_test_jmp_buf, sig); +} + +static int __cmd_test(struct test_suite **suites, int argc, const char *argv[], + struct intlist *skiplist) +{ + static int width = 0; + int err = 0; - for_each_test(j, k, t) { - int len = strlen(test_description(t, -1)); + for (struct test_suite **t = suites; *t; t++) { + int len = strlen(test_description(*t, -1)); if (width < len) width = len; - if (has_subtests(t)) { - for (int subi = 0, subn = num_subtests(t); subi < subn; subi++) { - len = strlen(test_description(t, subi)); + if (has_subtests(*t)) { + for (int subi = 0, subn = num_subtests(*t); subi < subn; subi++) { + len = strlen(test_description(*t, subi)); if (width < len) width = len; num_tests++; @@ -418,97 +501,137 @@ static int __cmd_test(int argc, const char *argv[], struct intlist *skiplist) if (!child_tests) return -ENOMEM; - for_each_test(j, k, t) { - int curr = i++; - - if (!perf_test__matches(test_description(t, -1), curr, argc, argv)) { - bool skip = true; + err = sigsetjmp(cmd_test_jmp_buf, 1); + if (err) { + pr_err("\nSignal (%d) while running tests.\nTerminating tests with the same signal\n", + err); + for (size_t x = 0; x < num_tests; x++) { + struct child_test *child_test = child_tests[x]; - for (int subi = 0, subn = num_subtests(t); subi < subn; subi++) { - if (perf_test__matches(test_description(t, subi), - curr, argc, argv)) - skip = false; - } - - if (skip) + if (!child_test || child_test->process.pid <= 0) continue; - } - if (intlist__find(skiplist, i)) { - pr_info("%3d: %-*s:", curr + 1, width, test_description(t, -1)); - color_fprintf(stderr, PERF_COLOR_YELLOW, " Skip (user override)\n"); - continue; + pr_debug3("Killing %d pid %d\n", + child_test->test_num + 1, + child_test->process.pid); + kill(child_test->process.pid, err); } + goto err_out; + } + signal(SIGINT, cmd_test_sig_handler); + signal(SIGTERM, cmd_test_sig_handler); - if (!has_subtests(t)) { - int err = start_test(t, curr, -1, &child_tests[child_test_num++], width); + /* + * In parallel mode pass 1 runs non-exclusive tests in parallel, pass 2 + * runs the exclusive tests sequentially. In other modes all tests are + * run in pass 1. + */ + for (int pass = 1; pass <= 2; pass++) { + int child_test_num = 0; + int i = 0; + + for (struct test_suite **t = suites; *t; t++) { + int curr = i++; + + if (!perf_test__matches(test_description(*t, -1), curr, argc, argv)) { + /* + * Test suite shouldn't be run based on + * description. See if subtest should. + */ + bool skip = true; + + for (int subi = 0, subn = num_subtests(*t); subi < subn; subi++) { + if (perf_test__matches(test_description(*t, subi), + curr, argc, argv)) + skip = false; + } + + if (skip) + continue; + } - if (err) { - /* TODO: if !sequential waitpid the already forked children. */ - free(child_tests); - return err; + if (intlist__find(skiplist, i)) { + pr_info("%3d: %-*s:", curr + 1, width, test_description(*t, -1)); + color_fprintf(stderr, PERF_COLOR_YELLOW, " Skip (user override)\n"); + continue; } - } else { - for (int subi = 0, subn = num_subtests(t); subi < subn; subi++) { - int err; - if (!perf_test__matches(test_description(t, subi), + if (!has_subtests(*t)) { + err = start_test(*t, curr, -1, &child_tests[child_test_num++], + width, pass); + if (err) + goto err_out; + continue; + } + for (int subi = 0, subn = num_subtests(*t); subi < subn; subi++) { + if (!perf_test__matches(test_description(*t, subi), curr, argc, argv)) continue; - err = start_test(t, curr, subi, &child_tests[child_test_num++], - width); + err = start_test(*t, curr, subi, &child_tests[child_test_num++], + width, pass); if (err) - return err; + goto err_out; } } - } - for (i = 0; i < child_test_num; i++) { if (!sequential) { - int ret = finish_test(child_tests[i], width); - - if (ret) - return ret; + /* Parallel mode starts tests but doesn't finish them. Do that now. */ + for (size_t x = 0; x < num_tests; x++) + finish_test(child_tests, x, num_tests, width); } - free(child_tests[i]); + } +err_out: + signal(SIGINT, SIG_DFL); + signal(SIGTERM, SIG_DFL); + if (err) { + pr_err("Internal test harness failure. Completing any started tests:\n:"); + for (size_t x = 0; x < num_tests; x++) + finish_test(child_tests, x, num_tests, width); } free(child_tests); - return 0; + return err; } -static int perf_test__list(int argc, const char **argv) +static int perf_test__list(struct test_suite **suites, int argc, const char **argv) { - unsigned int j, k; - struct test_suite *t; int i = 0; - for_each_test(j, k, t) { + for (struct test_suite **t = suites; *t; t++) { int curr = i++; - if (!perf_test__matches(test_description(t, -1), curr, argc, argv)) + if (!perf_test__matches(test_description(*t, -1), curr, argc, argv)) continue; - pr_info("%3d: %s\n", i, test_description(t, -1)); + pr_info("%3d: %s\n", i, test_description(*t, -1)); - if (has_subtests(t)) { - int subn = num_subtests(t); + if (has_subtests(*t)) { + int subn = num_subtests(*t); int subi; for (subi = 0; subi < subn; subi++) pr_info("%3d:%1d: %s\n", i, subi + 1, - test_description(t, subi)); + test_description(*t, subi)); } } return 0; } +static int workloads__fprintf_list(FILE *fp) +{ + struct test_workload *twl; + int printed = 0; + + workloads__for_each(twl) + printed += fprintf(fp, "%s\n", twl->name); + + return printed; +} + static int run_workload(const char *work, int argc, const char **argv) { - unsigned int i = 0; struct test_workload *twl; - for (i = 0; i < ARRAY_SIZE(workloads); i++) { - twl = workloads[i]; + workloads__for_each(twl) { if (!strcmp(twl->name, work)) return twl->func(argc, argv); } @@ -526,6 +649,55 @@ static int perf_test__config(const char *var, const char *value, return 0; } +static struct test_suite **build_suites(void) +{ + /* + * TODO: suites is static to avoid needing to clean up the scripts tests + * for leak sanitizer. + */ + static struct test_suite **suites[] = { + generic_tests, + arch_tests, + NULL, + }; + struct test_suite **result; + struct test_suite *t; + size_t n = 0, num_suites = 0; + + if (suites[2] == NULL) + suites[2] = create_script_test_suites(); + +#define for_each_test(t) \ + for (size_t i = 0, j = 0; i < ARRAY_SIZE(suites); i++, j = 0) \ + while ((t = suites[i][j++]) != NULL) + + for_each_test(t) + num_suites++; + + result = calloc(num_suites + 1, sizeof(struct test_suite *)); + + for (int pass = 1; pass <= 2; pass++) { + for_each_test(t) { + bool exclusive = false; + + if (!has_subtests(t)) { + exclusive = test_exclusive(t, -1); + } else { + for (int subi = 0, subn = num_subtests(t); subi < subn; subi++) { + if (test_exclusive(t, subi)) { + exclusive = true; + break; + } + } + } + if ((!exclusive && pass == 1) || (exclusive && pass == 2)) + result[n++] = t; + } + } + return result; +#undef for_each_test +} + int cmd_test(int argc, const char **argv) { const char *test_usage[] = { @@ -534,16 +706,17 @@ int cmd_test(int argc, const char **argv) }; const char *skip = NULL; const char *workload = NULL; + bool list_workloads = false; const struct option test_options[] = { OPT_STRING('s', "skip", &skip, "tests", "tests to skip"), OPT_INCR('v', "verbose", &verbose, "be more verbose (show symbol address, etc)"), OPT_BOOLEAN('F', "dont-fork", &dont_fork, "Do not fork for testcase"), - OPT_BOOLEAN('p', "parallel", ¶llel, "Run the tests in parallel"), OPT_BOOLEAN('S', "sequential", &sequential, "Run the tests one after another rather than in parallel"), - OPT_STRING('w', "workload", &workload, "work", "workload to run for testing"), + OPT_STRING('w', "workload", &workload, "work", "workload to run for testing, use '--list-workloads' to list the available ones."), + OPT_BOOLEAN(0, "list-workloads", &list_workloads, "List the available builtin workloads to use with -w/--workload"), OPT_STRING(0, "dso", &dso_to_test, "dso", "dso to test"), OPT_STRING(0, "objdump", &test_objdump_path, "path", "objdump binary to use for disassembly and annotations"), @@ -552,6 +725,7 @@ int cmd_test(int argc, const char **argv) const char * const test_subcommands[] = { "list", NULL }; struct intlist *skiplist = NULL; int ret = hists__init(); + struct test_suite **suites; if (ret < 0) return ret; @@ -561,22 +735,29 @@ int cmd_test(int argc, const char **argv) /* Unbuffered output */ setvbuf(stdout, NULL, _IONBF, 0); - tests[2] = create_script_test_suites(); argc = parse_options_subcommand(argc, argv, test_options, test_subcommands, test_usage, 0); - if (argc >= 1 && !strcmp(argv[0], "list")) - return perf_test__list(argc - 1, argv + 1); + if (argc >= 1 && !strcmp(argv[0], "list")) { + suites = build_suites(); + ret = perf_test__list(suites, argc - 1, argv + 1); + free(suites); + return ret; + } if (workload) return run_workload(workload, argc, argv); + if (list_workloads) { + workloads__fprintf_list(stdout); + return 0; + } + if (dont_fork) sequential = true; - else if (parallel) - sequential = false; symbol_conf.priv_size = sizeof(int); symbol_conf.try_vmlinux_path = true; + if (symbol__init(NULL) < 0) return -1; @@ -588,5 +769,8 @@ int cmd_test(int argc, const char **argv) */ rlimit__bump_memlock(); - return __cmd_test(argc, argv, skiplist); + suites = build_suites(); + ret = __cmd_test(suites, argc, argv, skiplist); + free(suites); + return ret; } diff --git a/tools/perf/tests/demangle-java-test.c b/tools/perf/tests/demangle-java-test.c index 44d1be303b67..93c94408bdc8 100644 --- a/tools/perf/tests/demangle-java-test.c +++ b/tools/perf/tests/demangle-java-test.c @@ -2,6 +2,7 @@ #include <string.h> #include <stdlib.h> #include <stdio.h> +#include <linux/kernel.h> #include "tests.h" #include "session.h" #include "debug.h" @@ -28,7 +29,7 @@ static int test__demangle_java(struct test_suite *test __maybe_unused, int subte "void java.lang.Object<init>()" }, }; - for (i = 0; i < sizeof(test_cases) / sizeof(test_cases[0]); i++) { + for (i = 0; i < ARRAY_SIZE(test_cases); i++) { buf = java_demangle_sym(test_cases[i].mangled, 0); if (strcmp(buf, test_cases[i].demangled)) { pr_debug("FAILED: %s: %s != %s\n", test_cases[i].mangled, diff --git a/tools/perf/tests/event-times.c b/tools/perf/tests/event-times.c index e155f0e0e04d..deefe5003bfc 100644 --- a/tools/perf/tests/event-times.c +++ b/tools/perf/tests/event-times.c @@ -126,6 +126,7 @@ static int attach__cpu_disabled(struct evlist *evlist) evsel->core.attr.disabled = 1; err = evsel__open_per_cpu(evsel, cpus, -1); + perf_cpu_map__put(cpus); if (err) { if (err == -EACCES) return TEST_SKIP; @@ -134,7 +135,6 @@ static int attach__cpu_disabled(struct evlist *evlist) return err; } - perf_cpu_map__put(cpus); return evsel__enable(evsel); } @@ -153,10 +153,10 @@ static int attach__cpu_enabled(struct evlist *evlist) } err = evsel__open_per_cpu(evsel, cpus, -1); + perf_cpu_map__put(cpus); if (err == -EACCES) return TEST_SKIP; - perf_cpu_map__put(cpus); return err ? TEST_FAIL : TEST_OK; } @@ -188,6 +188,7 @@ static int test_times(int (attach)(struct evlist *), err = attach(evlist); if (err == TEST_SKIP) { pr_debug(" SKIP : not enough rights\n"); + evlist__delete(evlist); return err; } diff --git a/tools/perf/tests/evsel-tp-sched.c b/tools/perf/tests/evsel-tp-sched.c index cf4da3d748c2..226196fb9677 100644 --- a/tools/perf/tests/evsel-tp-sched.c +++ b/tools/perf/tests/evsel-tp-sched.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 #include <linux/err.h> -#include <traceevent/event-parse.h> +#include <event-parse.h> #include "evsel.h" #include "tests.h" #include "debug.h" @@ -36,33 +36,33 @@ static int test__perf_evsel__tp_sched_test(struct test_suite *test __maybe_unuse int subtest __maybe_unused) { struct evsel *evsel = evsel__newtp("sched", "sched_switch"); - int ret = 0; + int ret = TEST_OK; if (IS_ERR(evsel)) { pr_debug("evsel__newtp failed with %ld\n", PTR_ERR(evsel)); - return -1; + return PTR_ERR(evsel) == -EACCES ? TEST_SKIP : TEST_FAIL; } if (evsel__test_field(evsel, "prev_comm", 16, false)) - ret = -1; + ret = TEST_FAIL; if (evsel__test_field(evsel, "prev_pid", 4, true)) - ret = -1; + ret = TEST_FAIL; if (evsel__test_field(evsel, "prev_prio", 4, true)) - ret = -1; + ret = TEST_FAIL; if (evsel__test_field(evsel, "prev_state", sizeof(long), true)) - ret = -1; + ret = TEST_FAIL; if (evsel__test_field(evsel, "next_comm", 16, false)) - ret = -1; + ret = TEST_FAIL; if (evsel__test_field(evsel, "next_pid", 4, true)) - ret = -1; + ret = TEST_FAIL; if (evsel__test_field(evsel, "next_prio", 4, true)) - ret = -1; + ret = TEST_FAIL; evsel__delete(evsel); @@ -70,23 +70,33 @@ static int test__perf_evsel__tp_sched_test(struct test_suite *test __maybe_unuse if (IS_ERR(evsel)) { pr_debug("evsel__newtp failed with %ld\n", PTR_ERR(evsel)); - return -1; + return TEST_FAIL; } if (evsel__test_field(evsel, "comm", 16, false)) - ret = -1; + ret = TEST_FAIL; if (evsel__test_field(evsel, "pid", 4, true)) - ret = -1; + ret = TEST_FAIL; if (evsel__test_field(evsel, "prio", 4, true)) - ret = -1; + ret = TEST_FAIL; if (evsel__test_field(evsel, "target_cpu", 4, true)) - ret = -1; + ret = TEST_FAIL; evsel__delete(evsel); return ret; } -DEFINE_SUITE("Parse sched tracepoints fields", perf_evsel__tp_sched_test); +static struct test_case tests__perf_evsel__tp_sched_test[] = { + TEST_CASE_REASON("Parse sched tracepoints fields", + perf_evsel__tp_sched_test, + "permissions"), + { .name = NULL, } +}; + +struct test_suite suite__perf_evsel__tp_sched_test = { + .desc = "Parse sched tracepoints fields", + .test_cases = tests__perf_evsel__tp_sched_test, +}; diff --git a/tools/perf/tests/expr.c b/tools/perf/tests/expr.c index e3aa9d4fcf3a..726cf8d4da28 100644 --- a/tools/perf/tests/expr.c +++ b/tools/perf/tests/expr.c @@ -6,6 +6,7 @@ #include "util/header.h" #include "util/smt.h" #include "tests.h" +#include <perf/cpumap.h> #include <math.h> #include <stdlib.h> #include <string.h> @@ -74,14 +75,12 @@ static int test__expr(struct test_suite *t __maybe_unused, int subtest __maybe_u double val, num_cpus_online, num_cpus, num_cores, num_dies, num_packages; int ret; struct expr_parse_ctx *ctx; - bool is_intel = false; char strcmp_cpuid_buf[256]; - struct perf_pmu *pmu = perf_pmus__find_core_pmu(); - char *cpuid = perf_pmu__getcpuid(pmu); + struct perf_cpu cpu = {-1}; + char *cpuid = get_cpuid_allow_env_override(cpu); char *escaped_cpuid1, *escaped_cpuid2; TEST_ASSERT_VAL("get_cpuid", cpuid); - is_intel = strstr(cpuid, "Intel") != NULL; TEST_ASSERT_EQUAL("ids_union", test_ids_union(), 0); @@ -244,12 +243,19 @@ static int test__expr(struct test_suite *t __maybe_unused, int subtest __maybe_u if (num_dies) // Some platforms do not have CPU die support, for example s390 TEST_ASSERT_VAL("#num_dies >= #num_packages", num_dies >= num_packages); - TEST_ASSERT_VAL("#system_tsc_freq", expr__parse(&val, ctx, "#system_tsc_freq") == 0); - if (is_intel) - TEST_ASSERT_VAL("#system_tsc_freq > 0", val > 0); - else - TEST_ASSERT_VAL("#system_tsc_freq == 0", fpclassify(val) == FP_ZERO); + if (expr__parse(&val, ctx, "#system_tsc_freq") == 0) { + bool is_intel = strstr(cpuid, "Intel") != NULL; + + if (is_intel) + TEST_ASSERT_VAL("#system_tsc_freq > 0", val > 0); + else + TEST_ASSERT_VAL("#system_tsc_freq == 0", fpclassify(val) == FP_ZERO); + } else { +#if defined(__i386__) || defined(__x86_64__) + TEST_ASSERT_VAL("#system_tsc_freq unsupported", 0); +#endif + } /* * Source count returns the number of events aggregating in a leader * event including the leader. Check parsing yields an id. diff --git a/tools/perf/tests/hwmon_pmu.c b/tools/perf/tests/hwmon_pmu.c new file mode 100644 index 000000000000..d2b066a2b557 --- /dev/null +++ b/tools/perf/tests/hwmon_pmu.c @@ -0,0 +1,349 @@ +// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) +#include "debug.h" +#include "evlist.h" +#include "hwmon_pmu.h" +#include "parse-events.h" +#include "tests.h" +#include <fcntl.h> +#include <sys/stat.h> +#include <linux/compiler.h> +#include <linux/kernel.h> +#include <linux/string.h> + +static const struct test_event { + const char *name; + const char *alias; + long config; +} test_events[] = { + { + "temp_test_hwmon_event1", + "temp1", + 0xA0001, + }, + { + "temp_test_hwmon_event2", + "temp2", + 0xA0002, + }, +}; + +/* Cleanup test PMU directory. */ +static int test_pmu_put(const char *dir, struct perf_pmu *hwm) +{ + char buf[PATH_MAX + 20]; + int ret; + + if (scnprintf(buf, sizeof(buf), "rm -fr %s", dir) < 0) { + pr_err("Failure to set up buffer for \"%s\"\n", dir); + return -EINVAL; + } + ret = system(buf); + if (ret) + pr_err("Failure to \"%s\"\n", buf); + + list_del(&hwm->list); + perf_pmu__delete(hwm); + return ret; +} + +/* + * Prepare test PMU directory data, normally exported by kernel at + * /sys/class/hwmon/hwmon<number>/. Give as input a buffer to hold the file + * path, the result is PMU loaded using that directory. + */ +static struct perf_pmu *test_pmu_get(char *dir, size_t sz) +{ + const char *test_hwmon_name_nl = "A test hwmon PMU\n"; + const char *test_hwmon_name = "A test hwmon PMU"; + /* Simulated hwmon items. */ + const struct test_item { + const char *name; + const char *value; + } test_items[] = { + { "temp1_label", "test hwmon event1\n", }, + { "temp1_input", "40000\n", }, + { "temp2_label", "test hwmon event2\n", }, + { "temp2_input", "50000\n", }, + }; + int hwmon_dirfd = -1, test_dirfd = -1, file; + struct perf_pmu *hwm = NULL; + ssize_t len; + + /* Create equivalent of sysfs mount point. */ + scnprintf(dir, sz, "/tmp/perf-hwmon-pmu-test-XXXXXX"); + if (!mkdtemp(dir)) { + pr_err("mkdtemp failed\n"); + dir[0] = '\0'; + return NULL; + } + test_dirfd = open(dir, O_PATH|O_DIRECTORY); + if (test_dirfd < 0) { + pr_err("Failed to open test directory \"%s\"\n", dir); + goto err_out; + } + + /* Create the test hwmon directory and give it a name. */ + if (mkdirat(test_dirfd, "hwmon1234", 0755) < 0) { + pr_err("Failed to mkdir hwmon directory\n"); + goto err_out; + } + hwmon_dirfd = openat(test_dirfd, "hwmon1234", O_DIRECTORY); + if (hwmon_dirfd < 0) { + pr_err("Failed to open test hwmon directory \"%s/hwmon1234\"\n", dir); + goto err_out; + } + file = openat(hwmon_dirfd, "name", O_WRONLY | O_CREAT, 0600); + if (file < 0) { + pr_err("Failed to open for writing file \"name\"\n"); + goto err_out; + } + len = strlen(test_hwmon_name_nl); + if (write(file, test_hwmon_name_nl, len) < len) { + close(file); + pr_err("Failed to write to 'name' file\n"); + goto err_out; + } + close(file); + + /* Create test hwmon files. */ + for (size_t i = 0; i < ARRAY_SIZE(test_items); i++) { + const struct test_item *item = &test_items[i]; + + file = openat(hwmon_dirfd, item->name, O_WRONLY | O_CREAT, 0600); + if (file < 0) { + pr_err("Failed to open for writing file \"%s\"\n", item->name); + goto err_out; + } + + if (write(file, item->value, strlen(item->value)) < 0) { + pr_err("Failed to write to file \"%s\"\n", item->name); + close(file); + goto err_out; + } + close(file); + } + + /* Make the PMU reading the files created above. */ + hwm = perf_pmus__add_test_hwmon_pmu(hwmon_dirfd, "hwmon1234", test_hwmon_name); + if (!hwm) + pr_err("Test hwmon creation failed\n"); + +err_out: + if (!hwm) { + test_pmu_put(dir, hwm); + if (hwmon_dirfd >= 0) + close(hwmon_dirfd); + } + if (test_dirfd >= 0) + close(test_dirfd); + return hwm; +} + +static int do_test(size_t i, bool with_pmu, bool with_alias) +{ + const char *test_event = with_alias ? test_events[i].alias : test_events[i].name; + struct evlist *evlist = evlist__new(); + struct evsel *evsel; + struct parse_events_error err; + int ret; + char str[128]; + bool found = false; + + if (!evlist) { + pr_err("evlist allocation failed\n"); + return TEST_FAIL; + } + + if (with_pmu) + snprintf(str, sizeof(str), "hwmon_a_test_hwmon_pmu/%s/", test_event); + else + strlcpy(str, test_event, sizeof(str)); + + pr_debug("Testing '%s'\n", str); + parse_events_error__init(&err); + ret = parse_events(evlist, str, &err); + if (ret) { + pr_debug("FAILED %s:%d failed to parse event '%s', err %d\n", + __FILE__, __LINE__, str, ret); + parse_events_error__print(&err, str); + ret = TEST_FAIL; + goto out; + } + + ret = TEST_OK; + if (with_pmu ? (evlist->core.nr_entries != 1) : (evlist->core.nr_entries < 1)) { + pr_debug("FAILED %s:%d Unexpected number of events for '%s' of %d\n", + __FILE__, __LINE__, str, evlist->core.nr_entries); + ret = TEST_FAIL; + goto out; + } + + evlist__for_each_entry(evlist, evsel) { + if (!evsel->pmu || !evsel->pmu->name || + strcmp(evsel->pmu->name, "hwmon_a_test_hwmon_pmu")) + continue; + + if (evsel->core.attr.config != (u64)test_events[i].config) { + pr_debug("FAILED %s:%d Unexpected config for '%s', %lld != %ld\n", + __FILE__, __LINE__, str, + evsel->core.attr.config, + test_events[i].config); + ret = TEST_FAIL; + goto out; + } + found = true; + } + + if (!found) { + pr_debug("FAILED %s:%d Didn't find hwmon event '%s' in parsed evsels\n", + __FILE__, __LINE__, str); + ret = TEST_FAIL; + } + +out: + parse_events_error__exit(&err); + evlist__delete(evlist); + return ret; +} + +static int test__hwmon_pmu(bool with_pmu) +{ + char dir[PATH_MAX]; + struct perf_pmu *pmu = test_pmu_get(dir, sizeof(dir)); + int ret = TEST_OK; + + if (!pmu) + return TEST_FAIL; + + for (size_t i = 0; i < ARRAY_SIZE(test_events); i++) { + ret = do_test(i, with_pmu, /*with_alias=*/false); + + if (ret != TEST_OK) + break; + + ret = do_test(i, with_pmu, /*with_alias=*/true); + + if (ret != TEST_OK) + break; + } + test_pmu_put(dir, pmu); + return ret; +} + +static int test__hwmon_pmu_without_pmu(struct test_suite *test __maybe_unused, + int subtest __maybe_unused) +{ + return test__hwmon_pmu(/*with_pmu=*/false); +} + +static int test__hwmon_pmu_with_pmu(struct test_suite *test __maybe_unused, + int subtest __maybe_unused) +{ + return test__hwmon_pmu(/*with_pmu=*/true); +} + +static int test__parse_hwmon_filename(struct test_suite *test __maybe_unused, + int subtest __maybe_unused) +{ + const struct hwmon_parse_test { + const char *filename; + enum hwmon_type type; + int number; + enum hwmon_item item; + bool alarm; + bool parse_ok; + } tests[] = { + { + .filename = "cpu0_accuracy", + .type = HWMON_TYPE_CPU, + .number = 0, + .item = HWMON_ITEM_ACCURACY, + .alarm = false, + .parse_ok = true, + }, + { + .filename = "temp1_input", + .type = HWMON_TYPE_TEMP, + .number = 1, + .item = HWMON_ITEM_INPUT, + .alarm = false, + .parse_ok = true, + }, + { + .filename = "fan2_vid", + .type = HWMON_TYPE_FAN, + .number = 2, + .item = HWMON_ITEM_VID, + .alarm = false, + .parse_ok = true, + }, + { + .filename = "power3_crit_alarm", + .type = HWMON_TYPE_POWER, + .number = 3, + .item = HWMON_ITEM_CRIT, + .alarm = true, + .parse_ok = true, + }, + { + .filename = "intrusion4_average_interval_min_alarm", + .type = HWMON_TYPE_INTRUSION, + .number = 4, + .item = HWMON_ITEM_AVERAGE_INTERVAL_MIN, + .alarm = true, + .parse_ok = true, + }, + { + .filename = "badtype5_baditem", + .type = HWMON_TYPE_NONE, + .number = 5, + .item = HWMON_ITEM_NONE, + .alarm = false, + .parse_ok = false, + }, + { + .filename = "humidity6_baditem", + .type = HWMON_TYPE_NONE, + .number = 6, + .item = HWMON_ITEM_NONE, + .alarm = false, + .parse_ok = false, + }, + }; + + for (size_t i = 0; i < ARRAY_SIZE(tests); i++) { + enum hwmon_type type; + int number; + enum hwmon_item item; + bool alarm; + + TEST_ASSERT_EQUAL("parse_hwmon_filename", + parse_hwmon_filename( + tests[i].filename, + &type, + &number, + &item, + &alarm), + tests[i].parse_ok + ); + if (tests[i].parse_ok) { + TEST_ASSERT_EQUAL("parse_hwmon_filename type", type, tests[i].type); + TEST_ASSERT_EQUAL("parse_hwmon_filename number", number, tests[i].number); + TEST_ASSERT_EQUAL("parse_hwmon_filename item", item, tests[i].item); + TEST_ASSERT_EQUAL("parse_hwmon_filename alarm", alarm, tests[i].alarm); + } + } + return TEST_OK; +} + +static struct test_case tests__hwmon_pmu[] = { + TEST_CASE("Basic parsing test", parse_hwmon_filename), + TEST_CASE("Parsing without PMU name", hwmon_pmu_without_pmu), + TEST_CASE("Parsing with PMU name", hwmon_pmu_with_pmu), + { .name = NULL, } +}; + +struct test_suite suite__hwmon_pmu = { + .desc = "Hwmon PMU", + .test_cases = tests__hwmon_pmu, +}; diff --git a/tools/perf/tests/make b/tools/perf/tests/make index a5040772043f..a7fcbd589752 100644 --- a/tools/perf/tests/make +++ b/tools/perf/tests/make @@ -81,7 +81,7 @@ make_no_gtk2 := NO_GTK2=1 make_no_ui := NO_SLANG=1 NO_GTK2=1 make_no_demangle := NO_DEMANGLE=1 make_no_libelf := NO_LIBELF=1 -make_no_libunwind := NO_LIBUNWIND=1 +make_libunwind := LIBUNWIND=1 make_no_libdw_dwarf_unwind := NO_LIBDW_DWARF_UNWIND=1 make_no_backtrace := NO_BACKTRACE=1 make_no_libcapstone := NO_CAPSTONE=1 @@ -121,7 +121,7 @@ make_static := LDFLAGS=-static NO_PERF_READ_VDSO32=1 NO_PERF_READ_VDSOX3 # all the NO_* variable combined make_minimal := NO_LIBPERL=1 NO_LIBPYTHON=1 NO_GTK2=1 -make_minimal += NO_DEMANGLE=1 NO_LIBELF=1 NO_LIBUNWIND=1 NO_BACKTRACE=1 +make_minimal += NO_DEMANGLE=1 NO_LIBELF=1 NO_BACKTRACE=1 make_minimal += NO_LIBNUMA=1 NO_LIBAUDIT=1 NO_LIBBIONIC=1 make_minimal += NO_LIBDW_DWARF_UNWIND=1 NO_AUXTRACE=1 NO_LIBBPF=1 make_minimal += NO_LIBCRYPTO=1 NO_SDT=1 NO_JVMTI=1 NO_LIBZSTD=1 @@ -153,7 +153,7 @@ run += make_no_gtk2 run += make_no_ui run += make_no_demangle run += make_no_libelf -run += make_no_libunwind +run += make_libunwind run += make_no_libdw_dwarf_unwind run += make_no_backtrace run += make_no_libcapstone diff --git a/tools/perf/tests/openat-syscall-tp-fields.c b/tools/perf/tests/openat-syscall-tp-fields.c index 888df8eca981..3943da441979 100644 --- a/tools/perf/tests/openat-syscall-tp-fields.c +++ b/tools/perf/tests/openat-syscall-tp-fields.c @@ -40,7 +40,7 @@ static int test__syscall_openat_tp_fields(struct test_suite *test __maybe_unused int flags = O_RDONLY | O_DIRECTORY; struct evlist *evlist = evlist__new(); struct evsel *evsel; - int err = -1, i, nr_events = 0, nr_polls = 0; + int ret = TEST_FAIL, err, i, nr_events = 0, nr_polls = 0; char sbuf[STRERR_BUFSIZE]; if (evlist == NULL) { @@ -51,6 +51,7 @@ static int test__syscall_openat_tp_fields(struct test_suite *test __maybe_unused evsel = evsel__newtp("syscalls", "sys_enter_openat"); if (IS_ERR(evsel)) { pr_debug("%s: evsel__newtp\n", __func__); + ret = PTR_ERR(evsel) == -EACCES ? TEST_SKIP : TEST_FAIL; goto out_delete_evlist; } @@ -138,11 +139,21 @@ static int test__syscall_openat_tp_fields(struct test_suite *test __maybe_unused } } out_ok: - err = 0; + ret = TEST_OK; out_delete_evlist: evlist__delete(evlist); out: - return err; + return ret; } -DEFINE_SUITE("syscalls:sys_enter_openat event fields", syscall_openat_tp_fields); +static struct test_case tests__syscall_openat_tp_fields[] = { + TEST_CASE_REASON("syscalls:sys_enter_openat event fields", + syscall_openat_tp_fields, + "permissions"), + { .name = NULL, } +}; + +struct test_suite suite__syscall_openat_tp_fields = { + .desc = "syscalls:sys_enter_openat event fields", + .test_cases = tests__syscall_openat_tp_fields, +}; diff --git a/tools/perf/tests/parse-events.c b/tools/perf/tests/parse-events.c index 9e3086d02150..82a19674a38f 100644 --- a/tools/perf/tests/parse-events.c +++ b/tools/perf/tests/parse-events.c @@ -730,7 +730,7 @@ static int test__checkevent_pmu_events(struct evlist *evlist) TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->core.nr_entries); TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->core.attr.type || - strcmp(evsel->pmu_name, "cpu")); + strcmp(evsel->pmu->name, "cpu")); TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", @@ -898,8 +898,7 @@ static int test__group1(struct evlist *evlist) TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel); TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv); - /* use of precise requires exclude_guest */ - TEST_ASSERT_VAL("wrong exclude guest", evsel->core.attr.exclude_guest); + TEST_ASSERT_VAL("wrong exclude guest", !evsel->core.attr.exclude_guest); TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host); TEST_ASSERT_VAL("wrong precise_ip", evsel->core.attr.precise_ip == 2); TEST_ASSERT_VAL("wrong leader", evsel__has_leader(evsel, leader)); @@ -932,7 +931,7 @@ static int test__group2(struct evlist *evlist) TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel); TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv); - TEST_ASSERT_VAL("wrong exclude guest", evsel->core.attr.exclude_guest); + TEST_ASSERT_VAL("wrong exclude guest", !evsel->core.attr.exclude_guest); TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host); TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip); TEST_ASSERT_VAL("wrong leader", evsel__is_group_leader(evsel)); @@ -947,7 +946,7 @@ static int test__group2(struct evlist *evlist) TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel); TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv); - TEST_ASSERT_VAL("wrong exclude guest", evsel->core.attr.exclude_guest); + TEST_ASSERT_VAL("wrong exclude guest", !evsel->core.attr.exclude_guest); TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host); TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip); if (evsel__has_leader(evsel, leader)) @@ -1016,9 +1015,8 @@ static int test__group3(struct evlist *evlist __maybe_unused) TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel); TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv); - /* use of precise requires exclude_guest */ TEST_ASSERT_VAL("wrong exclude guest", - evsel->core.attr.exclude_guest); + !evsel->core.attr.exclude_guest); TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host); TEST_ASSERT_VAL("wrong precise_ip", @@ -1072,7 +1070,7 @@ static int test__group3(struct evlist *evlist __maybe_unused) TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel); TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv); - TEST_ASSERT_VAL("wrong exclude guest", evsel->core.attr.exclude_guest); + TEST_ASSERT_VAL("wrong exclude guest", !evsel->core.attr.exclude_guest); TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host); TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip); TEST_ASSERT_VAL("wrong leader", evsel__is_group_leader(evsel)); @@ -1103,8 +1101,7 @@ static int test__group4(struct evlist *evlist __maybe_unused) TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel); TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv); - /* use of precise requires exclude_guest */ - TEST_ASSERT_VAL("wrong exclude guest", evsel->core.attr.exclude_guest); + TEST_ASSERT_VAL("wrong exclude guest", !evsel->core.attr.exclude_guest); TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host); TEST_ASSERT_VAL("wrong precise_ip", evsel->core.attr.precise_ip == 1); TEST_ASSERT_VAL("wrong group name", !evsel->group_name); @@ -1122,8 +1119,7 @@ static int test__group4(struct evlist *evlist __maybe_unused) TEST_ASSERT_VAL("wrong exclude_user", evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel); TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv); - /* use of precise requires exclude_guest */ - TEST_ASSERT_VAL("wrong exclude guest", evsel->core.attr.exclude_guest); + TEST_ASSERT_VAL("wrong exclude guest", !evsel->core.attr.exclude_guest); TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host); TEST_ASSERT_VAL("wrong precise_ip", evsel->core.attr.precise_ip == 2); TEST_ASSERT_VAL("wrong leader", evsel__has_leader(evsel, leader)); @@ -1222,7 +1218,7 @@ static int test__group5(struct evlist *evlist __maybe_unused) TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel); TEST_ASSERT_VAL("wrong exclude_hv", !evsel->core.attr.exclude_hv); - TEST_ASSERT_VAL("wrong exclude guest", evsel->core.attr.exclude_guest); + TEST_ASSERT_VAL("wrong exclude guest", !evsel->core.attr.exclude_guest); TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host); TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip); TEST_ASSERT_VAL("wrong leader", evsel__is_group_leader(evsel)); @@ -1437,7 +1433,7 @@ static int test__leader_sample1(struct evlist *evlist) TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel); TEST_ASSERT_VAL("wrong exclude_hv", !evsel->core.attr.exclude_hv); - TEST_ASSERT_VAL("wrong exclude guest", evsel->core.attr.exclude_guest); + TEST_ASSERT_VAL("wrong exclude guest", !evsel->core.attr.exclude_guest); TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host); TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip); TEST_ASSERT_VAL("wrong group name", !evsel->group_name); @@ -1453,7 +1449,7 @@ static int test__leader_sample1(struct evlist *evlist) TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel); TEST_ASSERT_VAL("wrong exclude_hv", !evsel->core.attr.exclude_hv); - TEST_ASSERT_VAL("wrong exclude guest", evsel->core.attr.exclude_guest); + TEST_ASSERT_VAL("wrong exclude guest", !evsel->core.attr.exclude_guest); TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host); TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip); TEST_ASSERT_VAL("wrong leader", evsel__has_leader(evsel, leader)); @@ -1468,7 +1464,7 @@ static int test__leader_sample1(struct evlist *evlist) TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel); TEST_ASSERT_VAL("wrong exclude_hv", !evsel->core.attr.exclude_hv); - TEST_ASSERT_VAL("wrong exclude guest", evsel->core.attr.exclude_guest); + TEST_ASSERT_VAL("wrong exclude guest", !evsel->core.attr.exclude_guest); TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host); TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip); TEST_ASSERT_VAL("wrong group name", !evsel->group_name); @@ -1497,7 +1493,7 @@ static int test__leader_sample2(struct evlist *evlist __maybe_unused) TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel); TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv); - TEST_ASSERT_VAL("wrong exclude guest", evsel->core.attr.exclude_guest); + TEST_ASSERT_VAL("wrong exclude guest", !evsel->core.attr.exclude_guest); TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host); TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip); TEST_ASSERT_VAL("wrong group name", !evsel->group_name); @@ -1513,7 +1509,7 @@ static int test__leader_sample2(struct evlist *evlist __maybe_unused) TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel); TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv); - TEST_ASSERT_VAL("wrong exclude guest", evsel->core.attr.exclude_guest); + TEST_ASSERT_VAL("wrong exclude guest", !evsel->core.attr.exclude_guest); TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host); TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip); TEST_ASSERT_VAL("wrong group name", !evsel->group_name); diff --git a/tools/perf/tests/pmu.c b/tools/perf/tests/pmu.c index be18506f6a24..6a681e3fb552 100644 --- a/tools/perf/tests/pmu.c +++ b/tools/perf/tests/pmu.c @@ -176,7 +176,8 @@ static int test__pmu_format(struct test_suite *test __maybe_unused, int subtest } memset(&attr, 0, sizeof(attr)); - ret = perf_pmu__config_terms(pmu, &attr, &terms, /*zero=*/false, /*err=*/NULL); + ret = perf_pmu__config_terms(pmu, &attr, &terms, /*zero=*/false, + /*apply_hardcoded=*/false, /*err=*/NULL); if (ret) { pr_err("perf_pmu__config_terms failed"); goto err_out; diff --git a/tools/perf/tests/shell/annotate.sh b/tools/perf/tests/shell/annotate.sh index 2ccf4f1d46b6..1590a37363de 100755 --- a/tools/perf/tests/shell/annotate.sh +++ b/tools/perf/tests/shell/annotate.sh @@ -44,7 +44,7 @@ test_basic() { fi # Generate the annotated output file - perf annotate -i "${perfdata}" --stdio 2> /dev/null > "${perfout}" + perf annotate --no-demangle -i "${perfdata}" --stdio 2> /dev/null | head -250 > "${perfout}" # check if it has the target symbol if ! grep "${testsym}" "${perfout}" @@ -63,8 +63,8 @@ test_basic() { fi # check again with a target symbol name - if ! perf annotate -i "${perfdata}" "${testsym}" 2> /dev/null | \ - grep -m 3 "${disasm_regex}" + if ! perf annotate --no-demangle -i "${perfdata}" "${testsym}" 2> /dev/null | \ + head -250 | grep -m 3 "${disasm_regex}" then echo "Basic annotate [Failed: missing disasm output when specifying the target symbol]" err=1 @@ -72,8 +72,8 @@ test_basic() { fi # check one more with external objdump tool (forced by --objdump option) - if ! perf annotate -i "${perfdata}" --objdump=objdump 2> /dev/null | \ - grep -m 3 "${disasm_regex}" + if ! perf annotate --no-demangle -i "${perfdata}" --objdump=objdump 2> /dev/null | \ + head -250 | grep -m 3 "${disasm_regex}" then echo "Basic annotate [Failed: missing disasm output from non default disassembler (using --objdump)]" err=1 diff --git a/tools/perf/tests/shell/attr.sh b/tools/perf/tests/shell/attr.sh new file mode 100755 index 000000000000..5a4e43b2471d --- /dev/null +++ b/tools/perf/tests/shell/attr.sh @@ -0,0 +1,22 @@ +#!/bin/bash +# Perf attribute expectations test +# SPDX-License-Identifier: GPL-2.0 + +err=0 + +cleanup() { + trap - EXIT TERM INT +} + +trap_cleanup() { + echo "Unexpected signal in ${FUNCNAME[1]}" + cleanup + exit 1 +} +trap trap_cleanup EXIT TERM INT + +shelldir=$(dirname "$0") +perf_path=$(which perf) +python "${shelldir}"/lib/attr.py -d "${shelldir}"/attr -v -p "$perf_path" +cleanup +exit $err diff --git a/tools/perf/tests/attr/README b/tools/perf/tests/shell/attr/README index 4066fec7180a..67c4ca76b85d 100644 --- a/tools/perf/tests/attr/README +++ b/tools/perf/tests/shell/attr/README @@ -51,6 +51,8 @@ Following tests are defined (with perf commands): perf record --call-graph fp kill (test-record-graph-fp-aarch64) perf record -e '{cycles,instructions}' kill (test-record-group1) perf record -e '{cycles/period=1/,instructions/period=2/}:S' kill (test-record-group2) + perf record -e '{cycles,cache-misses}:S' kill (test-record-group-sampling1) + perf record -c 10000 -e '{cycles,cache-misses}:S' kill (test-record-group-sampling2) perf record -D kill (test-record-no-delay) perf record -i kill (test-record-no-inherit) perf record -n kill (test-record-no-samples) diff --git a/tools/perf/tests/attr/base-record b/tools/perf/tests/shell/attr/base-record index b44e4e6e4443..b44e4e6e4443 100644 --- a/tools/perf/tests/attr/base-record +++ b/tools/perf/tests/shell/attr/base-record diff --git a/tools/perf/tests/attr/base-record-spe b/tools/perf/tests/shell/attr/base-record-spe index 08fa96b59240..08fa96b59240 100644 --- a/tools/perf/tests/attr/base-record-spe +++ b/tools/perf/tests/shell/attr/base-record-spe diff --git a/tools/perf/tests/attr/base-stat b/tools/perf/tests/shell/attr/base-stat index fccd8ec4d1b0..fccd8ec4d1b0 100644 --- a/tools/perf/tests/attr/base-stat +++ b/tools/perf/tests/shell/attr/base-stat diff --git a/tools/perf/tests/attr/system-wide-dummy b/tools/perf/tests/shell/attr/system-wide-dummy index a1e1d6a263bf..a1e1d6a263bf 100644 --- a/tools/perf/tests/attr/system-wide-dummy +++ b/tools/perf/tests/shell/attr/system-wide-dummy diff --git a/tools/perf/tests/attr/test-record-C0 b/tools/perf/tests/shell/attr/test-record-C0 index 198e8429a1bf..1049ac8b52f2 100644 --- a/tools/perf/tests/attr/test-record-C0 +++ b/tools/perf/tests/shell/attr/test-record-C0 @@ -18,5 +18,7 @@ sample_type=65927 mmap=0 comm=0 task=0 +inherit=0 [event:system-wide-dummy] +inherit=0 diff --git a/tools/perf/tests/attr/test-record-basic b/tools/perf/tests/shell/attr/test-record-basic index b0ca42a5ecc9..b0ca42a5ecc9 100644 --- a/tools/perf/tests/attr/test-record-basic +++ b/tools/perf/tests/shell/attr/test-record-basic diff --git a/tools/perf/tests/attr/test-record-branch-any b/tools/perf/tests/shell/attr/test-record-branch-any index 1a99b3ce6b89..1a99b3ce6b89 100644 --- a/tools/perf/tests/attr/test-record-branch-any +++ b/tools/perf/tests/shell/attr/test-record-branch-any diff --git a/tools/perf/tests/attr/test-record-branch-filter-any b/tools/perf/tests/shell/attr/test-record-branch-filter-any index 709768b508c6..709768b508c6 100644 --- a/tools/perf/tests/attr/test-record-branch-filter-any +++ b/tools/perf/tests/shell/attr/test-record-branch-filter-any diff --git a/tools/perf/tests/attr/test-record-branch-filter-any_call b/tools/perf/tests/shell/attr/test-record-branch-filter-any_call index f943221f7825..f943221f7825 100644 --- a/tools/perf/tests/attr/test-record-branch-filter-any_call +++ b/tools/perf/tests/shell/attr/test-record-branch-filter-any_call diff --git a/tools/perf/tests/attr/test-record-branch-filter-any_ret b/tools/perf/tests/shell/attr/test-record-branch-filter-any_ret index fd4f5b4154a9..fd4f5b4154a9 100644 --- a/tools/perf/tests/attr/test-record-branch-filter-any_ret +++ b/tools/perf/tests/shell/attr/test-record-branch-filter-any_ret diff --git a/tools/perf/tests/attr/test-record-branch-filter-hv b/tools/perf/tests/shell/attr/test-record-branch-filter-hv index 4e52d685ebe1..4e52d685ebe1 100644 --- a/tools/perf/tests/attr/test-record-branch-filter-hv +++ b/tools/perf/tests/shell/attr/test-record-branch-filter-hv diff --git a/tools/perf/tests/attr/test-record-branch-filter-ind_call b/tools/perf/tests/shell/attr/test-record-branch-filter-ind_call index e08c6ab3796e..e08c6ab3796e 100644 --- a/tools/perf/tests/attr/test-record-branch-filter-ind_call +++ b/tools/perf/tests/shell/attr/test-record-branch-filter-ind_call diff --git a/tools/perf/tests/attr/test-record-branch-filter-k b/tools/perf/tests/shell/attr/test-record-branch-filter-k index b4b98f84fc2f..b4b98f84fc2f 100644 --- a/tools/perf/tests/attr/test-record-branch-filter-k +++ b/tools/perf/tests/shell/attr/test-record-branch-filter-k diff --git a/tools/perf/tests/attr/test-record-branch-filter-u b/tools/perf/tests/shell/attr/test-record-branch-filter-u index fb9610edbb0d..fb9610edbb0d 100644 --- a/tools/perf/tests/attr/test-record-branch-filter-u +++ b/tools/perf/tests/shell/attr/test-record-branch-filter-u diff --git a/tools/perf/tests/attr/test-record-count b/tools/perf/tests/shell/attr/test-record-count index 5e9b9019d786..5e9b9019d786 100644 --- a/tools/perf/tests/attr/test-record-count +++ b/tools/perf/tests/shell/attr/test-record-count diff --git a/tools/perf/tests/attr/test-record-data b/tools/perf/tests/shell/attr/test-record-data index a99bb13149c2..a99bb13149c2 100644 --- a/tools/perf/tests/attr/test-record-data +++ b/tools/perf/tests/shell/attr/test-record-data diff --git a/tools/perf/tests/attr/test-record-dummy-C0 b/tools/perf/tests/shell/attr/test-record-dummy-C0 index 576ec48b3aaf..91499405fff4 100644 --- a/tools/perf/tests/attr/test-record-dummy-C0 +++ b/tools/perf/tests/shell/attr/test-record-dummy-C0 @@ -19,7 +19,7 @@ sample_period=4000 sample_type=391 read_format=4|20 disabled=0 -inherit=1 +inherit=0 pinned=0 exclusive=0 exclude_user=0 @@ -37,7 +37,7 @@ precise_ip=0 mmap_data=0 sample_id_all=1 exclude_host=0 -exclude_guest=1 +exclude_guest=0 exclude_callchain_kernel=0 exclude_callchain_user=0 mmap2=1 diff --git a/tools/perf/tests/attr/test-record-freq b/tools/perf/tests/shell/attr/test-record-freq index 89e29f6b2ae0..89e29f6b2ae0 100644 --- a/tools/perf/tests/attr/test-record-freq +++ b/tools/perf/tests/shell/attr/test-record-freq diff --git a/tools/perf/tests/attr/test-record-graph-default b/tools/perf/tests/shell/attr/test-record-graph-default index f0a18b4ea4f5..f0a18b4ea4f5 100644 --- a/tools/perf/tests/attr/test-record-graph-default +++ b/tools/perf/tests/shell/attr/test-record-graph-default diff --git a/tools/perf/tests/attr/test-record-graph-default-aarch64 b/tools/perf/tests/shell/attr/test-record-graph-default-aarch64 index e98d62efb6f7..e98d62efb6f7 100644 --- a/tools/perf/tests/attr/test-record-graph-default-aarch64 +++ b/tools/perf/tests/shell/attr/test-record-graph-default-aarch64 diff --git a/tools/perf/tests/attr/test-record-graph-dwarf b/tools/perf/tests/shell/attr/test-record-graph-dwarf index ae92061d611d..ae92061d611d 100644 --- a/tools/perf/tests/attr/test-record-graph-dwarf +++ b/tools/perf/tests/shell/attr/test-record-graph-dwarf diff --git a/tools/perf/tests/attr/test-record-graph-fp b/tools/perf/tests/shell/attr/test-record-graph-fp index a6e60e839205..a6e60e839205 100644 --- a/tools/perf/tests/attr/test-record-graph-fp +++ b/tools/perf/tests/shell/attr/test-record-graph-fp diff --git a/tools/perf/tests/attr/test-record-graph-fp-aarch64 b/tools/perf/tests/shell/attr/test-record-graph-fp-aarch64 index cbeea9971285..cbeea9971285 100644 --- a/tools/perf/tests/attr/test-record-graph-fp-aarch64 +++ b/tools/perf/tests/shell/attr/test-record-graph-fp-aarch64 diff --git a/tools/perf/tests/attr/test-record-group-sampling b/tools/perf/tests/shell/attr/test-record-group-sampling index 97e7e64a38f0..86a940d7895d 100644 --- a/tools/perf/tests/attr/test-record-group-sampling +++ b/tools/perf/tests/shell/attr/test-record-group-sampling @@ -2,6 +2,7 @@ command = record args = --no-bpf-event -e '{cycles,cache-misses}:S' kill >/dev/null 2>&1 ret = 1 +kernel_until = 6.12 [event-1:base-record] fd=1 @@ -18,7 +19,7 @@ group_fd=1 type=0 config=3 -# default | PERF_SAMPLE_READ +# default | PERF_SAMPLE_READ | PERF_SAMPLE_PERIOD sample_type=343 # PERF_FORMAT_ID | PERF_FORMAT_GROUP | PERF_FORMAT_LOST diff --git a/tools/perf/tests/shell/attr/test-record-group-sampling1 b/tools/perf/tests/shell/attr/test-record-group-sampling1 new file mode 100644 index 000000000000..4748ab7bf684 --- /dev/null +++ b/tools/perf/tests/shell/attr/test-record-group-sampling1 @@ -0,0 +1,50 @@ +[config] +command = record +args = --no-bpf-event -e '{cycles,cache-misses}:S' kill >/dev/null 2>&1 +ret = 1 +kernel_since = 6.12 + +[event-1:base-record] +fd=1 +group_fd=-1 + +# cycles +type=0 +config=0 + +# default | PERF_SAMPLE_READ | PERF_SAMPLE_PERIOD +sample_type=343 + +# PERF_FORMAT_ID | PERF_FORMAT_GROUP | PERF_FORMAT_LOST | PERF_FORMAT_TOTAL_TIME_ENABLED | PERF_FORMAT_TOTAL_TIME_RUNNING +read_format=28|31 +task=1 +mmap=1 +comm=1 +enable_on_exec=1 +disabled=1 + +# inherit is enabled for group sampling +inherit=1 + +[event-2:base-record] +fd=2 +group_fd=1 + +# cache-misses +type=0 +config=3 + +# default | PERF_SAMPLE_READ | PERF_SAMPLE_PERIOD +sample_type=343 + +# PERF_FORMAT_ID | PERF_FORMAT_GROUP | PERF_FORMAT_LOST | PERF_FORMAT_TOTAL_TIME_ENABLED | PERF_FORMAT_TOTAL_TIME_RUNNING +read_format=28|31 +task=0 +mmap=0 +comm=0 +enable_on_exec=0 +disabled=0 +freq=0 + +# inherit is enabled for group sampling +inherit=1 diff --git a/tools/perf/tests/shell/attr/test-record-group-sampling2 b/tools/perf/tests/shell/attr/test-record-group-sampling2 new file mode 100644 index 000000000000..e0432244a0eb --- /dev/null +++ b/tools/perf/tests/shell/attr/test-record-group-sampling2 @@ -0,0 +1,61 @@ +[config] +command = record +args = --no-bpf-event -c 10000 -e '{cycles,cache-misses}:S' kill >/dev/null 2>&1 +ret = 1 +kernel_since = 6.12 + +[event-1:base-record] +fd=1 +group_fd=-1 + +# cycles +type=0 +config=0 + +# default | PERF_SAMPLE_READ +sample_type=87 + +# PERF_FORMAT_ID | PERF_FORMAT_GROUP | PERF_FORMAT_LOST | PERF_FORMAT_TOTAL_TIME_ENABLED | PERF_FORMAT_TOTAL_TIME_RUNNING +read_format=28|31 +task=1 +mmap=1 +comm=1 +enable_on_exec=1 +disabled=1 + +# inherit is enabled for group sampling +inherit=1 + +# sampling disabled +sample_freq=0 +sample_period=10000 +freq=0 +write_backward=0 + +[event-2:base-record] +fd=2 +group_fd=1 + +# cache-misses +type=0 +config=3 + +# default | PERF_SAMPLE_READ +sample_type=87 + +# PERF_FORMAT_ID | PERF_FORMAT_GROUP | PERF_FORMAT_LOST | PERF_FORMAT_TOTAL_TIME_ENABLED | PERF_FORMAT_TOTAL_TIME_RUNNING +read_format=28|31 +task=0 +mmap=0 +comm=0 +enable_on_exec=0 +disabled=0 + +# inherit is enabled for group sampling +inherit=1 + +# sampling disabled +sample_freq=0 +sample_period=0 +freq=0 +write_backward=0 diff --git a/tools/perf/tests/attr/test-record-group1 b/tools/perf/tests/shell/attr/test-record-group1 index eeb1db392bc9..eeb1db392bc9 100644 --- a/tools/perf/tests/attr/test-record-group1 +++ b/tools/perf/tests/shell/attr/test-record-group1 diff --git a/tools/perf/tests/attr/test-record-group2 b/tools/perf/tests/shell/attr/test-record-group2 index cebdaa8e64e4..891d41a7bddf 100644 --- a/tools/perf/tests/attr/test-record-group2 +++ b/tools/perf/tests/shell/attr/test-record-group2 @@ -2,6 +2,7 @@ command = record args = --no-bpf-event -e '{cycles/period=1234000/,instructions/period=6789000/}:S' kill >/dev/null 2>&1 ret = 1 +kernel_until = 6.12 [event-1:base-record] fd=1 diff --git a/tools/perf/tests/shell/attr/test-record-group3 b/tools/perf/tests/shell/attr/test-record-group3 new file mode 100644 index 000000000000..249be884959e --- /dev/null +++ b/tools/perf/tests/shell/attr/test-record-group3 @@ -0,0 +1,31 @@ +[config] +command = record +args = --no-bpf-event -e '{cycles/period=1234000/,instructions/period=6789000/}:S' kill >/dev/null 2>&1 +ret = 1 +kernel_since = 6.12 + +[event-1:base-record] +fd=1 +group_fd=-1 +config=0|1 +sample_period=1234000 +sample_type=87 +read_format=28|31 +disabled=1 +inherit=1 +freq=0 + +[event-2:base-record] +fd=2 +group_fd=1 +config=0|1 +sample_period=6789000 +sample_type=87 +read_format=28|31 +disabled=0 +inherit=1 +mmap=0 +comm=0 +freq=0 +enable_on_exec=0 +task=0 diff --git a/tools/perf/tests/attr/test-record-no-buffering b/tools/perf/tests/shell/attr/test-record-no-buffering index 583dcbb078ba..583dcbb078ba 100644 --- a/tools/perf/tests/attr/test-record-no-buffering +++ b/tools/perf/tests/shell/attr/test-record-no-buffering diff --git a/tools/perf/tests/attr/test-record-no-inherit b/tools/perf/tests/shell/attr/test-record-no-inherit index 15d1dc162e1c..15d1dc162e1c 100644 --- a/tools/perf/tests/attr/test-record-no-inherit +++ b/tools/perf/tests/shell/attr/test-record-no-inherit diff --git a/tools/perf/tests/attr/test-record-no-samples b/tools/perf/tests/shell/attr/test-record-no-samples index 596fbd6d5a2c..596fbd6d5a2c 100644 --- a/tools/perf/tests/attr/test-record-no-samples +++ b/tools/perf/tests/shell/attr/test-record-no-samples diff --git a/tools/perf/tests/attr/test-record-period b/tools/perf/tests/shell/attr/test-record-period index 119101154c5e..119101154c5e 100644 --- a/tools/perf/tests/attr/test-record-period +++ b/tools/perf/tests/shell/attr/test-record-period diff --git a/tools/perf/tests/attr/test-record-pfm-period b/tools/perf/tests/shell/attr/test-record-pfm-period index 368f5b814094..368f5b814094 100644 --- a/tools/perf/tests/attr/test-record-pfm-period +++ b/tools/perf/tests/shell/attr/test-record-pfm-period diff --git a/tools/perf/tests/attr/test-record-raw b/tools/perf/tests/shell/attr/test-record-raw index 13a5f7860c78..13a5f7860c78 100644 --- a/tools/perf/tests/attr/test-record-raw +++ b/tools/perf/tests/shell/attr/test-record-raw diff --git a/tools/perf/tests/attr/test-record-spe-period b/tools/perf/tests/shell/attr/test-record-spe-period index 75f8c9cd8e3f..75f8c9cd8e3f 100644 --- a/tools/perf/tests/attr/test-record-spe-period +++ b/tools/perf/tests/shell/attr/test-record-spe-period diff --git a/tools/perf/tests/attr/test-record-spe-period-term b/tools/perf/tests/shell/attr/test-record-spe-period-term index 8f60a4fec657..8f60a4fec657 100644 --- a/tools/perf/tests/attr/test-record-spe-period-term +++ b/tools/perf/tests/shell/attr/test-record-spe-period-term diff --git a/tools/perf/tests/attr/test-record-spe-physical-address b/tools/perf/tests/shell/attr/test-record-spe-physical-address index 7ebcf5012ce3..7ebcf5012ce3 100644 --- a/tools/perf/tests/attr/test-record-spe-physical-address +++ b/tools/perf/tests/shell/attr/test-record-spe-physical-address diff --git a/tools/perf/tests/attr/test-record-user-regs-no-sve-aarch64 b/tools/perf/tests/shell/attr/test-record-user-regs-no-sve-aarch64 index bed765450ca9..bed765450ca9 100644 --- a/tools/perf/tests/attr/test-record-user-regs-no-sve-aarch64 +++ b/tools/perf/tests/shell/attr/test-record-user-regs-no-sve-aarch64 diff --git a/tools/perf/tests/attr/test-record-user-regs-old-sve-aarch64 b/tools/perf/tests/shell/attr/test-record-user-regs-old-sve-aarch64 index 15ebfc3418e3..15ebfc3418e3 100644 --- a/tools/perf/tests/attr/test-record-user-regs-old-sve-aarch64 +++ b/tools/perf/tests/shell/attr/test-record-user-regs-old-sve-aarch64 diff --git a/tools/perf/tests/attr/test-record-user-regs-sve-aarch64 b/tools/perf/tests/shell/attr/test-record-user-regs-sve-aarch64 index a65113cd7311..a65113cd7311 100644 --- a/tools/perf/tests/attr/test-record-user-regs-sve-aarch64 +++ b/tools/perf/tests/shell/attr/test-record-user-regs-sve-aarch64 diff --git a/tools/perf/tests/attr/test-stat-C0 b/tools/perf/tests/shell/attr/test-stat-C0 index a2c76d10b2bb..a2c76d10b2bb 100644 --- a/tools/perf/tests/attr/test-stat-C0 +++ b/tools/perf/tests/shell/attr/test-stat-C0 diff --git a/tools/perf/tests/attr/test-stat-basic b/tools/perf/tests/shell/attr/test-stat-basic index 69867d049fda..69867d049fda 100644 --- a/tools/perf/tests/attr/test-stat-basic +++ b/tools/perf/tests/shell/attr/test-stat-basic diff --git a/tools/perf/tests/attr/test-stat-default b/tools/perf/tests/shell/attr/test-stat-default index a1e2da0a9a6d..e47fb4944679 100644 --- a/tools/perf/tests/attr/test-stat-default +++ b/tools/perf/tests/shell/attr/test-stat-default @@ -88,98 +88,142 @@ enable_on_exec=0 read_format=15 optional=1 -# PERF_TYPE_RAW / topdown-fe-bound (0x8200) +# PERF_TYPE_RAW / topdown-bad-spec (0x8100) [event13:base-stat] fd=13 group_fd=11 type=4 -config=33280 +config=33024 disabled=0 enable_on_exec=0 read_format=15 optional=1 -# PERF_TYPE_RAW / topdown-be-bound (0x8300) +# PERF_TYPE_RAW / topdown-fe-bound (0x8200) [event14:base-stat] fd=14 group_fd=11 type=4 -config=33536 +config=33280 disabled=0 enable_on_exec=0 read_format=15 optional=1 -# PERF_TYPE_RAW / topdown-bad-spec (0x8100) +# PERF_TYPE_RAW / topdown-be-bound (0x8300) [event15:base-stat] fd=15 group_fd=11 type=4 -config=33024 +config=33536 disabled=0 enable_on_exec=0 read_format=15 optional=1 -# PERF_TYPE_RAW / INT_MISC.UOP_DROPPING +# PERF_TYPE_RAW / topdown-heavy-ops (0x8400) [event16:base-stat] fd=16 +group_fd=11 type=4 -config=4109 +config=33792 +disabled=0 +enable_on_exec=0 +read_format=15 optional=1 -# PERF_TYPE_RAW / cpu/INT_MISC.RECOVERY_CYCLES,cmask=1,edge/ +# PERF_TYPE_RAW / topdown-br-mispredict (0x8500) [event17:base-stat] fd=17 +group_fd=11 type=4 -config=17039629 +config=34048 +disabled=0 +enable_on_exec=0 +read_format=15 optional=1 -# PERF_TYPE_RAW / CPU_CLK_UNHALTED.THREAD +# PERF_TYPE_RAW / topdown-fetch-lat (0x8600) [event18:base-stat] fd=18 +group_fd=11 type=4 -config=60 +config=34304 +disabled=0 +enable_on_exec=0 +read_format=15 optional=1 -# PERF_TYPE_RAW / INT_MISC.RECOVERY_CYCLES_ANY +# PERF_TYPE_RAW / topdown-mem-bound (0x8700) [event19:base-stat] fd=19 +group_fd=11 type=4 -config=2097421 +config=34560 +disabled=0 +enable_on_exec=0 +read_format=15 optional=1 -# PERF_TYPE_RAW / CPU_CLK_UNHALTED.REF_XCLK +# PERF_TYPE_RAW / INT_MISC.UOP_DROPPING [event20:base-stat] fd=20 type=4 -config=316 +config=4109 optional=1 -# PERF_TYPE_RAW / IDQ_UOPS_NOT_DELIVERED.CORE +# PERF_TYPE_RAW / cpu/INT_MISC.RECOVERY_CYCLES,cmask=1,edge/ [event21:base-stat] fd=21 type=4 -config=412 +config=17039629 optional=1 -# PERF_TYPE_RAW / CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE +# PERF_TYPE_RAW / CPU_CLK_UNHALTED.THREAD [event22:base-stat] fd=22 type=4 -config=572 +config=60 optional=1 -# PERF_TYPE_RAW / UOPS_RETIRED.RETIRE_SLOTS +# PERF_TYPE_RAW / INT_MISC.RECOVERY_CYCLES_ANY [event23:base-stat] fd=23 type=4 -config=706 +config=2097421 optional=1 -# PERF_TYPE_RAW / UOPS_ISSUED.ANY +# PERF_TYPE_RAW / CPU_CLK_UNHALTED.REF_XCLK [event24:base-stat] fd=24 type=4 +config=316 +optional=1 + +# PERF_TYPE_RAW / IDQ_UOPS_NOT_DELIVERED.CORE +[event25:base-stat] +fd=25 +type=4 +config=412 +optional=1 + +# PERF_TYPE_RAW / CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE +[event26:base-stat] +fd=26 +type=4 +config=572 +optional=1 + +# PERF_TYPE_RAW / UOPS_RETIRED.RETIRE_SLOTS +[event27:base-stat] +fd=27 +type=4 +config=706 +optional=1 + +# PERF_TYPE_RAW / UOPS_ISSUED.ANY +[event28:base-stat] +fd=28 +type=4 config=270 optional=1 diff --git a/tools/perf/tests/attr/test-stat-detailed-1 b/tools/perf/tests/shell/attr/test-stat-detailed-1 index 1c52cb05c900..3d500d3e0c5c 100644 --- a/tools/perf/tests/attr/test-stat-detailed-1 +++ b/tools/perf/tests/shell/attr/test-stat-detailed-1 @@ -90,99 +90,143 @@ enable_on_exec=0 read_format=15 optional=1 -# PERF_TYPE_RAW / topdown-fe-bound (0x8200) +# PERF_TYPE_RAW / topdown-bad-spec (0x8100) [event13:base-stat] fd=13 group_fd=11 type=4 -config=33280 +config=33024 disabled=0 enable_on_exec=0 read_format=15 optional=1 -# PERF_TYPE_RAW / topdown-be-bound (0x8300) +# PERF_TYPE_RAW / topdown-fe-bound (0x8200) [event14:base-stat] fd=14 group_fd=11 type=4 -config=33536 +config=33280 disabled=0 enable_on_exec=0 read_format=15 optional=1 -# PERF_TYPE_RAW / topdown-bad-spec (0x8100) +# PERF_TYPE_RAW / topdown-be-bound (0x8300) [event15:base-stat] fd=15 group_fd=11 type=4 -config=33024 +config=33536 disabled=0 enable_on_exec=0 read_format=15 optional=1 -# PERF_TYPE_RAW / INT_MISC.UOP_DROPPING +# PERF_TYPE_RAW / topdown-heavy-ops (0x8400) [event16:base-stat] fd=16 +group_fd=11 type=4 -config=4109 +config=33792 +disabled=0 +enable_on_exec=0 +read_format=15 optional=1 -# PERF_TYPE_RAW / cpu/INT_MISC.RECOVERY_CYCLES,cmask=1,edge/ +# PERF_TYPE_RAW / topdown-br-mispredict (0x8500) [event17:base-stat] fd=17 +group_fd=11 type=4 -config=17039629 +config=34048 +disabled=0 +enable_on_exec=0 +read_format=15 optional=1 -# PERF_TYPE_RAW / CPU_CLK_UNHALTED.THREAD +# PERF_TYPE_RAW / topdown-fetch-lat (0x8600) [event18:base-stat] fd=18 +group_fd=11 type=4 -config=60 +config=34304 +disabled=0 +enable_on_exec=0 +read_format=15 optional=1 -# PERF_TYPE_RAW / INT_MISC.RECOVERY_CYCLES_ANY +# PERF_TYPE_RAW / topdown-mem-bound (0x8700) [event19:base-stat] fd=19 +group_fd=11 type=4 -config=2097421 +config=34560 +disabled=0 +enable_on_exec=0 +read_format=15 optional=1 -# PERF_TYPE_RAW / CPU_CLK_UNHALTED.REF_XCLK +# PERF_TYPE_RAW / INT_MISC.UOP_DROPPING [event20:base-stat] fd=20 type=4 -config=316 +config=4109 optional=1 -# PERF_TYPE_RAW / IDQ_UOPS_NOT_DELIVERED.CORE +# PERF_TYPE_RAW / cpu/INT_MISC.RECOVERY_CYCLES,cmask=1,edge/ [event21:base-stat] fd=21 type=4 -config=412 +config=17039629 optional=1 -# PERF_TYPE_RAW / CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE +# PERF_TYPE_RAW / CPU_CLK_UNHALTED.THREAD [event22:base-stat] fd=22 type=4 -config=572 +config=60 optional=1 -# PERF_TYPE_RAW / UOPS_RETIRED.RETIRE_SLOTS +# PERF_TYPE_RAW / INT_MISC.RECOVERY_CYCLES_ANY [event23:base-stat] fd=23 type=4 -config=706 +config=2097421 optional=1 -# PERF_TYPE_RAW / UOPS_ISSUED.ANY +# PERF_TYPE_RAW / CPU_CLK_UNHALTED.REF_XCLK [event24:base-stat] fd=24 type=4 +config=316 +optional=1 + +# PERF_TYPE_RAW / IDQ_UOPS_NOT_DELIVERED.CORE +[event25:base-stat] +fd=25 +type=4 +config=412 +optional=1 + +# PERF_TYPE_RAW / CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE +[event26:base-stat] +fd=26 +type=4 +config=572 +optional=1 + +# PERF_TYPE_RAW / UOPS_RETIRED.RETIRE_SLOTS +[event27:base-stat] +fd=27 +type=4 +config=706 +optional=1 + +# PERF_TYPE_RAW / UOPS_ISSUED.ANY +[event28:base-stat] +fd=28 +type=4 config=270 optional=1 @@ -190,8 +234,8 @@ optional=1 # PERF_COUNT_HW_CACHE_L1D << 0 | # (PERF_COUNT_HW_CACHE_OP_READ << 8) | # (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) -[event25:base-stat] -fd=25 +[event29:base-stat] +fd=29 type=3 config=0 optional=1 @@ -200,8 +244,8 @@ optional=1 # PERF_COUNT_HW_CACHE_L1D << 0 | # (PERF_COUNT_HW_CACHE_OP_READ << 8) | # (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) -[event26:base-stat] -fd=26 +[event30:base-stat] +fd=30 type=3 config=65536 optional=1 @@ -210,8 +254,8 @@ optional=1 # PERF_COUNT_HW_CACHE_LL << 0 | # (PERF_COUNT_HW_CACHE_OP_READ << 8) | # (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) -[event27:base-stat] -fd=27 +[event31:base-stat] +fd=31 type=3 config=2 optional=1 @@ -220,8 +264,8 @@ optional=1 # PERF_COUNT_HW_CACHE_LL << 0 | # (PERF_COUNT_HW_CACHE_OP_READ << 8) | # (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) -[event28:base-stat] -fd=28 +[event32:base-stat] +fd=32 type=3 config=65538 optional=1 diff --git a/tools/perf/tests/attr/test-stat-detailed-2 b/tools/perf/tests/shell/attr/test-stat-detailed-2 index 7e961d24a885..01777a63752f 100644 --- a/tools/perf/tests/attr/test-stat-detailed-2 +++ b/tools/perf/tests/shell/attr/test-stat-detailed-2 @@ -90,99 +90,143 @@ enable_on_exec=0 read_format=15 optional=1 -# PERF_TYPE_RAW / topdown-fe-bound (0x8200) +# PERF_TYPE_RAW / topdown-bad-spec (0x8100) [event13:base-stat] fd=13 group_fd=11 type=4 -config=33280 +config=33024 disabled=0 enable_on_exec=0 read_format=15 optional=1 -# PERF_TYPE_RAW / topdown-be-bound (0x8300) +# PERF_TYPE_RAW / topdown-fe-bound (0x8200) [event14:base-stat] fd=14 group_fd=11 type=4 -config=33536 +config=33280 disabled=0 enable_on_exec=0 read_format=15 optional=1 -# PERF_TYPE_RAW / topdown-bad-spec (0x8100) +# PERF_TYPE_RAW / topdown-be-bound (0x8300) [event15:base-stat] fd=15 group_fd=11 type=4 -config=33024 +config=33536 disabled=0 enable_on_exec=0 read_format=15 optional=1 -# PERF_TYPE_RAW / INT_MISC.UOP_DROPPING +# PERF_TYPE_RAW / topdown-heavy-ops (0x8400) [event16:base-stat] fd=16 +group_fd=11 type=4 -config=4109 +config=33792 +disabled=0 +enable_on_exec=0 +read_format=15 optional=1 -# PERF_TYPE_RAW / cpu/INT_MISC.RECOVERY_CYCLES,cmask=1,edge/ +# PERF_TYPE_RAW / topdown-br-mispredict (0x8500) [event17:base-stat] fd=17 +group_fd=11 type=4 -config=17039629 +config=34048 +disabled=0 +enable_on_exec=0 +read_format=15 optional=1 -# PERF_TYPE_RAW / CPU_CLK_UNHALTED.THREAD +# PERF_TYPE_RAW / topdown-fetch-lat (0x8600) [event18:base-stat] fd=18 +group_fd=11 type=4 -config=60 +config=34304 +disabled=0 +enable_on_exec=0 +read_format=15 optional=1 -# PERF_TYPE_RAW / INT_MISC.RECOVERY_CYCLES_ANY +# PERF_TYPE_RAW / topdown-mem-bound (0x8700) [event19:base-stat] fd=19 +group_fd=11 type=4 -config=2097421 +config=34560 +disabled=0 +enable_on_exec=0 +read_format=15 optional=1 -# PERF_TYPE_RAW / CPU_CLK_UNHALTED.REF_XCLK +# PERF_TYPE_RAW / INT_MISC.UOP_DROPPING [event20:base-stat] fd=20 type=4 -config=316 +config=4109 optional=1 -# PERF_TYPE_RAW / IDQ_UOPS_NOT_DELIVERED.CORE +# PERF_TYPE_RAW / cpu/INT_MISC.RECOVERY_CYCLES,cmask=1,edge/ [event21:base-stat] fd=21 type=4 -config=412 +config=17039629 optional=1 -# PERF_TYPE_RAW / CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE +# PERF_TYPE_RAW / CPU_CLK_UNHALTED.THREAD [event22:base-stat] fd=22 type=4 -config=572 +config=60 optional=1 -# PERF_TYPE_RAW / UOPS_RETIRED.RETIRE_SLOTS +# PERF_TYPE_RAW / INT_MISC.RECOVERY_CYCLES_ANY [event23:base-stat] fd=23 type=4 -config=706 +config=2097421 optional=1 -# PERF_TYPE_RAW / UOPS_ISSUED.ANY +# PERF_TYPE_RAW / CPU_CLK_UNHALTED.REF_XCLK [event24:base-stat] fd=24 type=4 +config=316 +optional=1 + +# PERF_TYPE_RAW / IDQ_UOPS_NOT_DELIVERED.CORE +[event25:base-stat] +fd=25 +type=4 +config=412 +optional=1 + +# PERF_TYPE_RAW / CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE +[event26:base-stat] +fd=26 +type=4 +config=572 +optional=1 + +# PERF_TYPE_RAW / UOPS_RETIRED.RETIRE_SLOTS +[event27:base-stat] +fd=27 +type=4 +config=706 +optional=1 + +# PERF_TYPE_RAW / UOPS_ISSUED.ANY +[event28:base-stat] +fd=28 +type=4 config=270 optional=1 @@ -190,8 +234,8 @@ optional=1 # PERF_COUNT_HW_CACHE_L1D << 0 | # (PERF_COUNT_HW_CACHE_OP_READ << 8) | # (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) -[event25:base-stat] -fd=25 +[event29:base-stat] +fd=29 type=3 config=0 optional=1 @@ -200,8 +244,8 @@ optional=1 # PERF_COUNT_HW_CACHE_L1D << 0 | # (PERF_COUNT_HW_CACHE_OP_READ << 8) | # (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) -[event26:base-stat] -fd=26 +[event30:base-stat] +fd=30 type=3 config=65536 optional=1 @@ -210,8 +254,8 @@ optional=1 # PERF_COUNT_HW_CACHE_LL << 0 | # (PERF_COUNT_HW_CACHE_OP_READ << 8) | # (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) -[event27:base-stat] -fd=27 +[event31:base-stat] +fd=31 type=3 config=2 optional=1 @@ -220,8 +264,8 @@ optional=1 # PERF_COUNT_HW_CACHE_LL << 0 | # (PERF_COUNT_HW_CACHE_OP_READ << 8) | # (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) -[event28:base-stat] -fd=28 +[event32:base-stat] +fd=32 type=3 config=65538 optional=1 @@ -230,8 +274,8 @@ optional=1 # PERF_COUNT_HW_CACHE_L1I << 0 | # (PERF_COUNT_HW_CACHE_OP_READ << 8) | # (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) -[event29:base-stat] -fd=29 +[event33:base-stat] +fd=33 type=3 config=1 optional=1 @@ -240,8 +284,8 @@ optional=1 # PERF_COUNT_HW_CACHE_L1I << 0 | # (PERF_COUNT_HW_CACHE_OP_READ << 8) | # (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) -[event30:base-stat] -fd=30 +[event34:base-stat] +fd=34 type=3 config=65537 optional=1 @@ -250,8 +294,8 @@ optional=1 # PERF_COUNT_HW_CACHE_DTLB << 0 | # (PERF_COUNT_HW_CACHE_OP_READ << 8) | # (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) -[event31:base-stat] -fd=31 +[event35:base-stat] +fd=35 type=3 config=3 optional=1 @@ -260,8 +304,8 @@ optional=1 # PERF_COUNT_HW_CACHE_DTLB << 0 | # (PERF_COUNT_HW_CACHE_OP_READ << 8) | # (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) -[event32:base-stat] -fd=32 +[event36:base-stat] +fd=36 type=3 config=65539 optional=1 @@ -270,8 +314,8 @@ optional=1 # PERF_COUNT_HW_CACHE_ITLB << 0 | # (PERF_COUNT_HW_CACHE_OP_READ << 8) | # (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) -[event33:base-stat] -fd=33 +[event37:base-stat] +fd=37 type=3 config=4 optional=1 @@ -280,8 +324,8 @@ optional=1 # PERF_COUNT_HW_CACHE_ITLB << 0 | # (PERF_COUNT_HW_CACHE_OP_READ << 8) | # (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) -[event34:base-stat] -fd=34 +[event38:base-stat] +fd=38 type=3 config=65540 optional=1 diff --git a/tools/perf/tests/attr/test-stat-detailed-3 b/tools/perf/tests/shell/attr/test-stat-detailed-3 index e50535f45977..8400abd7e1e4 100644 --- a/tools/perf/tests/attr/test-stat-detailed-3 +++ b/tools/perf/tests/shell/attr/test-stat-detailed-3 @@ -90,99 +90,143 @@ enable_on_exec=0 read_format=15 optional=1 -# PERF_TYPE_RAW / topdown-fe-bound (0x8200) +# PERF_TYPE_RAW / topdown-bad-spec (0x8100) [event13:base-stat] fd=13 group_fd=11 type=4 -config=33280 +config=33024 disabled=0 enable_on_exec=0 read_format=15 optional=1 -# PERF_TYPE_RAW / topdown-be-bound (0x8300) +# PERF_TYPE_RAW / topdown-fe-bound (0x8200) [event14:base-stat] fd=14 group_fd=11 type=4 -config=33536 +config=33280 disabled=0 enable_on_exec=0 read_format=15 optional=1 -# PERF_TYPE_RAW / topdown-bad-spec (0x8100) +# PERF_TYPE_RAW / topdown-be-bound (0x8300) [event15:base-stat] fd=15 group_fd=11 type=4 -config=33024 +config=33536 disabled=0 enable_on_exec=0 read_format=15 optional=1 -# PERF_TYPE_RAW / INT_MISC.UOP_DROPPING +# PERF_TYPE_RAW / topdown-heavy-ops (0x8400) [event16:base-stat] fd=16 +group_fd=11 type=4 -config=4109 +config=33792 +disabled=0 +enable_on_exec=0 +read_format=15 optional=1 -# PERF_TYPE_RAW / cpu/INT_MISC.RECOVERY_CYCLES,cmask=1,edge/ +# PERF_TYPE_RAW / topdown-br-mispredict (0x8500) [event17:base-stat] fd=17 +group_fd=11 type=4 -config=17039629 +config=34048 +disabled=0 +enable_on_exec=0 +read_format=15 optional=1 -# PERF_TYPE_RAW / CPU_CLK_UNHALTED.THREAD +# PERF_TYPE_RAW / topdown-fetch-lat (0x8600) [event18:base-stat] fd=18 +group_fd=11 type=4 -config=60 +config=34304 +disabled=0 +enable_on_exec=0 +read_format=15 optional=1 -# PERF_TYPE_RAW / INT_MISC.RECOVERY_CYCLES_ANY +# PERF_TYPE_RAW / topdown-mem-bound (0x8700) [event19:base-stat] fd=19 +group_fd=11 type=4 -config=2097421 +config=34560 +disabled=0 +enable_on_exec=0 +read_format=15 optional=1 -# PERF_TYPE_RAW / CPU_CLK_UNHALTED.REF_XCLK +# PERF_TYPE_RAW / INT_MISC.UOP_DROPPING [event20:base-stat] fd=20 type=4 -config=316 +config=4109 optional=1 -# PERF_TYPE_RAW / IDQ_UOPS_NOT_DELIVERED.CORE +# PERF_TYPE_RAW / cpu/INT_MISC.RECOVERY_CYCLES,cmask=1,edge/ [event21:base-stat] fd=21 type=4 -config=412 +config=17039629 optional=1 -# PERF_TYPE_RAW / CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE +# PERF_TYPE_RAW / CPU_CLK_UNHALTED.THREAD [event22:base-stat] fd=22 type=4 -config=572 +config=60 optional=1 -# PERF_TYPE_RAW / UOPS_RETIRED.RETIRE_SLOTS +# PERF_TYPE_RAW / INT_MISC.RECOVERY_CYCLES_ANY [event23:base-stat] fd=23 type=4 -config=706 +config=2097421 optional=1 -# PERF_TYPE_RAW / UOPS_ISSUED.ANY +# PERF_TYPE_RAW / CPU_CLK_UNHALTED.REF_XCLK [event24:base-stat] fd=24 type=4 +config=316 +optional=1 + +# PERF_TYPE_RAW / IDQ_UOPS_NOT_DELIVERED.CORE +[event25:base-stat] +fd=25 +type=4 +config=412 +optional=1 + +# PERF_TYPE_RAW / CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE +[event26:base-stat] +fd=26 +type=4 +config=572 +optional=1 + +# PERF_TYPE_RAW / UOPS_RETIRED.RETIRE_SLOTS +[event27:base-stat] +fd=27 +type=4 +config=706 +optional=1 + +# PERF_TYPE_RAW / UOPS_ISSUED.ANY +[event28:base-stat] +fd=28 +type=4 config=270 optional=1 @@ -190,8 +234,8 @@ optional=1 # PERF_COUNT_HW_CACHE_L1D << 0 | # (PERF_COUNT_HW_CACHE_OP_READ << 8) | # (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) -[event25:base-stat] -fd=25 +[event29:base-stat] +fd=29 type=3 config=0 optional=1 @@ -200,8 +244,8 @@ optional=1 # PERF_COUNT_HW_CACHE_L1D << 0 | # (PERF_COUNT_HW_CACHE_OP_READ << 8) | # (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) -[event26:base-stat] -fd=26 +[event30:base-stat] +fd=30 type=3 config=65536 optional=1 @@ -210,8 +254,8 @@ optional=1 # PERF_COUNT_HW_CACHE_LL << 0 | # (PERF_COUNT_HW_CACHE_OP_READ << 8) | # (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) -[event27:base-stat] -fd=27 +[event31:base-stat] +fd=31 type=3 config=2 optional=1 @@ -220,8 +264,8 @@ optional=1 # PERF_COUNT_HW_CACHE_LL << 0 | # (PERF_COUNT_HW_CACHE_OP_READ << 8) | # (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) -[event28:base-stat] -fd=28 +[event32:base-stat] +fd=32 type=3 config=65538 optional=1 @@ -230,8 +274,8 @@ optional=1 # PERF_COUNT_HW_CACHE_L1I << 0 | # (PERF_COUNT_HW_CACHE_OP_READ << 8) | # (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) -[event29:base-stat] -fd=29 +[event33:base-stat] +fd=33 type=3 config=1 optional=1 @@ -240,8 +284,8 @@ optional=1 # PERF_COUNT_HW_CACHE_L1I << 0 | # (PERF_COUNT_HW_CACHE_OP_READ << 8) | # (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) -[event30:base-stat] -fd=30 +[event34:base-stat] +fd=34 type=3 config=65537 optional=1 @@ -250,8 +294,8 @@ optional=1 # PERF_COUNT_HW_CACHE_DTLB << 0 | # (PERF_COUNT_HW_CACHE_OP_READ << 8) | # (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) -[event31:base-stat] -fd=31 +[event35:base-stat] +fd=35 type=3 config=3 optional=1 @@ -260,8 +304,8 @@ optional=1 # PERF_COUNT_HW_CACHE_DTLB << 0 | # (PERF_COUNT_HW_CACHE_OP_READ << 8) | # (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) -[event32:base-stat] -fd=32 +[event36:base-stat] +fd=36 type=3 config=65539 optional=1 @@ -270,8 +314,8 @@ optional=1 # PERF_COUNT_HW_CACHE_ITLB << 0 | # (PERF_COUNT_HW_CACHE_OP_READ << 8) | # (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) -[event33:base-stat] -fd=33 +[event37:base-stat] +fd=37 type=3 config=4 optional=1 @@ -280,8 +324,8 @@ optional=1 # PERF_COUNT_HW_CACHE_ITLB << 0 | # (PERF_COUNT_HW_CACHE_OP_READ << 8) | # (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) -[event34:base-stat] -fd=34 +[event38:base-stat] +fd=38 type=3 config=65540 optional=1 @@ -290,8 +334,8 @@ optional=1 # PERF_COUNT_HW_CACHE_L1D << 0 | # (PERF_COUNT_HW_CACHE_OP_PREFETCH << 8) | # (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) -[event35:base-stat] -fd=35 +[event39:base-stat] +fd=39 type=3 config=512 optional=1 @@ -300,8 +344,8 @@ optional=1 # PERF_COUNT_HW_CACHE_L1D << 0 | # (PERF_COUNT_HW_CACHE_OP_PREFETCH << 8) | # (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) -[event36:base-stat] -fd=36 +[event40:base-stat] +fd=40 type=3 config=66048 optional=1 diff --git a/tools/perf/tests/attr/test-stat-group1 b/tools/perf/tests/shell/attr/test-stat-group1 index 1746751123dc..1746751123dc 100644 --- a/tools/perf/tests/attr/test-stat-group1 +++ b/tools/perf/tests/shell/attr/test-stat-group1 diff --git a/tools/perf/tests/attr/test-stat-no-inherit b/tools/perf/tests/shell/attr/test-stat-no-inherit index 924fbb9300d1..924fbb9300d1 100644 --- a/tools/perf/tests/attr/test-stat-no-inherit +++ b/tools/perf/tests/shell/attr/test-stat-no-inherit diff --git a/tools/perf/tests/shell/base_probe/test_adding_blacklisted.sh b/tools/perf/tests/shell/base_probe/test_adding_blacklisted.sh index b5dc10b2a738..bead723e34af 100755 --- a/tools/perf/tests/shell/base_probe/test_adding_blacklisted.sh +++ b/tools/perf/tests/shell/base_probe/test_adding_blacklisted.sh @@ -19,35 +19,74 @@ TEST_RESULT=0 # skip if not supported -BLACKFUNC=`head -n 1 /sys/kernel/debug/kprobes/blacklist 2> /dev/null | cut -f2` -if [ -z "$BLACKFUNC" ]; then +BLACKFUNC_LIST=`head -n 5 /sys/kernel/debug/kprobes/blacklist 2> /dev/null | cut -f2` +if [ -z "$BLACKFUNC_LIST" ]; then print_overall_skipped exit 0 fi +# try to find vmlinux with DWARF debug info +VMLINUX_FILE=$(perf probe -v random_probe |& grep "Using.*for symbols" | sed -r 's/^Using (.*) for symbols$/\1/') + # remove all previously added probes clear_all_probes ### adding blacklisted function - -# functions from blacklist should be skipped by perf probe -! $CMD_PERF probe $BLACKFUNC > $LOGS_DIR/adding_blacklisted.log 2> $LOGS_DIR/adding_blacklisted.err -PERF_EXIT_CODE=$? - REGEX_SCOPE_FAIL="Failed to find scope of probe point" REGEX_SKIP_MESSAGE=" is blacklisted function, skip it\." -REGEX_NOT_FOUND_MESSAGE="Probe point \'$BLACKFUNC\' not found." +REGEX_NOT_FOUND_MESSAGE="Probe point \'$RE_EVENT\' not found." REGEX_ERROR_MESSAGE="Error: Failed to add events." REGEX_INVALID_ARGUMENT="Failed to write event: Invalid argument" REGEX_SYMBOL_FAIL="Failed to find symbol at $RE_ADDRESS" -REGEX_OUT_SECTION="$BLACKFUNC is out of \.\w+, skip it" -../common/check_all_lines_matched.pl "$REGEX_SKIP_MESSAGE" "$REGEX_NOT_FOUND_MESSAGE" "$REGEX_ERROR_MESSAGE" "$REGEX_SCOPE_FAIL" "$REGEX_INVALID_ARGUMENT" "$REGEX_SYMBOL_FAIL" "$REGEX_OUT_SECTION" < $LOGS_DIR/adding_blacklisted.err -CHECK_EXIT_CODE=$? - -print_results $PERF_EXIT_CODE $CHECK_EXIT_CODE "adding blacklisted function $BLACKFUNC" -(( TEST_RESULT += $? )) - +REGEX_OUT_SECTION="$RE_EVENT is out of \.\w+, skip it" +REGEX_MISSING_DECL_LINE="A function DIE doesn't have decl_line. Maybe broken DWARF?" + +BLACKFUNC="" +SKIP_DWARF=0 + +for BLACKFUNC in $BLACKFUNC_LIST; do + echo "Probing $BLACKFUNC" + + # functions from blacklist should be skipped by perf probe + ! $CMD_PERF probe $BLACKFUNC > $LOGS_DIR/adding_blacklisted.log 2> $LOGS_DIR/adding_blacklisted.err + PERF_EXIT_CODE=$? + + # check for bad DWARF polluting the result + ../common/check_all_patterns_found.pl "$REGEX_MISSING_DECL_LINE" >/dev/null < $LOGS_DIR/adding_blacklisted.err + + if [ $? -eq 0 ]; then + SKIP_DWARF=1 + echo "Result polluted by broken DWARF, trying another probe" + + # confirm that the broken DWARF comes from assembler + if [ -n "$VMLINUX_FILE" ]; then + readelf -wi "$VMLINUX_FILE" | + awk -v probe="$BLACKFUNC" '/DW_AT_language/ { comp_lang = $0 } + $0 ~ probe { if (comp_lang) { print comp_lang }; exit }' | + grep -q "MIPS assembler" + + CHECK_EXIT_CODE=$? + if [ $CHECK_EXIT_CODE -ne 0 ]; then + SKIP_DWARF=0 # broken DWARF while available + break + fi + fi + else + ../common/check_all_lines_matched.pl "$REGEX_SKIP_MESSAGE" "$REGEX_NOT_FOUND_MESSAGE" "$REGEX_ERROR_MESSAGE" "$REGEX_SCOPE_FAIL" "$REGEX_INVALID_ARGUMENT" "$REGEX_SYMBOL_FAIL" "$REGEX_OUT_SECTION" < $LOGS_DIR/adding_blacklisted.err + CHECK_EXIT_CODE=$? + + SKIP_DWARF=0 + break + fi +done + +if [ $SKIP_DWARF -eq 1 ]; then + print_testcase_skipped "adding blacklisted function $BLACKFUNC" +else + print_results $PERF_EXIT_CODE $CHECK_EXIT_CODE "adding blacklisted function $BLACKFUNC" + (( TEST_RESULT += $? )) +fi ### listing not-added probe diff --git a/tools/perf/tests/shell/coresight/asm_pure_loop.sh b/tools/perf/tests/shell/coresight/asm_pure_loop.sh index 2d65defb7e0f..c63bc8c73e26 100755 --- a/tools/perf/tests/shell/coresight/asm_pure_loop.sh +++ b/tools/perf/tests/shell/coresight/asm_pure_loop.sh @@ -1,5 +1,5 @@ #!/bin/sh -e -# CoreSight / ASM Pure Loop +# CoreSight / ASM Pure Loop (exclusive) # SPDX-License-Identifier: GPL-2.0 # Carsten Haitzler <carsten.haitzler@arm.com>, 2021 diff --git a/tools/perf/tests/shell/coresight/memcpy_thread_16k_10.sh b/tools/perf/tests/shell/coresight/memcpy_thread_16k_10.sh index ddcc9bb850f5..8e29630957c8 100755 --- a/tools/perf/tests/shell/coresight/memcpy_thread_16k_10.sh +++ b/tools/perf/tests/shell/coresight/memcpy_thread_16k_10.sh @@ -1,5 +1,5 @@ #!/bin/sh -e -# CoreSight / Memcpy 16k 10 Threads +# CoreSight / Memcpy 16k 10 Threads (exclusive) # SPDX-License-Identifier: GPL-2.0 # Carsten Haitzler <carsten.haitzler@arm.com>, 2021 diff --git a/tools/perf/tests/shell/coresight/thread_loop_check_tid_10.sh b/tools/perf/tests/shell/coresight/thread_loop_check_tid_10.sh index 2ce5e139b2fd..0c4c82a1c8e1 100755 --- a/tools/perf/tests/shell/coresight/thread_loop_check_tid_10.sh +++ b/tools/perf/tests/shell/coresight/thread_loop_check_tid_10.sh @@ -1,5 +1,5 @@ #!/bin/sh -e -# CoreSight / Thread Loop 10 Threads - Check TID +# CoreSight / Thread Loop 10 Threads - Check TID (exclusive) # SPDX-License-Identifier: GPL-2.0 # Carsten Haitzler <carsten.haitzler@arm.com>, 2021 diff --git a/tools/perf/tests/shell/coresight/thread_loop_check_tid_2.sh b/tools/perf/tests/shell/coresight/thread_loop_check_tid_2.sh index 3ad9498753d7..d3aea9fc6ced 100755 --- a/tools/perf/tests/shell/coresight/thread_loop_check_tid_2.sh +++ b/tools/perf/tests/shell/coresight/thread_loop_check_tid_2.sh @@ -1,5 +1,5 @@ #!/bin/sh -e -# CoreSight / Thread Loop 2 Threads - Check TID +# CoreSight / Thread Loop 2 Threads - Check TID (exclusive) # SPDX-License-Identifier: GPL-2.0 # Carsten Haitzler <carsten.haitzler@arm.com>, 2021 diff --git a/tools/perf/tests/shell/coresight/unroll_loop_thread_10.sh b/tools/perf/tests/shell/coresight/unroll_loop_thread_10.sh index 4fbb4a29aad3..7429d3a2ae43 100755 --- a/tools/perf/tests/shell/coresight/unroll_loop_thread_10.sh +++ b/tools/perf/tests/shell/coresight/unroll_loop_thread_10.sh @@ -1,5 +1,5 @@ #!/bin/sh -e -# CoreSight / Unroll Loop Thread 10 +# CoreSight / Unroll Loop Thread 10 (exclusive) # SPDX-License-Identifier: GPL-2.0 # Carsten Haitzler <carsten.haitzler@arm.com>, 2021 diff --git a/tools/perf/tests/shell/ftrace.sh b/tools/perf/tests/shell/ftrace.sh index a6ee740f0d7e..2df05052c324 100755 --- a/tools/perf/tests/shell/ftrace.sh +++ b/tools/perf/tests/shell/ftrace.sh @@ -67,12 +67,12 @@ test_ftrace_latency() { test_ftrace_profile() { echo "perf ftrace profile test" - perf ftrace profile sleep 0.1 > "${output}" + perf ftrace profile -m 16M sleep 0.1 > "${output}" grep ^# "${output}" grep sleep "${output}" grep schedule "${output}" grep execve "${output}" - time_re="[[:space:]]+10[[:digit:]]{4}\.[[:digit:]]{3}" + time_re="[[:space:]]+1[[:digit:]]{5}\.[[:digit:]]{3}" # 100283.000 100283.000 100283.000 1 __x64_sys_clock_nanosleep # Check for one *clock_nanosleep line with a Count of just 1 that takes a bit more than 0.1 seconds # Strip the _x64_sys part to work with other architectures diff --git a/tools/perf/tests/attr.py b/tools/perf/tests/shell/lib/attr.py index e890c261ad26..3db9a7d78715 100644 --- a/tools/perf/tests/attr.py +++ b/tools/perf/tests/shell/lib/attr.py @@ -246,6 +246,23 @@ class Test(object): return False return True + def restore_sample_rate(self, value=10000): + try: + # Check value of sample_rate + with open("/proc/sys/kernel/perf_event_max_sample_rate", "r") as fIn: + curr_value = fIn.readline() + # If too low restore to reasonable value + if not curr_value or int(curr_value) < int(value): + with open("/proc/sys/kernel/perf_event_max_sample_rate", "w") as fOut: + fOut.write(str(value)) + + except IOError as e: + log.warning("couldn't restore sample_rate value: I/O error %s" % e) + except ValueError as e: + log.warning("couldn't restore sample_rate value: Value error %s" % e) + except TypeError as e: + log.warning("couldn't restore sample_rate value: Type error %s" % e) + def load_events(self, path, events): parser_event = configparser.ConfigParser() parser_event.read(path) @@ -283,6 +300,7 @@ class Test(object): if self.skip_test_kernel_until(): raise Notest(self, "new kernel skip") + self.restore_sample_rate() cmd = "PERF_TEST_ATTR=%s %s %s -o %s/perf.data %s" % (tempdir, self.perf, self.command, tempdir, self.args) ret = os.WEXITSTATUS(os.system(cmd)) diff --git a/tools/perf/tests/shell/lib/coresight.sh b/tools/perf/tests/shell/lib/coresight.sh index 11ed2c25ed91..184d62e7e5bd 100644 --- a/tools/perf/tests/shell/lib/coresight.sh +++ b/tools/perf/tests/shell/lib/coresight.sh @@ -18,7 +18,7 @@ BIN="$DIR/$TEST" # If the test tool/binary does not exist and is executable then skip the test if ! test -x "$BIN"; then exit 2; fi # If CoreSight is not available, skip the test -perf list cs_etm | grep -q cs_etm || exit 2 +perf list pmu | grep -q cs_etm || exit 2 DATD="." # If the data dir env is set then make the data dir use that instead of ./ if test -n "$PERF_TEST_CORESIGHT_DATADIR"; then diff --git a/tools/perf/tests/shell/lib/perf_json_output_lint.py b/tools/perf/tests/shell/lib/perf_json_output_lint.py index abc1fd737782..8ddb85586131 100644 --- a/tools/perf/tests/shell/lib/perf_json_output_lint.py +++ b/tools/perf/tests/shell/lib/perf_json_output_lint.py @@ -57,6 +57,7 @@ def check_json_output(expected_items): 'interval': lambda x: isfloat(x), 'metric-unit': lambda x: True, 'metric-value': lambda x: isfloat(x), + 'metric-threshold': lambda x: x in ['unknown', 'good', 'less good', 'nearly bad', 'bad'], 'metricgroup': lambda x: True, 'node': lambda x: True, 'pcnt-running': lambda x: isfloat(x), @@ -68,13 +69,15 @@ def check_json_output(expected_items): for item in json.loads(input): if expected_items != -1: count = len(item) - if count != expected_items and count >= 1 and count <= 6 and 'metric-value' in item: + if count != expected_items and count >= 1 and count <= 7 and 'metric-value' in item: # Events that generate >1 metric may have isolated metric # values and possibly other prefixes like interval, core, # aggregate-number, or event-runtime/pcnt-running from multiplexing. pass elif count != expected_items and count >= 1 and count <= 5 and 'metricgroup' in item: pass + elif count == expected_items + 1 and 'metric-threshold' in item: + pass elif count != expected_items: raise RuntimeError(f'wrong number of fields. counted {count} expected {expected_items}' f' in \'{item}\'') diff --git a/tools/perf/tests/shell/list.sh b/tools/perf/tests/shell/list.sh index 8a868ae64560..76a9846cff22 100755 --- a/tools/perf/tests/shell/list.sh +++ b/tools/perf/tests/shell/list.sh @@ -24,8 +24,11 @@ trap trap_cleanup EXIT TERM INT test_list_json() { echo "Json output test" + # Generate perf list json output into list_output file. perf list -j -o "${list_output}" - $PYTHON -m json.tool "${list_output}" + # Validate the json using python, redirect the json copy to /dev/null as + # otherwise the test may block writing to stdout. + $PYTHON -m json.tool "${list_output}" /dev/null echo "Json output test [Success]" } diff --git a/tools/perf/tests/shell/lock_contention.sh b/tools/perf/tests/shell/lock_contention.sh index c1ec5762215b..30d195d4c62f 100755 --- a/tools/perf/tests/shell/lock_contention.sh +++ b/tools/perf/tests/shell/lock_contention.sh @@ -27,7 +27,7 @@ check() { exit fi - if ! perf list | grep -q lock:contention_begin; then + if ! perf list tracepoint | grep -q lock:contention_begin; then echo "[Skip] No lock contention tracepoints" err=2 exit diff --git a/tools/perf/tests/shell/perftool-testsuite_report.sh b/tools/perf/tests/shell/perftool-testsuite_report.sh index 973012ce92a7..a8cf75b4e77e 100755 --- a/tools/perf/tests/shell/perftool-testsuite_report.sh +++ b/tools/perf/tests/shell/perftool-testsuite_report.sh @@ -1,5 +1,5 @@ #!/bin/bash -# perftool-testsuite_report +# perftool-testsuite_report (exclusive) # SPDX-License-Identifier: GPL-2.0 test -d "$(dirname "$0")/base_report" || exit 2 diff --git a/tools/perf/tests/shell/pipe_test.sh b/tools/perf/tests/shell/pipe_test.sh index d4c8005ce9b9..e459aa99a951 100755 --- a/tools/perf/tests/shell/pipe_test.sh +++ b/tools/perf/tests/shell/pipe_test.sh @@ -13,6 +13,7 @@ skip_test_missing_symbol ${sym} data=$(mktemp /tmp/perf.data.XXXXXX) data2=$(mktemp /tmp/perf.data2.XXXXXX) prog="perf test -w noploop" +[ "$(uname -m)" = "s390x" ] && prog="$prog 3" err=0 set -e diff --git a/tools/perf/tests/shell/probe_vfs_getname.sh b/tools/perf/tests/shell/probe_vfs_getname.sh index 554e12e83c55..0c5aacc446b3 100755 --- a/tools/perf/tests/shell/probe_vfs_getname.sh +++ b/tools/perf/tests/shell/probe_vfs_getname.sh @@ -1,5 +1,5 @@ #!/bin/sh -# Add vfs_getname probe to get syscall args filenames +# Add vfs_getname probe to get syscall args filenames (exclusive) # SPDX-License-Identifier: GPL-2.0 # Arnaldo Carvalho de Melo <acme@kernel.org>, 2017 diff --git a/tools/perf/tests/shell/record+probe_libc_inet_pton.sh b/tools/perf/tests/shell/record+probe_libc_inet_pton.sh index f38c8ead0b03..47a26f25db9f 100755 --- a/tools/perf/tests/shell/record+probe_libc_inet_pton.sh +++ b/tools/perf/tests/shell/record+probe_libc_inet_pton.sh @@ -40,8 +40,8 @@ trace_libc_inet_pton_backtrace() { case "$(uname -m)" in s390x) eventattr='call-graph=dwarf,max-stack=4' - echo "(__GI_)?getaddrinfo\+0x[[:xdigit:]]+[[:space:]]\($libc|inlined\)$" >> $expected - echo "main\+0x[[:xdigit:]]+[[:space:]]\(.*/bin/ping.*\)$" >> $expected + echo "((__GI_)?getaddrinfo|text_to_binary_address)\+0x[[:xdigit:]]+[[:space:]]\($libc|inlined\)$" >> $expected + echo "(gaih_inet|main)\+0x[[:xdigit:]]+[[:space:]]\(inlined|.*/bin/ping.*\)$" >> $expected ;; ppc64|ppc64le) eventattr='max-stack=4' diff --git a/tools/perf/tests/shell/record+script_probe_vfs_getname.sh b/tools/perf/tests/shell/record+script_probe_vfs_getname.sh index 9a61928e3c9a..5940fdc1df37 100755 --- a/tools/perf/tests/shell/record+script_probe_vfs_getname.sh +++ b/tools/perf/tests/shell/record+script_probe_vfs_getname.sh @@ -1,5 +1,5 @@ #!/bin/sh -# Use vfs_getname probe to get syscall args filenames +# Use vfs_getname probe to get syscall args filenames (exclusive) # Uses the 'perf test shell' library to add probe:vfs_getname to the system # then use it with 'perf record' using 'touch' to write to a temp file, then diff --git a/tools/perf/tests/shell/record.sh b/tools/perf/tests/shell/record.sh index 048078ee2eca..0fc7a909ae9b 100755 --- a/tools/perf/tests/shell/record.sh +++ b/tools/perf/tests/shell/record.sh @@ -1,5 +1,5 @@ #!/bin/bash -# perf record tests +# perf record tests (exclusive) # SPDX-License-Identifier: GPL-2.0 set -e @@ -17,6 +17,7 @@ skip_test_missing_symbol ${testsym} err=0 perfdata=$(mktemp /tmp/__perf_test.perf.data.XXXXX) +script_output=$(mktemp /tmp/__perf_test.perf.data.XXXXX.script) testprog="perf test -w thloop" cpu_pmu_dir="/sys/bus/event_source/devices/cpu*" br_cntr_file="/caps/branch_counter_nr" @@ -93,7 +94,7 @@ test_per_thread() { test_register_capture() { echo "Register capture test" - if ! perf list | grep -q 'br_inst_retired.near_call' + if ! perf list pmu | grep -q 'br_inst_retired.near_call' then echo "Register capture test [Skipped missing event]" return @@ -228,6 +229,73 @@ test_cgroup() { echo "Cgroup sampling test [Success]" } +test_leader_sampling() { + echo "Basic leader sampling test" + if ! perf record -o "${perfdata}" -e "{instructions,instructions}:Su" -- \ + perf test -w brstack 2> /dev/null + then + echo "Leader sampling [Failed record]" + err=1 + return + fi + index=0 + perf script -i "${perfdata}" > $script_output + while IFS= read -r line + do + # Check if the two instruction counts are equal in each record + instructions=$(echo $line | awk '{for(i=1;i<=NF;i++) if($i=="instructions:") print $(i-1)}') + if [ $(($index%2)) -ne 0 ] && [ ${instructions}x != ${prev_instructions}x ] + then + echo "Leader sampling [Failed inconsistent instructions count]" + err=1 + return + fi + index=$(($index+1)) + prev_instructions=$instructions + done < $script_output + echo "Basic leader sampling test [Success]" +} + +test_topdown_leader_sampling() { + echo "Topdown leader sampling test" + if ! perf stat -e "{slots,topdown-retiring}" true 2> /dev/null + then + echo "Topdown leader sampling [Skipped event parsing failed]" + return + fi + if ! perf record -o "${perfdata}" -e "{instructions,slots,topdown-retiring}:S" true 2> /dev/null + then + echo "Topdown leader sampling [Failed topdown events not reordered correctly]" + err=1 + return + fi + echo "Topdown leader sampling test [Success]" +} + +test_precise_max() { + echo "precise_max attribute test" + if ! perf stat -e "cycles,instructions" true 2> /dev/null + then + echo "precise_max attribute [Skipped no hardware events]" + return + fi + # Just to make sure it doesn't fail + if ! perf record -o "${perfdata}" -e "cycles:P" true 2> /dev/null + then + echo "precise_max attribute [Failed cycles:P event]" + err=1 + return + fi + # On AMD, cycles and instructions events are treated differently + if ! perf record -o "${perfdata}" -e "instructions:P" true 2> /dev/null + then + echo "precise_max attribute [Failed instructions:P event]" + err=1 + return + fi + echo "precise_max attribute test [Success]" +} + # raise the limit of file descriptors to minimum if [[ $default_fd_limit -lt $min_fd_limit ]]; then ulimit -Sn $min_fd_limit @@ -239,6 +307,9 @@ test_system_wide test_workload test_branch_counter test_cgroup +test_leader_sampling +test_topdown_leader_sampling +test_precise_max # restore the default value ulimit -Sn $default_fd_limit diff --git a/tools/perf/tests/shell/record_lbr.sh b/tools/perf/tests/shell/record_lbr.sh index 32314641217e..8d750ee631f8 100755 --- a/tools/perf/tests/shell/record_lbr.sh +++ b/tools/perf/tests/shell/record_lbr.sh @@ -1,5 +1,5 @@ #!/bin/bash -# perf record LBR tests +# perf record LBR tests (exclusive) # SPDX-License-Identifier: GPL-2.0 set -e diff --git a/tools/perf/tests/shell/record_offcpu.sh b/tools/perf/tests/shell/record_offcpu.sh index 67c925f3a15a..678947fe69ee 100755 --- a/tools/perf/tests/shell/record_offcpu.sh +++ b/tools/perf/tests/shell/record_offcpu.sh @@ -1,5 +1,5 @@ #!/bin/sh -# perf record offcpu profiling tests +# perf record offcpu profiling tests (exclusive) # SPDX-License-Identifier: GPL-2.0 set -e diff --git a/tools/perf/tests/shell/stat.sh b/tools/perf/tests/shell/stat.sh index 3f1e67795490..7a8adf81e4b3 100755 --- a/tools/perf/tests/shell/stat.sh +++ b/tools/perf/tests/shell/stat.sh @@ -73,9 +73,33 @@ test_topdown_groups() { err=1 return fi - if perf stat -e '{topdown-retiring,slots}' true 2>&1 | grep -E -q "<not supported>" + if perf stat -e 'instructions,topdown-retiring,slots' true 2>&1 | grep -E -q "<not supported>" then - echo "Topdown event group test [Failed slots not reordered first]" + echo "Topdown event group test [Failed slots not reordered first in no-group case]" + err=1 + return + fi + if perf stat -e '{instructions,topdown-retiring,slots}' true 2>&1 | grep -E -q "<not supported>" + then + echo "Topdown event group test [Failed slots not reordered first in single group case]" + err=1 + return + fi + if perf stat -e '{instructions,slots},topdown-retiring' true 2>&1 | grep -E -q "<not supported>" + then + echo "Topdown event group test [Failed topdown metrics event not move into slots group]" + err=1 + return + fi + if perf stat -e '{instructions,slots},{topdown-retiring}' true 2>&1 | grep -E -q "<not supported>" + then + echo "Topdown event group test [Failed topdown metrics group not merge into slots group]" + err=1 + return + fi + if perf stat -e '{instructions,r400,r8000}' true 2>&1 | grep -E -q "<not supported>" + then + echo "Topdown event group test [Failed raw format slots not reordered first]" err=1 return fi @@ -117,16 +141,18 @@ test_cputype() { # Find a known PMU for cputype. pmu="" - for i in cpu cpu_atom armv8_pmuv3_0 + devs="/sys/bus/event_source/devices" + for i in $devs/cpu $devs/cpu_atom $devs/armv8_pmuv3_0 $devs/armv8_cortex_* do - if test -d "/sys/devices/$i" + i_base=$(basename "$i") + if test -d "$i" then - pmu="$i" + pmu="$i_base" break fi - if perf stat -e "$i/instructions/" true > /dev/null 2>&1 + if perf stat -e "$i_base/instructions/" true > /dev/null 2>&1 then - pmu="$i" + pmu="$i_base" break fi done @@ -146,6 +172,30 @@ test_cputype() { echo "cputype test [Success]" } +test_hybrid() { + # Test the default stat command on hybrid devices opens one cycles event for + # each CPU type. + echo "hybrid test" + + # Count the number of core PMUs, assume minimum of 1 + pmus=$(ls /sys/bus/event_source/devices/*/cpus 2>/dev/null | wc -l) + if [ "$pmus" -lt 1 ] + then + pmus=1 + fi + + # Run default Perf stat + cycles_events=$(perf stat -- true 2>&1 | grep -E "/cycles/[uH]*| cycles[:uH]* " -c) + + if [ "$pmus" -ne "$cycles_events" ] + then + echo "hybrid test [Found $pmus PMUs but $cycles_events cycles events. Failed]" + err=1 + return + fi + echo "hybrid test [Success]" +} + test_default_stat test_stat_record_report test_stat_record_script @@ -153,4 +203,5 @@ test_stat_repeat_weak_groups test_topdown_groups test_topdown_weak_groups test_cputype +test_hybrid exit $err diff --git a/tools/perf/tests/shell/stat_all_metricgroups.sh b/tools/perf/tests/shell/stat_all_metricgroups.sh index 55ef9c9ded2d..c6d61a4ac3e7 100755 --- a/tools/perf/tests/shell/stat_all_metricgroups.sh +++ b/tools/perf/tests/shell/stat_all_metricgroups.sh @@ -1,9 +1,7 @@ -#!/bin/sh +#!/bin/bash # perf all metricgroups test # SPDX-License-Identifier: GPL-2.0 -set -e - ParanoidAndNotRoot() { [ "$(id -u)" != 0 ] && [ "$(cat /proc/sys/kernel/perf_event_paranoid)" -gt $1 ] @@ -14,11 +12,37 @@ if ParanoidAndNotRoot 0 then system_wide_flag="" fi - +err=0 for m in $(perf list --raw-dump metricgroups) do echo "Testing $m" - perf stat -M "$m" $system_wide_flag sleep 0.01 + result=$(perf stat -M "$m" $system_wide_flag sleep 0.01 2>&1) + result_err=$? + if [[ $result_err -gt 0 ]] + then + if [[ "$result" =~ \ + "Access to performance monitoring and observability operations is limited" ]] + then + echo "Permission failure" + echo $result + if [[ $err -eq 0 ]] + then + err=2 # Skip + fi + elif [[ "$result" =~ "in per-thread mode, enable system wide" ]] + then + echo "Permissions - need system wide mode" + echo $result + if [[ $err -eq 0 ]] + then + err=2 # Skip + fi + else + echo "Metric group $m failed" + echo $result + err=1 # Fail + fi + fi done -exit 0 +exit $err diff --git a/tools/perf/tests/shell/stat_all_metrics.sh b/tools/perf/tests/shell/stat_all_metrics.sh index 54774525e18a..73e9347e88a9 100755 --- a/tools/perf/tests/shell/stat_all_metrics.sh +++ b/tools/perf/tests/shell/stat_all_metrics.sh @@ -2,42 +2,87 @@ # perf all metrics test # SPDX-License-Identifier: GPL-2.0 +ParanoidAndNotRoot() +{ + [ "$(id -u)" != 0 ] && [ "$(cat /proc/sys/kernel/perf_event_paranoid)" -gt $1 ] +} + +system_wide_flag="-a" +if ParanoidAndNotRoot 0 +then + system_wide_flag="" +fi + err=0 for m in $(perf list --raw-dump metrics); do echo "Testing $m" - result=$(perf stat -M "$m" true 2>&1) - if [[ "$result" =~ ${m:0:50} ]] || [[ "$result" =~ "<not supported>" ]] + result=$(perf stat -M "$m" $system_wide_flag -- sleep 0.01 2>&1) + result_err=$? + if [[ $result_err -gt 0 ]] then - continue + if [[ "$result" =~ \ + "Access to performance monitoring and observability operations is limited" ]] + then + echo "Permission failure" + echo $result + if [[ $err -eq 0 ]] + then + err=2 # Skip + fi + continue + elif [[ "$result" =~ "in per-thread mode, enable system wide" ]] + then + echo "Permissions - need system wide mode" + echo $result + if [[ $err -eq 0 ]] + then + err=2 # Skip + fi + continue + elif [[ "$result" =~ "<not supported>" ]] + then + echo "Not supported events" + echo $result + if [[ $err -eq 0 ]] + then + err=2 # Skip + fi + continue + elif [[ "$result" =~ "FP_ARITH" || "$result" =~ "AMX" ]] + then + echo "FP issues" + echo $result + if [[ $err -eq 0 ]] + then + err=2 # Skip + fi + continue + elif [[ "$result" =~ "PMM" ]] + then + echo "Optane memory issues" + echo $result + if [[ $err -eq 0 ]] + then + err=2 # Skip + fi + continue + fi fi - # Failed so try system wide. - result=$(perf stat -M "$m" -a sleep 0.01 2>&1) + if [[ "$result" =~ ${m:0:50} ]] then continue fi - # Failed again, possibly the workload was too small so retry with something - # longer. - result=$(perf stat -M "$m" perf bench internals synthesize 2>&1) + + # Failed, possibly the workload was too small so retry with something longer. + result=$(perf stat -M "$m" $system_wide_flag -- perf bench internals synthesize 2>&1) if [[ "$result" =~ ${m:0:50} ]] then continue fi echo "Metric '$m' not printed in:" echo "$result" - if [[ "$err" != "1" ]] - then - err=2 - if [[ "$result" =~ "FP_ARITH" || "$result" =~ "AMX" ]] - then - echo "Skip, not fail, for FP issues" - elif [[ "$result" =~ "PMM" ]] - then - echo "Skip, not fail, for Optane memory issues" - else - err=1 - fi - fi + err=1 done exit "$err" diff --git a/tools/perf/tests/shell/stat_all_pmu.sh b/tools/perf/tests/shell/stat_all_pmu.sh index d2a3506e0d19..8b148b300be1 100755 --- a/tools/perf/tests/shell/stat_all_pmu.sh +++ b/tools/perf/tests/shell/stat_all_pmu.sh @@ -1,23 +1,51 @@ -#!/bin/sh -# perf all PMU test +#!/bin/bash +# perf all PMU test (exclusive) # SPDX-License-Identifier: GPL-2.0 set -e +err=0 +result="" + +trap_cleanup() { + echo "Unexpected signal in ${FUNCNAME[1]}" + echo "$result" + exit 1 +} +trap trap_cleanup EXIT TERM INT # Test all PMU events; however exclude parameterized ones (name contains '?') -for p in $(perf list --raw-dump pmu | sed 's/[[:graph:]]\+?[[:graph:]]\+[[:space:]]//g'); do +for p in $(perf list --raw-dump pmu | sed 's/[[:graph:]]\+?[[:graph:]]\+[[:space:]]//g') +do echo "Testing $p" result=$(perf stat -e "$p" true 2>&1) - if ! echo "$result" | grep -q "$p" && ! echo "$result" | grep -q "<not supported>" ; then - # We failed to see the event and it is supported. Possibly the workload was - # too small so retry with something longer. - result=$(perf stat -e "$p" perf bench internals synthesize 2>&1) - if ! echo "$result" | grep -q "$p" ; then - echo "Event '$p' not printed in:" - echo "$result" - exit 1 - fi + if echo "$result" | grep -q "$p" + then + # Event seen in output. + continue + fi + if echo "$result" | grep -q "<not supported>" + then + # Event not supported, so ignore. + continue + fi + if echo "$result" | grep -q "Access to performance monitoring and observability operations is limited." + then + # Access is limited, so ignore. + continue + fi + + # We failed to see the event and it is supported. Possibly the workload was + # too small so retry with something longer. + result=$(perf stat -e "$p" perf bench internals synthesize 2>&1) + if echo "$result" | grep -q "$p" + then + # Event seen in output. + continue fi + echo "Error: event '$p' not printed in:" + echo "$result" + err=1 done -exit 0 +trap - EXIT TERM INT +exit $err diff --git a/tools/perf/tests/shell/stat_bpf_counters.sh b/tools/perf/tests/shell/stat_bpf_counters.sh index f250b7d6f773..95d2ad5d17c6 100755 --- a/tools/perf/tests/shell/stat_bpf_counters.sh +++ b/tools/perf/tests/shell/stat_bpf_counters.sh @@ -1,10 +1,10 @@ #!/bin/sh -# perf stat --bpf-counters test +# perf stat --bpf-counters test (exclusive) # SPDX-License-Identifier: GPL-2.0 set -e -workload="perf test -w brstack" +workload="perf test -w sqrtloop" # check whether $2 is within +/- 20% of $1 compare_number() diff --git a/tools/perf/tests/shell/stat_bpf_counters_cgrp.sh b/tools/perf/tests/shell/stat_bpf_counters_cgrp.sh index e75d0780dc78..2ec69060c42f 100755 --- a/tools/perf/tests/shell/stat_bpf_counters_cgrp.sh +++ b/tools/perf/tests/shell/stat_bpf_counters_cgrp.sh @@ -58,22 +58,9 @@ check_system_wide_counted() fi } -check_cpu_list_counted() -{ - check_cpu_list_counted_output=$(perf stat -C 0,1 --bpf-counters --for-each-cgroup ${test_cgroups} -e cpu-clock -x, taskset -c 1 sleep 1 2>&1) - if echo ${check_cpu_list_counted_output} | grep -q -F "<not "; then - echo "Some CPU events are not counted" - if [ "${verbose}" = "1" ]; then - echo ${check_cpu_list_counted_output} - fi - exit 1 - fi -} - check_bpf_counter find_cgroups check_system_wide_counted -check_cpu_list_counted exit 0 diff --git a/tools/perf/tests/shell/test_arm_coresight.sh b/tools/perf/tests/shell/test_arm_coresight.sh index 3302ea0b9672..573af9235b72 100755 --- a/tools/perf/tests/shell/test_arm_coresight.sh +++ b/tools/perf/tests/shell/test_arm_coresight.sh @@ -1,5 +1,5 @@ #!/bin/sh -# Check Arm CoreSight trace data recording and synthesized samples +# Check Arm CoreSight trace data recording and synthesized samples (exclusive) # Uses the 'perf record' to record trace data with Arm CoreSight sinks; # then verify if there have any branch samples and instruction samples @@ -12,7 +12,7 @@ glb_err=0 skip_if_no_cs_etm_event() { - perf list | grep -q 'cs_etm//' && return 0 + perf list pmu | grep -q 'cs_etm//' && return 0 # cs_etm event doesn't exist return 2 diff --git a/tools/perf/tests/shell/test_arm_coresight_disasm.sh b/tools/perf/tests/shell/test_arm_coresight_disasm.sh new file mode 100755 index 000000000000..be2d26303f94 --- /dev/null +++ b/tools/perf/tests/shell/test_arm_coresight_disasm.sh @@ -0,0 +1,65 @@ +#!/bin/sh +# Check Arm CoreSight disassembly script completes without errors (exclusive) +# SPDX-License-Identifier: GPL-2.0 + +# The disassembly script reconstructs ranges of instructions and gives these to objdump to +# decode. objdump doesn't like ranges that go backwards, but these are a good indication +# that decoding has gone wrong either in OpenCSD, Perf or in the range reconstruction in +# the script. Test all 3 parts are working correctly by running the script. + +skip_if_no_cs_etm_event() { + perf list pmu | grep -q 'cs_etm//' && return 0 + + # cs_etm event doesn't exist + return 2 +} + +skip_if_no_cs_etm_event || exit 2 + +# Assume an error unless we reach the very end +set -e +glb_err=1 + +perfdata_dir=$(mktemp -d /tmp/__perf_test.perf.data.XXXXX) +perfdata=${perfdata_dir}/perf.data +file=$(mktemp /tmp/temporary_file.XXXXX) +# Relative path works whether it's installed or running from repo +script_path=$(dirname "$0")/../../scripts/python/arm-cs-trace-disasm.py + +cleanup_files() +{ + set +e + rm -rf ${perfdata_dir} + rm -f ${file} + trap - EXIT TERM INT + exit $glb_err +} + +trap cleanup_files EXIT TERM INT + +# Ranges start and end on branches, so check for some likely branch instructions +sep="\s\|\s" +branch_search="\sbl${sep}b${sep}b.ne${sep}b.eq${sep}cbz\s" + +## Test kernel ## +if [ -e /proc/kcore ]; then + echo "Testing kernel disassembly" + perf record -o ${perfdata} -e cs_etm//k --kcore -- touch $file > /dev/null 2>&1 + perf script -i ${perfdata} -s python:${script_path} -- \ + -d --stop-sample=30 2> /dev/null > ${file} + grep -q -e ${branch_search} ${file} + echo "Found kernel branches" +else + # kcore is required for correct kernel decode due to runtime code patching + echo "No kcore, skipping kernel test" +fi + +## Test user ## +echo "Testing userspace disassembly" +perf record -o ${perfdata} -e cs_etm//u -- touch $file > /dev/null 2>&1 +perf script -i ${perfdata} -s python:${script_path} -- \ + -d --stop-sample=30 2> /dev/null > ${file} +grep -q -e ${branch_search} ${file} +echo "Found userspace branches" + +glb_err=0 diff --git a/tools/perf/tests/shell/test_arm_spe.sh b/tools/perf/tests/shell/test_arm_spe.sh index 03d5c7d12ee5..3258368634f7 100755 --- a/tools/perf/tests/shell/test_arm_spe.sh +++ b/tools/perf/tests/shell/test_arm_spe.sh @@ -1,5 +1,5 @@ #!/bin/sh -# Check Arm SPE trace data recording and synthesized samples +# Check Arm SPE trace data recording and synthesized samples (exclusive) # Uses the 'perf record' to record trace data of Arm SPE events; # then verify if any SPE event samples are generated by SPE with @@ -9,7 +9,7 @@ # German Gomez <german.gomez@arm.com>, 2021 skip_if_no_arm_spe_event() { - perf list | grep -E -q 'arm_spe_[0-9]+//' && return 0 + perf list pmu | grep -E -q 'arm_spe_[0-9]+//' && return 0 # arm_spe event doesn't exist return 2 diff --git a/tools/perf/tests/shell/test_arm_spe_fork.sh b/tools/perf/tests/shell/test_arm_spe_fork.sh index 1a7e6a82d0e3..8efeef9fb956 100755 --- a/tools/perf/tests/shell/test_arm_spe_fork.sh +++ b/tools/perf/tests/shell/test_arm_spe_fork.sh @@ -5,7 +5,7 @@ # German Gomez <german.gomez@arm.com>, 2022 skip_if_no_arm_spe_event() { - perf list | grep -E -q 'arm_spe_[0-9]+//' && return 0 + perf list pmu | grep -E -q 'arm_spe_[0-9]+//' && return 0 return 2 } diff --git a/tools/perf/tests/shell/test_data_symbol.sh b/tools/perf/tests/shell/test_data_symbol.sh index 3dfa91832aa8..c86da0235059 100755 --- a/tools/perf/tests/shell/test_data_symbol.sh +++ b/tools/perf/tests/shell/test_data_symbol.sh @@ -1,5 +1,5 @@ #!/bin/bash -# Test data symbol +# Test data symbol (exclusive) # SPDX-License-Identifier: GPL-2.0 # Leo Yan <leo.yan@linaro.org>, 2022 diff --git a/tools/perf/tests/shell/test_intel_pt.sh b/tools/perf/tests/shell/test_intel_pt.sh index 723ec501f99a..e6f0070975f6 100755 --- a/tools/perf/tests/shell/test_intel_pt.sh +++ b/tools/perf/tests/shell/test_intel_pt.sh @@ -1,11 +1,11 @@ #!/bin/sh -# Miscellaneous Intel PT testing +# Miscellaneous Intel PT testing (exclusive) # SPDX-License-Identifier: GPL-2.0 set -e # Skip if no Intel PT -perf list | grep -q 'intel_pt//' || exit 2 +perf list pmu | grep -q 'intel_pt//' || exit 2 shelldir=$(dirname "$0") # shellcheck source=lib/waiting.sh diff --git a/tools/perf/tests/shell/test_stat_intel_tpebs.sh b/tools/perf/tests/shell/test_stat_intel_tpebs.sh index c60b29add980..f95fc64bf0a7 100755 --- a/tools/perf/tests/shell/test_stat_intel_tpebs.sh +++ b/tools/perf/tests/shell/test_stat_intel_tpebs.sh @@ -1,5 +1,5 @@ #!/bin/bash -# test Intel TPEBS counting mode +# test Intel TPEBS counting mode (exclusive) # SPDX-License-Identifier: GPL-2.0 set -e @@ -8,12 +8,15 @@ grep -q GenuineIntel /proc/cpuinfo || { echo Skipping non-Intel; exit 2; } # Use this event for testing because it should exist in all platforms event=cache-misses:R +# Hybrid platforms output like "cpu_atom/cache-misses/R", rather than as above +alt_name=/cache-misses/R + # Without this cmd option, default value or zero is returned -echo "Testing without --record-tpebs" -result=$(perf stat -e "$event" true 2>&1) -[[ "$result" =~ $event ]] || exit 1 +#echo "Testing without --record-tpebs" +#result=$(perf stat -e "$event" true 2>&1) +#[[ "$result" =~ $event || "$result" =~ $alt_name ]] || exit 1 # In platforms that do not support TPEBS, it should execute without error. echo "Testing with --record-tpebs" result=$(perf stat -e "$event" --record-tpebs -a sleep 0.01 2>&1) -[[ "$result" =~ "perf record" && "$result" =~ $event ]] || exit 1 +[[ "$result" =~ "perf record" && "$result" =~ $event || "$result" =~ $alt_name ]] || exit 1 diff --git a/tools/perf/tests/shell/trace+probe_vfs_getname.sh b/tools/perf/tests/shell/trace+probe_vfs_getname.sh index 3146a1eece07..708a13f00635 100755 --- a/tools/perf/tests/shell/trace+probe_vfs_getname.sh +++ b/tools/perf/tests/shell/trace+probe_vfs_getname.sh @@ -1,5 +1,5 @@ #!/bin/sh -# Check open filename arg using perf trace + vfs_getname +# Check open filename arg using perf trace + vfs_getname (exclusive) # Uses the 'perf test shell' library to add probe:vfs_getname to the system # then use it with 'perf trace' using 'touch' to write to a temp file, then @@ -19,7 +19,7 @@ skip_if_no_perf_trace || exit 2 . "$(dirname $0)"/lib/probe_vfs_getname.sh trace_open_vfs_getname() { - evts="$(echo "$(perf list syscalls:sys_enter_open* 2>/dev/null | grep -E 'open(at)? ' | sed -r 's/.*sys_enter_([a-z]+) +\[.*$/\1/')" | sed ':a;N;s:\n:,:g')" + evts="$(echo "$(perf list tracepoint 2>/dev/null | grep -E 'syscalls:sys_enter_open(at)? ' | sed -r 's/.*sys_enter_([a-z]+) +\[.*$/\1/')" | sed ':a;N;s:\n:,:g')" perf trace -e $evts touch $file 2>&1 | \ grep -E " +[0-9]+\.[0-9]+ +\( +[0-9]+\.[0-9]+ ms\): +touch/[0-9]+ open(at)?\((dfd: +CWD, +)?filename: +\"?${file}\"?, +flags: CREAT\|NOCTTY\|NONBLOCK\|WRONLY, +mode: +IRUGO\|IWUGO\) += +[0-9]+$" } diff --git a/tools/perf/tests/shell/trace_exit_race.sh b/tools/perf/tests/shell/trace_exit_race.sh new file mode 100755 index 000000000000..fbb0adc33a88 --- /dev/null +++ b/tools/perf/tests/shell/trace_exit_race.sh @@ -0,0 +1,51 @@ +#!/bin/sh +# perf trace exit race +# SPDX-License-Identifier: GPL-2.0 + +# Check that the last events of a perf trace'd subprocess are not +# lost. Specifically, trace the exiting syscall of "true" 10 times and ensure +# the output contains 10 correct lines. + +# shellcheck source=lib/probe.sh +. "$(dirname $0)"/lib/probe.sh + +skip_if_no_perf_trace || exit 2 + +if [ "$1" = "-v" ]; then + verbose="1" +fi + +iter=10 +regexp=" +[0-9]+\.[0-9]+ [0-9]+ syscalls:sys_enter_exit_group\(\)$" + +trace_shutdown_race() { + for _ in $(seq $iter); do + perf trace --no-comm -e syscalls:sys_enter_exit_group true 2>>$file + done + result="$(grep -c -E "$regexp" $file)" + [ $result = $iter ] +} + + +file=$(mktemp /tmp/temporary_file.XXXXX) + +# Do not use whatever ~/.perfconfig file, it may change the output +# via trace.{show_timestamp,show_prefix,etc} +export PERF_CONFIG=/dev/null + +trace_shutdown_race +err=$? + +if [ $err != 0 ] && [ "${verbose}" = "1" ]; then + lines_not_matching=$(mktemp /tmp/temporary_file.XXXXX) + if grep -v -E "$regexp" $file > $lines_not_matching ; then + echo "Lines not matching the expected regexp: '$regexp':" + cat $lines_not_matching + else + echo "Missing output, expected $iter but only got $result" + fi + rm -f $lines_not_matching +fi + +rm -f ${file} +exit $err diff --git a/tools/perf/tests/task-exit.c b/tools/perf/tests/task-exit.c index d33d0952025c..8e328bbd509d 100644 --- a/tools/perf/tests/task-exit.c +++ b/tools/perf/tests/task-exit.c @@ -152,4 +152,11 @@ out_delete_evlist: return err; } -DEFINE_SUITE("Number of exit events of a simple workload", task_exit); +struct test_case tests__task_exit[] = { + TEST_CASE_EXCLUSIVE("Number of exit events of a simple workload", task_exit), + { .name = NULL, } +}; +struct test_suite suite__task_exit = { + .desc = "Number of exit events of a simple workload", + .test_cases = tests__task_exit, +}; diff --git a/tools/perf/tests/tests-scripts.c b/tools/perf/tests/tests-scripts.c index ed114b044293..cf3ae0c1d871 100644 --- a/tools/perf/tests/tests-scripts.c +++ b/tools/perf/tests/tests-scripts.c @@ -175,6 +175,7 @@ static void append_script(int dir_fd, const char *name, char *desc, struct test_suite *test_suite, **result_tmp; struct test_case *tests; size_t len; + char *exclusive; snprintf(link, sizeof(link), "/proc/%d/fd/%d", getpid(), dir_fd); len = readlink(link, filename, sizeof(filename)); @@ -191,9 +192,13 @@ static void append_script(int dir_fd, const char *name, char *desc, return; } tests[0].name = strdup_check(name); + exclusive = strstr(desc, " (exclusive)"); + if (exclusive != NULL) { + tests[0].exclusive = true; + exclusive[0] = '\0'; + } tests[0].desc = strdup_check(desc); tests[0].run_case = shell_test__run; - test_suite = zalloc(sizeof(*test_suite)); if (!test_suite) { pr_err("Out of memory while building script test suite list\n"); diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h index 6ea2be86b7bf..cb58b43aa063 100644 --- a/tools/perf/tests/tests.h +++ b/tools/perf/tests/tests.h @@ -36,6 +36,7 @@ struct test_case { const char *desc; const char *skip_reason; test_fnptr run_case; + bool exclusive; }; struct test_suite { @@ -62,6 +63,14 @@ struct test_suite { .skip_reason = _reason, \ } +#define TEST_CASE_EXCLUSIVE(description, _name) \ + { \ + .name = #_name, \ + .desc = description, \ + .run_case = test__##_name, \ + .exclusive = true, \ + } + #define DEFINE_SUITE(description, _name) \ struct test_case tests__##_name[] = { \ TEST_CASE(description, _name), \ @@ -83,6 +92,8 @@ DECLARE_SUITE(perf_evsel__tp_sched_test); DECLARE_SUITE(syscall_openat_tp_fields); DECLARE_SUITE(pmu); DECLARE_SUITE(pmu_events); +DECLARE_SUITE(hwmon_pmu); +DECLARE_SUITE(tool_pmu); DECLARE_SUITE(attr); DECLARE_SUITE(dso_data); DECLARE_SUITE(dso_data_cache); diff --git a/tools/perf/tests/tool_pmu.c b/tools/perf/tests/tool_pmu.c new file mode 100644 index 000000000000..187942b749b7 --- /dev/null +++ b/tools/perf/tests/tool_pmu.c @@ -0,0 +1,111 @@ +// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) +#include "debug.h" +#include "evlist.h" +#include "parse-events.h" +#include "tests.h" +#include "tool_pmu.h" + +static int do_test(enum tool_pmu_event ev, bool with_pmu) +{ + struct evlist *evlist = evlist__new(); + struct evsel *evsel; + struct parse_events_error err; + int ret; + char str[128]; + bool found = false; + + if (!evlist) { + pr_err("evlist allocation failed\n"); + return TEST_FAIL; + } + + if (with_pmu) + snprintf(str, sizeof(str), "tool/%s/", tool_pmu__event_to_str(ev)); + else + snprintf(str, sizeof(str), "%s", tool_pmu__event_to_str(ev)); + + parse_events_error__init(&err); + ret = parse_events(evlist, str, &err); + if (ret) { + if (tool_pmu__skip_event(tool_pmu__event_to_str(ev))) { + ret = TEST_OK; + goto out; + } + + pr_debug("FAILED %s:%d failed to parse event '%s', err %d\n", + __FILE__, __LINE__, str, ret); + parse_events_error__print(&err, str); + ret = TEST_FAIL; + goto out; + } + + ret = TEST_OK; + if (with_pmu ? (evlist->core.nr_entries != 1) : (evlist->core.nr_entries < 1)) { + pr_debug("FAILED %s:%d Unexpected number of events for '%s' of %d\n", + __FILE__, __LINE__, str, evlist->core.nr_entries); + ret = TEST_FAIL; + goto out; + } + + evlist__for_each_entry(evlist, evsel) { + if (perf_pmu__is_tool(evsel->pmu)) { + if (evsel->core.attr.config != ev) { + pr_debug("FAILED %s:%d Unexpected config for '%s', %lld != %d\n", + __FILE__, __LINE__, str, evsel->core.attr.config, ev); + ret = TEST_FAIL; + goto out; + } + found = true; + } + } + + if (!found && !tool_pmu__skip_event(tool_pmu__event_to_str(ev))) { + pr_debug("FAILED %s:%d Didn't find tool event '%s' in parsed evsels\n", + __FILE__, __LINE__, str); + ret = TEST_FAIL; + } + +out: + parse_events_error__exit(&err); + evlist__delete(evlist); + return ret; +} + +static int test__tool_pmu_without_pmu(struct test_suite *test __maybe_unused, + int subtest __maybe_unused) +{ + int i; + + tool_pmu__for_each_event(i) { + int ret = do_test(i, /*with_pmu=*/false); + + if (ret != TEST_OK) + return ret; + } + return TEST_OK; +} + +static int test__tool_pmu_with_pmu(struct test_suite *test __maybe_unused, + int subtest __maybe_unused) +{ + int i; + + tool_pmu__for_each_event(i) { + int ret = do_test(i, /*with_pmu=*/true); + + if (ret != TEST_OK) + return ret; + } + return TEST_OK; +} + +static struct test_case tests__tool_pmu[] = { + TEST_CASE("Parsing without PMU name", tool_pmu_without_pmu), + TEST_CASE("Parsing with PMU name", tool_pmu_with_pmu), + { .name = NULL, } +}; + +struct test_suite suite__tool_pmu = { + .desc = "Tool PMU", + .test_cases = tests__tool_pmu, +}; diff --git a/tools/perf/trace/beauty/arch/x86/include/asm/irq_vectors.h b/tools/perf/trace/beauty/arch/x86/include/asm/irq_vectors.h index 13aea8fc3d45..47051871b436 100644 --- a/tools/perf/trace/beauty/arch/x86/include/asm/irq_vectors.h +++ b/tools/perf/trace/beauty/arch/x86/include/asm/irq_vectors.h @@ -18,8 +18,8 @@ * Vectors 0 ... 31 : system traps and exceptions - hardcoded events * Vectors 32 ... 127 : device interrupts * Vector 128 : legacy int80 syscall interface - * Vectors 129 ... LOCAL_TIMER_VECTOR-1 - * Vectors LOCAL_TIMER_VECTOR ... 255 : special interrupts + * Vectors 129 ... FIRST_SYSTEM_VECTOR-1 : device interrupts + * Vectors FIRST_SYSTEM_VECTOR ... 255 : special interrupts * * 64-bit x86 has per CPU IDT tables, 32-bit has one shared IDT table. * diff --git a/tools/perf/trace/beauty/fs_at_flags.sh b/tools/perf/trace/beauty/fs_at_flags.sh index 456f59addf74..fac4d0c049fc 100755 --- a/tools/perf/trace/beauty/fs_at_flags.sh +++ b/tools/perf/trace/beauty/fs_at_flags.sh @@ -13,9 +13,15 @@ printf "static const char *fs_at_flags[] = {\n" regex='^[[:space:]]*#[[:space:]]*define[[:space:]]+AT_([^_]+[[:alnum:]_]+)[[:space:]]+(0x[[:xdigit:]]+)[[:space:]]*.*' # AT_EACCESS is only meaningful to faccessat, so we will special case it there... # AT_STATX_SYNC_TYPE is not a bit, its a mask of AT_STATX_SYNC_AS_STAT, AT_STATX_FORCE_SYNC and AT_STATX_DONT_SYNC +# AT_HANDLE_FID, AT_HANDLE_MNT_ID_UNIQUE and AT_HANDLE_CONNECTABLE are reusing values and are valid only for name_to_handle_at() +# AT_RENAME_NOREPLACE reuses 0x1 and is valid only for renameat2() grep -E $regex ${linux_fcntl} | \ grep -v AT_EACCESS | \ grep -v AT_STATX_SYNC_TYPE | \ + grep -v AT_HANDLE_FID | \ + grep -v AT_HANDLE_MNT_ID_UNIQUE | \ + grep -v AT_HANDLE_CONNECTABLE | \ + grep -v AT_RENAME_NOREPLACE | \ sed -r "s/$regex/\2 \1/g" | \ xargs printf "\t[ilog2(%s) + 1] = \"%s\",\n" printf "};\n" diff --git a/tools/perf/trace/beauty/include/linux/socket.h b/tools/perf/trace/beauty/include/linux/socket.h index df9cdb8bbfb8..d18cc47e89bd 100644 --- a/tools/perf/trace/beauty/include/linux/socket.h +++ b/tools/perf/trace/beauty/include/linux/socket.h @@ -327,6 +327,7 @@ struct ucred { * plain text and require encryption */ +#define MSG_SOCK_DEVMEM 0x2000000 /* Receive devmem skbs as cmsg */ #define MSG_ZEROCOPY 0x4000000 /* Use user data in kernel path */ #define MSG_SPLICE_PAGES 0x8000000 /* Splice the pages from the iterator in sendmsg() */ #define MSG_FASTOPEN 0x20000000 /* Send data in TCP SYN */ diff --git a/tools/perf/trace/beauty/include/uapi/linux/fcntl.h b/tools/perf/trace/beauty/include/uapi/linux/fcntl.h index c0bcc185fa48..6e6907e63bfc 100644 --- a/tools/perf/trace/beauty/include/uapi/linux/fcntl.h +++ b/tools/perf/trace/beauty/include/uapi/linux/fcntl.h @@ -16,6 +16,9 @@ #define F_DUPFD_QUERY (F_LINUX_SPECIFIC_BASE + 3) +/* Was the file just created? */ +#define F_CREATED_QUERY (F_LINUX_SPECIFIC_BASE + 4) + /* * Cancel a blocking posix lock; internal use only until we expose an * asynchronous lock api to userspace: @@ -87,39 +90,69 @@ #define DN_ATTRIB 0x00000020 /* File changed attibutes */ #define DN_MULTISHOT 0x80000000 /* Don't remove notifier */ +#define AT_FDCWD -100 /* Special value for dirfd used to + indicate openat should use the + current working directory. */ + + +/* Generic flags for the *at(2) family of syscalls. */ + +/* Reserved for per-syscall flags 0xff. */ +#define AT_SYMLINK_NOFOLLOW 0x100 /* Do not follow symbolic + links. */ +/* Reserved for per-syscall flags 0x200 */ +#define AT_SYMLINK_FOLLOW 0x400 /* Follow symbolic links. */ +#define AT_NO_AUTOMOUNT 0x800 /* Suppress terminal automount + traversal. */ +#define AT_EMPTY_PATH 0x1000 /* Allow empty relative + pathname to operate on dirfd + directly. */ +/* + * These flags are currently statx(2)-specific, but they could be made generic + * in the future and so they should not be used for other per-syscall flags. + */ +#define AT_STATX_SYNC_TYPE 0x6000 /* Type of synchronisation required from statx() */ +#define AT_STATX_SYNC_AS_STAT 0x0000 /* - Do whatever stat() does */ +#define AT_STATX_FORCE_SYNC 0x2000 /* - Force the attributes to be sync'd with the server */ +#define AT_STATX_DONT_SYNC 0x4000 /* - Don't sync attributes with the server */ + +#define AT_RECURSIVE 0x8000 /* Apply to the entire subtree */ + /* - * The constants AT_REMOVEDIR and AT_EACCESS have the same value. AT_EACCESS is - * meaningful only to faccessat, while AT_REMOVEDIR is meaningful only to - * unlinkat. The two functions do completely different things and therefore, - * the flags can be allowed to overlap. For example, passing AT_REMOVEDIR to - * faccessat would be undefined behavior and thus treating it equivalent to - * AT_EACCESS is valid undefined behavior. + * Per-syscall flags for the *at(2) family of syscalls. + * + * These are flags that are so syscall-specific that a user passing these flags + * to the wrong syscall is so "clearly wrong" that we can safely call such + * usage "undefined behaviour". + * + * For example, the constants AT_REMOVEDIR and AT_EACCESS have the same value. + * AT_EACCESS is meaningful only to faccessat, while AT_REMOVEDIR is meaningful + * only to unlinkat. The two functions do completely different things and + * therefore, the flags can be allowed to overlap. For example, passing + * AT_REMOVEDIR to faccessat would be undefined behavior and thus treating it + * equivalent to AT_EACCESS is valid undefined behavior. + * + * Note for implementers: When picking a new per-syscall AT_* flag, try to + * reuse already existing flags first. This leaves us with as many unused bits + * as possible, so we can use them for generic bits in the future if necessary. */ -#define AT_FDCWD -100 /* Special value used to indicate - openat should use the current - working directory. */ -#define AT_SYMLINK_NOFOLLOW 0x100 /* Do not follow symbolic links. */ + +/* Flags for renameat2(2) (must match legacy RENAME_* flags). */ +#define AT_RENAME_NOREPLACE 0x0001 +#define AT_RENAME_EXCHANGE 0x0002 +#define AT_RENAME_WHITEOUT 0x0004 + +/* Flag for faccessat(2). */ #define AT_EACCESS 0x200 /* Test access permitted for effective IDs, not real IDs. */ +/* Flag for unlinkat(2). */ #define AT_REMOVEDIR 0x200 /* Remove directory instead of unlinking file. */ -#define AT_SYMLINK_FOLLOW 0x400 /* Follow symbolic links. */ -#define AT_NO_AUTOMOUNT 0x800 /* Suppress terminal automount traversal */ -#define AT_EMPTY_PATH 0x1000 /* Allow empty relative pathname */ - -#define AT_STATX_SYNC_TYPE 0x6000 /* Type of synchronisation required from statx() */ -#define AT_STATX_SYNC_AS_STAT 0x0000 /* - Do whatever stat() does */ -#define AT_STATX_FORCE_SYNC 0x2000 /* - Force the attributes to be sync'd with the server */ -#define AT_STATX_DONT_SYNC 0x4000 /* - Don't sync attributes with the server */ - -#define AT_RECURSIVE 0x8000 /* Apply to the entire subtree */ - -/* Flags for name_to_handle_at(2). We reuse AT_ flag space to save bits... */ -#define AT_HANDLE_FID AT_REMOVEDIR /* file handle is needed to - compare object identity and may not - be usable to open_by_handle_at(2) */ -#if defined(__KERNEL__) -#define AT_GETATTR_NOSEC 0x80000000 -#endif +/* Flags for name_to_handle_at(2). */ +#define AT_HANDLE_FID 0x200 /* File handle is needed to compare + object identity and may not be + usable with open_by_handle_at(2). */ +#define AT_HANDLE_MNT_ID_UNIQUE 0x001 /* Return the u64 unique mount ID. */ +#define AT_HANDLE_CONNECTABLE 0x002 /* Request a connectable file handle */ #endif /* _UAPI_LINUX_FCNTL_H */ diff --git a/tools/perf/trace/beauty/include/uapi/linux/mount.h b/tools/perf/trace/beauty/include/uapi/linux/mount.h index 225bc366ffcb..c07008816aca 100644 --- a/tools/perf/trace/beauty/include/uapi/linux/mount.h +++ b/tools/perf/trace/beauty/include/uapi/linux/mount.h @@ -154,7 +154,7 @@ struct mount_attr { */ struct statmount { __u32 size; /* Total size, including strings */ - __u32 mnt_opts; /* [str] Mount options of the mount */ + __u32 mnt_opts; /* [str] Options (comma separated, escaped) */ __u64 mask; /* What results were written */ __u32 sb_dev_major; /* Device ID */ __u32 sb_dev_minor; @@ -173,7 +173,13 @@ struct statmount { __u32 mnt_root; /* [str] Root of mount relative to root of fs */ __u32 mnt_point; /* [str] Mountpoint relative to current root */ __u64 mnt_ns_id; /* ID of the mount namespace */ - __u64 __spare2[49]; + __u32 fs_subtype; /* [str] Subtype of fs_type (if any) */ + __u32 sb_source; /* [str] Source string of the mount */ + __u32 opt_num; /* Number of fs options */ + __u32 opt_array; /* [str] Array of nul terminated fs options */ + __u32 opt_sec_num; /* Number of security options */ + __u32 opt_sec_array; /* [str] Array of nul terminated security options */ + __u64 __spare2[46]; char str[]; /* Variable size part containing strings */ }; @@ -207,6 +213,10 @@ struct mnt_id_req { #define STATMOUNT_FS_TYPE 0x00000020U /* Want/got fs_type */ #define STATMOUNT_MNT_NS_ID 0x00000040U /* Want/got mnt_ns_id */ #define STATMOUNT_MNT_OPTS 0x00000080U /* Want/got mnt_opts */ +#define STATMOUNT_FS_SUBTYPE 0x00000100U /* Want/got fs_subtype */ +#define STATMOUNT_SB_SOURCE 0x00000200U /* Want/got sb_source */ +#define STATMOUNT_OPT_ARRAY 0x00000400U /* Want/got opt_... */ +#define STATMOUNT_OPT_SEC_ARRAY 0x00000800U /* Want/got opt_sec... */ /* * Special @mnt_id values that can be passed to listmount diff --git a/tools/perf/trace/beauty/include/uapi/linux/prctl.h b/tools/perf/trace/beauty/include/uapi/linux/prctl.h index 35791791a879..5c6080680cb2 100644 --- a/tools/perf/trace/beauty/include/uapi/linux/prctl.h +++ b/tools/perf/trace/beauty/include/uapi/linux/prctl.h @@ -230,7 +230,7 @@ struct prctl_mm_map { # define PR_PAC_APDBKEY (1UL << 3) # define PR_PAC_APGAKEY (1UL << 4) -/* Tagged user address controls for arm64 */ +/* Tagged user address controls for arm64 and RISC-V */ #define PR_SET_TAGGED_ADDR_CTRL 55 #define PR_GET_TAGGED_ADDR_CTRL 56 # define PR_TAGGED_ADDR_ENABLE (1UL << 0) @@ -244,6 +244,9 @@ struct prctl_mm_map { # define PR_MTE_TAG_MASK (0xffffUL << PR_MTE_TAG_SHIFT) /* Unused; kept only for source compatibility */ # define PR_MTE_TCF_SHIFT 1 +/* RISC-V pointer masking tag length */ +# define PR_PMLEN_SHIFT 24 +# define PR_PMLEN_MASK (0x7fUL << PR_PMLEN_SHIFT) /* Control reclaim behavior when allocating memory */ #define PR_SET_IO_FLUSHER 57 @@ -328,4 +331,26 @@ struct prctl_mm_map { # define PR_PPC_DEXCR_CTRL_CLEAR_ONEXEC 0x10 /* Clear the aspect on exec */ # define PR_PPC_DEXCR_CTRL_MASK 0x1f +/* + * Get the current shadow stack configuration for the current thread, + * this will be the value configured via PR_SET_SHADOW_STACK_STATUS. + */ +#define PR_GET_SHADOW_STACK_STATUS 74 + +/* + * Set the current shadow stack configuration. Enabling the shadow + * stack will cause a shadow stack to be allocated for the thread. + */ +#define PR_SET_SHADOW_STACK_STATUS 75 +# define PR_SHADOW_STACK_ENABLE (1UL << 0) +# define PR_SHADOW_STACK_WRITE (1UL << 1) +# define PR_SHADOW_STACK_PUSH (1UL << 2) + +/* + * Prevent further changes to the specified shadow stack + * configuration. All bits may be locked via this call, including + * undefined bits. + */ +#define PR_LOCK_SHADOW_STACK_STATUS 76 + #endif /* _LINUX_PRCTL_H */ diff --git a/tools/perf/trace/beauty/include/uapi/linux/sched.h b/tools/perf/trace/beauty/include/uapi/linux/sched.h index 3bac0a8ceab2..359a14cc76a4 100644 --- a/tools/perf/trace/beauty/include/uapi/linux/sched.h +++ b/tools/perf/trace/beauty/include/uapi/linux/sched.h @@ -118,6 +118,7 @@ struct clone_args { /* SCHED_ISO: reserved but not implemented yet */ #define SCHED_IDLE 5 #define SCHED_DEADLINE 6 +#define SCHED_EXT 7 /* Can be ORed in to make sure the process is reverted back to SCHED_NORMAL on fork */ #define SCHED_RESET_ON_FORK 0x40000000 diff --git a/tools/perf/trace/beauty/include/uapi/sound/asound.h b/tools/perf/trace/beauty/include/uapi/sound/asound.h index 8bf7e8a0eb6f..4cd513215bcd 100644 --- a/tools/perf/trace/beauty/include/uapi/sound/asound.h +++ b/tools/perf/trace/beauty/include/uapi/sound/asound.h @@ -869,7 +869,7 @@ struct snd_ump_block_info { * Timer section - /dev/snd/timer */ -#define SNDRV_TIMER_VERSION SNDRV_PROTOCOL_VERSION(2, 0, 7) +#define SNDRV_TIMER_VERSION SNDRV_PROTOCOL_VERSION(2, 0, 8) enum { SNDRV_TIMER_CLASS_NONE = -1, @@ -894,6 +894,7 @@ enum { #define SNDRV_TIMER_GLOBAL_RTC 1 /* unused */ #define SNDRV_TIMER_GLOBAL_HPET 2 #define SNDRV_TIMER_GLOBAL_HRTIMER 3 +#define SNDRV_TIMER_GLOBAL_UDRIVEN 4 /* info flags */ #define SNDRV_TIMER_FLG_SLAVE (1<<0) /* cannot be controlled */ @@ -974,6 +975,18 @@ struct snd_timer_status { }; #endif +/* + * This structure describes the userspace-driven timer. Such timers are purely virtual, + * and can only be triggered from software (for instance, by userspace application). + */ +struct snd_timer_uinfo { + /* To pretend being a normal timer, we need to know the resolution in ns. */ + __u64 resolution; + int fd; + unsigned int id; + unsigned char reserved[16]; +}; + #define SNDRV_TIMER_IOCTL_PVERSION _IOR('T', 0x00, int) #define SNDRV_TIMER_IOCTL_NEXT_DEVICE _IOWR('T', 0x01, struct snd_timer_id) #define SNDRV_TIMER_IOCTL_TREAD_OLD _IOW('T', 0x02, int) @@ -990,6 +1003,8 @@ struct snd_timer_status { #define SNDRV_TIMER_IOCTL_CONTINUE _IO('T', 0xa2) #define SNDRV_TIMER_IOCTL_PAUSE _IO('T', 0xa3) #define SNDRV_TIMER_IOCTL_TREAD64 _IOW('T', 0xa4, int) +#define SNDRV_TIMER_IOCTL_CREATE _IOWR('T', 0xa5, struct snd_timer_uinfo) +#define SNDRV_TIMER_IOCTL_TRIGGER _IO('T', 0xa6) #if __BITS_PER_LONG == 64 #define SNDRV_TIMER_IOCTL_TREAD SNDRV_TIMER_IOCTL_TREAD_OLD diff --git a/tools/perf/trace/beauty/msg_flags.c b/tools/perf/trace/beauty/msg_flags.c index ed3ff969b546..2da581ff0c80 100644 --- a/tools/perf/trace/beauty/msg_flags.c +++ b/tools/perf/trace/beauty/msg_flags.c @@ -11,6 +11,9 @@ #ifndef MSG_BATCH #define MSG_BATCH 0x40000 #endif +#ifndef MSG_SOCK_DEVMEM +#define MSG_SOCK_DEVMEM 0x2000000 +#endif #ifndef MSG_ZEROCOPY #define MSG_ZEROCOPY 0x4000000 #endif @@ -57,6 +60,7 @@ static size_t syscall_arg__scnprintf_msg_flags(char *bf, size_t size, P_MSG_FLAG(MORE); P_MSG_FLAG(WAITFORONE); P_MSG_FLAG(BATCH); + P_MSG_FLAG(SOCK_DEVMEM); P_MSG_FLAG(ZEROCOPY); P_MSG_FLAG(SPLICE_PAGES); P_MSG_FLAG(FASTOPEN); diff --git a/tools/perf/util/Build b/tools/perf/util/Build index dc616292b2dd..c06d2ee9024c 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -83,6 +83,8 @@ perf-util-y += pmu.o perf-util-y += pmus.o perf-util-y += pmu-flex.o perf-util-y += pmu-bison.o +perf-util-y += hwmon_pmu.o +perf-util-y += tool_pmu.o perf-util-y += svghelper.o perf-util-$(CONFIG_LIBTRACEEVENT) += trace-event-info.o perf-util-y += trace-event-scripting.o @@ -199,11 +201,14 @@ ifndef CONFIG_SETNS perf-util-y += setns.o endif -perf-util-$(CONFIG_DWARF) += probe-finder.o -perf-util-$(CONFIG_DWARF) += dwarf-aux.o -perf-util-$(CONFIG_DWARF) += dwarf-regs.o -perf-util-$(CONFIG_DWARF) += debuginfo.o -perf-util-$(CONFIG_DWARF) += annotate-data.o +perf-util-$(CONFIG_LIBDW) += probe-finder.o +perf-util-$(CONFIG_LIBDW) += dwarf-aux.o +perf-util-$(CONFIG_LIBDW) += dwarf-regs.o +perf-util-$(CONFIG_LIBDW) += dwarf-regs-csky.o +perf-util-$(CONFIG_LIBDW) += dwarf-regs-powerpc.o +perf-util-$(CONFIG_LIBDW) += dwarf-regs-x86.o +perf-util-$(CONFIG_LIBDW) += debuginfo.o +perf-util-$(CONFIG_LIBDW) += annotate-data.o perf-util-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o perf-util-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind-local.o @@ -234,7 +239,7 @@ perf-util-$(CONFIG_LIBLLVM) += llvm-c-helpers.o ifdef CONFIG_JITDUMP perf-util-$(CONFIG_LIBELF) += jitdump.o perf-util-$(CONFIG_LIBELF) += genelf.o -perf-util-$(CONFIG_DWARF) += genelf_debug.o +perf-util-$(CONFIG_LIBDW) += genelf_debug.o endif perf-util-y += perf-hooks.o diff --git a/tools/perf/util/annotate-data.h b/tools/perf/util/annotate-data.h index 8ac0fd94a0ba..98c80b2268dd 100644 --- a/tools/perf/util/annotate-data.h +++ b/tools/perf/util/annotate-data.h @@ -9,7 +9,7 @@ #include "dwarf-regs.h" #include "annotate.h" -#ifdef HAVE_DWARF_SUPPORT +#ifdef HAVE_LIBDW_SUPPORT #include "debuginfo.h" #endif @@ -165,7 +165,7 @@ struct annotated_data_stat { }; extern struct annotated_data_stat ann_data_stat; -#ifdef HAVE_DWARF_SUPPORT +#ifdef HAVE_LIBDW_SUPPORT /* * Type information in a register, valid when @ok is true. * The @caller_saved registers are invalidated after a function call. @@ -244,7 +244,7 @@ bool get_global_var_info(struct data_loc_info *dloc, u64 addr, const char **var_name, int *var_offset); void pr_debug_type_name(Dwarf_Die *die, enum type_state_kind kind); -#else /* HAVE_DWARF_SUPPORT */ +#else /* HAVE_LIBDW_SUPPORT */ static inline struct annotated_data_type * find_data_type(struct data_loc_info *dloc __maybe_unused) @@ -276,7 +276,7 @@ static inline int hist_entry__annotate_data_tty(struct hist_entry *he __maybe_un return -1; } -#endif /* HAVE_DWARF_SUPPORT */ +#endif /* HAVE_LIBDW_SUPPORT */ #ifdef HAVE_SLANG_SUPPORT int hist_entry__annotate_data_tui(struct hist_entry *he, struct evsel *evsel, diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 37ce43c4eb8f..32e15c9f53f3 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -2116,6 +2116,12 @@ static int annotation__config(const char *var, const char *value, void *data) opt->offset_level = ANNOTATION__MAX_OFFSET_LEVEL; else if (opt->offset_level < ANNOTATION__MIN_OFFSET_LEVEL) opt->offset_level = ANNOTATION__MIN_OFFSET_LEVEL; + } else if (!strcmp(var, "annotate.disassemblers")) { + opt->disassemblers_str = strdup(value); + if (!opt->disassemblers_str) { + pr_err("Not enough memory for annotate.disassemblers\n"); + return -1; + } } else if (!strcmp(var, "annotate.hide_src_code")) { opt->hide_src_code = perf_config_bool("hide_src_code", value); } else if (!strcmp(var, "annotate.jump_arrows")) { @@ -2292,7 +2298,7 @@ static int extract_reg_offset(struct arch *arch, const char *str, if (regname == NULL) return -1; - op_loc->reg1 = get_dwarf_regnum(regname, 0); + op_loc->reg1 = get_dwarf_regnum(regname, arch->e_machine, arch->e_flags); free(regname); /* Get the second register */ @@ -2305,7 +2311,7 @@ static int extract_reg_offset(struct arch *arch, const char *str, if (regname == NULL) return -1; - op_loc->reg2 = get_dwarf_regnum(regname, 0); + op_loc->reg2 = get_dwarf_regnum(regname, arch->e_machine, arch->e_flags); free(regname); } return 0; @@ -2405,7 +2411,7 @@ int annotate_get_insn_location(struct arch *arch, struct disasm_line *dl, return -1; if (*s == arch->objdump.register_char) - op_loc->reg1 = get_dwarf_regnum(s, 0); + op_loc->reg1 = get_dwarf_regnum(s, arch->e_machine, arch->e_flags); else if (*s == arch->objdump.imm_char) { op_loc->offset = strtol(s + 1, &p, 0); if (p && p != s + 1) diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index 8b9e05a1932f..194a05cbc506 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h @@ -34,6 +34,9 @@ struct annotated_data_type; #define ANNOTATION__BR_CNTR_WIDTH 30 #define ANNOTATION_DUMMY_LEN 256 +// llvm, capstone, objdump +#define MAX_DISASSEMBLERS 3 + struct annotation_options { bool hide_src_code, use_offset, @@ -49,11 +52,14 @@ struct annotation_options { annotate_src, full_addr; u8 offset_level; + u8 nr_disassemblers; int min_pcnt; int max_lines; int context; char *objdump_path; char *disassembler_style; + const char *disassemblers_str; + const char *disassemblers[MAX_DISASSEMBLERS]; const char *prefix; const char *prefix_strip; unsigned int percent_type; diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h index 1443c28545a9..358c611eeddb 100644 --- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h +++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h @@ -56,15 +56,15 @@ enum arm_spe_op_type { ARM_SPE_OP_BR_INDIRECT = 1 << 17, }; -enum arm_spe_neoverse_data_source { - ARM_SPE_NV_L1D = 0x0, - ARM_SPE_NV_L2 = 0x8, - ARM_SPE_NV_PEER_CORE = 0x9, - ARM_SPE_NV_LOCAL_CLUSTER = 0xa, - ARM_SPE_NV_SYS_CACHE = 0xb, - ARM_SPE_NV_PEER_CLUSTER = 0xc, - ARM_SPE_NV_REMOTE = 0xd, - ARM_SPE_NV_DRAM = 0xe, +enum arm_spe_common_data_source { + ARM_SPE_COMMON_DS_L1D = 0x0, + ARM_SPE_COMMON_DS_L2 = 0x8, + ARM_SPE_COMMON_DS_PEER_CORE = 0x9, + ARM_SPE_COMMON_DS_LOCAL_CLUSTER = 0xa, + ARM_SPE_COMMON_DS_SYS_CACHE = 0xb, + ARM_SPE_COMMON_DS_PEER_CLUSTER = 0xc, + ARM_SPE_COMMON_DS_REMOTE = 0xd, + ARM_SPE_COMMON_DS_DRAM = 0xe, }; struct arm_spe_record { diff --git a/tools/perf/util/arm-spe.c b/tools/perf/util/arm-spe.c index 138ffc71b32d..dbf13f47879c 100644 --- a/tools/perf/util/arm-spe.c +++ b/tools/perf/util/arm-spe.c @@ -46,7 +46,6 @@ struct arm_spe { struct perf_session *session; struct machine *machine; u32 pmu_type; - u64 midr; struct perf_tsc_conversion tc; @@ -69,7 +68,7 @@ struct arm_spe { u64 llc_access_id; u64 tlb_miss_id; u64 tlb_access_id; - u64 branch_miss_id; + u64 branch_id; u64 remote_access_id; u64 memory_id; u64 instructions_id; @@ -78,6 +77,11 @@ struct arm_spe { unsigned long num_events; u8 use_ctx_pkt_for_pid; + + u64 **metadata; + u64 metadata_ver; + u64 metadata_nr_cpu; + bool is_homogeneous; }; struct arm_spe_queue { @@ -96,6 +100,7 @@ struct arm_spe_queue { u64 timestamp; struct thread *thread; u64 period_instructions; + u32 flags; }; static void arm_spe_dump(struct arm_spe *spe __maybe_unused, @@ -118,7 +123,7 @@ static void arm_spe_dump(struct arm_spe *spe __maybe_unused, else pkt_len = 1; printf("."); - color_fprintf(stdout, color, " %08x: ", pos); + color_fprintf(stdout, color, " %08zx: ", pos); for (i = 0; i < pkt_len; i++) color_fprintf(stdout, color, " %02x", buf[i]); for (; i < 16; i++) @@ -273,6 +278,20 @@ static int arm_spe_set_tid(struct arm_spe_queue *speq, pid_t tid) return 0; } +static u64 *arm_spe__get_metadata_by_cpu(struct arm_spe *spe, u64 cpu) +{ + u64 i; + + if (!spe->metadata) + return NULL; + + for (i = 0; i < spe->metadata_nr_cpu; i++) + if (spe->metadata[i][ARM_SPE_CPU] == cpu) + return spe->metadata[i]; + + return NULL; +} + static struct simd_flags arm_spe__synth_simd_flags(const struct arm_spe_record *record) { struct simd_flags simd_flags = {}; @@ -376,6 +395,7 @@ static int arm_spe__synth_branch_sample(struct arm_spe_queue *speq, sample.stream_id = spe_events_id; sample.addr = record->to_ip; sample.weight = record->latency; + sample.flags = speq->flags; return arm_spe_deliver_synth_event(spe, speq, event, &sample); } @@ -400,24 +420,44 @@ static int arm_spe__synth_instruction_sample(struct arm_spe_queue *speq, sample.id = spe_events_id; sample.stream_id = spe_events_id; - sample.addr = record->virt_addr; + sample.addr = record->to_ip; sample.phys_addr = record->phys_addr; sample.data_src = data_src; sample.period = spe->instructions_sample_period; sample.weight = record->latency; + sample.flags = speq->flags; return arm_spe_deliver_synth_event(spe, speq, event, &sample); } -static const struct midr_range neoverse_spe[] = { +static const struct midr_range common_ds_encoding_cpus[] = { + MIDR_ALL_VERSIONS(MIDR_CORTEX_A720), + MIDR_ALL_VERSIONS(MIDR_CORTEX_A725), + MIDR_ALL_VERSIONS(MIDR_CORTEX_X1C), + MIDR_ALL_VERSIONS(MIDR_CORTEX_X3), + MIDR_ALL_VERSIONS(MIDR_CORTEX_X925), MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N1), MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N2), MIDR_ALL_VERSIONS(MIDR_NEOVERSE_V1), + MIDR_ALL_VERSIONS(MIDR_NEOVERSE_V2), {}, }; -static void arm_spe__synth_data_source_neoverse(const struct arm_spe_record *record, - union perf_mem_data_src *data_src) +static void arm_spe__sample_flags(struct arm_spe_queue *speq) +{ + const struct arm_spe_record *record = &speq->decoder->record; + + speq->flags = 0; + if (record->op & ARM_SPE_OP_BRANCH_ERET) { + speq->flags = PERF_IP_FLAG_BRANCH; + + if (record->type & ARM_SPE_BRANCH_MISS) + speq->flags |= PERF_IP_FLAG_BRANCH_MISS; + } +} + +static void arm_spe__synth_data_source_common(const struct arm_spe_record *record, + union perf_mem_data_src *data_src) { /* * Even though four levels of cache hierarchy are possible, no known @@ -439,17 +479,17 @@ static void arm_spe__synth_data_source_neoverse(const struct arm_spe_record *rec } switch (record->source) { - case ARM_SPE_NV_L1D: + case ARM_SPE_COMMON_DS_L1D: data_src->mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_HIT; data_src->mem_lvl_num = PERF_MEM_LVLNUM_L1; data_src->mem_snoop = PERF_MEM_SNOOP_NONE; break; - case ARM_SPE_NV_L2: + case ARM_SPE_COMMON_DS_L2: data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT; data_src->mem_lvl_num = PERF_MEM_LVLNUM_L2; data_src->mem_snoop = PERF_MEM_SNOOP_NONE; break; - case ARM_SPE_NV_PEER_CORE: + case ARM_SPE_COMMON_DS_PEER_CORE: data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT; data_src->mem_lvl_num = PERF_MEM_LVLNUM_L2; data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER; @@ -458,8 +498,8 @@ static void arm_spe__synth_data_source_neoverse(const struct arm_spe_record *rec * We don't know if this is L1, L2 but we do know it was a cache-2-cache * transfer, so set SNOOPX_PEER */ - case ARM_SPE_NV_LOCAL_CLUSTER: - case ARM_SPE_NV_PEER_CLUSTER: + case ARM_SPE_COMMON_DS_LOCAL_CLUSTER: + case ARM_SPE_COMMON_DS_PEER_CLUSTER: data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT; data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3; data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER; @@ -467,7 +507,7 @@ static void arm_spe__synth_data_source_neoverse(const struct arm_spe_record *rec /* * System cache is assumed to be L3 */ - case ARM_SPE_NV_SYS_CACHE: + case ARM_SPE_COMMON_DS_SYS_CACHE: data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT; data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3; data_src->mem_snoop = PERF_MEM_SNOOP_HIT; @@ -476,13 +516,13 @@ static void arm_spe__synth_data_source_neoverse(const struct arm_spe_record *rec * We don't know what level it hit in, except it came from the other * socket */ - case ARM_SPE_NV_REMOTE: + case ARM_SPE_COMMON_DS_REMOTE: data_src->mem_lvl = PERF_MEM_LVL_REM_CCE1; data_src->mem_lvl_num = PERF_MEM_LVLNUM_ANY_CACHE; data_src->mem_remote = PERF_MEM_REMOTE_REMOTE; data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER; break; - case ARM_SPE_NV_DRAM: + case ARM_SPE_COMMON_DS_DRAM: data_src->mem_lvl = PERF_MEM_LVL_LOC_RAM | PERF_MEM_LVL_HIT; data_src->mem_lvl_num = PERF_MEM_LVLNUM_RAM; data_src->mem_snoop = PERF_MEM_SNOOP_NONE; @@ -492,8 +532,8 @@ static void arm_spe__synth_data_source_neoverse(const struct arm_spe_record *rec } } -static void arm_spe__synth_data_source_generic(const struct arm_spe_record *record, - union perf_mem_data_src *data_src) +static void arm_spe__synth_memory_level(const struct arm_spe_record *record, + union perf_mem_data_src *data_src) { if (record->type & (ARM_SPE_LLC_ACCESS | ARM_SPE_LLC_MISS)) { data_src->mem_lvl = PERF_MEM_LVL_L3; @@ -515,10 +555,55 @@ static void arm_spe__synth_data_source_generic(const struct arm_spe_record *reco data_src->mem_lvl |= PERF_MEM_LVL_REM_CCE1; } -static u64 arm_spe__synth_data_source(const struct arm_spe_record *record, u64 midr) +static bool arm_spe__is_common_ds_encoding(struct arm_spe_queue *speq) +{ + struct arm_spe *spe = speq->spe; + bool is_in_cpu_list; + u64 *metadata = NULL; + u64 midr = 0; + + /* Metadata version 1 assumes all CPUs are the same (old behavior) */ + if (spe->metadata_ver == 1) { + const char *cpuid; + + pr_warning_once("Old SPE metadata, re-record to improve decode accuracy\n"); + cpuid = perf_env__cpuid(spe->session->evlist->env); + midr = strtol(cpuid, NULL, 16); + } else { + /* CPU ID is -1 for per-thread mode */ + if (speq->cpu < 0) { + /* + * On the heterogeneous system, due to CPU ID is -1, + * cannot confirm the data source packet is supported. + */ + if (!spe->is_homogeneous) + return false; + + /* In homogeneous system, simply use CPU0's metadata */ + if (spe->metadata) + metadata = spe->metadata[0]; + } else { + metadata = arm_spe__get_metadata_by_cpu(spe, speq->cpu); + } + + if (!metadata) + return false; + + midr = metadata[ARM_SPE_CPU_MIDR]; + } + + is_in_cpu_list = is_midr_in_range_list(midr, common_ds_encoding_cpus); + if (is_in_cpu_list) + return true; + else + return false; +} + +static u64 arm_spe__synth_data_source(struct arm_spe_queue *speq, + const struct arm_spe_record *record) { union perf_mem_data_src data_src = { .mem_op = PERF_MEM_OP_NA }; - bool is_neoverse = is_midr_in_range_list(midr, neoverse_spe); + bool is_common = arm_spe__is_common_ds_encoding(speq); if (record->op & ARM_SPE_OP_LD) data_src.mem_op = PERF_MEM_OP_LOAD; @@ -527,10 +612,10 @@ static u64 arm_spe__synth_data_source(const struct arm_spe_record *record, u64 m else return 0; - if (is_neoverse) - arm_spe__synth_data_source_neoverse(record, &data_src); + if (is_common) + arm_spe__synth_data_source_common(record, &data_src); else - arm_spe__synth_data_source_generic(record, &data_src); + arm_spe__synth_memory_level(record, &data_src); if (record->type & (ARM_SPE_TLB_ACCESS | ARM_SPE_TLB_MISS)) { data_src.mem_dtlb = PERF_MEM_TLB_WK; @@ -551,7 +636,8 @@ static int arm_spe_sample(struct arm_spe_queue *speq) u64 data_src; int err; - data_src = arm_spe__synth_data_source(record, spe->midr); + arm_spe__sample_flags(speq); + data_src = arm_spe__synth_data_source(speq, record); if (spe->sample_flc) { if (record->type & ARM_SPE_L1D_MISS) { @@ -601,8 +687,8 @@ static int arm_spe_sample(struct arm_spe_queue *speq) } } - if (spe->sample_branch && (record->type & ARM_SPE_BRANCH_MISS)) { - err = arm_spe__synth_branch_sample(speq, spe->branch_miss_id); + if (spe->sample_branch && (record->op & ARM_SPE_OP_BRANCH_ERET)) { + err = arm_spe__synth_branch_sample(speq, spe->branch_id); if (err) return err; } @@ -1016,6 +1102,73 @@ static int arm_spe_flush(struct perf_session *session __maybe_unused, return 0; } +static u64 *arm_spe__alloc_per_cpu_metadata(u64 *buf, int per_cpu_size) +{ + u64 *metadata; + + metadata = zalloc(per_cpu_size); + if (!metadata) + return NULL; + + memcpy(metadata, buf, per_cpu_size); + return metadata; +} + +static void arm_spe__free_metadata(u64 **metadata, int nr_cpu) +{ + int i; + + for (i = 0; i < nr_cpu; i++) + zfree(&metadata[i]); + free(metadata); +} + +static u64 **arm_spe__alloc_metadata(struct perf_record_auxtrace_info *info, + u64 *ver, int *nr_cpu) +{ + u64 *ptr = (u64 *)info->priv; + u64 metadata_size; + u64 **metadata = NULL; + int hdr_sz, per_cpu_sz, i; + + metadata_size = info->header.size - + sizeof(struct perf_record_auxtrace_info); + + /* Metadata version 1 */ + if (metadata_size == ARM_SPE_AUXTRACE_V1_PRIV_SIZE) { + *ver = 1; + *nr_cpu = 0; + /* No per CPU metadata */ + return NULL; + } + + *ver = ptr[ARM_SPE_HEADER_VERSION]; + hdr_sz = ptr[ARM_SPE_HEADER_SIZE]; + *nr_cpu = ptr[ARM_SPE_CPUS_NUM]; + + metadata = calloc(*nr_cpu, sizeof(*metadata)); + if (!metadata) + return NULL; + + /* Locate the start address of per CPU metadata */ + ptr += hdr_sz; + per_cpu_sz = (metadata_size - (hdr_sz * sizeof(u64))) / (*nr_cpu); + + for (i = 0; i < *nr_cpu; i++) { + metadata[i] = arm_spe__alloc_per_cpu_metadata(ptr, per_cpu_sz); + if (!metadata[i]) + goto err_per_cpu_metadata; + + ptr += per_cpu_sz / sizeof(u64); + } + + return metadata; + +err_per_cpu_metadata: + arm_spe__free_metadata(metadata, *nr_cpu); + return NULL; +} + static void arm_spe_free_queue(void *priv) { struct arm_spe_queue *speq = priv; @@ -1050,6 +1203,7 @@ static void arm_spe_free(struct perf_session *session) auxtrace_heap__free(&spe->heap); arm_spe_free_events(session); session->auxtrace = NULL; + arm_spe__free_metadata(spe->metadata, spe->metadata_nr_cpu); free(spe); } @@ -1061,16 +1215,60 @@ static bool arm_spe_evsel_is_auxtrace(struct perf_session *session, return evsel->core.attr.type == spe->pmu_type; } -static const char * const arm_spe_info_fmts[] = { - [ARM_SPE_PMU_TYPE] = " PMU Type %"PRId64"\n", +static const char * const metadata_hdr_v1_fmts[] = { + [ARM_SPE_PMU_TYPE] = " PMU Type :%"PRId64"\n", + [ARM_SPE_PER_CPU_MMAPS] = " Per CPU mmaps :%"PRId64"\n", +}; + +static const char * const metadata_hdr_fmts[] = { + [ARM_SPE_HEADER_VERSION] = " Header version :%"PRId64"\n", + [ARM_SPE_HEADER_SIZE] = " Header size :%"PRId64"\n", + [ARM_SPE_PMU_TYPE_V2] = " PMU type v2 :%"PRId64"\n", + [ARM_SPE_CPUS_NUM] = " CPU number :%"PRId64"\n", +}; + +static const char * const metadata_per_cpu_fmts[] = { + [ARM_SPE_MAGIC] = " Magic :0x%"PRIx64"\n", + [ARM_SPE_CPU] = " CPU # :%"PRId64"\n", + [ARM_SPE_CPU_NR_PARAMS] = " Num of params :%"PRId64"\n", + [ARM_SPE_CPU_MIDR] = " MIDR :0x%"PRIx64"\n", + [ARM_SPE_CPU_PMU_TYPE] = " PMU Type :%"PRId64"\n", + [ARM_SPE_CAP_MIN_IVAL] = " Min Interval :%"PRId64"\n", }; -static void arm_spe_print_info(__u64 *arr) +static void arm_spe_print_info(struct arm_spe *spe, __u64 *arr) { + unsigned int i, cpu, hdr_size, cpu_num, cpu_size; + const char * const *hdr_fmts; + if (!dump_trace) return; - fprintf(stdout, arm_spe_info_fmts[ARM_SPE_PMU_TYPE], arr[ARM_SPE_PMU_TYPE]); + if (spe->metadata_ver == 1) { + cpu_num = 0; + hdr_size = ARM_SPE_AUXTRACE_V1_PRIV_MAX; + hdr_fmts = metadata_hdr_v1_fmts; + } else { + cpu_num = arr[ARM_SPE_CPUS_NUM]; + hdr_size = arr[ARM_SPE_HEADER_SIZE]; + hdr_fmts = metadata_hdr_fmts; + } + + for (i = 0; i < hdr_size; i++) + fprintf(stdout, hdr_fmts[i], arr[i]); + + arr += hdr_size; + for (cpu = 0; cpu < cpu_num; cpu++) { + /* + * The parameters from ARM_SPE_MAGIC to ARM_SPE_CPU_NR_PARAMS + * are fixed. The sequential parameter size is decided by the + * field 'ARM_SPE_CPU_NR_PARAMS'. + */ + cpu_size = (ARM_SPE_CPU_NR_PARAMS + 1) + arr[ARM_SPE_CPU_NR_PARAMS]; + for (i = 0; i < cpu_size; i++) + fprintf(stdout, metadata_per_cpu_fmts[i], arr[i]); + arr += cpu_size; + } } static void arm_spe_set_event_name(struct evlist *evlist, u64 id, @@ -1202,12 +1400,12 @@ arm_spe_synth_events(struct arm_spe *spe, struct perf_session *session) if (spe->synth_opts.branches) { spe->sample_branch = true; - /* Branch miss */ + /* Branch */ err = perf_session__deliver_synth_attr_event(session, &attr, id); if (err) return err; - spe->branch_miss_id = id; - arm_spe_set_event_name(evlist, id, "branch-miss"); + spe->branch_id = id; + arm_spe_set_event_name(evlist, id, "branch"); id += 1; } @@ -1258,24 +1456,57 @@ synth_instructions_out: return 0; } +static bool arm_spe__is_homogeneous(u64 **metadata, int nr_cpu) +{ + u64 midr; + int i; + + if (!nr_cpu) + return false; + + for (i = 0; i < nr_cpu; i++) { + if (!metadata[i]) + return false; + + if (i == 0) { + midr = metadata[i][ARM_SPE_CPU_MIDR]; + continue; + } + + if (midr != metadata[i][ARM_SPE_CPU_MIDR]) + return false; + } + + return true; +} + int arm_spe_process_auxtrace_info(union perf_event *event, struct perf_session *session) { struct perf_record_auxtrace_info *auxtrace_info = &event->auxtrace_info; - size_t min_sz = sizeof(u64) * ARM_SPE_AUXTRACE_PRIV_MAX; + size_t min_sz = ARM_SPE_AUXTRACE_V1_PRIV_SIZE; struct perf_record_time_conv *tc = &session->time_conv; - const char *cpuid = perf_env__cpuid(session->evlist->env); - u64 midr = strtol(cpuid, NULL, 16); struct arm_spe *spe; - int err; + u64 **metadata = NULL; + u64 metadata_ver; + int nr_cpu, err; if (auxtrace_info->header.size < sizeof(struct perf_record_auxtrace_info) + min_sz) return -EINVAL; + metadata = arm_spe__alloc_metadata(auxtrace_info, &metadata_ver, + &nr_cpu); + if (!metadata && metadata_ver != 1) { + pr_err("Failed to parse Arm SPE metadata.\n"); + return -EINVAL; + } + spe = zalloc(sizeof(struct arm_spe)); - if (!spe) - return -ENOMEM; + if (!spe) { + err = -ENOMEM; + goto err_free_metadata; + } err = auxtrace_queues__init(&spe->queues); if (err) @@ -1284,8 +1515,14 @@ int arm_spe_process_auxtrace_info(union perf_event *event, spe->session = session; spe->machine = &session->machines.host; /* No kvm support */ spe->auxtrace_type = auxtrace_info->type; - spe->pmu_type = auxtrace_info->priv[ARM_SPE_PMU_TYPE]; - spe->midr = midr; + if (metadata_ver == 1) + spe->pmu_type = auxtrace_info->priv[ARM_SPE_PMU_TYPE]; + else + spe->pmu_type = auxtrace_info->priv[ARM_SPE_PMU_TYPE_V2]; + spe->metadata = metadata; + spe->metadata_ver = metadata_ver; + spe->metadata_nr_cpu = nr_cpu; + spe->is_homogeneous = arm_spe__is_homogeneous(metadata, nr_cpu); spe->timeless_decoding = arm_spe__is_timeless_decoding(spe); @@ -1318,7 +1555,7 @@ int arm_spe_process_auxtrace_info(union perf_event *event, spe->auxtrace.evsel_is_auxtrace = arm_spe_evsel_is_auxtrace; session->auxtrace = &spe->auxtrace; - arm_spe_print_info(&auxtrace_info->priv[0]); + arm_spe_print_info(spe, &auxtrace_info->priv[0]); if (dump_trace) return 0; @@ -1346,5 +1583,7 @@ err_free_queues: session->auxtrace = NULL; err_free: free(spe); +err_free_metadata: + arm_spe__free_metadata(metadata, nr_cpu); return err; } diff --git a/tools/perf/util/arm-spe.h b/tools/perf/util/arm-spe.h index 4f4900c18f3e..390679a4af2f 100644 --- a/tools/perf/util/arm-spe.h +++ b/tools/perf/util/arm-spe.h @@ -12,10 +12,46 @@ enum { ARM_SPE_PMU_TYPE, ARM_SPE_PER_CPU_MMAPS, + ARM_SPE_AUXTRACE_V1_PRIV_MAX, +}; + +#define ARM_SPE_AUXTRACE_V1_PRIV_SIZE \ + (ARM_SPE_AUXTRACE_V1_PRIV_MAX * sizeof(u64)) + +enum { + /* + * The old metadata format (defined above) does not include a + * field for version number. Version 1 is reserved and starts + * from version 2. + */ + ARM_SPE_HEADER_VERSION, + /* Number of sizeof(u64) */ + ARM_SPE_HEADER_SIZE, + /* PMU type shared by CPUs */ + ARM_SPE_PMU_TYPE_V2, + /* Number of CPUs */ + ARM_SPE_CPUS_NUM, ARM_SPE_AUXTRACE_PRIV_MAX, }; -#define ARM_SPE_AUXTRACE_PRIV_SIZE (ARM_SPE_AUXTRACE_PRIV_MAX * sizeof(u64)) +enum { + /* Magic number */ + ARM_SPE_MAGIC, + /* CPU logical number in system */ + ARM_SPE_CPU, + /* Number of parameters */ + ARM_SPE_CPU_NR_PARAMS, + /* CPU MIDR */ + ARM_SPE_CPU_MIDR, + /* Associated PMU type */ + ARM_SPE_CPU_PMU_TYPE, + /* Minimal interval */ + ARM_SPE_CAP_MIN_IVAL, + ARM_SPE_CPU_PRIV_MAX, +}; + +#define ARM_SPE_HEADER_CURRENT_VERSION 2 + union perf_event; struct perf_session; diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h index a1895a4f530b..dddaf4f3ffed 100644 --- a/tools/perf/util/auxtrace.h +++ b/tools/perf/util/auxtrace.h @@ -75,7 +75,6 @@ enum itrace_period_type { * (not fully accurate, since CYC packets are only emitted * together with other events, such as branches) * @branches: whether to synthesize 'branches' events - * (branch misses only for Arm SPE) * @transactions: whether to synthesize events for transactions * @ptwrites: whether to synthesize events for ptwrites * @pwr_events: whether to synthesize power events @@ -650,7 +649,7 @@ bool auxtrace__evsel_is_auxtrace(struct perf_session *session, #define ITRACE_HELP \ " i[period]: synthesize instructions events\n" \ " y[period]: synthesize cycles events (same period as i)\n" \ -" b: synthesize branches events (branch misses for Arm SPE)\n" \ +" b: synthesize branches events\n" \ " c: synthesize branches events (calls only)\n" \ " r: synthesize branches events (returns only)\n" \ " x: synthesize transactions events\n" \ diff --git a/tools/perf/util/bpf-filter.c b/tools/perf/util/bpf-filter.c index e87b6789eb9e..a4fdf6911ec1 100644 --- a/tools/perf/util/bpf-filter.c +++ b/tools/perf/util/bpf-filter.c @@ -375,7 +375,7 @@ static int create_idx_hash(struct evsel *evsel, struct perf_bpf_filter_entry *en pfi = zalloc(sizeof(*pfi)); if (pfi == NULL) { pr_err("Cannot save pinned filter index\n"); - goto err; + return -ENOMEM; } pfi->evsel = evsel; diff --git a/tools/perf/util/bpf-prologue.h b/tools/perf/util/bpf-prologue.h deleted file mode 100644 index 66dcf751ef65..000000000000 --- a/tools/perf/util/bpf-prologue.h +++ /dev/null @@ -1,37 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Copyright (C) 2015, He Kuang <hekuang@huawei.com> - * Copyright (C) 2015, Huawei Inc. - */ -#ifndef __BPF_PROLOGUE_H -#define __BPF_PROLOGUE_H - -struct probe_trace_arg; -struct bpf_insn; - -#define BPF_PROLOGUE_MAX_ARGS 3 -#define BPF_PROLOGUE_START_ARG_REG BPF_REG_3 -#define BPF_PROLOGUE_FETCH_RESULT_REG BPF_REG_2 - -#ifdef HAVE_BPF_PROLOGUE -int bpf__gen_prologue(struct probe_trace_arg *args, int nargs, - struct bpf_insn *new_prog, size_t *new_cnt, - size_t cnt_space); -#else -#include <linux/compiler.h> -#include <errno.h> - -static inline int -bpf__gen_prologue(struct probe_trace_arg *args __maybe_unused, - int nargs __maybe_unused, - struct bpf_insn *new_prog __maybe_unused, - size_t *new_cnt, - size_t cnt_space __maybe_unused) -{ - if (!new_cnt) - return -EINVAL; - *new_cnt = 0; - return -ENOTSUP; -} -#endif -#endif /* __BPF_PROLOGUE_H */ diff --git a/tools/perf/util/bpf_counter.c b/tools/perf/util/bpf_counter.c index 7a8af60e0f51..73fcafbffc6a 100644 --- a/tools/perf/util/bpf_counter.c +++ b/tools/perf/util/bpf_counter.c @@ -394,6 +394,7 @@ static int bperf_check_target(struct evsel *evsel, } static struct perf_cpu_map *all_cpu_map; +static __u32 filter_entry_cnt; static int bperf_reload_leader_program(struct evsel *evsel, int attr_map_fd, struct perf_event_attr_map_entry *entry) @@ -444,12 +445,32 @@ out: return err; } +static int bperf_attach_follower_program(struct bperf_follower_bpf *skel, + enum bperf_filter_type filter_type, + bool inherit) +{ + struct bpf_link *link; + int err = 0; + + if ((filter_type == BPERF_FILTER_PID || + filter_type == BPERF_FILTER_TGID) && inherit) + /* attach all follower bpf progs to enable event inheritance */ + err = bperf_follower_bpf__attach(skel); + else { + link = bpf_program__attach(skel->progs.fexit_XXX); + if (IS_ERR(link)) + err = PTR_ERR(link); + } + + return err; +} + static int bperf__load(struct evsel *evsel, struct target *target) { struct perf_event_attr_map_entry entry = {0xffffffff, 0xffffffff}; int attr_map_fd, diff_map_fd = -1, err; enum bperf_filter_type filter_type; - __u32 filter_entry_cnt, i; + __u32 i; if (bperf_check_target(evsel, target, &filter_type, &filter_entry_cnt)) return -1; @@ -529,9 +550,6 @@ static int bperf__load(struct evsel *evsel, struct target *target) /* set up reading map */ bpf_map__set_max_entries(evsel->follower_skel->maps.accum_readings, filter_entry_cnt); - /* set up follower filter based on target */ - bpf_map__set_max_entries(evsel->follower_skel->maps.filter, - filter_entry_cnt); err = bperf_follower_bpf__load(evsel->follower_skel); if (err) { pr_err("Failed to load follower skeleton\n"); @@ -543,6 +561,7 @@ static int bperf__load(struct evsel *evsel, struct target *target) for (i = 0; i < filter_entry_cnt; i++) { int filter_map_fd; __u32 key; + struct bperf_filter_value fval = { i, 0 }; if (filter_type == BPERF_FILTER_PID || filter_type == BPERF_FILTER_TGID) @@ -553,12 +572,14 @@ static int bperf__load(struct evsel *evsel, struct target *target) break; filter_map_fd = bpf_map__fd(evsel->follower_skel->maps.filter); - bpf_map_update_elem(filter_map_fd, &key, &i, BPF_ANY); + bpf_map_update_elem(filter_map_fd, &key, &fval, BPF_ANY); } evsel->follower_skel->bss->type = filter_type; + evsel->follower_skel->bss->inherit = target->inherit; - err = bperf_follower_bpf__attach(evsel->follower_skel); + err = bperf_attach_follower_program(evsel->follower_skel, filter_type, + target->inherit); out: if (err && evsel->bperf_leader_link_fd >= 0) @@ -623,7 +644,7 @@ static int bperf__read(struct evsel *evsel) bperf_sync_counters(evsel); reading_map_fd = bpf_map__fd(skel->maps.accum_readings); - for (i = 0; i < bpf_map__max_entries(skel->maps.accum_readings); i++) { + for (i = 0; i < filter_entry_cnt; i++) { struct perf_cpu entry; __u32 cpu; diff --git a/tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c b/tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c index b2f17cca014b..4a62ed593e84 100644 --- a/tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c +++ b/tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c @@ -288,6 +288,10 @@ int sys_enter_rename(struct syscall_enter_args *args) augmented_args->arg.size = PERF_ALIGN(oldpath_len + 1, sizeof(u64)); len += augmented_args->arg.size; + /* Every read from userspace is limited to value size */ + if (augmented_args->arg.size > sizeof(augmented_args->arg.value)) + return 1; /* Failure: don't filter */ + struct augmented_arg *arg2 = (void *)&augmented_args->arg.value + augmented_args->arg.size; newpath_len = augmented_arg__read_str(arg2, newpath_arg, sizeof(augmented_args->arg.value)); @@ -315,6 +319,10 @@ int sys_enter_renameat2(struct syscall_enter_args *args) augmented_args->arg.size = PERF_ALIGN(oldpath_len + 1, sizeof(u64)); len += augmented_args->arg.size; + /* Every read from userspace is limited to value size */ + if (augmented_args->arg.size > sizeof(augmented_args->arg.value)) + return 1; /* Failure: don't filter */ + struct augmented_arg *arg2 = (void *)&augmented_args->arg.value + augmented_args->arg.size; newpath_len = augmented_arg__read_str(arg2, newpath_arg, sizeof(augmented_args->arg.value)); @@ -423,8 +431,9 @@ static bool pid_filter__has(struct pids_filtered *pids, pid_t pid) static int augment_sys_enter(void *ctx, struct syscall_enter_args *args) { bool augmented, do_output = false; - int zero = 0, size, aug_size, index, output = 0, + int zero = 0, size, aug_size, index, value_size = sizeof(struct augmented_arg) - offsetof(struct augmented_arg, value); + u64 output = 0; /* has to be u64, otherwise it won't pass the verifier */ unsigned int nr, *beauty_map; struct beauty_payload_enter *payload; void *arg, *payload_offset; @@ -477,6 +486,8 @@ static int augment_sys_enter(void *ctx, struct syscall_enter_args *args) augmented = true; } else if (size < 0 && size >= -6) { /* buffer */ index = -(size + 1); + barrier_var(index); // Prevent clang (noticed with v18) from removing the &= 7 trick. + index &= 7; // Satisfy the bounds checking with the verifier in some kernels. aug_size = args->args[index]; if (aug_size > TRACE_AUG_MAX_BUF) @@ -488,10 +499,17 @@ static int augment_sys_enter(void *ctx, struct syscall_enter_args *args) } } + /* Augmented data size is limited to sizeof(augmented_arg->unnamed union with value field) */ + if (aug_size > value_size) + aug_size = value_size; + /* write data to payload */ if (augmented) { int written = offsetof(struct augmented_arg, value) + aug_size; + if (written < 0 || written > sizeof(struct augmented_arg)) + return 1; + ((struct augmented_arg *)payload_offset)->size = aug_size; output += written; payload_offset += written; @@ -499,7 +517,7 @@ static int augment_sys_enter(void *ctx, struct syscall_enter_args *args) } } - if (!do_output) + if (!do_output || (sizeof(struct syscall_enter_args) + output) > sizeof(struct beauty_payload_enter)) return 1; return augmented__beauty_output(ctx, payload, sizeof(struct syscall_enter_args) + output); diff --git a/tools/perf/util/bpf_skel/bperf_follower.bpf.c b/tools/perf/util/bpf_skel/bperf_follower.bpf.c index f193998530d4..0595063139a3 100644 --- a/tools/perf/util/bpf_skel/bperf_follower.bpf.c +++ b/tools/perf/util/bpf_skel/bperf_follower.bpf.c @@ -5,6 +5,8 @@ #include <bpf/bpf_tracing.h> #include "bperf_u.h" +#define MAX_ENTRIES 102400 + struct { __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); __uint(key_size, sizeof(__u32)); @@ -22,25 +24,29 @@ struct { struct { __uint(type, BPF_MAP_TYPE_HASH); __uint(key_size, sizeof(__u32)); - __uint(value_size, sizeof(__u32)); + __uint(value_size, sizeof(struct bperf_filter_value)); + __uint(max_entries, MAX_ENTRIES); + __uint(map_flags, BPF_F_NO_PREALLOC); } filter SEC(".maps"); enum bperf_filter_type type = 0; int enabled = 0; +int inherit; SEC("fexit/XXX") int BPF_PROG(fexit_XXX) { struct bpf_perf_event_value *diff_val, *accum_val; __u32 filter_key, zero = 0; - __u32 *accum_key; + __u32 accum_key; + struct bperf_filter_value *fval; if (!enabled) return 0; switch (type) { case BPERF_FILTER_GLOBAL: - accum_key = &zero; + accum_key = zero; goto do_add; case BPERF_FILTER_CPU: filter_key = bpf_get_smp_processor_id(); @@ -49,22 +55,34 @@ int BPF_PROG(fexit_XXX) filter_key = bpf_get_current_pid_tgid() & 0xffffffff; break; case BPERF_FILTER_TGID: - filter_key = bpf_get_current_pid_tgid() >> 32; + /* Use pid as the filter_key to exclude new task counts + * when inherit is disabled. Don't worry about the existing + * children in TGID losing their counts, bpf_counter has + * already added them to the filter map via perf_thread_map + * before this bpf prog runs. + */ + filter_key = inherit ? + bpf_get_current_pid_tgid() >> 32 : + bpf_get_current_pid_tgid() & 0xffffffff; break; default: return 0; } - accum_key = bpf_map_lookup_elem(&filter, &filter_key); - if (!accum_key) + fval = bpf_map_lookup_elem(&filter, &filter_key); + if (!fval) return 0; + accum_key = fval->accum_key; + if (fval->exited) + bpf_map_delete_elem(&filter, &filter_key); + do_add: diff_val = bpf_map_lookup_elem(&diff_readings, &zero); if (!diff_val) return 0; - accum_val = bpf_map_lookup_elem(&accum_readings, accum_key); + accum_val = bpf_map_lookup_elem(&accum_readings, &accum_key); if (!accum_val) return 0; @@ -75,4 +93,70 @@ do_add: return 0; } +/* The program is only used for PID or TGID filter types. */ +SEC("tp_btf/task_newtask") +int BPF_PROG(on_newtask, struct task_struct *task, __u64 clone_flags) +{ + __u32 parent_key, child_key; + struct bperf_filter_value *parent_fval; + struct bperf_filter_value child_fval = { 0 }; + + if (!enabled) + return 0; + + switch (type) { + case BPERF_FILTER_PID: + parent_key = bpf_get_current_pid_tgid() & 0xffffffff; + child_key = task->pid; + break; + case BPERF_FILTER_TGID: + parent_key = bpf_get_current_pid_tgid() >> 32; + child_key = task->tgid; + if (child_key == parent_key) + return 0; + break; + default: + return 0; + } + + /* Check if the current task is one of the target tasks to be counted */ + parent_fval = bpf_map_lookup_elem(&filter, &parent_key); + if (!parent_fval) + return 0; + + /* Start counting for the new task by adding it into filter map, + * inherit the accum key of its parent task so that they can be + * counted together. + */ + child_fval.accum_key = parent_fval->accum_key; + child_fval.exited = 0; + bpf_map_update_elem(&filter, &child_key, &child_fval, BPF_NOEXIST); + + return 0; +} + +/* The program is only used for PID or TGID filter types. */ +SEC("tp_btf/sched_process_exit") +int BPF_PROG(on_exittask, struct task_struct *task) +{ + __u32 pid; + struct bperf_filter_value *fval; + + if (!enabled) + return 0; + + /* Stop counting for this task by removing it from filter map. + * For TGID type, if the pid can be found in the map, it means that + * this pid belongs to the leader task. After the task exits, the + * tgid of its child tasks (if any) will be 1, so the pid can be + * safely removed. + */ + pid = task->pid; + fval = bpf_map_lookup_elem(&filter, &pid); + if (fval) + fval->exited = 1; + + return 0; +} + char LICENSE[] SEC("license") = "Dual BSD/GPL"; diff --git a/tools/perf/util/bpf_skel/bperf_u.h b/tools/perf/util/bpf_skel/bperf_u.h index 1ce0c2c905c1..4a4a753980be 100644 --- a/tools/perf/util/bpf_skel/bperf_u.h +++ b/tools/perf/util/bpf_skel/bperf_u.h @@ -11,4 +11,9 @@ enum bperf_filter_type { BPERF_FILTER_TGID, }; +struct bperf_filter_value { + __u32 accum_key; + __u8 exited; +}; + #endif /* __BPERF_STAT_U_H */ diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c index 8982f68e7230..e763e8d99a43 100644 --- a/tools/perf/util/build-id.c +++ b/tools/perf/util/build-id.c @@ -277,7 +277,7 @@ static int write_buildid(const char *name, size_t name_len, struct build_id *bid struct perf_record_header_build_id b; size_t len; - len = sizeof(b) + name_len + 1; + len = name_len + 1; len = PERF_ALIGN(len, sizeof(u64)); memset(&b, 0, sizeof(b)); @@ -286,7 +286,7 @@ static int write_buildid(const char *name, size_t name_len, struct build_id *bid misc |= PERF_RECORD_MISC_BUILD_ID_SIZE; b.pid = pid; b.header.misc = misc; - b.header.size = len; + b.header.size = sizeof(b) + len; err = do_write(fd, &b, sizeof(b)); if (err < 0) diff --git a/tools/perf/util/cap.c b/tools/perf/util/cap.c index 7574a67651bc..69d9a2bcd40b 100644 --- a/tools/perf/util/cap.c +++ b/tools/perf/util/cap.c @@ -7,13 +7,9 @@ #include "debug.h" #include <errno.h> #include <string.h> -#include <unistd.h> #include <linux/capability.h> #include <sys/syscall.h> - -#ifndef SYS_capget -#define SYS_capget 90 -#endif +#include <unistd.h> #define MAX_LINUX_CAPABILITY_U32S _LINUX_CAPABILITY_U32S_3 @@ -21,9 +17,9 @@ bool perf_cap__capable(int cap, bool *used_root) { struct __user_cap_header_struct header = { .version = _LINUX_CAPABILITY_VERSION_3, - .pid = getpid(), + .pid = 0, }; - struct __user_cap_data_struct data[MAX_LINUX_CAPABILITY_U32S]; + struct __user_cap_data_struct data[MAX_LINUX_CAPABILITY_U32S] = {}; __u32 cap_val; *used_root = false; diff --git a/tools/perf/util/color.c b/tools/perf/util/color.c index bffbdd216a6a..e51f0a676a22 100644 --- a/tools/perf/util/color.c +++ b/tools/perf/util/color.c @@ -93,34 +93,6 @@ int color_fprintf(FILE *fp, const char *color, const char *fmt, ...) return r; } -/* - * This function splits the buffer by newlines and colors the lines individually. - * - * Returns 0 on success. - */ -int color_fwrite_lines(FILE *fp, const char *color, - size_t count, const char *buf) -{ - if (!*color) - return fwrite(buf, count, 1, fp) != 1; - - while (count) { - char *p = memchr(buf, '\n', count); - - if (p != buf && (fputs(color, fp) < 0 || - fwrite(buf, p ? (size_t)(p - buf) : count, 1, fp) != 1 || - fputs(PERF_COLOR_RESET, fp) < 0)) - return -1; - if (!p) - return 0; - if (fputc('\n', fp) < 0) - return -1; - count -= p + 1 - buf; - buf = p + 1; - } - return 0; -} - const char *get_percent_color(double percent) { const char *color = PERF_COLOR_NORMAL; diff --git a/tools/perf/util/color.h b/tools/perf/util/color.h index 01f7bed21c9b..9a7248dbe2d7 100644 --- a/tools/perf/util/color.h +++ b/tools/perf/util/color.h @@ -2,6 +2,7 @@ #ifndef __PERF_COLOR_H #define __PERF_COLOR_H +#include <linux/compiler.h> #include <stdio.h> #include <stdarg.h> @@ -22,6 +23,7 @@ #define MIN_GREEN 0.5 #define MIN_RED 5.0 +#define PERF_COLOR_DELETE_LINE "\033[A\33[2K\r" /* * This variable stores the value of color.ui */ @@ -37,12 +39,11 @@ int perf_config_colorbool(const char *var, const char *value, int stdout_is_tty) int color_vsnprintf(char *bf, size_t size, const char *color, const char *fmt, va_list args); int color_vfprintf(FILE *fp, const char *color, const char *fmt, va_list args); -int color_fprintf(FILE *fp, const char *color, const char *fmt, ...); -int color_snprintf(char *bf, size_t size, const char *color, const char *fmt, ...); -int color_fwrite_lines(FILE *fp, const char *color, size_t count, const char *buf); +int color_fprintf(FILE *fp, const char *color, const char *fmt, ...) __printf(3, 4); +int color_snprintf(char *bf, size_t size, const char *color, const char *fmt, ...) __printf(4, 5); int value_color_snprintf(char *bf, size_t size, const char *fmt, double value); -int percent_color_snprintf(char *bf, size_t size, const char *fmt, ...); -int percent_color_len_snprintf(char *bf, size_t size, const char *fmt, ...); +int percent_color_snprintf(char *bf, size_t size, const char *fmt, ...) __printf(3, 4); +int percent_color_len_snprintf(char *bf, size_t size, const char *fmt, ...) __printf(3, 4); int percent_color_fprintf(FILE *fp, const char *fmt, double percent); const char *get_percent_color(double percent); diff --git a/tools/perf/util/config.c b/tools/perf/util/config.c index 7a650de0db83..68f9407ca74b 100644 --- a/tools/perf/util/config.c +++ b/tools/perf/util/config.c @@ -912,6 +912,7 @@ void set_buildid_dir(const char *dir) struct perf_config_scan_data { const char *name; const char *fmt; + const char *value; va_list args; int ret; }; @@ -939,3 +940,24 @@ int perf_config_scan(const char *name, const char *fmt, ...) return d.ret; } + +static int perf_config_get_cb(const char *var, const char *value, void *data) +{ + struct perf_config_scan_data *d = data; + + if (!strcmp(var, d->name)) + d->value = value; + + return 0; +} + +const char *perf_config_get(const char *name) +{ + struct perf_config_scan_data d = { + .name = name, + .value = NULL, + }; + + perf_config(perf_config_get_cb, &d); + return d.value; +} diff --git a/tools/perf/util/config.h b/tools/perf/util/config.h index 2e5e808928a5..9971313d61c1 100644 --- a/tools/perf/util/config.h +++ b/tools/perf/util/config.h @@ -30,6 +30,7 @@ typedef int (*config_fn_t)(const char *, const char *, void *); int perf_default_config(const char *, const char *, void *); int perf_config(config_fn_t fn, void *); int perf_config_scan(const char *name, const char *fmt, ...) __scanf(2, 3); +const char *perf_config_get(const char *name); int perf_config_set(struct perf_config_set *set, config_fn_t fn, void *data); int perf_config_int(int *dest, const char *, const char *); diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c index b78ef0262135..b85a8837bddc 100644 --- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c +++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c @@ -685,9 +685,14 @@ cs_etm_decoder__create_etm_decoder(struct cs_etm_decoder_params *d_params, } if (d_params->operation == CS_ETM_OPERATION_DECODE) { + int decode_flags = OCSD_CREATE_FLG_FULL_DECODER; +#ifdef OCSD_OPFLG_N_UNCOND_DIR_BR_CHK + decode_flags |= OCSD_OPFLG_N_UNCOND_DIR_BR_CHK | OCSD_OPFLG_CHK_RANGE_CONTINUE | + ETM4_OPFLG_PKTDEC_AA64_OPCODE_CHK; +#endif if (ocsd_dt_create_decoder(decoder->dcd_tree, decoder->decoder_name, - OCSD_CREATE_FLG_FULL_DECODER, + decode_flags, trace_config, &csid)) return -1; diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c index 90f32f327b9b..0bf9e5c27b59 100644 --- a/tools/perf/util/cs-etm.c +++ b/tools/perf/util/cs-etm.c @@ -2490,12 +2490,6 @@ static void cs_etm__clear_all_traceid_queues(struct cs_etm_queue *etmq) /* Ignore return value */ cs_etm__process_traceid_queue(etmq, tidq); - - /* - * Generate an instruction sample with the remaining - * branchstack entries. - */ - cs_etm__flush(etmq, tidq); } } @@ -2638,7 +2632,7 @@ static int cs_etm__process_timestamped_queues(struct cs_etm_auxtrace *etm) while (1) { if (!etm->heap.heap_cnt) - goto out; + break; /* Take the entry at the top of the min heap */ cs_queue_nr = etm->heap.heap_array[0].queue_nr; @@ -2721,6 +2715,23 @@ refetch: ret = auxtrace_heap__add(&etm->heap, cs_queue_nr, cs_timestamp); } + for (i = 0; i < etm->queues.nr_queues; i++) { + struct int_node *inode; + + etmq = etm->queues.queue_array[i].priv; + if (!etmq) + continue; + + intlist__for_each_entry(inode, etmq->traceid_queues_list) { + int idx = (int)(intptr_t)inode->priv; + + /* Flush any remaining branch stack entries */ + tidq = etmq->traceid_queues[idx]; + ret = cs_etm__end_block(etmq, tidq); + if (ret) + return ret; + } + } out: return ret; } @@ -3323,7 +3334,7 @@ static int cs_etm__create_decoders(struct cs_etm_auxtrace *etm) * Don't create decoders for empty queues, mainly because * etmq->format is unknown for empty queues. */ - assert(empty == (etmq->format == UNSET)); + assert(empty || etmq->format != UNSET); if (empty) continue; diff --git a/tools/perf/util/data-convert-bt.c b/tools/perf/util/data-convert-bt.c index 021e9b1d5cc5..f0599c61fab4 100644 --- a/tools/perf/util/data-convert-bt.c +++ b/tools/perf/util/data-convert-bt.c @@ -36,7 +36,7 @@ #include "util/sample.h" #ifdef HAVE_LIBTRACEEVENT -#include <traceevent/event-parse.h> +#include <event-parse.h> #endif #define pr_N(n, fmt, ...) \ diff --git a/tools/perf/util/data-convert-json.c b/tools/perf/util/data-convert-json.c index 20bfb0884e9e..8304cd2d4a9c 100644 --- a/tools/perf/util/data-convert-json.c +++ b/tools/perf/util/data-convert-json.c @@ -28,7 +28,7 @@ #include "util/tool.h" #ifdef HAVE_LIBTRACEEVENT -#include <traceevent/event-parse.h> +#include <event-parse.h> #endif struct convert_json { diff --git a/tools/perf/util/debug.c b/tools/perf/util/debug.c index d633d15329fa..995f6bb05b5f 100644 --- a/tools/perf/util/debug.c +++ b/tools/perf/util/debug.c @@ -27,7 +27,7 @@ #include <linux/ctype.h> #ifdef HAVE_LIBTRACEEVENT -#include <traceevent/event-parse.h> +#include <event-parse.h> #else #define LIBTRACEEVENT_VERSION 0 #endif diff --git a/tools/perf/util/debuginfo.h b/tools/perf/util/debuginfo.h index ad6422c3f8ca..a52d69932815 100644 --- a/tools/perf/util/debuginfo.h +++ b/tools/perf/util/debuginfo.h @@ -5,7 +5,7 @@ #include <errno.h> #include <linux/compiler.h> -#ifdef HAVE_DWARF_SUPPORT +#ifdef HAVE_LIBDW_SUPPORT #include "dwarf-aux.h" @@ -25,7 +25,7 @@ void debuginfo__delete(struct debuginfo *dbg); int debuginfo__get_text_offset(struct debuginfo *dbg, Dwarf_Addr *offs, bool adjust_offset); -#else /* HAVE_DWARF_SUPPORT */ +#else /* HAVE_LIBDW_SUPPORT */ /* dummy debug information structure */ struct debuginfo { @@ -49,7 +49,7 @@ static inline int debuginfo__get_text_offset(struct debuginfo *dbg __maybe_unuse return -EINVAL; } -#endif /* HAVE_DWARF_SUPPORT */ +#endif /* HAVE_LIBDW_SUPPORT */ #ifdef HAVE_DEBUGINFOD_SUPPORT int get_source_from_debuginfod(const char *raw_path, const char *sbuild_id, diff --git a/tools/perf/util/disasm.c b/tools/perf/util/disasm.c index f05ba7739c1e..41a2b08670dc 100644 --- a/tools/perf/util/disasm.c +++ b/tools/perf/util/disasm.c @@ -18,6 +18,7 @@ #include "disasm.h" #include "disasm_bpf.h" #include "dso.h" +#include "dwarf-regs.h" #include "env.h" #include "evsel.h" #include "map.h" @@ -151,14 +152,14 @@ static struct arch architectures[] = { .memory_ref_char = '(', .imm_char = '$', }, -#ifdef HAVE_DWARF_SUPPORT +#ifdef HAVE_LIBDW_SUPPORT .update_insn_state = update_insn_state_x86, #endif }, { .name = "powerpc", .init = powerpc__annotate_init, -#ifdef HAVE_DWARF_SUPPORT +#ifdef HAVE_LIBDW_SUPPORT .update_insn_state = update_insn_state_powerpc, #endif }, @@ -1423,6 +1424,15 @@ err: } #endif +#if !defined(HAVE_LIBCAPSTONE_SUPPORT) || !defined(HAVE_LIBLLVM_SUPPORT) +static void symbol__disassembler_missing(const char *disassembler, const char *filename, + struct symbol *sym) +{ + pr_debug("The %s disassembler isn't linked in for %s in %s\n", + disassembler, sym->name, filename); +} +#endif + #ifdef HAVE_LIBCAPSTONE_SUPPORT static void print_capstone_detail(cs_insn *insn, char *buf, size_t len, struct annotate_args *args, u64 addr) @@ -1573,7 +1583,7 @@ static int symbol__disassemble_capstone_powerpc(char *filename, struct symbol *s dl = disasm_line__new(args); if (dl == NULL) - goto err; + break; annotation_line__add(&dl->al, ¬es->src->source); @@ -1603,18 +1613,6 @@ out: err: if (fd >= 0) close(fd); - if (needs_cs_close) { - struct disasm_line *tmp; - - /* - * It probably failed in the middle of the above loop. - * Release any resources it might add. - */ - list_for_each_entry_safe(dl, tmp, ¬es->src->source, al.node) { - list_del(&dl->al.node); - free(dl); - } - } count = -1; goto out; } @@ -1627,12 +1625,12 @@ static int symbol__disassemble_capstone(char *filename, struct symbol *sym, u64 start = map__rip_2objdump(map, sym->start); u64 len; u64 offset; - int i, count; + int i, count, free_count; bool is_64bit = false; bool needs_cs_close = false; u8 *buf = NULL; csh handle; - cs_insn *insn; + cs_insn *insn = NULL; char disasm_buf[512]; struct disasm_line *dl; @@ -1664,7 +1662,7 @@ static int symbol__disassemble_capstone(char *filename, struct symbol *sym, needs_cs_close = true; - count = cs_disasm(handle, buf, len, start, len, &insn); + free_count = count = cs_disasm(handle, buf, len, start, len, &insn); for (i = 0, offset = 0; i < count; i++) { int printed; @@ -1702,8 +1700,11 @@ static int symbol__disassemble_capstone(char *filename, struct symbol *sym, } out: - if (needs_cs_close) + if (needs_cs_close) { cs_close(&handle); + if (free_count > 0) + cs_free(insn, free_count); + } free(buf); return count < 0 ? count : 0; @@ -1717,13 +1718,27 @@ err: */ list_for_each_entry_safe(dl, tmp, ¬es->src->source, al.node) { list_del(&dl->al.node); - free(dl); + disasm_line__free(dl); } } count = -1; goto out; } -#endif +#else // HAVE_LIBCAPSTONE_SUPPORT +static int symbol__disassemble_capstone(char *filename, struct symbol *sym, + struct annotate_args *args __maybe_unused) +{ + symbol__disassembler_missing("capstone", filename, sym); + return -1; +} + +static int symbol__disassemble_capstone_powerpc(char *filename, struct symbol *sym, + struct annotate_args *args __maybe_unused) +{ + symbol__disassembler_missing("capstone powerpc", filename, sym); + return -1; +} +#endif // HAVE_LIBCAPSTONE_SUPPORT static int symbol__disassemble_raw(char *filename, struct symbol *sym, struct annotate_args *args) @@ -1782,7 +1797,7 @@ static int symbol__disassemble_raw(char *filename, struct symbol *sym, sprintf(args->line, "%x", line[i]); dl = disasm_line__new(args); if (dl == NULL) - goto err; + break; annotation_line__add(&dl->al, ¬es->src->source); offset += 4; @@ -1991,7 +2006,14 @@ err: free(line_storage); return ret; } -#endif +#else // HAVE_LIBLLVM_SUPPORT +static int symbol__disassemble_llvm(char *filename, struct symbol *sym, + struct annotate_args *args __maybe_unused) +{ + symbol__disassembler_missing("LLVM", filename, sym); + return -1; +} +#endif // HAVE_LIBLLVM_SUPPORT /* * Possibly create a new version of line with tabs expanded. Returns the @@ -2053,17 +2075,14 @@ static char *expand_tabs(char *line, char **storage, size_t *storage_len) return new_line; } -int symbol__disassemble(struct symbol *sym, struct annotate_args *args) +static int symbol__disassemble_objdump(const char *filename, struct symbol *sym, + struct annotate_args *args) { struct annotation_options *opts = &annotate_opts; struct map *map = args->ms.map; struct dso *dso = map__dso(map); char *command; FILE *file; - char symfs_filename[PATH_MAX]; - struct kcore_extract kce; - bool delete_extract = false; - bool decomp = false; int lineno = 0; char *fileloc = NULL; int nline; @@ -2078,77 +2097,7 @@ int symbol__disassemble(struct symbol *sym, struct annotate_args *args) NULL, }; struct child_process objdump_process; - int err = dso__disassemble_filename(dso, symfs_filename, sizeof(symfs_filename)); - - if (err) - return err; - - pr_debug("%s: filename=%s, sym=%s, start=%#" PRIx64 ", end=%#" PRIx64 "\n", __func__, - symfs_filename, sym->name, map__unmap_ip(map, sym->start), - map__unmap_ip(map, sym->end)); - - pr_debug("annotating [%p] %30s : [%p] %30s\n", - dso, dso__long_name(dso), sym, sym->name); - - if (dso__binary_type(dso) == DSO_BINARY_TYPE__BPF_PROG_INFO) { - return symbol__disassemble_bpf(sym, args); - } else if (dso__binary_type(dso) == DSO_BINARY_TYPE__BPF_IMAGE) { - return symbol__disassemble_bpf_image(sym, args); - } else if (dso__binary_type(dso) == DSO_BINARY_TYPE__NOT_FOUND) { - return -1; - } else if (dso__is_kcore(dso)) { - kce.kcore_filename = symfs_filename; - kce.addr = map__rip_2objdump(map, sym->start); - kce.offs = sym->start; - kce.len = sym->end - sym->start; - if (!kcore_extract__create(&kce)) { - delete_extract = true; - strlcpy(symfs_filename, kce.extract_filename, - sizeof(symfs_filename)); - } - } else if (dso__needs_decompress(dso)) { - char tmp[KMOD_DECOMP_LEN]; - - if (dso__decompress_kmodule_path(dso, symfs_filename, - tmp, sizeof(tmp)) < 0) - return -1; - - decomp = true; - strcpy(symfs_filename, tmp); - } - - /* - * For powerpc data type profiling, use the dso__data_read_offset - * to read raw instruction directly and interpret the binary code - * to understand instructions and register fields. For sort keys as - * type and typeoff, disassemble to mnemonic notation is - * not required in case of powerpc. - */ - if (arch__is(args->arch, "powerpc")) { - extern const char *sort_order; - - if (sort_order && !strstr(sort_order, "sym")) { - err = symbol__disassemble_raw(symfs_filename, sym, args); - if (err == 0) - goto out_remove_tmp; -#ifdef HAVE_LIBCAPSTONE_SUPPORT - err = symbol__disassemble_capstone_powerpc(symfs_filename, sym, args); - if (err == 0) - goto out_remove_tmp; -#endif - } - } - -#ifdef HAVE_LIBLLVM_SUPPORT - err = symbol__disassemble_llvm(symfs_filename, sym, args); - if (err == 0) - goto out_remove_tmp; -#endif -#ifdef HAVE_LIBCAPSTONE_SUPPORT - err = symbol__disassemble_capstone(symfs_filename, sym, args); - if (err == 0) - goto out_remove_tmp; -#endif + int err; err = asprintf(&command, "%s %s%s --start-address=0x%016" PRIx64 @@ -2171,13 +2120,13 @@ int symbol__disassemble(struct symbol *sym, struct annotate_args *args) if (err < 0) { pr_err("Failure allocating memory for the command to run\n"); - goto out_remove_tmp; + return err; } pr_debug("Executing: %s\n", command); objdump_argv[2] = command; - objdump_argv[4] = symfs_filename; + objdump_argv[4] = filename; /* Create a pipe to read from for stdout */ memset(&objdump_process, 0, sizeof(objdump_process)); @@ -2215,8 +2164,8 @@ int symbol__disassemble(struct symbol *sym, struct annotate_args *args) break; /* Skip lines containing "filename:" */ - match = strstr(line, symfs_filename); - if (match && match[strlen(symfs_filename)] == ':') + match = strstr(line, filename); + if (match && match[strlen(filename)] == ':') continue; expanded_line = strim(line); @@ -2261,7 +2210,150 @@ out_close_stdout: out_free_command: free(command); + return err; +} + +static int annotation_options__init_disassemblers(struct annotation_options *options) +{ + char *disassembler; + + if (options->disassemblers_str == NULL) { + const char *default_disassemblers_str = +#ifdef HAVE_LIBLLVM_SUPPORT + "llvm," +#endif +#ifdef HAVE_LIBCAPSTONE_SUPPORT + "capstone," +#endif + "objdump"; + + options->disassemblers_str = strdup(default_disassemblers_str); + if (!options->disassemblers_str) + goto out_enomem; + } + + disassembler = strdup(options->disassemblers_str); + if (disassembler == NULL) + goto out_enomem; + + while (1) { + char *comma = strchr(disassembler, ','); + + if (comma != NULL) + *comma = '\0'; + + options->disassemblers[options->nr_disassemblers++] = strim(disassembler); + + if (comma == NULL) + break; + + disassembler = comma + 1; + + if (options->nr_disassemblers >= MAX_DISASSEMBLERS) { + pr_debug("annotate.disassemblers can have at most %d entries, ignoring \"%s\"\n", + MAX_DISASSEMBLERS, disassembler); + break; + } + } + + return 0; + +out_enomem: + pr_err("Not enough memory for annotate.disassemblers\n"); + return -1; +} + +int symbol__disassemble(struct symbol *sym, struct annotate_args *args) +{ + struct annotation_options *options = args->options; + struct map *map = args->ms.map; + struct dso *dso = map__dso(map); + char symfs_filename[PATH_MAX]; + bool delete_extract = false; + struct kcore_extract kce; + const char *disassembler; + bool decomp = false; + int err = dso__disassemble_filename(dso, symfs_filename, sizeof(symfs_filename)); + + if (err) + return err; + + pr_debug("%s: filename=%s, sym=%s, start=%#" PRIx64 ", end=%#" PRIx64 "\n", __func__, + symfs_filename, sym->name, map__unmap_ip(map, sym->start), + map__unmap_ip(map, sym->end)); + + pr_debug("annotating [%p] %30s : [%p] %30s\n", dso, dso__long_name(dso), sym, sym->name); + if (dso__binary_type(dso) == DSO_BINARY_TYPE__BPF_PROG_INFO) { + return symbol__disassemble_bpf(sym, args); + } else if (dso__binary_type(dso) == DSO_BINARY_TYPE__BPF_IMAGE) { + return symbol__disassemble_bpf_image(sym, args); + } else if (dso__binary_type(dso) == DSO_BINARY_TYPE__NOT_FOUND) { + return -1; + } else if (dso__is_kcore(dso)) { + kce.addr = map__rip_2objdump(map, sym->start); + kce.kcore_filename = symfs_filename; + kce.len = sym->end - sym->start; + kce.offs = sym->start; + + if (!kcore_extract__create(&kce)) { + delete_extract = true; + strlcpy(symfs_filename, kce.extract_filename, sizeof(symfs_filename)); + } + } else if (dso__needs_decompress(dso)) { + char tmp[KMOD_DECOMP_LEN]; + + if (dso__decompress_kmodule_path(dso, symfs_filename, tmp, sizeof(tmp)) < 0) + return -1; + + decomp = true; + strcpy(symfs_filename, tmp); + } + + /* + * For powerpc data type profiling, use the dso__data_read_offset to + * read raw instruction directly and interpret the binary code to + * understand instructions and register fields. For sort keys as type + * and typeoff, disassemble to mnemonic notation is not required in + * case of powerpc. + */ + if (arch__is(args->arch, "powerpc")) { + extern const char *sort_order; + + if (sort_order && !strstr(sort_order, "sym")) { + err = symbol__disassemble_raw(symfs_filename, sym, args); + if (err == 0) + goto out_remove_tmp; + + err = symbol__disassemble_capstone_powerpc(symfs_filename, sym, args); + if (err == 0) + goto out_remove_tmp; + } + } + + err = annotation_options__init_disassemblers(options); + if (err) + goto out_remove_tmp; + + err = -1; + + for (int i = 0; i < options->nr_disassemblers && err != 0; ++i) { + disassembler = options->disassemblers[i]; + + if (!strcmp(disassembler, "llvm")) + err = symbol__disassemble_llvm(symfs_filename, sym, args); + else if (!strcmp(disassembler, "capstone")) + err = symbol__disassemble_capstone(symfs_filename, sym, args); + else if (!strcmp(disassembler, "objdump")) + err = symbol__disassemble_objdump(symfs_filename, sym, args); + else + pr_debug("Unknown disassembler %s, skipping...\n", disassembler); + } + + if (err == 0) { + pr_debug("Disassembled with %s\nannotate.disassemblers=%s\n", + disassembler, options->disassemblers_str); + } out_remove_tmp: if (decomp) unlink(symfs_filename); diff --git a/tools/perf/util/disasm.h b/tools/perf/util/disasm.h index f56beedeb9da..c135db2416b5 100644 --- a/tools/perf/util/disasm.h +++ b/tools/perf/util/disasm.h @@ -4,7 +4,7 @@ #include "map_symbol.h" -#ifdef HAVE_DWARF_SUPPORT +#ifdef HAVE_LIBDW_SUPPORT #include "dwarf-aux.h" #endif @@ -39,11 +39,15 @@ struct arch { char memory_ref_char; char imm_char; } objdump; -#ifdef HAVE_DWARF_SUPPORT +#ifdef HAVE_LIBDW_SUPPORT void (*update_insn_state)(struct type_state *state, struct data_loc_info *dloc, Dwarf_Die *cu_die, struct disasm_line *dl); #endif + /** @e_machine: ELF machine associated with arch. */ + unsigned int e_machine; + /** @e_flags: Optional ELF flags associated with arch. */ + unsigned int e_flags; }; struct ins { diff --git a/tools/perf/util/dwarf-aux.c b/tools/perf/util/dwarf-aux.c index 92eb9c8dc3e5..559c953ca172 100644 --- a/tools/perf/util/dwarf-aux.c +++ b/tools/perf/util/dwarf-aux.c @@ -1182,7 +1182,6 @@ int die_get_varname(Dwarf_Die *vr_die, struct strbuf *buf) return ret < 0 ? ret : strbuf_addf(buf, "\t%s", dwarf_diename(vr_die)); } -#if defined(HAVE_DWARF_GETLOCATIONS_SUPPORT) || defined(HAVE_DWARF_CFI_SUPPORT) static int reg_from_dwarf_op(Dwarf_Op *op) { switch (op->atom) { @@ -1245,9 +1244,7 @@ static bool check_allowed_ops(Dwarf_Op *ops, size_t nops) } return true; } -#endif /* HAVE_DWARF_GETLOCATIONS_SUPPORT || HAVE_DWARF_CFI_SUPPORT */ -#ifdef HAVE_DWARF_GETLOCATIONS_SUPPORT /** * die_get_var_innermost_scope - Get innermost scope range of given variable DIE * @sp_die: a subprogram DIE @@ -1697,9 +1694,7 @@ void die_collect_global_vars(Dwarf_Die *cu_die, struct die_var_type **var_types) die_find_child(cu_die, __die_collect_global_vars_cb, (void *)var_types, &die_mem); } -#endif /* HAVE_DWARF_GETLOCATIONS_SUPPORT */ -#ifdef HAVE_DWARF_CFI_SUPPORT /** * die_get_cfa - Get frame base information * @dwarf: a Dwarf info @@ -1732,7 +1727,6 @@ int die_get_cfa(Dwarf *dwarf, u64 pc, int *preg, int *poffset) } return -1; } -#endif /* HAVE_DWARF_CFI_SUPPORT */ /* * die_has_loclist - Check if DW_AT_location of @vr_die is a location list diff --git a/tools/perf/util/dwarf-aux.h b/tools/perf/util/dwarf-aux.h index 336a3a183a78..892c8c5c23fc 100644 --- a/tools/perf/util/dwarf-aux.h +++ b/tools/perf/util/dwarf-aux.h @@ -156,8 +156,6 @@ Dwarf_Die *die_get_member_type(Dwarf_Die *type_die, int offset, Dwarf_Die *die_m /* Return type info where the pointer and offset point to */ Dwarf_Die *die_deref_ptr_type(Dwarf_Die *ptr_die, int offset, Dwarf_Die *die_mem); -#ifdef HAVE_DWARF_GETLOCATIONS_SUPPORT - /* Get byte offset range of given variable DIE */ int die_get_var_range(Dwarf_Die *sp_die, Dwarf_Die *vr_die, struct strbuf *buf); @@ -176,58 +174,7 @@ void die_collect_vars(Dwarf_Die *sc_die, struct die_var_type **var_types); /* Save all global variables in this CU */ void die_collect_global_vars(Dwarf_Die *cu_die, struct die_var_type **var_types); -#else /* HAVE_DWARF_GETLOCATIONS_SUPPORT */ - -static inline int die_get_var_range(Dwarf_Die *sp_die __maybe_unused, - Dwarf_Die *vr_die __maybe_unused, - struct strbuf *buf __maybe_unused) -{ - return -ENOTSUP; -} - -static inline Dwarf_Die *die_find_variable_by_reg(Dwarf_Die *sc_die __maybe_unused, - Dwarf_Addr pc __maybe_unused, - int reg __maybe_unused, - int *poffset __maybe_unused, - bool is_fbreg __maybe_unused, - Dwarf_Die *die_mem __maybe_unused) -{ - return NULL; -} - -static inline Dwarf_Die *die_find_variable_by_addr(Dwarf_Die *sc_die __maybe_unused, - Dwarf_Addr addr __maybe_unused, - Dwarf_Die *die_mem __maybe_unused, - int *offset __maybe_unused) -{ - return NULL; -} - -static inline void die_collect_vars(Dwarf_Die *sc_die __maybe_unused, - struct die_var_type **var_types __maybe_unused) -{ -} - -static inline void die_collect_global_vars(Dwarf_Die *cu_die __maybe_unused, - struct die_var_type **var_types __maybe_unused) -{ -} - -#endif /* HAVE_DWARF_GETLOCATIONS_SUPPORT */ - -#ifdef HAVE_DWARF_CFI_SUPPORT - /* Get the frame base information from CFA */ int die_get_cfa(Dwarf *dwarf, u64 pc, int *preg, int *poffset); -#else /* HAVE_DWARF_CFI_SUPPORT */ - -static inline int die_get_cfa(Dwarf *dwarf __maybe_unused, u64 pc __maybe_unused, - int *preg __maybe_unused, int *poffset __maybe_unused) -{ - return -1; -} - -#endif /* HAVE_DWARF_CFI_SUPPORT */ - #endif /* _DWARF_AUX_H */ diff --git a/tools/perf/arch/csky/util/dwarf-regs.c b/tools/perf/util/dwarf-regs-csky.c index ca86ecaeacbb..d38ef1f07f3e 100644 --- a/tools/perf/arch/csky/util/dwarf-regs.c +++ b/tools/perf/util/dwarf-regs-csky.c @@ -5,9 +5,8 @@ #include <stddef.h> #include <dwarf-regs.h> -#if defined(__CSKYABIV2__) -#define CSKY_MAX_REGS 73 -const char *csky_dwarf_regs_table[CSKY_MAX_REGS] = { +#define CSKY_ABIV2_MAX_REGS 73 +const char *csky_dwarf_regs_table_abiv2[CSKY_ABIV2_MAX_REGS] = { /* r0 ~ r8 */ "%a0", "%a1", "%a2", "%a3", "%regs0", "%regs1", "%regs2", "%regs3", /* r9 ~ r15 */ @@ -26,9 +25,9 @@ const char *csky_dwarf_regs_table[CSKY_MAX_REGS] = { NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, "%epc", }; -#else -#define CSKY_MAX_REGS 57 -const char *csky_dwarf_regs_table[CSKY_MAX_REGS] = { + +#define CSKY_ABIV1_MAX_REGS 57 +const char *csky_dwarf_regs_table_abiv1[CSKY_ABIV1_MAX_REGS] = { /* r0 ~ r8 */ "%sp", "%regs9", "%a0", "%a1", "%a2", "%a3", "%regs0", "%regs1", /* r9 ~ r15 */ @@ -41,9 +40,11 @@ const char *csky_dwarf_regs_table[CSKY_MAX_REGS] = { NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, "%epc", }; -#endif -const char *get_arch_regstr(unsigned int n) +const char *get_csky_regstr(unsigned int n, unsigned int flags) { - return (n < CSKY_MAX_REGS) ? csky_dwarf_regs_table[n] : NULL; + if (flags & EF_CSKY_ABIV2) + return (n < CSKY_ABIV2_MAX_REGS) ? csky_dwarf_regs_table_abiv2[n] : NULL; + + return (n < CSKY_ABIV1_MAX_REGS) ? csky_dwarf_regs_table_abiv1[n] : NULL; } diff --git a/tools/perf/util/dwarf-regs-powerpc.c b/tools/perf/util/dwarf-regs-powerpc.c new file mode 100644 index 000000000000..caf77a234c78 --- /dev/null +++ b/tools/perf/util/dwarf-regs-powerpc.c @@ -0,0 +1,61 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Mapping of DWARF debug register numbers into register names. + * + * Copyright (C) 2010 Ian Munsie, IBM Corporation. + */ + +#include <dwarf-regs.h> + +#define PPC_OP(op) (((op) >> 26) & 0x3F) +#define PPC_RA(a) (((a) >> 16) & 0x1f) +#define PPC_RT(t) (((t) >> 21) & 0x1f) +#define PPC_RB(b) (((b) >> 11) & 0x1f) +#define PPC_D(D) ((D) & 0xfffe) +#define PPC_DS(DS) ((DS) & 0xfffc) +#define OP_LD 58 +#define OP_STD 62 + +static int get_source_reg(u32 raw_insn) +{ + return PPC_RA(raw_insn); +} + +static int get_target_reg(u32 raw_insn) +{ + return PPC_RT(raw_insn); +} + +static int get_offset_opcode(u32 raw_insn) +{ + int opcode = PPC_OP(raw_insn); + + /* DS- form */ + if ((opcode == OP_LD) || (opcode == OP_STD)) + return PPC_DS(raw_insn); + else + return PPC_D(raw_insn); +} + +/* + * Fills the required fields for op_loc depending on if it + * is a source or target. + * D form: ins RT,D(RA) -> src_reg1 = RA, offset = D, dst_reg1 = RT + * DS form: ins RT,DS(RA) -> src_reg1 = RA, offset = DS, dst_reg1 = RT + * X form: ins RT,RA,RB -> src_reg1 = RA, src_reg2 = RB, dst_reg1 = RT + */ +void get_powerpc_regs(u32 raw_insn, int is_source, + struct annotated_op_loc *op_loc) +{ + if (is_source) + op_loc->reg1 = get_source_reg(raw_insn); + else + op_loc->reg1 = get_target_reg(raw_insn); + + if (op_loc->multi_regs) + op_loc->reg2 = PPC_RB(raw_insn); + + /* TODO: Implement offset handling for X Form */ + if ((op_loc->mem_ref) && (PPC_OP(raw_insn) != 31)) + op_loc->offset = get_offset_opcode(raw_insn); +} diff --git a/tools/perf/util/dwarf-regs-x86.c b/tools/perf/util/dwarf-regs-x86.c new file mode 100644 index 000000000000..7a55c65e8da6 --- /dev/null +++ b/tools/perf/util/dwarf-regs-x86.c @@ -0,0 +1,50 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * dwarf-regs.c : Mapping of DWARF debug register numbers into register names. + * Extracted from probe-finder.c + * + * Written by Masami Hiramatsu <mhiramat@redhat.com> + */ + +#include <errno.h> /* for EINVAL */ +#include <string.h> /* for strcmp */ +#include <linux/kernel.h> /* for ARRAY_SIZE */ +#include <dwarf-regs.h> + +struct dwarf_regs_idx { + const char *name; + int idx; +}; + +static const struct dwarf_regs_idx x86_regidx_table[] = { + { "rax", 0 }, { "eax", 0 }, { "ax", 0 }, { "al", 0 }, + { "rdx", 1 }, { "edx", 1 }, { "dx", 1 }, { "dl", 1 }, + { "rcx", 2 }, { "ecx", 2 }, { "cx", 2 }, { "cl", 2 }, + { "rbx", 3 }, { "edx", 3 }, { "bx", 3 }, { "bl", 3 }, + { "rsi", 4 }, { "esi", 4 }, { "si", 4 }, { "sil", 4 }, + { "rdi", 5 }, { "edi", 5 }, { "di", 5 }, { "dil", 5 }, + { "rbp", 6 }, { "ebp", 6 }, { "bp", 6 }, { "bpl", 6 }, + { "rsp", 7 }, { "esp", 7 }, { "sp", 7 }, { "spl", 7 }, + { "r8", 8 }, { "r8d", 8 }, { "r8w", 8 }, { "r8b", 8 }, + { "r9", 9 }, { "r9d", 9 }, { "r9w", 9 }, { "r9b", 9 }, + { "r10", 10 }, { "r10d", 10 }, { "r10w", 10 }, { "r10b", 10 }, + { "r11", 11 }, { "r11d", 11 }, { "r11w", 11 }, { "r11b", 11 }, + { "r12", 12 }, { "r12d", 12 }, { "r12w", 12 }, { "r12b", 12 }, + { "r13", 13 }, { "r13d", 13 }, { "r13w", 13 }, { "r13b", 13 }, + { "r14", 14 }, { "r14d", 14 }, { "r14w", 14 }, { "r14b", 14 }, + { "r15", 15 }, { "r15d", 15 }, { "r15w", 15 }, { "r15b", 15 }, + { "rip", DWARF_REG_PC }, +}; + +int get_x86_regnum(const char *name) +{ + unsigned int i; + + if (*name != '%') + return -EINVAL; + + for (i = 0; i < ARRAY_SIZE(x86_regidx_table); i++) + if (!strcmp(x86_regidx_table[i].name, name + 1)) + return x86_regidx_table[i].idx; + return -ENOENT; +} diff --git a/tools/perf/util/dwarf-regs.c b/tools/perf/util/dwarf-regs.c index 5b7f86c0063f..28a1cfdf26d4 100644 --- a/tools/perf/util/dwarf-regs.c +++ b/tools/perf/util/dwarf-regs.c @@ -13,14 +13,6 @@ #include <errno.h> #include <linux/kernel.h> -#ifndef EM_AARCH64 -#define EM_AARCH64 183 /* ARM 64 bit */ -#endif - -#ifndef EM_LOONGARCH -#define EM_LOONGARCH 258 /* LoongArch */ -#endif - /* Define const char * {arch}_register_tbl[] */ #define DEFINE_DWARF_REGSTR_TABLE #include "../arch/x86/include/dwarf-regs-table.h" @@ -28,6 +20,7 @@ #include "../arch/arm64/include/dwarf-regs-table.h" #include "../arch/sh/include/dwarf-regs-table.h" #include "../arch/powerpc/include/dwarf-regs-table.h" +#include "../arch/riscv/include/dwarf-regs-table.h" #include "../arch/s390/include/dwarf-regs-table.h" #include "../arch/sparc/include/dwarf-regs-table.h" #include "../arch/xtensa/include/dwarf-regs-table.h" @@ -37,11 +30,13 @@ #define __get_dwarf_regstr(tbl, n) (((n) < ARRAY_SIZE(tbl)) ? (tbl)[(n)] : NULL) /* Return architecture dependent register string (for kprobe-tracer) */ -const char *get_dwarf_regstr(unsigned int n, unsigned int machine) +const char *get_dwarf_regstr(unsigned int n, unsigned int machine, unsigned int flags) { + if (machine == EM_NONE) { + /* Generic arch - use host arch */ + machine = EM_HOST; + } switch (machine) { - case EM_NONE: /* Generic arch - use host arch */ - return get_arch_regstr(n); case EM_386: return __get_dwarf_regstr(x86_32_regstr_tbl, n); case EM_X86_64: @@ -50,6 +45,8 @@ const char *get_dwarf_regstr(unsigned int n, unsigned int machine) return __get_dwarf_regstr(arm_regstr_tbl, n); case EM_AARCH64: return __get_dwarf_regstr(aarch64_regstr_tbl, n); + case EM_CSKY: + return get_csky_regstr(n, flags); case EM_SH: return __get_dwarf_regstr(sh_regstr_tbl, n); case EM_S390: @@ -57,6 +54,8 @@ const char *get_dwarf_regstr(unsigned int n, unsigned int machine) case EM_PPC: case EM_PPC64: return __get_dwarf_regstr(powerpc_regstr_tbl, n); + case EM_RISCV: + return __get_dwarf_regstr(riscv_regstr_tbl, n); case EM_SPARC: case EM_SPARCV9: return __get_dwarf_regstr(sparc_regstr_tbl, n); @@ -72,13 +71,15 @@ const char *get_dwarf_regstr(unsigned int n, unsigned int machine) return NULL; } +#if EM_HOST != EM_X86_64 && EM_HOST != EM_386 __weak int get_arch_regnum(const char *name __maybe_unused) { return -ENOTSUP; } +#endif /* Return DWARF register number from architecture register name */ -int get_dwarf_regnum(const char *name, unsigned int machine) +int get_dwarf_regnum(const char *name, unsigned int machine, unsigned int flags __maybe_unused) { char *regname = strdup(name); int reg = -1; @@ -92,10 +93,21 @@ int get_dwarf_regnum(const char *name, unsigned int machine) if (p) *p = '\0'; + if (machine == EM_NONE) { + /* Generic arch - use host arch */ + machine = EM_HOST; + } switch (machine) { - case EM_NONE: /* Generic arch - use host arch */ +#if EM_HOST != EM_X86_64 && EM_HOST != EM_386 + case EM_HOST: reg = get_arch_regnum(regname); break; +#endif + case EM_X86_64: + fallthrough; + case EM_386: + reg = get_x86_regnum(regname); + break; default: pr_err("ELF MACHINE %x is not supported.\n", machine); } diff --git a/tools/perf/util/env.c b/tools/perf/util/env.c index 1edbccfc3281..e2843ca2edd9 100644 --- a/tools/perf/util/env.c +++ b/tools/perf/util/env.c @@ -5,12 +5,14 @@ #include "util/header.h" #include "linux/compiler.h" #include <linux/ctype.h> +#include <linux/string.h> #include <linux/zalloc.h> #include "cgroup.h" #include <errno.h> #include <sys/utsname.h> #include <stdlib.h> #include <string.h> +#include "pmu.h" #include "pmus.h" #include "strbuf.h" #include "trace/beauty/beauty.h" @@ -372,7 +374,8 @@ error: int perf_env__read_cpuid(struct perf_env *env) { char cpuid[128]; - int err = get_cpuid(cpuid, sizeof(cpuid)); + struct perf_cpu cpu = {-1}; + int err = get_cpuid(cpuid, sizeof(cpuid), cpu); if (err) return err; @@ -639,3 +642,25 @@ void perf_env__find_br_cntr_info(struct perf_env *env, env->pmu_caps->br_cntr_width; } } + +bool perf_env__is_x86_amd_cpu(struct perf_env *env) +{ + static int is_amd; /* 0: Uninitialized, 1: Yes, -1: No */ + + if (is_amd == 0) + is_amd = env->cpuid && strstarts(env->cpuid, "AuthenticAMD") ? 1 : -1; + + return is_amd >= 1 ? true : false; +} + +bool x86__is_amd_cpu(void) +{ + struct perf_env env = { .total_mem = 0, }; + bool is_amd; + + perf_env__cpuid(&env); + is_amd = perf_env__is_x86_amd_cpu(&env); + perf_env__exit(&env); + + return is_amd; +} diff --git a/tools/perf/util/env.h b/tools/perf/util/env.h index 51b36c36019b..ae604c4edbb7 100644 --- a/tools/perf/util/env.h +++ b/tools/perf/util/env.h @@ -195,4 +195,8 @@ bool perf_env__has_pmu_mapping(struct perf_env *env, const char *pmu_name); void perf_env__find_br_cntr_info(struct perf_env *env, unsigned int *nr, unsigned int *width); + +bool x86__is_amd_cpu(void); +bool perf_env__is_x86_amd_cpu(struct perf_env *env); + #endif /* __PERF_ENV_H */ diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index f8742e6230a5..2744c54f404e 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h @@ -66,6 +66,7 @@ enum { PERF_IP_FLAG_VMEXIT = 1ULL << 12, PERF_IP_FLAG_INTR_DISABLE = 1ULL << 13, PERF_IP_FLAG_INTR_TOGGLE = 1ULL << 14, + PERF_IP_FLAG_BRANCH_MISS = 1ULL << 15, }; #define PERF_IP_FLAG_CHARS "bcrosyiABExghDt" diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index f14b7e6ff1dc..f0dd174e2deb 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -48,6 +48,7 @@ #include <sys/mman.h> #include <sys/prctl.h> #include <sys/timerfd.h> +#include <sys/wait.h> #include <linux/bitops.h> #include <linux/hash.h> @@ -319,62 +320,6 @@ struct evsel *evlist__add_sched_switch(struct evlist *evlist, bool system_wide) } #endif -int evlist__add_attrs(struct evlist *evlist, struct perf_event_attr *attrs, size_t nr_attrs) -{ - struct evsel *evsel, *n; - LIST_HEAD(head); - size_t i; - - for (i = 0; i < nr_attrs; i++) { - evsel = evsel__new_idx(attrs + i, evlist->core.nr_entries + i); - if (evsel == NULL) - goto out_delete_partial_list; - list_add_tail(&evsel->core.node, &head); - } - - evlist__splice_list_tail(evlist, &head); - - return 0; - -out_delete_partial_list: - __evlist__for_each_entry_safe(&head, n, evsel) - evsel__delete(evsel); - return -1; -} - -int __evlist__add_default_attrs(struct evlist *evlist, struct perf_event_attr *attrs, size_t nr_attrs) -{ - size_t i; - - for (i = 0; i < nr_attrs; i++) - event_attr_init(attrs + i); - - return evlist__add_attrs(evlist, attrs, nr_attrs); -} - -__weak int arch_evlist__add_default_attrs(struct evlist *evlist, - struct perf_event_attr *attrs, - size_t nr_attrs) -{ - if (!nr_attrs) - return 0; - - return __evlist__add_default_attrs(evlist, attrs, nr_attrs); -} - -struct evsel *evlist__find_tracepoint_by_id(struct evlist *evlist, int id) -{ - struct evsel *evsel; - - evlist__for_each_entry(evlist, evsel) { - if (evsel->core.attr.type == PERF_TYPE_TRACEPOINT && - (int)evsel->core.attr.config == id) - return evsel; - } - - return NULL; -} - struct evsel *evlist__find_tracepoint_by_name(struct evlist *evlist, const char *name) { struct evsel *evsel; @@ -1199,11 +1144,6 @@ int evlist__set_tp_filter_pids(struct evlist *evlist, size_t npids, pid_t *pids) return ret; } -int evlist__set_tp_filter_pid(struct evlist *evlist, pid_t pid) -{ - return evlist__set_tp_filter_pids(evlist, 1, &pid); -} - int evlist__append_tp_filter_pids(struct evlist *evlist, size_t npids, pid_t *pids) { char *filter = asprintf__tp_filter_pids(npids, pids); @@ -1484,6 +1424,8 @@ int evlist__prepare_workload(struct evlist *evlist, struct target *target, const int child_ready_pipe[2], go_pipe[2]; char bf; + evlist->workload.cork_fd = -1; + if (pipe(child_ready_pipe) < 0) { perror("failed to create 'ready' pipe"); return -1; @@ -1536,7 +1478,7 @@ int evlist__prepare_workload(struct evlist *evlist, struct target *target, const * For cancelling the workload without actually running it, * the parent will just close workload.cork_fd, without writing * anything, i.e. read will return zero and we just exit() - * here. + * here (See evlist__cancel_workload()). */ if (ret != 1) { if (ret == -1) @@ -1600,7 +1542,7 @@ out_close_ready_pipe: int evlist__start_workload(struct evlist *evlist) { - if (evlist->workload.cork_fd > 0) { + if (evlist->workload.cork_fd >= 0) { char bf = 0; int ret; /* @@ -1611,12 +1553,24 @@ int evlist__start_workload(struct evlist *evlist) perror("unable to write to pipe"); close(evlist->workload.cork_fd); + evlist->workload.cork_fd = -1; return ret; } return 0; } +void evlist__cancel_workload(struct evlist *evlist) +{ + int status; + + if (evlist->workload.cork_fd >= 0) { + close(evlist->workload.cork_fd); + evlist->workload.cork_fd = -1; + waitpid(evlist->workload.pid, &status, WNOHANG); + } +} + int evlist__parse_sample(struct evlist *evlist, union perf_event *event, struct perf_sample *sample) { struct evsel *evsel = evlist__event2evsel(evlist, event); @@ -2619,7 +2573,8 @@ void evlist__uniquify_name(struct evlist *evlist) else attributes = empty_attributes; - if (asprintf(&new_name, "%s/%s/%s", pos->pmu_name, pos->name, attributes + 1)) { + if (asprintf(&new_name, "%s/%s/%s", pos->pmu ? pos->pmu->name : "", + pos->name, attributes + 1)) { free(pos->name); pos->name = new_name; } else { diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index bcc1c6984bb5..adddb1db1ad2 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -102,18 +102,6 @@ void evlist__delete(struct evlist *evlist); void evlist__add(struct evlist *evlist, struct evsel *entry); void evlist__remove(struct evlist *evlist, struct evsel *evsel); -int evlist__add_attrs(struct evlist *evlist, struct perf_event_attr *attrs, size_t nr_attrs); - -int __evlist__add_default_attrs(struct evlist *evlist, - struct perf_event_attr *attrs, size_t nr_attrs); - -int arch_evlist__add_default_attrs(struct evlist *evlist, - struct perf_event_attr *attrs, - size_t nr_attrs); - -#define evlist__add_default_attrs(evlist, array) \ - arch_evlist__add_default_attrs(evlist, array, ARRAY_SIZE(array)) - int arch_evlist__cmp(const struct evsel *lhs, const struct evsel *rhs); int evlist__add_dummy(struct evlist *evlist); @@ -144,7 +132,6 @@ int __evlist__set_tracepoints_handlers(struct evlist *evlist, __evlist__set_tracepoints_handlers(evlist, array, ARRAY_SIZE(array)) int evlist__set_tp_filter(struct evlist *evlist, const char *filter); -int evlist__set_tp_filter_pid(struct evlist *evlist, pid_t pid); int evlist__set_tp_filter_pids(struct evlist *evlist, size_t npids, pid_t *pids); int evlist__append_tp_filter(struct evlist *evlist, const char *filter); @@ -152,7 +139,6 @@ int evlist__append_tp_filter(struct evlist *evlist, const char *filter); int evlist__append_tp_filter_pid(struct evlist *evlist, pid_t pid); int evlist__append_tp_filter_pids(struct evlist *evlist, size_t npids, pid_t *pids); -struct evsel *evlist__find_tracepoint_by_id(struct evlist *evlist, int id); struct evsel *evlist__find_tracepoint_by_name(struct evlist *evlist, const char *name); int evlist__add_pollfd(struct evlist *evlist, int fd); @@ -186,6 +172,7 @@ int evlist__prepare_workload(struct evlist *evlist, struct target *target, const char *argv[], bool pipe_output, void (*exec_error)(int signo, siginfo_t *info, void *ucontext)); int evlist__start_workload(struct evlist *evlist); +void evlist__cancel_workload(struct evlist *evlist); struct option; diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index dbf9c8cee3c5..d22c5df1701e 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -5,12 +5,16 @@ * Parts came from builtin-{top,stat,record}.c, see those files for further * copyright notes. */ +/* + * Powerpc needs __SANE_USERSPACE_TYPES__ before <linux/types.h> to select + * 'int-ll64.h' and avoid compile warnings when printing __u64 with %llu. + */ +#define __SANE_USERSPACE_TYPES__ #include <byteswap.h> #include <errno.h> #include <inttypes.h> #include <linux/bitops.h> -#include <api/io.h> #include <api/fs/fs.h> #include <api/fs/tracing_path.h> #include <linux/hw_breakpoint.h> @@ -20,6 +24,7 @@ #include <linux/zalloc.h> #include <sys/ioctl.h> #include <sys/resource.h> +#include <sys/syscall.h> #include <sys/types.h> #include <dirent.h> #include <stdlib.h> @@ -51,6 +56,8 @@ #include "off_cpu.h" #include "pmu.h" #include "pmus.h" +#include "hwmon_pmu.h" +#include "tool_pmu.h" #include "rlimit.h" #include "../perf-sys.h" #include "util/parse-branch-options.h" @@ -64,46 +71,135 @@ #include <linux/ctype.h> #ifdef HAVE_LIBTRACEEVENT -#include <traceevent/event-parse.h> +#include <event-parse.h> #endif struct perf_missing_features perf_missing_features; static clockid_t clockid; -static const char *const perf_tool_event__tool_names[PERF_TOOL_MAX] = { - NULL, - "duration_time", - "user_time", - "system_time", -}; - -const char *perf_tool_event__to_str(enum perf_tool_event ev) +static int evsel__no_extra_init(struct evsel *evsel __maybe_unused) { - if (ev > PERF_TOOL_NONE && ev < PERF_TOOL_MAX) - return perf_tool_event__tool_names[ev]; - - return NULL; + return 0; } -enum perf_tool_event perf_tool_event__from_str(const char *str) +static bool test_attr__enabled(void) { - int i; + static bool test_attr__enabled; + static bool test_attr__enabled_tested; + + if (!test_attr__enabled_tested) { + char *dir = getenv("PERF_TEST_ATTR"); - perf_tool_event__for_each_event(i) { - if (!strcmp(str, perf_tool_event__tool_names[i])) - return i; + test_attr__enabled = (dir != NULL); + test_attr__enabled_tested = true; } - return PERF_TOOL_NONE; + return test_attr__enabled; } +#define __WRITE_ASS(str, fmt, data) \ +do { \ + if (fprintf(file, #str "=%"fmt "\n", data) < 0) { \ + perror("test attr - failed to write event file"); \ + fclose(file); \ + return -1; \ + } \ +} while (0) -static int evsel__no_extra_init(struct evsel *evsel __maybe_unused) +#define WRITE_ASS(field, fmt) __WRITE_ASS(field, fmt, attr->field) + +static int store_event(struct perf_event_attr *attr, pid_t pid, struct perf_cpu cpu, + int fd, int group_fd, unsigned long flags) { + FILE *file; + char path[PATH_MAX]; + char *dir = getenv("PERF_TEST_ATTR"); + + snprintf(path, PATH_MAX, "%s/event-%d-%llu-%d", dir, + attr->type, attr->config, fd); + + file = fopen(path, "w+"); + if (!file) { + perror("test attr - failed to open event file"); + return -1; + } + + if (fprintf(file, "[event-%d-%llu-%d]\n", + attr->type, attr->config, fd) < 0) { + perror("test attr - failed to write event file"); + fclose(file); + return -1; + } + + /* syscall arguments */ + __WRITE_ASS(fd, "d", fd); + __WRITE_ASS(group_fd, "d", group_fd); + __WRITE_ASS(cpu, "d", cpu.cpu); + __WRITE_ASS(pid, "d", pid); + __WRITE_ASS(flags, "lu", flags); + + /* struct perf_event_attr */ + WRITE_ASS(type, PRIu32); + WRITE_ASS(size, PRIu32); + WRITE_ASS(config, "llu"); + WRITE_ASS(sample_period, "llu"); + WRITE_ASS(sample_type, "llu"); + WRITE_ASS(read_format, "llu"); + WRITE_ASS(disabled, "d"); + WRITE_ASS(inherit, "d"); + WRITE_ASS(pinned, "d"); + WRITE_ASS(exclusive, "d"); + WRITE_ASS(exclude_user, "d"); + WRITE_ASS(exclude_kernel, "d"); + WRITE_ASS(exclude_hv, "d"); + WRITE_ASS(exclude_idle, "d"); + WRITE_ASS(mmap, "d"); + WRITE_ASS(comm, "d"); + WRITE_ASS(freq, "d"); + WRITE_ASS(inherit_stat, "d"); + WRITE_ASS(enable_on_exec, "d"); + WRITE_ASS(task, "d"); + WRITE_ASS(watermark, "d"); + WRITE_ASS(precise_ip, "d"); + WRITE_ASS(mmap_data, "d"); + WRITE_ASS(sample_id_all, "d"); + WRITE_ASS(exclude_host, "d"); + WRITE_ASS(exclude_guest, "d"); + WRITE_ASS(exclude_callchain_kernel, "d"); + WRITE_ASS(exclude_callchain_user, "d"); + WRITE_ASS(mmap2, "d"); + WRITE_ASS(comm_exec, "d"); + WRITE_ASS(context_switch, "d"); + WRITE_ASS(write_backward, "d"); + WRITE_ASS(namespaces, "d"); + WRITE_ASS(use_clockid, "d"); + WRITE_ASS(wakeup_events, PRIu32); + WRITE_ASS(bp_type, PRIu32); + WRITE_ASS(config1, "llu"); + WRITE_ASS(config2, "llu"); + WRITE_ASS(branch_sample_type, "llu"); + WRITE_ASS(sample_regs_user, "llu"); + WRITE_ASS(sample_stack_user, PRIu32); + + fclose(file); return 0; } -void __weak test_attr__ready(void) { } +#undef __WRITE_ASS +#undef WRITE_ASS + +static void test_attr__open(struct perf_event_attr *attr, pid_t pid, struct perf_cpu cpu, + int fd, int group_fd, unsigned long flags) +{ + int errno_saved = errno; + + if ((fd != -1) && store_event(attr, pid, cpu, fd, group_fd, flags)) { + pr_err("test attr FAILED"); + exit(128); + } + + errno = errno_saved; +} static void evsel__no_extra_fini(struct evsel *evsel __maybe_unused) { @@ -296,9 +392,9 @@ void evsel__init(struct evsel *evsel, evsel->metric_events = NULL; evsel->per_pkg_mask = NULL; evsel->collect_stat = false; - evsel->pmu_name = NULL; evsel->group_pmu_name = NULL; evsel->skippable = false; + evsel->alternate_hw_config = PERF_COUNT_HW_MAX; } struct evsel *evsel__new_idx(struct perf_event_attr *attr, int idx) @@ -393,11 +489,6 @@ struct evsel *evsel__clone(struct evsel *orig) if (evsel->group_name == NULL) goto out_err; } - if (orig->pmu_name) { - evsel->pmu_name = strdup(orig->pmu_name); - if (evsel->pmu_name == NULL) - goto out_err; - } if (orig->group_pmu_name) { evsel->group_pmu_name = strdup(orig->group_pmu_name); if (evsel->group_pmu_name == NULL) @@ -421,7 +512,6 @@ struct evsel *evsel__clone(struct evsel *orig) evsel->core.leader = orig->core.leader; evsel->max_events = orig->max_events; - evsel->tool_event = orig->tool_event; free((char *)evsel->unit); evsel->unit = strdup(orig->unit); if (evsel->unit == NULL) @@ -445,6 +535,8 @@ struct evsel *evsel__clone(struct evsel *orig) if (evsel__copy_config_terms(evsel, orig) < 0) goto out_err; + evsel->alternate_hw_config = orig->alternate_hw_config; + return evsel; out_err: @@ -548,7 +640,6 @@ static int evsel__add_modifiers(struct evsel *evsel, char *bf, size_t size) { int colon = 0, r = 0; struct perf_event_attr *attr = &evsel->core.attr; - bool exclude_guest_default = false; #define MOD_PRINT(context, mod) do { \ if (!attr->exclude_##context) { \ @@ -560,17 +651,15 @@ static int evsel__add_modifiers(struct evsel *evsel, char *bf, size_t size) MOD_PRINT(kernel, 'k'); MOD_PRINT(user, 'u'); MOD_PRINT(hv, 'h'); - exclude_guest_default = true; } if (attr->precise_ip) { if (!colon) colon = ++r; r += scnprintf(bf + r, size - r, "%.*s", attr->precise_ip, "ppp"); - exclude_guest_default = true; } - if (attr->exclude_host || attr->exclude_guest == exclude_guest_default) { + if (attr->exclude_host || attr->exclude_guest) { MOD_PRINT(host, 'H'); MOD_PRINT(guest, 'G'); } @@ -617,11 +706,6 @@ static int evsel__sw_name(struct evsel *evsel, char *bf, size_t size) return r + evsel__add_modifiers(evsel, bf + r, size - r); } -static int evsel__tool_name(enum perf_tool_event ev, char *bf, size_t size) -{ - return scnprintf(bf, size, "%s", perf_tool_event__to_str(ev)); -} - static int __evsel__bp_name(char *bf, size_t size, u64 addr, u64 type) { int r; @@ -772,10 +856,7 @@ const char *evsel__name(struct evsel *evsel) break; case PERF_TYPE_SOFTWARE: - if (evsel__is_tool(evsel)) - evsel__tool_name(evsel__tool_event(evsel), bf, sizeof(bf)); - else - evsel__sw_name(evsel, bf, sizeof(bf)); + evsel__sw_name(evsel, bf, sizeof(bf)); break; case PERF_TYPE_TRACEPOINT: @@ -786,6 +867,10 @@ const char *evsel__name(struct evsel *evsel) evsel__bp_name(evsel, bf, sizeof(bf)); break; + case PERF_PMU_TYPE_TOOL: + scnprintf(bf, sizeof(bf), "%s", evsel__tool_pmu_event_name(evsel)); + break; + default: scnprintf(bf, sizeof(bf), "unknown attr type: %d", evsel->core.attr.type); @@ -811,7 +896,7 @@ const char *evsel__metric_id(const struct evsel *evsel) return evsel->metric_id; if (evsel__is_tool(evsel)) - return perf_tool_event__to_str(evsel__tool_event(evsel)); + return evsel__tool_pmu_event_name(evsel); return "unknown"; } @@ -862,7 +947,6 @@ static void __evsel__config_callchain(struct evsel *evsel, struct record_opts *o { bool function = evsel__is_function_event(evsel); struct perf_event_attr *attr = &evsel->core.attr; - const char *arch = perf_env__arch(evsel__env(evsel)); evsel__set_sample_bit(evsel, CALLCHAIN); @@ -893,6 +977,8 @@ static void __evsel__config_callchain(struct evsel *evsel, struct record_opts *o if (param->record_mode == CALLCHAIN_DWARF) { if (!function) { + const char *arch = perf_env__arch(evsel__env(evsel)); + evsel__set_sample_bit(evsel, REGS_USER); evsel__set_sample_bit(evsel, STACK_USER); if (opts->sample_user_regs && @@ -1150,7 +1236,7 @@ void evsel__config(struct evsel *evsel, struct record_opts *opts, bool per_cpu = opts->target.default_per_cpu && !opts->target.per_thread; attr->sample_id_all = perf_missing_features.sample_id_all ? 0 : 1; - attr->inherit = !opts->no_inherit; + attr->inherit = target__has_cpu(&opts->target) ? 0 : !opts->no_inherit; attr->write_backward = opts->overwrite ? 1 : 0; attr->read_format = PERF_FORMAT_LOST; @@ -1172,7 +1258,15 @@ void evsel__config(struct evsel *evsel, struct record_opts *opts, */ if (leader->core.nr_members > 1) { attr->read_format |= PERF_FORMAT_GROUP; - attr->inherit = 0; + } + + /* + * Inherit + SAMPLE_READ requires SAMPLE_TID in the read_format + */ + if (attr->inherit) { + evsel__set_sample_bit(evsel, TID); + evsel->core.attr.read_format |= + PERF_FORMAT_ID; } } @@ -1494,7 +1588,6 @@ void evsel__exit(struct evsel *evsel) zfree(&evsel->group_name); zfree(&evsel->name); zfree(&evsel->filter); - zfree(&evsel->pmu_name); zfree(&evsel->group_pmu_name); zfree(&evsel->unit); zfree(&evsel->metric_id); @@ -1503,8 +1596,8 @@ void evsel__exit(struct evsel *evsel) evsel->per_pkg_mask = NULL; zfree(&evsel->metric_events); perf_evsel__object.fini(evsel); - if (evsel__tool_event(evsel) == PERF_TOOL_SYSTEM_TIME || - evsel__tool_event(evsel) == PERF_TOOL_USER_TIME) + if (evsel__tool_event(evsel) == TOOL_PMU__EVENT_SYSTEM_TIME || + evsel__tool_event(evsel) == TOOL_PMU__EVENT_USER_TIME) xyarray__delete(evsel->start_times); } @@ -1684,171 +1777,31 @@ static int evsel__read_group(struct evsel *leader, int cpu_map_idx, int thread) return evsel__process_group_data(leader, cpu_map_idx, thread, data); } -static bool read_until_char(struct io *io, char e) -{ - int c; - - do { - c = io__get_char(io); - if (c == -1) - return false; - } while (c != e); - return true; -} - -static int read_stat_field(int fd, struct perf_cpu cpu, int field, __u64 *val) -{ - char buf[256]; - struct io io; - int i; - - io__init(&io, fd, buf, sizeof(buf)); - - /* Skip lines to relevant CPU. */ - for (i = -1; i < cpu.cpu; i++) { - if (!read_until_char(&io, '\n')) - return -EINVAL; - } - /* Skip to "cpu". */ - if (io__get_char(&io) != 'c') return -EINVAL; - if (io__get_char(&io) != 'p') return -EINVAL; - if (io__get_char(&io) != 'u') return -EINVAL; - - /* Skip N of cpuN. */ - if (!read_until_char(&io, ' ')) - return -EINVAL; - - i = 1; - while (true) { - if (io__get_dec(&io, val) != ' ') - break; - if (field == i) - return 0; - i++; - } - return -EINVAL; -} - -static int read_pid_stat_field(int fd, int field, __u64 *val) +bool __evsel__match(const struct evsel *evsel, u32 type, u64 config) { - char buf[256]; - struct io io; - int c, i; - io__init(&io, fd, buf, sizeof(buf)); - if (io__get_dec(&io, val) != ' ') - return -EINVAL; - if (field == 1) - return 0; - - /* Skip comm. */ - if (io__get_char(&io) != '(' || !read_until_char(&io, ')')) - return -EINVAL; - if (field == 2) - return -EINVAL; /* String can't be returned. */ - - /* Skip state */ - if (io__get_char(&io) != ' ' || io__get_char(&io) == -1) - return -EINVAL; - if (field == 3) - return -EINVAL; /* String can't be returned. */ - - /* Loop over numeric fields*/ - if (io__get_char(&io) != ' ') - return -EINVAL; + u32 e_type = evsel->core.attr.type; + u64 e_config = evsel->core.attr.config; - i = 4; - while (true) { - c = io__get_dec(&io, val); - if (c == -1) - return -EINVAL; - if (c == -2) { - /* Assume a -ve was read */ - c = io__get_dec(&io, val); - *val *= -1; - } - if (c != ' ') - return -EINVAL; - if (field == i) - return 0; - i++; + if (e_type != type) { + return type == PERF_TYPE_HARDWARE && evsel->pmu && evsel->pmu->is_core && + evsel->alternate_hw_config == config; } - return -EINVAL; -} - -static int evsel__read_tool(struct evsel *evsel, int cpu_map_idx, int thread) -{ - __u64 *start_time, cur_time, delta_start; - int fd, err = 0; - struct perf_counts_values *count; - bool adjust = false; - count = perf_counts(evsel->counts, cpu_map_idx, thread); + if ((type == PERF_TYPE_HARDWARE || type == PERF_TYPE_HW_CACHE) && + perf_pmus__supports_extended_type()) + e_config &= PERF_HW_EVENT_MASK; - switch (evsel__tool_event(evsel)) { - case PERF_TOOL_DURATION_TIME: - /* - * Pretend duration_time is only on the first CPU and thread, or - * else aggregation will scale duration_time by the number of - * CPUs/threads. - */ - start_time = &evsel->start_time; - if (cpu_map_idx == 0 && thread == 0) - cur_time = rdclock(); - else - cur_time = *start_time; - break; - case PERF_TOOL_USER_TIME: - case PERF_TOOL_SYSTEM_TIME: { - bool system = evsel__tool_event(evsel) == PERF_TOOL_SYSTEM_TIME; - - start_time = xyarray__entry(evsel->start_times, cpu_map_idx, thread); - fd = FD(evsel, cpu_map_idx, thread); - lseek(fd, SEEK_SET, 0); - if (evsel->pid_stat) { - /* The event exists solely on 1 CPU. */ - if (cpu_map_idx == 0) - err = read_pid_stat_field(fd, system ? 15 : 14, &cur_time); - else - cur_time = 0; - } else { - /* The event is for all threads. */ - if (thread == 0) { - struct perf_cpu cpu = perf_cpu_map__cpu(evsel->core.cpus, - cpu_map_idx); - - err = read_stat_field(fd, cpu, system ? 3 : 1, &cur_time); - } else { - cur_time = 0; - } - } - adjust = true; - break; - } - case PERF_TOOL_NONE: - case PERF_TOOL_MAX: - default: - err = -EINVAL; - } - if (err) - return err; - - delta_start = cur_time - *start_time; - if (adjust) { - __u64 ticks_per_sec = sysconf(_SC_CLK_TCK); - - delta_start *= 1000000000 / ticks_per_sec; - } - count->val = delta_start; - count->ena = count->run = delta_start; - count->lost = 0; - return 0; + return e_config == config; } int evsel__read_counter(struct evsel *evsel, int cpu_map_idx, int thread) { if (evsel__is_tool(evsel)) - return evsel__read_tool(evsel, cpu_map_idx, thread); + return evsel__tool_pmu_read(evsel, cpu_map_idx, thread); + + if (evsel__is_hwmon(evsel)) + return evsel__hwmon_pmu_read(evsel, cpu_map_idx, thread); if (evsel__is_retire_lat(evsel)) return evsel__read_retire_lat(evsel, cpu_map_idx, thread); @@ -2042,6 +1995,7 @@ static struct perf_thread_map *empty_thread_map; static int __evsel__prepare_open(struct evsel *evsel, struct perf_cpu_map *cpus, struct perf_thread_map *threads) { + int ret = 0; int nthreads = perf_thread_map__nr(threads); if ((perf_missing_features.write_backward && evsel->core.attr.write_backward) || @@ -2072,23 +2026,21 @@ static int __evsel__prepare_open(struct evsel *evsel, struct perf_cpu_map *cpus, perf_evsel__alloc_fd(&evsel->core, perf_cpu_map__nr(cpus), nthreads) < 0) return -ENOMEM; - if ((evsel__tool_event(evsel) == PERF_TOOL_SYSTEM_TIME || - evsel__tool_event(evsel) == PERF_TOOL_USER_TIME) && - !evsel->start_times) { - evsel->start_times = xyarray__new(perf_cpu_map__nr(cpus), nthreads, sizeof(__u64)); - if (!evsel->start_times) - return -ENOMEM; - } + if (evsel__is_tool(evsel)) + ret = evsel__tool_pmu_prepare_open(evsel, cpus, nthreads); evsel->open_flags = PERF_FLAG_FD_CLOEXEC; if (evsel->cgrp) evsel->open_flags |= PERF_FLAG_PID_CGROUP; - return 0; + return ret; } static void evsel__disable_missing_features(struct evsel *evsel) { + if (perf_missing_features.inherit_sample_read && evsel->core.attr.inherit && + (evsel->core.attr.sample_type & PERF_SAMPLE_READ)) + evsel->core.attr.inherit = 0; if (perf_missing_features.branch_counters) evsel->core.attr.branch_sample_type &= ~PERF_SAMPLE_BRANCH_COUNTERS; if (perf_missing_features.read_lost) @@ -2138,120 +2090,346 @@ int evsel__prepare_open(struct evsel *evsel, struct perf_cpu_map *cpus, return err; } -bool evsel__detect_missing_features(struct evsel *evsel) +static bool has_attr_feature(struct perf_event_attr *attr, unsigned long flags) +{ + int fd = syscall(SYS_perf_event_open, attr, /*pid=*/0, /*cpu=*/-1, + /*group_fd=*/-1, flags); + close(fd); + + if (fd < 0) { + attr->exclude_kernel = 1; + + fd = syscall(SYS_perf_event_open, attr, /*pid=*/0, /*cpu=*/-1, + /*group_fd=*/-1, flags); + close(fd); + } + + if (fd < 0) { + attr->exclude_hv = 1; + + fd = syscall(SYS_perf_event_open, attr, /*pid=*/0, /*cpu=*/-1, + /*group_fd=*/-1, flags); + close(fd); + } + + if (fd < 0) { + attr->exclude_guest = 1; + + fd = syscall(SYS_perf_event_open, attr, /*pid=*/0, /*cpu=*/-1, + /*group_fd=*/-1, flags); + close(fd); + } + + attr->exclude_kernel = 0; + attr->exclude_guest = 0; + attr->exclude_hv = 0; + + return fd >= 0; +} + +static void evsel__detect_missing_pmu_features(struct evsel *evsel) { + struct perf_event_attr attr = { + .type = evsel->core.attr.type, + .config = evsel->core.attr.config, + .disabled = 1, + }; + struct perf_pmu *pmu = evsel->pmu; + int old_errno; + + old_errno = errno; + + if (pmu == NULL) + pmu = evsel->pmu = evsel__find_pmu(evsel); + + if (pmu == NULL || pmu->missing_features.checked) + goto out; + /* * Must probe features in the order they were added to the - * perf_event_attr interface. + * perf_event_attr interface. These are kernel core limitation but + * specific to PMUs with branch stack. So we can detect with the given + * hardware event and stop on the first one succeeded. */ - if (!perf_missing_features.branch_counters && - (evsel->core.attr.branch_sample_type & PERF_SAMPLE_BRANCH_COUNTERS)) { - perf_missing_features.branch_counters = true; - pr_debug2("switching off branch counters support\n"); + + /* Please add new feature detection here. */ + + attr.exclude_guest = 1; + if (has_attr_feature(&attr, /*flags=*/0)) + goto found; + pmu->missing_features.exclude_guest = true; + pr_debug2("switching off exclude_guest for PMU %s\n", pmu->name); + +found: + pmu->missing_features.checked = true; +out: + errno = old_errno; +} + +static void evsel__detect_missing_brstack_features(struct evsel *evsel) +{ + static bool detection_done = false; + struct perf_event_attr attr = { + .type = evsel->core.attr.type, + .config = evsel->core.attr.config, + .disabled = 1, + .sample_type = PERF_SAMPLE_BRANCH_STACK, + .sample_period = 1000, + }; + int old_errno; + + if (detection_done) + return; + + old_errno = errno; + + /* + * Must probe features in the order they were added to the + * perf_event_attr interface. These are PMU specific limitation + * so we can detect with the given hardware event and stop on the + * first one succeeded. + */ + + /* Please add new feature detection here. */ + + attr.branch_sample_type = PERF_SAMPLE_BRANCH_COUNTERS; + if (has_attr_feature(&attr, /*flags=*/0)) + goto found; + perf_missing_features.branch_counters = true; + pr_debug2("switching off branch counters support\n"); + + attr.branch_sample_type = PERF_SAMPLE_BRANCH_HW_INDEX; + if (has_attr_feature(&attr, /*flags=*/0)) + goto found; + perf_missing_features.branch_hw_idx = true; + pr_debug2("switching off branch HW index support\n"); + + attr.branch_sample_type = PERF_SAMPLE_BRANCH_NO_CYCLES | PERF_SAMPLE_BRANCH_NO_FLAGS; + if (has_attr_feature(&attr, /*flags=*/0)) + goto found; + perf_missing_features.lbr_flags = true; + pr_debug2_peo("switching off branch sample type no (cycles/flags)\n"); + +found: + detection_done = true; + errno = old_errno; +} + +static bool evsel__detect_missing_features(struct evsel *evsel) +{ + static bool detection_done = false; + struct perf_event_attr attr = { + .type = PERF_TYPE_SOFTWARE, + .config = PERF_COUNT_SW_TASK_CLOCK, + .disabled = 1, + }; + int old_errno; + + evsel__detect_missing_pmu_features(evsel); + + if (evsel__has_br_stack(evsel)) + evsel__detect_missing_brstack_features(evsel); + + if (detection_done) + goto check; + + old_errno = errno; + + /* + * Must probe features in the order they were added to the + * perf_event_attr interface. These are kernel core limitation + * not PMU-specific so we can detect with a software event and + * stop on the first one succeeded. + */ + + /* Please add new feature detection here. */ + + attr.inherit = true; + attr.sample_type = PERF_SAMPLE_READ; + if (has_attr_feature(&attr, /*flags=*/0)) + goto found; + perf_missing_features.inherit_sample_read = true; + pr_debug2("Using PERF_SAMPLE_READ / :S modifier is not compatible with inherit, falling back to no-inherit.\n"); + attr.inherit = false; + attr.sample_type = 0; + + attr.read_format = PERF_FORMAT_LOST; + if (has_attr_feature(&attr, /*flags=*/0)) + goto found; + perf_missing_features.read_lost = true; + pr_debug2("switching off PERF_FORMAT_LOST support\n"); + attr.read_format = 0; + + attr.sample_type = PERF_SAMPLE_WEIGHT_STRUCT; + if (has_attr_feature(&attr, /*flags=*/0)) + goto found; + perf_missing_features.weight_struct = true; + pr_debug2("switching off weight struct support\n"); + attr.sample_type = 0; + + attr.sample_type = PERF_SAMPLE_CODE_PAGE_SIZE; + if (has_attr_feature(&attr, /*flags=*/0)) + goto found; + perf_missing_features.code_page_size = true; + pr_debug2_peo("Kernel has no PERF_SAMPLE_CODE_PAGE_SIZE support\n"); + attr.sample_type = 0; + + attr.sample_type = PERF_SAMPLE_DATA_PAGE_SIZE; + if (has_attr_feature(&attr, /*flags=*/0)) + goto found; + perf_missing_features.data_page_size = true; + pr_debug2_peo("Kernel has no PERF_SAMPLE_DATA_PAGE_SIZE support\n"); + attr.sample_type = 0; + + attr.cgroup = 1; + if (has_attr_feature(&attr, /*flags=*/0)) + goto found; + perf_missing_features.cgroup = true; + pr_debug2_peo("Kernel has no cgroup sampling support\n"); + attr.cgroup = 0; + + attr.aux_output = 1; + if (has_attr_feature(&attr, /*flags=*/0)) + goto found; + perf_missing_features.aux_output = true; + pr_debug2_peo("Kernel has no attr.aux_output support\n"); + attr.aux_output = 0; + + attr.bpf_event = 1; + if (has_attr_feature(&attr, /*flags=*/0)) + goto found; + perf_missing_features.bpf = true; + pr_debug2_peo("switching off bpf_event\n"); + attr.bpf_event = 0; + + attr.ksymbol = 1; + if (has_attr_feature(&attr, /*flags=*/0)) + goto found; + perf_missing_features.ksymbol = true; + pr_debug2_peo("switching off ksymbol\n"); + attr.ksymbol = 0; + + attr.write_backward = 1; + if (has_attr_feature(&attr, /*flags=*/0)) + goto found; + perf_missing_features.write_backward = true; + pr_debug2_peo("switching off write_backward\n"); + attr.write_backward = 0; + + attr.use_clockid = 1; + attr.clockid = CLOCK_MONOTONIC; + if (has_attr_feature(&attr, /*flags=*/0)) + goto found; + perf_missing_features.clockid = true; + pr_debug2_peo("switching off clockid\n"); + attr.use_clockid = 0; + attr.clockid = 0; + + if (has_attr_feature(&attr, /*flags=*/PERF_FLAG_FD_CLOEXEC)) + goto found; + perf_missing_features.cloexec = true; + pr_debug2_peo("switching off cloexec flag\n"); + + attr.mmap2 = 1; + if (has_attr_feature(&attr, /*flags=*/0)) + goto found; + perf_missing_features.mmap2 = true; + pr_debug2_peo("switching off mmap2\n"); + attr.mmap2 = 0; + + /* set this unconditionally? */ + perf_missing_features.sample_id_all = true; + pr_debug2_peo("switching off sample_id_all\n"); + + attr.inherit = 1; + attr.read_format = PERF_FORMAT_GROUP; + if (has_attr_feature(&attr, /*flags=*/0)) + goto found; + perf_missing_features.group_read = true; + pr_debug2_peo("switching off group read\n"); + attr.inherit = 0; + attr.read_format = 0; + +found: + detection_done = true; + errno = old_errno; + +check: + if (evsel->core.attr.inherit && + (evsel->core.attr.sample_type & PERF_SAMPLE_READ) && + perf_missing_features.inherit_sample_read) return true; - } else if (!perf_missing_features.read_lost && - (evsel->core.attr.read_format & PERF_FORMAT_LOST)) { - perf_missing_features.read_lost = true; - pr_debug2("switching off PERF_FORMAT_LOST support\n"); + + if ((evsel->core.attr.branch_sample_type & PERF_SAMPLE_BRANCH_COUNTERS) && + perf_missing_features.branch_counters) return true; - } else if (!perf_missing_features.weight_struct && - (evsel->core.attr.sample_type & PERF_SAMPLE_WEIGHT_STRUCT)) { - perf_missing_features.weight_struct = true; - pr_debug2("switching off weight struct support\n"); + + if ((evsel->core.attr.read_format & PERF_FORMAT_LOST) && + perf_missing_features.read_lost) return true; - } else if (!perf_missing_features.code_page_size && - (evsel->core.attr.sample_type & PERF_SAMPLE_CODE_PAGE_SIZE)) { - perf_missing_features.code_page_size = true; - pr_debug2_peo("Kernel has no PERF_SAMPLE_CODE_PAGE_SIZE support, bailing out\n"); - return false; - } else if (!perf_missing_features.data_page_size && - (evsel->core.attr.sample_type & PERF_SAMPLE_DATA_PAGE_SIZE)) { - perf_missing_features.data_page_size = true; - pr_debug2_peo("Kernel has no PERF_SAMPLE_DATA_PAGE_SIZE support, bailing out\n"); - return false; - } else if (!perf_missing_features.cgroup && evsel->core.attr.cgroup) { - perf_missing_features.cgroup = true; - pr_debug2_peo("Kernel has no cgroup sampling support, bailing out\n"); - return false; - } else if (!perf_missing_features.branch_hw_idx && - (evsel->core.attr.branch_sample_type & PERF_SAMPLE_BRANCH_HW_INDEX)) { - perf_missing_features.branch_hw_idx = true; - pr_debug2("switching off branch HW index support\n"); + + if ((evsel->core.attr.sample_type & PERF_SAMPLE_WEIGHT_STRUCT) && + perf_missing_features.weight_struct) return true; - } else if (!perf_missing_features.aux_output && evsel->core.attr.aux_output) { - perf_missing_features.aux_output = true; - pr_debug2_peo("Kernel has no attr.aux_output support, bailing out\n"); - return false; - } else if (!perf_missing_features.bpf && evsel->core.attr.bpf_event) { - perf_missing_features.bpf = true; - pr_debug2_peo("switching off bpf_event\n"); + + if (evsel->core.attr.use_clockid && evsel->core.attr.clockid != CLOCK_MONOTONIC && + !perf_missing_features.clockid) { + perf_missing_features.clockid_wrong = true; return true; - } else if (!perf_missing_features.ksymbol && evsel->core.attr.ksymbol) { - perf_missing_features.ksymbol = true; - pr_debug2_peo("switching off ksymbol\n"); + } + + if (evsel->core.attr.use_clockid && perf_missing_features.clockid) return true; - } else if (!perf_missing_features.write_backward && evsel->core.attr.write_backward) { - perf_missing_features.write_backward = true; - pr_debug2_peo("switching off write_backward\n"); - return false; - } else if (!perf_missing_features.clockid_wrong && evsel->core.attr.use_clockid) { - perf_missing_features.clockid_wrong = true; - pr_debug2_peo("switching off clockid\n"); + + if ((evsel->open_flags & PERF_FLAG_FD_CLOEXEC) && + perf_missing_features.cloexec) return true; - } else if (!perf_missing_features.clockid && evsel->core.attr.use_clockid) { - perf_missing_features.clockid = true; - pr_debug2_peo("switching off use_clockid\n"); + + if (evsel->core.attr.mmap2 && perf_missing_features.mmap2) return true; - } else if (!perf_missing_features.cloexec && (evsel->open_flags & PERF_FLAG_FD_CLOEXEC)) { - perf_missing_features.cloexec = true; - pr_debug2_peo("switching off cloexec flag\n"); + + if ((evsel->core.attr.branch_sample_type & (PERF_SAMPLE_BRANCH_NO_FLAGS | + PERF_SAMPLE_BRANCH_NO_CYCLES)) && + perf_missing_features.lbr_flags) return true; - } else if (!perf_missing_features.mmap2 && evsel->core.attr.mmap2) { - perf_missing_features.mmap2 = true; - pr_debug2_peo("switching off mmap2\n"); + + if (evsel->core.attr.inherit && (evsel->core.attr.read_format & PERF_FORMAT_GROUP) && + perf_missing_features.group_read) return true; - } else if (evsel->core.attr.exclude_guest || evsel->core.attr.exclude_host) { - if (evsel->pmu == NULL) - evsel->pmu = evsel__find_pmu(evsel); - - if (evsel->pmu) - evsel->pmu->missing_features.exclude_guest = true; - else { - /* we cannot find PMU, disable attrs now */ - evsel->core.attr.exclude_host = false; - evsel->core.attr.exclude_guest = false; - } - if (evsel->exclude_GH) { - pr_debug2_peo("PMU has no exclude_host/guest support, bailing out\n"); - return false; - } - if (!perf_missing_features.exclude_guest) { - perf_missing_features.exclude_guest = true; - pr_debug2_peo("switching off exclude_guest, exclude_host\n"); - } + if (evsel->core.attr.ksymbol && perf_missing_features.ksymbol) return true; - } else if (!perf_missing_features.sample_id_all) { - perf_missing_features.sample_id_all = true; - pr_debug2_peo("switching off sample_id_all\n"); + + if (evsel->core.attr.bpf_event && perf_missing_features.bpf) return true; - } else if (!perf_missing_features.lbr_flags && - (evsel->core.attr.branch_sample_type & - (PERF_SAMPLE_BRANCH_NO_CYCLES | - PERF_SAMPLE_BRANCH_NO_FLAGS))) { - perf_missing_features.lbr_flags = true; - pr_debug2_peo("switching off branch sample type no (cycles/flags)\n"); + + if ((evsel->core.attr.branch_sample_type & PERF_SAMPLE_BRANCH_HW_INDEX) && + perf_missing_features.branch_hw_idx) return true; - } else if (!perf_missing_features.group_read && - evsel->core.attr.inherit && - (evsel->core.attr.read_format & PERF_FORMAT_GROUP) && - evsel__is_group_leader(evsel)) { - perf_missing_features.group_read = true; - pr_debug2_peo("switching off group read\n"); + + if (evsel->core.attr.sample_id_all && perf_missing_features.sample_id_all) + return true; + + return false; +} + +static bool evsel__handle_error_quirks(struct evsel *evsel, int error) +{ + /* + * AMD core PMU tries to forward events with precise_ip to IBS PMU + * implicitly. But IBS PMU has more restrictions so it can fail with + * supported event attributes. Let's forward it back to the core PMU + * by clearing precise_ip only if it's from precise_max (:P). + */ + if ((error == -EINVAL || error == -ENOENT) && x86__is_amd_cpu() && + evsel->core.attr.precise_ip && evsel->precise_max) { + evsel->core.attr.precise_ip = 0; + pr_debug2_peo("removing precise_ip on AMD\n"); + display_attr(&evsel->core.attr); return true; - } else { - return false; } + + return false; } static int evsel__open_cpu(struct evsel *evsel, struct perf_cpu_map *cpus, @@ -2262,13 +2440,6 @@ static int evsel__open_cpu(struct evsel *evsel, struct perf_cpu_map *cpus, int pid = -1, err, old_errno; enum rlimit_action set_rlimit = NO_CHANGE; - if (evsel__tool_event(evsel) == PERF_TOOL_DURATION_TIME) { - if (evsel->core.attr.sample_period) /* no sampling */ - return -EINVAL; - evsel->start_time = rdclock(); - return 0; - } - if (evsel__is_retire_lat(evsel)) return tpebs_start(evsel->evlist); @@ -2293,6 +2464,17 @@ fallback_missing_features: pr_debug3("Opening: %s\n", evsel__name(evsel)); display_attr(&evsel->core.attr); + if (evsel__is_tool(evsel)) { + return evsel__tool_pmu_open(evsel, threads, + start_cpu_map_idx, + end_cpu_map_idx); + } + if (evsel__is_hwmon(evsel)) { + return evsel__hwmon_pmu_open(evsel, threads, + start_cpu_map_idx, + end_cpu_map_idx); + } + for (idx = start_cpu_map_idx; idx < end_cpu_map_idx; idx++) { for (thread = 0; thread < nthreads; thread++) { @@ -2304,46 +2486,6 @@ retry_open: if (!evsel->cgrp && !evsel->core.system_wide) pid = perf_thread_map__pid(threads, thread); - if (evsel__tool_event(evsel) == PERF_TOOL_USER_TIME || - evsel__tool_event(evsel) == PERF_TOOL_SYSTEM_TIME) { - bool system = evsel__tool_event(evsel) == PERF_TOOL_SYSTEM_TIME; - __u64 *start_time = NULL; - - if (evsel->core.attr.sample_period) { - /* no sampling */ - err = -EINVAL; - goto out_close; - } - if (pid > -1) { - char buf[64]; - - snprintf(buf, sizeof(buf), "/proc/%d/stat", pid); - fd = open(buf, O_RDONLY); - evsel->pid_stat = true; - } else { - fd = open("/proc/stat", O_RDONLY); - } - FD(evsel, idx, thread) = fd; - if (fd < 0) { - err = -errno; - goto out_close; - } - start_time = xyarray__entry(evsel->start_times, idx, thread); - if (pid > -1) { - err = read_pid_stat_field(fd, system ? 15 : 14, - start_time); - } else { - struct perf_cpu cpu; - - cpu = perf_cpu_map__cpu(evsel->core.cpus, idx); - err = read_stat_field(fd, cpu, system ? 3 : 1, - start_time); - } - if (err) - goto out_close; - continue; - } - group_fd = get_group_fd(evsel, idx, thread); if (group_fd == -2) { @@ -2352,8 +2494,6 @@ retry_open: goto out_close; } - test_attr__ready(); - /* Debug message used by test scripts */ pr_debug2_peo("sys_perf_event_open: pid %d cpu %d group_fd %d flags %#lx", pid, perf_cpu_map__cpu(cpus, idx).cpu, group_fd, evsel->open_flags); @@ -2374,7 +2514,7 @@ retry_open: bpf_counter__install_pe(evsel, idx, fd); - if (unlikely(test_attr__enabled)) { + if (unlikely(test_attr__enabled())) { test_attr__open(&evsel->core.attr, pid, perf_cpu_map__cpu(cpus, idx), fd, group_fd, evsel->open_flags); @@ -2415,9 +2555,6 @@ retry_open: return 0; try_fallback: - if (evsel__precise_ip_fallback(evsel)) - goto retry_open; - if (evsel__ignore_missing_thread(evsel, perf_cpu_map__nr(cpus), idx, threads, thread, err)) { /* We just removed 1 thread, so lower the upper nthreads limit. */ @@ -2434,11 +2571,15 @@ try_fallback: if (err == -EMFILE && rlimit__increase_nofile(&set_rlimit)) goto retry_open; - if (err != -EINVAL || idx > 0 || thread > 0) - goto out_close; - - if (evsel__detect_missing_features(evsel)) + if (err == -EINVAL && evsel__detect_missing_features(evsel)) goto fallback_missing_features; + + if (evsel__precise_ip_fallback(evsel)) + goto retry_open; + + if (evsel__handle_error_quirks(evsel, err)) + goto retry_open; + out_close: if (err) threads->err_thread = thread; @@ -3245,6 +3386,27 @@ bool evsel__fallback(struct evsel *evsel, struct target *target, int err, evsel->core.attr.exclude_hv = 1; return true; + } else if (err == EOPNOTSUPP && !evsel->core.attr.exclude_guest && + !evsel->exclude_GH) { + const char *name = evsel__name(evsel); + char *new_name; + const char *sep = ":"; + + /* Is there already the separator in the name. */ + if (strchr(name, '/') || + (strchr(name, ':') && !evsel->is_libpfm_event)) + sep = ""; + + if (asprintf(&new_name, "%s%sH", name, sep) < 0) + return false; + + free(evsel->name); + evsel->name = new_name; + /* Apple M1 requires exclude_guest */ + scnprintf(msg, msgsize, "trying to fall back to excluding guest samples"); + evsel->core.attr.exclude_guest = 1; + + return true; } return false; @@ -3415,7 +3577,7 @@ int evsel__open_strerror(struct evsel *evsel, struct target *target, return scnprintf(msg, size, "The sys_perf_event_open() syscall returned with %d (%s) for event (%s).\n" - "/bin/dmesg | grep -i perf may provide additional information.\n", + "\"dmesg | grep -i perf\" may provide additional information.\n", err, str_error_r(err, sbuf, sizeof(sbuf)), evsel__name(evsel)); } diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 15e745a9a798..04934a7af174 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -11,6 +11,7 @@ #include <perf/evsel.h> #include "symbol_conf.h" #include "pmus.h" +#include "pmu.h" struct bpf_object; struct cgroup; @@ -22,25 +23,9 @@ struct target; struct hashmap; struct bperf_leader_bpf; struct bperf_follower_bpf; -struct perf_pmu; typedef int (evsel__sb_cb_t)(union perf_event *event, void *data); -enum perf_tool_event { - PERF_TOOL_NONE = 0, - PERF_TOOL_DURATION_TIME = 1, - PERF_TOOL_USER_TIME = 2, - PERF_TOOL_SYSTEM_TIME = 3, - - PERF_TOOL_MAX, -}; - -const char *perf_tool_event__to_str(enum perf_tool_event ev); -enum perf_tool_event perf_tool_event__from_str(const char *str); - -#define perf_tool_event__for_each_event(ev) \ - for ((ev) = PERF_TOOL_DURATION_TIME; (ev) < PERF_TOOL_MAX; ev++) - /** struct evsel - event selector * * @evlist - evlist this evsel is in, if it is in one. @@ -72,7 +57,6 @@ struct evsel { struct { char *name; char *group_name; - const char *pmu_name; const char *group_pmu_name; #ifdef HAVE_LIBTRACEEVENT struct tep_event *tp_format; @@ -83,7 +67,6 @@ struct evsel { const char *unit; struct cgroup *cgrp; const char *metric_id; - enum perf_tool_event tool_event; /* parse modifier helper */ int exclude_GH; int sample_read; @@ -102,6 +85,7 @@ struct evsel { int bpf_fd; struct bpf_object *bpf_obj; struct list_head config_terms; + u64 alternate_hw_config; }; /* @@ -183,7 +167,7 @@ struct evsel { unsigned long open_flags; int precise_ip_original; - /* for missing_features */ + /* The PMU the event is from. Used for missing_features, PMU name, etc. */ struct perf_pmu *pmu; /* For tool events */ @@ -221,6 +205,7 @@ struct perf_missing_features { bool weight_struct; bool read_lost; bool branch_counters; + bool inherit_sample_read; }; extern struct perf_missing_features perf_missing_features; @@ -320,21 +305,11 @@ const char *evsel__name(struct evsel *evsel); bool evsel__name_is(struct evsel *evsel, const char *name); const char *evsel__metric_id(const struct evsel *evsel); -static inline bool evsel__is_tool(const struct evsel *evsel) -{ - return evsel->tool_event != PERF_TOOL_NONE; -} - static inline bool evsel__is_retire_lat(const struct evsel *evsel) { return evsel->retire_lat; } -static inline enum perf_tool_event evsel__tool_event(const struct evsel *evsel) -{ - return evsel->tool_event; -} - const char *evsel__group_name(struct evsel *evsel); int evsel__group_desc(struct evsel *evsel, char *buf, size_t size); @@ -368,7 +343,6 @@ int evsel__open(struct evsel *evsel, struct perf_cpu_map *cpus, void evsel__close(struct evsel *evsel); int evsel__prepare_open(struct evsel *evsel, struct perf_cpu_map *cpus, struct perf_thread_map *threads); -bool evsel__detect_missing_features(struct evsel *evsel); bool evsel__precise_ip_fallback(struct evsel *evsel); @@ -393,26 +367,10 @@ u64 format_field__intval(struct tep_format_field *field, struct perf_sample *sam struct tep_format_field *evsel__field(struct evsel *evsel, const char *name); struct tep_format_field *evsel__common_field(struct evsel *evsel, const char *name); -static inline bool __evsel__match(const struct evsel *evsel, u32 type, u64 config) -{ - if (evsel->core.attr.type != type) - return false; - - if ((type == PERF_TYPE_HARDWARE || type == PERF_TYPE_HW_CACHE) && - perf_pmus__supports_extended_type()) - return (evsel->core.attr.config & PERF_HW_EVENT_MASK) == config; - - return evsel->core.attr.config == config; -} +bool __evsel__match(const struct evsel *evsel, u32 type, u64 config); #define evsel__match(evsel, t, c) __evsel__match(evsel, PERF_TYPE_##t, PERF_COUNT_##c) -static inline bool evsel__match2(struct evsel *e1, struct evsel *e2) -{ - return (e1->core.attr.type == e2->core.attr.type) && - (e1->core.attr.config == e2->core.attr.config); -} - int evsel__read_counter(struct evsel *evsel, int cpu_map_idx, int thread); int __evsel__read_on_cpu(struct evsel *evsel, int cpu_map_idx, int thread, bool scale); diff --git a/tools/perf/util/evsel_fprintf.c b/tools/perf/util/evsel_fprintf.c index c2c0500d5da9..86b7f46f9e2a 100644 --- a/tools/perf/util/evsel_fprintf.c +++ b/tools/perf/util/evsel_fprintf.c @@ -14,7 +14,7 @@ #include "dso.h" #ifdef HAVE_LIBTRACEEVENT -#include <traceevent/event-parse.h> +#include <event-parse.h> #endif static int comma_fprintf(FILE *fp, bool *first, const char *fmt, ...) diff --git a/tools/perf/util/expr.c b/tools/perf/util/expr.c index b2536a59c44e..f289044a1f7c 100644 --- a/tools/perf/util/expr.c +++ b/tools/perf/util/expr.c @@ -5,25 +5,22 @@ #include <stdlib.h> #include <string.h> #include "metricgroup.h" -#include "cpumap.h" -#include "cputopo.h" #include "debug.h" #include "evlist.h" #include "expr.h" +#include "smt.h" +#include "tool_pmu.h" #include <util/expr-bison.h> #include <util/expr-flex.h> #include "util/hashmap.h" #include "util/header.h" #include "util/pmu.h" -#include "smt.h" -#include "tsc.h" -#include <api/fs/fs.h> +#include <perf/cpumap.h> #include <linux/err.h> #include <linux/kernel.h> #include <linux/zalloc.h> #include <ctype.h> #include <math.h> -#include "pmu.h" struct expr_id_data { union { @@ -393,90 +390,26 @@ double expr_id_data__source_count(const struct expr_id_data *data) return data->val.source_count; } -#if !defined(__i386__) && !defined(__x86_64__) -double arch_get_tsc_freq(void) -{ - return 0.0; -} -#endif - -static double has_pmem(void) -{ - static bool has_pmem, cached; - const char *sysfs = sysfs__mountpoint(); - char path[PATH_MAX]; - - if (!cached) { - snprintf(path, sizeof(path), "%s/firmware/acpi/tables/NFIT", sysfs); - has_pmem = access(path, F_OK) == 0; - cached = true; - } - return has_pmem ? 1.0 : 0.0; -} - double expr__get_literal(const char *literal, const struct expr_scanner_ctx *ctx) { - const struct cpu_topology *topology; double result = NAN; + enum tool_pmu_event ev = tool_pmu__str_to_event(literal + 1); - if (!strcmp("#num_cpus", literal)) { - result = cpu__max_present_cpu().cpu; - goto out; - } - if (!strcmp("#num_cpus_online", literal)) { - struct perf_cpu_map *online = cpu_map__online(); - - if (online) - result = perf_cpu_map__nr(online); - goto out; - } + if (ev != TOOL_PMU__EVENT_NONE) { + u64 count; - if (!strcasecmp("#system_tsc_freq", literal)) { - result = arch_get_tsc_freq(); - goto out; - } + if (tool_pmu__read_event(ev, &count)) + result = count; + else + pr_err("Failure to read '%s'", literal); - /* - * Assume that topology strings are consistent, such as CPUs "0-1" - * wouldn't be listed as "0,1", and so after deduplication the number of - * these strings gives an indication of the number of packages, dies, - * etc. - */ - if (!strcasecmp("#smt_on", literal)) { - result = smt_on() ? 1.0 : 0.0; - goto out; - } - if (!strcmp("#core_wide", literal)) { + } else if (!strcmp("#core_wide", literal)) { result = core_wide(ctx->system_wide, ctx->user_requested_cpu_list) ? 1.0 : 0.0; - goto out; - } - if (!strcmp("#num_packages", literal)) { - topology = online_topology(); - result = topology->package_cpus_lists; - goto out; - } - if (!strcmp("#num_dies", literal)) { - topology = online_topology(); - result = topology->die_cpus_lists; - goto out; - } - if (!strcmp("#num_cores", literal)) { - topology = online_topology(); - result = topology->core_cpus_lists; - goto out; - } - if (!strcmp("#slots", literal)) { - result = perf_pmu__cpu_slots_per_cycle(); - goto out; - } - if (!strcmp("#has_pmem", literal)) { - result = has_pmem(); - goto out; + } else { + pr_err("Unrecognized literal '%s'", literal); } - pr_err("Unrecognized literal '%s'", literal); -out: pr_debug2("literal: %s = %f\n", literal, result); return result; } @@ -523,8 +456,8 @@ double expr__strcmp_cpuid_str(const struct expr_parse_ctx *ctx __maybe_unused, bool compute_ids __maybe_unused, const char *test_id) { double ret; - struct perf_pmu *pmu = perf_pmus__find_core_pmu(); - char *cpuid = perf_pmu__getcpuid(pmu); + struct perf_cpu cpu = {-1}; + char *cpuid = get_cpuid_allow_env_override(cpu); if (!cpuid) return NAN; diff --git a/tools/perf/util/genelf.c b/tools/perf/util/genelf.c index c8f6bee1fa61..cdce7f173d00 100644 --- a/tools/perf/util/genelf.c +++ b/tools/perf/util/genelf.c @@ -16,7 +16,7 @@ #include <inttypes.h> #include <fcntl.h> #include <err.h> -#ifdef HAVE_DWARF_SUPPORT +#ifdef HAVE_LIBDW_SUPPORT #include <dwarf.h> #endif @@ -499,7 +499,7 @@ jit_write_elf(int fd, uint64_t load_addr, const char *sym, shdr->sh_size = sizeof(bnote); shdr->sh_entsize = 0; -#ifdef HAVE_DWARF_SUPPORT +#ifdef HAVE_LIBDW_SUPPORT if (debug && nr_debug_entries) { retval = jit_add_debug_info(e, load_addr, debug, nr_debug_entries); if (retval) diff --git a/tools/perf/util/genelf.h b/tools/perf/util/genelf.h index 4e2e4f40e134..9f0b875d6548 100644 --- a/tools/perf/util/genelf.h +++ b/tools/perf/util/genelf.h @@ -8,7 +8,7 @@ int jit_write_elf(int fd, uint64_t code_addr, const char *sym, const void *code, int csize, void *debug, int nr_debug_entries, void *unwinding, uint64_t unwinding_header_size, uint64_t unwinding_size); -#ifdef HAVE_DWARF_SUPPORT +#ifdef HAVE_LIBDW_SUPPORT /* genelf_debug.c */ int jit_add_debug_info(Elf *e, uint64_t code_addr, void *debug, int nr_debug_entries); #endif diff --git a/tools/perf/util/hashmap.h b/tools/perf/util/hashmap.h index c12f8320e668..0c4f155e8eb7 100644 --- a/tools/perf/util/hashmap.h +++ b/tools/perf/util/hashmap.h @@ -166,8 +166,8 @@ bool hashmap_find(const struct hashmap *map, long key, long *value); * @bkt: integer used as a bucket loop cursor */ #define hashmap__for_each_entry(map, cur, bkt) \ - for (bkt = 0; bkt < map->cap; bkt++) \ - for (cur = map->buckets[bkt]; cur; cur = cur->next) + for (bkt = 0; bkt < (map)->cap; bkt++) \ + for (cur = (map)->buckets[bkt]; cur; cur = cur->next) /* * hashmap__for_each_entry_safe - iterate over all entries in hashmap, safe @@ -178,8 +178,8 @@ bool hashmap_find(const struct hashmap *map, long key, long *value); * @bkt: integer used as a bucket loop cursor */ #define hashmap__for_each_entry_safe(map, cur, tmp, bkt) \ - for (bkt = 0; bkt < map->cap; bkt++) \ - for (cur = map->buckets[bkt]; \ + for (bkt = 0; bkt < (map)->cap; bkt++) \ + for (cur = (map)->buckets[bkt]; \ cur && ({tmp = cur->next; true; }); \ cur = tmp) @@ -190,19 +190,19 @@ bool hashmap_find(const struct hashmap *map, long key, long *value); * @key: key to iterate entries for */ #define hashmap__for_each_key_entry(map, cur, _key) \ - for (cur = map->buckets \ - ? map->buckets[hash_bits(map->hash_fn((_key), map->ctx), map->cap_bits)] \ + for (cur = (map)->buckets \ + ? (map)->buckets[hash_bits((map)->hash_fn((_key), (map)->ctx), (map)->cap_bits)] \ : NULL; \ cur; \ cur = cur->next) \ - if (map->equal_fn(cur->key, (_key), map->ctx)) + if ((map)->equal_fn(cur->key, (_key), (map)->ctx)) #define hashmap__for_each_key_entry_safe(map, cur, tmp, _key) \ - for (cur = map->buckets \ - ? map->buckets[hash_bits(map->hash_fn((_key), map->ctx), map->cap_bits)] \ + for (cur = (map)->buckets \ + ? (map)->buckets[hash_bits((map)->hash_fn((_key), (map)->ctx), (map)->cap_bits)] \ : NULL; \ cur && ({ tmp = cur->next; true; }); \ cur = tmp) \ - if (map->equal_fn(cur->key, (_key), map->ctx)) + if ((map)->equal_fn(cur->key, (_key), (map)->ctx)) #endif /* __LIBBPF_HASHMAP_H */ diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index a6386d12afd7..3451e542b69a 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -58,7 +58,7 @@ #include <internal/lib.h> #ifdef HAVE_LIBTRACEEVENT -#include <traceevent/event-parse.h> +#include <event-parse.h> #endif /* @@ -819,11 +819,31 @@ static int write_group_desc(struct feat_fd *ff, * Each architecture should provide a more precise id string that * can be use to match the architecture's "mapfile". */ -char * __weak get_cpuid_str(struct perf_pmu *pmu __maybe_unused) +char * __weak get_cpuid_str(struct perf_cpu cpu __maybe_unused) { return NULL; } +char *get_cpuid_allow_env_override(struct perf_cpu cpu) +{ + char *cpuid; + static bool printed; + + cpuid = getenv("PERF_CPUID"); + if (cpuid) + cpuid = strdup(cpuid); + if (!cpuid) + cpuid = get_cpuid_str(cpu); + if (!cpuid) + return NULL; + + if (!printed) { + pr_debug("Using CPUID %s\n", cpuid); + printed = true; + } + return cpuid; +} + /* Return zero when the cpuid from the mapfile.csv matches the * cpuid string generated on this platform. * Otherwise return non-zero. @@ -856,18 +876,19 @@ int __weak strcmp_cpuid_str(const char *mapcpuid, const char *cpuid) * default get_cpuid(): nothing gets recorded * actual implementation must be in arch/$(SRCARCH)/util/header.c */ -int __weak get_cpuid(char *buffer __maybe_unused, size_t sz __maybe_unused) +int __weak get_cpuid(char *buffer __maybe_unused, size_t sz __maybe_unused, + struct perf_cpu cpu __maybe_unused) { return ENOSYS; /* Not implemented */ } -static int write_cpuid(struct feat_fd *ff, - struct evlist *evlist __maybe_unused) +static int write_cpuid(struct feat_fd *ff, struct evlist *evlist) { + struct perf_cpu cpu = perf_cpu_map__min(evlist->core.all_cpus); char buffer[64]; int ret; - ret = get_cpuid(buffer, sizeof(buffer)); + ret = get_cpuid(buffer, sizeof(buffer), cpu); if (ret) return -1; @@ -987,57 +1008,6 @@ static int write_dir_format(struct feat_fd *ff, return do_write(ff, &data->dir.version, sizeof(data->dir.version)); } -/* - * Check whether a CPU is online - * - * Returns: - * 1 -> if CPU is online - * 0 -> if CPU is offline - * -1 -> error case - */ -int is_cpu_online(unsigned int cpu) -{ - char *str; - size_t strlen; - char buf[256]; - int status = -1; - struct stat statbuf; - - snprintf(buf, sizeof(buf), - "/sys/devices/system/cpu/cpu%d", cpu); - if (stat(buf, &statbuf) != 0) - return 0; - - /* - * Check if /sys/devices/system/cpu/cpux/online file - * exists. Some cases cpu0 won't have online file since - * it is not expected to be turned off generally. - * In kernels without CONFIG_HOTPLUG_CPU, this - * file won't exist - */ - snprintf(buf, sizeof(buf), - "/sys/devices/system/cpu/cpu%d/online", cpu); - if (stat(buf, &statbuf) != 0) - return 1; - - /* - * Read online file using sysfs__read_str. - * If read or open fails, return -1. - * If read succeeds, return value from file - * which gets stored in "str" - */ - snprintf(buf, sizeof(buf), - "devices/system/cpu/cpu%d/online", cpu); - - if (sysfs__read_str(buf, &str, &strlen) < 0) - return status; - - status = atoi(str); - - free(str); - return status; -} - #ifdef HAVE_LIBBPF_SUPPORT static int write_bpf_prog_info(struct feat_fd *ff, struct evlist *evlist __maybe_unused) diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h index a63a361f20f4..5201af6305f4 100644 --- a/tools/perf/util/header.h +++ b/tools/perf/util/header.h @@ -10,7 +10,13 @@ #include <linux/bitmap.h> #include <linux/types.h> #include "env.h" -#include "pmu.h" +#include <perf/cpumap.h> + +struct evlist; +union perf_event; +struct perf_header; +struct perf_session; +struct perf_tool; enum { HEADER_RESERVED = 0, /* always cleared */ @@ -91,8 +97,6 @@ struct perf_pipe_file_header { u64 size; }; -struct perf_header; - int perf_file_header__read(struct perf_file_header *header, struct perf_header *ph, int fd); @@ -124,11 +128,6 @@ struct perf_header_feature_ops { bool synthesize; }; -struct evlist; -struct perf_session; -struct perf_tool; -union perf_event; - extern const char perf_version_string[]; int perf_session__read_header(struct perf_session *session); @@ -196,14 +195,16 @@ int write_padded(struct feat_fd *fd, const void *bf, #define MAX_CACHE_LVL 4 -int is_cpu_online(unsigned int cpu); int build_caches_for_cpu(u32 cpu, struct cpu_cache_level caches[], u32 *cntp); /* * arch specific callback */ -int get_cpuid(char *buffer, size_t sz); +int get_cpuid(char *buffer, size_t sz, struct perf_cpu cpu); + +char *get_cpuid_str(struct perf_cpu cpu); + +char *get_cpuid_allow_env_override(struct perf_cpu cpu); -char *get_cpuid_str(struct perf_pmu *pmu __maybe_unused); int strcmp_cpuid_str(const char *s1, const char *s2); #endif /* __PERF_HEADER_H */ diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index f387e85a0087..fff134565801 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -218,6 +218,9 @@ void hists__calc_col_len(struct hists *hists, struct hist_entry *h) hists__new_col_len(hists, HISTC_LOCAL_P_STAGE_CYC, 13); hists__new_col_len(hists, HISTC_GLOBAL_P_STAGE_CYC, 13); hists__new_col_len(hists, HISTC_ADDR, BITS_PER_LONG / 4 + 2); + hists__new_col_len(hists, HISTC_CALLCHAIN_BRANCH_PREDICTED, 9); + hists__new_col_len(hists, HISTC_CALLCHAIN_BRANCH_ABORT, 5); + hists__new_col_len(hists, HISTC_CALLCHAIN_BRANCH_CYCLES, 6); if (symbol_conf.nanosecs) hists__new_col_len(hists, HISTC_TIME, 16); diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index 7d7ae94b4b31..1131056924d9 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -87,6 +87,9 @@ enum hist_column { HISTC_TYPE_OFFSET, HISTC_SYMBOL_OFFSET, HISTC_TYPE_CACHELINE, + HISTC_CALLCHAIN_BRANCH_PREDICTED, + HISTC_CALLCHAIN_BRANCH_ABORT, + HISTC_CALLCHAIN_BRANCH_CYCLES, HISTC_NR_COLS, /* Last entry */ }; diff --git a/tools/perf/util/hwmon_pmu.c b/tools/perf/util/hwmon_pmu.c new file mode 100644 index 000000000000..4acb9bb19b84 --- /dev/null +++ b/tools/perf/util/hwmon_pmu.c @@ -0,0 +1,839 @@ +// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) +#include "counts.h" +#include "debug.h" +#include "evsel.h" +#include "hashmap.h" +#include "hwmon_pmu.h" +#include "pmu.h" +#include <internal/xyarray.h> +#include <internal/threadmap.h> +#include <perf/threadmap.h> +#include <sys/types.h> +#include <assert.h> +#include <ctype.h> +#include <dirent.h> +#include <fcntl.h> +#include <stddef.h> +#include <stdlib.h> +#include <string.h> +#include <api/fs/fs.h> +#include <api/io.h> +#include <linux/kernel.h> +#include <linux/string.h> +#include <linux/zalloc.h> + +/** Strings that correspond to enum hwmon_type. */ +static const char * const hwmon_type_strs[HWMON_TYPE_MAX] = { + NULL, + "cpu", + "curr", + "energy", + "fan", + "humidity", + "in", + "intrusion", + "power", + "pwm", + "temp", +}; +#define LONGEST_HWMON_TYPE_STR "intrusion" + +/** Strings that correspond to enum hwmon_item. */ +static const char * const hwmon_item_strs[HWMON_ITEM__MAX] = { + NULL, + "accuracy", + "alarm", + "auto_channels_temp", + "average", + "average_highest", + "average_interval", + "average_interval_max", + "average_interval_min", + "average_lowest", + "average_max", + "average_min", + "beep", + "cap", + "cap_hyst", + "cap_max", + "cap_min", + "crit", + "crit_hyst", + "div", + "emergency", + "emergency_hist", + "enable", + "fault", + "freq", + "highest", + "input", + "label", + "lcrit", + "lcrit_hyst", + "lowest", + "max", + "max_hyst", + "min", + "min_hyst", + "mod", + "offset", + "pulses", + "rated_max", + "rated_min", + "reset_history", + "target", + "type", + "vid", +}; +#define LONGEST_HWMON_ITEM_STR "average_interval_max" + +static const char *const hwmon_units[HWMON_TYPE_MAX] = { + NULL, + "V", /* cpu */ + "A", /* curr */ + "J", /* energy */ + "rpm", /* fan */ + "%", /* humidity */ + "V", /* in */ + "", /* intrusion */ + "W", /* power */ + "Hz", /* pwm */ + "'C", /* temp */ +}; + +struct hwmon_pmu { + struct perf_pmu pmu; + struct hashmap events; + int hwmon_dir_fd; +}; + +/** + * union hwmon_pmu_event_key: Key for hwmon_pmu->events as such each key + * represents an event. + * + * Related hwmon files start <type><number> that this key represents. + */ +union hwmon_pmu_event_key { + long type_and_num; + struct { + int num :16; + enum hwmon_type type :8; + }; +}; + +/** + * struct hwmon_pmu_event_value: Value in hwmon_pmu->events. + * + * Hwmon files are of the form <type><number>_<item> and may have a suffix + * _alarm. + */ +struct hwmon_pmu_event_value { + /** @items: which item files are present. */ + DECLARE_BITMAP(items, HWMON_ITEM__MAX); + /** @alarm_items: which item files are present. */ + DECLARE_BITMAP(alarm_items, HWMON_ITEM__MAX); + /** @label: contents of <type><number>_label if present. */ + char *label; + /** @name: name computed from label of the form <type>_<label>. */ + char *name; +}; + +bool perf_pmu__is_hwmon(const struct perf_pmu *pmu) +{ + return pmu && pmu->type >= PERF_PMU_TYPE_HWMON_START && + pmu->type <= PERF_PMU_TYPE_HWMON_END; +} + +bool evsel__is_hwmon(const struct evsel *evsel) +{ + return perf_pmu__is_hwmon(evsel->pmu); +} + +static size_t hwmon_pmu__event_hashmap_hash(long key, void *ctx __maybe_unused) +{ + return ((union hwmon_pmu_event_key)key).type_and_num; +} + +static bool hwmon_pmu__event_hashmap_equal(long key1, long key2, void *ctx __maybe_unused) +{ + return ((union hwmon_pmu_event_key)key1).type_and_num == + ((union hwmon_pmu_event_key)key2).type_and_num; +} + +static int hwmon_strcmp(const void *a, const void *b) +{ + const char *sa = a; + const char * const *sb = b; + + return strcmp(sa, *sb); +} + +bool parse_hwmon_filename(const char *filename, + enum hwmon_type *type, + int *number, + enum hwmon_item *item, + bool *alarm) +{ + char fn_type[24]; + const char **elem; + const char *fn_item = NULL; + size_t fn_item_len; + + assert(strlen(LONGEST_HWMON_TYPE_STR) < sizeof(fn_type)); + strlcpy(fn_type, filename, sizeof(fn_type)); + for (size_t i = 0; fn_type[i] != '\0'; i++) { + if (fn_type[i] >= '0' && fn_type[i] <= '9') { + fn_type[i] = '\0'; + *number = strtoul(&filename[i], (char **)&fn_item, 10); + if (*fn_item == '_') + fn_item++; + break; + } + if (fn_type[i] == '_') { + fn_type[i] = '\0'; + *number = -1; + fn_item = &filename[i + 1]; + break; + } + } + if (fn_item == NULL || fn_type[0] == '\0' || (item != NULL && fn_item[0] == '\0')) { + pr_debug3("hwmon_pmu: not a hwmon file '%s'\n", filename); + return false; + } + elem = bsearch(&fn_type, hwmon_type_strs + 1, ARRAY_SIZE(hwmon_type_strs) - 1, + sizeof(hwmon_type_strs[0]), hwmon_strcmp); + if (!elem) { + pr_debug3("hwmon_pmu: not a hwmon type '%s' in file name '%s'\n", + fn_type, filename); + return false; + } + + *type = elem - &hwmon_type_strs[0]; + if (!item) + return true; + + *alarm = false; + fn_item_len = strlen(fn_item); + if (fn_item_len > 6 && !strcmp(&fn_item[fn_item_len - 6], "_alarm")) { + assert(strlen(LONGEST_HWMON_ITEM_STR) < sizeof(fn_type)); + strlcpy(fn_type, fn_item, fn_item_len - 5); + fn_item = fn_type; + *alarm = true; + } + elem = bsearch(fn_item, hwmon_item_strs + 1, ARRAY_SIZE(hwmon_item_strs) - 1, + sizeof(hwmon_item_strs[0]), hwmon_strcmp); + if (!elem) { + pr_debug3("hwmon_pmu: not a hwmon item '%s' in file name '%s'\n", + fn_item, filename); + return false; + } + *item = elem - &hwmon_item_strs[0]; + return true; +} + +static void fix_name(char *p) +{ + char *s = strchr(p, '\n'); + + if (s) + *s = '\0'; + + while (*p != '\0') { + if (strchr(" :,/\n\t", *p)) + *p = '_'; + else + *p = tolower(*p); + p++; + } +} + +static int hwmon_pmu__read_events(struct hwmon_pmu *pmu) +{ + DIR *dir; + struct dirent *ent; + int dup_fd, err = 0; + struct hashmap_entry *cur, *tmp; + size_t bkt; + + if (pmu->pmu.sysfs_aliases_loaded) + return 0; + + /* + * Use a dup-ed fd as closedir will close it. Use openat so that the + * directory contents are refreshed. + */ + dup_fd = openat(pmu->hwmon_dir_fd, ".", O_DIRECTORY); + + if (dup_fd == -1) + return -ENOMEM; + + dir = fdopendir(dup_fd); + if (!dir) { + close(dup_fd); + return -ENOMEM; + } + + while ((ent = readdir(dir)) != NULL) { + enum hwmon_type type; + int number; + enum hwmon_item item; + bool alarm; + union hwmon_pmu_event_key key = { .type_and_num = 0 }; + struct hwmon_pmu_event_value *value; + + if (ent->d_type != DT_REG) + continue; + + if (!parse_hwmon_filename(ent->d_name, &type, &number, &item, &alarm)) { + pr_debug3("Not a hwmon file '%s'\n", ent->d_name); + continue; + } + key.num = number; + key.type = type; + if (!hashmap__find(&pmu->events, key.type_and_num, &value)) { + value = zalloc(sizeof(*value)); + if (!value) { + err = -ENOMEM; + goto err_out; + } + err = hashmap__add(&pmu->events, key.type_and_num, value); + if (err) { + free(value); + err = -ENOMEM; + goto err_out; + } + } + __set_bit(item, alarm ? value->alarm_items : value->items); + if (item == HWMON_ITEM_LABEL) { + char buf[128]; + int fd = openat(pmu->hwmon_dir_fd, ent->d_name, O_RDONLY); + ssize_t read_len; + + if (fd < 0) + continue; + + read_len = read(fd, buf, sizeof(buf)); + + while (read_len > 0 && buf[read_len - 1] == '\n') + read_len--; + + if (read_len > 0) + buf[read_len] = '\0'; + + if (buf[0] == '\0') { + pr_debug("hwmon_pmu: empty label file %s %s\n", + pmu->pmu.name, ent->d_name); + close(fd); + continue; + } + value->label = strdup(buf); + if (!value->label) { + pr_debug("hwmon_pmu: memory allocation failure\n"); + close(fd); + continue; + } + snprintf(buf, sizeof(buf), "%s_%s", hwmon_type_strs[type], value->label); + fix_name(buf); + value->name = strdup(buf); + if (!value->name) + pr_debug("hwmon_pmu: memory allocation failure\n"); + close(fd); + } + } + if (hashmap__size(&pmu->events) == 0) + pr_debug2("hwmon_pmu: %s has no events\n", pmu->pmu.name); + + hashmap__for_each_entry_safe((&pmu->events), cur, tmp, bkt) { + union hwmon_pmu_event_key key = { + .type_and_num = cur->key, + }; + struct hwmon_pmu_event_value *value = cur->pvalue; + + if (!test_bit(HWMON_ITEM_INPUT, value->items)) { + pr_debug("hwmon_pmu: %s removing event '%s%d' that has no input file\n", + pmu->pmu.name, hwmon_type_strs[key.type], key.num); + hashmap__delete(&pmu->events, key.type_and_num, &key, &value); + zfree(&value->label); + zfree(&value->name); + free(value); + } + } + pmu->pmu.sysfs_aliases_loaded = true; + +err_out: + closedir(dir); + return err; +} + +struct perf_pmu *hwmon_pmu__new(struct list_head *pmus, int hwmon_dir, const char *sysfs_name, const char *name) +{ + char buf[32]; + struct hwmon_pmu *hwm; + + hwm = zalloc(sizeof(*hwm)); + if (!hwm) + return NULL; + + hwm->hwmon_dir_fd = hwmon_dir; + hwm->pmu.type = PERF_PMU_TYPE_HWMON_START + strtoul(sysfs_name + 5, NULL, 10); + if (hwm->pmu.type > PERF_PMU_TYPE_HWMON_END) { + pr_err("Unable to encode hwmon type from %s in valid PMU type\n", sysfs_name); + goto err_out; + } + snprintf(buf, sizeof(buf), "hwmon_%s", name); + fix_name(buf + 6); + hwm->pmu.name = strdup(buf); + if (!hwm->pmu.name) + goto err_out; + hwm->pmu.alias_name = strdup(sysfs_name); + if (!hwm->pmu.alias_name) + goto err_out; + hwm->pmu.cpus = perf_cpu_map__new("0"); + if (!hwm->pmu.cpus) + goto err_out; + INIT_LIST_HEAD(&hwm->pmu.format); + INIT_LIST_HEAD(&hwm->pmu.aliases); + INIT_LIST_HEAD(&hwm->pmu.caps); + hashmap__init(&hwm->events, hwmon_pmu__event_hashmap_hash, + hwmon_pmu__event_hashmap_equal, /*ctx=*/NULL); + + list_add_tail(&hwm->pmu.list, pmus); + return &hwm->pmu; +err_out: + free((char *)hwm->pmu.name); + free(hwm->pmu.alias_name); + free(hwm); + close(hwmon_dir); + return NULL; +} + +void hwmon_pmu__exit(struct perf_pmu *pmu) +{ + struct hwmon_pmu *hwm = container_of(pmu, struct hwmon_pmu, pmu); + struct hashmap_entry *cur, *tmp; + size_t bkt; + + hashmap__for_each_entry_safe((&hwm->events), cur, tmp, bkt) { + struct hwmon_pmu_event_value *value = cur->pvalue; + + zfree(&value->label); + zfree(&value->name); + free(value); + } + hashmap__clear(&hwm->events); + close(hwm->hwmon_dir_fd); +} + +static size_t hwmon_pmu__describe_items(struct hwmon_pmu *hwm, char *out_buf, size_t out_buf_len, + union hwmon_pmu_event_key key, + const unsigned long *items, bool is_alarm) +{ + size_t bit; + char buf[64]; + size_t len = 0; + + for_each_set_bit(bit, items, HWMON_ITEM__MAX) { + int fd; + + if (bit == HWMON_ITEM_LABEL || bit == HWMON_ITEM_INPUT) + continue; + + snprintf(buf, sizeof(buf), "%s%d_%s%s", + hwmon_type_strs[key.type], + key.num, + hwmon_item_strs[bit], + is_alarm ? "_alarm" : ""); + fd = openat(hwm->hwmon_dir_fd, buf, O_RDONLY); + if (fd > 0) { + ssize_t read_len = read(fd, buf, sizeof(buf)); + + while (read_len > 0 && buf[read_len - 1] == '\n') + read_len--; + + if (read_len > 0) { + long long val; + + buf[read_len] = '\0'; + val = strtoll(buf, /*endptr=*/NULL, 10); + len += snprintf(out_buf + len, out_buf_len - len, "%s%s%s=%g%s", + len == 0 ? " " : ", ", + hwmon_item_strs[bit], + is_alarm ? "_alarm" : "", + (double)val / 1000.0, + hwmon_units[key.type]); + } + close(fd); + } + } + return len; +} + +int hwmon_pmu__for_each_event(struct perf_pmu *pmu, void *state, pmu_event_callback cb) +{ + struct hwmon_pmu *hwm = container_of(pmu, struct hwmon_pmu, pmu); + struct hashmap_entry *cur; + size_t bkt; + + if (hwmon_pmu__read_events(hwm)) + return false; + + hashmap__for_each_entry((&hwm->events), cur, bkt) { + static const char *const hwmon_scale_units[HWMON_TYPE_MAX] = { + NULL, + "0.001V", /* cpu */ + "0.001A", /* curr */ + "0.001J", /* energy */ + "1rpm", /* fan */ + "0.001%", /* humidity */ + "0.001V", /* in */ + NULL, /* intrusion */ + "0.001W", /* power */ + "1Hz", /* pwm */ + "0.001'C", /* temp */ + }; + static const char *const hwmon_desc[HWMON_TYPE_MAX] = { + NULL, + "CPU core reference voltage", /* cpu */ + "Current", /* curr */ + "Cumulative energy use", /* energy */ + "Fan", /* fan */ + "Humidity", /* humidity */ + "Voltage", /* in */ + "Chassis intrusion detection", /* intrusion */ + "Power use", /* power */ + "Pulse width modulation fan control", /* pwm */ + "Temperature", /* temp */ + }; + char alias_buf[64]; + char desc_buf[256]; + char encoding_buf[128]; + union hwmon_pmu_event_key key = { + .type_and_num = cur->key, + }; + struct hwmon_pmu_event_value *value = cur->pvalue; + struct pmu_event_info info = { + .pmu = pmu, + .name = value->name, + .alias = alias_buf, + .scale_unit = hwmon_scale_units[key.type], + .desc = desc_buf, + .long_desc = NULL, + .encoding_desc = encoding_buf, + .topic = "hwmon", + .pmu_name = pmu->name, + .event_type_desc = "Hwmon event", + }; + int ret; + size_t len; + + len = snprintf(alias_buf, sizeof(alias_buf), "%s%d", + hwmon_type_strs[key.type], key.num); + if (!info.name) { + info.name = info.alias; + info.alias = NULL; + } + + len = snprintf(desc_buf, sizeof(desc_buf), "%s in unit %s named %s.", + hwmon_desc[key.type], + pmu->name + 6, + value->label ?: info.name); + + len += hwmon_pmu__describe_items(hwm, desc_buf + len, sizeof(desc_buf) - len, + key, value->items, /*is_alarm=*/false); + + len += hwmon_pmu__describe_items(hwm, desc_buf + len, sizeof(desc_buf) - len, + key, value->alarm_items, /*is_alarm=*/true); + + snprintf(encoding_buf, sizeof(encoding_buf), "%s/config=0x%lx/", + pmu->name, cur->key); + + ret = cb(state, &info); + if (ret) + return ret; + } + return 0; +} + +size_t hwmon_pmu__num_events(struct perf_pmu *pmu) +{ + struct hwmon_pmu *hwm = container_of(pmu, struct hwmon_pmu, pmu); + + hwmon_pmu__read_events(hwm); + return hashmap__size(&hwm->events); +} + +bool hwmon_pmu__have_event(struct perf_pmu *pmu, const char *name) +{ + struct hwmon_pmu *hwm = container_of(pmu, struct hwmon_pmu, pmu); + enum hwmon_type type; + int number; + union hwmon_pmu_event_key key = { .type_and_num = 0 }; + struct hashmap_entry *cur; + size_t bkt; + + if (!parse_hwmon_filename(name, &type, &number, /*item=*/NULL, /*is_alarm=*/NULL)) + return false; + + if (hwmon_pmu__read_events(hwm)) + return false; + + key.type = type; + key.num = number; + if (hashmap_find(&hwm->events, key.type_and_num, /*value=*/NULL)) + return true; + if (key.num != -1) + return false; + /* Item is of form <type>_ which means we should match <type>_<label>. */ + hashmap__for_each_entry((&hwm->events), cur, bkt) { + struct hwmon_pmu_event_value *value = cur->pvalue; + + key.type_and_num = cur->key; + if (key.type == type && value->name && !strcasecmp(name, value->name)) + return true; + } + return false; +} + +static int hwmon_pmu__config_term(const struct hwmon_pmu *hwm, + struct perf_event_attr *attr, + struct parse_events_term *term, + struct parse_events_error *err) +{ + if (term->type_term == PARSE_EVENTS__TERM_TYPE_USER) { + enum hwmon_type type; + int number; + + if (parse_hwmon_filename(term->config, &type, &number, + /*item=*/NULL, /*is_alarm=*/NULL)) { + if (number == -1) { + /* + * Item is of form <type>_ which means we should + * match <type>_<label>. + */ + struct hashmap_entry *cur; + size_t bkt; + + attr->config = 0; + hashmap__for_each_entry((&hwm->events), cur, bkt) { + union hwmon_pmu_event_key key = { + .type_and_num = cur->key, + }; + struct hwmon_pmu_event_value *value = cur->pvalue; + + if (key.type == type && value->name && + !strcasecmp(term->config, value->name)) { + attr->config = key.type_and_num; + break; + } + } + if (attr->config == 0) + return -EINVAL; + } else { + union hwmon_pmu_event_key key = { + .type_and_num = 0, + }; + + key.type = type; + key.num = number; + attr->config = key.type_and_num; + } + return 0; + } + } + if (err) { + char *err_str; + + parse_events_error__handle(err, term->err_val, + asprintf(&err_str, + "unexpected hwmon event term (%s) %s", + parse_events__term_type_str(term->type_term), + term->config) < 0 + ? strdup("unexpected hwmon event term") + : err_str, + NULL); + } + return -EINVAL; +} + +int hwmon_pmu__config_terms(const struct perf_pmu *pmu, + struct perf_event_attr *attr, + struct parse_events_terms *terms, + struct parse_events_error *err) +{ + struct hwmon_pmu *hwm = container_of(pmu, struct hwmon_pmu, pmu); + struct parse_events_term *term; + int ret; + + ret = hwmon_pmu__read_events(hwm); + if (ret) + return ret; + + list_for_each_entry(term, &terms->terms, list) { + if (hwmon_pmu__config_term(hwm, attr, term, err)) + return -EINVAL; + } + + return 0; + +} + +int hwmon_pmu__check_alias(struct parse_events_terms *terms, struct perf_pmu_info *info, + struct parse_events_error *err) +{ + struct parse_events_term *term = + list_first_entry(&terms->terms, struct parse_events_term, list); + + if (term->type_term == PARSE_EVENTS__TERM_TYPE_USER) { + enum hwmon_type type; + int number; + + if (parse_hwmon_filename(term->config, &type, &number, + /*item=*/NULL, /*is_alarm=*/NULL)) { + info->unit = hwmon_units[type]; + if (type == HWMON_TYPE_FAN || type == HWMON_TYPE_PWM || + type == HWMON_TYPE_INTRUSION) + info->scale = 1; + else + info->scale = 0.001; + } + return 0; + } + if (err) { + char *err_str; + + parse_events_error__handle(err, term->err_val, + asprintf(&err_str, + "unexpected hwmon event term (%s) %s", + parse_events__term_type_str(term->type_term), + term->config) < 0 + ? strdup("unexpected hwmon event term") + : err_str, + NULL); + } + return -EINVAL; +} + +int perf_pmus__read_hwmon_pmus(struct list_head *pmus) +{ + char *line = NULL; + DIR *class_hwmon_dir; + struct dirent *class_hwmon_ent; + char buf[PATH_MAX]; + const char *sysfs = sysfs__mountpoint(); + + if (!sysfs) + return 0; + + scnprintf(buf, sizeof(buf), "%s/class/hwmon/", sysfs); + class_hwmon_dir = opendir(buf); + if (!class_hwmon_dir) + return 0; + + while ((class_hwmon_ent = readdir(class_hwmon_dir)) != NULL) { + size_t line_len; + int hwmon_dir, name_fd; + struct io io; + + if (class_hwmon_ent->d_type != DT_LNK) + continue; + + scnprintf(buf, sizeof(buf), "%s/class/hwmon/%s", sysfs, class_hwmon_ent->d_name); + hwmon_dir = open(buf, O_DIRECTORY); + if (hwmon_dir == -1) { + pr_debug("hwmon_pmu: not a directory: '%s/class/hwmon/%s'\n", + sysfs, class_hwmon_ent->d_name); + continue; + } + name_fd = openat(hwmon_dir, "name", O_RDONLY); + if (name_fd == -1) { + pr_debug("hwmon_pmu: failure to open '%s/class/hwmon/%s/name'\n", + sysfs, class_hwmon_ent->d_name); + close(hwmon_dir); + continue; + } + io__init(&io, name_fd, buf, sizeof(buf)); + io__getline(&io, &line, &line_len); + if (line_len > 0 && line[line_len - 1] == '\n') + line[line_len - 1] = '\0'; + hwmon_pmu__new(pmus, hwmon_dir, class_hwmon_ent->d_name, line); + close(name_fd); + } + free(line); + closedir(class_hwmon_dir); + return 0; +} + +#define FD(e, x, y) (*(int *)xyarray__entry(e->core.fd, x, y)) + +int evsel__hwmon_pmu_open(struct evsel *evsel, + struct perf_thread_map *threads, + int start_cpu_map_idx, int end_cpu_map_idx) +{ + struct hwmon_pmu *hwm = container_of(evsel->pmu, struct hwmon_pmu, pmu); + union hwmon_pmu_event_key key = { + .type_and_num = evsel->core.attr.config, + }; + int idx = 0, thread = 0, nthreads, err = 0; + + nthreads = perf_thread_map__nr(threads); + for (idx = start_cpu_map_idx; idx < end_cpu_map_idx; idx++) { + for (thread = 0; thread < nthreads; thread++) { + char buf[64]; + int fd; + + snprintf(buf, sizeof(buf), "%s%d_input", + hwmon_type_strs[key.type], key.num); + + fd = openat(hwm->hwmon_dir_fd, buf, O_RDONLY); + FD(evsel, idx, thread) = fd; + if (fd < 0) { + err = -errno; + goto out_close; + } + } + } + return 0; +out_close: + if (err) + threads->err_thread = thread; + + do { + while (--thread >= 0) { + if (FD(evsel, idx, thread) >= 0) + close(FD(evsel, idx, thread)); + FD(evsel, idx, thread) = -1; + } + thread = nthreads; + } while (--idx >= 0); + return err; +} + +int evsel__hwmon_pmu_read(struct evsel *evsel, int cpu_map_idx, int thread) +{ + char buf[32]; + int fd; + ssize_t len; + struct perf_counts_values *count, *old_count = NULL; + + if (evsel->prev_raw_counts) + old_count = perf_counts(evsel->prev_raw_counts, cpu_map_idx, thread); + + count = perf_counts(evsel->counts, cpu_map_idx, thread); + fd = FD(evsel, cpu_map_idx, thread); + len = pread(fd, buf, sizeof(buf), 0); + if (len <= 0) { + count->lost++; + return -EINVAL; + } + buf[len] = '\0'; + if (old_count) { + count->val = old_count->val + strtoll(buf, NULL, 10); + count->run = old_count->run + 1; + count->ena = old_count->ena + 1; + } else { + count->val = strtoll(buf, NULL, 10); + count->run++; + count->ena++; + } + return 0; +} diff --git a/tools/perf/util/hwmon_pmu.h b/tools/perf/util/hwmon_pmu.h new file mode 100644 index 000000000000..882566846df4 --- /dev/null +++ b/tools/perf/util/hwmon_pmu.h @@ -0,0 +1,151 @@ +/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ +#ifndef __HWMON_PMU_H +#define __HWMON_PMU_H + +#include "pmu.h" +#include <stdbool.h> + +struct list_head; +struct perf_thread_map; + +/** + * enum hwmon_type: + * + * As described in Documentation/hwmon/sysfs-interface.rst hwmon events are + * defined over multiple files of the form <type><num>_<item>. This enum + * captures potential <type> values. + * + * This enum is exposed for testing. + */ +enum hwmon_type { + HWMON_TYPE_NONE, + + HWMON_TYPE_CPU, + HWMON_TYPE_CURR, + HWMON_TYPE_ENERGY, + HWMON_TYPE_FAN, + HWMON_TYPE_HUMIDITY, + HWMON_TYPE_IN, + HWMON_TYPE_INTRUSION, + HWMON_TYPE_POWER, + HWMON_TYPE_PWM, + HWMON_TYPE_TEMP, + + HWMON_TYPE_MAX +}; + +/** + * enum hwmon_item: + * + * Similar to enum hwmon_type but describes the item part of a a sysfs filename. + * + * This enum is exposed for testing. + */ +enum hwmon_item { + HWMON_ITEM_NONE, + + HWMON_ITEM_ACCURACY, + HWMON_ITEM_ALARM, + HWMON_ITEM_AUTO_CHANNELS_TEMP, + HWMON_ITEM_AVERAGE, + HWMON_ITEM_AVERAGE_HIGHEST, + HWMON_ITEM_AVERAGE_INTERVAL, + HWMON_ITEM_AVERAGE_INTERVAL_MAX, + HWMON_ITEM_AVERAGE_INTERVAL_MIN, + HWMON_ITEM_AVERAGE_LOWEST, + HWMON_ITEM_AVERAGE_MAX, + HWMON_ITEM_AVERAGE_MIN, + HWMON_ITEM_BEEP, + HWMON_ITEM_CAP, + HWMON_ITEM_CAP_HYST, + HWMON_ITEM_CAP_MAX, + HWMON_ITEM_CAP_MIN, + HWMON_ITEM_CRIT, + HWMON_ITEM_CRIT_HYST, + HWMON_ITEM_DIV, + HWMON_ITEM_EMERGENCY, + HWMON_ITEM_EMERGENCY_HIST, + HWMON_ITEM_ENABLE, + HWMON_ITEM_FAULT, + HWMON_ITEM_FREQ, + HWMON_ITEM_HIGHEST, + HWMON_ITEM_INPUT, + HWMON_ITEM_LABEL, + HWMON_ITEM_LCRIT, + HWMON_ITEM_LCRIT_HYST, + HWMON_ITEM_LOWEST, + HWMON_ITEM_MAX, + HWMON_ITEM_MAX_HYST, + HWMON_ITEM_MIN, + HWMON_ITEM_MIN_HYST, + HWMON_ITEM_MOD, + HWMON_ITEM_OFFSET, + HWMON_ITEM_PULSES, + HWMON_ITEM_RATED_MAX, + HWMON_ITEM_RATED_MIN, + HWMON_ITEM_RESET_HISTORY, + HWMON_ITEM_TARGET, + HWMON_ITEM_TYPE, + HWMON_ITEM_VID, + + HWMON_ITEM__MAX, +}; + +bool perf_pmu__is_hwmon(const struct perf_pmu *pmu); +bool evsel__is_hwmon(const struct evsel *evsel); + +/** + * parse_hwmon_filename() - Parse filename into constituent parts. + * + * @filename: To be parsed, of the form <type><number>_<item>. + * @type: The type defined from the parsed file name. + * @number: The number of the type, for example there may be more than 1 fan. + * @item: A hwmon <type><number> may have multiple associated items. + * @alarm: Is the filename for an alarm value? + * + * An example of a hwmon filename is "temp1_input". The type is temp for a + * temperature value. The number is 1. The item within the file is an input + * value - the temperature itself. This file doesn't contain an alarm value. + * + * Exposed for testing. + */ +bool parse_hwmon_filename(const char *filename, + enum hwmon_type *type, + int *number, + enum hwmon_item *item, + bool *alarm); + +/** + * hwmon_pmu__new() - Allocate and construct a hwmon PMU. + * + * @pmus: The list of PMUs to be added to. + * @hwmon_dir: An O_DIRECTORY file descriptor for a hwmon directory. + * @sysfs_name: Name of the hwmon sysfs directory like hwmon0. + * @name: The contents of the "name" file in the hwmon directory. + * + * Exposed for testing. Regular construction should happen via + * perf_pmus__read_hwmon_pmus. + */ +struct perf_pmu *hwmon_pmu__new(struct list_head *pmus, int hwmon_dir, + const char *sysfs_name, const char *name); +void hwmon_pmu__exit(struct perf_pmu *pmu); + +int hwmon_pmu__for_each_event(struct perf_pmu *pmu, void *state, pmu_event_callback cb); +size_t hwmon_pmu__num_events(struct perf_pmu *pmu); +bool hwmon_pmu__have_event(struct perf_pmu *pmu, const char *name); +int hwmon_pmu__config_terms(const struct perf_pmu *pmu, + struct perf_event_attr *attr, + struct parse_events_terms *terms, + struct parse_events_error *err); +int hwmon_pmu__check_alias(struct parse_events_terms *terms, struct perf_pmu_info *info, + struct parse_events_error *err); + +int perf_pmus__read_hwmon_pmus(struct list_head *pmus); + + +int evsel__hwmon_pmu_open(struct evsel *evsel, + struct perf_thread_map *threads, + int start_cpu_map_idx, int end_cpu_map_idx); +int evsel__hwmon_pmu_read(struct evsel *evsel, int cpu_map_idx, int thread); + +#endif /* __HWMON_PMU_H */ diff --git a/tools/perf/util/include/dwarf-regs.h b/tools/perf/util/include/dwarf-regs.h index 75b28dcc8317..6f1b9f6b2466 100644 --- a/tools/perf/util/include/dwarf-regs.h +++ b/tools/perf/util/include/dwarf-regs.h @@ -2,52 +2,132 @@ #ifndef _PERF_DWARF_REGS_H_ #define _PERF_DWARF_REGS_H_ #include "annotate.h" +#include <elf.h> + +#ifndef EM_AARCH64 +#define EM_AARCH64 183 /* ARM 64 bit */ +#endif + +#ifndef EM_CSKY +#define EM_CSKY 252 /* C-SKY */ +#endif +#ifndef EF_CSKY_ABIV1 +#define EF_CSKY_ABIV1 0X10000000 +#endif +#ifndef EF_CSKY_ABIV2 +#define EF_CSKY_ABIV2 0X20000000 +#endif + +#ifndef EM_LOONGARCH +#define EM_LOONGARCH 258 /* LoongArch */ +#endif + +/* EM_HOST gives the ELF machine for host, EF_HOST gives additional flags. */ +#if defined(__x86_64__) + #define EM_HOST EM_X86_64 +#elif defined(__i386__) + #define EM_HOST EM_386 +#elif defined(__aarch64__) + #define EM_HOST EM_AARCH64 +#elif defined(__arm__) + #define EM_HOST EM_ARM +#elif defined(__alpha__) + #define EM_HOST EM_ALPHA +#elif defined(__arc__) + #define EM_HOST EM_ARC +#elif defined(__AVR__) + #define EM_HOST EM_AVR +#elif defined(__AVR32__) + #define EM_HOST EM_AVR32 +#elif defined(__bfin__) + #define EM_HOST EM_BLACKFIN +#elif defined(__csky__) + #define EM_HOST EM_CSKY + #if defined(__CSKYABIV2__) + #define EF_HOST EF_CSKY_ABIV2 + #else + #define EF_HOST EF_CSKY_ABIV1 + #endif +#elif defined(__cris__) + #define EM_HOST EM_CRIS +#elif defined(__hppa__) // HP PA-RISC + #define EM_HOST EM_PARISC +#elif defined(__loongarch__) + #define EM_HOST EM_LOONGARCH +#elif defined(__mips__) + #define EM_HOST EM_MIPS +#elif defined(__m32r__) + #define EM_HOST EM_M32R +#elif defined(__microblaze__) + #define EM_HOST EM_MICROBLAZE +#elif defined(__MSP430__) + #define EM_HOST EM_MSP430 +#elif defined(__powerpc64__) + #define EM_HOST EM_PPC64 +#elif defined(__powerpc__) + #define EM_HOST EM_PPC +#elif defined(__riscv) + #define EM_HOST EM_RISCV +#elif defined(__s390x__) + #define EM_HOST EM_S390 +#elif defined(__sh__) + #define EM_HOST EM_SH +#elif defined(__sparc64__) || defined(__sparc__) + #define EM_HOST EM_SPARC +#elif defined(__xtensa__) + #define EM_HOST EM_XTENSA +#else + /* Unknown host ELF machine type. */ + #define EM_HOST EM_NONE +#endif + +#if !defined(EF_HOST) + #define EF_HOST 0 +#endif #define DWARF_REG_PC 0xd3af9c /* random number */ #define DWARF_REG_FB 0xd3affb /* random number */ -#ifdef HAVE_DWARF_SUPPORT -const char *get_arch_regstr(unsigned int n); -/* - * get_dwarf_regstr - Returns ftrace register string from DWARF regnum - * n: DWARF register number - * machine: ELF machine signature (EM_*) +#ifdef HAVE_LIBDW_SUPPORT +const char *get_csky_regstr(unsigned int n, unsigned int flags); + +/** + * get_dwarf_regstr() - Returns ftrace register string from DWARF regnum. + * @n: DWARF register number. + * @machine: ELF machine signature (EM_*). + * @flags: ELF flags for things like ABI differences. */ -const char *get_dwarf_regstr(unsigned int n, unsigned int machine); +const char *get_dwarf_regstr(unsigned int n, unsigned int machine, unsigned int flags); +int get_x86_regnum(const char *name); + +#if !defined(__x86_64__) && !defined(__i386__) int get_arch_regnum(const char *name); +#endif + /* * get_dwarf_regnum - Returns DWARF regnum from register name * name: architecture register name * machine: ELF machine signature (EM_*) */ -int get_dwarf_regnum(const char *name, unsigned int machine); +int get_dwarf_regnum(const char *name, unsigned int machine, unsigned int flags); + +void get_powerpc_regs(u32 raw_insn, int is_source, struct annotated_op_loc *op_loc); -#else /* HAVE_DWARF_SUPPORT */ +#else /* HAVE_LIBDW_SUPPORT */ static inline int get_dwarf_regnum(const char *name __maybe_unused, - unsigned int machine __maybe_unused) + unsigned int machine __maybe_unused, + unsigned int flags __maybe_unused) { return -1; } -#endif -#if !defined(__powerpc__) || !defined(HAVE_DWARF_SUPPORT) static inline void get_powerpc_regs(u32 raw_insn __maybe_unused, int is_source __maybe_unused, struct annotated_op_loc *op_loc __maybe_unused) { return; } -#else -void get_powerpc_regs(u32 raw_insn, int is_source, struct annotated_op_loc *op_loc); #endif -#ifdef HAVE_ARCH_REGS_QUERY_REGISTER_OFFSET -/* - * Arch should support fetching the offset of a register in pt_regs - * by its name. See kernel's regs_query_register_offset in - * arch/xxx/kernel/ptrace.c. - */ -int regs_query_register_offset(const char *name); -#endif #endif diff --git a/tools/perf/util/intel-bts.c b/tools/perf/util/intel-bts.c index 27d9b5c9fec8..a7c589fecb98 100644 --- a/tools/perf/util/intel-bts.c +++ b/tools/perf/util/intel-bts.c @@ -100,7 +100,7 @@ static void intel_bts_dump(struct intel_bts *bts __maybe_unused, else sz = len; printf("."); - color_fprintf(stdout, color, " %08x: ", pos); + color_fprintf(stdout, color, " %08zx: ", pos); for (i = 0; i < sz; i++) color_fprintf(stdout, color, " %02x", buf[i]); for (; i < br_sz; i++) @@ -808,7 +808,7 @@ static int intel_bts_synth_events(struct intel_bts *bts, static const char * const intel_bts_info_fmts[] = { [INTEL_BTS_PMU_TYPE] = " PMU Type %"PRId64"\n", [INTEL_BTS_TIME_SHIFT] = " Time Shift %"PRIu64"\n", - [INTEL_BTS_TIME_MULT] = " Time Muliplier %"PRIu64"\n", + [INTEL_BTS_TIME_MULT] = " Time Multiplier %"PRIu64"\n", [INTEL_BTS_TIME_ZERO] = " Time Zero %"PRIu64"\n", [INTEL_BTS_CAP_USER_TIME_ZERO] = " Cap Time Zero %"PRId64"\n", [INTEL_BTS_SNAPSHOT_MODE] = " Snapshot mode %"PRId64"\n", diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c index fd2597613f3d..30be6dfe09eb 100644 --- a/tools/perf/util/intel-pt.c +++ b/tools/perf/util/intel-pt.c @@ -249,7 +249,7 @@ static void intel_pt_dump(struct intel_pt *pt __maybe_unused, else pkt_len = 1; printf("."); - color_fprintf(stdout, color, " %08x: ", pos); + color_fprintf(stdout, color, " %08zx: ", pos); for (i = 0; i < pkt_len; i++) color_fprintf(stdout, color, " %02x", buf[i]); for (; i < 16; i++) @@ -4110,7 +4110,7 @@ static int intel_pt_parse_vm_tm_corr_args(struct intel_pt *pt) static const char * const intel_pt_info_fmts[] = { [INTEL_PT_PMU_TYPE] = " PMU Type %"PRId64"\n", [INTEL_PT_TIME_SHIFT] = " Time Shift %"PRIu64"\n", - [INTEL_PT_TIME_MULT] = " Time Muliplier %"PRIu64"\n", + [INTEL_PT_TIME_MULT] = " Time Multiplier %"PRIu64"\n", [INTEL_PT_TIME_ZERO] = " Time Zero %"PRIu64"\n", [INTEL_PT_CAP_USER_TIME_ZERO] = " Cap Time Zero %"PRId64"\n", [INTEL_PT_TSC_BIT] = " TSC bit %#"PRIx64"\n", diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index fad227b625d1..27d5345d2b30 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -134,6 +134,8 @@ struct machine *machine__new_host(void) if (machine__create_kernel_maps(machine) < 0) goto out_delete; + + machine->env = &perf_env; } return machine; @@ -1343,7 +1345,7 @@ static int maps__set_module_path(struct maps *maps, const char *path, struct kmo * we need to update the symtab_type if needed. */ if (m->comp && is_kmod_dso(dso)) { - dso__set_symtab_type(dso, dso__symtab_type(dso)); + dso__set_symtab_type(dso, dso__symtab_type(dso)+1); dso__set_comp(dso, m->comp); } map__put(map); diff --git a/tools/perf/util/mem-events.c b/tools/perf/util/mem-events.c index 051feb93ed8d..bf5090f5220b 100644 --- a/tools/perf/util/mem-events.c +++ b/tools/perf/util/mem-events.c @@ -366,6 +366,12 @@ static const char * const mem_lvl[] = { }; static const char * const mem_lvlnum[] = { + [PERF_MEM_LVLNUM_L1] = "L1", + [PERF_MEM_LVLNUM_L2] = "L2", + [PERF_MEM_LVLNUM_L3] = "L3", + [PERF_MEM_LVLNUM_L4] = "L4", + [PERF_MEM_LVLNUM_L2_MHB] = "L2 MHB", + [PERF_MEM_LVLNUM_MSC] = "Memory-side Cache", [PERF_MEM_LVLNUM_UNC] = "Uncached", [PERF_MEM_LVLNUM_CXL] = "CXL", [PERF_MEM_LVLNUM_IO] = "I/O", @@ -448,7 +454,7 @@ int perf_mem__lvl_scnprintf(char *out, size_t sz, const struct mem_info *mem_inf if (mem_lvlnum[lvl]) l += scnprintf(out + l, sz - l, mem_lvlnum[lvl]); else - l += scnprintf(out + l, sz - l, "L%d", lvl); + l += scnprintf(out + l, sz - l, "Unknown level %d", lvl); l += scnprintf(out + l, sz - l, " %s", hit_miss); return l; diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c index 4dff3e925a47..46920ebadfd1 100644 --- a/tools/perf/util/metricgroup.c +++ b/tools/perf/util/metricgroup.c @@ -14,6 +14,7 @@ #include "pmus.h" #include "print-events.h" #include "smt.h" +#include "tool_pmu.h" #include "expr.h" #include "rblist.h" #include <string.h> @@ -297,8 +298,8 @@ static int setup_metric_events(const char *pmu, struct hashmap *ids, struct expr_id_data *val_ptr; /* Don't match events for the wrong hybrid PMU. */ - if (!all_pmus && ev->pmu_name && evsel__is_hybrid(ev) && - strcmp(ev->pmu_name, pmu)) + if (!all_pmus && ev->pmu && evsel__is_hybrid(ev) && + strcmp(ev->pmu->name, pmu)) continue; /* * Check for duplicate events with the same name. For @@ -673,20 +674,20 @@ static int metricgroup__build_event_string(struct strbuf *events, struct hashmap_entry *cur; size_t bkt; bool no_group = true, has_tool_events = false; - bool tool_events[PERF_TOOL_MAX] = {false}; + bool tool_events[TOOL_PMU__EVENT_MAX] = {false}; int ret = 0; #define RETURN_IF_NON_ZERO(x) do { if (x) return x; } while (0) hashmap__for_each_entry(ctx->ids, cur, bkt) { const char *sep, *rsep, *id = cur->pkey; - enum perf_tool_event ev; + enum tool_pmu_event ev; pr_debug("found event %s\n", id); /* Always move tool events outside of the group. */ - ev = perf_tool_event__from_str(id); - if (ev != PERF_TOOL_NONE) { + ev = tool_pmu__str_to_event(id); + if (ev != TOOL_PMU__EVENT_NONE) { has_tool_events = true; tool_events[ev] = true; continue; @@ -754,14 +755,14 @@ static int metricgroup__build_event_string(struct strbuf *events, if (has_tool_events) { int i; - perf_tool_event__for_each_event(i) { + tool_pmu__for_each_event(i) { if (tool_events[i]) { if (!no_group) { ret = strbuf_addch(events, ','); RETURN_IF_NON_ZERO(ret); } no_group = false; - ret = strbuf_addstr(events, perf_tool_event__to_str(i)); + ret = strbuf_addstr(events, tool_pmu__event_to_str(i)); RETURN_IF_NON_ZERO(ret); } } @@ -1147,14 +1148,14 @@ static int metric_list_cmp(void *priv __maybe_unused, const struct list_head *l, int i, left_count, right_count; left_count = hashmap__size(left->pctx->ids); - perf_tool_event__for_each_event(i) { - if (!expr__get_id(left->pctx, perf_tool_event__to_str(i), &data)) + tool_pmu__for_each_event(i) { + if (!expr__get_id(left->pctx, tool_pmu__event_to_str(i), &data)) left_count--; } right_count = hashmap__size(right->pctx->ids); - perf_tool_event__for_each_event(i) { - if (!expr__get_id(right->pctx, perf_tool_event__to_str(i), &data)) + tool_pmu__for_each_event(i) { + if (!expr__get_id(right->pctx, tool_pmu__event_to_str(i), &data)) right_count--; } @@ -1374,18 +1375,18 @@ static void metricgroup__free_metrics(struct list_head *metric_list) * to true if tool event is found. */ static void find_tool_events(const struct list_head *metric_list, - bool tool_events[PERF_TOOL_MAX]) + bool tool_events[TOOL_PMU__EVENT_MAX]) { struct metric *m; list_for_each_entry(m, metric_list, nd) { int i; - perf_tool_event__for_each_event(i) { + tool_pmu__for_each_event(i) { struct expr_id_data *data; if (!tool_events[i] && - !expr__get_id(m->pctx, perf_tool_event__to_str(i), &data)) + !expr__get_id(m->pctx, tool_pmu__event_to_str(i), &data)) tool_events[i] = true; } } @@ -1446,7 +1447,7 @@ err_out: */ static int parse_ids(bool metric_no_merge, bool fake_pmu, struct expr_parse_ctx *ids, const char *modifier, - bool group_events, const bool tool_events[PERF_TOOL_MAX], + bool group_events, const bool tool_events[TOOL_PMU__EVENT_MAX], struct evlist **out_evlist) { struct parse_events_error parse_error; @@ -1471,9 +1472,9 @@ static int parse_ids(bool metric_no_merge, bool fake_pmu, * event1 if #smt_on else 0 * Add a tool event to avoid a parse error on an empty string. */ - perf_tool_event__for_each_event(i) { + tool_pmu__for_each_event(i) { if (tool_events[i]) { - char *tmp = strdup(perf_tool_event__to_str(i)); + char *tmp = strdup(tool_pmu__event_to_str(i)); if (!tmp) return -ENOMEM; @@ -1535,7 +1536,7 @@ static int parse_groups(struct evlist *perf_evlist, struct evlist *combined_evlist = NULL; LIST_HEAD(metric_list); struct metric *m; - bool tool_events[PERF_TOOL_MAX] = {false}; + bool tool_events[TOOL_PMU__EVENT_MAX] = {false}; bool is_default = !strcmp(str, "Default"); int ret; diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 9a8be1e46d67..afeb8d815bbf 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -228,7 +228,7 @@ __add_event(struct list_head *list, int *idx, bool init_attr, const char *name, const char *metric_id, struct perf_pmu *pmu, struct list_head *config_terms, bool auto_merge_stats, - struct perf_cpu_map *cpu_list) + struct perf_cpu_map *cpu_list, u64 alternate_hw_config) { struct evsel *evsel; struct perf_cpu_map *cpus = perf_cpu_map__is_empty(cpu_list) && pmu ? pmu->cpus : cpu_list; @@ -263,7 +263,7 @@ __add_event(struct list_head *list, int *idx, evsel->core.is_pmu_core = pmu ? pmu->is_core : false; evsel->auto_merge_stats = auto_merge_stats; evsel->pmu = pmu; - evsel->pmu_name = pmu ? strdup(pmu->name) : NULL; + evsel->alternate_hw_config = alternate_hw_config; if (name) evsel->name = strdup(name); @@ -286,47 +286,19 @@ struct evsel *parse_events__add_event(int idx, struct perf_event_attr *attr, { return __add_event(/*list=*/NULL, &idx, attr, /*init_attr=*/false, name, metric_id, pmu, /*config_terms=*/NULL, - /*auto_merge_stats=*/false, /*cpu_list=*/NULL); + /*auto_merge_stats=*/false, /*cpu_list=*/NULL, + /*alternate_hw_config=*/PERF_COUNT_HW_MAX); } static int add_event(struct list_head *list, int *idx, struct perf_event_attr *attr, const char *name, - const char *metric_id, struct list_head *config_terms) + const char *metric_id, struct list_head *config_terms, + u64 alternate_hw_config) { return __add_event(list, idx, attr, /*init_attr*/true, name, metric_id, /*pmu=*/NULL, config_terms, - /*auto_merge_stats=*/false, /*cpu_list=*/NULL) ? 0 : -ENOMEM; -} - -static int add_event_tool(struct list_head *list, int *idx, - enum perf_tool_event tool_event) -{ - struct evsel *evsel; - struct perf_event_attr attr = { - .type = PERF_TYPE_SOFTWARE, - .config = PERF_COUNT_SW_DUMMY, - }; - struct perf_cpu_map *cpu_list = NULL; - - if (tool_event == PERF_TOOL_DURATION_TIME) { - /* Duration time is gathered globally, pretend it is only on CPU0. */ - cpu_list = perf_cpu_map__new("0"); - } - evsel = __add_event(list, idx, &attr, /*init_attr=*/true, /*name=*/NULL, - /*metric_id=*/NULL, /*pmu=*/NULL, - /*config_terms=*/NULL, /*auto_merge_stats=*/false, - cpu_list); - perf_cpu_map__put(cpu_list); - if (!evsel) - return -ENOMEM; - evsel->tool_event = tool_event; - if (tool_event == PERF_TOOL_DURATION_TIME - || tool_event == PERF_TOOL_USER_TIME - || tool_event == PERF_TOOL_SYSTEM_TIME) { - free((char *)evsel->unit); - evsel->unit = strdup("ns"); - } - return 0; + /*auto_merge_stats=*/false, /*cpu_list=*/NULL, + alternate_hw_config) ? 0 : -ENOMEM; } /** @@ -450,7 +422,7 @@ bool parse_events__filter_pmu(const struct parse_events_state *parse_state, static int parse_events_add_pmu(struct parse_events_state *parse_state, struct list_head *list, struct perf_pmu *pmu, const struct parse_events_terms *const_parsed_terms, - bool auto_merge_stats); + bool auto_merge_stats, u64 alternate_hw_config); int parse_events_add_cache(struct list_head *list, int *idx, const char *name, struct parse_events_state *parse_state, @@ -476,7 +448,8 @@ int parse_events_add_cache(struct list_head *list, int *idx, const char *name, */ ret = parse_events_add_pmu(parse_state, list, pmu, parsed_terms, - perf_pmu__auto_merge_stats(pmu)); + perf_pmu__auto_merge_stats(pmu), + /*alternate_hw_config=*/PERF_COUNT_HW_MAX); if (ret) return ret; continue; @@ -507,7 +480,8 @@ int parse_events_add_cache(struct list_head *list, int *idx, const char *name, if (__add_event(list, idx, &attr, /*init_attr*/true, config_name ?: name, metric_id, pmu, &config_terms, /*auto_merge_stats=*/false, - /*cpu_list=*/NULL) == NULL) + /*cpu_list=*/NULL, + /*alternate_hw_config=*/PERF_COUNT_HW_MAX) == NULL) return -ENOMEM; free_config_terms(&config_terms); @@ -772,7 +746,7 @@ int parse_events_add_breakpoint(struct parse_events_state *parse_state, name = get_config_name(head_config); return add_event(list, &parse_state->idx, &attr, name, /*mertic_id=*/NULL, - &config_terms); + &config_terms, /*alternate_hw_config=*/PERF_COUNT_HW_MAX); } static int check_type_val(struct parse_events_term *term, @@ -794,7 +768,7 @@ static int check_type_val(struct parse_events_term *term, static bool config_term_shrinked; -static const char *config_term_name(enum parse_events__term_type term_type) +const char *parse_events__term_type_str(enum parse_events__term_type term_type) { /* * Update according to parse-events.l @@ -880,7 +854,7 @@ config_term_avail(enum parse_events__term_type term_type, struct parse_events_er /* term_type is validated so indexing is safe */ if (asprintf(&err_str, "'%s' is not usable in 'perf stat'", - config_term_name(term_type)) >= 0) + parse_events__term_type_str(term_type)) >= 0) parse_events_error__handle(err, -1, err_str, NULL); return false; } @@ -1004,7 +978,7 @@ do { \ case PARSE_EVENTS__TERM_TYPE_HARDWARE: default: parse_events_error__handle(err, term->err_term, - strdup(config_term_name(term->type_term)), + strdup(parse_events__term_type_str(term->type_term)), parse_events_formats_error_string(NULL)); return -EINVAL; } @@ -1072,6 +1046,7 @@ static int config_term_pmu(struct perf_event_attr *attr, if (perf_pmu__have_event(pmu, term->config)) { term->type_term = PARSE_EVENTS__TERM_TYPE_USER; term->no_value = true; + term->alternate_hw_config = true; } else { attr->type = PERF_TYPE_HARDWARE; attr->config = term->val.num; @@ -1127,8 +1102,9 @@ static int config_term_tracepoint(struct perf_event_attr *attr, default: if (err) { parse_events_error__handle(err, term->err_term, - strdup(config_term_name(term->type_term)), - strdup("valid terms: call-graph,stack-size\n")); + strdup(parse_events__term_type_str(term->type_term)), + strdup("valid terms: call-graph,stack-size\n") + ); } return -EINVAL; } @@ -1384,8 +1360,9 @@ static int __parse_events_add_numeric(struct parse_events_state *parse_state, name = get_config_name(head_config); metric_id = get_config_metric_id(head_config); ret = __add_event(list, &parse_state->idx, &attr, /*init_attr*/true, name, - metric_id, pmu, &config_terms, /*auto_merge_stats=*/false, - /*cpu_list=*/NULL) ? 0 : -ENOMEM; + metric_id, pmu, &config_terms, /*auto_merge_stats=*/false, + /*cpu_list=*/NULL, /*alternate_hw_config=*/PERF_COUNT_HW_MAX + ) == NULL ? -ENOMEM : 0; free_config_terms(&config_terms); return ret; } @@ -1421,13 +1398,6 @@ int parse_events_add_numeric(struct parse_events_state *parse_state, type, /*extended_type=*/0, config, head_config); } -int parse_events_add_tool(struct parse_events_state *parse_state, - struct list_head *list, - int tool_event) -{ - return add_event_tool(list, &parse_state->idx, tool_event); -} - static bool config_term_percore(struct list_head *config_terms) { struct evsel_config_term *term; @@ -1443,7 +1413,7 @@ static bool config_term_percore(struct list_head *config_terms) static int parse_events_add_pmu(struct parse_events_state *parse_state, struct list_head *list, struct perf_pmu *pmu, const struct parse_events_terms *const_parsed_terms, - bool auto_merge_stats) + bool auto_merge_stats, u64 alternate_hw_config) { struct perf_event_attr attr; struct perf_pmu_info info; @@ -1480,7 +1450,7 @@ static int parse_events_add_pmu(struct parse_events_state *parse_state, /*init_attr=*/true, /*name=*/NULL, /*metric_id=*/NULL, pmu, /*config_terms=*/NULL, auto_merge_stats, - /*cpu_list=*/NULL); + /*cpu_list=*/NULL, alternate_hw_config); return evsel ? 0 : -ENOMEM; } @@ -1501,7 +1471,8 @@ static int parse_events_add_pmu(struct parse_events_state *parse_state, /* Look for event names in the terms and rewrite into format based terms. */ if (perf_pmu__check_alias(pmu, &parsed_terms, - &info, &alias_rewrote_terms, err)) { + &info, &alias_rewrote_terms, + &alternate_hw_config, err)) { parse_events_terms__exit(&parsed_terms); return -EINVAL; } @@ -1537,7 +1508,9 @@ static int parse_events_add_pmu(struct parse_events_state *parse_state, return -ENOMEM; } - if (perf_pmu__config(pmu, &attr, &parsed_terms, parse_state->error)) { + /* Skip configuring hard coded terms that were applied by config_attr. */ + if (perf_pmu__config(pmu, &attr, &parsed_terms, /*apply_hardcoded=*/false, + parse_state->error)) { free_config_terms(&config_terms); parse_events_terms__exit(&parsed_terms); return -EINVAL; @@ -1546,7 +1519,8 @@ static int parse_events_add_pmu(struct parse_events_state *parse_state, evsel = __add_event(list, &parse_state->idx, &attr, /*init_attr=*/true, get_config_name(&parsed_terms), get_config_metric_id(&parsed_terms), pmu, - &config_terms, auto_merge_stats, /*cpu_list=*/NULL); + &config_terms, auto_merge_stats, /*cpu_list=*/NULL, + alternate_hw_config); if (!evsel) { parse_events_terms__exit(&parsed_terms); return -ENOMEM; @@ -1567,7 +1541,7 @@ static int parse_events_add_pmu(struct parse_events_state *parse_state, } int parse_events_multi_pmu_add(struct parse_events_state *parse_state, - const char *event_name, + const char *event_name, u64 hw_config, const struct parse_events_terms *const_parsed_terms, struct list_head **listp, void *loc_) { @@ -1620,7 +1594,7 @@ int parse_events_multi_pmu_add(struct parse_events_state *parse_state, auto_merge_stats = perf_pmu__auto_merge_stats(pmu); if (!parse_events_add_pmu(parse_state, list, pmu, - &parsed_terms, auto_merge_stats)) { + &parsed_terms, auto_merge_stats, hw_config)) { struct strbuf sb; strbuf_init(&sb, /*hint=*/ 0); @@ -1633,7 +1607,7 @@ int parse_events_multi_pmu_add(struct parse_events_state *parse_state, if (parse_state->fake_pmu) { if (!parse_events_add_pmu(parse_state, list, perf_pmus__fake_pmu(), &parsed_terms, - /*auto_merge_stats=*/true)) { + /*auto_merge_stats=*/true, hw_config)) { struct strbuf sb; strbuf_init(&sb, /*hint=*/ 0); @@ -1674,13 +1648,15 @@ int parse_events_multi_pmu_add_or_add_pmu(struct parse_events_state *parse_state /* Attempt to add to list assuming event_or_pmu is a PMU name. */ pmu = perf_pmus__find(event_or_pmu); if (pmu && !parse_events_add_pmu(parse_state, *listp, pmu, const_parsed_terms, - /*auto_merge_stats=*/false)) + /*auto_merge_stats=*/false, + /*alternate_hw_config=*/PERF_COUNT_HW_MAX)) return 0; if (parse_state->fake_pmu) { if (!parse_events_add_pmu(parse_state, *listp, perf_pmus__fake_pmu(), const_parsed_terms, - /*auto_merge_stats=*/false)) + /*auto_merge_stats=*/false, + /*alternate_hw_config=*/PERF_COUNT_HW_MAX)) return 0; } @@ -1693,7 +1669,8 @@ int parse_events_multi_pmu_add_or_add_pmu(struct parse_events_state *parse_state if (!parse_events_add_pmu(parse_state, *listp, pmu, const_parsed_terms, - auto_merge_stats)) { + auto_merge_stats, + /*alternate_hw_config=*/PERF_COUNT_HW_MAX)) { ok++; parse_state->wild_card_pmus = true; } @@ -1704,7 +1681,8 @@ int parse_events_multi_pmu_add_or_add_pmu(struct parse_events_state *parse_state /* Failure to add, assume event_or_pmu is an event name. */ zfree(listp); - if (!parse_events_multi_pmu_add(parse_state, event_or_pmu, const_parsed_terms, listp, loc)) + if (!parse_events_multi_pmu_add(parse_state, event_or_pmu, PERF_COUNT_HW_MAX, + const_parsed_terms, listp, loc)) return 0; if (asprintf(&help, "Unable to find PMU or event on a PMU of '%s'", event_or_pmu) < 0) @@ -1755,14 +1733,10 @@ static int parse_events__modifier_list(struct parse_events_state *parse_state, int exclude = eu | ek | eh; int exclude_GH = group ? evsel->exclude_GH : 0; - if (mod.precise) { - /* use of precise requires exclude_guest */ - eG = 1; - } if (mod.user) { if (!exclude) exclude = eu = ek = eh = 1; - if (!exclude_GH && !perf_guest) + if (!exclude_GH && !perf_guest && exclude_GH_default) eG = 1; eu = 0; } @@ -2566,7 +2540,7 @@ int parse_events_term__num(struct parse_events_term **term, struct parse_events_term temp = { .type_val = PARSE_EVENTS__TERM_TYPE_NUM, .type_term = type_term, - .config = config ? : strdup(config_term_name(type_term)), + .config = config ? : strdup(parse_events__term_type_str(type_term)), .no_value = no_value, .err_term = loc_term ? loc_term->first_column : 0, .err_val = loc_val ? loc_val->first_column : 0, @@ -2600,7 +2574,7 @@ int parse_events_term__term(struct parse_events_term **term, void *loc_term, void *loc_val) { return parse_events_term__str(term, term_lhs, NULL, - strdup(config_term_name(term_rhs)), + strdup(parse_events__term_type_str(term_rhs)), loc_term, loc_val); } @@ -2707,7 +2681,8 @@ int parse_events_terms__to_strbuf(const struct parse_events_terms *terms, struct if (ret < 0) return ret; } else if ((unsigned int)term->type_term < __PARSE_EVENTS__TERM_TYPE_NR) { - ret = strbuf_addf(sb, "%s=", config_term_name(term->type_term)); + ret = strbuf_addf(sb, "%s=", + parse_events__term_type_str(term->type_term)); if (ret < 0) return ret; } @@ -2727,7 +2702,7 @@ static void config_terms_list(char *buf, size_t buf_sz) buf[0] = '\0'; for (i = 0; i < __PARSE_EVENTS__TERM_TYPE_NR; i++) { - const char *name = config_term_name(i); + const char *name = parse_events__term_type_str(i); if (!config_term_avail(i, NULL)) continue; diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h index 10cc9c433116..3f4334ec6231 100644 --- a/tools/perf/util/parse-events.h +++ b/tools/perf/util/parse-events.h @@ -127,6 +127,12 @@ struct parse_events_term { * value is assumed to be 1. An event name also has no value. */ bool no_value; + /** + * @alternate_hw_config: config is the event name but num is an + * alternate PERF_TYPE_HARDWARE config value which is often nice for the + * sake of quick matching. + */ + bool alternate_hw_config; }; struct parse_events_error { @@ -162,6 +168,8 @@ struct parse_events_state { bool wild_card_pmus; }; +const char *parse_events__term_type_str(enum parse_events__term_type term_type); + bool parse_events__filter_pmu(const struct parse_events_state *parse_state, const struct perf_pmu *pmu); void parse_events__shrink_config_terms(void); @@ -221,9 +229,6 @@ int parse_events_add_numeric(struct parse_events_state *parse_state, u32 type, u64 config, const struct parse_events_terms *head_config, bool wildcard); -int parse_events_add_tool(struct parse_events_state *parse_state, - struct list_head *list, - int tool_event); int parse_events_add_cache(struct list_head *list, int *idx, const char *name, struct parse_events_state *parse_state, struct parse_events_terms *parsed_terms); @@ -238,7 +243,7 @@ struct evsel *parse_events__add_event(int idx, struct perf_event_attr *attr, struct perf_pmu *pmu); int parse_events_multi_pmu_add(struct parse_events_state *parse_state, - const char *event_name, + const char *event_name, u64 hw_config, const struct parse_events_terms *const_parsed_terms, struct list_head **listp, void *loc); diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l index 5a0bcd7f166a..14e5bd856a18 100644 --- a/tools/perf/util/parse-events.l +++ b/tools/perf/util/parse-events.l @@ -121,14 +121,6 @@ static int sym(yyscan_t scanner, int type, int config) return type == PERF_TYPE_HARDWARE ? PE_VALUE_SYM_HW : PE_VALUE_SYM_SW; } -static int tool(yyscan_t scanner, enum perf_tool_event event) -{ - YYSTYPE *yylval = parse_events_get_lval(scanner); - - yylval->num = event; - return PE_VALUE_SYM_TOOL; -} - static int term(yyscan_t scanner, enum parse_events__term_type type) { YYSTYPE *yylval = parse_events_get_lval(scanner); @@ -404,9 +396,6 @@ cpu-migrations|migrations { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COU alignment-faults { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_ALIGNMENT_FAULTS); } emulation-faults { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_EMULATION_FAULTS); } dummy { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_DUMMY); } -duration_time { return tool(yyscanner, PERF_TOOL_DURATION_TIME); } -user_time { return tool(yyscanner, PERF_TOOL_USER_TIME); } -system_time { return tool(yyscanner, PERF_TOOL_SYSTEM_TIME); } bpf-output { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_BPF_OUTPUT); } cgroup-switches { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CGROUP_SWITCHES); } diff --git a/tools/perf/util/parse-events.y b/tools/perf/util/parse-events.y index b3c51f06cbdc..f888cbb076d6 100644 --- a/tools/perf/util/parse-events.y +++ b/tools/perf/util/parse-events.y @@ -56,7 +56,6 @@ static void free_list_evsel(struct list_head* list_evsel) %token PE_START_EVENTS PE_START_TERMS %token PE_VALUE PE_VALUE_SYM_HW PE_VALUE_SYM_SW PE_TERM -%token PE_VALUE_SYM_TOOL %token PE_EVENT_NAME %token PE_RAW PE_NAME %token PE_MODIFIER_EVENT PE_MODIFIER_BP PE_BP_COLON PE_BP_SLASH @@ -68,7 +67,6 @@ static void free_list_evsel(struct list_head* list_evsel) %type <num> PE_VALUE %type <num> PE_VALUE_SYM_HW %type <num> PE_VALUE_SYM_SW -%type <num> PE_VALUE_SYM_TOOL %type <mod> PE_MODIFIER_EVENT %type <term_type> PE_TERM %type <num> value_sym @@ -292,7 +290,7 @@ PE_NAME sep_dc struct list_head *list; int err; - err = parse_events_multi_pmu_add(_parse_state, $1, NULL, &list, &@1); + err = parse_events_multi_pmu_add(_parse_state, $1, PERF_COUNT_HW_MAX, NULL, &list, &@1); if (err < 0) { struct parse_events_state *parse_state = _parse_state; struct parse_events_error *error = parse_state->error; @@ -350,20 +348,6 @@ value_sym sep_slash_slash_dc PE_ABORT(err); $$ = list; } -| -PE_VALUE_SYM_TOOL sep_slash_slash_dc -{ - struct list_head *list; - int err; - - list = alloc_list(); - if (!list) - YYNOMEM; - err = parse_events_add_tool(_parse_state, list, $1); - if (err) - YYNOMEM; - $$ = list; -} event_legacy_cache: PE_LEGACY_CACHE opt_event_config diff --git a/tools/perf/util/pfm.c b/tools/perf/util/pfm.c index 5ccfe4b64cdf..0dacc133ed39 100644 --- a/tools/perf/util/pfm.c +++ b/tools/perf/util/pfm.c @@ -233,7 +233,7 @@ print_libpfm_event(const struct print_callbacks *print_cb, void *print_state, } if (is_libpfm_event_supported(name, cpus, threads)) { - print_cb->print_event(print_state, pinfo->name, topic, + print_cb->print_event(print_state, topic, pinfo->name, name, info->equiv, /*scale_unit=*/NULL, /*deprecated=*/NULL, "PFM event", @@ -267,8 +267,8 @@ print_libpfm_event(const struct print_callbacks *print_cb, void *print_state, continue; print_cb->print_event(print_state, - pinfo->name, topic, + pinfo->name, name, /*alias=*/NULL, /*scale_unit=*/NULL, /*deprecated=*/NULL, "PFM event", diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index 61bdda01a05a..08a9d0bd9301 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -18,7 +18,9 @@ #include "debug.h" #include "evsel.h" #include "pmu.h" +#include "hwmon_pmu.h" #include "pmus.h" +#include "tool_pmu.h" #include <util/pmu-bison.h> #include <util/pmu-flex.h> #include "parse-events.h" @@ -817,31 +819,6 @@ static int is_sysfs_pmu_core(const char *name) return file_available(path); } -char *perf_pmu__getcpuid(struct perf_pmu *pmu) -{ - char *cpuid; - static bool printed; - - cpuid = getenv("PERF_CPUID"); - if (cpuid) - cpuid = strdup(cpuid); - if (!cpuid) - cpuid = get_cpuid_str(pmu); - if (!cpuid) - return NULL; - - if (!printed) { - pr_debug("Using CPUID %s\n", cpuid); - printed = true; - } - return cpuid; -} - -__weak const struct pmu_metrics_table *pmu_metrics_table__find(void) -{ - return perf_pmu__find_metrics_table(NULL); -} - /** * Return the length of the PMU name not including the suffix for uncore PMUs. * @@ -1168,7 +1145,7 @@ struct perf_pmu *perf_pmu__create_placeholder_core_pmu(struct list_head *core_pm return pmu; } -static bool perf_pmu__is_fake(const struct perf_pmu *pmu) +bool perf_pmu__is_fake(const struct perf_pmu *pmu) { return pmu->type == PERF_PMU_TYPE_FAKE; } @@ -1366,7 +1343,8 @@ static int pmu_config_term(const struct perf_pmu *pmu, struct perf_event_attr *attr, struct parse_events_term *term, struct parse_events_terms *head_terms, - bool zero, struct parse_events_error *err) + bool zero, bool apply_hardcoded, + struct parse_events_error *err) { struct perf_pmu_format *format; __u64 *vp; @@ -1380,11 +1358,46 @@ static int pmu_config_term(const struct perf_pmu *pmu, return 0; /* - * Hardcoded terms should be already in, so nothing - * to be done for them. + * Hardcoded terms are generally handled in event parsing, which + * traditionally have had to handle not having a PMU. An alias may + * have hard coded config values, optionally apply them below. */ - if (parse_events__is_hardcoded_term(term)) + if (parse_events__is_hardcoded_term(term)) { + /* Config terms set all bits in the config. */ + DECLARE_BITMAP(bits, PERF_PMU_FORMAT_BITS); + + if (!apply_hardcoded) + return 0; + + bitmap_fill(bits, PERF_PMU_FORMAT_BITS); + + switch (term->type_term) { + case PARSE_EVENTS__TERM_TYPE_CONFIG: + assert(term->type_val == PARSE_EVENTS__TERM_TYPE_NUM); + pmu_format_value(bits, term->val.num, &attr->config, zero); + break; + case PARSE_EVENTS__TERM_TYPE_CONFIG1: + assert(term->type_val == PARSE_EVENTS__TERM_TYPE_NUM); + pmu_format_value(bits, term->val.num, &attr->config1, zero); + break; + case PARSE_EVENTS__TERM_TYPE_CONFIG2: + assert(term->type_val == PARSE_EVENTS__TERM_TYPE_NUM); + pmu_format_value(bits, term->val.num, &attr->config2, zero); + break; + case PARSE_EVENTS__TERM_TYPE_CONFIG3: + assert(term->type_val == PARSE_EVENTS__TERM_TYPE_NUM); + pmu_format_value(bits, term->val.num, &attr->config3, zero); + break; + case PARSE_EVENTS__TERM_TYPE_USER: /* Not hardcoded. */ + return -EINVAL; + case PARSE_EVENTS__TERM_TYPE_NAME ... PARSE_EVENTS__TERM_TYPE_HARDWARE: + /* Skip non-config terms. */ + break; + default: + break; + } return 0; + } format = pmu_find_format(&pmu->format, term->config); if (!format) { @@ -1466,13 +1479,12 @@ static int pmu_config_term(const struct perf_pmu *pmu, if (err) { char *err_str; - parse_events_error__handle(err, term->err_val, - asprintf(&err_str, - "value too big for format (%s), maximum is %llu", - format->name, (unsigned long long)max_val) < 0 - ? strdup("value too big for format") - : err_str, - NULL); + if (asprintf(&err_str, + "value too big for format (%s), maximum is %llu", + format->name, (unsigned long long)max_val) < 0) { + err_str = strdup("value too big for format"); + } + parse_events_error__handle(err, term->err_val, err_str, /*help=*/NULL); return -EINVAL; } /* @@ -1488,12 +1500,16 @@ static int pmu_config_term(const struct perf_pmu *pmu, int perf_pmu__config_terms(const struct perf_pmu *pmu, struct perf_event_attr *attr, struct parse_events_terms *terms, - bool zero, struct parse_events_error *err) + bool zero, bool apply_hardcoded, + struct parse_events_error *err) { struct parse_events_term *term; + if (perf_pmu__is_hwmon(pmu)) + return hwmon_pmu__config_terms(pmu, attr, terms, err); + list_for_each_entry(term, &terms->terms, list) { - if (pmu_config_term(pmu, attr, term, terms, zero, err)) + if (pmu_config_term(pmu, attr, term, terms, zero, apply_hardcoded, err)) return -EINVAL; } @@ -1507,6 +1523,7 @@ int perf_pmu__config_terms(const struct perf_pmu *pmu, */ int perf_pmu__config(struct perf_pmu *pmu, struct perf_event_attr *attr, struct parse_events_terms *head_terms, + bool apply_hardcoded, struct parse_events_error *err) { bool zero = !!pmu->perf_event_attr_init_default; @@ -1515,7 +1532,7 @@ int perf_pmu__config(struct perf_pmu *pmu, struct perf_event_attr *attr, if (perf_pmu__is_fake(pmu)) return 0; - return perf_pmu__config_terms(pmu, attr, head_terms, zero, err); + return perf_pmu__config_terms(pmu, attr, head_terms, zero, apply_hardcoded, err); } static struct perf_pmu_alias *pmu_find_alias(struct perf_pmu *pmu, @@ -1606,7 +1623,7 @@ static int check_info_data(struct perf_pmu *pmu, */ int perf_pmu__check_alias(struct perf_pmu *pmu, struct parse_events_terms *head_terms, struct perf_pmu_info *info, bool *rewrote_terms, - struct parse_events_error *err) + u64 *alternate_hw_config, struct parse_events_error *err) { struct parse_events_term *term, *h; struct perf_pmu_alias *alias; @@ -1623,6 +1640,11 @@ int perf_pmu__check_alias(struct perf_pmu *pmu, struct parse_events_terms *head_ info->scale = 0.0; info->snapshot = false; + if (perf_pmu__is_hwmon(pmu)) { + ret = hwmon_pmu__check_alias(head_terms, info, err); + goto out; + } + /* Fake PMU doesn't rewrite terms. */ if (perf_pmu__is_fake(pmu)) goto out; @@ -1638,6 +1660,7 @@ int perf_pmu__check_alias(struct perf_pmu *pmu, struct parse_events_terms *head_ NULL); return ret; } + *rewrote_terms = true; ret = check_info_data(pmu, alias, info, err, term->err_term); if (ret) @@ -1646,6 +1669,9 @@ int perf_pmu__check_alias(struct perf_pmu *pmu, struct parse_events_terms *head_ if (alias->per_pkg) info->per_pkg = true; + if (term->alternate_hw_config) + *alternate_hw_config = term->val.num; + list_del_init(&term->list); parse_events_term__delete(term); } @@ -1790,6 +1816,10 @@ bool perf_pmu__have_event(struct perf_pmu *pmu, const char *name) { if (!name) return false; + if (perf_pmu__is_tool(pmu) && tool_pmu__skip_event(name)) + return false; + if (perf_pmu__is_hwmon(pmu)) + return hwmon_pmu__have_event(pmu, name); if (perf_pmu__find_alias(pmu, name, /*load=*/ true) != NULL) return true; if (pmu->cpu_aliases_added || !pmu->events_table) @@ -1801,6 +1831,9 @@ size_t perf_pmu__num_events(struct perf_pmu *pmu) { size_t nr; + if (perf_pmu__is_hwmon(pmu)) + return hwmon_pmu__num_events(pmu); + pmu_aliases_parse(pmu); nr = pmu->sysfs_aliases + pmu->sys_json_aliases; @@ -1811,6 +1844,9 @@ size_t perf_pmu__num_events(struct perf_pmu *pmu) else assert(pmu->cpu_json_aliases == 0); + if (perf_pmu__is_tool(pmu)) + nr -= tool_pmu__num_skip_events(); + return pmu->selectable ? nr + 1 : nr; } @@ -1861,12 +1897,18 @@ int perf_pmu__for_each_event(struct perf_pmu *pmu, bool skip_duplicate_pmus, int ret = 0; struct strbuf sb; + if (perf_pmu__is_hwmon(pmu)) + return hwmon_pmu__for_each_event(pmu, state, cb); + strbuf_init(&sb, /*hint=*/ 0); pmu_aliases_parse(pmu); pmu_add_cpu_aliases(pmu); list_for_each_entry(event, &pmu->aliases, list) { size_t buf_used, pmu_name_len; + if (perf_pmu__is_tool(pmu) && tool_pmu__skip_event(event->name)) + continue; + info.pmu_name = event->pmu_name ?: pmu->name; pmu_name_len = pmu_deduped_name_len(pmu, info.pmu_name, skip_duplicate_pmus); @@ -1949,6 +1991,7 @@ bool perf_pmu__is_software(const struct perf_pmu *pmu) case PERF_TYPE_HW_CACHE: return false; case PERF_TYPE_RAW: return false; case PERF_TYPE_BREAKPOINT: return true; + case PERF_PMU_TYPE_TOOL: return true; default: break; } for (size_t i = 0; i < ARRAY_SIZE(known_sw_pmus); i++) { @@ -2200,11 +2243,6 @@ bool perf_pmu__match(const struct perf_pmu *pmu, const char *tok) (need_fnmatch && !fnmatch(tok, name, 0)); } -double __weak perf_pmu__cpu_slots_per_cycle(void) -{ - return NAN; -} - int perf_pmu__event_source_devices_scnprintf(char *pathname, size_t size) { const char *sysfs = sysfs__mountpoint(); @@ -2257,6 +2295,9 @@ int perf_pmu__pathname_fd(int dirfd, const char *pmu_name, const char *filename, void perf_pmu__delete(struct perf_pmu *pmu) { + if (perf_pmu__is_hwmon(pmu)) + hwmon_pmu__exit(pmu); + perf_pmu__del_formats(&pmu->format); perf_pmu__del_aliases(pmu); perf_pmu__del_caps(pmu); @@ -2280,7 +2321,9 @@ const char *perf_pmu__name_from_config(struct perf_pmu *pmu, u64 config) pmu_add_cpu_aliases(pmu); list_for_each_entry(event, &pmu->aliases, list) { struct perf_event_attr attr = {.config = 0,}; - int ret = perf_pmu__config(pmu, &attr, &event->terms, NULL); + + int ret = perf_pmu__config(pmu, &attr, &event->terms, /*apply_hardcoded=*/true, + /*err=*/NULL); if (ret == 0 && config == attr.config) return event->name; diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h index 4397c48ad569..dbed6c243a5e 100644 --- a/tools/perf/util/pmu.h +++ b/tools/perf/util/pmu.h @@ -37,6 +37,9 @@ struct perf_pmu_caps { }; enum { + PERF_PMU_TYPE_HWMON_START = 0xFFFF0000, + PERF_PMU_TYPE_HWMON_END = 0xFFFFFFFD, + PERF_PMU_TYPE_TOOL = 0xFFFFFFFE, PERF_PMU_TYPE_FAKE = 0xFFFFFFFF, }; @@ -169,6 +172,10 @@ struct perf_pmu { * exclude_host. */ bool exclude_guest; + /** + * @checked: Are the missing features checked? + */ + bool checked; } missing_features; /** @@ -206,16 +213,18 @@ typedef int (*pmu_format_callback)(void *state, const char *name, int config, void pmu_add_sys_aliases(struct perf_pmu *pmu); int perf_pmu__config(struct perf_pmu *pmu, struct perf_event_attr *attr, struct parse_events_terms *head_terms, + bool apply_hardcoded, struct parse_events_error *error); int perf_pmu__config_terms(const struct perf_pmu *pmu, struct perf_event_attr *attr, struct parse_events_terms *terms, - bool zero, struct parse_events_error *error); + bool zero, bool apply_hardcoded, + struct parse_events_error *error); __u64 perf_pmu__format_bits(struct perf_pmu *pmu, const char *name); int perf_pmu__format_type(struct perf_pmu *pmu, const char *name); int perf_pmu__check_alias(struct perf_pmu *pmu, struct parse_events_terms *head_terms, struct perf_pmu_info *info, bool *rewrote_terms, - struct parse_events_error *err); + u64 *alternate_hw_config, struct parse_events_error *err); int perf_pmu__find_event(struct perf_pmu *pmu, const char *event, void *state, pmu_event_callback cb); void perf_pmu_format__set_value(void *format, int config, unsigned long *bits); @@ -253,8 +262,6 @@ void perf_pmu__arch_init(struct perf_pmu *pmu); void pmu_add_cpu_aliases_table(struct perf_pmu *pmu, const struct pmu_events_table *table); -char *perf_pmu__getcpuid(struct perf_pmu *pmu); -const struct pmu_metrics_table *pmu_metrics_table__find(void); bool pmu_uncore_identifier_match(const char *compat, const char *id); int perf_pmu__convert_scale(const char *scale, char **end, double *sval); @@ -268,7 +275,6 @@ void perf_pmu__warn_invalid_formats(struct perf_pmu *pmu); bool perf_pmu__match(const struct perf_pmu *pmu, const char *tok); -double perf_pmu__cpu_slots_per_cycle(void); int perf_pmu__event_source_devices_scnprintf(char *pathname, size_t size); int perf_pmu__pathname_scnprintf(char *buf, size_t size, const char *pmu_name, const char *filename); @@ -280,6 +286,8 @@ struct perf_pmu *perf_pmu__lookup(struct list_head *pmus, int dirfd, const char struct perf_pmu *perf_pmu__create_placeholder_core_pmu(struct list_head *core_pmus); void perf_pmu__delete(struct perf_pmu *pmu); struct perf_pmu *perf_pmus__find_core_pmu(void); + const char *perf_pmu__name_from_config(struct perf_pmu *pmu, u64 config); +bool perf_pmu__is_fake(const struct perf_pmu *pmu); #endif /* __PMU_H */ diff --git a/tools/perf/util/pmus.c b/tools/perf/util/pmus.c index 52109af5f2f1..b493da0d22ef 100644 --- a/tools/perf/util/pmus.c +++ b/tools/perf/util/pmus.c @@ -15,6 +15,8 @@ #include "evsel.h" #include "pmus.h" #include "pmu.h" +#include "hwmon_pmu.h" +#include "tool_pmu.h" #include "print-events.h" #include "strbuf.h" @@ -200,6 +202,7 @@ static void pmu_read_sysfs(bool core_only) int fd; DIR *dir; struct dirent *dent; + struct perf_pmu *tool_pmu; if (read_sysfs_all_pmus || (core_only && read_sysfs_core_pmus)) return; @@ -229,6 +232,11 @@ static void pmu_read_sysfs(bool core_only) pr_err("Failure to set up any core PMUs\n"); } list_sort(NULL, &core_pmus, pmus_cmp); + if (!core_only) { + tool_pmu = perf_pmus__tool_pmu(); + list_add_tail(&tool_pmu->list, &other_pmus); + perf_pmus__read_hwmon_pmus(&other_pmus); + } list_sort(NULL, &other_pmus, pmus_cmp); if (!list_empty(&core_pmus)) { read_sysfs_core_pmus = true; @@ -434,6 +442,7 @@ static int perf_pmus__print_pmu_events__callback(void *vstate, pr_err("Unexpected event %s/%s/\n", info->pmu->name, info->name); return 1; } + assert(info->pmu != NULL || info->name != NULL); s = &state->aliases[state->index]; s->pmu = info->pmu; #define COPY_STR(str) s->str = info->str ? strdup(info->str) : NULL @@ -494,8 +503,8 @@ void perf_pmus__print_pmu_events(const struct print_callbacks *print_cb, void *p goto free; print_cb->print_event(print_state, - aliases[j].pmu_name, aliases[j].topic, + aliases[j].pmu_name, aliases[j].name, aliases[j].alias, aliases[j].scale_unit, @@ -724,6 +733,13 @@ struct perf_pmu *perf_pmus__add_test_pmu(int test_sysfs_dirfd, const char *name) return perf_pmu__lookup(&other_pmus, test_sysfs_dirfd, name, /*eager_load=*/true); } +struct perf_pmu *perf_pmus__add_test_hwmon_pmu(int hwmon_dir, + const char *sysfs_name, + const char *name) +{ + return hwmon_pmu__new(&other_pmus, hwmon_dir, sysfs_name, name); +} + struct perf_pmu *perf_pmus__fake_pmu(void) { static struct perf_pmu fake = { diff --git a/tools/perf/util/pmus.h b/tools/perf/util/pmus.h index e1742b56eec7..a0cb0eb2ff97 100644 --- a/tools/perf/util/pmus.h +++ b/tools/perf/util/pmus.h @@ -30,6 +30,9 @@ bool perf_pmus__supports_extended_type(void); char *perf_pmus__default_pmu_name(void); struct perf_pmu *perf_pmus__add_test_pmu(int test_sysfs_dirfd, const char *name); +struct perf_pmu *perf_pmus__add_test_hwmon_pmu(int hwmon_dir, + const char *sysfs_name, + const char *name); struct perf_pmu *perf_pmus__fake_pmu(void); #endif /* __PMUS_H */ diff --git a/tools/perf/util/print-events.c b/tools/perf/util/print-events.c index 81e0135cddf0..a786cbfb0ff5 100644 --- a/tools/perf/util/print-events.c +++ b/tools/perf/util/print-events.c @@ -29,6 +29,7 @@ #include "tracepoint.h" #include "pfm.h" #include "thread_map.h" +#include "tool_pmu.h" #include "util.h" #define MAX_NAME_LEN 100 @@ -43,21 +44,6 @@ static const char * const event_type_descriptors[] = { "Hardware breakpoint", }; -static const struct event_symbol event_symbols_tool[PERF_TOOL_MAX] = { - [PERF_TOOL_DURATION_TIME] = { - .symbol = "duration_time", - .alias = "", - }, - [PERF_TOOL_USER_TIME] = { - .symbol = "user_time", - .alias = "", - }, - [PERF_TOOL_SYSTEM_TIME] = { - .symbol = "system_time", - .alias = "", - }, -}; - /* * Print the events from <debugfs_mount_point>/tracing/events */ @@ -341,24 +327,6 @@ int print_hwcache_events(const struct print_callbacks *print_cb, void *print_sta return 0; } -void print_tool_events(const struct print_callbacks *print_cb, void *print_state) -{ - // Start at 1 because the first enum entry means no tool event. - for (int i = 1; i < PERF_TOOL_MAX; ++i) { - print_cb->print_event(print_state, - "tool", - /*pmu_name=*/NULL, - event_symbols_tool[i].symbol, - event_symbols_tool[i].alias, - /*scale_unit=*/NULL, - /*deprecated=*/false, - "Tool event", - /*desc=*/NULL, - /*long_desc=*/NULL, - /*encoding_desc=*/NULL); - } -} - void print_symbol_events(const struct print_callbacks *print_cb, void *print_state, unsigned int type, const struct event_symbol *syms, unsigned int max) @@ -422,8 +390,6 @@ void print_events(const struct print_callbacks *print_cb, void *print_state) print_symbol_events(print_cb, print_state, PERF_TYPE_SOFTWARE, event_symbols_sw, PERF_COUNT_SW_MAX); - print_tool_events(print_cb, print_state); - print_hwcache_events(print_cb, print_state); perf_pmus__print_pmu_events(print_cb, print_state); diff --git a/tools/perf/util/print-events.h b/tools/perf/util/print-events.h index bf4290bef0cd..445efa1636c1 100644 --- a/tools/perf/util/print-events.h +++ b/tools/perf/util/print-events.h @@ -36,7 +36,6 @@ void print_sdt_events(const struct print_callbacks *print_cb, void *print_state) void print_symbol_events(const struct print_callbacks *print_cb, void *print_state, unsigned int type, const struct event_symbol *syms, unsigned int max); -void print_tool_events(const struct print_callbacks *print_cb, void *print_state); void print_tracepoint_events(const struct print_callbacks *print_cb, void *print_state); bool is_event_supported(u8 type, u64 config); diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index a17c9b8a7a79..eaa0318e9b87 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -40,6 +40,7 @@ #include "session.h" #include "string2.h" #include "strbuf.h" +#include "parse-events.h" #include <subcmd/pager.h> #include <linux/ctype.h> @@ -51,6 +52,9 @@ #define PERFPROBE_GROUP "probe" +/* Defined in kernel/trace/trace.h */ +#define MAX_EVENT_NAME_LEN 64 + bool probe_event_dry_run; /* Dry run flag */ struct probe_conf probe_conf = { .magic_num = DEFAULT_PROBE_MAGIC_NUM }; @@ -342,7 +346,7 @@ elf_err: return mod_name; } -#ifdef HAVE_DWARF_SUPPORT +#ifdef HAVE_LIBDW_SUPPORT static int kernel_get_module_dso(const char *module, struct dso **pdso) { @@ -1036,6 +1040,17 @@ static int _show_one_line(FILE *fp, int l, bool skip, bool show_num) return rv; } +static int sprint_line_description(char *sbuf, size_t size, struct line_range *lr) +{ + if (!lr->function) + return snprintf(sbuf, size, "file: %s, line: %d", lr->file, lr->start); + + if (lr->file) + return snprintf(sbuf, size, "function: %s, file:%s, line: %d", lr->function, lr->file, lr->start); + + return snprintf(sbuf, size, "function: %s, line:%d", lr->function, lr->start); +} + #define show_one_line_with_num(f,l) _show_one_line(f,l,false,true) #define show_one_line(f,l) _show_one_line(f,l,false,false) #define skip_one_line(f,l) _show_one_line(f,l,true,false) @@ -1065,9 +1080,12 @@ static int __show_line_range(struct line_range *lr, const char *module, ret = debuginfo__find_line_range(dinfo, lr); if (!ret) { /* Not found, retry with an alternative */ + pr_debug2("Failed to find line range in debuginfo. Fallback to alternative\n"); ret = get_alternative_line_range(dinfo, lr, module, user); if (!ret) ret = debuginfo__find_line_range(dinfo, lr); + else /* Ignore error, we just failed to find it. */ + ret = -ENOENT; } if (dinfo->build_id) { build_id__init(&bid, dinfo->build_id, BUILD_ID_SIZE); @@ -1075,7 +1093,8 @@ static int __show_line_range(struct line_range *lr, const char *module, } debuginfo__delete(dinfo); if (ret == 0 || ret == -ENOENT) { - pr_warning("Specified source line is not found.\n"); + sprint_line_description(sbuf, sizeof(sbuf), lr); + pr_warning("Specified source line(%s) is not found.\n", sbuf); return -ENOENT; } else if (ret < 0) { pr_warning("Debuginfo analysis failed.\n"); @@ -1250,7 +1269,7 @@ out: return ret; } -#else /* !HAVE_DWARF_SUPPORT */ +#else /* !HAVE_LIBDW_SUPPORT */ static void debuginfo_cache__exit(void) { @@ -1343,30 +1362,39 @@ static bool is_c_func_name(const char *name) * * SRC[:SLN[+NUM|-ELN]] * FNC[@SRC][:SLN[+NUM|-ELN]] + * + * FNC@SRC accepts `FNC@*` which forcibly specify FNC as function name. + * SRC and FUNC can be quoted by double/single quotes. */ int parse_line_range_desc(const char *arg, struct line_range *lr) { - char *range, *file, *name = strdup(arg); - int err; + char *buf = strdup(arg); + char *p; + int err = 0; - if (!name) + if (!buf) return -ENOMEM; lr->start = 0; lr->end = INT_MAX; - range = strchr(name, ':'); - if (range) { - *range++ = '\0'; + p = strpbrk_esq(buf, ":"); + if (p) { + if (p == buf) { + semantic_error("No file/function name in '%s'.\n", p); + err = -EINVAL; + goto err; + } + *(p++) = '\0'; - err = parse_line_num(&range, &lr->start, "start line"); + err = parse_line_num(&p, &lr->start, "start line"); if (err) goto err; - if (*range == '+' || *range == '-') { - const char c = *range++; + if (*p == '+' || *p == '-') { + const char c = *(p++); - err = parse_line_num(&range, &lr->end, "end line"); + err = parse_line_num(&p, &lr->end, "end line"); if (err) goto err; @@ -1390,34 +1418,41 @@ int parse_line_range_desc(const char *arg, struct line_range *lr) " than end line.\n"); goto err; } - if (*range != '\0') { - semantic_error("Tailing with invalid str '%s'.\n", range); + if (*p != '\0') { + semantic_error("Tailing with invalid str '%s'.\n", p); goto err; } } - file = strchr(name, '@'); - if (file) { - *file = '\0'; - lr->file = strdup(++file); - if (lr->file == NULL) { - err = -ENOMEM; + p = strpbrk_esq(buf, "@"); + if (p) { + *p++ = '\0'; + if (strcmp(p, "*")) { + lr->file = strdup_esq(p); + if (lr->file == NULL) { + err = -ENOMEM; + goto err; + } + } + if (*buf != '\0') + lr->function = strdup_esq(buf); + if (!lr->function && !lr->file) { + semantic_error("Only '@*' is not allowed.\n"); + err = -EINVAL; goto err; } - lr->function = name; - } else if (strchr(name, '/') || strchr(name, '.')) - lr->file = name; - else if (is_c_func_name(name))/* We reuse it for checking funcname */ - lr->function = name; + } else if (strpbrk_esq(buf, "/.")) + lr->file = strdup_esq(buf); + else if (is_c_func_name(buf))/* We reuse it for checking funcname */ + lr->function = strdup_esq(buf); else { /* Invalid name */ - semantic_error("'%s' is not a valid function name.\n", name); + semantic_error("'%s' is not a valid function name.\n", buf); err = -EINVAL; goto err; } - return 0; err: - free(name); + free(buf); return err; } @@ -1425,19 +1460,19 @@ static int parse_perf_probe_event_name(char **arg, struct perf_probe_event *pev) { char *ptr; - ptr = strpbrk_esc(*arg, ":"); + ptr = strpbrk_esq(*arg, ":"); if (ptr) { *ptr = '\0'; if (!pev->sdt && !is_c_func_name(*arg)) goto ng_name; - pev->group = strdup_esc(*arg); + pev->group = strdup_esq(*arg); if (!pev->group) return -ENOMEM; *arg = ptr + 1; } else pev->group = NULL; - pev->event = strdup_esc(*arg); + pev->event = strdup_esq(*arg); if (pev->event == NULL) return -ENOMEM; @@ -1476,7 +1511,7 @@ static int parse_perf_probe_point(char *arg, struct perf_probe_event *pev) arg++; } - ptr = strpbrk_esc(arg, ";=@+%"); + ptr = strpbrk_esq(arg, ";=@+%"); if (pev->sdt) { if (ptr) { if (*ptr != '@') { @@ -1490,7 +1525,7 @@ static int parse_perf_probe_point(char *arg, struct perf_probe_event *pev) pev->target = build_id_cache__origname(tmp); free(tmp); } else - pev->target = strdup_esc(ptr + 1); + pev->target = strdup_esq(ptr + 1); if (!pev->target) return -ENOMEM; *ptr = '\0'; @@ -1531,7 +1566,7 @@ static int parse_perf_probe_point(char *arg, struct perf_probe_event *pev) file_spec = true; } - ptr = strpbrk_esc(arg, ";:+@%"); + ptr = strpbrk_esq(arg, ";:+@%"); if (ptr) { nc = *ptr; *ptr++ = '\0'; @@ -1540,7 +1575,7 @@ static int parse_perf_probe_point(char *arg, struct perf_probe_event *pev) if (arg[0] == '\0') tmp = NULL; else { - tmp = strdup_esc(arg); + tmp = strdup_esq(arg); if (tmp == NULL) return -ENOMEM; } @@ -1578,7 +1613,7 @@ static int parse_perf_probe_point(char *arg, struct perf_probe_event *pev) return -ENOMEM; break; } - ptr = strpbrk_esc(arg, ";:+@%"); + ptr = strpbrk_esq(arg, ";:+@%"); if (ptr) { nc = *ptr; *ptr++ = '\0'; @@ -1605,7 +1640,9 @@ static int parse_perf_probe_point(char *arg, struct perf_probe_event *pev) semantic_error("SRC@SRC is not allowed.\n"); return -EINVAL; } - pp->file = strdup_esc(arg); + if (!strcmp(arg, "*")) + break; + pp->file = strdup_esq(arg); if (pp->file == NULL) return -ENOMEM; break; @@ -2757,7 +2794,10 @@ static int get_new_event_name(char *buf, size_t len, const char *base, /* Try no suffix number */ ret = e_snprintf(buf, len, "%s%s", nbase, ret_event ? "__return" : ""); if (ret < 0) { - pr_warning("snprintf() failed: %d; the event name nbase='%s' is too long\n", ret, nbase); + pr_warning("snprintf() failed: %d; the event name '%s' is too long\n" + " Hint: Set a shorter event with syntax \"EVENT=PROBEDEF\"\n" + " EVENT: Event name (max length: %d bytes).\n", + ret, nbase, MAX_EVENT_NAME_LEN); goto out; } if (!strlist__has_entry(namelist, buf)) @@ -2777,7 +2817,10 @@ static int get_new_event_name(char *buf, size_t len, const char *base, for (i = 1; i < MAX_EVENT_INDEX; i++) { ret = e_snprintf(buf, len, "%s_%d", nbase, i); if (ret < 0) { - pr_debug("snprintf() failed: %d\n", ret); + pr_warning("Add suffix failed: %d; the event name '%s' is too long\n" + " Hint: Set a shorter event with syntax \"EVENT=PROBEDEF\"\n" + " EVENT: Event name (max length: %d bytes).\n", + ret, nbase, MAX_EVENT_NAME_LEN); goto out; } if (!strlist__has_entry(namelist, buf)) @@ -2841,7 +2884,7 @@ static int probe_trace_event__set_name(struct probe_trace_event *tev, bool allow_suffix) { const char *event, *group; - char buf[64]; + char buf[MAX_EVENT_NAME_LEN]; int ret; /* If probe_event or trace_event already have the name, reuse it */ @@ -2865,6 +2908,12 @@ static int probe_trace_event__set_name(struct probe_trace_event *tev, else group = PERFPROBE_GROUP; + if (strlen(group) >= MAX_EVENT_NAME_LEN) { + pr_err("Probe group string='%s' is too long (>= %d bytes)\n", + group, MAX_EVENT_NAME_LEN); + return -ENOMEM; + } + /* Get an unused new event name */ ret = get_new_event_name(buf, sizeof(buf), event, namelist, tev->point.retprobe, allow_suffix); @@ -3705,59 +3754,6 @@ void cleanup_perf_probe_events(struct perf_probe_event *pevs, int npevs) } } -int add_perf_probe_events(struct perf_probe_event *pevs, int npevs) -{ - int ret; - - ret = init_probe_symbol_maps(pevs->uprobes); - if (ret < 0) - return ret; - - ret = convert_perf_probe_events(pevs, npevs); - if (ret == 0) - ret = apply_perf_probe_events(pevs, npevs); - - cleanup_perf_probe_events(pevs, npevs); - - exit_probe_symbol_maps(); - return ret; -} - -int del_perf_probe_events(struct strfilter *filter) -{ - int ret, ret2, ufd = -1, kfd = -1; - char *str = strfilter__string(filter); - - if (!str) - return -EINVAL; - - /* Get current event names */ - ret = probe_file__open_both(&kfd, &ufd, PF_FL_RW); - if (ret < 0) - goto out; - - ret = probe_file__del_events(kfd, filter); - if (ret < 0 && ret != -ENOENT) - goto error; - - ret2 = probe_file__del_events(ufd, filter); - if (ret2 < 0 && ret2 != -ENOENT) { - ret = ret2; - goto error; - } - ret = 0; - -error: - if (kfd >= 0) - close(kfd); - if (ufd >= 0) - close(ufd); -out: - free(str); - - return ret; -} - int show_available_funcs(const char *target, struct nsinfo *nsi, struct strfilter *_filter, bool user) { diff --git a/tools/perf/util/probe-event.h b/tools/perf/util/probe-event.h index 7e3b6c3d1f74..61a5f4ff4e9c 100644 --- a/tools/perf/util/probe-event.h +++ b/tools/perf/util/probe-event.h @@ -159,7 +159,6 @@ void line_range__clear(struct line_range *lr); /* Initialize line range */ int line_range__init(struct line_range *lr); -int add_perf_probe_events(struct perf_probe_event *pevs, int npevs); int convert_perf_probe_events(struct perf_probe_event *pevs, int npevs); int apply_perf_probe_events(struct perf_probe_event *pevs, int npevs); int show_probe_trace_events(struct perf_probe_event *pevs, int npevs); @@ -168,8 +167,6 @@ void cleanup_perf_probe_events(struct perf_probe_event *pevs, int npevs); struct strfilter; -int del_perf_probe_events(struct strfilter *filter); - int show_perf_probe_event(const char *group, const char *event, struct perf_probe_event *pev, const char *module, bool use_stdout); diff --git a/tools/perf/util/probe-file.c b/tools/perf/util/probe-file.c index 3d50de3217d5..ec8ac242fedb 100644 --- a/tools/perf/util/probe-file.c +++ b/tools/perf/util/probe-file.c @@ -366,25 +366,6 @@ int probe_file__del_strlist(int fd, struct strlist *namelist) return ret; } -int probe_file__del_events(int fd, struct strfilter *filter) -{ - struct strlist *namelist; - int ret; - - namelist = strlist__new(NULL, NULL); - if (!namelist) - return -ENOMEM; - - ret = probe_file__get_events(fd, filter, namelist); - if (ret < 0) - goto out; - - ret = probe_file__del_strlist(fd, namelist); -out: - strlist__delete(namelist); - return ret; -} - /* Caller must ensure to remove this entry from list */ static void probe_cache_entry__delete(struct probe_cache_entry *entry) { diff --git a/tools/perf/util/probe-file.h b/tools/perf/util/probe-file.h index 0dba88c0f5f0..c2bb6a5b9dcc 100644 --- a/tools/perf/util/probe-file.h +++ b/tools/perf/util/probe-file.h @@ -44,7 +44,6 @@ struct strlist *probe_file__get_namelist(int fd); struct strlist *probe_file__get_rawlist(int fd); int probe_file__add_event(int fd, struct probe_trace_event *tev); -int probe_file__del_events(int fd, struct strfilter *filter); int probe_file__get_events(int fd, struct strfilter *filter, struct strlist *plist); int probe_file__del_strlist(int fd, struct strlist *namelist); diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c index 630e16c54ed5..7f2ee0cb43ca 100644 --- a/tools/perf/util/probe-finder.c +++ b/tools/perf/util/probe-finder.c @@ -56,7 +56,7 @@ static struct probe_trace_arg_ref *alloc_trace_arg_ref(long offs) */ static int convert_variable_location(Dwarf_Die *vr_die, Dwarf_Addr addr, Dwarf_Op *fb_ops, Dwarf_Die *sp_die, - unsigned int machine, + const struct probe_finder *pf, struct probe_trace_arg *tvar) { Dwarf_Attribute attr; @@ -166,7 +166,7 @@ static_var: if (!tvar) return ret2; - regs = get_dwarf_regstr(regn, machine); + regs = get_dwarf_regstr(regn, pf->e_machine, pf->e_flags); if (!regs) { /* This should be a bug in DWARF or this tool */ pr_warning("Mapping for the register number %u " @@ -451,7 +451,7 @@ static int convert_variable(Dwarf_Die *vr_die, struct probe_finder *pf) dwarf_diename(vr_die)); ret = convert_variable_location(vr_die, pf->addr, pf->fb_ops, - &pf->sp_die, pf->machine, pf->tvar); + &pf->sp_die, pf, pf->tvar); if (ret == -ENOENT && pf->skip_empty_arg) /* This can be found in other place. skip it */ return 0; @@ -602,7 +602,6 @@ static int call_probe_finder(Dwarf_Die *sc_die, struct probe_finder *pf) ret = dwarf_getlocation_addr(&fb_attr, pf->addr, &pf->fb_ops, &nops, 1); if (ret <= 0 || nops == 0) { pf->fb_ops = NULL; -#ifdef HAVE_DWARF_CFI_SUPPORT } else if (nops == 1 && pf->fb_ops[0].atom == DW_OP_call_frame_cfa && (pf->cfi_eh != NULL || pf->cfi_dbg != NULL)) { if ((dwarf_cfi_addrframe(pf->cfi_eh, pf->addr, &frame) != 0 && @@ -613,7 +612,6 @@ static int call_probe_finder(Dwarf_Die *sc_die, struct probe_finder *pf) free(frame); return -ENOENT; } -#endif /* HAVE_DWARF_CFI_SUPPORT */ } /* Call finder's callback handler */ @@ -1136,9 +1134,9 @@ static int debuginfo__find_probes(struct debuginfo *dbg, if (gelf_getehdr(elf, &ehdr) == NULL) return -EINVAL; - pf->machine = ehdr.e_machine; + pf->e_machine = ehdr.e_machine; + pf->e_flags = ehdr.e_flags; -#ifdef HAVE_DWARF_CFI_SUPPORT do { GElf_Shdr shdr; @@ -1148,7 +1146,6 @@ static int debuginfo__find_probes(struct debuginfo *dbg, pf->cfi_dbg = dwarf_getcfi(dbg->dbg); } while (0); -#endif /* HAVE_DWARF_CFI_SUPPORT */ ret = debuginfo__find_probe_location(dbg, pf); return ret; @@ -1175,7 +1172,7 @@ static int copy_variables_cb(Dwarf_Die *die_mem, void *data) (tag == DW_TAG_variable && vf->vars)) { if (convert_variable_location(die_mem, vf->pf->addr, vf->pf->fb_ops, &pf->sp_die, - pf->machine, NULL) == 0) { + pf, /*tvar=*/NULL) == 0) { vf->args[vf->nargs].var = (char *)dwarf_diename(die_mem); if (vf->args[vf->nargs].var == NULL) { vf->ret = -ENOMEM; @@ -1379,6 +1376,8 @@ int debuginfo__find_trace_events(struct debuginfo *dbg, if (ret >= 0 && tf.pf.skip_empty_arg) ret = fill_empty_trace_arg(pev, tf.tevs, tf.ntevs); + dwarf_cfi_end(tf.pf.cfi_eh); + if (ret < 0 || tf.ntevs == 0) { for (i = 0; i < tf.ntevs; i++) clear_probe_trace_event(&tf.tevs[i]); @@ -1404,7 +1403,7 @@ static int collect_variables_cb(Dwarf_Die *die_mem, void *data) tag == DW_TAG_variable) { ret = convert_variable_location(die_mem, af->pf.addr, af->pf.fb_ops, &af->pf.sp_die, - af->pf.machine, NULL); + &af->pf, /*tvar=*/NULL); if (ret == 0 || ret == -ERANGE) { int ret2; bool externs = !af->child; @@ -1583,8 +1582,21 @@ int debuginfo__find_probe_point(struct debuginfo *dbg, u64 addr, /* Find a corresponding function (name, baseline and baseaddr) */ if (die_find_realfunc(&cudie, (Dwarf_Addr)addr, &spdie)) { - /* Get function entry information */ - func = basefunc = dwarf_diename(&spdie); + /* + * Get function entry information. + * + * As described in the document DWARF Debugging Information + * Format Version 5, section 2.22 Linkage Names, "mangled names, + * are used in various ways, ... to distinguish multiple + * entities that have the same name". + * + * Firstly try to get distinct linkage name, if fail then + * rollback to get associated name in DIE. + */ + func = basefunc = die_get_linkage_name(&spdie); + if (!func) + func = basefunc = dwarf_diename(&spdie); + if (!func || die_entrypc(&spdie, &baseaddr) != 0 || dwarf_decl_line(&spdie, &baseline) != 0) { @@ -1863,7 +1875,11 @@ int find_source_path(const char *raw_path, const char *sbuild_id, const char *prefix = symbol_conf.source_prefix; if (sbuild_id && !prefix) { - if (!get_source_from_debuginfod(raw_path, sbuild_id, new_path)) + char prefixed_raw_path[PATH_MAX]; + + path__join(prefixed_raw_path, sizeof(prefixed_raw_path), comp_dir, raw_path); + + if (!get_source_from_debuginfod(prefixed_raw_path, sbuild_id, new_path)) return 0; } diff --git a/tools/perf/util/probe-finder.h b/tools/perf/util/probe-finder.h index 3add5ff516e1..be7b46ea2460 100644 --- a/tools/perf/util/probe-finder.h +++ b/tools/perf/util/probe-finder.h @@ -21,7 +21,7 @@ static inline int is_c_varname(const char *name) return isalpha(name[0]) || name[0] == '_'; } -#ifdef HAVE_DWARF_SUPPORT +#ifdef HAVE_LIBDW_SUPPORT #include "dwarf-aux.h" #include "debuginfo.h" @@ -63,14 +63,13 @@ struct probe_finder { struct intlist *lcache; /* Line cache for lazy match */ /* For variable searching */ -#if _ELFUTILS_PREREQ(0, 142) - /* Call Frame Information from .eh_frame */ + /* Call Frame Information from .eh_frame. Owned by this struct. */ Dwarf_CFI *cfi_eh; - /* Call Frame Information from .debug_frame */ + /* Call Frame Information from .debug_frame. Not owned. */ Dwarf_CFI *cfi_dbg; -#endif Dwarf_Op *fb_ops; /* Frame base attribute */ - unsigned int machine; /* Target machine arch */ + unsigned int e_machine; /* ELF target machine arch */ + unsigned int e_flags; /* ELF target machine flags */ struct perf_probe_arg *pvar; /* Current target variable */ struct probe_trace_arg *tvar; /* Current result variable */ bool skip_empty_arg; /* Skip non-exist args */ @@ -104,6 +103,6 @@ struct line_finder { int found; }; -#endif /* HAVE_DWARF_SUPPORT */ +#endif /* HAVE_LIBDW_SUPPORT */ #endif /*_PROBE_FINDER_H */ diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c index 31a223eaf8e6..2096cdbaa53b 100644 --- a/tools/perf/util/python.c +++ b/tools/perf/util/python.c @@ -6,7 +6,7 @@ #include <linux/err.h> #include <perf/cpumap.h> #ifdef HAVE_LIBTRACEEVENT -#include <traceevent/event-parse.h> +#include <event-parse.h> #endif #include <perf/mmap.h> #include "evlist.h" @@ -19,6 +19,7 @@ #include "util/bpf-filter.h" #include "util/env.h" #include "util/kvm-stat.h" +#include "util/stat.h" #include "util/kwork.h" #include "util/sample.h" #include "util/lock-contention.h" @@ -1355,6 +1356,7 @@ error: unsigned int scripting_max_stack = PERF_MAX_STACK_DEPTH; +#ifdef HAVE_KVM_STAT_SUPPORT bool kvm_entry_event(struct evsel *evsel __maybe_unused) { return false; @@ -1384,6 +1386,7 @@ void exit_event_decode_key(struct perf_kvm_stat *kvm __maybe_unused, char *decode __maybe_unused) { } +#endif // HAVE_KVM_STAT_SUPPORT int find_scripts(char **scripts_array __maybe_unused, char **scripts_path_array __maybe_unused, int num __maybe_unused, int pathlen __maybe_unused) diff --git a/tools/perf/util/s390-cpumsf.c b/tools/perf/util/s390-cpumsf.c index 73846b73d0cf..30638653ad2d 100644 --- a/tools/perf/util/s390-cpumsf.c +++ b/tools/perf/util/s390-cpumsf.c @@ -345,7 +345,7 @@ static bool s390_cpumsf_trailer_show(const char *color, size_t pos, } color_fprintf(stdout, color, " [%#08zx] Trailer %c%c%c bsdes:%d" " dsdes:%d Overflow:%lld Time:%#llx\n" - "\t\tC:%d TOD:%#lx\n", + "\t\tC:%d TOD:%#llx\n", pos, te->f ? 'F' : ' ', te->a ? 'A' : ' ', diff --git a/tools/perf/util/s390-sample-raw.c b/tools/perf/util/s390-sample-raw.c index 53383e97ec9d..335217bb532b 100644 --- a/tools/perf/util/s390-sample-raw.c +++ b/tools/perf/util/s390-sample-raw.c @@ -98,12 +98,12 @@ static void s390_cpumcfdg_dumptrail(const char *color, size_t offset, te.res2 = be32_to_cpu(tep->res2); color_fprintf(stdout, color, " [%#08zx] Trailer:%c%c%c%c%c" - " Cfvn:%d Csvn:%d Speed:%d TOD:%#llx\n", + " Cfvn:%d Csvn:%d Speed:%d TOD:%#lx\n", offset, te.clock_base ? 'T' : ' ', te.speed ? 'S' : ' ', te.mtda ? 'M' : ' ', te.caca ? 'C' : ' ', te.lcda ? 'L' : ' ', te.cfvn, te.csvn, te.cpu_speed, te.timestamp); - color_fprintf(stdout, color, "\t\t1:%lx 2:%lx 3:%lx TOD-Base:%#llx" + color_fprintf(stdout, color, "\t\t1:%lx 2:%lx 3:%lx TOD-Base:%#lx" " Type:%x\n\n", te.progusage1, te.progusage2, te.progusage3, te.tod_base, te.mach_type); @@ -205,7 +205,7 @@ static void s390_cpumcfdg_dump(struct perf_pmu *pmu, struct perf_sample *sample) char *ev_name = get_counter_name(ce.set, i, pmu); color_fprintf(stdout, color, - "\tCounter:%03d %s Value:%#018lx\n", i, + "\tCounter:%03zd %s Value:%#018"PRIx64"\n", i, ev_name ?: "<unknown>", be64_to_cpu(*p)); free(ev_name); } @@ -260,7 +260,7 @@ static void s390_pai_all_dump(struct evsel *evsel, struct perf_sample *sample) ev_name = get_counter_name(evsel->core.attr.config, pai_data.event_nr, evsel->pmu); - color_fprintf(stdout, color, "\tCounter:%03d %s Value:%#018lx\n", + color_fprintf(stdout, color, "\tCounter:%03d %s Value:%#018"PRIx64"\n", pai_data.event_nr, ev_name ?: "<unknown>", pai_data.event_val); free(ev_name); diff --git a/tools/perf/util/scripting-engines/trace-event-perl.c b/tools/perf/util/scripting-engines/trace-event-perl.c index e16257d5ab2c..85b7f188f729 100644 --- a/tools/perf/util/scripting-engines/trace-event-perl.c +++ b/tools/perf/util/scripting-engines/trace-event-perl.c @@ -27,7 +27,7 @@ #include <errno.h> #include <linux/bitmap.h> #include <linux/time64.h> -#include <traceevent/event-parse.h> +#include <event-parse.h> #include <stdbool.h> /* perl needs the following define, right after including stdbool.h */ diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c index d7183134b669..8bdae066e839 100644 --- a/tools/perf/util/scripting-engines/trace-event-python.c +++ b/tools/perf/util/scripting-engines/trace-event-python.c @@ -31,7 +31,7 @@ #include <linux/compiler.h> #include <linux/time64.h> #ifdef HAVE_LIBTRACEEVENT -#include <traceevent/event-parse.h> +#include <event-parse.h> #endif #include "../build-id.h" @@ -793,7 +793,8 @@ static int set_regs_in_dict(PyObject *dict, static void set_sym_in_dict(PyObject *dict, struct addr_location *al, const char *dso_field, const char *dso_bid_field, const char *dso_map_start, const char *dso_map_end, - const char *sym_field, const char *symoff_field) + const char *sym_field, const char *symoff_field, + const char *map_pgoff) { char sbuild_id[SBUILD_ID_SIZE]; @@ -809,6 +810,8 @@ static void set_sym_in_dict(PyObject *dict, struct addr_location *al, PyLong_FromUnsignedLong(map__start(al->map))); pydict_set_item_string_decref(dict, dso_map_end, PyLong_FromUnsignedLong(map__end(al->map))); + pydict_set_item_string_decref(dict, map_pgoff, + PyLong_FromUnsignedLongLong(map__pgoff(al->map))); } if (al->sym) { pydict_set_item_string_decref(dict, sym_field, @@ -895,7 +898,7 @@ static PyObject *get_perf_sample_dict(struct perf_sample *sample, pydict_set_item_string_decref(dict, "comm", _PyUnicode_FromString(thread__comm_str(al->thread))); set_sym_in_dict(dict, al, "dso", "dso_bid", "dso_map_start", "dso_map_end", - "symbol", "symoff"); + "symbol", "symoff", "map_pgoff"); pydict_set_item_string_decref(dict, "callchain", callchain); @@ -920,7 +923,7 @@ static PyObject *get_perf_sample_dict(struct perf_sample *sample, PyBool_FromLong(1)); set_sym_in_dict(dict_sample, addr_al, "addr_dso", "addr_dso_bid", "addr_dso_map_start", "addr_dso_map_end", - "addr_symbol", "addr_symoff"); + "addr_symbol", "addr_symoff", "addr_map_pgoff"); } if (sample->flags) diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index dbaf07bf6c5f..507e6cba9545 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -1171,18 +1171,24 @@ static int deliver_sample_value(struct evlist *evlist, union perf_event *event, struct perf_sample *sample, struct sample_read_value *v, - struct machine *machine) + struct machine *machine, + bool per_thread) { struct perf_sample_id *sid = evlist__id2sid(evlist, v->id); struct evsel *evsel; + u64 *storage = NULL; if (sid) { + storage = perf_sample_id__get_period_storage(sid, sample->tid, per_thread); + } + + if (storage) { sample->id = v->id; - sample->period = v->value - sid->period; - sid->period = v->value; + sample->period = v->value - *storage; + *storage = v->value; } - if (!sid || sid->evsel == NULL) { + if (!storage || sid->evsel == NULL) { ++evlist->stats.nr_unknown_id; return 0; } @@ -1203,17 +1209,19 @@ static int deliver_sample_group(struct evlist *evlist, union perf_event *event, struct perf_sample *sample, struct machine *machine, - u64 read_format) + u64 read_format, + bool per_thread) { int ret = -EINVAL; struct sample_read_value *v = sample->read.group.values; if (tool->dont_split_sample_group) - return deliver_sample_value(evlist, tool, event, sample, v, machine); + return deliver_sample_value(evlist, tool, event, sample, v, machine, + per_thread); sample_read_group__for_each(v, sample->read.group.nr, read_format) { ret = deliver_sample_value(evlist, tool, event, sample, v, - machine); + machine, per_thread); if (ret) break; } @@ -1228,6 +1236,7 @@ static int evlist__deliver_sample(struct evlist *evlist, const struct perf_tool /* We know evsel != NULL. */ u64 sample_type = evsel->core.attr.sample_type; u64 read_format = evsel->core.attr.read_format; + bool per_thread = perf_evsel__attr_has_per_thread_sample_period(&evsel->core); /* Standard sample delivery. */ if (!(sample_type & PERF_SAMPLE_READ)) @@ -1236,10 +1245,11 @@ static int evlist__deliver_sample(struct evlist *evlist, const struct perf_tool /* For PERF_SAMPLE_READ we have either single or group mode. */ if (read_format & PERF_FORMAT_GROUP) return deliver_sample_group(evlist, tool, event, sample, - machine, read_format); + machine, read_format, per_thread); else return deliver_sample_value(evlist, tool, event, sample, - &sample->read.one, machine); + &sample->read.one, machine, + per_thread); } static int machines__deliver_event(struct machines *machines, diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 013020f33ece..9dd60c7869a2 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -35,7 +35,7 @@ #include <linux/string.h> #ifdef HAVE_LIBTRACEEVENT -#include <traceevent/event-parse.h> +#include <event-parse.h> #endif regex_t parent_regex; @@ -677,6 +677,102 @@ struct sort_entry sort_sym_ipc_null = { .se_width_idx = HISTC_SYMBOL_IPC, }; +/* --sort callchain_branch_predicted */ + +static int64_t +sort__callchain_branch_predicted_cmp(struct hist_entry *left __maybe_unused, + struct hist_entry *right __maybe_unused) +{ + return 0; +} + +static int hist_entry__callchain_branch_predicted_snprintf( + struct hist_entry *he, char *bf, size_t size, unsigned int width) +{ + u64 branch_count, predicted_count; + double percent = 0.0; + char str[32]; + + callchain_branch_counts(he->callchain, &branch_count, + &predicted_count, NULL, NULL); + + if (branch_count) + percent = predicted_count * 100.0 / branch_count; + + snprintf(str, sizeof(str), "%.1f%%", percent); + return repsep_snprintf(bf, size, "%-*.*s", width, width, str); +} + +struct sort_entry sort_callchain_branch_predicted = { + .se_header = "Predicted", + .se_cmp = sort__callchain_branch_predicted_cmp, + .se_snprintf = hist_entry__callchain_branch_predicted_snprintf, + .se_width_idx = HISTC_CALLCHAIN_BRANCH_PREDICTED, +}; + +/* --sort callchain_branch_abort */ + +static int64_t +sort__callchain_branch_abort_cmp(struct hist_entry *left __maybe_unused, + struct hist_entry *right __maybe_unused) +{ + return 0; +} + +static int hist_entry__callchain_branch_abort_snprintf(struct hist_entry *he, + char *bf, size_t size, + unsigned int width) +{ + u64 branch_count, abort_count; + char str[32]; + + callchain_branch_counts(he->callchain, &branch_count, + NULL, &abort_count, NULL); + + snprintf(str, sizeof(str), "%" PRId64, abort_count); + return repsep_snprintf(bf, size, "%-*.*s", width, width, str); +} + +struct sort_entry sort_callchain_branch_abort = { + .se_header = "Abort", + .se_cmp = sort__callchain_branch_abort_cmp, + .se_snprintf = hist_entry__callchain_branch_abort_snprintf, + .se_width_idx = HISTC_CALLCHAIN_BRANCH_ABORT, +}; + +/* --sort callchain_branch_cycles */ + +static int64_t +sort__callchain_branch_cycles_cmp(struct hist_entry *left __maybe_unused, + struct hist_entry *right __maybe_unused) +{ + return 0; +} + +static int hist_entry__callchain_branch_cycles_snprintf(struct hist_entry *he, + char *bf, size_t size, + unsigned int width) +{ + u64 branch_count, cycles_count, cycles = 0; + char str[32]; + + callchain_branch_counts(he->callchain, &branch_count, + NULL, NULL, &cycles_count); + + if (branch_count) + cycles = cycles_count / branch_count; + + snprintf(str, sizeof(str), "%" PRId64 "", cycles); + return repsep_snprintf(bf, size, "%-*.*s", width, width, str); +} + +struct sort_entry sort_callchain_branch_cycles = { + .se_header = "Cycles", + .se_cmp = sort__callchain_branch_cycles_cmp, + .se_snprintf = hist_entry__callchain_branch_cycles_snprintf, + .se_width_idx = HISTC_CALLCHAIN_BRANCH_CYCLES, +}; + /* --sort srcfile */ static char no_srcfile[1]; @@ -2456,6 +2552,15 @@ static struct sort_dimension bstack_sort_dimensions[] = { DIM(SORT_SYM_IPC, "ipc_lbr", sort_sym_ipc), DIM(SORT_ADDR_FROM, "addr_from", sort_addr_from), DIM(SORT_ADDR_TO, "addr_to", sort_addr_to), + DIM(SORT_CALLCHAIN_BRANCH_PREDICTED, + "callchain_branch_predicted", + sort_callchain_branch_predicted), + DIM(SORT_CALLCHAIN_BRANCH_ABORT, + "callchain_branch_abort", + sort_callchain_branch_abort), + DIM(SORT_CALLCHAIN_BRANCH_CYCLES, + "callchain_branch_cycles", + sort_callchain_branch_cycles) }; #undef DIM @@ -3484,7 +3589,13 @@ int sort_dimension__add(struct perf_hpp_list *list, const char *tok, if (!sd->name || strncasecmp(tok, sd->name, strlen(tok))) continue; - if (sort__mode != SORT_MODE__BRANCH) + if ((sort__mode != SORT_MODE__BRANCH) && + strncasecmp(tok, "callchain_branch_predicted", + strlen(tok)) && + strncasecmp(tok, "callchain_branch_abort", + strlen(tok)) && + strncasecmp(tok, "callchain_branch_cycles", + strlen(tok))) return -EINVAL; if (sd->entry == &sort_sym_from || sd->entry == &sort_sym_to) diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h index 9ff68c6786e7..a8572574e168 100644 --- a/tools/perf/util/sort.h +++ b/tools/perf/util/sort.h @@ -88,6 +88,9 @@ enum sort_type { SORT_SYM_IPC, SORT_ADDR_FROM, SORT_ADDR_TO, + SORT_CALLCHAIN_BRANCH_PREDICTED, + SORT_CALLCHAIN_BRANCH_ABORT, + SORT_CALLCHAIN_BRANCH_CYCLES, /* memory mode specific sort keys */ __SORT_MEMORY_MODE, diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index ea96e4ebad8c..53dcdf07f5a2 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -7,6 +7,7 @@ #include <perf/cpumap.h> #include "color.h" #include "counts.h" +#include "debug.h" #include "evlist.h" #include "evsel.h" #include "stat.h" @@ -21,6 +22,7 @@ #include "iostat.h" #include "pmu.h" #include "pmus.h" +#include "tool_pmu.h" #define CNTR_NOT_SUPPORTED "<not supported>" #define CNTR_NOT_COUNTED "<not counted>" @@ -71,6 +73,32 @@ static const char *aggr_header_std[] = { [AGGR_GLOBAL] = "" }; +const char *metric_threshold_classify__color(enum metric_threshold_classify thresh) +{ + const char * const colors[] = { + "", /* unknown */ + PERF_COLOR_RED, /* bad */ + PERF_COLOR_MAGENTA, /* nearly bad */ + PERF_COLOR_YELLOW, /* less good */ + PERF_COLOR_GREEN, /* good */ + }; + static_assert(ARRAY_SIZE(colors) - 1 == METRIC_THRESHOLD_GOOD, "missing enum value"); + return colors[thresh]; +} + +static const char *metric_threshold_classify__str(enum metric_threshold_classify thresh) +{ + const char * const strs[] = { + "unknown", + "bad", + "nearly bad", + "less good", + "good", + }; + static_assert(ARRAY_SIZE(strs) - 1 == METRIC_THRESHOLD_GOOD, "missing enum value"); + return strs[thresh]; +} + static void print_running_std(struct perf_stat_config *config, u64 run, u64 ena) { if (run != ena) @@ -403,13 +431,14 @@ static void do_new_line_std(struct perf_stat_config *config, } static void print_metric_std(struct perf_stat_config *config, - void *ctx, const char *color, const char *fmt, - const char *unit, double val) + void *ctx, enum metric_threshold_classify thresh, + const char *fmt, const char *unit, double val) { struct outstate *os = ctx; FILE *out = os->fh; int n; bool newline = os->newline; + const char *color = metric_threshold_classify__color(thresh); os->newline = false; @@ -441,7 +470,7 @@ static void new_line_csv(struct perf_stat_config *config, void *ctx) static void print_metric_csv(struct perf_stat_config *config __maybe_unused, void *ctx, - const char *color __maybe_unused, + enum metric_threshold_classify thresh __maybe_unused, const char *fmt, const char *unit, double val) { struct outstate *os = ctx; @@ -462,15 +491,20 @@ static void print_metric_csv(struct perf_stat_config *config __maybe_unused, static void print_metric_json(struct perf_stat_config *config __maybe_unused, void *ctx, - const char *color __maybe_unused, + enum metric_threshold_classify thresh, const char *fmt __maybe_unused, const char *unit, double val) { struct outstate *os = ctx; FILE *out = os->fh; - fprintf(out, "\"metric-value\" : \"%f\", ", val); - fprintf(out, "\"metric-unit\" : \"%s\"", unit); + if (unit) { + fprintf(out, "\"metric-value\" : \"%f\", \"metric-unit\" : \"%s\"", val, unit); + if (thresh != METRIC_THRESHOLD_UNKNOWN) { + fprintf(out, ", \"metric-threshold\" : \"%s\"", + metric_threshold_classify__str(thresh)); + } + } if (!config->metric_only) fprintf(out, "}"); } @@ -557,13 +591,14 @@ static const char *fixunit(char *buf, struct evsel *evsel, } static void print_metric_only(struct perf_stat_config *config, - void *ctx, const char *color, const char *fmt, - const char *unit, double val) + void *ctx, enum metric_threshold_classify thresh, + const char *fmt, const char *unit, double val) { struct outstate *os = ctx; FILE *out = os->fh; char buf[1024], str[1024]; unsigned mlen = config->metric_only_len; + const char *color = metric_threshold_classify__color(thresh); if (!valid_only_metric(unit)) return; @@ -580,7 +615,8 @@ static void print_metric_only(struct perf_stat_config *config, } static void print_metric_only_csv(struct perf_stat_config *config __maybe_unused, - void *ctx, const char *color __maybe_unused, + void *ctx, + enum metric_threshold_classify thresh __maybe_unused, const char *fmt, const char *unit, double val) { @@ -602,25 +638,29 @@ static void print_metric_only_csv(struct perf_stat_config *config __maybe_unused } static void print_metric_only_json(struct perf_stat_config *config __maybe_unused, - void *ctx, const char *color __maybe_unused, + void *ctx, + enum metric_threshold_classify thresh __maybe_unused, const char *fmt, const char *unit, double val) { struct outstate *os = ctx; FILE *out = os->fh; - char buf[64], *vals, *ends; + char buf[64], *ends; char tbuf[1024]; + const char *vals; if (!valid_only_metric(unit)) return; unit = fixunit(tbuf, os->evsel, unit); + if (!unit[0]) + return; snprintf(buf, sizeof(buf), fmt ?: "", val); - ends = vals = skip_spaces(buf); + vals = ends = skip_spaces(buf); while (isdigit(*ends) || *ends == '.') ends++; *ends = 0; - if (!unit[0] || !vals[0]) - return; + if (!vals[0]) + vals = "none"; fprintf(out, "%s\"%s\" : \"%s\"", os->first ? "" : ", ", unit, vals); os->first = false; } @@ -631,7 +671,8 @@ static void new_line_metric(struct perf_stat_config *config __maybe_unused, } static void print_metric_header(struct perf_stat_config *config, - void *ctx, const char *color __maybe_unused, + void *ctx, + enum metric_threshold_classify thresh __maybe_unused, const char *fmt __maybe_unused, const char *unit, double val __maybe_unused) { @@ -805,7 +846,7 @@ static void printout(struct perf_stat_config *config, struct outstate *os, if (run == 0 || ena == 0 || counter->counts->scaled == -1) { if (config->metric_only) { - pm(config, os, NULL, "", "", 0); + pm(config, os, METRIC_THRESHOLD_UNKNOWN, "", "", 0); return; } @@ -860,7 +901,7 @@ static void printout(struct perf_stat_config *config, struct outstate *os, perf_stat__print_shadow_stats(config, counter, uval, aggr_idx, &out, &config->metric_events); } else { - pm(config, os, /*color=*/NULL, /*format=*/NULL, /*unit=*/"", /*val=*/0); + pm(config, os, METRIC_THRESHOLD_UNKNOWN, /*format=*/NULL, /*unit=*/"", /*val=*/0); } if (!config->metric_only) { @@ -871,38 +912,66 @@ static void printout(struct perf_stat_config *config, struct outstate *os, static void uniquify_event_name(struct evsel *counter) { - char *new_name; - char *config; - int ret = 0; + const char *name, *pmu_name; + char *new_name, *config; + int ret; + + /* The evsel was already uniquified. */ + if (counter->uniquified_name) + return; + + /* Avoid checking to uniquify twice. */ + counter->uniquified_name = true; + + /* The evsel has a "name=" config term or is from libpfm. */ + if (counter->use_config_name || counter->is_libpfm_event) + return; + + /* Legacy no PMU event, don't uniquify. */ + if (!counter->pmu || + (counter->pmu->type < PERF_TYPE_MAX && counter->pmu->type != PERF_TYPE_RAW)) + return; - if (counter->uniquified_name || counter->use_config_name || - !counter->pmu_name || !strncmp(evsel__name(counter), counter->pmu_name, - strlen(counter->pmu_name))) + /* A sysfs or json event replacing a legacy event, don't uniquify. */ + if (counter->pmu->is_core && counter->alternate_hw_config != PERF_COUNT_HW_MAX) return; - config = strchr(counter->name, '/'); + name = evsel__name(counter); + pmu_name = counter->pmu->name; + /* Already prefixed by the PMU name. */ + if (!strncmp(name, pmu_name, strlen(pmu_name))) + return; + + config = strchr(name, '/'); if (config) { - if (asprintf(&new_name, - "%s%s", counter->pmu_name, config) > 0) { - free(counter->name); - counter->name = new_name; - } - } else { - if (evsel__is_hybrid(counter)) { - ret = asprintf(&new_name, "%s/%s/", - counter->pmu_name, counter->name); + int len = config - name; + + if (config[1] == '/') { + /* case: event// */ + ret = asprintf(&new_name, "%s/%.*s/%s", pmu_name, len, name, config + 2); } else { - ret = asprintf(&new_name, "%s [%s]", - counter->name, counter->pmu_name); + /* case: event/.../ */ + ret = asprintf(&new_name, "%s/%.*s,%s", pmu_name, len, name, config + 1); } + } else { + config = strchr(name, ':'); + if (config) { + /* case: event:.. */ + int len = config - name; - if (ret) { - free(counter->name); - counter->name = new_name; + ret = asprintf(&new_name, "%s/%.*s/%s", pmu_name, len, name, config + 1); + } else { + /* case: event */ + ret = asprintf(&new_name, "%s/%s/", pmu_name, name); } } - - counter->uniquified_name = true; + if (ret > 0) { + free(counter->name); + counter->name = new_name; + } else { + /* ENOMEM from asprintf. */ + counter->uniquified_name = false; + } } static bool hybrid_uniquify(struct evsel *evsel, struct perf_stat_config *config) @@ -940,15 +1009,29 @@ static bool should_skip_zero_counter(struct perf_stat_config *config, int idx; /* + * Skip unsupported default events when not verbose. (default events + * are all marked 'skippable'). + */ + if (verbose == 0 && counter->skippable && !counter->supported) + return true; + + /* * Skip value 0 when enabling --per-thread globally, * otherwise it will have too many 0 output. */ if (config->aggr_mode == AGGR_THREAD && config->system_wide) return true; - /* Tool events have the software PMU but are only gathered on 1. */ - if (evsel__is_tool(counter)) - return true; + /* + * Many tool events are only gathered on the first index, skip other + * zero values. + */ + if (evsel__is_tool(counter)) { + struct aggr_cpu_id own_id = + config->aggr_get_id(config, (struct perf_cpu){ .cpu = 0 }); + + return !aggr_cpu_id__equal(id, &own_id); + } /* * Skip value 0 when it's an uncore event and the given aggr id @@ -1559,6 +1642,31 @@ static void print_cgroup_counter(struct perf_stat_config *config, struct evlist print_metric_end(config, os); } +static void disable_uniquify(struct evlist *evlist) +{ + struct evsel *counter; + struct perf_pmu *last_pmu = NULL; + bool first = true; + + evlist__for_each_entry(evlist, counter) { + /* If PMUs vary then uniquify can be useful. */ + if (!first && counter->pmu != last_pmu) + return; + first = false; + if (counter->pmu) { + /* Allow uniquify for uncore PMUs. */ + if (!counter->pmu->is_core) + return; + /* Keep hybrid event names uniquified for clarity. */ + if (perf_pmus__num_core_pmus() > 1) + return; + } + } + evlist__for_each_entry_continue(evlist, counter) { + counter->uniquified_name = true; + } +} + void evlist__print_counters(struct evlist *evlist, struct perf_stat_config *config, struct target *_target, struct timespec *ts, int argc, const char **argv) @@ -1572,6 +1680,8 @@ void evlist__print_counters(struct evlist *evlist, struct perf_stat_config *conf .first = true, }; + disable_uniquify(evlist); + if (config->iostat_run) evlist->selected = evlist__first(evlist); diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c index 99376c12dd8e..47718610d5d8 100644 --- a/tools/perf/util/stat-shadow.c +++ b/tools/perf/util/stat-shadow.c @@ -15,6 +15,7 @@ #include <linux/zalloc.h> #include "iostat.h" #include "util/hashmap.h" +#include "tool_pmu.h" struct stats walltime_nsecs_stats; struct rusage_stats ru_stats; @@ -76,7 +77,7 @@ void perf_stat__reset_shadow_stats(void) memset(&ru_stats, 0, sizeof(ru_stats)); } -static enum stat_type evsel__stat_type(const struct evsel *evsel) +static enum stat_type evsel__stat_type(struct evsel *evsel) { /* Fake perf_hw_cache_op_id values for use with evsel__match. */ u64 PERF_COUNT_hw_cache_l1d_miss = PERF_COUNT_HW_CACHE_L1D | @@ -136,23 +137,19 @@ static enum stat_type evsel__stat_type(const struct evsel *evsel) return STAT_NONE; } -static const char *get_ratio_color(const double ratios[3], double val) +static enum metric_threshold_classify get_ratio_thresh(const double ratios[3], double val) { - const char *color = PERF_COLOR_NORMAL; + assert(ratios[0] > ratios[1]); + assert(ratios[1] > ratios[2]); - if (val > ratios[0]) - color = PERF_COLOR_RED; - else if (val > ratios[1]) - color = PERF_COLOR_MAGENTA; - else if (val > ratios[2]) - color = PERF_COLOR_YELLOW; - - return color; + return val > ratios[1] + ? (val > ratios[0] ? METRIC_THRESHOLD_BAD : METRIC_THRESHOLD_NEARLY_BAD) + : (val > ratios[2] ? METRIC_THRESHOLD_LESS_GOOD : METRIC_THRESHOLD_GOOD); } static double find_stat(const struct evsel *evsel, int aggr_idx, enum stat_type type) { - const struct evsel *cur; + struct evsel *cur; int evsel_ctx = evsel_context(evsel); evlist__for_each_entry(evsel->evlist, cur) { @@ -195,17 +192,21 @@ static void print_ratio(struct perf_stat_config *config, const struct evsel *evsel, int aggr_idx, double numerator, struct perf_stat_output_ctx *out, enum stat_type denominator_type, - const double color_ratios[3], const char *unit) + const double thresh_ratios[3], const char *_unit) { double denominator = find_stat(evsel, aggr_idx, denominator_type); + double ratio = 0; + enum metric_threshold_classify thresh = METRIC_THRESHOLD_UNKNOWN; + const char *fmt = NULL; + const char *unit = NULL; if (numerator && denominator) { - double ratio = numerator / denominator * 100.0; - const char *color = get_ratio_color(color_ratios, ratio); - - out->print_metric(config, out->ctx, color, "%7.2f%%", unit, ratio); - } else - out->print_metric(config, out->ctx, NULL, NULL, unit, 0); + ratio = numerator / denominator * 100.0; + thresh = get_ratio_thresh(thresh_ratios, ratio); + fmt = "%7.2f%%"; + unit = _unit; + } + out->print_metric(config, out->ctx, thresh, fmt, unit, ratio); } static void print_stalled_cycles_front(struct perf_stat_config *config, @@ -213,9 +214,9 @@ static void print_stalled_cycles_front(struct perf_stat_config *config, int aggr_idx, double stalled, struct perf_stat_output_ctx *out) { - static const double color_ratios[3] = {50.0, 30.0, 10.0}; + const double thresh_ratios[3] = {50.0, 30.0, 10.0}; - print_ratio(config, evsel, aggr_idx, stalled, out, STAT_CYCLES, color_ratios, + print_ratio(config, evsel, aggr_idx, stalled, out, STAT_CYCLES, thresh_ratios, "frontend cycles idle"); } @@ -224,9 +225,9 @@ static void print_stalled_cycles_back(struct perf_stat_config *config, int aggr_idx, double stalled, struct perf_stat_output_ctx *out) { - static const double color_ratios[3] = {75.0, 50.0, 20.0}; + const double thresh_ratios[3] = {75.0, 50.0, 20.0}; - print_ratio(config, evsel, aggr_idx, stalled, out, STAT_CYCLES, color_ratios, + print_ratio(config, evsel, aggr_idx, stalled, out, STAT_CYCLES, thresh_ratios, "backend cycles idle"); } @@ -235,9 +236,9 @@ static void print_branch_miss(struct perf_stat_config *config, int aggr_idx, double misses, struct perf_stat_output_ctx *out) { - static const double color_ratios[3] = {20.0, 10.0, 5.0}; + const double thresh_ratios[3] = {20.0, 10.0, 5.0}; - print_ratio(config, evsel, aggr_idx, misses, out, STAT_BRANCHES, color_ratios, + print_ratio(config, evsel, aggr_idx, misses, out, STAT_BRANCHES, thresh_ratios, "of all branches"); } @@ -246,9 +247,9 @@ static void print_l1d_miss(struct perf_stat_config *config, int aggr_idx, double misses, struct perf_stat_output_ctx *out) { - static const double color_ratios[3] = {20.0, 10.0, 5.0}; + const double thresh_ratios[3] = {20.0, 10.0, 5.0}; - print_ratio(config, evsel, aggr_idx, misses, out, STAT_L1_DCACHE, color_ratios, + print_ratio(config, evsel, aggr_idx, misses, out, STAT_L1_DCACHE, thresh_ratios, "of all L1-dcache accesses"); } @@ -257,9 +258,9 @@ static void print_l1i_miss(struct perf_stat_config *config, int aggr_idx, double misses, struct perf_stat_output_ctx *out) { - static const double color_ratios[3] = {20.0, 10.0, 5.0}; + const double thresh_ratios[3] = {20.0, 10.0, 5.0}; - print_ratio(config, evsel, aggr_idx, misses, out, STAT_L1_ICACHE, color_ratios, + print_ratio(config, evsel, aggr_idx, misses, out, STAT_L1_ICACHE, thresh_ratios, "of all L1-icache accesses"); } @@ -268,9 +269,9 @@ static void print_ll_miss(struct perf_stat_config *config, int aggr_idx, double misses, struct perf_stat_output_ctx *out) { - static const double color_ratios[3] = {20.0, 10.0, 5.0}; + const double thresh_ratios[3] = {20.0, 10.0, 5.0}; - print_ratio(config, evsel, aggr_idx, misses, out, STAT_LL_CACHE, color_ratios, + print_ratio(config, evsel, aggr_idx, misses, out, STAT_LL_CACHE, thresh_ratios, "of all LL-cache accesses"); } @@ -279,9 +280,9 @@ static void print_dtlb_miss(struct perf_stat_config *config, int aggr_idx, double misses, struct perf_stat_output_ctx *out) { - static const double color_ratios[3] = {20.0, 10.0, 5.0}; + const double thresh_ratios[3] = {20.0, 10.0, 5.0}; - print_ratio(config, evsel, aggr_idx, misses, out, STAT_DTLB_CACHE, color_ratios, + print_ratio(config, evsel, aggr_idx, misses, out, STAT_DTLB_CACHE, thresh_ratios, "of all dTLB cache accesses"); } @@ -290,9 +291,9 @@ static void print_itlb_miss(struct perf_stat_config *config, int aggr_idx, double misses, struct perf_stat_output_ctx *out) { - static const double color_ratios[3] = {20.0, 10.0, 5.0}; + const double thresh_ratios[3] = {20.0, 10.0, 5.0}; - print_ratio(config, evsel, aggr_idx, misses, out, STAT_ITLB_CACHE, color_ratios, + print_ratio(config, evsel, aggr_idx, misses, out, STAT_ITLB_CACHE, thresh_ratios, "of all iTLB cache accesses"); } @@ -301,9 +302,9 @@ static void print_cache_miss(struct perf_stat_config *config, int aggr_idx, double misses, struct perf_stat_output_ctx *out) { - static const double color_ratios[3] = {20.0, 10.0, 5.0}; + const double thresh_ratios[3] = {20.0, 10.0, 5.0}; - print_ratio(config, evsel, aggr_idx, misses, out, STAT_CACHE_REFS, color_ratios, + print_ratio(config, evsel, aggr_idx, misses, out, STAT_CACHE_REFS, thresh_ratios, "of all cache refs"); } @@ -319,15 +320,16 @@ static void print_instructions(struct perf_stat_config *config, find_stat(evsel, aggr_idx, STAT_STALLED_CYCLES_BACK)); if (cycles) { - print_metric(config, ctxp, NULL, "%7.2f ", "insn per cycle", - instructions / cycles); - } else - print_metric(config, ctxp, NULL, NULL, "insn per cycle", 0); - + print_metric(config, ctxp, METRIC_THRESHOLD_UNKNOWN, "%7.2f ", + "insn per cycle", instructions / cycles); + } else { + print_metric(config, ctxp, METRIC_THRESHOLD_UNKNOWN, /*fmt=*/NULL, + "insn per cycle", 0); + } if (max_stalled && instructions) { out->new_line(config, ctxp); - print_metric(config, ctxp, NULL, "%7.2f ", "stalled cycles per insn", - max_stalled / instructions); + print_metric(config, ctxp, METRIC_THRESHOLD_UNKNOWN, "%7.2f ", + "stalled cycles per insn", max_stalled / instructions); } } @@ -341,9 +343,12 @@ static void print_cycles(struct perf_stat_config *config, if (cycles && nsecs) { double ratio = cycles / nsecs; - out->print_metric(config, out->ctx, NULL, "%8.3f", "GHz", ratio); - } else - out->print_metric(config, out->ctx, NULL, NULL, "GHz", 0); + out->print_metric(config, out->ctx, METRIC_THRESHOLD_UNKNOWN, "%8.3f", + "GHz", ratio); + } else { + out->print_metric(config, out->ctx, METRIC_THRESHOLD_UNKNOWN, /*fmt=*/NULL, + "GHz", 0); + } } static void print_nsecs(struct perf_stat_config *config, @@ -356,10 +361,12 @@ static void print_nsecs(struct perf_stat_config *config, double wall_time = avg_stats(&walltime_nsecs_stats); if (wall_time) { - print_metric(config, ctxp, NULL, "%8.3f", "CPUs utilized", + print_metric(config, ctxp, METRIC_THRESHOLD_UNKNOWN, "%8.3f", "CPUs utilized", nsecs / (wall_time * evsel->scale)); - } else - print_metric(config, ctxp, NULL, NULL, "CPUs utilized", 0); + } else { + print_metric(config, ctxp, METRIC_THRESHOLD_UNKNOWN, /*fmt=*/NULL, + "CPUs utilized", 0); + } } static int prepare_metric(const struct metric_expr *mexp, @@ -381,26 +388,35 @@ static int prepare_metric(const struct metric_expr *mexp, double scale; switch (evsel__tool_event(metric_events[i])) { - case PERF_TOOL_DURATION_TIME: + case TOOL_PMU__EVENT_DURATION_TIME: stats = &walltime_nsecs_stats; scale = 1e-9; break; - case PERF_TOOL_USER_TIME: + case TOOL_PMU__EVENT_USER_TIME: stats = &ru_stats.ru_utime_usec_stat; scale = 1e-6; break; - case PERF_TOOL_SYSTEM_TIME: + case TOOL_PMU__EVENT_SYSTEM_TIME: stats = &ru_stats.ru_stime_usec_stat; scale = 1e-6; break; - case PERF_TOOL_NONE: + case TOOL_PMU__EVENT_NONE: pr_err("Invalid tool event 'none'"); abort(); - case PERF_TOOL_MAX: + case TOOL_PMU__EVENT_MAX: pr_err("Invalid tool event 'max'"); abort(); + case TOOL_PMU__EVENT_HAS_PMEM: + case TOOL_PMU__EVENT_NUM_CORES: + case TOOL_PMU__EVENT_NUM_CPUS: + case TOOL_PMU__EVENT_NUM_CPUS_ONLINE: + case TOOL_PMU__EVENT_NUM_DIES: + case TOOL_PMU__EVENT_NUM_PACKAGES: + case TOOL_PMU__EVENT_SLOTS: + case TOOL_PMU__EVENT_SMT_ON: + case TOOL_PMU__EVENT_SYSTEM_TSC_FREQ: default: - pr_err("Unknown tool event '%s'", evsel__name(metric_events[i])); + pr_err("Unexpected tool event '%s'", evsel__name(metric_events[i])); abort(); } val = avg_stats(stats) * scale; @@ -483,7 +499,7 @@ static void generic_metric(struct perf_stat_config *config, double ratio, scale, threshold; int i; void *ctxp = out->ctx; - const char *color = NULL; + enum metric_threshold_classify thresh = METRIC_THRESHOLD_UNKNOWN; pctx = expr__ctx_new(); if (!pctx) @@ -501,13 +517,13 @@ static void generic_metric(struct perf_stat_config *config, if (!metric_events[i]) { if (expr__parse(&ratio, pctx, metric_expr) == 0) { char *unit; - char metric_bf[64]; + char metric_bf[128]; if (metric_threshold && expr__parse(&threshold, pctx, metric_threshold) == 0 && !isnan(threshold)) { - color = fpclassify(threshold) == FP_ZERO - ? PERF_COLOR_GREEN : PERF_COLOR_RED; + thresh = fpclassify(threshold) == FP_ZERO + ? METRIC_THRESHOLD_GOOD : METRIC_THRESHOLD_BAD; } if (metric_unit && metric_name) { @@ -522,22 +538,22 @@ static void generic_metric(struct perf_stat_config *config, scnprintf(metric_bf, sizeof(metric_bf), "%s %s", unit, metric_name); - print_metric(config, ctxp, color, "%8.1f", + print_metric(config, ctxp, thresh, "%8.1f", metric_bf, ratio); } else { - print_metric(config, ctxp, color, "%8.2f", + print_metric(config, ctxp, thresh, "%8.2f", metric_name ? metric_name : out->force_header ? evsel->name : "", ratio); } } else { - print_metric(config, ctxp, color, /*unit=*/NULL, + print_metric(config, ctxp, thresh, /*fmt=*/NULL, out->force_header ? (metric_name ?: evsel->name) : "", 0); } } else { - print_metric(config, ctxp, color, /*unit=*/NULL, + print_metric(config, ctxp, thresh, /*fmt=*/NULL, out->force_header ? (metric_name ?: evsel->name) : "", 0); } @@ -573,7 +589,7 @@ static void perf_stat__print_metricgroup_header(struct perf_stat_config *config, { bool need_full_name = perf_pmus__num_core_pmus() > 1; static const char *last_name; - static const char *last_pmu; + static const struct perf_pmu *last_pmu; char full_name[64]; /* @@ -584,21 +600,21 @@ static void perf_stat__print_metricgroup_header(struct perf_stat_config *config, * different metric events. */ if (last_name && !strcmp(last_name, name)) { - if (!need_full_name || !strcmp(last_pmu, evsel->pmu_name)) { + if (!need_full_name || last_pmu != evsel->pmu) { out->print_metricgroup_header(config, ctxp, NULL); return; } } - if (need_full_name) - scnprintf(full_name, sizeof(full_name), "%s (%s)", name, evsel->pmu_name); + if (need_full_name && evsel->pmu) + scnprintf(full_name, sizeof(full_name), "%s (%s)", name, evsel->pmu->name); else scnprintf(full_name, sizeof(full_name), "%s", name); out->print_metricgroup_header(config, ctxp, full_name); last_name = name; - last_pmu = evsel->pmu_name; + last_pmu = evsel->pmu; } /** @@ -708,17 +724,21 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, if (unit != ' ') snprintf(unit_buf, sizeof(unit_buf), "%c/sec", unit); - print_metric(config, ctxp, NULL, "%8.3f", unit_buf, ratio); - } else + print_metric(config, ctxp, METRIC_THRESHOLD_UNKNOWN, "%8.3f", + unit_buf, ratio); + } else { num = 0; + } } } perf_stat__print_shadow_stats_metricgroup(config, evsel, aggr_idx, &num, NULL, out, metric_events); - if (num == 0) - print_metric(config, ctxp, NULL, NULL, NULL, 0); + if (num == 0) { + print_metric(config, ctxp, METRIC_THRESHOLD_UNKNOWN, + /*fmt=*/NULL, /*unit=*/NULL, 0); + } } /** diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c index 0bd5467389e4..7c2ccdcc3fdb 100644 --- a/tools/perf/util/stat.c +++ b/tools/perf/util/stat.c @@ -553,7 +553,7 @@ static bool evsel__is_alias(struct evsel *evsel_a, struct evsel *evsel_b) if (evsel__is_clock(evsel_a) != evsel__is_clock(evsel_b)) return false; - return !!strcmp(evsel_a->pmu_name, evsel_b->pmu_name); + return evsel_a->pmu != evsel_b->pmu; } static void evsel__merge_aliases(struct evsel *evsel) diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h index fd7a187551bd..6f8cff3cd39a 100644 --- a/tools/perf/util/stat.h +++ b/tools/perf/util/stat.h @@ -154,9 +154,21 @@ struct evlist; extern struct stats walltime_nsecs_stats; extern struct rusage_stats ru_stats; +enum metric_threshold_classify { + METRIC_THRESHOLD_UNKNOWN, + METRIC_THRESHOLD_BAD, + METRIC_THRESHOLD_NEARLY_BAD, + METRIC_THRESHOLD_LESS_GOOD, + METRIC_THRESHOLD_GOOD, +}; +const char *metric_threshold_classify__color(enum metric_threshold_classify thresh); + typedef void (*print_metric_t)(struct perf_stat_config *config, - void *ctx, const char *color, const char *unit, - const char *fmt, double val); + void *ctx, + enum metric_threshold_classify thresh, + const char *fmt, + const char *unit, + double val); typedef void (*new_line_t)(struct perf_stat_config *config, void *ctx); /* Used to print the display name of the Default metricgroup for now. */ diff --git a/tools/perf/util/string.c b/tools/perf/util/string.c index 116a642ad99d..308fc7ec88cc 100644 --- a/tools/perf/util/string.c +++ b/tools/perf/util/string.c @@ -263,6 +263,34 @@ char *strpbrk_esc(char *str, const char *stopset) return ptr; } +/* Like strpbrk_esc(), but not break if it is quoted with single/double quotes */ +char *strpbrk_esq(char *str, const char *stopset) +{ + char *_stopset = NULL; + char *ptr; + const char *squote = "'"; + const char *dquote = "\""; + + if (asprintf(&_stopset, "%s%c%c", stopset, *squote, *dquote) < 0) + return NULL; + + do { + ptr = strpbrk_esc(str, _stopset); + if (!ptr) + break; + if (*ptr == *squote) + ptr = strpbrk_esc(ptr + 1, squote); + else if (*ptr == *dquote) + ptr = strpbrk_esc(ptr + 1, dquote); + else + break; + str = ptr + 1; + } while (ptr); + + free(_stopset); + return ptr; +} + /* Like strdup, but do not copy a single backslash */ char *strdup_esc(const char *str) { @@ -293,6 +321,78 @@ char *strdup_esc(const char *str) return ret; } +/* Remove backslash right before quote and return next quote address. */ +static char *remove_consumed_esc(char *str, int len, int quote) +{ + char *ptr = str, *end = str + len; + + while (*ptr != quote && ptr < end) { + if (*ptr == '\\' && *(ptr + 1) == quote) { + memmove(ptr, ptr + 1, end - (ptr + 1)); + /* now *ptr is `quote`. */ + end--; + } + ptr++; + } + + return *ptr == quote ? ptr : NULL; +} + +/* + * Like strdup_esc, but keep quoted string as it is (and single backslash + * before quote is removed). If there is no closed quote, return NULL. + */ +char *strdup_esq(const char *str) +{ + char *d, *ret; + + /* If there is no quote, return normal strdup_esc() */ + d = strpbrk_esc((char *)str, "\"'"); + if (!d) + return strdup_esc(str); + + ret = strdup(str); + if (!ret) + return NULL; + + d = ret; + do { + d = strpbrk(d, "\\\"\'"); + if (!d) + break; + + if (*d == '"' || *d == '\'') { + /* This is non-escaped quote */ + int quote = *d; + int len = strlen(d + 1) + 1; + + /* + * Remove the start quote and remove consumed escape (backslash + * before quote) and remove the end quote. If there is no end + * quote, it is the input error. + */ + memmove(d, d + 1, len); + d = remove_consumed_esc(d, len, quote); + if (!d) + goto error; + memmove(d, d + 1, strlen(d + 1) + 1); + } + if (*d == '\\') { + memmove(d, d + 1, strlen(d + 1) + 1); + if (*d == '\\') { + /* double backslash -- keep the second one. */ + d++; + } + } + } while (*d != '\0'); + + return ret; + +error: + free(ret); + return NULL; +} + unsigned int hex(char c) { if (c >= '0' && c <= '9') diff --git a/tools/perf/util/string2.h b/tools/perf/util/string2.h index 52cb8ba057c7..4c8bff47cfd3 100644 --- a/tools/perf/util/string2.h +++ b/tools/perf/util/string2.h @@ -37,6 +37,8 @@ char *asprintf__tp_filter_pids(size_t npids, pid_t *pids); char *strpbrk_esc(char *str, const char *stopset); char *strdup_esc(const char *str); +char *strpbrk_esq(char *str, const char *stopset); +char *strdup_esq(const char *str); unsigned int hex(char c); char *strreplace_chars(char needle, const char *haystack, const char *replace); diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index a18927d792af..0037f1163919 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -257,7 +257,7 @@ void symbols__fixup_end(struct rb_root_cached *symbols, bool is_kallsyms) * like in: * ffffffffc1937000 T hdmi_driver_init [snd_hda_codec_hdmi] */ - if (prev->end == prev->start) { + if (prev->end == prev->start && prev->type != STT_NOTYPE) { const char *prev_mod; const char *curr_mod; @@ -1931,6 +1931,9 @@ int dso__load(struct dso *dso, struct map *map) if (next_slot) { ss_pos++; + if (dso__binary_type(dso) == DSO_BINARY_TYPE__NOT_FOUND) + dso__set_binary_type(dso, symtab_type); + if (syms_ss && runtime_ss) break; } else { diff --git a/tools/perf/util/syscalltbl.c b/tools/perf/util/syscalltbl.c index 7c15dec6900d..69d8dcf5cf28 100644 --- a/tools/perf/util/syscalltbl.c +++ b/tools/perf/util/syscalltbl.c @@ -46,6 +46,15 @@ static const char *const *syscalltbl_native = syscalltbl_mips_n64; #include <asm/syscalls.c> const int syscalltbl_native_max_id = SYSCALLTBL_LOONGARCH_MAX_ID; static const char *const *syscalltbl_native = syscalltbl_loongarch; +#elif defined(__riscv) +#include <asm/syscalls.c> +const int syscalltbl_native_max_id = SYSCALLTBL_RISCV_MAX_ID; +static const char *const *syscalltbl_native = syscalltbl_riscv; +#else +const int syscalltbl_native_max_id = 0; +static const char *const syscalltbl_native[] = { + [0] = "unknown", +}; #endif struct syscall { @@ -182,6 +191,11 @@ int syscalltbl__id(struct syscalltbl *tbl, const char *name) return audit_name_to_syscall(name, tbl->audit_machine); } +int syscalltbl__id_at_idx(struct syscalltbl *tbl __maybe_unused, int idx) +{ + return idx; +} + int syscalltbl__strglobmatch_next(struct syscalltbl *tbl __maybe_unused, const char *syscall_glob __maybe_unused, int *idx __maybe_unused) { diff --git a/tools/perf/util/target.h b/tools/perf/util/target.h index d582cae8e105..2ee2cc30340f 100644 --- a/tools/perf/util/target.h +++ b/tools/perf/util/target.h @@ -17,6 +17,7 @@ struct target { bool default_per_cpu; bool per_thread; bool use_bpf; + bool inherit; int initial_delay; const char *attr_map; }; diff --git a/tools/perf/util/threads.c b/tools/perf/util/threads.c index ff2b169e0085..6ca0b178fb6c 100644 --- a/tools/perf/util/threads.c +++ b/tools/perf/util/threads.c @@ -141,7 +141,7 @@ void threads__remove_all_threads(struct threads *threads) down_write(&table->lock); __threads_table_entry__set_last_match(table, NULL); - hashmap__for_each_entry_safe((&table->shard), cur, tmp, bkt) { + hashmap__for_each_entry_safe(&table->shard, cur, tmp, bkt) { struct thread *old_value; hashmap__delete(&table->shard, cur->key, /*old_key=*/NULL, &old_value); @@ -175,7 +175,7 @@ int threads__for_each_thread(struct threads *threads, size_t bkt; down_read(&table->lock); - hashmap__for_each_entry((&table->shard), cur, bkt) { + hashmap__for_each_entry(&table->shard, cur, bkt) { int rc = fn((struct thread *)cur->pvalue, data); if (rc != 0) { diff --git a/tools/perf/util/tool_pmu.c b/tools/perf/util/tool_pmu.c new file mode 100644 index 000000000000..4fb097578479 --- /dev/null +++ b/tools/perf/util/tool_pmu.c @@ -0,0 +1,505 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include "cgroup.h" +#include "counts.h" +#include "cputopo.h" +#include "evsel.h" +#include "pmu.h" +#include "print-events.h" +#include "smt.h" +#include "time-utils.h" +#include "tool_pmu.h" +#include "tsc.h" +#include <api/fs/fs.h> +#include <api/io.h> +#include <internal/threadmap.h> +#include <perf/threadmap.h> +#include <fcntl.h> +#include <strings.h> + +static const char *const tool_pmu__event_names[TOOL_PMU__EVENT_MAX] = { + NULL, + "duration_time", + "user_time", + "system_time", + "has_pmem", + "num_cores", + "num_cpus", + "num_cpus_online", + "num_dies", + "num_packages", + "slots", + "smt_on", + "system_tsc_freq", +}; + +bool tool_pmu__skip_event(const char *name __maybe_unused) +{ +#if !defined(__aarch64__) + /* The slots event should only appear on arm64. */ + if (strcasecmp(name, "slots") == 0) + return true; +#endif +#if !defined(__i386__) && !defined(__x86_64__) + /* The system_tsc_freq event should only appear on x86. */ + if (strcasecmp(name, "system_tsc_freq") == 0) + return true; +#endif + return false; +} + +int tool_pmu__num_skip_events(void) +{ + int num = 0; + +#if !defined(__aarch64__) + num++; +#endif +#if !defined(__i386__) && !defined(__x86_64__) + num++; +#endif + return num; +} + +const char *tool_pmu__event_to_str(enum tool_pmu_event ev) +{ + if (ev > TOOL_PMU__EVENT_NONE && ev < TOOL_PMU__EVENT_MAX) + return tool_pmu__event_names[ev]; + + return NULL; +} + +enum tool_pmu_event tool_pmu__str_to_event(const char *str) +{ + int i; + + if (tool_pmu__skip_event(str)) + return TOOL_PMU__EVENT_NONE; + + tool_pmu__for_each_event(i) { + if (!strcasecmp(str, tool_pmu__event_names[i])) + return i; + } + return TOOL_PMU__EVENT_NONE; +} + +bool perf_pmu__is_tool(const struct perf_pmu *pmu) +{ + return pmu && pmu->type == PERF_PMU_TYPE_TOOL; +} + +bool evsel__is_tool(const struct evsel *evsel) +{ + return perf_pmu__is_tool(evsel->pmu); +} + +enum tool_pmu_event evsel__tool_event(const struct evsel *evsel) +{ + if (!evsel__is_tool(evsel)) + return TOOL_PMU__EVENT_NONE; + + return (enum tool_pmu_event)evsel->core.attr.config; +} + +const char *evsel__tool_pmu_event_name(const struct evsel *evsel) +{ + return tool_pmu__event_to_str(evsel->core.attr.config); +} + +static bool read_until_char(struct io *io, char e) +{ + int c; + + do { + c = io__get_char(io); + if (c == -1) + return false; + } while (c != e); + return true; +} + +static int read_stat_field(int fd, struct perf_cpu cpu, int field, __u64 *val) +{ + char buf[256]; + struct io io; + int i; + + io__init(&io, fd, buf, sizeof(buf)); + + /* Skip lines to relevant CPU. */ + for (i = -1; i < cpu.cpu; i++) { + if (!read_until_char(&io, '\n')) + return -EINVAL; + } + /* Skip to "cpu". */ + if (io__get_char(&io) != 'c') return -EINVAL; + if (io__get_char(&io) != 'p') return -EINVAL; + if (io__get_char(&io) != 'u') return -EINVAL; + + /* Skip N of cpuN. */ + if (!read_until_char(&io, ' ')) + return -EINVAL; + + i = 1; + while (true) { + if (io__get_dec(&io, val) != ' ') + break; + if (field == i) + return 0; + i++; + } + return -EINVAL; +} + +static int read_pid_stat_field(int fd, int field, __u64 *val) +{ + char buf[256]; + struct io io; + int c, i; + + io__init(&io, fd, buf, sizeof(buf)); + if (io__get_dec(&io, val) != ' ') + return -EINVAL; + if (field == 1) + return 0; + + /* Skip comm. */ + if (io__get_char(&io) != '(' || !read_until_char(&io, ')')) + return -EINVAL; + if (field == 2) + return -EINVAL; /* String can't be returned. */ + + /* Skip state */ + if (io__get_char(&io) != ' ' || io__get_char(&io) == -1) + return -EINVAL; + if (field == 3) + return -EINVAL; /* String can't be returned. */ + + /* Loop over numeric fields*/ + if (io__get_char(&io) != ' ') + return -EINVAL; + + i = 4; + while (true) { + c = io__get_dec(&io, val); + if (c == -1) + return -EINVAL; + if (c == -2) { + /* Assume a -ve was read */ + c = io__get_dec(&io, val); + *val *= -1; + } + if (c != ' ') + return -EINVAL; + if (field == i) + return 0; + i++; + } + return -EINVAL; +} + +int evsel__tool_pmu_prepare_open(struct evsel *evsel, + struct perf_cpu_map *cpus, + int nthreads) +{ + if ((evsel__tool_event(evsel) == TOOL_PMU__EVENT_SYSTEM_TIME || + evsel__tool_event(evsel) == TOOL_PMU__EVENT_USER_TIME) && + !evsel->start_times) { + evsel->start_times = xyarray__new(perf_cpu_map__nr(cpus), + nthreads, + sizeof(__u64)); + if (!evsel->start_times) + return -ENOMEM; + } + return 0; +} + +#define FD(e, x, y) (*(int *)xyarray__entry(e->core.fd, x, y)) + +int evsel__tool_pmu_open(struct evsel *evsel, + struct perf_thread_map *threads, + int start_cpu_map_idx, int end_cpu_map_idx) +{ + enum tool_pmu_event ev = evsel__tool_event(evsel); + int pid = -1, idx = 0, thread = 0, nthreads, err = 0, old_errno; + + if (ev == TOOL_PMU__EVENT_NUM_CPUS) + return 0; + + if (ev == TOOL_PMU__EVENT_DURATION_TIME) { + if (evsel->core.attr.sample_period) /* no sampling */ + return -EINVAL; + evsel->start_time = rdclock(); + return 0; + } + + if (evsel->cgrp) + pid = evsel->cgrp->fd; + + nthreads = perf_thread_map__nr(threads); + for (idx = start_cpu_map_idx; idx < end_cpu_map_idx; idx++) { + for (thread = 0; thread < nthreads; thread++) { + if (thread >= nthreads) + break; + + if (!evsel->cgrp && !evsel->core.system_wide) + pid = perf_thread_map__pid(threads, thread); + + if (ev == TOOL_PMU__EVENT_USER_TIME || ev == TOOL_PMU__EVENT_SYSTEM_TIME) { + bool system = ev == TOOL_PMU__EVENT_SYSTEM_TIME; + __u64 *start_time = NULL; + int fd; + + if (evsel->core.attr.sample_period) { + /* no sampling */ + err = -EINVAL; + goto out_close; + } + if (pid > -1) { + char buf[64]; + + snprintf(buf, sizeof(buf), "/proc/%d/stat", pid); + fd = open(buf, O_RDONLY); + evsel->pid_stat = true; + } else { + fd = open("/proc/stat", O_RDONLY); + } + FD(evsel, idx, thread) = fd; + if (fd < 0) { + err = -errno; + goto out_close; + } + start_time = xyarray__entry(evsel->start_times, idx, thread); + if (pid > -1) { + err = read_pid_stat_field(fd, system ? 15 : 14, + start_time); + } else { + struct perf_cpu cpu; + + cpu = perf_cpu_map__cpu(evsel->core.cpus, idx); + err = read_stat_field(fd, cpu, system ? 3 : 1, + start_time); + } + if (err) + goto out_close; + } + + } + } + return 0; +out_close: + if (err) + threads->err_thread = thread; + + old_errno = errno; + do { + while (--thread >= 0) { + if (FD(evsel, idx, thread) >= 0) + close(FD(evsel, idx, thread)); + FD(evsel, idx, thread) = -1; + } + thread = nthreads; + } while (--idx >= 0); + errno = old_errno; + return err; +} + +#if !defined(__i386__) && !defined(__x86_64__) +u64 arch_get_tsc_freq(void) +{ + return 0; +} +#endif + +#if !defined(__aarch64__) +u64 tool_pmu__cpu_slots_per_cycle(void) +{ + return 0; +} +#endif + +static bool has_pmem(void) +{ + static bool has_pmem, cached; + const char *sysfs = sysfs__mountpoint(); + char path[PATH_MAX]; + + if (!cached) { + snprintf(path, sizeof(path), "%s/firmware/acpi/tables/NFIT", sysfs); + has_pmem = access(path, F_OK) == 0; + cached = true; + } + return has_pmem; +} + +bool tool_pmu__read_event(enum tool_pmu_event ev, u64 *result) +{ + const struct cpu_topology *topology; + + switch (ev) { + case TOOL_PMU__EVENT_HAS_PMEM: + *result = has_pmem() ? 1 : 0; + return true; + + case TOOL_PMU__EVENT_NUM_CORES: + topology = online_topology(); + *result = topology->core_cpus_lists; + return true; + + case TOOL_PMU__EVENT_NUM_CPUS: + *result = cpu__max_present_cpu().cpu; + return true; + + case TOOL_PMU__EVENT_NUM_CPUS_ONLINE: { + struct perf_cpu_map *online = cpu_map__online(); + + if (online) { + *result = perf_cpu_map__nr(online); + return true; + } + return false; + } + case TOOL_PMU__EVENT_NUM_DIES: + topology = online_topology(); + *result = topology->die_cpus_lists; + return true; + + case TOOL_PMU__EVENT_NUM_PACKAGES: + topology = online_topology(); + *result = topology->package_cpus_lists; + return true; + + case TOOL_PMU__EVENT_SLOTS: + *result = tool_pmu__cpu_slots_per_cycle(); + return *result ? true : false; + + case TOOL_PMU__EVENT_SMT_ON: + *result = smt_on() ? 1 : 0; + return true; + + case TOOL_PMU__EVENT_SYSTEM_TSC_FREQ: + *result = arch_get_tsc_freq(); + return true; + + case TOOL_PMU__EVENT_NONE: + case TOOL_PMU__EVENT_DURATION_TIME: + case TOOL_PMU__EVENT_USER_TIME: + case TOOL_PMU__EVENT_SYSTEM_TIME: + case TOOL_PMU__EVENT_MAX: + default: + return false; + } +} + +int evsel__tool_pmu_read(struct evsel *evsel, int cpu_map_idx, int thread) +{ + __u64 *start_time, cur_time, delta_start; + u64 val; + int fd, err = 0; + struct perf_counts_values *count, *old_count = NULL; + bool adjust = false; + enum tool_pmu_event ev = evsel__tool_event(evsel); + + count = perf_counts(evsel->counts, cpu_map_idx, thread); + + switch (ev) { + case TOOL_PMU__EVENT_HAS_PMEM: + case TOOL_PMU__EVENT_NUM_CORES: + case TOOL_PMU__EVENT_NUM_CPUS: + case TOOL_PMU__EVENT_NUM_CPUS_ONLINE: + case TOOL_PMU__EVENT_NUM_DIES: + case TOOL_PMU__EVENT_NUM_PACKAGES: + case TOOL_PMU__EVENT_SLOTS: + case TOOL_PMU__EVENT_SMT_ON: + case TOOL_PMU__EVENT_SYSTEM_TSC_FREQ: + if (evsel->prev_raw_counts) + old_count = perf_counts(evsel->prev_raw_counts, cpu_map_idx, thread); + val = 0; + if (cpu_map_idx == 0 && thread == 0) { + if (!tool_pmu__read_event(ev, &val)) { + count->lost++; + val = 0; + } + } + if (old_count) { + count->val = old_count->val + val; + count->run = old_count->run + 1; + count->ena = old_count->ena + 1; + } else { + count->val = val; + count->run++; + count->ena++; + } + return 0; + case TOOL_PMU__EVENT_DURATION_TIME: + /* + * Pretend duration_time is only on the first CPU and thread, or + * else aggregation will scale duration_time by the number of + * CPUs/threads. + */ + start_time = &evsel->start_time; + if (cpu_map_idx == 0 && thread == 0) + cur_time = rdclock(); + else + cur_time = *start_time; + break; + case TOOL_PMU__EVENT_USER_TIME: + case TOOL_PMU__EVENT_SYSTEM_TIME: { + bool system = evsel__tool_event(evsel) == TOOL_PMU__EVENT_SYSTEM_TIME; + + start_time = xyarray__entry(evsel->start_times, cpu_map_idx, thread); + fd = FD(evsel, cpu_map_idx, thread); + lseek(fd, SEEK_SET, 0); + if (evsel->pid_stat) { + /* The event exists solely on 1 CPU. */ + if (cpu_map_idx == 0) + err = read_pid_stat_field(fd, system ? 15 : 14, &cur_time); + else + cur_time = 0; + } else { + /* The event is for all threads. */ + if (thread == 0) { + struct perf_cpu cpu = perf_cpu_map__cpu(evsel->core.cpus, + cpu_map_idx); + + err = read_stat_field(fd, cpu, system ? 3 : 1, &cur_time); + } else { + cur_time = 0; + } + } + adjust = true; + break; + } + case TOOL_PMU__EVENT_NONE: + case TOOL_PMU__EVENT_MAX: + default: + err = -EINVAL; + } + if (err) + return err; + + delta_start = cur_time - *start_time; + if (adjust) { + __u64 ticks_per_sec = sysconf(_SC_CLK_TCK); + + delta_start *= 1000000000 / ticks_per_sec; + } + count->val = delta_start; + count->ena = count->run = delta_start; + count->lost = 0; + return 0; +} + +struct perf_pmu *perf_pmus__tool_pmu(void) +{ + static struct perf_pmu tool = { + .name = "tool", + .type = PERF_PMU_TYPE_TOOL, + .aliases = LIST_HEAD_INIT(tool.aliases), + .caps = LIST_HEAD_INIT(tool.caps), + .format = LIST_HEAD_INIT(tool.format), + }; + if (!tool.events_table) + tool.events_table = find_core_events_table("common", "common"); + + return &tool; +} diff --git a/tools/perf/util/tool_pmu.h b/tools/perf/util/tool_pmu.h new file mode 100644 index 000000000000..a60184859080 --- /dev/null +++ b/tools/perf/util/tool_pmu.h @@ -0,0 +1,56 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __TOOL_PMU_H +#define __TOOL_PMU_H + +#include "pmu.h" + +struct evsel; +struct perf_thread_map; +struct print_callbacks; + +enum tool_pmu_event { + TOOL_PMU__EVENT_NONE = 0, + TOOL_PMU__EVENT_DURATION_TIME, + TOOL_PMU__EVENT_USER_TIME, + TOOL_PMU__EVENT_SYSTEM_TIME, + TOOL_PMU__EVENT_HAS_PMEM, + TOOL_PMU__EVENT_NUM_CORES, + TOOL_PMU__EVENT_NUM_CPUS, + TOOL_PMU__EVENT_NUM_CPUS_ONLINE, + TOOL_PMU__EVENT_NUM_DIES, + TOOL_PMU__EVENT_NUM_PACKAGES, + TOOL_PMU__EVENT_SLOTS, + TOOL_PMU__EVENT_SMT_ON, + TOOL_PMU__EVENT_SYSTEM_TSC_FREQ, + + TOOL_PMU__EVENT_MAX, +}; + +#define tool_pmu__for_each_event(ev) \ + for ((ev) = TOOL_PMU__EVENT_DURATION_TIME; (ev) < TOOL_PMU__EVENT_MAX; ev++) + +const char *tool_pmu__event_to_str(enum tool_pmu_event ev); +enum tool_pmu_event tool_pmu__str_to_event(const char *str); +bool tool_pmu__skip_event(const char *name); +int tool_pmu__num_skip_events(void); + +bool tool_pmu__read_event(enum tool_pmu_event ev, u64 *result); + +u64 tool_pmu__cpu_slots_per_cycle(void); + +bool perf_pmu__is_tool(const struct perf_pmu *pmu); + +bool evsel__is_tool(const struct evsel *evsel); +enum tool_pmu_event evsel__tool_event(const struct evsel *evsel); +const char *evsel__tool_pmu_event_name(const struct evsel *evsel); +int evsel__tool_pmu_prepare_open(struct evsel *evsel, + struct perf_cpu_map *cpus, + int nthreads); +int evsel__tool_pmu_open(struct evsel *evsel, + struct perf_thread_map *threads, + int start_cpu_map_idx, int end_cpu_map_idx); +int evsel__tool_pmu_read(struct evsel *evsel, int cpu_map_idx, int thread); + +struct perf_pmu *perf_pmus__tool_pmu(void); + +#endif /* __TOOL_PMU_H */ diff --git a/tools/perf/util/trace-event-parse.c b/tools/perf/util/trace-event-parse.c index f0332bd3a501..41d53e1b43e7 100644 --- a/tools/perf/util/trace-event-parse.c +++ b/tools/perf/util/trace-event-parse.c @@ -12,7 +12,7 @@ #include <linux/ctype.h> #include <linux/kernel.h> -#include <traceevent/event-parse.h> +#include <event-parse.h> static int get_common_field(struct scripting_context *context, int *offset, int *size, const char *type) @@ -116,12 +116,6 @@ void event_format__fprintf(struct tep_event *event, trace_seq_destroy(&s); } -void event_format__print(struct tep_event *event, - int cpu, void *data, int size) -{ - return event_format__fprintf(event, cpu, data, size, stdout); -} - /* * prev_state is of size long, which is 32 bits on 32 bit architectures. * As it needs to have the same bits for both 32 bit and 64 bit architectures diff --git a/tools/perf/util/trace-event-read.c b/tools/perf/util/trace-event-read.c index 1162c49b8082..ecbbb93f0185 100644 --- a/tools/perf/util/trace-event-read.c +++ b/tools/perf/util/trace-event-read.c @@ -11,7 +11,7 @@ #include <sys/stat.h> #include <sys/wait.h> #include <sys/mman.h> -#include <traceevent/event-parse.h> +#include <event-parse.h> #include <fcntl.h> #include <unistd.h> #include <errno.h> diff --git a/tools/perf/util/trace-event-scripting.c b/tools/perf/util/trace-event-scripting.c index bd0000300c77..5596fcda2c10 100644 --- a/tools/perf/util/trace-event-scripting.c +++ b/tools/perf/util/trace-event-scripting.c @@ -10,7 +10,7 @@ #include <string.h> #include <errno.h> #ifdef HAVE_LIBTRACEEVENT -#include <traceevent/event-parse.h> +#include <event-parse.h> #endif #include "debug.h" diff --git a/tools/perf/util/trace-event.c b/tools/perf/util/trace-event.c index 8ad75b31e09b..6a8c66c64b70 100644 --- a/tools/perf/util/trace-event.c +++ b/tools/perf/util/trace-event.c @@ -8,7 +8,7 @@ #include <fcntl.h> #include <linux/kernel.h> #include <linux/err.h> -#include <traceevent/event-parse.h> +#include <event-parse.h> #include <api/fs/tracing_path.h> #include <api/fs/fs.h> #include "trace-event.h" diff --git a/tools/perf/util/trace-event.h b/tools/perf/util/trace-event.h index bbf8b26bc8da..79b939f947dd 100644 --- a/tools/perf/util/trace-event.h +++ b/tools/perf/util/trace-event.h @@ -42,9 +42,6 @@ struct tep_event *trace_event__tp_format_id(int id); void event_format__fprintf(struct tep_event *event, int cpu, void *data, int size, FILE *fp); -void event_format__print(struct tep_event *event, - int cpu, void *data, int size); - int parse_ftrace_file(struct tep_handle *pevent, char *buf, unsigned long size); int parse_event_file(struct tep_handle *pevent, char *buf, unsigned long size, char *sys); @@ -150,7 +147,7 @@ int common_lock_depth(struct scripting_context *context); int perf_sample__sprintf_flags(u32 flags, char *str, size_t sz); #if defined(LIBTRACEEVENT_VERSION) && LIBTRACEEVENT_VERSION >= MAKE_LIBTRACEEVENT_VERSION(1, 5, 0) -#include <traceevent/event-parse.h> +#include <event-parse.h> static inline bool tep_field_is_relative(unsigned long flags) { diff --git a/tools/perf/util/tsc.c b/tools/perf/util/tsc.c index 2e33a20e1e1b..511a517ce613 100644 --- a/tools/perf/util/tsc.c +++ b/tools/perf/util/tsc.c @@ -119,7 +119,7 @@ size_t perf_event__fprintf_time_conv(union perf_event *event, FILE *fp) size_t ret; ret = fprintf(fp, "\n... Time Shift %" PRI_lu64 "\n", tc->time_shift); - ret += fprintf(fp, "... Time Muliplier %" PRI_lu64 "\n", tc->time_mult); + ret += fprintf(fp, "... Time Multiplier %" PRI_lu64 "\n", tc->time_mult); ret += fprintf(fp, "... Time Zero %" PRI_lu64 "\n", tc->time_zero); /* diff --git a/tools/perf/util/tsc.h b/tools/perf/util/tsc.h index 88fd1c4c1cb8..57ce8449647f 100644 --- a/tools/perf/util/tsc.h +++ b/tools/perf/util/tsc.h @@ -25,7 +25,7 @@ int perf_read_tsc_conversion(const struct perf_event_mmap_page *pc, u64 perf_time_to_tsc(u64 ns, struct perf_tsc_conversion *tc); u64 tsc_to_perf_time(u64 cyc, struct perf_tsc_conversion *tc); u64 rdtsc(void); -double arch_get_tsc_freq(void); +u64 arch_get_tsc_freq(void); size_t perf_event__fprintf_time_conv(union perf_event *event, FILE *fp); diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c index 9d55a13787ce..0f031eb80b4c 100644 --- a/tools/perf/util/util.c +++ b/tools/perf/util/util.c @@ -78,17 +78,23 @@ bool sysctl__nmi_watchdog_enabled(void) bool test_attr__enabled; +bool exclude_GH_default; + bool perf_host = true; bool perf_guest = false; void event_attr_init(struct perf_event_attr *attr) { + /* to capture ABI version */ + attr->size = sizeof(*attr); + + if (!exclude_GH_default) + return; + if (!perf_host) attr->exclude_host = 1; if (!perf_guest) attr->exclude_guest = 1; - /* to capture ABI version */ - attr->size = sizeof(*attr); } int mkdir_p(char *path, mode_t mode) @@ -336,91 +342,6 @@ bool perf_event_paranoid_check(int max_level) return perf_event_paranoid() <= max_level; } -static int -fetch_ubuntu_kernel_version(unsigned int *puint) -{ - ssize_t len; - size_t line_len = 0; - char *ptr, *line = NULL; - int version, patchlevel, sublevel, err; - FILE *vsig; - - if (!puint) - return 0; - - vsig = fopen("/proc/version_signature", "r"); - if (!vsig) { - pr_debug("Open /proc/version_signature failed: %s\n", - strerror(errno)); - return -1; - } - - len = getline(&line, &line_len, vsig); - fclose(vsig); - err = -1; - if (len <= 0) { - pr_debug("Reading from /proc/version_signature failed: %s\n", - strerror(errno)); - goto errout; - } - - ptr = strrchr(line, ' '); - if (!ptr) { - pr_debug("Parsing /proc/version_signature failed: %s\n", line); - goto errout; - } - - err = sscanf(ptr + 1, "%d.%d.%d", - &version, &patchlevel, &sublevel); - if (err != 3) { - pr_debug("Unable to get kernel version from /proc/version_signature '%s'\n", - line); - goto errout; - } - - *puint = (version << 16) + (patchlevel << 8) + sublevel; - err = 0; -errout: - free(line); - return err; -} - -int -fetch_kernel_version(unsigned int *puint, char *str, - size_t str_size) -{ - struct utsname utsname; - int version, patchlevel, sublevel, err; - bool int_ver_ready = false; - - if (access("/proc/version_signature", R_OK) == 0) - if (!fetch_ubuntu_kernel_version(puint)) - int_ver_ready = true; - - if (uname(&utsname)) - return -1; - - if (str && str_size) { - strncpy(str, utsname.release, str_size); - str[str_size - 1] = '\0'; - } - - if (!puint || int_ver_ready) - return 0; - - err = sscanf(utsname.release, "%d.%d.%d", - &version, &patchlevel, &sublevel); - - if (err != 3) { - pr_debug("Unable to get kernel version from uname '%s'\n", - utsname.release); - return -1; - } - - *puint = (version << 16) + (patchlevel << 8) + sublevel; - return 0; -} - int perf_tip(char **strp, const char *dirpath) { struct strlist *tips; diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h index 9966c21aaf04..3423778e39a5 100644 --- a/tools/perf/util/util.h +++ b/tools/perf/util/util.h @@ -21,6 +21,9 @@ extern const char perf_more_info_string[]; extern const char *input_name; +/* This will control if perf_{host,guest} will set attr.exclude_{host,guest}. */ +extern bool exclude_GH_default; + extern bool perf_host; extern bool perf_guest; @@ -43,14 +46,6 @@ int sysctl__max_stack(void); bool sysctl__nmi_watchdog_enabled(void); -int fetch_kernel_version(unsigned int *puint, - char *str, size_t str_sz); -#define KVER_VERSION(x) (((x) >> 16) & 0xff) -#define KVER_PATCHLEVEL(x) (((x) >> 8) & 0xff) -#define KVER_SUBLEVEL(x) ((x) & 0xff) -#define KVER_FMT "%d.%d.%d" -#define KVER_PARAM(x) KVER_VERSION(x), KVER_PATCHLEVEL(x), KVER_SUBLEVEL(x) - int perf_tip(char **strp, const char *dirpath); #ifndef HAVE_SCHED_GETCPU_SUPPORT @@ -81,13 +76,6 @@ char *perf_exe(char *buf, int len); #endif #endif -extern bool test_attr__enabled; -void test_attr__ready(void); -void test_attr__init(void); -struct perf_event_attr; -void test_attr__open(struct perf_event_attr *attr, pid_t pid, struct perf_cpu cpu, - int fd, int group_fd, unsigned long flags); - struct perf_debuginfod { const char *urls; bool set; diff --git a/tools/perf/util/vdso.c b/tools/perf/util/vdso.c index 1b6f8f6db7aa..c12f5d8c4bf6 100644 --- a/tools/perf/util/vdso.c +++ b/tools/perf/util/vdso.c @@ -308,8 +308,10 @@ static struct dso *machine__find_vdso(struct machine *machine, if (!dso) { dso = dsos__find(&machine->dsos, DSO__NAME_VDSO, true); - if (dso && dso_type != dso__type(dso, machine)) + if (dso && dso_type != dso__type(dso, machine)) { + dso__put(dso); dso = NULL; + } } break; case DSO__TYPE_X32BIT: diff --git a/tools/power/cpupower/.gitignore b/tools/power/cpupower/.gitignore index 7677329c42a6..5113d5a7aee0 100644 --- a/tools/power/cpupower/.gitignore +++ b/tools/power/cpupower/.gitignore @@ -27,3 +27,6 @@ debug/i386/intel_gsic debug/i386/powernow-k8-decode debug/x86_64/centrino-decode debug/x86_64/powernow-k8-decode + +# Clang's compilation database file +compile_commands.json diff --git a/tools/power/cpupower/Makefile b/tools/power/cpupower/Makefile index 6c02f401069e..175004ce44b2 100644 --- a/tools/power/cpupower/Makefile +++ b/tools/power/cpupower/Makefile @@ -57,7 +57,7 @@ LIB_MIN= 1 PACKAGE = cpupower PACKAGE_BUGREPORT = linux-pm@vger.kernel.org -LANGUAGES = de fr it cs pt ka +LANGUAGES = de fr it cs pt ka zh_CN # Directory definitions. These are default and most probably @@ -86,12 +86,12 @@ INSTALL_SCRIPT = ${INSTALL} -m 644 # If you are running a cross compiler, you may want to set this # to something more interesting, like "arm-linux-". If you want # to compile vs uClibc, that can be done here as well. -CROSS = #/usr/i386-linux-uclibc/usr/bin/i386-uclibc- -CC = $(CROSS)gcc -LD = $(CROSS)gcc -AR = $(CROSS)ar -STRIP = $(CROSS)strip -RANLIB = $(CROSS)ranlib +CROSS ?= #/usr/i386-linux-uclibc/usr/bin/i386-uclibc- +CC ?= $(CROSS)gcc +LD ?= $(CROSS)gcc +AR ?= $(CROSS)ar +STRIP ?= $(CROSS)strip +RANLIB ?= $(CROSS)ranlib HOSTCC = gcc MKDIR = mkdir @@ -218,17 +218,28 @@ else endif $(QUIET) $(STRIPCMD) $@ +ifeq (, $(shell which xgettext)) +$(warning "Install xgettext to extract translatable strings.") +else $(OUTPUT)po/$(PACKAGE).pot: $(UTIL_SRC) $(ECHO) " GETTEXT " $@ $(QUIET) xgettext --default-domain=$(PACKAGE) --add-comments \ --keyword=_ --keyword=N_ $(UTIL_SRC) -p $(@D) -o $(@F) +endif +ifeq (, $(shell which msgfmt)) +$(warning "Install msgfmt to generate binary message catalogs.") +else $(OUTPUT)po/%.gmo: po/%.po $(ECHO) " MSGFMT " $@ $(QUIET) msgfmt -o $@ po/$*.po +endif create-gmo: ${GMO_FILES} +ifeq (, $(shell which msgmerge)) +$(warning "Install msgmerge to merge translations.") +else update-po: $(OUTPUT)po/$(PACKAGE).pot $(ECHO) " MSGMRG " $@ $(QUIET) @for HLANG in $(LANGUAGES); do \ @@ -241,6 +252,7 @@ update-po: $(OUTPUT)po/$(PACKAGE).pot rm -f $(OUTPUT)po/$$HLANG.new.po; \ fi; \ done; +endif compile-bench: $(OUTPUT)libcpupower.so.$(LIB_MAJ) @V=$(V) confdir=$(confdir) $(MAKE) -C bench O=$(OUTPUT) diff --git a/tools/power/cpupower/bench/parse.c b/tools/power/cpupower/bench/parse.c index e63dc11fa3a5..080678d9d74e 100644 --- a/tools/power/cpupower/bench/parse.c +++ b/tools/power/cpupower/bench/parse.c @@ -4,6 +4,7 @@ * Copyright (C) 2008 Christian Kornacker <ckornacker@suse.de> */ +#include <errno.h> #include <stdio.h> #include <stdlib.h> #include <stdarg.h> @@ -165,8 +166,8 @@ int prepare_config(const char *path, struct config *config) configfile = fopen(path, "r"); if (configfile == NULL) { - perror("fopen"); - fprintf(stderr, "error: unable to read configfile\n"); + fprintf(stderr, "error: unable to read configfile: %s, %s\n", + path, strerror(errno)); free(config); return 1; } diff --git a/tools/power/cpupower/bindings/python/test_raw_pylibcpupower.py b/tools/power/cpupower/bindings/python/test_raw_pylibcpupower.py index 3d6f62b9556a..ca5aa46c9b20 100755 --- a/tools/power/cpupower/bindings/python/test_raw_pylibcpupower.py +++ b/tools/power/cpupower/bindings/python/test_raw_pylibcpupower.py @@ -15,22 +15,38 @@ else: print(f"cstate count error: return code: {cpu_cstates_count}") """ -Disable cstate (will fail if the above is 0, ex: a virtual machine) +Disable cstate (will fail if the above returns is under 1, ex: a virtual machine) """ cstate_disabled = p.cpuidle_state_disable(0, 0, 1) -if cpu_cstates_count == 0: - print(f"CPU 0 has {cpu_cstates_count} c-states") -else: - print(f"cstate count error: return code: {cpu_cstates_count}") match cstate_disabled: case 0: print(f"CPU state disabled") case -1: print(f"Idlestate not available") + case -2: + print(f"Disabling is not supported by the kernel") + case -3: + print(f"No write access to disable/enable C-states: try using sudo") case _: - print(f"Not documented") + print(f"Not documented: {cstate_disabled}") + +""" +Test cstate is disabled +""" +is_cstate_disabled = p.cpuidle_is_state_disabled(0, 0) +match is_cstate_disabled: + case 1: + print(f"CPU is disabled") + case 0: + print(f"CPU is enabled") + case -1: + print(f"Idlestate not available") + case -2: + print(f"Disabling is not supported by kernel") + case _: + print(f"Not documented: {is_cstate_disabled}") # Pointer example diff --git a/tools/power/cpupower/man/cpupower-set.1 b/tools/power/cpupower/man/cpupower-set.1 index 2bcc696f4496..500653ef98c7 100644 --- a/tools/power/cpupower/man/cpupower-set.1 +++ b/tools/power/cpupower/man/cpupower-set.1 @@ -3,7 +3,7 @@ cpupower\-set \- Set processor power related kernel or hardware configurations .SH SYNOPSIS .ft B -.B cpupower set [ \-b VAL ] +.B cpupower set [ \-b VAL | \-e POLICY | \-m MODE | \-t BOOL ] .SH DESCRIPTION @@ -19,7 +19,7 @@ described in the Options sections. Use \fBcpupower info \fP to read out current settings and whether they are supported on the system at all. -.SH Options +.SH OPTIONS .PP \-\-perf-bias, \-b .RS 4 @@ -56,6 +56,40 @@ Use \fBcpupower -c all info -b\fP to verify. This options needs the msr kernel driver (CONFIG_X86_MSR) loaded. .RE +.PP +\-\-epp, \-e +.RS 4 +Sets the energy performance policy preference on supported Intel or AMD +processors which use the Intel or AMD P-State cpufreq driver respectively. + +Available policies can be found with +\fBcat /sys/devices/system/cpu/cpufreq/policy0/energy_performance_available_preferences\fP : +.RS 4 +default performance balance_performance balance_power power +.RE + +.RE + +.PP +\-\-amd\-pstate\-mode, \-m +.RS 4 +Sets the AMD P-State mode for supported AMD processors. +Available modes are "active", "guided" or "passive". + +Refer to the AMD P-State kernel documentation for further information. + +.RE + +.PP +\-\-turbo\-boost, \-t +.RS 4 +This option is used to enable or disable the turbo boost feature on +supported Intel and AMD processors. + +This option takes as parameter either \fB1\fP to enable, or \fB0\fP to disable the feature. + +.RE + .SH "SEE ALSO" cpupower-info(1), cpupower-monitor(1), powertop(1) .PP diff --git a/tools/power/cpupower/po/zh_CN.po b/tools/power/cpupower/po/zh_CN.po new file mode 100644 index 000000000000..0489abffb702 --- /dev/null +++ b/tools/power/cpupower/po/zh_CN.po @@ -0,0 +1,942 @@ +# Chinese Simplified translations for cpufrequtils package +# Copyright (C) 2004 THE PACKAGE'S COPYRIGHT HOLDER +# This file is distributed under the same license as the cpufrequtils package. +# +#, fuzzy +msgid "" +msgstr "" +"Project-Id-Version: cpufrequtils 006\n" +"Report-Msgid-Bugs-To: \n" +"POT-Creation-Date: 2011-03-08 17:03+0100\n" +"PO-Revision-Date: 2024-05-22 15:36+0000\n" +"Last-Translator: Kieran Moy <kfatyuip@gmail.com>\n" +"Language-Team: NONE\n" +"Language: zh_CN\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=UTF-8\n" +"Content-Transfer-Encoding: 8bit\n" +"X-Generator: Poedit 3.4.2\n" + +#: utils/idle_monitor/nhm_idle.c:36 +msgid "Processor Core C3" +msgstr "处理器 Core C3" + +#: utils/idle_monitor/nhm_idle.c:43 +msgid "Processor Core C6" +msgstr "处理器 Core C6" + +#: utils/idle_monitor/nhm_idle.c:51 +msgid "Processor Package C3" +msgstr "处理器套件 C3" + +#: utils/idle_monitor/nhm_idle.c:58 utils/idle_monitor/amd_fam14h_idle.c:70 +msgid "Processor Package C6" +msgstr "处理器套件 C6" + +#: utils/idle_monitor/snb_idle.c:33 +msgid "Processor Core C7" +msgstr "处理器 Core C7" + +#: utils/idle_monitor/snb_idle.c:40 +msgid "Processor Package C2" +msgstr "处理器套件 C2" + +#: utils/idle_monitor/snb_idle.c:47 +msgid "Processor Package C7" +msgstr "处理器套件 C7" + +#: utils/idle_monitor/amd_fam14h_idle.c:56 +msgid "Package in sleep state (PC1 or deeper)" +msgstr "Package in sleep state (PC1 或更深)" + +#: utils/idle_monitor/amd_fam14h_idle.c:63 +msgid "Processor Package C1" +msgstr "处理器套件 C1" + +#: utils/idle_monitor/amd_fam14h_idle.c:77 +msgid "North Bridge P1 boolean counter (returns 0 or 1)" +msgstr "北桥 P1 布尔计数器(返回 0 或 1)" + +#: utils/idle_monitor/mperf_monitor.c:35 +msgid "Processor Core not idle" +msgstr "处理器 Core不空闲" + +#: utils/idle_monitor/mperf_monitor.c:42 +msgid "Processor Core in an idle state" +msgstr "处理器 Core处于空闲状态" + +#: utils/idle_monitor/mperf_monitor.c:50 +msgid "Average Frequency (including boost) in MHz" +msgstr "平均频率(包括增加频率),单位 MHz" + +#: utils/idle_monitor/cpupower-monitor.c:66 +#, c-format +msgid "" +"cpupower monitor: [-h] [ [-t] | [-l] | [-m <mon1>,[<mon2>] ] ] [-i " +"interval_sec | -c command ...]\n" +msgstr "" +"cpupower monitor:[-h] [ [-t] | [-l] | [-m <mon1>,[<mon2>] ] ] [-i " +"interval_sec | -c command...]\n" + +#: utils/idle_monitor/cpupower-monitor.c:69 +#, c-format +msgid "" +"cpupower monitor: [-v] [-h] [ [-t] | [-l] | [-m <mon1>,[<mon2>] ] ] [-i " +"interval_sec | -c command ...]\n" +msgstr "" +"cpupower monitor:[-v] [-h] [ [-t] | [-l] | [-m <mon1>,[<mon2>] ] ] [-i " +"interval_sec | -c command...]\n" + +#: utils/idle_monitor/cpupower-monitor.c:71 +#, c-format +msgid "\t -v: be more verbose\n" +msgstr "-v:更详细\n" + +#: utils/idle_monitor/cpupower-monitor.c:73 +#, c-format +msgid "\t -h: print this help\n" +msgstr "-h:打印此帮助\n" + +#: utils/idle_monitor/cpupower-monitor.c:74 +#, c-format +msgid "\t -i: time interval to measure for in seconds (default 1)\n" +msgstr "-i:测量的时间间隔(以秒为单位)(默认 1)\n" + +#: utils/idle_monitor/cpupower-monitor.c:75 +#, c-format +msgid "\t -t: show CPU topology/hierarchy\n" +msgstr "-t:显示CPU拓扑/层次结构\n" + +#: utils/idle_monitor/cpupower-monitor.c:76 +#, c-format +msgid "\t -l: list available CPU sleep monitors (for use with -m)\n" +msgstr "-l:列出可用的 CPU 睡眠监视器(与 -m 一起使用)\n" + +#: utils/idle_monitor/cpupower-monitor.c:77 +#, c-format +msgid "\t -m: show specific CPU sleep monitors only (in same order)\n" +msgstr "-m:仅显示特定的CPU睡眠监视器(按相同顺序)\n" + +#: utils/idle_monitor/cpupower-monitor.c:79 +#, c-format +msgid "" +"only one of: -t, -l, -m are allowed\n" +"If none of them is passed," +msgstr "" +"仅允许以下之一:-t、-l、-m\n" +"如果都没有通过的话" + +#: utils/idle_monitor/cpupower-monitor.c:80 +#, c-format +msgid " all supported monitors are shown\n" +msgstr " 显示所有支持的显示器\n" + +#: utils/idle_monitor/cpupower-monitor.c:197 +#, c-format +msgid "Monitor %s, Counter %s has no count function. Implementation error\n" +msgstr "监视器 %s、计数器 %s 无计数功能。 执行错误\n" + +#: utils/idle_monitor/cpupower-monitor.c:207 +#, c-format +msgid " *is offline\n" +msgstr " *离线\n" + +#: utils/idle_monitor/cpupower-monitor.c:236 +#, c-format +msgid "%s: max monitor name length (%d) exceeded\n" +msgstr "%s:超出最大监视器名称长度 (%d)\n" + +#: utils/idle_monitor/cpupower-monitor.c:250 +#, c-format +msgid "No matching monitor found in %s, try -l option\n" +msgstr "在 %s 中找不到匹配的监视器,请尝试 -l 选项\n" + +#: utils/idle_monitor/cpupower-monitor.c:266 +#, c-format +msgid "Monitor \"%s\" (%d states) - Might overflow after %u s\n" +msgstr "监视器“%s”(%d 状态)- 可能会在 %u 秒后溢出\n" + +#: utils/idle_monitor/cpupower-monitor.c:319 +#, c-format +msgid "%s took %.5f seconds and exited with status %d\n" +msgstr "%s 用了 %.5f 秒并退出,状态为 %d\n" + +#: utils/idle_monitor/cpupower-monitor.c:406 +#, c-format +msgid "Cannot read number of available processors\n" +msgstr "无法读取可用处理器的数量\n" + +#: utils/idle_monitor/cpupower-monitor.c:417 +#, c-format +msgid "Available monitor %s needs root access\n" +msgstr "可用监视器 %s 需要 root 访问权限\n" + +#: utils/idle_monitor/cpupower-monitor.c:428 +#, c-format +msgid "No HW Cstate monitors found\n" +msgstr "未找到 HW Cstate 监视器\n" + +#: utils/cpupower.c:78 +#, c-format +msgid "cpupower [ -c cpulist ] subcommand [ARGS]\n" +msgstr "cpupower [ -c cpulist ] subcommand [ARGS]\n" + +#: utils/cpupower.c:79 +#, c-format +msgid "cpupower --version\n" +msgstr "cpupower --version\n" + +#: utils/cpupower.c:80 +#, c-format +msgid "Supported subcommands are:\n" +msgstr "支持的子命令有:\n" + +#: utils/cpupower.c:83 +#, c-format +msgid "" +"\n" +"Some subcommands can make use of the -c cpulist option.\n" +msgstr "" +"\n" +"某些子命令可以使用 -c cpulist 选项。\n" + +#: utils/cpupower.c:84 +#, c-format +msgid "Look at the general cpupower manpage how to use it\n" +msgstr "看看一般的cpupower manpage如何使用它\n" + +#: utils/cpupower.c:85 +#, c-format +msgid "and read up the subcommand's manpage whether it is supported.\n" +msgstr "并阅读子命令的manpage是否受支持。\n" + +#: utils/cpupower.c:86 +#, c-format +msgid "" +"\n" +"Use cpupower help subcommand for getting help for above subcommands.\n" +msgstr "" +"\n" +"使用 cpupower help subcommand获取上述子命令的帮助。\n" + +#: utils/cpupower.c:91 +#, c-format +msgid "Report errors and bugs to %s, please.\n" +msgstr "请向 %s 报告错误和错误。\n" + +#: utils/cpupower.c:114 +#, c-format +msgid "Error parsing cpu list\n" +msgstr "解析cpu列表时出错\n" + +#: utils/cpupower.c:172 +#, c-format +msgid "Subcommand %s needs root privileges\n" +msgstr "子命令 %s 需要 root 权限\n" + +#: utils/cpufreq-info.c:31 +#, c-format +msgid "Couldn't count the number of CPUs (%s: %s), assuming 1\n" +msgstr "无法计算 CPU 数量(%s:%s),假设为 1\n" + +#: utils/cpufreq-info.c:63 +#, c-format +msgid "" +" minimum CPU frequency - maximum CPU frequency - governor\n" +msgstr "最低 CPU 频率 - 最高 CPU 频率 - 调速器\n" + +#: utils/cpufreq-info.c:151 +#, c-format +msgid "Error while evaluating Boost Capabilities on CPU %d -- are you root?\n" +msgstr "评估 CPU %d 上的 Boost 功能时出错 - 您是 root 吗?\n" + +#. P state changes via MSR are identified via cpuid 80000007 +#. on Intel and AMD, but we assume boost capable machines can do that +#. if (cpuid_eax(0x80000000) >= 0x80000007 +#. && (cpuid_edx(0x80000007) & (1 << 7))) +#. +#: utils/cpufreq-info.c:161 +#, c-format +msgid " boost state support: \n" +msgstr " 升压状态支持:\n" + +#: utils/cpufreq-info.c:163 +#, c-format +msgid " Supported: %s\n" +msgstr " 支持:%s\n" + +#: utils/cpufreq-info.c:163 utils/cpufreq-info.c:164 +msgid "yes" +msgstr "是" + +#: utils/cpufreq-info.c:163 utils/cpufreq-info.c:164 +msgid "no" +msgstr "不是" + +#: utils/cpufreq-info.c:164 +#, c-format +msgid " Active: %s\n" +msgstr " 活跃:%s\n" + +#: utils/cpufreq-info.c:177 +#, c-format +msgid " Boost States: %d\n" +msgstr " 提升状态:%d\n" + +#: utils/cpufreq-info.c:178 +#, c-format +msgid " Total States: %d\n" +msgstr " 状态总数:%d\n" + +#: utils/cpufreq-info.c:181 +#, c-format +msgid " Pstate-Pb%d: %luMHz (boost state)\n" +msgstr " Pstate-Pb%d:%luMHz(升压状态)\n" + +#: utils/cpufreq-info.c:184 +#, c-format +msgid " Pstate-P%d: %luMHz\n" +msgstr " Pstate-P%d:%luMHz\n" + +#: utils/cpufreq-info.c:211 +#, c-format +msgid " no or unknown cpufreq driver is active on this CPU\n" +msgstr " 该 CPU 上没有或未知的 cpufreq 驱动程序处于活动状态\n" + +#: utils/cpufreq-info.c:213 +#, c-format +msgid " driver: %s\n" +msgstr " 驱动程序:%s\n" + +#: utils/cpufreq-info.c:219 +#, c-format +msgid " CPUs which run at the same hardware frequency: " +msgstr " 以相同硬件频率运行的 CPU:" + +#: utils/cpufreq-info.c:230 +#, c-format +msgid " CPUs which need to have their frequency coordinated by software: " +msgstr " 需要通过软件协调频率的 CPU:" + +#: utils/cpufreq-info.c:241 +#, c-format +msgid " maximum transition latency: " +msgstr " 最大转换延迟:" + +#: utils/cpufreq-info.c:247 +#, c-format +msgid " hardware limits: " +msgstr " 硬件限制:" + +#: utils/cpufreq-info.c:256 +#, c-format +msgid " available frequency steps: " +msgstr " 可用频率范围:" + +#: utils/cpufreq-info.c:269 +#, c-format +msgid " available cpufreq governors: " +msgstr " 可用的cpufreq调节器:" + +#: utils/cpufreq-info.c:280 +#, c-format +msgid " current policy: frequency should be within " +msgstr " 当前政策:频率应在" + +#: utils/cpufreq-info.c:282 +#, c-format +msgid " and " +msgstr "和" + +#: utils/cpufreq-info.c:286 +#, c-format +msgid "" +"The governor \"%s\" may decide which speed to use\n" +" within this range.\n" +msgstr "" +"调速器“%s”可以决定使用哪种速度\n" +" 在这个范围内。\n" + +#: utils/cpufreq-info.c:293 +#, c-format +msgid " current CPU frequency is " +msgstr " 当前CPU频率是" + +#: utils/cpufreq-info.c:296 +#, c-format +msgid " (asserted by call to hardware)" +msgstr " (通过调用硬件来断言)" + +#: utils/cpufreq-info.c:304 +#, c-format +msgid " cpufreq stats: " +msgstr " cpu频率统计:" + +#: utils/cpufreq-info.c:472 +#, c-format +msgid "Usage: cpupower freqinfo [options]\n" +msgstr "用法:cpupower freqinfo [选项]\n" + +#: utils/cpufreq-info.c:473 utils/cpufreq-set.c:26 utils/cpupower-set.c:23 +#: utils/cpupower-info.c:22 utils/cpuidle-info.c:148 +#, c-format +msgid "Options:\n" +msgstr "选项:\n" + +#: utils/cpufreq-info.c:474 +#, c-format +msgid " -e, --debug Prints out debug information [default]\n" +msgstr " -e, --debug 打印出调试信息[默认]\n" + +#: utils/cpufreq-info.c:475 +#, c-format +msgid "" +" -f, --freq Get frequency the CPU currently runs at, according\n" +" to the cpufreq core *\n" +msgstr "" +" -f, --freq 获取CPU当前运行的频率,根据\n" +" 到 cpufreq 核心 *\n" + +#: utils/cpufreq-info.c:477 +#, c-format +msgid "" +" -w, --hwfreq Get frequency the CPU currently runs at, by reading\n" +" it from hardware (only available to root) *\n" +msgstr "" +" -w, --hwfreq 通过读取获取CPU当前运行的频率\n" +" 它来自硬件(仅适用于root)*\n" + +#: utils/cpufreq-info.c:479 +#, c-format +msgid "" +" -l, --hwlimits Determine the minimum and maximum CPU frequency " +"allowed *\n" +msgstr " -l, --hwlimits 确定允许的最小和最大 CPU 频率 *\n" + +#: utils/cpufreq-info.c:480 +#, c-format +msgid " -d, --driver Determines the used cpufreq kernel driver *\n" +msgstr " -d, --driver 确定使用的 cpufreq 内核驱动程序 *\n" + +#: utils/cpufreq-info.c:481 +#, c-format +msgid " -p, --policy Gets the currently used cpufreq policy *\n" +msgstr " -p, --policy 获取当前使用的cpufreq策略 *\n" + +#: utils/cpufreq-info.c:482 +#, c-format +msgid " -g, --governors Determines available cpufreq governors *\n" +msgstr " -g, --governors 确定可用的 cpufreq 调节器 *\n" + +#: utils/cpufreq-info.c:483 +#, c-format +msgid "" +" -r, --related-cpus Determines which CPUs run at the same hardware " +"frequency *\n" +msgstr " -r, --lated-cpus 确定哪些 CPU 以相同的硬件频率运行 *\n" + +#: utils/cpufreq-info.c:484 +#, c-format +msgid "" +" -a, --affected-cpus Determines which CPUs need to have their frequency\n" +" coordinated by software *\n" +msgstr "" +" -a, --affected-cpus 确定哪些 CPU 需要其频率\n" +" 由软件协调*\n" + +#: utils/cpufreq-info.c:486 +#, c-format +msgid " -s, --stats Shows cpufreq statistics if available\n" +msgstr " -s, --stats 显示 cpufreq 统计信息(如果有)\n" + +#: utils/cpufreq-info.c:487 +#, c-format +msgid "" +" -y, --latency Determines the maximum latency on CPU frequency " +"changes *\n" +msgstr " -y, --latency 确定 CPU 频率变化的最大延迟*\n" + +#: utils/cpufreq-info.c:488 +#, c-format +msgid " -b, --boost Checks for turbo or boost modes *\n" +msgstr " -b, --boost 检查 Turbo 或 boost 模式 *\n" + +#: utils/cpufreq-info.c:489 +#, c-format +msgid "" +" -o, --proc Prints out information like provided by the /proc/" +"cpufreq\n" +" interface in 2.4. and early 2.6. kernels\n" +msgstr "" +" -o, --proc 打印 /proc/cpufreq 提供的信息\n" +" 2.4 中的接口。 以及 2.6 之前的内核。\n" + +#: utils/cpufreq-info.c:491 +#, c-format +msgid "" +" -m, --human human-readable output for the -f, -w, -s and -y " +"parameters\n" +msgstr " -m, -- human -f, -w, -s 和 -y 参数的人类可读输出\n" + +#: utils/cpufreq-info.c:492 utils/cpuidle-info.c:152 +#, c-format +msgid " -h, --help Prints out this screen\n" +msgstr " -h, --help 打印此屏幕\n" + +#: utils/cpufreq-info.c:495 +#, c-format +msgid "" +"If no argument or only the -c, --cpu parameter is given, debug output " +"about\n" +"cpufreq is printed which is useful e.g. for reporting bugs.\n" +msgstr "" +"screen如果没有参数或仅给出了 -c, --cpu 参数,则调试输出有关\n" +"cpufreq 被打印出来,这很有用,例如 用于报告错误。\n" + +#: utils/cpufreq-info.c:497 +#, c-format +msgid "" +"For the arguments marked with *, omitting the -c or --cpu argument is\n" +"equivalent to setting it to zero\n" +msgstr "" +"对于标有 * 的参数,省略 -c 或 --cpu 参数是\n" +"相当于将其设置为零\n" + +#: utils/cpufreq-info.c:580 +#, c-format +msgid "" +"The argument passed to this tool can't be combined with passing a --cpu " +"argument\n" +msgstr "传递给此工具的参数不能与传递 --cpu 参数结合使用\n" + +#: utils/cpufreq-info.c:596 +#, c-format +msgid "" +"You can't specify more than one --cpu parameter and/or\n" +"more than one output-specific argument\n" +msgstr "" +"您不能指定多个 --cpu 参数和/或\n" +"多个特定于输出的参数\n" + +#: utils/cpufreq-info.c:600 utils/cpufreq-set.c:82 utils/cpupower-set.c:42 +#: utils/cpupower-info.c:42 utils/cpuidle-info.c:213 +#, c-format +msgid "invalid or unknown argument\n" +msgstr "无效或未知的参数\n" + +#: utils/cpufreq-info.c:617 +#, c-format +msgid "couldn't analyze CPU %d as it doesn't seem to be present\n" +msgstr "无法分析 CPU %d,因为它似乎不存在\n" + +#: utils/cpufreq-info.c:620 utils/cpupower-info.c:142 +#, c-format +msgid "analyzing CPU %d:\n" +msgstr "分析 CPU %d:\n" + +#: utils/cpufreq-set.c:25 +#, c-format +msgid "Usage: cpupower frequency-set [options]\n" +msgstr "用法:cpupower frequency-set [选项]\n" + +#: utils/cpufreq-set.c:27 +#, c-format +msgid "" +" -d FREQ, --min FREQ new minimum CPU frequency the governor may " +"select\n" +msgstr " -d FREQ, --min FREQ 调控器可以选择的新的最小 CPU 频率\n" + +#: utils/cpufreq-set.c:28 +#, c-format +msgid "" +" -u FREQ, --max FREQ new maximum CPU frequency the governor may " +"select\n" +msgstr " -u FREQ, --max FREQ 调控器可以选择的新的最大 CPU 频率\n" + +#: utils/cpufreq-set.c:29 +#, c-format +msgid " -g GOV, --governor GOV new cpufreq governor\n" +msgstr " -g GOV, --governor GOV 新的 cpufreq 调节器\n" + +#: utils/cpufreq-set.c:30 +#, c-format +msgid "" +" -f FREQ, --freq FREQ specific frequency to be set. Requires " +"userspace\n" +" governor to be available and loaded\n" +msgstr "" +" -f FREQ, --freq FREQ 要设置的特定频率。 需要用户空间\n" +" 调速器可用并已加载\n" + +#: utils/cpufreq-set.c:32 +#, c-format +msgid " -r, --related Switches all hardware-related CPUs\n" +msgstr " -r, --related 切换所有与硬件相关的CPU\n" + +#: utils/cpufreq-set.c:33 utils/cpupower-set.c:28 utils/cpupower-info.c:27 +#, c-format +msgid " -h, --help Prints out this screen\n" +msgstr " -h, --help 打印此屏幕\n" + +#: utils/cpufreq-set.c:35 +#, c-format +msgid "" +"Notes:\n" +"1. Omitting the -c or --cpu argument is equivalent to setting it to " +"\"all\"\n" +msgstr "" +"注意:\n" +"1.省略-c或--cpu参数相当于将其设置为“all”\n" + +#: utils/cpufreq-set.c:37 +#, c-format +msgid "" +"2. The -f FREQ, --freq FREQ parameter cannot be combined with any other " +"parameter\n" +" except the -c CPU, --cpu CPU parameter\n" +"3. FREQuencies can be passed in Hz, kHz (default), MHz, GHz, or THz\n" +" by postfixing the value with the wanted unit name, without any space\n" +" (FREQuency in kHz =^ Hz * 0.001 =^ MHz * 1000 =^ GHz * 1000000).\n" +msgstr "" +"2. -f FREQ、--freq FREQ参数不能与任何其他参数组合使用\n" +" 除了 -c CPU、--cpu CPU 参数\n" +"3. 频率可以以 Hz、kHz(默认)、MHz、GHz 或 THz 为单位传递\n" +" 通过在值后面添加所需的单位名称,不带任何空格\n" +" (以 kHz 为单位的频率 =^ Hz * 0.001 =^ MHz * 1000 =^ GHz * 1000000)。\n" + +#: utils/cpufreq-set.c:57 +#, c-format +msgid "" +"Error setting new values. Common errors:\n" +"- Do you have proper administration rights? (super-user?)\n" +"- Is the governor you requested available and modprobed?\n" +"- Trying to set an invalid policy?\n" +"- Trying to set a specific frequency, but userspace governor is not " +"available,\n" +" for example because of hardware which cannot be set to a specific " +"frequency\n" +" or because the userspace governor isn't loaded?\n" +msgstr "" +"设置新值时出错。 常见错误:\n" +"- 您有适当的管理权吗? (超级用户?)\n" +"- 您请求的调控器是否可用并已进行 modprobed?\n" +"- 尝试设置无效的策略?\n" +"- 尝试设置特定频率,但用户空间调控器不可用,\n" +" 例如由于硬件无法设置为特定频率\n" +" 或者因为用户空间调控器未加载?\n" + +#: utils/cpufreq-set.c:170 +#, c-format +msgid "wrong, unknown or unhandled CPU?\n" +msgstr "错误、未知或未处理的CPU?\n" + +#: utils/cpufreq-set.c:302 +#, c-format +msgid "" +"the -f/--freq parameter cannot be combined with -d/--min, -u/--max or\n" +"-g/--governor parameters\n" +msgstr "" +"-f/--freq 参数不能与 -d/--min、-u/--max 或\n" +"-g/--调速器参数\n" + +#: utils/cpufreq-set.c:308 +#, c-format +msgid "" +"At least one parameter out of -f/--freq, -d/--min, -u/--max, and\n" +"-g/--governor must be passed\n" +msgstr "" +"-f/--freq、-d/--min、-u/--max 和 -f/--freq 中的至少一个参数\n" +"-g/--governor 必须通过\n" + +#: utils/cpufreq-set.c:347 +#, c-format +msgid "Setting cpu: %d\n" +msgstr "设置CPU:%d\n" + +#: utils/cpupower-set.c:22 +#, c-format +msgid "Usage: cpupower set [ -b val ] [ -m val ] [ -s val ]\n" +msgstr "用法: cpupower set [ -b val ] [ -m val ] [ -s val ]\n" + +#: utils/cpupower-set.c:24 +#, c-format +msgid "" +" -b, --perf-bias [VAL] Sets CPU's power vs performance policy on some\n" +" Intel models [0-15], see manpage for details\n" +msgstr "" +" -b, --perf-bias [VAL] 设置 CPU 的功耗与性能策略\n" +" Intel 型号 [0-15],请参阅manpage了解详细信息\n" + +#: utils/cpupower-set.c:26 +#, c-format +msgid "" +" -m, --sched-mc [VAL] Sets the kernel's multi core scheduler policy.\n" +msgstr " -m, --sched-mc [VAL] 设置内核的多核调度程序策略。\n" + +#: utils/cpupower-set.c:27 +#, c-format +msgid "" +" -s, --sched-smt [VAL] Sets the kernel's thread sibling scheduler " +"policy.\n" +msgstr " -s, --sched-smt [VAL] 设置内核的线程同级调度程序策略。\n" + +#: utils/cpupower-set.c:80 +#, c-format +msgid "--perf-bias param out of range [0-%d]\n" +msgstr "--perf-bias 参数超出范围 [0-%d]\n" + +#: utils/cpupower-set.c:91 +#, c-format +msgid "--sched-mc param out of range [0-%d]\n" +msgstr "--sched-mc 参数超出范围 [0-%d]\n" + +#: utils/cpupower-set.c:102 +#, c-format +msgid "--sched-smt param out of range [0-%d]\n" +msgstr "--sched-smt 参数超出范围 [0-%d]\n" + +#: utils/cpupower-set.c:121 +#, c-format +msgid "Error setting sched-mc %s\n" +msgstr "设置 sched-mc %s 时出错\n" + +#: utils/cpupower-set.c:127 +#, c-format +msgid "Error setting sched-smt %s\n" +msgstr "设置 sched-smt %s 时出错\n" + +#: utils/cpupower-set.c:146 +#, c-format +msgid "Error setting perf-bias value on CPU %d\n" +msgstr "在 CPU %d 上设置性能偏差值时出错\n" + +#: utils/cpupower-info.c:21 +#, c-format +msgid "Usage: cpupower info [ -b ] [ -m ] [ -s ]\n" +msgstr "用法:cpupower info [-b][-m][-s]\n" + +#: utils/cpupower-info.c:23 +#, c-format +msgid "" +" -b, --perf-bias Gets CPU's power vs performance policy on some\n" +" Intel models [0-15], see manpage for details\n" +msgstr "" +" -b, --perf-bias 获取 CPU 在某些方面的功耗与性能策略\n" +" Intel 型号 [0-15],请参阅联机帮助页了解详细信" +"息\n" + +#: utils/cpupower-info.c:25 +#, c-format +msgid " -m, --sched-mc Gets the kernel's multi core scheduler policy.\n" +msgstr " -m, --sched-mc 获取内核的多核调度程序策略。\n" + +#: utils/cpupower-info.c:26 +#, c-format +msgid "" +" -s, --sched-smt Gets the kernel's thread sibling scheduler policy.\n" +msgstr " -s, --sched-smt 获取内核的线程同级调度程序策略。\n" + +#: utils/cpupower-info.c:28 +#, c-format +msgid "" +"\n" +"Passing no option will show all info, by default only on core 0\n" +msgstr "" +"\n" +"不传递任何选项将显示所有信息,默认情况下仅在核心 0 上\n" + +#: utils/cpupower-info.c:102 +#, c-format +msgid "System's multi core scheduler setting: " +msgstr "系统的多核调度器设置:" + +#. if sysfs file is missing it's: errno == ENOENT +#: utils/cpupower-info.c:105 utils/cpupower-info.c:114 +#, c-format +msgid "not supported\n" +msgstr "不支持\n" + +#: utils/cpupower-info.c:111 +#, c-format +msgid "System's thread sibling scheduler setting: " +msgstr "系统的线程兄调度程序设置:" + +#: utils/cpupower-info.c:126 +#, c-format +msgid "Intel's performance bias setting needs root privileges\n" +msgstr "Intel的性能偏差设置需要root权限\n" + +#: utils/cpupower-info.c:128 +#, c-format +msgid "System does not support Intel's performance bias setting\n" +msgstr "系统不支持Intel的性能偏差设置\n" + +#: utils/cpupower-info.c:147 +#, c-format +msgid "Could not read perf-bias value\n" +msgstr "无法读取性能偏差值\n" + +#: utils/cpupower-info.c:150 +#, c-format +msgid "perf-bias: %d\n" +msgstr "性能偏差:%d\n" + +#: utils/cpuidle-info.c:28 +#, c-format +msgid "Analyzing CPU %d:\n" +msgstr "正在分析 CPU %d:\n" + +#: utils/cpuidle-info.c:32 +#, c-format +msgid "CPU %u: No idle states\n" +msgstr "CPU %u:无空闲状态\n" + +#: utils/cpuidle-info.c:36 +#, c-format +msgid "CPU %u: Can't read idle state info\n" +msgstr "CPU %u:无法读取空闲状态信息\n" + +#: utils/cpuidle-info.c:41 +#, c-format +msgid "Could not determine max idle state %u\n" +msgstr "无法确定最大空闲状态 %u\n" + +#: utils/cpuidle-info.c:46 +#, c-format +msgid "Number of idle states: %d\n" +msgstr "空闲状态数:%d\n" + +#: utils/cpuidle-info.c:48 +#, c-format +msgid "Available idle states:" +msgstr "可用的空闲状态:" + +#: utils/cpuidle-info.c:71 +#, c-format +msgid "Flags/Description: %s\n" +msgstr "标志/描述:%s\n" + +#: utils/cpuidle-info.c:74 +#, c-format +msgid "Latency: %lu\n" +msgstr "延迟:%lu\n" + +#: utils/cpuidle-info.c:76 +#, c-format +msgid "Usage: %lu\n" +msgstr "用法:%lu\n" + +#: utils/cpuidle-info.c:78 +#, c-format +msgid "Duration: %llu\n" +msgstr "持续时间:%llu\n" + +#: utils/cpuidle-info.c:90 +#, c-format +msgid "Could not determine cpuidle driver\n" +msgstr "无法确定 cpuidle 驱动程序\n" + +#: utils/cpuidle-info.c:94 +#, c-format +msgid "CPUidle driver: %s\n" +msgstr "CPU 空闲驱动程序:%s\n" + +#: utils/cpuidle-info.c:99 +#, c-format +msgid "Could not determine cpuidle governor\n" +msgstr "无法确定 cpuidle 调控器\n" + +#: utils/cpuidle-info.c:103 +#, c-format +msgid "CPUidle governor: %s\n" +msgstr "CPU 空闲调节器:%s\n" + +#: utils/cpuidle-info.c:122 +#, c-format +msgid "CPU %u: Can't read C-state info\n" +msgstr "CPU %u:无法读取 C 状态信息\n" + +#. printf("Cstates: %d\n", cstates); +#: utils/cpuidle-info.c:127 +#, c-format +msgid "active state: C0\n" +msgstr "活动状态: C0\n" + +#: utils/cpuidle-info.c:128 +#, c-format +msgid "max_cstate: C%u\n" +msgstr "最大c状态: C%u\n" + +#: utils/cpuidle-info.c:129 +#, c-format +msgid "maximum allowed latency: %lu usec\n" +msgstr "允许的最大延迟:%lu usec\n" + +#: utils/cpuidle-info.c:130 +#, c-format +msgid "states:\t\n" +msgstr "状态:\t\n" + +#: utils/cpuidle-info.c:132 +#, c-format +msgid " C%d: type[C%d] " +msgstr " C%d: 类型[C%d]" + +#: utils/cpuidle-info.c:134 +#, c-format +msgid "promotion[--] demotion[--] " +msgstr "晋升[--] 降级[--]" + +#: utils/cpuidle-info.c:135 +#, c-format +msgid "latency[%03lu] " +msgstr "延迟[%03lu]" + +#: utils/cpuidle-info.c:137 +#, c-format +msgid "usage[%08lu] " +msgstr "使用情况[%08lu]" + +#: utils/cpuidle-info.c:139 +#, c-format +msgid "duration[%020Lu] \n" +msgstr "持续时间[%020Lu]\n" + +#: utils/cpuidle-info.c:147 +#, c-format +msgid "Usage: cpupower idleinfo [options]\n" +msgstr "用法:cpupower idleinfo [选项]\n" + +#: utils/cpuidle-info.c:149 +#, c-format +msgid " -s, --silent Only show general C-state information\n" +msgstr " -s, --silent 只显示一般C状态信息\n" + +#: utils/cpuidle-info.c:150 +#, c-format +msgid "" +" -o, --proc Prints out information like provided by the /proc/" +"acpi/processor/*/power\n" +" interface in older kernels\n" +msgstr "" +" -o, --proc 打印 /proc/acpi/processor/*/power 提供的信息\n" +" 旧内核中的接口\n" + +#: utils/cpuidle-info.c:209 +#, c-format +msgid "You can't specify more than one output-specific argument\n" +msgstr "您不能指定多个特定于输出的参数\n" + +#~ msgid "" +#~ " -c CPU, --cpu CPU CPU number which information shall be determined " +#~ "about\n" +#~ msgstr "" +#~ " -c CPU, --cpu CPU Numéro du CPU pour lequel l'information sera " +#~ "affichée\n" + +#~ msgid "" +#~ " -c CPU, --cpu CPU number of CPU where cpufreq settings shall be " +#~ "modified\n" +#~ msgstr "" +#~ " -c CPU, --cpu CPU numéro du CPU à prendre en compte pour les\n" +#~ " changements\n" diff --git a/tools/power/pm-graph/sleepgraph.8 b/tools/power/pm-graph/sleepgraph.8 index 643271b6fc6f..491ca21dccdb 100644 --- a/tools/power/pm-graph/sleepgraph.8 +++ b/tools/power/pm-graph/sleepgraph.8 @@ -81,6 +81,9 @@ as resume failures. .TP \fB-wifitrace\fR Trace through the wifi reconnect time and include it in the timeline. +.TP +\fB-debugtiming\fR +Add timestamp to each printed output line, accurate to the millisecond. .SS "advanced" .TP diff --git a/tools/power/pm-graph/sleepgraph.py b/tools/power/pm-graph/sleepgraph.py index ef87e63c05c7..918eae58b0b4 100755 --- a/tools/power/pm-graph/sleepgraph.py +++ b/tools/power/pm-graph/sleepgraph.py @@ -18,7 +18,7 @@ # # Links: # Home Page -# https://01.org/pm-graph +# https://www.intel.com/content/www/us/en/developer/topic-technology/open/pm-graph/overview.html # Source repo # git@github.com:intel/pm-graph # @@ -65,6 +65,7 @@ import gzip from threading import Thread from subprocess import call, Popen, PIPE import base64 +import traceback debugtiming = False mystarttime = time.time() @@ -86,7 +87,7 @@ def ascii(text): # store system values and test parameters class SystemValues: title = 'SleepGraph' - version = '5.12' + version = '5.13' ansi = False rs = 0 display = '' @@ -236,7 +237,11 @@ class SystemValues: 'msleep': { 'args_x86_64': {'time':'%di:s32'}, 'ub': 1 }, 'schedule_timeout': { 'args_x86_64': {'timeout':'%di:s32'}, 'ub': 1 }, 'udelay': { 'func':'__const_udelay', 'args_x86_64': {'loops':'%di:s32'}, 'ub': 1 }, - 'usleep_range': { 'args_x86_64': {'min':'%di:s32', 'max':'%si:s32'}, 'ub': 1 }, + 'usleep_range': { + 'func':'usleep_range_state', + 'args_x86_64': {'min':'%di:s32', 'max':'%si:s32'}, + 'ub': 1 + }, 'mutex_lock_slowpath': { 'func':'__mutex_lock_slowpath', 'ub': 1 }, 'acpi_os_stall': {'ub': 1}, 'rt_mutex_slowlock': {'ub': 1}, @@ -342,15 +347,21 @@ class SystemValues: if self.verbose or msg.startswith('WARNING:'): pprint(msg) def signalHandler(self, signum, frame): - if not self.result: - return signame = self.signames[signum] if signum in self.signames else 'UNKNOWN' - msg = 'Signal %s caused a tool exit, line %d' % (signame, frame.f_lineno) + if signame in ['SIGUSR1', 'SIGUSR2', 'SIGSEGV']: + traceback.print_stack() + stack = traceback.format_list(traceback.extract_stack()) + self.outputResult({'stack':stack}) + if signame == 'SIGUSR1': + return + msg = '%s caused a tool exit, line %d' % (signame, frame.f_lineno) + pprint(msg) self.outputResult({'error':msg}) + os.kill(os.getpid(), signal.SIGKILL) sys.exit(3) def signalHandlerInit(self): capture = ['BUS', 'SYS', 'XCPU', 'XFSZ', 'PWR', 'HUP', 'INT', 'QUIT', - 'ILL', 'ABRT', 'FPE', 'SEGV', 'TERM'] + 'ILL', 'ABRT', 'FPE', 'SEGV', 'TERM', 'USR1', 'USR2'] self.signames = dict() for i in capture: s = 'SIG'+i @@ -859,6 +870,11 @@ class SystemValues: # files needed for any trace data files = ['buffer_size_kb', 'current_tracer', 'trace', 'trace_clock', 'trace_marker', 'trace_options', 'tracing_on'] + # legacy check for old systems + if not os.path.exists(self.tpath+'trace'): + self.tpath = '/sys/kernel/debug/tracing/' + if not os.path.exists(self.epath): + self.epath = '/sys/kernel/debug/tracing/events/power/' # files needed for callgraph trace data tp = self.tpath if(self.usecallgraph): @@ -911,6 +927,13 @@ class SystemValues: if num > 0: n = '%d' % num fp = open(self.result, 'a') + if 'stack' in testdata: + fp.write('Printing stack trace:\n') + for line in testdata['stack']: + fp.write(line) + fp.close() + self.sudoUserchown(self.result) + return if 'error' in testdata: fp.write('result%s: fail\n' % n) fp.write('error%s: %s\n' % (n, testdata['error'])) @@ -1980,7 +2003,7 @@ class Data: length = -1.0 if(start >= 0 and end >= 0): length = end - start - if pid == -2 or name not in sysvals.tracefuncs.keys(): + if pid >= -2: i = 2 origname = name while(name in list): @@ -2753,7 +2776,8 @@ class Timeline: def createHeader(self, sv, stamp): if(not stamp['time']): return - self.html += '<div class="version"><a href="https://01.org/pm-graph">%s v%s</a></div>' \ + self.html += '<div class="version"><a href="https://www.intel.com/content/www/'+\ + 'us/en/developer/topic-technology/open/pm-graph/overview.html">%s v%s</a></div>' \ % (sv.title, sv.version) if sv.logmsg and sv.testlog: self.html += '<button id="showtest" class="logbtn btnfmt">log</button>' @@ -5238,12 +5262,16 @@ def addScriptCode(hf, testruns): } var info = dev[i].title.split(" "); var pname = info[info.length-1]; - pd[pname] = parseFloat(info[info.length-3].slice(1)); - total[0] += pd[pname]; + var length = parseFloat(info[info.length-3].slice(1)); + if (pname in pd) + pd[pname] += length; + else + pd[pname] = length; + total[0] += length; if(pname.indexOf("suspend") >= 0) - total[tidx] += pd[pname]; + total[tidx] += length; else - total[tidx+1] += pd[pname]; + total[tidx+1] += length; } } var devname = deviceTitle(this.title, total, cpu); @@ -5262,7 +5290,7 @@ def addScriptCode(hf, testruns): phases[i].style.left = left+"%"; phases[i].title = phases[i].id+" "+pd[phases[i].id]+" ms"; left += w; - var time = "<t4 style=\"font-size:"+fs+"px\">"+pd[phases[i].id]+" ms<br></t4>"; + var time = "<t4 style=\"font-size:"+fs+"px\">"+pd[phases[i].id].toFixed(3)+" ms<br></t4>"; var pname = "<t3 style=\"font-size:"+fs2+"px\">"+phases[i].id.replace(new RegExp("_", "g"), " ")+"</t3>"; phases[i].innerHTML = time+pname; } else { @@ -6742,6 +6770,7 @@ def printHelp(): ' -wifi If a wifi connection is available, check that it reconnects after resume.\n'\ ' -wifitrace Trace kernel execution through wifi reconnect.\n'\ ' -netfix Use netfix to reset the network in the event it fails to resume.\n'\ + ' -debugtiming Add timestamp to each printed line\n'\ ' [testprep]\n'\ ' -sync Sync the filesystems before starting the test\n'\ ' -rs on/off Enable/disable runtime suspend for all devices, restore all after test\n'\ @@ -7047,7 +7076,6 @@ if __name__ == '__main__': except: doError('No result file supplied', True) sysvals.result = val - sysvals.signalHandlerInit() else: doError('Invalid argument: '+arg, True) @@ -7057,6 +7085,7 @@ if __name__ == '__main__': if(sysvals.usecallgraph and sysvals.useprocmon): doError('-proc is not compatible with -f') + sysvals.signalHandlerInit() if sysvals.usecallgraph and sysvals.cgskip: sysvals.vprint('Using cgskip file: %s' % sysvals.cgskip) sysvals.setCallgraphBlacklist(sysvals.cgskip) diff --git a/tools/power/x86/turbostat/turbostat.8 b/tools/power/x86/turbostat/turbostat.8 index 067717bce1d4..a7f7ed01421c 100644 --- a/tools/power/x86/turbostat/turbostat.8 +++ b/tools/power/x86/turbostat/turbostat.8 @@ -33,6 +33,9 @@ name as necessary to disambiguate it from others is necessary. Note that option msr0xXXX is a hex offset, eg. msr0x10 /sys/path... is an absolute path to a sysfs attribute <device> is a perf device from /sys/bus/event_source/devices/<device> eg. cstate_core + On Intel hybrid platforms, instead of one "cpu" perf device there are two, "cpu_core" and "cpu_atom" devices for P and E cores respectively. + Turbostat, in this case, allow user to use "cpu" device and will automatically detect the type of a CPU and translate it to "cpu_core" and "cpu_atom" accordingly. + For a complete example see "ADD PERF COUNTER EXAMPLE #2 (using virtual "cpu" device)". <event> is a perf event for given device from /sys/bus/event_source/devices/<device>/events/<event> eg. c1-residency perf/cstate_core/c1-residency would then use /sys/bus/event_source/devices/cstate_core/events/c1-residency @@ -187,6 +190,8 @@ The system configuration dump (if --quiet is not used) is followed by statistics .PP \fBRAMWatt\fP Watts consumed by the DRAM DIMMS -- available only on server processors. .PP +\fBSysWatt\fP Watts consumed by the whole platform (RAPL PSYS). Disabled by default. Enable with --enable SysWatt. +.PP \fBPKG_%\fP percent of the interval that RAPL throttling was active on the Package. Note that the system summary is the sum of the package throttling time, and thus may be higher than 100% on a multi-package system. Note that the meaning of this field is model specific. For example, some hardware increments this counter when RAPL responds to thermal limits, but does not increment this counter when RAPL responds to power limits. Comparing PkgWatt and PkgTmp to system limits is necessary. .PP \fBRAM_%\fP percent of the interval that RAPL throttling was active on DRAM. @@ -387,6 +392,28 @@ CPU pCPU%c1 CPU%c1 .fi +.SH ADD PERF COUNTER EXAMPLE #2 (using virtual cpu device) +Here we run on hybrid, Raptor Lake platform. +We limit turbostat to show output for just cpu0 (pcore) and cpu12 (ecore). +We add a counter showing number of L3 cache misses, using virtual "cpu" device, +labeling it with the column header, "VCMISS". +We add a counter showing number of L3 cache misses, using virtual "cpu_core" device, +labeling it with the column header, "PCMISS". This will fail on ecore cpu12. +We add a counter showing number of L3 cache misses, using virtual "cpu_atom" device, +labeling it with the column header, "ECMISS". This will fail on pcore cpu0. +We display it only once, after the conclusion of 0.1 second sleep. +.nf +sudo ./turbostat --quiet --cpu 0,12 --show CPU --add perf/cpu/cache-misses,cpu,delta,raw,VCMISS --add perf/cpu_core/cache-misses,cpu,delta,raw,PCMISS --add perf/cpu_atom/cache-misses,cpu,delta,raw,ECMISS sleep .1 +turbostat: added_perf_counters_init_: perf/cpu_atom/cache-misses: failed to open counter on cpu0 +turbostat: added_perf_counters_init_: perf/cpu_core/cache-misses: failed to open counter on cpu12 +0.104630 sec +CPU ECMISS PCMISS VCMISS +- 0x0000000000000000 0x0000000000000000 0x0000000000000000 +0 0x0000000000000000 0x0000000000007951 0x0000000000007796 +12 0x000000000001137a 0x0000000000000000 0x0000000000011392 + +.fi + .SH ADD PMT COUNTER EXAMPLE Here we limit turbostat to showing just the CPU number 0. We add two counters, showing crystal clock count and the DC6 residency. diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 089220aaa5c9..58a487c225a7 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -31,6 +31,9 @@ ) // end copied section +#define CPUID_LEAF_MODEL_ID 0x1A +#define CPUID_LEAF_MODEL_ID_CORE_TYPE_SHIFT 24 + #define X86_VENDOR_INTEL 0 #include INTEL_FAMILY_HEADER @@ -89,6 +92,9 @@ #define PERF_DEV_NAME_BYTES 32 #define PERF_EVT_NAME_BYTES 32 +#define INTEL_ECORE_TYPE 0x20 +#define INTEL_PCORE_TYPE 0x40 + enum counter_scope { SCOPE_CPU, SCOPE_CORE, SCOPE_PACKAGE }; enum counter_type { COUNTER_ITEMS, COUNTER_CYCLES, COUNTER_SECONDS, COUNTER_USEC, COUNTER_K2M }; enum counter_format { FORMAT_RAW, FORMAT_DELTA, FORMAT_PERCENT, FORMAT_AVERAGE }; @@ -194,6 +200,8 @@ struct msr_counter bic[] = { { 0x0, "SAMMHz", NULL, 0, 0, 0, NULL, 0 }, { 0x0, "SAMAMHz", NULL, 0, 0, 0, NULL, 0 }, { 0x0, "Die%c6", NULL, 0, 0, 0, NULL, 0 }, + { 0x0, "SysWatt", NULL, 0, 0, 0, NULL, 0 }, + { 0x0, "Sys_J", NULL, 0, 0, 0, NULL, 0 }, }; #define MAX_BIC (sizeof(bic) / sizeof(struct msr_counter)) @@ -256,6 +264,8 @@ struct msr_counter bic[] = { #define BIC_SAMMHz (1ULL << 56) #define BIC_SAMACTMHz (1ULL << 57) #define BIC_Diec6 (1ULL << 58) +#define BIC_SysWatt (1ULL << 59) +#define BIC_Sys_J (1ULL << 60) #define BIC_TOPOLOGY (BIC_Package | BIC_Node | BIC_CoreCnt | BIC_PkgCnt | BIC_Core | BIC_CPU | BIC_Die ) #define BIC_THERMAL_PWR ( BIC_CoreTmp | BIC_PkgTmp | BIC_PkgWatt | BIC_CorWatt | BIC_GFXWatt | BIC_RAMWatt | BIC_PKG__ | BIC_RAM__) @@ -263,7 +273,7 @@ struct msr_counter bic[] = { #define BIC_IDLE (BIC_sysfs | BIC_CPU_c1 | BIC_CPU_c3 | BIC_CPU_c6 | BIC_CPU_c7 | BIC_GFX_rc6 | BIC_Pkgpc2 | BIC_Pkgpc3 | BIC_Pkgpc6 | BIC_Pkgpc7 | BIC_Pkgpc8 | BIC_Pkgpc9 | BIC_Pkgpc10 | BIC_CPU_LPI | BIC_SYS_LPI | BIC_Mod_c6 | BIC_Totl_c0 | BIC_Any_c0 | BIC_GFX_c0 | BIC_CPUGFX | BIC_SAM_mc6 | BIC_Diec6) #define BIC_OTHER ( BIC_IRQ | BIC_SMI | BIC_ThreadC | BIC_CoreTmp | BIC_IPC) -#define BIC_DISABLED_BY_DEFAULT (BIC_USEC | BIC_TOD | BIC_APIC | BIC_X2APIC) +#define BIC_DISABLED_BY_DEFAULT (BIC_USEC | BIC_TOD | BIC_APIC | BIC_X2APIC | BIC_SysWatt | BIC_Sys_J) unsigned long long bic_enabled = (0xFFFFFFFFFFFFFFFFULL & ~BIC_DISABLED_BY_DEFAULT); unsigned long long bic_present = BIC_USEC | BIC_TOD | BIC_sysfs | BIC_APIC | BIC_X2APIC; @@ -370,7 +380,6 @@ enum gfx_sysfs_idx { }; struct gfx_sysfs_info { - const char *path; FILE *fp; unsigned int val; unsigned long long val_ull; @@ -502,12 +511,15 @@ enum rapl_msrs { RAPL_AMD_PWR_UNIT = BIT(14), /* 0xc0010299 MSR_AMD_RAPL_POWER_UNIT */ RAPL_AMD_CORE_ENERGY_STAT = BIT(15), /* 0xc001029a MSR_AMD_CORE_ENERGY_STATUS */ RAPL_AMD_PKG_ENERGY_STAT = BIT(16), /* 0xc001029b MSR_AMD_PKG_ENERGY_STATUS */ + RAPL_PLATFORM_ENERGY_LIMIT = BIT(17), /* 0x64c MSR_PLATFORM_ENERGY_LIMIT */ + RAPL_PLATFORM_ENERGY_STATUS = BIT(18), /* 0x64d MSR_PLATFORM_ENERGY_STATUS */ }; #define RAPL_PKG (RAPL_PKG_ENERGY_STATUS | RAPL_PKG_POWER_LIMIT) #define RAPL_DRAM (RAPL_DRAM_ENERGY_STATUS | RAPL_DRAM_POWER_LIMIT) #define RAPL_CORE (RAPL_CORE_ENERGY_STATUS | RAPL_CORE_POWER_LIMIT) #define RAPL_GFX (RAPL_GFX_POWER_LIMIT | RAPL_GFX_ENERGY_STATUS) +#define RAPL_PSYS (RAPL_PLATFORM_ENERGY_STATUS | RAPL_PLATFORM_ENERGY_LIMIT) #define RAPL_PKG_ALL (RAPL_PKG | RAPL_PKG_PERF_STATUS | RAPL_PKG_POWER_INFO) #define RAPL_DRAM_ALL (RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_DRAM_POWER_INFO) @@ -708,7 +720,7 @@ static const struct platform_features skl_features = { .has_ext_cst_msrs = 1, .trl_msrs = TRL_BASE, .tcc_offset_bits = 6, - .rapl_msrs = RAPL_PKG_ALL | RAPL_CORE_ALL | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_GFX, + .rapl_msrs = RAPL_PKG_ALL | RAPL_CORE_ALL | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_GFX | RAPL_PSYS, .enable_tsc_tweak = 1, }; @@ -725,42 +737,44 @@ static const struct platform_features cnl_features = { .has_ext_cst_msrs = 1, .trl_msrs = TRL_BASE, .tcc_offset_bits = 6, - .rapl_msrs = RAPL_PKG_ALL | RAPL_CORE_ALL | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_GFX, + .rapl_msrs = RAPL_PKG_ALL | RAPL_CORE_ALL | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_GFX | RAPL_PSYS, .enable_tsc_tweak = 1, }; +/* Copied from cnl_features, with PC7/PC9 removed */ static const struct platform_features adl_features = { - .has_msr_misc_feature_control = 1, - .has_msr_misc_pwr_mgmt = 1, - .has_nhm_msrs = 1, - .has_config_tdp = 1, - .bclk_freq = BCLK_100MHZ, - .supported_cstates = CC1 | CC6 | CC7 | PC2 | PC3 | PC6 | PC8 | PC10, - .cst_limit = CST_LIMIT_HSW, - .has_irtl_msrs = 1, - .has_msr_core_c1_res = 1, - .has_ext_cst_msrs = 1, - .trl_msrs = TRL_BASE, - .tcc_offset_bits = 6, - .rapl_msrs = RAPL_PKG_ALL | RAPL_CORE_ALL | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_GFX, - .enable_tsc_tweak = 1, + .has_msr_misc_feature_control = cnl_features.has_msr_misc_feature_control, + .has_msr_misc_pwr_mgmt = cnl_features.has_msr_misc_pwr_mgmt, + .has_nhm_msrs = cnl_features.has_nhm_msrs, + .has_config_tdp = cnl_features.has_config_tdp, + .bclk_freq = cnl_features.bclk_freq, + .supported_cstates = CC1 | CC6 | CC7 | PC2 | PC3 | PC6 | PC8 | PC10, + .cst_limit = cnl_features.cst_limit, + .has_irtl_msrs = cnl_features.has_irtl_msrs, + .has_msr_core_c1_res = cnl_features.has_msr_core_c1_res, + .has_ext_cst_msrs = cnl_features.has_ext_cst_msrs, + .trl_msrs = cnl_features.trl_msrs, + .tcc_offset_bits = cnl_features.tcc_offset_bits, + .rapl_msrs = cnl_features.rapl_msrs, + .enable_tsc_tweak = cnl_features.enable_tsc_tweak, }; -static const struct platform_features arl_features = { - .has_msr_misc_feature_control = 1, - .has_msr_misc_pwr_mgmt = 1, - .has_nhm_msrs = 1, - .has_config_tdp = 1, - .bclk_freq = BCLK_100MHZ, - .supported_cstates = CC1 | CC6 | CC7 | PC2 | PC3 | PC6 | PC10, - .cst_limit = CST_LIMIT_HSW, - .has_irtl_msrs = 1, - .has_msr_core_c1_res = 1, - .has_ext_cst_msrs = 1, - .trl_msrs = TRL_BASE, - .tcc_offset_bits = 6, - .rapl_msrs = RAPL_PKG_ALL | RAPL_CORE_ALL | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_GFX, - .enable_tsc_tweak = 1, +/* Copied from adl_features, with PC3/PC8 removed */ +static const struct platform_features lnl_features = { + .has_msr_misc_feature_control = adl_features.has_msr_misc_feature_control, + .has_msr_misc_pwr_mgmt = adl_features.has_msr_misc_pwr_mgmt, + .has_nhm_msrs = adl_features.has_nhm_msrs, + .has_config_tdp = adl_features.has_config_tdp, + .bclk_freq = adl_features.bclk_freq, + .supported_cstates = CC1 | CC6 | CC7 | PC2 | PC6 | PC10, + .cst_limit = adl_features.cst_limit, + .has_irtl_msrs = adl_features.has_irtl_msrs, + .has_msr_core_c1_res = adl_features.has_msr_core_c1_res, + .has_ext_cst_msrs = adl_features.has_ext_cst_msrs, + .trl_msrs = adl_features.trl_msrs, + .tcc_offset_bits = adl_features.tcc_offset_bits, + .rapl_msrs = adl_features.rapl_msrs, + .enable_tsc_tweak = adl_features.enable_tsc_tweak, }; static const struct platform_features skx_features = { @@ -790,7 +804,7 @@ static const struct platform_features icx_features = { .has_irtl_msrs = 1, .has_cst_prewake_bit = 1, .trl_msrs = TRL_BASE | TRL_CORECOUNT, - .rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL, + .rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL | RAPL_PSYS, .has_fixed_rapl_unit = 1, }; @@ -806,7 +820,7 @@ static const struct platform_features spr_features = { .has_irtl_msrs = 1, .has_cst_prewake_bit = 1, .trl_msrs = TRL_BASE | TRL_CORECOUNT, - .rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL, + .rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL | RAPL_PSYS, }; static const struct platform_features srf_features = { @@ -822,7 +836,7 @@ static const struct platform_features srf_features = { .has_irtl_msrs = 1, .has_cst_prewake_bit = 1, .trl_msrs = TRL_BASE | TRL_CORECOUNT, - .rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL, + .rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL | RAPL_PSYS, }; static const struct platform_features grr_features = { @@ -838,7 +852,7 @@ static const struct platform_features grr_features = { .has_irtl_msrs = 1, .has_cst_prewake_bit = 1, .trl_msrs = TRL_BASE | TRL_CORECOUNT, - .rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL, + .rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL | RAPL_PSYS, }; static const struct platform_features slv_features = { @@ -997,18 +1011,19 @@ static const struct platform_data turbostat_pdata[] = { { INTEL_SAPPHIRERAPIDS_X, &spr_features }, { INTEL_EMERALDRAPIDS_X, &spr_features }, { INTEL_GRANITERAPIDS_X, &spr_features }, + { INTEL_GRANITERAPIDS_D, &spr_features }, { INTEL_LAKEFIELD, &cnl_features }, { INTEL_ALDERLAKE, &adl_features }, { INTEL_ALDERLAKE_L, &adl_features }, { INTEL_RAPTORLAKE, &adl_features }, { INTEL_RAPTORLAKE_P, &adl_features }, { INTEL_RAPTORLAKE_S, &adl_features }, - { INTEL_METEORLAKE, &cnl_features }, - { INTEL_METEORLAKE_L, &cnl_features }, - { INTEL_ARROWLAKE_H, &arl_features }, - { INTEL_ARROWLAKE_U, &arl_features }, - { INTEL_ARROWLAKE, &arl_features }, - { INTEL_LUNARLAKE_M, &arl_features }, + { INTEL_METEORLAKE, &adl_features }, + { INTEL_METEORLAKE_L, &adl_features }, + { INTEL_ARROWLAKE_H, &adl_features }, + { INTEL_ARROWLAKE_U, &adl_features }, + { INTEL_ARROWLAKE, &adl_features }, + { INTEL_LUNARLAKE_M, &lnl_features }, { INTEL_ATOM_SILVERMONT, &slv_features }, { INTEL_ATOM_SILVERMONT_D, &slvd_features }, { INTEL_ATOM_AIRMONT, &amt_features }, @@ -1100,6 +1115,7 @@ enum rapl_rci_index { RAPL_RCI_INDEX_PKG_PERF_STATUS = 4, RAPL_RCI_INDEX_DRAM_PERF_STATUS = 5, RAPL_RCI_INDEX_CORE_ENERGY = 6, + RAPL_RCI_INDEX_ENERGY_PLATFORM = 7, NUM_RAPL_COUNTERS, }; @@ -1126,6 +1142,7 @@ struct rapl_counter_info_t { struct rapl_counter_info_t *rapl_counter_info_perdomain; unsigned int rapl_counter_info_perdomain_size; +#define RAPL_COUNTER_FLAG_PLATFORM_COUNTER (1u << 0) #define RAPL_COUNTER_FLAG_USE_MSR_SUM (1u << 1) struct rapl_counter_arch_info { @@ -1247,6 +1264,19 @@ static const struct rapl_counter_arch_info rapl_counter_arch_infos[] = { .compat_scale = 1.0, .flags = 0, }, + { + .feature_mask = RAPL_PSYS, + .perf_subsys = "power", + .perf_name = "energy-psys", + .msr = MSR_PLATFORM_ENERGY_STATUS, + .msr_mask = 0x00000000FFFFFFFF, + .msr_shift = 0, + .platform_rapl_msr_scale = &rapl_energy_units, + .rci_index = RAPL_RCI_INDEX_ENERGY_PLATFORM, + .bic = BIC_SysWatt | BIC_Sys_J, + .compat_scale = 1.0, + .flags = RAPL_COUNTER_FLAG_PLATFORM_COUNTER | RAPL_COUNTER_FLAG_USE_MSR_SUM, + }, }; struct rapl_counter { @@ -1674,6 +1704,7 @@ enum { IDX_PP1_ENERGY, IDX_PKG_PERF, IDX_DRAM_PERF, + IDX_PSYS_ENERGY, IDX_COUNT, }; @@ -1718,6 +1749,9 @@ off_t idx_to_offset(int idx) case IDX_DRAM_PERF: offset = MSR_DRAM_PERF_STATUS; break; + case IDX_PSYS_ENERGY: + offset = MSR_PLATFORM_ENERGY_STATUS; + break; default: offset = -1; } @@ -1748,6 +1782,9 @@ int offset_to_idx(off_t offset) case MSR_DRAM_PERF_STATUS: idx = IDX_DRAM_PERF; break; + case MSR_PLATFORM_ENERGY_STATUS: + idx = IDX_PSYS_ENERGY; + break; default: idx = -1; } @@ -1769,6 +1806,8 @@ int idx_valid(int idx) return platform->rapl_msrs & RAPL_PKG_PERF_STATUS; case IDX_DRAM_PERF: return platform->rapl_msrs & RAPL_DRAM_PERF_STATUS; + case IDX_PSYS_ENERGY: + return platform->rapl_msrs & RAPL_PSYS; default: return 0; } @@ -1840,6 +1879,10 @@ struct system_summary { struct pkg_data packages; } average; +struct platform_counters { + struct rapl_counter energy_psys; /* MSR_PLATFORM_ENERGY_STATUS */ +} platform_counters_odd, platform_counters_even; + struct cpu_topology { int physical_package_id; int die_id; @@ -1848,6 +1891,7 @@ struct cpu_topology { int logical_node_id; /* 0-based count within the package */ int physical_core_id; int thread_id; + int type; cpu_set_t *put_ids; /* Processing Unit/Thread IDs */ } *cpus; @@ -2291,7 +2335,7 @@ void print_header(char *delim) break; case PMT_TYPE_XTAL_TIME: - outp += sprintf(outp, "%s%s", delim, ppmt->name); + outp += sprintf(outp, "%s%s", (printed++ ? delim : ""), ppmt->name); break; } @@ -2365,7 +2409,7 @@ void print_header(char *delim) break; case PMT_TYPE_XTAL_TIME: - outp += sprintf(outp, "%s%s", delim, ppmt->name); + outp += sprintf(outp, "%s%s", (printed++ ? delim : ""), ppmt->name); break; } @@ -2496,13 +2540,18 @@ void print_header(char *delim) break; case PMT_TYPE_XTAL_TIME: - outp += sprintf(outp, "%s%s", delim, ppmt->name); + outp += sprintf(outp, "%s%s", (printed++ ? delim : ""), ppmt->name); break; } ppmt = ppmt->next; } + if (DO_BIC(BIC_SysWatt)) + outp += sprintf(outp, "%sSysWatt", (printed++ ? delim : "")); + if (DO_BIC(BIC_Sys_J)) + outp += sprintf(outp, "%sSys_J", (printed++ ? delim : "")); + outp += sprintf(outp, "\n"); } @@ -2510,6 +2559,7 @@ int dump_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p { int i; struct msr_counter *mp; + struct platform_counters *pplat_cnt = p == package_odd ? &platform_counters_odd : &platform_counters_even; outp += sprintf(outp, "t %p, c %p, p %p\n", t, c, p); @@ -2581,6 +2631,7 @@ int dump_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p outp += sprintf(outp, "Joules COR: %0llX\n", p->energy_cores.raw_value); outp += sprintf(outp, "Joules GFX: %0llX\n", p->energy_gfx.raw_value); outp += sprintf(outp, "Joules RAM: %0llX\n", p->energy_dram.raw_value); + outp += sprintf(outp, "Joules PSYS: %0llX\n", pplat_cnt->energy_psys.raw_value); outp += sprintf(outp, "Throttle PKG: %0llX\n", p->rapl_pkg_perf_status.raw_value); outp += sprintf(outp, "Throttle RAM: %0llX\n", p->rapl_dram_perf_status.raw_value); outp += sprintf(outp, "PTM: %dC\n", p->pkg_temp_c); @@ -2619,6 +2670,9 @@ double rapl_counter_get_value(const struct rapl_counter *c, enum rapl_unit desir */ int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) { + static int count; + + struct platform_counters *pplat_cnt = NULL; double interval_float, tsc; char *fmt8; int i; @@ -2628,6 +2682,11 @@ int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data char *delim = "\t"; int printed = 0; + if (t == &average.threads) { + pplat_cnt = count & 1 ? &platform_counters_odd : &platform_counters_even; + ++count; + } + /* if showing only 1st thread in core and this isn't one, bail out */ if (show_core_only && !is_cpu_first_thread_in_core(t, c, p)) return 0; @@ -2788,6 +2847,8 @@ int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data } for (i = 0, ppmt = sys.pmt_tp; ppmt; i++, ppmt = ppmt->next) { + const unsigned long value_raw = t->pmt_counter[i]; + const double value_converted = 100.0 * value_raw / crystal_hz / interval_float; switch (ppmt->type) { case PMT_TYPE_RAW: if (pmt_counter_get_width(ppmt) <= 32) @@ -2799,9 +2860,6 @@ int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data break; case PMT_TYPE_XTAL_TIME: - const unsigned long value_raw = t->pmt_counter[i]; - const double value_converted = 100.0 * value_raw / crystal_hz / interval_float; - outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), value_converted); break; } @@ -2869,6 +2927,8 @@ int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data } for (i = 0, ppmt = sys.pmt_cp; ppmt; i++, ppmt = ppmt->next) { + const unsigned long value_raw = c->pmt_counter[i]; + const double value_converted = 100.0 * value_raw / crystal_hz / interval_float; switch (ppmt->type) { case PMT_TYPE_RAW: if (pmt_counter_get_width(ppmt) <= 32) @@ -2880,9 +2940,6 @@ int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data break; case PMT_TYPE_XTAL_TIME: - const unsigned long value_raw = c->pmt_counter[i]; - const double value_converted = 100.0 * value_raw / crystal_hz / interval_float; - outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), value_converted); break; } @@ -3068,6 +3125,8 @@ int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data } for (i = 0, ppmt = sys.pmt_pp; ppmt; i++, ppmt = ppmt->next) { + const unsigned long value_raw = p->pmt_counter[i]; + const double value_converted = 100.0 * value_raw / crystal_hz / interval_float; switch (ppmt->type) { case PMT_TYPE_RAW: if (pmt_counter_get_width(ppmt) <= 32) @@ -3079,14 +3138,18 @@ int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data break; case PMT_TYPE_XTAL_TIME: - const unsigned long value_raw = p->pmt_counter[i]; - const double value_converted = 100.0 * value_raw / crystal_hz / interval_float; - outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), value_converted); break; } } + if (DO_BIC(BIC_SysWatt) && (t == &average.threads)) + outp += sprintf(outp, fmt8, (printed++ ? delim : ""), + rapl_counter_get_value(&pplat_cnt->energy_psys, RAPL_UNIT_WATTS, interval_float)); + if (DO_BIC(BIC_Sys_J) && (t == &average.threads)) + outp += sprintf(outp, fmt8, (printed++ ? delim : ""), + rapl_counter_get_value(&pplat_cnt->energy_psys, RAPL_UNIT_JOULES, interval_float)); + done: if (*(outp - 1) != '\n') outp += sprintf(outp, "\n"); @@ -3394,6 +3457,11 @@ int delta_cpu(struct thread_data *t, struct core_data *c, return retval; } +void delta_platform(struct platform_counters *new, struct platform_counters *old) +{ + old->energy_psys.raw_value = new->energy_psys.raw_value - old->energy_psys.raw_value; +} + void rapl_counter_clear(struct rapl_counter *c) { c->raw_value = 0; @@ -4123,6 +4191,9 @@ static size_t cstate_counter_info_count_perf(const struct cstate_counter_info_t void write_rapl_counter(struct rapl_counter *rc, struct rapl_counter_info_t *rci, unsigned int idx) { + if (rci->source[idx] == COUNTER_SOURCE_NONE) + return; + rc->raw_value = rci->data[idx]; rc->unit = rci->unit[idx]; rc->scale = rci->scale[idx]; @@ -4130,6 +4201,7 @@ void write_rapl_counter(struct rapl_counter *rc, struct rapl_counter_info_t *rci int get_rapl_counters(int cpu, unsigned int domain, struct core_data *c, struct pkg_data *p) { + struct platform_counters *pplat_cnt = p == package_odd ? &platform_counters_odd : &platform_counters_even; unsigned long long perf_data[NUM_RAPL_COUNTERS + 1]; struct rapl_counter_info_t *rci; @@ -4157,6 +4229,7 @@ int get_rapl_counters(int cpu, unsigned int domain, struct core_data *c, struct for (unsigned int i = 0, pi = 1; i < NUM_RAPL_COUNTERS; ++i) { switch (rci->source[i]) { case COUNTER_SOURCE_NONE: + rci->data[i] = 0; break; case COUNTER_SOURCE_PERF: @@ -4195,7 +4268,7 @@ int get_rapl_counters(int cpu, unsigned int domain, struct core_data *c, struct } } - BUILD_BUG_ON(NUM_RAPL_COUNTERS != 7); + BUILD_BUG_ON(NUM_RAPL_COUNTERS != 8); write_rapl_counter(&p->energy_pkg, rci, RAPL_RCI_INDEX_ENERGY_PKG); write_rapl_counter(&p->energy_cores, rci, RAPL_RCI_INDEX_ENERGY_CORES); write_rapl_counter(&p->energy_dram, rci, RAPL_RCI_INDEX_DRAM); @@ -4203,6 +4276,7 @@ int get_rapl_counters(int cpu, unsigned int domain, struct core_data *c, struct write_rapl_counter(&p->rapl_pkg_perf_status, rci, RAPL_RCI_INDEX_PKG_PERF_STATUS); write_rapl_counter(&p->rapl_dram_perf_status, rci, RAPL_RCI_INDEX_DRAM_PERF_STATUS); write_rapl_counter(&c->core_energy, rci, RAPL_RCI_INDEX_CORE_ENERGY); + write_rapl_counter(&pplat_cnt->energy_psys, rci, RAPL_RCI_INDEX_ENERGY_PLATFORM); return 0; } @@ -5385,6 +5459,9 @@ static int parse_cpu_str(char *cpu_str, cpu_set_t *cpu_set, int cpu_set_size) if (*next == '-') /* no negative cpu numbers */ return 1; + if (*next == '\0' || *next == '\n') + break; + start = strtoul(next, &next, 10); if (start >= CPU_SUBSET_MAXCPUS) @@ -5656,6 +5733,32 @@ int init_thread_id(int cpu) return 0; } +int set_my_cpu_type(void) +{ + unsigned int eax, ebx, ecx, edx; + unsigned int max_level; + + __cpuid(0, max_level, ebx, ecx, edx); + + if (max_level < CPUID_LEAF_MODEL_ID) + return 0; + + __cpuid(CPUID_LEAF_MODEL_ID, eax, ebx, ecx, edx); + + return (eax >> CPUID_LEAF_MODEL_ID_CORE_TYPE_SHIFT); +} + +int set_cpu_hybrid_type(int cpu) +{ + if (cpu_migrate(cpu)) + return -1; + + int type = set_my_cpu_type(); + + cpus[cpu].type = type; + return 0; +} + /* * snapshot_proc_interrupts() * @@ -5728,28 +5831,21 @@ int snapshot_proc_interrupts(void) */ int snapshot_graphics(int idx) { - FILE *fp; int retval; + rewind(gfx_info[idx].fp); + switch (idx) { case GFX_rc6: case SAM_mc6: - fp = fopen_or_die(gfx_info[idx].path, "r"); - retval = fscanf(fp, "%lld", &gfx_info[idx].val_ull); + retval = fscanf(gfx_info[idx].fp, "%lld", &gfx_info[idx].val_ull); if (retval != 1) err(1, "rc6"); - fclose(fp); return 0; case GFX_MHz: case GFX_ACTMHz: case SAM_MHz: case SAM_ACTMHz: - if (gfx_info[idx].fp == NULL) { - gfx_info[idx].fp = fopen_or_die(gfx_info[idx].path, "r"); - } else { - rewind(gfx_info[idx].fp); - fflush(gfx_info[idx].fp); - } retval = fscanf(gfx_info[idx].fp, "%d", &gfx_info[idx].val); if (retval != 1) err(1, "MHz"); @@ -6116,6 +6212,7 @@ restart: re_initialize(); goto restart; } + delta_platform(&platform_counters_odd, &platform_counters_even); compute_average(EVEN_COUNTERS); format_all_counters(EVEN_COUNTERS); flush_output_stdout(); @@ -6139,6 +6236,7 @@ restart: re_initialize(); goto restart; } + delta_platform(&platform_counters_even, &platform_counters_odd); compute_average(ODD_COUNTERS); format_all_counters(ODD_COUNTERS); flush_output_stdout(); @@ -6442,14 +6540,25 @@ static void probe_intel_uncore_frequency(void) probe_intel_uncore_frequency_legacy(); } +static void set_graphics_fp(char *path, int idx) +{ + if (!access(path, R_OK)) + gfx_info[idx].fp = fopen_or_die(path, "r"); +} + +/* Enlarge this if there are /sys/class/drm/card2 ... */ +#define GFX_MAX_CARDS 2 + static void probe_graphics(void) { + char path[PATH_MAX]; + int i; + /* Xe graphics sysfs knobs */ if (!access("/sys/class/drm/card0/device/tile0/gt0/gtidle/idle_residency_ms", R_OK)) { FILE *fp; char buf[8]; bool gt0_is_gt; - int idx; fp = fopen("/sys/class/drm/card0/device/tile0/gt0/gtidle/name", "r"); if (!fp) @@ -6468,81 +6577,76 @@ static void probe_graphics(void) else goto next; - idx = gt0_is_gt ? GFX_rc6 : SAM_mc6; - gfx_info[idx].path = "/sys/class/drm/card0/device/tile0/gt0/gtidle/idle_residency_ms"; + set_graphics_fp("/sys/class/drm/card0/device/tile0/gt0/gtidle/idle_residency_ms", gt0_is_gt ? GFX_rc6 : SAM_mc6); - idx = gt0_is_gt ? GFX_MHz : SAM_MHz; - if (!access("/sys/class/drm/card0/device/tile0/gt0/freq0/cur_freq", R_OK)) - gfx_info[idx].path = "/sys/class/drm/card0/device/tile0/gt0/freq0/cur_freq"; + set_graphics_fp("/sys/class/drm/card0/device/tile0/gt0/freq0/cur_freq", gt0_is_gt ? GFX_MHz : SAM_MHz); - idx = gt0_is_gt ? GFX_ACTMHz : SAM_ACTMHz; - if (!access("/sys/class/drm/card0/device/tile0/gt0/freq0/act_freq", R_OK)) - gfx_info[idx].path = "/sys/class/drm/card0/device/tile0/gt0/freq0/act_freq"; + set_graphics_fp("/sys/class/drm/card0/device/tile0/gt0/freq0/act_freq", gt0_is_gt ? GFX_ACTMHz : SAM_ACTMHz); - idx = gt0_is_gt ? SAM_mc6 : GFX_rc6; - if (!access("/sys/class/drm/card0/device/tile0/gt1/gtidle/idle_residency_ms", R_OK)) - gfx_info[idx].path = "/sys/class/drm/card0/device/tile0/gt1/gtidle/idle_residency_ms"; + set_graphics_fp("/sys/class/drm/card0/device/tile0/gt1/gtidle/idle_residency_ms", gt0_is_gt ? SAM_mc6 : GFX_rc6); - idx = gt0_is_gt ? SAM_MHz : GFX_MHz; - if (!access("/sys/class/drm/card0/device/tile0/gt1/freq0/cur_freq", R_OK)) - gfx_info[idx].path = "/sys/class/drm/card0/device/tile0/gt1/freq0/cur_freq"; + set_graphics_fp("/sys/class/drm/card0/device/tile0/gt1/freq0/cur_freq", gt0_is_gt ? SAM_MHz : GFX_MHz); - idx = gt0_is_gt ? SAM_ACTMHz : GFX_ACTMHz; - if (!access("/sys/class/drm/card0/device/tile0/gt1/freq0/act_freq", R_OK)) - gfx_info[idx].path = "/sys/class/drm/card0/device/tile0/gt1/freq0/act_freq"; + set_graphics_fp("/sys/class/drm/card0/device/tile0/gt1/freq0/act_freq", gt0_is_gt ? SAM_ACTMHz : GFX_ACTMHz); goto end; } next: /* New i915 graphics sysfs knobs */ - if (!access("/sys/class/drm/card0/gt/gt0/rc6_residency_ms", R_OK)) { - gfx_info[GFX_rc6].path = "/sys/class/drm/card0/gt/gt0/rc6_residency_ms"; + for (i = 0; i < GFX_MAX_CARDS; i++) { + snprintf(path, PATH_MAX, "/sys/class/drm/card%d/gt/gt0/rc6_residency_ms", i); + if (!access(path, R_OK)) + break; + } - if (!access("/sys/class/drm/card0/gt/gt0/rps_cur_freq_mhz", R_OK)) - gfx_info[GFX_MHz].path = "/sys/class/drm/card0/gt/gt0/rps_cur_freq_mhz"; + if (i == GFX_MAX_CARDS) + goto legacy_i915; - if (!access("/sys/class/drm/card0/gt/gt0/rps_act_freq_mhz", R_OK)) - gfx_info[GFX_ACTMHz].path = "/sys/class/drm/card0/gt/gt0/rps_act_freq_mhz"; + snprintf(path, PATH_MAX, "/sys/class/drm/card%d/gt/gt0/rc6_residency_ms", i); + set_graphics_fp(path, GFX_rc6); - if (!access("/sys/class/drm/card0/gt/gt1/rc6_residency_ms", R_OK)) - gfx_info[SAM_mc6].path = "/sys/class/drm/card0/gt/gt1/rc6_residency_ms"; + snprintf(path, PATH_MAX, "/sys/class/drm/card%d/gt/gt0/rps_cur_freq_mhz", i); + set_graphics_fp(path, GFX_MHz); - if (!access("/sys/class/drm/card0/gt/gt1/rps_cur_freq_mhz", R_OK)) - gfx_info[SAM_MHz].path = "/sys/class/drm/card0/gt/gt1/rps_cur_freq_mhz"; + snprintf(path, PATH_MAX, "/sys/class/drm/card%d/gt/gt0/rps_act_freq_mhz", i); + set_graphics_fp(path, GFX_ACTMHz); - if (!access("/sys/class/drm/card0/gt/gt1/rps_act_freq_mhz", R_OK)) - gfx_info[SAM_ACTMHz].path = "/sys/class/drm/card0/gt/gt1/rps_act_freq_mhz"; + snprintf(path, PATH_MAX, "/sys/class/drm/card%d/gt/gt1/rc6_residency_ms", i); + set_graphics_fp(path, SAM_mc6); - goto end; - } + snprintf(path, PATH_MAX, "/sys/class/drm/card%d/gt/gt1/rps_cur_freq_mhz", i); + set_graphics_fp(path, SAM_MHz); + + snprintf(path, PATH_MAX, "/sys/class/drm/card%d/gt/gt1/rps_act_freq_mhz", i); + set_graphics_fp(path, SAM_ACTMHz); + goto end; + +legacy_i915: /* Fall back to traditional i915 graphics sysfs knobs */ - if (!access("/sys/class/drm/card0/power/rc6_residency_ms", R_OK)) - gfx_info[GFX_rc6].path = "/sys/class/drm/card0/power/rc6_residency_ms"; + set_graphics_fp("/sys/class/drm/card0/power/rc6_residency_ms", GFX_rc6); - if (!access("/sys/class/drm/card0/gt_cur_freq_mhz", R_OK)) - gfx_info[GFX_MHz].path = "/sys/class/drm/card0/gt_cur_freq_mhz"; - else if (!access("/sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz", R_OK)) - gfx_info[GFX_MHz].path = "/sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz"; + set_graphics_fp("/sys/class/drm/card0/gt_cur_freq_mhz", GFX_MHz); + if (!gfx_info[GFX_MHz].fp) + set_graphics_fp("/sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz", GFX_MHz); - if (!access("/sys/class/drm/card0/gt_act_freq_mhz", R_OK)) - gfx_info[GFX_ACTMHz].path = "/sys/class/drm/card0/gt_act_freq_mhz"; - else if (!access("/sys/class/graphics/fb0/device/drm/card0/gt_act_freq_mhz", R_OK)) - gfx_info[GFX_ACTMHz].path = "/sys/class/graphics/fb0/device/drm/card0/gt_act_freq_mhz"; + set_graphics_fp("/sys/class/drm/card0/gt_act_freq_mhz", GFX_ACTMHz); + if (!gfx_info[GFX_ACTMHz].fp) + set_graphics_fp("/sys/class/graphics/fb0/device/drm/card0/gt_act_freq_mhz", GFX_ACTMHz); end: - if (gfx_info[GFX_rc6].path) + if (gfx_info[GFX_rc6].fp) BIC_PRESENT(BIC_GFX_rc6); - if (gfx_info[GFX_MHz].path) + if (gfx_info[GFX_MHz].fp) BIC_PRESENT(BIC_GFXMHz); - if (gfx_info[GFX_ACTMHz].path) + if (gfx_info[GFX_ACTMHz].fp) BIC_PRESENT(BIC_GFXACTMHz); - if (gfx_info[SAM_mc6].path) + if (gfx_info[SAM_mc6].fp) BIC_PRESENT(BIC_SAM_mc6); - if (gfx_info[SAM_MHz].path) + if (gfx_info[SAM_MHz].fp) BIC_PRESENT(BIC_SAMMHz); - if (gfx_info[SAM_ACTMHz].path) + if (gfx_info[SAM_ACTMHz].fp) BIC_PRESENT(BIC_SAMACTMHz); } @@ -6911,8 +7015,8 @@ void rapl_probe_intel(void) unsigned long long msr; unsigned int time_unit; double tdp; - const unsigned long long bic_watt_bits = BIC_PkgWatt | BIC_CorWatt | BIC_RAMWatt | BIC_GFXWatt; - const unsigned long long bic_joules_bits = BIC_Pkg_J | BIC_Cor_J | BIC_RAM_J | BIC_GFX_J; + const unsigned long long bic_watt_bits = BIC_SysWatt | BIC_PkgWatt | BIC_CorWatt | BIC_RAMWatt | BIC_GFXWatt; + const unsigned long long bic_joules_bits = BIC_Sys_J | BIC_Pkg_J | BIC_Cor_J | BIC_RAM_J | BIC_GFX_J; if (rapl_joules) bic_enabled &= ~bic_watt_bits; @@ -7572,6 +7676,9 @@ void rapl_perf_init(void) domain_visited[next_domain] = 1; + if ((cai->flags & RAPL_COUNTER_FLAG_PLATFORM_COUNTER) && (cpu != base_cpu)) + continue; + struct rapl_counter_info_t *rci = &rapl_counter_info_perdomain[next_domain]; /* Check if the counter is enabled and accessible */ @@ -8196,7 +8303,7 @@ void topology_probe(bool startup) set_max_cpu_num(); topo.num_cpus = 0; for_all_proc_cpus(count_cpus); - if (!summary_only && topo.num_cpus > 1) + if (!summary_only) BIC_PRESENT(BIC_CPU); if (debug > 1) @@ -8284,6 +8391,8 @@ void topology_probe(bool startup) for_all_proc_cpus(init_thread_id); + for_all_proc_cpus(set_cpu_hybrid_type); + /* * For online cpus * find max_core_id, max_package_id @@ -8332,7 +8441,7 @@ void topology_probe(bool startup) topo.cores_per_node = max_core_id + 1; if (debug > 1) fprintf(outf, "max_core_id %d, sizing for %d cores per package\n", max_core_id, topo.cores_per_node); - if (!summary_only && topo.cores_per_node > 1) + if (!summary_only) BIC_PRESENT(BIC_Core); topo.num_die = topo.max_die_id + 1; @@ -8548,6 +8657,35 @@ void check_perf_access(void) bic_enabled &= ~BIC_IPC; } +bool perf_has_hybrid_devices(void) +{ + /* + * 0: unknown + * 1: has separate perf device for p and e core + * -1: doesn't have separate perf device for p and e core + */ + static int cached; + + if (cached > 0) + return true; + + if (cached < 0) + return false; + + if (access("/sys/bus/event_source/devices/cpu_core", F_OK)) { + cached = -1; + return false; + } + + if (access("/sys/bus/event_source/devices/cpu_atom", F_OK)) { + cached = -1; + return false; + } + + cached = 1; + return true; +} + int added_perf_counters_init_(struct perf_counter_info *pinfo) { size_t num_domains = 0; @@ -8604,29 +8742,56 @@ int added_perf_counters_init_(struct perf_counter_info *pinfo) if (domain_visited[next_domain]) continue; - perf_type = read_perf_type(pinfo->device); + /* + * Intel hybrid platforms expose different perf devices for P and E cores. + * Instead of one, "/sys/bus/event_source/devices/cpu" device, there are + * "/sys/bus/event_source/devices/{cpu_core,cpu_atom}". + * + * This makes it more complicated to the user, because most of the counters + * are available on both and have to be handled manually, otherwise. + * + * Code below, allow user to use the old "cpu" name, which is translated accordingly. + */ + const char *perf_device = pinfo->device; + + if (strcmp(perf_device, "cpu") == 0 && perf_has_hybrid_devices()) { + switch (cpus[cpu].type) { + case INTEL_PCORE_TYPE: + perf_device = "cpu_core"; + break; + + case INTEL_ECORE_TYPE: + perf_device = "cpu_atom"; + break; + + default: /* Don't change, we will probably fail and report a problem soon. */ + break; + } + } + + perf_type = read_perf_type(perf_device); if (perf_type == (unsigned int)-1) { warnx("%s: perf/%s/%s: failed to read %s", - __func__, pinfo->device, pinfo->event, "type"); + __func__, perf_device, pinfo->event, "type"); continue; } - perf_config = read_perf_config(pinfo->device, pinfo->event); + perf_config = read_perf_config(perf_device, pinfo->event); if (perf_config == (unsigned int)-1) { warnx("%s: perf/%s/%s: failed to read %s", - __func__, pinfo->device, pinfo->event, "config"); + __func__, perf_device, pinfo->event, "config"); continue; } /* Scale is not required, some counters just don't have it. */ - perf_scale = read_perf_scale(pinfo->device, pinfo->event); + perf_scale = read_perf_scale(perf_device, pinfo->event); if (perf_scale == 0.0) perf_scale = 1.0; fd_perf = open_perf_counter(cpu, perf_type, perf_config, -1, 0); if (fd_perf == -1) { warnx("%s: perf/%s/%s: failed to open counter on cpu%d", - __func__, pinfo->device, pinfo->event, cpu); + __func__, perf_device, pinfo->event, cpu); continue; } @@ -8636,7 +8801,7 @@ int added_perf_counters_init_(struct perf_counter_info *pinfo) if (debug) fprintf(stderr, "Add perf/%s/%s cpu%d: %d\n", - pinfo->device, pinfo->event, cpu, pinfo->fd_perf_per_domain[next_domain]); + perf_device, pinfo->event, cpu, pinfo->fd_perf_per_domain[next_domain]); } pinfo = pinfo->next; @@ -9071,7 +9236,7 @@ int get_and_dump_counters(void) void print_version() { - fprintf(outf, "turbostat version 2024.07.26 - Len Brown <lenb@kernel.org>\n"); + fprintf(outf, "turbostat version 2024.11.30 - Len Brown <lenb@kernel.org>\n"); } #define COMMAND_LINE_SIZE 2048 @@ -9781,7 +9946,7 @@ void cmdline(int argc, char **argv) * Parse some options early, because they may make other options invalid, * like adding the MSR counter with --add and at the same time using --no-msr. */ - while ((opt = getopt_long_only(argc, argv, "MPn:", long_options, &option_index)) != -1) { + while ((opt = getopt_long_only(argc, argv, "+MPn:", long_options, &option_index)) != -1) { switch (opt) { case 'M': no_msr = 1; @@ -9805,6 +9970,12 @@ void cmdline(int argc, char **argv) break; case 'D': dump_only++; + /* + * Force the no_perf early to prevent using it as a source. + * User asks for raw values, but perf returns them relative + * to the opening of the file descriptor. + */ + no_perf = 1; break; case 'e': /* --enable specified counter */ diff --git a/tools/sched_ext/include/scx/common.bpf.h b/tools/sched_ext/include/scx/common.bpf.h index 225f61f9bfca..2f36b7b6418d 100644 --- a/tools/sched_ext/include/scx/common.bpf.h +++ b/tools/sched_ext/include/scx/common.bpf.h @@ -36,15 +36,15 @@ static inline void ___vmlinux_h_sanity_check___(void) s32 scx_bpf_create_dsq(u64 dsq_id, s32 node) __ksym; s32 scx_bpf_select_cpu_dfl(struct task_struct *p, s32 prev_cpu, u64 wake_flags, bool *is_idle) __ksym; -void scx_bpf_dispatch(struct task_struct *p, u64 dsq_id, u64 slice, u64 enq_flags) __ksym; -void scx_bpf_dispatch_vtime(struct task_struct *p, u64 dsq_id, u64 slice, u64 vtime, u64 enq_flags) __ksym; +void scx_bpf_dsq_insert(struct task_struct *p, u64 dsq_id, u64 slice, u64 enq_flags) __ksym __weak; +void scx_bpf_dsq_insert_vtime(struct task_struct *p, u64 dsq_id, u64 slice, u64 vtime, u64 enq_flags) __ksym __weak; u32 scx_bpf_dispatch_nr_slots(void) __ksym; void scx_bpf_dispatch_cancel(void) __ksym; -bool scx_bpf_consume(u64 dsq_id) __ksym; -void scx_bpf_dispatch_from_dsq_set_slice(struct bpf_iter_scx_dsq *it__iter, u64 slice) __ksym; -void scx_bpf_dispatch_from_dsq_set_vtime(struct bpf_iter_scx_dsq *it__iter, u64 vtime) __ksym; -bool scx_bpf_dispatch_from_dsq(struct bpf_iter_scx_dsq *it__iter, struct task_struct *p, u64 dsq_id, u64 enq_flags) __ksym __weak; -bool scx_bpf_dispatch_vtime_from_dsq(struct bpf_iter_scx_dsq *it__iter, struct task_struct *p, u64 dsq_id, u64 enq_flags) __ksym __weak; +bool scx_bpf_dsq_move_to_local(u64 dsq_id) __ksym; +void scx_bpf_dsq_move_set_slice(struct bpf_iter_scx_dsq *it__iter, u64 slice) __ksym; +void scx_bpf_dsq_move_set_vtime(struct bpf_iter_scx_dsq *it__iter, u64 vtime) __ksym; +bool scx_bpf_dsq_move(struct bpf_iter_scx_dsq *it__iter, struct task_struct *p, u64 dsq_id, u64 enq_flags) __ksym __weak; +bool scx_bpf_dsq_move_vtime(struct bpf_iter_scx_dsq *it__iter, struct task_struct *p, u64 dsq_id, u64 enq_flags) __ksym __weak; u32 scx_bpf_reenqueue_local(void) __ksym; void scx_bpf_kick_cpu(s32 cpu, u64 flags) __ksym; s32 scx_bpf_dsq_nr_queued(u64 dsq_id) __ksym; @@ -71,11 +71,11 @@ s32 scx_bpf_pick_any_cpu(const cpumask_t *cpus_allowed, u64 flags) __ksym; bool scx_bpf_task_running(const struct task_struct *p) __ksym; s32 scx_bpf_task_cpu(const struct task_struct *p) __ksym; struct rq *scx_bpf_cpu_rq(s32 cpu) __ksym; -struct cgroup *scx_bpf_task_cgroup(struct task_struct *p) __ksym; +struct cgroup *scx_bpf_task_cgroup(struct task_struct *p) __ksym __weak; /* - * Use the following as @it__iter when calling - * scx_bpf_dispatch[_vtime]_from_dsq() from within bpf_for_each() loops. + * Use the following as @it__iter when calling scx_bpf_dsq_move[_vtime]() from + * within bpf_for_each() loops. */ #define BPF_FOR_EACH_ITER (&___it) @@ -320,7 +320,7 @@ u32 bpf_cpumask_weight(const struct cpumask *cpumask) __ksym; /* * Access a cpumask in read-only mode (typically to check bits). */ -const struct cpumask *cast_mask(struct bpf_cpumask *mask) +static __always_inline const struct cpumask *cast_mask(struct bpf_cpumask *mask) { return (const struct cpumask *)mask; } diff --git a/tools/sched_ext/include/scx/compat.bpf.h b/tools/sched_ext/include/scx/compat.bpf.h index e5afe9efd3f3..d56520100a26 100644 --- a/tools/sched_ext/include/scx/compat.bpf.h +++ b/tools/sched_ext/include/scx/compat.bpf.h @@ -20,19 +20,110 @@ (bpf_ksym_exists(scx_bpf_task_cgroup) ? \ scx_bpf_task_cgroup((p)) : NULL) -/* v6.12: 4c30f5ce4f7a ("sched_ext: Implement scx_bpf_dispatch[_vtime]_from_dsq()") */ -#define __COMPAT_scx_bpf_dispatch_from_dsq_set_slice(it, slice) \ - (bpf_ksym_exists(scx_bpf_dispatch_from_dsq_set_slice) ? \ - scx_bpf_dispatch_from_dsq_set_slice((it), (slice)) : (void)0) -#define __COMPAT_scx_bpf_dispatch_from_dsq_set_vtime(it, vtime) \ - (bpf_ksym_exists(scx_bpf_dispatch_from_dsq_set_vtime) ? \ - scx_bpf_dispatch_from_dsq_set_vtime((it), (vtime)) : (void)0) -#define __COMPAT_scx_bpf_dispatch_from_dsq(it, p, dsq_id, enq_flags) \ - (bpf_ksym_exists(scx_bpf_dispatch_from_dsq) ? \ - scx_bpf_dispatch_from_dsq((it), (p), (dsq_id), (enq_flags)) : false) -#define __COMPAT_scx_bpf_dispatch_vtime_from_dsq(it, p, dsq_id, enq_flags) \ - (bpf_ksym_exists(scx_bpf_dispatch_vtime_from_dsq) ? \ - scx_bpf_dispatch_vtime_from_dsq((it), (p), (dsq_id), (enq_flags)) : false) +/* + * v6.13: The verb `dispatch` was too overloaded and confusing. kfuncs are + * renamed to unload the verb. + * + * Build error is triggered if old names are used. New binaries work with both + * new and old names. The compat macros will be removed on v6.15 release. + * + * scx_bpf_dispatch_from_dsq() and friends were added during v6.12 by + * 4c30f5ce4f7a ("sched_ext: Implement scx_bpf_dispatch[_vtime]_from_dsq()"). + * Preserve __COMPAT macros until v6.15. + */ +void scx_bpf_dispatch___compat(struct task_struct *p, u64 dsq_id, u64 slice, u64 enq_flags) __ksym __weak; +void scx_bpf_dispatch_vtime___compat(struct task_struct *p, u64 dsq_id, u64 slice, u64 vtime, u64 enq_flags) __ksym __weak; +bool scx_bpf_consume___compat(u64 dsq_id) __ksym __weak; +void scx_bpf_dispatch_from_dsq_set_slice___compat(struct bpf_iter_scx_dsq *it__iter, u64 slice) __ksym __weak; +void scx_bpf_dispatch_from_dsq_set_vtime___compat(struct bpf_iter_scx_dsq *it__iter, u64 vtime) __ksym __weak; +bool scx_bpf_dispatch_from_dsq___compat(struct bpf_iter_scx_dsq *it__iter, struct task_struct *p, u64 dsq_id, u64 enq_flags) __ksym __weak; +bool scx_bpf_dispatch_vtime_from_dsq___compat(struct bpf_iter_scx_dsq *it__iter, struct task_struct *p, u64 dsq_id, u64 enq_flags) __ksym __weak; + +#define scx_bpf_dsq_insert(p, dsq_id, slice, enq_flags) \ + (bpf_ksym_exists(scx_bpf_dsq_insert) ? \ + scx_bpf_dsq_insert((p), (dsq_id), (slice), (enq_flags)) : \ + scx_bpf_dispatch___compat((p), (dsq_id), (slice), (enq_flags))) + +#define scx_bpf_dsq_insert_vtime(p, dsq_id, slice, vtime, enq_flags) \ + (bpf_ksym_exists(scx_bpf_dsq_insert_vtime) ? \ + scx_bpf_dsq_insert_vtime((p), (dsq_id), (slice), (vtime), (enq_flags)) : \ + scx_bpf_dispatch_vtime___compat((p), (dsq_id), (slice), (vtime), (enq_flags))) + +#define scx_bpf_dsq_move_to_local(dsq_id) \ + (bpf_ksym_exists(scx_bpf_dsq_move_to_local) ? \ + scx_bpf_dsq_move_to_local((dsq_id)) : \ + scx_bpf_consume___compat((dsq_id))) + +#define __COMPAT_scx_bpf_dsq_move_set_slice(it__iter, slice) \ + (bpf_ksym_exists(scx_bpf_dsq_move_set_slice) ? \ + scx_bpf_dsq_move_set_slice((it__iter), (slice)) : \ + (bpf_ksym_exists(scx_bpf_dispatch_from_dsq_set_slice___compat) ? \ + scx_bpf_dispatch_from_dsq_set_slice___compat((it__iter), (slice)) : \ + (void)0)) + +#define __COMPAT_scx_bpf_dsq_move_set_vtime(it__iter, vtime) \ + (bpf_ksym_exists(scx_bpf_dsq_move_set_vtime) ? \ + scx_bpf_dsq_move_set_vtime((it__iter), (vtime)) : \ + (bpf_ksym_exists(scx_bpf_dispatch_from_dsq_set_vtime___compat) ? \ + scx_bpf_dispatch_from_dsq_set_vtime___compat((it__iter), (vtime)) : \ + (void) 0)) + +#define __COMPAT_scx_bpf_dsq_move(it__iter, p, dsq_id, enq_flags) \ + (bpf_ksym_exists(scx_bpf_dsq_move) ? \ + scx_bpf_dsq_move((it__iter), (p), (dsq_id), (enq_flags)) : \ + (bpf_ksym_exists(scx_bpf_dispatch_from_dsq___compat) ? \ + scx_bpf_dispatch_from_dsq___compat((it__iter), (p), (dsq_id), (enq_flags)) : \ + false)) + +#define __COMPAT_scx_bpf_dsq_move_vtime(it__iter, p, dsq_id, enq_flags) \ + (bpf_ksym_exists(scx_bpf_dsq_move_vtime) ? \ + scx_bpf_dsq_move_vtime((it__iter), (p), (dsq_id), (enq_flags)) : \ + (bpf_ksym_exists(scx_bpf_dispatch_vtime_from_dsq___compat) ? \ + scx_bpf_dispatch_vtime_from_dsq___compat((it__iter), (p), (dsq_id), (enq_flags)) : \ + false)) + +#define scx_bpf_dispatch(p, dsq_id, slice, enq_flags) \ + _Static_assert(false, "scx_bpf_dispatch() renamed to scx_bpf_dsq_insert()") + +#define scx_bpf_dispatch_vtime(p, dsq_id, slice, vtime, enq_flags) \ + _Static_assert(false, "scx_bpf_dispatch_vtime() renamed to scx_bpf_dsq_insert_vtime()") + +#define scx_bpf_consume(dsq_id) ({ \ + _Static_assert(false, "scx_bpf_consume() renamed to scx_bpf_dsq_move_to_local()"); \ + false; \ +}) + +#define scx_bpf_dispatch_from_dsq_set_slice(it__iter, slice) \ + _Static_assert(false, "scx_bpf_dispatch_from_dsq_set_slice() renamed to scx_bpf_dsq_move_set_slice()") + +#define scx_bpf_dispatch_from_dsq_set_vtime(it__iter, vtime) \ + _Static_assert(false, "scx_bpf_dispatch_from_dsq_set_vtime() renamed to scx_bpf_dsq_move_set_vtime()") + +#define scx_bpf_dispatch_from_dsq(it__iter, p, dsq_id, enq_flags) ({ \ + _Static_assert(false, "scx_bpf_dispatch_from_dsq() renamed to scx_bpf_dsq_move()"); \ + false; \ +}) + +#define scx_bpf_dispatch_vtime_from_dsq(it__iter, p, dsq_id, enq_flags) ({ \ + _Static_assert(false, "scx_bpf_dispatch_vtime_from_dsq() renamed to scx_bpf_dsq_move_vtime()"); \ + false; \ +}) + +#define __COMPAT_scx_bpf_dispatch_from_dsq_set_slice(it__iter, slice) \ + _Static_assert(false, "__COMPAT_scx_bpf_dispatch_from_dsq_set_slice() renamed to __COMPAT_scx_bpf_dsq_move_set_slice()") + +#define __COMPAT_scx_bpf_dispatch_from_dsq_set_vtime(it__iter, vtime) \ + _Static_assert(false, "__COMPAT_scx_bpf_dispatch_from_dsq_set_vtime() renamed to __COMPAT_scx_bpf_dsq_move_set_vtime()") + +#define __COMPAT_scx_bpf_dispatch_from_dsq(it__iter, p, dsq_id, enq_flags) ({ \ + _Static_assert(false, "__COMPAT_scx_bpf_dispatch_from_dsq() renamed to __COMPAT_scx_bpf_dsq_move()"); \ + false; \ +}) + +#define __COMPAT_scx_bpf_dispatch_vtime_from_dsq(it__iter, p, dsq_id, enq_flags) ({ \ + _Static_assert(false, "__COMPAT_scx_bpf_dispatch_vtime_from_dsq() renamed to __COMPAT_scx_bpf_dsq_move_vtime()"); \ + false; \ +}) /* * Define sched_ext_ops. This may be expanded to define multiple variants for diff --git a/tools/sched_ext/scx_central.bpf.c b/tools/sched_ext/scx_central.bpf.c index 8dd8eb73b6b8..e6fad6211f6c 100644 --- a/tools/sched_ext/scx_central.bpf.c +++ b/tools/sched_ext/scx_central.bpf.c @@ -118,14 +118,14 @@ void BPF_STRUCT_OPS(central_enqueue, struct task_struct *p, u64 enq_flags) */ if ((p->flags & PF_KTHREAD) && p->nr_cpus_allowed == 1) { __sync_fetch_and_add(&nr_locals, 1); - scx_bpf_dispatch(p, SCX_DSQ_LOCAL, SCX_SLICE_INF, - enq_flags | SCX_ENQ_PREEMPT); + scx_bpf_dsq_insert(p, SCX_DSQ_LOCAL, SCX_SLICE_INF, + enq_flags | SCX_ENQ_PREEMPT); return; } if (bpf_map_push_elem(¢ral_q, &pid, 0)) { __sync_fetch_and_add(&nr_overflows, 1); - scx_bpf_dispatch(p, FALLBACK_DSQ_ID, SCX_SLICE_INF, enq_flags); + scx_bpf_dsq_insert(p, FALLBACK_DSQ_ID, SCX_SLICE_INF, enq_flags); return; } @@ -158,7 +158,7 @@ static bool dispatch_to_cpu(s32 cpu) */ if (!bpf_cpumask_test_cpu(cpu, p->cpus_ptr)) { __sync_fetch_and_add(&nr_mismatches, 1); - scx_bpf_dispatch(p, FALLBACK_DSQ_ID, SCX_SLICE_INF, 0); + scx_bpf_dsq_insert(p, FALLBACK_DSQ_ID, SCX_SLICE_INF, 0); bpf_task_release(p); /* * We might run out of dispatch buffer slots if we continue dispatching @@ -172,7 +172,7 @@ static bool dispatch_to_cpu(s32 cpu) } /* dispatch to local and mark that @cpu doesn't need more */ - scx_bpf_dispatch(p, SCX_DSQ_LOCAL_ON | cpu, SCX_SLICE_INF, 0); + scx_bpf_dsq_insert(p, SCX_DSQ_LOCAL_ON | cpu, SCX_SLICE_INF, 0); if (cpu != central_cpu) scx_bpf_kick_cpu(cpu, SCX_KICK_IDLE); @@ -219,13 +219,13 @@ void BPF_STRUCT_OPS(central_dispatch, s32 cpu, struct task_struct *prev) } /* look for a task to run on the central CPU */ - if (scx_bpf_consume(FALLBACK_DSQ_ID)) + if (scx_bpf_dsq_move_to_local(FALLBACK_DSQ_ID)) return; dispatch_to_cpu(central_cpu); } else { bool *gimme; - if (scx_bpf_consume(FALLBACK_DSQ_ID)) + if (scx_bpf_dsq_move_to_local(FALLBACK_DSQ_ID)) return; gimme = ARRAY_ELEM_PTR(cpu_gimme_task, cpu, nr_cpu_ids); diff --git a/tools/sched_ext/scx_flatcg.bpf.c b/tools/sched_ext/scx_flatcg.bpf.c index b722baf6da4b..4e3afcd260bf 100644 --- a/tools/sched_ext/scx_flatcg.bpf.c +++ b/tools/sched_ext/scx_flatcg.bpf.c @@ -341,7 +341,7 @@ s32 BPF_STRUCT_OPS(fcg_select_cpu, struct task_struct *p, s32 prev_cpu, u64 wake if (is_idle) { set_bypassed_at(p, taskc); stat_inc(FCG_STAT_LOCAL); - scx_bpf_dispatch(p, SCX_DSQ_LOCAL, SCX_SLICE_DFL, 0); + scx_bpf_dsq_insert(p, SCX_DSQ_LOCAL, SCX_SLICE_DFL, 0); } return cpu; @@ -377,10 +377,12 @@ void BPF_STRUCT_OPS(fcg_enqueue, struct task_struct *p, u64 enq_flags) */ if (p->nr_cpus_allowed == 1 && (p->flags & PF_KTHREAD)) { stat_inc(FCG_STAT_LOCAL); - scx_bpf_dispatch(p, SCX_DSQ_LOCAL, SCX_SLICE_DFL, enq_flags); + scx_bpf_dsq_insert(p, SCX_DSQ_LOCAL, SCX_SLICE_DFL, + enq_flags); } else { stat_inc(FCG_STAT_GLOBAL); - scx_bpf_dispatch(p, FALLBACK_DSQ, SCX_SLICE_DFL, enq_flags); + scx_bpf_dsq_insert(p, FALLBACK_DSQ, SCX_SLICE_DFL, + enq_flags); } return; } @@ -391,7 +393,7 @@ void BPF_STRUCT_OPS(fcg_enqueue, struct task_struct *p, u64 enq_flags) goto out_release; if (fifo_sched) { - scx_bpf_dispatch(p, cgrp->kn->id, SCX_SLICE_DFL, enq_flags); + scx_bpf_dsq_insert(p, cgrp->kn->id, SCX_SLICE_DFL, enq_flags); } else { u64 tvtime = p->scx.dsq_vtime; @@ -402,8 +404,8 @@ void BPF_STRUCT_OPS(fcg_enqueue, struct task_struct *p, u64 enq_flags) if (vtime_before(tvtime, cgc->tvtime_now - SCX_SLICE_DFL)) tvtime = cgc->tvtime_now - SCX_SLICE_DFL; - scx_bpf_dispatch_vtime(p, cgrp->kn->id, SCX_SLICE_DFL, - tvtime, enq_flags); + scx_bpf_dsq_insert_vtime(p, cgrp->kn->id, SCX_SLICE_DFL, + tvtime, enq_flags); } cgrp_enqueued(cgrp, cgc); @@ -663,7 +665,7 @@ static bool try_pick_next_cgroup(u64 *cgidp) goto out_free; } - if (!scx_bpf_consume(cgid)) { + if (!scx_bpf_dsq_move_to_local(cgid)) { bpf_cgroup_release(cgrp); stat_inc(FCG_STAT_PNC_EMPTY); goto out_stash; @@ -743,7 +745,7 @@ void BPF_STRUCT_OPS(fcg_dispatch, s32 cpu, struct task_struct *prev) goto pick_next_cgroup; if (vtime_before(now, cpuc->cur_at + cgrp_slice_ns)) { - if (scx_bpf_consume(cpuc->cur_cgid)) { + if (scx_bpf_dsq_move_to_local(cpuc->cur_cgid)) { stat_inc(FCG_STAT_CNS_KEEP); return; } @@ -783,7 +785,7 @@ void BPF_STRUCT_OPS(fcg_dispatch, s32 cpu, struct task_struct *prev) pick_next_cgroup: cpuc->cur_at = now; - if (scx_bpf_consume(FALLBACK_DSQ)) { + if (scx_bpf_dsq_move_to_local(FALLBACK_DSQ)) { cpuc->cur_cgid = 0; return; } diff --git a/tools/sched_ext/scx_qmap.bpf.c b/tools/sched_ext/scx_qmap.bpf.c index 67e2a7968cc9..ee264947e0c3 100644 --- a/tools/sched_ext/scx_qmap.bpf.c +++ b/tools/sched_ext/scx_qmap.bpf.c @@ -226,15 +226,15 @@ void BPF_STRUCT_OPS(qmap_enqueue, struct task_struct *p, u64 enq_flags) */ if (tctx->force_local) { tctx->force_local = false; - scx_bpf_dispatch(p, SCX_DSQ_LOCAL, slice_ns, enq_flags); + scx_bpf_dsq_insert(p, SCX_DSQ_LOCAL, slice_ns, enq_flags); return; } - /* if !WAKEUP, select_cpu() wasn't called, try direct dispatch */ - if (!(enq_flags & SCX_ENQ_WAKEUP) && + /* if select_cpu() wasn't called, try direct dispatch */ + if (!(enq_flags & SCX_ENQ_CPU_SELECTED) && (cpu = pick_direct_dispatch_cpu(p, scx_bpf_task_cpu(p))) >= 0) { __sync_fetch_and_add(&nr_ddsp_from_enq, 1); - scx_bpf_dispatch(p, SCX_DSQ_LOCAL_ON | cpu, slice_ns, enq_flags); + scx_bpf_dsq_insert(p, SCX_DSQ_LOCAL_ON | cpu, slice_ns, enq_flags); return; } @@ -247,7 +247,7 @@ void BPF_STRUCT_OPS(qmap_enqueue, struct task_struct *p, u64 enq_flags) if (enq_flags & SCX_ENQ_REENQ) { s32 cpu; - scx_bpf_dispatch(p, SHARED_DSQ, 0, enq_flags); + scx_bpf_dsq_insert(p, SHARED_DSQ, 0, enq_flags); cpu = scx_bpf_pick_idle_cpu(p->cpus_ptr, 0); if (cpu >= 0) scx_bpf_kick_cpu(cpu, SCX_KICK_IDLE); @@ -262,7 +262,7 @@ void BPF_STRUCT_OPS(qmap_enqueue, struct task_struct *p, u64 enq_flags) /* Queue on the selected FIFO. If the FIFO overflows, punt to global. */ if (bpf_map_push_elem(ring, &pid, 0)) { - scx_bpf_dispatch(p, SHARED_DSQ, slice_ns, enq_flags); + scx_bpf_dsq_insert(p, SHARED_DSQ, slice_ns, enq_flags); return; } @@ -294,10 +294,10 @@ static void update_core_sched_head_seq(struct task_struct *p) } /* - * To demonstrate the use of scx_bpf_dispatch_from_dsq(), implement silly - * selective priority boosting mechanism by scanning SHARED_DSQ looking for - * highpri tasks, moving them to HIGHPRI_DSQ and then consuming them first. This - * makes minor difference only when dsp_batch is larger than 1. + * To demonstrate the use of scx_bpf_dsq_move(), implement silly selective + * priority boosting mechanism by scanning SHARED_DSQ looking for highpri tasks, + * moving them to HIGHPRI_DSQ and then consuming them first. This makes minor + * difference only when dsp_batch is larger than 1. * * scx_bpf_dispatch[_vtime]_from_dsq() are allowed both from ops.dispatch() and * non-rq-lock holding BPF programs. As demonstration, this function is called @@ -318,11 +318,11 @@ static bool dispatch_highpri(bool from_timer) if (tctx->highpri) { /* exercise the set_*() and vtime interface too */ - __COMPAT_scx_bpf_dispatch_from_dsq_set_slice( + __COMPAT_scx_bpf_dsq_move_set_slice( BPF_FOR_EACH_ITER, slice_ns * 2); - __COMPAT_scx_bpf_dispatch_from_dsq_set_vtime( + __COMPAT_scx_bpf_dsq_move_set_vtime( BPF_FOR_EACH_ITER, highpri_seq++); - __COMPAT_scx_bpf_dispatch_vtime_from_dsq( + __COMPAT_scx_bpf_dsq_move_vtime( BPF_FOR_EACH_ITER, p, HIGHPRI_DSQ, 0); } } @@ -340,9 +340,9 @@ static bool dispatch_highpri(bool from_timer) else cpu = scx_bpf_pick_any_cpu(p->cpus_ptr, 0); - if (__COMPAT_scx_bpf_dispatch_from_dsq(BPF_FOR_EACH_ITER, p, - SCX_DSQ_LOCAL_ON | cpu, - SCX_ENQ_PREEMPT)) { + if (__COMPAT_scx_bpf_dsq_move(BPF_FOR_EACH_ITER, p, + SCX_DSQ_LOCAL_ON | cpu, + SCX_ENQ_PREEMPT)) { if (cpu == this_cpu) { dispatched = true; __sync_fetch_and_add(&nr_expedited_local, 1); @@ -374,7 +374,7 @@ void BPF_STRUCT_OPS(qmap_dispatch, s32 cpu, struct task_struct *prev) if (dispatch_highpri(false)) return; - if (!nr_highpri_queued && scx_bpf_consume(SHARED_DSQ)) + if (!nr_highpri_queued && scx_bpf_dsq_move_to_local(SHARED_DSQ)) return; if (dsp_inf_loop_after && nr_dispatched > dsp_inf_loop_after) { @@ -385,7 +385,7 @@ void BPF_STRUCT_OPS(qmap_dispatch, s32 cpu, struct task_struct *prev) */ p = bpf_task_from_pid(2); if (p) { - scx_bpf_dispatch(p, SCX_DSQ_LOCAL, slice_ns, 0); + scx_bpf_dsq_insert(p, SCX_DSQ_LOCAL, slice_ns, 0); bpf_task_release(p); return; } @@ -431,7 +431,7 @@ void BPF_STRUCT_OPS(qmap_dispatch, s32 cpu, struct task_struct *prev) update_core_sched_head_seq(p); __sync_fetch_and_add(&nr_dispatched, 1); - scx_bpf_dispatch(p, SHARED_DSQ, slice_ns, 0); + scx_bpf_dsq_insert(p, SHARED_DSQ, slice_ns, 0); bpf_task_release(p); batch--; @@ -439,7 +439,7 @@ void BPF_STRUCT_OPS(qmap_dispatch, s32 cpu, struct task_struct *prev) if (!batch || !scx_bpf_dispatch_nr_slots()) { if (dispatch_highpri(false)) return; - scx_bpf_consume(SHARED_DSQ); + scx_bpf_dsq_move_to_local(SHARED_DSQ); return; } if (!cpuc->dsp_cnt) diff --git a/tools/sched_ext/scx_show_state.py b/tools/sched_ext/scx_show_state.py index 8bc626ede1c4..b800d4f5f2e9 100644 --- a/tools/sched_ext/scx_show_state.py +++ b/tools/sched_ext/scx_show_state.py @@ -35,6 +35,8 @@ print(f'enabled : {read_static_key("__scx_ops_enabled")}') print(f'switching_all : {read_int("scx_switching_all")}') print(f'switched_all : {read_static_key("__scx_switched_all")}') print(f'enable_state : {ops_state_str(enable_state)} ({enable_state})') -print(f'bypass_depth : {read_atomic("scx_ops_bypass_depth")}') +print(f'in_softlockup : {prog["scx_in_softlockup"].value_()}') +print(f'breather_depth: {read_atomic("scx_ops_breather_depth")}') +print(f'bypass_depth : {prog["scx_ops_bypass_depth"].value_()}') print(f'nr_rejected : {read_atomic("scx_nr_rejected")}') print(f'enable_seq : {read_atomic("scx_enable_seq")}') diff --git a/tools/sched_ext/scx_simple.bpf.c b/tools/sched_ext/scx_simple.bpf.c index ed7e8d535fc5..31f915b286c6 100644 --- a/tools/sched_ext/scx_simple.bpf.c +++ b/tools/sched_ext/scx_simple.bpf.c @@ -31,10 +31,10 @@ UEI_DEFINE(uei); /* * Built-in DSQs such as SCX_DSQ_GLOBAL cannot be used as priority queues - * (meaning, cannot be dispatched to with scx_bpf_dispatch_vtime()). We + * (meaning, cannot be dispatched to with scx_bpf_dsq_insert_vtime()). We * therefore create a separate DSQ with ID 0 that we dispatch to and consume - * from. If scx_simple only supported global FIFO scheduling, then we could - * just use SCX_DSQ_GLOBAL. + * from. If scx_simple only supported global FIFO scheduling, then we could just + * use SCX_DSQ_GLOBAL. */ #define SHARED_DSQ 0 @@ -65,7 +65,7 @@ s32 BPF_STRUCT_OPS(simple_select_cpu, struct task_struct *p, s32 prev_cpu, u64 w cpu = scx_bpf_select_cpu_dfl(p, prev_cpu, wake_flags, &is_idle); if (is_idle) { stat_inc(0); /* count local queueing */ - scx_bpf_dispatch(p, SCX_DSQ_LOCAL, SCX_SLICE_DFL, 0); + scx_bpf_dsq_insert(p, SCX_DSQ_LOCAL, SCX_SLICE_DFL, 0); } return cpu; @@ -76,7 +76,7 @@ void BPF_STRUCT_OPS(simple_enqueue, struct task_struct *p, u64 enq_flags) stat_inc(1); /* count global queueing */ if (fifo_sched) { - scx_bpf_dispatch(p, SHARED_DSQ, SCX_SLICE_DFL, enq_flags); + scx_bpf_dsq_insert(p, SHARED_DSQ, SCX_SLICE_DFL, enq_flags); } else { u64 vtime = p->scx.dsq_vtime; @@ -87,14 +87,14 @@ void BPF_STRUCT_OPS(simple_enqueue, struct task_struct *p, u64 enq_flags) if (vtime_before(vtime, vtime_now - SCX_SLICE_DFL)) vtime = vtime_now - SCX_SLICE_DFL; - scx_bpf_dispatch_vtime(p, SHARED_DSQ, SCX_SLICE_DFL, vtime, - enq_flags); + scx_bpf_dsq_insert_vtime(p, SHARED_DSQ, SCX_SLICE_DFL, vtime, + enq_flags); } } void BPF_STRUCT_OPS(simple_dispatch, s32 cpu, struct task_struct *prev) { - scx_bpf_consume(SHARED_DSQ); + scx_bpf_dsq_move_to_local(SHARED_DSQ); } void BPF_STRUCT_OPS(simple_running, struct task_struct *p) diff --git a/tools/scripts/Makefile.arch b/tools/scripts/Makefile.arch index f6a50f06dfc4..eabfe9f411d9 100644 --- a/tools/scripts/Makefile.arch +++ b/tools/scripts/Makefile.arch @@ -7,8 +7,8 @@ HOSTARCH := $(shell uname -m | sed -e s/i.86/x86/ -e s/x86_64/x86/ \ -e s/sh[234].*/sh/ -e s/aarch64.*/arm64/ \ -e s/riscv.*/riscv/ -e s/loongarch.*/loongarch/) -ifndef ARCH -ARCH := $(HOSTARCH) +ifeq ($(strip $(ARCH)),) +override ARCH := $(HOSTARCH) endif SRCARCH := $(ARCH) diff --git a/tools/spi/spidev_test.c b/tools/spi/spidev_test.c index 9179942d7f15..f2135d619a0b 100644 --- a/tools/spi/spidev_test.c +++ b/tools/spi/spidev_test.c @@ -42,6 +42,7 @@ static char *input_file; static char *output_file; static uint32_t speed = 500000; static uint16_t delay; +static uint16_t word_delay; static int verbose; static int transfer_size; static int iterations; @@ -124,6 +125,7 @@ static void transfer(int fd, uint8_t const *tx, uint8_t const *rx, size_t len) .rx_buf = (unsigned long)rx, .len = len, .delay_usecs = delay, + .word_delay_usecs = word_delay, .speed_hz = speed, .bits_per_word = bits, }; @@ -172,11 +174,12 @@ static void transfer(int fd, uint8_t const *tx, uint8_t const *rx, size_t len) static void print_usage(const char *prog) { - printf("Usage: %s [-2348CDFHILMNORSZbdilopsv]\n", prog); + printf("Usage: %s [-2348CDFHILMNORSZbdilopsvw]\n", prog); puts("general device settings:\n" " -D --device device to use (default /dev/spidev1.1)\n" " -s --speed max speed (Hz)\n" " -d --delay delay (usec)\n" + " -w --word-delay word delay (usec)\n" " -l --loop loopback\n" "spi mode:\n" " -H --cpha clock phase\n" @@ -213,6 +216,7 @@ static void parse_opts(int argc, char *argv[]) { "device", 1, 0, 'D' }, { "speed", 1, 0, 's' }, { "delay", 1, 0, 'd' }, + { "word-delay", 1, 0, 'w' }, { "loop", 0, 0, 'l' }, { "cpha", 0, 0, 'H' }, { "cpol", 0, 0, 'O' }, @@ -237,7 +241,7 @@ static void parse_opts(int argc, char *argv[]) }; int c; - c = getopt_long(argc, argv, "D:s:d:b:i:o:lHOLC3ZFMNR248p:vS:I:", + c = getopt_long(argc, argv, "D:s:d:w:b:i:o:lHOLC3ZFMNR248p:vS:I:", lopts, NULL); if (c == -1) @@ -253,6 +257,9 @@ static void parse_opts(int argc, char *argv[]) case 'd': delay = atoi(optarg); break; + case 'w': + word_delay = atoi(optarg); + break; case 'b': bits = atoi(optarg); break; diff --git a/tools/testing/cxl/cxl_core_exports.c b/tools/testing/cxl/cxl_core_exports.c index 077e6883921d..f088792a8925 100644 --- a/tools/testing/cxl/cxl_core_exports.c +++ b/tools/testing/cxl/cxl_core_exports.c @@ -4,4 +4,4 @@ #include "cxl.h" /* Exporting of cxl_core symbols that are only used by cxl_test */ -EXPORT_SYMBOL_NS_GPL(cxl_num_decoders_committed, CXL); +EXPORT_SYMBOL_NS_GPL(cxl_num_decoders_committed, "CXL"); diff --git a/tools/testing/cxl/test/cxl.c b/tools/testing/cxl/test/cxl.c index 90d5afd52dd0..d0337c11f9ee 100644 --- a/tools/testing/cxl/test/cxl.c +++ b/tools/testing/cxl/test/cxl.c @@ -693,26 +693,22 @@ static int mock_decoder_commit(struct cxl_decoder *cxld) return 0; } -static int mock_decoder_reset(struct cxl_decoder *cxld) +static void mock_decoder_reset(struct cxl_decoder *cxld) { struct cxl_port *port = to_cxl_port(cxld->dev.parent); int id = cxld->id; if ((cxld->flags & CXL_DECODER_F_ENABLE) == 0) - return 0; + return; dev_dbg(&port->dev, "%s reset\n", dev_name(&cxld->dev)); - if (port->commit_end != id) { + if (port->commit_end == id) + cxl_port_commit_reap(cxld); + else dev_dbg(&port->dev, "%s: out of order reset, expected decoder%d.%d\n", dev_name(&cxld->dev), port->id, port->commit_end); - return -EBUSY; - } - - port->commit_end--; cxld->flags &= ~CXL_DECODER_F_ENABLE; - - return 0; } static void default_mock_decoder(struct cxl_decoder *cxld) @@ -1062,7 +1058,7 @@ static void mock_companion(struct acpi_device *adev, struct device *dev) #define SZ_64G (SZ_32G * 2) #endif -static __init int cxl_rch_init(void) +static __init int cxl_rch_topo_init(void) { int rc, i; @@ -1090,30 +1086,8 @@ static __init int cxl_rch_init(void) goto err_bridge; } - for (i = 0; i < ARRAY_SIZE(cxl_rcd); i++) { - int idx = NR_MEM_MULTI + NR_MEM_SINGLE + i; - struct platform_device *rch = cxl_rch[i]; - struct platform_device *pdev; - - pdev = platform_device_alloc("cxl_rcd", idx); - if (!pdev) - goto err_mem; - pdev->dev.parent = &rch->dev; - set_dev_node(&pdev->dev, i % 2); - - rc = platform_device_add(pdev); - if (rc) { - platform_device_put(pdev); - goto err_mem; - } - cxl_rcd[i] = pdev; - } - return 0; -err_mem: - for (i = ARRAY_SIZE(cxl_rcd) - 1; i >= 0; i--) - platform_device_unregister(cxl_rcd[i]); err_bridge: for (i = ARRAY_SIZE(cxl_rch) - 1; i >= 0; i--) { struct platform_device *pdev = cxl_rch[i]; @@ -1127,12 +1101,10 @@ err_bridge: return rc; } -static void cxl_rch_exit(void) +static void cxl_rch_topo_exit(void) { int i; - for (i = ARRAY_SIZE(cxl_rcd) - 1; i >= 0; i--) - platform_device_unregister(cxl_rcd[i]); for (i = ARRAY_SIZE(cxl_rch) - 1; i >= 0; i--) { struct platform_device *pdev = cxl_rch[i]; @@ -1143,7 +1115,7 @@ static void cxl_rch_exit(void) } } -static __init int cxl_single_init(void) +static __init int cxl_single_topo_init(void) { int i, rc; @@ -1228,29 +1200,8 @@ static __init int cxl_single_init(void) cxl_swd_single[i] = pdev; } - for (i = 0; i < ARRAY_SIZE(cxl_mem_single); i++) { - struct platform_device *dport = cxl_swd_single[i]; - struct platform_device *pdev; - - pdev = platform_device_alloc("cxl_mem", NR_MEM_MULTI + i); - if (!pdev) - goto err_mem; - pdev->dev.parent = &dport->dev; - set_dev_node(&pdev->dev, i % 2); - - rc = platform_device_add(pdev); - if (rc) { - platform_device_put(pdev); - goto err_mem; - } - cxl_mem_single[i] = pdev; - } - return 0; -err_mem: - for (i = ARRAY_SIZE(cxl_mem_single) - 1; i >= 0; i--) - platform_device_unregister(cxl_mem_single[i]); err_dport: for (i = ARRAY_SIZE(cxl_swd_single) - 1; i >= 0; i--) platform_device_unregister(cxl_swd_single[i]); @@ -1273,12 +1224,10 @@ err_bridge: return rc; } -static void cxl_single_exit(void) +static void cxl_single_topo_exit(void) { int i; - for (i = ARRAY_SIZE(cxl_mem_single) - 1; i >= 0; i--) - platform_device_unregister(cxl_mem_single[i]); for (i = ARRAY_SIZE(cxl_swd_single) - 1; i >= 0; i--) platform_device_unregister(cxl_swd_single[i]); for (i = ARRAY_SIZE(cxl_swu_single) - 1; i >= 0; i--) @@ -1295,6 +1244,91 @@ static void cxl_single_exit(void) } } +static void cxl_mem_exit(void) +{ + int i; + + for (i = ARRAY_SIZE(cxl_rcd) - 1; i >= 0; i--) + platform_device_unregister(cxl_rcd[i]); + for (i = ARRAY_SIZE(cxl_mem_single) - 1; i >= 0; i--) + platform_device_unregister(cxl_mem_single[i]); + for (i = ARRAY_SIZE(cxl_mem) - 1; i >= 0; i--) + platform_device_unregister(cxl_mem[i]); +} + +static int cxl_mem_init(void) +{ + int i, rc; + + for (i = 0; i < ARRAY_SIZE(cxl_mem); i++) { + struct platform_device *dport = cxl_switch_dport[i]; + struct platform_device *pdev; + + pdev = platform_device_alloc("cxl_mem", i); + if (!pdev) + goto err_mem; + pdev->dev.parent = &dport->dev; + set_dev_node(&pdev->dev, i % 2); + + rc = platform_device_add(pdev); + if (rc) { + platform_device_put(pdev); + goto err_mem; + } + cxl_mem[i] = pdev; + } + + for (i = 0; i < ARRAY_SIZE(cxl_mem_single); i++) { + struct platform_device *dport = cxl_swd_single[i]; + struct platform_device *pdev; + + pdev = platform_device_alloc("cxl_mem", NR_MEM_MULTI + i); + if (!pdev) + goto err_single; + pdev->dev.parent = &dport->dev; + set_dev_node(&pdev->dev, i % 2); + + rc = platform_device_add(pdev); + if (rc) { + platform_device_put(pdev); + goto err_single; + } + cxl_mem_single[i] = pdev; + } + + for (i = 0; i < ARRAY_SIZE(cxl_rcd); i++) { + int idx = NR_MEM_MULTI + NR_MEM_SINGLE + i; + struct platform_device *rch = cxl_rch[i]; + struct platform_device *pdev; + + pdev = platform_device_alloc("cxl_rcd", idx); + if (!pdev) + goto err_rcd; + pdev->dev.parent = &rch->dev; + set_dev_node(&pdev->dev, i % 2); + + rc = platform_device_add(pdev); + if (rc) { + platform_device_put(pdev); + goto err_rcd; + } + cxl_rcd[i] = pdev; + } + + return 0; + +err_rcd: + for (i = ARRAY_SIZE(cxl_rcd) - 1; i >= 0; i--) + platform_device_unregister(cxl_rcd[i]); +err_single: + for (i = ARRAY_SIZE(cxl_mem_single) - 1; i >= 0; i--) + platform_device_unregister(cxl_mem_single[i]); +err_mem: + for (i = ARRAY_SIZE(cxl_mem) - 1; i >= 0; i--) + platform_device_unregister(cxl_mem[i]); + return rc; +} + static __init int cxl_test_init(void) { int rc, i; @@ -1407,29 +1441,11 @@ static __init int cxl_test_init(void) cxl_switch_dport[i] = pdev; } - for (i = 0; i < ARRAY_SIZE(cxl_mem); i++) { - struct platform_device *dport = cxl_switch_dport[i]; - struct platform_device *pdev; - - pdev = platform_device_alloc("cxl_mem", i); - if (!pdev) - goto err_mem; - pdev->dev.parent = &dport->dev; - set_dev_node(&pdev->dev, i % 2); - - rc = platform_device_add(pdev); - if (rc) { - platform_device_put(pdev); - goto err_mem; - } - cxl_mem[i] = pdev; - } - - rc = cxl_single_init(); + rc = cxl_single_topo_init(); if (rc) - goto err_mem; + goto err_dport; - rc = cxl_rch_init(); + rc = cxl_rch_topo_init(); if (rc) goto err_single; @@ -1442,19 +1458,20 @@ static __init int cxl_test_init(void) rc = platform_device_add(cxl_acpi); if (rc) - goto err_add; + goto err_root; + + rc = cxl_mem_init(); + if (rc) + goto err_root; return 0; -err_add: +err_root: platform_device_put(cxl_acpi); err_rch: - cxl_rch_exit(); + cxl_rch_topo_exit(); err_single: - cxl_single_exit(); -err_mem: - for (i = ARRAY_SIZE(cxl_mem) - 1; i >= 0; i--) - platform_device_unregister(cxl_mem[i]); + cxl_single_topo_exit(); err_dport: for (i = ARRAY_SIZE(cxl_switch_dport) - 1; i >= 0; i--) platform_device_unregister(cxl_switch_dport[i]); @@ -1486,11 +1503,10 @@ static __exit void cxl_test_exit(void) { int i; + cxl_mem_exit(); platform_device_unregister(cxl_acpi); - cxl_rch_exit(); - cxl_single_exit(); - for (i = ARRAY_SIZE(cxl_mem) - 1; i >= 0; i--) - platform_device_unregister(cxl_mem[i]); + cxl_rch_topo_exit(); + cxl_single_topo_exit(); for (i = ARRAY_SIZE(cxl_switch_dport) - 1; i >= 0; i--) platform_device_unregister(cxl_switch_dport[i]); for (i = ARRAY_SIZE(cxl_switch_uport) - 1; i >= 0; i--) @@ -1515,5 +1531,5 @@ MODULE_PARM_DESC(interleave_arithmetic, "Modulo:0, XOR:1"); module_init(cxl_test_init); module_exit(cxl_test_exit); MODULE_LICENSE("GPL v2"); -MODULE_IMPORT_NS(ACPI); -MODULE_IMPORT_NS(CXL); +MODULE_IMPORT_NS("ACPI"); +MODULE_IMPORT_NS("CXL"); diff --git a/tools/testing/cxl/test/mem.c b/tools/testing/cxl/test/mem.c index ad5c4c18c5c6..347c1e7b37bd 100644 --- a/tools/testing/cxl/test/mem.c +++ b/tools/testing/cxl/test/mem.c @@ -1673,9 +1673,10 @@ static struct platform_driver cxl_mock_mem_driver = { .name = KBUILD_MODNAME, .dev_groups = cxl_mock_mem_groups, .groups = cxl_mock_mem_core_groups, + .probe_type = PROBE_PREFER_ASYNCHRONOUS, }, }; module_platform_driver(cxl_mock_mem_driver); MODULE_LICENSE("GPL v2"); -MODULE_IMPORT_NS(CXL); +MODULE_IMPORT_NS("CXL"); diff --git a/tools/testing/cxl/test/mock.c b/tools/testing/cxl/test/mock.c index f4ce96cc11d4..450c7566c33f 100644 --- a/tools/testing/cxl/test/mock.c +++ b/tools/testing/cxl/test/mock.c @@ -76,7 +76,7 @@ int __wrap_acpi_table_parse_cedt(enum acpi_cedt_type id, return rc; } -EXPORT_SYMBOL_NS_GPL(__wrap_acpi_table_parse_cedt, ACPI); +EXPORT_SYMBOL_NS_GPL(__wrap_acpi_table_parse_cedt, "ACPI"); acpi_status __wrap_acpi_evaluate_integer(acpi_handle handle, acpi_string pathname, @@ -147,7 +147,7 @@ struct cxl_hdm *__wrap_devm_cxl_setup_hdm(struct cxl_port *port, return cxlhdm; } -EXPORT_SYMBOL_NS_GPL(__wrap_devm_cxl_setup_hdm, CXL); +EXPORT_SYMBOL_NS_GPL(__wrap_devm_cxl_setup_hdm, "CXL"); int __wrap_devm_cxl_add_passthrough_decoder(struct cxl_port *port) { @@ -162,7 +162,7 @@ int __wrap_devm_cxl_add_passthrough_decoder(struct cxl_port *port) return rc; } -EXPORT_SYMBOL_NS_GPL(__wrap_devm_cxl_add_passthrough_decoder, CXL); +EXPORT_SYMBOL_NS_GPL(__wrap_devm_cxl_add_passthrough_decoder, "CXL"); int __wrap_devm_cxl_enumerate_decoders(struct cxl_hdm *cxlhdm, struct cxl_endpoint_dvsec_info *info) @@ -179,7 +179,7 @@ int __wrap_devm_cxl_enumerate_decoders(struct cxl_hdm *cxlhdm, return rc; } -EXPORT_SYMBOL_NS_GPL(__wrap_devm_cxl_enumerate_decoders, CXL); +EXPORT_SYMBOL_NS_GPL(__wrap_devm_cxl_enumerate_decoders, "CXL"); int __wrap_devm_cxl_port_enumerate_dports(struct cxl_port *port) { @@ -194,7 +194,7 @@ int __wrap_devm_cxl_port_enumerate_dports(struct cxl_port *port) return rc; } -EXPORT_SYMBOL_NS_GPL(__wrap_devm_cxl_port_enumerate_dports, CXL); +EXPORT_SYMBOL_NS_GPL(__wrap_devm_cxl_port_enumerate_dports, "CXL"); int __wrap_cxl_await_media_ready(struct cxl_dev_state *cxlds) { @@ -209,7 +209,7 @@ int __wrap_cxl_await_media_ready(struct cxl_dev_state *cxlds) return rc; } -EXPORT_SYMBOL_NS_GPL(__wrap_cxl_await_media_ready, CXL); +EXPORT_SYMBOL_NS_GPL(__wrap_cxl_await_media_ready, "CXL"); int __wrap_cxl_hdm_decode_init(struct cxl_dev_state *cxlds, struct cxl_hdm *cxlhdm, @@ -226,7 +226,7 @@ int __wrap_cxl_hdm_decode_init(struct cxl_dev_state *cxlds, return rc; } -EXPORT_SYMBOL_NS_GPL(__wrap_cxl_hdm_decode_init, CXL); +EXPORT_SYMBOL_NS_GPL(__wrap_cxl_hdm_decode_init, "CXL"); int __wrap_cxl_dvsec_rr_decode(struct device *dev, struct cxl_port *port, struct cxl_endpoint_dvsec_info *info) @@ -242,7 +242,7 @@ int __wrap_cxl_dvsec_rr_decode(struct device *dev, struct cxl_port *port, return rc; } -EXPORT_SYMBOL_NS_GPL(__wrap_cxl_dvsec_rr_decode, CXL); +EXPORT_SYMBOL_NS_GPL(__wrap_cxl_dvsec_rr_decode, "CXL"); struct cxl_dport *__wrap_devm_cxl_add_rch_dport(struct cxl_port *port, struct device *dport_dev, @@ -266,7 +266,7 @@ struct cxl_dport *__wrap_devm_cxl_add_rch_dport(struct cxl_port *port, return dport; } -EXPORT_SYMBOL_NS_GPL(__wrap_devm_cxl_add_rch_dport, CXL); +EXPORT_SYMBOL_NS_GPL(__wrap_devm_cxl_add_rch_dport, "CXL"); resource_size_t __wrap_cxl_rcd_component_reg_phys(struct device *dev, struct cxl_dport *dport) @@ -283,7 +283,7 @@ resource_size_t __wrap_cxl_rcd_component_reg_phys(struct device *dev, return component_reg_phys; } -EXPORT_SYMBOL_NS_GPL(__wrap_cxl_rcd_component_reg_phys, CXL); +EXPORT_SYMBOL_NS_GPL(__wrap_cxl_rcd_component_reg_phys, "CXL"); void __wrap_cxl_endpoint_parse_cdat(struct cxl_port *port) { @@ -297,7 +297,7 @@ void __wrap_cxl_endpoint_parse_cdat(struct cxl_port *port) cxl_endpoint_parse_cdat(port); put_cxl_mock_ops(index); } -EXPORT_SYMBOL_NS_GPL(__wrap_cxl_endpoint_parse_cdat, CXL); +EXPORT_SYMBOL_NS_GPL(__wrap_cxl_endpoint_parse_cdat, "CXL"); void __wrap_cxl_dport_init_ras_reporting(struct cxl_dport *dport, struct device *host) { @@ -309,8 +309,8 @@ void __wrap_cxl_dport_init_ras_reporting(struct cxl_dport *dport, struct device put_cxl_mock_ops(index); } -EXPORT_SYMBOL_NS_GPL(__wrap_cxl_dport_init_ras_reporting, CXL); +EXPORT_SYMBOL_NS_GPL(__wrap_cxl_dport_init_ras_reporting, "CXL"); MODULE_LICENSE("GPL v2"); -MODULE_IMPORT_NS(ACPI); -MODULE_IMPORT_NS(CXL); +MODULE_IMPORT_NS("ACPI"); +MODULE_IMPORT_NS("CXL"); diff --git a/tools/testing/kunit/kunit.py b/tools/testing/kunit/kunit.py index bc74088c458a..676fa99a8b19 100755 --- a/tools/testing/kunit/kunit.py +++ b/tools/testing/kunit/kunit.py @@ -23,7 +23,7 @@ from typing import Iterable, List, Optional, Sequence, Tuple import kunit_json import kunit_kernel import kunit_parser -from kunit_printer import stdout +from kunit_printer import stdout, null_printer class KunitStatus(Enum): SUCCESS = auto() @@ -49,6 +49,8 @@ class KunitBuildRequest(KunitConfigRequest): class KunitParseRequest: raw_output: Optional[str] json: Optional[str] + summary: bool + failed: bool @dataclass class KunitExecRequest(KunitParseRequest): @@ -235,11 +237,18 @@ def parse_tests(request: KunitParseRequest, metadata: kunit_json.Metadata, input parse_time = time.time() - parse_start return KunitResult(KunitStatus.SUCCESS, parse_time), fake_test + default_printer = stdout + if request.summary or request.failed: + default_printer = null_printer # Actually parse the test results. - test = kunit_parser.parse_run_tests(input_data) + test = kunit_parser.parse_run_tests(input_data, default_printer) parse_time = time.time() - parse_start + if request.failed: + kunit_parser.print_test(test, request.failed, stdout) + kunit_parser.print_summary_line(test, stdout) + if request.json: json_str = kunit_json.get_json_result( test=test, @@ -413,6 +422,14 @@ def add_parse_opts(parser: argparse.ArgumentParser) -> None: help='Prints parsed test results as JSON to stdout or a file if ' 'a filename is specified. Does nothing if --raw_output is set.', type=str, const='stdout', default=None, metavar='FILE') + parser.add_argument('--summary', + help='Prints only the summary line for parsed test results.' + 'Does nothing if --raw_output is set.', + action='store_true') + parser.add_argument('--failed', + help='Prints only the failed parsed test results and summary line.' + 'Does nothing if --raw_output is set.', + action='store_true') def tree_from_args(cli_args: argparse.Namespace) -> kunit_kernel.LinuxSourceTree: @@ -448,6 +465,8 @@ def run_handler(cli_args: argparse.Namespace) -> None: jobs=cli_args.jobs, raw_output=cli_args.raw_output, json=cli_args.json, + summary=cli_args.summary, + failed=cli_args.failed, timeout=cli_args.timeout, filter_glob=cli_args.filter_glob, filter=cli_args.filter, @@ -495,6 +514,8 @@ def exec_handler(cli_args: argparse.Namespace) -> None: exec_request = KunitExecRequest(raw_output=cli_args.raw_output, build_dir=cli_args.build_dir, json=cli_args.json, + summary=cli_args.summary, + failed=cli_args.failed, timeout=cli_args.timeout, filter_glob=cli_args.filter_glob, filter=cli_args.filter, @@ -520,7 +541,8 @@ def parse_handler(cli_args: argparse.Namespace) -> None: # We know nothing about how the result was created! metadata = kunit_json.Metadata() request = KunitParseRequest(raw_output=cli_args.raw_output, - json=cli_args.json) + json=cli_args.json, summary=cli_args.summary, + failed=cli_args.failed) result, _ = parse_tests(request, metadata, kunit_output) if result.status != KunitStatus.SUCCESS: sys.exit(1) diff --git a/tools/testing/kunit/kunit_kernel.py b/tools/testing/kunit/kunit_kernel.py index 61931c4926fd..e76d7894b6c5 100644 --- a/tools/testing/kunit/kunit_kernel.py +++ b/tools/testing/kunit/kunit_kernel.py @@ -105,7 +105,9 @@ class LinuxSourceTreeOperationsQemu(LinuxSourceTreeOperations): self._kconfig = qemu_arch_params.kconfig self._qemu_arch = qemu_arch_params.qemu_arch self._kernel_path = qemu_arch_params.kernel_path - self._kernel_command_line = qemu_arch_params.kernel_command_line + ' kunit_shutdown=reboot' + self._kernel_command_line = qemu_arch_params.kernel_command_line + if 'kunit_shutdown=' not in self._kernel_command_line: + self._kernel_command_line += ' kunit_shutdown=reboot' self._extra_qemu_params = qemu_arch_params.extra_qemu_params self._serial = qemu_arch_params.serial diff --git a/tools/testing/kunit/kunit_parser.py b/tools/testing/kunit/kunit_parser.py index ce34be15c929..29fc27e8949b 100644 --- a/tools/testing/kunit/kunit_parser.py +++ b/tools/testing/kunit/kunit_parser.py @@ -17,7 +17,7 @@ import textwrap from enum import Enum, auto from typing import Iterable, Iterator, List, Optional, Tuple -from kunit_printer import stdout +from kunit_printer import Printer, stdout class Test: """ @@ -54,10 +54,10 @@ class Test: """Returns string representation of a Test class object.""" return str(self) - def add_error(self, error_message: str) -> None: + def add_error(self, printer: Printer, error_message: str) -> None: """Records an error that occurred while parsing this test.""" self.counts.errors += 1 - stdout.print_with_timestamp(stdout.red('[ERROR]') + f' Test: {self.name}: {error_message}') + printer.print_with_timestamp(stdout.red('[ERROR]') + f' Test: {self.name}: {error_message}') def ok_status(self) -> bool: """Returns true if the status was ok, i.e. passed or skipped.""" @@ -251,7 +251,7 @@ KTAP_VERSIONS = [1] TAP_VERSIONS = [13, 14] def check_version(version_num: int, accepted_versions: List[int], - version_type: str, test: Test) -> None: + version_type: str, test: Test, printer: Printer) -> None: """ Adds error to test object if version number is too high or too low. @@ -263,13 +263,14 @@ def check_version(version_num: int, accepted_versions: List[int], version_type - 'KTAP' or 'TAP' depending on the type of version line. test - Test object for current test being parsed + printer - Printer object to output error """ if version_num < min(accepted_versions): - test.add_error(f'{version_type} version lower than expected!') + test.add_error(printer, f'{version_type} version lower than expected!') elif version_num > max(accepted_versions): - test.add_error(f'{version_type} version higer than expected!') + test.add_error(printer, f'{version_type} version higer than expected!') -def parse_ktap_header(lines: LineStream, test: Test) -> bool: +def parse_ktap_header(lines: LineStream, test: Test, printer: Printer) -> bool: """ Parses KTAP/TAP header line and checks version number. Returns False if fails to parse KTAP/TAP header line. @@ -281,6 +282,7 @@ def parse_ktap_header(lines: LineStream, test: Test) -> bool: Parameters: lines - LineStream of KTAP output to parse test - Test object for current test being parsed + printer - Printer object to output results Return: True if successfully parsed KTAP/TAP header line @@ -289,10 +291,10 @@ def parse_ktap_header(lines: LineStream, test: Test) -> bool: tap_match = TAP_START.match(lines.peek()) if ktap_match: version_num = int(ktap_match.group(1)) - check_version(version_num, KTAP_VERSIONS, 'KTAP', test) + check_version(version_num, KTAP_VERSIONS, 'KTAP', test, printer) elif tap_match: version_num = int(tap_match.group(1)) - check_version(version_num, TAP_VERSIONS, 'TAP', test) + check_version(version_num, TAP_VERSIONS, 'TAP', test, printer) else: return False lines.pop() @@ -380,7 +382,7 @@ def peek_test_name_match(lines: LineStream, test: Test) -> bool: return name == test.name def parse_test_result(lines: LineStream, test: Test, - expected_num: int) -> bool: + expected_num: int, printer: Printer) -> bool: """ Parses test result line and stores the status and name in the test object. Reports an error if the test number does not match expected @@ -398,6 +400,7 @@ def parse_test_result(lines: LineStream, test: Test, lines - LineStream of KTAP output to parse test - Test object for current test being parsed expected_num - expected test number for current test + printer - Printer object to output results Return: True if successfully parsed a test result line. @@ -420,7 +423,7 @@ def parse_test_result(lines: LineStream, test: Test, # Check test num num = int(match.group(2)) if num != expected_num: - test.add_error(f'Expected test number {expected_num} but found {num}') + test.add_error(printer, f'Expected test number {expected_num} but found {num}') # Set status of test object status = match.group(1) @@ -486,7 +489,7 @@ def format_test_divider(message: str, len_message: int) -> str: len_2 = difference - len_1 return ('=' * len_1) + f' {message} ' + ('=' * len_2) -def print_test_header(test: Test) -> None: +def print_test_header(test: Test, printer: Printer) -> None: """ Prints test header with test name and optionally the expected number of subtests. @@ -496,6 +499,7 @@ def print_test_header(test: Test) -> None: Parameters: test - Test object representing current test being printed + printer - Printer object to output results """ message = test.name if message != "": @@ -507,15 +511,15 @@ def print_test_header(test: Test) -> None: message += '(1 subtest)' else: message += f'({test.expected_count} subtests)' - stdout.print_with_timestamp(format_test_divider(message, len(message))) + printer.print_with_timestamp(format_test_divider(message, len(message))) -def print_log(log: Iterable[str]) -> None: +def print_log(log: Iterable[str], printer: Printer) -> None: """Prints all strings in saved log for test in yellow.""" formatted = textwrap.dedent('\n'.join(log)) for line in formatted.splitlines(): - stdout.print_with_timestamp(stdout.yellow(line)) + printer.print_with_timestamp(printer.yellow(line)) -def format_test_result(test: Test) -> str: +def format_test_result(test: Test, printer: Printer) -> str: """ Returns string with formatted test result with colored status and test name. @@ -525,23 +529,24 @@ def format_test_result(test: Test) -> str: Parameters: test - Test object representing current test being printed + printer - Printer object to output results Return: String containing formatted test result """ if test.status == TestStatus.SUCCESS: - return stdout.green('[PASSED] ') + test.name + return printer.green('[PASSED] ') + test.name if test.status == TestStatus.SKIPPED: - return stdout.yellow('[SKIPPED] ') + test.name + return printer.yellow('[SKIPPED] ') + test.name if test.status == TestStatus.NO_TESTS: - return stdout.yellow('[NO TESTS RUN] ') + test.name + return printer.yellow('[NO TESTS RUN] ') + test.name if test.status == TestStatus.TEST_CRASHED: - print_log(test.log) + print_log(test.log, printer) return stdout.red('[CRASHED] ') + test.name - print_log(test.log) - return stdout.red('[FAILED] ') + test.name + print_log(test.log, printer) + return printer.red('[FAILED] ') + test.name -def print_test_result(test: Test) -> None: +def print_test_result(test: Test, printer: Printer) -> None: """ Prints result line with status of test. @@ -550,10 +555,11 @@ def print_test_result(test: Test) -> None: Parameters: test - Test object representing current test being printed + printer - Printer object """ - stdout.print_with_timestamp(format_test_result(test)) + printer.print_with_timestamp(format_test_result(test, printer)) -def print_test_footer(test: Test) -> None: +def print_test_footer(test: Test, printer: Printer) -> None: """ Prints test footer with status of test. @@ -562,12 +568,38 @@ def print_test_footer(test: Test) -> None: Parameters: test - Test object representing current test being printed + printer - Printer object to output results """ - message = format_test_result(test) - stdout.print_with_timestamp(format_test_divider(message, - len(message) - stdout.color_len())) + message = format_test_result(test, printer) + printer.print_with_timestamp(format_test_divider(message, + len(message) - printer.color_len())) +def print_test(test: Test, failed_only: bool, printer: Printer) -> None: + """ + Prints Test object to given printer. For a child test, the result line is + printed. For a parent test, the test header, all child test results, and + the test footer are all printed. If failed_only is true, only failed/crashed + tests will be printed. + Parameters: + test - Test object to print + failed_only - True if only failed/crashed tests should be printed. + printer - Printer object to output results + """ + if test.name == "main": + printer.print_with_timestamp(DIVIDER) + for subtest in test.subtests: + print_test(subtest, failed_only, printer) + printer.print_with_timestamp(DIVIDER) + elif test.subtests != []: + if not failed_only or not test.ok_status(): + print_test_header(test, printer) + for subtest in test.subtests: + print_test(subtest, failed_only, printer) + print_test_footer(test, printer) + else: + if not failed_only or not test.ok_status(): + print_test_result(test, printer) def _summarize_failed_tests(test: Test) -> str: """Tries to summarize all the failing subtests in `test`.""" @@ -601,7 +633,7 @@ def _summarize_failed_tests(test: Test) -> str: return 'Failures: ' + ', '.join(failures) -def print_summary_line(test: Test) -> None: +def print_summary_line(test: Test, printer: Printer) -> None: """ Prints summary line of test object. Color of line is dependent on status of test. Color is green if test passes, yellow if test is @@ -614,6 +646,7 @@ def print_summary_line(test: Test) -> None: Errors: 0" test - Test object representing current test being printed + printer - Printer object to output results """ if test.status == TestStatus.SUCCESS: color = stdout.green @@ -621,7 +654,7 @@ def print_summary_line(test: Test) -> None: color = stdout.yellow else: color = stdout.red - stdout.print_with_timestamp(color(f'Testing complete. {test.counts}')) + printer.print_with_timestamp(color(f'Testing complete. {test.counts}')) # Summarize failures that might have gone off-screen since we had a lot # of tests (arbitrarily defined as >=100 for now). @@ -630,7 +663,7 @@ def print_summary_line(test: Test) -> None: summarized = _summarize_failed_tests(test) if not summarized: return - stdout.print_with_timestamp(color(summarized)) + printer.print_with_timestamp(color(summarized)) # Other methods: @@ -654,7 +687,7 @@ def bubble_up_test_results(test: Test) -> None: elif test.counts.get_status() == TestStatus.TEST_CRASHED: test.status = TestStatus.TEST_CRASHED -def parse_test(lines: LineStream, expected_num: int, log: List[str], is_subtest: bool) -> Test: +def parse_test(lines: LineStream, expected_num: int, log: List[str], is_subtest: bool, printer: Printer) -> Test: """ Finds next test to parse in LineStream, creates new Test object, parses any subtests of the test, populates Test object with all @@ -710,6 +743,7 @@ def parse_test(lines: LineStream, expected_num: int, log: List[str], is_subtest: log - list of strings containing any preceding diagnostic lines corresponding to the current test is_subtest - boolean indicating whether test is a subtest + printer - Printer object to output results Return: Test object populated with characteristics and any subtests @@ -725,14 +759,14 @@ def parse_test(lines: LineStream, expected_num: int, log: List[str], is_subtest: # If parsing the main/top-level test, parse KTAP version line and # test plan test.name = "main" - ktap_line = parse_ktap_header(lines, test) + ktap_line = parse_ktap_header(lines, test, printer) test.log.extend(parse_diagnostic(lines)) parse_test_plan(lines, test) parent_test = True else: # If not the main test, attempt to parse a test header containing # the KTAP version line and/or subtest header line - ktap_line = parse_ktap_header(lines, test) + ktap_line = parse_ktap_header(lines, test, printer) subtest_line = parse_test_header(lines, test) parent_test = (ktap_line or subtest_line) if parent_test: @@ -740,7 +774,7 @@ def parse_test(lines: LineStream, expected_num: int, log: List[str], is_subtest: # to parse test plan and print test header test.log.extend(parse_diagnostic(lines)) parse_test_plan(lines, test) - print_test_header(test) + print_test_header(test, printer) expected_count = test.expected_count subtests = [] test_num = 1 @@ -758,16 +792,16 @@ def parse_test(lines: LineStream, expected_num: int, log: List[str], is_subtest: # If parser reaches end of test before # parsing expected number of subtests, print # crashed subtest and record error - test.add_error('missing expected subtest!') + test.add_error(printer, 'missing expected subtest!') sub_test.log.extend(sub_log) test.counts.add_status( TestStatus.TEST_CRASHED) - print_test_result(sub_test) + print_test_result(sub_test, printer) else: test.log.extend(sub_log) break else: - sub_test = parse_test(lines, test_num, sub_log, True) + sub_test = parse_test(lines, test_num, sub_log, True, printer) subtests.append(sub_test) test_num += 1 test.subtests = subtests @@ -775,51 +809,51 @@ def parse_test(lines: LineStream, expected_num: int, log: List[str], is_subtest: # If not main test, look for test result line test.log.extend(parse_diagnostic(lines)) if test.name != "" and not peek_test_name_match(lines, test): - test.add_error('missing subtest result line!') + test.add_error(printer, 'missing subtest result line!') else: - parse_test_result(lines, test, expected_num) + parse_test_result(lines, test, expected_num, printer) # Check for there being no subtests within parent test if parent_test and len(subtests) == 0: # Don't override a bad status if this test had one reported. # Assumption: no subtests means CRASHED is from Test.__init__() if test.status in (TestStatus.TEST_CRASHED, TestStatus.SUCCESS): - print_log(test.log) + print_log(test.log, printer) test.status = TestStatus.NO_TESTS - test.add_error('0 tests run!') + test.add_error(printer, '0 tests run!') # Add statuses to TestCounts attribute in Test object bubble_up_test_results(test) if parent_test and is_subtest: # If test has subtests and is not the main test object, print # footer. - print_test_footer(test) + print_test_footer(test, printer) elif is_subtest: - print_test_result(test) + print_test_result(test, printer) return test -def parse_run_tests(kernel_output: Iterable[str]) -> Test: +def parse_run_tests(kernel_output: Iterable[str], printer: Printer) -> Test: """ Using kernel output, extract KTAP lines, parse the lines for test results and print condensed test results and summary line. Parameters: kernel_output - Iterable object contains lines of kernel output + printer - Printer object to output results Return: Test - the main test object with all subtests. """ - stdout.print_with_timestamp(DIVIDER) + printer.print_with_timestamp(DIVIDER) lines = extract_tap_lines(kernel_output) test = Test() if not lines: test.name = '<missing>' - test.add_error('Could not find any KTAP output. Did any KUnit tests run?') + test.add_error(printer, 'Could not find any KTAP output. Did any KUnit tests run?') test.status = TestStatus.FAILURE_TO_PARSE_TESTS else: - test = parse_test(lines, 0, [], False) + test = parse_test(lines, 0, [], False, printer) if test.status != TestStatus.NO_TESTS: test.status = test.counts.get_status() - stdout.print_with_timestamp(DIVIDER) - print_summary_line(test) + printer.print_with_timestamp(DIVIDER) return test diff --git a/tools/testing/kunit/kunit_printer.py b/tools/testing/kunit/kunit_printer.py index 015adf87dc2c..ca119f61fe79 100644 --- a/tools/testing/kunit/kunit_printer.py +++ b/tools/testing/kunit/kunit_printer.py @@ -15,12 +15,17 @@ _RESET = '\033[0;0m' class Printer: """Wraps a file object, providing utilities for coloring output, etc.""" - def __init__(self, output: typing.IO[str]): + def __init__(self, print: bool=True, output: typing.IO[str]=sys.stdout): self._output = output - self._use_color = output.isatty() + self._print = print + if print: + self._use_color = output.isatty() + else: + self._use_color = False def print(self, message: str) -> None: - print(message, file=self._output) + if self._print: + print(message, file=self._output) def print_with_timestamp(self, message: str) -> None: ts = datetime.datetime.now().strftime('%H:%M:%S') @@ -45,4 +50,5 @@ class Printer: return len(self.red('')) # Provides a default instance that prints to stdout -stdout = Printer(sys.stdout) +stdout = Printer() +null_printer = Printer(print=False) diff --git a/tools/testing/kunit/kunit_tool_test.py b/tools/testing/kunit/kunit_tool_test.py index 2beb7327e53f..0bcb0cc002f8 100755 --- a/tools/testing/kunit/kunit_tool_test.py +++ b/tools/testing/kunit/kunit_tool_test.py @@ -23,6 +23,7 @@ import kunit_parser import kunit_kernel import kunit_json import kunit +from kunit_printer import stdout test_tmpdir = '' abs_test_data_dir = '' @@ -139,28 +140,28 @@ class KUnitParserTest(unittest.TestCase): def test_parse_successful_test_log(self): all_passed_log = test_data_path('test_is_test_passed-all_passed.log') with open(all_passed_log) as file: - result = kunit_parser.parse_run_tests(file.readlines()) + result = kunit_parser.parse_run_tests(file.readlines(), stdout) self.assertEqual(kunit_parser.TestStatus.SUCCESS, result.status) self.assertEqual(result.counts.errors, 0) def test_parse_successful_nested_tests_log(self): all_passed_log = test_data_path('test_is_test_passed-all_passed_nested.log') with open(all_passed_log) as file: - result = kunit_parser.parse_run_tests(file.readlines()) + result = kunit_parser.parse_run_tests(file.readlines(), stdout) self.assertEqual(kunit_parser.TestStatus.SUCCESS, result.status) self.assertEqual(result.counts.errors, 0) def test_kselftest_nested(self): kselftest_log = test_data_path('test_is_test_passed-kselftest.log') with open(kselftest_log) as file: - result = kunit_parser.parse_run_tests(file.readlines()) + result = kunit_parser.parse_run_tests(file.readlines(), stdout) self.assertEqual(kunit_parser.TestStatus.SUCCESS, result.status) self.assertEqual(result.counts.errors, 0) def test_parse_failed_test_log(self): failed_log = test_data_path('test_is_test_passed-failure.log') with open(failed_log) as file: - result = kunit_parser.parse_run_tests(file.readlines()) + result = kunit_parser.parse_run_tests(file.readlines(), stdout) self.assertEqual(kunit_parser.TestStatus.FAILURE, result.status) self.assertEqual(result.counts.errors, 0) @@ -168,7 +169,7 @@ class KUnitParserTest(unittest.TestCase): empty_log = test_data_path('test_is_test_passed-no_tests_run_no_header.log') with open(empty_log) as file: result = kunit_parser.parse_run_tests( - kunit_parser.extract_tap_lines(file.readlines())) + kunit_parser.extract_tap_lines(file.readlines()), stdout) self.assertEqual(0, len(result.subtests)) self.assertEqual(kunit_parser.TestStatus.FAILURE_TO_PARSE_TESTS, result.status) self.assertEqual(result.counts.errors, 1) @@ -179,7 +180,7 @@ class KUnitParserTest(unittest.TestCase): with open(missing_plan_log) as file: result = kunit_parser.parse_run_tests( kunit_parser.extract_tap_lines( - file.readlines())) + file.readlines()), stdout) # A missing test plan is not an error. self.assertEqual(result.counts, kunit_parser.TestCounts(passed=10, errors=0)) self.assertEqual(kunit_parser.TestStatus.SUCCESS, result.status) @@ -188,7 +189,7 @@ class KUnitParserTest(unittest.TestCase): header_log = test_data_path('test_is_test_passed-no_tests_run_with_header.log') with open(header_log) as file: result = kunit_parser.parse_run_tests( - kunit_parser.extract_tap_lines(file.readlines())) + kunit_parser.extract_tap_lines(file.readlines()), stdout) self.assertEqual(0, len(result.subtests)) self.assertEqual(kunit_parser.TestStatus.NO_TESTS, result.status) self.assertEqual(result.counts.errors, 1) @@ -197,7 +198,7 @@ class KUnitParserTest(unittest.TestCase): no_plan_log = test_data_path('test_is_test_passed-no_tests_no_plan.log') with open(no_plan_log) as file: result = kunit_parser.parse_run_tests( - kunit_parser.extract_tap_lines(file.readlines())) + kunit_parser.extract_tap_lines(file.readlines()), stdout) self.assertEqual(0, len(result.subtests[0].subtests[0].subtests)) self.assertEqual( kunit_parser.TestStatus.NO_TESTS, @@ -210,7 +211,7 @@ class KUnitParserTest(unittest.TestCase): print_mock = mock.patch('kunit_printer.Printer.print').start() with open(crash_log) as file: result = kunit_parser.parse_run_tests( - kunit_parser.extract_tap_lines(file.readlines())) + kunit_parser.extract_tap_lines(file.readlines()), stdout) print_mock.assert_any_call(StrContains('Could not find any KTAP output.')) print_mock.stop() self.assertEqual(0, len(result.subtests)) @@ -219,7 +220,7 @@ class KUnitParserTest(unittest.TestCase): def test_skipped_test(self): skipped_log = test_data_path('test_skip_tests.log') with open(skipped_log) as file: - result = kunit_parser.parse_run_tests(file.readlines()) + result = kunit_parser.parse_run_tests(file.readlines(), stdout) # A skipped test does not fail the whole suite. self.assertEqual(kunit_parser.TestStatus.SUCCESS, result.status) @@ -228,7 +229,7 @@ class KUnitParserTest(unittest.TestCase): def test_skipped_all_tests(self): skipped_log = test_data_path('test_skip_all_tests.log') with open(skipped_log) as file: - result = kunit_parser.parse_run_tests(file.readlines()) + result = kunit_parser.parse_run_tests(file.readlines(), stdout) self.assertEqual(kunit_parser.TestStatus.SKIPPED, result.status) self.assertEqual(result.counts, kunit_parser.TestCounts(skipped=5)) @@ -236,7 +237,7 @@ class KUnitParserTest(unittest.TestCase): def test_ignores_hyphen(self): hyphen_log = test_data_path('test_strip_hyphen.log') with open(hyphen_log) as file: - result = kunit_parser.parse_run_tests(file.readlines()) + result = kunit_parser.parse_run_tests(file.readlines(), stdout) # A skipped test does not fail the whole suite. self.assertEqual(kunit_parser.TestStatus.SUCCESS, result.status) @@ -250,7 +251,7 @@ class KUnitParserTest(unittest.TestCase): def test_ignores_prefix_printk_time(self): prefix_log = test_data_path('test_config_printk_time.log') with open(prefix_log) as file: - result = kunit_parser.parse_run_tests(file.readlines()) + result = kunit_parser.parse_run_tests(file.readlines(), stdout) self.assertEqual(kunit_parser.TestStatus.SUCCESS, result.status) self.assertEqual('kunit-resource-test', result.subtests[0].name) self.assertEqual(result.counts.errors, 0) @@ -258,7 +259,7 @@ class KUnitParserTest(unittest.TestCase): def test_ignores_multiple_prefixes(self): prefix_log = test_data_path('test_multiple_prefixes.log') with open(prefix_log) as file: - result = kunit_parser.parse_run_tests(file.readlines()) + result = kunit_parser.parse_run_tests(file.readlines(), stdout) self.assertEqual(kunit_parser.TestStatus.SUCCESS, result.status) self.assertEqual('kunit-resource-test', result.subtests[0].name) self.assertEqual(result.counts.errors, 0) @@ -266,7 +267,7 @@ class KUnitParserTest(unittest.TestCase): def test_prefix_mixed_kernel_output(self): mixed_prefix_log = test_data_path('test_interrupted_tap_output.log') with open(mixed_prefix_log) as file: - result = kunit_parser.parse_run_tests(file.readlines()) + result = kunit_parser.parse_run_tests(file.readlines(), stdout) self.assertEqual(kunit_parser.TestStatus.SUCCESS, result.status) self.assertEqual('kunit-resource-test', result.subtests[0].name) self.assertEqual(result.counts.errors, 0) @@ -274,7 +275,7 @@ class KUnitParserTest(unittest.TestCase): def test_prefix_poundsign(self): pound_log = test_data_path('test_pound_sign.log') with open(pound_log) as file: - result = kunit_parser.parse_run_tests(file.readlines()) + result = kunit_parser.parse_run_tests(file.readlines(), stdout) self.assertEqual(kunit_parser.TestStatus.SUCCESS, result.status) self.assertEqual('kunit-resource-test', result.subtests[0].name) self.assertEqual(result.counts.errors, 0) @@ -282,7 +283,7 @@ class KUnitParserTest(unittest.TestCase): def test_kernel_panic_end(self): panic_log = test_data_path('test_kernel_panic_interrupt.log') with open(panic_log) as file: - result = kunit_parser.parse_run_tests(file.readlines()) + result = kunit_parser.parse_run_tests(file.readlines(), stdout) self.assertEqual(kunit_parser.TestStatus.TEST_CRASHED, result.status) self.assertEqual('kunit-resource-test', result.subtests[0].name) self.assertGreaterEqual(result.counts.errors, 1) @@ -290,7 +291,7 @@ class KUnitParserTest(unittest.TestCase): def test_pound_no_prefix(self): pound_log = test_data_path('test_pound_no_prefix.log') with open(pound_log) as file: - result = kunit_parser.parse_run_tests(file.readlines()) + result = kunit_parser.parse_run_tests(file.readlines(), stdout) self.assertEqual(kunit_parser.TestStatus.SUCCESS, result.status) self.assertEqual('kunit-resource-test', result.subtests[0].name) self.assertEqual(result.counts.errors, 0) @@ -310,7 +311,7 @@ class KUnitParserTest(unittest.TestCase): not ok 2 - test2 not ok 1 - some_failed_suite """ - result = kunit_parser.parse_run_tests(output.splitlines()) + result = kunit_parser.parse_run_tests(output.splitlines(), stdout) self.assertEqual(kunit_parser.TestStatus.FAILURE, result.status) self.assertEqual(kunit_parser._summarize_failed_tests(result), @@ -319,7 +320,7 @@ class KUnitParserTest(unittest.TestCase): def test_ktap_format(self): ktap_log = test_data_path('test_parse_ktap_output.log') with open(ktap_log) as file: - result = kunit_parser.parse_run_tests(file.readlines()) + result = kunit_parser.parse_run_tests(file.readlines(), stdout) self.assertEqual(result.counts, kunit_parser.TestCounts(passed=3)) self.assertEqual('suite', result.subtests[0].name) self.assertEqual('case_1', result.subtests[0].subtests[0].name) @@ -328,13 +329,13 @@ class KUnitParserTest(unittest.TestCase): def test_parse_subtest_header(self): ktap_log = test_data_path('test_parse_subtest_header.log') with open(ktap_log) as file: - kunit_parser.parse_run_tests(file.readlines()) + kunit_parser.parse_run_tests(file.readlines(), stdout) self.print_mock.assert_any_call(StrContains('suite (1 subtest)')) def test_parse_attributes(self): ktap_log = test_data_path('test_parse_attributes.log') with open(ktap_log) as file: - result = kunit_parser.parse_run_tests(file.readlines()) + result = kunit_parser.parse_run_tests(file.readlines(), stdout) # Test should pass with no errors self.assertEqual(result.counts, kunit_parser.TestCounts(passed=1, errors=0)) @@ -355,7 +356,7 @@ class KUnitParserTest(unittest.TestCase): Indented more. not ok 1 test1 """ - result = kunit_parser.parse_run_tests(output.splitlines()) + result = kunit_parser.parse_run_tests(output.splitlines(), stdout) self.assertEqual(kunit_parser.TestStatus.FAILURE, result.status) self.print_mock.assert_any_call(StrContains('Test output.')) @@ -544,7 +545,7 @@ class KUnitJsonTest(unittest.TestCase): def _json_for(self, log_file): with open(test_data_path(log_file)) as file: - test_result = kunit_parser.parse_run_tests(file) + test_result = kunit_parser.parse_run_tests(file, stdout) json_obj = kunit_json.get_json_result( test=test_result, metadata=kunit_json.Metadata()) @@ -810,7 +811,7 @@ class KUnitMainTest(unittest.TestCase): self.linux_source_mock.run_kernel.return_value = ['TAP version 14', 'init: random output'] + want got = kunit._list_tests(self.linux_source_mock, - kunit.KunitExecRequest(None, None, '.kunit', 300, 'suite*', '', None, None, 'suite', False, False)) + kunit.KunitExecRequest(None, None, False, False, '.kunit', 300, 'suite*', '', None, None, 'suite', False, False)) self.assertEqual(got, want) # Should respect the user's filter glob when listing tests. self.linux_source_mock.run_kernel.assert_called_once_with( @@ -823,7 +824,7 @@ class KUnitMainTest(unittest.TestCase): # Should respect the user's filter glob when listing tests. mock_tests.assert_called_once_with(mock.ANY, - kunit.KunitExecRequest(None, None, '.kunit', 300, 'suite*.test*', '', None, None, 'suite', False, False)) + kunit.KunitExecRequest(None, None, False, False, '.kunit', 300, 'suite*.test*', '', None, None, 'suite', False, False)) self.linux_source_mock.run_kernel.assert_has_calls([ mock.call(args=None, build_dir='.kunit', filter_glob='suite.test*', filter='', filter_action=None, timeout=300), mock.call(args=None, build_dir='.kunit', filter_glob='suite2.test*', filter='', filter_action=None, timeout=300), @@ -836,7 +837,7 @@ class KUnitMainTest(unittest.TestCase): # Should respect the user's filter glob when listing tests. mock_tests.assert_called_once_with(mock.ANY, - kunit.KunitExecRequest(None, None, '.kunit', 300, 'suite*', '', None, None, 'test', False, False)) + kunit.KunitExecRequest(None, None, False, False, '.kunit', 300, 'suite*', '', None, None, 'test', False, False)) self.linux_source_mock.run_kernel.assert_has_calls([ mock.call(args=None, build_dir='.kunit', filter_glob='suite.test1', filter='', filter_action=None, timeout=300), mock.call(args=None, build_dir='.kunit', filter_glob='suite.test2', filter='', filter_action=None, timeout=300), diff --git a/tools/testing/kunit/qemu_configs/loongarch.py b/tools/testing/kunit/qemu_configs/loongarch.py new file mode 100644 index 000000000000..a92422967d1d --- /dev/null +++ b/tools/testing/kunit/qemu_configs/loongarch.py @@ -0,0 +1,21 @@ +# SPDX-License-Identifier: GPL-2.0 + +from ..qemu_config import QemuArchParams + +QEMU_ARCH = QemuArchParams(linux_arch='loongarch', + kconfig=''' +CONFIG_EFI_STUB=n +CONFIG_PCI_HOST_GENERIC=y +CONFIG_PVPANIC=y +CONFIG_PVPANIC_PCI=y +CONFIG_SERIAL_8250=y +CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_SERIAL_OF_PLATFORM=y +''', + qemu_arch='loongarch64', + kernel_path='arch/loongarch/boot/vmlinux.elf', + kernel_command_line='console=ttyS0 kunit_shutdown=poweroff', + extra_qemu_params=[ + '-machine', 'virt', + '-device', 'pvpanic-pci', + '-cpu', 'max',]) diff --git a/tools/testing/nvdimm/test/ndtest.c b/tools/testing/nvdimm/test/ndtest.c index 892e990c034a..68a064ce598c 100644 --- a/tools/testing/nvdimm/test/ndtest.c +++ b/tools/testing/nvdimm/test/ndtest.c @@ -883,7 +883,7 @@ static const struct platform_device_id ndtest_id[] = { static struct platform_driver ndtest_driver = { .probe = ndtest_probe, - .remove_new = ndtest_remove, + .remove = ndtest_remove, .driver = { .name = KBUILD_MODNAME, }, diff --git a/tools/testing/radix-tree/maple.c b/tools/testing/radix-tree/maple.c index 1873ddbe16cc..bc30050227fd 100644 --- a/tools/testing/radix-tree/maple.c +++ b/tools/testing/radix-tree/maple.c @@ -462,6 +462,28 @@ static noinline void __init check_new_node(struct maple_tree *mt) MT_BUG_ON(mt, mas_allocated(&mas) != 10 + MAPLE_ALLOC_SLOTS - 1); mas_destroy(&mas); + mas.node = MA_ERROR(-ENOMEM); + mas_node_count(&mas, MAPLE_ALLOC_SLOTS + 1); /* Request */ + mas_nomem(&mas, GFP_KERNEL); /* Fill request */ + MT_BUG_ON(mt, mas_allocated(&mas) != MAPLE_ALLOC_SLOTS + 1); + mas.node = MA_ERROR(-ENOMEM); + mas_node_count(&mas, MAPLE_ALLOC_SLOTS * 2 + 2); /* Request */ + mas_nomem(&mas, GFP_KERNEL); /* Fill request */ + mas.status = ma_start; + MT_BUG_ON(mt, mas_allocated(&mas) != MAPLE_ALLOC_SLOTS * 2 + 2); + mas_destroy(&mas); + + mas.node = MA_ERROR(-ENOMEM); + mas_node_count(&mas, MAPLE_ALLOC_SLOTS * 2 + 1); /* Request */ + mas_nomem(&mas, GFP_KERNEL); /* Fill request */ + MT_BUG_ON(mt, mas_allocated(&mas) != MAPLE_ALLOC_SLOTS * 2 + 1); + mas.node = MA_ERROR(-ENOMEM); + mas_node_count(&mas, MAPLE_ALLOC_SLOTS * 3 + 2); /* Request */ + mas_nomem(&mas, GFP_KERNEL); /* Fill request */ + mas.status = ma_start; + MT_BUG_ON(mt, mas_allocated(&mas) != MAPLE_ALLOC_SLOTS * 3 + 2); + mas_destroy(&mas); + mtree_unlock(mt); } @@ -36317,6 +36339,28 @@ static inline int check_vma_modification(struct maple_tree *mt) return 0; } +/* + * test to check that bulk stores do not use wr_rebalance as the store + * type. + */ +static inline void check_bulk_rebalance(struct maple_tree *mt) +{ + MA_STATE(mas, mt, ULONG_MAX, ULONG_MAX); + int max = 10; + + build_full_tree(mt, 0, 2); + + /* erase every entry in the tree */ + do { + /* set up bulk store mode */ + mas_expected_entries(&mas, max); + mas_erase(&mas); + MT_BUG_ON(mt, mas.store_type == wr_rebalance); + } while (mas_prev(&mas, 0) != NULL); + + mas_destroy(&mas); +} + void farmer_tests(void) { struct maple_node *node; @@ -36328,6 +36372,10 @@ void farmer_tests(void) check_vma_modification(&tree); mtree_destroy(&tree); + mt_init(&tree); + check_bulk_rebalance(&tree); + mtree_destroy(&tree); + tree.ma_root = xa_mk_value(0); mt_dump(&tree, mt_dump_dec); @@ -36406,9 +36454,93 @@ void farmer_tests(void) check_nomem(&tree); } +static unsigned long get_last_index(struct ma_state *mas) +{ + struct maple_node *node = mas_mn(mas); + enum maple_type mt = mte_node_type(mas->node); + unsigned long *pivots = ma_pivots(node, mt); + unsigned long last_index = mas_data_end(mas); + + BUG_ON(last_index == 0); + + return pivots[last_index - 1] + 1; +} + +/* + * Assert that we handle spanning stores that consume the entirety of the right + * leaf node correctly. + */ +static void test_spanning_store_regression(void) +{ + unsigned long from = 0, to = 0; + DEFINE_MTREE(tree); + MA_STATE(mas, &tree, 0, 0); + + /* + * Build a 3-level tree. We require a parent node below the root node + * and 2 leaf nodes under it, so we can span the entirety of the right + * hand node. + */ + build_full_tree(&tree, 0, 3); + + /* Descend into position at depth 2. */ + mas_reset(&mas); + mas_start(&mas); + mas_descend(&mas); + mas_descend(&mas); + + /* + * We need to establish a tree like the below. + * + * Then we can try a store in [from, to] which results in a spanned + * store across nodes B and C, with the maple state at the time of the + * write being such that only the subtree at A and below is considered. + * + * Height + * 0 Root Node + * / \ + * pivot = to / \ pivot = ULONG_MAX + * / \ + * 1 A [-----] ... + * / \ + * pivot = from / \ pivot = to + * / \ + * 2 (LEAVES) B [-----] [-----] C + * ^--- Last pivot to. + */ + while (true) { + unsigned long tmp = get_last_index(&mas); + + if (mas_next_sibling(&mas)) { + from = tmp; + to = mas.max; + } else { + break; + } + } + + BUG_ON(from == 0 && to == 0); + + /* Perform the store. */ + mas_set_range(&mas, from, to); + mas_store_gfp(&mas, xa_mk_value(0xdead), GFP_KERNEL); + + /* If the regression occurs, the validation will fail. */ + mt_validate(&tree); + + /* Cleanup. */ + __mt_destroy(&tree); +} + +static void regression_tests(void) +{ + test_spanning_store_regression(); +} + void maple_tree_tests(void) { #if !defined(BENCH) + regression_tests(); farmer_tests(); #endif maple_tree_seed(); diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile index b38199965f99..2401e973c359 100644 --- a/tools/testing/selftests/Makefile +++ b/tools/testing/selftests/Makefile @@ -72,6 +72,7 @@ TARGETS += net/packetdrill TARGETS += net/rds TARGETS += net/tcp_ao TARGETS += nsfs +TARGETS += pcie_bwctrl TARGETS += perf_events TARGETS += pidfd TARGETS += pid_namespace @@ -88,9 +89,10 @@ TARGETS += rlimits TARGETS += rseq TARGETS += rtc TARGETS += rust +TARGETS += sched_ext TARGETS += seccomp TARGETS += sgx -TARGETS += sigaltstack +TARGETS += signal TARGETS += size TARGETS += sparc64 TARGETS += splice @@ -129,10 +131,10 @@ ifeq ($(filter net/lib,$(TARGETS)),) endif endif -# User can optionally provide a TARGETS skiplist. By default we skip -# BPF since it has cutting edge build time dependencies which require -# more effort to install. -SKIP_TARGETS ?= bpf +# User can optionally provide a TARGETS skiplist. By default we skip +# targets using BPF since it has cutting edge build time dependencies +# which require more effort to install. +SKIP_TARGETS ?= bpf sched_ext ifneq ($(SKIP_TARGETS),) TMP := $(filter-out $(SKIP_TARGETS), $(TARGETS)) override TARGETS := $(TMP) diff --git a/tools/testing/selftests/alsa/.gitignore b/tools/testing/selftests/alsa/.gitignore index 12dc3fcd3456..3dd8e1176b89 100644 --- a/tools/testing/selftests/alsa/.gitignore +++ b/tools/testing/selftests/alsa/.gitignore @@ -1,3 +1,5 @@ +global-timer mixer-test pcm-test test-pcmtest-driver +utimer-test diff --git a/tools/testing/selftests/alsa/Makefile b/tools/testing/selftests/alsa/Makefile index 944279160fed..8dab90ad22bb 100644 --- a/tools/testing/selftests/alsa/Makefile +++ b/tools/testing/selftests/alsa/Makefile @@ -27,5 +27,5 @@ include ../lib.mk $(OUTPUT)/libatest.so: conf.c alsa-local.h $(CC) $(CFLAGS) -shared -fPIC $< $(LDLIBS) -o $@ -$(OUTPUT)/%: %.c $(TEST_GEN_PROGS_EXTENDED) alsa-local.h +$(OUTPUT)/%: %.c $(OUTPUT)/libatest.so alsa-local.h $(CC) $(CFLAGS) $< $(LDLIBS) -latest -o $@ diff --git a/tools/testing/selftests/arm64/Makefile b/tools/testing/selftests/arm64/Makefile index 28b93cab8c0d..22029e60eff3 100644 --- a/tools/testing/selftests/arm64/Makefile +++ b/tools/testing/selftests/arm64/Makefile @@ -4,7 +4,7 @@ ARCH ?= $(shell uname -m 2>/dev/null || echo not) ifneq (,$(filter $(ARCH),aarch64 arm64)) -ARM64_SUBTARGETS ?= tags signal pauth fp mte bti abi +ARM64_SUBTARGETS ?= tags signal pauth fp mte bti abi gcs else ARM64_SUBTARGETS := endif diff --git a/tools/testing/selftests/arm64/abi/hwcap.c b/tools/testing/selftests/arm64/abi/hwcap.c index f2d6007a2b98..0029ed9c5c9a 100644 --- a/tools/testing/selftests/arm64/abi/hwcap.c +++ b/tools/testing/selftests/arm64/abi/hwcap.c @@ -98,6 +98,17 @@ static void fpmr_sigill(void) asm volatile("mrs x0, S3_3_C4_C4_2" : : : "x0"); } +static void gcs_sigill(void) +{ + unsigned long *gcspr; + + asm volatile( + "mrs %0, S3_3_C2_C5_1" + : "=r" (gcspr) + : + : "cc"); +} + static void ilrcpc_sigill(void) { /* LDAPUR W0, [SP, #8] */ @@ -361,8 +372,8 @@ static void sveaes_sigill(void) static void sveb16b16_sigill(void) { - /* BFADD ZA.H[W0, 0], {Z0.H-Z1.H} */ - asm volatile(".inst 0xC1E41C00" : : : ); + /* BFADD Z0.H, Z0.H, Z0.H */ + asm volatile(".inst 0x65000000" : : : ); } static void svepmull_sigill(void) @@ -490,7 +501,7 @@ static const struct hwcap_data { .name = "F8DP2", .at_hwcap = AT_HWCAP2, .hwcap_bit = HWCAP2_F8DP2, - .cpuinfo = "f8dp4", + .cpuinfo = "f8dp2", .sigill_fn = f8dp2_sigill, }, { @@ -535,6 +546,14 @@ static const struct hwcap_data { .sigill_reliable = true, }, { + .name = "GCS", + .at_hwcap = AT_HWCAP, + .hwcap_bit = HWCAP_GCS, + .cpuinfo = "gcs", + .sigill_fn = gcs_sigill, + .sigill_reliable = true, + }, + { .name = "JSCVT", .at_hwcap = AT_HWCAP, .hwcap_bit = HWCAP_JSCVT, diff --git a/tools/testing/selftests/arm64/abi/syscall-abi-asm.S b/tools/testing/selftests/arm64/abi/syscall-abi-asm.S index df3230fdac39..66ab2e0bae5f 100644 --- a/tools/testing/selftests/arm64/abi/syscall-abi-asm.S +++ b/tools/testing/selftests/arm64/abi/syscall-abi-asm.S @@ -81,32 +81,31 @@ do_syscall: stp x27, x28, [sp, #96] // Set SVCR if we're doing SME - cbz x1, 1f + cbz x1, load_gpr adrp x2, svcr_in ldr x2, [x2, :lo12:svcr_in] msr S3_3_C4_C2_2, x2 -1: // Load ZA and ZT0 if enabled - uses x12 as scratch due to SME LDR - tbz x2, #SVCR_ZA_SHIFT, 1f + tbz x2, #SVCR_ZA_SHIFT, load_gpr mov w12, #0 ldr x2, =za_in -2: _ldr_za 12, 2 +1: _ldr_za 12, 2 add x2, x2, x1 add x12, x12, #1 cmp x1, x12 - bne 2b + bne 1b // ZT0 mrs x2, S3_0_C0_C4_5 // ID_AA64SMFR0_EL1 ubfx x2, x2, #ID_AA64SMFR0_EL1_SMEver_SHIFT, \ #ID_AA64SMFR0_EL1_SMEver_WIDTH - cbz x2, 1f + cbz x2, load_gpr adrp x2, zt_in add x2, x2, :lo12:zt_in _ldr_zt 2 -1: +load_gpr: // Load GPRs x8-x28, and save our SP/FP for later comparison ldr x2, =gpr_in add x2, x2, #64 @@ -125,9 +124,9 @@ do_syscall: str x30, [x2], #8 // LR // Load FPRs if we're not doing neither SVE nor streaming SVE - cbnz x0, 1f + cbnz x0, check_sve_in ldr x2, =svcr_in - tbnz x2, #SVCR_SM_SHIFT, 1f + tbnz x2, #SVCR_SM_SHIFT, check_sve_in ldr x2, =fpr_in ldp q0, q1, [x2] @@ -148,8 +147,8 @@ do_syscall: ldp q30, q31, [x2, #16 * 30] b 2f -1: +check_sve_in: // Load the SVE registers if we're doing SVE/SME ldr x2, =z_in @@ -256,32 +255,31 @@ do_syscall: stp q30, q31, [x2, #16 * 30] // Save SVCR if we're doing SME - cbz x1, 1f + cbz x1, check_sve_out mrs x2, S3_3_C4_C2_2 adrp x3, svcr_out str x2, [x3, :lo12:svcr_out] -1: // Save ZA if it's enabled - uses x12 as scratch due to SME STR - tbz x2, #SVCR_ZA_SHIFT, 1f + tbz x2, #SVCR_ZA_SHIFT, check_sve_out mov w12, #0 ldr x2, =za_out -2: _str_za 12, 2 +1: _str_za 12, 2 add x2, x2, x1 add x12, x12, #1 cmp x1, x12 - bne 2b + bne 1b // ZT0 mrs x2, S3_0_C0_C4_5 // ID_AA64SMFR0_EL1 ubfx x2, x2, #ID_AA64SMFR0_EL1_SMEver_SHIFT, \ #ID_AA64SMFR0_EL1_SMEver_WIDTH - cbz x2, 1f + cbz x2, check_sve_out adrp x2, zt_out add x2, x2, :lo12:zt_out _str_zt 2 -1: +check_sve_out: // Save the SVE state if we have some cbz x0, 1f diff --git a/tools/testing/selftests/arm64/abi/syscall-abi.c b/tools/testing/selftests/arm64/abi/syscall-abi.c index d704511a0955..5ec9a18ec802 100644 --- a/tools/testing/selftests/arm64/abi/syscall-abi.c +++ b/tools/testing/selftests/arm64/abi/syscall-abi.c @@ -81,7 +81,7 @@ static int check_gpr(struct syscall_cfg *cfg, int sve_vl, int sme_vl, uint64_t s */ for (i = 9; i < ARRAY_SIZE(gpr_in); i++) { if (gpr_in[i] != gpr_out[i]) { - ksft_print_msg("%s SVE VL %d mismatch in GPR %d: %llx != %llx\n", + ksft_print_msg("%s SVE VL %d mismatch in GPR %d: %lx != %lx\n", cfg->name, sve_vl, i, gpr_in[i], gpr_out[i]); errors++; @@ -112,7 +112,7 @@ static int check_fpr(struct syscall_cfg *cfg, int sve_vl, int sme_vl, if (!sve_vl && !(svcr & SVCR_SM_MASK)) { for (i = 0; i < ARRAY_SIZE(fpr_in); i++) { if (fpr_in[i] != fpr_out[i]) { - ksft_print_msg("%s Q%d/%d mismatch %llx != %llx\n", + ksft_print_msg("%s Q%d/%d mismatch %lx != %lx\n", cfg->name, i / 2, i % 2, fpr_in[i], fpr_out[i]); @@ -294,13 +294,13 @@ static int check_svcr(struct syscall_cfg *cfg, int sve_vl, int sme_vl, int errors = 0; if (svcr_out & SVCR_SM_MASK) { - ksft_print_msg("%s Still in SM, SVCR %llx\n", + ksft_print_msg("%s Still in SM, SVCR %lx\n", cfg->name, svcr_out); errors++; } if ((svcr_in & SVCR_ZA_MASK) != (svcr_out & SVCR_ZA_MASK)) { - ksft_print_msg("%s PSTATE.ZA changed, SVCR %llx != %llx\n", + ksft_print_msg("%s PSTATE.ZA changed, SVCR %lx != %lx\n", cfg->name, svcr_in, svcr_out); errors++; } diff --git a/tools/testing/selftests/arm64/fp/assembler.h b/tools/testing/selftests/arm64/fp/assembler.h index 9b38a0da407d..1fc46a5642c2 100644 --- a/tools/testing/selftests/arm64/fp/assembler.h +++ b/tools/testing/selftests/arm64/fp/assembler.h @@ -65,4 +65,19 @@ endfunction bl puts .endm +#define PR_SET_SHADOW_STACK_STATUS 75 +# define PR_SHADOW_STACK_ENABLE (1UL << 0) + +.macro enable_gcs + // Run with GCS + mov x0, PR_SET_SHADOW_STACK_STATUS + mov x1, PR_SHADOW_STACK_ENABLE + mov x2, xzr + mov x3, xzr + mov x4, xzr + mov x5, xzr + mov x8, #__NR_prctl + svc #0 +.endm + #endif /* ! ASSEMBLER_H */ diff --git a/tools/testing/selftests/arm64/fp/fp-ptrace-asm.S b/tools/testing/selftests/arm64/fp/fp-ptrace-asm.S index 7ad59d92d02b..82c3ab70e1cf 100644 --- a/tools/testing/selftests/arm64/fp/fp-ptrace-asm.S +++ b/tools/testing/selftests/arm64/fp/fp-ptrace-asm.S @@ -15,10 +15,7 @@ // Load and save register values with pauses for ptrace // -// x0 - SVE in use -// x1 - SME in use -// x2 - SME2 in use -// x3 - FA64 supported +// x0 - HAVE_ flags indicating which features are in use .globl load_and_save load_and_save: @@ -44,7 +41,7 @@ load_and_save: ldp q30, q31, [x7, #16 * 30] // SME? - cbz x1, check_sve_in + tbz x0, #HAVE_SME_SHIFT, check_sve_in adrp x7, svcr_in ldr x7, [x7, :lo12:svcr_in] @@ -64,7 +61,7 @@ load_and_save: bne 1b // ZT? - cbz x2, check_sm_in + tbz x0, #HAVE_SME2_SHIFT, check_sm_in adrp x6, zt_in add x6, x6, :lo12:zt_in _ldr_zt 6 @@ -72,12 +69,14 @@ load_and_save: // In streaming mode? check_sm_in: tbz x7, #SVCR_SM_SHIFT, check_sve_in - mov x4, x3 // Load FFR if we have FA64 + + // Load FFR if we have FA64 + ubfx x4, x0, #HAVE_FA64_SHIFT, #1 b load_sve // SVE? check_sve_in: - cbz x0, wait_for_writes + tbz x0, #HAVE_SVE_SHIFT, check_fpmr_in mov x4, #1 load_sve: @@ -142,6 +141,13 @@ load_sve: ldr p14, [x7, #14, MUL VL] ldr p15, [x7, #15, MUL VL] + // This has to come after we set PSTATE.SM +check_fpmr_in: + tbz x0, #HAVE_FPMR_SHIFT, wait_for_writes + adrp x7, fpmr_in + ldr x7, [x7, :lo12:fpmr_in] + msr REG_FPMR, x7 + wait_for_writes: // Wait for the parent brk #0 @@ -165,8 +171,13 @@ wait_for_writes: stp q28, q29, [x7, #16 * 28] stp q30, q31, [x7, #16 * 30] - // SME? - cbz x1, check_sve_out + tbz x0, #HAVE_FPMR_SHIFT, check_sme_out + mrs x7, REG_FPMR + adrp x6, fpmr_out + str x7, [x6, :lo12:fpmr_out] + +check_sme_out: + tbz x0, #HAVE_SME_SHIFT, check_sve_out rdsvl 11, 1 adrp x6, sme_vl_out @@ -187,7 +198,7 @@ wait_for_writes: bne 1b // ZT? - cbz x2, check_sm_out + tbz x0, #HAVE_SME2_SHIFT, check_sm_out adrp x6, zt_out add x6, x6, :lo12:zt_out _str_zt 6 @@ -195,12 +206,14 @@ wait_for_writes: // In streaming mode? check_sm_out: tbz x7, #SVCR_SM_SHIFT, check_sve_out - mov x4, x3 // FFR? + + // Do we have FA64 and FFR? + ubfx x4, x0, #HAVE_FA64_SHIFT, #1 b read_sve // SVE? check_sve_out: - cbz x0, wait_for_reads + tbz x0, #HAVE_SVE_SHIFT, wait_for_reads mov x4, #1 rdvl x7, #1 @@ -271,7 +284,7 @@ wait_for_reads: brk #0 // Ensure we don't leave ourselves in streaming mode - cbz x1, out + tbz x0, #HAVE_SME_SHIFT, out msr S3_3_C4_C2_2, xzr out: diff --git a/tools/testing/selftests/arm64/fp/fp-ptrace.c b/tools/testing/selftests/arm64/fp/fp-ptrace.c index c7ceafe5f471..4930e03a7b99 100644 --- a/tools/testing/selftests/arm64/fp/fp-ptrace.c +++ b/tools/testing/selftests/arm64/fp/fp-ptrace.c @@ -31,6 +31,14 @@ #include "fp-ptrace.h" +#include <linux/bits.h> + +#define FPMR_LSCALE2_MASK GENMASK(37, 32) +#define FPMR_NSCALE_MASK GENMASK(31, 24) +#define FPMR_LSCALE_MASK GENMASK(22, 16) +#define FPMR_OSC_MASK GENMASK(15, 15) +#define FPMR_OSM_MASK GENMASK(14, 14) + /* <linux/elf.h> and <sys/auxv.h> don't like each other, so: */ #ifndef NT_ARM_SVE #define NT_ARM_SVE 0x405 @@ -48,11 +56,22 @@ #define NT_ARM_ZT 0x40d #endif +#ifndef NT_ARM_FPMR +#define NT_ARM_FPMR 0x40e +#endif + #define ARCH_VQ_MAX 256 /* VL 128..2048 in powers of 2 */ #define MAX_NUM_VLS 5 +/* + * FPMR bits we can set without doing feature checks to see if values + * are valid. + */ +#define FPMR_SAFE_BITS (FPMR_LSCALE2_MASK | FPMR_NSCALE_MASK | \ + FPMR_LSCALE_MASK | FPMR_OSC_MASK | FPMR_OSM_MASK) + #define NUM_FPR 32 __uint128_t v_in[NUM_FPR]; __uint128_t v_expected[NUM_FPR]; @@ -78,11 +97,13 @@ char zt_in[ZT_SIG_REG_BYTES]; char zt_expected[ZT_SIG_REG_BYTES]; char zt_out[ZT_SIG_REG_BYTES]; +uint64_t fpmr_in, fpmr_expected, fpmr_out; + uint64_t sve_vl_out; uint64_t sme_vl_out; uint64_t svcr_in, svcr_expected, svcr_out; -void load_and_save(int sve, int sme, int sme2, int fa64); +void load_and_save(int flags); static bool got_alarm; @@ -128,6 +149,11 @@ static bool fa64_supported(void) return getauxval(AT_HWCAP2) & HWCAP2_SME_FA64; } +static bool fpmr_supported(void) +{ + return getauxval(AT_HWCAP2) & HWCAP2_FPMR; +} + static bool compare_buffer(const char *name, void *out, void *expected, size_t size) { @@ -198,7 +224,7 @@ static int vl_expected(struct test_config *config) static void run_child(struct test_config *config) { - int ret; + int ret, flags; /* Let the parent attach to us */ ret = ptrace(PTRACE_TRACEME, 0, 0, 0); @@ -224,8 +250,19 @@ static void run_child(struct test_config *config) } /* Load values and wait for the parent */ - load_and_save(sve_supported(), sme_supported(), - sme2_supported(), fa64_supported()); + flags = 0; + if (sve_supported()) + flags |= HAVE_SVE; + if (sme_supported()) + flags |= HAVE_SME; + if (sme2_supported()) + flags |= HAVE_SME2; + if (fa64_supported()) + flags |= HAVE_FA64; + if (fpmr_supported()) + flags |= HAVE_FPMR; + + load_and_save(flags); exit(0); } @@ -312,6 +349,14 @@ static void read_child_regs(pid_t child) iov_child.iov_len = sizeof(zt_out); read_one_child_regs(child, "ZT", &iov_parent, &iov_child); } + + if (fpmr_supported()) { + iov_parent.iov_base = &fpmr_out; + iov_parent.iov_len = sizeof(fpmr_out); + iov_child.iov_base = &fpmr_out; + iov_child.iov_len = sizeof(fpmr_out); + read_one_child_regs(child, "FPMR", &iov_parent, &iov_child); + } } static bool continue_breakpoint(pid_t child, @@ -586,6 +631,26 @@ static bool check_ptrace_values_zt(pid_t child, struct test_config *config) return compare_buffer("initial ZT", buf, zt_in, ZT_SIG_REG_BYTES); } +static bool check_ptrace_values_fpmr(pid_t child, struct test_config *config) +{ + uint64_t val; + struct iovec iov; + int ret; + + if (!fpmr_supported()) + return true; + + iov.iov_base = &val; + iov.iov_len = sizeof(val); + ret = ptrace(PTRACE_GETREGSET, child, NT_ARM_FPMR, &iov); + if (ret != 0) { + ksft_print_msg("Failed to read initial FPMR: %s (%d)\n", + strerror(errno), errno); + return false; + } + + return compare_buffer("initial FPMR", &val, &fpmr_in, sizeof(val)); +} static bool check_ptrace_values(pid_t child, struct test_config *config) { @@ -620,6 +685,9 @@ static bool check_ptrace_values(pid_t child, struct test_config *config) if (!check_ptrace_values_zt(child, config)) pass = false; + if (!check_ptrace_values_fpmr(child, config)) + pass = false; + return pass; } @@ -823,11 +891,18 @@ static void set_initial_values(struct test_config *config) { int vq = __sve_vq_from_vl(vl_in(config)); int sme_vq = __sve_vq_from_vl(config->sme_vl_in); + bool sm_change; svcr_in = config->svcr_in; svcr_expected = config->svcr_expected; svcr_out = 0; + if (sme_supported() && + (svcr_in & SVCR_SM) != (svcr_expected & SVCR_SM)) + sm_change = true; + else + sm_change = false; + fill_random(&v_in, sizeof(v_in)); memcpy(v_expected, v_in, sizeof(v_in)); memset(v_out, 0, sizeof(v_out)); @@ -874,6 +949,21 @@ static void set_initial_values(struct test_config *config) memset(zt_expected, 0, ZT_SIG_REG_BYTES); memset(zt_out, 0, sizeof(zt_out)); } + + if (fpmr_supported()) { + fill_random(&fpmr_in, sizeof(fpmr_in)); + fpmr_in &= FPMR_SAFE_BITS; + + /* Entering or exiting streaming mode clears FPMR */ + if (sm_change) + fpmr_expected = 0; + else + fpmr_expected = fpmr_in; + } else { + fpmr_in = 0; + fpmr_expected = 0; + fpmr_out = 0; + } } static bool check_memory_values(struct test_config *config) @@ -924,6 +1014,12 @@ static bool check_memory_values(struct test_config *config) if (!compare_buffer("saved ZT", zt_out, zt_expected, ZT_SIG_REG_BYTES)) pass = false; + if (fpmr_out != fpmr_expected) { + ksft_print_msg("Mismatch in saved FPMR: %lx != %lx\n", + fpmr_out, fpmr_expected); + pass = false; + } + return pass; } @@ -1001,6 +1097,36 @@ static void fpsimd_write(pid_t child, struct test_config *test_config) strerror(errno), errno); } +static bool fpmr_write_supported(struct test_config *config) +{ + if (!fpmr_supported()) + return false; + + if (!sve_sme_same(config)) + return false; + + return true; +} + +static void fpmr_write_expected(struct test_config *config) +{ + fill_random(&fpmr_expected, sizeof(fpmr_expected)); + fpmr_expected &= FPMR_SAFE_BITS; +} + +static void fpmr_write(pid_t child, struct test_config *config) +{ + struct iovec iov; + int ret; + + iov.iov_len = sizeof(fpmr_expected); + iov.iov_base = &fpmr_expected; + ret = ptrace(PTRACE_SETREGSET, child, NT_ARM_FPMR, &iov); + if (ret != 0) + ksft_print_msg("Failed to write FPMR: %s (%d)\n", + strerror(errno), errno); +} + static void sve_write_expected(struct test_config *config) { int vl = vl_expected(config); @@ -1069,21 +1195,19 @@ static void sve_write(pid_t child, struct test_config *config) static bool za_write_supported(struct test_config *config) { - if (config->svcr_expected & SVCR_SM) { - if (!(config->svcr_in & SVCR_SM)) + if (config->sme_vl_in != config->sme_vl_expected) { + /* Changing the SME VL exits streaming mode. */ + if (config->svcr_expected & SVCR_SM) { return false; - - /* Changing the SME VL exits streaming mode */ - if (config->sme_vl_in != config->sme_vl_expected) { + } + } else { + /* Otherwise we can't change streaming mode */ + if ((config->svcr_in & SVCR_SM) != + (config->svcr_expected & SVCR_SM)) { return false; } } - /* Can't disable SM outside a VL change */ - if ((config->svcr_in & SVCR_SM) && - !(config->svcr_expected & SVCR_SM)) - return false; - return true; } @@ -1259,6 +1383,12 @@ static struct test_definition base_test_defs[] = { .set_expected_values = fpsimd_write_expected, .modify_values = fpsimd_write, }, + { + .name = "FPMR write", + .supported = fpmr_write_supported, + .set_expected_values = fpmr_write_expected, + .modify_values = fpmr_write, + }, }; static struct test_definition sve_test_defs[] = { @@ -1468,6 +1598,9 @@ int main(void) if (fa64_supported()) ksft_print_msg("FA64 supported\n"); + if (fpmr_supported()) + ksft_print_msg("FPMR supported\n"); + ksft_set_plan(tests); /* Get signal handers ready before we start any children */ diff --git a/tools/testing/selftests/arm64/fp/fp-ptrace.h b/tools/testing/selftests/arm64/fp/fp-ptrace.h index db4f2c4d750c..c06919aaf1f7 100644 --- a/tools/testing/selftests/arm64/fp/fp-ptrace.h +++ b/tools/testing/selftests/arm64/fp/fp-ptrace.h @@ -10,4 +10,16 @@ #define SVCR_SM (1 << SVCR_SM_SHIFT) #define SVCR_ZA (1 << SVCR_ZA_SHIFT) +#define HAVE_SVE_SHIFT 0 +#define HAVE_SME_SHIFT 1 +#define HAVE_SME2_SHIFT 2 +#define HAVE_FA64_SHIFT 3 +#define HAVE_FPMR_SHIFT 4 + +#define HAVE_SVE (1 << HAVE_SVE_SHIFT) +#define HAVE_SME (1 << HAVE_SME_SHIFT) +#define HAVE_SME2 (1 << HAVE_SME2_SHIFT) +#define HAVE_FA64 (1 << HAVE_FA64_SHIFT) +#define HAVE_FPMR (1 << HAVE_FPMR_SHIFT) + #endif diff --git a/tools/testing/selftests/arm64/fp/fp-stress.c b/tools/testing/selftests/arm64/fp/fp-stress.c index faac24bdefeb..74e23208b94c 100644 --- a/tools/testing/selftests/arm64/fp/fp-stress.c +++ b/tools/testing/selftests/arm64/fp/fp-stress.c @@ -28,6 +28,9 @@ #define MAX_VLS 16 +#define SIGNAL_INTERVAL_MS 25 +#define LOG_INTERVALS (1000 / SIGNAL_INTERVAL_MS) + struct child_data { char *name, *output; pid_t pid; @@ -79,7 +82,7 @@ static void child_start(struct child_data *child, const char *program) */ ret = dup2(pipefd[1], 1); if (ret == -1) { - fprintf(stderr, "dup2() %d\n", errno); + printf("dup2() %d\n", errno); exit(EXIT_FAILURE); } @@ -89,7 +92,7 @@ static void child_start(struct child_data *child, const char *program) */ ret = dup2(startup_pipe[0], 3); if (ret == -1) { - fprintf(stderr, "dup2() %d\n", errno); + printf("dup2() %d\n", errno); exit(EXIT_FAILURE); } @@ -107,16 +110,15 @@ static void child_start(struct child_data *child, const char *program) */ ret = read(3, &i, sizeof(i)); if (ret < 0) - fprintf(stderr, "read(startp pipe) failed: %s (%d)\n", - strerror(errno), errno); + printf("read(startp pipe) failed: %s (%d)\n", + strerror(errno), errno); if (ret > 0) - fprintf(stderr, "%d bytes of data on startup pipe\n", - ret); + printf("%d bytes of data on startup pipe\n", ret); close(3); ret = execl(program, program, NULL); - fprintf(stderr, "execl(%s) failed: %d (%s)\n", - program, errno, strerror(errno)); + printf("execl(%s) failed: %d (%s)\n", + program, errno, strerror(errno)); exit(EXIT_FAILURE); } else { @@ -221,7 +223,7 @@ static void child_output(struct child_data *child, uint32_t events, static void child_tickle(struct child_data *child) { if (child->output_seen && !child->exited) - kill(child->pid, SIGUSR2); + kill(child->pid, SIGUSR1); } static void child_stop(struct child_data *child) @@ -449,7 +451,8 @@ static const struct option options[] = { int main(int argc, char **argv) { int ret; - int timeout = 10; + int timeout = 10 * (1000 / SIGNAL_INTERVAL_MS); + int poll_interval = 5000; int cpus, i, j, c; int sve_vl_count, sme_vl_count; bool all_children_started = false; @@ -505,7 +508,7 @@ int main(int argc, char **argv) have_sme2 ? "present" : "absent"); if (timeout > 0) - ksft_print_msg("Will run for %ds\n", timeout); + ksft_print_msg("Will run for %d\n", timeout); else ksft_print_msg("Will run until terminated\n"); @@ -578,14 +581,14 @@ int main(int argc, char **argv) break; /* - * Timeout is counted in seconds with no output, the - * tests print during startup then are silent when - * running so this should ensure they all ran enough - * to install the signal handler, this is especially - * useful in emulation where we will both be slow and - * likely to have a large set of VLs. + * Timeout is counted in poll intervals with no + * output, the tests print during startup then are + * silent when running so this should ensure they all + * ran enough to install the signal handler, this is + * especially useful in emulation where we will both + * be slow and likely to have a large set of VLs. */ - ret = epoll_wait(epoll_fd, evs, tests, 1000); + ret = epoll_wait(epoll_fd, evs, tests, poll_interval); if (ret < 0) { if (errno == EINTR) continue; @@ -623,10 +626,12 @@ int main(int argc, char **argv) } all_children_started = true; + poll_interval = SIGNAL_INTERVAL_MS; } - ksft_print_msg("Sending signals, timeout remaining: %d\n", - timeout); + if ((timeout % LOG_INTERVALS) == 0) + ksft_print_msg("Sending signals, timeout remaining: %d\n", + timeout); for (i = 0; i < num_children; i++) child_tickle(&children[i]); @@ -651,7 +656,5 @@ int main(int argc, char **argv) drain_output(true); - ksft_print_cnts(); - - return 0; + ksft_finished(); } diff --git a/tools/testing/selftests/arm64/fp/fpsimd-test.S b/tools/testing/selftests/arm64/fp/fpsimd-test.S index 8b960d01ed2e..f89d67894c2e 100644 --- a/tools/testing/selftests/arm64/fp/fpsimd-test.S +++ b/tools/testing/selftests/arm64/fp/fpsimd-test.S @@ -134,8 +134,7 @@ function check_vreg b memcmp endfunction -// Any SVE register modified here can cause corruption in the main -// thread -- but *only* the registers modified here. +// Modify live register state, the signal return will undo our changes function irritator_handler // Increment the irritation signal count (x23): ldr x0, [x2, #ucontext_regs + 8 * 23] @@ -143,7 +142,6 @@ function irritator_handler str x0, [x2, #ucontext_regs + 8 * 23] // Corrupt some random V-regs - adr x0, .text + (irritator_handler - .text) / 16 * 16 movi v0.8b, #7 movi v9.16b, #9 movi v31.8b, #31 @@ -215,6 +213,8 @@ endfunction // Main program entry point .globl _start function _start + enable_gcs + mov x23, #0 // signal count mov w0, #SIGINT diff --git a/tools/testing/selftests/arm64/fp/kernel-test.c b/tools/testing/selftests/arm64/fp/kernel-test.c index e8da3b4cbd23..859345379044 100644 --- a/tools/testing/selftests/arm64/fp/kernel-test.c +++ b/tools/testing/selftests/arm64/fp/kernel-test.c @@ -267,6 +267,10 @@ int main(void) strerror(errno), errno); sa.sa_sigaction = handle_kick_signal; + ret = sigaction(SIGUSR1, &sa, NULL); + if (ret < 0) + printf("Failed to install SIGUSR1 handler: %s (%d)\n", + strerror(errno), errno); ret = sigaction(SIGUSR2, &sa, NULL); if (ret < 0) printf("Failed to install SIGUSR2 handler: %s (%d)\n", diff --git a/tools/testing/selftests/arm64/fp/sme-inst.h b/tools/testing/selftests/arm64/fp/sme-inst.h index 9292bba5400b..85b9184e0835 100644 --- a/tools/testing/selftests/arm64/fp/sme-inst.h +++ b/tools/testing/selftests/arm64/fp/sme-inst.h @@ -5,6 +5,8 @@ #ifndef SME_INST_H #define SME_INST_H +#define REG_FPMR S3_3_C4_C4_2 + /* * RDSVL X\nx, #\imm */ diff --git a/tools/testing/selftests/arm64/fp/sve-ptrace.c b/tools/testing/selftests/arm64/fp/sve-ptrace.c index 6d61992fe8a0..577b6e05e860 100644 --- a/tools/testing/selftests/arm64/fp/sve-ptrace.c +++ b/tools/testing/selftests/arm64/fp/sve-ptrace.c @@ -82,10 +82,12 @@ static void fill_buf(char *buf, size_t size) static int do_child(void) { if (ptrace(PTRACE_TRACEME, -1, NULL, NULL)) - ksft_exit_fail_msg("PTRACE_TRACEME", strerror(errno)); + ksft_exit_fail_msg("ptrace(PTRACE_TRACEME) failed: %s (%d)\n", + strerror(errno), errno); if (raise(SIGSTOP)) - ksft_exit_fail_msg("raise(SIGSTOP)", strerror(errno)); + ksft_exit_fail_msg("raise(SIGSTOP) failed: %s (%d)\n", + strerror(errno), errno); return EXIT_SUCCESS; } @@ -340,7 +342,7 @@ static void ptrace_set_sve_get_sve_data(pid_t child, data_size = SVE_PT_SVE_OFFSET + SVE_PT_SVE_SIZE(vq, SVE_PT_REGS_SVE); write_buf = malloc(data_size); if (!write_buf) { - ksft_test_result_fail("Error allocating %d byte buffer for %s VL %u\n", + ksft_test_result_fail("Error allocating %ld byte buffer for %s VL %u\n", data_size, type->name, vl); return; } @@ -441,7 +443,7 @@ static void ptrace_set_sve_get_fpsimd_data(pid_t child, data_size = SVE_PT_SVE_OFFSET + SVE_PT_SVE_SIZE(vq, SVE_PT_REGS_SVE); write_buf = malloc(data_size); if (!write_buf) { - ksft_test_result_fail("Error allocating %d byte buffer for %s VL %u\n", + ksft_test_result_fail("Error allocating %ld byte buffer for %s VL %u\n", data_size, type->name, vl); return; } @@ -545,7 +547,7 @@ static void ptrace_set_fpsimd_get_sve_data(pid_t child, read_sve = read_buf; if (read_sve->vl != vl) { - ksft_test_result_fail("Child VL != expected VL %d\n", + ksft_test_result_fail("Child VL != expected VL: %u != %u\n", read_sve->vl, vl); goto out; } @@ -555,7 +557,7 @@ static void ptrace_set_fpsimd_get_sve_data(pid_t child, case SVE_PT_REGS_FPSIMD: expected_size = SVE_PT_FPSIMD_SIZE(vq, SVE_PT_REGS_FPSIMD); if (read_sve_size < expected_size) { - ksft_test_result_fail("Read %d bytes, expected %d\n", + ksft_test_result_fail("Read %ld bytes, expected %ld\n", read_sve_size, expected_size); goto out; } @@ -571,7 +573,7 @@ static void ptrace_set_fpsimd_get_sve_data(pid_t child, case SVE_PT_REGS_SVE: expected_size = SVE_PT_SVE_SIZE(vq, SVE_PT_REGS_SVE); if (read_sve_size < expected_size) { - ksft_test_result_fail("Read %d bytes, expected %d\n", + ksft_test_result_fail("Read %ld bytes, expected %ld\n", read_sve_size, expected_size); goto out; } diff --git a/tools/testing/selftests/arm64/fp/sve-test.S b/tools/testing/selftests/arm64/fp/sve-test.S index fff60e2a25ad..80e072f221cd 100644 --- a/tools/testing/selftests/arm64/fp/sve-test.S +++ b/tools/testing/selftests/arm64/fp/sve-test.S @@ -291,8 +291,7 @@ function check_ffr #endif endfunction -// Any SVE register modified here can cause corruption in the main -// thread -- but *only* the registers modified here. +// Modify live register state, the signal return will undo our changes function irritator_handler // Increment the irritation signal count (x23): ldr x0, [x2, #ucontext_regs + 8 * 23] @@ -300,13 +299,12 @@ function irritator_handler str x0, [x2, #ucontext_regs + 8 * 23] // Corrupt some random Z-regs - adr x0, .text + (irritator_handler - .text) / 16 * 16 movi v0.8b, #1 movi v9.16b, #2 movi v31.8b, #3 -#ifndef SSVE // And P0 - rdffr p0.b + ptrue p0.d +#ifndef SSVE // And FFR wrffr p15.b #endif @@ -378,6 +376,8 @@ endfunction // Main program entry point .globl _start function _start + enable_gcs + mov x23, #0 // Irritation signal count mov w0, #SIGINT diff --git a/tools/testing/selftests/arm64/fp/za-ptrace.c b/tools/testing/selftests/arm64/fp/za-ptrace.c index ac27d87396fc..08c777f87ea2 100644 --- a/tools/testing/selftests/arm64/fp/za-ptrace.c +++ b/tools/testing/selftests/arm64/fp/za-ptrace.c @@ -48,10 +48,12 @@ static void fill_buf(char *buf, size_t size) static int do_child(void) { if (ptrace(PTRACE_TRACEME, -1, NULL, NULL)) - ksft_exit_fail_msg("PTRACE_TRACEME", strerror(errno)); + ksft_exit_fail_msg("ptrace(PTRACE_TRACEME) failed: %s (%d)", + strerror(errno), errno); if (raise(SIGSTOP)) - ksft_exit_fail_msg("raise(SIGSTOP)", strerror(errno)); + ksft_exit_fail_msg("raise(SIGSTOP) failed: %s (%d)\n", + strerror(errno), errno); return EXIT_SUCCESS; } @@ -201,7 +203,7 @@ static void ptrace_set_get_data(pid_t child, unsigned int vl) data_size = ZA_PT_SIZE(vq); write_buf = malloc(data_size); if (!write_buf) { - ksft_test_result_fail("Error allocating %d byte buffer for VL %u\n", + ksft_test_result_fail("Error allocating %ld byte buffer for VL %u\n", data_size, vl); return; } diff --git a/tools/testing/selftests/arm64/fp/za-test.S b/tools/testing/selftests/arm64/fp/za-test.S index 095b45531640..9c33e13e9dc4 100644 --- a/tools/testing/selftests/arm64/fp/za-test.S +++ b/tools/testing/selftests/arm64/fp/za-test.S @@ -148,21 +148,16 @@ function check_za b memcmp endfunction -// Any SME register modified here can cause corruption in the main -// thread -- but *only* the locations modified here. +// Modify the live SME register state, signal return will undo our changes function irritator_handler // Increment the irritation signal count (x23): ldr x0, [x2, #ucontext_regs + 8 * 23] add x0, x0, #1 str x0, [x2, #ucontext_regs + 8 * 23] - // Corrupt some random ZA data -#if 0 - adr x0, .text + (irritator_handler - .text) / 16 * 16 - movi v0.8b, #1 - movi v9.16b, #2 - movi v31.8b, #3 -#endif + // This will reset ZA to all bits 0 + smstop + smstart_za ret endfunction @@ -231,6 +226,8 @@ endfunction // Main program entry point .globl _start function _start + enable_gcs + mov x23, #0 // signal count mov w0, #SIGINT diff --git a/tools/testing/selftests/arm64/fp/zt-ptrace.c b/tools/testing/selftests/arm64/fp/zt-ptrace.c index 996d9614a131..584b8d59b7ea 100644 --- a/tools/testing/selftests/arm64/fp/zt-ptrace.c +++ b/tools/testing/selftests/arm64/fp/zt-ptrace.c @@ -43,10 +43,12 @@ static void fill_buf(char *buf, size_t size) static int do_child(void) { if (ptrace(PTRACE_TRACEME, -1, NULL, NULL)) - ksft_exit_fail_msg("PTRACE_TRACEME", strerror(errno)); + ksft_exit_fail_msg("ptrace(PTRACE_TRACEME) failed: %s (%d)\n", + strerror(errno), errno); if (raise(SIGSTOP)) - ksft_exit_fail_msg("raise(SIGSTOP)", strerror(errno)); + ksft_exit_fail_msg("raise(SIGSTOP) failed: %s (%d)\n", + strerror(errno), errno); return EXIT_SUCCESS; } @@ -231,7 +233,7 @@ static void ptrace_enable_za_via_zt(pid_t child) /* Should have register data */ if (za_out->size < ZA_PT_SIZE(vq)) { ksft_print_msg("ZA data less than expected: %u < %u\n", - za_out->size, ZA_PT_SIZE(vq)); + za_out->size, (unsigned int)ZA_PT_SIZE(vq)); fail = true; vq = 0; } diff --git a/tools/testing/selftests/arm64/fp/zt-test.S b/tools/testing/selftests/arm64/fp/zt-test.S index b5c81e81a379..38080f3c3280 100644 --- a/tools/testing/selftests/arm64/fp/zt-test.S +++ b/tools/testing/selftests/arm64/fp/zt-test.S @@ -117,21 +117,16 @@ function check_zt b memcmp endfunction -// Any SME register modified here can cause corruption in the main -// thread -- but *only* the locations modified here. +// Modify the live SME register state, signal return will undo our changes function irritator_handler // Increment the irritation signal count (x23): ldr x0, [x2, #ucontext_regs + 8 * 23] add x0, x0, #1 str x0, [x2, #ucontext_regs + 8 * 23] - // Corrupt some random ZT data -#if 0 - adr x0, .text + (irritator_handler - .text) / 16 * 16 - movi v0.8b, #1 - movi v9.16b, #2 - movi v31.8b, #3 -#endif + // This will reset ZT to all bits 0 + smstop + smstart_za ret endfunction @@ -200,6 +195,8 @@ endfunction // Main program entry point .globl _start function _start + enable_gcs + mov x23, #0 // signal count mov w0, #SIGINT diff --git a/tools/testing/selftests/arm64/gcs/.gitignore b/tools/testing/selftests/arm64/gcs/.gitignore new file mode 100644 index 000000000000..bbb8e40a7e52 --- /dev/null +++ b/tools/testing/selftests/arm64/gcs/.gitignore @@ -0,0 +1,7 @@ +basic-gcs +libc-gcs +gcs-locking +gcs-stress +gcs-stress-thread +gcspushm +gcsstr diff --git a/tools/testing/selftests/arm64/gcs/Makefile b/tools/testing/selftests/arm64/gcs/Makefile new file mode 100644 index 000000000000..d2f3497a9103 --- /dev/null +++ b/tools/testing/selftests/arm64/gcs/Makefile @@ -0,0 +1,30 @@ +# SPDX-License-Identifier: GPL-2.0 +# Copyright (C) 2023 ARM Limited +# +# In order to avoid interaction with the toolchain and dynamic linker the +# portions of these tests that interact with the GCS are implemented using +# nolibc. +# + +TEST_GEN_PROGS := basic-gcs libc-gcs gcs-locking gcs-stress gcspushm gcsstr +TEST_GEN_PROGS_EXTENDED := gcs-stress-thread + +LDLIBS+=-lpthread + +include ../../lib.mk + +$(OUTPUT)/basic-gcs: basic-gcs.c + $(CC) -g -fno-asynchronous-unwind-tables -fno-ident -s -Os -nostdlib \ + -static -include ../../../../include/nolibc/nolibc.h \ + -I../../../../../usr/include \ + -std=gnu99 -I../.. -g \ + -ffreestanding -Wall $^ -o $@ -lgcc + +$(OUTPUT)/gcs-stress-thread: gcs-stress-thread.S + $(CC) -nostdlib $^ -o $@ + +$(OUTPUT)/gcspushm: gcspushm.S + $(CC) -nostdlib $^ -o $@ + +$(OUTPUT)/gcsstr: gcsstr.S + $(CC) -nostdlib $^ -o $@ diff --git a/tools/testing/selftests/arm64/gcs/asm-offsets.h b/tools/testing/selftests/arm64/gcs/asm-offsets.h new file mode 100644 index 000000000000..e69de29bb2d1 --- /dev/null +++ b/tools/testing/selftests/arm64/gcs/asm-offsets.h diff --git a/tools/testing/selftests/arm64/gcs/basic-gcs.c b/tools/testing/selftests/arm64/gcs/basic-gcs.c new file mode 100644 index 000000000000..3fb9742342a3 --- /dev/null +++ b/tools/testing/selftests/arm64/gcs/basic-gcs.c @@ -0,0 +1,357 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2023 ARM Limited. + */ + +#include <limits.h> +#include <stdbool.h> + +#include <linux/prctl.h> + +#include <sys/mman.h> +#include <asm/mman.h> +#include <linux/sched.h> + +#include "kselftest.h" +#include "gcs-util.h" + +/* nolibc doesn't have sysconf(), just hard code the maximum */ +static size_t page_size = 65536; + +static __attribute__((noinline)) void valid_gcs_function(void) +{ + /* Do something the compiler can't optimise out */ + my_syscall1(__NR_prctl, PR_SVE_GET_VL); +} + +static inline int gcs_set_status(unsigned long mode) +{ + bool enabling = mode & PR_SHADOW_STACK_ENABLE; + int ret; + unsigned long new_mode; + + /* + * The prctl takes 1 argument but we need to ensure that the + * other 3 values passed in registers to the syscall are zero + * since the kernel validates them. + */ + ret = my_syscall5(__NR_prctl, PR_SET_SHADOW_STACK_STATUS, mode, + 0, 0, 0); + + if (ret == 0) { + ret = my_syscall5(__NR_prctl, PR_GET_SHADOW_STACK_STATUS, + &new_mode, 0, 0, 0); + if (ret == 0) { + if (new_mode != mode) { + ksft_print_msg("Mode set to %lx not %lx\n", + new_mode, mode); + ret = -EINVAL; + } + } else { + ksft_print_msg("Failed to validate mode: %d\n", ret); + } + + if (enabling != chkfeat_gcs()) { + ksft_print_msg("%senabled by prctl but %senabled in CHKFEAT\n", + enabling ? "" : "not ", + chkfeat_gcs() ? "" : "not "); + ret = -EINVAL; + } + } + + return ret; +} + +/* Try to read the status */ +static bool read_status(void) +{ + unsigned long state; + int ret; + + ret = my_syscall5(__NR_prctl, PR_GET_SHADOW_STACK_STATUS, + &state, 0, 0, 0); + if (ret != 0) { + ksft_print_msg("Failed to read state: %d\n", ret); + return false; + } + + return state & PR_SHADOW_STACK_ENABLE; +} + +/* Just a straight enable */ +static bool base_enable(void) +{ + int ret; + + ret = gcs_set_status(PR_SHADOW_STACK_ENABLE); + if (ret) { + ksft_print_msg("PR_SHADOW_STACK_ENABLE failed %d\n", ret); + return false; + } + + return true; +} + +/* Check we can read GCSPR_EL0 when GCS is enabled */ +static bool read_gcspr_el0(void) +{ + unsigned long *gcspr_el0; + + ksft_print_msg("GET GCSPR\n"); + gcspr_el0 = get_gcspr(); + ksft_print_msg("GCSPR_EL0 is %p\n", gcspr_el0); + + return true; +} + +/* Also allow writes to stack */ +static bool enable_writeable(void) +{ + int ret; + + ret = gcs_set_status(PR_SHADOW_STACK_ENABLE | PR_SHADOW_STACK_WRITE); + if (ret) { + ksft_print_msg("PR_SHADOW_STACK_ENABLE writeable failed: %d\n", ret); + return false; + } + + ret = gcs_set_status(PR_SHADOW_STACK_ENABLE); + if (ret) { + ksft_print_msg("failed to restore plain enable %d\n", ret); + return false; + } + + return true; +} + +/* Also allow writes to stack */ +static bool enable_push_pop(void) +{ + int ret; + + ret = gcs_set_status(PR_SHADOW_STACK_ENABLE | PR_SHADOW_STACK_PUSH); + if (ret) { + ksft_print_msg("PR_SHADOW_STACK_ENABLE with push failed: %d\n", + ret); + return false; + } + + ret = gcs_set_status(PR_SHADOW_STACK_ENABLE); + if (ret) { + ksft_print_msg("failed to restore plain enable %d\n", ret); + return false; + } + + return true; +} + +/* Enable GCS and allow everything */ +static bool enable_all(void) +{ + int ret; + + ret = gcs_set_status(PR_SHADOW_STACK_ENABLE | PR_SHADOW_STACK_PUSH | + PR_SHADOW_STACK_WRITE); + if (ret) { + ksft_print_msg("PR_SHADOW_STACK_ENABLE with everything failed: %d\n", + ret); + return false; + } + + ret = gcs_set_status(PR_SHADOW_STACK_ENABLE); + if (ret) { + ksft_print_msg("failed to restore plain enable %d\n", ret); + return false; + } + + return true; +} + +static bool enable_invalid(void) +{ + int ret = gcs_set_status(ULONG_MAX); + if (ret == 0) { + ksft_print_msg("GCS_SET_STATUS %lx succeeded\n", ULONG_MAX); + return false; + } + + return true; +} + +/* Map a GCS */ +static bool map_guarded_stack(void) +{ + int ret; + uint64_t *buf; + uint64_t expected_cap; + int elem; + bool pass = true; + + buf = (void *)my_syscall3(__NR_map_shadow_stack, 0, page_size, + SHADOW_STACK_SET_MARKER | + SHADOW_STACK_SET_TOKEN); + if (buf == MAP_FAILED) { + ksft_print_msg("Failed to map %lu byte GCS: %d\n", + page_size, errno); + return false; + } + ksft_print_msg("Mapped GCS at %p-%p\n", buf, + (void *)((uint64_t)buf + page_size)); + + /* The top of the newly allocated region should be 0 */ + elem = (page_size / sizeof(uint64_t)) - 1; + if (buf[elem]) { + ksft_print_msg("Last entry is 0x%llx not 0x0\n", buf[elem]); + pass = false; + } + + /* Then a valid cap token */ + elem--; + expected_cap = ((uint64_t)buf + page_size - 16); + expected_cap &= GCS_CAP_ADDR_MASK; + expected_cap |= GCS_CAP_VALID_TOKEN; + if (buf[elem] != expected_cap) { + ksft_print_msg("Cap entry is 0x%llx not 0x%llx\n", + buf[elem], expected_cap); + pass = false; + } + ksft_print_msg("cap token is 0x%llx\n", buf[elem]); + + /* The rest should be zeros */ + for (elem = 0; elem < page_size / sizeof(uint64_t) - 2; elem++) { + if (!buf[elem]) + continue; + ksft_print_msg("GCS slot %d is 0x%llx not 0x0\n", + elem, buf[elem]); + pass = false; + } + + ret = munmap(buf, page_size); + if (ret != 0) { + ksft_print_msg("Failed to unmap %ld byte GCS: %d\n", + page_size, errno); + pass = false; + } + + return pass; +} + +/* A fork()ed process can run */ +static bool test_fork(void) +{ + unsigned long child_mode; + int ret, status; + pid_t pid; + bool pass = true; + + pid = fork(); + if (pid == -1) { + ksft_print_msg("fork() failed: %d\n", errno); + pass = false; + goto out; + } + if (pid == 0) { + /* In child, make sure we can call a function, read + * the GCS pointer and status and then exit */ + valid_gcs_function(); + get_gcspr(); + + ret = my_syscall5(__NR_prctl, PR_GET_SHADOW_STACK_STATUS, + &child_mode, 0, 0, 0); + if (ret == 0 && !(child_mode & PR_SHADOW_STACK_ENABLE)) { + ksft_print_msg("GCS not enabled in child\n"); + ret = -EINVAL; + } + + exit(ret); + } + + /* + * In parent, check we can still do function calls then block + * for the child. + */ + valid_gcs_function(); + + ksft_print_msg("Waiting for child %d\n", pid); + + ret = waitpid(pid, &status, 0); + if (ret == -1) { + ksft_print_msg("Failed to wait for child: %d\n", + errno); + return false; + } + + if (!WIFEXITED(status)) { + ksft_print_msg("Child exited due to signal %d\n", + WTERMSIG(status)); + pass = false; + } else { + if (WEXITSTATUS(status)) { + ksft_print_msg("Child exited with status %d\n", + WEXITSTATUS(status)); + pass = false; + } + } + +out: + + return pass; +} + +typedef bool (*gcs_test)(void); + +static struct { + char *name; + gcs_test test; + bool needs_enable; +} tests[] = { + { "read_status", read_status }, + { "base_enable", base_enable, true }, + { "read_gcspr_el0", read_gcspr_el0 }, + { "enable_writeable", enable_writeable, true }, + { "enable_push_pop", enable_push_pop, true }, + { "enable_all", enable_all, true }, + { "enable_invalid", enable_invalid, true }, + { "map_guarded_stack", map_guarded_stack }, + { "fork", test_fork }, +}; + +int main(void) +{ + int i, ret; + unsigned long gcs_mode; + + ksft_print_header(); + + /* + * We don't have getauxval() with nolibc so treat a failure to + * read GCS state as a lack of support and skip. + */ + ret = my_syscall5(__NR_prctl, PR_GET_SHADOW_STACK_STATUS, + &gcs_mode, 0, 0, 0); + if (ret != 0) + ksft_exit_skip("Failed to read GCS state: %d\n", ret); + + if (!(gcs_mode & PR_SHADOW_STACK_ENABLE)) { + gcs_mode = PR_SHADOW_STACK_ENABLE; + ret = my_syscall5(__NR_prctl, PR_SET_SHADOW_STACK_STATUS, + gcs_mode, 0, 0, 0); + if (ret != 0) + ksft_exit_fail_msg("Failed to enable GCS: %d\n", ret); + } + + ksft_set_plan(ARRAY_SIZE(tests)); + + for (i = 0; i < ARRAY_SIZE(tests); i++) { + ksft_test_result((*tests[i].test)(), "%s\n", tests[i].name); + } + + /* One last test: disable GCS, we can do this one time */ + my_syscall5(__NR_prctl, PR_SET_SHADOW_STACK_STATUS, 0, 0, 0, 0); + if (ret != 0) + ksft_print_msg("Failed to disable GCS: %d\n", ret); + + ksft_finished(); + + return 0; +} diff --git a/tools/testing/selftests/arm64/gcs/gcs-locking.c b/tools/testing/selftests/arm64/gcs/gcs-locking.c new file mode 100644 index 000000000000..989f75a491b7 --- /dev/null +++ b/tools/testing/selftests/arm64/gcs/gcs-locking.c @@ -0,0 +1,200 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2023 ARM Limited. + * + * Tests for GCS mode locking. These tests rely on both having GCS + * unconfigured on entry and on the kselftest harness running each + * test in a fork()ed process which will have it's own mode. + */ + +#include <limits.h> + +#include <sys/auxv.h> +#include <sys/prctl.h> + +#include <asm/hwcap.h> + +#include "kselftest_harness.h" + +#include "gcs-util.h" + +#define my_syscall2(num, arg1, arg2) \ +({ \ + register long _num __asm__ ("x8") = (num); \ + register long _arg1 __asm__ ("x0") = (long)(arg1); \ + register long _arg2 __asm__ ("x1") = (long)(arg2); \ + register long _arg3 __asm__ ("x2") = 0; \ + register long _arg4 __asm__ ("x3") = 0; \ + register long _arg5 __asm__ ("x4") = 0; \ + \ + __asm__ volatile ( \ + "svc #0\n" \ + : "=r"(_arg1) \ + : "r"(_arg1), "r"(_arg2), \ + "r"(_arg3), "r"(_arg4), \ + "r"(_arg5), "r"(_num) \ + : "memory", "cc" \ + ); \ + _arg1; \ +}) + +/* No mode bits are rejected for locking */ +TEST(lock_all_modes) +{ + int ret; + + ret = prctl(PR_LOCK_SHADOW_STACK_STATUS, ULONG_MAX, 0, 0, 0); + ASSERT_EQ(ret, 0); +} + +FIXTURE(valid_modes) +{ +}; + +FIXTURE_VARIANT(valid_modes) +{ + unsigned long mode; +}; + +FIXTURE_VARIANT_ADD(valid_modes, enable) +{ + .mode = PR_SHADOW_STACK_ENABLE, +}; + +FIXTURE_VARIANT_ADD(valid_modes, enable_write) +{ + .mode = PR_SHADOW_STACK_ENABLE | PR_SHADOW_STACK_WRITE, +}; + +FIXTURE_VARIANT_ADD(valid_modes, enable_push) +{ + .mode = PR_SHADOW_STACK_ENABLE | PR_SHADOW_STACK_PUSH, +}; + +FIXTURE_VARIANT_ADD(valid_modes, enable_write_push) +{ + .mode = PR_SHADOW_STACK_ENABLE | PR_SHADOW_STACK_WRITE | + PR_SHADOW_STACK_PUSH, +}; + +FIXTURE_SETUP(valid_modes) +{ +} + +FIXTURE_TEARDOWN(valid_modes) +{ +} + +/* We can set the mode at all */ +TEST_F(valid_modes, set) +{ + int ret; + + ret = my_syscall2(__NR_prctl, PR_SET_SHADOW_STACK_STATUS, + variant->mode); + ASSERT_EQ(ret, 0); + + _exit(0); +} + +/* Enabling, locking then disabling is rejected */ +TEST_F(valid_modes, enable_lock_disable) +{ + unsigned long mode; + int ret; + + ret = my_syscall2(__NR_prctl, PR_SET_SHADOW_STACK_STATUS, + variant->mode); + ASSERT_EQ(ret, 0); + + ret = prctl(PR_GET_SHADOW_STACK_STATUS, &mode, 0, 0, 0); + ASSERT_EQ(ret, 0); + ASSERT_EQ(mode, variant->mode); + + ret = prctl(PR_LOCK_SHADOW_STACK_STATUS, variant->mode, 0, 0, 0); + ASSERT_EQ(ret, 0); + + ret = my_syscall2(__NR_prctl, PR_SET_SHADOW_STACK_STATUS, 0); + ASSERT_EQ(ret, -EBUSY); + + _exit(0); +} + +/* Locking then enabling is rejected */ +TEST_F(valid_modes, lock_enable) +{ + unsigned long mode; + int ret; + + ret = prctl(PR_LOCK_SHADOW_STACK_STATUS, variant->mode, 0, 0, 0); + ASSERT_EQ(ret, 0); + + ret = my_syscall2(__NR_prctl, PR_SET_SHADOW_STACK_STATUS, + variant->mode); + ASSERT_EQ(ret, -EBUSY); + + ret = prctl(PR_GET_SHADOW_STACK_STATUS, &mode, 0, 0, 0); + ASSERT_EQ(ret, 0); + ASSERT_EQ(mode, 0); + + _exit(0); +} + +/* Locking then changing other modes is fine */ +TEST_F(valid_modes, lock_enable_disable_others) +{ + unsigned long mode; + int ret; + + ret = my_syscall2(__NR_prctl, PR_SET_SHADOW_STACK_STATUS, + variant->mode); + ASSERT_EQ(ret, 0); + + ret = prctl(PR_GET_SHADOW_STACK_STATUS, &mode, 0, 0, 0); + ASSERT_EQ(ret, 0); + ASSERT_EQ(mode, variant->mode); + + ret = prctl(PR_LOCK_SHADOW_STACK_STATUS, variant->mode, 0, 0, 0); + ASSERT_EQ(ret, 0); + + ret = my_syscall2(__NR_prctl, PR_SET_SHADOW_STACK_STATUS, + PR_SHADOW_STACK_ALL_MODES); + ASSERT_EQ(ret, 0); + + ret = prctl(PR_GET_SHADOW_STACK_STATUS, &mode, 0, 0, 0); + ASSERT_EQ(ret, 0); + ASSERT_EQ(mode, PR_SHADOW_STACK_ALL_MODES); + + + ret = my_syscall2(__NR_prctl, PR_SET_SHADOW_STACK_STATUS, + variant->mode); + ASSERT_EQ(ret, 0); + + ret = prctl(PR_GET_SHADOW_STACK_STATUS, &mode, 0, 0, 0); + ASSERT_EQ(ret, 0); + ASSERT_EQ(mode, variant->mode); + + _exit(0); +} + +int main(int argc, char **argv) +{ + unsigned long mode; + int ret; + + if (!(getauxval(AT_HWCAP) & HWCAP_GCS)) + ksft_exit_skip("SKIP GCS not supported\n"); + + ret = prctl(PR_GET_SHADOW_STACK_STATUS, &mode, 0, 0, 0); + if (ret) { + ksft_print_msg("Failed to read GCS state: %d\n", ret); + return EXIT_FAILURE; + } + + if (mode & PR_SHADOW_STACK_ENABLE) { + ksft_print_msg("GCS was enabled, test unsupported\n"); + return KSFT_SKIP; + } + + return test_harness_run(argc, argv); +} diff --git a/tools/testing/selftests/arm64/gcs/gcs-stress-thread.S b/tools/testing/selftests/arm64/gcs/gcs-stress-thread.S new file mode 100644 index 000000000000..b88b25217da5 --- /dev/null +++ b/tools/testing/selftests/arm64/gcs/gcs-stress-thread.S @@ -0,0 +1,311 @@ +// Program that loops for ever doing lots of recursions and system calls, +// intended to be used as part of a stress test for GCS context switching. +// +// Copyright 2015-2023 Arm Ltd + +#include <asm/unistd.h> + +#define sa_sz 32 +#define sa_flags 8 +#define sa_handler 0 +#define sa_mask_sz 8 + +#define si_code 8 + +#define SIGINT 2 +#define SIGABRT 6 +#define SIGUSR1 10 +#define SIGSEGV 11 +#define SIGUSR2 12 +#define SIGTERM 15 +#define SEGV_CPERR 10 + +#define SA_NODEFER 1073741824 +#define SA_SIGINFO 4 +#define ucontext_regs 184 + +#define PR_SET_SHADOW_STACK_STATUS 75 +# define PR_SHADOW_STACK_ENABLE (1UL << 0) + +#define GCSPR_EL0 S3_3_C2_C5_1 + +.macro function name + .macro endfunction + .type \name, @function + .purgem endfunction + .endm +\name: +.endm + +// Print a single character x0 to stdout +// Clobbers x0-x2,x8 +function putc + str x0, [sp, #-16]! + + mov x0, #1 // STDOUT_FILENO + mov x1, sp + mov x2, #1 + mov x8, #__NR_write + svc #0 + + add sp, sp, #16 + ret +endfunction +.globl putc + +// Print a NUL-terminated string starting at address x0 to stdout +// Clobbers x0-x3,x8 +function puts + mov x1, x0 + + mov x2, #0 +0: ldrb w3, [x0], #1 + cbz w3, 1f + add x2, x2, #1 + b 0b + +1: mov w0, #1 // STDOUT_FILENO + mov x8, #__NR_write + svc #0 + + ret +endfunction +.globl puts + +// Utility macro to print a literal string +// Clobbers x0-x4,x8 +.macro puts string + .pushsection .rodata.str1.1, "aMS", @progbits, 1 +.L__puts_literal\@: .string "\string" + .popsection + + ldr x0, =.L__puts_literal\@ + bl puts +.endm + +// Print an unsigned decimal number x0 to stdout +// Clobbers x0-x4,x8 +function putdec + mov x1, sp + str x30, [sp, #-32]! // Result can't be > 20 digits + + mov x2, #0 + strb w2, [x1, #-1]! // Write the NUL terminator + + mov x2, #10 +0: udiv x3, x0, x2 // div-mod loop to generate the digits + msub x0, x3, x2, x0 + add w0, w0, #'0' + strb w0, [x1, #-1]! + mov x0, x3 + cbnz x3, 0b + + ldrb w0, [x1] + cbnz w0, 1f + mov w0, #'0' // Print "0" for 0, not "" + strb w0, [x1, #-1]! + +1: mov x0, x1 + bl puts + + ldr x30, [sp], #32 + ret +endfunction +.globl putdec + +// Print an unsigned decimal number x0 to stdout, followed by a newline +// Clobbers x0-x5,x8 +function putdecn + mov x5, x30 + + bl putdec + mov x0, #'\n' + bl putc + + ret x5 +endfunction +.globl putdecn + +// Fill x1 bytes starting at x0 with 0. +// Clobbers x1, x2. +function memclr + mov w2, #0 +endfunction +.globl memclr + // fall through to memfill + +// Trivial memory fill: fill x1 bytes starting at address x0 with byte w2 +// Clobbers x1 +function memfill + cmp x1, #0 + b.eq 1f + +0: strb w2, [x0], #1 + subs x1, x1, #1 + b.ne 0b + +1: ret +endfunction +.globl memfill + +// w0: signal number +// x1: sa_action +// w2: sa_flags +// Clobbers x0-x6,x8 +function setsignal + str x30, [sp, #-((sa_sz + 15) / 16 * 16 + 16)]! + + mov w4, w0 + mov x5, x1 + mov w6, w2 + + add x0, sp, #16 + mov x1, #sa_sz + bl memclr + + mov w0, w4 + add x1, sp, #16 + str w6, [x1, #sa_flags] + str x5, [x1, #sa_handler] + mov x2, #0 + mov x3, #sa_mask_sz + mov x8, #__NR_rt_sigaction + svc #0 + + cbz w0, 1f + + puts "sigaction failure\n" + b abort + +1: ldr x30, [sp], #((sa_sz + 15) / 16 * 16 + 16) + ret +endfunction + + +function tickle_handler + // Perhaps collect GCSPR_EL0 here in future? + ret +endfunction + +function terminate_handler + mov w21, w0 + mov x20, x2 + + puts "Terminated by signal " + mov w0, w21 + bl putdec + puts ", no error\n" + + mov x0, #0 + mov x8, #__NR_exit + svc #0 +endfunction + +function segv_handler + // stash the siginfo_t * + mov x20, x1 + + // Disable GCS, we don't want additional faults logging things + mov x0, PR_SET_SHADOW_STACK_STATUS + mov x1, xzr + mov x2, xzr + mov x3, xzr + mov x4, xzr + mov x5, xzr + mov x8, #__NR_prctl + svc #0 + + puts "Got SIGSEGV code " + + ldr x21, [x20, #si_code] + mov x0, x21 + bl putdec + + // GCS faults should have si_code SEGV_CPERR + cmp x21, #SEGV_CPERR + bne 1f + + puts " (GCS violation)" +1: + mov x0, '\n' + bl putc + b abort +endfunction + +// Recurse x20 times +.macro recurse id +function recurse\id + stp x29, x30, [sp, #-16]! + mov x29, sp + + cmp x20, 0 + beq 1f + sub x20, x20, 1 + bl recurse\id + +1: + ldp x29, x30, [sp], #16 + + // Do a syscall immediately prior to returning to try to provoke + // scheduling and migration at a point where coherency issues + // might trigger. + mov x8, #__NR_getpid + svc #0 + + ret +endfunction +.endm + +// Generate and use two copies so we're changing the GCS contents +recurse 1 +recurse 2 + +.globl _start +function _start + // Run with GCS + mov x0, PR_SET_SHADOW_STACK_STATUS + mov x1, PR_SHADOW_STACK_ENABLE + mov x2, xzr + mov x3, xzr + mov x4, xzr + mov x5, xzr + mov x8, #__NR_prctl + svc #0 + cbz x0, 1f + puts "Failed to enable GCS\n" + b abort +1: + + mov w0, #SIGTERM + adr x1, terminate_handler + mov w2, #SA_SIGINFO + bl setsignal + + mov w0, #SIGUSR1 + adr x1, tickle_handler + mov w2, #SA_SIGINFO + orr w2, w2, #SA_NODEFER + bl setsignal + + mov w0, #SIGSEGV + adr x1, segv_handler + mov w2, #SA_SIGINFO + orr w2, w2, #SA_NODEFER + bl setsignal + + puts "Running\n" + +loop: + // Small recursion depth so we're frequently flipping between + // the two recursors and changing what's on the stack + mov x20, #5 + bl recurse1 + mov x20, #5 + bl recurse2 + b loop +endfunction + +abort: + mov x0, #255 + mov x8, #__NR_exit + svc #0 diff --git a/tools/testing/selftests/arm64/gcs/gcs-stress.c b/tools/testing/selftests/arm64/gcs/gcs-stress.c new file mode 100644 index 000000000000..bbc7f4950c13 --- /dev/null +++ b/tools/testing/selftests/arm64/gcs/gcs-stress.c @@ -0,0 +1,530 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2022-3 ARM Limited. + */ + +#define _GNU_SOURCE +#define _POSIX_C_SOURCE 199309L + +#include <errno.h> +#include <getopt.h> +#include <poll.h> +#include <signal.h> +#include <stdbool.h> +#include <stddef.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <sys/auxv.h> +#include <sys/epoll.h> +#include <sys/prctl.h> +#include <sys/types.h> +#include <sys/uio.h> +#include <sys/wait.h> +#include <asm/hwcap.h> + +#include "../../kselftest.h" + +struct child_data { + char *name, *output; + pid_t pid; + int stdout; + bool output_seen; + bool exited; + int exit_status; + int exit_signal; +}; + +static int epoll_fd; +static struct child_data *children; +static struct epoll_event *evs; +static int tests; +static int num_children; +static bool terminate; + +static int startup_pipe[2]; + +static int num_processors(void) +{ + long nproc = sysconf(_SC_NPROCESSORS_CONF); + if (nproc < 0) { + perror("Unable to read number of processors\n"); + exit(EXIT_FAILURE); + } + + return nproc; +} + +static void start_thread(struct child_data *child, int id) +{ + int ret, pipefd[2], i; + struct epoll_event ev; + + ret = pipe(pipefd); + if (ret != 0) + ksft_exit_fail_msg("Failed to create stdout pipe: %s (%d)\n", + strerror(errno), errno); + + child->pid = fork(); + if (child->pid == -1) + ksft_exit_fail_msg("fork() failed: %s (%d)\n", + strerror(errno), errno); + + if (!child->pid) { + /* + * In child, replace stdout with the pipe, errors to + * stderr from here as kselftest prints to stdout. + */ + ret = dup2(pipefd[1], 1); + if (ret == -1) { + fprintf(stderr, "dup2() %d\n", errno); + exit(EXIT_FAILURE); + } + + /* + * Duplicate the read side of the startup pipe to + * FD 3 so we can close everything else. + */ + ret = dup2(startup_pipe[0], 3); + if (ret == -1) { + fprintf(stderr, "dup2() %d\n", errno); + exit(EXIT_FAILURE); + } + + /* + * Very dumb mechanism to clean open FDs other than + * stdio. We don't want O_CLOEXEC for the pipes... + */ + for (i = 4; i < 8192; i++) + close(i); + + /* + * Read from the startup pipe, there should be no data + * and we should block until it is closed. We just + * carry on on error since this isn't super critical. + */ + ret = read(3, &i, sizeof(i)); + if (ret < 0) + fprintf(stderr, "read(startp pipe) failed: %s (%d)\n", + strerror(errno), errno); + if (ret > 0) + fprintf(stderr, "%d bytes of data on startup pipe\n", + ret); + close(3); + + ret = execl("gcs-stress-thread", "gcs-stress-thread", NULL); + fprintf(stderr, "execl(gcs-stress-thread) failed: %d (%s)\n", + errno, strerror(errno)); + + exit(EXIT_FAILURE); + } else { + /* + * In parent, remember the child and close our copy of the + * write side of stdout. + */ + close(pipefd[1]); + child->stdout = pipefd[0]; + child->output = NULL; + child->exited = false; + child->output_seen = false; + + ev.events = EPOLLIN | EPOLLHUP; + ev.data.ptr = child; + + ret = asprintf(&child->name, "Thread-%d", id); + if (ret == -1) + ksft_exit_fail_msg("asprintf() failed\n"); + + ret = epoll_ctl(epoll_fd, EPOLL_CTL_ADD, child->stdout, &ev); + if (ret < 0) { + ksft_exit_fail_msg("%s EPOLL_CTL_ADD failed: %s (%d)\n", + child->name, strerror(errno), errno); + } + } + + ksft_print_msg("Started %s\n", child->name); + num_children++; +} + +static bool child_output_read(struct child_data *child) +{ + char read_data[1024]; + char work[1024]; + int ret, len, cur_work, cur_read; + + ret = read(child->stdout, read_data, sizeof(read_data)); + if (ret < 0) { + if (errno == EINTR) + return true; + + ksft_print_msg("%s: read() failed: %s (%d)\n", + child->name, strerror(errno), + errno); + return false; + } + len = ret; + + child->output_seen = true; + + /* Pick up any partial read */ + if (child->output) { + strncpy(work, child->output, sizeof(work) - 1); + cur_work = strnlen(work, sizeof(work)); + free(child->output); + child->output = NULL; + } else { + cur_work = 0; + } + + cur_read = 0; + while (cur_read < len) { + work[cur_work] = read_data[cur_read++]; + + if (work[cur_work] == '\n') { + work[cur_work] = '\0'; + ksft_print_msg("%s: %s\n", child->name, work); + cur_work = 0; + } else { + cur_work++; + } + } + + if (cur_work) { + work[cur_work] = '\0'; + ret = asprintf(&child->output, "%s", work); + if (ret == -1) + ksft_exit_fail_msg("Out of memory\n"); + } + + return false; +} + +static void child_output(struct child_data *child, uint32_t events, + bool flush) +{ + bool read_more; + + if (events & EPOLLIN) { + do { + read_more = child_output_read(child); + } while (read_more); + } + + if (events & EPOLLHUP) { + close(child->stdout); + child->stdout = -1; + flush = true; + } + + if (flush && child->output) { + ksft_print_msg("%s: %s<EOF>\n", child->name, child->output); + free(child->output); + child->output = NULL; + } +} + +static void child_tickle(struct child_data *child) +{ + if (child->output_seen && !child->exited) + kill(child->pid, SIGUSR1); +} + +static void child_stop(struct child_data *child) +{ + if (!child->exited) + kill(child->pid, SIGTERM); +} + +static void child_cleanup(struct child_data *child) +{ + pid_t ret; + int status; + bool fail = false; + + if (!child->exited) { + do { + ret = waitpid(child->pid, &status, 0); + if (ret == -1 && errno == EINTR) + continue; + + if (ret == -1) { + ksft_print_msg("waitpid(%d) failed: %s (%d)\n", + child->pid, strerror(errno), + errno); + fail = true; + break; + } + + if (WIFEXITED(status)) { + child->exit_status = WEXITSTATUS(status); + child->exited = true; + } + + if (WIFSIGNALED(status)) { + child->exit_signal = WTERMSIG(status); + ksft_print_msg("%s: Exited due to signal %d\n", + child->name, child->exit_signal); + fail = true; + child->exited = true; + } + } while (!child->exited); + } + + if (!child->output_seen) { + ksft_print_msg("%s no output seen\n", child->name); + fail = true; + } + + if (child->exit_status != 0) { + ksft_print_msg("%s exited with error code %d\n", + child->name, child->exit_status); + fail = true; + } + + ksft_test_result(!fail, "%s\n", child->name); +} + +static void handle_child_signal(int sig, siginfo_t *info, void *context) +{ + int i; + bool found = false; + + for (i = 0; i < num_children; i++) { + if (children[i].pid == info->si_pid) { + children[i].exited = true; + children[i].exit_status = info->si_status; + found = true; + break; + } + } + + if (!found) + ksft_print_msg("SIGCHLD for unknown PID %d with status %d\n", + info->si_pid, info->si_status); +} + +static void handle_exit_signal(int sig, siginfo_t *info, void *context) +{ + int i; + + /* If we're already exiting then don't signal again */ + if (terminate) + return; + + ksft_print_msg("Got signal, exiting...\n"); + + terminate = true; + + /* + * This should be redundant, the main loop should clean up + * after us, but for safety stop everything we can here. + */ + for (i = 0; i < num_children; i++) + child_stop(&children[i]); +} + +/* Handle any pending output without blocking */ +static void drain_output(bool flush) +{ + int ret = 1; + int i; + + while (ret > 0) { + ret = epoll_wait(epoll_fd, evs, tests, 0); + if (ret < 0) { + if (errno == EINTR) + continue; + ksft_print_msg("epoll_wait() failed: %s (%d)\n", + strerror(errno), errno); + } + + for (i = 0; i < ret; i++) + child_output(evs[i].data.ptr, evs[i].events, flush); + } +} + +static const struct option options[] = { + { "timeout", required_argument, NULL, 't' }, + { } +}; + +int main(int argc, char **argv) +{ + int seen_children; + bool all_children_started = false; + int gcs_threads; + int timeout = 10; + int ret, cpus, i, c; + struct sigaction sa; + + while ((c = getopt_long(argc, argv, "t:", options, NULL)) != -1) { + switch (c) { + case 't': + ret = sscanf(optarg, "%d", &timeout); + if (ret != 1) + ksft_exit_fail_msg("Failed to parse timeout %s\n", + optarg); + break; + default: + ksft_exit_fail_msg("Unknown argument\n"); + } + } + + cpus = num_processors(); + tests = 0; + + if (getauxval(AT_HWCAP) & HWCAP_GCS) { + /* One extra thread, trying to trigger migrations */ + gcs_threads = cpus + 1; + tests += gcs_threads; + } else { + gcs_threads = 0; + } + + ksft_print_header(); + ksft_set_plan(tests); + + ksft_print_msg("%d CPUs, %d GCS threads\n", + cpus, gcs_threads); + + if (!tests) + ksft_exit_skip("No tests scheduled\n"); + + if (timeout > 0) + ksft_print_msg("Will run for %ds\n", timeout); + else + ksft_print_msg("Will run until terminated\n"); + + children = calloc(sizeof(*children), tests); + if (!children) + ksft_exit_fail_msg("Unable to allocate child data\n"); + + ret = epoll_create1(EPOLL_CLOEXEC); + if (ret < 0) + ksft_exit_fail_msg("epoll_create1() failed: %s (%d)\n", + strerror(errno), ret); + epoll_fd = ret; + + /* Create a pipe which children will block on before execing */ + ret = pipe(startup_pipe); + if (ret != 0) + ksft_exit_fail_msg("Failed to create startup pipe: %s (%d)\n", + strerror(errno), errno); + + /* Get signal handers ready before we start any children */ + memset(&sa, 0, sizeof(sa)); + sa.sa_sigaction = handle_exit_signal; + sa.sa_flags = SA_RESTART | SA_SIGINFO; + sigemptyset(&sa.sa_mask); + ret = sigaction(SIGINT, &sa, NULL); + if (ret < 0) + ksft_print_msg("Failed to install SIGINT handler: %s (%d)\n", + strerror(errno), errno); + ret = sigaction(SIGTERM, &sa, NULL); + if (ret < 0) + ksft_print_msg("Failed to install SIGTERM handler: %s (%d)\n", + strerror(errno), errno); + sa.sa_sigaction = handle_child_signal; + ret = sigaction(SIGCHLD, &sa, NULL); + if (ret < 0) + ksft_print_msg("Failed to install SIGCHLD handler: %s (%d)\n", + strerror(errno), errno); + + evs = calloc(tests, sizeof(*evs)); + if (!evs) + ksft_exit_fail_msg("Failed to allocated %d epoll events\n", + tests); + + for (i = 0; i < gcs_threads; i++) + start_thread(&children[i], i); + + /* + * All children started, close the startup pipe and let them + * run. + */ + close(startup_pipe[0]); + close(startup_pipe[1]); + + timeout *= 10; + for (;;) { + /* Did we get a signal asking us to exit? */ + if (terminate) + break; + + /* + * Timeout is counted in 100ms with no output, the + * tests print during startup then are silent when + * running so this should ensure they all ran enough + * to install the signal handler, this is especially + * useful in emulation where we will both be slow and + * likely to have a large set of VLs. + */ + ret = epoll_wait(epoll_fd, evs, tests, 100); + if (ret < 0) { + if (errno == EINTR) + continue; + ksft_exit_fail_msg("epoll_wait() failed: %s (%d)\n", + strerror(errno), errno); + } + + /* Output? */ + if (ret > 0) { + for (i = 0; i < ret; i++) { + child_output(evs[i].data.ptr, evs[i].events, + false); + } + continue; + } + + /* Otherwise epoll_wait() timed out */ + + /* + * If the child processes have not produced output they + * aren't actually running the tests yet. + */ + if (!all_children_started) { + seen_children = 0; + + for (i = 0; i < num_children; i++) + if (children[i].output_seen || + children[i].exited) + seen_children++; + + if (seen_children != num_children) { + ksft_print_msg("Waiting for %d children\n", + num_children - seen_children); + continue; + } + + all_children_started = true; + } + + ksft_print_msg("Sending signals, timeout remaining: %d00ms\n", + timeout); + + for (i = 0; i < num_children; i++) + child_tickle(&children[i]); + + /* Negative timeout means run indefinitely */ + if (timeout < 0) + continue; + if (--timeout == 0) + break; + } + + ksft_print_msg("Finishing up...\n"); + terminate = true; + + for (i = 0; i < tests; i++) + child_stop(&children[i]); + + drain_output(false); + + for (i = 0; i < tests; i++) + child_cleanup(&children[i]); + + drain_output(true); + + ksft_finished(); +} diff --git a/tools/testing/selftests/arm64/gcs/gcs-util.h b/tools/testing/selftests/arm64/gcs/gcs-util.h new file mode 100644 index 000000000000..c99a6b39ac14 --- /dev/null +++ b/tools/testing/selftests/arm64/gcs/gcs-util.h @@ -0,0 +1,100 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2023 ARM Limited. + */ + +#ifndef GCS_UTIL_H +#define GCS_UTIL_H + +#include <stdbool.h> + +#ifndef __NR_map_shadow_stack +#define __NR_map_shadow_stack 453 +#endif + +#ifndef __NR_prctl +#define __NR_prctl 167 +#endif + +#ifndef NT_ARM_GCS +#define NT_ARM_GCS 0x410 + +struct user_gcs { + __u64 features_enabled; + __u64 features_locked; + __u64 gcspr_el0; +}; +#endif + +/* Shadow Stack/Guarded Control Stack interface */ +#define PR_GET_SHADOW_STACK_STATUS 74 +#define PR_SET_SHADOW_STACK_STATUS 75 +#define PR_LOCK_SHADOW_STACK_STATUS 76 + +# define PR_SHADOW_STACK_ENABLE (1UL << 0) +# define PR_SHADOW_STACK_WRITE (1UL << 1) +# define PR_SHADOW_STACK_PUSH (1UL << 2) + +#define PR_SHADOW_STACK_ALL_MODES \ + PR_SHADOW_STACK_ENABLE | PR_SHADOW_STACK_WRITE | PR_SHADOW_STACK_PUSH + +#define SHADOW_STACK_SET_TOKEN (1ULL << 0) /* Set up a restore token in the shadow stack */ +#define SHADOW_STACK_SET_MARKER (1ULL << 1) /* Set up a top of stack merker in the shadow stack */ + +#define GCS_CAP_ADDR_MASK (0xfffffffffffff000UL) +#define GCS_CAP_TOKEN_MASK (0x0000000000000fffUL) +#define GCS_CAP_VALID_TOKEN 1 +#define GCS_CAP_IN_PROGRESS_TOKEN 5 + +#define GCS_CAP(x) (((unsigned long)(x) & GCS_CAP_ADDR_MASK) | \ + GCS_CAP_VALID_TOKEN) + +static inline unsigned long *get_gcspr(void) +{ + unsigned long *gcspr; + + asm volatile( + "mrs %0, S3_3_C2_C5_1" + : "=r" (gcspr) + : + : "cc"); + + return gcspr; +} + +static inline void __attribute__((always_inline)) gcsss1(unsigned long *Xt) +{ + asm volatile ( + "sys #3, C7, C7, #2, %0\n" + : + : "rZ" (Xt) + : "memory"); +} + +static inline unsigned long __attribute__((always_inline)) *gcsss2(void) +{ + unsigned long *Xt; + + asm volatile( + "SYSL %0, #3, C7, C7, #3\n" + : "=r" (Xt) + : + : "memory"); + + return Xt; +} + +static inline bool chkfeat_gcs(void) +{ + register long val __asm__ ("x16") = 1; + + /* CHKFEAT x16 */ + asm volatile( + "hint #0x28\n" + : "=r" (val) + : "r" (val)); + + return val != 1; +} + +#endif diff --git a/tools/testing/selftests/arm64/gcs/gcspushm.S b/tools/testing/selftests/arm64/gcs/gcspushm.S new file mode 100644 index 000000000000..bbe17c1325ac --- /dev/null +++ b/tools/testing/selftests/arm64/gcs/gcspushm.S @@ -0,0 +1,96 @@ +// SPDX-License-Identifier: GPL-2.0-only +// +// Copyright 2024 Arm Limited +// +// Give ourselves GCS push permissions then use them + +#include <asm/unistd.h> + +/* Shadow Stack/Guarded Control Stack interface */ +#define PR_GET_SHADOW_STACK_STATUS 74 +#define PR_SET_SHADOW_STACK_STATUS 75 +#define PR_LOCK_SHADOW_STACK_STATUS 76 + +# define PR_SHADOW_STACK_ENABLE (1UL << 0) +# define PR_SHADOW_STACK_WRITE (1UL << 1) +# define PR_SHADOW_STACK_PUSH (1UL << 2) + +#define KSFT_SKIP 4 + +.macro function name + .macro endfunction + .type \name, @function + .purgem endfunction + .endm +\name: +.endm + +// Print a single character x0 to stdout +// Clobbers x0-x2,x8 +function putc + str x0, [sp, #-16]! + + mov x0, #1 // STDOUT_FILENO + mov x1, sp + mov x2, #1 + mov x8, #__NR_write + svc #0 + + add sp, sp, #16 + ret +endfunction +.globl putc + +// Print a NUL-terminated string starting at address x0 to stdout +// Clobbers x0-x3,x8 +function puts + mov x1, x0 + + mov x2, #0 +0: ldrb w3, [x0], #1 + cbz w3, 1f + add x2, x2, #1 + b 0b + +1: mov w0, #1 // STDOUT_FILENO + mov x8, #__NR_write + svc #0 + + ret +endfunction +.globl puts + +// Utility macro to print a literal string +// Clobbers x0-x4,x8 +.macro puts string + .pushsection .rodata.str1.1, "aMS", @progbits, 1 +.L__puts_literal\@: .string "\string" + .popsection + + ldr x0, =.L__puts_literal\@ + bl puts +.endm + +.globl _start +function _start + // Run with GCS + mov x0, PR_SET_SHADOW_STACK_STATUS + mov x1, PR_SHADOW_STACK_ENABLE | PR_SHADOW_STACK_PUSH + mov x2, xzr + mov x3, xzr + mov x4, xzr + mov x5, xzr + mov x8, #__NR_prctl + svc #0 + cbz x0, 1f + puts "Failed to enable GCS with push permission\n" + mov x0, #KSFT_SKIP + b 2f +1: + sys #3, c7, c7, #0, x0 // GCSPUSHM + sysl x0, #3, c7, c7, #1 // GCSPOPM + + mov x0, #0 +2: + mov x8, #__NR_exit + svc #0 diff --git a/tools/testing/selftests/arm64/gcs/gcsstr.S b/tools/testing/selftests/arm64/gcs/gcsstr.S new file mode 100644 index 000000000000..a42bba6e30b1 --- /dev/null +++ b/tools/testing/selftests/arm64/gcs/gcsstr.S @@ -0,0 +1,99 @@ +// SPDX-License-Identifier: GPL-2.0-only +// +// Copyright 2024 Arm Limited +// +// Give ourselves GCS write permissions then use them + +#include <asm/unistd.h> + +/* Shadow Stack/Guarded Control Stack interface */ +#define PR_GET_SHADOW_STACK_STATUS 74 +#define PR_SET_SHADOW_STACK_STATUS 75 +#define PR_LOCK_SHADOW_STACK_STATUS 76 + +# define PR_SHADOW_STACK_ENABLE (1UL << 0) +# define PR_SHADOW_STACK_WRITE (1UL << 1) +# define PR_SHADOW_STACK_PUSH (1UL << 2) + +#define GCSPR_EL0 S3_3_C2_C5_1 + +#define KSFT_SKIP 4 + +.macro function name + .macro endfunction + .type \name, @function + .purgem endfunction + .endm +\name: +.endm + +// Print a single character x0 to stdout +// Clobbers x0-x2,x8 +function putc + str x0, [sp, #-16]! + + mov x0, #1 // STDOUT_FILENO + mov x1, sp + mov x2, #1 + mov x8, #__NR_write + svc #0 + + add sp, sp, #16 + ret +endfunction +.globl putc + +// Print a NUL-terminated string starting at address x0 to stdout +// Clobbers x0-x3,x8 +function puts + mov x1, x0 + + mov x2, #0 +0: ldrb w3, [x0], #1 + cbz w3, 1f + add x2, x2, #1 + b 0b + +1: mov w0, #1 // STDOUT_FILENO + mov x8, #__NR_write + svc #0 + + ret +endfunction +.globl puts + +// Utility macro to print a literal string +// Clobbers x0-x4,x8 +.macro puts string + .pushsection .rodata.str1.1, "aMS", @progbits, 1 +.L__puts_literal\@: .string "\string" + .popsection + + ldr x0, =.L__puts_literal\@ + bl puts +.endm + +.globl _start +function _start + // Run with GCS + mov x0, PR_SET_SHADOW_STACK_STATUS + mov x1, PR_SHADOW_STACK_ENABLE | PR_SHADOW_STACK_WRITE + mov x2, xzr + mov x3, xzr + mov x4, xzr + mov x5, xzr + mov x8, #__NR_prctl + svc #0 + cbz x0, 1f + puts "Failed to enable GCS with write permission\n" + mov x0, #KSFT_SKIP + b 2f +1: + mrs x0, GCSPR_EL0 + sub x0, x0, #8 + .inst 0xd91f1c01 // GCSSTR x1, x0 + + mov x0, #0 +2: + mov x8, #__NR_exit + svc #0 diff --git a/tools/testing/selftests/arm64/gcs/libc-gcs.c b/tools/testing/selftests/arm64/gcs/libc-gcs.c new file mode 100644 index 000000000000..17b2fabfec38 --- /dev/null +++ b/tools/testing/selftests/arm64/gcs/libc-gcs.c @@ -0,0 +1,728 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2023 ARM Limited. + */ + +#define _GNU_SOURCE + +#include <pthread.h> +#include <stdbool.h> + +#include <sys/auxv.h> +#include <sys/mman.h> +#include <sys/prctl.h> +#include <sys/ptrace.h> +#include <sys/uio.h> + +#include <asm/hwcap.h> +#include <asm/mman.h> + +#include <linux/compiler.h> + +#include "kselftest_harness.h" + +#include "gcs-util.h" + +#define my_syscall2(num, arg1, arg2) \ +({ \ + register long _num __asm__ ("x8") = (num); \ + register long _arg1 __asm__ ("x0") = (long)(arg1); \ + register long _arg2 __asm__ ("x1") = (long)(arg2); \ + register long _arg3 __asm__ ("x2") = 0; \ + register long _arg4 __asm__ ("x3") = 0; \ + register long _arg5 __asm__ ("x4") = 0; \ + \ + __asm__ volatile ( \ + "svc #0\n" \ + : "=r"(_arg1) \ + : "r"(_arg1), "r"(_arg2), \ + "r"(_arg3), "r"(_arg4), \ + "r"(_arg5), "r"(_num) \ + : "memory", "cc" \ + ); \ + _arg1; \ +}) + +static noinline void gcs_recurse(int depth) +{ + if (depth) + gcs_recurse(depth - 1); + + /* Prevent tail call optimization so we actually recurse */ + asm volatile("dsb sy" : : : "memory"); +} + +/* Smoke test that a function call and return works*/ +TEST(can_call_function) +{ + gcs_recurse(0); +} + +static void *gcs_test_thread(void *arg) +{ + int ret; + unsigned long mode; + + /* + * Some libcs don't seem to fill unused arguments with 0 but + * the kernel validates this so we supply all 5 arguments. + */ + ret = prctl(PR_GET_SHADOW_STACK_STATUS, &mode, 0, 0, 0); + if (ret != 0) { + ksft_print_msg("PR_GET_SHADOW_STACK_STATUS failed: %d\n", ret); + return NULL; + } + + if (!(mode & PR_SHADOW_STACK_ENABLE)) { + ksft_print_msg("GCS not enabled in thread, mode is %lu\n", + mode); + return NULL; + } + + /* Just in case... */ + gcs_recurse(0); + + /* Use a non-NULL value to indicate a pass */ + return &gcs_test_thread; +} + +/* Verify that if we start a new thread it has GCS enabled */ +TEST(gcs_enabled_thread) +{ + pthread_t thread; + void *thread_ret; + int ret; + + ret = pthread_create(&thread, NULL, gcs_test_thread, NULL); + ASSERT_TRUE(ret == 0); + if (ret != 0) + return; + + ret = pthread_join(thread, &thread_ret); + ASSERT_TRUE(ret == 0); + if (ret != 0) + return; + + ASSERT_TRUE(thread_ret != NULL); +} + +/* Read the GCS until we find the terminator */ +TEST(gcs_find_terminator) +{ + unsigned long *gcs, *cur; + + gcs = get_gcspr(); + cur = gcs; + while (*cur) + cur++; + + ksft_print_msg("GCS in use from %p-%p\n", gcs, cur); + + /* + * We should have at least whatever called into this test so + * the two pointer should differ. + */ + ASSERT_TRUE(gcs != cur); +} + +/* + * We can access a GCS via ptrace + * + * This could usefully have a fixture but note that each test is + * fork()ed into a new child whcih causes issues. Might be better to + * lift at least some of this out into a separate, non-harness, test + * program. + */ +TEST(ptrace_read_write) +{ + pid_t child, pid; + int ret, status; + siginfo_t si; + uint64_t val, rval, gcspr; + struct user_gcs child_gcs; + struct iovec iov, local_iov, remote_iov; + + child = fork(); + if (child == -1) { + ksft_print_msg("fork() failed: %d (%s)\n", + errno, strerror(errno)); + ASSERT_NE(child, -1); + } + + if (child == 0) { + /* + * In child, make sure there's something on the stack and + * ask to be traced. + */ + gcs_recurse(0); + if (ptrace(PTRACE_TRACEME, -1, NULL, NULL)) + ksft_exit_fail_msg("PTRACE_TRACEME %s", + strerror(errno)); + + if (raise(SIGSTOP)) + ksft_exit_fail_msg("raise(SIGSTOP) %s", + strerror(errno)); + + return; + } + + ksft_print_msg("Child: %d\n", child); + + /* Attach to the child */ + while (1) { + int sig; + + pid = wait(&status); + if (pid == -1) { + ksft_print_msg("wait() failed: %s", + strerror(errno)); + goto error; + } + + /* + * This should never happen but it's hard to flag in + * the framework. + */ + if (pid != child) + continue; + + if (WIFEXITED(status) || WIFSIGNALED(status)) + ksft_exit_fail_msg("Child died unexpectedly\n"); + + if (!WIFSTOPPED(status)) + goto error; + + sig = WSTOPSIG(status); + + if (ptrace(PTRACE_GETSIGINFO, pid, NULL, &si)) { + if (errno == ESRCH) { + ASSERT_NE(errno, ESRCH); + return; + } + + if (errno == EINVAL) { + sig = 0; /* bust group-stop */ + goto cont; + } + + ksft_print_msg("PTRACE_GETSIGINFO: %s\n", + strerror(errno)); + goto error; + } + + if (sig == SIGSTOP && si.si_code == SI_TKILL && + si.si_pid == pid) + break; + + cont: + if (ptrace(PTRACE_CONT, pid, NULL, sig)) { + if (errno == ESRCH) { + ASSERT_NE(errno, ESRCH); + return; + } + + ksft_print_msg("PTRACE_CONT: %s\n", strerror(errno)); + goto error; + } + } + + /* Where is the child GCS? */ + iov.iov_base = &child_gcs; + iov.iov_len = sizeof(child_gcs); + ret = ptrace(PTRACE_GETREGSET, child, NT_ARM_GCS, &iov); + if (ret != 0) { + ksft_print_msg("Failed to read child GCS state: %s (%d)\n", + strerror(errno), errno); + goto error; + } + + /* We should have inherited GCS over fork(), confirm */ + if (!(child_gcs.features_enabled & PR_SHADOW_STACK_ENABLE)) { + ASSERT_TRUE(child_gcs.features_enabled & + PR_SHADOW_STACK_ENABLE); + goto error; + } + + gcspr = child_gcs.gcspr_el0; + ksft_print_msg("Child GCSPR 0x%lx, flags %llx, locked %llx\n", + gcspr, child_gcs.features_enabled, + child_gcs.features_locked); + + /* Ideally we'd cross check with the child memory map */ + + errno = 0; + val = ptrace(PTRACE_PEEKDATA, child, (void *)gcspr, NULL); + ret = errno; + if (ret != 0) + ksft_print_msg("PTRACE_PEEKDATA failed: %s (%d)\n", + strerror(ret), ret); + EXPECT_EQ(ret, 0); + + /* The child should be in a function, the GCSPR shouldn't be 0 */ + EXPECT_NE(val, 0); + + /* Same thing via process_vm_readv() */ + local_iov.iov_base = &rval; + local_iov.iov_len = sizeof(rval); + remote_iov.iov_base = (void *)gcspr; + remote_iov.iov_len = sizeof(rval); + ret = process_vm_readv(child, &local_iov, 1, &remote_iov, 1, 0); + if (ret == -1) + ksft_print_msg("process_vm_readv() failed: %s (%d)\n", + strerror(errno), errno); + EXPECT_EQ(ret, sizeof(rval)); + EXPECT_EQ(val, rval); + + /* Write data via a peek */ + ret = ptrace(PTRACE_POKEDATA, child, (void *)gcspr, NULL); + if (ret == -1) + ksft_print_msg("PTRACE_POKEDATA failed: %s (%d)\n", + strerror(errno), errno); + EXPECT_EQ(ret, 0); + EXPECT_EQ(0, ptrace(PTRACE_PEEKDATA, child, (void *)gcspr, NULL)); + + /* Restore what we had before */ + ret = ptrace(PTRACE_POKEDATA, child, (void *)gcspr, val); + if (ret == -1) + ksft_print_msg("PTRACE_POKEDATA failed: %s (%d)\n", + strerror(errno), errno); + EXPECT_EQ(ret, 0); + EXPECT_EQ(val, ptrace(PTRACE_PEEKDATA, child, (void *)gcspr, NULL)); + + /* That's all, folks */ + kill(child, SIGKILL); + return; + +error: + kill(child, SIGKILL); + ASSERT_FALSE(true); +} + +FIXTURE(map_gcs) +{ + unsigned long *stack; +}; + +FIXTURE_VARIANT(map_gcs) +{ + size_t stack_size; + unsigned long flags; +}; + +FIXTURE_VARIANT_ADD(map_gcs, s2k_cap_marker) +{ + .stack_size = 2 * 1024, + .flags = SHADOW_STACK_SET_MARKER | SHADOW_STACK_SET_TOKEN, +}; + +FIXTURE_VARIANT_ADD(map_gcs, s2k_cap) +{ + .stack_size = 2 * 1024, + .flags = SHADOW_STACK_SET_TOKEN, +}; + +FIXTURE_VARIANT_ADD(map_gcs, s2k_marker) +{ + .stack_size = 2 * 1024, + .flags = SHADOW_STACK_SET_MARKER, +}; + +FIXTURE_VARIANT_ADD(map_gcs, s2k) +{ + .stack_size = 2 * 1024, + .flags = 0, +}; + +FIXTURE_VARIANT_ADD(map_gcs, s4k_cap_marker) +{ + .stack_size = 4 * 1024, + .flags = SHADOW_STACK_SET_MARKER | SHADOW_STACK_SET_TOKEN, +}; + +FIXTURE_VARIANT_ADD(map_gcs, s4k_cap) +{ + .stack_size = 4 * 1024, + .flags = SHADOW_STACK_SET_TOKEN, +}; + +FIXTURE_VARIANT_ADD(map_gcs, s3k_marker) +{ + .stack_size = 4 * 1024, + .flags = SHADOW_STACK_SET_MARKER, +}; + +FIXTURE_VARIANT_ADD(map_gcs, s4k) +{ + .stack_size = 4 * 1024, + .flags = 0, +}; + +FIXTURE_VARIANT_ADD(map_gcs, s16k_cap_marker) +{ + .stack_size = 16 * 1024, + .flags = SHADOW_STACK_SET_MARKER | SHADOW_STACK_SET_TOKEN, +}; + +FIXTURE_VARIANT_ADD(map_gcs, s16k_cap) +{ + .stack_size = 16 * 1024, + .flags = SHADOW_STACK_SET_TOKEN, +}; + +FIXTURE_VARIANT_ADD(map_gcs, s16k_marker) +{ + .stack_size = 16 * 1024, + .flags = SHADOW_STACK_SET_MARKER, +}; + +FIXTURE_VARIANT_ADD(map_gcs, s16k) +{ + .stack_size = 16 * 1024, + .flags = 0, +}; + +FIXTURE_VARIANT_ADD(map_gcs, s64k_cap_marker) +{ + .stack_size = 64 * 1024, + .flags = SHADOW_STACK_SET_MARKER | SHADOW_STACK_SET_TOKEN, +}; + +FIXTURE_VARIANT_ADD(map_gcs, s64k_cap) +{ + .stack_size = 64 * 1024, + .flags = SHADOW_STACK_SET_TOKEN, +}; + +FIXTURE_VARIANT_ADD(map_gcs, s64k_marker) +{ + .stack_size = 64 * 1024, + .flags = SHADOW_STACK_SET_MARKER, +}; + +FIXTURE_VARIANT_ADD(map_gcs, s64k) +{ + .stack_size = 64 * 1024, + .flags = 0, +}; + +FIXTURE_VARIANT_ADD(map_gcs, s128k_cap_marker) +{ + .stack_size = 128 * 1024, + .flags = SHADOW_STACK_SET_MARKER | SHADOW_STACK_SET_TOKEN, +}; + +FIXTURE_VARIANT_ADD(map_gcs, s128k_cap) +{ + .stack_size = 128 * 1024, + .flags = SHADOW_STACK_SET_TOKEN, +}; + +FIXTURE_VARIANT_ADD(map_gcs, s128k_marker) +{ + .stack_size = 128 * 1024, + .flags = SHADOW_STACK_SET_MARKER, +}; + +FIXTURE_VARIANT_ADD(map_gcs, s128k) +{ + .stack_size = 128 * 1024, + .flags = 0, +}; + +FIXTURE_SETUP(map_gcs) +{ + self->stack = (void *)syscall(__NR_map_shadow_stack, 0, + variant->stack_size, + variant->flags); + ASSERT_FALSE(self->stack == MAP_FAILED); + ksft_print_msg("Allocated stack from %p-%p\n", self->stack, + self->stack + variant->stack_size); +} + +FIXTURE_TEARDOWN(map_gcs) +{ + int ret; + + if (self->stack != MAP_FAILED) { + ret = munmap(self->stack, variant->stack_size); + ASSERT_EQ(ret, 0); + } +} + +/* The stack has a cap token */ +TEST_F(map_gcs, stack_capped) +{ + unsigned long *stack = self->stack; + size_t cap_index; + + cap_index = (variant->stack_size / sizeof(unsigned long)); + + switch (variant->flags & (SHADOW_STACK_SET_MARKER | SHADOW_STACK_SET_TOKEN)) { + case SHADOW_STACK_SET_MARKER | SHADOW_STACK_SET_TOKEN: + cap_index -= 2; + break; + case SHADOW_STACK_SET_TOKEN: + cap_index -= 1; + break; + case SHADOW_STACK_SET_MARKER: + case 0: + /* No cap, no test */ + return; + } + + ASSERT_EQ(stack[cap_index], GCS_CAP(&stack[cap_index])); +} + +/* The top of the stack is 0 */ +TEST_F(map_gcs, stack_terminated) +{ + unsigned long *stack = self->stack; + size_t term_index; + + if (!(variant->flags & SHADOW_STACK_SET_MARKER)) + return; + + term_index = (variant->stack_size / sizeof(unsigned long)) - 1; + + ASSERT_EQ(stack[term_index], 0); +} + +/* Writes should fault */ +TEST_F_SIGNAL(map_gcs, not_writeable, SIGSEGV) +{ + self->stack[0] = 0; +} + +/* Put it all together, we can safely switch to and from the stack */ +TEST_F(map_gcs, stack_switch) +{ + size_t cap_index; + cap_index = (variant->stack_size / sizeof(unsigned long)); + unsigned long *orig_gcspr_el0, *pivot_gcspr_el0; + + /* Skip over the stack terminator and point at the cap */ + switch (variant->flags & (SHADOW_STACK_SET_MARKER | SHADOW_STACK_SET_TOKEN)) { + case SHADOW_STACK_SET_MARKER | SHADOW_STACK_SET_TOKEN: + cap_index -= 2; + break; + case SHADOW_STACK_SET_TOKEN: + cap_index -= 1; + break; + case SHADOW_STACK_SET_MARKER: + case 0: + /* No cap, no test */ + return; + } + pivot_gcspr_el0 = &self->stack[cap_index]; + + /* Pivot to the new GCS */ + ksft_print_msg("Pivoting to %p from %p, target has value 0x%lx\n", + pivot_gcspr_el0, get_gcspr(), + *pivot_gcspr_el0); + gcsss1(pivot_gcspr_el0); + orig_gcspr_el0 = gcsss2(); + ksft_print_msg("Pivoted to %p from %p, target has value 0x%lx\n", + get_gcspr(), orig_gcspr_el0, + *pivot_gcspr_el0); + + ksft_print_msg("Pivoted, GCSPR_EL0 now %p\n", get_gcspr()); + + /* New GCS must be in the new buffer */ + ASSERT_TRUE((unsigned long)get_gcspr() > (unsigned long)self->stack); + ASSERT_TRUE((unsigned long)get_gcspr() <= + (unsigned long)self->stack + variant->stack_size); + + /* We should be able to use all but 2 slots of the new stack */ + ksft_print_msg("Recursing %zu levels\n", cap_index - 1); + gcs_recurse(cap_index - 1); + + /* Pivot back to the original GCS */ + gcsss1(orig_gcspr_el0); + pivot_gcspr_el0 = gcsss2(); + + gcs_recurse(0); + ksft_print_msg("Pivoted back to GCSPR_EL0 0x%p\n", get_gcspr()); +} + +/* We fault if we try to go beyond the end of the stack */ +TEST_F_SIGNAL(map_gcs, stack_overflow, SIGSEGV) +{ + size_t cap_index; + cap_index = (variant->stack_size / sizeof(unsigned long)); + unsigned long *orig_gcspr_el0, *pivot_gcspr_el0; + + /* Skip over the stack terminator and point at the cap */ + switch (variant->flags & (SHADOW_STACK_SET_MARKER | SHADOW_STACK_SET_TOKEN)) { + case SHADOW_STACK_SET_MARKER | SHADOW_STACK_SET_TOKEN: + cap_index -= 2; + break; + case SHADOW_STACK_SET_TOKEN: + cap_index -= 1; + break; + case SHADOW_STACK_SET_MARKER: + case 0: + /* No cap, no test but we need to SEGV to avoid a false fail */ + orig_gcspr_el0 = get_gcspr(); + *orig_gcspr_el0 = 0; + return; + } + pivot_gcspr_el0 = &self->stack[cap_index]; + + /* Pivot to the new GCS */ + ksft_print_msg("Pivoting to %p from %p, target has value 0x%lx\n", + pivot_gcspr_el0, get_gcspr(), + *pivot_gcspr_el0); + gcsss1(pivot_gcspr_el0); + orig_gcspr_el0 = gcsss2(); + ksft_print_msg("Pivoted to %p from %p, target has value 0x%lx\n", + pivot_gcspr_el0, orig_gcspr_el0, + *pivot_gcspr_el0); + + ksft_print_msg("Pivoted, GCSPR_EL0 now %p\n", get_gcspr()); + + /* New GCS must be in the new buffer */ + ASSERT_TRUE((unsigned long)get_gcspr() > (unsigned long)self->stack); + ASSERT_TRUE((unsigned long)get_gcspr() <= + (unsigned long)self->stack + variant->stack_size); + + /* Now try to recurse, we should fault doing this. */ + ksft_print_msg("Recursing %zu levels...\n", cap_index + 1); + gcs_recurse(cap_index + 1); + ksft_print_msg("...done\n"); + + /* Clean up properly to try to guard against spurious passes. */ + gcsss1(orig_gcspr_el0); + pivot_gcspr_el0 = gcsss2(); + ksft_print_msg("Pivoted back to GCSPR_EL0 0x%p\n", get_gcspr()); +} + +FIXTURE(map_invalid_gcs) +{ +}; + +FIXTURE_VARIANT(map_invalid_gcs) +{ + size_t stack_size; +}; + +FIXTURE_SETUP(map_invalid_gcs) +{ +} + +FIXTURE_TEARDOWN(map_invalid_gcs) +{ +} + +/* GCS must be larger than 16 bytes */ +FIXTURE_VARIANT_ADD(map_invalid_gcs, too_small) +{ + .stack_size = 8, +}; + +/* GCS size must be 16 byte aligned */ +FIXTURE_VARIANT_ADD(map_invalid_gcs, unligned_1) { .stack_size = 1024 + 1 }; +FIXTURE_VARIANT_ADD(map_invalid_gcs, unligned_2) { .stack_size = 1024 + 2 }; +FIXTURE_VARIANT_ADD(map_invalid_gcs, unligned_3) { .stack_size = 1024 + 3 }; +FIXTURE_VARIANT_ADD(map_invalid_gcs, unligned_4) { .stack_size = 1024 + 4 }; +FIXTURE_VARIANT_ADD(map_invalid_gcs, unligned_5) { .stack_size = 1024 + 5 }; +FIXTURE_VARIANT_ADD(map_invalid_gcs, unligned_6) { .stack_size = 1024 + 6 }; +FIXTURE_VARIANT_ADD(map_invalid_gcs, unligned_7) { .stack_size = 1024 + 7 }; + +TEST_F(map_invalid_gcs, do_map) +{ + void *stack; + + stack = (void *)syscall(__NR_map_shadow_stack, 0, + variant->stack_size, 0); + ASSERT_TRUE(stack == MAP_FAILED); + if (stack != MAP_FAILED) + munmap(stack, variant->stack_size); +} + +FIXTURE(invalid_mprotect) +{ + unsigned long *stack; + size_t stack_size; +}; + +FIXTURE_VARIANT(invalid_mprotect) +{ + unsigned long flags; +}; + +FIXTURE_SETUP(invalid_mprotect) +{ + self->stack_size = sysconf(_SC_PAGE_SIZE); + self->stack = (void *)syscall(__NR_map_shadow_stack, 0, + self->stack_size, 0); + ASSERT_FALSE(self->stack == MAP_FAILED); + ksft_print_msg("Allocated stack from %p-%p\n", self->stack, + self->stack + self->stack_size); +} + +FIXTURE_TEARDOWN(invalid_mprotect) +{ + int ret; + + if (self->stack != MAP_FAILED) { + ret = munmap(self->stack, self->stack_size); + ASSERT_EQ(ret, 0); + } +} + +FIXTURE_VARIANT_ADD(invalid_mprotect, exec) +{ + .flags = PROT_EXEC, +}; + +TEST_F(invalid_mprotect, do_map) +{ + int ret; + + ret = mprotect(self->stack, self->stack_size, variant->flags); + ASSERT_EQ(ret, -1); +} + +TEST_F(invalid_mprotect, do_map_read) +{ + int ret; + + ret = mprotect(self->stack, self->stack_size, + variant->flags | PROT_READ); + ASSERT_EQ(ret, -1); +} + +int main(int argc, char **argv) +{ + unsigned long gcs_mode; + int ret; + + if (!(getauxval(AT_HWCAP) & HWCAP_GCS)) + ksft_exit_skip("SKIP GCS not supported\n"); + + /* + * Force shadow stacks on, our tests *should* be fine with or + * without libc support and with or without this having ended + * up tagged for GCS and enabled by the dynamic linker. We + * can't use the libc prctl() function since we can't return + * from enabling the stack. + */ + ret = my_syscall2(__NR_prctl, PR_GET_SHADOW_STACK_STATUS, &gcs_mode); + if (ret) { + ksft_print_msg("Failed to read GCS state: %d\n", ret); + return EXIT_FAILURE; + } + + if (!(gcs_mode & PR_SHADOW_STACK_ENABLE)) { + gcs_mode = PR_SHADOW_STACK_ENABLE; + ret = my_syscall2(__NR_prctl, PR_SET_SHADOW_STACK_STATUS, + gcs_mode); + if (ret) { + ksft_print_msg("Failed to configure GCS: %d\n", ret); + return EXIT_FAILURE; + } + } + + /* Avoid returning in case libc doesn't understand GCS */ + exit(test_harness_run(argc, argv)); +} diff --git a/tools/testing/selftests/arm64/mte/check_buffer_fill.c b/tools/testing/selftests/arm64/mte/check_buffer_fill.c index 1dbbbd47dd50..2ee7f114d7fa 100644 --- a/tools/testing/selftests/arm64/mte/check_buffer_fill.c +++ b/tools/testing/selftests/arm64/mte/check_buffer_fill.c @@ -91,7 +91,7 @@ static int check_buffer_underflow_by_byte(int mem_type, int mode, for (j = 0; j < sizes[i]; j++) { if (ptr[j] != '1') { err = true; - ksft_print_msg("Buffer is not filled at index:%d of ptr:0x%lx\n", + ksft_print_msg("Buffer is not filled at index:%d of ptr:0x%p\n", j, ptr); break; } @@ -189,7 +189,7 @@ static int check_buffer_overflow_by_byte(int mem_type, int mode, for (j = 0; j < sizes[i]; j++) { if (ptr[j] != '1') { err = true; - ksft_print_msg("Buffer is not filled at index:%d of ptr:0x%lx\n", + ksft_print_msg("Buffer is not filled at index:%d of ptr:0x%p\n", j, ptr); break; } diff --git a/tools/testing/selftests/arm64/mte/check_hugetlb_options.c b/tools/testing/selftests/arm64/mte/check_hugetlb_options.c new file mode 100644 index 000000000000..303260a6dc65 --- /dev/null +++ b/tools/testing/selftests/arm64/mte/check_hugetlb_options.c @@ -0,0 +1,285 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (C) 2024 Ampere Computing LLC + +#define _GNU_SOURCE + +#include <errno.h> +#include <fcntl.h> +#include <signal.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <ucontext.h> +#include <sys/mman.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <sys/wait.h> + +#include "kselftest.h" +#include "mte_common_util.h" +#include "mte_def.h" + +#define TAG_CHECK_ON 0 +#define TAG_CHECK_OFF 1 + +static unsigned long default_huge_page_size(void) +{ + unsigned long hps = 0; + char *line = NULL; + size_t linelen = 0; + FILE *f = fopen("/proc/meminfo", "r"); + + if (!f) + return 0; + while (getline(&line, &linelen, f) > 0) { + if (sscanf(line, "Hugepagesize: %lu kB", &hps) == 1) { + hps <<= 10; + break; + } + } + + free(line); + fclose(f); + return hps; +} + +static bool is_hugetlb_allocated(void) +{ + unsigned long hps = 0; + char *line = NULL; + size_t linelen = 0; + FILE *f = fopen("/proc/meminfo", "r"); + + if (!f) + return false; + while (getline(&line, &linelen, f) > 0) { + if (sscanf(line, "Hugetlb: %lu kB", &hps) == 1) { + hps <<= 10; + break; + } + } + + free(line); + fclose(f); + + if (hps > 0) + return true; + + return false; +} + +static void write_sysfs(char *str, unsigned long val) +{ + FILE *f; + + f = fopen(str, "w"); + if (!f) { + ksft_print_msg("ERR: missing %s\n", str); + return; + } + fprintf(f, "%lu", val); + fclose(f); +} + +static void allocate_hugetlb() +{ + write_sysfs("/proc/sys/vm/nr_hugepages", 2); +} + +static void free_hugetlb() +{ + write_sysfs("/proc/sys/vm/nr_hugepages", 0); +} + +static int check_child_tag_inheritance(char *ptr, int size, int mode) +{ + int i, parent_tag, child_tag, fault, child_status; + pid_t child; + + parent_tag = MT_FETCH_TAG((uintptr_t)ptr); + fault = 0; + + child = fork(); + if (child == -1) { + ksft_print_msg("FAIL: child process creation\n"); + return KSFT_FAIL; + } else if (child == 0) { + mte_initialize_current_context(mode, (uintptr_t)ptr, size); + /* Do copy on write */ + memset(ptr, '1', size); + mte_wait_after_trig(); + if (cur_mte_cxt.fault_valid == true) { + fault = 1; + goto check_child_tag_inheritance_err; + } + for (i = 0; i < size; i += MT_GRANULE_SIZE) { + child_tag = MT_FETCH_TAG((uintptr_t)(mte_get_tag_address(ptr + i))); + if (parent_tag != child_tag) { + ksft_print_msg("FAIL: child mte tag (%d) mismatch\n", i); + fault = 1; + goto check_child_tag_inheritance_err; + } + } +check_child_tag_inheritance_err: + _exit(fault); + } + /* Wait for child process to terminate */ + wait(&child_status); + if (WIFEXITED(child_status)) + fault = WEXITSTATUS(child_status); + else + fault = 1; + return (fault) ? KSFT_FAIL : KSFT_PASS; +} + +static int check_mte_memory(char *ptr, int size, int mode, int tag_check) +{ + mte_initialize_current_context(mode, (uintptr_t)ptr, size); + memset(ptr, '1', size); + mte_wait_after_trig(); + if (cur_mte_cxt.fault_valid == true) + return KSFT_FAIL; + + return KSFT_PASS; +} + +static int check_hugetlb_memory_mapping(int mem_type, int mode, int mapping, int tag_check) +{ + char *ptr, *map_ptr; + int result; + unsigned long map_size; + + map_size = default_huge_page_size(); + + mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG); + map_ptr = (char *)mte_allocate_memory(map_size, mem_type, mapping, false); + if (check_allocated_memory(map_ptr, map_size, mem_type, false) != KSFT_PASS) + return KSFT_FAIL; + + mte_initialize_current_context(mode, (uintptr_t)map_ptr, map_size); + /* Only mte enabled memory will allow tag insertion */ + ptr = mte_insert_tags((void *)map_ptr, map_size); + if (!ptr || cur_mte_cxt.fault_valid == true) { + ksft_print_msg("FAIL: Insert tags on anonymous mmap memory\n"); + munmap((void *)map_ptr, map_size); + return KSFT_FAIL; + } + result = check_mte_memory(ptr, map_size, mode, tag_check); + mte_clear_tags((void *)ptr, map_size); + mte_free_memory((void *)map_ptr, map_size, mem_type, false); + if (result == KSFT_FAIL) + return KSFT_FAIL; + + return KSFT_PASS; +} + +static int check_clear_prot_mte_flag(int mem_type, int mode, int mapping) +{ + char *map_ptr; + int prot_flag, result; + unsigned long map_size; + + prot_flag = PROT_READ | PROT_WRITE; + mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG); + map_size = default_huge_page_size(); + map_ptr = (char *)mte_allocate_memory_tag_range(map_size, mem_type, mapping, + 0, 0); + if (check_allocated_memory_range(map_ptr, map_size, mem_type, + 0, 0) != KSFT_PASS) + return KSFT_FAIL; + /* Try to clear PROT_MTE property and verify it by tag checking */ + if (mprotect(map_ptr, map_size, prot_flag)) { + mte_free_memory_tag_range((void *)map_ptr, map_size, mem_type, + 0, 0); + ksft_print_msg("FAIL: mprotect not ignoring clear PROT_MTE property\n"); + return KSFT_FAIL; + } + result = check_mte_memory(map_ptr, map_size, mode, TAG_CHECK_ON); + mte_free_memory_tag_range((void *)map_ptr, map_size, mem_type, 0, 0); + if (result != KSFT_PASS) + return KSFT_FAIL; + + return KSFT_PASS; +} + +static int check_child_hugetlb_memory_mapping(int mem_type, int mode, int mapping) +{ + char *ptr; + int result; + unsigned long map_size; + + map_size = default_huge_page_size(); + + mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG); + ptr = (char *)mte_allocate_memory_tag_range(map_size, mem_type, mapping, + 0, 0); + if (check_allocated_memory_range(ptr, map_size, mem_type, + 0, 0) != KSFT_PASS) + return KSFT_FAIL; + result = check_child_tag_inheritance(ptr, map_size, mode); + mte_free_memory_tag_range((void *)ptr, map_size, mem_type, 0, 0); + if (result == KSFT_FAIL) + return result; + + return KSFT_PASS; +} + +int main(int argc, char *argv[]) +{ + int err; + + err = mte_default_setup(); + if (err) + return err; + + /* Register signal handlers */ + mte_register_signal(SIGBUS, mte_default_handler); + mte_register_signal(SIGSEGV, mte_default_handler); + + allocate_hugetlb(); + + if (!is_hugetlb_allocated()) { + ksft_print_msg("ERR: Unable allocate hugetlb pages\n"); + return KSFT_FAIL; + } + + /* Set test plan */ + ksft_set_plan(12); + + mte_enable_pstate_tco(); + + evaluate_test(check_hugetlb_memory_mapping(USE_MMAP, MTE_SYNC_ERR, MAP_PRIVATE | MAP_HUGETLB, TAG_CHECK_OFF), + "Check hugetlb memory with private mapping, sync error mode, mmap memory and tag check off\n"); + + mte_disable_pstate_tco(); + evaluate_test(check_hugetlb_memory_mapping(USE_MMAP, MTE_NONE_ERR, MAP_PRIVATE | MAP_HUGETLB, TAG_CHECK_OFF), + "Check hugetlb memory with private mapping, no error mode, mmap memory and tag check off\n"); + + evaluate_test(check_hugetlb_memory_mapping(USE_MMAP, MTE_SYNC_ERR, MAP_PRIVATE | MAP_HUGETLB, TAG_CHECK_ON), + "Check hugetlb memory with private mapping, sync error mode, mmap memory and tag check on\n"); + evaluate_test(check_hugetlb_memory_mapping(USE_MPROTECT, MTE_SYNC_ERR, MAP_PRIVATE | MAP_HUGETLB, TAG_CHECK_ON), + "Check hugetlb memory with private mapping, sync error mode, mmap/mprotect memory and tag check on\n"); + evaluate_test(check_hugetlb_memory_mapping(USE_MMAP, MTE_ASYNC_ERR, MAP_PRIVATE | MAP_HUGETLB, TAG_CHECK_ON), + "Check hugetlb memory with private mapping, async error mode, mmap memory and tag check on\n"); + evaluate_test(check_hugetlb_memory_mapping(USE_MPROTECT, MTE_ASYNC_ERR, MAP_PRIVATE | MAP_HUGETLB, TAG_CHECK_ON), + "Check hugetlb memory with private mapping, async error mode, mmap/mprotect memory and tag check on\n"); + + evaluate_test(check_clear_prot_mte_flag(USE_MMAP, MTE_SYNC_ERR, MAP_PRIVATE | MAP_HUGETLB), + "Check clear PROT_MTE flags with private mapping, sync error mode and mmap memory\n"); + evaluate_test(check_clear_prot_mte_flag(USE_MPROTECT, MTE_SYNC_ERR, MAP_PRIVATE | MAP_HUGETLB), + "Check clear PROT_MTE flags with private mapping and sync error mode and mmap/mprotect memory\n"); + + evaluate_test(check_child_hugetlb_memory_mapping(USE_MMAP, MTE_SYNC_ERR, MAP_PRIVATE | MAP_HUGETLB), + "Check child hugetlb memory with private mapping, precise mode and mmap memory\n"); + evaluate_test(check_child_hugetlb_memory_mapping(USE_MMAP, MTE_ASYNC_ERR, MAP_PRIVATE | MAP_HUGETLB), + "Check child hugetlb memory with private mapping, precise mode and mmap memory\n"); + evaluate_test(check_child_hugetlb_memory_mapping(USE_MPROTECT, MTE_SYNC_ERR, MAP_PRIVATE | MAP_HUGETLB), + "Check child hugetlb memory with private mapping, precise mode and mmap/mprotect memory\n"); + evaluate_test(check_child_hugetlb_memory_mapping(USE_MPROTECT, MTE_ASYNC_ERR, MAP_PRIVATE | MAP_HUGETLB), + "Check child hugetlb memory with private mapping, precise mode and mmap/mprotect memory\n"); + + mte_restore_setup(); + free_hugetlb(); + ksft_print_cnts(); + return ksft_get_fail_cnt() == 0 ? KSFT_PASS : KSFT_FAIL; +} diff --git a/tools/testing/selftests/arm64/mte/check_prctl.c b/tools/testing/selftests/arm64/mte/check_prctl.c index f139a33a43ef..4c89e9538ca0 100644 --- a/tools/testing/selftests/arm64/mte/check_prctl.c +++ b/tools/testing/selftests/arm64/mte/check_prctl.c @@ -85,7 +85,7 @@ void set_mode_test(const char *name, int hwcap2, int mask) ksft_test_result_pass("%s\n", name); } else { ksft_print_msg("Got %x, expected %x\n", - (ret & PR_MTE_TCF_MASK), mask); + (ret & (int)PR_MTE_TCF_MASK), mask); ksft_test_result_fail("%s\n", name); } } diff --git a/tools/testing/selftests/arm64/mte/check_tags_inclusion.c b/tools/testing/selftests/arm64/mte/check_tags_inclusion.c index 2b1425b92b69..a3d1e23fe02a 100644 --- a/tools/testing/selftests/arm64/mte/check_tags_inclusion.c +++ b/tools/testing/selftests/arm64/mte/check_tags_inclusion.c @@ -65,7 +65,7 @@ static int check_single_included_tags(int mem_type, int mode) ptr = mte_insert_tags(ptr, BUFFER_SIZE); /* Check tag value */ if (MT_FETCH_TAG((uintptr_t)ptr) == tag) { - ksft_print_msg("FAIL: wrong tag = 0x%x with include mask=0x%x\n", + ksft_print_msg("FAIL: wrong tag = 0x%lx with include mask=0x%x\n", MT_FETCH_TAG((uintptr_t)ptr), MT_INCLUDE_VALID_TAG(tag)); result = KSFT_FAIL; @@ -97,7 +97,7 @@ static int check_multiple_included_tags(int mem_type, int mode) ptr = mte_insert_tags(ptr, BUFFER_SIZE); /* Check tag value */ if (MT_FETCH_TAG((uintptr_t)ptr) < tag) { - ksft_print_msg("FAIL: wrong tag = 0x%x with include mask=0x%x\n", + ksft_print_msg("FAIL: wrong tag = 0x%lx with include mask=0x%lx\n", MT_FETCH_TAG((uintptr_t)ptr), MT_INCLUDE_VALID_TAGS(excl_mask)); result = KSFT_FAIL; diff --git a/tools/testing/selftests/arm64/mte/mte_common_util.c b/tools/testing/selftests/arm64/mte/mte_common_util.c index 00ffd34c66d3..a1dc2fe5285b 100644 --- a/tools/testing/selftests/arm64/mte/mte_common_util.c +++ b/tools/testing/selftests/arm64/mte/mte_common_util.c @@ -38,7 +38,7 @@ void mte_default_handler(int signum, siginfo_t *si, void *uc) if (cur_mte_cxt.trig_si_code == si->si_code) cur_mte_cxt.fault_valid = true; else - ksft_print_msg("Got unexpected SEGV_MTEAERR at pc=$lx, fault addr=%lx\n", + ksft_print_msg("Got unexpected SEGV_MTEAERR at pc=%llx, fault addr=%lx\n", ((ucontext_t *)uc)->uc_mcontext.pc, addr); return; @@ -64,7 +64,7 @@ void mte_default_handler(int signum, siginfo_t *si, void *uc) exit(1); } } else if (signum == SIGBUS) { - ksft_print_msg("INFO: SIGBUS signal at pc=%lx, fault addr=%lx, si_code=%lx\n", + ksft_print_msg("INFO: SIGBUS signal at pc=%llx, fault addr=%lx, si_code=%x\n", ((ucontext_t *)uc)->uc_mcontext.pc, addr, si->si_code); if ((cur_mte_cxt.trig_range >= 0 && addr >= MT_CLEAR_TAG(cur_mte_cxt.trig_addr) && @@ -100,7 +100,7 @@ void *mte_insert_tags(void *ptr, size_t size) int align_size; if (!ptr || (unsigned long)(ptr) & MT_ALIGN_GRANULE) { - ksft_print_msg("FAIL: Addr=%lx: invalid\n", ptr); + ksft_print_msg("FAIL: Addr=%p: invalid\n", ptr); return NULL; } align_size = MT_ALIGN_UP(size); @@ -112,7 +112,7 @@ void *mte_insert_tags(void *ptr, size_t size) void mte_clear_tags(void *ptr, size_t size) { if (!ptr || (unsigned long)(ptr) & MT_ALIGN_GRANULE) { - ksft_print_msg("FAIL: Addr=%lx: invalid\n", ptr); + ksft_print_msg("FAIL: Addr=%p: invalid\n", ptr); return; } size = MT_ALIGN_UP(size); @@ -150,13 +150,13 @@ static void *__mte_allocate_memory_range(size_t size, int mem_type, int mapping, map_flag |= MAP_PRIVATE; ptr = mmap(NULL, entire_size, prot_flag, map_flag, fd, 0); if (ptr == MAP_FAILED) { - ksft_print_msg("FAIL: mmap allocation\n"); + ksft_perror("mmap()"); return NULL; } if (mem_type == USE_MPROTECT) { if (mprotect(ptr, entire_size, prot_flag | PROT_MTE)) { + ksft_perror("mprotect(PROT_MTE)"); munmap(ptr, size); - ksft_print_msg("FAIL: mprotect PROT_MTE property\n"); return NULL; } } @@ -190,13 +190,13 @@ void *mte_allocate_file_memory(size_t size, int mem_type, int mapping, bool tags lseek(fd, 0, SEEK_SET); for (index = INIT_BUFFER_SIZE; index < size; index += INIT_BUFFER_SIZE) { if (write(fd, buffer, INIT_BUFFER_SIZE) != INIT_BUFFER_SIZE) { - perror("initialising buffer"); + ksft_perror("initialising buffer"); return NULL; } } index -= INIT_BUFFER_SIZE; if (write(fd, buffer, size - index) != size - index) { - perror("initialising buffer"); + ksft_perror("initialising buffer"); return NULL; } return __mte_allocate_memory_range(size, mem_type, mapping, 0, 0, tags, fd); @@ -217,12 +217,12 @@ void *mte_allocate_file_memory_tag_range(size_t size, int mem_type, int mapping, lseek(fd, 0, SEEK_SET); for (index = INIT_BUFFER_SIZE; index < map_size; index += INIT_BUFFER_SIZE) if (write(fd, buffer, INIT_BUFFER_SIZE) != INIT_BUFFER_SIZE) { - perror("initialising buffer"); + ksft_perror("initialising buffer"); return NULL; } index -= INIT_BUFFER_SIZE; if (write(fd, buffer, map_size - index) != map_size - index) { - perror("initialising buffer"); + ksft_perror("initialising buffer"); return NULL; } return __mte_allocate_memory_range(size, mem_type, mapping, range_before, @@ -319,10 +319,9 @@ int mte_default_setup(void) unsigned long en = 0; int ret; - if (!(hwcaps2 & HWCAP2_MTE)) { - ksft_print_msg("SKIP: MTE features unavailable\n"); - return KSFT_SKIP; - } + if (!(hwcaps2 & HWCAP2_MTE)) + ksft_exit_skip("MTE features unavailable\n"); + /* Get current mte mode */ ret = prctl(PR_GET_TAGGED_ADDR_CTRL, en, 0, 0, 0); if (ret < 0) { @@ -359,7 +358,7 @@ int create_temp_file(void) /* Create a file in the tmpfs filesystem */ fd = mkstemp(&filename[0]); if (fd == -1) { - perror(filename); + ksft_perror(filename); ksft_print_msg("FAIL: Unable to open temporary file\n"); return 0; } diff --git a/tools/testing/selftests/arm64/mte/mte_common_util.h b/tools/testing/selftests/arm64/mte/mte_common_util.h index 2d3e71724e55..a0017a303beb 100644 --- a/tools/testing/selftests/arm64/mte/mte_common_util.h +++ b/tools/testing/selftests/arm64/mte/mte_common_util.h @@ -77,13 +77,13 @@ static inline void evaluate_test(int err, const char *msg) { switch (err) { case KSFT_PASS: - ksft_test_result_pass(msg); + ksft_test_result_pass("%s", msg); break; case KSFT_FAIL: - ksft_test_result_fail(msg); + ksft_test_result_fail("%s", msg); break; case KSFT_SKIP: - ksft_test_result_skip(msg); + ksft_test_result_skip("%s", msg); break; default: ksft_test_result_error("Unknown return code %d from %s", diff --git a/tools/testing/selftests/arm64/pauth/Makefile b/tools/testing/selftests/arm64/pauth/Makefile index 72e290b0b10c..b5a1c80e0ead 100644 --- a/tools/testing/selftests/arm64/pauth/Makefile +++ b/tools/testing/selftests/arm64/pauth/Makefile @@ -7,8 +7,14 @@ CC := $(CROSS_COMPILE)gcc endif CFLAGS += -mbranch-protection=pac-ret + +# All supported LLVMs have PAC, test for GCC +ifeq ($(LLVM),1) +pauth_cc_support := 1 +else # check if the compiler supports ARMv8.3 and branch protection with PAuth pauth_cc_support := $(shell if ($(CC) $(CFLAGS) -march=armv8.3-a -E -x c /dev/null -o /dev/null 2>&1) then echo "1"; fi) +endif ifeq ($(pauth_cc_support),1) TEST_GEN_PROGS := pac diff --git a/tools/testing/selftests/arm64/pauth/pac.c b/tools/testing/selftests/arm64/pauth/pac.c index b743daa772f5..6d21b2fc758d 100644 --- a/tools/testing/selftests/arm64/pauth/pac.c +++ b/tools/testing/selftests/arm64/pauth/pac.c @@ -13,7 +13,7 @@ #include "../../kselftest_harness.h" #include "helper.h" -#define PAC_COLLISION_ATTEMPTS 10 +#define PAC_COLLISION_ATTEMPTS 1000 /* * The kernel sets TBID by default. So bits 55 and above should remain * untouched no matter what. @@ -182,6 +182,9 @@ int exec_sign_all(struct signatures *signed_vals, size_t val) return -1; } + close(new_stdin[1]); + close(new_stdout[0]); + return 0; } diff --git a/tools/testing/selftests/arm64/signal/.gitignore b/tools/testing/selftests/arm64/signal/.gitignore index b2f2bfd5c6aa..b257db665a35 100644 --- a/tools/testing/selftests/arm64/signal/.gitignore +++ b/tools/testing/selftests/arm64/signal/.gitignore @@ -3,6 +3,7 @@ mangle_* fake_sigreturn_* fpmr_* poe_* +gcs_* sme_* ssve_* sve_* diff --git a/tools/testing/selftests/arm64/signal/Makefile b/tools/testing/selftests/arm64/signal/Makefile index edb3613513b8..1381039fb36f 100644 --- a/tools/testing/selftests/arm64/signal/Makefile +++ b/tools/testing/selftests/arm64/signal/Makefile @@ -2,7 +2,7 @@ # Copyright (C) 2019 ARM Limited # Additional include paths needed by kselftest.h and local headers -CFLAGS += -D_GNU_SOURCE -std=gnu99 -I. +CFLAGS += -std=gnu99 -I. SRCS := $(filter-out testcases/testcases.c,$(wildcard testcases/*.c)) PROGS := $(patsubst %.c,%,$(SRCS)) diff --git a/tools/testing/selftests/arm64/signal/sve_helpers.h b/tools/testing/selftests/arm64/signal/sve_helpers.h index 50948ce471cc..ca133b93375f 100644 --- a/tools/testing/selftests/arm64/signal/sve_helpers.h +++ b/tools/testing/selftests/arm64/signal/sve_helpers.h @@ -18,4 +18,17 @@ extern unsigned int nvls; int sve_fill_vls(bool use_sme, int min_vls); +static inline uint64_t get_svcr(void) +{ + uint64_t val; + + asm volatile ( + "mrs %0, S3_3_C4_C2_2\n" + : "=r"(val) + : + : "cc"); + + return val; +} + #endif diff --git a/tools/testing/selftests/arm64/signal/test_signals.c b/tools/testing/selftests/arm64/signal/test_signals.c index 00051b40d71e..1304c8ec0f2f 100644 --- a/tools/testing/selftests/arm64/signal/test_signals.c +++ b/tools/testing/selftests/arm64/signal/test_signals.c @@ -7,6 +7,10 @@ * Each test provides its own tde struct tdescr descriptor to link with * this wrapper. Framework provides common helpers. */ + +#include <sys/auxv.h> +#include <sys/prctl.h> + #include <kselftest.h> #include "test_signals.h" @@ -16,6 +20,16 @@ struct tdescr *current = &tde; int main(int argc, char *argv[]) { + /* + * Ensure GCS is at least enabled throughout the tests if + * supported, otherwise the inability to return from the + * function that enabled GCS makes it very inconvenient to set + * up test cases. The prctl() may fail if GCS was locked by + * libc setup code. + */ + if (getauxval(AT_HWCAP) & HWCAP_GCS) + gcs_set_state(PR_SHADOW_STACK_ENABLE); + ksft_print_msg("%s :: %s\n", current->name, current->descr); if (test_setup(current) && test_init(current)) { test_run(current); @@ -23,5 +37,6 @@ int main(int argc, char *argv[]) } test_result(current); - return current->result; + /* Do not return in case GCS was enabled */ + exit(current->result); } diff --git a/tools/testing/selftests/arm64/signal/test_signals.h b/tools/testing/selftests/arm64/signal/test_signals.h index 1e6273d81575..ee75a2c25ce7 100644 --- a/tools/testing/selftests/arm64/signal/test_signals.h +++ b/tools/testing/selftests/arm64/signal/test_signals.h @@ -35,6 +35,7 @@ enum { FSME_BIT, FSME_FA64_BIT, FSME2_BIT, + FGCS_BIT, FMAX_END }; @@ -43,6 +44,7 @@ enum { #define FEAT_SME (1UL << FSME_BIT) #define FEAT_SME_FA64 (1UL << FSME_FA64_BIT) #define FEAT_SME2 (1UL << FSME2_BIT) +#define FEAT_GCS (1UL << FGCS_BIT) /* * A descriptor used to describe and configure a test case. @@ -69,6 +71,10 @@ struct tdescr { * Zero when no signal is expected on success */ int sig_ok; + /* + * expected si_code for sig_ok, or 0 to not check + */ + int sig_ok_code; /* signum expected on unsupported CPU features. */ int sig_unsupp; /* a timeout in second for test completion */ diff --git a/tools/testing/selftests/arm64/signal/test_signals_utils.c b/tools/testing/selftests/arm64/signal/test_signals_utils.c index 0dc948db3a4a..5d3621921cfe 100644 --- a/tools/testing/selftests/arm64/signal/test_signals_utils.c +++ b/tools/testing/selftests/arm64/signal/test_signals_utils.c @@ -30,6 +30,7 @@ static char const *const feats_names[FMAX_END] = { " SME ", " FA64 ", " SME2 ", + " GCS ", }; #define MAX_FEATS_SZ 128 @@ -142,16 +143,25 @@ static bool handle_signal_ok(struct tdescr *td, "current->token ZEROED...test is probably broken!\n"); abort(); } - /* - * Trying to narrow down the SEGV to the ones generated by Kernel itself - * via arm64_notify_segfault(). This is a best-effort check anyway, and - * the si_code check may need to change if this aspect of the kernel - * ABI changes. - */ - if (td->sig_ok == SIGSEGV && si->si_code != SEGV_ACCERR) { - fprintf(stdout, - "si_code != SEGV_ACCERR...test is probably broken!\n"); - abort(); + if (td->sig_ok_code) { + if (si->si_code != td->sig_ok_code) { + fprintf(stdout, "si_code is %d not %d\n", + si->si_code, td->sig_ok_code); + abort(); + } + } else { + /* + * Trying to narrow down the SEGV to the ones + * generated by Kernel itself via + * arm64_notify_segfault(). This is a best-effort + * check anyway, and the si_code check may need to + * change if this aspect of the kernel ABI changes. + */ + if (td->sig_ok == SIGSEGV && si->si_code != SEGV_ACCERR) { + fprintf(stdout, + "si_code != SEGV_ACCERR...test is probably broken!\n"); + abort(); + } } td->pass = 1; /* @@ -329,6 +339,8 @@ int test_init(struct tdescr *td) td->feats_supported |= FEAT_SME_FA64; if (getauxval(AT_HWCAP2) & HWCAP2_SME2) td->feats_supported |= FEAT_SME2; + if (getauxval(AT_HWCAP) & HWCAP_GCS) + td->feats_supported |= FEAT_GCS; if (feats_ok(td)) { if (td->feats_required & td->feats_supported) fprintf(stderr, diff --git a/tools/testing/selftests/arm64/signal/test_signals_utils.h b/tools/testing/selftests/arm64/signal/test_signals_utils.h index 762c8fe9c54a..36fc12b3cd60 100644 --- a/tools/testing/selftests/arm64/signal/test_signals_utils.h +++ b/tools/testing/selftests/arm64/signal/test_signals_utils.h @@ -6,6 +6,7 @@ #include <assert.h> #include <stdio.h> +#include <stdint.h> #include <string.h> #include <linux/compiler.h> @@ -18,6 +19,44 @@ void test_cleanup(struct tdescr *td); int test_run(struct tdescr *td); void test_result(struct tdescr *td); +#ifndef __NR_prctl +#define __NR_prctl 167 +#endif + +/* + * The prctl takes 1 argument but we need to ensure that the other + * values passed in registers to the syscall are zero since the kernel + * validates them. + */ +#define gcs_set_state(state) \ + ({ \ + register long _num __asm__ ("x8") = __NR_prctl; \ + register long _arg1 __asm__ ("x0") = PR_SET_SHADOW_STACK_STATUS; \ + register long _arg2 __asm__ ("x1") = (long)(state); \ + register long _arg3 __asm__ ("x2") = 0; \ + register long _arg4 __asm__ ("x3") = 0; \ + register long _arg5 __asm__ ("x4") = 0; \ + \ + __asm__ volatile ( \ + "svc #0\n" \ + : "=r"(_arg1) \ + : "r"(_arg1), "r"(_arg2), \ + "r"(_arg3), "r"(_arg4), \ + "r"(_arg5), "r"(_num) \ + : "memory", "cc" \ + ); \ + _arg1; \ + }) + +static inline __attribute__((always_inline)) uint64_t get_gcspr_el0(void) +{ + uint64_t val; + + asm volatile("mrs %0, S3_3_C2_C5_1" : "=r" (val)); + + return val; +} + static inline bool feats_ok(struct tdescr *td) { if (td->feats_incompatible & td->feats_supported) diff --git a/tools/testing/selftests/arm64/signal/testcases/gcs_exception_fault.c b/tools/testing/selftests/arm64/signal/testcases/gcs_exception_fault.c new file mode 100644 index 000000000000..6228448b2ae7 --- /dev/null +++ b/tools/testing/selftests/arm64/signal/testcases/gcs_exception_fault.c @@ -0,0 +1,62 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2023 ARM Limited + */ + +#include <errno.h> +#include <signal.h> +#include <unistd.h> + +#include <sys/mman.h> +#include <sys/prctl.h> + +#include "test_signals_utils.h" +#include "testcases.h" + +/* + * We should get this from asm/siginfo.h but the testsuite is being + * clever with redefining siginfo_t. + */ +#ifndef SEGV_CPERR +#define SEGV_CPERR 10 +#endif + +static inline void gcsss1(uint64_t Xt) +{ + asm volatile ( + "sys #3, C7, C7, #2, %0\n" + : + : "rZ" (Xt) + : "memory"); +} + +static int gcs_op_fault_trigger(struct tdescr *td) +{ + /* + * The slot below our current GCS should be in a valid GCS but + * must not have a valid cap in it. + */ + gcsss1(get_gcspr_el0() - 8); + + return 0; +} + +static int gcs_op_fault_signal(struct tdescr *td, siginfo_t *si, + ucontext_t *uc) +{ + ASSERT_GOOD_CONTEXT(uc); + + return 1; +} + +struct tdescr tde = { + .name = "Invalid GCS operation", + .descr = "An invalid GCS operation generates the expected signal", + .feats_required = FEAT_GCS, + .timeout = 3, + .sig_ok = SIGSEGV, + .sig_ok_code = SEGV_CPERR, + .sanity_disabled = true, + .trigger = gcs_op_fault_trigger, + .run = gcs_op_fault_signal, +}; diff --git a/tools/testing/selftests/arm64/signal/testcases/gcs_frame.c b/tools/testing/selftests/arm64/signal/testcases/gcs_frame.c new file mode 100644 index 000000000000..b405d82321da --- /dev/null +++ b/tools/testing/selftests/arm64/signal/testcases/gcs_frame.c @@ -0,0 +1,88 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2023 ARM Limited + */ + +#include <signal.h> +#include <ucontext.h> +#include <sys/prctl.h> + +#include "test_signals_utils.h" +#include "testcases.h" + +static union { + ucontext_t uc; + char buf[1024 * 64]; +} context; + +static int gcs_regs(struct tdescr *td, siginfo_t *si, ucontext_t *uc) +{ + size_t offset; + struct _aarch64_ctx *head = GET_BUF_RESV_HEAD(context); + struct gcs_context *gcs; + unsigned long expected, gcspr; + uint64_t *u64_val; + int ret; + + ret = prctl(PR_GET_SHADOW_STACK_STATUS, &expected, 0, 0, 0); + if (ret != 0) { + fprintf(stderr, "Unable to query GCS status\n"); + return 1; + } + + /* We expect a cap to be added to the GCS in the signal frame */ + gcspr = get_gcspr_el0(); + gcspr -= 8; + fprintf(stderr, "Expecting GCSPR_EL0 %lx\n", gcspr); + + if (!get_current_context(td, &context.uc, sizeof(context))) { + fprintf(stderr, "Failed getting context\n"); + return 1; + } + + /* Ensure that the signal restore token was consumed */ + u64_val = (uint64_t *)get_gcspr_el0() + 1; + if (*u64_val) { + fprintf(stderr, "GCS value at %p is %lx not 0\n", + u64_val, *u64_val); + return 1; + } + + fprintf(stderr, "Got context\n"); + + head = get_header(head, GCS_MAGIC, GET_BUF_RESV_SIZE(context), + &offset); + if (!head) { + fprintf(stderr, "No GCS context\n"); + return 1; + } + + gcs = (struct gcs_context *)head; + + /* Basic size validation is done in get_current_context() */ + + if (gcs->features_enabled != expected) { + fprintf(stderr, "Features enabled %llx but expected %lx\n", + gcs->features_enabled, expected); + return 1; + } + + if (gcs->gcspr != gcspr) { + fprintf(stderr, "Got GCSPR %llx but expected %lx\n", + gcs->gcspr, gcspr); + return 1; + } + + fprintf(stderr, "GCS context validated\n"); + td->pass = 1; + + return 0; +} + +struct tdescr tde = { + .name = "GCS basics", + .descr = "Validate a GCS signal context", + .feats_required = FEAT_GCS, + .timeout = 3, + .run = gcs_regs, +}; diff --git a/tools/testing/selftests/arm64/signal/testcases/gcs_write_fault.c b/tools/testing/selftests/arm64/signal/testcases/gcs_write_fault.c new file mode 100644 index 000000000000..faeabb18c4b2 --- /dev/null +++ b/tools/testing/selftests/arm64/signal/testcases/gcs_write_fault.c @@ -0,0 +1,67 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2023 ARM Limited + */ + +#include <errno.h> +#include <signal.h> +#include <unistd.h> + +#include <sys/mman.h> +#include <sys/prctl.h> + +#include "test_signals_utils.h" +#include "testcases.h" + +static uint64_t *gcs_page; + +#ifndef __NR_map_shadow_stack +#define __NR_map_shadow_stack 453 +#endif + +static bool alloc_gcs(struct tdescr *td) +{ + long page_size = sysconf(_SC_PAGE_SIZE); + + gcs_page = (void *)syscall(__NR_map_shadow_stack, 0, + page_size, 0); + if (gcs_page == MAP_FAILED) { + fprintf(stderr, "Failed to map %ld byte GCS: %d\n", + page_size, errno); + return false; + } + + return true; +} + +static int gcs_write_fault_trigger(struct tdescr *td) +{ + /* Verify that the page is readable (ie, not completely unmapped) */ + fprintf(stderr, "Read value 0x%lx\n", gcs_page[0]); + + /* A regular write should trigger a fault */ + gcs_page[0] = EINVAL; + + return 0; +} + +static int gcs_write_fault_signal(struct tdescr *td, siginfo_t *si, + ucontext_t *uc) +{ + ASSERT_GOOD_CONTEXT(uc); + + return 1; +} + + +struct tdescr tde = { + .name = "GCS write fault", + .descr = "Normal writes to a GCS segfault", + .feats_required = FEAT_GCS, + .timeout = 3, + .sig_ok = SIGSEGV, + .sanity_disabled = true, + .init = alloc_gcs, + .trigger = gcs_write_fault_trigger, + .run = gcs_write_fault_signal, +}; diff --git a/tools/testing/selftests/arm64/signal/testcases/ssve_regs.c b/tools/testing/selftests/arm64/signal/testcases/ssve_regs.c index 6dbe48cf8b09..1dbca9afb13c 100644 --- a/tools/testing/selftests/arm64/signal/testcases/ssve_regs.c +++ b/tools/testing/selftests/arm64/signal/testcases/ssve_regs.c @@ -85,6 +85,11 @@ static int do_one_sme_vl(struct tdescr *td, siginfo_t *si, ucontext_t *uc, fprintf(stderr, "Got expected size %u and VL %d\n", head->size, ssve->vl); + if (get_svcr() != 0) { + fprintf(stderr, "Unexpected SVCR %lx\n", get_svcr()); + return 1; + } + return 0; } diff --git a/tools/testing/selftests/arm64/signal/testcases/testcases.c b/tools/testing/selftests/arm64/signal/testcases/testcases.c index e6daa94fcd2e..0c1a6b26afac 100644 --- a/tools/testing/selftests/arm64/signal/testcases/testcases.c +++ b/tools/testing/selftests/arm64/signal/testcases/testcases.c @@ -198,6 +198,13 @@ bool validate_reserved(ucontext_t *uc, size_t resv_sz, char **err) *err = "Bad size for fpmr_context"; new_flags |= FPMR_CTX; break; + case GCS_MAGIC: + if (flags & GCS_CTX) + *err = "Multiple GCS_MAGIC"; + if (head->size != sizeof(struct gcs_context)) + *err = "Bad size for gcs_context"; + new_flags |= GCS_CTX; + break; case EXTRA_MAGIC: if (flags & EXTRA_CTX) *err = "Multiple EXTRA_MAGIC"; diff --git a/tools/testing/selftests/arm64/signal/testcases/testcases.h b/tools/testing/selftests/arm64/signal/testcases/testcases.h index 9872b8912714..98b97efdda23 100644 --- a/tools/testing/selftests/arm64/signal/testcases/testcases.h +++ b/tools/testing/selftests/arm64/signal/testcases/testcases.h @@ -20,6 +20,7 @@ #define EXTRA_CTX (1 << 3) #define ZT_CTX (1 << 4) #define FPMR_CTX (1 << 5) +#define GCS_CTX (1 << 6) #define KSFT_BAD_MAGIC 0xdeadbeef diff --git a/tools/testing/selftests/arm64/signal/testcases/za_regs.c b/tools/testing/selftests/arm64/signal/testcases/za_regs.c index b9e13f27f1f9..badaead5326a 100644 --- a/tools/testing/selftests/arm64/signal/testcases/za_regs.c +++ b/tools/testing/selftests/arm64/signal/testcases/za_regs.c @@ -91,6 +91,11 @@ static int do_one_sme_vl(struct tdescr *td, siginfo_t *si, ucontext_t *uc, return 1; } + if (get_svcr() != 0) { + fprintf(stderr, "Unexpected SVCR %lx\n", get_svcr()); + return 1; + } + return 0; } diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore index e6533b3400de..e9c377001f93 100644 --- a/tools/testing/selftests/bpf/.gitignore +++ b/tools/testing/selftests/bpf/.gitignore @@ -5,7 +5,6 @@ bpf-syscall* test_verifier test_maps test_lru_map -test_lpm_map test_tag FEATURE-DUMP.libbpf FEATURE-DUMP.selftests @@ -16,7 +15,6 @@ fixdep /test_progs-cpuv4 test_verifier_log feature -test_sock urandom_read test_sockmap test_lirc_mode2_user @@ -24,7 +22,6 @@ test_flow_dissector flow_dissector_load test_tcpnotify_user test_libbpf -test_tcp_check_syncookie_user test_sysctl xdping test_cpp diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index f04af11df8eb..7eeb3cbe18c7 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -10,6 +10,7 @@ TOOLSDIR := $(abspath ../../..) LIBDIR := $(TOOLSDIR)/lib BPFDIR := $(LIBDIR)/bpf TOOLSINCDIR := $(TOOLSDIR)/include +TOOLSARCHINCDIR := $(TOOLSDIR)/arch/$(SRCARCH)/include BPFTOOLDIR := $(TOOLSDIR)/bpf/bpftool APIDIR := $(TOOLSINCDIR)/uapi ifneq ($(O),) @@ -44,7 +45,7 @@ CFLAGS += -g $(OPT_FLAGS) -rdynamic \ -Wall -Werror -fno-omit-frame-pointer \ $(GENFLAGS) $(SAN_CFLAGS) $(LIBELF_CFLAGS) \ -I$(CURDIR) -I$(INCLUDE_DIR) -I$(GENDIR) -I$(LIBDIR) \ - -I$(TOOLSINCDIR) -I$(APIDIR) -I$(OUTPUT) + -I$(TOOLSINCDIR) -I$(TOOLSARCHINCDIR) -I$(APIDIR) -I$(OUTPUT) LDFLAGS += $(SAN_LDFLAGS) LDLIBS += $(LIBELF_LIBS) -lz -lrt -lpthread @@ -82,8 +83,8 @@ CLANG_CPUV4 := 1 endif # Order correspond to 'make run_tests' order -TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test_progs \ - test_sock test_sockmap \ +TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_progs \ + test_sockmap \ test_tcpnotify_user test_sysctl \ test_progs-no_alu32 TEST_INST_SUBDIRS := no_alu32 @@ -132,12 +133,10 @@ TEST_PROGS := test_kmod.sh \ test_tunnel.sh \ test_lwt_seg6local.sh \ test_lirc_mode2.sh \ - test_skb_cgroup_id.sh \ test_flow_dissector.sh \ test_xdp_vlan_mode_generic.sh \ test_xdp_vlan_mode_native.sh \ test_lwt_ip_encap.sh \ - test_tcp_check_syncookie.sh \ test_tc_tunnel.sh \ test_tc_edt.sh \ test_xdping.sh \ @@ -154,10 +153,23 @@ TEST_PROGS_EXTENDED := with_addr.sh \ # Compile but not part of 'make run_tests' TEST_GEN_PROGS_EXTENDED = \ - flow_dissector_load test_flow_dissector test_tcp_check_syncookie_user \ - test_lirc_mode2_user xdping test_cpp runqslower bench bpf_testmod.ko \ - xskxceiver xdp_redirect_multi xdp_synproxy veristat xdp_hw_metadata \ - xdp_features bpf_test_no_cfi.ko + bench \ + bpf_testmod.ko \ + bpf_test_modorder_x.ko \ + bpf_test_modorder_y.ko \ + bpf_test_no_cfi.ko \ + flow_dissector_load \ + runqslower \ + test_cpp \ + test_flow_dissector \ + test_lirc_mode2_user \ + veristat \ + xdp_features \ + xdp_hw_metadata \ + xdp_redirect_multi \ + xdp_synproxy \ + xdping \ + xskxceiver TEST_GEN_FILES += liburandom_read.so urandom_read sign-file uprobe_multi @@ -263,7 +275,7 @@ $(OUTPUT)/%:%.c ifeq ($(SRCARCH),$(filter $(SRCARCH),x86 riscv)) LLD := lld else -LLD := ld +LLD := $(shell command -v $(LD)) endif # Filter out -static for liburandom_read.so and its dependent targets so that static builds @@ -273,6 +285,7 @@ $(OUTPUT)/liburandom_read.so: urandom_read_lib1.c urandom_read_lib2.c liburandom $(Q)$(CLANG) $(CLANG_TARGET_ARCH) \ $(filter-out -static,$(CFLAGS) $(LDFLAGS)) \ $(filter %.c,$^) $(filter-out -static,$(LDLIBS)) \ + -Wno-unused-command-line-argument \ -fuse-ld=$(LLD) -Wl,-znoseparate-code -Wl,--build-id=sha1 \ -Wl,--version-script=liburandom_read.map \ -fPIC -shared -o $@ @@ -281,6 +294,7 @@ $(OUTPUT)/urandom_read: urandom_read.c urandom_read_aux.c $(OUTPUT)/liburandom_r $(call msg,BINARY,,$@) $(Q)$(CLANG) $(CLANG_TARGET_ARCH) \ $(filter-out -static,$(CFLAGS) $(LDFLAGS)) $(filter %.c,$^) \ + -Wno-unused-command-line-argument \ -lurandom_read $(filter-out -static,$(LDLIBS)) -L$(OUTPUT) \ -fuse-ld=$(LLD) -Wl,-znoseparate-code -Wl,--build-id=sha1 \ -Wl,-rpath=. -o $@ @@ -294,15 +308,36 @@ $(OUTPUT)/sign-file: ../../../../scripts/sign-file.c $(OUTPUT)/bpf_testmod.ko: $(VMLINUX_BTF) $(RESOLVE_BTFIDS) $(wildcard bpf_testmod/Makefile bpf_testmod/*.[ch]) $(call msg,MOD,,$@) $(Q)$(RM) bpf_testmod/bpf_testmod.ko # force re-compilation - $(Q)$(MAKE) $(submake_extras) RESOLVE_BTFIDS=$(RESOLVE_BTFIDS) -C bpf_testmod + $(Q)$(MAKE) $(submake_extras) -C bpf_testmod \ + RESOLVE_BTFIDS=$(RESOLVE_BTFIDS) \ + EXTRA_CFLAGS='' EXTRA_LDFLAGS='' $(Q)cp bpf_testmod/bpf_testmod.ko $@ $(OUTPUT)/bpf_test_no_cfi.ko: $(VMLINUX_BTF) $(RESOLVE_BTFIDS) $(wildcard bpf_test_no_cfi/Makefile bpf_test_no_cfi/*.[ch]) $(call msg,MOD,,$@) $(Q)$(RM) bpf_test_no_cfi/bpf_test_no_cfi.ko # force re-compilation - $(Q)$(MAKE) $(submake_extras) RESOLVE_BTFIDS=$(RESOLVE_BTFIDS) -C bpf_test_no_cfi + $(Q)$(MAKE) $(submake_extras) -C bpf_test_no_cfi \ + RESOLVE_BTFIDS=$(RESOLVE_BTFIDS) \ + EXTRA_CFLAGS='' EXTRA_LDFLAGS='' $(Q)cp bpf_test_no_cfi/bpf_test_no_cfi.ko $@ +$(OUTPUT)/bpf_test_modorder_x.ko: $(VMLINUX_BTF) $(RESOLVE_BTFIDS) $(wildcard bpf_test_modorder_x/Makefile bpf_test_modorder_x/*.[ch]) + $(call msg,MOD,,$@) + $(Q)$(RM) bpf_test_modorder_x/bpf_test_modorder_x.ko # force re-compilation + $(Q)$(MAKE) $(submake_extras) -C bpf_test_modorder_x \ + RESOLVE_BTFIDS=$(RESOLVE_BTFIDS) \ + EXTRA_CFLAGS='' EXTRA_LDFLAGS='' + $(Q)cp bpf_test_modorder_x/bpf_test_modorder_x.ko $@ + +$(OUTPUT)/bpf_test_modorder_y.ko: $(VMLINUX_BTF) $(RESOLVE_BTFIDS) $(wildcard bpf_test_modorder_y/Makefile bpf_test_modorder_y/*.[ch]) + $(call msg,MOD,,$@) + $(Q)$(RM) bpf_test_modorder_y/bpf_test_modorder_y.ko # force re-compilation + $(Q)$(MAKE) $(submake_extras) -C bpf_test_modorder_y \ + RESOLVE_BTFIDS=$(RESOLVE_BTFIDS) \ + EXTRA_CFLAGS='' EXTRA_LDFLAGS='' + $(Q)cp bpf_test_modorder_y/bpf_test_modorder_y.ko $@ + + DEFAULT_BPFTOOL := $(HOST_SCRATCH_DIR)/sbin/bpftool ifneq ($(CROSS_COMPILE),) CROSS_BPFTOOL := $(SCRATCH_DIR)/sbin/bpftool @@ -319,8 +354,8 @@ $(OUTPUT)/runqslower: $(BPFOBJ) | $(DEFAULT_BPFTOOL) $(RUNQSLOWER_OUTPUT) BPFTOOL_OUTPUT=$(HOST_BUILD_DIR)/bpftool/ \ BPFOBJ_OUTPUT=$(BUILD_DIR)/libbpf/ \ BPFOBJ=$(BPFOBJ) BPF_INCLUDE=$(INCLUDE_DIR) \ - EXTRA_CFLAGS='-g $(OPT_FLAGS) $(SAN_CFLAGS)' \ - EXTRA_LDFLAGS='$(SAN_LDFLAGS)' && \ + EXTRA_CFLAGS='-g $(OPT_FLAGS) $(SAN_CFLAGS) $(EXTRA_CFLAGS)' \ + EXTRA_LDFLAGS='$(SAN_LDFLAGS) $(EXTRA_LDFLAGS)' && \ cp $(RUNQSLOWER_OUTPUT)runqslower $@ TEST_GEN_PROGS_EXTENDED += $(TRUNNER_BPFTOOL) @@ -335,7 +370,6 @@ JSON_WRITER := $(OUTPUT)/json_writer.o CAP_HELPERS := $(OUTPUT)/cap_helpers.o NETWORK_HELPERS := $(OUTPUT)/network_helpers.o -$(OUTPUT)/test_sock: $(CGROUP_HELPERS) $(TESTING_HELPERS) $(OUTPUT)/test_sockmap: $(CGROUP_HELPERS) $(TESTING_HELPERS) $(OUTPUT)/test_tcpnotify_user: $(CGROUP_HELPERS) $(TESTING_HELPERS) $(TRACE_HELPERS) $(OUTPUT)/test_sock_fields: $(CGROUP_HELPERS) $(TESTING_HELPERS) @@ -347,14 +381,14 @@ $(OUTPUT)/flow_dissector_load: $(TESTING_HELPERS) $(OUTPUT)/test_maps: $(TESTING_HELPERS) $(OUTPUT)/test_verifier: $(TESTING_HELPERS) $(CAP_HELPERS) $(UNPRIV_HELPERS) $(OUTPUT)/xsk.o: $(BPFOBJ) -$(OUTPUT)/test_tcp_check_syncookie_user: $(NETWORK_HELPERS) BPFTOOL ?= $(DEFAULT_BPFTOOL) $(DEFAULT_BPFTOOL): $(wildcard $(BPFTOOLDIR)/*.[ch] $(BPFTOOLDIR)/Makefile) \ $(HOST_BPFOBJ) | $(HOST_BUILD_DIR)/bpftool $(Q)$(MAKE) $(submake_extras) -C $(BPFTOOLDIR) \ ARCH= CROSS_COMPILE= CC="$(HOSTCC)" LD="$(HOSTLD)" \ - EXTRA_CFLAGS='-g $(OPT_FLAGS)' \ + EXTRA_CFLAGS='-g $(OPT_FLAGS) $(EXTRA_CFLAGS)' \ + EXTRA_LDFLAGS='$(EXTRA_LDFLAGS)' \ OUTPUT=$(HOST_BUILD_DIR)/bpftool/ \ LIBBPF_OUTPUT=$(HOST_BUILD_DIR)/libbpf/ \ LIBBPF_DESTDIR=$(HOST_SCRATCH_DIR)/ \ @@ -365,7 +399,8 @@ $(CROSS_BPFTOOL): $(wildcard $(BPFTOOLDIR)/*.[ch] $(BPFTOOLDIR)/Makefile) \ $(BPFOBJ) | $(BUILD_DIR)/bpftool $(Q)$(MAKE) $(submake_extras) -C $(BPFTOOLDIR) \ ARCH=$(ARCH) CROSS_COMPILE=$(CROSS_COMPILE) \ - EXTRA_CFLAGS='-g $(OPT_FLAGS)' \ + EXTRA_CFLAGS='-g $(OPT_FLAGS) $(EXTRA_CFLAGS)' \ + EXTRA_LDFLAGS='$(EXTRA_LDFLAGS)' \ OUTPUT=$(BUILD_DIR)/bpftool/ \ LIBBPF_OUTPUT=$(BUILD_DIR)/libbpf/ \ LIBBPF_DESTDIR=$(SCRATCH_DIR)/ \ @@ -388,8 +423,8 @@ $(BPFOBJ): $(wildcard $(BPFDIR)/*.[ch] $(BPFDIR)/Makefile) \ $(APIDIR)/linux/bpf.h \ | $(BUILD_DIR)/libbpf $(Q)$(MAKE) $(submake_extras) -C $(BPFDIR) OUTPUT=$(BUILD_DIR)/libbpf/ \ - EXTRA_CFLAGS='-g $(OPT_FLAGS) $(SAN_CFLAGS)' \ - EXTRA_LDFLAGS='$(SAN_LDFLAGS)' \ + EXTRA_CFLAGS='-g $(OPT_FLAGS) $(SAN_CFLAGS) $(EXTRA_CFLAGS)' \ + EXTRA_LDFLAGS='$(SAN_LDFLAGS) $(EXTRA_LDFLAGS)' \ DESTDIR=$(SCRATCH_DIR) prefix= all install_headers ifneq ($(BPFOBJ),$(HOST_BPFOBJ)) @@ -397,7 +432,9 @@ $(HOST_BPFOBJ): $(wildcard $(BPFDIR)/*.[ch] $(BPFDIR)/Makefile) \ $(APIDIR)/linux/bpf.h \ | $(HOST_BUILD_DIR)/libbpf $(Q)$(MAKE) $(submake_extras) -C $(BPFDIR) \ - EXTRA_CFLAGS='-g $(OPT_FLAGS)' ARCH= CROSS_COMPILE= \ + ARCH= CROSS_COMPILE= \ + EXTRA_CFLAGS='-g $(OPT_FLAGS) $(EXTRA_CFLAGS)' \ + EXTRA_LDFLAGS='$(EXTRA_LDFLAGS)' \ OUTPUT=$(HOST_BUILD_DIR)/libbpf/ \ CC="$(HOSTCC)" LD="$(HOSTLD)" \ DESTDIR=$(HOST_SCRATCH_DIR)/ prefix= all install_headers @@ -446,6 +483,7 @@ endef IS_LITTLE_ENDIAN = $(shell $(CC) -dM -E - </dev/null | \ grep 'define __BYTE_ORDER__ __ORDER_LITTLE_ENDIAN__') MENDIAN=$(if $(IS_LITTLE_ENDIAN),-mlittle-endian,-mbig-endian) +BPF_TARGET_ENDIAN=$(if $(IS_LITTLE_ENDIAN),--target=bpfel,--target=bpfeb) ifneq ($(CROSS_COMPILE),) CLANG_TARGET_ARCH = --target=$(notdir $(CROSS_COMPILE:%-=%)) @@ -473,17 +511,17 @@ $(OUTPUT)/cgroup_getset_retval_hooks.o: cgroup_getset_retval_hooks.h # $4 - binary name define CLANG_BPF_BUILD_RULE $(call msg,CLNG-BPF,$4,$2) - $(Q)$(CLANG) $3 -O2 --target=bpf -c $1 -mcpu=v3 -o $2 + $(Q)$(CLANG) $3 -O2 $(BPF_TARGET_ENDIAN) -c $1 -mcpu=v3 -o $2 endef # Similar to CLANG_BPF_BUILD_RULE, but with disabled alu32 define CLANG_NOALU32_BPF_BUILD_RULE $(call msg,CLNG-BPF,$4,$2) - $(Q)$(CLANG) $3 -O2 --target=bpf -c $1 -mcpu=v2 -o $2 + $(Q)$(CLANG) $3 -O2 $(BPF_TARGET_ENDIAN) -c $1 -mcpu=v2 -o $2 endef # Similar to CLANG_BPF_BUILD_RULE, but with cpu-v4 define CLANG_CPUV4_BPF_BUILD_RULE $(call msg,CLNG-BPF,$4,$2) - $(Q)$(CLANG) $3 -O2 --target=bpf -c $1 -mcpu=v4 -o $2 + $(Q)$(CLANG) $3 -O2 $(BPF_TARGET_ENDIAN) -c $1 -mcpu=v4 -o $2 endef # Build BPF object using GCC define GCC_BPF_BUILD_RULE @@ -623,10 +661,11 @@ $(TRUNNER_BPF_SKELS_LINKED): $(TRUNNER_OUTPUT)/%: $$$$(%-deps) $(BPFTOOL) | $(TR # When the compiler generates a %.d file, only skel basenames (not # full paths) are specified as prerequisites for corresponding %.o -# file. This target makes %.skel.h basename dependent on full paths, -# linking generated %.d dependency with actual %.skel.h files. -$(notdir %.skel.h): $(TRUNNER_OUTPUT)/%.skel.h - @true +# file. vpath directives below instruct make to search for skel files +# in TRUNNER_OUTPUT, if they are not present in the working directory. +vpath %.skel.h $(TRUNNER_OUTPUT) +vpath %.lskel.h $(TRUNNER_OUTPUT) +vpath %.subskel.h $(TRUNNER_OUTPUT) endif @@ -713,6 +752,7 @@ TRUNNER_EXTRA_SOURCES := test_progs.c \ unpriv_helpers.c \ netlink_helpers.c \ jit_disasm_helpers.c \ + io_helpers.c \ test_loader.c \ xsk.c \ disasm.c \ @@ -722,6 +762,8 @@ TRUNNER_EXTRA_SOURCES := test_progs.c \ ip_check_defrag_frags.h TRUNNER_EXTRA_FILES := $(OUTPUT)/urandom_read $(OUTPUT)/bpf_testmod.ko \ $(OUTPUT)/bpf_test_no_cfi.ko \ + $(OUTPUT)/bpf_test_modorder_x.ko \ + $(OUTPUT)/bpf_test_modorder_y.ko \ $(OUTPUT)/liburandom_read.so \ $(OUTPUT)/xdp_synproxy \ $(OUTPUT)/sign-file \ @@ -856,6 +898,8 @@ EXTRA_CLEAN := $(SCRATCH_DIR) $(HOST_SCRATCH_DIR) \ $(addprefix $(OUTPUT)/,*.o *.d *.skel.h *.lskel.h *.subskel.h \ no_alu32 cpuv4 bpf_gcc bpf_testmod.ko \ bpf_test_no_cfi.ko \ + bpf_test_modorder_x.ko \ + bpf_test_modorder_y.ko \ liburandom_read.so) \ $(OUTPUT)/FEATURE-DUMP.selftests diff --git a/tools/testing/selftests/bpf/benchs/bench_trigger.c b/tools/testing/selftests/bpf/benchs/bench_trigger.c index 2ed0ef6f21ee..32e9f194d449 100644 --- a/tools/testing/selftests/bpf/benchs/bench_trigger.c +++ b/tools/testing/selftests/bpf/benchs/bench_trigger.c @@ -4,6 +4,7 @@ #include <argp.h> #include <unistd.h> #include <stdint.h> +#include "bpf_util.h" #include "bench.h" #include "trigger_bench.skel.h" #include "trace_helpers.h" @@ -72,7 +73,7 @@ static __always_inline void inc_counter(struct counter *counters) unsigned slot; if (unlikely(tid == 0)) - tid = syscall(SYS_gettid); + tid = sys_gettid(); /* multiplicative hashing, it's fast */ slot = 2654435769U * tid; diff --git a/tools/testing/selftests/bpf/bpf_experimental.h b/tools/testing/selftests/bpf/bpf_experimental.h index b0668f29f7b3..cd8ecd39c3f3 100644 --- a/tools/testing/selftests/bpf/bpf_experimental.h +++ b/tools/testing/selftests/bpf/bpf_experimental.h @@ -582,4 +582,10 @@ extern int bpf_wq_set_callback_impl(struct bpf_wq *wq, unsigned int flags__k, void *aux__ign) __ksym; #define bpf_wq_set_callback(timer, cb, flags) \ bpf_wq_set_callback_impl(timer, cb, flags, NULL) + +struct bpf_iter_kmem_cache; +extern int bpf_iter_kmem_cache_new(struct bpf_iter_kmem_cache *it) __weak __ksym; +extern struct kmem_cache *bpf_iter_kmem_cache_next(struct bpf_iter_kmem_cache *it) __weak __ksym; +extern void bpf_iter_kmem_cache_destroy(struct bpf_iter_kmem_cache *it) __weak __ksym; + #endif diff --git a/tools/testing/selftests/bpf/bpf_test_modorder_x/Makefile b/tools/testing/selftests/bpf/bpf_test_modorder_x/Makefile new file mode 100644 index 000000000000..40b25b98ad1b --- /dev/null +++ b/tools/testing/selftests/bpf/bpf_test_modorder_x/Makefile @@ -0,0 +1,19 @@ +BPF_TESTMOD_DIR := $(realpath $(dir $(abspath $(lastword $(MAKEFILE_LIST))))) +KDIR ?= $(abspath $(BPF_TESTMOD_DIR)/../../../../..) + +ifeq ($(V),1) +Q = +else +Q = @ +endif + +MODULES = bpf_test_modorder_x.ko + +obj-m += bpf_test_modorder_x.o + +all: + +$(Q)make -C $(KDIR) M=$(BPF_TESTMOD_DIR) modules + +clean: + +$(Q)make -C $(KDIR) M=$(BPF_TESTMOD_DIR) clean + diff --git a/tools/testing/selftests/bpf/bpf_test_modorder_x/bpf_test_modorder_x.c b/tools/testing/selftests/bpf/bpf_test_modorder_x/bpf_test_modorder_x.c new file mode 100644 index 000000000000..0cc747fa912f --- /dev/null +++ b/tools/testing/selftests/bpf/bpf_test_modorder_x/bpf_test_modorder_x.c @@ -0,0 +1,39 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/bpf.h> +#include <linux/btf.h> +#include <linux/module.h> +#include <linux/init.h> + +__bpf_kfunc_start_defs(); + +__bpf_kfunc int bpf_test_modorder_retx(void) +{ + return 'x'; +} + +__bpf_kfunc_end_defs(); + +BTF_KFUNCS_START(bpf_test_modorder_kfunc_x_ids) +BTF_ID_FLAGS(func, bpf_test_modorder_retx); +BTF_KFUNCS_END(bpf_test_modorder_kfunc_x_ids) + +static const struct btf_kfunc_id_set bpf_test_modorder_x_set = { + .owner = THIS_MODULE, + .set = &bpf_test_modorder_kfunc_x_ids, +}; + +static int __init bpf_test_modorder_x_init(void) +{ + return register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS, + &bpf_test_modorder_x_set); +} + +static void __exit bpf_test_modorder_x_exit(void) +{ +} + +module_init(bpf_test_modorder_x_init); +module_exit(bpf_test_modorder_x_exit); + +MODULE_DESCRIPTION("BPF selftest ordertest module X"); +MODULE_LICENSE("GPL"); diff --git a/tools/testing/selftests/bpf/bpf_test_modorder_y/Makefile b/tools/testing/selftests/bpf/bpf_test_modorder_y/Makefile new file mode 100644 index 000000000000..52c3ab9d84e2 --- /dev/null +++ b/tools/testing/selftests/bpf/bpf_test_modorder_y/Makefile @@ -0,0 +1,19 @@ +BPF_TESTMOD_DIR := $(realpath $(dir $(abspath $(lastword $(MAKEFILE_LIST))))) +KDIR ?= $(abspath $(BPF_TESTMOD_DIR)/../../../../..) + +ifeq ($(V),1) +Q = +else +Q = @ +endif + +MODULES = bpf_test_modorder_y.ko + +obj-m += bpf_test_modorder_y.o + +all: + +$(Q)make -C $(KDIR) M=$(BPF_TESTMOD_DIR) modules + +clean: + +$(Q)make -C $(KDIR) M=$(BPF_TESTMOD_DIR) clean + diff --git a/tools/testing/selftests/bpf/bpf_test_modorder_y/bpf_test_modorder_y.c b/tools/testing/selftests/bpf/bpf_test_modorder_y/bpf_test_modorder_y.c new file mode 100644 index 000000000000..c627ee085d13 --- /dev/null +++ b/tools/testing/selftests/bpf/bpf_test_modorder_y/bpf_test_modorder_y.c @@ -0,0 +1,39 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/bpf.h> +#include <linux/btf.h> +#include <linux/module.h> +#include <linux/init.h> + +__bpf_kfunc_start_defs(); + +__bpf_kfunc int bpf_test_modorder_rety(void) +{ + return 'y'; +} + +__bpf_kfunc_end_defs(); + +BTF_KFUNCS_START(bpf_test_modorder_kfunc_y_ids) +BTF_ID_FLAGS(func, bpf_test_modorder_rety); +BTF_KFUNCS_END(bpf_test_modorder_kfunc_y_ids) + +static const struct btf_kfunc_id_set bpf_test_modorder_y_set = { + .owner = THIS_MODULE, + .set = &bpf_test_modorder_kfunc_y_ids, +}; + +static int __init bpf_test_modorder_y_init(void) +{ + return register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS, + &bpf_test_modorder_y_set); +} + +static void __exit bpf_test_modorder_y_exit(void) +{ +} + +module_init(bpf_test_modorder_y_init); +module_exit(bpf_test_modorder_y_exit); + +MODULE_DESCRIPTION("BPF selftest ordertest module Y"); +MODULE_LICENSE("GPL"); diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod-events.h b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod-events.h index 6c3b4d4f173a..aeef86b3da74 100644 --- a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod-events.h +++ b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod-events.h @@ -40,6 +40,14 @@ DECLARE_TRACE(bpf_testmod_test_nullable_bare, TP_ARGS(ctx__nullable) ); +struct sk_buff; + +DECLARE_TRACE(bpf_testmod_test_raw_tp_null, + TP_PROTO(struct sk_buff *skb), + TP_ARGS(skb) +); + + #undef BPF_TESTMOD_DECLARE_TRACE #ifdef DECLARE_TRACE_WRITABLE #define BPF_TESTMOD_DECLARE_TRACE(call, proto, args, size) \ diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c index 8835761d9a12..cc9dde507aba 100644 --- a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c +++ b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c @@ -245,6 +245,39 @@ __bpf_kfunc void bpf_testmod_ctx_release(struct bpf_testmod_ctx *ctx) call_rcu(&ctx->rcu, testmod_free_cb); } +static struct bpf_testmod_ops3 *st_ops3; + +static int bpf_testmod_test_3(void) +{ + return 0; +} + +static int bpf_testmod_test_4(void) +{ + return 0; +} + +static struct bpf_testmod_ops3 __bpf_testmod_ops3 = { + .test_1 = bpf_testmod_test_3, + .test_2 = bpf_testmod_test_4, +}; + +static void bpf_testmod_test_struct_ops3(void) +{ + if (st_ops3) + st_ops3->test_1(); +} + +__bpf_kfunc void bpf_testmod_ops3_call_test_1(void) +{ + st_ops3->test_1(); +} + +__bpf_kfunc void bpf_testmod_ops3_call_test_2(void) +{ + st_ops3->test_2(); +} + struct bpf_testmod_btf_type_tag_1 { int a; }; @@ -380,6 +413,10 @@ bpf_testmod_test_read(struct file *file, struct kobject *kobj, (void)bpf_testmod_test_arg_ptr_to_struct(&struct_arg1_2); + (void)trace_bpf_testmod_test_raw_tp_null(NULL); + + bpf_testmod_test_struct_ops3(); + struct_arg3 = kmalloc((sizeof(struct bpf_testmod_struct_arg_3) + sizeof(int)), GFP_KERNEL); if (struct_arg3 != NULL) { @@ -461,7 +498,7 @@ static struct bin_attribute bin_attr_bpf_testmod_file __ro_after_init = { static int uprobe_ret_handler(struct uprobe_consumer *self, unsigned long func, - struct pt_regs *regs) + struct pt_regs *regs, __u64 *data) { regs->ax = 0x12345678deadbeef; @@ -584,6 +621,8 @@ BTF_ID_FLAGS(func, bpf_kfunc_trusted_num_test, KF_TRUSTED_ARGS) BTF_ID_FLAGS(func, bpf_kfunc_rcu_task_test, KF_RCU) BTF_ID_FLAGS(func, bpf_testmod_ctx_create, KF_ACQUIRE | KF_RET_NULL) BTF_ID_FLAGS(func, bpf_testmod_ctx_release, KF_RELEASE) +BTF_ID_FLAGS(func, bpf_testmod_ops3_call_test_1) +BTF_ID_FLAGS(func, bpf_testmod_ops3_call_test_2) BTF_KFUNCS_END(bpf_testmod_common_kfunc_ids) BTF_ID_LIST(bpf_testmod_dtor_ids) @@ -1094,6 +1133,10 @@ static const struct bpf_verifier_ops bpf_testmod_verifier_ops = { .is_valid_access = bpf_testmod_ops_is_valid_access, }; +static const struct bpf_verifier_ops bpf_testmod_verifier_ops3 = { + .is_valid_access = bpf_testmod_ops_is_valid_access, +}; + static int bpf_dummy_reg(void *kdata, struct bpf_link *link) { struct bpf_testmod_ops *ops = kdata; @@ -1173,6 +1216,68 @@ struct bpf_struct_ops bpf_testmod_ops2 = { .owner = THIS_MODULE, }; +static int st_ops3_reg(void *kdata, struct bpf_link *link) +{ + int err = 0; + + mutex_lock(&st_ops_mutex); + if (st_ops3) { + pr_err("st_ops has already been registered\n"); + err = -EEXIST; + goto unlock; + } + st_ops3 = kdata; + +unlock: + mutex_unlock(&st_ops_mutex); + return err; +} + +static void st_ops3_unreg(void *kdata, struct bpf_link *link) +{ + mutex_lock(&st_ops_mutex); + st_ops3 = NULL; + mutex_unlock(&st_ops_mutex); +} + +static void test_1_recursion_detected(struct bpf_prog *prog) +{ + struct bpf_prog_stats *stats; + + stats = this_cpu_ptr(prog->stats); + printk("bpf_testmod: oh no, recursing into test_1, recursion_misses %llu", + u64_stats_read(&stats->misses)); +} + +static int st_ops3_check_member(const struct btf_type *t, + const struct btf_member *member, + const struct bpf_prog *prog) +{ + u32 moff = __btf_member_bit_offset(t, member) / 8; + + switch (moff) { + case offsetof(struct bpf_testmod_ops3, test_1): + prog->aux->priv_stack_requested = true; + prog->aux->recursion_detected = test_1_recursion_detected; + fallthrough; + default: + break; + } + return 0; +} + +struct bpf_struct_ops bpf_testmod_ops3 = { + .verifier_ops = &bpf_testmod_verifier_ops3, + .init = bpf_testmod_ops_init, + .init_member = bpf_testmod_ops_init_member, + .reg = st_ops3_reg, + .unreg = st_ops3_unreg, + .check_member = st_ops3_check_member, + .cfi_stubs = &__bpf_testmod_ops3, + .name = "bpf_testmod_ops3", + .owner = THIS_MODULE, +}; + static int bpf_test_mod_st_ops__test_prologue(struct st_ops_args *args) { return 0; @@ -1331,6 +1436,7 @@ static int bpf_testmod_init(void) ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS, &bpf_testmod_kfunc_set); ret = ret ?: register_bpf_struct_ops(&bpf_bpf_testmod_ops, bpf_testmod_ops); ret = ret ?: register_bpf_struct_ops(&bpf_testmod_ops2, bpf_testmod_ops2); + ret = ret ?: register_bpf_struct_ops(&bpf_testmod_ops3, bpf_testmod_ops3); ret = ret ?: register_bpf_struct_ops(&testmod_st_ops, bpf_testmod_st_ops); ret = ret ?: register_btf_id_dtor_kfuncs(bpf_testmod_dtors, ARRAY_SIZE(bpf_testmod_dtors), diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.h b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.h index fb7dff47597a..356803d1c10e 100644 --- a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.h +++ b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.h @@ -94,6 +94,11 @@ struct bpf_testmod_ops2 { int (*test_1)(void); }; +struct bpf_testmod_ops3 { + int (*test_1)(void); + int (*test_2)(void); +}; + struct st_ops_args { u64 a; }; diff --git a/tools/testing/selftests/bpf/bpf_util.h b/tools/testing/selftests/bpf/bpf_util.h index 10587a29b967..5f6963a320d7 100644 --- a/tools/testing/selftests/bpf/bpf_util.h +++ b/tools/testing/selftests/bpf/bpf_util.h @@ -6,6 +6,7 @@ #include <stdlib.h> #include <string.h> #include <errno.h> +#include <syscall.h> #include <bpf/libbpf.h> /* libbpf_num_possible_cpus */ static inline unsigned int bpf_num_possible_cpus(void) @@ -59,4 +60,15 @@ static inline void bpf_strlcpy(char *dst, const char *src, size_t sz) (offsetof(TYPE, MEMBER) + sizeof_field(TYPE, MEMBER)) #endif +/* Availability of gettid across glibc versions is hit-and-miss, therefore + * fallback to syscall in this macro and use it everywhere. + */ +#ifndef sys_gettid +#define sys_gettid() syscall(SYS_gettid) +#endif + +#ifndef ENOTSUPP +#define ENOTSUPP 524 +#endif + #endif /* __BPF_UTIL__ */ diff --git a/tools/testing/selftests/bpf/config.vm b/tools/testing/selftests/bpf/config.vm index a9746ca78777..da543b24c144 100644 --- a/tools/testing/selftests/bpf/config.vm +++ b/tools/testing/selftests/bpf/config.vm @@ -1,12 +1,15 @@ -CONFIG_9P_FS=y CONFIG_9P_FS_POSIX_ACL=y CONFIG_9P_FS_SECURITY=y +CONFIG_9P_FS=y CONFIG_CRYPTO_DEV_VIRTIO=y -CONFIG_NET_9P=y +CONFIG_FUSE_FS=y +CONFIG_FUSE_PASSTHROUGH=y CONFIG_NET_9P_VIRTIO=y +CONFIG_NET_9P=y CONFIG_VIRTIO_BALLOON=y CONFIG_VIRTIO_BLK=y CONFIG_VIRTIO_CONSOLE=y +CONFIG_VIRTIO_FS=y CONFIG_VIRTIO_NET=y CONFIG_VIRTIO_PCI=y CONFIG_VIRTIO_VSOCKETS_COMMON=y diff --git a/tools/testing/selftests/bpf/io_helpers.c b/tools/testing/selftests/bpf/io_helpers.c new file mode 100644 index 000000000000..4ada0a74aa1f --- /dev/null +++ b/tools/testing/selftests/bpf/io_helpers.c @@ -0,0 +1,21 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <sys/select.h> +#include <unistd.h> +#include <errno.h> + +int read_with_timeout(int fd, char *buf, size_t count, long usec) +{ + const long M = 1000 * 1000; + struct timeval tv = { usec / M, usec % M }; + fd_set fds; + int err; + + FD_ZERO(&fds); + FD_SET(fd, &fds); + err = select(fd + 1, &fds, NULL, NULL, &tv); + if (err < 0) + return err; + if (FD_ISSET(fd, &fds)) + return read(fd, buf, count); + return -EAGAIN; +} diff --git a/tools/testing/selftests/bpf/io_helpers.h b/tools/testing/selftests/bpf/io_helpers.h new file mode 100644 index 000000000000..21e1134cd3ce --- /dev/null +++ b/tools/testing/selftests/bpf/io_helpers.h @@ -0,0 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <unistd.h> + +/* As a regular read(2), but allows to specify a timeout in micro-seconds. + * Returns -EAGAIN on timeout. + */ +int read_with_timeout(int fd, char *buf, size_t count, long usec); diff --git a/tools/testing/selftests/bpf/test_lpm_map.c b/tools/testing/selftests/bpf/map_tests/lpm_trie_map_basic_ops.c index d98c72dc563e..d32e4edac930 100644 --- a/tools/testing/selftests/bpf/test_lpm_map.c +++ b/tools/testing/selftests/bpf/map_tests/lpm_trie_map_basic_ops.c @@ -20,10 +20,12 @@ #include <string.h> #include <time.h> #include <unistd.h> +#include <endian.h> #include <arpa/inet.h> #include <sys/time.h> #include <bpf/bpf.h> +#include <test_maps.h> #include "bpf_util.h" @@ -33,6 +35,22 @@ struct tlpm_node { uint8_t key[]; }; +struct lpm_trie_bytes_key { + union { + struct bpf_lpm_trie_key_hdr hdr; + __u32 prefixlen; + }; + unsigned char data[8]; +}; + +struct lpm_trie_int_key { + union { + struct bpf_lpm_trie_key_hdr hdr; + __u32 prefixlen; + }; + unsigned int data; +}; + static struct tlpm_node *tlpm_match(struct tlpm_node *list, const uint8_t *key, size_t n_bits); @@ -223,7 +241,7 @@ static void test_lpm_map(int keysize) n_matches = 0; n_matches_after_delete = 0; n_nodes = 1 << 8; - n_lookups = 1 << 16; + n_lookups = 1 << 9; data = alloca(keysize); memset(data, 0, keysize); @@ -770,16 +788,385 @@ static void test_lpm_multi_thread(void) close(map_fd); } -int main(void) +static int lpm_trie_create(unsigned int key_size, unsigned int value_size, unsigned int max_entries) +{ + LIBBPF_OPTS(bpf_map_create_opts, opts); + int fd; + + opts.map_flags = BPF_F_NO_PREALLOC; + fd = bpf_map_create(BPF_MAP_TYPE_LPM_TRIE, "lpm_trie", key_size, value_size, max_entries, + &opts); + CHECK(fd < 0, "bpf_map_create", "error %d\n", errno); + + return fd; +} + +static void test_lpm_trie_update_flags(void) +{ + struct lpm_trie_int_key key; + unsigned int value, got; + int fd, err; + + fd = lpm_trie_create(sizeof(key), sizeof(value), 3); + + /* invalid flags (Error) */ + key.prefixlen = 32; + key.data = 0; + value = 0; + err = bpf_map_update_elem(fd, &key, &value, BPF_F_LOCK); + CHECK(err != -EINVAL, "invalid update flag", "error %d\n", err); + + /* invalid flags (Error) */ + key.prefixlen = 32; + key.data = 0; + value = 0; + err = bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST | BPF_EXIST); + CHECK(err != -EINVAL, "invalid update flag", "error %d\n", err); + + /* overwrite an empty qp-trie (Error) */ + key.prefixlen = 32; + key.data = 0; + value = 2; + err = bpf_map_update_elem(fd, &key, &value, BPF_EXIST); + CHECK(err != -ENOENT, "overwrite empty qp-trie", "error %d\n", err); + + /* add a new node */ + key.prefixlen = 16; + key.data = 0; + value = 1; + err = bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST); + CHECK(err, "add new elem", "error %d\n", err); + got = 0; + err = bpf_map_lookup_elem(fd, &key, &got); + CHECK(err, "lookup elem", "error %d\n", err); + CHECK(got != value, "check value", "got %d exp %d\n", got, value); + + /* add the same node as new node (Error) */ + err = bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST); + CHECK(err != -EEXIST, "add new elem again", "error %d\n", err); + + /* overwrite the existed node */ + value = 4; + err = bpf_map_update_elem(fd, &key, &value, BPF_EXIST); + CHECK(err, "overwrite elem", "error %d\n", err); + got = 0; + err = bpf_map_lookup_elem(fd, &key, &got); + CHECK(err, "lookup elem", "error %d\n", err); + CHECK(got != value, "check value", "got %d exp %d\n", got, value); + + /* overwrite the node */ + value = 1; + err = bpf_map_update_elem(fd, &key, &value, BPF_ANY); + CHECK(err, "update elem", "error %d\n", err); + got = 0; + err = bpf_map_lookup_elem(fd, &key, &got); + CHECK(err, "lookup elem", "error %d\n", err); + CHECK(got != value, "check value", "got %d exp %d\n", got, value); + + /* overwrite a non-existent node which is the prefix of the first + * node (Error). + */ + key.prefixlen = 8; + key.data = 0; + value = 2; + err = bpf_map_update_elem(fd, &key, &value, BPF_EXIST); + CHECK(err != -ENOENT, "overwrite nonexistent elem", "error %d\n", err); + + /* add a new node which is the prefix of the first node */ + err = bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST); + CHECK(err, "add new elem", "error %d\n", err); + got = 0; + err = bpf_map_lookup_elem(fd, &key, &got); + CHECK(err, "lookup key", "error %d\n", err); + CHECK(got != value, "check value", "got %d exp %d\n", got, value); + + /* add another new node which will be the sibling of the first node */ + key.prefixlen = 9; + key.data = htobe32(1 << 23); + value = 5; + err = bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST); + CHECK(err, "add new elem", "error %d\n", err); + got = 0; + err = bpf_map_lookup_elem(fd, &key, &got); + CHECK(err, "lookup key", "error %d\n", err); + CHECK(got != value, "check value", "got %d exp %d\n", got, value); + + /* overwrite the third node */ + value = 3; + err = bpf_map_update_elem(fd, &key, &value, BPF_ANY); + CHECK(err, "overwrite elem", "error %d\n", err); + got = 0; + err = bpf_map_lookup_elem(fd, &key, &got); + CHECK(err, "lookup key", "error %d\n", err); + CHECK(got != value, "check value", "got %d exp %d\n", got, value); + + /* delete the second node to make it an intermediate node */ + key.prefixlen = 8; + key.data = 0; + err = bpf_map_delete_elem(fd, &key); + CHECK(err, "del elem", "error %d\n", err); + + /* overwrite the intermediate node (Error) */ + value = 2; + err = bpf_map_update_elem(fd, &key, &value, BPF_EXIST); + CHECK(err != -ENOENT, "overwrite nonexistent elem", "error %d\n", err); + + close(fd); +} + +static void test_lpm_trie_update_full_map(void) +{ + struct lpm_trie_int_key key; + int value, got; + int fd, err; + + fd = lpm_trie_create(sizeof(key), sizeof(value), 3); + + /* add a new node */ + key.prefixlen = 16; + key.data = 0; + value = 0; + err = bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST); + CHECK(err, "add new elem", "error %d\n", err); + got = 0; + err = bpf_map_lookup_elem(fd, &key, &got); + CHECK(err, "lookup elem", "error %d\n", err); + CHECK(got != value, "check value", "got %d exp %d\n", got, value); + + /* add new node */ + key.prefixlen = 8; + key.data = 0; + value = 1; + err = bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST); + CHECK(err, "add new elem", "error %d\n", err); + got = 0; + err = bpf_map_lookup_elem(fd, &key, &got); + CHECK(err, "lookup elem", "error %d\n", err); + CHECK(got != value, "check value", "got %d exp %d\n", got, value); + + /* add new node */ + key.prefixlen = 9; + key.data = htobe32(1 << 23); + value = 2; + err = bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST); + CHECK(err, "add new elem", "error %d\n", err); + got = 0; + err = bpf_map_lookup_elem(fd, &key, &got); + CHECK(err, "lookup elem", "error %d\n", err); + CHECK(got != value, "check value", "got %d exp %d\n", got, value); + + /* try to add more node (Error) */ + key.prefixlen = 32; + key.data = 0; + value = 3; + err = bpf_map_update_elem(fd, &key, &value, BPF_ANY); + CHECK(err != -ENOSPC, "add to full trie", "error %d\n", err); + + /* update the value of an existed node with BPF_EXIST */ + key.prefixlen = 16; + key.data = 0; + value = 4; + err = bpf_map_update_elem(fd, &key, &value, BPF_EXIST); + CHECK(err, "overwrite elem", "error %d\n", err); + got = 0; + err = bpf_map_lookup_elem(fd, &key, &got); + CHECK(err, "lookup elem", "error %d\n", err); + CHECK(got != value, "check value", "got %d exp %d\n", got, value); + + /* update the value of an existed node with BPF_ANY */ + key.prefixlen = 9; + key.data = htobe32(1 << 23); + value = 5; + err = bpf_map_update_elem(fd, &key, &value, BPF_ANY); + CHECK(err, "overwrite elem", "error %d\n", err); + got = 0; + err = bpf_map_lookup_elem(fd, &key, &got); + CHECK(err, "lookup elem", "error %d\n", err); + CHECK(got != value, "check value", "got %d exp %d\n", got, value); + + close(fd); +} + +static int cmp_str(const void *a, const void *b) +{ + const char *str_a = *(const char **)a, *str_b = *(const char **)b; + + return strcmp(str_a, str_b); +} + +/* Save strings in LPM trie. The trailing '\0' for each string will be + * accounted in the prefixlen. The strings returned during the iteration + * should be sorted as expected. + */ +static void test_lpm_trie_iterate_strs(void) +{ + static const char * const keys[] = { + "ab", "abO", "abc", "abo", "abS", "abcd", + }; + const char *sorted_keys[ARRAY_SIZE(keys)]; + struct lpm_trie_bytes_key key, next_key; + unsigned int value, got, i, j, len; + struct lpm_trie_bytes_key *cur; + int fd, err; + + fd = lpm_trie_create(sizeof(key), sizeof(value), ARRAY_SIZE(keys)); + + for (i = 0; i < ARRAY_SIZE(keys); i++) { + unsigned int flags; + + /* add i-th element */ + flags = i % 2 ? BPF_NOEXIST : 0; + len = strlen(keys[i]); + /* include the trailing '\0' */ + key.prefixlen = (len + 1) * 8; + memset(key.data, 0, sizeof(key.data)); + memcpy(key.data, keys[i], len); + value = i + 100; + err = bpf_map_update_elem(fd, &key, &value, flags); + CHECK(err, "add elem", "#%u error %d\n", i, err); + + err = bpf_map_lookup_elem(fd, &key, &got); + CHECK(err, "lookup elem", "#%u error %d\n", i, err); + CHECK(got != value, "lookup elem", "#%u expect %u got %u\n", i, value, got); + + /* re-add i-th element (Error) */ + err = bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST); + CHECK(err != -EEXIST, "re-add elem", "#%u error %d\n", i, err); + + /* Overwrite i-th element */ + flags = i % 2 ? 0 : BPF_EXIST; + value = i; + err = bpf_map_update_elem(fd, &key, &value, flags); + CHECK(err, "update elem", "error %d\n", err); + + /* Lookup #[0~i] elements */ + for (j = 0; j <= i; j++) { + len = strlen(keys[j]); + key.prefixlen = (len + 1) * 8; + memset(key.data, 0, sizeof(key.data)); + memcpy(key.data, keys[j], len); + err = bpf_map_lookup_elem(fd, &key, &got); + CHECK(err, "lookup elem", "#%u/%u error %d\n", i, j, err); + CHECK(got != j, "lookup elem", "#%u/%u expect %u got %u\n", + i, j, value, got); + } + } + + /* Add element to a full qp-trie (Error) */ + key.prefixlen = sizeof(key.data) * 8; + memset(key.data, 0, sizeof(key.data)); + value = 0; + err = bpf_map_update_elem(fd, &key, &value, 0); + CHECK(err != -ENOSPC, "add to full qp-trie", "error %d\n", err); + + /* Iterate sorted elements: no deletion */ + memcpy(sorted_keys, keys, sizeof(keys)); + qsort(sorted_keys, ARRAY_SIZE(sorted_keys), sizeof(sorted_keys[0]), cmp_str); + cur = NULL; + for (i = 0; i < ARRAY_SIZE(sorted_keys); i++) { + len = strlen(sorted_keys[i]); + err = bpf_map_get_next_key(fd, cur, &next_key); + CHECK(err, "iterate", "#%u error %d\n", i, err); + CHECK(next_key.prefixlen != (len + 1) * 8, "iterate", + "#%u invalid len %u expect %u\n", + i, next_key.prefixlen, (len + 1) * 8); + CHECK(memcmp(sorted_keys[i], next_key.data, len + 1), "iterate", + "#%u got %.*s exp %.*s\n", i, len, next_key.data, len, sorted_keys[i]); + + cur = &next_key; + } + err = bpf_map_get_next_key(fd, cur, &next_key); + CHECK(err != -ENOENT, "more element", "error %d\n", err); + + /* Iterate sorted elements: delete the found key after each iteration */ + cur = NULL; + for (i = 0; i < ARRAY_SIZE(sorted_keys); i++) { + len = strlen(sorted_keys[i]); + err = bpf_map_get_next_key(fd, cur, &next_key); + CHECK(err, "iterate", "#%u error %d\n", i, err); + CHECK(next_key.prefixlen != (len + 1) * 8, "iterate", + "#%u invalid len %u expect %u\n", + i, next_key.prefixlen, (len + 1) * 8); + CHECK(memcmp(sorted_keys[i], next_key.data, len + 1), "iterate", + "#%u got %.*s exp %.*s\n", i, len, next_key.data, len, sorted_keys[i]); + + cur = &next_key; + + err = bpf_map_delete_elem(fd, cur); + CHECK(err, "delete", "#%u error %d\n", i, err); + } + err = bpf_map_get_next_key(fd, cur, &next_key); + CHECK(err != -ENOENT, "non-empty qp-trie", "error %d\n", err); + + close(fd); +} + +/* Use the fixed prefixlen (32) and save integers in LPM trie. The iteration of + * LPM trie will return these integers in big-endian order, therefore, convert + * these integers to big-endian before update. After each iteration, delete the + * found key (the smallest integer) and expect the next iteration will return + * the second smallest number. + */ +static void test_lpm_trie_iterate_ints(void) +{ + struct lpm_trie_int_key key, next_key; + unsigned int i, max_entries; + struct lpm_trie_int_key *cur; + unsigned int *data_set; + int fd, err; + bool value; + + max_entries = 4096; + data_set = calloc(max_entries, sizeof(*data_set)); + CHECK(!data_set, "malloc", "no mem\n"); + for (i = 0; i < max_entries; i++) + data_set[i] = i; + + fd = lpm_trie_create(sizeof(key), sizeof(value), max_entries); + value = true; + for (i = 0; i < max_entries; i++) { + key.prefixlen = 32; + key.data = htobe32(data_set[i]); + + err = bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST); + CHECK(err, "add elem", "#%u error %d\n", i, err); + } + + cur = NULL; + for (i = 0; i < max_entries; i++) { + err = bpf_map_get_next_key(fd, cur, &next_key); + CHECK(err, "iterate", "#%u error %d\n", i, err); + CHECK(next_key.prefixlen != 32, "iterate", "#%u invalid len %u\n", + i, next_key.prefixlen); + CHECK(be32toh(next_key.data) != data_set[i], "iterate", "#%u got 0x%x exp 0x%x\n", + i, be32toh(next_key.data), data_set[i]); + cur = &next_key; + + /* + * Delete the minimal key, the next call of bpf_get_next_key() + * will return the second minimal key. + */ + err = bpf_map_delete_elem(fd, &next_key); + CHECK(err, "del elem", "#%u elem error %d\n", i, err); + } + err = bpf_map_get_next_key(fd, cur, &next_key); + CHECK(err != -ENOENT, "more element", "error %d\n", err); + + err = bpf_map_get_next_key(fd, NULL, &next_key); + CHECK(err != -ENOENT, "no-empty qp-trie", "error %d\n", err); + + free(data_set); + + close(fd); +} + +void test_lpm_trie_map_basic_ops(void) { int i; /* we want predictable, pseudo random tests */ srand(0xf00ba1); - /* Use libbpf 1.0 API mode */ - libbpf_set_strict_mode(LIBBPF_STRICT_ALL); - test_lpm_basic(); test_lpm_order(); @@ -792,6 +1179,10 @@ int main(void) test_lpm_get_next_key(); test_lpm_multi_thread(); - printf("test_lpm: OK\n"); - return 0; + test_lpm_trie_update_flags(); + test_lpm_trie_update_full_map(); + test_lpm_trie_iterate_strs(); + test_lpm_trie_iterate_ints(); + + printf("%s: PASS\n", __func__); } diff --git a/tools/testing/selftests/bpf/map_tests/lpm_trie_map_get_next_key.c b/tools/testing/selftests/bpf/map_tests/lpm_trie_map_get_next_key.c new file mode 100644 index 000000000000..0ba015686492 --- /dev/null +++ b/tools/testing/selftests/bpf/map_tests/lpm_trie_map_get_next_key.c @@ -0,0 +1,109 @@ +// SPDX-License-Identifier: GPL-2.0 + +#define _GNU_SOURCE +#include <linux/bpf.h> +#include <stdio.h> +#include <stdbool.h> +#include <unistd.h> +#include <errno.h> +#include <stdlib.h> +#include <string.h> +#include <pthread.h> + +#include <bpf/bpf.h> +#include <bpf/libbpf.h> + +#include <test_maps.h> + +struct test_lpm_key { + __u32 prefix; + __u32 data; +}; + +struct get_next_key_ctx { + struct test_lpm_key key; + bool start; + bool stop; + int map_fd; + int loop; +}; + +static void *get_next_key_fn(void *arg) +{ + struct get_next_key_ctx *ctx = arg; + struct test_lpm_key next_key; + int i = 0; + + while (!ctx->start) + usleep(1); + + while (!ctx->stop && i++ < ctx->loop) + bpf_map_get_next_key(ctx->map_fd, &ctx->key, &next_key); + + return NULL; +} + +static void abort_get_next_key(struct get_next_key_ctx *ctx, pthread_t *tids, + unsigned int nr) +{ + unsigned int i; + + ctx->stop = true; + ctx->start = true; + for (i = 0; i < nr; i++) + pthread_join(tids[i], NULL); +} + +/* This test aims to prevent regression of future. As long as the kernel does + * not panic, it is considered as success. + */ +void test_lpm_trie_map_get_next_key(void) +{ +#define MAX_NR_THREADS 8 + LIBBPF_OPTS(bpf_map_create_opts, create_opts, + .map_flags = BPF_F_NO_PREALLOC); + struct test_lpm_key key = {}; + __u32 val = 0; + int map_fd; + const __u32 max_prefixlen = 8 * (sizeof(key) - sizeof(key.prefix)); + const __u32 max_entries = max_prefixlen + 1; + unsigned int i, nr = MAX_NR_THREADS, loop = 65536; + pthread_t tids[MAX_NR_THREADS]; + struct get_next_key_ctx ctx; + int err; + + map_fd = bpf_map_create(BPF_MAP_TYPE_LPM_TRIE, "lpm_trie_map", + sizeof(struct test_lpm_key), sizeof(__u32), + max_entries, &create_opts); + CHECK(map_fd == -1, "bpf_map_create()", "error:%s\n", + strerror(errno)); + + for (i = 0; i <= max_prefixlen; i++) { + key.prefix = i; + err = bpf_map_update_elem(map_fd, &key, &val, BPF_ANY); + CHECK(err, "bpf_map_update_elem()", "error:%s\n", + strerror(errno)); + } + + ctx.start = false; + ctx.stop = false; + ctx.map_fd = map_fd; + ctx.loop = loop; + memcpy(&ctx.key, &key, sizeof(key)); + + for (i = 0; i < nr; i++) { + err = pthread_create(&tids[i], NULL, get_next_key_fn, &ctx); + if (err) { + abort_get_next_key(&ctx, tids, i); + CHECK(err, "pthread_create", "error %d\n", err); + } + } + + ctx.start = true; + for (i = 0; i < nr; i++) + pthread_join(tids[i], NULL); + + printf("%s:PASS\n", __func__); + + close(map_fd); +} diff --git a/tools/testing/selftests/bpf/map_tests/task_storage_map.c b/tools/testing/selftests/bpf/map_tests/task_storage_map.c index 7d050364efca..a4121d2248ac 100644 --- a/tools/testing/selftests/bpf/map_tests/task_storage_map.c +++ b/tools/testing/selftests/bpf/map_tests/task_storage_map.c @@ -12,6 +12,7 @@ #include <bpf/bpf.h> #include <bpf/libbpf.h> +#include "bpf_util.h" #include "test_maps.h" #include "task_local_storage_helpers.h" #include "read_bpf_task_storage_busy.skel.h" @@ -77,8 +78,8 @@ void test_task_storage_map_stress_lookup(void) CHECK(err, "open_and_load", "error %d\n", err); /* Only for a fully preemptible kernel */ - if (!skel->kconfig->CONFIG_PREEMPT) { - printf("%s SKIP (no CONFIG_PREEMPT)\n", __func__); + if (!skel->kconfig->CONFIG_PREEMPTION) { + printf("%s SKIP (no CONFIG_PREEMPTION)\n", __func__); read_bpf_task_storage_busy__destroy(skel); skips++; return; @@ -115,7 +116,7 @@ void test_task_storage_map_stress_lookup(void) CHECK(err, "attach", "error %d\n", err); /* Trigger program */ - syscall(SYS_gettid); + sys_gettid(); skel->bss->pid = 0; CHECK(skel->bss->busy != 0, "bad bpf_task_storage_busy", "got %d\n", skel->bss->busy); diff --git a/tools/testing/selftests/bpf/network_helpers.h b/tools/testing/selftests/bpf/network_helpers.h index c72c16e1aff8..5764155b6d25 100644 --- a/tools/testing/selftests/bpf/network_helpers.h +++ b/tools/testing/selftests/bpf/network_helpers.h @@ -1,6 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NETWORK_HELPERS_H #define __NETWORK_HELPERS_H +#include <arpa/inet.h> #include <sys/socket.h> #include <sys/types.h> #include <linux/types.h> diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c b/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c index 070c52c312e5..6befa870434b 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c @@ -690,7 +690,7 @@ void test_bpf_cookie(void) if (!ASSERT_OK_PTR(skel, "skel_open")) return; - skel->bss->my_tid = syscall(SYS_gettid); + skel->bss->my_tid = sys_gettid(); if (test__start_subtest("kprobe")) kprobe_subtest(skel); diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c index 52e6f7570475..6f1bfacd7375 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c @@ -226,7 +226,7 @@ static void test_task_common_nocheck(struct bpf_iter_attach_opts *opts, ASSERT_OK(pthread_create(&thread_id, NULL, &do_nothing_wait, NULL), "pthread_create"); - skel->bss->tid = getpid(); + skel->bss->tid = sys_gettid(); do_dummy_read_opts(skel->progs.dump_task, opts); @@ -249,25 +249,42 @@ static void test_task_common(struct bpf_iter_attach_opts *opts, int num_unknown, ASSERT_EQ(num_known_tid, num_known, "check_num_known_tid"); } -static void test_task_tid(void) +static void *run_test_task_tid(void *arg) { LIBBPF_OPTS(bpf_iter_attach_opts, opts); union bpf_iter_link_info linfo; int num_unknown_tid, num_known_tid; + ASSERT_NEQ(getpid(), sys_gettid(), "check_new_thread_id"); + memset(&linfo, 0, sizeof(linfo)); - linfo.task.tid = getpid(); + linfo.task.tid = sys_gettid(); opts.link_info = &linfo; opts.link_info_len = sizeof(linfo); test_task_common(&opts, 0, 1); linfo.task.tid = 0; linfo.task.pid = getpid(); - test_task_common(&opts, 1, 1); + /* This includes the parent thread, this thread, watchdog timer thread + * and the do_nothing_wait thread + */ + test_task_common(&opts, 3, 1); test_task_common_nocheck(NULL, &num_unknown_tid, &num_known_tid); - ASSERT_GT(num_unknown_tid, 1, "check_num_unknown_tid"); + ASSERT_GT(num_unknown_tid, 2, "check_num_unknown_tid"); ASSERT_EQ(num_known_tid, 1, "check_num_known_tid"); + + return NULL; +} + +static void test_task_tid(void) +{ + pthread_t thread_id; + + /* Create a new thread so pid and tid aren't the same */ + ASSERT_OK(pthread_create(&thread_id, NULL, &run_test_task_tid, NULL), + "pthread_create"); + ASSERT_FALSE(pthread_join(thread_id, NULL), "pthread_join"); } static void test_task_pid(void) @@ -280,7 +297,7 @@ static void test_task_pid(void) opts.link_info = &linfo; opts.link_info_len = sizeof(linfo); - test_task_common(&opts, 1, 1); + test_task_common(&opts, 2, 1); } static void test_task_pidfd(void) @@ -298,7 +315,7 @@ static void test_task_pidfd(void) opts.link_info = &linfo; opts.link_info_len = sizeof(linfo); - test_task_common(&opts, 1, 1); + test_task_common(&opts, 2, 1); close(pidfd); } diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c b/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c index 409a06975823..b7d1b52309d0 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c @@ -16,10 +16,6 @@ #include "tcp_ca_kfunc.skel.h" #include "bpf_cc_cubic.skel.h" -#ifndef ENOTSUPP -#define ENOTSUPP 524 -#endif - static const unsigned int total_bytes = 10 * 1024 * 1024; static int expected_stg = 0xeB9F; diff --git a/tools/testing/selftests/bpf/prog_tests/btf_skc_cls_ingress.c b/tools/testing/selftests/bpf/prog_tests/btf_skc_cls_ingress.c index ef4d6a3ae423..cf15cc3be491 100644 --- a/tools/testing/selftests/bpf/prog_tests/btf_skc_cls_ingress.c +++ b/tools/testing/selftests/bpf/prog_tests/btf_skc_cls_ingress.c @@ -17,32 +17,37 @@ #include "test_progs.h" #include "test_btf_skc_cls_ingress.skel.h" -static struct test_btf_skc_cls_ingress *skel; -static struct sockaddr_in6 srv_sa6; -static __u32 duration; +#define TEST_NS "skc_cls_ingress" -static int prepare_netns(void) +#define BIT(n) (1 << (n)) +#define TEST_MODE_IPV4 BIT(0) +#define TEST_MODE_IPV6 BIT(1) +#define TEST_MODE_DUAL (TEST_MODE_IPV4 | TEST_MODE_IPV6) + +#define SERVER_ADDR_IPV4 "127.0.0.1" +#define SERVER_ADDR_IPV6 "::1" +#define SERVER_ADDR_DUAL "::0" +/* RFC791, 576 for minimal IPv4 datagram, minus 40 bytes of TCP header */ +#define MIN_IPV4_MSS 536 + +static struct netns_obj *prepare_netns(struct test_btf_skc_cls_ingress *skel) { LIBBPF_OPTS(bpf_tc_hook, qdisc_lo, .attach_point = BPF_TC_INGRESS); LIBBPF_OPTS(bpf_tc_opts, tc_attach, .prog_fd = bpf_program__fd(skel->progs.cls_ingress)); + struct netns_obj *ns = NULL; - if (CHECK(unshare(CLONE_NEWNET), "create netns", - "unshare(CLONE_NEWNET): %s (%d)", - strerror(errno), errno)) - return -1; - - if (CHECK(system("ip link set dev lo up"), - "ip link set dev lo up", "failed\n")) - return -1; + ns = netns_new(TEST_NS, true); + if (!ASSERT_OK_PTR(ns, "create and join netns")) + return ns; qdisc_lo.ifindex = if_nametoindex("lo"); if (!ASSERT_OK(bpf_tc_hook_create(&qdisc_lo), "qdisc add dev lo clsact")) - return -1; + goto free_ns; if (!ASSERT_OK(bpf_tc_attach(&qdisc_lo, &tc_attach), "filter add dev lo ingress")) - return -1; + goto free_ns; /* Ensure 20 bytes options (i.e. in total 40 bytes tcp header) for the * bpf_tcp_gen_syncookie() helper. @@ -50,71 +55,142 @@ static int prepare_netns(void) if (write_sysctl("/proc/sys/net/ipv4/tcp_window_scaling", "1") || write_sysctl("/proc/sys/net/ipv4/tcp_timestamps", "1") || write_sysctl("/proc/sys/net/ipv4/tcp_sack", "1")) - return -1; + goto free_ns; + + return ns; - return 0; +free_ns: + netns_free(ns); + return NULL; } -static void reset_test(void) +static void reset_test(struct test_btf_skc_cls_ingress *skel) { + memset(&skel->bss->srv_sa4, 0, sizeof(skel->bss->srv_sa4)); memset(&skel->bss->srv_sa6, 0, sizeof(skel->bss->srv_sa6)); skel->bss->listen_tp_sport = 0; skel->bss->req_sk_sport = 0; skel->bss->recv_cookie = 0; skel->bss->gen_cookie = 0; skel->bss->linum = 0; + skel->bss->mss = 0; } -static void print_err_line(void) +static void print_err_line(struct test_btf_skc_cls_ingress *skel) { if (skel->bss->linum) printf("bpf prog error at line %u\n", skel->bss->linum); } -static void test_conn(void) +static int v6only_true(int fd, void *opts) +{ + int mode = true; + + return setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &mode, sizeof(mode)); +} + +static int v6only_false(int fd, void *opts) { + int mode = false; + + return setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &mode, sizeof(mode)); +} + +static void run_test(struct test_btf_skc_cls_ingress *skel, bool gen_cookies, + int ip_mode) +{ + const char *tcp_syncookies = gen_cookies ? "2" : "1"; int listen_fd = -1, cli_fd = -1, srv_fd = -1, err; - socklen_t addrlen = sizeof(srv_sa6); + struct network_helper_opts opts = { 0 }; + struct sockaddr_storage *addr; + struct sockaddr_in6 srv_sa6; + struct sockaddr_in srv_sa4; + socklen_t addr_len; + int sock_family; + char *srv_addr; int srv_port; - if (write_sysctl("/proc/sys/net/ipv4/tcp_syncookies", "1")) + switch (ip_mode) { + case TEST_MODE_IPV4: + sock_family = AF_INET; + srv_addr = SERVER_ADDR_IPV4; + addr = (struct sockaddr_storage *)&srv_sa4; + addr_len = sizeof(srv_sa4); + break; + case TEST_MODE_IPV6: + opts.post_socket_cb = v6only_true; + sock_family = AF_INET6; + srv_addr = SERVER_ADDR_IPV6; + addr = (struct sockaddr_storage *)&srv_sa6; + addr_len = sizeof(srv_sa6); + break; + case TEST_MODE_DUAL: + opts.post_socket_cb = v6only_false; + sock_family = AF_INET6; + srv_addr = SERVER_ADDR_DUAL; + addr = (struct sockaddr_storage *)&srv_sa6; + addr_len = sizeof(srv_sa6); + break; + default: + PRINT_FAIL("Unknown IP mode %d", ip_mode); return; + } - listen_fd = start_server(AF_INET6, SOCK_STREAM, "::1", 0, 0); - if (CHECK_FAIL(listen_fd == -1)) + if (write_sysctl("/proc/sys/net/ipv4/tcp_syncookies", tcp_syncookies)) return; - err = getsockname(listen_fd, (struct sockaddr *)&srv_sa6, &addrlen); - if (CHECK(err, "getsockname(listen_fd)", "err:%d errno:%d\n", err, - errno)) - goto done; - memcpy(&skel->bss->srv_sa6, &srv_sa6, sizeof(srv_sa6)); - srv_port = ntohs(srv_sa6.sin6_port); + listen_fd = start_server_str(sock_family, SOCK_STREAM, srv_addr, 0, + &opts); + if (!ASSERT_OK_FD(listen_fd, "start server")) + return; - cli_fd = connect_to_fd(listen_fd, 0); - if (CHECK_FAIL(cli_fd == -1)) + err = getsockname(listen_fd, (struct sockaddr *)addr, &addr_len); + if (!ASSERT_OK(err, "getsockname(listen_fd)")) goto done; - srv_fd = accept(listen_fd, NULL, NULL); - if (CHECK_FAIL(srv_fd == -1)) + switch (ip_mode) { + case TEST_MODE_IPV4: + memcpy(&skel->bss->srv_sa4, &srv_sa4, sizeof(srv_sa4)); + srv_port = ntohs(srv_sa4.sin_port); + break; + case TEST_MODE_IPV6: + case TEST_MODE_DUAL: + memcpy(&skel->bss->srv_sa6, &srv_sa6, sizeof(srv_sa6)); + srv_port = ntohs(srv_sa6.sin6_port); + break; + default: goto done; + } - if (CHECK(skel->bss->listen_tp_sport != srv_port || - skel->bss->req_sk_sport != srv_port, - "Unexpected sk src port", - "listen_tp_sport:%u req_sk_sport:%u expected:%u\n", - skel->bss->listen_tp_sport, skel->bss->req_sk_sport, - srv_port)) + cli_fd = connect_to_fd(listen_fd, 0); + if (!ASSERT_OK_FD(cli_fd, "connect client")) goto done; - if (CHECK(skel->bss->gen_cookie || skel->bss->recv_cookie, - "Unexpected syncookie states", - "gen_cookie:%u recv_cookie:%u\n", - skel->bss->gen_cookie, skel->bss->recv_cookie)) + srv_fd = accept(listen_fd, NULL, NULL); + if (!ASSERT_OK_FD(srv_fd, "accept connection")) goto done; - CHECK(skel->bss->linum, "bpf prog detected error", "at line %u\n", - skel->bss->linum); + ASSERT_EQ(skel->bss->listen_tp_sport, srv_port, "listen tp src port"); + + if (!gen_cookies) { + ASSERT_EQ(skel->bss->req_sk_sport, srv_port, + "request socket source port with syncookies disabled"); + ASSERT_EQ(skel->bss->gen_cookie, 0, + "generated syncookie with syncookies disabled"); + ASSERT_EQ(skel->bss->recv_cookie, 0, + "received syncookie with syncookies disabled"); + } else { + ASSERT_EQ(skel->bss->req_sk_sport, 0, + "request socket source port with syncookies enabled"); + ASSERT_NEQ(skel->bss->gen_cookie, 0, + "syncookie properly generated"); + ASSERT_EQ(skel->bss->gen_cookie, skel->bss->recv_cookie, + "matching syncookies on client and server"); + ASSERT_GT(skel->bss->mss, MIN_IPV4_MSS, + "MSS in cookie min value"); + ASSERT_LT(skel->bss->mss, USHRT_MAX, + "MSS in cookie max value"); + } done: if (listen_fd != -1) @@ -125,96 +201,74 @@ done: close(srv_fd); } -static void test_syncookie(void) +static void test_conn_ipv4(struct test_btf_skc_cls_ingress *skel) { - int listen_fd = -1, cli_fd = -1, srv_fd = -1, err; - socklen_t addrlen = sizeof(srv_sa6); - int srv_port; - - /* Enforce syncookie mode */ - if (write_sysctl("/proc/sys/net/ipv4/tcp_syncookies", "2")) - return; - - listen_fd = start_server(AF_INET6, SOCK_STREAM, "::1", 0, 0); - if (CHECK_FAIL(listen_fd == -1)) - return; - - err = getsockname(listen_fd, (struct sockaddr *)&srv_sa6, &addrlen); - if (CHECK(err, "getsockname(listen_fd)", "err:%d errno:%d\n", err, - errno)) - goto done; - memcpy(&skel->bss->srv_sa6, &srv_sa6, sizeof(srv_sa6)); - srv_port = ntohs(srv_sa6.sin6_port); - - cli_fd = connect_to_fd(listen_fd, 0); - if (CHECK_FAIL(cli_fd == -1)) - goto done; - - srv_fd = accept(listen_fd, NULL, NULL); - if (CHECK_FAIL(srv_fd == -1)) - goto done; + run_test(skel, false, TEST_MODE_IPV4); +} - if (CHECK(skel->bss->listen_tp_sport != srv_port, - "Unexpected tp src port", - "listen_tp_sport:%u expected:%u\n", - skel->bss->listen_tp_sport, srv_port)) - goto done; +static void test_conn_ipv6(struct test_btf_skc_cls_ingress *skel) +{ + run_test(skel, false, TEST_MODE_IPV6); +} - if (CHECK(skel->bss->req_sk_sport, - "Unexpected req_sk src port", - "req_sk_sport:%u expected:0\n", - skel->bss->req_sk_sport)) - goto done; +static void test_conn_dual(struct test_btf_skc_cls_ingress *skel) +{ + run_test(skel, false, TEST_MODE_DUAL); +} - if (CHECK(!skel->bss->gen_cookie || - skel->bss->gen_cookie != skel->bss->recv_cookie, - "Unexpected syncookie states", - "gen_cookie:%u recv_cookie:%u\n", - skel->bss->gen_cookie, skel->bss->recv_cookie)) - goto done; +static void test_syncookie_ipv4(struct test_btf_skc_cls_ingress *skel) +{ + run_test(skel, true, TEST_MODE_IPV4); +} - CHECK(skel->bss->linum, "bpf prog detected error", "at line %u\n", - skel->bss->linum); +static void test_syncookie_ipv6(struct test_btf_skc_cls_ingress *skel) +{ + run_test(skel, true, TEST_MODE_IPV6); +} -done: - if (listen_fd != -1) - close(listen_fd); - if (cli_fd != -1) - close(cli_fd); - if (srv_fd != -1) - close(srv_fd); +static void test_syncookie_dual(struct test_btf_skc_cls_ingress *skel) +{ + run_test(skel, true, TEST_MODE_DUAL); } struct test { const char *desc; - void (*run)(void); + void (*run)(struct test_btf_skc_cls_ingress *skel); }; #define DEF_TEST(name) { #name, test_##name } static struct test tests[] = { - DEF_TEST(conn), - DEF_TEST(syncookie), + DEF_TEST(conn_ipv4), + DEF_TEST(conn_ipv6), + DEF_TEST(conn_dual), + DEF_TEST(syncookie_ipv4), + DEF_TEST(syncookie_ipv6), + DEF_TEST(syncookie_dual), }; void test_btf_skc_cls_ingress(void) { + struct test_btf_skc_cls_ingress *skel; + struct netns_obj *ns; int i; skel = test_btf_skc_cls_ingress__open_and_load(); - if (CHECK(!skel, "test_btf_skc_cls_ingress__open_and_load", "failed\n")) + if (!ASSERT_OK_PTR(skel, "test_btf_skc_cls_ingress__open_and_load")) return; for (i = 0; i < ARRAY_SIZE(tests); i++) { if (!test__start_subtest(tests[i].desc)) continue; - if (prepare_netns()) + ns = prepare_netns(skel); + if (!ns) break; - tests[i].run(); + tests[i].run(skel); - print_err_line(); - reset_test(); + print_err_line(skel); + reset_test(skel); + netns_free(ns); } test_btf_skc_cls_ingress__destroy(skel); diff --git a/tools/testing/selftests/bpf/prog_tests/cb_refs.c b/tools/testing/selftests/bpf/prog_tests/cb_refs.c index 3bff680de16c..c40df623a8f7 100644 --- a/tools/testing/selftests/bpf/prog_tests/cb_refs.c +++ b/tools/testing/selftests/bpf/prog_tests/cb_refs.c @@ -11,8 +11,8 @@ struct { const char *prog_name; const char *err_msg; } cb_refs_tests[] = { - { "underflow_prog", "reference has not been acquired before" }, - { "leak_prog", "Unreleased reference" }, + { "underflow_prog", "must point to scalar, or struct with scalar" }, + { "leak_prog", "Possibly NULL pointer passed to helper arg2" }, { "nested_cb", "Unreleased reference id=4 alloc_insn=2" }, /* alloc_insn=2{4,5} */ { "non_cb_transfer_ref", "Unreleased reference id=4 alloc_insn=1" }, /* alloc_insn=1{1,2} */ }; diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_ancestor.c b/tools/testing/selftests/bpf/prog_tests/cgroup_ancestor.c index 9250a1e9f9af..3f9ffdf71343 100644 --- a/tools/testing/selftests/bpf/prog_tests/cgroup_ancestor.c +++ b/tools/testing/selftests/bpf/prog_tests/cgroup_ancestor.c @@ -35,7 +35,7 @@ static int send_datagram(void) if (!ASSERT_OK_FD(sock, "create socket")) return sock; - if (!ASSERT_OK(connect(sock, &addr, sizeof(addr)), "connect")) { + if (!ASSERT_OK(connect(sock, (struct sockaddr *)&addr, sizeof(addr)), "connect")) { close(sock); return -1; } diff --git a/tools/testing/selftests/bpf/prog_tests/cgrp_local_storage.c b/tools/testing/selftests/bpf/prog_tests/cgrp_local_storage.c index 747761572098..9015e2c2ab12 100644 --- a/tools/testing/selftests/bpf/prog_tests/cgrp_local_storage.c +++ b/tools/testing/selftests/bpf/prog_tests/cgrp_local_storage.c @@ -63,14 +63,14 @@ static void test_tp_btf(int cgroup_fd) if (!ASSERT_OK(err, "map_delete_elem")) goto out; - skel->bss->target_pid = syscall(SYS_gettid); + skel->bss->target_pid = sys_gettid(); err = cgrp_ls_tp_btf__attach(skel); if (!ASSERT_OK(err, "skel_attach")) goto out; - syscall(SYS_gettid); - syscall(SYS_gettid); + sys_gettid(); + sys_gettid(); skel->bss->target_pid = 0; @@ -154,7 +154,7 @@ static void test_recursion(int cgroup_fd) goto out; /* trigger sys_enter, make sure it does not cause deadlock */ - syscall(SYS_gettid); + sys_gettid(); out: cgrp_ls_recursion__destroy(skel); @@ -224,7 +224,7 @@ static void test_yes_rcu_lock(__u64 cgroup_id) return; CGROUP_MODE_SET(skel); - skel->bss->target_pid = syscall(SYS_gettid); + skel->bss->target_pid = sys_gettid(); bpf_program__set_autoload(skel->progs.yes_rcu_lock, true); err = cgrp_ls_sleepable__load(skel); diff --git a/tools/testing/selftests/bpf/prog_tests/changes_pkt_data.c b/tools/testing/selftests/bpf/prog_tests/changes_pkt_data.c new file mode 100644 index 000000000000..7526de379081 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/changes_pkt_data.c @@ -0,0 +1,107 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "bpf/libbpf.h" +#include "changes_pkt_data_freplace.skel.h" +#include "changes_pkt_data.skel.h" +#include <test_progs.h> + +static void print_verifier_log(const char *log) +{ + if (env.verbosity >= VERBOSE_VERY) + fprintf(stdout, "VERIFIER LOG:\n=============\n%s=============\n", log); +} + +static void test_aux(const char *main_prog_name, + const char *to_be_replaced, + const char *replacement, + bool expect_load) +{ + struct changes_pkt_data_freplace *freplace = NULL; + struct bpf_program *freplace_prog = NULL; + struct bpf_program *main_prog = NULL; + LIBBPF_OPTS(bpf_object_open_opts, opts); + struct changes_pkt_data *main = NULL; + char log[16*1024]; + int err; + + opts.kernel_log_buf = log; + opts.kernel_log_size = sizeof(log); + if (env.verbosity >= VERBOSE_SUPER) + opts.kernel_log_level = 1 | 2 | 4; + main = changes_pkt_data__open_opts(&opts); + if (!ASSERT_OK_PTR(main, "changes_pkt_data__open")) + goto out; + main_prog = bpf_object__find_program_by_name(main->obj, main_prog_name); + if (!ASSERT_OK_PTR(main_prog, "main_prog")) + goto out; + bpf_program__set_autoload(main_prog, true); + err = changes_pkt_data__load(main); + print_verifier_log(log); + if (!ASSERT_OK(err, "changes_pkt_data__load")) + goto out; + freplace = changes_pkt_data_freplace__open_opts(&opts); + if (!ASSERT_OK_PTR(freplace, "changes_pkt_data_freplace__open")) + goto out; + freplace_prog = bpf_object__find_program_by_name(freplace->obj, replacement); + if (!ASSERT_OK_PTR(freplace_prog, "freplace_prog")) + goto out; + bpf_program__set_autoload(freplace_prog, true); + bpf_program__set_autoattach(freplace_prog, true); + bpf_program__set_attach_target(freplace_prog, + bpf_program__fd(main_prog), + to_be_replaced); + err = changes_pkt_data_freplace__load(freplace); + print_verifier_log(log); + if (expect_load) { + ASSERT_OK(err, "changes_pkt_data_freplace__load"); + } else { + ASSERT_ERR(err, "changes_pkt_data_freplace__load"); + ASSERT_HAS_SUBSTR(log, "Extension program changes packet data", "error log"); + } + +out: + changes_pkt_data_freplace__destroy(freplace); + changes_pkt_data__destroy(main); +} + +/* There are two global subprograms in both changes_pkt_data.skel.h: + * - one changes packet data; + * - another does not. + * It is ok to freplace subprograms that change packet data with those + * that either do or do not. It is only ok to freplace subprograms + * that do not change packet data with those that do not as well. + * The below tests check outcomes for each combination of such freplace. + * Also test a case when main subprogram itself is replaced and is a single + * subprogram in a program. + */ +void test_changes_pkt_data_freplace(void) +{ + struct { + const char *main; + const char *to_be_replaced; + bool changes; + } mains[] = { + { "main_with_subprogs", "changes_pkt_data", true }, + { "main_with_subprogs", "does_not_change_pkt_data", false }, + { "main_changes", "main_changes", true }, + { "main_does_not_change", "main_does_not_change", false }, + }; + struct { + const char *func; + bool changes; + } replacements[] = { + { "changes_pkt_data", true }, + { "does_not_change_pkt_data", false } + }; + char buf[64]; + + for (int i = 0; i < ARRAY_SIZE(mains); ++i) { + for (int j = 0; j < ARRAY_SIZE(replacements); ++j) { + snprintf(buf, sizeof(buf), "%s_with_%s", + mains[i].to_be_replaced, replacements[j].func); + if (!test__start_subtest(buf)) + continue; + test_aux(mains[i].main, mains[i].to_be_replaced, replacements[j].func, + mains[i].changes || !replacements[j].changes); + } + } +} diff --git a/tools/testing/selftests/bpf/prog_tests/core_reloc.c b/tools/testing/selftests/bpf/prog_tests/core_reloc.c index 26019313e1fc..1c682550e0e7 100644 --- a/tools/testing/selftests/bpf/prog_tests/core_reloc.c +++ b/tools/testing/selftests/bpf/prog_tests/core_reloc.c @@ -1010,7 +1010,7 @@ static void run_core_reloc_tests(bool use_btfgen) struct data *data; void *mmap_data = NULL; - my_pid_tgid = getpid() | ((uint64_t)syscall(SYS_gettid) << 32); + my_pid_tgid = getpid() | ((uint64_t)sys_gettid() << 32); for (i = 0; i < ARRAY_SIZE(test_cases); i++) { char btf_file[] = "/tmp/core_reloc.btf.XXXXXX"; diff --git a/tools/testing/selftests/bpf/prog_tests/cpumask.c b/tools/testing/selftests/bpf/prog_tests/cpumask.c index 2570bd4b0cb2..e58a04654238 100644 --- a/tools/testing/selftests/bpf/prog_tests/cpumask.c +++ b/tools/testing/selftests/bpf/prog_tests/cpumask.c @@ -23,6 +23,7 @@ static const char * const cpumask_success_testcases[] = { "test_global_mask_array_l2_rcu", "test_global_mask_nested_rcu", "test_global_mask_nested_deep_rcu", + "test_global_mask_nested_deep_array_rcu", "test_cpumask_weight", }; diff --git a/tools/testing/selftests/bpf/prog_tests/fill_link_info.c b/tools/testing/selftests/bpf/prog_tests/fill_link_info.c index f3932941bbaa..d50cbd8040d4 100644 --- a/tools/testing/selftests/bpf/prog_tests/fill_link_info.c +++ b/tools/testing/selftests/bpf/prog_tests/fill_link_info.c @@ -67,8 +67,9 @@ again: ASSERT_EQ(info.perf_event.kprobe.cookie, PERF_EVENT_COOKIE, "kprobe_cookie"); + ASSERT_EQ(info.perf_event.kprobe.name_len, strlen(KPROBE_FUNC) + 1, + "name_len"); if (!info.perf_event.kprobe.func_name) { - ASSERT_EQ(info.perf_event.kprobe.name_len, 0, "name_len"); info.perf_event.kprobe.func_name = ptr_to_u64(&buf); info.perf_event.kprobe.name_len = sizeof(buf); goto again; @@ -79,8 +80,9 @@ again: ASSERT_EQ(err, 0, "cmp_kprobe_func_name"); break; case BPF_PERF_EVENT_TRACEPOINT: + ASSERT_EQ(info.perf_event.tracepoint.name_len, strlen(TP_NAME) + 1, + "name_len"); if (!info.perf_event.tracepoint.tp_name) { - ASSERT_EQ(info.perf_event.tracepoint.name_len, 0, "name_len"); info.perf_event.tracepoint.tp_name = ptr_to_u64(&buf); info.perf_event.tracepoint.name_len = sizeof(buf); goto again; @@ -96,8 +98,9 @@ again: case BPF_PERF_EVENT_URETPROBE: ASSERT_EQ(info.perf_event.uprobe.offset, offset, "uprobe_offset"); + ASSERT_EQ(info.perf_event.uprobe.name_len, strlen(UPROBE_FILE) + 1, + "name_len"); if (!info.perf_event.uprobe.file_name) { - ASSERT_EQ(info.perf_event.uprobe.name_len, 0, "name_len"); info.perf_event.uprobe.file_name = ptr_to_u64(&buf); info.perf_event.uprobe.name_len = sizeof(buf); goto again; @@ -417,6 +420,15 @@ verify_umulti_link_info(int fd, bool retprobe, __u64 *offsets, if (!ASSERT_NEQ(err, -1, "readlink")) return -1; + memset(&info, 0, sizeof(info)); + err = bpf_link_get_info_by_fd(fd, &info, &len); + if (!ASSERT_OK(err, "bpf_link_get_info_by_fd")) + return -1; + + ASSERT_EQ(info.uprobe_multi.count, 3, "info.uprobe_multi.count"); + ASSERT_EQ(info.uprobe_multi.path_size, strlen(path) + 1, + "info.uprobe_multi.path_size"); + for (bit = 0; bit < 8; bit++) { memset(&info, 0, sizeof(info)); info.uprobe_multi.path = ptr_to_u64(path_buf); diff --git a/tools/testing/selftests/bpf/prog_tests/iters.c b/tools/testing/selftests/bpf/prog_tests/iters.c index 89ff23c4a8bc..3cea71f9c500 100644 --- a/tools/testing/selftests/bpf/prog_tests/iters.c +++ b/tools/testing/selftests/bpf/prog_tests/iters.c @@ -192,8 +192,8 @@ static void subtest_task_iters(void) syscall(SYS_getpgid); iters_task__detach(skel); ASSERT_EQ(skel->bss->procs_cnt, 1, "procs_cnt"); - ASSERT_EQ(skel->bss->threads_cnt, thread_num + 1, "threads_cnt"); - ASSERT_EQ(skel->bss->proc_threads_cnt, thread_num + 1, "proc_threads_cnt"); + ASSERT_EQ(skel->bss->threads_cnt, thread_num + 2, "threads_cnt"); + ASSERT_EQ(skel->bss->proc_threads_cnt, thread_num + 2, "proc_threads_cnt"); ASSERT_EQ(skel->bss->invalid_cnt, 0, "invalid_cnt"); pthread_mutex_unlock(&do_nothing_mutex); for (int i = 0; i < thread_num; i++) diff --git a/tools/testing/selftests/bpf/prog_tests/kfunc_module_order.c b/tools/testing/selftests/bpf/prog_tests/kfunc_module_order.c new file mode 100644 index 000000000000..48c0560d398e --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/kfunc_module_order.c @@ -0,0 +1,55 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <test_progs.h> +#include <testing_helpers.h> + +#include "kfunc_module_order.skel.h" + +static int test_run_prog(const struct bpf_program *prog, + struct bpf_test_run_opts *opts) +{ + int err; + + err = bpf_prog_test_run_opts(bpf_program__fd(prog), opts); + if (!ASSERT_OK(err, "bpf_prog_test_run_opts")) + return err; + + if (!ASSERT_EQ((int)opts->retval, 0, bpf_program__name(prog))) + return -EINVAL; + + return 0; +} + +void test_kfunc_module_order(void) +{ + struct kfunc_module_order *skel; + char pkt_data[64] = {}; + int err = 0; + + DECLARE_LIBBPF_OPTS(bpf_test_run_opts, test_opts, .data_in = pkt_data, + .data_size_in = sizeof(pkt_data)); + + err = load_module("bpf_test_modorder_x.ko", + env_verbosity > VERBOSE_NONE); + if (!ASSERT_OK(err, "load bpf_test_modorder_x.ko")) + return; + + err = load_module("bpf_test_modorder_y.ko", + env_verbosity > VERBOSE_NONE); + if (!ASSERT_OK(err, "load bpf_test_modorder_y.ko")) + goto exit_modx; + + skel = kfunc_module_order__open_and_load(); + if (!ASSERT_OK_PTR(skel, "kfunc_module_order__open_and_load()")) { + err = -EINVAL; + goto exit_mods; + } + + test_run_prog(skel->progs.call_kfunc_xy, &test_opts); + test_run_prog(skel->progs.call_kfunc_yx, &test_opts); + + kfunc_module_order__destroy(skel); +exit_mods: + unload_module("bpf_test_modorder_y", env_verbosity > VERBOSE_NONE); +exit_modx: + unload_module("bpf_test_modorder_x", env_verbosity > VERBOSE_NONE); +} diff --git a/tools/testing/selftests/bpf/prog_tests/kmem_cache_iter.c b/tools/testing/selftests/bpf/prog_tests/kmem_cache_iter.c new file mode 100644 index 000000000000..8e13a3416a21 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/kmem_cache_iter.c @@ -0,0 +1,126 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2024 Google */ + +#include <test_progs.h> +#include <bpf/libbpf.h> +#include <bpf/btf.h> +#include "kmem_cache_iter.skel.h" + +#define SLAB_NAME_MAX 32 + +struct kmem_cache_result { + char name[SLAB_NAME_MAX]; + long obj_size; +}; + +static void subtest_kmem_cache_iter_check_task_struct(struct kmem_cache_iter *skel) +{ + LIBBPF_OPTS(bpf_test_run_opts, opts, + .flags = 0, /* Run it with the current task */ + ); + int prog_fd = bpf_program__fd(skel->progs.check_task_struct); + + /* Get task_struct and check it if's from a slab cache */ + ASSERT_OK(bpf_prog_test_run_opts(prog_fd, &opts), "prog_test_run"); + + /* The BPF program should set 'found' variable */ + ASSERT_EQ(skel->bss->task_struct_found, 1, "task_struct_found"); +} + +static void subtest_kmem_cache_iter_check_slabinfo(struct kmem_cache_iter *skel) +{ + FILE *fp; + int map_fd; + char name[SLAB_NAME_MAX]; + unsigned long objsize; + char rest_of_line[1000]; + struct kmem_cache_result r; + int seen = 0; + + fp = fopen("/proc/slabinfo", "r"); + if (fp == NULL) { + /* CONFIG_SLUB_DEBUG is not enabled */ + return; + } + + map_fd = bpf_map__fd(skel->maps.slab_result); + + /* Ignore first two lines for header */ + fscanf(fp, "slabinfo - version: %*d.%*d\n"); + fscanf(fp, "# %*s %*s %*s %*s %*s %*s : %[^\n]\n", rest_of_line); + + /* Compare name and objsize only - others can be changes frequently */ + while (fscanf(fp, "%s %*u %*u %lu %*u %*u : %[^\n]\n", + name, &objsize, rest_of_line) == 3) { + int ret = bpf_map_lookup_elem(map_fd, &seen, &r); + + if (!ASSERT_OK(ret, "kmem_cache_lookup")) + break; + + ASSERT_STREQ(r.name, name, "kmem_cache_name"); + ASSERT_EQ(r.obj_size, objsize, "kmem_cache_objsize"); + + seen++; + } + + ASSERT_EQ(skel->bss->kmem_cache_seen, seen, "kmem_cache_seen_eq"); + + fclose(fp); +} + +static void subtest_kmem_cache_iter_open_coded(struct kmem_cache_iter *skel) +{ + LIBBPF_OPTS(bpf_test_run_opts, topts); + int err, fd; + + /* No need to attach it, just run it directly */ + fd = bpf_program__fd(skel->progs.open_coded_iter); + + err = bpf_prog_test_run_opts(fd, &topts); + if (!ASSERT_OK(err, "test_run_opts err")) + return; + if (!ASSERT_OK(topts.retval, "test_run_opts retval")) + return; + + /* It should be same as we've seen from the explicit iterator */ + ASSERT_EQ(skel->bss->open_coded_seen, skel->bss->kmem_cache_seen, "open_code_seen_eq"); +} + +void test_kmem_cache_iter(void) +{ + struct kmem_cache_iter *skel = NULL; + char buf[256]; + int iter_fd; + + skel = kmem_cache_iter__open_and_load(); + if (!ASSERT_OK_PTR(skel, "kmem_cache_iter__open_and_load")) + return; + + if (!ASSERT_OK(kmem_cache_iter__attach(skel), "skel_attach")) + goto destroy; + + iter_fd = bpf_iter_create(bpf_link__fd(skel->links.slab_info_collector)); + if (!ASSERT_GE(iter_fd, 0, "iter_create")) + goto destroy; + + memset(buf, 0, sizeof(buf)); + while (read(iter_fd, buf, sizeof(buf) > 0)) { + /* Read out all contents */ + printf("%s", buf); + } + + /* Next reads should return 0 */ + ASSERT_EQ(read(iter_fd, buf, sizeof(buf)), 0, "read"); + + if (test__start_subtest("check_task_struct")) + subtest_kmem_cache_iter_check_task_struct(skel); + if (test__start_subtest("check_slabinfo")) + subtest_kmem_cache_iter_check_slabinfo(skel); + if (test__start_subtest("open_coded_iter")) + subtest_kmem_cache_iter_open_coded(skel); + + close(iter_fd); + +destroy: + kmem_cache_iter__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c b/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c index 960c9323d1e0..66ab1cae923e 100644 --- a/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c +++ b/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c @@ -6,6 +6,7 @@ #include "kprobe_multi_override.skel.h" #include "kprobe_multi_session.skel.h" #include "kprobe_multi_session_cookie.skel.h" +#include "kprobe_multi_verifier.skel.h" #include "bpf/libbpf_internal.h" #include "bpf/hashmap.h" @@ -764,4 +765,5 @@ void test_kprobe_multi_test(void) test_session_skel_api(); if (test__start_subtest("session_cookie")) test_session_cookie_skel_api(); + RUN_TESTS(kprobe_multi_verifier); } diff --git a/tools/testing/selftests/bpf/prog_tests/linked_funcs.c b/tools/testing/selftests/bpf/prog_tests/linked_funcs.c index cad664546912..fa639b021f7e 100644 --- a/tools/testing/selftests/bpf/prog_tests/linked_funcs.c +++ b/tools/testing/selftests/bpf/prog_tests/linked_funcs.c @@ -20,7 +20,7 @@ void test_linked_funcs(void) bpf_program__set_autoload(skel->progs.handler1, true); bpf_program__set_autoload(skel->progs.handler2, true); - skel->rodata->my_tid = syscall(SYS_gettid); + skel->rodata->my_tid = sys_gettid(); skel->bss->syscall_id = SYS_getpgid; err = linked_funcs__load(skel); diff --git a/tools/testing/selftests/bpf/prog_tests/log_buf.c b/tools/testing/selftests/bpf/prog_tests/log_buf.c index 27676a04d0b6..169ce689b97c 100644 --- a/tools/testing/selftests/bpf/prog_tests/log_buf.c +++ b/tools/testing/selftests/bpf/prog_tests/log_buf.c @@ -169,7 +169,6 @@ static void bpf_prog_load_log_buf(void) ASSERT_GE(fd, 0, "good_fd1"); if (fd >= 0) close(fd); - fd = -1; /* log_level == 2 should always fill log_buf, even for good prog */ log_buf[0] = '\0'; @@ -180,7 +179,6 @@ static void bpf_prog_load_log_buf(void) ASSERT_GE(fd, 0, "good_fd2"); if (fd >= 0) close(fd); - fd = -1; /* log_level == 0 should fill log_buf for bad prog */ log_buf[0] = '\0'; @@ -191,7 +189,6 @@ static void bpf_prog_load_log_buf(void) ASSERT_LT(fd, 0, "bad_fd"); if (fd >= 0) close(fd); - fd = -1; free(log_buf); } diff --git a/tools/testing/selftests/bpf/prog_tests/lsm_cgroup.c b/tools/testing/selftests/bpf/prog_tests/lsm_cgroup.c index 130a3b21e467..6df25de8f080 100644 --- a/tools/testing/selftests/bpf/prog_tests/lsm_cgroup.c +++ b/tools/testing/selftests/bpf/prog_tests/lsm_cgroup.c @@ -10,10 +10,6 @@ #include "cgroup_helpers.h" #include "network_helpers.h" -#ifndef ENOTSUPP -#define ENOTSUPP 524 -#endif - static struct btf *btf; static __u32 query_prog_cnt(int cgroup_fd, const char *attach_func) diff --git a/tools/testing/selftests/bpf/prog_tests/map_in_map.c b/tools/testing/selftests/bpf/prog_tests/map_in_map.c index d2a10eb4e5b5..286a9fb469e2 100644 --- a/tools/testing/selftests/bpf/prog_tests/map_in_map.c +++ b/tools/testing/selftests/bpf/prog_tests/map_in_map.c @@ -5,7 +5,9 @@ #include <sys/syscall.h> #include <test_progs.h> #include <bpf/btf.h> + #include "access_map_in_map.skel.h" +#include "update_map_in_htab.skel.h" struct thread_ctx { pthread_barrier_t barrier; @@ -127,6 +129,131 @@ out: access_map_in_map__destroy(skel); } +static void add_del_fd_htab(int outer_fd) +{ + int inner_fd, err; + int key = 1; + + inner_fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, "arr1", 4, 4, 1, NULL); + if (!ASSERT_OK_FD(inner_fd, "inner1")) + return; + err = bpf_map_update_elem(outer_fd, &key, &inner_fd, BPF_NOEXIST); + close(inner_fd); + if (!ASSERT_OK(err, "add")) + return; + + /* Delete */ + err = bpf_map_delete_elem(outer_fd, &key); + ASSERT_OK(err, "del"); +} + +static void overwrite_fd_htab(int outer_fd) +{ + int inner_fd, err; + int key = 1; + + inner_fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, "arr1", 4, 4, 1, NULL); + if (!ASSERT_OK_FD(inner_fd, "inner1")) + return; + err = bpf_map_update_elem(outer_fd, &key, &inner_fd, BPF_NOEXIST); + close(inner_fd); + if (!ASSERT_OK(err, "add")) + return; + + /* Overwrite */ + inner_fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, "arr2", 4, 4, 1, NULL); + if (!ASSERT_OK_FD(inner_fd, "inner2")) + goto out; + err = bpf_map_update_elem(outer_fd, &key, &inner_fd, BPF_EXIST); + close(inner_fd); + if (!ASSERT_OK(err, "overwrite")) + goto out; + + err = bpf_map_delete_elem(outer_fd, &key); + ASSERT_OK(err, "del"); + return; +out: + bpf_map_delete_elem(outer_fd, &key); +} + +static void lookup_delete_fd_htab(int outer_fd) +{ + int key = 1, value; + int inner_fd, err; + + inner_fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, "arr1", 4, 4, 1, NULL); + if (!ASSERT_OK_FD(inner_fd, "inner1")) + return; + err = bpf_map_update_elem(outer_fd, &key, &inner_fd, BPF_NOEXIST); + close(inner_fd); + if (!ASSERT_OK(err, "add")) + return; + + /* lookup_and_delete is not supported for htab of maps */ + err = bpf_map_lookup_and_delete_elem(outer_fd, &key, &value); + ASSERT_EQ(err, -ENOTSUPP, "lookup_del"); + + err = bpf_map_delete_elem(outer_fd, &key); + ASSERT_OK(err, "del"); +} + +static void batched_lookup_delete_fd_htab(int outer_fd) +{ + int keys[2] = {1, 2}, values[2]; + unsigned int cnt, batch; + int inner_fd, err; + + inner_fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, "arr1", 4, 4, 1, NULL); + if (!ASSERT_OK_FD(inner_fd, "inner1")) + return; + + err = bpf_map_update_elem(outer_fd, &keys[0], &inner_fd, BPF_NOEXIST); + close(inner_fd); + if (!ASSERT_OK(err, "add1")) + return; + + inner_fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, "arr2", 4, 4, 1, NULL); + if (!ASSERT_OK_FD(inner_fd, "inner2")) + goto out; + err = bpf_map_update_elem(outer_fd, &keys[1], &inner_fd, BPF_NOEXIST); + close(inner_fd); + if (!ASSERT_OK(err, "add2")) + goto out; + + /* batched lookup_and_delete */ + cnt = ARRAY_SIZE(keys); + err = bpf_map_lookup_and_delete_batch(outer_fd, NULL, &batch, keys, values, &cnt, NULL); + ASSERT_TRUE((!err || err == -ENOENT), "delete_batch ret"); + ASSERT_EQ(cnt, ARRAY_SIZE(keys), "delete_batch cnt"); + +out: + bpf_map_delete_elem(outer_fd, &keys[0]); +} + +static void test_update_map_in_htab(bool preallocate) +{ + struct update_map_in_htab *skel; + int err, fd; + + skel = update_map_in_htab__open(); + if (!ASSERT_OK_PTR(skel, "open")) + return; + + err = update_map_in_htab__load(skel); + if (!ASSERT_OK(err, "load")) + goto out; + + fd = preallocate ? bpf_map__fd(skel->maps.outer_htab_map) : + bpf_map__fd(skel->maps.outer_alloc_htab_map); + + add_del_fd_htab(fd); + overwrite_fd_htab(fd); + lookup_delete_fd_htab(fd); + batched_lookup_delete_fd_htab(fd); +out: + update_map_in_htab__destroy(skel); +} + void test_map_in_map(void) { if (test__start_subtest("acc_map_in_array")) @@ -137,5 +264,8 @@ void test_map_in_map(void) test_map_in_map_access("access_map_in_htab", "outer_htab_map"); if (test__start_subtest("sleepable_acc_map_in_htab")) test_map_in_map_access("sleepable_access_map_in_htab", "outer_htab_map"); + if (test__start_subtest("update_map_in_htab")) + test_update_map_in_htab(true); + if (test__start_subtest("update_map_in_alloc_htab")) + test_update_map_in_htab(false); } - diff --git a/tools/testing/selftests/bpf/prog_tests/mptcp.c b/tools/testing/selftests/bpf/prog_tests/mptcp.c index d2ca32fa3b21..f8eb7f9d4fd2 100644 --- a/tools/testing/selftests/bpf/prog_tests/mptcp.c +++ b/tools/testing/selftests/bpf/prog_tests/mptcp.c @@ -5,12 +5,17 @@ #include <linux/const.h> #include <netinet/in.h> #include <test_progs.h> +#include <unistd.h> #include "cgroup_helpers.h" #include "network_helpers.h" #include "mptcp_sock.skel.h" #include "mptcpify.skel.h" +#include "mptcp_subflow.skel.h" #define NS_TEST "mptcp_ns" +#define ADDR_1 "10.0.1.1" +#define ADDR_2 "10.0.1.2" +#define PORT_1 10001 #ifndef IPPROTO_MPTCP #define IPPROTO_MPTCP 262 @@ -64,24 +69,6 @@ struct mptcp_storage { char ca_name[TCP_CA_NAME_MAX]; }; -static struct nstoken *create_netns(void) -{ - SYS(fail, "ip netns add %s", NS_TEST); - SYS(fail, "ip -net %s link set dev lo up", NS_TEST); - - return open_netns(NS_TEST); -fail: - return NULL; -} - -static void cleanup_netns(struct nstoken *nstoken) -{ - if (nstoken) - close_netns(nstoken); - - SYS_NOFAIL("ip netns del %s", NS_TEST); -} - static int start_mptcp_server(int family, const char *addr_str, __u16 port, int timeout_ms) { @@ -201,15 +188,15 @@ out: static void test_base(void) { - struct nstoken *nstoken = NULL; + struct netns_obj *netns = NULL; int server_fd, cgroup_fd; cgroup_fd = test__join_cgroup("/mptcp"); if (!ASSERT_GE(cgroup_fd, 0, "test__join_cgroup")) return; - nstoken = create_netns(); - if (!ASSERT_OK_PTR(nstoken, "create_netns")) + netns = netns_new(NS_TEST, true); + if (!ASSERT_OK_PTR(netns, "netns_new")) goto fail; /* without MPTCP */ @@ -232,7 +219,7 @@ with_mptcp: close(server_fd); fail: - cleanup_netns(nstoken); + netns_free(netns); close(cgroup_fd); } @@ -317,21 +304,135 @@ out: static void test_mptcpify(void) { - struct nstoken *nstoken = NULL; + struct netns_obj *netns = NULL; int cgroup_fd; cgroup_fd = test__join_cgroup("/mptcpify"); if (!ASSERT_GE(cgroup_fd, 0, "test__join_cgroup")) return; - nstoken = create_netns(); - if (!ASSERT_OK_PTR(nstoken, "create_netns")) + netns = netns_new(NS_TEST, true); + if (!ASSERT_OK_PTR(netns, "netns_new")) goto fail; ASSERT_OK(run_mptcpify(cgroup_fd), "run_mptcpify"); fail: - cleanup_netns(nstoken); + netns_free(netns); + close(cgroup_fd); +} + +static int endpoint_init(char *flags) +{ + SYS(fail, "ip -net %s link add veth1 type veth peer name veth2", NS_TEST); + SYS(fail, "ip -net %s addr add %s/24 dev veth1", NS_TEST, ADDR_1); + SYS(fail, "ip -net %s link set dev veth1 up", NS_TEST); + SYS(fail, "ip -net %s addr add %s/24 dev veth2", NS_TEST, ADDR_2); + SYS(fail, "ip -net %s link set dev veth2 up", NS_TEST); + if (SYS_NOFAIL("ip -net %s mptcp endpoint add %s %s", NS_TEST, ADDR_2, flags)) { + printf("'ip mptcp' not supported, skip this test.\n"); + test__skip(); + goto fail; + } + + return 0; +fail: + return -1; +} + +static void wait_for_new_subflows(int fd) +{ + socklen_t len; + u8 subflows; + int err, i; + + len = sizeof(subflows); + /* Wait max 5 sec for new subflows to be created */ + for (i = 0; i < 50; i++) { + err = getsockopt(fd, SOL_MPTCP, MPTCP_INFO, &subflows, &len); + if (!err && subflows > 0) + break; + + usleep(100000); /* 0.1s */ + } +} + +static void run_subflow(void) +{ + int server_fd, client_fd, err; + char new[TCP_CA_NAME_MAX]; + char cc[TCP_CA_NAME_MAX]; + unsigned int mark; + socklen_t len; + + server_fd = start_mptcp_server(AF_INET, ADDR_1, PORT_1, 0); + if (!ASSERT_OK_FD(server_fd, "start_mptcp_server")) + return; + + client_fd = connect_to_fd(server_fd, 0); + if (!ASSERT_OK_FD(client_fd, "connect_to_fd")) + goto close_server; + + send_byte(client_fd); + wait_for_new_subflows(client_fd); + + len = sizeof(mark); + err = getsockopt(client_fd, SOL_SOCKET, SO_MARK, &mark, &len); + if (ASSERT_OK(err, "getsockopt(client_fd, SO_MARK)")) + ASSERT_EQ(mark, 0, "mark"); + + len = sizeof(new); + err = getsockopt(client_fd, SOL_TCP, TCP_CONGESTION, new, &len); + if (ASSERT_OK(err, "getsockopt(client_fd, TCP_CONGESTION)")) { + get_msk_ca_name(cc); + ASSERT_STREQ(new, cc, "cc"); + } + + close(client_fd); +close_server: + close(server_fd); +} + +static void test_subflow(void) +{ + struct mptcp_subflow *skel; + struct netns_obj *netns; + int cgroup_fd; + + cgroup_fd = test__join_cgroup("/mptcp_subflow"); + if (!ASSERT_OK_FD(cgroup_fd, "join_cgroup: mptcp_subflow")) + return; + + skel = mptcp_subflow__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_open_load: mptcp_subflow")) + goto close_cgroup; + + skel->bss->pid = getpid(); + + skel->links.mptcp_subflow = + bpf_program__attach_cgroup(skel->progs.mptcp_subflow, cgroup_fd); + if (!ASSERT_OK_PTR(skel->links.mptcp_subflow, "attach mptcp_subflow")) + goto skel_destroy; + + skel->links._getsockopt_subflow = + bpf_program__attach_cgroup(skel->progs._getsockopt_subflow, cgroup_fd); + if (!ASSERT_OK_PTR(skel->links._getsockopt_subflow, "attach _getsockopt_subflow")) + goto skel_destroy; + + netns = netns_new(NS_TEST, true); + if (!ASSERT_OK_PTR(netns, "netns_new: mptcp_subflow")) + goto skel_destroy; + + if (endpoint_init("subflow") < 0) + goto close_netns; + + run_subflow(); + +close_netns: + netns_free(netns); +skel_destroy: + mptcp_subflow__destroy(skel); +close_cgroup: close(cgroup_fd); } @@ -341,4 +442,6 @@ void test_mptcp(void) test_base(); if (test__start_subtest("mptcpify")) test_mptcpify(); + if (test__start_subtest("subflow")) + test_subflow(); } diff --git a/tools/testing/selftests/bpf/prog_tests/netfilter_link_attach.c b/tools/testing/selftests/bpf/prog_tests/netfilter_link_attach.c index 4297a2a4cb11..2f52fa2641ba 100644 --- a/tools/testing/selftests/bpf/prog_tests/netfilter_link_attach.c +++ b/tools/testing/selftests/bpf/prog_tests/netfilter_link_attach.c @@ -26,10 +26,43 @@ static const struct nf_link_test nf_hook_link_tests[] = { { .pf = NFPROTO_INET, .priority = 1, .name = "invalid-inet-not-supported", }, - { .pf = NFPROTO_IPV4, .priority = -10000, .expect_success = true, .name = "attach ipv4", }, - { .pf = NFPROTO_IPV6, .priority = 10001, .expect_success = true, .name = "attach ipv6", }, + { + .pf = NFPROTO_IPV4, + .hooknum = NF_INET_POST_ROUTING, + .priority = -10000, + .flags = 0, + .expect_success = true, + .name = "attach ipv4", + }, + { + .pf = NFPROTO_IPV6, + .hooknum = NF_INET_FORWARD, + .priority = 10001, + .flags = BPF_F_NETFILTER_IP_DEFRAG, + .expect_success = true, + .name = "attach ipv6", + }, }; +static void verify_netfilter_link_info(struct bpf_link *link, const struct nf_link_test nf_expected) +{ + struct bpf_link_info info; + __u32 len = sizeof(info); + int err, fd; + + memset(&info, 0, len); + + fd = bpf_link__fd(link); + err = bpf_link_get_info_by_fd(fd, &info, &len); + ASSERT_OK(err, "get_link_info"); + + ASSERT_EQ(info.type, BPF_LINK_TYPE_NETFILTER, "info link type"); + ASSERT_EQ(info.netfilter.pf, nf_expected.pf, "info nf protocol family"); + ASSERT_EQ(info.netfilter.hooknum, nf_expected.hooknum, "info nf hooknum"); + ASSERT_EQ(info.netfilter.priority, nf_expected.priority, "info nf priority"); + ASSERT_EQ(info.netfilter.flags, nf_expected.flags, "info nf flags"); +} + void test_netfilter_link_attach(void) { struct test_netfilter_link_attach *skel; @@ -64,6 +97,8 @@ void test_netfilter_link_attach(void) if (!ASSERT_OK_PTR(link, "program attach successful")) continue; + verify_netfilter_link_info(link, nf_hook_link_tests[i]); + link2 = bpf_program__attach_netfilter(prog, &opts); ASSERT_ERR_PTR(link2, "attach program with same pf/hook/priority"); @@ -73,6 +108,9 @@ void test_netfilter_link_attach(void) link2 = bpf_program__attach_netfilter(prog, &opts); if (!ASSERT_OK_PTR(link2, "program reattach successful")) continue; + + verify_netfilter_link_info(link2, nf_hook_link_tests[i]); + if (!ASSERT_OK(bpf_link__destroy(link2), "link destroy")) break; } else { diff --git a/tools/testing/selftests/bpf/prog_tests/netns_cookie.c b/tools/testing/selftests/bpf/prog_tests/netns_cookie.c index 71d8f3ba7d6b..ac3c3c097c0e 100644 --- a/tools/testing/selftests/bpf/prog_tests/netns_cookie.c +++ b/tools/testing/selftests/bpf/prog_tests/netns_cookie.c @@ -8,12 +8,16 @@ #define SO_NETNS_COOKIE 71 #endif +#define loopback 1 + static int duration; void test_netns_cookie(void) { + LIBBPF_OPTS(bpf_prog_attach_opts, opta); + LIBBPF_OPTS(bpf_prog_detach_opts, optd); int server_fd = -1, client_fd = -1, cgroup_fd = -1; - int err, val, ret, map, verdict; + int err, val, ret, map, verdict, tc_fd; struct netns_cookie_prog *skel; uint64_t cookie_expected_value; socklen_t vallen = sizeof(cookie_expected_value); @@ -38,36 +42,47 @@ void test_netns_cookie(void) if (!ASSERT_OK(err, "prog_attach")) goto done; + tc_fd = bpf_program__fd(skel->progs.get_netns_cookie_tcx); + err = bpf_prog_attach_opts(tc_fd, loopback, BPF_TCX_INGRESS, &opta); + if (!ASSERT_OK(err, "prog_attach")) + goto done; + server_fd = start_server(AF_INET6, SOCK_STREAM, "::1", 0, 0); if (CHECK(server_fd < 0, "start_server", "errno %d\n", errno)) - goto done; + goto cleanup_tc; client_fd = connect_to_fd(server_fd, 0); if (CHECK(client_fd < 0, "connect_to_fd", "errno %d\n", errno)) - goto done; + goto cleanup_tc; ret = send(client_fd, send_msg, sizeof(send_msg), 0); if (CHECK(ret != sizeof(send_msg), "send(msg)", "ret:%d\n", ret)) - goto done; + goto cleanup_tc; err = bpf_map_lookup_elem(bpf_map__fd(skel->maps.sockops_netns_cookies), &client_fd, &val); if (!ASSERT_OK(err, "map_lookup(sockops_netns_cookies)")) - goto done; + goto cleanup_tc; err = getsockopt(client_fd, SOL_SOCKET, SO_NETNS_COOKIE, &cookie_expected_value, &vallen); if (!ASSERT_OK(err, "getsockopt")) - goto done; + goto cleanup_tc; ASSERT_EQ(val, cookie_expected_value, "cookie_value"); err = bpf_map_lookup_elem(bpf_map__fd(skel->maps.sk_msg_netns_cookies), &client_fd, &val); if (!ASSERT_OK(err, "map_lookup(sk_msg_netns_cookies)")) - goto done; + goto cleanup_tc; ASSERT_EQ(val, cookie_expected_value, "cookie_value"); + ASSERT_EQ(skel->bss->tcx_init_netns_cookie, cookie_expected_value, "cookie_value"); + ASSERT_EQ(skel->bss->tcx_netns_cookie, cookie_expected_value, "cookie_value"); + +cleanup_tc: + err = bpf_prog_detach_opts(tc_fd, loopback, BPF_TCX_INGRESS, &optd); + ASSERT_OK(err, "prog_detach"); done: if (server_fd != -1) diff --git a/tools/testing/selftests/bpf/prog_tests/ns_current_pid_tgid.c b/tools/testing/selftests/bpf/prog_tests/ns_current_pid_tgid.c index c29787e092d6..761ce24bce38 100644 --- a/tools/testing/selftests/bpf/prog_tests/ns_current_pid_tgid.c +++ b/tools/testing/selftests/bpf/prog_tests/ns_current_pid_tgid.c @@ -23,7 +23,7 @@ static int get_pid_tgid(pid_t *pid, pid_t *tgid, struct stat st; int err; - *pid = syscall(SYS_gettid); + *pid = sys_gettid(); *tgid = getpid(); err = stat("/proc/self/ns/pid", &st); diff --git a/tools/testing/selftests/bpf/prog_tests/perf_link.c b/tools/testing/selftests/bpf/prog_tests/perf_link.c index 3a25f1c743a1..d940ff87fa08 100644 --- a/tools/testing/selftests/bpf/prog_tests/perf_link.c +++ b/tools/testing/selftests/bpf/prog_tests/perf_link.c @@ -4,8 +4,12 @@ #include <pthread.h> #include <sched.h> #include <test_progs.h> +#include "testing_helpers.h" #include "test_perf_link.skel.h" +#define BURN_TIMEOUT_MS 100 +#define BURN_TIMEOUT_NS BURN_TIMEOUT_MS * 1000000 + static void burn_cpu(void) { volatile int j = 0; @@ -32,6 +36,7 @@ void serial_test_perf_link(void) int run_cnt_before, run_cnt_after; struct bpf_link_info info; __u32 info_len = sizeof(info); + __u64 timeout_time_ns; /* create perf event */ memset(&attr, 0, sizeof(attr)); @@ -63,8 +68,14 @@ void serial_test_perf_link(void) ASSERT_GT(info.prog_id, 0, "link_prog_id"); /* ensure we get at least one perf_event prog execution */ - burn_cpu(); - ASSERT_GT(skel->bss->run_cnt, 0, "run_cnt"); + timeout_time_ns = get_time_ns() + BURN_TIMEOUT_NS; + while (true) { + burn_cpu(); + if (skel->bss->run_cnt > 0) + break; + if (!ASSERT_LT(get_time_ns(), timeout_time_ns, "run_cnt_timeout")) + break; + } /* perf_event is still active, but we close link and BPF program * shouldn't be executed anymore diff --git a/tools/testing/selftests/bpf/prog_tests/raw_tp_null.c b/tools/testing/selftests/bpf/prog_tests/raw_tp_null.c new file mode 100644 index 000000000000..43676a9922dc --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/raw_tp_null.c @@ -0,0 +1,28 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */ + +#include <test_progs.h> +#include "raw_tp_null.skel.h" +#include "raw_tp_null_fail.skel.h" + +void test_raw_tp_null(void) +{ + struct raw_tp_null *skel; + + RUN_TESTS(raw_tp_null_fail); + + skel = raw_tp_null__open_and_load(); + if (!ASSERT_OK_PTR(skel, "raw_tp_null__open_and_load")) + return; + + skel->bss->tid = sys_gettid(); + + if (!ASSERT_OK(raw_tp_null__attach(skel), "raw_tp_null__attach")) + goto end; + + ASSERT_OK(trigger_module_test_read(2), "trigger testmod read"); + ASSERT_EQ(skel->bss->i, 3, "invocations"); + +end: + raw_tp_null__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/rcu_read_lock.c b/tools/testing/selftests/bpf/prog_tests/rcu_read_lock.c index a1f7e7378a64..ebe0c12b5536 100644 --- a/tools/testing/selftests/bpf/prog_tests/rcu_read_lock.c +++ b/tools/testing/selftests/bpf/prog_tests/rcu_read_lock.c @@ -21,7 +21,7 @@ static void test_success(void) if (!ASSERT_OK_PTR(skel, "skel_open")) return; - skel->bss->target_pid = syscall(SYS_gettid); + skel->bss->target_pid = sys_gettid(); bpf_program__set_autoload(skel->progs.get_cgroup_id, true); bpf_program__set_autoload(skel->progs.task_succ, true); @@ -58,7 +58,7 @@ static void test_rcuptr_acquire(void) if (!ASSERT_OK_PTR(skel, "skel_open")) return; - skel->bss->target_pid = syscall(SYS_gettid); + skel->bss->target_pid = sys_gettid(); bpf_program__set_autoload(skel->progs.task_acquire, true); err = rcu_read_lock__load(skel); diff --git a/tools/testing/selftests/bpf/prog_tests/send_signal.c b/tools/testing/selftests/bpf/prog_tests/send_signal.c index 6cc69900b310..1702aa592c2c 100644 --- a/tools/testing/selftests/bpf/prog_tests/send_signal.c +++ b/tools/testing/selftests/bpf/prog_tests/send_signal.c @@ -3,22 +3,32 @@ #include <sys/time.h> #include <sys/resource.h> #include "test_send_signal_kern.skel.h" +#include "io_helpers.h" static int sigusr1_received; static void sigusr1_handler(int signum) { - sigusr1_received = 1; + sigusr1_received = 8; +} + +static void sigusr1_siginfo_handler(int s, siginfo_t *i, void *v) +{ + sigusr1_received = (int)(long long)i->si_value.sival_ptr; } static void test_send_signal_common(struct perf_event_attr *attr, - bool signal_thread) + bool signal_thread, bool remote) { struct test_send_signal_kern *skel; + struct sigaction sa; int pipe_c2p[2], pipe_p2c[2]; int err = -1, pmu_fd = -1; + volatile int j = 0; + int retry_count; char buf[256]; pid_t pid; + int old_prio; if (!ASSERT_OK(pipe(pipe_c2p), "pipe_c2p")) return; @@ -39,11 +49,14 @@ static void test_send_signal_common(struct perf_event_attr *attr, } if (pid == 0) { - int old_prio; - volatile int j = 0; - /* install signal handler and notify parent */ - ASSERT_NEQ(signal(SIGUSR1, sigusr1_handler), SIG_ERR, "signal"); + if (remote) { + sa.sa_sigaction = sigusr1_siginfo_handler; + sa.sa_flags = SA_RESTART | SA_SIGINFO; + ASSERT_NEQ(sigaction(SIGUSR1, &sa, NULL), -1, "sigaction"); + } else { + ASSERT_NEQ(signal(SIGUSR1, sigusr1_handler), SIG_ERR, "signal"); + } close(pipe_c2p[0]); /* close read */ close(pipe_p2c[1]); /* close write */ @@ -52,10 +65,12 @@ static void test_send_signal_common(struct perf_event_attr *attr, * that if an interrupt happens, the underlying task * is this process. */ - errno = 0; - old_prio = getpriority(PRIO_PROCESS, 0); - ASSERT_OK(errno, "getpriority"); - ASSERT_OK(setpriority(PRIO_PROCESS, 0, -20), "setpriority"); + if (!remote) { + errno = 0; + old_prio = getpriority(PRIO_PROCESS, 0); + ASSERT_OK(errno, "getpriority"); + ASSERT_OK(setpriority(PRIO_PROCESS, 0, -20), "setpriority"); + } /* notify parent signal handler is installed */ ASSERT_EQ(write(pipe_c2p[1], buf, 1), 1, "pipe_write"); @@ -66,20 +81,25 @@ static void test_send_signal_common(struct perf_event_attr *attr, /* wait a little for signal handler */ for (int i = 0; i < 1000000000 && !sigusr1_received; i++) { j /= i + j + 1; - if (!attr) - /* trigger the nanosleep tracepoint program. */ - usleep(1); + if (remote) + sleep(1); + else + if (!attr) + /* trigger the nanosleep tracepoint program. */ + usleep(1); } - buf[0] = sigusr1_received ? '2' : '0'; - ASSERT_EQ(sigusr1_received, 1, "sigusr1_received"); + buf[0] = sigusr1_received; + + ASSERT_EQ(sigusr1_received, 8, "sigusr1_received"); ASSERT_EQ(write(pipe_c2p[1], buf, 1), 1, "pipe_write"); /* wait for parent notification and exit */ ASSERT_EQ(read(pipe_p2c[0], buf, 1), 1, "pipe_read"); /* restore the old priority */ - ASSERT_OK(setpriority(PRIO_PROCESS, 0, old_prio), "setpriority"); + if (!remote) + ASSERT_OK(setpriority(PRIO_PROCESS, 0, old_prio), "setpriority"); close(pipe_c2p[1]); close(pipe_p2c[0]); @@ -93,6 +113,17 @@ static void test_send_signal_common(struct perf_event_attr *attr, if (!ASSERT_OK_PTR(skel, "skel_open_and_load")) goto skel_open_load_failure; + /* boost with a high priority so we got a higher chance + * that if an interrupt happens, the underlying task + * is this process. + */ + if (remote) { + errno = 0; + old_prio = getpriority(PRIO_PROCESS, 0); + ASSERT_OK(errno, "getpriority"); + ASSERT_OK(setpriority(PRIO_PROCESS, 0, -20), "setpriority"); + } + if (!attr) { err = test_send_signal_kern__attach(skel); if (!ASSERT_OK(err, "skel_attach")) { @@ -100,8 +131,12 @@ static void test_send_signal_common(struct perf_event_attr *attr, goto destroy_skel; } } else { - pmu_fd = syscall(__NR_perf_event_open, attr, pid, -1 /* cpu */, - -1 /* group id */, 0 /* flags */); + if (!remote) + pmu_fd = syscall(__NR_perf_event_open, attr, pid, -1 /* cpu */, + -1 /* group id */, 0 /* flags */); + else + pmu_fd = syscall(__NR_perf_event_open, attr, getpid(), -1 /* cpu */, + -1 /* group id */, 0 /* flags */); if (!ASSERT_GE(pmu_fd, 0, "perf_event_open")) { err = -1; goto destroy_skel; @@ -119,13 +154,36 @@ static void test_send_signal_common(struct perf_event_attr *attr, /* trigger the bpf send_signal */ skel->bss->signal_thread = signal_thread; skel->bss->sig = SIGUSR1; - skel->bss->pid = pid; + if (!remote) { + skel->bss->target_pid = 0; + skel->bss->pid = pid; + } else { + skel->bss->target_pid = pid; + skel->bss->pid = getpid(); + } /* notify child that bpf program can send_signal now */ ASSERT_EQ(write(pipe_p2c[1], buf, 1), 1, "pipe_write"); - /* wait for result */ - err = read(pipe_c2p[0], buf, 1); + for (retry_count = 0;;) { + /* For the remote test, the BPF program is triggered from this + * process but the other process/thread is signaled. + */ + if (remote) { + if (!attr) { + for (int i = 0; i < 10; i++) + usleep(1); + } else { + for (int i = 0; i < 100000000; i++) + j /= i + 1; + } + } + /* wait for result */ + err = read_with_timeout(pipe_c2p[0], buf, 1, 100); + if (err == -EAGAIN && retry_count++ < 10000) + continue; + break; + } if (!ASSERT_GE(err, 0, "reading pipe")) goto disable_pmu; if (!ASSERT_GT(err, 0, "reading pipe error: size 0")) { @@ -133,7 +191,7 @@ static void test_send_signal_common(struct perf_event_attr *attr, goto disable_pmu; } - ASSERT_EQ(buf[0], '2', "incorrect result"); + ASSERT_EQ(buf[0], 8, "incorrect result"); /* notify child safe to exit */ ASSERT_EQ(write(pipe_p2c[1], buf, 1), 1, "pipe_write"); @@ -142,18 +200,21 @@ disable_pmu: close(pmu_fd); destroy_skel: test_send_signal_kern__destroy(skel); + /* restore the old priority */ + if (remote) + ASSERT_OK(setpriority(PRIO_PROCESS, 0, old_prio), "setpriority"); skel_open_load_failure: close(pipe_c2p[0]); close(pipe_p2c[1]); wait(NULL); } -static void test_send_signal_tracepoint(bool signal_thread) +static void test_send_signal_tracepoint(bool signal_thread, bool remote) { - test_send_signal_common(NULL, signal_thread); + test_send_signal_common(NULL, signal_thread, remote); } -static void test_send_signal_perf(bool signal_thread) +static void test_send_signal_perf(bool signal_thread, bool remote) { struct perf_event_attr attr = { .freq = 1, @@ -162,13 +223,14 @@ static void test_send_signal_perf(bool signal_thread) .config = PERF_COUNT_SW_CPU_CLOCK, }; - test_send_signal_common(&attr, signal_thread); + test_send_signal_common(&attr, signal_thread, remote); } -static void test_send_signal_nmi(bool signal_thread) +static void test_send_signal_nmi(bool signal_thread, bool remote) { struct perf_event_attr attr = { - .sample_period = 1, + .freq = 1, + .sample_freq = 1000, .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES, }; @@ -191,21 +253,35 @@ static void test_send_signal_nmi(bool signal_thread) close(pmu_fd); } - test_send_signal_common(&attr, signal_thread); + test_send_signal_common(&attr, signal_thread, remote); } void test_send_signal(void) { if (test__start_subtest("send_signal_tracepoint")) - test_send_signal_tracepoint(false); + test_send_signal_tracepoint(false, false); if (test__start_subtest("send_signal_perf")) - test_send_signal_perf(false); + test_send_signal_perf(false, false); if (test__start_subtest("send_signal_nmi")) - test_send_signal_nmi(false); + test_send_signal_nmi(false, false); if (test__start_subtest("send_signal_tracepoint_thread")) - test_send_signal_tracepoint(true); + test_send_signal_tracepoint(true, false); if (test__start_subtest("send_signal_perf_thread")) - test_send_signal_perf(true); + test_send_signal_perf(true, false); if (test__start_subtest("send_signal_nmi_thread")) - test_send_signal_nmi(true); + test_send_signal_nmi(true, false); + + /* Signal remote thread and thread group */ + if (test__start_subtest("send_signal_tracepoint_remote")) + test_send_signal_tracepoint(false, true); + if (test__start_subtest("send_signal_perf_remote")) + test_send_signal_perf(false, true); + if (test__start_subtest("send_signal_nmi_remote")) + test_send_signal_nmi(false, true); + if (test__start_subtest("send_signal_tracepoint_thread_remote")) + test_send_signal_tracepoint(true, true); + if (test__start_subtest("send_signal_perf_thread_remote")) + test_send_signal_perf(true, true); + if (test__start_subtest("send_signal_nmi_thread_remote")) + test_send_signal_nmi(true, true); } diff --git a/tools/testing/selftests/bpf/prog_tests/sock_addr.c b/tools/testing/selftests/bpf/prog_tests/sock_addr.c index a6ee7f8d4f79..b2efabbed220 100644 --- a/tools/testing/selftests/bpf/prog_tests/sock_addr.c +++ b/tools/testing/selftests/bpf/prog_tests/sock_addr.c @@ -23,10 +23,6 @@ #include "getpeername_unix_prog.skel.h" #include "network_helpers.h" -#ifndef ENOTSUPP -# define ENOTSUPP 524 -#endif - #define TEST_NS "sock_addr" #define TEST_IF_PREFIX "test_sock_addr" #define TEST_IPV4 "127.0.0.4" diff --git a/tools/testing/selftests/bpf/prog_tests/sock_create.c b/tools/testing/selftests/bpf/prog_tests/sock_create.c new file mode 100644 index 000000000000..187ffc5e60c4 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/sock_create.c @@ -0,0 +1,348 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/bpf.h> +#include <test_progs.h> +#include "cgroup_helpers.h" + +static char bpf_log_buf[4096]; +static bool verbose; + +enum sock_create_test_error { + OK = 0, + DENY_CREATE, +}; + +static struct sock_create_test { + const char *descr; + const struct bpf_insn insns[64]; + enum bpf_attach_type attach_type; + enum bpf_attach_type expected_attach_type; + + int domain; + int type; + int protocol; + + int optname; + int optval; + enum sock_create_test_error error; +} tests[] = { + { + .descr = "AF_INET set priority", + .insns = { + /* r3 = 123 (priority) */ + BPF_MOV64_IMM(BPF_REG_3, 123), + BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_3, + offsetof(struct bpf_sock, priority)), + + /* return 1 */ + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .expected_attach_type = BPF_CGROUP_INET_SOCK_CREATE, + .attach_type = BPF_CGROUP_INET_SOCK_CREATE, + + .domain = AF_INET, + .type = SOCK_DGRAM, + + .optname = SO_PRIORITY, + .optval = 123, + }, + { + .descr = "AF_INET6 set priority", + .insns = { + /* r3 = 123 (priority) */ + BPF_MOV64_IMM(BPF_REG_3, 123), + BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_3, + offsetof(struct bpf_sock, priority)), + + /* return 1 */ + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .expected_attach_type = BPF_CGROUP_INET_SOCK_CREATE, + .attach_type = BPF_CGROUP_INET_SOCK_CREATE, + + .domain = AF_INET6, + .type = SOCK_DGRAM, + + .optname = SO_PRIORITY, + .optval = 123, + }, + { + .descr = "AF_INET set mark", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + + /* get uid of process */ + BPF_EMIT_CALL(BPF_FUNC_get_current_uid_gid), + BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 0xffffffff), + + /* if uid is 0, use given mark(666), else use uid as the mark */ + BPF_MOV64_REG(BPF_REG_3, BPF_REG_0), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_MOV64_IMM(BPF_REG_3, 666), + + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_3, + offsetof(struct bpf_sock, mark)), + + /* return 1 */ + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .expected_attach_type = BPF_CGROUP_INET_SOCK_CREATE, + .attach_type = BPF_CGROUP_INET_SOCK_CREATE, + + .domain = AF_INET, + .type = SOCK_DGRAM, + + .optname = SO_MARK, + .optval = 666, + }, + { + .descr = "AF_INET6 set mark", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + + /* get uid of process */ + BPF_EMIT_CALL(BPF_FUNC_get_current_uid_gid), + BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 0xffffffff), + + /* if uid is 0, use given mark(666), else use uid as the mark */ + BPF_MOV64_REG(BPF_REG_3, BPF_REG_0), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_MOV64_IMM(BPF_REG_3, 666), + + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_3, + offsetof(struct bpf_sock, mark)), + + /* return 1 */ + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .expected_attach_type = BPF_CGROUP_INET_SOCK_CREATE, + .attach_type = BPF_CGROUP_INET_SOCK_CREATE, + + .domain = AF_INET6, + .type = SOCK_DGRAM, + + .optname = SO_MARK, + .optval = 666, + }, + { + .descr = "AF_INET bound to iface", + .insns = { + /* r3 = 1 (lo interface) */ + BPF_MOV64_IMM(BPF_REG_3, 1), + BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_3, + offsetof(struct bpf_sock, bound_dev_if)), + + /* return 1 */ + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .expected_attach_type = BPF_CGROUP_INET_SOCK_CREATE, + .attach_type = BPF_CGROUP_INET_SOCK_CREATE, + + .domain = AF_INET, + .type = SOCK_DGRAM, + + .optname = SO_BINDTOIFINDEX, + .optval = 1, + }, + { + .descr = "AF_INET6 bound to iface", + .insns = { + /* r3 = 1 (lo interface) */ + BPF_MOV64_IMM(BPF_REG_3, 1), + BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_3, + offsetof(struct bpf_sock, bound_dev_if)), + + /* return 1 */ + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .expected_attach_type = BPF_CGROUP_INET_SOCK_CREATE, + .attach_type = BPF_CGROUP_INET_SOCK_CREATE, + + .domain = AF_INET6, + .type = SOCK_DGRAM, + + .optname = SO_BINDTOIFINDEX, + .optval = 1, + }, + { + .descr = "block AF_INET, SOCK_DGRAM, IPPROTO_ICMP socket", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 1), /* r0 = verdict */ + + /* sock->family == AF_INET */ + BPF_LDX_MEM(BPF_H, BPF_REG_2, BPF_REG_1, + offsetof(struct bpf_sock, family)), + BPF_JMP_IMM(BPF_JNE, BPF_REG_2, AF_INET, 5), + + /* sock->type == SOCK_DGRAM */ + BPF_LDX_MEM(BPF_H, BPF_REG_2, BPF_REG_1, + offsetof(struct bpf_sock, type)), + BPF_JMP_IMM(BPF_JNE, BPF_REG_2, SOCK_DGRAM, 3), + + /* sock->protocol == IPPROTO_ICMP */ + BPF_LDX_MEM(BPF_H, BPF_REG_2, BPF_REG_1, + offsetof(struct bpf_sock, protocol)), + BPF_JMP_IMM(BPF_JNE, BPF_REG_2, IPPROTO_ICMP, 1), + + /* return 0 (block) */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .expected_attach_type = BPF_CGROUP_INET_SOCK_CREATE, + .attach_type = BPF_CGROUP_INET_SOCK_CREATE, + + .domain = AF_INET, + .type = SOCK_DGRAM, + .protocol = IPPROTO_ICMP, + + .error = DENY_CREATE, + }, + { + .descr = "block AF_INET6, SOCK_DGRAM, IPPROTO_ICMPV6 socket", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 1), /* r0 = verdict */ + + /* sock->family == AF_INET6 */ + BPF_LDX_MEM(BPF_H, BPF_REG_2, BPF_REG_1, + offsetof(struct bpf_sock, family)), + BPF_JMP_IMM(BPF_JNE, BPF_REG_2, AF_INET6, 5), + + /* sock->type == SOCK_DGRAM */ + BPF_LDX_MEM(BPF_H, BPF_REG_2, BPF_REG_1, + offsetof(struct bpf_sock, type)), + BPF_JMP_IMM(BPF_JNE, BPF_REG_2, SOCK_DGRAM, 3), + + /* sock->protocol == IPPROTO_ICMPV6 */ + BPF_LDX_MEM(BPF_H, BPF_REG_2, BPF_REG_1, + offsetof(struct bpf_sock, protocol)), + BPF_JMP_IMM(BPF_JNE, BPF_REG_2, IPPROTO_ICMPV6, 1), + + /* return 0 (block) */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .expected_attach_type = BPF_CGROUP_INET_SOCK_CREATE, + .attach_type = BPF_CGROUP_INET_SOCK_CREATE, + + .domain = AF_INET, + .type = SOCK_DGRAM, + .protocol = IPPROTO_ICMPV6, + + .error = DENY_CREATE, + }, + { + .descr = "load w/o expected_attach_type (compat mode)", + .insns = { + /* return 1 */ + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .expected_attach_type = 0, + .attach_type = BPF_CGROUP_INET_SOCK_CREATE, + + .domain = AF_INET, + .type = SOCK_STREAM, + }, +}; + +static int load_prog(const struct bpf_insn *insns, + enum bpf_attach_type expected_attach_type) +{ + LIBBPF_OPTS(bpf_prog_load_opts, opts, + .expected_attach_type = expected_attach_type, + .log_level = 2, + .log_buf = bpf_log_buf, + .log_size = sizeof(bpf_log_buf), + ); + int fd, insns_cnt = 0; + + for (; + insns[insns_cnt].code != (BPF_JMP | BPF_EXIT); + insns_cnt++) { + } + insns_cnt++; + + fd = bpf_prog_load(BPF_PROG_TYPE_CGROUP_SOCK, NULL, "GPL", insns, + insns_cnt, &opts); + if (verbose && fd < 0) + fprintf(stderr, "%s\n", bpf_log_buf); + + return fd; +} + +static int run_test(int cgroup_fd, struct sock_create_test *test) +{ + int sock_fd, err, prog_fd, optval, ret = -1; + socklen_t optlen = sizeof(optval); + + prog_fd = load_prog(test->insns, test->expected_attach_type); + if (prog_fd < 0) { + log_err("Failed to load BPF program"); + return -1; + } + + err = bpf_prog_attach(prog_fd, cgroup_fd, test->attach_type, 0); + if (err < 0) { + log_err("Failed to attach BPF program"); + goto close_prog_fd; + } + + sock_fd = socket(test->domain, test->type, test->protocol); + if (sock_fd < 0) { + if (test->error == DENY_CREATE) + ret = 0; + else + log_err("Failed to create socket"); + + goto detach_prog; + } + + if (test->optname) { + err = getsockopt(sock_fd, SOL_SOCKET, test->optname, &optval, &optlen); + if (err) { + log_err("Failed to call getsockopt"); + goto cleanup; + } + + if (optval != test->optval) { + errno = 0; + log_err("getsockopt returned unexpected optval"); + goto cleanup; + } + } + + ret = test->error != OK; + +cleanup: + close(sock_fd); +detach_prog: + bpf_prog_detach2(prog_fd, cgroup_fd, test->attach_type); +close_prog_fd: + close(prog_fd); + return ret; +} + +void test_sock_create(void) +{ + int cgroup_fd, i; + + cgroup_fd = test__join_cgroup("/sock_create"); + if (!ASSERT_GE(cgroup_fd, 0, "join_cgroup")) + return; + + for (i = 0; i < ARRAY_SIZE(tests); i++) { + if (!test__start_subtest(tests[i].descr)) + continue; + + ASSERT_OK(run_test(cgroup_fd, &tests[i]), tests[i].descr); + } + + close(cgroup_fd); +} diff --git a/tools/testing/selftests/bpf/test_sock.c b/tools/testing/selftests/bpf/prog_tests/sock_post_bind.c index 810c3740b2cc..788135c9c673 100644 --- a/tools/testing/selftests/bpf/test_sock.c +++ b/tools/testing/selftests/bpf/prog_tests/sock_post_bind.c @@ -1,132 +1,35 @@ // SPDX-License-Identifier: GPL-2.0 -// Copyright (c) 2018 Facebook - -#include <stdio.h> -#include <unistd.h> - -#include <arpa/inet.h> -#include <sys/types.h> -#include <sys/socket.h> - -#include <linux/filter.h> - -#include <bpf/bpf.h> - +#include <linux/bpf.h> +#include <test_progs.h> #include "cgroup_helpers.h" -#include <bpf/bpf_endian.h> -#include "bpf_util.h" -#define CG_PATH "/foo" -#define MAX_INSNS 512 +#define TEST_NS "sock_post_bind" -char bpf_log_buf[BPF_LOG_BUF_SIZE]; -static bool verbose = false; +static char bpf_log_buf[4096]; -struct sock_test { - const char *descr; +static struct sock_post_bind_test { + const char *descr; /* BPF prog properties */ - struct bpf_insn insns[MAX_INSNS]; - enum bpf_attach_type expected_attach_type; - enum bpf_attach_type attach_type; + const struct bpf_insn insns[64]; + enum bpf_attach_type attach_type; + enum bpf_attach_type expected_attach_type; /* Socket properties */ - int domain; - int type; + int domain; + int type; /* Endpoint to bind() to */ const char *ip; unsigned short port; unsigned short port_retry; + /* Expected test result */ enum { - LOAD_REJECT, ATTACH_REJECT, BIND_REJECT, SUCCESS, RETRY_SUCCESS, RETRY_REJECT } result; -}; - -static struct sock_test tests[] = { - { - .descr = "bind4 load with invalid access: src_ip6", - .insns = { - BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), - BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6, - offsetof(struct bpf_sock, src_ip6[0])), - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_EXIT_INSN(), - }, - .expected_attach_type = BPF_CGROUP_INET4_POST_BIND, - .attach_type = BPF_CGROUP_INET4_POST_BIND, - .result = LOAD_REJECT, - }, - { - .descr = "bind4 load with invalid access: mark", - .insns = { - BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), - BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6, - offsetof(struct bpf_sock, mark)), - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_EXIT_INSN(), - }, - .expected_attach_type = BPF_CGROUP_INET4_POST_BIND, - .attach_type = BPF_CGROUP_INET4_POST_BIND, - .result = LOAD_REJECT, - }, - { - .descr = "bind6 load with invalid access: src_ip4", - .insns = { - BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), - BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6, - offsetof(struct bpf_sock, src_ip4)), - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_EXIT_INSN(), - }, - .expected_attach_type = BPF_CGROUP_INET6_POST_BIND, - .attach_type = BPF_CGROUP_INET6_POST_BIND, - .result = LOAD_REJECT, - }, - { - .descr = "sock_create load with invalid access: src_port", - .insns = { - BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), - BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6, - offsetof(struct bpf_sock, src_port)), - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_EXIT_INSN(), - }, - .expected_attach_type = BPF_CGROUP_INET_SOCK_CREATE, - .attach_type = BPF_CGROUP_INET_SOCK_CREATE, - .result = LOAD_REJECT, - }, - { - .descr = "sock_create load w/o expected_attach_type (compat mode)", - .insns = { - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_EXIT_INSN(), - }, - .expected_attach_type = 0, - .attach_type = BPF_CGROUP_INET_SOCK_CREATE, - .domain = AF_INET, - .type = SOCK_STREAM, - .ip = "127.0.0.1", - .port = 8097, - .result = SUCCESS, - }, - { - .descr = "sock_create load w/ expected_attach_type", - .insns = { - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_EXIT_INSN(), - }, - .expected_attach_type = BPF_CGROUP_INET_SOCK_CREATE, - .attach_type = BPF_CGROUP_INET_SOCK_CREATE, - .domain = AF_INET, - .type = SOCK_STREAM, - .ip = "127.0.0.1", - .port = 8097, - .result = SUCCESS, - }, +} tests[] = { { .descr = "attach type mismatch bind4 vs bind6", .insns = { @@ -374,40 +277,29 @@ static struct sock_test tests[] = { }, }; -static size_t probe_prog_length(const struct bpf_insn *fp) -{ - size_t len; - - for (len = MAX_INSNS - 1; len > 0; --len) - if (fp[len].code != 0 || fp[len].imm != 0) - break; - return len + 1; -} - -static int load_sock_prog(const struct bpf_insn *prog, - enum bpf_attach_type attach_type) +static int load_prog(const struct bpf_insn *insns, + enum bpf_attach_type expected_attach_type) { - LIBBPF_OPTS(bpf_prog_load_opts, opts); - int ret, insn_cnt; - - insn_cnt = probe_prog_length(prog); - - opts.expected_attach_type = attach_type; - opts.log_buf = bpf_log_buf; - opts.log_size = BPF_LOG_BUF_SIZE; - opts.log_level = 2; + LIBBPF_OPTS(bpf_prog_load_opts, opts, + .expected_attach_type = expected_attach_type, + .log_level = 2, + .log_buf = bpf_log_buf, + .log_size = sizeof(bpf_log_buf), + ); + int fd, insns_cnt = 0; + + for (; + insns[insns_cnt].code != (BPF_JMP | BPF_EXIT); + insns_cnt++) { + } + insns_cnt++; - ret = bpf_prog_load(BPF_PROG_TYPE_CGROUP_SOCK, NULL, "GPL", prog, insn_cnt, &opts); - if (verbose && ret < 0) + fd = bpf_prog_load(BPF_PROG_TYPE_CGROUP_SOCK, NULL, "GPL", insns, + insns_cnt, &opts); + if (fd < 0) fprintf(stderr, "%s\n", bpf_log_buf); - return ret; -} - -static int attach_sock_prog(int cgfd, int progfd, - enum bpf_attach_type attach_type) -{ - return bpf_prog_attach(progfd, cgfd, attach_type, BPF_F_ALLOW_OVERRIDE); + return fd; } static int bind_sock(int domain, int type, const char *ip, @@ -477,22 +369,16 @@ out: return res; } -static int run_test_case(int cgfd, const struct sock_test *test) +static int run_test(int cgroup_fd, struct sock_post_bind_test *test) { - int progfd = -1; - int err = 0; - int res; - - printf("Test case: %s .. ", test->descr); - progfd = load_sock_prog(test->insns, test->expected_attach_type); - if (progfd < 0) { - if (test->result == LOAD_REJECT) - goto out; - else - goto err; - } + int err, prog_fd, res, ret = 0; - if (attach_sock_prog(cgfd, progfd, test->attach_type) < 0) { + prog_fd = load_prog(test->insns, test->expected_attach_type); + if (prog_fd < 0) + goto err; + + err = bpf_prog_attach(prog_fd, cgroup_fd, test->attach_type, 0); + if (err < 0) { if (test->result == ATTACH_REJECT) goto out; else @@ -503,54 +389,38 @@ static int run_test_case(int cgfd, const struct sock_test *test) test->port_retry); if (res > 0 && test->result == res) goto out; - err: - err = -1; + ret = -1; out: /* Detaching w/o checking return code: best effort attempt. */ - if (progfd != -1) - bpf_prog_detach(cgfd, test->attach_type); - close(progfd); - printf("[%s]\n", err ? "FAIL" : "PASS"); - return err; + if (prog_fd != -1) + bpf_prog_detach(cgroup_fd, test->attach_type); + close(prog_fd); + return ret; } -static int run_tests(int cgfd) +void test_sock_post_bind(void) { - int passes = 0; - int fails = 0; + struct netns_obj *ns; + int cgroup_fd; int i; - for (i = 0; i < ARRAY_SIZE(tests); ++i) { - if (run_test_case(cgfd, &tests[i])) - ++fails; - else - ++passes; - } - printf("Summary: %d PASSED, %d FAILED\n", passes, fails); - return fails ? -1 : 0; -} - -int main(int argc, char **argv) -{ - int cgfd = -1; - int err = 0; + cgroup_fd = test__join_cgroup("/post_bind"); + if (!ASSERT_OK_FD(cgroup_fd, "join_cgroup")) + return; - cgfd = cgroup_setup_and_join(CG_PATH); - if (cgfd < 0) - goto err; + ns = netns_new(TEST_NS, true); + if (!ASSERT_OK_PTR(ns, "netns_new")) + goto cleanup; - /* Use libbpf 1.0 API mode */ - libbpf_set_strict_mode(LIBBPF_STRICT_ALL); + for (i = 0; i < ARRAY_SIZE(tests); i++) { + if (!test__start_subtest(tests[i].descr)) + continue; - if (run_tests(cgfd)) - goto err; + ASSERT_OK(run_test(cgroup_fd, &tests[i]), tests[i].descr); + } - goto out; -err: - err = -1; -out: - close(cgfd); - cleanup_cgroup_environment(); - return err; +cleanup: + netns_free(ns); + close(cgroup_fd); } diff --git a/tools/testing/selftests/bpf/prog_tests/socket_helpers.h b/tools/testing/selftests/bpf/prog_tests/socket_helpers.h new file mode 100644 index 000000000000..1bdfb79ef009 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/socket_helpers.h @@ -0,0 +1,394 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef __SOCKET_HELPERS__ +#define __SOCKET_HELPERS__ + +#include <linux/vm_sockets.h> + +/* include/linux/net.h */ +#define SOCK_TYPE_MASK 0xf + +#define IO_TIMEOUT_SEC 30 +#define MAX_STRERR_LEN 256 + +/* workaround for older vm_sockets.h */ +#ifndef VMADDR_CID_LOCAL +#define VMADDR_CID_LOCAL 1 +#endif + +/* include/linux/cleanup.h */ +#define __get_and_null(p, nullvalue) \ + ({ \ + __auto_type __ptr = &(p); \ + __auto_type __val = *__ptr; \ + *__ptr = nullvalue; \ + __val; \ + }) + +#define take_fd(fd) __get_and_null(fd, -EBADF) + +/* Wrappers that fail the test on error and report it. */ + +#define _FAIL(errnum, fmt...) \ + ({ \ + error_at_line(0, (errnum), __func__, __LINE__, fmt); \ + CHECK_FAIL(true); \ + }) +#define FAIL(fmt...) _FAIL(0, fmt) +#define FAIL_ERRNO(fmt...) _FAIL(errno, fmt) +#define FAIL_LIBBPF(err, msg) \ + ({ \ + char __buf[MAX_STRERR_LEN]; \ + libbpf_strerror((err), __buf, sizeof(__buf)); \ + FAIL("%s: %s", (msg), __buf); \ + }) + + +#define xaccept_nonblock(fd, addr, len) \ + ({ \ + int __ret = \ + accept_timeout((fd), (addr), (len), IO_TIMEOUT_SEC); \ + if (__ret == -1) \ + FAIL_ERRNO("accept"); \ + __ret; \ + }) + +#define xbind(fd, addr, len) \ + ({ \ + int __ret = bind((fd), (addr), (len)); \ + if (__ret == -1) \ + FAIL_ERRNO("bind"); \ + __ret; \ + }) + +#define xclose(fd) \ + ({ \ + int __ret = close((fd)); \ + if (__ret == -1) \ + FAIL_ERRNO("close"); \ + __ret; \ + }) + +#define xconnect(fd, addr, len) \ + ({ \ + int __ret = connect((fd), (addr), (len)); \ + if (__ret == -1) \ + FAIL_ERRNO("connect"); \ + __ret; \ + }) + +#define xgetsockname(fd, addr, len) \ + ({ \ + int __ret = getsockname((fd), (addr), (len)); \ + if (__ret == -1) \ + FAIL_ERRNO("getsockname"); \ + __ret; \ + }) + +#define xgetsockopt(fd, level, name, val, len) \ + ({ \ + int __ret = getsockopt((fd), (level), (name), (val), (len)); \ + if (__ret == -1) \ + FAIL_ERRNO("getsockopt(" #name ")"); \ + __ret; \ + }) + +#define xlisten(fd, backlog) \ + ({ \ + int __ret = listen((fd), (backlog)); \ + if (__ret == -1) \ + FAIL_ERRNO("listen"); \ + __ret; \ + }) + +#define xsetsockopt(fd, level, name, val, len) \ + ({ \ + int __ret = setsockopt((fd), (level), (name), (val), (len)); \ + if (__ret == -1) \ + FAIL_ERRNO("setsockopt(" #name ")"); \ + __ret; \ + }) + +#define xsend(fd, buf, len, flags) \ + ({ \ + ssize_t __ret = send((fd), (buf), (len), (flags)); \ + if (__ret == -1) \ + FAIL_ERRNO("send"); \ + __ret; \ + }) + +#define xrecv_nonblock(fd, buf, len, flags) \ + ({ \ + ssize_t __ret = recv_timeout((fd), (buf), (len), (flags), \ + IO_TIMEOUT_SEC); \ + if (__ret == -1) \ + FAIL_ERRNO("recv"); \ + __ret; \ + }) + +#define xsocket(family, sotype, flags) \ + ({ \ + int __ret = socket(family, sotype, flags); \ + if (__ret == -1) \ + FAIL_ERRNO("socket"); \ + __ret; \ + }) + +static inline void close_fd(int *fd) +{ + if (*fd >= 0) + xclose(*fd); +} + +#define __close_fd __attribute__((cleanup(close_fd))) + +static inline struct sockaddr *sockaddr(struct sockaddr_storage *ss) +{ + return (struct sockaddr *)ss; +} + +static inline void init_addr_loopback4(struct sockaddr_storage *ss, + socklen_t *len) +{ + struct sockaddr_in *addr4 = memset(ss, 0, sizeof(*ss)); + + addr4->sin_family = AF_INET; + addr4->sin_port = 0; + addr4->sin_addr.s_addr = htonl(INADDR_LOOPBACK); + *len = sizeof(*addr4); +} + +static inline void init_addr_loopback6(struct sockaddr_storage *ss, + socklen_t *len) +{ + struct sockaddr_in6 *addr6 = memset(ss, 0, sizeof(*ss)); + + addr6->sin6_family = AF_INET6; + addr6->sin6_port = 0; + addr6->sin6_addr = in6addr_loopback; + *len = sizeof(*addr6); +} + +static inline void init_addr_loopback_vsock(struct sockaddr_storage *ss, + socklen_t *len) +{ + struct sockaddr_vm *addr = memset(ss, 0, sizeof(*ss)); + + addr->svm_family = AF_VSOCK; + addr->svm_port = VMADDR_PORT_ANY; + addr->svm_cid = VMADDR_CID_LOCAL; + *len = sizeof(*addr); +} + +static inline void init_addr_loopback(int family, struct sockaddr_storage *ss, + socklen_t *len) +{ + switch (family) { + case AF_INET: + init_addr_loopback4(ss, len); + return; + case AF_INET6: + init_addr_loopback6(ss, len); + return; + case AF_VSOCK: + init_addr_loopback_vsock(ss, len); + return; + default: + FAIL("unsupported address family %d", family); + } +} + +static inline int enable_reuseport(int s, int progfd) +{ + int err, one = 1; + + err = xsetsockopt(s, SOL_SOCKET, SO_REUSEPORT, &one, sizeof(one)); + if (err) + return -1; + err = xsetsockopt(s, SOL_SOCKET, SO_ATTACH_REUSEPORT_EBPF, &progfd, + sizeof(progfd)); + if (err) + return -1; + + return 0; +} + +static inline int socket_loopback_reuseport(int family, int sotype, int progfd) +{ + struct sockaddr_storage addr; + socklen_t len = 0; + int err, s; + + init_addr_loopback(family, &addr, &len); + + s = xsocket(family, sotype, 0); + if (s == -1) + return -1; + + if (progfd >= 0) + enable_reuseport(s, progfd); + + err = xbind(s, sockaddr(&addr), len); + if (err) + goto close; + + if (sotype & SOCK_DGRAM) + return s; + + err = xlisten(s, SOMAXCONN); + if (err) + goto close; + + return s; +close: + xclose(s); + return -1; +} + +static inline int socket_loopback(int family, int sotype) +{ + return socket_loopback_reuseport(family, sotype, -1); +} + +static inline int poll_connect(int fd, unsigned int timeout_sec) +{ + struct timeval timeout = { .tv_sec = timeout_sec }; + fd_set wfds; + int r, eval; + socklen_t esize = sizeof(eval); + + FD_ZERO(&wfds); + FD_SET(fd, &wfds); + + r = select(fd + 1, NULL, &wfds, NULL, &timeout); + if (r == 0) + errno = ETIME; + if (r != 1) + return -1; + + if (getsockopt(fd, SOL_SOCKET, SO_ERROR, &eval, &esize) < 0) + return -1; + if (eval != 0) { + errno = eval; + return -1; + } + + return 0; +} + +static inline int poll_read(int fd, unsigned int timeout_sec) +{ + struct timeval timeout = { .tv_sec = timeout_sec }; + fd_set rfds; + int r; + + FD_ZERO(&rfds); + FD_SET(fd, &rfds); + + r = select(fd + 1, &rfds, NULL, NULL, &timeout); + if (r == 0) + errno = ETIME; + + return r == 1 ? 0 : -1; +} + +static inline int accept_timeout(int fd, struct sockaddr *addr, socklen_t *len, + unsigned int timeout_sec) +{ + if (poll_read(fd, timeout_sec)) + return -1; + + return accept(fd, addr, len); +} + +static inline int recv_timeout(int fd, void *buf, size_t len, int flags, + unsigned int timeout_sec) +{ + if (poll_read(fd, timeout_sec)) + return -1; + + return recv(fd, buf, len, flags); +} + + +static inline int create_pair(int family, int sotype, int *p0, int *p1) +{ + __close_fd int s, c = -1, p = -1; + struct sockaddr_storage addr; + socklen_t len = sizeof(addr); + int err; + + s = socket_loopback(family, sotype); + if (s < 0) + return s; + + err = xgetsockname(s, sockaddr(&addr), &len); + if (err) + return err; + + c = xsocket(family, sotype, 0); + if (c < 0) + return c; + + err = connect(c, sockaddr(&addr), len); + if (err) { + if (errno != EINPROGRESS) { + FAIL_ERRNO("connect"); + return err; + } + + err = poll_connect(c, IO_TIMEOUT_SEC); + if (err) { + FAIL_ERRNO("poll_connect"); + return err; + } + } + + switch (sotype & SOCK_TYPE_MASK) { + case SOCK_DGRAM: + err = xgetsockname(c, sockaddr(&addr), &len); + if (err) + return err; + + err = xconnect(s, sockaddr(&addr), len); + if (err) + return err; + + *p0 = take_fd(s); + break; + case SOCK_STREAM: + case SOCK_SEQPACKET: + p = xaccept_nonblock(s, NULL, NULL); + if (p < 0) + return p; + + *p0 = take_fd(p); + break; + default: + FAIL("Unsupported socket type %#x", sotype); + return -EOPNOTSUPP; + } + + *p1 = take_fd(c); + return 0; +} + +static inline int create_socket_pairs(int family, int sotype, int *c0, int *c1, + int *p0, int *p1) +{ + int err; + + err = create_pair(family, sotype, c0, p0); + if (err) + return err; + + err = create_pair(family, sotype, c1, p1); + if (err) { + close(*c0); + close(*p0); + } + + return err; +} + +#endif // __SOCKET_HELPERS__ diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c index 82bfb266741c..884ad87783d5 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c +++ b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c @@ -12,6 +12,7 @@ #include "test_sockmap_progs_query.skel.h" #include "test_sockmap_pass_prog.skel.h" #include "test_sockmap_drop_prog.skel.h" +#include "test_sockmap_change_tail.skel.h" #include "bpf_iter_sockmap.skel.h" #include "sockmap_helpers.h" @@ -108,6 +109,35 @@ out: close(s); } +static void test_sockmap_vsock_delete_on_close(void) +{ + int err, c, p, map; + const int zero = 0; + + err = create_pair(AF_VSOCK, SOCK_STREAM, &c, &p); + if (!ASSERT_OK(err, "create_pair(AF_VSOCK)")) + return; + + map = bpf_map_create(BPF_MAP_TYPE_SOCKMAP, NULL, sizeof(int), + sizeof(int), 1, NULL); + if (!ASSERT_GE(map, 0, "bpf_map_create")) { + close(c); + goto out; + } + + err = bpf_map_update_elem(map, &zero, &c, BPF_NOEXIST); + close(c); + if (!ASSERT_OK(err, "bpf_map_update")) + goto out; + + err = bpf_map_update_elem(map, &zero, &p, BPF_NOEXIST); + ASSERT_OK(err, "after close(), bpf_map_update"); + +out: + close(p); + close(map); +} + static void test_skmsg_helpers(enum bpf_map_type map_type) { struct test_skmsg_load_helpers *skel; @@ -501,6 +531,58 @@ out: test_sockmap_pass_prog__destroy(skel); } +static void test_sockmap_stream_pass(void) +{ + int zero = 0, sent, recvd; + int verdict, parser; + int err, map; + int c = -1, p = -1; + struct test_sockmap_pass_prog *pass = NULL; + char snd[256] = "0123456789"; + char rcv[256] = "0"; + + pass = test_sockmap_pass_prog__open_and_load(); + verdict = bpf_program__fd(pass->progs.prog_skb_verdict); + parser = bpf_program__fd(pass->progs.prog_skb_parser); + map = bpf_map__fd(pass->maps.sock_map_rx); + + err = bpf_prog_attach(parser, map, BPF_SK_SKB_STREAM_PARSER, 0); + if (!ASSERT_OK(err, "bpf_prog_attach stream parser")) + goto out; + + err = bpf_prog_attach(verdict, map, BPF_SK_SKB_STREAM_VERDICT, 0); + if (!ASSERT_OK(err, "bpf_prog_attach stream verdict")) + goto out; + + err = create_pair(AF_INET, SOCK_STREAM, &c, &p); + if (err) + goto out; + + /* sk_data_ready of 'p' will be replaced by strparser handler */ + err = bpf_map_update_elem(map, &zero, &p, BPF_NOEXIST); + if (!ASSERT_OK(err, "bpf_map_update_elem(p)")) + goto out_close; + + /* + * as 'prog_skb_parser' return the original skb len and + * 'prog_skb_verdict' return SK_PASS, the kernel will just + * pass it through to original socket 'p' + */ + sent = xsend(c, snd, sizeof(snd), 0); + ASSERT_EQ(sent, sizeof(snd), "xsend(c)"); + + recvd = recv_timeout(p, rcv, sizeof(rcv), SOCK_NONBLOCK, + IO_TIMEOUT_SEC); + ASSERT_EQ(recvd, sizeof(rcv), "recv_timeout(p)"); + +out_close: + close(c); + close(p); + +out: + test_sockmap_pass_prog__destroy(pass); +} + static void test_sockmap_skb_verdict_fionread(bool pass_prog) { int err, map, verdict, c0 = -1, c1 = -1, p0 = -1, p1 = -1; @@ -562,6 +644,54 @@ out: test_sockmap_drop_prog__destroy(drop); } +static void test_sockmap_skb_verdict_change_tail(void) +{ + struct test_sockmap_change_tail *skel; + int err, map, verdict; + int c1, p1, sent, recvd; + int zero = 0; + char buf[2]; + + skel = test_sockmap_change_tail__open_and_load(); + if (!ASSERT_OK_PTR(skel, "open_and_load")) + return; + verdict = bpf_program__fd(skel->progs.prog_skb_verdict); + map = bpf_map__fd(skel->maps.sock_map_rx); + + err = bpf_prog_attach(verdict, map, BPF_SK_SKB_STREAM_VERDICT, 0); + if (!ASSERT_OK(err, "bpf_prog_attach")) + goto out; + err = create_pair(AF_INET, SOCK_STREAM, &c1, &p1); + if (!ASSERT_OK(err, "create_pair()")) + goto out; + err = bpf_map_update_elem(map, &zero, &c1, BPF_NOEXIST); + if (!ASSERT_OK(err, "bpf_map_update_elem(c1)")) + goto out_close; + sent = xsend(p1, "Tr", 2, 0); + ASSERT_EQ(sent, 2, "xsend(p1)"); + recvd = recv(c1, buf, 2, 0); + ASSERT_EQ(recvd, 1, "recv(c1)"); + ASSERT_EQ(skel->data->change_tail_ret, 0, "change_tail_ret"); + + sent = xsend(p1, "G", 1, 0); + ASSERT_EQ(sent, 1, "xsend(p1)"); + recvd = recv(c1, buf, 2, 0); + ASSERT_EQ(recvd, 2, "recv(c1)"); + ASSERT_EQ(skel->data->change_tail_ret, 0, "change_tail_ret"); + + sent = xsend(p1, "E", 1, 0); + ASSERT_EQ(sent, 1, "xsend(p1)"); + recvd = recv(c1, buf, 1, 0); + ASSERT_EQ(recvd, 1, "recv(c1)"); + ASSERT_EQ(skel->data->change_tail_ret, -EINVAL, "change_tail_ret"); + +out_close: + close(c1); + close(p1); +out: + test_sockmap_change_tail__destroy(skel); +} + static void test_sockmap_skb_verdict_peek_helper(int map) { int err, c1, p1, zero = 0, sent, recvd, avail; @@ -853,8 +983,10 @@ static void test_sockmap_same_sock(void) err = socketpair(AF_UNIX, SOCK_STREAM, 0, stream); ASSERT_OK(err, "socketpair(af_unix, sock_stream)"); - if (err) + if (err) { + close(tcp); goto out; + } for (i = 0; i < 2; i++) { err = bpf_map_update_elem(map, &zero, &stream[0], BPF_ANY); @@ -873,24 +1005,70 @@ static void test_sockmap_same_sock(void) ASSERT_OK(err, "bpf_map_update_elem(tcp)"); } + close(tcp); err = bpf_map_delete_elem(map, &zero); - ASSERT_OK(err, "bpf_map_delete_elem(entry)"); + ASSERT_ERR(err, "bpf_map_delete_elem(entry)"); close(stream[0]); close(stream[1]); out: close(dgram); - close(tcp); close(udp); test_sockmap_pass_prog__destroy(skel); } +static void test_sockmap_skb_verdict_vsock_poll(void) +{ + struct test_sockmap_pass_prog *skel; + int err, map, conn, peer; + struct bpf_program *prog; + struct bpf_link *link; + char buf = 'x'; + int zero = 0; + + skel = test_sockmap_pass_prog__open_and_load(); + if (!ASSERT_OK_PTR(skel, "open_and_load")) + return; + + if (create_pair(AF_VSOCK, SOCK_STREAM, &conn, &peer)) + goto destroy; + + prog = skel->progs.prog_skb_verdict; + map = bpf_map__fd(skel->maps.sock_map_rx); + link = bpf_program__attach_sockmap(prog, map); + if (!ASSERT_OK_PTR(link, "bpf_program__attach_sockmap")) + goto close; + + err = bpf_map_update_elem(map, &zero, &conn, BPF_ANY); + if (!ASSERT_OK(err, "bpf_map_update_elem")) + goto detach; + + if (xsend(peer, &buf, 1, 0) != 1) + goto detach; + + err = poll_read(conn, IO_TIMEOUT_SEC); + if (!ASSERT_OK(err, "poll")) + goto detach; + + if (xrecv_nonblock(conn, &buf, 1, 0) != 1) + FAIL("xrecv_nonblock"); +detach: + bpf_link__detach(link); +close: + xclose(conn); + xclose(peer); +destroy: + test_sockmap_pass_prog__destroy(skel); +} + void test_sockmap_basic(void) { if (test__start_subtest("sockmap create_update_free")) test_sockmap_create_update_free(BPF_MAP_TYPE_SOCKMAP); if (test__start_subtest("sockhash create_update_free")) test_sockmap_create_update_free(BPF_MAP_TYPE_SOCKHASH); + if (test__start_subtest("sockmap vsock delete on close")) + test_sockmap_vsock_delete_on_close(); if (test__start_subtest("sockmap sk_msg load helpers")) test_skmsg_helpers(BPF_MAP_TYPE_SOCKMAP); if (test__start_subtest("sockhash sk_msg load helpers")) @@ -923,10 +1101,14 @@ void test_sockmap_basic(void) test_sockmap_progs_query(BPF_SK_SKB_VERDICT); if (test__start_subtest("sockmap skb_verdict shutdown")) test_sockmap_skb_verdict_shutdown(); + if (test__start_subtest("sockmap stream parser and verdict pass")) + test_sockmap_stream_pass(); if (test__start_subtest("sockmap skb_verdict fionread")) test_sockmap_skb_verdict_fionread(true); if (test__start_subtest("sockmap skb_verdict fionread on drop")) test_sockmap_skb_verdict_fionread(false); + if (test__start_subtest("sockmap skb_verdict change tail")) + test_sockmap_skb_verdict_change_tail(); if (test__start_subtest("sockmap skb_verdict msg_f_peek")) test_sockmap_skb_verdict_peek(); if (test__start_subtest("sockmap skb_verdict msg_f_peek with link")) @@ -943,4 +1125,6 @@ void test_sockmap_basic(void) test_skmsg_helpers_with_link(BPF_MAP_TYPE_SOCKMAP); if (test__start_subtest("sockhash sk_msg attach sockhash helpers with link")) test_skmsg_helpers_with_link(BPF_MAP_TYPE_SOCKHASH); + if (test__start_subtest("sockmap skb_verdict vsock poll")) + test_sockmap_skb_verdict_vsock_poll(); } diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_helpers.h b/tools/testing/selftests/bpf/prog_tests/sockmap_helpers.h index 38e35c72bdaa..3e5571dd578d 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockmap_helpers.h +++ b/tools/testing/selftests/bpf/prog_tests/sockmap_helpers.h @@ -1,139 +1,12 @@ #ifndef __SOCKMAP_HELPERS__ #define __SOCKMAP_HELPERS__ -#include <linux/vm_sockets.h> +#include "socket_helpers.h" -/* include/linux/net.h */ -#define SOCK_TYPE_MASK 0xf - -#define IO_TIMEOUT_SEC 30 -#define MAX_STRERR_LEN 256 #define MAX_TEST_NAME 80 -/* workaround for older vm_sockets.h */ -#ifndef VMADDR_CID_LOCAL -#define VMADDR_CID_LOCAL 1 -#endif - #define __always_unused __attribute__((__unused__)) -/* include/linux/cleanup.h */ -#define __get_and_null(p, nullvalue) \ - ({ \ - __auto_type __ptr = &(p); \ - __auto_type __val = *__ptr; \ - *__ptr = nullvalue; \ - __val; \ - }) - -#define take_fd(fd) __get_and_null(fd, -EBADF) - -#define _FAIL(errnum, fmt...) \ - ({ \ - error_at_line(0, (errnum), __func__, __LINE__, fmt); \ - CHECK_FAIL(true); \ - }) -#define FAIL(fmt...) _FAIL(0, fmt) -#define FAIL_ERRNO(fmt...) _FAIL(errno, fmt) -#define FAIL_LIBBPF(err, msg) \ - ({ \ - char __buf[MAX_STRERR_LEN]; \ - libbpf_strerror((err), __buf, sizeof(__buf)); \ - FAIL("%s: %s", (msg), __buf); \ - }) - -/* Wrappers that fail the test on error and report it. */ - -#define xaccept_nonblock(fd, addr, len) \ - ({ \ - int __ret = \ - accept_timeout((fd), (addr), (len), IO_TIMEOUT_SEC); \ - if (__ret == -1) \ - FAIL_ERRNO("accept"); \ - __ret; \ - }) - -#define xbind(fd, addr, len) \ - ({ \ - int __ret = bind((fd), (addr), (len)); \ - if (__ret == -1) \ - FAIL_ERRNO("bind"); \ - __ret; \ - }) - -#define xclose(fd) \ - ({ \ - int __ret = close((fd)); \ - if (__ret == -1) \ - FAIL_ERRNO("close"); \ - __ret; \ - }) - -#define xconnect(fd, addr, len) \ - ({ \ - int __ret = connect((fd), (addr), (len)); \ - if (__ret == -1) \ - FAIL_ERRNO("connect"); \ - __ret; \ - }) - -#define xgetsockname(fd, addr, len) \ - ({ \ - int __ret = getsockname((fd), (addr), (len)); \ - if (__ret == -1) \ - FAIL_ERRNO("getsockname"); \ - __ret; \ - }) - -#define xgetsockopt(fd, level, name, val, len) \ - ({ \ - int __ret = getsockopt((fd), (level), (name), (val), (len)); \ - if (__ret == -1) \ - FAIL_ERRNO("getsockopt(" #name ")"); \ - __ret; \ - }) - -#define xlisten(fd, backlog) \ - ({ \ - int __ret = listen((fd), (backlog)); \ - if (__ret == -1) \ - FAIL_ERRNO("listen"); \ - __ret; \ - }) - -#define xsetsockopt(fd, level, name, val, len) \ - ({ \ - int __ret = setsockopt((fd), (level), (name), (val), (len)); \ - if (__ret == -1) \ - FAIL_ERRNO("setsockopt(" #name ")"); \ - __ret; \ - }) - -#define xsend(fd, buf, len, flags) \ - ({ \ - ssize_t __ret = send((fd), (buf), (len), (flags)); \ - if (__ret == -1) \ - FAIL_ERRNO("send"); \ - __ret; \ - }) - -#define xrecv_nonblock(fd, buf, len, flags) \ - ({ \ - ssize_t __ret = recv_timeout((fd), (buf), (len), (flags), \ - IO_TIMEOUT_SEC); \ - if (__ret == -1) \ - FAIL_ERRNO("recv"); \ - __ret; \ - }) - -#define xsocket(family, sotype, flags) \ - ({ \ - int __ret = socket(family, sotype, flags); \ - if (__ret == -1) \ - FAIL_ERRNO("socket"); \ - __ret; \ - }) - #define xbpf_map_delete_elem(fd, key) \ ({ \ int __ret = bpf_map_delete_elem((fd), (key)); \ @@ -193,130 +66,6 @@ __ret; \ }) -static inline void close_fd(int *fd) -{ - if (*fd >= 0) - xclose(*fd); -} - -#define __close_fd __attribute__((cleanup(close_fd))) - -static inline int poll_connect(int fd, unsigned int timeout_sec) -{ - struct timeval timeout = { .tv_sec = timeout_sec }; - fd_set wfds; - int r, eval; - socklen_t esize = sizeof(eval); - - FD_ZERO(&wfds); - FD_SET(fd, &wfds); - - r = select(fd + 1, NULL, &wfds, NULL, &timeout); - if (r == 0) - errno = ETIME; - if (r != 1) - return -1; - - if (getsockopt(fd, SOL_SOCKET, SO_ERROR, &eval, &esize) < 0) - return -1; - if (eval != 0) { - errno = eval; - return -1; - } - - return 0; -} - -static inline int poll_read(int fd, unsigned int timeout_sec) -{ - struct timeval timeout = { .tv_sec = timeout_sec }; - fd_set rfds; - int r; - - FD_ZERO(&rfds); - FD_SET(fd, &rfds); - - r = select(fd + 1, &rfds, NULL, NULL, &timeout); - if (r == 0) - errno = ETIME; - - return r == 1 ? 0 : -1; -} - -static inline int accept_timeout(int fd, struct sockaddr *addr, socklen_t *len, - unsigned int timeout_sec) -{ - if (poll_read(fd, timeout_sec)) - return -1; - - return accept(fd, addr, len); -} - -static inline int recv_timeout(int fd, void *buf, size_t len, int flags, - unsigned int timeout_sec) -{ - if (poll_read(fd, timeout_sec)) - return -1; - - return recv(fd, buf, len, flags); -} - -static inline void init_addr_loopback4(struct sockaddr_storage *ss, - socklen_t *len) -{ - struct sockaddr_in *addr4 = memset(ss, 0, sizeof(*ss)); - - addr4->sin_family = AF_INET; - addr4->sin_port = 0; - addr4->sin_addr.s_addr = htonl(INADDR_LOOPBACK); - *len = sizeof(*addr4); -} - -static inline void init_addr_loopback6(struct sockaddr_storage *ss, - socklen_t *len) -{ - struct sockaddr_in6 *addr6 = memset(ss, 0, sizeof(*ss)); - - addr6->sin6_family = AF_INET6; - addr6->sin6_port = 0; - addr6->sin6_addr = in6addr_loopback; - *len = sizeof(*addr6); -} - -static inline void init_addr_loopback_vsock(struct sockaddr_storage *ss, - socklen_t *len) -{ - struct sockaddr_vm *addr = memset(ss, 0, sizeof(*ss)); - - addr->svm_family = AF_VSOCK; - addr->svm_port = VMADDR_PORT_ANY; - addr->svm_cid = VMADDR_CID_LOCAL; - *len = sizeof(*addr); -} - -static inline void init_addr_loopback(int family, struct sockaddr_storage *ss, - socklen_t *len) -{ - switch (family) { - case AF_INET: - init_addr_loopback4(ss, len); - return; - case AF_INET6: - init_addr_loopback6(ss, len); - return; - case AF_VSOCK: - init_addr_loopback_vsock(ss, len); - return; - default: - FAIL("unsupported address family %d", family); - } -} - -static inline struct sockaddr *sockaddr(struct sockaddr_storage *ss) -{ - return (struct sockaddr *)ss; -} - static inline int add_to_sockmap(int sock_mapfd, int fd1, int fd2) { u64 value; @@ -334,136 +83,4 @@ static inline int add_to_sockmap(int sock_mapfd, int fd1, int fd2) return xbpf_map_update_elem(sock_mapfd, &key, &value, BPF_NOEXIST); } -static inline int enable_reuseport(int s, int progfd) -{ - int err, one = 1; - - err = xsetsockopt(s, SOL_SOCKET, SO_REUSEPORT, &one, sizeof(one)); - if (err) - return -1; - err = xsetsockopt(s, SOL_SOCKET, SO_ATTACH_REUSEPORT_EBPF, &progfd, - sizeof(progfd)); - if (err) - return -1; - - return 0; -} - -static inline int socket_loopback_reuseport(int family, int sotype, int progfd) -{ - struct sockaddr_storage addr; - socklen_t len = 0; - int err, s; - - init_addr_loopback(family, &addr, &len); - - s = xsocket(family, sotype, 0); - if (s == -1) - return -1; - - if (progfd >= 0) - enable_reuseport(s, progfd); - - err = xbind(s, sockaddr(&addr), len); - if (err) - goto close; - - if (sotype & SOCK_DGRAM) - return s; - - err = xlisten(s, SOMAXCONN); - if (err) - goto close; - - return s; -close: - xclose(s); - return -1; -} - -static inline int socket_loopback(int family, int sotype) -{ - return socket_loopback_reuseport(family, sotype, -1); -} - -static inline int create_pair(int family, int sotype, int *p0, int *p1) -{ - __close_fd int s, c = -1, p = -1; - struct sockaddr_storage addr; - socklen_t len = sizeof(addr); - int err; - - s = socket_loopback(family, sotype); - if (s < 0) - return s; - - err = xgetsockname(s, sockaddr(&addr), &len); - if (err) - return err; - - c = xsocket(family, sotype, 0); - if (c < 0) - return c; - - err = connect(c, sockaddr(&addr), len); - if (err) { - if (errno != EINPROGRESS) { - FAIL_ERRNO("connect"); - return err; - } - - err = poll_connect(c, IO_TIMEOUT_SEC); - if (err) { - FAIL_ERRNO("poll_connect"); - return err; - } - } - - switch (sotype & SOCK_TYPE_MASK) { - case SOCK_DGRAM: - err = xgetsockname(c, sockaddr(&addr), &len); - if (err) - return err; - - err = xconnect(s, sockaddr(&addr), len); - if (err) - return err; - - *p0 = take_fd(s); - break; - case SOCK_STREAM: - case SOCK_SEQPACKET: - p = xaccept_nonblock(s, NULL, NULL); - if (p < 0) - return p; - - *p0 = take_fd(p); - break; - default: - FAIL("Unsupported socket type %#x", sotype); - return -EOPNOTSUPP; - } - - *p1 = take_fd(c); - return 0; -} - -static inline int create_socket_pairs(int family, int sotype, int *c0, int *c1, - int *p0, int *p1) -{ - int err; - - err = create_pair(family, sotype, c0, p0); - if (err) - return err; - - err = create_pair(family, sotype, c1, p1); - if (err) { - close(*c0); - close(*p0); - } - - return err; -} - #endif // __SOCKMAP_HELPERS__ diff --git a/tools/testing/selftests/bpf/prog_tests/struct_ops_private_stack.c b/tools/testing/selftests/bpf/prog_tests/struct_ops_private_stack.c new file mode 100644 index 000000000000..4006879ca3fe --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/struct_ops_private_stack.c @@ -0,0 +1,106 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <test_progs.h> +#include "struct_ops_private_stack.skel.h" +#include "struct_ops_private_stack_fail.skel.h" +#include "struct_ops_private_stack_recur.skel.h" + +static void test_private_stack(void) +{ + struct struct_ops_private_stack *skel; + struct bpf_link *link; + int err; + + skel = struct_ops_private_stack__open(); + if (!ASSERT_OK_PTR(skel, "struct_ops_private_stack__open")) + return; + + if (skel->data->skip) { + test__skip(); + goto cleanup; + } + + err = struct_ops_private_stack__load(skel); + if (!ASSERT_OK(err, "struct_ops_private_stack__load")) + goto cleanup; + + link = bpf_map__attach_struct_ops(skel->maps.testmod_1); + if (!ASSERT_OK_PTR(link, "attach_struct_ops")) + goto cleanup; + + ASSERT_OK(trigger_module_test_read(256), "trigger_read"); + + ASSERT_EQ(skel->bss->val_i, 3, "val_i"); + ASSERT_EQ(skel->bss->val_j, 8, "val_j"); + + bpf_link__destroy(link); + +cleanup: + struct_ops_private_stack__destroy(skel); +} + +static void test_private_stack_fail(void) +{ + struct struct_ops_private_stack_fail *skel; + int err; + + skel = struct_ops_private_stack_fail__open(); + if (!ASSERT_OK_PTR(skel, "struct_ops_private_stack_fail__open")) + return; + + if (skel->data->skip) { + test__skip(); + goto cleanup; + } + + err = struct_ops_private_stack_fail__load(skel); + if (!ASSERT_ERR(err, "struct_ops_private_stack_fail__load")) + goto cleanup; + return; + +cleanup: + struct_ops_private_stack_fail__destroy(skel); +} + +static void test_private_stack_recur(void) +{ + struct struct_ops_private_stack_recur *skel; + struct bpf_link *link; + int err; + + skel = struct_ops_private_stack_recur__open(); + if (!ASSERT_OK_PTR(skel, "struct_ops_private_stack_recur__open")) + return; + + if (skel->data->skip) { + test__skip(); + goto cleanup; + } + + err = struct_ops_private_stack_recur__load(skel); + if (!ASSERT_OK(err, "struct_ops_private_stack_recur__load")) + goto cleanup; + + link = bpf_map__attach_struct_ops(skel->maps.testmod_1); + if (!ASSERT_OK_PTR(link, "attach_struct_ops")) + goto cleanup; + + ASSERT_OK(trigger_module_test_read(256), "trigger_read"); + + ASSERT_EQ(skel->bss->val_j, 3, "val_j"); + + bpf_link__destroy(link); + +cleanup: + struct_ops_private_stack_recur__destroy(skel); +} + +void test_struct_ops_private_stack(void) +{ + if (test__start_subtest("private_stack")) + test_private_stack(); + if (test__start_subtest("private_stack_fail")) + test_private_stack_fail(); + if (test__start_subtest("private_stack_recur")) + test_private_stack_recur(); +} diff --git a/tools/testing/selftests/bpf/prog_tests/subskeleton.c b/tools/testing/selftests/bpf/prog_tests/subskeleton.c index 9c31b7004f9c..fdf13ed0152a 100644 --- a/tools/testing/selftests/bpf/prog_tests/subskeleton.c +++ b/tools/testing/selftests/bpf/prog_tests/subskeleton.c @@ -46,7 +46,8 @@ static int subskeleton_lib_subresult(struct bpf_object *obj) return result; } -void test_subskeleton(void) +/* initialize and load through skeleton, then instantiate subskeleton out of it */ +static void subtest_skel_subskeleton(void) { int err, result; struct test_subskeleton *skel; @@ -76,3 +77,76 @@ void test_subskeleton(void) cleanup: test_subskeleton__destroy(skel); } + +/* initialize and load through generic bpf_object API, then instantiate subskeleton out of it */ +static void subtest_obj_subskeleton(void) +{ + int err, result; + const void *elf_bytes; + size_t elf_bytes_sz = 0, rodata_sz = 0, bss_sz = 0; + struct bpf_object *obj; + const struct bpf_map *map; + const struct bpf_program *prog; + struct bpf_link *link = NULL; + struct test_subskeleton__rodata *rodata; + struct test_subskeleton__bss *bss; + + elf_bytes = test_subskeleton__elf_bytes(&elf_bytes_sz); + if (!ASSERT_OK_PTR(elf_bytes, "elf_bytes")) + return; + + obj = bpf_object__open_mem(elf_bytes, elf_bytes_sz, NULL); + if (!ASSERT_OK_PTR(obj, "obj_open_mem")) + return; + + map = bpf_object__find_map_by_name(obj, ".rodata"); + if (!ASSERT_OK_PTR(map, "rodata_map_by_name")) + goto cleanup; + + rodata = bpf_map__initial_value(map, &rodata_sz); + if (!ASSERT_OK_PTR(rodata, "rodata_get")) + goto cleanup; + + rodata->rovar1 = 10; + rodata->var1 = 1; + subskeleton_lib_setup(obj); + + err = bpf_object__load(obj); + if (!ASSERT_OK(err, "obj_load")) + goto cleanup; + + prog = bpf_object__find_program_by_name(obj, "handler1"); + if (!ASSERT_OK_PTR(prog, "prog_by_name")) + goto cleanup; + + link = bpf_program__attach(prog); + if (!ASSERT_OK_PTR(link, "prog_attach")) + goto cleanup; + + /* trigger tracepoint */ + usleep(1); + + map = bpf_object__find_map_by_name(obj, ".bss"); + if (!ASSERT_OK_PTR(map, "bss_map_by_name")) + goto cleanup; + + bss = bpf_map__initial_value(map, &bss_sz); + if (!ASSERT_OK_PTR(rodata, "rodata_get")) + goto cleanup; + + result = subskeleton_lib_subresult(obj) * 10; + ASSERT_EQ(bss->out1, result, "out1"); + +cleanup: + bpf_link__destroy(link); + bpf_object__close(obj); +} + + +void test_subskeleton(void) +{ + if (test__start_subtest("skel_subskel")) + subtest_skel_subskeleton(); + if (test__start_subtest("obj_subskel")) + subtest_obj_subskeleton(); +} diff --git a/tools/testing/selftests/bpf/prog_tests/tailcalls.c b/tools/testing/selftests/bpf/prog_tests/tailcalls.c index 21c5a37846ad..544144620ca6 100644 --- a/tools/testing/selftests/bpf/prog_tests/tailcalls.c +++ b/tools/testing/selftests/bpf/prog_tests/tailcalls.c @@ -7,6 +7,7 @@ #include "tailcall_bpf2bpf_hierarchy3.skel.h" #include "tailcall_freplace.skel.h" #include "tc_bpf2bpf.skel.h" +#include "tailcall_fail.skel.h" /* test_tailcall_1 checks basic functionality by patching multiple locations * in a single program for a single tail call slot with nop->jmp, jmp->nop @@ -1496,8 +1497,8 @@ static void test_tailcall_bpf2bpf_hierarchy_3(void) RUN_TESTS(tailcall_bpf2bpf_hierarchy3); } -/* test_tailcall_freplace checks that the attached freplace prog is OK to - * update the prog_array map. +/* test_tailcall_freplace checks that the freplace prog fails to update the + * prog_array map, no matter whether the freplace prog attaches to its target. */ static void test_tailcall_freplace(void) { @@ -1505,7 +1506,7 @@ static void test_tailcall_freplace(void) struct bpf_link *freplace_link = NULL; struct bpf_program *freplace_prog; struct tc_bpf2bpf *tc_skel = NULL; - int prog_fd, map_fd; + int prog_fd, tc_prog_fd, map_fd; char buff[128] = {}; int err, key; @@ -1523,9 +1524,10 @@ static void test_tailcall_freplace(void) if (!ASSERT_OK_PTR(tc_skel, "tc_bpf2bpf__open_and_load")) goto out; - prog_fd = bpf_program__fd(tc_skel->progs.entry_tc); + tc_prog_fd = bpf_program__fd(tc_skel->progs.entry_tc); freplace_prog = freplace_skel->progs.entry_freplace; - err = bpf_program__set_attach_target(freplace_prog, prog_fd, "subprog"); + err = bpf_program__set_attach_target(freplace_prog, tc_prog_fd, + "subprog_tc"); if (!ASSERT_OK(err, "set_attach_target")) goto out; @@ -1533,27 +1535,121 @@ static void test_tailcall_freplace(void) if (!ASSERT_OK(err, "tailcall_freplace__load")) goto out; - freplace_link = bpf_program__attach_freplace(freplace_prog, prog_fd, - "subprog"); + map_fd = bpf_map__fd(freplace_skel->maps.jmp_table); + prog_fd = bpf_program__fd(freplace_prog); + key = 0; + err = bpf_map_update_elem(map_fd, &key, &prog_fd, BPF_ANY); + ASSERT_ERR(err, "update jmp_table failure"); + + freplace_link = bpf_program__attach_freplace(freplace_prog, tc_prog_fd, + "subprog_tc"); if (!ASSERT_OK_PTR(freplace_link, "attach_freplace")) goto out; - map_fd = bpf_map__fd(freplace_skel->maps.jmp_table); - prog_fd = bpf_program__fd(freplace_prog); + err = bpf_map_update_elem(map_fd, &key, &prog_fd, BPF_ANY); + ASSERT_ERR(err, "update jmp_table failure"); + +out: + bpf_link__destroy(freplace_link); + tailcall_freplace__destroy(freplace_skel); + tc_bpf2bpf__destroy(tc_skel); +} + +/* test_tailcall_bpf2bpf_freplace checks the failure that fails to attach a tail + * callee prog with freplace prog or fails to update an extended prog to + * prog_array map. + */ +static void test_tailcall_bpf2bpf_freplace(void) +{ + struct tailcall_freplace *freplace_skel = NULL; + struct bpf_link *freplace_link = NULL; + struct tc_bpf2bpf *tc_skel = NULL; + char buff[128] = {}; + int prog_fd, map_fd; + int err, key; + + LIBBPF_OPTS(bpf_test_run_opts, topts, + .data_in = buff, + .data_size_in = sizeof(buff), + .repeat = 1, + ); + + tc_skel = tc_bpf2bpf__open_and_load(); + if (!ASSERT_OK_PTR(tc_skel, "tc_bpf2bpf__open_and_load")) + goto out; + + prog_fd = bpf_program__fd(tc_skel->progs.entry_tc); + freplace_skel = tailcall_freplace__open(); + if (!ASSERT_OK_PTR(freplace_skel, "tailcall_freplace__open")) + goto out; + + err = bpf_program__set_attach_target(freplace_skel->progs.entry_freplace, + prog_fd, "subprog_tc"); + if (!ASSERT_OK(err, "set_attach_target")) + goto out; + + err = tailcall_freplace__load(freplace_skel); + if (!ASSERT_OK(err, "tailcall_freplace__load")) + goto out; + + /* OK to attach then detach freplace prog. */ + + freplace_link = bpf_program__attach_freplace(freplace_skel->progs.entry_freplace, + prog_fd, "subprog_tc"); + if (!ASSERT_OK_PTR(freplace_link, "attach_freplace")) + goto out; + + err = bpf_link__destroy(freplace_link); + if (!ASSERT_OK(err, "destroy link")) + goto out; + + /* OK to update prog_array map then delete element from the map. */ + key = 0; + map_fd = bpf_map__fd(freplace_skel->maps.jmp_table); err = bpf_map_update_elem(map_fd, &key, &prog_fd, BPF_ANY); if (!ASSERT_OK(err, "update jmp_table")) goto out; - prog_fd = bpf_program__fd(tc_skel->progs.entry_tc); - err = bpf_prog_test_run_opts(prog_fd, &topts); - ASSERT_OK(err, "test_run"); - ASSERT_EQ(topts.retval, 34, "test_run retval"); + err = bpf_map_delete_elem(map_fd, &key); + if (!ASSERT_OK(err, "delete_elem from jmp_table")) + goto out; + + /* Fail to attach a tail callee prog with freplace prog. */ + + err = bpf_map_update_elem(map_fd, &key, &prog_fd, BPF_ANY); + if (!ASSERT_OK(err, "update jmp_table")) + goto out; + + freplace_link = bpf_program__attach_freplace(freplace_skel->progs.entry_freplace, + prog_fd, "subprog_tc"); + if (!ASSERT_ERR_PTR(freplace_link, "attach_freplace failure")) + goto out; + + err = bpf_map_delete_elem(map_fd, &key); + if (!ASSERT_OK(err, "delete_elem from jmp_table")) + goto out; + + /* Fail to update an extended prog to prog_array map. */ + + freplace_link = bpf_program__attach_freplace(freplace_skel->progs.entry_freplace, + prog_fd, "subprog_tc"); + if (!ASSERT_OK_PTR(freplace_link, "attach_freplace")) + goto out; + + err = bpf_map_update_elem(map_fd, &key, &prog_fd, BPF_ANY); + if (!ASSERT_ERR(err, "update jmp_table failure")) + goto out; out: bpf_link__destroy(freplace_link); - tc_bpf2bpf__destroy(tc_skel); tailcall_freplace__destroy(freplace_skel); + tc_bpf2bpf__destroy(tc_skel); +} + +static void test_tailcall_failure() +{ + RUN_TESTS(tailcall_fail); } void test_tailcalls(void) @@ -1606,4 +1702,8 @@ void test_tailcalls(void) test_tailcall_bpf2bpf_hierarchy_3(); if (test__start_subtest("tailcall_freplace")) test_tailcall_freplace(); + if (test__start_subtest("tailcall_bpf2bpf_freplace")) + test_tailcall_bpf2bpf_freplace(); + if (test__start_subtest("tailcall_failure")) + test_tailcall_failure(); } diff --git a/tools/testing/selftests/bpf/prog_tests/task_kfunc.c b/tools/testing/selftests/bpf/prog_tests/task_kfunc.c index d4579f735398..83b90335967a 100644 --- a/tools/testing/selftests/bpf/prog_tests/task_kfunc.c +++ b/tools/testing/selftests/bpf/prog_tests/task_kfunc.c @@ -68,6 +68,74 @@ cleanup: task_kfunc_success__destroy(skel); } +static int run_vpid_test(void *prog_name) +{ + struct task_kfunc_success *skel; + struct bpf_program *prog; + int prog_fd, err = 0; + + if (getpid() != 1) + return 1; + + skel = open_load_task_kfunc_skel(); + if (!skel) + return 2; + + if (skel->bss->err) { + err = 3; + goto cleanup; + } + + prog = bpf_object__find_program_by_name(skel->obj, prog_name); + if (!prog) { + err = 4; + goto cleanup; + } + + prog_fd = bpf_program__fd(prog); + if (prog_fd < 0) { + err = 5; + goto cleanup; + } + + if (bpf_prog_test_run_opts(prog_fd, NULL)) { + err = 6; + goto cleanup; + } + + if (skel->bss->err) + err = 7 + skel->bss->err; +cleanup: + task_kfunc_success__destroy(skel); + return err; +} + +static void run_vpid_success_test(const char *prog_name) +{ + const int stack_size = 1024 * 1024; + int child_pid, wstatus; + char *stack; + + stack = (char *)malloc(stack_size); + if (!ASSERT_OK_PTR(stack, "clone_stack")) + return; + + child_pid = clone(run_vpid_test, stack + stack_size, + CLONE_NEWPID | SIGCHLD, (void *)prog_name); + if (!ASSERT_GT(child_pid, -1, "child_pid")) + goto cleanup; + + if (!ASSERT_GT(waitpid(child_pid, &wstatus, 0), -1, "waitpid")) + goto cleanup; + + if (WEXITSTATUS(wstatus) > 7) + ASSERT_OK(WEXITSTATUS(wstatus) - 7, "vpid_test_failure"); + else + ASSERT_OK(WEXITSTATUS(wstatus), "run_vpid_test_err"); +cleanup: + free(stack); +} + static const char * const success_tests[] = { "test_task_acquire_release_argument", "test_task_acquire_release_current", @@ -83,6 +151,11 @@ static const char * const success_tests[] = { "test_task_kfunc_flavor_relo_not_found", }; +static const char * const vpid_success_tests[] = { + "test_task_from_vpid_current", + "test_task_from_vpid_invalid", +}; + void test_task_kfunc(void) { int i; @@ -94,5 +167,12 @@ void test_task_kfunc(void) run_success_test(success_tests[i]); } + for (i = 0; i < ARRAY_SIZE(vpid_success_tests); i++) { + if (!test__start_subtest(vpid_success_tests[i])) + continue; + + run_vpid_success_test(vpid_success_tests[i]); + } + RUN_TESTS(task_kfunc_failure); } diff --git a/tools/testing/selftests/bpf/prog_tests/task_local_storage.c b/tools/testing/selftests/bpf/prog_tests/task_local_storage.c index c33c05161a9e..42e822ea352f 100644 --- a/tools/testing/selftests/bpf/prog_tests/task_local_storage.c +++ b/tools/testing/selftests/bpf/prog_tests/task_local_storage.c @@ -7,12 +7,20 @@ #include <pthread.h> #include <sys/syscall.h> /* For SYS_xxx definitions */ #include <sys/types.h> +#include <sys/eventfd.h> +#include <sys/mman.h> #include <test_progs.h> +#include <bpf/btf.h> #include "task_local_storage_helpers.h" #include "task_local_storage.skel.h" #include "task_local_storage_exit_creds.skel.h" #include "task_ls_recursion.skel.h" #include "task_storage_nodeadlock.skel.h" +#include "uptr_test_common.h" +#include "task_ls_uptr.skel.h" +#include "uptr_update_failure.skel.h" +#include "uptr_failure.skel.h" +#include "uptr_map_failure.skel.h" static void test_sys_enter_exit(void) { @@ -23,14 +31,14 @@ static void test_sys_enter_exit(void) if (!ASSERT_OK_PTR(skel, "skel_open_and_load")) return; - skel->bss->target_pid = syscall(SYS_gettid); + skel->bss->target_pid = sys_gettid(); err = task_local_storage__attach(skel); if (!ASSERT_OK(err, "skel_attach")) goto out; - syscall(SYS_gettid); - syscall(SYS_gettid); + sys_gettid(); + sys_gettid(); /* 3x syscalls: 1x attach and 2x gettid */ ASSERT_EQ(skel->bss->enter_cnt, 3, "enter_cnt"); @@ -99,7 +107,7 @@ static void test_recursion(void) /* trigger sys_enter, make sure it does not cause deadlock */ skel->bss->test_pid = getpid(); - syscall(SYS_gettid); + sys_gettid(); skel->bss->test_pid = 0; task_ls_recursion__detach(skel); @@ -189,7 +197,7 @@ static void test_nodeadlock(void) /* Unnecessary recursion and deadlock detection are reproducible * in the preemptible kernel. */ - if (!skel->kconfig->CONFIG_PREEMPT) { + if (!skel->kconfig->CONFIG_PREEMPTION) { test__skip(); goto done; } @@ -227,6 +235,259 @@ done: sched_setaffinity(getpid(), sizeof(old), &old); } +static struct user_data udata __attribute__((aligned(16))) = { + .a = 1, + .b = 2, +}; + +static struct user_data udata2 __attribute__((aligned(16))) = { + .a = 3, + .b = 4, +}; + +static void check_udata2(int expected) +{ + udata2.result = udata2.nested_result = 0; + usleep(1); + ASSERT_EQ(udata2.result, expected, "udata2.result"); + ASSERT_EQ(udata2.nested_result, expected, "udata2.nested_result"); +} + +static void test_uptr_basic(void) +{ + int map_fd, parent_task_fd, ev_fd; + struct value_type value = {}; + struct task_ls_uptr *skel; + pid_t child_pid, my_tid; + __u64 ev_dummy_data = 1; + int err; + + my_tid = sys_gettid(); + parent_task_fd = sys_pidfd_open(my_tid, 0); + if (!ASSERT_OK_FD(parent_task_fd, "parent_task_fd")) + return; + + ev_fd = eventfd(0, 0); + if (!ASSERT_OK_FD(ev_fd, "ev_fd")) { + close(parent_task_fd); + return; + } + + skel = task_ls_uptr__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_open_and_load")) + goto out; + + map_fd = bpf_map__fd(skel->maps.datamap); + value.udata = &udata; + value.nested.udata = &udata; + err = bpf_map_update_elem(map_fd, &parent_task_fd, &value, BPF_NOEXIST); + if (!ASSERT_OK(err, "update_elem(udata)")) + goto out; + + err = task_ls_uptr__attach(skel); + if (!ASSERT_OK(err, "skel_attach")) + goto out; + + child_pid = fork(); + if (!ASSERT_NEQ(child_pid, -1, "fork")) + goto out; + + /* Call syscall in the child process, but access the map value of + * the parent process in the BPF program to check if the user kptr + * is translated/mapped correctly. + */ + if (child_pid == 0) { + /* child */ + + /* Overwrite the user_data in the child process to check if + * the BPF program accesses the user_data of the parent. + */ + udata.a = 0; + udata.b = 0; + + /* Wait for the parent to set child_pid */ + read(ev_fd, &ev_dummy_data, sizeof(ev_dummy_data)); + exit(0); + } + + skel->bss->parent_pid = my_tid; + skel->bss->target_pid = child_pid; + + write(ev_fd, &ev_dummy_data, sizeof(ev_dummy_data)); + + err = waitpid(child_pid, NULL, 0); + ASSERT_EQ(err, child_pid, "waitpid"); + ASSERT_EQ(udata.result, MAGIC_VALUE + udata.a + udata.b, "udata.result"); + ASSERT_EQ(udata.nested_result, MAGIC_VALUE + udata.a + udata.b, "udata.nested_result"); + + skel->bss->target_pid = my_tid; + + /* update_elem: uptr changes from udata1 to udata2 */ + value.udata = &udata2; + value.nested.udata = &udata2; + err = bpf_map_update_elem(map_fd, &parent_task_fd, &value, BPF_EXIST); + if (!ASSERT_OK(err, "update_elem(udata2)")) + goto out; + check_udata2(MAGIC_VALUE + udata2.a + udata2.b); + + /* update_elem: uptr changes from udata2 uptr to NULL */ + memset(&value, 0, sizeof(value)); + err = bpf_map_update_elem(map_fd, &parent_task_fd, &value, BPF_EXIST); + if (!ASSERT_OK(err, "update_elem(udata2)")) + goto out; + check_udata2(0); + + /* update_elem: uptr changes from NULL to udata2 */ + value.udata = &udata2; + value.nested.udata = &udata2; + err = bpf_map_update_elem(map_fd, &parent_task_fd, &value, BPF_EXIST); + if (!ASSERT_OK(err, "update_elem(udata2)")) + goto out; + check_udata2(MAGIC_VALUE + udata2.a + udata2.b); + + /* Check if user programs can access the value of user kptrs + * through bpf_map_lookup_elem(). Make sure the kernel value is not + * leaked. + */ + err = bpf_map_lookup_elem(map_fd, &parent_task_fd, &value); + if (!ASSERT_OK(err, "bpf_map_lookup_elem")) + goto out; + ASSERT_EQ(value.udata, NULL, "value.udata"); + ASSERT_EQ(value.nested.udata, NULL, "value.nested.udata"); + + /* delete_elem */ + err = bpf_map_delete_elem(map_fd, &parent_task_fd); + ASSERT_OK(err, "delete_elem(udata2)"); + check_udata2(0); + + /* update_elem: add uptr back to test map_free */ + value.udata = &udata2; + value.nested.udata = &udata2; + err = bpf_map_update_elem(map_fd, &parent_task_fd, &value, BPF_NOEXIST); + ASSERT_OK(err, "update_elem(udata2)"); + +out: + task_ls_uptr__destroy(skel); + close(ev_fd); + close(parent_task_fd); +} + +static void test_uptr_across_pages(void) +{ + int page_size = getpagesize(); + struct value_type value = {}; + struct task_ls_uptr *skel; + int err, task_fd, map_fd; + void *mem; + + task_fd = sys_pidfd_open(getpid(), 0); + if (!ASSERT_OK_FD(task_fd, "task_fd")) + return; + + mem = mmap(NULL, page_size * 2, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if (!ASSERT_OK_PTR(mem, "mmap(page_size * 2)")) { + close(task_fd); + return; + } + + skel = task_ls_uptr__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_open_and_load")) + goto out; + + map_fd = bpf_map__fd(skel->maps.datamap); + value.udata = mem + page_size - offsetof(struct user_data, b); + err = bpf_map_update_elem(map_fd, &task_fd, &value, 0); + if (!ASSERT_ERR(err, "update_elem(udata)")) + goto out; + ASSERT_EQ(errno, EOPNOTSUPP, "errno"); + + value.udata = mem + page_size - sizeof(struct user_data); + err = bpf_map_update_elem(map_fd, &task_fd, &value, 0); + ASSERT_OK(err, "update_elem(udata)"); + +out: + task_ls_uptr__destroy(skel); + close(task_fd); + munmap(mem, page_size * 2); +} + +static void test_uptr_update_failure(void) +{ + struct value_lock_type value = {}; + struct uptr_update_failure *skel; + int err, task_fd, map_fd; + + task_fd = sys_pidfd_open(getpid(), 0); + if (!ASSERT_OK_FD(task_fd, "task_fd")) + return; + + skel = uptr_update_failure__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_open_and_load")) + goto out; + + map_fd = bpf_map__fd(skel->maps.datamap); + + value.udata = &udata; + err = bpf_map_update_elem(map_fd, &task_fd, &value, BPF_F_LOCK); + if (!ASSERT_ERR(err, "update_elem(udata, BPF_F_LOCK)")) + goto out; + ASSERT_EQ(errno, EOPNOTSUPP, "errno"); + + err = bpf_map_update_elem(map_fd, &task_fd, &value, BPF_EXIST); + if (!ASSERT_ERR(err, "update_elem(udata, BPF_EXIST)")) + goto out; + ASSERT_EQ(errno, ENOENT, "errno"); + + err = bpf_map_update_elem(map_fd, &task_fd, &value, BPF_NOEXIST); + if (!ASSERT_OK(err, "update_elem(udata, BPF_NOEXIST)")) + goto out; + + value.udata = &udata2; + err = bpf_map_update_elem(map_fd, &task_fd, &value, BPF_NOEXIST); + if (!ASSERT_ERR(err, "update_elem(udata2, BPF_NOEXIST)")) + goto out; + ASSERT_EQ(errno, EEXIST, "errno"); + +out: + uptr_update_failure__destroy(skel); + close(task_fd); +} + +static void test_uptr_map_failure(const char *map_name, int expected_errno) +{ + LIBBPF_OPTS(bpf_map_create_opts, create_attr); + struct uptr_map_failure *skel; + struct bpf_map *map; + struct btf *btf; + int map_fd, err; + + skel = uptr_map_failure__open(); + if (!ASSERT_OK_PTR(skel, "uptr_map_failure__open")) + return; + + map = bpf_object__find_map_by_name(skel->obj, map_name); + btf = bpf_object__btf(skel->obj); + err = btf__load_into_kernel(btf); + if (!ASSERT_OK(err, "btf__load_into_kernel")) + goto done; + + create_attr.map_flags = bpf_map__map_flags(map); + create_attr.btf_fd = btf__fd(btf); + create_attr.btf_key_type_id = bpf_map__btf_key_type_id(map); + create_attr.btf_value_type_id = bpf_map__btf_value_type_id(map); + map_fd = bpf_map_create(bpf_map__type(map), map_name, + bpf_map__key_size(map), bpf_map__value_size(map), + 0, &create_attr); + if (ASSERT_ERR_FD(map_fd, "map_create")) + ASSERT_EQ(errno, expected_errno, "errno"); + else + close(map_fd); + +done: + uptr_map_failure__destroy(skel); +} + void test_task_local_storage(void) { if (test__start_subtest("sys_enter_exit")) @@ -237,4 +498,21 @@ void test_task_local_storage(void) test_recursion(); if (test__start_subtest("nodeadlock")) test_nodeadlock(); + if (test__start_subtest("uptr_basic")) + test_uptr_basic(); + if (test__start_subtest("uptr_across_pages")) + test_uptr_across_pages(); + if (test__start_subtest("uptr_update_failure")) + test_uptr_update_failure(); + if (test__start_subtest("uptr_map_failure_e2big")) { + if (getpagesize() == PAGE_SIZE) + test_uptr_map_failure("large_uptr_map", E2BIG); + else + test__skip(); + } + if (test__start_subtest("uptr_map_failure_size0")) + test_uptr_map_failure("empty_uptr_map", EINVAL); + if (test__start_subtest("uptr_map_failure_kstruct")) + test_uptr_map_failure("kstruct_uptr_map", EINVAL); + RUN_TESTS(uptr_failure); } diff --git a/tools/testing/selftests/bpf/prog_tests/tc_change_tail.c b/tools/testing/selftests/bpf/prog_tests/tc_change_tail.c new file mode 100644 index 000000000000..74752233e779 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/tc_change_tail.c @@ -0,0 +1,62 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <error.h> +#include <test_progs.h> +#include <linux/pkt_cls.h> + +#include "test_tc_change_tail.skel.h" +#include "socket_helpers.h" + +#define LO_IFINDEX 1 + +void test_tc_change_tail(void) +{ + LIBBPF_OPTS(bpf_tcx_opts, tcx_opts); + struct test_tc_change_tail *skel = NULL; + struct bpf_link *link; + int c1, p1; + char buf[2]; + int ret; + + skel = test_tc_change_tail__open_and_load(); + if (!ASSERT_OK_PTR(skel, "test_tc_change_tail__open_and_load")) + return; + + link = bpf_program__attach_tcx(skel->progs.change_tail, LO_IFINDEX, + &tcx_opts); + if (!ASSERT_OK_PTR(link, "bpf_program__attach_tcx")) + goto destroy; + + skel->links.change_tail = link; + ret = create_pair(AF_INET, SOCK_DGRAM, &c1, &p1); + if (!ASSERT_OK(ret, "create_pair")) + goto destroy; + + ret = xsend(p1, "Tr", 2, 0); + ASSERT_EQ(ret, 2, "xsend(p1)"); + ret = recv(c1, buf, 2, 0); + ASSERT_EQ(ret, 2, "recv(c1)"); + ASSERT_EQ(skel->data->change_tail_ret, 0, "change_tail_ret"); + + ret = xsend(p1, "G", 1, 0); + ASSERT_EQ(ret, 1, "xsend(p1)"); + ret = recv(c1, buf, 2, 0); + ASSERT_EQ(ret, 1, "recv(c1)"); + ASSERT_EQ(skel->data->change_tail_ret, 0, "change_tail_ret"); + + ret = xsend(p1, "E", 1, 0); + ASSERT_EQ(ret, 1, "xsend(p1)"); + ret = recv(c1, buf, 1, 0); + ASSERT_EQ(ret, 1, "recv(c1)"); + ASSERT_EQ(skel->data->change_tail_ret, -EINVAL, "change_tail_ret"); + + ret = xsend(p1, "Z", 1, 0); + ASSERT_EQ(ret, 1, "xsend(p1)"); + ret = recv(c1, buf, 1, 0); + ASSERT_EQ(ret, 1, "recv(c1)"); + ASSERT_EQ(skel->data->change_tail_ret, -EINVAL, "change_tail_ret"); + + close(c1); + close(p1); +destroy: + test_tc_change_tail__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/tc_netkit.c b/tools/testing/selftests/bpf/prog_tests/tc_netkit.c index b9135720024c..151a4210028f 100644 --- a/tools/testing/selftests/bpf/prog_tests/tc_netkit.c +++ b/tools/testing/selftests/bpf/prog_tests/tc_netkit.c @@ -14,7 +14,9 @@ #include "netlink_helpers.h" #include "tc_helpers.h" -#define ICMP_ECHO 8 +#define MARK 42 +#define PRIO 0xeb9f +#define ICMP_ECHO 8 struct icmphdr { __u8 type; @@ -33,7 +35,7 @@ struct iplink_req { }; static int create_netkit(int mode, int policy, int peer_policy, int *ifindex, - bool same_netns) + bool same_netns, int scrub, int peer_scrub) { struct rtnl_handle rth = { .fd = -1 }; struct iplink_req req = {}; @@ -58,6 +60,8 @@ static int create_netkit(int mode, int policy, int peer_policy, int *ifindex, data = addattr_nest(&req.n, sizeof(req), IFLA_INFO_DATA); addattr32(&req.n, sizeof(req), IFLA_NETKIT_POLICY, policy); addattr32(&req.n, sizeof(req), IFLA_NETKIT_PEER_POLICY, peer_policy); + addattr32(&req.n, sizeof(req), IFLA_NETKIT_SCRUB, scrub); + addattr32(&req.n, sizeof(req), IFLA_NETKIT_PEER_SCRUB, peer_scrub); addattr32(&req.n, sizeof(req), IFLA_NETKIT_MODE, mode); addattr_nest_end(&req.n, data); addattr_nest_end(&req.n, linkinfo); @@ -118,9 +122,9 @@ static void destroy_netkit(void) static int __send_icmp(__u32 dest) { + int sock, ret, mark = MARK, prio = PRIO; struct sockaddr_in addr; struct icmphdr icmp; - int sock, ret; ret = write_sysctl("/proc/sys/net/ipv4/ping_group_range", "0 0"); if (!ASSERT_OK(ret, "write_sysctl(net.ipv4.ping_group_range)")) @@ -135,6 +139,15 @@ static int __send_icmp(__u32 dest) if (!ASSERT_OK(ret, "setsockopt(SO_BINDTODEVICE)")) goto out; + ret = setsockopt(sock, SOL_SOCKET, SO_MARK, &mark, sizeof(mark)); + if (!ASSERT_OK(ret, "setsockopt(SO_MARK)")) + goto out; + + ret = setsockopt(sock, SOL_SOCKET, SO_PRIORITY, + &prio, sizeof(prio)); + if (!ASSERT_OK(ret, "setsockopt(SO_PRIORITY)")) + goto out; + memset(&addr, 0, sizeof(addr)); addr.sin_family = AF_INET; addr.sin_addr.s_addr = htonl(dest); @@ -171,7 +184,8 @@ void serial_test_tc_netkit_basic(void) int err, ifindex; err = create_netkit(NETKIT_L2, NETKIT_PASS, NETKIT_PASS, - &ifindex, false); + &ifindex, false, NETKIT_SCRUB_DEFAULT, + NETKIT_SCRUB_DEFAULT); if (err) return; @@ -285,7 +299,8 @@ static void serial_test_tc_netkit_multi_links_target(int mode, int target) int err, ifindex; err = create_netkit(mode, NETKIT_PASS, NETKIT_PASS, - &ifindex, false); + &ifindex, false, NETKIT_SCRUB_DEFAULT, + NETKIT_SCRUB_DEFAULT); if (err) return; @@ -413,7 +428,8 @@ static void serial_test_tc_netkit_multi_opts_target(int mode, int target) int err, ifindex; err = create_netkit(mode, NETKIT_PASS, NETKIT_PASS, - &ifindex, false); + &ifindex, false, NETKIT_SCRUB_DEFAULT, + NETKIT_SCRUB_DEFAULT); if (err) return; @@ -527,7 +543,8 @@ void serial_test_tc_netkit_device(void) int err, ifindex, ifindex2; err = create_netkit(NETKIT_L3, NETKIT_PASS, NETKIT_PASS, - &ifindex, true); + &ifindex, true, NETKIT_SCRUB_DEFAULT, + NETKIT_SCRUB_DEFAULT); if (err) return; @@ -638,7 +655,8 @@ static void serial_test_tc_netkit_neigh_links_target(int mode, int target) int err, ifindex; err = create_netkit(mode, NETKIT_PASS, NETKIT_PASS, - &ifindex, false); + &ifindex, false, NETKIT_SCRUB_DEFAULT, + NETKIT_SCRUB_DEFAULT); if (err) return; @@ -715,7 +733,8 @@ static void serial_test_tc_netkit_pkt_type_mode(int mode) struct bpf_link *link; err = create_netkit(mode, NETKIT_PASS, NETKIT_PASS, - &ifindex, true); + &ifindex, true, NETKIT_SCRUB_DEFAULT, + NETKIT_SCRUB_DEFAULT); if (err) return; @@ -779,3 +798,60 @@ void serial_test_tc_netkit_pkt_type(void) serial_test_tc_netkit_pkt_type_mode(NETKIT_L2); serial_test_tc_netkit_pkt_type_mode(NETKIT_L3); } + +static void serial_test_tc_netkit_scrub_type(int scrub) +{ + LIBBPF_OPTS(bpf_netkit_opts, optl); + struct test_tc_link *skel; + struct bpf_link *link; + int err, ifindex; + + err = create_netkit(NETKIT_L2, NETKIT_PASS, NETKIT_PASS, + &ifindex, false, scrub, scrub); + if (err) + return; + + skel = test_tc_link__open(); + if (!ASSERT_OK_PTR(skel, "skel_open")) + goto cleanup; + + ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc8, + BPF_NETKIT_PRIMARY), 0, "tc8_attach_type"); + + err = test_tc_link__load(skel); + if (!ASSERT_OK(err, "skel_load")) + goto cleanup; + + assert_mprog_count_ifindex(ifindex, BPF_NETKIT_PRIMARY, 0); + assert_mprog_count_ifindex(ifindex, BPF_NETKIT_PEER, 0); + + ASSERT_EQ(skel->bss->seen_tc8, false, "seen_tc8"); + + link = bpf_program__attach_netkit(skel->progs.tc8, ifindex, &optl); + if (!ASSERT_OK_PTR(link, "link_attach")) + goto cleanup; + + skel->links.tc8 = link; + + assert_mprog_count_ifindex(ifindex, BPF_NETKIT_PRIMARY, 1); + assert_mprog_count_ifindex(ifindex, BPF_NETKIT_PEER, 0); + + tc_skel_reset_all_seen(skel); + ASSERT_EQ(send_icmp(), 0, "icmp_pkt"); + + ASSERT_EQ(skel->bss->seen_tc8, true, "seen_tc8"); + ASSERT_EQ(skel->bss->mark, scrub == NETKIT_SCRUB_NONE ? MARK : 0, "mark"); + ASSERT_EQ(skel->bss->prio, scrub == NETKIT_SCRUB_NONE ? PRIO : 0, "prio"); +cleanup: + test_tc_link__destroy(skel); + + assert_mprog_count_ifindex(ifindex, BPF_NETKIT_PRIMARY, 0); + assert_mprog_count_ifindex(ifindex, BPF_NETKIT_PEER, 0); + destroy_netkit(); +} + +void serial_test_tc_netkit_scrub(void) +{ + serial_test_tc_netkit_scrub_type(NETKIT_SCRUB_DEFAULT); + serial_test_tc_netkit_scrub_type(NETKIT_SCRUB_NONE); +} diff --git a/tools/testing/selftests/bpf/prog_tests/test_csum_diff.c b/tools/testing/selftests/bpf/prog_tests/test_csum_diff.c new file mode 100644 index 000000000000..107b20d43e83 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/test_csum_diff.c @@ -0,0 +1,408 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright Amazon.com Inc. or its affiliates */ +#include <test_progs.h> +#include "csum_diff_test.skel.h" + +#define BUFF_SZ 512 + +struct testcase { + unsigned long long to_buff[BUFF_SZ / 8]; + unsigned int to_buff_len; + unsigned long long from_buff[BUFF_SZ / 8]; + unsigned int from_buff_len; + unsigned short seed; + unsigned short result; +}; + +#define NUM_PUSH_TESTS 4 + +struct testcase push_tests[NUM_PUSH_TESTS] = { + { + .to_buff = { + 0xdeadbeefdeadbeef, + }, + .to_buff_len = 8, + .from_buff = {}, + .from_buff_len = 0, + .seed = 0, + .result = 0x3b3b + }, + { + .to_buff = { + 0xdeadbeefdeadbeef, + 0xbeefdeadbeefdead, + }, + .to_buff_len = 16, + .from_buff = {}, + .from_buff_len = 0, + .seed = 0x1234, + .result = 0x88aa + }, + { + .to_buff = { + 0xdeadbeefdeadbeef, + 0xbeefdeadbeefdead, + }, + .to_buff_len = 15, + .from_buff = {}, + .from_buff_len = 0, + .seed = 0x1234, +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + .result = 0xcaa9 +#else + .result = 0x87fd +#endif + }, + { + .to_buff = { + 0x327b23c66b8b4567, + 0x66334873643c9869, + 0x19495cff74b0dc51, + 0x625558ec2ae8944a, + 0x46e87ccd238e1f29, + 0x507ed7ab3d1b58ba, + 0x41b71efb2eb141f2, + 0x7545e14679e2a9e3, + 0x5bd062c2515f007c, + 0x4db127f812200854, + 0x1f16e9e80216231b, + 0x66ef438d1190cde7, + 0x3352255a140e0f76, + 0x0ded7263109cf92e, + 0x1befd79f7fdcc233, + 0x6b68079a41a7c4c9, + 0x25e45d324e6afb66, + 0x431bd7b7519b500d, + 0x7c83e4583f2dba31, + 0x62bbd95a257130a3, + 0x628c895d436c6125, + 0x721da317333ab105, + 0x2d1d5ae92443a858, + 0x75a2a8d46763845e, + 0x79838cb208edbdab, + 0x0b03e0c64353d0cd, + 0x54e49eb4189a769b, + 0x2ca8861171f32454, + 0x02901d820836c40e, + 0x081386413a95f874, + 0x7c3dbd3d1e7ff521, + 0x6ceaf087737b8ddc, + 0x4516dde922221a70, + 0x614fd4a13006c83e, + 0x5577f8e1419ac241, + 0x05072367440badfc, + 0x77465f013804823e, + 0x5c482a977724c67e, + 0x5e884adc2463b9ea, + 0x2d51779651ead36b, + 0x153ea438580bd78f, + 0x70a64e2a3855585c, + 0x2a487cb06a2342ec, + 0x725a06fb1d4ed43b, + 0x57e4ccaf2cd89a32, + 0x4b588f547a6d8d3c, + 0x6de91b18542289ec, + 0x7644a45c38437fdb, + 0x684a481a32fff902, + 0x749abb43579478fe, + 0x1ba026fa3dc240fb, + 0x75c6c33a79a1deaa, + 0x70c6a52912e685fb, + 0x374a3fe6520eedd1, + 0x23f9c13c4f4ef005, + 0x275ac794649bb77c, + 0x1cf10fd839386575, + 0x235ba861180115be, + 0x354fe9f947398c89, + 0x741226bb15b5af5c, + 0x10233c990d34b6a8, + 0x615740953f6ab60f, + 0x77ae35eb7e0c57b1, + 0x310c50b3579be4f1, + }, + .to_buff_len = 512, + .from_buff = {}, + .from_buff_len = 0, + .seed = 0xffff, + .result = 0xca45 + }, +}; + +#define NUM_PULL_TESTS 4 + +struct testcase pull_tests[NUM_PULL_TESTS] = { + { + .from_buff = { + 0xdeadbeefdeadbeef, + }, + .from_buff_len = 8, + .to_buff = {}, + .to_buff_len = 0, + .seed = 0, + .result = 0xc4c4 + }, + { + .from_buff = { + 0xdeadbeefdeadbeef, + 0xbeefdeadbeefdead, + }, + .from_buff_len = 16, + .to_buff = {}, + .to_buff_len = 0, + .seed = 0x1234, + .result = 0x9bbd + }, + { + .from_buff = { + 0xdeadbeefdeadbeef, + 0xbeefdeadbeefdead, + }, + .from_buff_len = 15, + .to_buff = {}, + .to_buff_len = 0, + .seed = 0x1234, +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + .result = 0x59be +#else + .result = 0x9c6a +#endif + }, + { + .from_buff = { + 0x327b23c66b8b4567, + 0x66334873643c9869, + 0x19495cff74b0dc51, + 0x625558ec2ae8944a, + 0x46e87ccd238e1f29, + 0x507ed7ab3d1b58ba, + 0x41b71efb2eb141f2, + 0x7545e14679e2a9e3, + 0x5bd062c2515f007c, + 0x4db127f812200854, + 0x1f16e9e80216231b, + 0x66ef438d1190cde7, + 0x3352255a140e0f76, + 0x0ded7263109cf92e, + 0x1befd79f7fdcc233, + 0x6b68079a41a7c4c9, + 0x25e45d324e6afb66, + 0x431bd7b7519b500d, + 0x7c83e4583f2dba31, + 0x62bbd95a257130a3, + 0x628c895d436c6125, + 0x721da317333ab105, + 0x2d1d5ae92443a858, + 0x75a2a8d46763845e, + 0x79838cb208edbdab, + 0x0b03e0c64353d0cd, + 0x54e49eb4189a769b, + 0x2ca8861171f32454, + 0x02901d820836c40e, + 0x081386413a95f874, + 0x7c3dbd3d1e7ff521, + 0x6ceaf087737b8ddc, + 0x4516dde922221a70, + 0x614fd4a13006c83e, + 0x5577f8e1419ac241, + 0x05072367440badfc, + 0x77465f013804823e, + 0x5c482a977724c67e, + 0x5e884adc2463b9ea, + 0x2d51779651ead36b, + 0x153ea438580bd78f, + 0x70a64e2a3855585c, + 0x2a487cb06a2342ec, + 0x725a06fb1d4ed43b, + 0x57e4ccaf2cd89a32, + 0x4b588f547a6d8d3c, + 0x6de91b18542289ec, + 0x7644a45c38437fdb, + 0x684a481a32fff902, + 0x749abb43579478fe, + 0x1ba026fa3dc240fb, + 0x75c6c33a79a1deaa, + 0x70c6a52912e685fb, + 0x374a3fe6520eedd1, + 0x23f9c13c4f4ef005, + 0x275ac794649bb77c, + 0x1cf10fd839386575, + 0x235ba861180115be, + 0x354fe9f947398c89, + 0x741226bb15b5af5c, + 0x10233c990d34b6a8, + 0x615740953f6ab60f, + 0x77ae35eb7e0c57b1, + 0x310c50b3579be4f1, + }, + .from_buff_len = 512, + .to_buff = {}, + .to_buff_len = 0, + .seed = 0xffff, + .result = 0x35ba + }, +}; + +#define NUM_DIFF_TESTS 4 + +struct testcase diff_tests[NUM_DIFF_TESTS] = { + { + .from_buff = { + 0xdeadbeefdeadbeef, + }, + .from_buff_len = 8, + .to_buff = { + 0xabababababababab, + }, + .to_buff_len = 8, + .seed = 0, + .result = 0x7373 + }, + { + .from_buff = { + 0xdeadbeefdeadbeef, + }, + .from_buff_len = 7, + .to_buff = { + 0xabababababababab, + }, + .to_buff_len = 7, + .seed = 0, +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + .result = 0xa673 +#else + .result = 0x73b7 +#endif + }, + { + .from_buff = { + 0, + }, + .from_buff_len = 8, + .to_buff = { + 0xabababababababab, + }, + .to_buff_len = 8, + .seed = 0, + .result = 0xaeae + }, + { + .from_buff = { + 0xdeadbeefdeadbeef + }, + .from_buff_len = 8, + .to_buff = { + 0, + }, + .to_buff_len = 8, + .seed = 0xffff, + .result = 0xc4c4 + }, +}; + +#define NUM_EDGE_TESTS 4 + +struct testcase edge_tests[NUM_EDGE_TESTS] = { + { + .from_buff = {}, + .from_buff_len = 0, + .to_buff = {}, + .to_buff_len = 0, + .seed = 0, + .result = 0 + }, + { + .from_buff = { + 0x1234 + }, + .from_buff_len = 0, + .to_buff = { + 0x1234 + }, + .to_buff_len = 0, + .seed = 0, + .result = 0 + }, + { + .from_buff = {}, + .from_buff_len = 0, + .to_buff = {}, + .to_buff_len = 0, + .seed = 0x1234, + .result = 0x1234 + }, + { + .from_buff = {}, + .from_buff_len = 512, + .to_buff = {}, + .to_buff_len = 0, + .seed = 0xffff, + .result = 0xffff + }, +}; + +static unsigned short trigger_csum_diff(const struct csum_diff_test *skel) +{ + u8 tmp_out[64 << 2] = {}; + u8 tmp_in[64] = {}; + int err; + int pfd; + + LIBBPF_OPTS(bpf_test_run_opts, topts, + .data_in = tmp_in, + .data_size_in = sizeof(tmp_in), + .data_out = tmp_out, + .data_size_out = sizeof(tmp_out), + .repeat = 1, + ); + pfd = bpf_program__fd(skel->progs.compute_checksum); + err = bpf_prog_test_run_opts(pfd, &topts); + if (err) + return -1; + + return skel->bss->result; +} + +static void test_csum_diff(struct testcase *tests, int num_tests) +{ + struct csum_diff_test *skel; + unsigned short got; + int err; + + for (int i = 0; i < num_tests; i++) { + skel = csum_diff_test__open(); + if (!ASSERT_OK_PTR(skel, "csum_diff_test open")) + return; + + skel->rodata->to_buff_len = tests[i].to_buff_len; + skel->rodata->from_buff_len = tests[i].from_buff_len; + + err = csum_diff_test__load(skel); + if (!ASSERT_EQ(err, 0, "csum_diff_test load")) + goto out; + + memcpy(skel->bss->to_buff, tests[i].to_buff, tests[i].to_buff_len); + memcpy(skel->bss->from_buff, tests[i].from_buff, tests[i].from_buff_len); + skel->bss->seed = tests[i].seed; + + got = trigger_csum_diff(skel); + ASSERT_EQ(got, tests[i].result, "csum_diff result"); + + csum_diff_test__destroy(skel); + } + + return; +out: + csum_diff_test__destroy(skel); +} + +void test_test_csum_diff(void) +{ + if (test__start_subtest("csum_diff_push")) + test_csum_diff(push_tests, NUM_PUSH_TESTS); + if (test__start_subtest("csum_diff_pull")) + test_csum_diff(pull_tests, NUM_PULL_TESTS); + if (test__start_subtest("csum_diff_diff")) + test_csum_diff(diff_tests, NUM_DIFF_TESTS); + if (test__start_subtest("csum_diff_edge")) + test_csum_diff(edge_tests, NUM_EDGE_TESTS); +} diff --git a/tools/testing/selftests/bpf/prog_tests/timer_lockup.c b/tools/testing/selftests/bpf/prog_tests/timer_lockup.c index 871d16cb95cf..1a2f99596916 100644 --- a/tools/testing/selftests/bpf/prog_tests/timer_lockup.c +++ b/tools/testing/selftests/bpf/prog_tests/timer_lockup.c @@ -5,6 +5,7 @@ #include <test_progs.h> #include <pthread.h> #include <network_helpers.h> +#include <sys/sysinfo.h> #include "timer_lockup.skel.h" @@ -52,6 +53,11 @@ void test_timer_lockup(void) pthread_t thrds[2]; void *ret; + if (get_nprocs() < 2) { + test__skip(); + return; + } + skel = timer_lockup__open_and_load(); if (!ASSERT_OK_PTR(skel, "timer_lockup__open_and_load")) return; diff --git a/tools/testing/selftests/bpf/prog_tests/token.c b/tools/testing/selftests/bpf/prog_tests/token.c index fe86e4fdb89c..c3ab9b6fb069 100644 --- a/tools/testing/selftests/bpf/prog_tests/token.c +++ b/tools/testing/selftests/bpf/prog_tests/token.c @@ -828,8 +828,12 @@ static int userns_obj_priv_btf_success(int mnt_fd, struct token_lsm *lsm_skel) return validate_struct_ops_load(mnt_fd, true /* should succeed */); } +static const char *token_bpffs_custom_dir() +{ + return getenv("BPF_SELFTESTS_BPF_TOKEN_DIR") ?: "/tmp/bpf-token-fs"; +} + #define TOKEN_ENVVAR "LIBBPF_BPF_TOKEN_PATH" -#define TOKEN_BPFFS_CUSTOM "/bpf-token-fs" static int userns_obj_priv_implicit_token(int mnt_fd, struct token_lsm *lsm_skel) { @@ -892,6 +896,7 @@ static int userns_obj_priv_implicit_token(int mnt_fd, struct token_lsm *lsm_skel static int userns_obj_priv_implicit_token_envvar(int mnt_fd, struct token_lsm *lsm_skel) { + const char *custom_dir = token_bpffs_custom_dir(); LIBBPF_OPTS(bpf_object_open_opts, opts); struct dummy_st_ops_success *skel; int err; @@ -909,10 +914,10 @@ static int userns_obj_priv_implicit_token_envvar(int mnt_fd, struct token_lsm *l * BPF token implicitly, unless pointed to it through * LIBBPF_BPF_TOKEN_PATH envvar */ - rmdir(TOKEN_BPFFS_CUSTOM); - if (!ASSERT_OK(mkdir(TOKEN_BPFFS_CUSTOM, 0777), "mkdir_bpffs_custom")) + rmdir(custom_dir); + if (!ASSERT_OK(mkdir(custom_dir, 0777), "mkdir_bpffs_custom")) goto err_out; - err = sys_move_mount(mnt_fd, "", AT_FDCWD, TOKEN_BPFFS_CUSTOM, MOVE_MOUNT_F_EMPTY_PATH); + err = sys_move_mount(mnt_fd, "", AT_FDCWD, custom_dir, MOVE_MOUNT_F_EMPTY_PATH); if (!ASSERT_OK(err, "move_mount_bpffs")) goto err_out; @@ -925,7 +930,7 @@ static int userns_obj_priv_implicit_token_envvar(int mnt_fd, struct token_lsm *l goto err_out; } - err = setenv(TOKEN_ENVVAR, TOKEN_BPFFS_CUSTOM, 1 /*overwrite*/); + err = setenv(TOKEN_ENVVAR, custom_dir, 1 /*overwrite*/); if (!ASSERT_OK(err, "setenv_token_path")) goto err_out; @@ -951,11 +956,11 @@ static int userns_obj_priv_implicit_token_envvar(int mnt_fd, struct token_lsm *l if (!ASSERT_ERR(err, "obj_empty_token_path_load")) goto err_out; - rmdir(TOKEN_BPFFS_CUSTOM); + rmdir(custom_dir); unsetenv(TOKEN_ENVVAR); return 0; err_out: - rmdir(TOKEN_BPFFS_CUSTOM); + rmdir(custom_dir); unsetenv(TOKEN_ENVVAR); return -EINVAL; } diff --git a/tools/testing/selftests/bpf/prog_tests/uprobe_multi_test.c b/tools/testing/selftests/bpf/prog_tests/uprobe_multi_test.c index 844f6fc8487b..2ee17ef1dae2 100644 --- a/tools/testing/selftests/bpf/prog_tests/uprobe_multi_test.c +++ b/tools/testing/selftests/bpf/prog_tests/uprobe_multi_test.c @@ -8,6 +8,11 @@ #include "uprobe_multi_usdt.skel.h" #include "uprobe_multi_consumers.skel.h" #include "uprobe_multi_pid_filter.skel.h" +#include "uprobe_multi_session.skel.h" +#include "uprobe_multi_session_single.skel.h" +#include "uprobe_multi_session_cookie.skel.h" +#include "uprobe_multi_session_recursive.skel.h" +#include "uprobe_multi_verifier.skel.h" #include "bpf/libbpf_internal.h" #include "testing_helpers.h" #include "../sdt.h" @@ -34,6 +39,12 @@ noinline void usdt_trigger(void) STAP_PROBE(test, pid_filter_usdt); } +noinline void uprobe_session_recursive(int i) +{ + if (i) + uprobe_session_recursive(i - 1); +} + struct child { int go[2]; int c2p[2]; /* child -> parent channel */ @@ -125,7 +136,7 @@ static void *child_thread(void *ctx) struct child *child = ctx; int c = 0, err; - child->tid = syscall(SYS_gettid); + child->tid = sys_gettid(); /* let parent know we are ready */ err = write(child->c2p[1], &c, 1); @@ -778,7 +789,7 @@ get_link(struct uprobe_multi_consumers *skel, int link) } } -static int uprobe_attach(struct uprobe_multi_consumers *skel, int idx) +static int uprobe_attach(struct uprobe_multi_consumers *skel, int idx, unsigned long offset) { struct bpf_program *prog = get_program(skel, idx); struct bpf_link **link = get_link(skel, idx); @@ -787,15 +798,19 @@ static int uprobe_attach(struct uprobe_multi_consumers *skel, int idx) if (!prog || !link) return -1; + opts.offsets = &offset; + opts.cnt = 1; + /* - * bit/prog: 0,1 uprobe entry - * bit/prog: 2,3 uprobe return + * bit/prog: 0 uprobe entry + * bit/prog: 1 uprobe return + * bit/prog: 2 uprobe session without return + * bit/prog: 3 uprobe session with return */ - opts.retprobe = idx == 2 || idx == 3; + opts.retprobe = idx == 1; + opts.session = idx == 2 || idx == 3; - *link = bpf_program__attach_uprobe_multi(prog, 0, "/proc/self/exe", - "uprobe_consumer_test", - &opts); + *link = bpf_program__attach_uprobe_multi(prog, 0, "/proc/self/exe", NULL, &opts); if (!ASSERT_OK_PTR(*link, "bpf_program__attach_uprobe_multi")) return -1; return 0; @@ -816,7 +831,8 @@ static bool test_bit(int bit, unsigned long val) noinline int uprobe_consumer_test(struct uprobe_multi_consumers *skel, - unsigned long before, unsigned long after) + unsigned long before, unsigned long after, + unsigned long offset) { int idx; @@ -829,89 +845,174 @@ uprobe_consumer_test(struct uprobe_multi_consumers *skel, /* ... and attach all new programs in 'after' state */ for (idx = 0; idx < 4; idx++) { if (!test_bit(idx, before) && test_bit(idx, after)) { - if (!ASSERT_OK(uprobe_attach(skel, idx), "uprobe_attach_after")) + if (!ASSERT_OK(uprobe_attach(skel, idx, offset), "uprobe_attach_after")) return -1; } } return 0; } -static void consumer_test(struct uprobe_multi_consumers *skel, - unsigned long before, unsigned long after) +/* + * We generate 16 consumer_testX functions that will have uprobe installed on + * and will be called in separate threads. All function pointer are stored in + * "consumers" section and each thread will pick one function based on index. + */ + +extern const void *__start_consumers; + +#define __CONSUMER_TEST(func) \ +noinline int func(struct uprobe_multi_consumers *skel, unsigned long before, \ + unsigned long after, unsigned long offset) \ +{ \ + return uprobe_consumer_test(skel, before, after, offset); \ +} \ +void *__ ## func __used __attribute__((section("consumers"))) = (void *) func; + +#define CONSUMER_TEST(func) __CONSUMER_TEST(func) + +#define C1 CONSUMER_TEST(__PASTE(consumer_test, __COUNTER__)) +#define C4 C1 C1 C1 C1 +#define C16 C4 C4 C4 C4 + +C16 + +typedef int (*test_t)(struct uprobe_multi_consumers *, unsigned long, + unsigned long, unsigned long); + +static int consumer_test(struct uprobe_multi_consumers *skel, + unsigned long before, unsigned long after, + test_t test, unsigned long offset) { - int err, idx; + int err, idx, ret = -1; printf("consumer_test before %lu after %lu\n", before, after); /* 'before' is each, we attach uprobe for every set idx */ for (idx = 0; idx < 4; idx++) { if (test_bit(idx, before)) { - if (!ASSERT_OK(uprobe_attach(skel, idx), "uprobe_attach_before")) + if (!ASSERT_OK(uprobe_attach(skel, idx, offset), "uprobe_attach_before")) goto cleanup; } } - err = uprobe_consumer_test(skel, before, after); + err = test(skel, before, after, offset); if (!ASSERT_EQ(err, 0, "uprobe_consumer_test")) goto cleanup; for (idx = 0; idx < 4; idx++) { + bool uret_stays, uret_survives; const char *fmt = "BUG"; __u64 val = 0; - if (idx < 2) { + switch (idx) { + case 0: /* * uprobe entry * +1 if define in 'before' */ if (test_bit(idx, before)) val++; - fmt = "prog 0/1: uprobe"; - } else { + fmt = "prog 0: uprobe"; + break; + case 1: /* - * uprobe return is tricky ;-) - * - * to trigger uretprobe consumer, the uretprobe needs to be installed, - * which means one of the 'return' uprobes was alive when probe was hit: - * - * idxs: 2/3 uprobe return in 'installed' mask - * - * in addition if 'after' state removes everything that was installed in - * 'before' state, then uprobe kernel object goes away and return uprobe - * is not installed and we won't hit it even if it's in 'after' state. + * To trigger uretprobe consumer, the uretprobe under test either stayed from + * before to after (uret_stays + test_bit) or uretprobe instance survived and + * we have uretprobe active in after (uret_survives + test_bit) */ - unsigned long had_uretprobes = before & 0b1100; /* is uretprobe installed */ - unsigned long probe_preserved = before & after; /* did uprobe go away */ + uret_stays = before & after & 0b0110; + uret_survives = ((before & 0b0110) && (after & 0b0110) && (before & 0b1001)); - if (had_uretprobes && probe_preserved && test_bit(idx, after)) + if ((uret_stays || uret_survives) && test_bit(idx, after)) val++; - fmt = "idx 2/3: uretprobe"; + fmt = "prog 1: uretprobe"; + break; + case 2: + /* + * session with return + * +1 if defined in 'before' + * +1 if defined in 'after' + */ + if (test_bit(idx, before)) { + val++; + if (test_bit(idx, after)) + val++; + } + fmt = "prog 2: session with return"; + break; + case 3: + /* + * session without return + * +1 if defined in 'before' + */ + if (test_bit(idx, before)) + val++; + fmt = "prog 3: session with NO return"; + break; } - ASSERT_EQ(skel->bss->uprobe_result[idx], val, fmt); + if (!ASSERT_EQ(skel->bss->uprobe_result[idx], val, fmt)) + goto cleanup; skel->bss->uprobe_result[idx] = 0; } + ret = 0; + cleanup: for (idx = 0; idx < 4; idx++) uprobe_detach(skel, idx); + return ret; } -static void test_consumers(void) +#define CONSUMER_MAX 16 + +/* + * Each thread runs 1/16 of the load by running test for single + * 'before' number (based on thread index) and full scale of + * 'after' numbers. + */ +static void *consumer_thread(void *arg) { + unsigned long idx = (unsigned long) arg; struct uprobe_multi_consumers *skel; - int before, after; + unsigned long offset; + const void *func; + int after; skel = uprobe_multi_consumers__open_and_load(); if (!ASSERT_OK_PTR(skel, "uprobe_multi_consumers__open_and_load")) - return; + return NULL; + + func = *((&__start_consumers) + idx); + + offset = get_uprobe_offset(func); + if (!ASSERT_GE(offset, 0, "uprobe_offset")) + goto out; + + for (after = 0; after < CONSUMER_MAX; after++) + if (consumer_test(skel, idx, after, func, offset)) + goto out; + +out: + uprobe_multi_consumers__destroy(skel); + return NULL; +} + + +static void test_consumers(void) +{ + pthread_t pt[CONSUMER_MAX]; + unsigned long idx; + int err; /* * The idea of this test is to try all possible combinations of * uprobes consumers attached on single function. * - * - 2 uprobe entry consumer - * - 2 uprobe exit consumers + * - 1 uprobe entry consumer + * - 1 uprobe exit consumer + * - 1 uprobe session with return + * - 1 uprobe session without return * * The test uses 4 uprobes attached on single function, but that * translates into single uprobe with 4 consumers in kernel. @@ -919,37 +1020,38 @@ static void test_consumers(void) * The before/after values present the state of attached consumers * before and after the probed function: * - * bit/prog 0,1 : uprobe entry - * bit/prog 2,3 : uprobe return + * bit/prog 0 : uprobe entry + * bit/prog 1 : uprobe return * * For example for: * - * before = 0b0101 - * after = 0b0110 + * before = 0b01 + * after = 0b10 * * it means that before we call 'uprobe_consumer_test' we attach * uprobes defined in 'before' value: * - * - bit/prog 0: uprobe entry - * - bit/prog 2: uprobe return + * - bit/prog 1: uprobe entry * * uprobe_consumer_test is called and inside it we attach and detach * uprobes based on 'after' value: * - * - bit/prog 0: stays untouched - * - bit/prog 2: uprobe return is detached + * - bit/prog 0: is detached + * - bit/prog 1: is attached * * uprobe_consumer_test returns and we check counters values increased * by bpf programs on each uprobe to match the expected count based on * before/after bits. */ - for (before = 0; before < 16; before++) { - for (after = 0; after < 16; after++) - consumer_test(skel, before, after); + for (idx = 0; idx < CONSUMER_MAX; idx++) { + err = pthread_create(&pt[idx], NULL, consumer_thread, (void *) idx); + if (!ASSERT_OK(err, "pthread_create")) + break; } - uprobe_multi_consumers__destroy(skel); + while (idx) + pthread_join(pt[--idx], NULL); } static struct bpf_program *uprobe_multi_program(struct uprobe_multi_pid_filter *skel, int idx) @@ -1016,6 +1118,156 @@ static void test_pid_filter_process(bool clone_vm) uprobe_multi_pid_filter__destroy(skel); } +static void test_session_skel_api(void) +{ + struct uprobe_multi_session *skel = NULL; + LIBBPF_OPTS(bpf_kprobe_multi_opts, opts); + struct bpf_link *link = NULL; + int err; + + skel = uprobe_multi_session__open_and_load(); + if (!ASSERT_OK_PTR(skel, "uprobe_multi_session__open_and_load")) + goto cleanup; + + skel->bss->pid = getpid(); + skel->bss->user_ptr = test_data; + + err = uprobe_multi_session__attach(skel); + if (!ASSERT_OK(err, "uprobe_multi_session__attach")) + goto cleanup; + + /* trigger all probes */ + skel->bss->uprobe_multi_func_1_addr = (__u64) uprobe_multi_func_1; + skel->bss->uprobe_multi_func_2_addr = (__u64) uprobe_multi_func_2; + skel->bss->uprobe_multi_func_3_addr = (__u64) uprobe_multi_func_3; + + uprobe_multi_func_1(); + uprobe_multi_func_2(); + uprobe_multi_func_3(); + + /* + * We expect 2 for uprobe_multi_func_2 because it runs both entry/return probe, + * uprobe_multi_func_[13] run just the entry probe. All expected numbers are + * doubled, because we run extra test for sleepable session. + */ + ASSERT_EQ(skel->bss->uprobe_session_result[0], 2, "uprobe_multi_func_1_result"); + ASSERT_EQ(skel->bss->uprobe_session_result[1], 4, "uprobe_multi_func_2_result"); + ASSERT_EQ(skel->bss->uprobe_session_result[2], 2, "uprobe_multi_func_3_result"); + + /* We expect increase in 3 entry and 1 return session calls -> 4 */ + ASSERT_EQ(skel->bss->uprobe_multi_sleep_result, 4, "uprobe_multi_sleep_result"); + +cleanup: + bpf_link__destroy(link); + uprobe_multi_session__destroy(skel); +} + +static void test_session_single_skel_api(void) +{ + struct uprobe_multi_session_single *skel = NULL; + LIBBPF_OPTS(bpf_kprobe_multi_opts, opts); + int err; + + skel = uprobe_multi_session_single__open_and_load(); + if (!ASSERT_OK_PTR(skel, "uprobe_multi_session_single__open_and_load")) + goto cleanup; + + skel->bss->pid = getpid(); + + err = uprobe_multi_session_single__attach(skel); + if (!ASSERT_OK(err, "uprobe_multi_session_single__attach")) + goto cleanup; + + uprobe_multi_func_1(); + + /* + * We expect consumer 0 and 2 to trigger just entry handler (value 1) + * and consumer 1 to hit both (value 2). + */ + ASSERT_EQ(skel->bss->uprobe_session_result[0], 1, "uprobe_session_result_0"); + ASSERT_EQ(skel->bss->uprobe_session_result[1], 2, "uprobe_session_result_1"); + ASSERT_EQ(skel->bss->uprobe_session_result[2], 1, "uprobe_session_result_2"); + +cleanup: + uprobe_multi_session_single__destroy(skel); +} + +static void test_session_cookie_skel_api(void) +{ + struct uprobe_multi_session_cookie *skel = NULL; + int err; + + skel = uprobe_multi_session_cookie__open_and_load(); + if (!ASSERT_OK_PTR(skel, "uprobe_multi_session_cookie__open_and_load")) + goto cleanup; + + skel->bss->pid = getpid(); + + err = uprobe_multi_session_cookie__attach(skel); + if (!ASSERT_OK(err, "uprobe_multi_session_cookie__attach")) + goto cleanup; + + /* trigger all probes */ + uprobe_multi_func_1(); + uprobe_multi_func_2(); + uprobe_multi_func_3(); + + ASSERT_EQ(skel->bss->test_uprobe_1_result, 1, "test_uprobe_1_result"); + ASSERT_EQ(skel->bss->test_uprobe_2_result, 2, "test_uprobe_2_result"); + ASSERT_EQ(skel->bss->test_uprobe_3_result, 3, "test_uprobe_3_result"); + +cleanup: + uprobe_multi_session_cookie__destroy(skel); +} + +static void test_session_recursive_skel_api(void) +{ + struct uprobe_multi_session_recursive *skel = NULL; + int i, err; + + skel = uprobe_multi_session_recursive__open_and_load(); + if (!ASSERT_OK_PTR(skel, "uprobe_multi_session_recursive__open_and_load")) + goto cleanup; + + skel->bss->pid = getpid(); + + err = uprobe_multi_session_recursive__attach(skel); + if (!ASSERT_OK(err, "uprobe_multi_session_recursive__attach")) + goto cleanup; + + for (i = 0; i < ARRAY_SIZE(skel->bss->test_uprobe_cookie_entry); i++) + skel->bss->test_uprobe_cookie_entry[i] = i + 1; + + uprobe_session_recursive(5); + + /* + * entry uprobe: + * uprobe_session_recursive(5) { *cookie = 1, return 0 + * uprobe_session_recursive(4) { *cookie = 2, return 1 + * uprobe_session_recursive(3) { *cookie = 3, return 0 + * uprobe_session_recursive(2) { *cookie = 4, return 1 + * uprobe_session_recursive(1) { *cookie = 5, return 0 + * uprobe_session_recursive(0) { *cookie = 6, return 1 + * return uprobe: + * } i = 0 not executed + * } i = 1 test_uprobe_cookie_return[0] = 5 + * } i = 2 not executed + * } i = 3 test_uprobe_cookie_return[1] = 3 + * } i = 4 not executed + * } i = 5 test_uprobe_cookie_return[2] = 1 + */ + + ASSERT_EQ(skel->bss->idx_entry, 6, "idx_entry"); + ASSERT_EQ(skel->bss->idx_return, 3, "idx_return"); + + ASSERT_EQ(skel->bss->test_uprobe_cookie_return[0], 5, "test_uprobe_cookie_return[0]"); + ASSERT_EQ(skel->bss->test_uprobe_cookie_return[1], 3, "test_uprobe_cookie_return[1]"); + ASSERT_EQ(skel->bss->test_uprobe_cookie_return[2], 1, "test_uprobe_cookie_return[2]"); + +cleanup: + uprobe_multi_session_recursive__destroy(skel); +} + static void test_bench_attach_uprobe(void) { long attach_start_ns = 0, attach_end_ns = 0; @@ -1112,4 +1364,13 @@ void test_uprobe_multi_test(void) test_pid_filter_process(false); if (test__start_subtest("filter_clone_vm")) test_pid_filter_process(true); + if (test__start_subtest("session")) + test_session_skel_api(); + if (test__start_subtest("session_single")) + test_session_single_skel_api(); + if (test__start_subtest("session_cookie")) + test_session_cookie_skel_api(); + if (test__start_subtest("session_cookie_recursive")) + test_session_recursive_skel_api(); + RUN_TESTS(uprobe_multi_verifier); } diff --git a/tools/testing/selftests/bpf/prog_tests/verifier.c b/tools/testing/selftests/bpf/prog_tests/verifier.c index e26b5150fc43..3ee40ee9413a 100644 --- a/tools/testing/selftests/bpf/prog_tests/verifier.c +++ b/tools/testing/selftests/bpf/prog_tests/verifier.c @@ -44,6 +44,7 @@ #include "verifier_ld_ind.skel.h" #include "verifier_ldsx.skel.h" #include "verifier_leak_ptr.skel.h" +#include "verifier_linked_scalars.skel.h" #include "verifier_loops1.skel.h" #include "verifier_lwt.skel.h" #include "verifier_map_in_map.skel.h" @@ -53,12 +54,14 @@ #include "verifier_masking.skel.h" #include "verifier_meta_access.skel.h" #include "verifier_movsx.skel.h" +#include "verifier_mtu.skel.h" #include "verifier_netfilter_ctx.skel.h" #include "verifier_netfilter_retcode.skel.h" #include "verifier_bpf_fastcall.skel.h" #include "verifier_or_jmp32_k.skel.h" #include "verifier_precision.skel.h" #include "verifier_prevent_map_lookup.skel.h" +#include "verifier_private_stack.skel.h" #include "verifier_raw_stack.skel.h" #include "verifier_raw_tp_writable.skel.h" #include "verifier_reg_equal.skel.h" @@ -170,6 +173,7 @@ void test_verifier_jit_convergence(void) { RUN(verifier_jit_convergence); } void test_verifier_ld_ind(void) { RUN(verifier_ld_ind); } void test_verifier_ldsx(void) { RUN(verifier_ldsx); } void test_verifier_leak_ptr(void) { RUN(verifier_leak_ptr); } +void test_verifier_linked_scalars(void) { RUN(verifier_linked_scalars); } void test_verifier_loops1(void) { RUN(verifier_loops1); } void test_verifier_lwt(void) { RUN(verifier_lwt); } void test_verifier_map_in_map(void) { RUN(verifier_map_in_map); } @@ -185,6 +189,7 @@ void test_verifier_bpf_fastcall(void) { RUN(verifier_bpf_fastcall); } void test_verifier_or_jmp32_k(void) { RUN(verifier_or_jmp32_k); } void test_verifier_precision(void) { RUN(verifier_precision); } void test_verifier_prevent_map_lookup(void) { RUN(verifier_prevent_map_lookup); } +void test_verifier_private_stack(void) { RUN(verifier_private_stack); } void test_verifier_raw_stack(void) { RUN(verifier_raw_stack); } void test_verifier_raw_tp_writable(void) { RUN(verifier_raw_tp_writable); } void test_verifier_reg_equal(void) { RUN(verifier_reg_equal); } @@ -220,6 +225,7 @@ void test_verifier_xdp(void) { RUN(verifier_xdp); } void test_verifier_xdp_direct_packet_access(void) { RUN(verifier_xdp_direct_packet_access); } void test_verifier_bits_iter(void) { RUN(verifier_bits_iter); } void test_verifier_lsm(void) { RUN(verifier_lsm); } +void test_verifier_mtu(void) { RUN(verifier_mtu); } static int init_test_val_map(struct bpf_object *obj, char *map_name) { diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c b/tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c index 481626a875d1..c7f74f068e78 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c @@ -2,35 +2,41 @@ #include <uapi/linux/bpf.h> #include <linux/if_link.h> #include <test_progs.h> +#include <network_helpers.h> #include "test_xdp_with_cpumap_frags_helpers.skel.h" #include "test_xdp_with_cpumap_helpers.skel.h" #define IFINDEX_LO 1 +#define TEST_NS "cpu_attach_ns" static void test_xdp_with_cpumap_helpers(void) { - struct test_xdp_with_cpumap_helpers *skel; + struct test_xdp_with_cpumap_helpers *skel = NULL; struct bpf_prog_info info = {}; __u32 len = sizeof(info); struct bpf_cpumap_val val = { .qsize = 192, }; - int err, prog_fd, map_fd; + int err, prog_fd, prog_redir_fd, map_fd; + struct nstoken *nstoken = NULL; __u32 idx = 0; + SYS(out_close, "ip netns add %s", TEST_NS); + nstoken = open_netns(TEST_NS); + if (!ASSERT_OK_PTR(nstoken, "open_netns")) + goto out_close; + SYS(out_close, "ip link set dev lo up"); + skel = test_xdp_with_cpumap_helpers__open_and_load(); if (!ASSERT_OK_PTR(skel, "test_xdp_with_cpumap_helpers__open_and_load")) return; - prog_fd = bpf_program__fd(skel->progs.xdp_redir_prog); - err = bpf_xdp_attach(IFINDEX_LO, prog_fd, XDP_FLAGS_SKB_MODE, NULL); + prog_redir_fd = bpf_program__fd(skel->progs.xdp_redir_prog); + err = bpf_xdp_attach(IFINDEX_LO, prog_redir_fd, XDP_FLAGS_SKB_MODE, NULL); if (!ASSERT_OK(err, "Generic attach of program with 8-byte CPUMAP")) goto out_close; - err = bpf_xdp_detach(IFINDEX_LO, XDP_FLAGS_SKB_MODE, NULL); - ASSERT_OK(err, "XDP program detach"); - prog_fd = bpf_program__fd(skel->progs.xdp_dummy_cm); map_fd = bpf_map__fd(skel->maps.cpu_map); err = bpf_prog_get_info_by_fd(prog_fd, &info, &len); @@ -45,6 +51,26 @@ static void test_xdp_with_cpumap_helpers(void) ASSERT_OK(err, "Read cpumap entry"); ASSERT_EQ(info.id, val.bpf_prog.id, "Match program id to cpumap entry prog_id"); + /* send a packet to trigger any potential bugs in there */ + char data[10] = {}; + DECLARE_LIBBPF_OPTS(bpf_test_run_opts, opts, + .data_in = &data, + .data_size_in = 10, + .flags = BPF_F_TEST_XDP_LIVE_FRAMES, + .repeat = 1, + ); + err = bpf_prog_test_run_opts(prog_redir_fd, &opts); + ASSERT_OK(err, "XDP test run"); + + /* wait for the packets to be flushed, then check that redirect has been + * performed + */ + kern_sync_rcu(); + ASSERT_NEQ(skel->bss->redirect_count, 0, "redirected packets"); + + err = bpf_xdp_detach(IFINDEX_LO, XDP_FLAGS_SKB_MODE, NULL); + ASSERT_OK(err, "XDP program detach"); + /* can not attach BPF_XDP_CPUMAP program to a device */ err = bpf_xdp_attach(IFINDEX_LO, prog_fd, XDP_FLAGS_SKB_MODE, NULL); if (!ASSERT_NEQ(err, 0, "Attach of BPF_XDP_CPUMAP program")) @@ -65,6 +91,8 @@ static void test_xdp_with_cpumap_helpers(void) ASSERT_NEQ(err, 0, "Add BPF_XDP program with frags to cpumap entry"); out_close: + close_netns(nstoken); + SYS_NOFAIL("ip netns del %s", TEST_NS); test_xdp_with_cpumap_helpers__destroy(skel); } @@ -111,7 +139,7 @@ out_close: test_xdp_with_cpumap_frags_helpers__destroy(skel); } -void serial_test_xdp_cpumap_attach(void) +void test_xdp_cpumap_attach(void) { if (test__start_subtest("CPUMAP with programs in entries")) test_xdp_with_cpumap_helpers(); diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c b/tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c index ce6812558287..27ffed17d4be 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c @@ -1,6 +1,9 @@ // SPDX-License-Identifier: GPL-2.0 +#include <arpa/inet.h> #include <uapi/linux/bpf.h> #include <linux/if_link.h> +#include <network_helpers.h> +#include <net/if.h> #include <test_progs.h> #include "test_xdp_devmap_helpers.skel.h" @@ -8,31 +11,36 @@ #include "test_xdp_with_devmap_helpers.skel.h" #define IFINDEX_LO 1 +#define TEST_NS "devmap_attach_ns" static void test_xdp_with_devmap_helpers(void) { - struct test_xdp_with_devmap_helpers *skel; + struct test_xdp_with_devmap_helpers *skel = NULL; struct bpf_prog_info info = {}; struct bpf_devmap_val val = { .ifindex = IFINDEX_LO, }; __u32 len = sizeof(info); - int err, dm_fd, map_fd; + int err, dm_fd, dm_fd_redir, map_fd; + struct nstoken *nstoken = NULL; + char data[10] = {}; __u32 idx = 0; + SYS(out_close, "ip netns add %s", TEST_NS); + nstoken = open_netns(TEST_NS); + if (!ASSERT_OK_PTR(nstoken, "open_netns")) + goto out_close; + SYS(out_close, "ip link set dev lo up"); skel = test_xdp_with_devmap_helpers__open_and_load(); if (!ASSERT_OK_PTR(skel, "test_xdp_with_devmap_helpers__open_and_load")) - return; + goto out_close; - dm_fd = bpf_program__fd(skel->progs.xdp_redir_prog); - err = bpf_xdp_attach(IFINDEX_LO, dm_fd, XDP_FLAGS_SKB_MODE, NULL); + dm_fd_redir = bpf_program__fd(skel->progs.xdp_redir_prog); + err = bpf_xdp_attach(IFINDEX_LO, dm_fd_redir, XDP_FLAGS_SKB_MODE, NULL); if (!ASSERT_OK(err, "Generic attach of program with 8-byte devmap")) goto out_close; - err = bpf_xdp_detach(IFINDEX_LO, XDP_FLAGS_SKB_MODE, NULL); - ASSERT_OK(err, "XDP program detach"); - dm_fd = bpf_program__fd(skel->progs.xdp_dummy_dm); map_fd = bpf_map__fd(skel->maps.dm_ports); err = bpf_prog_get_info_by_fd(dm_fd, &info, &len); @@ -47,6 +55,22 @@ static void test_xdp_with_devmap_helpers(void) ASSERT_OK(err, "Read devmap entry"); ASSERT_EQ(info.id, val.bpf_prog.id, "Match program id to devmap entry prog_id"); + /* send a packet to trigger any potential bugs in there */ + DECLARE_LIBBPF_OPTS(bpf_test_run_opts, opts, + .data_in = &data, + .data_size_in = 10, + .flags = BPF_F_TEST_XDP_LIVE_FRAMES, + .repeat = 1, + ); + err = bpf_prog_test_run_opts(dm_fd_redir, &opts); + ASSERT_OK(err, "XDP test run"); + + /* wait for the packets to be flushed */ + kern_sync_rcu(); + + err = bpf_xdp_detach(IFINDEX_LO, XDP_FLAGS_SKB_MODE, NULL); + ASSERT_OK(err, "XDP program detach"); + /* can not attach BPF_XDP_DEVMAP program to a device */ err = bpf_xdp_attach(IFINDEX_LO, dm_fd, XDP_FLAGS_SKB_MODE, NULL); if (!ASSERT_NEQ(err, 0, "Attach of BPF_XDP_DEVMAP program")) @@ -67,6 +91,8 @@ static void test_xdp_with_devmap_helpers(void) ASSERT_NEQ(err, 0, "Add BPF_XDP program with frags to devmap entry"); out_close: + close_netns(nstoken); + SYS_NOFAIL("ip netns del %s", TEST_NS); test_xdp_with_devmap_helpers__destroy(skel); } @@ -124,6 +150,86 @@ out_close: test_xdp_with_devmap_frags_helpers__destroy(skel); } +static void test_xdp_with_devmap_helpers_veth(void) +{ + struct test_xdp_with_devmap_helpers *skel = NULL; + struct bpf_prog_info info = {}; + struct bpf_devmap_val val = {}; + struct nstoken *nstoken = NULL; + __u32 len = sizeof(info); + int err, dm_fd, dm_fd_redir, map_fd, ifindex_dst; + char data[10] = {}; + __u32 idx = 0; + + SYS(out_close, "ip netns add %s", TEST_NS); + nstoken = open_netns(TEST_NS); + if (!ASSERT_OK_PTR(nstoken, "open_netns")) + goto out_close; + + SYS(out_close, "ip link add veth_src type veth peer name veth_dst"); + SYS(out_close, "ip link set dev veth_src up"); + SYS(out_close, "ip link set dev veth_dst up"); + + val.ifindex = if_nametoindex("veth_src"); + ifindex_dst = if_nametoindex("veth_dst"); + if (!ASSERT_NEQ(val.ifindex, 0, "val.ifindex") || + !ASSERT_NEQ(ifindex_dst, 0, "ifindex_dst")) + goto out_close; + + skel = test_xdp_with_devmap_helpers__open_and_load(); + if (!ASSERT_OK_PTR(skel, "test_xdp_with_devmap_helpers__open_and_load")) + goto out_close; + + dm_fd_redir = bpf_program__fd(skel->progs.xdp_redir_prog); + err = bpf_xdp_attach(val.ifindex, dm_fd_redir, XDP_FLAGS_DRV_MODE, NULL); + if (!ASSERT_OK(err, "Attach of program with 8-byte devmap")) + goto out_close; + + dm_fd = bpf_program__fd(skel->progs.xdp_dummy_dm); + map_fd = bpf_map__fd(skel->maps.dm_ports); + err = bpf_prog_get_info_by_fd(dm_fd, &info, &len); + if (!ASSERT_OK(err, "bpf_prog_get_info_by_fd")) + goto out_close; + + val.bpf_prog.fd = dm_fd; + err = bpf_map_update_elem(map_fd, &idx, &val, 0); + ASSERT_OK(err, "Add program to devmap entry"); + + err = bpf_map_lookup_elem(map_fd, &idx, &val); + ASSERT_OK(err, "Read devmap entry"); + ASSERT_EQ(info.id, val.bpf_prog.id, "Match program id to devmap entry prog_id"); + + /* attach dummy to other side to enable reception */ + dm_fd = bpf_program__fd(skel->progs.xdp_dummy_prog); + err = bpf_xdp_attach(ifindex_dst, dm_fd, XDP_FLAGS_DRV_MODE, NULL); + if (!ASSERT_OK(err, "Attach of dummy XDP")) + goto out_close; + + /* send a packet to trigger any potential bugs in there */ + DECLARE_LIBBPF_OPTS(bpf_test_run_opts, opts, + .data_in = &data, + .data_size_in = 10, + .flags = BPF_F_TEST_XDP_LIVE_FRAMES, + .repeat = 1, + ); + err = bpf_prog_test_run_opts(dm_fd_redir, &opts); + ASSERT_OK(err, "XDP test run"); + + /* wait for the packets to be flushed */ + kern_sync_rcu(); + + err = bpf_xdp_detach(val.ifindex, XDP_FLAGS_DRV_MODE, NULL); + ASSERT_OK(err, "XDP program detach"); + + err = bpf_xdp_detach(ifindex_dst, XDP_FLAGS_DRV_MODE, NULL); + ASSERT_OK(err, "XDP program detach"); + +out_close: + close_netns(nstoken); + SYS_NOFAIL("ip netns del %s", TEST_NS); + test_xdp_with_devmap_helpers__destroy(skel); +} + void serial_test_xdp_devmap_attach(void) { if (test__start_subtest("DEVMAP with programs in entries")) @@ -134,4 +240,7 @@ void serial_test_xdp_devmap_attach(void) if (test__start_subtest("Verifier check of DEVMAP programs")) test_neg_xdp_devmap_helpers(); + + if (test__start_subtest("DEVMAP with programs in entries on veth")) + test_xdp_with_devmap_helpers_veth(); } diff --git a/tools/testing/selftests/bpf/progs/bpf_iter.h b/tools/testing/selftests/bpf/progs/bpf_iter.h deleted file mode 100644 index c41ee80533ca..000000000000 --- a/tools/testing/selftests/bpf/progs/bpf_iter.h +++ /dev/null @@ -1,167 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* Copyright (c) 2020 Facebook */ -/* "undefine" structs in vmlinux.h, because we "override" them below */ -#define bpf_iter_meta bpf_iter_meta___not_used -#define bpf_iter__bpf_map bpf_iter__bpf_map___not_used -#define bpf_iter__ipv6_route bpf_iter__ipv6_route___not_used -#define bpf_iter__netlink bpf_iter__netlink___not_used -#define bpf_iter__task bpf_iter__task___not_used -#define bpf_iter__task_file bpf_iter__task_file___not_used -#define bpf_iter__task_vma bpf_iter__task_vma___not_used -#define bpf_iter__tcp bpf_iter__tcp___not_used -#define tcp6_sock tcp6_sock___not_used -#define bpf_iter__udp bpf_iter__udp___not_used -#define udp6_sock udp6_sock___not_used -#define bpf_iter__unix bpf_iter__unix___not_used -#define bpf_iter__bpf_map_elem bpf_iter__bpf_map_elem___not_used -#define bpf_iter__bpf_sk_storage_map bpf_iter__bpf_sk_storage_map___not_used -#define bpf_iter__sockmap bpf_iter__sockmap___not_used -#define bpf_iter__bpf_link bpf_iter__bpf_link___not_used -#define bpf_iter__cgroup bpf_iter__cgroup___not_used -#define btf_ptr btf_ptr___not_used -#define BTF_F_COMPACT BTF_F_COMPACT___not_used -#define BTF_F_NONAME BTF_F_NONAME___not_used -#define BTF_F_PTR_RAW BTF_F_PTR_RAW___not_used -#define BTF_F_ZERO BTF_F_ZERO___not_used -#define bpf_iter__ksym bpf_iter__ksym___not_used -#include "vmlinux.h" -#undef bpf_iter_meta -#undef bpf_iter__bpf_map -#undef bpf_iter__ipv6_route -#undef bpf_iter__netlink -#undef bpf_iter__task -#undef bpf_iter__task_file -#undef bpf_iter__task_vma -#undef bpf_iter__tcp -#undef tcp6_sock -#undef bpf_iter__udp -#undef udp6_sock -#undef bpf_iter__unix -#undef bpf_iter__bpf_map_elem -#undef bpf_iter__bpf_sk_storage_map -#undef bpf_iter__sockmap -#undef bpf_iter__bpf_link -#undef bpf_iter__cgroup -#undef btf_ptr -#undef BTF_F_COMPACT -#undef BTF_F_NONAME -#undef BTF_F_PTR_RAW -#undef BTF_F_ZERO -#undef bpf_iter__ksym - -struct bpf_iter_meta { - struct seq_file *seq; - __u64 session_id; - __u64 seq_num; -} __attribute__((preserve_access_index)); - -struct bpf_iter__ipv6_route { - struct bpf_iter_meta *meta; - struct fib6_info *rt; -} __attribute__((preserve_access_index)); - -struct bpf_iter__netlink { - struct bpf_iter_meta *meta; - struct netlink_sock *sk; -} __attribute__((preserve_access_index)); - -struct bpf_iter__task { - struct bpf_iter_meta *meta; - struct task_struct *task; -} __attribute__((preserve_access_index)); - -struct bpf_iter__task_file { - struct bpf_iter_meta *meta; - struct task_struct *task; - __u32 fd; - struct file *file; -} __attribute__((preserve_access_index)); - -struct bpf_iter__task_vma { - struct bpf_iter_meta *meta; - struct task_struct *task; - struct vm_area_struct *vma; -} __attribute__((preserve_access_index)); - -struct bpf_iter__bpf_map { - struct bpf_iter_meta *meta; - struct bpf_map *map; -} __attribute__((preserve_access_index)); - -struct bpf_iter__tcp { - struct bpf_iter_meta *meta; - struct sock_common *sk_common; - uid_t uid; -} __attribute__((preserve_access_index)); - -struct tcp6_sock { - struct tcp_sock tcp; - struct ipv6_pinfo inet6; -} __attribute__((preserve_access_index)); - -struct bpf_iter__udp { - struct bpf_iter_meta *meta; - struct udp_sock *udp_sk; - uid_t uid __attribute__((aligned(8))); - int bucket __attribute__((aligned(8))); -} __attribute__((preserve_access_index)); - -struct udp6_sock { - struct udp_sock udp; - struct ipv6_pinfo inet6; -} __attribute__((preserve_access_index)); - -struct bpf_iter__unix { - struct bpf_iter_meta *meta; - struct unix_sock *unix_sk; - uid_t uid; -} __attribute__((preserve_access_index)); - -struct bpf_iter__bpf_map_elem { - struct bpf_iter_meta *meta; - struct bpf_map *map; - void *key; - void *value; -}; - -struct bpf_iter__bpf_sk_storage_map { - struct bpf_iter_meta *meta; - struct bpf_map *map; - struct sock *sk; - void *value; -}; - -struct bpf_iter__sockmap { - struct bpf_iter_meta *meta; - struct bpf_map *map; - void *key; - struct sock *sk; -}; - -struct bpf_iter__bpf_link { - struct bpf_iter_meta *meta; - struct bpf_link *link; -}; - -struct bpf_iter__cgroup { - struct bpf_iter_meta *meta; - struct cgroup *cgroup; -} __attribute__((preserve_access_index)); - -struct btf_ptr { - void *ptr; - __u32 type_id; - __u32 flags; -}; - -enum { - BTF_F_COMPACT = (1ULL << 0), - BTF_F_NONAME = (1ULL << 1), - BTF_F_PTR_RAW = (1ULL << 2), - BTF_F_ZERO = (1ULL << 3), -}; - -struct bpf_iter__ksym { - struct bpf_iter_meta *meta; - struct kallsym_iter *ksym; -}; diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_array_map.c b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_array_map.c index 564835ba7d51..19710cc0f250 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_array_map.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_array_map.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2020 Facebook */ -#include "bpf_iter.h" +#include <vmlinux.h> #include <bpf/bpf_helpers.h> #include <bpf/bpf_tracing.h> diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_hash_map.c b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_hash_map.c index d7a69217fb68..f47da665f7e0 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_hash_map.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_hash_map.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2020 Facebook */ -#include "bpf_iter.h" +#include <vmlinux.h> #include <bpf/bpf_helpers.h> char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_link.c b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_link.c index e1af2f8f75a6..7b69e1887705 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_link.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_link.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2022 Red Hat, Inc. */ -#include "bpf_iter.h" +#include <vmlinux.h> #include <bpf/bpf_helpers.h> char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_map.c b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_map.c index 6c39e86b666f..c868ffb8080f 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_map.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_map.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2020 Facebook */ -#include "bpf_iter.h" +#include <vmlinux.h> #include <bpf/bpf_helpers.h> char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_percpu_array_map.c b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_percpu_array_map.c index 9f0e0705b2bf..9fdea8cd4c6f 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_percpu_array_map.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_percpu_array_map.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2020 Facebook */ -#include "bpf_iter.h" +#include <vmlinux.h> #include <bpf/bpf_helpers.h> #include <bpf/bpf_tracing.h> diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_percpu_hash_map.c b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_percpu_hash_map.c index 5014a17d6c02..aa529f76c7fc 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_percpu_hash_map.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_percpu_hash_map.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2020 Facebook */ -#include "bpf_iter.h" +#include <vmlinux.h> #include <bpf/bpf_helpers.h> #include <bpf/bpf_tracing.h> diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_sk_storage_helpers.c b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_sk_storage_helpers.c index 6cecab2b32ba..e88dab196e0f 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_sk_storage_helpers.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_sk_storage_helpers.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2020 Google LLC. */ -#include "bpf_iter.h" +#include <vmlinux.h> #include <bpf/bpf_helpers.h> #include <bpf/bpf_tracing.h> diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_sk_storage_map.c b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_sk_storage_map.c index c7b8e006b171..eb9642923e1c 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_sk_storage_map.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_sk_storage_map.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2020 Facebook */ -#include "bpf_iter.h" +#include <vmlinux.h> #include "bpf_tracing_net.h" #include <bpf/bpf_helpers.h> #include <bpf/bpf_tracing.h> diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_ipv6_route.c b/tools/testing/selftests/bpf/progs/bpf_iter_ipv6_route.c index 784a610ce039..73a5cf3ba3d3 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_ipv6_route.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_ipv6_route.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2020 Facebook */ -#include "bpf_iter.h" +#include <vmlinux.h> #include "bpf_tracing_net.h" #include <bpf/bpf_helpers.h> diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_ksym.c b/tools/testing/selftests/bpf/progs/bpf_iter_ksym.c index 521267818f4d..3e725b1fce37 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_ksym.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_ksym.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2022, Oracle and/or its affiliates. */ -#include "bpf_iter.h" +#include <vmlinux.h> #include <bpf/bpf_helpers.h> char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_netlink.c b/tools/testing/selftests/bpf/progs/bpf_iter_netlink.c index a28e51e2dcee..00b2ceae81fb 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_netlink.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_netlink.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2020 Facebook */ -#include "bpf_iter.h" +#include <vmlinux.h> #include "bpf_tracing_net.h" #include <bpf/bpf_helpers.h> diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_setsockopt.c b/tools/testing/selftests/bpf/progs/bpf_iter_setsockopt.c index ec7f91850dec..774d4dbe8189 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_setsockopt.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_setsockopt.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2021 Facebook */ -#include "bpf_iter.h" +#include <vmlinux.h> #include "bpf_tracing_net.h" #include <bpf/bpf_helpers.h> #include <bpf/bpf_endian.h> diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_setsockopt_unix.c b/tools/testing/selftests/bpf/progs/bpf_iter_setsockopt_unix.c index eafc877ea460..d92631ec6161 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_setsockopt_unix.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_setsockopt_unix.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright Amazon.com Inc. or its affiliates. */ -#include "bpf_iter.h" +#include <vmlinux.h> #include "bpf_tracing_net.h" #include <bpf/bpf_helpers.h> #include <limits.h> diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_sockmap.c b/tools/testing/selftests/bpf/progs/bpf_iter_sockmap.c index f3af0e30cead..317fe49760cc 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_sockmap.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_sockmap.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2020 Cloudflare */ -#include "bpf_iter.h" +#include <vmlinux.h> #include "bpf_tracing_net.h" #include <bpf/bpf_helpers.h> #include <bpf/bpf_tracing.h> diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_task_btf.c b/tools/testing/selftests/bpf/progs/bpf_iter_task_btf.c index bca8b889cb10..ef2f7c8d9373 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_task_btf.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_task_btf.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2020, Oracle and/or its affiliates. */ -#include "bpf_iter.h" +#include <vmlinux.h> #include <bpf/bpf_helpers.h> #include <bpf/bpf_core_read.h> diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_task_file.c b/tools/testing/selftests/bpf/progs/bpf_iter_task_file.c index b0255080662d..959a8d899eaf 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_task_file.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_task_file.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2020 Facebook */ -#include "bpf_iter.h" +#include <vmlinux.h> #include <bpf/bpf_helpers.h> char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c b/tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c index 442f4ca39fd7..f5a309455490 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2020 Facebook */ -#include "bpf_iter.h" +#include <vmlinux.h> #include <bpf/bpf_helpers.h> char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_task_vmas.c b/tools/testing/selftests/bpf/progs/bpf_iter_task_vmas.c index 423b39e60b6f..d64ba7ddaed5 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_task_vmas.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_task_vmas.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2020 Facebook */ -#include "bpf_iter.h" +#include <vmlinux.h> #include <bpf/bpf_helpers.h> char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_tasks.c b/tools/testing/selftests/bpf/progs/bpf_iter_tasks.c index 6cbb3393f243..bc10c4e4b4fa 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_tasks.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_tasks.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2020 Facebook */ -#include "bpf_iter.h" +#include <vmlinux.h> #include <bpf/bpf_helpers.h> #include <bpf/bpf_tracing.h> diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_tcp4.c b/tools/testing/selftests/bpf/progs/bpf_iter_tcp4.c index 92267abb462f..d22449c69363 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_tcp4.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_tcp4.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2020 Facebook */ -#include "bpf_iter.h" +#include <vmlinux.h> #include "bpf_tracing_net.h" #include <bpf/bpf_helpers.h> #include <bpf/bpf_endian.h> diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_tcp6.c b/tools/testing/selftests/bpf/progs/bpf_iter_tcp6.c index 943f7bba180e..8b072666f9d9 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_tcp6.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_tcp6.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2020 Facebook */ -#include "bpf_iter.h" +#include <vmlinux.h> #include "bpf_tracing_net.h" #include <bpf/bpf_helpers.h> #include <bpf/bpf_endian.h> diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_test_kern3.c b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern3.c index 2a4647f20c46..6b17e7e86a48 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_test_kern3.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern3.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2020 Facebook */ -#include "bpf_iter.h" +#include <vmlinux.h> #include <bpf/bpf_helpers.h> char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_test_kern4.c b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern4.c index dbf61c44acac..56177508798f 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_test_kern4.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern4.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2020 Facebook */ -#include "bpf_iter.h" +#include <vmlinux.h> #include <bpf/bpf_helpers.h> char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_test_kern5.c b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern5.c index e3a7575e81d2..9d8b7310d2c2 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_test_kern5.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern5.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2020 Facebook */ -#include "bpf_iter.h" +#include <vmlinux.h> #include <bpf/bpf_helpers.h> #include <bpf/bpf_tracing.h> diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_test_kern6.c b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern6.c index 1c7304f56b1e..b150bd468824 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_test_kern6.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern6.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2020 Facebook */ -#include "bpf_iter.h" +#include <vmlinux.h> #include <bpf/bpf_helpers.h> char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_test_kern_common.h b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern_common.h index d5e3df66ad9a..6a4c50497c5e 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_test_kern_common.h +++ b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern_common.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* Copyright (c) 2020 Facebook */ -#include "bpf_iter.h" +#include <vmlinux.h> #include <bpf/bpf_helpers.h> char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_udp4.c b/tools/testing/selftests/bpf/progs/bpf_iter_udp4.c index cf0c485b1ed7..ffbd4b116d17 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_udp4.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_udp4.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2020 Facebook */ -#include "bpf_iter.h" +#include <vmlinux.h> #include "bpf_tracing_net.h" #include <bpf/bpf_helpers.h> #include <bpf/bpf_endian.h> diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_udp6.c b/tools/testing/selftests/bpf/progs/bpf_iter_udp6.c index 5031e21c433f..47ff7754f4fd 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_udp6.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_udp6.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2020 Facebook */ -#include "bpf_iter.h" +#include <vmlinux.h> #include "bpf_tracing_net.h" #include <bpf/bpf_helpers.h> #include <bpf/bpf_endian.h> diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_unix.c b/tools/testing/selftests/bpf/progs/bpf_iter_unix.c index e6aefae38894..fea275df9e22 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_unix.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_unix.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright Amazon.com Inc. or its affiliates. */ -#include "bpf_iter.h" +#include <vmlinux.h> #include "bpf_tracing_net.h" #include <bpf/bpf_helpers.h> #include <bpf/bpf_endian.h> diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_vma_offset.c b/tools/testing/selftests/bpf/progs/bpf_iter_vma_offset.c index ee7455d2623a..174298e122d3 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_vma_offset.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_vma_offset.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */ -#include "bpf_iter.h" +#include <vmlinux.h> #include <bpf/bpf_helpers.h> char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/bpf_misc.h b/tools/testing/selftests/bpf/progs/bpf_misc.h index eccaf955e394..f45f4352feeb 100644 --- a/tools/testing/selftests/bpf/progs/bpf_misc.h +++ b/tools/testing/selftests/bpf/progs/bpf_misc.h @@ -5,6 +5,10 @@ #define XSTR(s) STR(s) #define STR(s) #s +/* Expand a macro and then stringize the expansion */ +#define QUOTE(str) #str +#define EXPAND_QUOTE(str) QUOTE(str) + /* This set of attributes controls behavior of the * test_loader.c:test_loader__run_subtests(). * @@ -106,6 +110,7 @@ * __arch_* Specify on which architecture the test case should be tested. * Several __arch_* annotations could be specified at once. * When test case is not run on current arch it is marked as skipped. + * __caps_unpriv Specify the capabilities that should be set when running the test. */ #define __msg(msg) __attribute__((btf_decl_tag("comment:test_expect_msg=" XSTR(__COUNTER__) "=" msg))) #define __xlated(msg) __attribute__((btf_decl_tag("comment:test_expect_xlated=" XSTR(__COUNTER__) "=" msg))) @@ -129,6 +134,13 @@ #define __arch_x86_64 __arch("X86_64") #define __arch_arm64 __arch("ARM64") #define __arch_riscv64 __arch("RISCV64") +#define __caps_unpriv(caps) __attribute__((btf_decl_tag("comment:test_caps_unpriv=" EXPAND_QUOTE(caps)))) + +/* Define common capabilities tested using __caps_unpriv */ +#define CAP_NET_ADMIN 12 +#define CAP_SYS_ADMIN 21 +#define CAP_PERFMON 38 +#define CAP_BPF 39 /* Convenience macro for use with 'asm volatile' blocks */ #define __naked __attribute__((naked)) diff --git a/tools/testing/selftests/bpf/progs/cgroup_iter.c b/tools/testing/selftests/bpf/progs/cgroup_iter.c index de03997322a7..f30841997a8d 100644 --- a/tools/testing/selftests/bpf/progs/cgroup_iter.c +++ b/tools/testing/selftests/bpf/progs/cgroup_iter.c @@ -1,7 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2022 Google */ - -#include "bpf_iter.h" +#include <vmlinux.h> #include <bpf/bpf_helpers.h> #include <bpf/bpf_tracing.h> diff --git a/tools/testing/selftests/bpf/progs/cgrp_ls_sleepable.c b/tools/testing/selftests/bpf/progs/cgrp_ls_sleepable.c index 5e282c16eadc..a2de95f85648 100644 --- a/tools/testing/selftests/bpf/progs/cgrp_ls_sleepable.c +++ b/tools/testing/selftests/bpf/progs/cgrp_ls_sleepable.c @@ -1,7 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */ - -#include "bpf_iter.h" +#include <vmlinux.h> #include <bpf/bpf_helpers.h> #include <bpf/bpf_tracing.h> #include "bpf_misc.h" diff --git a/tools/testing/selftests/bpf/progs/changes_pkt_data.c b/tools/testing/selftests/bpf/progs/changes_pkt_data.c new file mode 100644 index 000000000000..43cada48b28a --- /dev/null +++ b/tools/testing/selftests/bpf/progs/changes_pkt_data.c @@ -0,0 +1,39 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> + +__noinline +long changes_pkt_data(struct __sk_buff *sk) +{ + return bpf_skb_pull_data(sk, 0); +} + +__noinline __weak +long does_not_change_pkt_data(struct __sk_buff *sk) +{ + return 0; +} + +SEC("?tc") +int main_with_subprogs(struct __sk_buff *sk) +{ + changes_pkt_data(sk); + does_not_change_pkt_data(sk); + return 0; +} + +SEC("?tc") +int main_changes(struct __sk_buff *sk) +{ + bpf_skb_pull_data(sk, 0); + return 0; +} + +SEC("?tc") +int main_does_not_change(struct __sk_buff *sk) +{ + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/changes_pkt_data_freplace.c b/tools/testing/selftests/bpf/progs/changes_pkt_data_freplace.c new file mode 100644 index 000000000000..f9a622705f1b --- /dev/null +++ b/tools/testing/selftests/bpf/progs/changes_pkt_data_freplace.c @@ -0,0 +1,18 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> + +SEC("?freplace") +long changes_pkt_data(struct __sk_buff *sk) +{ + return bpf_skb_pull_data(sk, 0); +} + +SEC("?freplace") +long does_not_change_pkt_data(struct __sk_buff *sk) +{ + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/cpumask_common.h b/tools/testing/selftests/bpf/progs/cpumask_common.h index b979e91f55f0..4ece7873ba60 100644 --- a/tools/testing/selftests/bpf/progs/cpumask_common.h +++ b/tools/testing/selftests/bpf/progs/cpumask_common.h @@ -7,6 +7,11 @@ #include "errno.h" #include <stdbool.h> +/* Should use BTF_FIELDS_MAX, but it is not always available in vmlinux.h, + * so use the hard-coded number as a workaround. + */ +#define CPUMASK_KPTR_FIELDS_MAX 11 + int err; #define private(name) SEC(".bss." #name) __attribute__((aligned(8))) diff --git a/tools/testing/selftests/bpf/progs/cpumask_failure.c b/tools/testing/selftests/bpf/progs/cpumask_failure.c index a988d2823b52..b40b52548ffb 100644 --- a/tools/testing/selftests/bpf/progs/cpumask_failure.c +++ b/tools/testing/selftests/bpf/progs/cpumask_failure.c @@ -10,6 +10,21 @@ char _license[] SEC("license") = "GPL"; +struct kptr_nested_array_2 { + struct bpf_cpumask __kptr * mask; +}; + +struct kptr_nested_array_1 { + /* Make btf_parse_fields() in map_create() return -E2BIG */ + struct kptr_nested_array_2 d_2[CPUMASK_KPTR_FIELDS_MAX + 1]; +}; + +struct kptr_nested_array { + struct kptr_nested_array_1 d_1; +}; + +private(MASK_NESTED) static struct kptr_nested_array global_mask_nested_arr; + /* Prototype for all of the program trace events below: * * TRACE_EVENT(task_newtask, @@ -187,3 +202,23 @@ int BPF_PROG(test_global_mask_rcu_no_null_check, struct task_struct *task, u64 c return 0; } + +SEC("tp_btf/task_newtask") +__failure __msg("has no valid kptr") +int BPF_PROG(test_invalid_nested_array, struct task_struct *task, u64 clone_flags) +{ + struct bpf_cpumask *local, *prev; + + local = create_cpumask(); + if (!local) + return 0; + + prev = bpf_kptr_xchg(&global_mask_nested_arr.d_1.d_2[CPUMASK_KPTR_FIELDS_MAX].mask, local); + if (prev) { + bpf_cpumask_release(prev); + err = 3; + return 0; + } + + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/cpumask_success.c b/tools/testing/selftests/bpf/progs/cpumask_success.c index fd8106831c32..80ee469b0b60 100644 --- a/tools/testing/selftests/bpf/progs/cpumask_success.c +++ b/tools/testing/selftests/bpf/progs/cpumask_success.c @@ -31,11 +31,59 @@ struct kptr_nested_deep { struct kptr_nested_pair ptr_pairs[3]; }; +struct kptr_nested_deep_array_1_2 { + int dummy; + struct bpf_cpumask __kptr * mask[CPUMASK_KPTR_FIELDS_MAX]; +}; + +struct kptr_nested_deep_array_1_1 { + int dummy; + struct kptr_nested_deep_array_1_2 d_2; +}; + +struct kptr_nested_deep_array_1 { + long dummy; + struct kptr_nested_deep_array_1_1 d_1; +}; + +struct kptr_nested_deep_array_2_2 { + long dummy[2]; + struct bpf_cpumask __kptr * mask; +}; + +struct kptr_nested_deep_array_2_1 { + int dummy; + struct kptr_nested_deep_array_2_2 d_2[CPUMASK_KPTR_FIELDS_MAX]; +}; + +struct kptr_nested_deep_array_2 { + long dummy; + struct kptr_nested_deep_array_2_1 d_1; +}; + +struct kptr_nested_deep_array_3_2 { + long dummy[2]; + struct bpf_cpumask __kptr * mask; +}; + +struct kptr_nested_deep_array_3_1 { + int dummy; + struct kptr_nested_deep_array_3_2 d_2; +}; + +struct kptr_nested_deep_array_3 { + long dummy; + struct kptr_nested_deep_array_3_1 d_1[CPUMASK_KPTR_FIELDS_MAX]; +}; + private(MASK) static struct bpf_cpumask __kptr * global_mask_array[2]; private(MASK) static struct bpf_cpumask __kptr * global_mask_array_l2[2][1]; private(MASK) static struct bpf_cpumask __kptr * global_mask_array_one[1]; private(MASK) static struct kptr_nested global_mask_nested[2]; private(MASK_DEEP) static struct kptr_nested_deep global_mask_nested_deep; +private(MASK_1) static struct kptr_nested_deep_array_1 global_mask_nested_deep_array_1; +private(MASK_2) static struct kptr_nested_deep_array_2 global_mask_nested_deep_array_2; +private(MASK_3) static struct kptr_nested_deep_array_3 global_mask_nested_deep_array_3; static bool is_test_task(void) { @@ -543,12 +591,21 @@ static int _global_mask_array_rcu(struct bpf_cpumask **mask0, goto err_exit; } - /* [<mask 0>, NULL] */ - if (!*mask0 || *mask1) { + /* [<mask 0>, *] */ + if (!*mask0) { err = 2; goto err_exit; } + if (!mask1) + goto err_exit; + + /* [*, NULL] */ + if (*mask1) { + err = 3; + goto err_exit; + } + local = create_cpumask(); if (!local) { err = 9; @@ -632,6 +689,23 @@ int BPF_PROG(test_global_mask_nested_deep_rcu, struct task_struct *task, u64 clo } SEC("tp_btf/task_newtask") +int BPF_PROG(test_global_mask_nested_deep_array_rcu, struct task_struct *task, u64 clone_flags) +{ + int i; + + for (i = 0; i < CPUMASK_KPTR_FIELDS_MAX; i++) + _global_mask_array_rcu(&global_mask_nested_deep_array_1.d_1.d_2.mask[i], NULL); + + for (i = 0; i < CPUMASK_KPTR_FIELDS_MAX; i++) + _global_mask_array_rcu(&global_mask_nested_deep_array_2.d_1.d_2[i].mask, NULL); + + for (i = 0; i < CPUMASK_KPTR_FIELDS_MAX; i++) + _global_mask_array_rcu(&global_mask_nested_deep_array_3.d_1[i].d_2.mask, NULL); + + return 0; +} + +SEC("tp_btf/task_newtask") int BPF_PROG(test_cpumask_weight, struct task_struct *task, u64 clone_flags) { struct bpf_cpumask *local; diff --git a/tools/testing/selftests/bpf/progs/csum_diff_test.c b/tools/testing/selftests/bpf/progs/csum_diff_test.c new file mode 100644 index 000000000000..9438f1773a58 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/csum_diff_test.c @@ -0,0 +1,42 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright Amazon.com Inc. or its affiliates */ +#include <linux/types.h> +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> + +#define BUFF_SZ 512 + +/* Will be updated by benchmark before program loading */ +char to_buff[BUFF_SZ]; +const volatile unsigned int to_buff_len = 0; +char from_buff[BUFF_SZ]; +const volatile unsigned int from_buff_len = 0; +unsigned short seed = 0; + +short result; + +char _license[] SEC("license") = "GPL"; + +SEC("tc") +int compute_checksum(void *ctx) +{ + int to_len_half = to_buff_len / 2; + int from_len_half = from_buff_len / 2; + short result2; + + /* Calculate checksum in one go */ + result2 = bpf_csum_diff((void *)from_buff, from_buff_len, + (void *)to_buff, to_buff_len, seed); + + /* Calculate checksum by concatenating bpf_csum_diff()*/ + result = bpf_csum_diff((void *)from_buff, from_buff_len - from_len_half, + (void *)to_buff, to_buff_len - to_len_half, seed); + + result = bpf_csum_diff((void *)from_buff + (from_buff_len - from_len_half), from_len_half, + (void *)to_buff + (to_buff_len - to_len_half), to_len_half, result); + + result = (result == result2) ? result : 0; + + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/dynptr_fail.c b/tools/testing/selftests/bpf/progs/dynptr_fail.c index 8f36c9de7591..dfd817d0348c 100644 --- a/tools/testing/selftests/bpf/progs/dynptr_fail.c +++ b/tools/testing/selftests/bpf/progs/dynptr_fail.c @@ -149,7 +149,7 @@ int ringbuf_release_uninit_dynptr(void *ctx) /* A dynptr can't be used after it has been invalidated */ SEC("?raw_tp") -__failure __msg("Expected an initialized dynptr as arg #3") +__failure __msg("Expected an initialized dynptr as arg #2") int use_after_invalid(void *ctx) { struct bpf_dynptr ptr; @@ -428,7 +428,7 @@ int invalid_helper2(void *ctx) /* A bpf_dynptr is invalidated if it's been written into */ SEC("?raw_tp") -__failure __msg("Expected an initialized dynptr as arg #1") +__failure __msg("Expected an initialized dynptr as arg #0") int invalid_write1(void *ctx) { struct bpf_dynptr ptr; @@ -1407,7 +1407,7 @@ int invalid_slice_rdwr_rdonly(struct __sk_buff *skb) /* bpf_dynptr_adjust can only be called on initialized dynptrs */ SEC("?raw_tp") -__failure __msg("Expected an initialized dynptr as arg #1") +__failure __msg("Expected an initialized dynptr as arg #0") int dynptr_adjust_invalid(void *ctx) { struct bpf_dynptr ptr = {}; @@ -1420,7 +1420,7 @@ int dynptr_adjust_invalid(void *ctx) /* bpf_dynptr_is_null can only be called on initialized dynptrs */ SEC("?raw_tp") -__failure __msg("Expected an initialized dynptr as arg #1") +__failure __msg("Expected an initialized dynptr as arg #0") int dynptr_is_null_invalid(void *ctx) { struct bpf_dynptr ptr = {}; @@ -1433,7 +1433,7 @@ int dynptr_is_null_invalid(void *ctx) /* bpf_dynptr_is_rdonly can only be called on initialized dynptrs */ SEC("?raw_tp") -__failure __msg("Expected an initialized dynptr as arg #1") +__failure __msg("Expected an initialized dynptr as arg #0") int dynptr_is_rdonly_invalid(void *ctx) { struct bpf_dynptr ptr = {}; @@ -1446,7 +1446,7 @@ int dynptr_is_rdonly_invalid(void *ctx) /* bpf_dynptr_size can only be called on initialized dynptrs */ SEC("?raw_tp") -__failure __msg("Expected an initialized dynptr as arg #1") +__failure __msg("Expected an initialized dynptr as arg #0") int dynptr_size_invalid(void *ctx) { struct bpf_dynptr ptr = {}; @@ -1459,7 +1459,7 @@ int dynptr_size_invalid(void *ctx) /* Only initialized dynptrs can be cloned */ SEC("?raw_tp") -__failure __msg("Expected an initialized dynptr as arg #1") +__failure __msg("Expected an initialized dynptr as arg #0") int clone_invalid1(void *ctx) { struct bpf_dynptr ptr1 = {}; @@ -1493,7 +1493,7 @@ int clone_invalid2(struct xdp_md *xdp) /* Invalidating a dynptr should invalidate its clones */ SEC("?raw_tp") -__failure __msg("Expected an initialized dynptr as arg #3") +__failure __msg("Expected an initialized dynptr as arg #2") int clone_invalidate1(void *ctx) { struct bpf_dynptr clone; @@ -1514,7 +1514,7 @@ int clone_invalidate1(void *ctx) /* Invalidating a dynptr should invalidate its parent */ SEC("?raw_tp") -__failure __msg("Expected an initialized dynptr as arg #3") +__failure __msg("Expected an initialized dynptr as arg #2") int clone_invalidate2(void *ctx) { struct bpf_dynptr ptr; @@ -1535,7 +1535,7 @@ int clone_invalidate2(void *ctx) /* Invalidating a dynptr should invalidate its siblings */ SEC("?raw_tp") -__failure __msg("Expected an initialized dynptr as arg #3") +__failure __msg("Expected an initialized dynptr as arg #2") int clone_invalidate3(void *ctx) { struct bpf_dynptr ptr; @@ -1723,7 +1723,7 @@ __noinline long global_call_bpf_dynptr(const struct bpf_dynptr *dynptr) } SEC("?raw_tp") -__failure __msg("arg#1 expected pointer to stack or const struct bpf_dynptr") +__failure __msg("arg#0 expected pointer to stack or const struct bpf_dynptr") int test_dynptr_reg_type(void *ctx) { struct task_struct *current = NULL; diff --git a/tools/testing/selftests/bpf/progs/exceptions_fail.c b/tools/testing/selftests/bpf/progs/exceptions_fail.c index 9cceb6521143..fe0f3fa5aab6 100644 --- a/tools/testing/selftests/bpf/progs/exceptions_fail.c +++ b/tools/testing/selftests/bpf/progs/exceptions_fail.c @@ -131,7 +131,7 @@ int reject_subprog_with_lock(void *ctx) } SEC("?tc") -__failure __msg("bpf_rcu_read_unlock is missing") +__failure __msg("BPF_EXIT instruction cannot be used inside bpf_rcu_read_lock-ed region") int reject_with_rcu_read_lock(void *ctx) { bpf_rcu_read_lock(); @@ -147,7 +147,7 @@ __noinline static int throwing_subprog(struct __sk_buff *ctx) } SEC("?tc") -__failure __msg("bpf_rcu_read_unlock is missing") +__failure __msg("BPF_EXIT instruction cannot be used inside bpf_rcu_read_lock-ed region") int reject_subprog_with_rcu_read_lock(void *ctx) { bpf_rcu_read_lock(); diff --git a/tools/testing/selftests/bpf/progs/iters.c b/tools/testing/selftests/bpf/progs/iters.c index ef70b88bccb2..7c969c127573 100644 --- a/tools/testing/selftests/bpf/progs/iters.c +++ b/tools/testing/selftests/bpf/progs/iters.c @@ -1486,4 +1486,30 @@ int iter_subprog_check_stacksafe(const void *ctx) return 0; } +struct bpf_iter_num global_it; + +SEC("raw_tp") +__failure __msg("arg#0 expected pointer to an iterator on stack") +int iter_new_bad_arg(const void *ctx) +{ + bpf_iter_num_new(&global_it, 0, 1); + return 0; +} + +SEC("raw_tp") +__failure __msg("arg#0 expected pointer to an iterator on stack") +int iter_next_bad_arg(const void *ctx) +{ + bpf_iter_num_next(&global_it); + return 0; +} + +SEC("raw_tp") +__failure __msg("arg#0 expected pointer to an iterator on stack") +int iter_destroy_bad_arg(const void *ctx) +{ + bpf_iter_num_destroy(&global_it); + return 0; +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/iters_state_safety.c b/tools/testing/selftests/bpf/progs/iters_state_safety.c index d47e59aba6de..f41257eadbb2 100644 --- a/tools/testing/selftests/bpf/progs/iters_state_safety.c +++ b/tools/testing/selftests/bpf/progs/iters_state_safety.c @@ -73,7 +73,7 @@ int create_and_forget_to_destroy_fail(void *ctx) } SEC("?raw_tp") -__failure __msg("expected an initialized iter_num as arg #1") +__failure __msg("expected an initialized iter_num as arg #0") int destroy_without_creating_fail(void *ctx) { /* init with zeros to stop verifier complaining about uninit stack */ @@ -91,7 +91,7 @@ int destroy_without_creating_fail(void *ctx) } SEC("?raw_tp") -__failure __msg("expected an initialized iter_num as arg #1") +__failure __msg("expected an initialized iter_num as arg #0") int compromise_iter_w_direct_write_fail(void *ctx) { struct bpf_iter_num iter; @@ -143,7 +143,7 @@ int compromise_iter_w_direct_write_and_skip_destroy_fail(void *ctx) } SEC("?raw_tp") -__failure __msg("expected an initialized iter_num as arg #1") +__failure __msg("expected an initialized iter_num as arg #0") int compromise_iter_w_helper_write_fail(void *ctx) { struct bpf_iter_num iter; @@ -230,7 +230,7 @@ int valid_stack_reuse(void *ctx) } SEC("?raw_tp") -__failure __msg("expected uninitialized iter_num as arg #1") +__failure __msg("expected uninitialized iter_num as arg #0") int double_create_fail(void *ctx) { struct bpf_iter_num iter; @@ -258,7 +258,7 @@ int double_create_fail(void *ctx) } SEC("?raw_tp") -__failure __msg("expected an initialized iter_num as arg #1") +__failure __msg("expected an initialized iter_num as arg #0") int double_destroy_fail(void *ctx) { struct bpf_iter_num iter; @@ -284,7 +284,7 @@ int double_destroy_fail(void *ctx) } SEC("?raw_tp") -__failure __msg("expected an initialized iter_num as arg #1") +__failure __msg("expected an initialized iter_num as arg #0") int next_without_new_fail(void *ctx) { struct bpf_iter_num iter; @@ -305,7 +305,7 @@ int next_without_new_fail(void *ctx) } SEC("?raw_tp") -__failure __msg("expected an initialized iter_num as arg #1") +__failure __msg("expected an initialized iter_num as arg #0") int next_after_destroy_fail(void *ctx) { struct bpf_iter_num iter; diff --git a/tools/testing/selftests/bpf/progs/iters_testmod_seq.c b/tools/testing/selftests/bpf/progs/iters_testmod_seq.c index 4a176e6aede8..6543d5b6e0a9 100644 --- a/tools/testing/selftests/bpf/progs/iters_testmod_seq.c +++ b/tools/testing/selftests/bpf/progs/iters_testmod_seq.c @@ -79,7 +79,7 @@ int testmod_seq_truncated(const void *ctx) SEC("?raw_tp") __failure -__msg("expected an initialized iter_testmod_seq as arg #2") +__msg("expected an initialized iter_testmod_seq as arg #1") int testmod_seq_getter_before_bad(const void *ctx) { struct bpf_iter_testmod_seq it; @@ -89,7 +89,7 @@ int testmod_seq_getter_before_bad(const void *ctx) SEC("?raw_tp") __failure -__msg("expected an initialized iter_testmod_seq as arg #2") +__msg("expected an initialized iter_testmod_seq as arg #1") int testmod_seq_getter_after_bad(const void *ctx) { struct bpf_iter_testmod_seq it; diff --git a/tools/testing/selftests/bpf/progs/kfunc_module_order.c b/tools/testing/selftests/bpf/progs/kfunc_module_order.c new file mode 100644 index 000000000000..76003d04c95f --- /dev/null +++ b/tools/testing/selftests/bpf/progs/kfunc_module_order.c @@ -0,0 +1,30 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> + +extern int bpf_test_modorder_retx(void) __ksym; +extern int bpf_test_modorder_rety(void) __ksym; + +SEC("classifier") +int call_kfunc_xy(struct __sk_buff *skb) +{ + int ret1, ret2; + + ret1 = bpf_test_modorder_retx(); + ret2 = bpf_test_modorder_rety(); + + return ret1 == 'x' && ret2 == 'y' ? 0 : -1; +} + +SEC("classifier") +int call_kfunc_yx(struct __sk_buff *skb) +{ + int ret1, ret2; + + ret1 = bpf_test_modorder_rety(); + ret2 = bpf_test_modorder_retx(); + + return ret1 == 'y' && ret2 == 'x' ? 0 : -1; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/kmem_cache_iter.c b/tools/testing/selftests/bpf/progs/kmem_cache_iter.c new file mode 100644 index 000000000000..b9c8f9457492 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/kmem_cache_iter.c @@ -0,0 +1,108 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2024 Google */ +#include <vmlinux.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> +#include "bpf_experimental.h" + +char _license[] SEC("license") = "GPL"; + +#define SLAB_NAME_MAX 32 + +struct kmem_cache_result { + char name[SLAB_NAME_MAX]; + long obj_size; +}; + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(key_size, sizeof(void *)); + __uint(value_size, SLAB_NAME_MAX); + __uint(max_entries, 1); +} slab_hash SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(key_size, sizeof(int)); + __uint(value_size, sizeof(struct kmem_cache_result)); + __uint(max_entries, 1024); +} slab_result SEC(".maps"); + +extern struct kmem_cache *bpf_get_kmem_cache(u64 addr) __ksym; + +/* Result, will be checked by userspace */ +int task_struct_found; +int kmem_cache_seen; +int open_coded_seen; + +SEC("iter/kmem_cache") +int slab_info_collector(struct bpf_iter__kmem_cache *ctx) +{ + struct seq_file *seq = ctx->meta->seq; + struct kmem_cache *s = ctx->s; + struct kmem_cache_result *r; + int idx; + + if (s) { + /* To make sure if the slab_iter implements the seq interface + * properly and it's also useful for debugging. + */ + BPF_SEQ_PRINTF(seq, "%s: %u\n", s->name, s->size); + + idx = kmem_cache_seen; + r = bpf_map_lookup_elem(&slab_result, &idx); + if (r == NULL) + return 0; + + kmem_cache_seen++; + + /* Save name and size to match /proc/slabinfo */ + bpf_probe_read_kernel_str(r->name, sizeof(r->name), s->name); + r->obj_size = s->size; + + if (!bpf_strncmp(r->name, 11, "task_struct")) + bpf_map_update_elem(&slab_hash, &s, r->name, BPF_NOEXIST); + } + + return 0; +} + +SEC("raw_tp/bpf_test_finish") +int BPF_PROG(check_task_struct) +{ + u64 curr = bpf_get_current_task(); + struct kmem_cache *s; + char *name; + + s = bpf_get_kmem_cache(curr); + if (s == NULL) { + task_struct_found = -1; + return 0; + } + name = bpf_map_lookup_elem(&slab_hash, &s); + if (name && !bpf_strncmp(name, 11, "task_struct")) + task_struct_found = 1; + else + task_struct_found = -2; + return 0; +} + +SEC("syscall") +int open_coded_iter(const void *ctx) +{ + struct kmem_cache *s; + + bpf_for_each(kmem_cache, s) { + struct kmem_cache_result *r; + + r = bpf_map_lookup_elem(&slab_result, &open_coded_seen); + if (!r) + break; + + if (r->obj_size != s->size) + break; + + open_coded_seen++; + } + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/kprobe_multi_verifier.c b/tools/testing/selftests/bpf/progs/kprobe_multi_verifier.c new file mode 100644 index 000000000000..288577e81deb --- /dev/null +++ b/tools/testing/selftests/bpf/progs/kprobe_multi_verifier.c @@ -0,0 +1,31 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> +#include <bpf/usdt.bpf.h> +#include "bpf_misc.h" + +char _license[] SEC("license") = "GPL"; + + +SEC("kprobe.session") +__success +int kprobe_session_return_0(struct pt_regs *ctx) +{ + return 0; +} + +SEC("kprobe.session") +__success +int kprobe_session_return_1(struct pt_regs *ctx) +{ + return 1; +} + +SEC("kprobe.session") +__failure +__msg("At program exit the register R0 has smin=2 smax=2 should have been in [0, 1]") +int kprobe_session_return_2(struct pt_regs *ctx) +{ + return 2; +} diff --git a/tools/testing/selftests/bpf/progs/linked_funcs1.c b/tools/testing/selftests/bpf/progs/linked_funcs1.c index cc79dddac182..049a1f78de3f 100644 --- a/tools/testing/selftests/bpf/progs/linked_funcs1.c +++ b/tools/testing/selftests/bpf/progs/linked_funcs1.c @@ -63,6 +63,8 @@ extern int set_output_val2(int x); /* here we'll force set_output_ctx2() to be __hidden in the final obj file */ __hidden extern void set_output_ctx2(__u64 *ctx); +void *bpf_cast_to_kern_ctx(void *obj) __ksym; + SEC("?raw_tp/sys_enter") int BPF_PROG(handler1, struct pt_regs *regs, long id) { @@ -86,4 +88,10 @@ int BPF_PROG(handler1, struct pt_regs *regs, long id) return 0; } +/* Generate BTF FUNC record and test linking with duplicate extern functions */ +void kfunc_gen1(void) +{ + bpf_cast_to_kern_ctx(0); +} + char LICENSE[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/linked_funcs2.c b/tools/testing/selftests/bpf/progs/linked_funcs2.c index 942cc5526ddf..96850759fd8d 100644 --- a/tools/testing/selftests/bpf/progs/linked_funcs2.c +++ b/tools/testing/selftests/bpf/progs/linked_funcs2.c @@ -63,6 +63,8 @@ extern int set_output_val1(int x); /* here we'll force set_output_ctx1() to be __hidden in the final obj file */ __hidden extern void set_output_ctx1(__u64 *ctx); +void *bpf_cast_to_kern_ctx(void *obj) __ksym; + SEC("?raw_tp/sys_enter") int BPF_PROG(handler2, struct pt_regs *regs, long id) { @@ -86,4 +88,10 @@ int BPF_PROG(handler2, struct pt_regs *regs, long id) return 0; } +/* Generate BTF FUNC record and test linking with duplicate extern functions */ +void kfunc_gen2(void) +{ + bpf_cast_to_kern_ctx(0); +} + char LICENSE[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/mptcp_bpf.h b/tools/testing/selftests/bpf/progs/mptcp_bpf.h new file mode 100644 index 000000000000..3b188ccdcc40 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/mptcp_bpf.h @@ -0,0 +1,42 @@ +/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ +#ifndef __MPTCP_BPF_H__ +#define __MPTCP_BPF_H__ + +#include "bpf_experimental.h" + +/* list helpers from include/linux/list.h */ +static inline int list_is_head(const struct list_head *list, + const struct list_head *head) +{ + return list == head; +} + +#define list_entry(ptr, type, member) \ + container_of(ptr, type, member) + +#define list_first_entry(ptr, type, member) \ + list_entry((ptr)->next, type, member) + +#define list_next_entry(pos, member) \ + list_entry((pos)->member.next, typeof(*(pos)), member) + +#define list_entry_is_head(pos, head, member) \ + list_is_head(&pos->member, (head)) + +/* small difference: 'can_loop' has been added in the conditions */ +#define list_for_each_entry(pos, head, member) \ + for (pos = list_first_entry(head, typeof(*pos), member); \ + !list_entry_is_head(pos, head, member) && can_loop; \ + pos = list_next_entry(pos, member)) + +/* mptcp helpers from protocol.h */ +#define mptcp_for_each_subflow(__msk, __subflow) \ + list_for_each_entry(__subflow, &((__msk)->conn_list), node) + +static __always_inline struct sock * +mptcp_subflow_tcp_sock(const struct mptcp_subflow_context *subflow) +{ + return subflow->tcp_sock; +} + +#endif diff --git a/tools/testing/selftests/bpf/progs/mptcp_subflow.c b/tools/testing/selftests/bpf/progs/mptcp_subflow.c new file mode 100644 index 000000000000..70302477e326 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/mptcp_subflow.c @@ -0,0 +1,128 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020, Tessares SA. */ +/* Copyright (c) 2024, Kylin Software */ + +/* vmlinux.h, bpf_helpers.h and other 'define' */ +#include "bpf_tracing_net.h" +#include "mptcp_bpf.h" + +char _license[] SEC("license") = "GPL"; + +char cc[TCP_CA_NAME_MAX] = "reno"; +int pid; + +/* Associate a subflow counter to each token */ +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(key_size, sizeof(__u32)); + __uint(value_size, sizeof(__u32)); + __uint(max_entries, 100); +} mptcp_sf SEC(".maps"); + +SEC("sockops") +int mptcp_subflow(struct bpf_sock_ops *skops) +{ + __u32 init = 1, key, mark, *cnt; + struct mptcp_sock *msk; + struct bpf_sock *sk; + int err; + + if (skops->op != BPF_SOCK_OPS_TCP_CONNECT_CB) + return 1; + + sk = skops->sk; + if (!sk) + return 1; + + msk = bpf_skc_to_mptcp_sock(sk); + if (!msk) + return 1; + + key = msk->token; + cnt = bpf_map_lookup_elem(&mptcp_sf, &key); + if (cnt) { + /* A new subflow is added to an existing MPTCP connection */ + __sync_fetch_and_add(cnt, 1); + mark = *cnt; + } else { + /* A new MPTCP connection is just initiated and this is its primary subflow */ + bpf_map_update_elem(&mptcp_sf, &key, &init, BPF_ANY); + mark = init; + } + + /* Set the mark of the subflow's socket based on appearance order */ + err = bpf_setsockopt(skops, SOL_SOCKET, SO_MARK, &mark, sizeof(mark)); + if (err < 0) + return 1; + if (mark == 2) + err = bpf_setsockopt(skops, SOL_TCP, TCP_CONGESTION, cc, TCP_CA_NAME_MAX); + + return 1; +} + +static int _check_getsockopt_subflow_mark(struct mptcp_sock *msk, struct bpf_sockopt *ctx) +{ + struct mptcp_subflow_context *subflow; + int i = 0; + + mptcp_for_each_subflow(msk, subflow) { + struct sock *ssk; + + ssk = mptcp_subflow_tcp_sock(bpf_core_cast(subflow, + struct mptcp_subflow_context)); + + if (ssk->sk_mark != ++i) { + ctx->retval = -2; + break; + } + } + + return 1; +} + +static int _check_getsockopt_subflow_cc(struct mptcp_sock *msk, struct bpf_sockopt *ctx) +{ + struct mptcp_subflow_context *subflow; + + mptcp_for_each_subflow(msk, subflow) { + struct inet_connection_sock *icsk; + struct sock *ssk; + + ssk = mptcp_subflow_tcp_sock(bpf_core_cast(subflow, + struct mptcp_subflow_context)); + icsk = bpf_core_cast(ssk, struct inet_connection_sock); + + if (ssk->sk_mark == 2 && + __builtin_memcmp(icsk->icsk_ca_ops->name, cc, TCP_CA_NAME_MAX)) { + ctx->retval = -2; + break; + } + } + + return 1; +} + +SEC("cgroup/getsockopt") +int _getsockopt_subflow(struct bpf_sockopt *ctx) +{ + struct bpf_sock *sk = ctx->sk; + struct mptcp_sock *msk; + + if (bpf_get_current_pid_tgid() >> 32 != pid) + return 1; + + if (!sk || sk->protocol != IPPROTO_MPTCP || + (!(ctx->level == SOL_SOCKET && ctx->optname == SO_MARK) && + !(ctx->level == SOL_TCP && ctx->optname == TCP_CONGESTION))) + return 1; + + msk = bpf_core_cast(sk, struct mptcp_sock); + if (msk->pm.subflows != 1) { + ctx->retval = -1; + return 1; + } + + if (ctx->optname == SO_MARK) + return _check_getsockopt_subflow_mark(msk, ctx); + return _check_getsockopt_subflow_cc(msk, ctx); +} diff --git a/tools/testing/selftests/bpf/progs/netns_cookie_prog.c b/tools/testing/selftests/bpf/progs/netns_cookie_prog.c index aeff3a4f9287..c6edf8dbefeb 100644 --- a/tools/testing/selftests/bpf/progs/netns_cookie_prog.c +++ b/tools/testing/selftests/bpf/progs/netns_cookie_prog.c @@ -27,6 +27,8 @@ struct { __type(value, __u64); } sock_map SEC(".maps"); +int tcx_init_netns_cookie, tcx_netns_cookie; + SEC("sockops") int get_netns_cookie_sockops(struct bpf_sock_ops *ctx) { @@ -81,4 +83,12 @@ int get_netns_cookie_sk_msg(struct sk_msg_md *msg) return 1; } +SEC("tcx/ingress") +int get_netns_cookie_tcx(struct __sk_buff *skb) +{ + tcx_init_netns_cookie = bpf_get_netns_cookie(NULL); + tcx_netns_cookie = bpf_get_netns_cookie(skb); + return TCX_PASS; +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/preempt_lock.c b/tools/testing/selftests/bpf/progs/preempt_lock.c index 672fc368d9c4..885377e83607 100644 --- a/tools/testing/selftests/bpf/progs/preempt_lock.c +++ b/tools/testing/selftests/bpf/progs/preempt_lock.c @@ -6,7 +6,7 @@ #include "bpf_experimental.h" SEC("?tc") -__failure __msg("1 bpf_preempt_enable is missing") +__failure __msg("BPF_EXIT instruction cannot be used inside bpf_preempt_disable-ed region") int preempt_lock_missing_1(struct __sk_buff *ctx) { bpf_preempt_disable(); @@ -14,7 +14,7 @@ int preempt_lock_missing_1(struct __sk_buff *ctx) } SEC("?tc") -__failure __msg("2 bpf_preempt_enable(s) are missing") +__failure __msg("BPF_EXIT instruction cannot be used inside bpf_preempt_disable-ed region") int preempt_lock_missing_2(struct __sk_buff *ctx) { bpf_preempt_disable(); @@ -23,7 +23,7 @@ int preempt_lock_missing_2(struct __sk_buff *ctx) } SEC("?tc") -__failure __msg("3 bpf_preempt_enable(s) are missing") +__failure __msg("BPF_EXIT instruction cannot be used inside bpf_preempt_disable-ed region") int preempt_lock_missing_3(struct __sk_buff *ctx) { bpf_preempt_disable(); @@ -33,7 +33,7 @@ int preempt_lock_missing_3(struct __sk_buff *ctx) } SEC("?tc") -__failure __msg("1 bpf_preempt_enable is missing") +__failure __msg("BPF_EXIT instruction cannot be used inside bpf_preempt_disable-ed region") int preempt_lock_missing_3_minus_2(struct __sk_buff *ctx) { bpf_preempt_disable(); @@ -55,7 +55,7 @@ static __noinline void preempt_enable(void) } SEC("?tc") -__failure __msg("1 bpf_preempt_enable is missing") +__failure __msg("BPF_EXIT instruction cannot be used inside bpf_preempt_disable-ed region") int preempt_lock_missing_1_subprog(struct __sk_buff *ctx) { preempt_disable(); @@ -63,7 +63,7 @@ int preempt_lock_missing_1_subprog(struct __sk_buff *ctx) } SEC("?tc") -__failure __msg("2 bpf_preempt_enable(s) are missing") +__failure __msg("BPF_EXIT instruction cannot be used inside bpf_preempt_disable-ed region") int preempt_lock_missing_2_subprog(struct __sk_buff *ctx) { preempt_disable(); @@ -72,7 +72,7 @@ int preempt_lock_missing_2_subprog(struct __sk_buff *ctx) } SEC("?tc") -__failure __msg("1 bpf_preempt_enable is missing") +__failure __msg("BPF_EXIT instruction cannot be used inside bpf_preempt_disable-ed region") int preempt_lock_missing_2_minus_1_subprog(struct __sk_buff *ctx) { preempt_disable(); diff --git a/tools/testing/selftests/bpf/progs/raw_tp_null.c b/tools/testing/selftests/bpf/progs/raw_tp_null.c new file mode 100644 index 000000000000..5927054b6dd9 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/raw_tp_null.c @@ -0,0 +1,31 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */ + +#include <vmlinux.h> +#include <bpf/bpf_tracing.h> +#include "bpf_misc.h" + +char _license[] SEC("license") = "GPL"; + +int tid; +int i; + +SEC("tp_btf/bpf_testmod_test_raw_tp_null") +int BPF_PROG(test_raw_tp_null, struct sk_buff *skb) +{ + struct task_struct *task = bpf_get_current_task_btf(); + + if (task->pid != tid) + return 0; + + /* If dead code elimination kicks in, the increment +=2 will be + * removed. For raw_tp programs attaching to tracepoints in kernel + * modules, we mark input arguments as PTR_MAYBE_NULL, so branch + * prediction should never kick in. + */ + asm volatile ("%[i] += 1; if %[ctx] != 0 goto +1; %[i] += 2;" + : [i]"+r"(i) + : [ctx]"r"(skb) + : "memory"); + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/raw_tp_null_fail.c b/tools/testing/selftests/bpf/progs/raw_tp_null_fail.c new file mode 100644 index 000000000000..38d669957bf1 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/raw_tp_null_fail.c @@ -0,0 +1,24 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */ + +#include <vmlinux.h> +#include <bpf/bpf_tracing.h> +#include "bpf_misc.h" + +char _license[] SEC("license") = "GPL"; + +/* Ensure module parameter has PTR_MAYBE_NULL */ +SEC("tp_btf/bpf_testmod_test_raw_tp_null") +__failure __msg("R1 invalid mem access 'trusted_ptr_or_null_'") +int test_raw_tp_null_bpf_testmod_test_raw_tp_null_arg_1(void *ctx) { + asm volatile("r1 = *(u64 *)(r1 +0); r1 = *(u64 *)(r1 +0);" ::: __clobber_all); + return 0; +} + +/* Check NULL marking */ +SEC("tp_btf/sched_pi_setprio") +__failure __msg("R1 invalid mem access 'trusted_ptr_or_null_'") +int test_raw_tp_null_sched_pi_setprio_arg_2(void *ctx) { + asm volatile("r1 = *(u64 *)(r1 +8); r1 = *(u64 *)(r1 +0);" ::: __clobber_all); + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/read_bpf_task_storage_busy.c b/tools/testing/selftests/bpf/progs/read_bpf_task_storage_busy.c index 76556e0b42b2..69da05bb6c63 100644 --- a/tools/testing/selftests/bpf/progs/read_bpf_task_storage_busy.c +++ b/tools/testing/selftests/bpf/progs/read_bpf_task_storage_busy.c @@ -4,7 +4,7 @@ #include <bpf/bpf_helpers.h> #include <bpf/bpf_tracing.h> -extern bool CONFIG_PREEMPT __kconfig __weak; +extern bool CONFIG_PREEMPTION __kconfig __weak; extern const int bpf_task_storage_busy __ksym; char _license[] SEC("license") = "GPL"; @@ -24,7 +24,7 @@ int BPF_PROG(read_bpf_task_storage_busy) { int *value; - if (!CONFIG_PREEMPT) + if (!CONFIG_PREEMPTION) return 0; if (bpf_get_current_pid_tgid() >> 32 != pid) diff --git a/tools/testing/selftests/bpf/progs/struct_ops_detach.c b/tools/testing/selftests/bpf/progs/struct_ops_detach.c index 56b787a89876..d7fdcabe7d90 100644 --- a/tools/testing/selftests/bpf/progs/struct_ops_detach.c +++ b/tools/testing/selftests/bpf/progs/struct_ops_detach.c @@ -6,5 +6,17 @@ char _license[] SEC("license") = "GPL"; +/* + * This subprogram validates that libbpf handles the situation in which BPF + * object has subprograms in .text section, but has no entry BPF programs. + * At some point that was causing issues due to legacy logic of treating such + * subprogram as entry program (with unknown program type, which would fail). + */ +int dangling_subprog(void) +{ + /* do nothing, just be here */ + return 0; +} + SEC(".struct_ops.link") struct bpf_testmod_ops testmod_do_detach; diff --git a/tools/testing/selftests/bpf/progs/struct_ops_private_stack.c b/tools/testing/selftests/bpf/progs/struct_ops_private_stack.c new file mode 100644 index 000000000000..8ea57e5348ab --- /dev/null +++ b/tools/testing/selftests/bpf/progs/struct_ops_private_stack.c @@ -0,0 +1,62 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <vmlinux.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> +#include "../bpf_testmod/bpf_testmod.h" + +char _license[] SEC("license") = "GPL"; + +#if defined(__TARGET_ARCH_x86) +bool skip __attribute((__section__(".data"))) = false; +#else +bool skip = true; +#endif + +void bpf_testmod_ops3_call_test_2(void) __ksym; + +int val_i, val_j; + +__noinline static int subprog2(int *a, int *b) +{ + return val_i + a[10] + b[20]; +} + +__noinline static int subprog1(int *a) +{ + /* stack size 200 bytes */ + int b[50] = {}; + + b[20] = 2; + return subprog2(a, b); +} + + +SEC("struct_ops") +int BPF_PROG(test_1) +{ + /* stack size 400 bytes */ + int a[100] = {}; + + a[10] = 1; + val_i = subprog1(a); + bpf_testmod_ops3_call_test_2(); + return 0; +} + +SEC("struct_ops") +int BPF_PROG(test_2) +{ + /* stack size 200 bytes */ + int a[50] = {}; + + a[10] = 3; + val_j = subprog1(a); + return 0; +} + +SEC(".struct_ops") +struct bpf_testmod_ops3 testmod_1 = { + .test_1 = (void *)test_1, + .test_2 = (void *)test_2, +}; diff --git a/tools/testing/selftests/bpf/progs/struct_ops_private_stack_fail.c b/tools/testing/selftests/bpf/progs/struct_ops_private_stack_fail.c new file mode 100644 index 000000000000..1f55ec4cee37 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/struct_ops_private_stack_fail.c @@ -0,0 +1,62 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <vmlinux.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> +#include "../bpf_testmod/bpf_testmod.h" + +char _license[] SEC("license") = "GPL"; + +#if defined(__TARGET_ARCH_x86) +bool skip __attribute((__section__(".data"))) = false; +#else +bool skip = true; +#endif + +void bpf_testmod_ops3_call_test_2(void) __ksym; + +int val_i, val_j; + +__noinline static int subprog2(int *a, int *b) +{ + return val_i + a[10] + b[20]; +} + +__noinline static int subprog1(int *a) +{ + /* stack size 200 bytes */ + int b[50] = {}; + + b[20] = 2; + return subprog2(a, b); +} + + +SEC("struct_ops") +int BPF_PROG(test_1) +{ + /* stack size 100 bytes */ + int a[25] = {}; + + a[10] = 1; + val_i = subprog1(a); + bpf_testmod_ops3_call_test_2(); + return 0; +} + +SEC("struct_ops") +int BPF_PROG(test_2) +{ + /* stack size 400 bytes */ + int a[100] = {}; + + a[10] = 3; + val_j = subprog1(a); + return 0; +} + +SEC(".struct_ops") +struct bpf_testmod_ops3 testmod_1 = { + .test_1 = (void *)test_1, + .test_2 = (void *)test_2, +}; diff --git a/tools/testing/selftests/bpf/progs/struct_ops_private_stack_recur.c b/tools/testing/selftests/bpf/progs/struct_ops_private_stack_recur.c new file mode 100644 index 000000000000..f2f300d50988 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/struct_ops_private_stack_recur.c @@ -0,0 +1,50 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <vmlinux.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> +#include "../bpf_testmod/bpf_testmod.h" + +char _license[] SEC("license") = "GPL"; + +#if defined(__TARGET_ARCH_x86) +bool skip __attribute((__section__(".data"))) = false; +#else +bool skip = true; +#endif + +void bpf_testmod_ops3_call_test_1(void) __ksym; + +int val_i, val_j; + +__noinline static int subprog2(int *a, int *b) +{ + return val_i + a[1] + b[20]; +} + +__noinline static int subprog1(int *a) +{ + /* stack size 400 bytes */ + int b[100] = {}; + + b[20] = 2; + return subprog2(a, b); +} + + +SEC("struct_ops") +int BPF_PROG(test_1) +{ + /* stack size 20 bytes */ + int a[5] = {}; + + a[1] = 1; + val_j += subprog1(a); + bpf_testmod_ops3_call_test_1(); + return 0; +} + +SEC(".struct_ops") +struct bpf_testmod_ops3 testmod_1 = { + .test_1 = (void *)test_1, +}; diff --git a/tools/testing/selftests/bpf/progs/tailcall_fail.c b/tools/testing/selftests/bpf/progs/tailcall_fail.c new file mode 100644 index 000000000000..bc77921d2bb0 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/tailcall_fail.c @@ -0,0 +1,64 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */ +#include <vmlinux.h> +#include <bpf/bpf_tracing.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_core_read.h> + +#include "bpf_misc.h" +#include "bpf_experimental.h" + +extern void bpf_rcu_read_lock(void) __ksym; +extern void bpf_rcu_read_unlock(void) __ksym; + +#define private(name) SEC(".bss." #name) __hidden __attribute__((aligned(8))) + +private(A) struct bpf_spin_lock lock; + +struct { + __uint(type, BPF_MAP_TYPE_PROG_ARRAY); + __uint(max_entries, 3); + __uint(key_size, sizeof(__u32)); + __uint(value_size, sizeof(__u32)); +} jmp_table SEC(".maps"); + +SEC("?tc") +__failure __msg("function calls are not allowed while holding a lock") +int reject_tail_call_spin_lock(struct __sk_buff *ctx) +{ + bpf_spin_lock(&lock); + bpf_tail_call_static(ctx, &jmp_table, 0); + return 0; +} + +SEC("?tc") +__failure __msg("tail_call cannot be used inside bpf_rcu_read_lock-ed region") +int reject_tail_call_rcu_lock(struct __sk_buff *ctx) +{ + bpf_rcu_read_lock(); + bpf_tail_call_static(ctx, &jmp_table, 0); + bpf_rcu_read_unlock(); + return 0; +} + +SEC("?tc") +__failure __msg("tail_call cannot be used inside bpf_preempt_disable-ed region") +int reject_tail_call_preempt_lock(struct __sk_buff *ctx) +{ + bpf_guard_preempt(); + bpf_tail_call_static(ctx, &jmp_table, 0); + return 0; +} + +SEC("?tc") +__failure __msg("tail_call would lead to reference leak") +int reject_tail_call_ref(struct __sk_buff *ctx) +{ + struct foo { int i; } *p; + + p = bpf_obj_new(typeof(*p)); + bpf_tail_call_static(ctx, &jmp_table, 0); + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/task_kfunc_common.h b/tools/testing/selftests/bpf/progs/task_kfunc_common.h index 6720c4b5be41..e9c4fea7a4bb 100644 --- a/tools/testing/selftests/bpf/progs/task_kfunc_common.h +++ b/tools/testing/selftests/bpf/progs/task_kfunc_common.h @@ -23,6 +23,7 @@ struct { struct task_struct *bpf_task_acquire(struct task_struct *p) __ksym; void bpf_task_release(struct task_struct *p) __ksym; struct task_struct *bpf_task_from_pid(s32 pid) __ksym; +struct task_struct *bpf_task_from_vpid(s32 vpid) __ksym; void bpf_rcu_read_lock(void) __ksym; void bpf_rcu_read_unlock(void) __ksym; diff --git a/tools/testing/selftests/bpf/progs/task_kfunc_failure.c b/tools/testing/selftests/bpf/progs/task_kfunc_failure.c index ad88a3796ddf..4c07ea193f72 100644 --- a/tools/testing/selftests/bpf/progs/task_kfunc_failure.c +++ b/tools/testing/selftests/bpf/progs/task_kfunc_failure.c @@ -247,6 +247,20 @@ int BPF_PROG(task_kfunc_from_pid_no_null_check, struct task_struct *task, u64 cl return 0; } +SEC("tp_btf/task_newtask") +__failure __msg("Possibly NULL pointer passed to trusted arg0") +int BPF_PROG(task_kfunc_from_vpid_no_null_check, struct task_struct *task, u64 clone_flags) +{ + struct task_struct *acquired; + + acquired = bpf_task_from_vpid(task->pid); + + /* Releasing bpf_task_from_vpid() lookup without a NULL check. */ + bpf_task_release(acquired); + + return 0; +} + SEC("lsm/task_free") __failure __msg("R1 must be a rcu pointer") int BPF_PROG(task_kfunc_from_lsm_task_free, struct task_struct *task) diff --git a/tools/testing/selftests/bpf/progs/task_kfunc_success.c b/tools/testing/selftests/bpf/progs/task_kfunc_success.c index a55149015063..5fb4fc19d26a 100644 --- a/tools/testing/selftests/bpf/progs/task_kfunc_success.c +++ b/tools/testing/selftests/bpf/progs/task_kfunc_success.c @@ -366,3 +366,54 @@ int BPF_PROG(task_kfunc_acquire_trusted_walked, struct task_struct *task, u64 cl return 0; } + +SEC("syscall") +int test_task_from_vpid_current(const void *ctx) +{ + struct task_struct *current, *v_task; + + v_task = bpf_task_from_vpid(1); + if (!v_task) { + err = 1; + return 0; + } + + current = bpf_get_current_task_btf(); + + /* The current process should be the init process (pid 1) in the new pid namespace. */ + if (current != v_task) + err = 2; + + bpf_task_release(v_task); + return 0; +} + +SEC("syscall") +int test_task_from_vpid_invalid(const void *ctx) +{ + struct task_struct *v_task; + + v_task = bpf_task_from_vpid(-1); + if (v_task) { + err = 1; + goto err; + } + + /* There should be only one process (current process) in the new pid namespace. */ + v_task = bpf_task_from_vpid(2); + if (v_task) { + err = 2; + goto err; + } + + v_task = bpf_task_from_vpid(9999); + if (v_task) { + err = 3; + goto err; + } + + return 0; +err: + bpf_task_release(v_task); + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/task_ls_uptr.c b/tools/testing/selftests/bpf/progs/task_ls_uptr.c new file mode 100644 index 000000000000..ddbe11b46eef --- /dev/null +++ b/tools/testing/selftests/bpf/progs/task_ls_uptr.c @@ -0,0 +1,63 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */ + +#include <vmlinux.h> +#include <bpf/bpf_helpers.h> +#include "uptr_test_common.h" + +struct task_struct *bpf_task_from_pid(s32 pid) __ksym; +void bpf_task_release(struct task_struct *p) __ksym; +void bpf_cgroup_release(struct cgroup *cgrp) __ksym; + +struct { + __uint(type, BPF_MAP_TYPE_TASK_STORAGE); + __uint(map_flags, BPF_F_NO_PREALLOC); + __type(key, int); + __type(value, struct value_type); +} datamap SEC(".maps"); + +pid_t target_pid = 0; +pid_t parent_pid = 0; + +SEC("tp_btf/sys_enter") +int on_enter(__u64 *ctx) +{ + struct task_struct *task, *data_task; + struct value_type *ptr; + struct user_data *udata; + struct cgroup *cgrp; + + task = bpf_get_current_task_btf(); + if (task->pid != target_pid) + return 0; + + data_task = bpf_task_from_pid(parent_pid); + if (!data_task) + return 0; + + ptr = bpf_task_storage_get(&datamap, data_task, 0, 0); + bpf_task_release(data_task); + if (!ptr) + return 0; + + cgrp = bpf_kptr_xchg(&ptr->cgrp, NULL); + if (cgrp) { + int lvl = cgrp->level; + + bpf_cgroup_release(cgrp); + return lvl; + } + + udata = ptr->udata; + if (!udata || udata->result) + return 0; + udata->result = MAGIC_VALUE + udata->a + udata->b; + + udata = ptr->nested.udata; + if (udata && !udata->nested_result) + udata->nested_result = udata->result; + + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/task_storage_nodeadlock.c b/tools/testing/selftests/bpf/progs/task_storage_nodeadlock.c index ea2dbb80f7b3..986829aaf73a 100644 --- a/tools/testing/selftests/bpf/progs/task_storage_nodeadlock.c +++ b/tools/testing/selftests/bpf/progs/task_storage_nodeadlock.c @@ -10,7 +10,7 @@ char _license[] SEC("license") = "GPL"; #define EBUSY 16 #endif -extern bool CONFIG_PREEMPT __kconfig __weak; +extern bool CONFIG_PREEMPTION __kconfig __weak; int nr_get_errs = 0; int nr_del_errs = 0; @@ -29,7 +29,7 @@ int BPF_PROG(socket_post_create, struct socket *sock, int family, int type, int ret, zero = 0; int *value; - if (!CONFIG_PREEMPT) + if (!CONFIG_PREEMPTION) return 0; task = bpf_get_current_task_btf(); diff --git a/tools/testing/selftests/bpf/progs/tc_bpf2bpf.c b/tools/testing/selftests/bpf/progs/tc_bpf2bpf.c index 8a0632c37839..fe6249d99b31 100644 --- a/tools/testing/selftests/bpf/progs/tc_bpf2bpf.c +++ b/tools/testing/selftests/bpf/progs/tc_bpf2bpf.c @@ -5,18 +5,21 @@ #include "bpf_misc.h" __noinline -int subprog(struct __sk_buff *skb) +int subprog_tc(struct __sk_buff *skb) { int ret = 1; + __sink(skb); __sink(ret); + /* let verifier know that 'subprog_tc' can change pointers to skb->data */ + bpf_skb_change_proto(skb, 0, 0); return ret; } SEC("tc") int entry_tc(struct __sk_buff *skb) { - return subprog(skb); + return subprog_tc(skb); } char __license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_btf_skc_cls_ingress.c b/tools/testing/selftests/bpf/progs/test_btf_skc_cls_ingress.c index f0759efff6ef..1cd1a1b72cb5 100644 --- a/tools/testing/selftests/bpf/progs/test_btf_skc_cls_ingress.c +++ b/tools/testing/selftests/bpf/progs/test_btf_skc_cls_ingress.c @@ -10,16 +10,18 @@ #endif struct sockaddr_in6 srv_sa6 = {}; +struct sockaddr_in srv_sa4 = {}; __u16 listen_tp_sport = 0; __u16 req_sk_sport = 0; __u32 recv_cookie = 0; __u32 gen_cookie = 0; +__u32 mss = 0; __u32 linum = 0; #define LOG() ({ if (!linum) linum = __LINE__; }) -static void test_syncookie_helper(struct ipv6hdr *ip6h, struct tcphdr *th, - struct tcp_sock *tp, +static void test_syncookie_helper(void *iphdr, int iphdr_size, + struct tcphdr *th, struct tcp_sock *tp, struct __sk_buff *skb) { if (th->syn) { @@ -38,17 +40,18 @@ static void test_syncookie_helper(struct ipv6hdr *ip6h, struct tcphdr *th, return; } - mss_cookie = bpf_tcp_gen_syncookie(tp, ip6h, sizeof(*ip6h), + mss_cookie = bpf_tcp_gen_syncookie(tp, iphdr, iphdr_size, th, 40); if (mss_cookie < 0) { if (mss_cookie != -ENOENT) LOG(); } else { gen_cookie = (__u32)mss_cookie; + mss = mss_cookie >> 32; } } else if (gen_cookie) { /* It was in cookie mode */ - int ret = bpf_tcp_check_syncookie(tp, ip6h, sizeof(*ip6h), + int ret = bpf_tcp_check_syncookie(tp, iphdr, iphdr_size, th, sizeof(*th)); if (ret < 0) { @@ -60,26 +63,58 @@ static void test_syncookie_helper(struct ipv6hdr *ip6h, struct tcphdr *th, } } -static int handle_ip6_tcp(struct ipv6hdr *ip6h, struct __sk_buff *skb) +static int handle_ip_tcp(struct ethhdr *eth, struct __sk_buff *skb) { - struct bpf_sock_tuple *tuple; + struct bpf_sock_tuple *tuple = NULL; + unsigned int tuple_len = 0; struct bpf_sock *bpf_skc; - unsigned int tuple_len; + void *data_end, *iphdr; + struct ipv6hdr *ip6h; + struct iphdr *ip4h; struct tcphdr *th; - void *data_end; + int iphdr_size; data_end = (void *)(long)(skb->data_end); - th = (struct tcphdr *)(ip6h + 1); - if (th + 1 > data_end) - return TC_ACT_OK; - - /* Is it the testing traffic? */ - if (th->dest != srv_sa6.sin6_port) + switch (eth->h_proto) { + case bpf_htons(ETH_P_IP): + ip4h = (struct iphdr *)(eth + 1); + if (ip4h + 1 > data_end) + return TC_ACT_OK; + if (ip4h->protocol != IPPROTO_TCP) + return TC_ACT_OK; + th = (struct tcphdr *)(ip4h + 1); + if (th + 1 > data_end) + return TC_ACT_OK; + /* Is it the testing traffic? */ + if (th->dest != srv_sa4.sin_port) + return TC_ACT_OK; + tuple_len = sizeof(tuple->ipv4); + tuple = (struct bpf_sock_tuple *)&ip4h->saddr; + iphdr = ip4h; + iphdr_size = sizeof(*ip4h); + break; + case bpf_htons(ETH_P_IPV6): + ip6h = (struct ipv6hdr *)(eth + 1); + if (ip6h + 1 > data_end) + return TC_ACT_OK; + if (ip6h->nexthdr != IPPROTO_TCP) + return TC_ACT_OK; + th = (struct tcphdr *)(ip6h + 1); + if (th + 1 > data_end) + return TC_ACT_OK; + /* Is it the testing traffic? */ + if (th->dest != srv_sa6.sin6_port) + return TC_ACT_OK; + tuple_len = sizeof(tuple->ipv6); + tuple = (struct bpf_sock_tuple *)&ip6h->saddr; + iphdr = ip6h; + iphdr_size = sizeof(*ip6h); + break; + default: return TC_ACT_OK; + } - tuple_len = sizeof(tuple->ipv6); - tuple = (struct bpf_sock_tuple *)&ip6h->saddr; if ((void *)tuple + tuple_len > data_end) { LOG(); return TC_ACT_OK; @@ -126,7 +161,7 @@ static int handle_ip6_tcp(struct ipv6hdr *ip6h, struct __sk_buff *skb) listen_tp_sport = tp->inet_conn.icsk_inet.sk.__sk_common.skc_num; - test_syncookie_helper(ip6h, th, tp, skb); + test_syncookie_helper(iphdr, iphdr_size, th, tp, skb); bpf_sk_release(tp); return TC_ACT_OK; } @@ -142,7 +177,6 @@ release: SEC("tc") int cls_ingress(struct __sk_buff *skb) { - struct ipv6hdr *ip6h; struct ethhdr *eth; void *data_end; @@ -152,17 +186,11 @@ int cls_ingress(struct __sk_buff *skb) if (eth + 1 > data_end) return TC_ACT_OK; - if (eth->h_proto != bpf_htons(ETH_P_IPV6)) - return TC_ACT_OK; - - ip6h = (struct ipv6hdr *)(eth + 1); - if (ip6h + 1 > data_end) + if (eth->h_proto != bpf_htons(ETH_P_IP) && + eth->h_proto != bpf_htons(ETH_P_IPV6)) return TC_ACT_OK; - if (ip6h->nexthdr == IPPROTO_TCP) - return handle_ip6_tcp(ip6h, skb); - - return TC_ACT_OK; + return handle_ip_tcp(eth, skb); } char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_kfunc_dynptr_param.c b/tools/testing/selftests/bpf/progs/test_kfunc_dynptr_param.c index e68667aec6a6..cd4d752bd089 100644 --- a/tools/testing/selftests/bpf/progs/test_kfunc_dynptr_param.c +++ b/tools/testing/selftests/bpf/progs/test_kfunc_dynptr_param.c @@ -45,7 +45,7 @@ int BPF_PROG(not_valid_dynptr, int cmd, union bpf_attr *attr, unsigned int size) } SEC("?lsm.s/bpf") -__failure __msg("arg#1 expected pointer to stack or const struct bpf_dynptr") +__failure __msg("arg#0 expected pointer to stack or const struct bpf_dynptr") int BPF_PROG(not_ptr_to_stack, int cmd, union bpf_attr *attr, unsigned int size) { unsigned long val = 0; diff --git a/tools/testing/selftests/bpf/progs/test_send_signal_kern.c b/tools/testing/selftests/bpf/progs/test_send_signal_kern.c index 92354cd72044..176a355e3062 100644 --- a/tools/testing/selftests/bpf/progs/test_send_signal_kern.c +++ b/tools/testing/selftests/bpf/progs/test_send_signal_kern.c @@ -1,27 +1,50 @@ // SPDX-License-Identifier: GPL-2.0 // Copyright (c) 2019 Facebook -#include <linux/bpf.h> +#include <vmlinux.h> #include <linux/version.h> #include <bpf/bpf_helpers.h> -__u32 sig = 0, pid = 0, status = 0, signal_thread = 0; +struct task_struct *bpf_task_from_pid(int pid) __ksym; +void bpf_task_release(struct task_struct *p) __ksym; +int bpf_send_signal_task(struct task_struct *task, int sig, enum pid_type type, u64 value) __ksym; + +__u32 sig = 0, pid = 0, status = 0, signal_thread = 0, target_pid = 0; static __always_inline int bpf_send_signal_test(void *ctx) { + struct task_struct *target_task = NULL; int ret; + u64 value; if (status != 0 || pid == 0) return 0; if ((bpf_get_current_pid_tgid() >> 32) == pid) { - if (signal_thread) - ret = bpf_send_signal_thread(sig); - else - ret = bpf_send_signal(sig); + if (target_pid) { + target_task = bpf_task_from_pid(target_pid); + if (!target_task) + return 0; + value = 8; + } + + if (signal_thread) { + if (target_pid) + ret = bpf_send_signal_task(target_task, sig, PIDTYPE_PID, value); + else + ret = bpf_send_signal_thread(sig); + } else { + if (target_pid) + ret = bpf_send_signal_task(target_task, sig, PIDTYPE_TGID, value); + else + ret = bpf_send_signal(sig); + } if (ret == 0) status = 1; } + if (target_task) + bpf_task_release(target_task); + return 0; } diff --git a/tools/testing/selftests/bpf/progs/test_sockmap_change_tail.c b/tools/testing/selftests/bpf/progs/test_sockmap_change_tail.c new file mode 100644 index 000000000000..2796dd8545eb --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_sockmap_change_tail.c @@ -0,0 +1,40 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2024 ByteDance */ +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> + +struct { + __uint(type, BPF_MAP_TYPE_SOCKMAP); + __uint(max_entries, 1); + __type(key, int); + __type(value, int); +} sock_map_rx SEC(".maps"); + +long change_tail_ret = 1; + +SEC("sk_skb") +int prog_skb_verdict(struct __sk_buff *skb) +{ + char *data, *data_end; + + bpf_skb_pull_data(skb, 1); + data = (char *)(unsigned long)skb->data; + data_end = (char *)(unsigned long)skb->data_end; + + if (data + 1 > data_end) + return SK_PASS; + + if (data[0] == 'T') { /* Trim the packet */ + change_tail_ret = bpf_skb_change_tail(skb, skb->len - 1, 0); + return SK_PASS; + } else if (data[0] == 'G') { /* Grow the packet */ + change_tail_ret = bpf_skb_change_tail(skb, skb->len + 1, 0); + return SK_PASS; + } else if (data[0] == 'E') { /* Error */ + change_tail_ret = bpf_skb_change_tail(skb, 65535, 0); + return SK_PASS; + } + return SK_PASS; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_spin_lock_fail.c b/tools/testing/selftests/bpf/progs/test_spin_lock_fail.c index 43f40c4fe241..1c8b678e2e9a 100644 --- a/tools/testing/selftests/bpf/progs/test_spin_lock_fail.c +++ b/tools/testing/selftests/bpf/progs/test_spin_lock_fail.c @@ -28,8 +28,8 @@ struct { }, }; -SEC(".data.A") struct bpf_spin_lock lockA; -SEC(".data.B") struct bpf_spin_lock lockB; +static struct bpf_spin_lock lockA SEC(".data.A"); +static struct bpf_spin_lock lockB SEC(".data.B"); SEC("?tc") int lock_id_kptr_preserve(void *ctx) diff --git a/tools/testing/selftests/bpf/progs/test_tc_change_tail.c b/tools/testing/selftests/bpf/progs/test_tc_change_tail.c new file mode 100644 index 000000000000..28edafe803f0 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_tc_change_tail.c @@ -0,0 +1,106 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> +#include <linux/if_ether.h> +#include <linux/in.h> +#include <linux/ip.h> +#include <linux/udp.h> +#include <linux/pkt_cls.h> + +long change_tail_ret = 1; + +static __always_inline struct iphdr *parse_ip_header(struct __sk_buff *skb, int *ip_proto) +{ + void *data_end = (void *)(long)skb->data_end; + void *data = (void *)(long)skb->data; + struct ethhdr *eth = data; + struct iphdr *iph; + + /* Verify Ethernet header */ + if ((void *)(data + sizeof(*eth)) > data_end) + return NULL; + + /* Skip Ethernet header to get to IP header */ + iph = (void *)(data + sizeof(struct ethhdr)); + + /* Verify IP header */ + if ((void *)(data + sizeof(struct ethhdr) + sizeof(*iph)) > data_end) + return NULL; + + /* Basic IP header validation */ + if (iph->version != 4) /* Only support IPv4 */ + return NULL; + + if (iph->ihl < 5) /* Minimum IP header length */ + return NULL; + + *ip_proto = iph->protocol; + return iph; +} + +static __always_inline struct udphdr *parse_udp_header(struct __sk_buff *skb, struct iphdr *iph) +{ + void *data_end = (void *)(long)skb->data_end; + void *hdr = (void *)iph; + struct udphdr *udp; + + /* Calculate UDP header position */ + udp = hdr + (iph->ihl * 4); + hdr = (void *)udp; + + /* Verify UDP header bounds */ + if ((void *)(hdr + sizeof(*udp)) > data_end) + return NULL; + + return udp; +} + +SEC("tc/ingress") +int change_tail(struct __sk_buff *skb) +{ + int len = skb->len; + struct udphdr *udp; + struct iphdr *iph; + void *data_end; + char *payload; + int ip_proto; + + bpf_skb_pull_data(skb, len); + + data_end = (void *)(long)skb->data_end; + iph = parse_ip_header(skb, &ip_proto); + if (!iph) + return TCX_PASS; + + if (ip_proto != IPPROTO_UDP) + return TCX_PASS; + + udp = parse_udp_header(skb, iph); + if (!udp) + return TCX_PASS; + + payload = (char *)udp + (sizeof(struct udphdr)); + if (payload + 1 > (char *)data_end) + return TCX_PASS; + + if (payload[0] == 'T') { /* Trim the packet */ + change_tail_ret = bpf_skb_change_tail(skb, len - 1, 0); + if (!change_tail_ret) + bpf_skb_change_tail(skb, len, 0); + return TCX_PASS; + } else if (payload[0] == 'G') { /* Grow the packet */ + change_tail_ret = bpf_skb_change_tail(skb, len + 1, 0); + if (!change_tail_ret) + bpf_skb_change_tail(skb, len, 0); + return TCX_PASS; + } else if (payload[0] == 'E') { /* Error */ + change_tail_ret = bpf_skb_change_tail(skb, 65535, 0); + return TCX_PASS; + } else if (payload[0] == 'Z') { /* Zero */ + change_tail_ret = bpf_skb_change_tail(skb, 0, 0); + return TCX_PASS; + } + return TCX_DROP; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_tc_link.c b/tools/testing/selftests/bpf/progs/test_tc_link.c index ab3eae3d6af8..10d825928499 100644 --- a/tools/testing/selftests/bpf/progs/test_tc_link.c +++ b/tools/testing/selftests/bpf/progs/test_tc_link.c @@ -18,6 +18,7 @@ bool seen_tc4; bool seen_tc5; bool seen_tc6; bool seen_tc7; +bool seen_tc8; bool set_type; @@ -25,6 +26,8 @@ bool seen_eth; bool seen_host; bool seen_mcast; +int mark, prio; + SEC("tc/ingress") int tc1(struct __sk_buff *skb) { @@ -100,3 +103,12 @@ out: seen_tc7 = true; return TCX_PASS; } + +SEC("tc/egress") +int tc8(struct __sk_buff *skb) +{ + seen_tc8 = true; + mark = skb->mark; + prio = skb->priority; + return TCX_PASS; +} diff --git a/tools/testing/selftests/bpf/progs/test_tcp_check_syncookie_kern.c b/tools/testing/selftests/bpf/progs/test_tcp_check_syncookie_kern.c deleted file mode 100644 index 6edebce563b5..000000000000 --- a/tools/testing/selftests/bpf/progs/test_tcp_check_syncookie_kern.c +++ /dev/null @@ -1,167 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -// Copyright (c) 2018 Facebook -// Copyright (c) 2019 Cloudflare - -#include <string.h> - -#include <linux/bpf.h> -#include <linux/pkt_cls.h> -#include <linux/if_ether.h> -#include <linux/in.h> -#include <linux/ip.h> -#include <linux/ipv6.h> -#include <sys/socket.h> -#include <linux/tcp.h> - -#include <bpf/bpf_helpers.h> -#include <bpf/bpf_endian.h> - -struct { - __uint(type, BPF_MAP_TYPE_ARRAY); - __type(key, __u32); - __type(value, __u32); - __uint(max_entries, 3); -} results SEC(".maps"); - -static __always_inline __s64 gen_syncookie(void *data_end, struct bpf_sock *sk, - void *iph, __u32 ip_size, - struct tcphdr *tcph) -{ - __u32 thlen = tcph->doff * 4; - - if (tcph->syn && !tcph->ack) { - // packet should only have an MSS option - if (thlen != 24) - return 0; - - if ((void *)tcph + thlen > data_end) - return 0; - - return bpf_tcp_gen_syncookie(sk, iph, ip_size, tcph, thlen); - } - return 0; -} - -static __always_inline void check_syncookie(void *ctx, void *data, - void *data_end) -{ - struct bpf_sock_tuple tup; - struct bpf_sock *sk; - struct ethhdr *ethh; - struct iphdr *ipv4h; - struct ipv6hdr *ipv6h; - struct tcphdr *tcph; - int ret; - __u32 key_mss = 2; - __u32 key_gen = 1; - __u32 key = 0; - __s64 seq_mss; - - ethh = data; - if (ethh + 1 > data_end) - return; - - switch (bpf_ntohs(ethh->h_proto)) { - case ETH_P_IP: - ipv4h = data + sizeof(struct ethhdr); - if (ipv4h + 1 > data_end) - return; - - if (ipv4h->ihl != 5) - return; - - tcph = data + sizeof(struct ethhdr) + sizeof(struct iphdr); - if (tcph + 1 > data_end) - return; - - tup.ipv4.saddr = ipv4h->saddr; - tup.ipv4.daddr = ipv4h->daddr; - tup.ipv4.sport = tcph->source; - tup.ipv4.dport = tcph->dest; - - sk = bpf_skc_lookup_tcp(ctx, &tup, sizeof(tup.ipv4), - BPF_F_CURRENT_NETNS, 0); - if (!sk) - return; - - if (sk->state != BPF_TCP_LISTEN) - goto release; - - seq_mss = gen_syncookie(data_end, sk, ipv4h, sizeof(*ipv4h), - tcph); - - ret = bpf_tcp_check_syncookie(sk, ipv4h, sizeof(*ipv4h), - tcph, sizeof(*tcph)); - break; - - case ETH_P_IPV6: - ipv6h = data + sizeof(struct ethhdr); - if (ipv6h + 1 > data_end) - return; - - if (ipv6h->nexthdr != IPPROTO_TCP) - return; - - tcph = data + sizeof(struct ethhdr) + sizeof(struct ipv6hdr); - if (tcph + 1 > data_end) - return; - - memcpy(tup.ipv6.saddr, &ipv6h->saddr, sizeof(tup.ipv6.saddr)); - memcpy(tup.ipv6.daddr, &ipv6h->daddr, sizeof(tup.ipv6.daddr)); - tup.ipv6.sport = tcph->source; - tup.ipv6.dport = tcph->dest; - - sk = bpf_skc_lookup_tcp(ctx, &tup, sizeof(tup.ipv6), - BPF_F_CURRENT_NETNS, 0); - if (!sk) - return; - - if (sk->state != BPF_TCP_LISTEN) - goto release; - - seq_mss = gen_syncookie(data_end, sk, ipv6h, sizeof(*ipv6h), - tcph); - - ret = bpf_tcp_check_syncookie(sk, ipv6h, sizeof(*ipv6h), - tcph, sizeof(*tcph)); - break; - - default: - return; - } - - if (seq_mss > 0) { - __u32 cookie = (__u32)seq_mss; - __u32 mss = seq_mss >> 32; - - bpf_map_update_elem(&results, &key_gen, &cookie, 0); - bpf_map_update_elem(&results, &key_mss, &mss, 0); - } - - if (ret == 0) { - __u32 cookie = bpf_ntohl(tcph->ack_seq) - 1; - - bpf_map_update_elem(&results, &key, &cookie, 0); - } - -release: - bpf_sk_release(sk); -} - -SEC("tc") -int check_syncookie_clsact(struct __sk_buff *skb) -{ - check_syncookie(skb, (void *)(long)skb->data, - (void *)(long)skb->data_end); - return TC_ACT_OK; -} - -SEC("xdp") -int check_syncookie_xdp(struct xdp_md *ctx) -{ - check_syncookie(ctx, (void *)(long)ctx->data, - (void *)(long)ctx->data_end); - return XDP_PASS; -} - -char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_xdp_with_cpumap_helpers.c b/tools/testing/selftests/bpf/progs/test_xdp_with_cpumap_helpers.c index 20ec6723df18..3619239b01b7 100644 --- a/tools/testing/selftests/bpf/progs/test_xdp_with_cpumap_helpers.c +++ b/tools/testing/selftests/bpf/progs/test_xdp_with_cpumap_helpers.c @@ -12,10 +12,12 @@ struct { __uint(max_entries, 4); } cpu_map SEC(".maps"); +__u32 redirect_count = 0; + SEC("xdp") int xdp_redir_prog(struct xdp_md *ctx) { - return bpf_redirect_map(&cpu_map, 1, 0); + return bpf_redirect_map(&cpu_map, 0, 0); } SEC("xdp") @@ -27,6 +29,9 @@ int xdp_dummy_prog(struct xdp_md *ctx) SEC("xdp/cpumap") int xdp_dummy_cm(struct xdp_md *ctx) { + if (bpf_get_smp_processor_id() == 0) + redirect_count++; + if (ctx->ingress_ifindex == IFINDEX_LO) return XDP_DROP; diff --git a/tools/testing/selftests/bpf/progs/test_xdp_with_devmap_helpers.c b/tools/testing/selftests/bpf/progs/test_xdp_with_devmap_helpers.c index 4139a14f9996..92b65a485d4a 100644 --- a/tools/testing/selftests/bpf/progs/test_xdp_with_devmap_helpers.c +++ b/tools/testing/selftests/bpf/progs/test_xdp_with_devmap_helpers.c @@ -12,7 +12,7 @@ struct { SEC("xdp") int xdp_redir_prog(struct xdp_md *ctx) { - return bpf_redirect_map(&dm_ports, 1, 0); + return bpf_redirect_map(&dm_ports, 0, 0); } /* invalid program on DEVMAP entry; diff --git a/tools/testing/selftests/bpf/progs/update_map_in_htab.c b/tools/testing/selftests/bpf/progs/update_map_in_htab.c new file mode 100644 index 000000000000..c2066247cd9c --- /dev/null +++ b/tools/testing/selftests/bpf/progs/update_map_in_htab.c @@ -0,0 +1,30 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (C) 2024. Huawei Technologies Co., Ltd */ +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> + +struct inner_map_type { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(key_size, 4); + __uint(value_size, 4); + __uint(max_entries, 1); +} inner_map SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_HASH_OF_MAPS); + __type(key, int); + __type(value, int); + __uint(max_entries, 2); + __array(values, struct inner_map_type); +} outer_htab_map SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_HASH_OF_MAPS); + __uint(map_flags, BPF_F_NO_PREALLOC); + __type(key, int); + __type(value, int); + __uint(max_entries, 2); + __array(values, struct inner_map_type); +} outer_alloc_htab_map SEC(".maps"); + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/uprobe_multi_consumers.c b/tools/testing/selftests/bpf/progs/uprobe_multi_consumers.c index 7e0fdcbbd242..93752bb5690b 100644 --- a/tools/testing/selftests/bpf/progs/uprobe_multi_consumers.c +++ b/tools/testing/selftests/bpf/progs/uprobe_multi_consumers.c @@ -24,16 +24,16 @@ int uprobe_1(struct pt_regs *ctx) return 0; } -SEC("uprobe.multi") +SEC("uprobe.session") int uprobe_2(struct pt_regs *ctx) { uprobe_result[2]++; return 0; } -SEC("uprobe.multi") +SEC("uprobe.session") int uprobe_3(struct pt_regs *ctx) { uprobe_result[3]++; - return 0; + return 1; } diff --git a/tools/testing/selftests/bpf/progs/uprobe_multi_session.c b/tools/testing/selftests/bpf/progs/uprobe_multi_session.c new file mode 100644 index 000000000000..30bff90b68dc --- /dev/null +++ b/tools/testing/selftests/bpf/progs/uprobe_multi_session.c @@ -0,0 +1,71 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> +#include <stdbool.h> +#include "bpf_kfuncs.h" +#include "bpf_misc.h" + +char _license[] SEC("license") = "GPL"; + +__u64 uprobe_multi_func_1_addr = 0; +__u64 uprobe_multi_func_2_addr = 0; +__u64 uprobe_multi_func_3_addr = 0; + +__u64 uprobe_session_result[3] = {}; +__u64 uprobe_multi_sleep_result = 0; + +void *user_ptr = 0; +int pid = 0; + +static int uprobe_multi_check(void *ctx, bool is_return) +{ + const __u64 funcs[] = { + uprobe_multi_func_1_addr, + uprobe_multi_func_2_addr, + uprobe_multi_func_3_addr, + }; + unsigned int i; + __u64 addr; + + if (bpf_get_current_pid_tgid() >> 32 != pid) + return 1; + + addr = bpf_get_func_ip(ctx); + + for (i = 0; i < ARRAY_SIZE(funcs); i++) { + if (funcs[i] == addr) { + uprobe_session_result[i]++; + break; + } + } + + /* only uprobe_multi_func_2 executes return probe */ + if ((addr == uprobe_multi_func_1_addr) || + (addr == uprobe_multi_func_3_addr)) + return 1; + + return 0; +} + +SEC("uprobe.session//proc/self/exe:uprobe_multi_func_*") +int uprobe(struct pt_regs *ctx) +{ + return uprobe_multi_check(ctx, bpf_session_is_return()); +} + +static __always_inline bool verify_sleepable_user_copy(void) +{ + char data[9]; + + bpf_copy_from_user(data, sizeof(data), user_ptr); + return bpf_strncmp(data, sizeof(data), "test_data") == 0; +} + +SEC("uprobe.session.s//proc/self/exe:uprobe_multi_func_*") +int uprobe_sleepable(struct pt_regs *ctx) +{ + if (verify_sleepable_user_copy()) + uprobe_multi_sleep_result++; + return uprobe_multi_check(ctx, bpf_session_is_return()); +} diff --git a/tools/testing/selftests/bpf/progs/uprobe_multi_session_cookie.c b/tools/testing/selftests/bpf/progs/uprobe_multi_session_cookie.c new file mode 100644 index 000000000000..5befdf944dc6 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/uprobe_multi_session_cookie.c @@ -0,0 +1,48 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> +#include <stdbool.h> +#include "bpf_kfuncs.h" + +char _license[] SEC("license") = "GPL"; + +int pid = 0; + +__u64 test_uprobe_1_result = 0; +__u64 test_uprobe_2_result = 0; +__u64 test_uprobe_3_result = 0; + +static int check_cookie(__u64 val, __u64 *result) +{ + __u64 *cookie; + + if (bpf_get_current_pid_tgid() >> 32 != pid) + return 1; + + cookie = bpf_session_cookie(); + + if (bpf_session_is_return()) + *result = *cookie == val ? val : 0; + else + *cookie = val; + return 0; +} + +SEC("uprobe.session//proc/self/exe:uprobe_multi_func_1") +int uprobe_1(struct pt_regs *ctx) +{ + return check_cookie(1, &test_uprobe_1_result); +} + +SEC("uprobe.session//proc/self/exe:uprobe_multi_func_2") +int uprobe_2(struct pt_regs *ctx) +{ + return check_cookie(2, &test_uprobe_2_result); +} + +SEC("uprobe.session//proc/self/exe:uprobe_multi_func_3") +int uprobe_3(struct pt_regs *ctx) +{ + return check_cookie(3, &test_uprobe_3_result); +} diff --git a/tools/testing/selftests/bpf/progs/uprobe_multi_session_recursive.c b/tools/testing/selftests/bpf/progs/uprobe_multi_session_recursive.c new file mode 100644 index 000000000000..8fbcd69fae22 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/uprobe_multi_session_recursive.c @@ -0,0 +1,44 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> +#include <stdbool.h> +#include "bpf_kfuncs.h" +#include "bpf_misc.h" + +char _license[] SEC("license") = "GPL"; + +int pid = 0; + +int idx_entry = 0; +int idx_return = 0; + +__u64 test_uprobe_cookie_entry[6]; +__u64 test_uprobe_cookie_return[3]; + +static int check_cookie(void) +{ + __u64 *cookie = bpf_session_cookie(); + + if (bpf_session_is_return()) { + if (idx_return >= ARRAY_SIZE(test_uprobe_cookie_return)) + return 1; + test_uprobe_cookie_return[idx_return++] = *cookie; + return 0; + } + + if (idx_entry >= ARRAY_SIZE(test_uprobe_cookie_entry)) + return 1; + *cookie = test_uprobe_cookie_entry[idx_entry]; + return idx_entry++ % 2; +} + + +SEC("uprobe.session//proc/self/exe:uprobe_session_recursive") +int uprobe_recursive(struct pt_regs *ctx) +{ + if (bpf_get_current_pid_tgid() >> 32 != pid) + return 1; + + return check_cookie(); +} diff --git a/tools/testing/selftests/bpf/progs/uprobe_multi_session_single.c b/tools/testing/selftests/bpf/progs/uprobe_multi_session_single.c new file mode 100644 index 000000000000..7c960376ae97 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/uprobe_multi_session_single.c @@ -0,0 +1,44 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> +#include <stdbool.h> +#include "bpf_kfuncs.h" +#include "bpf_misc.h" + +char _license[] SEC("license") = "GPL"; + +__u64 uprobe_session_result[3] = {}; +int pid = 0; + +static int uprobe_multi_check(void *ctx, int idx) +{ + if (bpf_get_current_pid_tgid() >> 32 != pid) + return 1; + + uprobe_session_result[idx]++; + + /* only consumer 1 executes return probe */ + if (idx == 0 || idx == 2) + return 1; + + return 0; +} + +SEC("uprobe.session//proc/self/exe:uprobe_multi_func_1") +int uprobe_0(struct pt_regs *ctx) +{ + return uprobe_multi_check(ctx, 0); +} + +SEC("uprobe.session//proc/self/exe:uprobe_multi_func_1") +int uprobe_1(struct pt_regs *ctx) +{ + return uprobe_multi_check(ctx, 1); +} + +SEC("uprobe.session//proc/self/exe:uprobe_multi_func_1") +int uprobe_2(struct pt_regs *ctx) +{ + return uprobe_multi_check(ctx, 2); +} diff --git a/tools/testing/selftests/bpf/progs/uprobe_multi_verifier.c b/tools/testing/selftests/bpf/progs/uprobe_multi_verifier.c new file mode 100644 index 000000000000..fe49f2cb5360 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/uprobe_multi_verifier.c @@ -0,0 +1,31 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> +#include <bpf/usdt.bpf.h> +#include "bpf_misc.h" + +char _license[] SEC("license") = "GPL"; + + +SEC("uprobe.session") +__success +int uprobe_sesison_return_0(struct pt_regs *ctx) +{ + return 0; +} + +SEC("uprobe.session") +__success +int uprobe_sesison_return_1(struct pt_regs *ctx) +{ + return 1; +} + +SEC("uprobe.session") +__failure +__msg("At program exit the register R0 has smin=2 smax=2 should have been in [0, 1]") +int uprobe_sesison_return_2(struct pt_regs *ctx) +{ + return 2; +} diff --git a/tools/testing/selftests/bpf/progs/uptr_failure.c b/tools/testing/selftests/bpf/progs/uptr_failure.c new file mode 100644 index 000000000000..0cfa1fd61440 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/uptr_failure.c @@ -0,0 +1,105 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */ + +#include <vmlinux.h> +#include <bpf/bpf_helpers.h> +#include "bpf_experimental.h" +#include "bpf_misc.h" +#include "uptr_test_common.h" + +struct { + __uint(type, BPF_MAP_TYPE_TASK_STORAGE); + __uint(map_flags, BPF_F_NO_PREALLOC); + __type(key, int); + __type(value, struct value_type); +} datamap SEC(".maps"); + +SEC("?syscall") +__failure __msg("store to uptr disallowed") +int uptr_write(const void *ctx) +{ + struct task_struct *task; + struct value_type *v; + + task = bpf_get_current_task_btf(); + v = bpf_task_storage_get(&datamap, task, 0, + BPF_LOCAL_STORAGE_GET_F_CREATE); + if (!v) + return 0; + + v->udata = NULL; + return 0; +} + +SEC("?syscall") +__failure __msg("store to uptr disallowed") +int uptr_write_nested(const void *ctx) +{ + struct task_struct *task; + struct value_type *v; + + task = bpf_get_current_task_btf(); + v = bpf_task_storage_get(&datamap, task, 0, + BPF_LOCAL_STORAGE_GET_F_CREATE); + if (!v) + return 0; + + v->nested.udata = NULL; + return 0; +} + +SEC("?syscall") +__failure __msg("R1 invalid mem access 'mem_or_null'") +int uptr_no_null_check(const void *ctx) +{ + struct task_struct *task; + struct value_type *v; + + task = bpf_get_current_task_btf(); + v = bpf_task_storage_get(&datamap, task, 0, + BPF_LOCAL_STORAGE_GET_F_CREATE); + if (!v) + return 0; + + v->udata->result = 0; + + return 0; +} + +SEC("?syscall") +__failure __msg("doesn't point to kptr") +int uptr_kptr_xchg(const void *ctx) +{ + struct task_struct *task; + struct value_type *v; + + task = bpf_get_current_task_btf(); + v = bpf_task_storage_get(&datamap, task, 0, + BPF_LOCAL_STORAGE_GET_F_CREATE); + if (!v) + return 0; + + bpf_kptr_xchg(&v->udata, NULL); + + return 0; +} + +SEC("?syscall") +__failure __msg("invalid mem access 'scalar'") +int uptr_obj_new(const void *ctx) +{ + struct value_type *v; + + v = bpf_obj_new(typeof(*v)); + if (!v) + return 0; + + if (v->udata) + v->udata->result = 0; + + bpf_obj_drop(v); + + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/uptr_map_failure.c b/tools/testing/selftests/bpf/progs/uptr_map_failure.c new file mode 100644 index 000000000000..417b763d76b4 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/uptr_map_failure.c @@ -0,0 +1,27 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */ + +#include <vmlinux.h> +#include <bpf/bpf_helpers.h> +#include "uptr_test_common.h" + +struct { + __uint(type, BPF_MAP_TYPE_TASK_STORAGE); + __uint(map_flags, BPF_F_NO_PREALLOC); + __type(key, int); + __type(value, struct large_uptr); +} large_uptr_map SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_TASK_STORAGE); + __uint(map_flags, BPF_F_NO_PREALLOC); + __type(key, int); + __type(value, struct empty_uptr); +} empty_uptr_map SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_TASK_STORAGE); + __uint(map_flags, BPF_F_NO_PREALLOC); + __type(key, int); + __type(value, struct kstruct_uptr); +} kstruct_uptr_map SEC(".maps"); diff --git a/tools/testing/selftests/bpf/progs/uptr_update_failure.c b/tools/testing/selftests/bpf/progs/uptr_update_failure.c new file mode 100644 index 000000000000..86c3bb954abc --- /dev/null +++ b/tools/testing/selftests/bpf/progs/uptr_update_failure.c @@ -0,0 +1,42 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */ + +#include <vmlinux.h> +#include <bpf/bpf_helpers.h> +#include "uptr_test_common.h" + +struct { + __uint(type, BPF_MAP_TYPE_TASK_STORAGE); + __uint(map_flags, BPF_F_NO_PREALLOC); + __type(key, int); + __type(value, struct value_lock_type); +} datamap SEC(".maps"); + +/* load test only. not used */ +SEC("syscall") +int not_used(void *ctx) +{ + struct value_lock_type *ptr; + struct task_struct *task; + struct user_data *udata; + + task = bpf_get_current_task_btf(); + ptr = bpf_task_storage_get(&datamap, task, 0, 0); + if (!ptr) + return 0; + + bpf_spin_lock(&ptr->lock); + + udata = ptr->udata; + if (!udata) { + bpf_spin_unlock(&ptr->lock); + return 0; + } + udata->result = MAGIC_VALUE + udata->a + udata->b; + + bpf_spin_unlock(&ptr->lock); + + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/verifier_arena_large.c b/tools/testing/selftests/bpf/progs/verifier_arena_large.c index 6065f862d964..f94f30cf1bb8 100644 --- a/tools/testing/selftests/bpf/progs/verifier_arena_large.c +++ b/tools/testing/selftests/bpf/progs/verifier_arena_large.c @@ -29,12 +29,12 @@ int big_alloc1(void *ctx) if (!page1) return 1; *page1 = 1; - page2 = bpf_arena_alloc_pages(&arena, base + ARENA_SIZE - PAGE_SIZE, + page2 = bpf_arena_alloc_pages(&arena, base + ARENA_SIZE - PAGE_SIZE * 2, 1, NUMA_NO_NODE, 0); if (!page2) return 2; *page2 = 2; - no_page = bpf_arena_alloc_pages(&arena, base + ARENA_SIZE, + no_page = bpf_arena_alloc_pages(&arena, base + ARENA_SIZE - PAGE_SIZE, 1, NUMA_NO_NODE, 0); if (no_page) return 3; @@ -66,4 +66,110 @@ int big_alloc1(void *ctx) #endif return 0; } + +#if defined(__BPF_FEATURE_ADDR_SPACE_CAST) +#define PAGE_CNT 100 +__u8 __arena * __arena page[PAGE_CNT]; /* occupies the first page */ +__u8 __arena *base; + +/* + * Check that arena's range_tree algorithm allocates pages sequentially + * on the first pass and then fills in all gaps on the second pass. + */ +__noinline int alloc_pages(int page_cnt, int pages_atonce, bool first_pass, + int max_idx, int step) +{ + __u8 __arena *pg; + int i, pg_idx; + + for (i = 0; i < page_cnt; i++) { + pg = bpf_arena_alloc_pages(&arena, NULL, pages_atonce, + NUMA_NO_NODE, 0); + if (!pg) + return step; + pg_idx = (unsigned long) (pg - base) / PAGE_SIZE; + if (first_pass) { + /* Pages must be allocated sequentially */ + if (pg_idx != i) + return step + 100; + } else { + /* Allocator must fill into gaps */ + if (pg_idx >= max_idx || (pg_idx & 1)) + return step + 200; + } + *pg = pg_idx; + page[pg_idx] = pg; + cond_break; + } + return 0; +} + +SEC("syscall") +__success __retval(0) +int big_alloc2(void *ctx) +{ + __u8 __arena *pg; + int i, err; + + base = bpf_arena_alloc_pages(&arena, NULL, 1, NUMA_NO_NODE, 0); + if (!base) + return 1; + bpf_arena_free_pages(&arena, (void __arena *)base, 1); + + err = alloc_pages(PAGE_CNT, 1, true, PAGE_CNT, 2); + if (err) + return err; + + /* Clear all even pages */ + for (i = 0; i < PAGE_CNT; i += 2) { + pg = page[i]; + if (*pg != i) + return 3; + bpf_arena_free_pages(&arena, (void __arena *)pg, 1); + page[i] = NULL; + cond_break; + } + + /* Allocate into freed gaps */ + err = alloc_pages(PAGE_CNT / 2, 1, false, PAGE_CNT, 4); + if (err) + return err; + + /* Free pairs of pages */ + for (i = 0; i < PAGE_CNT; i += 4) { + pg = page[i]; + if (*pg != i) + return 5; + bpf_arena_free_pages(&arena, (void __arena *)pg, 2); + page[i] = NULL; + page[i + 1] = NULL; + cond_break; + } + + /* Allocate 2 pages at a time into freed gaps */ + err = alloc_pages(PAGE_CNT / 4, 2, false, PAGE_CNT, 6); + if (err) + return err; + + /* Check pages without freeing */ + for (i = 0; i < PAGE_CNT; i += 2) { + pg = page[i]; + if (*pg != i) + return 7; + cond_break; + } + + pg = bpf_arena_alloc_pages(&arena, NULL, 1, NUMA_NO_NODE, 0); + + if (!pg) + return 8; + /* + * The first PAGE_CNT pages are occupied. The new page + * must be above. + */ + if ((pg - base) / PAGE_SIZE < PAGE_CNT) + return 9; + return 0; +} +#endif char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/verifier_array_access.c b/tools/testing/selftests/bpf/progs/verifier_array_access.c index 95d7ecc12963..4195aa824ba5 100644 --- a/tools/testing/selftests/bpf/progs/verifier_array_access.c +++ b/tools/testing/selftests/bpf/progs/verifier_array_access.c @@ -368,8 +368,7 @@ __naked void a_read_only_array_2_1(void) r4 = 0; \ r5 = 0; \ call %[bpf_csum_diff]; \ -l0_%=: r0 &= 0xffff; \ - exit; \ +l0_%=: exit; \ " : : __imm(bpf_csum_diff), __imm(bpf_map_lookup_elem), diff --git a/tools/testing/selftests/bpf/progs/verifier_bits_iter.c b/tools/testing/selftests/bpf/progs/verifier_bits_iter.c index f4da4d508ddb..8bcddadfc4da 100644 --- a/tools/testing/selftests/bpf/progs/verifier_bits_iter.c +++ b/tools/testing/selftests/bpf/progs/verifier_bits_iter.c @@ -15,6 +15,8 @@ int bpf_iter_bits_new(struct bpf_iter_bits *it, const u64 *unsafe_ptr__ign, int *bpf_iter_bits_next(struct bpf_iter_bits *it) __ksym __weak; void bpf_iter_bits_destroy(struct bpf_iter_bits *it) __ksym __weak; +u64 bits_array[511] = {}; + SEC("iter.s/cgroup") __description("bits iter without destroy") __failure __msg("Unreleased reference") @@ -30,18 +32,18 @@ int BPF_PROG(no_destroy, struct bpf_iter_meta *meta, struct cgroup *cgrp) SEC("iter/cgroup") __description("uninitialized iter in ->next()") -__failure __msg("expected an initialized iter_bits as arg #1") +__failure __msg("expected an initialized iter_bits as arg #0") int BPF_PROG(next_uninit, struct bpf_iter_meta *meta, struct cgroup *cgrp) { - struct bpf_iter_bits *it = NULL; + struct bpf_iter_bits it = {}; - bpf_iter_bits_next(it); + bpf_iter_bits_next(&it); return 0; } SEC("iter/cgroup") __description("uninitialized iter in ->destroy()") -__failure __msg("expected an initialized iter_bits as arg #1") +__failure __msg("expected an initialized iter_bits as arg #0") int BPF_PROG(destroy_uninit, struct bpf_iter_meta *meta, struct cgroup *cgrp) { struct bpf_iter_bits it = {}; @@ -55,9 +57,15 @@ __description("null pointer") __success __retval(0) int null_pointer(void) { - int nr = 0; + struct bpf_iter_bits iter; + int err, nr = 0; int *bit; + err = bpf_iter_bits_new(&iter, NULL, 1); + bpf_iter_bits_destroy(&iter); + if (err != -EINVAL) + return 1; + bpf_for_each(bits, bit, NULL, 1) nr++; return nr; @@ -110,16 +118,16 @@ int bit_index(void) } SEC("syscall") -__description("bits nomem") +__description("bits too big") __success __retval(0) -int bits_nomem(void) +int bits_too_big(void) { u64 data[4]; int nr = 0; int *bit; __builtin_memset(&data, 0xff, sizeof(data)); - bpf_for_each(bits, bit, &data[0], 513) /* Be greater than 512 */ + bpf_for_each(bits, bit, &data[0], 512) /* Be greater than 511 */ nr++; return nr; } @@ -151,3 +159,74 @@ int zero_words(void) nr++; return nr; } + +SEC("syscall") +__description("huge words") +__success __retval(0) +int huge_words(void) +{ + u64 data[8] = {0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1}; + int nr = 0; + int *bit; + + bpf_for_each(bits, bit, &data[0], 67108865) + nr++; + return nr; +} + +SEC("syscall") +__description("max words") +__success __retval(4) +int max_words(void) +{ + volatile int nr = 0; + int *bit; + + bits_array[0] = (1ULL << 63) | 1U; + bits_array[510] = (1ULL << 33) | (1ULL << 32); + + bpf_for_each(bits, bit, bits_array, 511) { + if (nr == 0 && *bit != 0) + break; + if (nr == 2 && *bit != 32672) + break; + nr++; + } + return nr; +} + +SEC("syscall") +__description("bad words") +__success __retval(0) +int bad_words(void) +{ + void *bad_addr = (void *)-4095; + struct bpf_iter_bits iter; + volatile int nr; + int *bit; + int err; + + err = bpf_iter_bits_new(&iter, bad_addr, 1); + bpf_iter_bits_destroy(&iter); + if (err != -EFAULT) + return 1; + + nr = 0; + bpf_for_each(bits, bit, bad_addr, 1) + nr++; + if (nr != 0) + return 2; + + err = bpf_iter_bits_new(&iter, bad_addr, 4); + bpf_iter_bits_destroy(&iter); + if (err != -EFAULT) + return 3; + + nr = 0; + bpf_for_each(bits, bit, bad_addr, 4) + nr++; + if (nr != 0) + return 4; + + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/verifier_bpf_fastcall.c b/tools/testing/selftests/bpf/progs/verifier_bpf_fastcall.c index 9da97d2efcd9..5094c288cfd7 100644 --- a/tools/testing/selftests/bpf/progs/verifier_bpf_fastcall.c +++ b/tools/testing/selftests/bpf/progs/verifier_bpf_fastcall.c @@ -790,61 +790,6 @@ __naked static void cumulative_stack_depth_subprog(void) :: __imm(bpf_get_smp_processor_id) : __clobber_all); } -SEC("raw_tp") -__arch_x86_64 -__log_level(4) -__msg("stack depth 512") -__xlated("0: r1 = 42") -__xlated("1: *(u64 *)(r10 -512) = r1") -__xlated("2: w0 = ") -__xlated("3: r0 = &(void __percpu *)(r0)") -__xlated("4: r0 = *(u32 *)(r0 +0)") -__xlated("5: exit") -__success -__naked int bpf_fastcall_max_stack_ok(void) -{ - asm volatile( - "r1 = 42;" - "*(u64 *)(r10 - %[max_bpf_stack]) = r1;" - "*(u64 *)(r10 - %[max_bpf_stack_8]) = r1;" - "call %[bpf_get_smp_processor_id];" - "r1 = *(u64 *)(r10 - %[max_bpf_stack_8]);" - "exit;" - : - : __imm_const(max_bpf_stack, MAX_BPF_STACK), - __imm_const(max_bpf_stack_8, MAX_BPF_STACK + 8), - __imm(bpf_get_smp_processor_id) - : __clobber_all - ); -} - -SEC("raw_tp") -__arch_x86_64 -__log_level(4) -__msg("stack depth 520") -__failure -__naked int bpf_fastcall_max_stack_fail(void) -{ - asm volatile( - "r1 = 42;" - "*(u64 *)(r10 - %[max_bpf_stack]) = r1;" - "*(u64 *)(r10 - %[max_bpf_stack_8]) = r1;" - "call %[bpf_get_smp_processor_id];" - "r1 = *(u64 *)(r10 - %[max_bpf_stack_8]);" - /* call to prandom blocks bpf_fastcall rewrite */ - "*(u64 *)(r10 - %[max_bpf_stack_8]) = r1;" - "call %[bpf_get_prandom_u32];" - "r1 = *(u64 *)(r10 - %[max_bpf_stack_8]);" - "exit;" - : - : __imm_const(max_bpf_stack, MAX_BPF_STACK), - __imm_const(max_bpf_stack_8, MAX_BPF_STACK + 8), - __imm(bpf_get_smp_processor_id), - __imm(bpf_get_prandom_u32) - : __clobber_all - ); -} - SEC("cgroup/getsockname_unix") __xlated("0: r2 = 1") /* bpf_cast_to_kern_ctx is replaced by a single assignment */ diff --git a/tools/testing/selftests/bpf/progs/verifier_btf_ctx_access.c b/tools/testing/selftests/bpf/progs/verifier_btf_ctx_access.c index a570e48b917a..28b939572cda 100644 --- a/tools/testing/selftests/bpf/progs/verifier_btf_ctx_access.c +++ b/tools/testing/selftests/bpf/progs/verifier_btf_ctx_access.c @@ -11,7 +11,7 @@ __success __retval(0) __naked void btf_ctx_access_accept(void) { asm volatile (" \ - r2 = *(u32*)(r1 + 8); /* load 2nd argument value (int pointer) */\ + r2 = *(u64 *)(r1 + 8); /* load 2nd argument value (int pointer) */\ r0 = 0; \ exit; \ " ::: __clobber_all); @@ -23,7 +23,43 @@ __success __retval(0) __naked void ctx_access_u32_pointer_accept(void) { asm volatile (" \ - r2 = *(u32*)(r1 + 0); /* load 1nd argument value (u32 pointer) */\ + r2 = *(u64 *)(r1 + 0); /* load 1nd argument value (u32 pointer) */\ + r0 = 0; \ + exit; \ +" ::: __clobber_all); +} + +SEC("fentry/bpf_fentry_test9") +__description("btf_ctx_access u32 pointer reject u32") +__failure __msg("size 4 must be 8") +__naked void ctx_access_u32_pointer_reject_32(void) +{ + asm volatile (" \ + r2 = *(u32 *)(r1 + 0); /* load 1st argument with narrow load */\ + r0 = 0; \ + exit; \ +" ::: __clobber_all); +} + +SEC("fentry/bpf_fentry_test9") +__description("btf_ctx_access u32 pointer reject u16") +__failure __msg("size 2 must be 8") +__naked void ctx_access_u32_pointer_reject_16(void) +{ + asm volatile (" \ + r2 = *(u16 *)(r1 + 0); /* load 1st argument with narrow load */\ + r0 = 0; \ + exit; \ +" ::: __clobber_all); +} + +SEC("fentry/bpf_fentry_test9") +__description("btf_ctx_access u32 pointer reject u8") +__failure __msg("size 1 must be 8") +__naked void ctx_access_u32_pointer_reject_8(void) +{ + asm volatile (" \ + r2 = *(u8 *)(r1 + 0); /* load 1st argument with narrow load */\ r0 = 0; \ exit; \ " ::: __clobber_all); diff --git a/tools/testing/selftests/bpf/progs/verifier_const.c b/tools/testing/selftests/bpf/progs/verifier_const.c index 2e533d7eec2f..e118dbb768bf 100644 --- a/tools/testing/selftests/bpf/progs/verifier_const.c +++ b/tools/testing/selftests/bpf/progs/verifier_const.c @@ -1,8 +1,9 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2024 Isovalent */ -#include <linux/bpf.h> +#include "vmlinux.h" #include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> #include "bpf_misc.h" const volatile long foo = 42; @@ -66,4 +67,32 @@ int tcx6(struct __sk_buff *skb) return TCX_PASS; } +static inline void write_fixed(volatile void *p, __u32 val) +{ + *(volatile __u32 *)p = val; +} + +static inline void write_dyn(void *p, void *val, int len) +{ + bpf_copy_from_user(p, len, val); +} + +SEC("tc/ingress") +__description("rodata/mark: write with unknown reg rejected") +__failure __msg("write into map forbidden") +int tcx7(struct __sk_buff *skb) +{ + write_fixed((void *)&foo, skb->mark); + return TCX_PASS; +} + +SEC("lsm.s/bprm_committed_creds") +__description("rodata/mark: write with unknown reg rejected") +__failure __msg("write into map forbidden") +int BPF_PROG(bprm, struct linux_binprm *bprm) +{ + write_dyn((void *)&foo, &bart, bpf_get_prandom_u32() & 3); + return 0; +} + char LICENSE[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/verifier_d_path.c b/tools/testing/selftests/bpf/progs/verifier_d_path.c index ec79cbcfde91..87e51a215558 100644 --- a/tools/testing/selftests/bpf/progs/verifier_d_path.c +++ b/tools/testing/selftests/bpf/progs/verifier_d_path.c @@ -11,7 +11,7 @@ __success __retval(0) __naked void d_path_accept(void) { asm volatile (" \ - r1 = *(u32*)(r1 + 0); \ + r1 = *(u64 *)(r1 + 0); \ r2 = r10; \ r2 += -8; \ r6 = 0; \ @@ -31,7 +31,7 @@ __failure __msg("helper call is not allowed in probe") __naked void d_path_reject(void) { asm volatile (" \ - r1 = *(u32*)(r1 + 0); \ + r1 = *(u64 *)(r1 + 0); \ r2 = r10; \ r2 += -8; \ r6 = 0; \ diff --git a/tools/testing/selftests/bpf/progs/verifier_linked_scalars.c b/tools/testing/selftests/bpf/progs/verifier_linked_scalars.c new file mode 100644 index 000000000000..8f755d2464cf --- /dev/null +++ b/tools/testing/selftests/bpf/progs/verifier_linked_scalars.c @@ -0,0 +1,34 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> +#include "bpf_misc.h" + +SEC("socket") +__description("scalars: find linked scalars") +__failure +__msg("math between fp pointer and 2147483647 is not allowed") +__naked void scalars(void) +{ + asm volatile (" \ + r0 = 0; \ + r1 = 0x80000001 ll; \ + r1 /= 1; \ + r2 = r1; \ + r4 = r1; \ + w2 += 0x7FFFFFFF; \ + w4 += 0; \ + if r2 == 0 goto l1; \ + exit; \ +l1: \ + r4 >>= 63; \ + r3 = 1; \ + r3 -= r4; \ + r3 *= 0x7FFFFFFF; \ + r3 += r10; \ + *(u8*)(r3 - 1) = r0; \ + exit; \ +" ::: __clobber_all); +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/verifier_movsx.c b/tools/testing/selftests/bpf/progs/verifier_movsx.c index 028ec855587b..994bbc346d25 100644 --- a/tools/testing/selftests/bpf/progs/verifier_movsx.c +++ b/tools/testing/selftests/bpf/progs/verifier_movsx.c @@ -287,6 +287,46 @@ l0_%=: \ : __clobber_all); } +SEC("socket") +__description("MOV64SX, S8, unsigned range_check") +__success __retval(0) +__naked void mov64sx_s8_range_check(void) +{ + asm volatile (" \ + call %[bpf_get_prandom_u32]; \ + r0 &= 0x1; \ + r0 += 0xfe; \ + r0 = (s8)r0; \ + if r0 < 0xfffffffffffffffe goto label_%=; \ + r0 = 0; \ + exit; \ +label_%=: \ + exit; \ +" : + : __imm(bpf_get_prandom_u32) + : __clobber_all); +} + +SEC("socket") +__description("MOV32SX, S8, unsigned range_check") +__success __retval(0) +__naked void mov32sx_s8_range_check(void) +{ + asm volatile (" \ + call %[bpf_get_prandom_u32]; \ + w0 &= 0x1; \ + w0 += 0xfe; \ + w0 = (s8)w0; \ + if w0 < 0xfffffffe goto label_%=; \ + r0 = 0; \ + exit; \ +label_%=: \ + exit; \ + " : + : __imm(bpf_get_prandom_u32) + : __clobber_all); +} + #else SEC("socket") diff --git a/tools/testing/selftests/bpf/progs/verifier_mtu.c b/tools/testing/selftests/bpf/progs/verifier_mtu.c new file mode 100644 index 000000000000..4ccf1ebc42d1 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/verifier_mtu.c @@ -0,0 +1,20 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> +#include "bpf_misc.h" + +SEC("tc/ingress") +__description("uninit/mtu: write rejected") +__success +__caps_unpriv(CAP_BPF|CAP_NET_ADMIN) +__failure_unpriv __msg_unpriv("invalid indirect read from stack") +int tc_uninit_mtu(struct __sk_buff *ctx) +{ + __u32 mtu; + + bpf_check_mtu(ctx, 0, &mtu, 0, 0); + return TCX_PASS; +} + +char LICENSE[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/verifier_private_stack.c b/tools/testing/selftests/bpf/progs/verifier_private_stack.c new file mode 100644 index 000000000000..b1fbdf119553 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/verifier_private_stack.c @@ -0,0 +1,272 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <vmlinux.h> +#include <bpf/bpf_helpers.h> +#include "bpf_misc.h" +#include "bpf_experimental.h" + +/* From include/linux/filter.h */ +#define MAX_BPF_STACK 512 + +#if defined(__TARGET_ARCH_x86) + +struct elem { + struct bpf_timer t; + char pad[256]; +}; + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 1); + __type(key, int); + __type(value, struct elem); +} array SEC(".maps"); + +SEC("kprobe") +__description("Private stack, single prog") +__success +__arch_x86_64 +__jited(" movabsq $0x{{.*}}, %r9") +__jited(" addq %gs:0x{{.*}}, %r9") +__jited(" movl $0x2a, %edi") +__jited(" movq %rdi, -0x100(%r9)") +__naked void private_stack_single_prog(void) +{ + asm volatile (" \ + r1 = 42; \ + *(u64 *)(r10 - 256) = r1; \ + r0 = 0; \ + exit; \ +" ::: __clobber_all); +} + +SEC("raw_tp") +__description("No private stack") +__success +__arch_x86_64 +__jited(" subq $0x8, %rsp") +__naked void no_private_stack_nested(void) +{ + asm volatile (" \ + r1 = 42; \ + *(u64 *)(r10 - 8) = r1; \ + r0 = 0; \ + exit; \ +" ::: __clobber_all); +} + +__used +__naked static void cumulative_stack_depth_subprog(void) +{ + asm volatile (" \ + r1 = 41; \ + *(u64 *)(r10 - 32) = r1; \ + call %[bpf_get_smp_processor_id]; \ + exit; \ +" : + : __imm(bpf_get_smp_processor_id) + : __clobber_all); +} + +SEC("kprobe") +__description("Private stack, subtree > MAX_BPF_STACK") +__success +__arch_x86_64 +/* private stack fp for the main prog */ +__jited(" movabsq $0x{{.*}}, %r9") +__jited(" addq %gs:0x{{.*}}, %r9") +__jited(" movl $0x2a, %edi") +__jited(" movq %rdi, -0x200(%r9)") +__jited(" pushq %r9") +__jited(" callq 0x{{.*}}") +__jited(" popq %r9") +__jited(" xorl %eax, %eax") +__naked void private_stack_nested_1(void) +{ + asm volatile (" \ + r1 = 42; \ + *(u64 *)(r10 - %[max_bpf_stack]) = r1; \ + call cumulative_stack_depth_subprog; \ + r0 = 0; \ + exit; \ +" : + : __imm_const(max_bpf_stack, MAX_BPF_STACK) + : __clobber_all); +} + +__naked __noinline __used +static unsigned long loop_callback(void) +{ + asm volatile (" \ + call %[bpf_get_prandom_u32]; \ + r1 = 42; \ + *(u64 *)(r10 - 512) = r1; \ + call cumulative_stack_depth_subprog; \ + r0 = 0; \ + exit; \ +" : + : __imm(bpf_get_prandom_u32) + : __clobber_common); +} + +SEC("raw_tp") +__description("Private stack, callback") +__success +__arch_x86_64 +/* for func loop_callback */ +__jited("func #1") +__jited(" endbr64") +__jited(" nopl (%rax,%rax)") +__jited(" nopl (%rax)") +__jited(" pushq %rbp") +__jited(" movq %rsp, %rbp") +__jited(" endbr64") +__jited(" movabsq $0x{{.*}}, %r9") +__jited(" addq %gs:0x{{.*}}, %r9") +__jited(" pushq %r9") +__jited(" callq") +__jited(" popq %r9") +__jited(" movl $0x2a, %edi") +__jited(" movq %rdi, -0x200(%r9)") +__jited(" pushq %r9") +__jited(" callq") +__jited(" popq %r9") +__naked void private_stack_callback(void) +{ + asm volatile (" \ + r1 = 1; \ + r2 = %[loop_callback]; \ + r3 = 0; \ + r4 = 0; \ + call %[bpf_loop]; \ + r0 = 0; \ + exit; \ +" : + : __imm_ptr(loop_callback), + __imm(bpf_loop) + : __clobber_common); +} + +SEC("fentry/bpf_fentry_test9") +__description("Private stack, exception in main prog") +__success __retval(0) +__arch_x86_64 +__jited(" pushq %r9") +__jited(" callq") +__jited(" popq %r9") +int private_stack_exception_main_prog(void) +{ + asm volatile (" \ + r1 = 42; \ + *(u64 *)(r10 - 512) = r1; \ +" ::: __clobber_common); + + bpf_throw(0); + return 0; +} + +__used static int subprog_exception(void) +{ + bpf_throw(0); + return 0; +} + +SEC("fentry/bpf_fentry_test9") +__description("Private stack, exception in subprog") +__success __retval(0) +__arch_x86_64 +__jited(" movq %rdi, -0x200(%r9)") +__jited(" pushq %r9") +__jited(" callq") +__jited(" popq %r9") +int private_stack_exception_sub_prog(void) +{ + asm volatile (" \ + r1 = 42; \ + *(u64 *)(r10 - 512) = r1; \ + call subprog_exception; \ +" ::: __clobber_common); + + return 0; +} + +int glob; +__noinline static void subprog2(int *val) +{ + glob += val[0] * 2; +} + +__noinline static void subprog1(int *val) +{ + int tmp[64] = {}; + + tmp[0] = *val; + subprog2(tmp); +} + +__noinline static int timer_cb1(void *map, int *key, struct bpf_timer *timer) +{ + subprog1(key); + return 0; +} + +__noinline static int timer_cb2(void *map, int *key, struct bpf_timer *timer) +{ + return 0; +} + +SEC("fentry/bpf_fentry_test9") +__description("Private stack, async callback, not nested") +__success __retval(0) +__arch_x86_64 +__jited(" movabsq $0x{{.*}}, %r9") +int private_stack_async_callback_1(void) +{ + struct bpf_timer *arr_timer; + int array_key = 0; + + arr_timer = bpf_map_lookup_elem(&array, &array_key); + if (!arr_timer) + return 0; + + bpf_timer_init(arr_timer, &array, 1); + bpf_timer_set_callback(arr_timer, timer_cb2); + bpf_timer_start(arr_timer, 0, 0); + subprog1(&array_key); + return 0; +} + +SEC("fentry/bpf_fentry_test9") +__description("Private stack, async callback, potential nesting") +__success __retval(0) +__arch_x86_64 +__jited(" subq $0x100, %rsp") +int private_stack_async_callback_2(void) +{ + struct bpf_timer *arr_timer; + int array_key = 0; + + arr_timer = bpf_map_lookup_elem(&array, &array_key); + if (!arr_timer) + return 0; + + bpf_timer_init(arr_timer, &array, 1); + bpf_timer_set_callback(arr_timer, timer_cb1); + bpf_timer_start(arr_timer, 0, 0); + subprog1(&array_key); + return 0; +} + +#else + +SEC("kprobe") +__description("private stack is not supported, use a dummy test") +__success +int dummy_test(void) +{ + return 0; +} + +#endif + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/verifier_ref_tracking.c b/tools/testing/selftests/bpf/progs/verifier_ref_tracking.c index c4c6da21265e..683a882b3e6d 100644 --- a/tools/testing/selftests/bpf/progs/verifier_ref_tracking.c +++ b/tools/testing/selftests/bpf/progs/verifier_ref_tracking.c @@ -791,7 +791,7 @@ l0_%=: r0 = *(u8*)skb[0]; \ SEC("tc") __description("reference tracking: forbid LD_ABS while holding reference") -__failure __msg("BPF_LD_[ABS|IND] cannot be mixed with socket references") +__failure __msg("BPF_LD_[ABS|IND] would lead to reference leak") __naked void ld_abs_while_holding_reference(void) { asm volatile (" \ @@ -836,7 +836,7 @@ l0_%=: r7 = 1; \ SEC("tc") __description("reference tracking: forbid LD_IND while holding reference") -__failure __msg("BPF_LD_[ABS|IND] cannot be mixed with socket references") +__failure __msg("BPF_LD_[ABS|IND] would lead to reference leak") __naked void ld_ind_while_holding_reference(void) { asm volatile (" \ diff --git a/tools/testing/selftests/bpf/progs/verifier_scalar_ids.c b/tools/testing/selftests/bpf/progs/verifier_scalar_ids.c index 2ecf77b623e0..7c5e5e6d10eb 100644 --- a/tools/testing/selftests/bpf/progs/verifier_scalar_ids.c +++ b/tools/testing/selftests/bpf/progs/verifier_scalar_ids.c @@ -760,4 +760,71 @@ __naked void two_old_ids_one_cur_id(void) : __clobber_all); } +SEC("socket") +/* Note the flag, see verifier.c:opt_subreg_zext_lo32_rnd_hi32() */ +__flag(BPF_F_TEST_RND_HI32) +__success +/* This test was added because of a bug in verifier.c:sync_linked_regs(), + * upon range propagation it destroyed subreg_def marks for registers. + * The subreg_def mark is used to decide whether zero extension instructions + * are needed when register is read. When BPF_F_TEST_RND_HI32 is set it + * also causes generation of statements to randomize upper halves of + * read registers. + * + * The test is written in a way to return an upper half of a register + * that is affected by range propagation and must have it's subreg_def + * preserved. This gives a return value of 0 and leads to undefined + * return value if subreg_def mark is not preserved. + */ +__retval(0) +/* Check that verifier believes r1/r0 are zero at exit */ +__log_level(2) +__msg("4: (77) r1 >>= 32 ; R1_w=0") +__msg("5: (bf) r0 = r1 ; R0_w=0 R1_w=0") +__msg("6: (95) exit") +__msg("from 3 to 4") +__msg("4: (77) r1 >>= 32 ; R1_w=0") +__msg("5: (bf) r0 = r1 ; R0_w=0 R1_w=0") +__msg("6: (95) exit") +/* Verify that statements to randomize upper half of r1 had not been + * generated. + */ +__xlated("call unknown") +__xlated("r0 &= 2147483647") +__xlated("w1 = w0") +/* This is how disasm.c prints BPF_ZEXT_REG at the moment, x86 and arm + * are the only CI archs that do not need zero extension for subregs. + */ +#if !defined(__TARGET_ARCH_x86) && !defined(__TARGET_ARCH_arm64) +__xlated("w1 = w1") +#endif +__xlated("if w0 < 0xa goto pc+0") +__xlated("r1 >>= 32") +__xlated("r0 = r1") +__xlated("exit") +__naked void linked_regs_and_subreg_def(void) +{ + asm volatile ( + "call %[bpf_ktime_get_ns];" + /* make sure r0 is in 32-bit range, otherwise w1 = w0 won't + * assign same IDs to registers. + */ + "r0 &= 0x7fffffff;" + /* link w1 and w0 via ID */ + "w1 = w0;" + /* 'if' statement propagates range info from w0 to w1, + * but should not affect w1->subreg_def property. + */ + "if w0 < 10 goto +0;" + /* r1 is read here, on archs that require subreg zero + * extension this would cause zext patch generation. + */ + "r1 >>= 32;" + "r0 = r1;" + "exit;" + : + : __imm(bpf_ktime_get_ns) + : __clobber_all); +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/verifier_search_pruning.c b/tools/testing/selftests/bpf/progs/verifier_search_pruning.c index 5a14498d352f..f40e57251e94 100644 --- a/tools/testing/selftests/bpf/progs/verifier_search_pruning.c +++ b/tools/testing/selftests/bpf/progs/verifier_search_pruning.c @@ -2,6 +2,7 @@ /* Converted from tools/testing/selftests/bpf/verifier/search_pruning.c */ #include <linux/bpf.h> +#include <../../../include/linux/filter.h> #include <bpf/bpf_helpers.h> #include "bpf_misc.h" @@ -336,4 +337,26 @@ l0_%=: r1 = 42; \ : __clobber_all); } +/* Without checkpoint forcibly inserted at the back-edge a loop this + * test would take a very long time to verify. + */ +SEC("kprobe") +__failure __log_level(4) +__msg("BPF program is too large.") +__naked void short_loop1(void) +{ + asm volatile ( + " r7 = *(u16 *)(r1 +0);" + "1: r7 += 0x1ab064b9;" + " .8byte %[jset];" /* same as 'if r7 & 0x702000 goto 1b;' */ + " r7 &= 0x1ee60e;" + " r7 += r1;" + " if r7 s> 0x37d2 goto +0;" + " r0 = 0;" + " exit;" + : + : __imm_insn(jset, BPF_JMP_IMM(BPF_JSET, BPF_REG_7, 0x702000, -2)) + : __clobber_all); +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/verifier_sock.c b/tools/testing/selftests/bpf/progs/verifier_sock.c index ee76b51005ab..0d5e56dffabb 100644 --- a/tools/testing/selftests/bpf/progs/verifier_sock.c +++ b/tools/testing/selftests/bpf/progs/verifier_sock.c @@ -50,6 +50,13 @@ struct { __uint(map_flags, BPF_F_NO_PREALLOC); } sk_storage_map SEC(".maps"); +struct { + __uint(type, BPF_MAP_TYPE_PROG_ARRAY); + __uint(max_entries, 1); + __uint(key_size, sizeof(__u32)); + __uint(value_size, sizeof(__u32)); +} jmp_table SEC(".maps"); + SEC("cgroup/skb") __description("skb->sk: no NULL check") __failure __msg("invalid mem access 'sock_common_or_null'") @@ -977,4 +984,113 @@ l1_%=: r0 = *(u8*)(r7 + 0); \ : __clobber_all); } +SEC("cgroup/post_bind4") +__description("sk->src_ip6[0] [load 1st byte]") +__failure __msg("invalid bpf_context access off=28 size=2") +__naked void post_bind4_read_src_ip6(void) +{ + asm volatile (" \ + r6 = r1; \ + r7 = *(u16*)(r6 + %[bpf_sock_src_ip6_0]); \ + r0 = 1; \ + exit; \ +" : + : __imm_const(bpf_sock_src_ip6_0, offsetof(struct bpf_sock, src_ip6[0])) + : __clobber_all); +} + +SEC("cgroup/post_bind4") +__description("sk->mark [load mark]") +__failure __msg("invalid bpf_context access off=16 size=2") +__naked void post_bind4_read_mark(void) +{ + asm volatile (" \ + r6 = r1; \ + r7 = *(u16*)(r6 + %[bpf_sock_mark]); \ + r0 = 1; \ + exit; \ +" : + : __imm_const(bpf_sock_mark, offsetof(struct bpf_sock, mark)) + : __clobber_all); +} + +SEC("cgroup/post_bind6") +__description("sk->src_ip4 [load src_ip4]") +__failure __msg("invalid bpf_context access off=24 size=2") +__naked void post_bind6_read_src_ip4(void) +{ + asm volatile (" \ + r6 = r1; \ + r7 = *(u16*)(r6 + %[bpf_sock_src_ip4]); \ + r0 = 1; \ + exit; \ +" : + : __imm_const(bpf_sock_src_ip4, offsetof(struct bpf_sock, src_ip4)) + : __clobber_all); +} + +SEC("cgroup/sock_create") +__description("sk->src_port [word load]") +__failure __msg("invalid bpf_context access off=44 size=2") +__naked void sock_create_read_src_port(void) +{ + asm volatile (" \ + r6 = r1; \ + r7 = *(u16*)(r6 + %[bpf_sock_src_port]); \ + r0 = 1; \ + exit; \ +" : + : __imm_const(bpf_sock_src_port, offsetof(struct bpf_sock, src_port)) + : __clobber_all); +} + +__noinline +long skb_pull_data2(struct __sk_buff *sk, __u32 len) +{ + return bpf_skb_pull_data(sk, len); +} + +__noinline +long skb_pull_data1(struct __sk_buff *sk, __u32 len) +{ + return skb_pull_data2(sk, len); +} + +/* global function calls bpf_skb_pull_data(), which invalidates packet + * pointers established before global function call. + */ +SEC("tc") +__failure __msg("invalid mem access") +int invalidate_pkt_pointers_from_global_func(struct __sk_buff *sk) +{ + int *p = (void *)(long)sk->data; + + if ((void *)(p + 1) > (void *)(long)sk->data_end) + return TCX_DROP; + skb_pull_data1(sk, 0); + *p = 42; /* this is unsafe */ + return TCX_PASS; +} + +__noinline +int tail_call(struct __sk_buff *sk) +{ + bpf_tail_call_static(sk, &jmp_table, 0); + return 0; +} + +/* Tail calls invalidate packet pointers. */ +SEC("tc") +__failure __msg("invalid mem access") +int invalidate_pkt_pointers_by_tail_call(struct __sk_buff *sk) +{ + int *p = (void *)(long)sk->data; + + if ((void *)(p + 1) > (void *)(long)sk->data_end) + return TCX_DROP; + tail_call(sk); + *p = 42; /* this is unsafe */ + return TCX_PASS; +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/verifier_spill_fill.c b/tools/testing/selftests/bpf/progs/verifier_spill_fill.c index 671d9f415dbf..1e5a511e8494 100644 --- a/tools/testing/selftests/bpf/progs/verifier_spill_fill.c +++ b/tools/testing/selftests/bpf/progs/verifier_spill_fill.c @@ -1244,4 +1244,39 @@ __naked void old_stack_misc_vs_cur_ctx_ptr(void) : __clobber_all); } +SEC("socket") +__description("stack_noperfmon: reject read of invalid slots") +__success +__caps_unpriv(CAP_BPF) +__failure_unpriv __msg_unpriv("invalid read from stack off -8+1 size 8") +__naked void stack_noperfmon_reject_invalid_read(void) +{ + asm volatile (" \ + r2 = 1; \ + r6 = r10; \ + r6 += -8; \ + *(u8 *)(r6 + 0) = r2; \ + r2 = *(u64 *)(r6 + 0); \ + r0 = 0; \ + exit; \ +" ::: __clobber_all); +} + +SEC("socket") +__description("stack_noperfmon: narrow spill onto 64-bit scalar spilled slots") +__success +__caps_unpriv(CAP_BPF) +__success_unpriv +__naked void stack_noperfmon_spill_32bit_onto_64bit_slot(void) +{ + asm volatile(" \ + r0 = 0; \ + *(u64 *)(r10 - 8) = r0; \ + *(u32 *)(r10 - 8) = r0; \ + exit; \ +" : + : + : __clobber_all); +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/verifier_spin_lock.c b/tools/testing/selftests/bpf/progs/verifier_spin_lock.c index fb316c080c84..3f679de73229 100644 --- a/tools/testing/selftests/bpf/progs/verifier_spin_lock.c +++ b/tools/testing/selftests/bpf/progs/verifier_spin_lock.c @@ -187,7 +187,7 @@ l0_%=: r6 = r0; \ SEC("cgroup/skb") __description("spin_lock: test6 missing unlock") -__failure __msg("unlock is missing") +__failure __msg("BPF_EXIT instruction cannot be used inside bpf_spin_lock-ed region") __failure_unpriv __msg_unpriv("") __naked void spin_lock_test6_missing_unlock(void) { diff --git a/tools/testing/selftests/bpf/progs/xdp_synproxy_kern.c b/tools/testing/selftests/bpf/progs/xdp_synproxy_kern.c index f8f5dc9f72b8..62b8e29ced9f 100644 --- a/tools/testing/selftests/bpf/progs/xdp_synproxy_kern.c +++ b/tools/testing/selftests/bpf/progs/xdp_synproxy_kern.c @@ -21,7 +21,6 @@ #define tcp_flag_word(tp) (((union tcp_word_hdr *)(tp))->words[3]) -#define IP_DF 0x4000 #define IP_MF 0x2000 #define IP_OFFSET 0x1fff @@ -442,7 +441,7 @@ static __always_inline int tcp_lookup(void *ctx, struct header_pointers *hdr, bo /* TCP doesn't normally use fragments, and XDP can't reassemble * them. */ - if ((hdr->ipv4->frag_off & bpf_htons(IP_DF | IP_MF | IP_OFFSET)) != bpf_htons(IP_DF)) + if ((hdr->ipv4->frag_off & bpf_htons(IP_MF | IP_OFFSET)) != 0) return XDP_DROP; tup.ipv4.saddr = hdr->ipv4->saddr; diff --git a/tools/testing/selftests/bpf/sdt.h b/tools/testing/selftests/bpf/sdt.h index ca0162b4dc57..1fcfa5160231 100644 --- a/tools/testing/selftests/bpf/sdt.h +++ b/tools/testing/selftests/bpf/sdt.h @@ -102,6 +102,8 @@ # define STAP_SDT_ARG_CONSTRAINT nZr # elif defined __arm__ # define STAP_SDT_ARG_CONSTRAINT g +# elif defined __loongarch__ +# define STAP_SDT_ARG_CONSTRAINT nmr # else # define STAP_SDT_ARG_CONSTRAINT nor # endif diff --git a/tools/testing/selftests/bpf/test_loader.c b/tools/testing/selftests/bpf/test_loader.c index 3e9b009580d4..53b06647cf57 100644 --- a/tools/testing/selftests/bpf/test_loader.c +++ b/tools/testing/selftests/bpf/test_loader.c @@ -36,6 +36,7 @@ #define TEST_TAG_ARCH "comment:test_arch=" #define TEST_TAG_JITED_PFX "comment:test_jited=" #define TEST_TAG_JITED_PFX_UNPRIV "comment:test_jited_unpriv=" +#define TEST_TAG_CAPS_UNPRIV "comment:test_caps_unpriv=" /* Warning: duplicated in bpf_misc.h */ #define POINTER_VALUE 0xcafe4all @@ -74,6 +75,7 @@ struct test_subspec { struct expected_msgs jited; int retval; bool execute; + __u64 caps; }; struct test_spec { @@ -276,6 +278,37 @@ static int parse_int(const char *str, int *val, const char *name) return 0; } +static int parse_caps(const char *str, __u64 *val, const char *name) +{ + int cap_flag = 0; + char *token = NULL, *saveptr = NULL; + + char *str_cpy = strdup(str); + if (str_cpy == NULL) { + PRINT_FAIL("Memory allocation failed\n"); + return -EINVAL; + } + + token = strtok_r(str_cpy, "|", &saveptr); + while (token != NULL) { + errno = 0; + if (!strncmp("CAP_", token, sizeof("CAP_") - 1)) { + PRINT_FAIL("define %s constant in bpf_misc.h, failed to parse caps\n", token); + return -EINVAL; + } + cap_flag = strtol(token, NULL, 10); + if (!cap_flag || errno) { + PRINT_FAIL("failed to parse caps %s\n", name); + return -EINVAL; + } + *val |= (1ULL << cap_flag); + token = strtok_r(NULL, "|", &saveptr); + } + + free(str_cpy); + return 0; +} + static int parse_retval(const char *str, int *val, const char *name) { struct { @@ -541,6 +574,12 @@ static int parse_test_spec(struct test_loader *tester, jit_on_next_line = true; } else if (str_has_pfx(s, TEST_BTF_PATH)) { spec->btf_custom_path = s + sizeof(TEST_BTF_PATH) - 1; + } else if (str_has_pfx(s, TEST_TAG_CAPS_UNPRIV)) { + val = s + sizeof(TEST_TAG_CAPS_UNPRIV) - 1; + err = parse_caps(val, &spec->unpriv.caps, "test caps"); + if (err) + goto cleanup; + spec->mode_mask |= UNPRIV; } } @@ -917,6 +956,13 @@ void run_subtest(struct test_loader *tester, test__end_subtest(); return; } + if (subspec->caps) { + err = cap_enable_effective(subspec->caps, NULL); + if (err) { + PRINT_FAIL("failed to set capabilities: %i, %s\n", err, strerror(err)); + goto subtest_cleanup; + } + } } /* Implicitly reset to NULL if next test case doesn't specify */ diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c index 905d5981ace1..8b40e9496af1 100644 --- a/tools/testing/selftests/bpf/test_maps.c +++ b/tools/testing/selftests/bpf/test_maps.c @@ -26,10 +26,6 @@ #include "test_maps.h" #include "testing_helpers.h" -#ifndef ENOTSUPP -#define ENOTSUPP 524 -#endif - int skips; static struct bpf_map_create_opts map_opts = { .sz = sizeof(map_opts) }; diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c index c7a70e1a1085..6088d8222d59 100644 --- a/tools/testing/selftests/bpf/test_progs.c +++ b/tools/testing/selftests/bpf/test_progs.c @@ -16,15 +16,18 @@ #include <sys/socket.h> #include <sys/un.h> #include <bpf/btf.h> +#include <time.h> #include "json_writer.h" #include "network_helpers.h" +/* backtrace() and backtrace_symbols_fd() are glibc specific, + * use header file when glibc is available and provide stub + * implementations when another libc implementation is used. + */ #ifdef __GLIBC__ #include <execinfo.h> /* backtrace */ -#endif - -/* Default backtrace funcs if missing at link */ +#else __weak int backtrace(void **buffer, int size) { return 0; @@ -34,6 +37,7 @@ __weak void backtrace_symbols_fd(void *const *buffer, int size, int fd) { dprintf(fd, "<backtrace not supported>\n"); } +#endif /*__GLIBC__ */ int env_verbosity = 0; @@ -176,6 +180,88 @@ int usleep(useconds_t usec) return syscall(__NR_nanosleep, &ts, NULL); } +/* Watchdog timer is started by watchdog_start() and stopped by watchdog_stop(). + * If timer is active for longer than env.secs_till_notify, + * it prints the name of the current test to the stderr. + * If timer is active for longer than env.secs_till_kill, + * it kills the thread executing the test by sending a SIGSEGV signal to it. + */ +static void watchdog_timer_func(union sigval sigval) +{ + struct itimerspec timeout = {}; + char test_name[256]; + int err; + + if (env.subtest_state) + snprintf(test_name, sizeof(test_name), "%s/%s", + env.test->test_name, env.subtest_state->name); + else + snprintf(test_name, sizeof(test_name), "%s", + env.test->test_name); + + switch (env.watchdog_state) { + case WD_NOTIFY: + fprintf(env.stderr_saved, "WATCHDOG: test case %s executes for %d seconds...\n", + test_name, env.secs_till_notify); + timeout.it_value.tv_sec = env.secs_till_kill - env.secs_till_notify; + env.watchdog_state = WD_KILL; + err = timer_settime(env.watchdog, 0, &timeout, NULL); + if (err) + fprintf(env.stderr_saved, "Failed to arm watchdog timer\n"); + break; + case WD_KILL: + fprintf(env.stderr_saved, + "WATCHDOG: test case %s executes for %d seconds, terminating with SIGSEGV\n", + test_name, env.secs_till_kill); + pthread_kill(env.main_thread, SIGSEGV); + break; + } +} + +static void watchdog_start(void) +{ + struct itimerspec timeout = {}; + int err; + + if (env.secs_till_kill == 0) + return; + if (env.secs_till_notify > 0) { + env.watchdog_state = WD_NOTIFY; + timeout.it_value.tv_sec = env.secs_till_notify; + } else { + env.watchdog_state = WD_KILL; + timeout.it_value.tv_sec = env.secs_till_kill; + } + err = timer_settime(env.watchdog, 0, &timeout, NULL); + if (err) + fprintf(env.stderr_saved, "Failed to start watchdog timer\n"); +} + +static void watchdog_stop(void) +{ + struct itimerspec timeout = {}; + int err; + + env.watchdog_state = WD_NOTIFY; + err = timer_settime(env.watchdog, 0, &timeout, NULL); + if (err) + fprintf(env.stderr_saved, "Failed to stop watchdog timer\n"); +} + +static void watchdog_init(void) +{ + struct sigevent watchdog_sev = { + .sigev_notify = SIGEV_THREAD, + .sigev_notify_function = watchdog_timer_func, + }; + int err; + + env.main_thread = pthread_self(); + err = timer_create(CLOCK_MONOTONIC, &watchdog_sev, &env.watchdog); + if (err) + fprintf(stderr, "Failed to initialize watchdog timer\n"); +} + static bool should_run(struct test_selector *sel, int num, const char *name) { int i; @@ -512,6 +598,7 @@ bool test__start_subtest(const char *subtest_name) env.subtest_state = subtest_state; stdio_hijack_init(&subtest_state->log_buf, &subtest_state->log_cnt); + watchdog_start(); return true; } @@ -777,6 +864,7 @@ enum ARG_KEYS { ARG_DEBUG = -1, ARG_JSON_SUMMARY = 'J', ARG_TRAFFIC_MONITOR = 'm', + ARG_WATCHDOG_TIMEOUT = 'w', }; static const struct argp_option opts[] = { @@ -807,6 +895,8 @@ static const struct argp_option opts[] = { { "traffic-monitor", ARG_TRAFFIC_MONITOR, "NAMES", 0, "Monitor network traffic of tests with name matching the pattern (supports '*' wildcard)." }, #endif + { "watchdog-timeout", ARG_WATCHDOG_TIMEOUT, "SECONDS", 0, + "Kill the process if tests are not making progress for specified number of seconds." }, {}, }; @@ -868,6 +958,7 @@ static int libbpf_print_fn(enum libbpf_print_level level, va_copy(args2, args); vfprintf(libbpf_capture_stream, format, args2); + va_end(args2); } if (env.verbosity < VERBOSE_VERY && level == LIBBPF_DEBUG) @@ -1031,6 +1122,16 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state) true); break; #endif + case ARG_WATCHDOG_TIMEOUT: + env->secs_till_kill = atoi(arg); + if (env->secs_till_kill < 0) { + fprintf(stderr, "Invalid watchdog timeout: %s.\n", arg); + return -EINVAL; + } + if (env->secs_till_kill < env->secs_till_notify) { + env->secs_till_notify = 0; + } + break; default: return ARGP_ERR_UNKNOWN; } @@ -1259,10 +1360,12 @@ static void run_one_test(int test_num) stdio_hijack(&state->log_buf, &state->log_cnt); + watchdog_start(); if (test->run_test) test->run_test(); else if (test->run_serial_test) test->run_serial_test(); + watchdog_stop(); /* ensure last sub-test is finalized properly */ if (env.subtest_state) @@ -1703,6 +1806,7 @@ out: static int worker_main(int sock) { save_netns(); + watchdog_init(); while (true) { /* receive command */ @@ -1812,6 +1916,8 @@ int main(int argc, char **argv) sigaction(SIGSEGV, &sigact, NULL); + env.secs_till_notify = 10; + env.secs_till_kill = 120; err = argp_parse(&argp, argc, argv, 0, NULL, &env); if (err) return err; @@ -1820,6 +1926,8 @@ int main(int argc, char **argv) if (err) return err; + watchdog_init(); + /* Use libbpf 1.0 API mode */ libbpf_set_strict_mode(LIBBPF_STRICT_ALL); libbpf_set_print(libbpf_print_fn); diff --git a/tools/testing/selftests/bpf/test_progs.h b/tools/testing/selftests/bpf/test_progs.h index 7767d9a825ae..74de33ae37e5 100644 --- a/tools/testing/selftests/bpf/test_progs.h +++ b/tools/testing/selftests/bpf/test_progs.h @@ -131,6 +131,12 @@ struct test_env { pid_t *worker_pids; /* array of worker pids */ int *worker_socks; /* array of worker socks */ int *worker_current_test; /* array of current running test for each worker */ + + pthread_t main_thread; + int secs_till_notify; + int secs_till_kill; + timer_t watchdog; /* watch for stalled tests/subtests */ + enum { WD_NOTIFY, WD_KILL } watchdog_state; }; #define MAX_LOG_TRUNK_SIZE 8192 @@ -390,6 +396,14 @@ int test__join_cgroup(const char *path); ___ok; \ }) +#define ASSERT_ERR_FD(fd, name) ({ \ + static int duration = 0; \ + int ___fd = (fd); \ + bool ___ok = ___fd < 0; \ + CHECK(!___ok, (name), "unexpected fd: %d\n", ___fd); \ + ___ok; \ +}) + #define SYS(goto_label, fmt, ...) \ ({ \ char cmd[1024]; \ diff --git a/tools/testing/selftests/bpf/test_sockmap.c b/tools/testing/selftests/bpf/test_sockmap.c index 3e02d7267de8..fd2da2234cc9 100644 --- a/tools/testing/selftests/bpf/test_sockmap.c +++ b/tools/testing/selftests/bpf/test_sockmap.c @@ -56,6 +56,8 @@ static void running_handler(int a); #define BPF_SOCKHASH_FILENAME "test_sockhash_kern.bpf.o" #define CG_PATH "/sockmap" +#define EDATAINTEGRITY 2001 + /* global sockets */ int s1, s2, c1, c2, p1, p2; int test_cnt; @@ -86,6 +88,10 @@ int ktls; int peek_flag; int skb_use_parser; int txmsg_omit_skb_parser; +int verify_push_start; +int verify_push_len; +int verify_pop_start; +int verify_pop_len; static const struct option long_options[] = { {"help", no_argument, NULL, 'h' }, @@ -418,16 +424,18 @@ static int msg_loop_sendpage(int fd, int iov_length, int cnt, { bool drop = opt->drop_expected; unsigned char k = 0; + int i, j, fp; FILE *file; - int i, fp; file = tmpfile(); if (!file) { perror("create file for sendpage"); return 1; } - for (i = 0; i < iov_length * cnt; i++, k++) - fwrite(&k, sizeof(char), 1, file); + for (i = 0; i < cnt; i++, k = 0) { + for (j = 0; j < iov_length; j++, k++) + fwrite(&k, sizeof(char), 1, file); + } fflush(file); fseek(file, 0, SEEK_SET); @@ -510,42 +518,111 @@ unwind_iov: return -ENOMEM; } -static int msg_verify_data(struct msghdr *msg, int size, int chunk_sz) +/* In push or pop test, we need to do some calculations for msg_verify_data */ +static void msg_verify_date_prep(void) { - int i, j = 0, bytes_cnt = 0; - unsigned char k = 0; + int push_range_end = txmsg_start_push + txmsg_end_push - 1; + int pop_range_end = txmsg_start_pop + txmsg_pop - 1; + + if (txmsg_end_push && txmsg_pop && + txmsg_start_push <= pop_range_end && txmsg_start_pop <= push_range_end) { + /* The push range and the pop range overlap */ + int overlap_len; + + verify_push_start = txmsg_start_push; + verify_pop_start = txmsg_start_pop; + if (txmsg_start_push < txmsg_start_pop) + overlap_len = min(push_range_end - txmsg_start_pop + 1, txmsg_pop); + else + overlap_len = min(pop_range_end - txmsg_start_push + 1, txmsg_end_push); + verify_push_len = max(txmsg_end_push - overlap_len, 0); + verify_pop_len = max(txmsg_pop - overlap_len, 0); + } else { + /* Otherwise */ + verify_push_start = txmsg_start_push; + verify_pop_start = txmsg_start_pop; + verify_push_len = txmsg_end_push; + verify_pop_len = txmsg_pop; + } +} - for (i = 0; i < msg->msg_iovlen; i++) { +static int msg_verify_data(struct msghdr *msg, int size, int chunk_sz, + unsigned char *k_p, int *bytes_cnt_p, + int *check_cnt_p, int *push_p) +{ + int bytes_cnt = *bytes_cnt_p, check_cnt = *check_cnt_p, push = *push_p; + unsigned char k = *k_p; + int i, j; + + for (i = 0, j = 0; i < msg->msg_iovlen && size; i++, j = 0) { unsigned char *d = msg->msg_iov[i].iov_base; /* Special case test for skb ingress + ktls */ if (i == 0 && txmsg_ktls_skb) { if (msg->msg_iov[i].iov_len < 4) - return -EIO; + return -EDATAINTEGRITY; if (memcmp(d, "PASS", 4) != 0) { fprintf(stderr, "detected skb data error with skb ingress update @iov[%i]:%i \"%02x %02x %02x %02x\" != \"PASS\"\n", i, 0, d[0], d[1], d[2], d[3]); - return -EIO; + return -EDATAINTEGRITY; } j = 4; /* advance index past PASS header */ } for (; j < msg->msg_iov[i].iov_len && size; j++) { + if (push > 0 && + check_cnt == verify_push_start + verify_push_len - push) { + int skipped; +revisit_push: + skipped = push; + if (j + push >= msg->msg_iov[i].iov_len) + skipped = msg->msg_iov[i].iov_len - j; + push -= skipped; + size -= skipped; + j += skipped - 1; + check_cnt += skipped; + continue; + } + + if (verify_pop_len > 0 && check_cnt == verify_pop_start) { + bytes_cnt += verify_pop_len; + check_cnt += verify_pop_len; + k += verify_pop_len; + + if (bytes_cnt == chunk_sz) { + k = 0; + bytes_cnt = 0; + check_cnt = 0; + push = verify_push_len; + } + + if (push > 0 && + check_cnt == verify_push_start + verify_push_len - push) + goto revisit_push; + } + if (d[j] != k++) { fprintf(stderr, "detected data corruption @iov[%i]:%i %02x != %02x, %02x ?= %02x\n", i, j, d[j], k - 1, d[j+1], k); - return -EIO; + return -EDATAINTEGRITY; } bytes_cnt++; + check_cnt++; if (bytes_cnt == chunk_sz) { k = 0; bytes_cnt = 0; + check_cnt = 0; + push = verify_push_len; } size--; } } + *k_p = k; + *bytes_cnt_p = bytes_cnt; + *check_cnt_p = check_cnt; + *push_p = push; return 0; } @@ -598,10 +675,14 @@ static int msg_loop(int fd, int iov_count, int iov_length, int cnt, } clock_gettime(CLOCK_MONOTONIC, &s->end); } else { + float total_bytes, txmsg_pop_total, txmsg_push_total; int slct, recvp = 0, recv, max_fd = fd; - float total_bytes, txmsg_pop_total; int fd_flags = O_NONBLOCK; struct timeval timeout; + unsigned char k = 0; + int bytes_cnt = 0; + int check_cnt = 0; + int push = 0; fd_set w; fcntl(fd, fd_flags); @@ -615,12 +696,22 @@ static int msg_loop(int fd, int iov_count, int iov_length, int cnt, * This is really only useful for testing edge cases in code * paths. */ - total_bytes = (float)iov_count * (float)iov_length * (float)cnt; - if (txmsg_apply) + total_bytes = (float)iov_length * (float)cnt; + if (!opt->sendpage) + total_bytes *= (float)iov_count; + if (txmsg_apply) { + txmsg_push_total = txmsg_end_push * (total_bytes / txmsg_apply); txmsg_pop_total = txmsg_pop * (total_bytes / txmsg_apply); - else + } else { + txmsg_push_total = txmsg_end_push * cnt; txmsg_pop_total = txmsg_pop * cnt; + } + total_bytes += txmsg_push_total; total_bytes -= txmsg_pop_total; + if (data) { + msg_verify_date_prep(); + push = verify_push_len; + } err = clock_gettime(CLOCK_MONOTONIC, &s->start); if (err < 0) perror("recv start time"); @@ -693,10 +784,11 @@ static int msg_loop(int fd, int iov_count, int iov_length, int cnt, if (data) { int chunk_sz = opt->sendpage ? - iov_length * cnt : + iov_length : iov_length * iov_count; - errno = msg_verify_data(&msg, recv, chunk_sz); + errno = msg_verify_data(&msg, recv, chunk_sz, &k, &bytes_cnt, + &check_cnt, &push); if (errno) { perror("data verify msg failed"); goto out_errno; @@ -704,7 +796,11 @@ static int msg_loop(int fd, int iov_count, int iov_length, int cnt, if (recvp) { errno = msg_verify_data(&msg_peek, recvp, - chunk_sz); + chunk_sz, + &k, + &bytes_cnt, + &check_cnt, + &push); if (errno) { perror("data verify msg_peek failed"); goto out_errno; @@ -786,8 +882,6 @@ static int sendmsg_test(struct sockmap_options *opt) rxpid = fork(); if (rxpid == 0) { - if (txmsg_pop || txmsg_start_pop) - iov_buf -= (txmsg_pop - txmsg_start_pop + 1); if (opt->drop_expected || txmsg_ktls_skb_drop) _exit(0); @@ -812,7 +906,7 @@ static int sendmsg_test(struct sockmap_options *opt) s.bytes_sent, sent_Bps, sent_Bps/giga, s.bytes_recvd, recvd_Bps, recvd_Bps/giga, peek_flag ? "(peek_msg)" : ""); - if (err && txmsg_cork) + if (err && err != -EDATAINTEGRITY && txmsg_cork) err = 0; exit(err ? 1 : 0); } else if (rxpid == -1) { @@ -1456,8 +1550,8 @@ static void test_send_many(struct sockmap_options *opt, int cgrp) static void test_send_large(struct sockmap_options *opt, int cgrp) { - opt->iov_length = 256; - opt->iov_count = 1024; + opt->iov_length = 8192; + opt->iov_count = 32; opt->rate = 2; test_exec(cgrp, opt); } @@ -1485,8 +1579,12 @@ static void test_txmsg_redir(int cgrp, struct sockmap_options *opt) static void test_txmsg_redir_wait_sndmem(int cgrp, struct sockmap_options *opt) { - txmsg_redir = 1; opt->tx_wait_mem = true; + txmsg_redir = 1; + test_send_large(opt, cgrp); + + txmsg_redir = 1; + txmsg_apply = 4097; test_send_large(opt, cgrp); opt->tx_wait_mem = false; } @@ -1586,17 +1684,19 @@ static void test_txmsg_cork_hangs(int cgrp, struct sockmap_options *opt) static void test_txmsg_pull(int cgrp, struct sockmap_options *opt) { /* Test basic start/end */ + txmsg_pass = 1; txmsg_start = 1; txmsg_end = 2; test_send(opt, cgrp); /* Test >4k pull */ + txmsg_pass = 1; txmsg_start = 4096; txmsg_end = 9182; test_send_large(opt, cgrp); /* Test pull + redirect */ - txmsg_redir = 0; + txmsg_redir = 1; txmsg_start = 1; txmsg_end = 2; test_send(opt, cgrp); @@ -1618,12 +1718,16 @@ static void test_txmsg_pull(int cgrp, struct sockmap_options *opt) static void test_txmsg_pop(int cgrp, struct sockmap_options *opt) { + bool data = opt->data_test; + /* Test basic pop */ + txmsg_pass = 1; txmsg_start_pop = 1; txmsg_pop = 2; test_send_many(opt, cgrp); /* Test pop with >4k */ + txmsg_pass = 1; txmsg_start_pop = 4096; txmsg_pop = 4096; test_send_large(opt, cgrp); @@ -1634,6 +1738,12 @@ static void test_txmsg_pop(int cgrp, struct sockmap_options *opt) txmsg_pop = 2; test_send_many(opt, cgrp); + /* TODO: Test for pop + cork should be different, + * - It makes the layout of the received data difficult + * - It makes it hard to calculate the total_bytes in the recvmsg + * Temporarily skip the data integrity test for this case now. + */ + opt->data_test = false; /* Test pop + cork */ txmsg_redir = 0; txmsg_cork = 512; @@ -1647,16 +1757,21 @@ static void test_txmsg_pop(int cgrp, struct sockmap_options *opt) txmsg_start_pop = 1; txmsg_pop = 2; test_send_many(opt, cgrp); + opt->data_test = data; } static void test_txmsg_push(int cgrp, struct sockmap_options *opt) { + bool data = opt->data_test; + /* Test basic push */ + txmsg_pass = 1; txmsg_start_push = 1; txmsg_end_push = 1; test_send(opt, cgrp); /* Test push 4kB >4k */ + txmsg_pass = 1; txmsg_start_push = 4096; txmsg_end_push = 4096; test_send_large(opt, cgrp); @@ -1667,18 +1782,63 @@ static void test_txmsg_push(int cgrp, struct sockmap_options *opt) txmsg_end_push = 2; test_send_many(opt, cgrp); + /* TODO: Test for push + cork should be different, + * - It makes the layout of the received data difficult + * - It makes it hard to calculate the total_bytes in the recvmsg + * Temporarily skip the data integrity test for this case now. + */ + opt->data_test = false; /* Test push + cork */ txmsg_redir = 0; txmsg_cork = 512; txmsg_start_push = 1; txmsg_end_push = 2; test_send_many(opt, cgrp); + opt->data_test = data; } static void test_txmsg_push_pop(int cgrp, struct sockmap_options *opt) { + /* Test push/pop range overlapping */ + txmsg_pass = 1; + txmsg_start_push = 1; + txmsg_end_push = 10; + txmsg_start_pop = 5; + txmsg_pop = 4; + test_send_large(opt, cgrp); + + txmsg_pass = 1; + txmsg_start_push = 1; + txmsg_end_push = 10; + txmsg_start_pop = 5; + txmsg_pop = 16; + test_send_large(opt, cgrp); + + txmsg_pass = 1; + txmsg_start_push = 5; + txmsg_end_push = 4; + txmsg_start_pop = 1; + txmsg_pop = 10; + test_send_large(opt, cgrp); + + txmsg_pass = 1; + txmsg_start_push = 5; + txmsg_end_push = 16; + txmsg_start_pop = 1; + txmsg_pop = 10; + test_send_large(opt, cgrp); + + /* Test push/pop range non-overlapping */ + txmsg_pass = 1; txmsg_start_push = 1; txmsg_end_push = 10; + txmsg_start_pop = 16; + txmsg_pop = 4; + test_send_large(opt, cgrp); + + txmsg_pass = 1; + txmsg_start_push = 16; + txmsg_end_push = 10; txmsg_start_pop = 5; txmsg_pop = 4; test_send_large(opt, cgrp); diff --git a/tools/testing/selftests/bpf/test_tcp_check_syncookie.sh b/tools/testing/selftests/bpf/test_tcp_check_syncookie.sh deleted file mode 100755 index b42c24282c25..000000000000 --- a/tools/testing/selftests/bpf/test_tcp_check_syncookie.sh +++ /dev/null @@ -1,85 +0,0 @@ -#!/bin/sh -# SPDX-License-Identifier: GPL-2.0 -# Copyright (c) 2018 Facebook -# Copyright (c) 2019 Cloudflare - -set -eu -readonly NS1="ns1-$(mktemp -u XXXXXX)" - -wait_for_ip() -{ - local _i - printf "Wait for IP %s to become available " "$1" - for _i in $(seq ${MAX_PING_TRIES}); do - printf "." - if ns1_exec ping -c 1 -W 1 "$1" >/dev/null 2>&1; then - echo " OK" - return - fi - sleep 1 - done - echo 1>&2 "ERROR: Timeout waiting for test IP to become available." - exit 1 -} - -get_prog_id() -{ - awk '/ id / {sub(/.* id /, "", $0); print($1)}' -} - -ns1_exec() -{ - ip netns exec ${NS1} "$@" -} - -setup() -{ - ip netns add ${NS1} - ns1_exec ip link set lo up - - ns1_exec sysctl -w net.ipv4.tcp_syncookies=2 - ns1_exec sysctl -w net.ipv4.tcp_window_scaling=0 - ns1_exec sysctl -w net.ipv4.tcp_timestamps=0 - ns1_exec sysctl -w net.ipv4.tcp_sack=0 - - wait_for_ip 127.0.0.1 - wait_for_ip ::1 -} - -cleanup() -{ - ip netns del ns1 2>/dev/null || : -} - -main() -{ - trap cleanup EXIT 2 3 6 15 - setup - - printf "Testing clsact..." - ns1_exec tc qdisc add dev "${TEST_IF}" clsact - ns1_exec tc filter add dev "${TEST_IF}" ingress \ - bpf obj "${BPF_PROG_OBJ}" sec "${CLSACT_SECTION}" da - - BPF_PROG_ID=$(ns1_exec tc filter show dev "${TEST_IF}" ingress | \ - get_prog_id) - ns1_exec "${PROG}" "${BPF_PROG_ID}" - ns1_exec tc qdisc del dev "${TEST_IF}" clsact - - printf "Testing XDP..." - ns1_exec ip link set "${TEST_IF}" xdp \ - object "${BPF_PROG_OBJ}" section "${XDP_SECTION}" - BPF_PROG_ID=$(ns1_exec ip link show "${TEST_IF}" | get_prog_id) - ns1_exec "${PROG}" "${BPF_PROG_ID}" -} - -DIR=$(dirname $0) -TEST_IF=lo -MAX_PING_TRIES=5 -BPF_PROG_OBJ="${DIR}/test_tcp_check_syncookie_kern.bpf.o" -CLSACT_SECTION="tc" -XDP_SECTION="xdp" -BPF_PROG_ID=0 -PROG="${DIR}/test_tcp_check_syncookie_user" - -main diff --git a/tools/testing/selftests/bpf/test_tcp_check_syncookie_user.c b/tools/testing/selftests/bpf/test_tcp_check_syncookie_user.c deleted file mode 100644 index 3844f9b8232a..000000000000 --- a/tools/testing/selftests/bpf/test_tcp_check_syncookie_user.c +++ /dev/null @@ -1,213 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -// Copyright (c) 2018 Facebook -// Copyright (c) 2019 Cloudflare - -#include <limits.h> -#include <string.h> -#include <stdlib.h> -#include <unistd.h> - -#include <arpa/inet.h> -#include <netinet/in.h> -#include <sys/types.h> -#include <sys/socket.h> - -#include <bpf/bpf.h> -#include <bpf/libbpf.h> - -#include "cgroup_helpers.h" -#include "network_helpers.h" - -static int get_map_fd_by_prog_id(int prog_id, bool *xdp) -{ - struct bpf_prog_info info = {}; - __u32 info_len = sizeof(info); - __u32 map_ids[1]; - int prog_fd = -1; - int map_fd = -1; - - prog_fd = bpf_prog_get_fd_by_id(prog_id); - if (prog_fd < 0) { - log_err("Failed to get fd by prog id %d", prog_id); - goto err; - } - - info.nr_map_ids = 1; - info.map_ids = (__u64)(unsigned long)map_ids; - - if (bpf_prog_get_info_by_fd(prog_fd, &info, &info_len)) { - log_err("Failed to get info by prog fd %d", prog_fd); - goto err; - } - - if (!info.nr_map_ids) { - log_err("No maps found for prog fd %d", prog_fd); - goto err; - } - - *xdp = info.type == BPF_PROG_TYPE_XDP; - - map_fd = bpf_map_get_fd_by_id(map_ids[0]); - if (map_fd < 0) - log_err("Failed to get fd by map id %d", map_ids[0]); -err: - if (prog_fd >= 0) - close(prog_fd); - return map_fd; -} - -static int run_test(int server_fd, int results_fd, bool xdp) -{ - int client = -1, srv_client = -1; - int ret = 0; - __u32 key = 0; - __u32 key_gen = 1; - __u32 key_mss = 2; - __u32 value = 0; - __u32 value_gen = 0; - __u32 value_mss = 0; - - if (bpf_map_update_elem(results_fd, &key, &value, 0) < 0) { - log_err("Can't clear results"); - goto err; - } - - if (bpf_map_update_elem(results_fd, &key_gen, &value_gen, 0) < 0) { - log_err("Can't clear results"); - goto err; - } - - if (bpf_map_update_elem(results_fd, &key_mss, &value_mss, 0) < 0) { - log_err("Can't clear results"); - goto err; - } - - client = connect_to_fd(server_fd, 0); - if (client == -1) - goto err; - - srv_client = accept(server_fd, NULL, 0); - if (srv_client == -1) { - log_err("Can't accept connection"); - goto err; - } - - if (bpf_map_lookup_elem(results_fd, &key, &value) < 0) { - log_err("Can't lookup result"); - goto err; - } - - if (value == 0) { - log_err("Didn't match syncookie: %u", value); - goto err; - } - - if (bpf_map_lookup_elem(results_fd, &key_gen, &value_gen) < 0) { - log_err("Can't lookup result"); - goto err; - } - - if (xdp && value_gen == 0) { - // SYN packets do not get passed through generic XDP, skip the - // rest of the test. - printf("Skipping XDP cookie check\n"); - goto out; - } - - if (bpf_map_lookup_elem(results_fd, &key_mss, &value_mss) < 0) { - log_err("Can't lookup result"); - goto err; - } - - if (value != value_gen) { - log_err("BPF generated cookie does not match kernel one"); - goto err; - } - - if (value_mss < 536 || value_mss > USHRT_MAX) { - log_err("Unexpected MSS retrieved"); - goto err; - } - - goto out; - -err: - ret = 1; -out: - close(client); - close(srv_client); - return ret; -} - -static int v6only_true(int fd, void *opts) -{ - int mode = true; - - return setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &mode, sizeof(mode)); -} - -static int v6only_false(int fd, void *opts) -{ - int mode = false; - - return setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &mode, sizeof(mode)); -} - -int main(int argc, char **argv) -{ - struct network_helper_opts opts = { 0 }; - int server = -1; - int server_v6 = -1; - int server_dual = -1; - int results = -1; - int err = 0; - bool xdp; - - if (argc < 2) { - fprintf(stderr, "Usage: %s prog_id\n", argv[0]); - exit(1); - } - - /* Use libbpf 1.0 API mode */ - libbpf_set_strict_mode(LIBBPF_STRICT_ALL); - - results = get_map_fd_by_prog_id(atoi(argv[1]), &xdp); - if (results < 0) { - log_err("Can't get map"); - goto err; - } - - server = start_server_str(AF_INET, SOCK_STREAM, "127.0.0.1", 0, NULL); - if (server == -1) - goto err; - - opts.post_socket_cb = v6only_true; - server_v6 = start_server_str(AF_INET6, SOCK_STREAM, "::1", 0, &opts); - if (server_v6 == -1) - goto err; - - opts.post_socket_cb = v6only_false; - server_dual = start_server_str(AF_INET6, SOCK_STREAM, "::0", 0, &opts); - if (server_dual == -1) - goto err; - - if (run_test(server, results, xdp)) - goto err; - - if (run_test(server_v6, results, xdp)) - goto err; - - if (run_test(server_dual, results, xdp)) - goto err; - - printf("ok\n"); - goto out; -err: - err = 1; -out: - close(server); - close(server_v6); - close(server_dual); - close(results); - return err; -} diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index 610392dfc4fb..447b68509d76 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -42,10 +42,6 @@ #include "../../../include/linux/filter.h" #include "testing_helpers.h" -#ifndef ENOTSUPP -#define ENOTSUPP 524 -#endif - #define MAX_INSNS BPF_MAXINSNS #define MAX_EXPECTED_INSNS 32 #define MAX_UNEXPECTED_INSNS 32 diff --git a/tools/testing/selftests/bpf/testing_helpers.c b/tools/testing/selftests/bpf/testing_helpers.c index d3c3c3a24150..5e9f16683be5 100644 --- a/tools/testing/selftests/bpf/testing_helpers.c +++ b/tools/testing/selftests/bpf/testing_helpers.c @@ -367,7 +367,7 @@ int delete_module(const char *name, int flags) return syscall(__NR_delete_module, name, flags); } -int unload_bpf_testmod(bool verbose) +int unload_module(const char *name, bool verbose) { int ret, cnt = 0; @@ -375,11 +375,11 @@ int unload_bpf_testmod(bool verbose) fprintf(stdout, "Failed to trigger kernel-side RCU sync!\n"); for (;;) { - ret = delete_module("bpf_testmod", 0); + ret = delete_module(name, 0); if (!ret || errno != EAGAIN) break; if (++cnt > 10000) { - fprintf(stdout, "Unload of bpf_testmod timed out\n"); + fprintf(stdout, "Unload of %s timed out\n", name); break; } usleep(100); @@ -388,41 +388,51 @@ int unload_bpf_testmod(bool verbose) if (ret) { if (errno == ENOENT) { if (verbose) - fprintf(stdout, "bpf_testmod.ko is already unloaded.\n"); + fprintf(stdout, "%s.ko is already unloaded.\n", name); return -1; } - fprintf(stdout, "Failed to unload bpf_testmod.ko from kernel: %d\n", -errno); + fprintf(stdout, "Failed to unload %s.ko from kernel: %d\n", name, -errno); return -1; } if (verbose) - fprintf(stdout, "Successfully unloaded bpf_testmod.ko.\n"); + fprintf(stdout, "Successfully unloaded %s.ko.\n", name); return 0; } -int load_bpf_testmod(bool verbose) +int load_module(const char *path, bool verbose) { int fd; if (verbose) - fprintf(stdout, "Loading bpf_testmod.ko...\n"); + fprintf(stdout, "Loading %s...\n", path); - fd = open("bpf_testmod.ko", O_RDONLY); + fd = open(path, O_RDONLY); if (fd < 0) { - fprintf(stdout, "Can't find bpf_testmod.ko kernel module: %d\n", -errno); + fprintf(stdout, "Can't find %s kernel module: %d\n", path, -errno); return -ENOENT; } if (finit_module(fd, "", 0)) { - fprintf(stdout, "Failed to load bpf_testmod.ko into the kernel: %d\n", -errno); + fprintf(stdout, "Failed to load %s into the kernel: %d\n", path, -errno); close(fd); return -EINVAL; } close(fd); if (verbose) - fprintf(stdout, "Successfully loaded bpf_testmod.ko.\n"); + fprintf(stdout, "Successfully loaded %s.\n", path); return 0; } +int unload_bpf_testmod(bool verbose) +{ + return unload_module("bpf_testmod", verbose); +} + +int load_bpf_testmod(bool verbose) +{ + return load_module("bpf_testmod.ko", verbose); +} + /* * Trigger synchronize_rcu() in kernel. */ diff --git a/tools/testing/selftests/bpf/testing_helpers.h b/tools/testing/selftests/bpf/testing_helpers.h index d55f6ab12433..46d7f7089f63 100644 --- a/tools/testing/selftests/bpf/testing_helpers.h +++ b/tools/testing/selftests/bpf/testing_helpers.h @@ -38,6 +38,8 @@ int unload_bpf_testmod(bool verbose); int kern_sync_rcu(void); int finit_module(int fd, const char *param_values, int flags); int delete_module(const char *name, int flags); +int load_module(const char *path, bool verbose); +int unload_module(const char *name, bool verbose); static inline __u64 get_time_ns(void) { diff --git a/tools/testing/selftests/bpf/trace_helpers.c b/tools/testing/selftests/bpf/trace_helpers.c index 2d742fdac6b9..81943c6254e6 100644 --- a/tools/testing/selftests/bpf/trace_helpers.c +++ b/tools/testing/selftests/bpf/trace_helpers.c @@ -293,6 +293,10 @@ static int procmap_query(int fd, const void *addr, __u32 query_flags, size_t *st return 0; } #else +# ifndef PROCMAP_QUERY_VMA_EXECUTABLE +# define PROCMAP_QUERY_VMA_EXECUTABLE 0x04 +# endif + static int procmap_query(int fd, const void *addr, __u32 query_flags, size_t *start, size_t *offset, int *flags) { return -EOPNOTSUPP; diff --git a/tools/testing/selftests/bpf/uprobe_multi.c b/tools/testing/selftests/bpf/uprobe_multi.c index c7828b13e5ff..dd38dc68f635 100644 --- a/tools/testing/selftests/bpf/uprobe_multi.c +++ b/tools/testing/selftests/bpf/uprobe_multi.c @@ -12,6 +12,10 @@ #define MADV_POPULATE_READ 22 #endif +#ifndef MADV_PAGEOUT +#define MADV_PAGEOUT 21 +#endif + int __attribute__((weak)) uprobe(void) { return 0; diff --git a/tools/testing/selftests/bpf/uptr_test_common.h b/tools/testing/selftests/bpf/uptr_test_common.h new file mode 100644 index 000000000000..f8a134ba12f9 --- /dev/null +++ b/tools/testing/selftests/bpf/uptr_test_common.h @@ -0,0 +1,63 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */ + +#ifndef _UPTR_TEST_COMMON_H +#define _UPTR_TEST_COMMON_H + +#define MAGIC_VALUE 0xabcd1234 +#define PAGE_SIZE 4096 + +#ifdef __BPF__ +/* Avoid fwd btf type being generated for the following struct */ +struct large_data *dummy_large_data; +struct empty_data *dummy_empty_data; +struct user_data *dummy_data; +struct cgroup *dummy_cgrp; +#else +#define __uptr +#define __kptr +#endif + +struct user_data { + int a; + int b; + int result; + int nested_result; +}; + +struct nested_udata { + struct user_data __uptr *udata; +}; + +struct value_type { + struct user_data __uptr *udata; + struct cgroup __kptr *cgrp; + struct nested_udata nested; +}; + +struct value_lock_type { + struct user_data __uptr *udata; + struct bpf_spin_lock lock; +}; + +struct large_data { + __u8 one_page[PAGE_SIZE]; + int a; +}; + +struct large_uptr { + struct large_data __uptr *udata; +}; + +struct empty_data { +}; + +struct empty_uptr { + struct empty_data __uptr *udata; +}; + +struct kstruct_uptr { + struct cgroup __uptr *cgrp; +}; + +#endif diff --git a/tools/testing/selftests/bpf/veristat.c b/tools/testing/selftests/bpf/veristat.c index 1ec5c4c47235..e12ef953fba8 100644 --- a/tools/testing/selftests/bpf/veristat.c +++ b/tools/testing/selftests/bpf/veristat.c @@ -16,6 +16,7 @@ #include <sys/stat.h> #include <bpf/libbpf.h> #include <bpf/btf.h> +#include <bpf/bpf.h> #include <libelf.h> #include <gelf.h> #include <float.h> @@ -179,6 +180,7 @@ static struct env { int files_skipped; int progs_processed; int progs_skipped; + int top_src_lines; } env; static int libbpf_print_fn(enum libbpf_print_level level, const char *format, va_list args) @@ -228,6 +230,7 @@ static const struct argp_option opts[] = { "Force frequent BPF verifier state checkpointing (set BPF_F_TEST_STATE_FREQ program flag)" }, { "test-reg-invariants", 'r', NULL, 0, "Force BPF verifier failure on register invariant violation (BPF_F_TEST_REG_INVARIANTS program flag)" }, + { "top-src-lines", 'S', "N", 0, "Emit N most frequent source code lines" }, {}, }; @@ -327,6 +330,14 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state) return err; } break; + case 'S': + errno = 0; + env.top_src_lines = strtol(arg, NULL, 10); + if (errno) { + fprintf(stderr, "invalid top lines N specifier: %s\n", arg); + argp_usage(state); + } + break; case ARGP_KEY_ARG: tmp = realloc(env.filenames, (env.filename_cnt + 1) * sizeof(*env.filenames)); if (!tmp) @@ -854,6 +865,118 @@ static int parse_verif_log(char * const buf, size_t buf_sz, struct verif_stats * return 0; } +struct line_cnt { + char *line; + int cnt; +}; + +static int str_cmp(const void *a, const void *b) +{ + const char **str1 = (const char **)a; + const char **str2 = (const char **)b; + + return strcmp(*str1, *str2); +} + +static int line_cnt_cmp(const void *a, const void *b) +{ + const struct line_cnt *a_cnt = (const struct line_cnt *)a; + const struct line_cnt *b_cnt = (const struct line_cnt *)b; + + if (a_cnt->cnt != b_cnt->cnt) + return a_cnt->cnt < b_cnt->cnt ? -1 : 1; + return strcmp(a_cnt->line, b_cnt->line); +} + +static int print_top_src_lines(char * const buf, size_t buf_sz, const char *prog_name) +{ + int lines_cap = 0; + int lines_size = 0; + char **lines = NULL; + char *line = NULL; + char *state; + struct line_cnt *freq = NULL; + struct line_cnt *cur; + int unique_lines; + int err = 0; + int i; + + while ((line = strtok_r(line ? NULL : buf, "\n", &state))) { + if (strncmp(line, "; ", 2) != 0) + continue; + line += 2; + + if (lines_size == lines_cap) { + char **tmp; + + lines_cap = max(16, lines_cap * 2); + tmp = realloc(lines, lines_cap * sizeof(*tmp)); + if (!tmp) { + err = -ENOMEM; + goto cleanup; + } + lines = tmp; + } + lines[lines_size] = line; + lines_size++; + } + + if (lines_size == 0) + goto cleanup; + + qsort(lines, lines_size, sizeof(*lines), str_cmp); + + freq = calloc(lines_size, sizeof(*freq)); + if (!freq) { + err = -ENOMEM; + goto cleanup; + } + + cur = freq; + cur->line = lines[0]; + cur->cnt = 1; + for (i = 1; i < lines_size; ++i) { + if (strcmp(lines[i], cur->line) != 0) { + cur++; + cur->line = lines[i]; + cur->cnt = 0; + } + cur->cnt++; + } + unique_lines = cur - freq + 1; + + qsort(freq, unique_lines, sizeof(struct line_cnt), line_cnt_cmp); + + printf("Top source lines (%s):\n", prog_name); + for (i = 0; i < min(unique_lines, env.top_src_lines); ++i) { + const char *src_code = freq[i].line; + const char *src_line = NULL; + char *split = strrchr(freq[i].line, '@'); + + if (split) { + src_line = split + 1; + + while (*src_line && isspace(*src_line)) + src_line++; + + while (split > src_code && isspace(*split)) + split--; + *split = '\0'; + } + + if (src_line) + printf("%5d: (%s)\t%s\n", freq[i].cnt, src_line, src_code); + else + printf("%5d: %s\n", freq[i].cnt, src_code); + } + printf("\n"); + +cleanup: + free(freq); + free(lines); + return err; +} + static int guess_prog_type_by_ctx_name(const char *ctx_name, enum bpf_prog_type *prog_type, enum bpf_attach_type *attach_type) @@ -987,6 +1110,35 @@ skip_freplace_fixup: return; } +static int max_verifier_log_size(void) +{ + const int SMALL_LOG_SIZE = UINT_MAX >> 8; + const int BIG_LOG_SIZE = UINT_MAX >> 2; + struct bpf_insn insns[] = { + { .code = BPF_ALU | BPF_MOV | BPF_X, .dst_reg = BPF_REG_0, }, + { .code = BPF_JMP | BPF_EXIT, }, + }; + LIBBPF_OPTS(bpf_prog_load_opts, opts, + .log_size = BIG_LOG_SIZE, + .log_buf = (void *)-1, + .log_level = 4 + ); + int ret, insn_cnt = ARRAY_SIZE(insns); + static int log_size; + + if (log_size != 0) + return log_size; + + ret = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL", insns, insn_cnt, &opts); + + if (ret == -EFAULT) + log_size = BIG_LOG_SIZE; + else /* ret == -EINVAL, big log size is not supported by the verifier */ + log_size = SMALL_LOG_SIZE; + + return log_size; +} + static int process_prog(const char *filename, struct bpf_object *obj, struct bpf_program *prog) { const char *base_filename = basename(strdupa(filename)); @@ -1009,13 +1161,16 @@ static int process_prog(const char *filename, struct bpf_object *obj, struct bpf stats = &env.prog_stats[env.prog_stat_cnt++]; memset(stats, 0, sizeof(*stats)); - if (env.verbose) { - buf_sz = env.log_size ? env.log_size : 16 * 1024 * 1024; + if (env.verbose || env.top_src_lines > 0) { + buf_sz = env.log_size ? env.log_size : max_verifier_log_size(); buf = malloc(buf_sz); if (!buf) return -ENOMEM; /* ensure we always request stats */ log_level = env.log_level | 4 | (env.log_fixed ? 8 : 0); + /* --top-src-lines needs verifier log */ + if (env.top_src_lines > 0 && env.log_level == 0) + log_level |= 2; } else { buf = verif_log_buf; buf_sz = sizeof(verif_log_buf); @@ -1048,6 +1203,8 @@ static int process_prog(const char *filename, struct bpf_object *obj, struct bpf filename, prog_name, stats->stats[DURATION], err ? "failure" : "success", buf); } + if (env.top_src_lines > 0) + print_top_src_lines(buf, buf_sz, stats->prog_name); if (verif_log_buf != buf) free(buf); diff --git a/tools/testing/selftests/bpf/veristat.cfg b/tools/testing/selftests/bpf/veristat.cfg index 1a385061618d..e661ffdcaadf 100644 --- a/tools/testing/selftests/bpf/veristat.cfg +++ b/tools/testing/selftests/bpf/veristat.cfg @@ -15,3 +15,4 @@ test_usdt* test_verif_scale* test_xdp_noinline* xdp_synproxy* +verifier_search_pruning* diff --git a/tools/testing/selftests/cgroup/test_cpu.c b/tools/testing/selftests/cgroup/test_cpu.c index dad2ed82f3ef..a2b50af8e9ee 100644 --- a/tools/testing/selftests/cgroup/test_cpu.c +++ b/tools/testing/selftests/cgroup/test_cpu.c @@ -8,6 +8,7 @@ #include <pthread.h> #include <stdio.h> #include <time.h> +#include <unistd.h> #include "../kselftest.h" #include "cgroup_util.h" @@ -229,6 +230,79 @@ cleanup: return ret; } +/* + * Creates a nice process that consumes CPU and checks that the elapsed + * usertime in the cgroup is close to the expected time. + */ +static int test_cpucg_nice(const char *root) +{ + int ret = KSFT_FAIL; + int status; + long user_usec, nice_usec; + long usage_seconds = 2; + long expected_nice_usec = usage_seconds * USEC_PER_SEC; + char *cpucg; + pid_t pid; + + cpucg = cg_name(root, "cpucg_test"); + if (!cpucg) + goto cleanup; + + if (cg_create(cpucg)) + goto cleanup; + + user_usec = cg_read_key_long(cpucg, "cpu.stat", "user_usec"); + nice_usec = cg_read_key_long(cpucg, "cpu.stat", "nice_usec"); + if (nice_usec == -1) + ret = KSFT_SKIP; + if (user_usec != 0 || nice_usec != 0) + goto cleanup; + + /* + * We fork here to create a new process that can be niced without + * polluting the nice value of other selftests + */ + pid = fork(); + if (pid < 0) { + goto cleanup; + } else if (pid == 0) { + struct cpu_hog_func_param param = { + .nprocs = 1, + .ts = { + .tv_sec = usage_seconds, + .tv_nsec = 0, + }, + .clock_type = CPU_HOG_CLOCK_PROCESS, + }; + char buf[64]; + snprintf(buf, sizeof(buf), "%d", getpid()); + if (cg_write(cpucg, "cgroup.procs", buf)) + goto cleanup; + + /* Try to keep niced CPU usage as constrained to hog_cpu as possible */ + nice(1); + hog_cpus_timed(cpucg, ¶m); + exit(0); + } else { + waitpid(pid, &status, 0); + if (!WIFEXITED(status)) + goto cleanup; + + user_usec = cg_read_key_long(cpucg, "cpu.stat", "user_usec"); + nice_usec = cg_read_key_long(cpucg, "cpu.stat", "nice_usec"); + if (!values_close(nice_usec, expected_nice_usec, 1)) + goto cleanup; + + ret = KSFT_PASS; + } + +cleanup: + cg_destroy(cpucg); + free(cpucg); + + return ret; +} + static int run_cpucg_weight_test( const char *root, @@ -686,6 +760,7 @@ struct cpucg_test { } tests[] = { T(test_cpucg_subtree_control), T(test_cpucg_stats), + T(test_cpucg_nice), T(test_cpucg_weight_overprovisioned), T(test_cpucg_weight_underprovisioned), T(test_cpucg_nested_weight_overprovisioned), diff --git a/tools/testing/selftests/damon/Makefile b/tools/testing/selftests/damon/Makefile index 5b2a6a5dd1af..812f656260fb 100644 --- a/tools/testing/selftests/damon/Makefile +++ b/tools/testing/selftests/damon/Makefile @@ -6,7 +6,7 @@ TEST_GEN_FILES += debugfs_target_ids_read_before_terminate_race TEST_GEN_FILES += debugfs_target_ids_pid_leak TEST_GEN_FILES += access_memory access_memory_even -TEST_FILES = _chk_dependency.sh _debugfs_common.sh +TEST_FILES = _chk_dependency.sh _debugfs_common.sh _damon_sysfs.py # functionality tests TEST_PROGS = debugfs_attrs.sh debugfs_schemes.sh debugfs_target_ids.sh diff --git a/tools/testing/selftests/damon/_debugfs_common.sh b/tools/testing/selftests/damon/_debugfs_common.sh index aa995516870b..54d45791b0d9 100644 --- a/tools/testing/selftests/damon/_debugfs_common.sh +++ b/tools/testing/selftests/damon/_debugfs_common.sh @@ -8,7 +8,12 @@ test_write_result() { expect_reason=$4 expected=$5 - echo "$content" > "$file" + if [ "$expected" = "0" ] + then + echo "$content" > "$file" + else + echo "$content" > "$file" 2> /dev/null + fi if [ $? -ne "$expected" ] then echo "writing $content to $file doesn't return $expected" diff --git a/tools/testing/selftests/damon/access_memory_even.c b/tools/testing/selftests/damon/access_memory_even.c index 3be121487432..a9f4e9aaf3a9 100644 --- a/tools/testing/selftests/damon/access_memory_even.c +++ b/tools/testing/selftests/damon/access_memory_even.c @@ -14,10 +14,8 @@ int main(int argc, char *argv[]) { char **regions; - clock_t start_clock; int nr_regions; int sz_region; - int access_time_ms; int i; if (argc != 3) { diff --git a/tools/testing/selftests/damon/debugfs_duplicate_context_creation.sh b/tools/testing/selftests/damon/debugfs_duplicate_context_creation.sh index 4a76e37ef16b..bd6c22d96ead 100755 --- a/tools/testing/selftests/damon/debugfs_duplicate_context_creation.sh +++ b/tools/testing/selftests/damon/debugfs_duplicate_context_creation.sh @@ -12,7 +12,7 @@ then exit 1 fi -if echo foo > "$DBGFS/mk_contexts" +if echo foo > "$DBGFS/mk_contexts" 2> /dev/null then echo "duplicate context creation success" exit 1 diff --git a/tools/testing/selftests/damon/huge_count_read_write.c b/tools/testing/selftests/damon/huge_count_read_write.c index a6fe0689f88d..53e69a669668 100644 --- a/tools/testing/selftests/damon/huge_count_read_write.c +++ b/tools/testing/selftests/damon/huge_count_read_write.c @@ -18,7 +18,7 @@ void write_read_with_huge_count(char *file) { int filedesc = open(file, O_RDWR); - char buf[25]; + char buf[256]; int ret; printf("%s %s\n", __func__, file); @@ -28,9 +28,7 @@ void write_read_with_huge_count(char *file) } write(filedesc, "", 0xfffffffful); - perror("after write: "); ret = read(filedesc, buf, 0xfffffffful); - perror("after read: "); close(filedesc); } diff --git a/tools/testing/selftests/drivers/net/Makefile b/tools/testing/selftests/drivers/net/Makefile index 39fb97a8c1df..0fec8f9801ad 100644 --- a/tools/testing/selftests/drivers/net/Makefile +++ b/tools/testing/selftests/drivers/net/Makefile @@ -9,6 +9,7 @@ TEST_PROGS := \ ping.py \ queues.py \ stats.py \ + shaper.py \ # end of TEST_PROGS include ../../lib.mk diff --git a/tools/testing/selftests/drivers/net/bonding/bond_options.sh b/tools/testing/selftests/drivers/net/bonding/bond_options.sh index 41d0859feb7d..edc56e2cc606 100755 --- a/tools/testing/selftests/drivers/net/bonding/bond_options.sh +++ b/tools/testing/selftests/drivers/net/bonding/bond_options.sh @@ -11,6 +11,8 @@ ALL_TESTS=" lib_dir=$(dirname "$0") source ${lib_dir}/bond_topo_3d1c.sh +c_maddr="33:33:00:00:00:10" +g_maddr="33:33:00:00:02:54" skip_prio() { @@ -240,6 +242,54 @@ arp_validate_test() done } +# Testing correct multicast groups are added to slaves for ns targets +arp_validate_mcast() +{ + RET=0 + local arp_valid=$(cmd_jq "ip -n ${s_ns} -j -d link show bond0" ".[].linkinfo.info_data.arp_validate") + local active_slave=$(cmd_jq "ip -n ${s_ns} -d -j link show bond0" ".[].linkinfo.info_data.active_slave") + + for i in $(seq 0 2); do + maddr_list=$(ip -n ${s_ns} maddr show dev eth${i}) + + # arp_valid == 0 or active_slave should not join any maddrs + if { [ "$arp_valid" == "null" ] || [ "eth${i}" == ${active_slave} ]; } && \ + echo "$maddr_list" | grep -qE "${c_maddr}|${g_maddr}"; then + RET=1 + check_err 1 "arp_valid $arp_valid active_slave $active_slave, eth$i has mcast group" + # arp_valid != 0 and backup_slave should join both maddrs + elif [ "$arp_valid" != "null" ] && [ "eth${i}" != ${active_slave} ] && \ + ( ! echo "$maddr_list" | grep -q "${c_maddr}" || \ + ! echo "$maddr_list" | grep -q "${m_maddr}"); then + RET=1 + check_err 1 "arp_valid $arp_valid active_slave $active_slave, eth$i has mcast group" + fi + done + + # Do failover + ip -n ${s_ns} link set ${active_slave} down + # wait for active link change + slowwait 2 active_slave_changed $active_slave + active_slave=$(cmd_jq "ip -n ${s_ns} -d -j link show bond0" ".[].linkinfo.info_data.active_slave") + + for i in $(seq 0 2); do + maddr_list=$(ip -n ${s_ns} maddr show dev eth${i}) + + # arp_valid == 0 or active_slave should not join any maddrs + if { [ "$arp_valid" == "null" ] || [ "eth${i}" == ${active_slave} ]; } && \ + echo "$maddr_list" | grep -qE "${c_maddr}|${g_maddr}"; then + RET=1 + check_err 1 "arp_valid $arp_valid active_slave $active_slave, eth$i has mcast group" + # arp_valid != 0 and backup_slave should join both maddrs + elif [ "$arp_valid" != "null" ] && [ "eth${i}" != ${active_slave} ] && \ + ( ! echo "$maddr_list" | grep -q "${c_maddr}" || \ + ! echo "$maddr_list" | grep -q "${m_maddr}"); then + RET=1 + check_err 1 "arp_valid $arp_valid active_slave $active_slave, eth$i has mcast group" + fi + done +} + arp_validate_arp() { local mode=$1 @@ -261,8 +311,10 @@ arp_validate_ns() fi for val in $(seq 0 6); do - arp_validate_test "mode $mode arp_interval 100 ns_ip6_target ${g_ip6} arp_validate $val" + arp_validate_test "mode $mode arp_interval 100 ns_ip6_target ${g_ip6},${c_ip6} arp_validate $val" log_test "arp_validate" "$mode ns_ip6_target arp_validate $val" + arp_validate_mcast + log_test "arp_validate" "join mcast group" done } diff --git a/tools/testing/selftests/drivers/net/hw/.gitignore b/tools/testing/selftests/drivers/net/hw/.gitignore new file mode 100644 index 000000000000..e9fe6ede681a --- /dev/null +++ b/tools/testing/selftests/drivers/net/hw/.gitignore @@ -0,0 +1 @@ +ncdevmem diff --git a/tools/testing/selftests/drivers/net/hw/Makefile b/tools/testing/selftests/drivers/net/hw/Makefile index c9f2f48fc30f..21ba64ce1e34 100644 --- a/tools/testing/selftests/drivers/net/hw/Makefile +++ b/tools/testing/selftests/drivers/net/hw/Makefile @@ -3,6 +3,7 @@ TEST_PROGS = \ csum.py \ devlink_port_split.py \ + devmem.py \ ethtool.sh \ ethtool_extended_state.sh \ ethtool_mm.sh \ @@ -10,6 +11,8 @@ TEST_PROGS = \ hw_stats_l3.sh \ hw_stats_l3_gre.sh \ loopback.sh \ + nic_link_layer.py \ + nic_performance.py \ pp_alloc_fail.py \ rss_ctx.py \ # @@ -26,4 +29,12 @@ TEST_INCLUDES := \ ../../../net/forwarding/tc_common.sh \ # +# YNL files, must be before "include ..lib.mk" +YNL_GEN_FILES := ncdevmem +TEST_GEN_FILES += $(YNL_GEN_FILES) + include ../../../lib.mk + +# YNL build +YNL_GENS := ethtool netdev +include ../../../net/ynl.mk diff --git a/tools/testing/selftests/drivers/net/hw/devmem.py b/tools/testing/selftests/drivers/net/hw/devmem.py new file mode 100755 index 000000000000..1223f0f5c10c --- /dev/null +++ b/tools/testing/selftests/drivers/net/hw/devmem.py @@ -0,0 +1,45 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 + +from lib.py import ksft_run, ksft_exit +from lib.py import ksft_eq, KsftSkipEx +from lib.py import NetDrvEpEnv +from lib.py import bkg, cmd, rand_port, wait_port_listen +from lib.py import ksft_disruptive + + +def require_devmem(cfg): + if not hasattr(cfg, "_devmem_probed"): + port = rand_port() + probe_command = f"./ncdevmem -f {cfg.ifname}" + cfg._devmem_supported = cmd(probe_command, fail=False, shell=True).ret == 0 + cfg._devmem_probed = True + + if not cfg._devmem_supported: + raise KsftSkipEx("Test requires devmem support") + + +@ksft_disruptive +def check_rx(cfg) -> None: + cfg.require_v6() + require_devmem(cfg) + + port = rand_port() + listen_cmd = f"./ncdevmem -l -f {cfg.ifname} -s {cfg.v6} -p {port}" + + with bkg(listen_cmd) as socat: + wait_port_listen(port) + cmd(f"echo -e \"hello\\nworld\"| socat -u - TCP6:[{cfg.v6}]:{port}", host=cfg.remote, shell=True) + + ksft_eq(socat.stdout.strip(), "hello\nworld") + + +def main() -> None: + with NetDrvEpEnv(__file__) as cfg: + ksft_run([check_rx], + args=(cfg, )) + ksft_exit() + + +if __name__ == "__main__": + main() diff --git a/tools/testing/selftests/drivers/net/hw/lib/py/__init__.py b/tools/testing/selftests/drivers/net/hw/lib/py/__init__.py index b582885786f5..399789a9676a 100644 --- a/tools/testing/selftests/drivers/net/hw/lib/py/__init__.py +++ b/tools/testing/selftests/drivers/net/hw/lib/py/__init__.py @@ -9,6 +9,7 @@ try: sys.path.append(KSFT_DIR.as_posix()) from net.lib.py import * from drivers.net.lib.py import * + from .linkconfig import LinkConfig except ModuleNotFoundError as e: ksft_pr("Failed importing `net` library from kernel sources") ksft_pr(str(e)) diff --git a/tools/testing/selftests/drivers/net/hw/lib/py/linkconfig.py b/tools/testing/selftests/drivers/net/hw/lib/py/linkconfig.py new file mode 100644 index 000000000000..79fde603cbbc --- /dev/null +++ b/tools/testing/selftests/drivers/net/hw/lib/py/linkconfig.py @@ -0,0 +1,222 @@ +# SPDX-License-Identifier: GPL-2.0 + +from lib.py import cmd, ethtool, ip +from lib.py import ksft_pr, ksft_eq, KsftSkipEx +from typing import Optional +import re +import time +import json + +#The LinkConfig class is implemented to handle the link layer configurations. +#Required minimum ethtool version is 6.10 + +class LinkConfig: + """Class for handling the link layer configurations""" + def __init__(self, cfg: object) -> None: + self.cfg = cfg + self.partner_netif = self.get_partner_netif_name() + + """Get the initial link configuration of local interface""" + self.common_link_modes = self.get_common_link_modes() + + def get_partner_netif_name(self) -> Optional[str]: + partner_netif = None + try: + if not self.verify_link_up(): + return None + """Get partner interface name""" + partner_json_output = ip("addr show", json=True, host=self.cfg.remote) + for interface in partner_json_output: + for addr in interface.get('addr_info', []): + if addr.get('local') == self.cfg.remote_addr: + partner_netif = interface['ifname'] + ksft_pr(f"Partner Interface name: {partner_netif}") + if partner_netif is None: + ksft_pr("Unable to get the partner interface name") + except Exception as e: + print(f"Unexpected error occurred while getting partner interface name: {e}") + self.partner_netif = partner_netif + return partner_netif + + def verify_link_up(self) -> bool: + """Verify whether the local interface link is up""" + with open(f"/sys/class/net/{self.cfg.ifname}/operstate", "r") as fp: + link_state = fp.read().strip() + + if link_state == "down": + ksft_pr(f"Link state of interface {self.cfg.ifname} is DOWN") + return False + else: + return True + + def reset_interface(self, local: bool = True, remote: bool = True) -> bool: + ksft_pr("Resetting interfaces in local and remote") + if remote: + if self.verify_link_up(): + if self.partner_netif is not None: + ifname = self.partner_netif + link_up_cmd = f"ip link set up {ifname}" + link_down_cmd = f"ip link set down {ifname}" + reset_cmd = f"{link_down_cmd} && sleep 5 && {link_up_cmd}" + try: + cmd(reset_cmd, host=self.cfg.remote) + except Exception as e: + ksft_pr(f"Unexpected error occurred while resetting remote: {e}") + else: + ksft_pr("Partner interface not available") + if local: + ifname = self.cfg.ifname + link_up_cmd = f"ip link set up {ifname}" + link_down_cmd = f"ip link set down {ifname}" + reset_cmd = f"{link_down_cmd} && sleep 5 && {link_up_cmd}" + try: + cmd(reset_cmd) + except Exception as e: + ksft_pr(f"Unexpected error occurred while resetting local: {e}") + time.sleep(10) + if self.verify_link_up() and self.get_ethtool_field("link-detected"): + ksft_pr("Local and remote interfaces reset to original state") + return True + else: + ksft_pr("Error occurred after resetting interfaces. Link is DOWN.") + return False + + def set_speed_and_duplex(self, speed: str, duplex: str, autoneg: bool = True) -> bool: + """Set the speed and duplex state for the interface""" + autoneg_state = "on" if autoneg is True else "off" + process = None + try: + process = ethtool(f"--change {self.cfg.ifname} speed {speed} duplex {duplex} autoneg {autoneg_state}") + except Exception as e: + ksft_pr(f"Unexpected error occurred while setting speed/duplex: {e}") + if process is None or process.ret != 0: + return False + else: + ksft_pr(f"Speed: {speed} Mbps, Duplex: {duplex} set for Interface: {self.cfg.ifname}") + return True + + def verify_speed_and_duplex(self, expected_speed: str, expected_duplex: str) -> bool: + if not self.verify_link_up(): + return False + """Verifying the speed and duplex state for the interface""" + with open(f"/sys/class/net/{self.cfg.ifname}/speed", "r") as fp: + actual_speed = fp.read().strip() + with open(f"/sys/class/net/{self.cfg.ifname}/duplex", "r") as fp: + actual_duplex = fp.read().strip() + + ksft_eq(actual_speed, expected_speed) + ksft_eq(actual_duplex, expected_duplex) + return True + + def set_autonegotiation_state(self, state: str, remote: bool = False) -> bool: + common_link_modes = self.common_link_modes + speeds, duplex_modes = self.get_speed_duplex_values(self.common_link_modes) + speed = speeds[0] + duplex = duplex_modes[0] + if not speed or not duplex: + ksft_pr("No speed or duplex modes found") + return False + + speed_duplex_cmd = f"speed {speed} duplex {duplex}" if state == "off" else "" + if remote: + if not self.verify_link_up(): + return False + """Set the autonegotiation state for the partner""" + command = f"-s {self.partner_netif} {speed_duplex_cmd} autoneg {state}" + partner_autoneg_change = None + """Set autonegotiation state for interface in remote pc""" + try: + partner_autoneg_change = ethtool(command, host=self.cfg.remote) + except Exception as e: + ksft_pr(f"Unexpected error occurred while changing auto-neg in remote: {e}") + if partner_autoneg_change is None or partner_autoneg_change.ret != 0: + ksft_pr(f"Not able to set autoneg parameter for interface {self.partner_netif}.") + return False + ksft_pr(f"Autoneg set as {state} for {self.partner_netif}") + else: + """Set the autonegotiation state for the interface""" + try: + process = ethtool(f"-s {self.cfg.ifname} {speed_duplex_cmd} autoneg {state}") + if process.ret != 0: + ksft_pr(f"Not able to set autoneg parameter for interface {self.cfg.ifname}") + return False + except Exception as e: + ksft_pr(f"Unexpected error occurred while changing auto-neg in local: {e}") + return False + ksft_pr(f"Autoneg set as {state} for {self.cfg.ifname}") + return True + + def check_autoneg_supported(self, remote: bool = False) -> bool: + if not remote: + local_autoneg = self.get_ethtool_field("supports-auto-negotiation") + if local_autoneg is None: + ksft_pr(f"Unable to fetch auto-negotiation status for interface {self.cfg.ifname}") + """Return autoneg status of the local interface""" + return local_autoneg + else: + if not self.verify_link_up(): + raise KsftSkipEx("Link is DOWN") + """Check remote auto-negotiation support status""" + partner_autoneg = False + if self.partner_netif is not None: + partner_autoneg = self.get_ethtool_field("supports-auto-negotiation", remote=True) + if partner_autoneg is None: + ksft_pr(f"Unable to fetch auto-negotiation status for interface {self.partner_netif}") + return partner_autoneg + + def get_common_link_modes(self) -> set[str]: + common_link_modes = [] + """Populate common link modes""" + link_modes = self.get_ethtool_field("supported-link-modes") + partner_link_modes = self.get_ethtool_field("link-partner-advertised-link-modes") + if link_modes is None: + raise KsftSkipEx(f"Link modes not available for {self.cfg.ifname}") + if partner_link_modes is None: + raise KsftSkipEx(f"Partner link modes not available for {self.cfg.ifname}") + common_link_modes = set(link_modes) and set(partner_link_modes) + return common_link_modes + + def get_speed_duplex_values(self, link_modes: list[str]) -> tuple[list[str], list[str]]: + speed = [] + duplex = [] + """Check the link modes""" + for data in link_modes: + parts = data.split('/') + speed_value = re.match(r'\d+', parts[0]) + if speed_value: + speed.append(speed_value.group()) + else: + ksft_pr(f"No speed value found for interface {self.ifname}") + return None, None + duplex.append(parts[1].lower()) + return speed, duplex + + def get_ethtool_field(self, field: str, remote: bool = False) -> Optional[str]: + process = None + if not remote: + """Get the ethtool field value for the local interface""" + try: + process = ethtool(self.cfg.ifname, json=True) + except Exception as e: + ksft_pr("Required minimum ethtool version is 6.10") + ksft_pr(f"Unexpected error occurred while getting ethtool field in local: {e}") + return None + else: + if not self.verify_link_up(): + return None + """Get the ethtool field value for the remote interface""" + self.cfg.require_cmd("ethtool", remote=True) + if self.partner_netif is None: + ksft_pr(f"Partner interface name is unavailable.") + return None + try: + process = ethtool(self.partner_netif, json=True, host=self.cfg.remote) + except Exception as e: + ksft_pr("Required minimum ethtool version is 6.10") + ksft_pr(f"Unexpected error occurred while getting ethtool field in remote: {e}") + return None + json_data = process[0] + """Check if the field exist in the json data""" + if field not in json_data: + raise KsftSkipEx(f'Field {field} does not exist in the output of interface {json_data["ifname"]}') + return json_data[field] diff --git a/tools/testing/selftests/drivers/net/hw/ncdevmem.c b/tools/testing/selftests/drivers/net/hw/ncdevmem.c new file mode 100644 index 000000000000..8e502a1f8f9b --- /dev/null +++ b/tools/testing/selftests/drivers/net/hw/ncdevmem.c @@ -0,0 +1,789 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * tcpdevmem netcat. Works similarly to netcat but does device memory TCP + * instead of regular TCP. Uses udmabuf to mock a dmabuf provider. + * + * Usage: + * + * On server: + * ncdevmem -s <server IP> [-c <client IP>] -f eth1 -l -p 5201 + * + * On client: + * echo -n "hello\nworld" | nc -s <server IP> 5201 -p 5201 + * + * Test data validation: + * + * On server: + * ncdevmem -s <server IP> [-c <client IP>] -f eth1 -l -p 5201 -v 7 + * + * On client: + * yes $(echo -e \\x01\\x02\\x03\\x04\\x05\\x06) | \ + * tr \\n \\0 | \ + * head -c 5G | \ + * nc <server IP> 5201 -p 5201 + * + * + * Note this is compatible with regular netcat. i.e. the sender or receiver can + * be replaced with regular netcat to test the RX or TX path in isolation. + */ +#define _GNU_SOURCE +#define __EXPORTED_HEADERS__ + +#include <linux/uio.h> +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <stdbool.h> +#include <string.h> +#include <errno.h> +#define __iovec_defined +#include <fcntl.h> +#include <malloc.h> +#include <error.h> + +#include <arpa/inet.h> +#include <sys/socket.h> +#include <sys/mman.h> +#include <sys/ioctl.h> +#include <sys/syscall.h> + +#include <linux/memfd.h> +#include <linux/dma-buf.h> +#include <linux/udmabuf.h> +#include <libmnl/libmnl.h> +#include <linux/types.h> +#include <linux/netlink.h> +#include <linux/genetlink.h> +#include <linux/netdev.h> +#include <linux/ethtool_netlink.h> +#include <time.h> +#include <net/if.h> + +#include "netdev-user.h" +#include "ethtool-user.h" +#include <ynl.h> + +#define PAGE_SHIFT 12 +#define TEST_PREFIX "ncdevmem" +#define NUM_PAGES 16000 + +#ifndef MSG_SOCK_DEVMEM +#define MSG_SOCK_DEVMEM 0x2000000 +#endif + +static char *server_ip; +static char *client_ip; +static char *port; +static size_t do_validation; +static int start_queue = -1; +static int num_queues = -1; +static char *ifname; +static unsigned int ifindex; +static unsigned int dmabuf_id; + +struct memory_buffer { + int fd; + size_t size; + + int devfd; + int memfd; + char *buf_mem; +}; + +struct memory_provider { + struct memory_buffer *(*alloc)(size_t size); + void (*free)(struct memory_buffer *ctx); + void (*memcpy_from_device)(void *dst, struct memory_buffer *src, + size_t off, int n); +}; + +static struct memory_buffer *udmabuf_alloc(size_t size) +{ + struct udmabuf_create create; + struct memory_buffer *ctx; + int ret; + + ctx = malloc(sizeof(*ctx)); + if (!ctx) + error(1, ENOMEM, "malloc failed"); + + ctx->size = size; + + ctx->devfd = open("/dev/udmabuf", O_RDWR); + if (ctx->devfd < 0) + error(1, errno, + "%s: [skip,no-udmabuf: Unable to access DMA buffer device file]\n", + TEST_PREFIX); + + ctx->memfd = memfd_create("udmabuf-test", MFD_ALLOW_SEALING); + if (ctx->memfd < 0) + error(1, errno, "%s: [skip,no-memfd]\n", TEST_PREFIX); + + ret = fcntl(ctx->memfd, F_ADD_SEALS, F_SEAL_SHRINK); + if (ret < 0) + error(1, errno, "%s: [skip,fcntl-add-seals]\n", TEST_PREFIX); + + ret = ftruncate(ctx->memfd, size); + if (ret == -1) + error(1, errno, "%s: [FAIL,memfd-truncate]\n", TEST_PREFIX); + + memset(&create, 0, sizeof(create)); + + create.memfd = ctx->memfd; + create.offset = 0; + create.size = size; + ctx->fd = ioctl(ctx->devfd, UDMABUF_CREATE, &create); + if (ctx->fd < 0) + error(1, errno, "%s: [FAIL, create udmabuf]\n", TEST_PREFIX); + + ctx->buf_mem = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, + ctx->fd, 0); + if (ctx->buf_mem == MAP_FAILED) + error(1, errno, "%s: [FAIL, map udmabuf]\n", TEST_PREFIX); + + return ctx; +} + +static void udmabuf_free(struct memory_buffer *ctx) +{ + munmap(ctx->buf_mem, ctx->size); + close(ctx->fd); + close(ctx->memfd); + close(ctx->devfd); + free(ctx); +} + +static void udmabuf_memcpy_from_device(void *dst, struct memory_buffer *src, + size_t off, int n) +{ + struct dma_buf_sync sync = {}; + + sync.flags = DMA_BUF_SYNC_START; + ioctl(src->fd, DMA_BUF_IOCTL_SYNC, &sync); + + memcpy(dst, src->buf_mem + off, n); + + sync.flags = DMA_BUF_SYNC_END; + ioctl(src->fd, DMA_BUF_IOCTL_SYNC, &sync); +} + +static struct memory_provider udmabuf_memory_provider = { + .alloc = udmabuf_alloc, + .free = udmabuf_free, + .memcpy_from_device = udmabuf_memcpy_from_device, +}; + +static struct memory_provider *provider = &udmabuf_memory_provider; + +static void print_nonzero_bytes(void *ptr, size_t size) +{ + unsigned char *p = ptr; + unsigned int i; + + for (i = 0; i < size; i++) + putchar(p[i]); +} + +void validate_buffer(void *line, size_t size) +{ + static unsigned char seed = 1; + unsigned char *ptr = line; + int errors = 0; + size_t i; + + for (i = 0; i < size; i++) { + if (ptr[i] != seed) { + fprintf(stderr, + "Failed validation: expected=%u, actual=%u, index=%lu\n", + seed, ptr[i], i); + errors++; + if (errors > 20) + error(1, 0, "validation failed."); + } + seed++; + if (seed == do_validation) + seed = 0; + } + + fprintf(stdout, "Validated buffer\n"); +} + +static int rxq_num(int ifindex) +{ + struct ethtool_channels_get_req *req; + struct ethtool_channels_get_rsp *rsp; + struct ynl_error yerr; + struct ynl_sock *ys; + int num = -1; + + ys = ynl_sock_create(&ynl_ethtool_family, &yerr); + if (!ys) { + fprintf(stderr, "YNL: %s\n", yerr.msg); + return -1; + } + + req = ethtool_channels_get_req_alloc(); + ethtool_channels_get_req_set_header_dev_index(req, ifindex); + rsp = ethtool_channels_get(ys, req); + if (rsp) + num = rsp->rx_count + rsp->combined_count; + ethtool_channels_get_req_free(req); + ethtool_channels_get_rsp_free(rsp); + + ynl_sock_destroy(ys); + + return num; +} + +#define run_command(cmd, ...) \ + ({ \ + char command[256]; \ + memset(command, 0, sizeof(command)); \ + snprintf(command, sizeof(command), cmd, ##__VA_ARGS__); \ + fprintf(stderr, "Running: %s\n", command); \ + system(command); \ + }) + +static int reset_flow_steering(void) +{ + /* Depending on the NIC, toggling ntuple off and on might not + * be allowed. Additionally, attempting to delete existing filters + * will fail if no filters are present. Therefore, do not enforce + * the exit status. + */ + + run_command("sudo ethtool -K %s ntuple off >&2", ifname); + run_command("sudo ethtool -K %s ntuple on >&2", ifname); + run_command( + "sudo ethtool -n %s | grep 'Filter:' | awk '{print $2}' | xargs -n1 ethtool -N %s delete >&2", + ifname, ifname); + return 0; +} + +static const char *tcp_data_split_str(int val) +{ + switch (val) { + case 0: + return "off"; + case 1: + return "auto"; + case 2: + return "on"; + default: + return "?"; + } +} + +static int configure_headersplit(bool on) +{ + struct ethtool_rings_get_req *get_req; + struct ethtool_rings_get_rsp *get_rsp; + struct ethtool_rings_set_req *req; + struct ynl_error yerr; + struct ynl_sock *ys; + int ret; + + ys = ynl_sock_create(&ynl_ethtool_family, &yerr); + if (!ys) { + fprintf(stderr, "YNL: %s\n", yerr.msg); + return -1; + } + + req = ethtool_rings_set_req_alloc(); + ethtool_rings_set_req_set_header_dev_index(req, ifindex); + /* 0 - off, 1 - auto, 2 - on */ + ethtool_rings_set_req_set_tcp_data_split(req, on ? 2 : 0); + ret = ethtool_rings_set(ys, req); + if (ret < 0) + fprintf(stderr, "YNL failed: %s\n", ys->err.msg); + ethtool_rings_set_req_free(req); + + if (ret == 0) { + get_req = ethtool_rings_get_req_alloc(); + ethtool_rings_get_req_set_header_dev_index(get_req, ifindex); + get_rsp = ethtool_rings_get(ys, get_req); + ethtool_rings_get_req_free(get_req); + if (get_rsp) + fprintf(stderr, "TCP header split: %s\n", + tcp_data_split_str(get_rsp->tcp_data_split)); + ethtool_rings_get_rsp_free(get_rsp); + } + + ynl_sock_destroy(ys); + + return ret; +} + +static int configure_rss(void) +{ + return run_command("sudo ethtool -X %s equal %d >&2", ifname, start_queue); +} + +static int configure_channels(unsigned int rx, unsigned int tx) +{ + return run_command("sudo ethtool -L %s rx %u tx %u", ifname, rx, tx); +} + +static int configure_flow_steering(struct sockaddr_in6 *server_sin) +{ + const char *type = "tcp6"; + const char *server_addr; + char buf[40]; + + inet_ntop(AF_INET6, &server_sin->sin6_addr, buf, sizeof(buf)); + server_addr = buf; + + if (IN6_IS_ADDR_V4MAPPED(&server_sin->sin6_addr)) { + type = "tcp4"; + server_addr = strrchr(server_addr, ':') + 1; + } + + return run_command("sudo ethtool -N %s flow-type %s %s %s dst-ip %s %s %s dst-port %s queue %d >&2", + ifname, + type, + client_ip ? "src-ip" : "", + client_ip ?: "", + server_addr, + client_ip ? "src-port" : "", + client_ip ? port : "", + port, start_queue); +} + +static int bind_rx_queue(unsigned int ifindex, unsigned int dmabuf_fd, + struct netdev_queue_id *queues, + unsigned int n_queue_index, struct ynl_sock **ys) +{ + struct netdev_bind_rx_req *req = NULL; + struct netdev_bind_rx_rsp *rsp = NULL; + struct ynl_error yerr; + + *ys = ynl_sock_create(&ynl_netdev_family, &yerr); + if (!*ys) { + fprintf(stderr, "YNL: %s\n", yerr.msg); + return -1; + } + + req = netdev_bind_rx_req_alloc(); + netdev_bind_rx_req_set_ifindex(req, ifindex); + netdev_bind_rx_req_set_fd(req, dmabuf_fd); + __netdev_bind_rx_req_set_queues(req, queues, n_queue_index); + + rsp = netdev_bind_rx(*ys, req); + if (!rsp) { + perror("netdev_bind_rx"); + goto err_close; + } + + if (!rsp->_present.id) { + perror("id not present"); + goto err_close; + } + + fprintf(stderr, "got dmabuf id=%d\n", rsp->id); + dmabuf_id = rsp->id; + + netdev_bind_rx_req_free(req); + netdev_bind_rx_rsp_free(rsp); + + return 0; + +err_close: + fprintf(stderr, "YNL failed: %s\n", (*ys)->err.msg); + netdev_bind_rx_req_free(req); + ynl_sock_destroy(*ys); + return -1; +} + +static void enable_reuseaddr(int fd) +{ + int opt = 1; + int ret; + + ret = setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &opt, sizeof(opt)); + if (ret) + error(1, errno, "%s: [FAIL, SO_REUSEPORT]\n", TEST_PREFIX); + + ret = setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &opt, sizeof(opt)); + if (ret) + error(1, errno, "%s: [FAIL, SO_REUSEADDR]\n", TEST_PREFIX); +} + +static int parse_address(const char *str, int port, struct sockaddr_in6 *sin6) +{ + int ret; + + sin6->sin6_family = AF_INET6; + sin6->sin6_port = htons(port); + + ret = inet_pton(sin6->sin6_family, str, &sin6->sin6_addr); + if (ret != 1) { + /* fallback to plain IPv4 */ + ret = inet_pton(AF_INET, str, &sin6->sin6_addr.s6_addr32[3]); + if (ret != 1) + return -1; + + /* add ::ffff prefix */ + sin6->sin6_addr.s6_addr32[0] = 0; + sin6->sin6_addr.s6_addr32[1] = 0; + sin6->sin6_addr.s6_addr16[4] = 0; + sin6->sin6_addr.s6_addr16[5] = 0xffff; + } + + return 0; +} + +int do_server(struct memory_buffer *mem) +{ + char ctrl_data[sizeof(int) * 20000]; + struct netdev_queue_id *queues; + size_t non_page_aligned_frags = 0; + struct sockaddr_in6 client_addr; + struct sockaddr_in6 server_sin; + size_t page_aligned_frags = 0; + size_t total_received = 0; + socklen_t client_addr_len; + bool is_devmem = false; + char *tmp_mem = NULL; + struct ynl_sock *ys; + char iobuf[819200]; + char buffer[256]; + int socket_fd; + int client_fd; + size_t i = 0; + int ret; + + ret = parse_address(server_ip, atoi(port), &server_sin); + if (ret < 0) + error(1, 0, "parse server address"); + + if (reset_flow_steering()) + error(1, 0, "Failed to reset flow steering\n"); + + if (configure_headersplit(1)) + error(1, 0, "Failed to enable TCP header split\n"); + + /* Configure RSS to divert all traffic from our devmem queues */ + if (configure_rss()) + error(1, 0, "Failed to configure rss\n"); + + /* Flow steer our devmem flows to start_queue */ + if (configure_flow_steering(&server_sin)) + error(1, 0, "Failed to configure flow steering\n"); + + sleep(1); + + queues = malloc(sizeof(*queues) * num_queues); + + for (i = 0; i < num_queues; i++) { + queues[i]._present.type = 1; + queues[i]._present.id = 1; + queues[i].type = NETDEV_QUEUE_TYPE_RX; + queues[i].id = start_queue + i; + } + + if (bind_rx_queue(ifindex, mem->fd, queues, num_queues, &ys)) + error(1, 0, "Failed to bind\n"); + + tmp_mem = malloc(mem->size); + if (!tmp_mem) + error(1, ENOMEM, "malloc failed"); + + socket_fd = socket(AF_INET6, SOCK_STREAM, 0); + if (socket_fd < 0) + error(1, errno, "%s: [FAIL, create socket]\n", TEST_PREFIX); + + enable_reuseaddr(socket_fd); + + fprintf(stderr, "binding to address %s:%d\n", server_ip, + ntohs(server_sin.sin6_port)); + + ret = bind(socket_fd, &server_sin, sizeof(server_sin)); + if (ret) + error(1, errno, "%s: [FAIL, bind]\n", TEST_PREFIX); + + ret = listen(socket_fd, 1); + if (ret) + error(1, errno, "%s: [FAIL, listen]\n", TEST_PREFIX); + + client_addr_len = sizeof(client_addr); + + inet_ntop(AF_INET6, &server_sin.sin6_addr, buffer, + sizeof(buffer)); + fprintf(stderr, "Waiting or connection on %s:%d\n", buffer, + ntohs(server_sin.sin6_port)); + client_fd = accept(socket_fd, &client_addr, &client_addr_len); + + inet_ntop(AF_INET6, &client_addr.sin6_addr, buffer, + sizeof(buffer)); + fprintf(stderr, "Got connection from %s:%d\n", buffer, + ntohs(client_addr.sin6_port)); + + while (1) { + struct iovec iov = { .iov_base = iobuf, + .iov_len = sizeof(iobuf) }; + struct dmabuf_cmsg *dmabuf_cmsg = NULL; + struct cmsghdr *cm = NULL; + struct msghdr msg = { 0 }; + struct dmabuf_token token; + ssize_t ret; + + is_devmem = false; + + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + msg.msg_control = ctrl_data; + msg.msg_controllen = sizeof(ctrl_data); + ret = recvmsg(client_fd, &msg, MSG_SOCK_DEVMEM); + fprintf(stderr, "recvmsg ret=%ld\n", ret); + if (ret < 0 && (errno == EAGAIN || errno == EWOULDBLOCK)) + continue; + if (ret < 0) { + perror("recvmsg"); + continue; + } + if (ret == 0) { + fprintf(stderr, "client exited\n"); + goto cleanup; + } + + i++; + for (cm = CMSG_FIRSTHDR(&msg); cm; cm = CMSG_NXTHDR(&msg, cm)) { + if (cm->cmsg_level != SOL_SOCKET || + (cm->cmsg_type != SCM_DEVMEM_DMABUF && + cm->cmsg_type != SCM_DEVMEM_LINEAR)) { + fprintf(stderr, "skipping non-devmem cmsg\n"); + continue; + } + + dmabuf_cmsg = (struct dmabuf_cmsg *)CMSG_DATA(cm); + is_devmem = true; + + if (cm->cmsg_type == SCM_DEVMEM_LINEAR) { + /* TODO: process data copied from skb's linear + * buffer. + */ + fprintf(stderr, + "SCM_DEVMEM_LINEAR. dmabuf_cmsg->frag_size=%u\n", + dmabuf_cmsg->frag_size); + + continue; + } + + token.token_start = dmabuf_cmsg->frag_token; + token.token_count = 1; + + total_received += dmabuf_cmsg->frag_size; + fprintf(stderr, + "received frag_page=%llu, in_page_offset=%llu, frag_offset=%llu, frag_size=%u, token=%u, total_received=%lu, dmabuf_id=%u\n", + dmabuf_cmsg->frag_offset >> PAGE_SHIFT, + dmabuf_cmsg->frag_offset % getpagesize(), + dmabuf_cmsg->frag_offset, + dmabuf_cmsg->frag_size, dmabuf_cmsg->frag_token, + total_received, dmabuf_cmsg->dmabuf_id); + + if (dmabuf_cmsg->dmabuf_id != dmabuf_id) + error(1, 0, + "received on wrong dmabuf_id: flow steering error\n"); + + if (dmabuf_cmsg->frag_size % getpagesize()) + non_page_aligned_frags++; + else + page_aligned_frags++; + + provider->memcpy_from_device(tmp_mem, mem, + dmabuf_cmsg->frag_offset, + dmabuf_cmsg->frag_size); + + if (do_validation) + validate_buffer(tmp_mem, + dmabuf_cmsg->frag_size); + else + print_nonzero_bytes(tmp_mem, + dmabuf_cmsg->frag_size); + + ret = setsockopt(client_fd, SOL_SOCKET, + SO_DEVMEM_DONTNEED, &token, + sizeof(token)); + if (ret != 1) + error(1, 0, + "SO_DEVMEM_DONTNEED not enough tokens"); + } + if (!is_devmem) + error(1, 0, "flow steering error\n"); + + fprintf(stderr, "total_received=%lu\n", total_received); + } + + fprintf(stderr, "%s: ok\n", TEST_PREFIX); + + fprintf(stderr, "page_aligned_frags=%lu, non_page_aligned_frags=%lu\n", + page_aligned_frags, non_page_aligned_frags); + + fprintf(stderr, "page_aligned_frags=%lu, non_page_aligned_frags=%lu\n", + page_aligned_frags, non_page_aligned_frags); + +cleanup: + + free(tmp_mem); + close(client_fd); + close(socket_fd); + ynl_sock_destroy(ys); + + return 0; +} + +void run_devmem_tests(void) +{ + struct netdev_queue_id *queues; + struct memory_buffer *mem; + struct ynl_sock *ys; + size_t i = 0; + + mem = provider->alloc(getpagesize() * NUM_PAGES); + + /* Configure RSS to divert all traffic from our devmem queues */ + if (configure_rss()) + error(1, 0, "rss error\n"); + + queues = calloc(num_queues, sizeof(*queues)); + + if (configure_headersplit(1)) + error(1, 0, "Failed to configure header split\n"); + + if (!bind_rx_queue(ifindex, mem->fd, queues, num_queues, &ys)) + error(1, 0, "Binding empty queues array should have failed\n"); + + for (i = 0; i < num_queues; i++) { + queues[i]._present.type = 1; + queues[i]._present.id = 1; + queues[i].type = NETDEV_QUEUE_TYPE_RX; + queues[i].id = start_queue + i; + } + + if (configure_headersplit(0)) + error(1, 0, "Failed to configure header split\n"); + + if (!bind_rx_queue(ifindex, mem->fd, queues, num_queues, &ys)) + error(1, 0, "Configure dmabuf with header split off should have failed\n"); + + if (configure_headersplit(1)) + error(1, 0, "Failed to configure header split\n"); + + for (i = 0; i < num_queues; i++) { + queues[i]._present.type = 1; + queues[i]._present.id = 1; + queues[i].type = NETDEV_QUEUE_TYPE_RX; + queues[i].id = start_queue + i; + } + + if (bind_rx_queue(ifindex, mem->fd, queues, num_queues, &ys)) + error(1, 0, "Failed to bind\n"); + + /* Deactivating a bound queue should not be legal */ + if (!configure_channels(num_queues, num_queues - 1)) + error(1, 0, "Deactivating a bound queue should be illegal.\n"); + + /* Closing the netlink socket does an implicit unbind */ + ynl_sock_destroy(ys); + + provider->free(mem); +} + +int main(int argc, char *argv[]) +{ + struct memory_buffer *mem; + int is_server = 0, opt; + int ret; + + while ((opt = getopt(argc, argv, "ls:c:p:v:q:t:f:")) != -1) { + switch (opt) { + case 'l': + is_server = 1; + break; + case 's': + server_ip = optarg; + break; + case 'c': + client_ip = optarg; + break; + case 'p': + port = optarg; + break; + case 'v': + do_validation = atoll(optarg); + break; + case 'q': + num_queues = atoi(optarg); + break; + case 't': + start_queue = atoi(optarg); + break; + case 'f': + ifname = optarg; + break; + case '?': + fprintf(stderr, "unknown option: %c\n", optopt); + break; + } + } + + if (!ifname) + error(1, 0, "Missing -f argument\n"); + + ifindex = if_nametoindex(ifname); + + if (!server_ip && !client_ip) { + if (start_queue < 0 && num_queues < 0) { + num_queues = rxq_num(ifindex); + if (num_queues < 0) + error(1, 0, "couldn't detect number of queues\n"); + if (num_queues < 2) + error(1, 0, + "number of device queues is too low\n"); + /* make sure can bind to multiple queues */ + start_queue = num_queues / 2; + num_queues /= 2; + } + + if (start_queue < 0 || num_queues < 0) + error(1, 0, "Both -t and -q are required\n"); + + run_devmem_tests(); + return 0; + } + + if (start_queue < 0 && num_queues < 0) { + num_queues = rxq_num(ifindex); + if (num_queues < 2) + error(1, 0, "number of device queues is too low\n"); + + num_queues = 1; + start_queue = rxq_num(ifindex) - num_queues; + + if (start_queue < 0) + error(1, 0, "couldn't detect number of queues\n"); + + fprintf(stderr, "using queues %d..%d\n", start_queue, start_queue + num_queues); + } + + for (; optind < argc; optind++) + fprintf(stderr, "extra arguments: %s\n", argv[optind]); + + if (start_queue < 0) + error(1, 0, "Missing -t argument\n"); + + if (num_queues < 0) + error(1, 0, "Missing -q argument\n"); + + if (!server_ip) + error(1, 0, "Missing -s argument\n"); + + if (!port) + error(1, 0, "Missing -p argument\n"); + + mem = provider->alloc(getpagesize() * NUM_PAGES); + ret = is_server ? do_server(mem) : 1; + provider->free(mem); + + return ret; +} diff --git a/tools/testing/selftests/drivers/net/hw/nic_link_layer.py b/tools/testing/selftests/drivers/net/hw/nic_link_layer.py new file mode 100644 index 000000000000..efd921180532 --- /dev/null +++ b/tools/testing/selftests/drivers/net/hw/nic_link_layer.py @@ -0,0 +1,113 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 + +#Introduction: +#This file has basic link layer tests for generic NIC drivers. +#The test comprises of auto-negotiation, speed and duplex checks. +# +#Setup: +#Connect the DUT PC with NIC card to partner pc back via ethernet medium of your choice(RJ45, T1) +# +# DUT PC Partner PC +#┌───────────────────────┐ ┌──────────────────────────┐ +#│ │ │ │ +#│ │ │ │ +#│ ┌───────────┐ │ │ +#│ │DUT NIC │ Eth │ │ +#│ │Interface ─┼─────────────────────────┼─ any eth Interface │ +#│ └───────────┘ │ │ +#│ │ │ │ +#│ │ │ │ +#└───────────────────────┘ └──────────────────────────┘ +# +#Configurations: +#Required minimum ethtool version is 6.10 (supports json) +#Default values: +#time_delay = 8 #time taken to wait for transitions to happen, in seconds. + +import time +import argparse +from lib.py import ksft_run, ksft_exit, ksft_pr, ksft_eq +from lib.py import KsftFailEx, KsftSkipEx +from lib.py import NetDrvEpEnv +from lib.py import LinkConfig + +def _pre_test_checks(cfg: object, link_config: LinkConfig) -> None: + if link_config.partner_netif is None: + KsftSkipEx("Partner interface is not available") + if not link_config.check_autoneg_supported() or not link_config.check_autoneg_supported(remote=True): + KsftSkipEx(f"Auto-negotiation not supported for interface {cfg.ifname} or {link_config.partner_netif}") + if not link_config.verify_link_up(): + raise KsftSkipEx(f"Link state of interface {cfg.ifname} is DOWN") + +def verify_autonegotiation(cfg: object, expected_state: str, link_config: LinkConfig) -> None: + if not link_config.verify_link_up(): + raise KsftSkipEx(f"Link state of interface {cfg.ifname} is DOWN") + """Verifying the autonegotiation state in partner""" + partner_autoneg_output = link_config.get_ethtool_field("auto-negotiation", remote=True) + if partner_autoneg_output is None: + KsftSkipEx(f"Auto-negotiation state not available for interface {link_config.partner_netif}") + partner_autoneg_state = "on" if partner_autoneg_output is True else "off" + + ksft_eq(partner_autoneg_state, expected_state) + + """Verifying the autonegotiation state of local""" + autoneg_output = link_config.get_ethtool_field("auto-negotiation") + if autoneg_output is None: + KsftSkipEx(f"Auto-negotiation state not available for interface {cfg.ifname}") + actual_state = "on" if autoneg_output is True else "off" + + ksft_eq(actual_state, expected_state) + + """Verifying the link establishment""" + link_available = link_config.get_ethtool_field("link-detected") + if link_available is None: + KsftSkipEx(f"Link status not available for interface {cfg.ifname}") + if link_available != True: + raise KsftSkipEx("Link not established at interface {cfg.ifname} after changing auto-negotiation") + +def test_autonegotiation(cfg: object, link_config: LinkConfig, time_delay: int) -> None: + _pre_test_checks(cfg, link_config) + for state in ["off", "on"]: + if not link_config.set_autonegotiation_state(state, remote=True): + raise KsftSkipEx(f"Unable to set auto-negotiation state for interface {link_config.partner_netif}") + if not link_config.set_autonegotiation_state(state): + raise KsftSkipEx(f"Unable to set auto-negotiation state for interface {cfg.ifname}") + time.sleep(time_delay) + verify_autonegotiation(cfg, state, link_config) + +def test_network_speed(cfg: object, link_config: LinkConfig, time_delay: int) -> None: + _pre_test_checks(cfg, link_config) + common_link_modes = link_config.common_link_modes + if not common_link_modes: + KsftSkipEx("No common link modes exist") + speeds, duplex_modes = link_config.get_speed_duplex_values(common_link_modes) + + if speeds and duplex_modes and len(speeds) == len(duplex_modes): + for idx in range(len(speeds)): + speed = speeds[idx] + duplex = duplex_modes[idx] + if not link_config.set_speed_and_duplex(speed, duplex): + raise KsftFailEx(f"Unable to set speed and duplex parameters for {cfg.ifname}") + time.sleep(time_delay) + if not link_config.verify_speed_and_duplex(speed, duplex): + raise KsftSkipEx(f"Error occurred while verifying speed and duplex states for interface {cfg.ifname}") + else: + if not speeds or not duplex_modes: + KsftSkipEx(f"No supported speeds or duplex modes found for interface {cfg.ifname}") + else: + KsftSkipEx("Mismatch in the number of speeds and duplex modes") + +def main() -> None: + parser = argparse.ArgumentParser(description="Run basic link layer tests for NIC driver") + parser.add_argument('--time-delay', type=int, default=8, help='Time taken to wait for transitions to happen(in seconds). Default is 8 seconds.') + args = parser.parse_args() + time_delay = args.time_delay + with NetDrvEpEnv(__file__, nsim_test=False) as cfg: + link_config = LinkConfig(cfg) + ksft_run(globs=globals(), case_pfx={"test_"}, args=(cfg, link_config, time_delay,)) + link_config.reset_interface() + ksft_exit() + +if __name__ == "__main__": + main() diff --git a/tools/testing/selftests/drivers/net/hw/nic_performance.py b/tools/testing/selftests/drivers/net/hw/nic_performance.py new file mode 100644 index 000000000000..201403b76ea3 --- /dev/null +++ b/tools/testing/selftests/drivers/net/hw/nic_performance.py @@ -0,0 +1,137 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 + +#Introduction: +#This file has basic performance test for generic NIC drivers. +#The test comprises of throughput check for TCP and UDP streams. +# +#Setup: +#Connect the DUT PC with NIC card to partner pc back via ethernet medium of your choice(RJ45, T1) +# +# DUT PC Partner PC +#┌───────────────────────┐ ┌──────────────────────────┐ +#│ │ │ │ +#│ │ │ │ +#│ ┌───────────┐ │ │ +#│ │DUT NIC │ Eth │ │ +#│ │Interface ─┼─────────────────────────┼─ any eth Interface │ +#│ └───────────┘ │ │ +#│ │ │ │ +#│ │ │ │ +#└───────────────────────┘ └──────────────────────────┘ +# +#Configurations: +#To prevent interruptions, Add ethtool, ip to the sudoers list in remote PC and get the ssh key from remote. +#Required minimum ethtool version is 6.10 +#Change the below configuration based on your hw needs. +# """Default values""" +#time_delay = 8 #time taken to wait for transitions to happen, in seconds. +#test_duration = 10 #performance test duration for the throughput check, in seconds. +#send_throughput_threshold = 80 #percentage of send throughput required to pass the check +#receive_throughput_threshold = 50 #percentage of receive throughput required to pass the check + +import time +import json +import argparse +from lib.py import ksft_run, ksft_exit, ksft_pr, ksft_true +from lib.py import KsftFailEx, KsftSkipEx, GenerateTraffic +from lib.py import NetDrvEpEnv, bkg, wait_port_listen +from lib.py import cmd +from lib.py import LinkConfig + +class TestConfig: + def __init__(self, time_delay: int, test_duration: int, send_throughput_threshold: int, receive_throughput_threshold: int) -> None: + self.time_delay = time_delay + self.test_duration = test_duration + self.send_throughput_threshold = send_throughput_threshold + self.receive_throughput_threshold = receive_throughput_threshold + +def _pre_test_checks(cfg: object, link_config: LinkConfig) -> None: + if not link_config.verify_link_up(): + KsftSkipEx(f"Link state of interface {cfg.ifname} is DOWN") + common_link_modes = link_config.common_link_modes + if common_link_modes is None: + KsftSkipEx("No common link modes found") + if link_config.partner_netif == None: + KsftSkipEx("Partner interface is not available") + if link_config.check_autoneg_supported(): + KsftSkipEx("Auto-negotiation not supported by local") + if link_config.check_autoneg_supported(remote=True): + KsftSkipEx("Auto-negotiation not supported by remote") + cfg.require_cmd("iperf3", remote=True) + +def check_throughput(cfg: object, link_config: LinkConfig, test_config: TestConfig, protocol: str, traffic: GenerateTraffic) -> None: + common_link_modes = link_config.common_link_modes + speeds, duplex_modes = link_config.get_speed_duplex_values(common_link_modes) + """Test duration in seconds""" + duration = test_config.test_duration + + ksft_pr(f"{protocol} test") + test_type = "-u" if protocol == "UDP" else "" + + send_throughput = [] + receive_throughput = [] + for idx in range(0, len(speeds)): + if link_config.set_speed_and_duplex(speeds[idx], duplex_modes[idx]) == False: + raise KsftFailEx(f"Not able to set speed and duplex parameters for {cfg.ifname}") + time.sleep(test_config.time_delay) + if not link_config.verify_link_up(): + raise KsftSkipEx(f"Link state of interface {cfg.ifname} is DOWN") + + send_command=f"{test_type} -b 0 -t {duration} --json" + receive_command=f"{test_type} -b 0 -t {duration} --reverse --json" + + send_result = traffic.run_remote_test(cfg, command=send_command) + if send_result.ret != 0: + raise KsftSkipEx("Error occurred during data transmit: {send_result.stdout}") + + send_output = send_result.stdout + send_data = json.loads(send_output) + + """Convert throughput to Mbps""" + send_throughput.append(round(send_data['end']['sum_sent']['bits_per_second'] / 1e6, 2)) + ksft_pr(f"{protocol}: Send throughput: {send_throughput[idx]} Mbps") + + receive_result = traffic.run_remote_test(cfg, command=receive_command) + if receive_result.ret != 0: + raise KsftSkipEx("Error occurred during data receive: {receive_result.stdout}") + + receive_output = receive_result.stdout + receive_data = json.loads(receive_output) + + """Convert throughput to Mbps""" + receive_throughput.append(round(receive_data['end']['sum_received']['bits_per_second'] / 1e6, 2)) + ksft_pr(f"{protocol}: Receive throughput: {receive_throughput[idx]} Mbps") + + """Check whether throughput is not below the threshold (default values set at start)""" + for idx in range(0, len(speeds)): + send_threshold = float(speeds[idx]) * float(test_config.send_throughput_threshold / 100) + receive_threshold = float(speeds[idx]) * float(test_config.receive_throughput_threshold / 100) + ksft_true(send_throughput[idx] >= send_threshold, f"{protocol}: Send throughput is below threshold for {speeds[idx]} Mbps in {duplex_modes[idx]} duplex") + ksft_true(receive_throughput[idx] >= receive_threshold, f"{protocol}: Receive throughput is below threshold for {speeds[idx]} Mbps in {duplex_modes[idx]} duplex") + +def test_tcp_throughput(cfg: object, link_config: LinkConfig, test_config: TestConfig, traffic: GenerateTraffic) -> None: + _pre_test_checks(cfg, link_config) + check_throughput(cfg, link_config, test_config, 'TCP', traffic) + +def test_udp_throughput(cfg: object, link_config: LinkConfig, test_config: TestConfig, traffic: GenerateTraffic) -> None: + _pre_test_checks(cfg, link_config) + check_throughput(cfg, link_config, test_config, 'UDP', traffic) + +def main() -> None: + parser = argparse.ArgumentParser(description="Run basic performance test for NIC driver") + parser.add_argument('--time-delay', type=int, default=8, help='Time taken to wait for transitions to happen(in seconds). Default is 8 seconds.') + parser.add_argument('--test-duration', type=int, default=10, help='Performance test duration for the throughput check, in seconds. Default is 10 seconds.') + parser.add_argument('--stt', type=int, default=80, help='Send throughput Threshold: Percentage of send throughput upon actual throughput required to pass the throughput check (in percentage). Default is 80.') + parser.add_argument('--rtt', type=int, default=50, help='Receive throughput Threshold: Percentage of receive throughput upon actual throughput required to pass the throughput check (in percentage). Default is 50.') + args=parser.parse_args() + test_config = TestConfig(args.time_delay, args.test_duration, args.stt, args.rtt) + with NetDrvEpEnv(__file__, nsim_test=False) as cfg: + traffic = GenerateTraffic(cfg) + link_config = LinkConfig(cfg) + ksft_run(globs=globals(), case_pfx={"test_"}, args=(cfg, link_config, test_config, traffic, )) + link_config.reset_interface() + ksft_exit() + +if __name__ == "__main__": + main() diff --git a/tools/testing/selftests/drivers/net/hw/rss_ctx.py b/tools/testing/selftests/drivers/net/hw/rss_ctx.py index 9d7adb3cf33b..ca8a7edff3dd 100755 --- a/tools/testing/selftests/drivers/net/hw/rss_ctx.py +++ b/tools/testing/selftests/drivers/net/hw/rss_ctx.py @@ -3,10 +3,11 @@ import datetime import random -from lib.py import ksft_run, ksft_pr, ksft_exit, ksft_eq, ksft_ne, ksft_ge, ksft_lt +import re +from lib.py import ksft_run, ksft_pr, ksft_exit, ksft_eq, ksft_ne, ksft_ge, ksft_lt, ksft_true from lib.py import NetDrvEpEnv from lib.py import EthtoolFamily, NetdevFamily -from lib.py import KsftSkipEx +from lib.py import KsftSkipEx, KsftFailEx from lib.py import rand_port from lib.py import ethtool, ip, defer, GenerateTraffic, CmdExitFailure @@ -96,6 +97,13 @@ def _send_traffic_check(cfg, port, name, params): f"traffic on inactive queues ({name}): " + str(cnts)) +def _ntuple_rule_check(cfg, rule_id, ctx_id): + """Check that ntuple rule references RSS context ID""" + text = ethtool(f"-n {cfg.ifname} rule {rule_id}").stdout + pattern = f"RSS Context (ID: )?{ctx_id}" + ksft_true(re.search(pattern, text), "RSS context not referenced in ntuple rule") + + def test_rss_key_indir(cfg): """Test basics like updating the main RSS key and indirection table.""" @@ -215,7 +223,7 @@ def test_rss_queue_reconfigure(cfg, main_ctx=True): defer(ethtool, f"-X {cfg.ifname} default") else: other_key = 'noise' - flow = f"flow-type tcp{cfg.addr_ipver} dst-port {port} context {ctx_id}" + flow = f"flow-type tcp{cfg.addr_ipver} dst-ip {cfg.addr} dst-port {port} context {ctx_id}" ntuple = ethtool_create(cfg, "-N", flow) defer(ethtool, f"-N {cfg.ifname} delete {ntuple}") @@ -238,6 +246,32 @@ def test_rss_queue_reconfigure(cfg, main_ctx=True): else: raise Exception(f"Driver didn't prevent us from deactivating a used queue (context {ctx_id})") + if not main_ctx: + ethtool(f"-L {cfg.ifname} combined 4") + flow = f"flow-type tcp{cfg.addr_ipver} dst-ip {cfg.addr} dst-port {port} context {ctx_id} action 1" + try: + # this targets queue 4, which doesn't exist + ntuple2 = ethtool_create(cfg, "-N", flow) + except CmdExitFailure: + pass + else: + raise Exception(f"Driver didn't prevent us from targeting a nonexistent queue (context {ctx_id})") + # change the table to target queues 0 and 2 + ethtool(f"-X {cfg.ifname} {ctx_ref} weight 1 0 1 0") + # ntuple rule therefore targets queues 1 and 3 + ntuple2 = ethtool_create(cfg, "-N", flow) + # should replace existing filter + ksft_eq(ntuple, ntuple2) + _send_traffic_check(cfg, port, ctx_ref, { 'target': (1, 3), + 'noise' : (0, 2) }) + # Setting queue count to 3 should fail, queue 3 is used + try: + ethtool(f"-L {cfg.ifname} combined 3") + except CmdExitFailure: + pass + else: + raise Exception(f"Driver didn't prevent us from deactivating a used queue (context {ctx_id})") + def test_rss_resize(cfg): """Test resizing of the RSS table. @@ -429,10 +463,12 @@ def test_rss_context(cfg, ctx_cnt=1, create_with_cfg=None): ksft_eq(max(data['rss-indirection-table']), 2 + i * 2 + 1, "Unexpected context cfg: " + str(data)) ports.append(rand_port()) - flow = f"flow-type tcp{cfg.addr_ipver} dst-port {ports[i]} context {ctx_id}" + flow = f"flow-type tcp{cfg.addr_ipver} dst-ip {cfg.addr} dst-port {ports[i]} context {ctx_id}" ntuple = ethtool_create(cfg, "-N", flow) defer(ethtool, f"-N {cfg.ifname} delete {ntuple}") + _ntuple_rule_check(cfg, ntuple, ctx_id) + for i in range(ctx_cnt): _send_traffic_check(cfg, ports[i], f"context {i}", { 'target': (2+i*2, 3+i*2), @@ -516,7 +552,7 @@ def test_rss_context_out_of_order(cfg, ctx_cnt=4): ctx.append(defer(ethtool, f"-X {cfg.ifname} context {ctx_id} delete")) ports.append(rand_port()) - flow = f"flow-type tcp{cfg.addr_ipver} dst-port {ports[i]} context {ctx_id}" + flow = f"flow-type tcp{cfg.addr_ipver} dst-ip {cfg.addr} dst-port {ports[i]} context {ctx_id}" ntuple_id = ethtool_create(cfg, "-N", flow) ntuple.append(defer(ethtool, f"-N {cfg.ifname} delete {ntuple_id}")) @@ -569,7 +605,7 @@ def test_rss_context_overlap(cfg, other_ctx=0): port = rand_port() if other_ctx: - flow = f"flow-type tcp{cfg.addr_ipver} dst-port {port} context {other_ctx}" + flow = f"flow-type tcp{cfg.addr_ipver} dst-ip {cfg.addr} dst-port {port} context {other_ctx}" ntuple_id = ethtool_create(cfg, "-N", flow) ntuple = defer(ethtool, f"-N {cfg.ifname} delete {ntuple_id}") @@ -587,7 +623,7 @@ def test_rss_context_overlap(cfg, other_ctx=0): # Now create a rule for context 1 and make sure traffic goes to a subset if other_ctx: ntuple.exec() - flow = f"flow-type tcp{cfg.addr_ipver} dst-port {port} context {ctx_id}" + flow = f"flow-type tcp{cfg.addr_ipver} dst-ip {cfg.addr} dst-port {port} context {ctx_id}" ntuple_id = ethtool_create(cfg, "-N", flow) defer(ethtool, f"-N {cfg.ifname} delete {ntuple_id}") @@ -606,6 +642,72 @@ def test_rss_context_overlap2(cfg): test_rss_context_overlap(cfg, True) +def test_delete_rss_context_busy(cfg): + """ + Test that deletion returns -EBUSY when an rss context is being used + by an ntuple filter. + """ + + require_ntuple(cfg) + + # create additional rss context + ctx_id = ethtool_create(cfg, "-X", "context new") + ctx_deleter = defer(ethtool, f"-X {cfg.ifname} context {ctx_id} delete") + + # utilize context from ntuple filter + port = rand_port() + flow = f"flow-type tcp{cfg.addr_ipver} dst-ip {cfg.addr} dst-port {port} context {ctx_id}" + ntuple_id = ethtool_create(cfg, "-N", flow) + defer(ethtool, f"-N {cfg.ifname} delete {ntuple_id}") + + # attempt to delete in-use context + try: + ctx_deleter.exec_only() + ctx_deleter.cancel() + raise KsftFailEx(f"deleted context {ctx_id} used by rule {ntuple_id}") + except CmdExitFailure: + pass + + +def test_rss_ntuple_addition(cfg): + """ + Test that the queue offset (ring_cookie) of an ntuple rule is added + to the queue number read from the indirection table. + """ + + require_ntuple(cfg) + + queue_cnt = len(_get_rx_cnts(cfg)) + if queue_cnt < 4: + try: + ksft_pr(f"Increasing queue count {queue_cnt} -> 4") + ethtool(f"-L {cfg.ifname} combined 4") + defer(ethtool, f"-L {cfg.ifname} combined {queue_cnt}") + except: + raise KsftSkipEx("Not enough queues for the test") + + # Use queue 0 for normal traffic + ethtool(f"-X {cfg.ifname} equal 1") + defer(ethtool, f"-X {cfg.ifname} default") + + # create additional rss context + ctx_id = ethtool_create(cfg, "-X", "context new equal 2") + defer(ethtool, f"-X {cfg.ifname} context {ctx_id} delete") + + # utilize context from ntuple filter + port = rand_port() + flow = f"flow-type tcp{cfg.addr_ipver} dst-ip {cfg.addr} dst-port {port} context {ctx_id} action 2" + try: + ntuple_id = ethtool_create(cfg, "-N", flow) + except CmdExitFailure: + raise KsftSkipEx("Ntuple filter with RSS and nonzero action not supported") + defer(ethtool, f"-N {cfg.ifname} delete {ntuple_id}") + + _send_traffic_check(cfg, port, f"context {ctx_id}", { 'target': (2, 3), + 'empty' : (1,), + 'noise' : (0,) }) + + def main() -> None: with NetDrvEpEnv(__file__, nsim_test=False) as cfg: cfg.ethnl = EthtoolFamily() @@ -616,7 +718,8 @@ def main() -> None: test_rss_context, test_rss_context4, test_rss_context32, test_rss_context_dump, test_rss_context_queue_reconfigure, test_rss_context_overlap, test_rss_context_overlap2, - test_rss_context_out_of_order, test_rss_context4_create_with_cfg], + test_rss_context_out_of_order, test_rss_context4_create_with_cfg, + test_delete_rss_context_busy, test_rss_ntuple_addition], args=(cfg, )) ksft_exit() diff --git a/tools/testing/selftests/drivers/net/lib/py/load.py b/tools/testing/selftests/drivers/net/lib/py/load.py index d9c10613ae67..da5af2c680fa 100644 --- a/tools/testing/selftests/drivers/net/lib/py/load.py +++ b/tools/testing/selftests/drivers/net/lib/py/load.py @@ -2,7 +2,7 @@ import time -from lib.py import ksft_pr, cmd, ip, rand_port, wait_port_listen +from lib.py import ksft_pr, cmd, ip, rand_port, wait_port_listen, bkg class GenerateTraffic: def __init__(self, env, port=None): @@ -23,6 +23,24 @@ class GenerateTraffic: self.stop(verbose=True) raise Exception("iperf3 traffic did not ramp up") + def run_remote_test(self, env: object, port=None, command=None): + if port is None: + port = rand_port() + try: + server_cmd = f"iperf3 -s 1 -p {port} --one-off" + with bkg(server_cmd, host=env.remote): + #iperf3 opens TCP connection as default in server + #-u to be specified in client command for UDP + wait_port_listen(port, host=env.remote) + except Exception as e: + raise Exception(f"Unexpected error occurred while running server command: {e}") + try: + client_cmd = f"iperf3 -c {env.remote_addr} -p {port} {command}" + proc = cmd(client_cmd) + return proc + except Exception as e: + raise Exception(f"Unexpected error occurred while running client command: {e}") + def _wait_pkts(self, pkt_cnt=None, pps=None): """ Wait until we've seen pkt_cnt or until traffic ramps up to pps. diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap.sh index 89b55e946eed..36055279ba92 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap.sh @@ -116,7 +116,7 @@ dev_del_test() log_test "Device delete" - kill $mz_pid && wait $mz_pid &> /dev/null + kill_process $mz_pid } trap cleanup EXIT diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_drops.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_drops.sh index 160891dcb4bc..db5806d189bb 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_drops.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_drops.sh @@ -595,7 +595,7 @@ irif_disabled_test() log_test "Ingress RIF disabled" - kill $mz_pid && wait $mz_pid &> /dev/null + kill_process $mz_pid ip link set dev $rp1 nomaster __addr_add_del $rp1 add 192.0.2.2/24 2001:db8:1::2/64 ip link del dev br0 type bridge @@ -645,7 +645,7 @@ erif_disabled_test() log_test "Egress RIF disabled" - kill $mz_pid && wait $mz_pid &> /dev/null + kill_process $mz_pid __addr_add_del $rp1 add 192.0.2.2/24 2001:db8:1::2/64 ip link del dev br0 type bridge devlink_trap_action_set $trap_name "drop" diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_exceptions.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_exceptions.sh index 190c1b6b5365..5d6d88b600f0 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_exceptions.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_exceptions.sh @@ -202,7 +202,7 @@ mtu_value_is_too_small_test() mtu_restore $rp2 - kill $mz_pid && wait $mz_pid &> /dev/null + kill_process $mz_pid tc filter del dev $h1 ingress protocol ip pref 1 handle 101 flower } @@ -235,7 +235,7 @@ __ttl_value_is_too_small_test() log_test "TTL value is too small: TTL=$ttl_val" - kill $mz_pid && wait $mz_pid &> /dev/null + kill_process $mz_pid tc filter del dev $h1 ingress protocol ip pref 1 handle 101 flower } @@ -299,7 +299,7 @@ __mc_reverse_path_forwarding_test() log_test "Multicast reverse path forwarding: $desc" - kill $mz_pid && wait $mz_pid &> /dev/null + kill_process $mz_pid tc filter del dev $rp2 egress protocol $proto pref 1 handle 101 flower } @@ -347,7 +347,7 @@ __reject_route_test() log_test "Reject route: $desc" - kill $mz_pid && wait $mz_pid &> /dev/null + kill_process $mz_pid ip route del unreachable $unreachable tc filter del dev $h1 ingress protocol $proto pref 1 handle 101 flower } @@ -542,7 +542,7 @@ ipv4_lpm_miss_test() log_test "LPM miss: IPv4" - kill $mz_pid && wait $mz_pid &> /dev/null + kill_process $mz_pid vrf_without_routes_destroy } @@ -569,7 +569,7 @@ ipv6_lpm_miss_test() log_test "LPM miss: IPv6" - kill $mz_pid && wait $mz_pid &> /dev/null + kill_process $mz_pid vrf_without_routes_destroy } diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_policer.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_policer.sh index 0bd5ffc218ac..29a672c2270f 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_policer.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_policer.sh @@ -45,63 +45,52 @@ source $lib_dir/devlink_lib.sh h1_create() { simple_if_init $h1 192.0.2.1/24 + defer simple_if_fini $h1 192.0.2.1/24 + mtu_set $h1 10000 + defer mtu_restore $h1 ip -4 route add default vrf v$h1 nexthop via 192.0.2.2 -} - -h1_destroy() -{ - ip -4 route del default vrf v$h1 nexthop via 192.0.2.2 - - mtu_restore $h1 - simple_if_fini $h1 192.0.2.1/24 + defer ip -4 route del default vrf v$h1 nexthop via 192.0.2.2 } h2_create() { simple_if_init $h2 198.51.100.1/24 + defer simple_if_fini $h2 198.51.100.1/24 + mtu_set $h2 10000 + defer mtu_restore $h2 ip -4 route add default vrf v$h2 nexthop via 198.51.100.2 -} - -h2_destroy() -{ - ip -4 route del default vrf v$h2 nexthop via 198.51.100.2 - - mtu_restore $h2 - simple_if_fini $h2 198.51.100.1/24 + defer ip -4 route del default vrf v$h2 nexthop via 198.51.100.2 } router_create() { ip link set dev $rp1 up + defer ip link set dev $rp1 down + ip link set dev $rp2 up + defer ip link set dev $rp2 down __addr_add_del $rp1 add 192.0.2.2/24 + defer __addr_add_del $rp1 del 192.0.2.2/24 + __addr_add_del $rp2 add 198.51.100.2/24 + defer __addr_add_del $rp2 del 198.51.100.2/24 + mtu_set $rp1 10000 + defer mtu_restore $rp1 + mtu_set $rp2 10000 + defer mtu_restore $rp2 ip -4 route add blackhole 198.51.100.100 + defer ip -4 route del blackhole 198.51.100.100 devlink trap set $DEVLINK_DEV trap blackhole_route action trap -} - -router_destroy() -{ - devlink trap set $DEVLINK_DEV trap blackhole_route action drop - - ip -4 route del blackhole 198.51.100.100 - - mtu_restore $rp2 - mtu_restore $rp1 - __addr_add_del $rp2 del 198.51.100.2/24 - __addr_add_del $rp1 del 192.0.2.2/24 - - ip link set dev $rp2 down - ip link set dev $rp1 down + defer devlink trap set $DEVLINK_DEV trap blackhole_route action drop } setup_prepare() @@ -114,7 +103,11 @@ setup_prepare() rp1_mac=$(mac_get $rp1) + # Reload to ensure devlink-trap settings are back to default. + defer devlink_reload + vrf_prepare + defer vrf_cleanup h1_create h2_create @@ -122,21 +115,6 @@ setup_prepare() router_create } -cleanup() -{ - pre_cleanup - - router_destroy - - h2_destroy - h1_destroy - - vrf_cleanup - - # Reload to ensure devlink-trap settings are back to default. - devlink_reload -} - rate_limits_test() { RET=0 @@ -214,7 +192,10 @@ __rate_test() # by the policer. Make sure measured received rate is about 1000 pps log_info "=== Tx rate: Highest, Policer rate: 1000 pps ===" + defer_scope_push + start_traffic $h1 192.0.2.1 198.51.100.100 $rp1_mac + defer stop_traffic $! sleep 5 # Take measurements when rate is stable @@ -229,13 +210,16 @@ __rate_test() check_err $? "Expected non-zero policer drop rate, got 0" log_info "Measured policer drop rate of $drop_rate pps" - stop_traffic + defer_scope_pop # Send packets at a rate of 1000 pps and make sure they are not dropped # by the policer log_info "=== Tx rate: 1000 pps, Policer rate: 1000 pps ===" + defer_scope_push + start_traffic $h1 192.0.2.1 198.51.100.100 $rp1_mac -d 1msec + defer stop_traffic $! sleep 5 # Take measurements when rate is stable @@ -244,7 +228,7 @@ __rate_test() check_err $? "Expected zero policer drop rate, got a drop rate of $drop_rate pps" log_info "Measured policer drop rate of $drop_rate pps" - stop_traffic + defer_scope_pop # Unbind the policer and send packets at highest possible rate. Make # sure they are not dropped by the policer and that the measured @@ -253,7 +237,10 @@ __rate_test() devlink trap group set $DEVLINK_DEV group l3_drops nopolicer + defer_scope_push + start_traffic $h1 192.0.2.1 198.51.100.100 $rp1_mac + defer stop_traffic $! rate=$(trap_rate_get) (( rate > 1000 )) @@ -265,7 +252,7 @@ __rate_test() check_err $? "Expected zero policer drop rate, got a drop rate of $drop_rate pps" log_info "Measured policer drop rate of $drop_rate pps" - stop_traffic + defer_scope_pop log_test "Trap policer rate" } diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_ipip.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_ipip.sh index e9a82cae8c9a..4ac1dae92d0f 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_ipip.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_ipip.sh @@ -176,7 +176,7 @@ ecn_decap_test() log_test "$desc: Inner ECN is not ECT and outer is $ecn_desc" - kill $mz_pid && wait $mz_pid &> /dev/null + kill_process $mz_pid tc filter del dev $swp1 egress protocol ip pref 1 handle 101 flower } @@ -207,7 +207,7 @@ no_matching_tunnel_test() log_test "$desc" - kill $mz_pid && wait $mz_pid &> /dev/null + kill_process $mz_pid tc filter del dev $swp1 egress protocol ip pref 1 handle 101 flower } diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_ipip6.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_ipip6.sh index 878125041fc3..fce885184404 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_ipip6.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_ipip6.sh @@ -176,7 +176,7 @@ ecn_decap_test() log_test "$desc: Inner ECN is not ECT and outer is $ecn_desc" - kill $mz_pid && wait $mz_pid &> /dev/null + kill_process $mz_pid tc filter del dev $swp1 egress protocol ipv6 pref 1 handle 101 flower } @@ -207,7 +207,7 @@ no_matching_tunnel_test() log_test "$desc" - kill $mz_pid && wait $mz_pid &> /dev/null + kill_process $mz_pid tc filter del dev $swp1 egress protocol ipv6 pref 1 handle 101 flower } diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_vxlan.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_vxlan.sh index 5f6eb965cfd1..7aca8e5922cf 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_vxlan.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_vxlan.sh @@ -183,7 +183,7 @@ ecn_decap_test() log_test "$desc: Inner ECN is not ECT and outer is $ecn_desc" - kill $mz_pid && wait $mz_pid &> /dev/null + kill_process $mz_pid tc filter del dev $swp1 egress protocol ip pref 1 handle 101 flower } @@ -253,7 +253,7 @@ corrupted_packet_test() log_test "$desc" - kill $mz_pid && wait $mz_pid &> /dev/null + kill_process $mz_pid tc filter del dev $swp1 egress protocol ip pref 1 handle 101 flower } diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_vxlan_ipv6.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_vxlan_ipv6.sh index f6c16cbb6cf7..4599c331240b 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_vxlan_ipv6.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_vxlan_ipv6.sh @@ -188,7 +188,7 @@ ecn_decap_test() log_test "$desc: Inner ECN is not ECT and outer is $ecn_desc" - kill $mz_pid && wait $mz_pid &> /dev/null + kill_process $mz_pid tc filter del dev $swp1 egress protocol ipv6 pref 1 handle 101 flower } @@ -262,7 +262,7 @@ corrupted_packet_test() log_test "$desc" - kill $mz_pid && wait $mz_pid &> /dev/null + kill_process $mz_pid tc filter del dev $swp1 egress protocol ipv6 pref 1 handle 101 flower } diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_ets_strict.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_ets_strict.sh index fee74f215cec..d5b6f2cc9a29 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/qos_ets_strict.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/qos_ets_strict.sh @@ -58,65 +58,62 @@ source qos_lib.sh h1_create() { simple_if_init $h1 + defer simple_if_fini $h1 + mtu_set $h1 10000 + defer mtu_restore $h1 vlan_create $h1 111 v$h1 192.0.2.33/28 + defer vlan_destroy $h1 111 ip link set dev $h1.111 type vlan egress-qos-map 0:1 } -h1_destroy() -{ - vlan_destroy $h1 111 - - mtu_restore $h1 - simple_if_fini $h1 -} - h2_create() { simple_if_init $h2 + defer simple_if_fini $h2 + mtu_set $h2 10000 + defer mtu_restore $h2 vlan_create $h2 222 v$h2 192.0.2.65/28 + defer vlan_destroy $h2 222 ip link set dev $h2.222 type vlan egress-qos-map 0:2 } -h2_destroy() -{ - vlan_destroy $h2 222 - - mtu_restore $h2 - simple_if_fini $h2 -} - h3_create() { simple_if_init $h3 + defer simple_if_fini $h3 + mtu_set $h3 10000 + defer mtu_restore $h3 vlan_create $h3 111 v$h3 192.0.2.34/28 - vlan_create $h3 222 v$h3 192.0.2.66/28 -} - -h3_destroy() -{ - vlan_destroy $h3 222 - vlan_destroy $h3 111 + defer vlan_destroy $h3 111 - mtu_restore $h3 - simple_if_fini $h3 + vlan_create $h3 222 v$h3 192.0.2.66/28 + defer vlan_destroy $h3 222 } switch_create() { ip link set dev $swp1 up + defer ip link set dev $swp1 down + mtu_set $swp1 10000 + defer mtu_restore $swp1 ip link set dev $swp2 up + defer ip link set dev $swp2 down + mtu_set $swp2 10000 + defer mtu_restore $swp2 # prio n -> TC n, strict scheduling lldptool -T -i $swp3 -V ETS-CFG up2tc=0:0,1:1,2:2,3:3,4:4,5:5,6:6,7:7 + defer lldptool -T -i $swp3 -V ETS-CFG up2tc=0:0,1:0,2:0,3:0,4:0,5:0,6:0,7:0 + lldptool -T -i $swp3 -V ETS-CFG tsa=$( )"0:strict,"$( )"1:strict,"$( @@ -129,85 +126,90 @@ switch_create() sleep 1 ip link set dev $swp3 up + defer ip link set dev $swp3 down + mtu_set $swp3 10000 + defer mtu_restore $swp3 + tc qdisc replace dev $swp3 root handle 101: tbf rate 1gbit \ burst 128K limit 1G + defer tc qdisc del dev $swp3 root handle 101: vlan_create $swp1 111 + defer vlan_destroy $swp1 111 + vlan_create $swp2 222 + defer vlan_destroy $swp2 222 + vlan_create $swp3 111 + defer vlan_destroy $swp3 111 + vlan_create $swp3 222 + defer vlan_destroy $swp3 222 ip link add name br111 type bridge vlan_filtering 0 + defer ip link del dev br111 ip link set dev br111 addrgenmode none + ip link set dev br111 up + defer ip link set dev br111 down + ip link set dev $swp1.111 master br111 + defer ip link set dev $swp1.111 nomaster + ip link set dev $swp3.111 master br111 + defer ip link set dev $swp3.111 nomaster ip link add name br222 type bridge vlan_filtering 0 + defer ip link del dev br222 ip link set dev br222 addrgenmode none + ip link set dev br222 up + defer ip link set dev br222 down + ip link set dev $swp2.222 master br222 + defer ip link set dev $swp2.222 nomaster + ip link set dev $swp3.222 master br222 + defer ip link set dev $swp3.222 nomaster # Make sure that ingress quotas are smaller than egress so that there is # room for both streams of traffic to be admitted to shared buffer. devlink_pool_size_thtype_save 0 devlink_pool_size_thtype_set 0 dynamic 10000000 + defer devlink_pool_size_thtype_restore 0 + devlink_pool_size_thtype_save 4 devlink_pool_size_thtype_set 4 dynamic 10000000 + defer devlink_pool_size_thtype_restore 4 devlink_port_pool_th_save $swp1 0 devlink_port_pool_th_set $swp1 0 6 + defer devlink_port_pool_th_restore $swp1 0 + devlink_tc_bind_pool_th_save $swp1 1 ingress devlink_tc_bind_pool_th_set $swp1 1 ingress 0 6 + defer devlink_tc_bind_pool_th_restore $swp1 1 ingress devlink_port_pool_th_save $swp2 0 devlink_port_pool_th_set $swp2 0 6 + defer devlink_port_pool_th_restore $swp2 0 + devlink_tc_bind_pool_th_save $swp2 2 ingress devlink_tc_bind_pool_th_set $swp2 2 ingress 0 6 + defer devlink_tc_bind_pool_th_restore $swp2 2 ingress devlink_tc_bind_pool_th_save $swp3 1 egress devlink_tc_bind_pool_th_set $swp3 1 egress 4 7 + defer devlink_tc_bind_pool_th_restore $swp3 1 egress + devlink_tc_bind_pool_th_save $swp3 2 egress devlink_tc_bind_pool_th_set $swp3 2 egress 4 7 + defer devlink_tc_bind_pool_th_restore $swp3 2 egress + devlink_port_pool_th_save $swp3 4 devlink_port_pool_th_set $swp3 4 7 -} - -switch_destroy() -{ - devlink_port_pool_th_restore $swp3 4 - devlink_tc_bind_pool_th_restore $swp3 2 egress - devlink_tc_bind_pool_th_restore $swp3 1 egress - - devlink_tc_bind_pool_th_restore $swp2 2 ingress - devlink_port_pool_th_restore $swp2 0 - - devlink_tc_bind_pool_th_restore $swp1 1 ingress - devlink_port_pool_th_restore $swp1 0 - - devlink_pool_size_thtype_restore 4 - devlink_pool_size_thtype_restore 0 - - ip link del dev br222 - ip link del dev br111 - - vlan_destroy $swp3 222 - vlan_destroy $swp3 111 - vlan_destroy $swp2 222 - vlan_destroy $swp1 111 - - tc qdisc del dev $swp3 root handle 101: - mtu_restore $swp3 - ip link set dev $swp3 down - lldptool -T -i $swp3 -V ETS-CFG up2tc=0:0,1:0,2:0,3:0,4:0,5:0,6:0,7:0 - - mtu_restore $swp2 - ip link set dev $swp2 down - - mtu_restore $swp1 - ip link set dev $swp1 down + defer devlink_port_pool_th_restore $swp3 4 } setup_prepare() @@ -224,6 +226,7 @@ setup_prepare() h3mac=$(mac_get $h3) vrf_prepare + defer vrf_cleanup h1_create h2_create @@ -231,18 +234,6 @@ setup_prepare() switch_create } -cleanup() -{ - pre_cleanup - - switch_destroy - h3_destroy - h2_destroy - h1_destroy - - vrf_cleanup -} - ping_ipv4() { ping_test $h1 192.0.2.34 " from H1" @@ -261,21 +252,38 @@ rel() " } +__run_hi_measure_rate() +{ + local what=$1; shift + local -a uc_rate + + start_traffic $h2.222 192.0.2.65 192.0.2.66 $h3mac + defer stop_traffic $! + + uc_rate=($(measure_rate $swp2 $h3 rx_octets_prio_2 "$what")) + check_err $? "Could not get high enough $what ingress rate" + + echo ${uc_rate[@]} +} + +run_hi_measure_rate() +{ + in_defer_scope __run_hi_measure_rate "$@" +} + test_ets_strict() { RET=0 # Run high-prio traffic on its own. - start_traffic $h2.222 192.0.2.65 192.0.2.66 $h3mac local -a rate_2 - rate_2=($(measure_rate $swp2 $h3 rx_octets_prio_2 "prio 2")) - check_err $? "Could not get high enough prio-2 ingress rate" + rate_2=($(run_hi_measure_rate "prio 2")) local rate_2_in=${rate_2[0]} local rate_2_eg=${rate_2[1]} - stop_traffic # $h2.222 # Start low-prio stream. start_traffic $h1.111 192.0.2.33 192.0.2.34 $h3mac + defer stop_traffic $! local -a rate_1 rate_1=($(measure_rate $swp1 $h3 rx_octets_prio_1 "prio 1")) @@ -290,14 +298,9 @@ test_ets_strict() check_err $(bc <<< "$rel21 > 105") # Start the high-prio stream--now both streams run. - start_traffic $h2.222 192.0.2.65 192.0.2.66 $h3mac - rate_3=($(measure_rate $swp2 $h3 rx_octets_prio_2 "prio 2 w/ 1")) - check_err $? "Could not get high enough prio-2 ingress rate with prio-1" + rate_3=($(run_hi_measure_rate "prio 2+1")) local rate_3_in=${rate_3[0]} local rate_3_eg=${rate_3[1]} - stop_traffic # $h2.222 - - stop_traffic # $h1.111 # High-prio should have about the same throughput whether or not # low-prio is in the system. diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_max_descriptors.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_max_descriptors.sh index 5ac4f795e333..2b5d2c2751d5 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/qos_max_descriptors.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/qos_max_descriptors.sh @@ -69,127 +69,103 @@ mlxsw_only_on_spectrum 2+ || exit h1_create() { simple_if_init $h1 + defer simple_if_fini $h1 vlan_create $h1 111 v$h1 192.0.2.33/28 + defer vlan_destroy $h1 111 ip link set dev $h1.111 type vlan egress-qos-map 0:1 } -h1_destroy() -{ - vlan_destroy $h1 111 - - simple_if_fini $h1 -} - h2_create() { simple_if_init $h2 + defer simple_if_fini $h2 vlan_create $h2 111 v$h2 192.0.2.34/28 -} - -h2_destroy() -{ - vlan_destroy $h2 111 - - simple_if_fini $h2 + defer vlan_destroy $h2 111 } switch_create() { # pools # ----- + # devlink_pool_size_thtype_restore needs to be done first so that we can + # reset the various limits to values that are only valid for the + # original static / dynamic setting. devlink_pool_size_thtype_save 1 - devlink_pool_size_thtype_save 6 - - devlink_port_pool_th_save $swp1 1 - devlink_port_pool_th_save $swp2 6 - - devlink_tc_bind_pool_th_save $swp1 1 ingress - devlink_tc_bind_pool_th_save $swp2 1 egress - devlink_pool_size_thtype_set 1 dynamic $MAX_POOL_SIZE + defer_prio devlink_pool_size_thtype_restore 1 + + devlink_pool_size_thtype_save 6 devlink_pool_size_thtype_set 6 static $MAX_POOL_SIZE + defer_prio devlink_pool_size_thtype_restore 6 # $swp1 # ----- ip link set dev $swp1 up + defer ip link set dev $swp1 down + vlan_create $swp1 111 + defer vlan_destroy $swp1 111 ip link set dev $swp1.111 type vlan ingress-qos-map 0:0 1:1 + devlink_port_pool_th_save $swp1 1 devlink_port_pool_th_set $swp1 1 16 + defer devlink_tc_bind_pool_th_restore $swp1 1 ingress + + devlink_tc_bind_pool_th_save $swp1 1 ingress devlink_tc_bind_pool_th_set $swp1 1 ingress 1 16 + defer devlink_port_pool_th_restore $swp1 1 tc qdisc replace dev $swp1 root handle 1: \ ets bands 8 strict 8 priomap 7 6 + defer tc qdisc del dev $swp1 root + dcb buffer set dev $swp1 prio-buffer all:0 1:1 + defer dcb buffer set dev $swp1 prio-buffer all:0 # $swp2 # ----- ip link set dev $swp2 up + defer ip link set dev $swp2 down + vlan_create $swp2 111 + defer vlan_destroy $swp2 111 ip link set dev $swp2.111 type vlan egress-qos-map 0:0 1:1 + devlink_port_pool_th_save $swp2 6 devlink_port_pool_th_set $swp2 6 $MAX_POOL_SIZE + defer devlink_tc_bind_pool_th_restore $swp2 1 egress + + devlink_tc_bind_pool_th_save $swp2 1 egress devlink_tc_bind_pool_th_set $swp2 1 egress 6 $MAX_POOL_SIZE + defer devlink_port_pool_th_restore $swp2 6 tc qdisc replace dev $swp2 root handle 1: tbf rate $SHAPER_RATE \ burst 128K limit 500M + defer tc qdisc del dev $swp2 root + tc qdisc replace dev $swp2 parent 1:1 handle 11: \ ets bands 8 strict 8 priomap 7 6 + defer tc qdisc del dev $swp2 parent 1:1 handle 11: # bridge # ------ ip link add name br1 type bridge vlan_filtering 0 + defer ip link del dev br1 + ip link set dev $swp1.111 master br1 + defer ip link set dev $swp1.111 nomaster + ip link set dev br1 up + defer ip link set dev br1 down ip link set dev $swp2.111 master br1 -} - -switch_destroy() -{ - # Do this first so that we can reset the limits to values that are only - # valid for the original static / dynamic setting. - devlink_pool_size_thtype_restore 6 - devlink_pool_size_thtype_restore 1 - - # bridge - # ------ - - ip link set dev $swp2.111 nomaster - - ip link set dev br1 down - ip link set dev $swp1.111 nomaster - ip link del dev br1 - - # $swp2 - # ----- - - tc qdisc del dev $swp2 parent 1:1 handle 11: - tc qdisc del dev $swp2 root - - devlink_tc_bind_pool_th_restore $swp2 1 egress - devlink_port_pool_th_restore $swp2 6 - - vlan_destroy $swp2 111 - ip link set dev $swp2 down - - # $swp1 - # ----- - - dcb buffer set dev $swp1 prio-buffer all:0 - tc qdisc del dev $swp1 root - - devlink_tc_bind_pool_th_restore $swp1 1 ingress - devlink_port_pool_th_restore $swp1 1 - - vlan_destroy $swp1 111 - ip link set dev $swp1 down + defer ip link set dev $swp2.111 nomaster } setup_prepare() @@ -203,23 +179,13 @@ setup_prepare() h2mac=$(mac_get $h2) vrf_prepare + defer vrf_cleanup h1_create h2_create switch_create } -cleanup() -{ - pre_cleanup - - switch_destroy - h2_destroy - h1_destroy - - vrf_cleanup -} - ping_ipv4() { ping_test $h1 192.0.2.34 " h1->h2" @@ -251,6 +217,7 @@ max_descriptors() log_info "Send many small packets, packet size = $pktsize bytes" start_traffic_pktsize $pktsize $h1.111 192.0.2.33 192.0.2.34 $h2mac + defer stop_traffic $! # Sleep to wait for congestion. sleep 5 @@ -268,9 +235,6 @@ max_descriptors() check_err $(bc <<< "$perc_used < $exp_perc_used") \ "Expected > $exp_perc_used% of descriptors, handle $perc_used%" - stop_traffic - sleep 1 - log_test "Maximum descriptors usage. The percentage used is $perc_used%" } diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh index 6d892de43fa8..cd4a5c21360c 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh @@ -73,122 +73,114 @@ source qos_lib.sh h1_create() { simple_if_init $h1 192.0.2.65/28 - mtu_set $h1 10000 -} + defer simple_if_fini $h1 192.0.2.65/28 -h1_destroy() -{ - mtu_restore $h1 - simple_if_fini $h1 192.0.2.65/28 + mtu_set $h1 10000 + defer mtu_restore $h1 } h2_create() { simple_if_init $h2 + defer simple_if_fini $h2 + mtu_set $h2 10000 + defer mtu_restore $h2 vlan_create $h2 111 v$h2 192.0.2.129/28 + defer vlan_destroy $h2 111 ip link set dev $h2.111 type vlan egress-qos-map 0:1 } -h2_destroy() -{ - vlan_destroy $h2 111 - - mtu_restore $h2 - simple_if_fini $h2 -} - h3_create() { simple_if_init $h3 192.0.2.66/28 + defer simple_if_fini $h3 192.0.2.66/28 + mtu_set $h3 10000 + defer mtu_restore $h3 vlan_create $h3 111 v$h3 192.0.2.130/28 -} - -h3_destroy() -{ - vlan_destroy $h3 111 - - mtu_restore $h3 - simple_if_fini $h3 192.0.2.66/28 + defer vlan_destroy $h3 111 } switch_create() { ip link set dev $swp1 up + defer ip link set dev $swp1 down + mtu_set $swp1 10000 + defer mtu_restore $swp1 ip link set dev $swp2 up + defer ip link set dev $swp2 down + mtu_set $swp2 10000 + defer mtu_restore $swp2 ip link set dev $swp3 up + defer ip link set dev $swp3 down + mtu_set $swp3 10000 + defer mtu_restore $swp3 vlan_create $swp2 111 + defer vlan_destroy $swp2 111 + vlan_create $swp3 111 + defer vlan_destroy $swp3 111 tc qdisc replace dev $swp3 root handle 3: tbf rate 1gbit \ burst 128K limit 1G + defer tc qdisc del dev $swp3 root handle 3: + tc qdisc replace dev $swp3 parent 3:3 handle 33: \ prio bands 8 priomap 7 7 7 7 7 7 7 7 + defer tc qdisc del dev $swp3 parent 3:3 handle 33: ip link add name br1 type bridge vlan_filtering 0 + defer ip link del dev br1 ip link set dev br1 addrgenmode none ip link set dev br1 up + ip link set dev $swp1 master br1 + defer ip link set dev $swp1 nomaster + ip link set dev $swp3 master br1 + defer ip link set dev $swp3 nomaster ip link add name br111 type bridge vlan_filtering 0 + defer ip link del dev br111 ip link set dev br111 addrgenmode none ip link set dev br111 up + ip link set dev $swp2.111 master br111 + defer ip link set dev $swp2.111 nomaster + ip link set dev $swp3.111 master br111 + defer ip link set dev $swp3.111 nomaster # Make sure that ingress quotas are smaller than egress so that there is # room for both streams of traffic to be admitted to shared buffer. devlink_port_pool_th_save $swp1 0 devlink_port_pool_th_set $swp1 0 5 + defer devlink_port_pool_th_restore $swp1 0 + devlink_tc_bind_pool_th_save $swp1 0 ingress devlink_tc_bind_pool_th_set $swp1 0 ingress 0 5 + defer devlink_tc_bind_pool_th_restore $swp1 0 ingress devlink_port_pool_th_save $swp2 0 devlink_port_pool_th_set $swp2 0 5 + defer devlink_port_pool_th_restore $swp2 0 + devlink_tc_bind_pool_th_save $swp2 1 ingress devlink_tc_bind_pool_th_set $swp2 1 ingress 0 5 + defer devlink_tc_bind_pool_th_restore $swp2 1 ingress devlink_port_pool_th_save $swp3 4 devlink_port_pool_th_set $swp3 4 12 -} - -switch_destroy() -{ - devlink_port_pool_th_restore $swp3 4 - - devlink_tc_bind_pool_th_restore $swp2 1 ingress - devlink_port_pool_th_restore $swp2 0 - - devlink_tc_bind_pool_th_restore $swp1 0 ingress - devlink_port_pool_th_restore $swp1 0 - - ip link del dev br111 - ip link del dev br1 - - tc qdisc del dev $swp3 parent 3:3 handle 33: - tc qdisc del dev $swp3 root handle 3: - - vlan_destroy $swp3 111 - vlan_destroy $swp2 111 - - mtu_restore $swp3 - ip link set dev $swp3 down - - mtu_restore $swp2 - ip link set dev $swp2 down - - mtu_restore $swp1 - ip link set dev $swp1 down + defer devlink_port_pool_th_restore $swp3 4 } setup_prepare() @@ -205,6 +197,7 @@ setup_prepare() h3mac=$(mac_get $h3) vrf_prepare + defer vrf_cleanup h1_create h2_create @@ -212,45 +205,45 @@ setup_prepare() switch_create } -cleanup() +ping_ipv4() { - pre_cleanup + ping_test $h2 192.0.2.130 +} - switch_destroy - h3_destroy - h2_destroy - h1_destroy +__run_uc_measure_rate() +{ + local what=$1; shift + local -a uc_rate + + start_traffic $h2.111 192.0.2.129 192.0.2.130 $h3mac + defer stop_traffic $! + + uc_rate=($(measure_rate $swp2 $h3 rx_octets_prio_1 "$what")) + check_err $? "Could not get high enough $what ingress rate" - vrf_cleanup + echo ${uc_rate[@]} } -ping_ipv4() +run_uc_measure_rate() { - ping_test $h2 192.0.2.130 + in_defer_scope __run_uc_measure_rate "$@" } test_mc_aware() { RET=0 - local -a uc_rate - start_traffic $h2.111 192.0.2.129 192.0.2.130 $h3mac - uc_rate=($(measure_rate $swp2 $h3 rx_octets_prio_1 "UC-only")) - check_err $? "Could not get high enough UC-only ingress rate" - stop_traffic + local -a uc_rate=($(run_uc_measure_rate "UC-only")) local ucth1=${uc_rate[1]} start_traffic $h1 192.0.2.65 bc bc + defer stop_traffic $! local d0=$(date +%s) local t0=$(ethtool_stats_get $h3 rx_octets_prio_0) local u0=$(ethtool_stats_get $swp1 rx_octets_prio_0) - local -a uc_rate_2 - start_traffic $h2.111 192.0.2.129 192.0.2.130 $h3mac - uc_rate_2=($(measure_rate $swp2 $h3 rx_octets_prio_1 "UC+MC")) - check_err $? "Could not get high enough UC+MC ingress rate" - stop_traffic + local -a uc_rate_2=($(run_uc_measure_rate "UC+MC")) local ucth2=${uc_rate_2[1]} local d1=$(date +%s) @@ -272,8 +265,6 @@ test_mc_aware() local mc_ir=$(rate $u0 $u1 $interval) local mc_er=$(rate $t0 $t1 $interval) - stop_traffic - log_test "UC performance under MC overload" echo "UC-only throughput $(humanize $ucth1)" @@ -297,6 +288,7 @@ test_uc_aware() RET=0 start_traffic $h2.111 192.0.2.129 192.0.2.130 $h3mac + defer stop_traffic $! local d0=$(date +%s) local t0=$(ethtool_stats_get $h3 rx_octets_prio_1) @@ -326,8 +318,6 @@ test_uc_aware() ((attempts == passes)) check_err $? - stop_traffic - log_test "MC performance under UC overload" echo " ingress UC throughput $(humanize ${uc_ir})" echo " egress UC throughput $(humanize ${uc_er})" diff --git a/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh b/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh index 893a693ad805..45a569618424 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh @@ -186,10 +186,7 @@ bridge_vlan_flags_test() # If we did not handle references correctly, then this should produce a # trace - devlink dev reload "$DEVLINK_DEV" - - # Allow netdevices to be re-created following the reload - sleep 20 + devlink_reload log_test "bridge vlan flags" } @@ -923,12 +920,9 @@ devlink_reload_test() # devlink reload can be performed without errors RET=0 - devlink dev reload "$DEVLINK_DEV" - check_err $? "devlink reload failed" + devlink_reload log_test "devlink reload - last test" - - sleep 20 } trap cleanup EXIT diff --git a/tools/testing/selftests/drivers/net/mlxsw/sch_ets.sh b/tools/testing/selftests/drivers/net/mlxsw/sch_ets.sh index 139175fd03e7..4aaceb6b2b60 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/sch_ets.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/sch_ets.sh @@ -21,6 +21,7 @@ switch_create() # Create a bottleneck so that the DWRR process can kick in. tc qdisc replace dev $swp2 root handle 3: tbf rate 1gbit \ burst 128K limit 1G + defer tc qdisc del dev $swp2 root handle 3: ets_switch_create @@ -30,16 +31,27 @@ switch_create() # for the DWRR process. devlink_port_pool_th_save $swp1 0 devlink_port_pool_th_set $swp1 0 12 + defer devlink_port_pool_th_restore $swp1 0 + devlink_tc_bind_pool_th_save $swp1 0 ingress devlink_tc_bind_pool_th_set $swp1 0 ingress 0 12 + defer devlink_tc_bind_pool_th_restore $swp1 0 ingress + devlink_port_pool_th_save $swp2 4 devlink_port_pool_th_set $swp2 4 12 + defer devlink_port_pool_th_restore $swp2 4 + devlink_tc_bind_pool_th_save $swp2 7 egress devlink_tc_bind_pool_th_set $swp2 7 egress 4 5 + defer devlink_tc_bind_pool_th_restore $swp2 7 egress + devlink_tc_bind_pool_th_save $swp2 6 egress devlink_tc_bind_pool_th_set $swp2 6 egress 4 5 + defer devlink_tc_bind_pool_th_restore $swp2 6 egress + devlink_tc_bind_pool_th_save $swp2 5 egress devlink_tc_bind_pool_th_set $swp2 5 egress 4 5 + defer devlink_tc_bind_pool_th_restore $swp2 5 egress # Note: sch_ets_core.sh uses VLAN ingress-qos-map to assign packet # priorities at $swp1 based on their 802.1p headers. ingress-qos-map is @@ -47,20 +59,6 @@ switch_create() # 1:1, which is the mapping currently hard-coded by the driver. } -switch_destroy() -{ - devlink_tc_bind_pool_th_restore $swp2 5 egress - devlink_tc_bind_pool_th_restore $swp2 6 egress - devlink_tc_bind_pool_th_restore $swp2 7 egress - devlink_port_pool_th_restore $swp2 4 - devlink_tc_bind_pool_th_restore $swp1 0 ingress - devlink_port_pool_th_restore $swp1 0 - - ets_switch_destroy - - tc qdisc del dev $swp2 root handle 3: -} - # Callback from sch_ets_tests.sh collect_stats() { diff --git a/tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh b/tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh index 299e06a5808c..537d6baa77b7 100644 --- a/tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh @@ -75,6 +75,18 @@ source $lib_dir/lib.sh source $lib_dir/devlink_lib.sh source mlxsw_lib.sh +stop_traffic_sleep() +{ + local pid=$1; shift + + # Issuing a kill still leaves a bunch of packets lingering in the + # buffers. This traffic then arrives at the point where a follow-up test + # is already running, and can confuse the test. Therefore sleep after + # stopping traffic to flush any leftover packets. + stop_traffic "$pid" + sleep 1 +} + ipaddr() { local host=$1; shift @@ -89,39 +101,31 @@ host_create() local host=$1; shift simple_if_init $dev + defer simple_if_fini $dev + mtu_set $dev 10000 + defer mtu_restore $dev vlan_create $dev 10 v$dev $(ipaddr $host 10)/28 + defer vlan_destroy $dev 10 ip link set dev $dev.10 type vlan egress 0:0 vlan_create $dev 11 v$dev $(ipaddr $host 11)/28 + defer vlan_destroy $dev 11 ip link set dev $dev.11 type vlan egress 0:1 } -host_destroy() -{ - local dev=$1; shift - - vlan_destroy $dev 11 - vlan_destroy $dev 10 - mtu_restore $dev - simple_if_fini $dev -} - h1_create() { host_create $h1 1 } -h1_destroy() -{ - host_destroy $h1 -} - h2_create() { host_create $h2 2 + tc qdisc add dev $h2 clsact + defer tc qdisc del dev $h2 clsact # Some of the tests in this suite use multicast traffic. As this traffic # enters BR2_10 resp. BR2_11, it is flooded to all other ports. Thus @@ -137,15 +141,9 @@ h2_create() # Prevent this by adding a shaper which limits the traffic in $h2 to # 1Gbps. - tc qdisc replace dev $h2 root handle 10: tbf rate 1gbit \ + tc qdisc replace dev $h2 root handle 10: tbf rate 200mbit \ burst 128K limit 1G -} - -h2_destroy() -{ - tc qdisc del dev $h2 root handle 10: - tc qdisc del dev $h2 clsact - host_destroy $h2 + defer tc qdisc del dev $h2 root handle 10: } h3_create() @@ -153,40 +151,54 @@ h3_create() host_create $h3 3 } -h3_destroy() -{ - host_destroy $h3 -} - switch_create() { local intf local vlan ip link add dev br1_10 type bridge + defer ip link del dev br1_10 + ip link add dev br1_11 type bridge + defer ip link del dev br1_11 ip link add dev br2_10 type bridge + defer ip link del dev br2_10 + ip link add dev br2_11 type bridge + defer ip link del dev br2_11 for intf in $swp1 $swp2 $swp3 $swp4 $swp5; do ip link set dev $intf up + defer ip link set dev $intf down + mtu_set $intf 10000 + defer mtu_restore $intf done for intf in $swp1 $swp4; do for vlan in 10 11; do vlan_create $intf $vlan + defer vlan_destroy $intf $vlan + ip link set dev $intf.$vlan master br1_$vlan + defer ip link set dev $intf.$vlan nomaster + ip link set dev $intf.$vlan up + defer ip link set dev $intf.$vlan up done done for intf in $swp2 $swp3 $swp5; do for vlan in 10 11; do vlan_create $intf $vlan + defer vlan_destroy $intf $vlan + ip link set dev $intf.$vlan master br2_$vlan + defer ip link set dev $intf.$vlan nomaster + ip link set dev $intf.$vlan up + defer ip link set dev $intf.$vlan up done done @@ -199,51 +211,27 @@ switch_create() done for intf in $swp3 $swp4; do - tc qdisc replace dev $intf root handle 1: tbf rate 1gbit \ + tc qdisc replace dev $intf root handle 1: tbf rate 200mbit \ burst 128K limit 1G + defer tc qdisc del dev $intf root handle 1: done ip link set dev br1_10 up + defer ip link set dev br1_10 down + ip link set dev br1_11 up + defer ip link set dev br1_11 down + ip link set dev br2_10 up + defer ip link set dev br2_10 down + ip link set dev br2_11 up + defer ip link set dev br2_11 down local size=$(devlink_pool_size_thtype 0 | cut -d' ' -f 1) devlink_port_pool_th_save $swp3 8 devlink_port_pool_th_set $swp3 8 $size -} - -switch_destroy() -{ - local intf - local vlan - - devlink_port_pool_th_restore $swp3 8 - - ip link set dev br2_11 down - ip link set dev br2_10 down - ip link set dev br1_11 down - ip link set dev br1_10 down - - for intf in $swp4 $swp3; do - tc qdisc del dev $intf root handle 1: - done - - for intf in $swp5 $swp3 $swp2 $swp4 $swp1; do - for vlan in 11 10; do - ip link set dev $intf.$vlan down - ip link set dev $intf.$vlan nomaster - vlan_destroy $intf $vlan - done - - mtu_restore $intf - ip link set dev $intf down - done - - ip link del dev br2_11 - ip link del dev br2_10 - ip link del dev br1_11 - ip link del dev br1_10 + defer devlink_port_pool_th_restore $swp3 8 } setup_prepare() @@ -263,6 +251,7 @@ setup_prepare() h3_mac=$(mac_get $h3) vrf_prepare + defer vrf_cleanup h1_create h2_create @@ -270,18 +259,6 @@ setup_prepare() switch_create } -cleanup() -{ - pre_cleanup - - switch_destroy - h3_destroy - h2_destroy - h1_destroy - - vrf_cleanup -} - ping_ipv4() { ping_test $h1.10 $(ipaddr 3 10) " from host 1, vlan 10" @@ -372,6 +349,7 @@ build_backlog() local i=0 while :; do + sleep 1 local cur=$(busywait 1100 until_counter_is "> $cur" \ get_qdisc_backlog $vlan) local diff=$((size - cur)) @@ -449,6 +427,7 @@ __do_ecn_test() start_tcp_traffic $h1.$vlan $(ipaddr 1 $vlan) $(ipaddr 3 $vlan) \ $h3_mac tos=0x01 + defer stop_traffic_sleep $! sleep 1 ecn_test_common "$name" "$get_nmarked" $vlan $limit @@ -460,9 +439,6 @@ __do_ecn_test() build_backlog $vlan $((2 * limit)) udp >/dev/null check_fail $? "UDP traffic went into backlog instead of being early-dropped" log_test "TC $((vlan - 10)): $name backlog > limit: UDP early-dropped" - - stop_traffic - sleep 1 } do_ecn_test() @@ -470,7 +446,8 @@ do_ecn_test() local vlan=$1; shift local limit=$1; shift - __do_ecn_test get_nmarked "$vlan" "$limit" + in_defer_scope \ + __do_ecn_test get_nmarked "$vlan" "$limit" } do_ecn_test_perband() @@ -479,10 +456,11 @@ do_ecn_test_perband() local limit=$1; shift mlxsw_only_on_spectrum 3+ || return - __do_ecn_test get_qdisc_nmarked "$vlan" "$limit" "per-band ECN" + in_defer_scope \ + __do_ecn_test get_qdisc_nmarked "$vlan" "$limit" "per-band ECN" } -do_ecn_nodrop_test() +__do_ecn_nodrop_test() { local vlan=$1; shift local limit=$1; shift @@ -490,6 +468,7 @@ do_ecn_nodrop_test() start_tcp_traffic $h1.$vlan $(ipaddr 1 $vlan) $(ipaddr 3 $vlan) \ $h3_mac tos=0x01 + defer stop_traffic_sleep $! sleep 1 ecn_test_common "$name" get_nmarked $vlan $limit @@ -501,12 +480,15 @@ do_ecn_nodrop_test() build_backlog $vlan $((2 * limit)) udp >/dev/null check_err $? "UDP traffic was early-dropped instead of getting into backlog" log_test "TC $((vlan - 10)): $name backlog > limit: UDP not dropped" +} - stop_traffic - sleep 1 +do_ecn_nodrop_test() +{ + in_defer_scope \ + __do_ecn_nodrop_test "$@" } -do_red_test() +__do_red_test() { local vlan=$1; shift local limit=$1; shift @@ -517,6 +499,7 @@ do_red_test() # is above limit. start_tcp_traffic $h1.$vlan $(ipaddr 1 $vlan) $(ipaddr 3 $vlan) \ $h3_mac tos=0x01 + defer stop_traffic_sleep $! # Pushing below the queue limit should work. RET=0 @@ -532,17 +515,21 @@ do_red_test() check_fail $? "Traffic went into backlog instead of being early-dropped" pct=$(check_marking get_nmarked $vlan "== 0") check_err $? "backlog $backlog / $limit Got $pct% marked packets, expected == 0." + backlog=$(get_qdisc_backlog $vlan) local diff=$((limit - backlog)) pct=$((100 * diff / limit)) - ((-10 <= pct && pct <= 10)) - check_err $? "backlog $backlog / $limit expected <= 10% distance" + ((-15 <= pct && pct <= 15)) + check_err $? "backlog $backlog / $limit expected <= 15% distance" log_test "TC $((vlan - 10)): RED backlog > limit" +} - stop_traffic - sleep 1 +do_red_test() +{ + in_defer_scope \ + __do_red_test "$@" } -do_mc_backlog_test() +__do_mc_backlog_test() { local vlan=$1; shift local limit=$1; shift @@ -552,7 +539,10 @@ do_mc_backlog_test() RET=0 start_tcp_traffic $h1.$vlan $(ipaddr 1 $vlan) $(ipaddr 3 $vlan) bc + defer stop_traffic_sleep $! + start_tcp_traffic $h2.$vlan $(ipaddr 2 $vlan) $(ipaddr 3 $vlan) bc + defer stop_traffic_sleep $! qbl=$(busywait 5000 until_counter_is ">= 500000" \ get_qdisc_backlog $vlan) @@ -565,13 +555,16 @@ do_mc_backlog_test() get_mc_transmit_queue $vlan) check_err $? "MC backlog reported by qdisc not visible in ethtool" - stop_traffic - stop_traffic - log_test "TC $((vlan - 10)): Qdisc reports MC backlog" } -do_mark_test() +do_mc_backlog_test() +{ + in_defer_scope \ + __do_mc_backlog_test "$@" +} + +__do_mark_test() { local vlan=$1; shift local limit=$1; shift @@ -586,6 +579,7 @@ do_mark_test() start_tcp_traffic $h1.$vlan $(ipaddr 1 $vlan) $(ipaddr 3 $vlan) \ $h3_mac tos=0x01 + defer stop_traffic_sleep $! # Create a bit of a backlog and observe no mirroring due to marks. qevent_rule_install_$subtest @@ -600,7 +594,7 @@ do_mark_test() # Above limit, everything should be mirrored, we should see lots of # packets. build_backlog $vlan $((3 * limit / 2)) tcp tos=0x01 >/dev/null - busywait_for_counter 1100 +10000 \ + busywait_for_counter 1100 +2500 \ $fetch_counter > /dev/null check_err_fail "$should_fail" $? "ECN-marked packets $subtest'd" @@ -615,12 +609,15 @@ do_mark_test() else log_test "TC $((vlan - 10)): marked packets $subtest'd" fi +} - stop_traffic - sleep 1 +do_mark_test() +{ + in_defer_scope \ + __do_mark_test "$@" } -do_drop_test() +__do_drop_test() { local vlan=$1; shift local limit=$1; shift @@ -635,6 +632,7 @@ do_drop_test() RET=0 start_traffic $h1.$vlan $(ipaddr 1 $vlan) $(ipaddr 3 $vlan) $h3_mac + defer stop_traffic_sleep $! # Create a bit of a backlog and observe no mirroring due to drops. qevent_rule_install_$subtest @@ -651,25 +649,30 @@ do_drop_test() build_backlog $vlan $((3 * limit / 2)) udp >/dev/null base=$($fetch_counter) - send_packets $vlan udp 11 + send_packets $vlan udp 100 - now=$(busywait 1100 until_counter_is ">= $((base + 10))" $fetch_counter) - check_err $? "Dropped packets not observed: 11 expected, $((now - base)) seen" + now=$(busywait 1100 until_counter_is ">= $((base + 95))" $fetch_counter) + check_err $? "${trigger}ped packets not observed: 100 expected, $((now - base)) seen" # When no extra traffic is injected, there should be no mirroring. - busywait 1100 until_counter_is ">= $((base + 20))" $fetch_counter >/dev/null + busywait 1100 until_counter_is ">= $((base + 110))" \ + $fetch_counter >/dev/null check_fail $? "Spurious packets observed" # When the rule is uninstalled, there should be no mirroring. qevent_rule_uninstall_$subtest - send_packets $vlan udp 11 - busywait 1100 until_counter_is ">= $((base + 20))" $fetch_counter >/dev/null - check_fail $? "Spurious packets observed after uninstall" + send_packets $vlan udp 100 + now=$(busywait 1100 until_counter_is ">= $((base + 110))" \ + $fetch_counter) + check_fail $? "$((now - base)) spurious packets observed after uninstall" log_test "TC $((vlan - 10)): ${trigger}ped packets $subtest'd" +} - stop_traffic - sleep 1 +do_drop_test() +{ + in_defer_scope \ + __do_drop_test "$@" } qevent_rule_install_mirror() diff --git a/tools/testing/selftests/drivers/net/mlxsw/sch_red_ets.sh b/tools/testing/selftests/drivers/net/mlxsw/sch_red_ets.sh index 8ecddafa79b3..8902a115d9cd 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/sch_red_ets.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/sch_red_ets.sh @@ -20,8 +20,8 @@ source sch_red_core.sh # $BACKLOG2 are far enough not to overlap, so that we can assume that if we do # see (do not see) marking, it is actually due to the configuration of that one # TC, and not due to configuration of the other TC leaking over. -BACKLOG1=200000 -BACKLOG2=500000 +BACKLOG1=400000 +BACKLOG2=1000000 install_root_qdisc() { @@ -35,7 +35,7 @@ install_qdisc_tc0() tc qdisc add dev $swp3 parent 10:8 handle 108: red \ limit 1000000 min $BACKLOG1 max $((BACKLOG1 + 1)) \ - probability 1.0 avpkt 8000 burst 38 "${args[@]}" + probability 1.0 avpkt 8000 burst 51 "${args[@]}" } install_qdisc_tc1() @@ -44,7 +44,7 @@ install_qdisc_tc1() tc qdisc add dev $swp3 parent 10:7 handle 107: red \ limit 1000000 min $BACKLOG2 max $((BACKLOG2 + 1)) \ - probability 1.0 avpkt 8000 burst 63 "${args[@]}" + probability 1.0 avpkt 8000 burst 126 "${args[@]}" } install_qdisc() @@ -80,36 +80,34 @@ uninstall_qdisc() ecn_test() { install_qdisc ecn + defer uninstall_qdisc do_ecn_test 10 $BACKLOG1 do_ecn_test 11 $BACKLOG2 - - uninstall_qdisc } ecn_test_perband() { install_qdisc ecn + defer uninstall_qdisc do_ecn_test_perband 10 $BACKLOG1 do_ecn_test_perband 11 $BACKLOG2 - - uninstall_qdisc } ecn_nodrop_test() { install_qdisc ecn nodrop + defer uninstall_qdisc do_ecn_nodrop_test 10 $BACKLOG1 do_ecn_nodrop_test 11 $BACKLOG2 - - uninstall_qdisc } red_test() { install_qdisc + defer uninstall_qdisc # Make sure that we get the non-zero value if there is any. local cur=$(busywait 1100 until_counter_is "> 0" \ @@ -120,50 +118,44 @@ red_test() do_red_test 10 $BACKLOG1 do_red_test 11 $BACKLOG2 - - uninstall_qdisc } mc_backlog_test() { install_qdisc + defer uninstall_qdisc # Note that the backlog numbers here do not correspond to RED # configuration, but are arbitrary. do_mc_backlog_test 10 $BACKLOG1 do_mc_backlog_test 11 $BACKLOG2 - - uninstall_qdisc } red_mirror_test() { install_qdisc qevent early_drop block 10 + defer uninstall_qdisc do_drop_mirror_test 10 $BACKLOG1 early_drop do_drop_mirror_test 11 $BACKLOG2 early_drop - - uninstall_qdisc } red_trap_test() { install_qdisc qevent early_drop block 10 + defer uninstall_qdisc do_drop_trap_test 10 $BACKLOG1 early_drop do_drop_trap_test 11 $BACKLOG2 early_drop - - uninstall_qdisc } ecn_mirror_test() { install_qdisc ecn qevent mark block 10 + defer uninstall_qdisc do_mark_mirror_test 10 $BACKLOG1 do_mark_mirror_test 11 $BACKLOG2 - - uninstall_qdisc } bail_on_lldpad "configure DCB" "configure Qdiscs" diff --git a/tools/testing/selftests/drivers/net/mlxsw/sch_red_root.sh b/tools/testing/selftests/drivers/net/mlxsw/sch_red_root.sh index 159108d02895..e9043771787b 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/sch_red_root.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/sch_red_root.sh @@ -32,45 +32,51 @@ uninstall_qdisc() ecn_test() { install_qdisc ecn + defer uninstall_qdisc + do_ecn_test 10 $BACKLOG - uninstall_qdisc } ecn_test_perband() { install_qdisc ecn + defer uninstall_qdisc + do_ecn_test_perband 10 $BACKLOG - uninstall_qdisc } ecn_nodrop_test() { install_qdisc ecn nodrop + defer uninstall_qdisc + do_ecn_nodrop_test 10 $BACKLOG - uninstall_qdisc } red_test() { install_qdisc + defer uninstall_qdisc + do_red_test 10 $BACKLOG - uninstall_qdisc } mc_backlog_test() { install_qdisc + defer uninstall_qdisc + # Note that the backlog value here does not correspond to RED # configuration, but is arbitrary. do_mc_backlog_test 10 $BACKLOG - uninstall_qdisc } red_mirror_test() { install_qdisc qevent early_drop block 10 + defer uninstall_qdisc + do_drop_mirror_test 10 $BACKLOG - uninstall_qdisc } bail_on_lldpad "configure DCB" "configure Qdiscs" diff --git a/tools/testing/selftests/drivers/net/mlxsw/sharedbuffer.sh b/tools/testing/selftests/drivers/net/mlxsw/sharedbuffer.sh index 0c47faff9274..c068e6c2a580 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/sharedbuffer.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/sharedbuffer.sh @@ -22,20 +22,34 @@ SB_ITC=0 h1_create() { simple_if_init $h1 192.0.1.1/24 + tc qdisc add dev $h1 clsact + + # Add egress filter on $h1 that will guarantee that the packet sent, + # will be the only packet being passed to the device. + tc filter add dev $h1 egress pref 2 handle 102 matchall action drop } h1_destroy() { + tc filter del dev $h1 egress pref 2 handle 102 matchall action drop + tc qdisc del dev $h1 clsact simple_if_fini $h1 192.0.1.1/24 } h2_create() { simple_if_init $h2 192.0.1.2/24 + tc qdisc add dev $h2 clsact + + # Add egress filter on $h2 that will guarantee that the packet sent, + # will be the only packet being passed to the device. + tc filter add dev $h2 egress pref 1 handle 101 matchall action drop } h2_destroy() { + tc filter del dev $h2 egress pref 1 handle 101 matchall action drop + tc qdisc del dev $h2 clsact simple_if_fini $h2 192.0.1.2/24 } @@ -101,6 +115,11 @@ port_pool_test() local exp_max_occ=$(devlink_cell_size_get) local max_occ + tc filter add dev $h1 egress protocol ip pref 1 handle 101 flower \ + src_mac $h1mac dst_mac $h2mac \ + src_ip 192.0.1.1 dst_ip 192.0.1.2 \ + action pass + devlink sb occupancy clearmax $DEVLINK_DEV $MZ $h1 -c 1 -p 10 -a $h1mac -b $h2mac -A 192.0.1.1 -B 192.0.1.2 \ @@ -109,11 +128,6 @@ port_pool_test() devlink sb occupancy snapshot $DEVLINK_DEV RET=0 - max_occ=$(sb_occ_pool_check $dl_port1 $SB_POOL_ING $exp_max_occ) - check_err $? "Expected iPool($SB_POOL_ING) max occupancy to be $exp_max_occ, but got $max_occ" - log_test "physical port's($h1) ingress pool" - - RET=0 max_occ=$(sb_occ_pool_check $dl_port2 $SB_POOL_ING $exp_max_occ) check_err $? "Expected iPool($SB_POOL_ING) max occupancy to be $exp_max_occ, but got $max_occ" log_test "physical port's($h2) ingress pool" @@ -122,6 +136,11 @@ port_pool_test() max_occ=$(sb_occ_pool_check $cpu_dl_port $SB_POOL_EGR_CPU $exp_max_occ) check_err $? "Expected ePool($SB_POOL_EGR_CPU) max occupancy to be $exp_max_occ, but got $max_occ" log_test "CPU port's egress pool" + + tc filter del dev $h1 egress protocol ip pref 1 handle 101 flower \ + src_mac $h1mac dst_mac $h2mac \ + src_ip 192.0.1.1 dst_ip 192.0.1.2 \ + action pass } port_tc_ip_test() @@ -129,6 +148,11 @@ port_tc_ip_test() local exp_max_occ=$(devlink_cell_size_get) local max_occ + tc filter add dev $h1 egress protocol ip pref 1 handle 101 flower \ + src_mac $h1mac dst_mac $h2mac \ + src_ip 192.0.1.1 dst_ip 192.0.1.2 \ + action pass + devlink sb occupancy clearmax $DEVLINK_DEV $MZ $h1 -c 1 -p 10 -a $h1mac -b $h2mac -A 192.0.1.1 -B 192.0.1.2 \ @@ -139,17 +163,17 @@ port_tc_ip_test() RET=0 max_occ=$(sb_occ_itc_check $dl_port2 $SB_ITC $exp_max_occ) check_err $? "Expected ingress TC($SB_ITC) max occupancy to be $exp_max_occ, but got $max_occ" - log_test "physical port's($h1) ingress TC - IP packet" - - RET=0 - max_occ=$(sb_occ_itc_check $dl_port2 $SB_ITC $exp_max_occ) - check_err $? "Expected ingress TC($SB_ITC) max occupancy to be $exp_max_occ, but got $max_occ" log_test "physical port's($h2) ingress TC - IP packet" RET=0 max_occ=$(sb_occ_etc_check $cpu_dl_port $SB_ITC_CPU_IP $exp_max_occ) check_err $? "Expected egress TC($SB_ITC_CPU_IP) max occupancy to be $exp_max_occ, but got $max_occ" log_test "CPU port's egress TC - IP packet" + + tc filter del dev $h1 egress protocol ip pref 1 handle 101 flower \ + src_mac $h1mac dst_mac $h2mac \ + src_ip 192.0.1.1 dst_ip 192.0.1.2 \ + action pass } port_tc_arp_test() @@ -157,6 +181,9 @@ port_tc_arp_test() local exp_max_occ=$(devlink_cell_size_get) local max_occ + tc filter add dev $h1 egress protocol arp pref 1 handle 101 flower \ + src_mac $h1mac action pass + devlink sb occupancy clearmax $DEVLINK_DEV $MZ $h1 -c 1 -p 10 -a $h1mac -A 192.0.1.1 -t arp -q @@ -166,17 +193,15 @@ port_tc_arp_test() RET=0 max_occ=$(sb_occ_itc_check $dl_port2 $SB_ITC $exp_max_occ) check_err $? "Expected ingress TC($SB_ITC) max occupancy to be $exp_max_occ, but got $max_occ" - log_test "physical port's($h1) ingress TC - ARP packet" - - RET=0 - max_occ=$(sb_occ_itc_check $dl_port2 $SB_ITC $exp_max_occ) - check_err $? "Expected ingress TC($SB_ITC) max occupancy to be $exp_max_occ, but got $max_occ" log_test "physical port's($h2) ingress TC - ARP packet" RET=0 max_occ=$(sb_occ_etc_check $cpu_dl_port $SB_ITC_CPU_ARP $exp_max_occ) check_err $? "Expected egress TC($SB_ITC_IP2ME) max occupancy to be $exp_max_occ, but got $max_occ" log_test "CPU port's egress TC - ARP packet" + + tc filter del dev $h1 egress protocol arp pref 1 handle 101 flower \ + src_mac $h1mac action pass } setup_prepare() diff --git a/tools/testing/selftests/drivers/net/mlxsw/tc_sample.sh b/tools/testing/selftests/drivers/net/mlxsw/tc_sample.sh index 83a0210e7544..bc7ea2df49fb 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/tc_sample.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/tc_sample.sh @@ -218,7 +218,7 @@ psample_capture_start() psample_capture_stop() { - { kill %% && wait %%; } 2>/dev/null + kill_process %% } __tc_sample_rate_test() @@ -499,7 +499,7 @@ tc_sample_md_out_tc_occ_test() backlog=$(tc -j -p -s qdisc show dev $rp2 | jq '.[0]["backlog"]') # Kill mausezahn. - { kill %% && wait %%; } 2>/dev/null + kill_process %% psample_capture_stop diff --git a/tools/testing/selftests/drivers/net/netcons_basic.sh b/tools/testing/selftests/drivers/net/netcons_basic.sh index 06021b2059b7..b175f4d966e5 100755 --- a/tools/testing/selftests/drivers/net/netcons_basic.sh +++ b/tools/testing/selftests/drivers/net/netcons_basic.sh @@ -20,22 +20,26 @@ SCRIPTDIR=$(dirname "$(readlink -e "${BASH_SOURCE[0]}")") # Simple script to test dynamic targets in netconsole SRCIF="" # to be populated later -SRCIP=192.168.1.1 +SRCIP=192.0.2.1 DSTIF="" # to be populated later -DSTIP=192.168.1.2 +DSTIP=192.0.2.2 PORT="6666" MSG="netconsole selftest" +USERDATA_KEY="key" +USERDATA_VALUE="value" TARGET=$(mktemp -u netcons_XXXXX) DEFAULT_PRINTK_VALUES=$(cat /proc/sys/kernel/printk) NETCONS_CONFIGFS="/sys/kernel/config/netconsole" NETCONS_PATH="${NETCONS_CONFIGFS}"/"${TARGET}" +KEY_PATH="${NETCONS_PATH}/userdata/${USERDATA_KEY}" # NAMESPACE will be populated by setup_ns with a random value NAMESPACE="" # IDs for netdevsim NSIM_DEV_1_ID=$((256 + RANDOM % 256)) NSIM_DEV_2_ID=$((512 + RANDOM % 256)) +NSIM_DEV_SYS_NEW="/sys/bus/netdevsim/new_device" # Used to create and delete namespaces source "${SCRIPTDIR}"/../../net/lib.sh @@ -43,7 +47,6 @@ source "${SCRIPTDIR}"/../../net/net_helper.sh # Create netdevsim interfaces create_ifaces() { - local NSIM_DEV_SYS_NEW=/sys/bus/netdevsim/new_device echo "$NSIM_DEV_2_ID" > "$NSIM_DEV_SYS_NEW" echo "$NSIM_DEV_1_ID" > "$NSIM_DEV_SYS_NEW" @@ -122,6 +125,8 @@ function cleanup() { # delete netconsole dynamic reconfiguration echo 0 > "${NETCONS_PATH}"/enabled + # Remove key + rmdir "${KEY_PATH}" # Remove the configfs entry rmdir "${NETCONS_PATH}" @@ -136,6 +141,18 @@ function cleanup() { echo "${DEFAULT_PRINTK_VALUES}" > /proc/sys/kernel/printk } +function set_user_data() { + if [[ ! -d "${NETCONS_PATH}""/userdata" ]] + then + echo "Userdata path not available in ${NETCONS_PATH}/userdata" + exit "${ksft_skip}" + fi + + mkdir -p "${KEY_PATH}" + VALUE_PATH="${KEY_PATH}""/value" + echo "${USERDATA_VALUE}" > "${VALUE_PATH}" +} + function listen_port_and_save_to() { local OUTPUT=${1} # Just wait for 2 seconds @@ -146,6 +163,10 @@ function listen_port_and_save_to() { function validate_result() { local TMPFILENAME="$1" + # TMPFILENAME will contain something like: + # 6.11.1-0_fbk0_rc13_509_g30d75cea12f7,13,1822,115075213798,-;netconsole selftest: netcons_gtJHM + # key=value + # Check if the file exists if [ ! -f "$TMPFILENAME" ]; then echo "FAIL: File was not generated." >&2 @@ -158,6 +179,12 @@ function validate_result() { exit "${ksft_fail}" fi + if ! grep -q "${USERDATA_KEY}=${USERDATA_VALUE}" "${TMPFILENAME}"; then + echo "FAIL: ${USERDATA_KEY}=${USERDATA_VALUE} not found in ${TMPFILENAME}" >&2 + cat "${TMPFILENAME}" >&2 + exit "${ksft_fail}" + fi + # Delete the file once it is validated, otherwise keep it # for debugging purposes rm "${TMPFILENAME}" @@ -185,6 +212,11 @@ function check_for_dependencies() { exit "${ksft_skip}" fi + if [ ! -f "${NSIM_DEV_SYS_NEW}" ]; then + echo "SKIP: file ${NSIM_DEV_SYS_NEW} does not exist. Check if CONFIG_NETDEVSIM is enabled" >&2 + exit "${ksft_skip}" + fi + if [ ! -d "${NETCONS_CONFIGFS}" ]; then echo "SKIP: directory ${NETCONS_CONFIGFS} does not exist. Check if NETCONSOLE_DYNAMIC is enabled" >&2 exit "${ksft_skip}" @@ -220,6 +252,8 @@ trap cleanup EXIT set_network # Create a dynamic target for netconsole create_dynamic_target +# Set userdata "key" with the "value" value +set_user_data # Listed for netconsole port inside the namespace and destination interface listen_port_and_save_to "${OUTPUT_FILE}" & # Wait for socat to start and listen to the port. diff --git a/tools/testing/selftests/drivers/net/netdevsim/Makefile b/tools/testing/selftests/drivers/net/netdevsim/Makefile index 5bace0b7fb57..07b7c46d3311 100644 --- a/tools/testing/selftests/drivers/net/netdevsim/Makefile +++ b/tools/testing/selftests/drivers/net/netdevsim/Makefile @@ -4,11 +4,14 @@ TEST_PROGS = devlink.sh \ devlink_in_netns.sh \ devlink_trap.sh \ ethtool-coalesce.sh \ + ethtool-features.sh \ ethtool-fec.sh \ ethtool-pause.sh \ ethtool-ring.sh \ fib.sh \ + fib_notifications.sh \ hw_stats_l3.sh \ + macsec-offload.sh \ nexthop.sh \ peer.sh \ psample.sh \ diff --git a/tools/testing/selftests/drivers/net/netdevsim/config b/tools/testing/selftests/drivers/net/netdevsim/config index adf45a3a78b4..5117c78ddf0a 100644 --- a/tools/testing/selftests/drivers/net/netdevsim/config +++ b/tools/testing/selftests/drivers/net/netdevsim/config @@ -1,6 +1,7 @@ CONFIG_DUMMY=y CONFIG_GENEVE=m CONFIG_IPV6=y +CONFIG_MACSEC=m CONFIG_NETDEVSIM=m CONFIG_NET_SCH_MQPRIO=y CONFIG_NET_SCH_MULTIQ=y diff --git a/tools/testing/selftests/drivers/net/netdevsim/ethtool-features.sh b/tools/testing/selftests/drivers/net/netdevsim/ethtool-features.sh new file mode 100644 index 000000000000..bc210dc6ad2d --- /dev/null +++ b/tools/testing/selftests/drivers/net/netdevsim/ethtool-features.sh @@ -0,0 +1,31 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0-only + +source ethtool-common.sh + +NSIM_NETDEV=$(make_netdev) + +set -o pipefail + +FEATS=" + tx-checksum-ip-generic + tx-scatter-gather + tx-tcp-segmentation + generic-segmentation-offload + generic-receive-offload" + +for feat in $FEATS ; do + s=$(ethtool --json -k $NSIM_NETDEV | jq ".[].\"$feat\".active" 2>/dev/null) + check $? "$s" true + + s=$(ethtool --json -k $NSIM_NETDEV | jq ".[].\"$feat\".fixed" 2>/dev/null) + check $? "$s" false +done + +if [ $num_errors -eq 0 ]; then + echo "PASSED all $((num_passes)) checks" + exit 0 +else + echo "FAILED $num_errors/$((num_errors+num_passes)) checks" + exit 1 +fi diff --git a/tools/testing/selftests/drivers/net/netdevsim/fib_notifications.sh b/tools/testing/selftests/drivers/net/netdevsim/fib_notifications.sh index 8d91191a098c..9896580c3d85 100755 --- a/tools/testing/selftests/drivers/net/netdevsim/fib_notifications.sh +++ b/tools/testing/selftests/drivers/net/netdevsim/fib_notifications.sh @@ -94,7 +94,7 @@ route_addition_check() sleep 1 $IP route add $route dev dummy1 sleep 1 - kill %% && wait %% &> /dev/null + kill_process %% route_notify_check $outfile $expected_num_notifications $offload_failed rm -f $outfile @@ -148,7 +148,7 @@ route_deletion_check() sleep 1 $IP route del $route dev dummy1 sleep 1 - kill %% && wait %% &> /dev/null + kill_process %% route_notify_check $outfile $expected_num_notifications rm -f $outfile @@ -191,7 +191,7 @@ route_replacement_check() sleep 1 $IP route replace $route dev dummy2 sleep 1 - kill %% && wait %% &> /dev/null + kill_process %% route_notify_check $outfile $expected_num_notifications rm -f $outfile diff --git a/tools/testing/selftests/drivers/net/netdevsim/macsec-offload.sh b/tools/testing/selftests/drivers/net/netdevsim/macsec-offload.sh new file mode 100755 index 000000000000..98033e6667d2 --- /dev/null +++ b/tools/testing/selftests/drivers/net/netdevsim/macsec-offload.sh @@ -0,0 +1,117 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0-only + +source ethtool-common.sh + +NSIM_NETDEV=$(make_netdev) +MACSEC_NETDEV=macsec_nsim + +set -o pipefail + +if ! ethtool -k $NSIM_NETDEV | grep -q 'macsec-hw-offload: on'; then + echo "SKIP: netdevsim doesn't support MACsec offload" + exit 4 +fi + +if ! ip link add link $NSIM_NETDEV $MACSEC_NETDEV type macsec offload mac 2>/dev/null; then + echo "SKIP: couldn't create macsec device" + exit 4 +fi +ip link del $MACSEC_NETDEV + +# +# test macsec offload API +# + +ip link add link $NSIM_NETDEV "${MACSEC_NETDEV}" type macsec port 4 offload mac +check $? + +ip link add link $NSIM_NETDEV "${MACSEC_NETDEV}2" type macsec address "aa:bb:cc:dd:ee:ff" port 5 offload mac +check $? + +ip link add link $NSIM_NETDEV "${MACSEC_NETDEV}3" type macsec sci abbacdde01020304 offload mac +check $? + +ip link add link $NSIM_NETDEV "${MACSEC_NETDEV}4" type macsec port 8 offload mac 2> /dev/null +check $? '' '' 1 + +ip macsec add "${MACSEC_NETDEV}" tx sa 0 pn 1024 on key 01 12345678901234567890123456789012 +check $? + +ip macsec add "${MACSEC_NETDEV}" rx port 1234 address "1c:ed:de:ad:be:ef" +check $? + +ip macsec add "${MACSEC_NETDEV}" rx port 1234 address "1c:ed:de:ad:be:ef" sa 0 pn 1 on \ + key 00 0123456789abcdef0123456789abcdef +check $? + +ip macsec add "${MACSEC_NETDEV}" rx port 1235 address "1c:ed:de:ad:be:ef" 2> /dev/null +check $? '' '' 1 + +# can't disable macsec offload when SAs are configured +ip link set "${MACSEC_NETDEV}" type macsec offload off 2> /dev/null +check $? '' '' 1 + +ip macsec offload "${MACSEC_NETDEV}" off 2> /dev/null +check $? '' '' 1 + +# toggle macsec offload via rtnetlink +ip link set "${MACSEC_NETDEV}2" type macsec offload off +check $? + +ip link set "${MACSEC_NETDEV}2" type macsec offload mac +check $? + +# toggle macsec offload via genetlink +ip macsec offload "${MACSEC_NETDEV}2" off +check $? + +ip macsec offload "${MACSEC_NETDEV}2" mac +check $? + +for dev in ${MACSEC_NETDEV}{,2,3} ; do + ip link del $dev + check $? +done + + +# +# test ethtool features when toggling offload +# + +ip link add link $NSIM_NETDEV $MACSEC_NETDEV type macsec offload mac +TMP_FEATS_ON_1="$(ethtool -k $MACSEC_NETDEV)" + +ip link set $MACSEC_NETDEV type macsec offload off +TMP_FEATS_OFF_1="$(ethtool -k $MACSEC_NETDEV)" + +ip link set $MACSEC_NETDEV type macsec offload mac +TMP_FEATS_ON_2="$(ethtool -k $MACSEC_NETDEV)" + +[ "$TMP_FEATS_ON_1" = "$TMP_FEATS_ON_2" ] +check $? + +ip link del $MACSEC_NETDEV + +ip link add link $NSIM_NETDEV $MACSEC_NETDEV type macsec +check $? + +TMP_FEATS_OFF_2="$(ethtool -k $MACSEC_NETDEV)" +[ "$TMP_FEATS_OFF_1" = "$TMP_FEATS_OFF_2" ] +check $? + +ip link set $MACSEC_NETDEV type macsec offload mac +check $? + +TMP_FEATS_ON_3="$(ethtool -k $MACSEC_NETDEV)" +[ "$TMP_FEATS_ON_1" = "$TMP_FEATS_ON_3" ] +check $? + + +if [ $num_errors -eq 0 ]; then + echo "PASSED all $((num_passes)) checks" + exit 0 +else + echo "FAILED $num_errors/$((num_errors+num_passes)) checks" + exit 1 +fi diff --git a/tools/testing/selftests/drivers/net/queues.py b/tools/testing/selftests/drivers/net/queues.py index 30f29096e27c..9c5473abbd78 100755 --- a/tools/testing/selftests/drivers/net/queues.py +++ b/tools/testing/selftests/drivers/net/queues.py @@ -8,25 +8,28 @@ from lib.py import cmd import glob -def sys_get_queues(ifname) -> int: - folders = glob.glob(f'/sys/class/net/{ifname}/queues/rx-*') +def sys_get_queues(ifname, qtype='rx') -> int: + folders = glob.glob(f'/sys/class/net/{ifname}/queues/{qtype}-*') return len(folders) -def nl_get_queues(cfg, nl): +def nl_get_queues(cfg, nl, qtype='rx'): queues = nl.queue_get({'ifindex': cfg.ifindex}, dump=True) if queues: - return len([q for q in queues if q['type'] == 'rx']) + return len([q for q in queues if q['type'] == qtype]) return None def get_queues(cfg, nl) -> None: - queues = nl_get_queues(cfg, nl) - if not queues: - raise KsftSkipEx('queue-get not supported by device') + snl = NetdevFamily(recv_size=4096) - expected = sys_get_queues(cfg.dev['ifname']) - ksft_eq(queues, expected) + for qtype in ['rx', 'tx']: + queues = nl_get_queues(cfg, snl, qtype) + if not queues: + raise KsftSkipEx('queue-get not supported by device') + + expected = sys_get_queues(cfg.dev['ifname'], qtype) + ksft_eq(queues, expected) def addremove_queues(cfg, nl) -> None: @@ -57,7 +60,7 @@ def addremove_queues(cfg, nl) -> None: def main() -> None: - with NetDrvEnv(__file__, queue_count=3) as cfg: + with NetDrvEnv(__file__, queue_count=100) as cfg: ksft_run([get_queues, addremove_queues], args=(cfg, NetdevFamily())) ksft_exit() diff --git a/tools/testing/selftests/drivers/net/shaper.py b/tools/testing/selftests/drivers/net/shaper.py new file mode 100755 index 000000000000..11310f19bfa0 --- /dev/null +++ b/tools/testing/selftests/drivers/net/shaper.py @@ -0,0 +1,461 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 + +from lib.py import ksft_run, ksft_exit, ksft_eq, ksft_true, KsftSkipEx +from lib.py import EthtoolFamily, NetshaperFamily +from lib.py import NetDrvEnv +from lib.py import NlError +from lib.py import cmd + +def get_shapers(cfg, nl_shaper) -> None: + try: + shapers = nl_shaper.get({'ifindex': cfg.ifindex}, dump=True) + except NlError as e: + if e.error == 95: + raise KsftSkipEx("shapers not supported by the device") + raise + + # Default configuration: no shapers configured. + ksft_eq(len(shapers), 0) + +def get_caps(cfg, nl_shaper) -> None: + try: + caps = nl_shaper.cap_get({'ifindex': cfg.ifindex}, dump=True) + except NlError as e: + if e.error == 95: + raise KsftSkipEx("shapers not supported by the device") + raise + + # Each device implementing shaper support must support some + # features in at least a scope. + ksft_true(len(caps)> 0) + +def set_qshapers(cfg, nl_shaper) -> None: + try: + caps = nl_shaper.cap_get({'ifindex': cfg.ifindex, + 'scope':'queue'}) + except NlError as e: + if e.error == 95: + raise KsftSkipEx("shapers not supported by the device") + raise + if not 'support-bw-max' in caps or not 'support-metric-bps' in caps: + raise KsftSkipEx("device does not support queue scope shapers with bw_max and metric bps") + + cfg.queues = True; + netnl = EthtoolFamily() + channels = netnl.channels_get({'header': {'dev-index': cfg.ifindex}}) + if channels['combined-count'] == 0: + cfg.rx_type = 'rx' + cfg.nr_queues = channels['rx-count'] + else: + cfg.rx_type = 'combined' + cfg.nr_queues = channels['combined-count'] + if cfg.nr_queues < 3: + raise KsftSkipEx(f"device does not support enough queues min 3 found {cfg.nr_queues}") + + nl_shaper.set({'ifindex': cfg.ifindex, + 'handle': {'scope': 'queue', 'id': 1}, + 'metric': 'bps', + 'bw-max': 10000}) + nl_shaper.set({'ifindex': cfg.ifindex, + 'handle': {'scope': 'queue', 'id': 2}, + 'metric': 'bps', + 'bw-max': 20000}) + + # Querying a specific shaper not yet configured must fail. + raised = False + try: + shaper_q0 = nl_shaper.get({'ifindex': cfg.ifindex, + 'handle': {'scope': 'queue', 'id': 0}}) + except (NlError): + raised = True + ksft_eq(raised, True) + + shaper_q1 = nl_shaper.get({'ifindex': cfg.ifindex, + 'handle': {'scope': 'queue', 'id': 1}}) + ksft_eq(shaper_q1, {'ifindex': cfg.ifindex, + 'parent': {'scope': 'netdev'}, + 'handle': {'scope': 'queue', 'id': 1}, + 'metric': 'bps', + 'bw-max': 10000}) + + shapers = nl_shaper.get({'ifindex': cfg.ifindex}, dump=True) + ksft_eq(shapers, [{'ifindex': cfg.ifindex, + 'parent': {'scope': 'netdev'}, + 'handle': {'scope': 'queue', 'id': 1}, + 'metric': 'bps', + 'bw-max': 10000}, + {'ifindex': cfg.ifindex, + 'parent': {'scope': 'netdev'}, + 'handle': {'scope': 'queue', 'id': 2}, + 'metric': 'bps', + 'bw-max': 20000}]) + +def del_qshapers(cfg, nl_shaper) -> None: + if not cfg.queues: + raise KsftSkipEx("queue shapers not supported by device, skipping delete") + + nl_shaper.delete({'ifindex': cfg.ifindex, + 'handle': {'scope': 'queue', 'id': 2}}) + nl_shaper.delete({'ifindex': cfg.ifindex, + 'handle': {'scope': 'queue', 'id': 1}}) + shapers = nl_shaper.get({'ifindex': cfg.ifindex}, dump=True) + ksft_eq(len(shapers), 0) + +def set_nshapers(cfg, nl_shaper) -> None: + # Check required features. + try: + caps = nl_shaper.cap_get({'ifindex': cfg.ifindex, + 'scope':'netdev'}) + except NlError as e: + if e.error == 95: + raise KsftSkipEx("shapers not supported by the device") + raise + if not 'support-bw-max' in caps or not 'support-metric-bps' in caps: + raise KsftSkipEx("device does not support nested netdev scope shapers with weight") + + cfg.netdev = True; + nl_shaper.set({'ifindex': cfg.ifindex, + 'handle': {'scope': 'netdev', 'id': 0}, + 'bw-max': 100000}) + + shapers = nl_shaper.get({'ifindex': cfg.ifindex}, dump=True) + ksft_eq(shapers, [{'ifindex': cfg.ifindex, + 'handle': {'scope': 'netdev'}, + 'metric': 'bps', + 'bw-max': 100000}]) + +def del_nshapers(cfg, nl_shaper) -> None: + if not cfg.netdev: + raise KsftSkipEx("netdev shaper not supported by device, skipping delete") + + nl_shaper.delete({'ifindex': cfg.ifindex, + 'handle': {'scope': 'netdev'}}) + shapers = nl_shaper.get({'ifindex': cfg.ifindex}, dump=True) + ksft_eq(len(shapers), 0) + +def basic_groups(cfg, nl_shaper) -> None: + if not cfg.netdev: + raise KsftSkipEx("netdev shaper not supported by the device") + if cfg.nr_queues < 3: + raise KsftSkipEx(f"netdev does not have enough queues min 3 reported {cfg.nr_queues}") + + try: + caps = nl_shaper.cap_get({'ifindex': cfg.ifindex, + 'scope':'queue'}) + except NlError as e: + if e.error == 95: + raise KsftSkipEx("shapers not supported by the device") + raise + if not 'support-weight' in caps: + raise KsftSkipEx("device does not support queue scope shapers with weight") + + node_handle = nl_shaper.group({ + 'ifindex': cfg.ifindex, + 'leaves':[{'handle': {'scope': 'queue', 'id': 1}, + 'weight': 1}, + {'handle': {'scope': 'queue', 'id': 2}, + 'weight': 2}], + 'handle': {'scope':'netdev'}, + 'metric': 'bps', + 'bw-max': 10000}) + ksft_eq(node_handle, {'ifindex': cfg.ifindex, + 'handle': {'scope': 'netdev'}}) + + shaper = nl_shaper.get({'ifindex': cfg.ifindex, + 'handle': {'scope': 'queue', 'id': 1}}) + ksft_eq(shaper, {'ifindex': cfg.ifindex, + 'parent': {'scope': 'netdev'}, + 'handle': {'scope': 'queue', 'id': 1}, + 'weight': 1 }) + + nl_shaper.delete({'ifindex': cfg.ifindex, + 'handle': {'scope': 'queue', 'id': 2}}) + nl_shaper.delete({'ifindex': cfg.ifindex, + 'handle': {'scope': 'queue', 'id': 1}}) + + # Deleting all the leaves shaper does not affect the node one + # when the latter has 'netdev' scope. + shapers = nl_shaper.get({'ifindex': cfg.ifindex}, dump=True) + ksft_eq(len(shapers), 1) + + nl_shaper.delete({'ifindex': cfg.ifindex, + 'handle': {'scope': 'netdev'}}) + +def qgroups(cfg, nl_shaper) -> None: + if cfg.nr_queues < 4: + raise KsftSkipEx(f"netdev does not have enough queues min 4 reported {cfg.nr_queues}") + try: + caps = nl_shaper.cap_get({'ifindex': cfg.ifindex, + 'scope':'node'}) + except NlError as e: + if e.error == 95: + raise KsftSkipEx("shapers not supported by the device") + raise + if not 'support-bw-max' in caps or not 'support-metric-bps' in caps: + raise KsftSkipEx("device does not support node scope shapers with bw_max and metric bps") + try: + caps = nl_shaper.cap_get({'ifindex': cfg.ifindex, + 'scope':'queue'}) + except NlError as e: + if e.error == 95: + raise KsftSkipEx("shapers not supported by the device") + raise + if not 'support-nesting' in caps or not 'support-weight' in caps or not 'support-metric-bps' in caps: + raise KsftSkipEx("device does not support nested queue scope shapers with weight") + + cfg.groups = True; + node_handle = nl_shaper.group({ + 'ifindex': cfg.ifindex, + 'leaves':[{'handle': {'scope': 'queue', 'id': 1}, + 'weight': 3}, + {'handle': {'scope': 'queue', 'id': 2}, + 'weight': 2}], + 'handle': {'scope':'node'}, + 'metric': 'bps', + 'bw-max': 10000}) + node_id = node_handle['handle']['id'] + + shaper = nl_shaper.get({'ifindex': cfg.ifindex, + 'handle': {'scope': 'queue', 'id': 1}}) + ksft_eq(shaper, {'ifindex': cfg.ifindex, + 'parent': {'scope': 'node', 'id': node_id}, + 'handle': {'scope': 'queue', 'id': 1}, + 'weight': 3}) + shaper = nl_shaper.get({'ifindex': cfg.ifindex, + 'handle': {'scope': 'node', 'id': node_id}}) + ksft_eq(shaper, {'ifindex': cfg.ifindex, + 'handle': {'scope': 'node', 'id': node_id}, + 'parent': {'scope': 'netdev'}, + 'metric': 'bps', + 'bw-max': 10000}) + + # Grouping to a specified, not existing node scope shaper must fail + raised = False + try: + nl_shaper.group({ + 'ifindex': cfg.ifindex, + 'leaves':[{'handle': {'scope': 'queue', 'id': 3}, + 'weight': 3}], + 'handle': {'scope':'node', 'id': node_id + 1}, + 'metric': 'bps', + 'bw-max': 10000}) + + except (NlError): + raised = True + ksft_eq(raised, True) + + # Add to an existing node + node_handle = nl_shaper.group({ + 'ifindex': cfg.ifindex, + 'leaves':[{'handle': {'scope': 'queue', 'id': 3}, + 'weight': 4}], + 'handle': {'scope':'node', 'id': node_id}}) + ksft_eq(node_handle, {'ifindex': cfg.ifindex, + 'handle': {'scope': 'node', 'id': node_id}}) + + shaper = nl_shaper.get({'ifindex': cfg.ifindex, + 'handle': {'scope': 'queue', 'id': 3}}) + ksft_eq(shaper, {'ifindex': cfg.ifindex, + 'parent': {'scope': 'node', 'id': node_id}, + 'handle': {'scope': 'queue', 'id': 3}, + 'weight': 4}) + + nl_shaper.delete({'ifindex': cfg.ifindex, + 'handle': {'scope': 'queue', 'id': 2}}) + nl_shaper.delete({'ifindex': cfg.ifindex, + 'handle': {'scope': 'queue', 'id': 1}}) + + # Deleting a non empty node will move the leaves downstream. + nl_shaper.delete({'ifindex': cfg.ifindex, + 'handle': {'scope': 'node', 'id': node_id}}) + shapers = nl_shaper.get({'ifindex': cfg.ifindex}, dump=True) + ksft_eq(shapers, [{'ifindex': cfg.ifindex, + 'parent': {'scope': 'netdev'}, + 'handle': {'scope': 'queue', 'id': 3}, + 'weight': 4}]) + + # Finish and verify the complete cleanup. + nl_shaper.delete({'ifindex': cfg.ifindex, + 'handle': {'scope': 'queue', 'id': 3}}) + shapers = nl_shaper.get({'ifindex': cfg.ifindex}, dump=True) + ksft_eq(len(shapers), 0) + +def delegation(cfg, nl_shaper) -> None: + if not cfg.groups: + raise KsftSkipEx("device does not support node scope") + try: + caps = nl_shaper.cap_get({'ifindex': cfg.ifindex, + 'scope':'node'}) + except NlError as e: + if e.error == 95: + raise KsftSkipEx("node scope shapers not supported by the device") + raise + if not 'support-nesting' in caps: + raise KsftSkipEx("device does not support node scope shapers nesting") + + node_handle = nl_shaper.group({ + 'ifindex': cfg.ifindex, + 'leaves':[{'handle': {'scope': 'queue', 'id': 1}, + 'weight': 3}, + {'handle': {'scope': 'queue', 'id': 2}, + 'weight': 2}, + {'handle': {'scope': 'queue', 'id': 3}, + 'weight': 1}], + 'handle': {'scope':'node'}, + 'metric': 'bps', + 'bw-max': 10000}) + node_id = node_handle['handle']['id'] + + # Create the nested node and validate the hierarchy + nested_node_handle = nl_shaper.group({ + 'ifindex': cfg.ifindex, + 'leaves':[{'handle': {'scope': 'queue', 'id': 1}, + 'weight': 3}, + {'handle': {'scope': 'queue', 'id': 2}, + 'weight': 2}], + 'handle': {'scope':'node'}, + 'metric': 'bps', + 'bw-max': 5000}) + nested_node_id = nested_node_handle['handle']['id'] + ksft_true(nested_node_id != node_id) + shapers = nl_shaper.get({'ifindex': cfg.ifindex}, dump=True) + ksft_eq(shapers, [{'ifindex': cfg.ifindex, + 'parent': {'scope': 'node', 'id': nested_node_id}, + 'handle': {'scope': 'queue', 'id': 1}, + 'weight': 3}, + {'ifindex': cfg.ifindex, + 'parent': {'scope': 'node', 'id': nested_node_id}, + 'handle': {'scope': 'queue', 'id': 2}, + 'weight': 2}, + {'ifindex': cfg.ifindex, + 'parent': {'scope': 'node', 'id': node_id}, + 'handle': {'scope': 'queue', 'id': 3}, + 'weight': 1}, + {'ifindex': cfg.ifindex, + 'parent': {'scope': 'netdev'}, + 'handle': {'scope': 'node', 'id': node_id}, + 'metric': 'bps', + 'bw-max': 10000}, + {'ifindex': cfg.ifindex, + 'parent': {'scope': 'node', 'id': node_id}, + 'handle': {'scope': 'node', 'id': nested_node_id}, + 'metric': 'bps', + 'bw-max': 5000}]) + + # Deleting a non empty node will move the leaves downstream. + nl_shaper.delete({'ifindex': cfg.ifindex, + 'handle': {'scope': 'node', 'id': nested_node_id}}) + shapers = nl_shaper.get({'ifindex': cfg.ifindex}, dump=True) + ksft_eq(shapers, [{'ifindex': cfg.ifindex, + 'parent': {'scope': 'node', 'id': node_id}, + 'handle': {'scope': 'queue', 'id': 1}, + 'weight': 3}, + {'ifindex': cfg.ifindex, + 'parent': {'scope': 'node', 'id': node_id}, + 'handle': {'scope': 'queue', 'id': 2}, + 'weight': 2}, + {'ifindex': cfg.ifindex, + 'parent': {'scope': 'node', 'id': node_id}, + 'handle': {'scope': 'queue', 'id': 3}, + 'weight': 1}, + {'ifindex': cfg.ifindex, + 'parent': {'scope': 'netdev'}, + 'handle': {'scope': 'node', 'id': node_id}, + 'metric': 'bps', + 'bw-max': 10000}]) + + # Final cleanup. + for i in range(1, 4): + nl_shaper.delete({'ifindex': cfg.ifindex, + 'handle': {'scope': 'queue', 'id': i}}) + shapers = nl_shaper.get({'ifindex': cfg.ifindex}, dump=True) + ksft_eq(len(shapers), 0) + +def queue_update(cfg, nl_shaper) -> None: + if cfg.nr_queues < 4: + raise KsftSkipEx(f"netdev does not have enough queues min 4 reported {cfg.nr_queues}") + if not cfg.queues: + raise KsftSkipEx("device does not support queue scope") + + for i in range(3): + nl_shaper.set({'ifindex': cfg.ifindex, + 'handle': {'scope': 'queue', 'id': i}, + 'metric': 'bps', + 'bw-max': (i + 1) * 1000}) + # Delete a channel, with no shapers configured on top of the related + # queue: no changes expected + cmd(f"ethtool -L {cfg.dev['ifname']} {cfg.rx_type} 3", timeout=10) + shapers = nl_shaper.get({'ifindex': cfg.ifindex}, dump=True) + ksft_eq(shapers, [{'ifindex': cfg.ifindex, + 'parent': {'scope': 'netdev'}, + 'handle': {'scope': 'queue', 'id': 0}, + 'metric': 'bps', + 'bw-max': 1000}, + {'ifindex': cfg.ifindex, + 'parent': {'scope': 'netdev'}, + 'handle': {'scope': 'queue', 'id': 1}, + 'metric': 'bps', + 'bw-max': 2000}, + {'ifindex': cfg.ifindex, + 'parent': {'scope': 'netdev'}, + 'handle': {'scope': 'queue', 'id': 2}, + 'metric': 'bps', + 'bw-max': 3000}]) + + # Delete a channel, with a shaper configured on top of the related + # queue: the shaper must be deleted, too + cmd(f"ethtool -L {cfg.dev['ifname']} {cfg.rx_type} 2", timeout=10) + + shapers = nl_shaper.get({'ifindex': cfg.ifindex}, dump=True) + ksft_eq(shapers, [{'ifindex': cfg.ifindex, + 'parent': {'scope': 'netdev'}, + 'handle': {'scope': 'queue', 'id': 0}, + 'metric': 'bps', + 'bw-max': 1000}, + {'ifindex': cfg.ifindex, + 'parent': {'scope': 'netdev'}, + 'handle': {'scope': 'queue', 'id': 1}, + 'metric': 'bps', + 'bw-max': 2000}]) + + # Restore the original channels number, no expected changes + cmd(f"ethtool -L {cfg.dev['ifname']} {cfg.rx_type} {cfg.nr_queues}", timeout=10) + shapers = nl_shaper.get({'ifindex': cfg.ifindex}, dump=True) + ksft_eq(shapers, [{'ifindex': cfg.ifindex, + 'parent': {'scope': 'netdev'}, + 'handle': {'scope': 'queue', 'id': 0}, + 'metric': 'bps', + 'bw-max': 1000}, + {'ifindex': cfg.ifindex, + 'parent': {'scope': 'netdev'}, + 'handle': {'scope': 'queue', 'id': 1}, + 'metric': 'bps', + 'bw-max': 2000}]) + + # Final cleanup. + for i in range(0, 2): + nl_shaper.delete({'ifindex': cfg.ifindex, + 'handle': {'scope': 'queue', 'id': i}}) + +def main() -> None: + with NetDrvEnv(__file__, queue_count=4) as cfg: + cfg.queues = False + cfg.netdev = False + cfg.groups = False + cfg.nr_queues = 0 + ksft_run([get_shapers, + get_caps, + set_qshapers, + del_qshapers, + set_nshapers, + del_nshapers, + basic_groups, + qgroups, + delegation, + queue_update], args=(cfg, NetshaperFamily())) + ksft_exit() + + +if __name__ == "__main__": + main() diff --git a/tools/testing/selftests/drivers/net/stats.py b/tools/testing/selftests/drivers/net/stats.py index 63e3c045a3b2..031ac9def6c0 100755 --- a/tools/testing/selftests/drivers/net/stats.py +++ b/tools/testing/selftests/drivers/net/stats.py @@ -110,6 +110,23 @@ def qstat_by_ifindex(cfg) -> None: ksft_ge(triple[1][key], triple[0][key], comment="bad key: " + key) ksft_ge(triple[2][key], triple[1][key], comment="bad key: " + key) + # Sanity check the dumps + queues = NetdevFamily(recv_size=4096).qstats_get({"scope": "queue"}, dump=True) + # Reformat the output into {ifindex: {rx: [id, id, ...], tx: [id, id, ...]}} + parsed = {} + for entry in queues: + ifindex = entry["ifindex"] + if ifindex not in parsed: + parsed[ifindex] = {"rx":[], "tx": []} + parsed[ifindex][entry["queue-type"]].append(entry['queue-id']) + # Now, validate + for ifindex, queues in parsed.items(): + for qtype in ['rx', 'tx']: + ksft_eq(len(queues[qtype]), len(set(queues[qtype])), + comment="repeated queue keys") + ksft_eq(len(queues[qtype]), max(queues[qtype]) + 1, + comment="missing queue keys") + # Test invalid dumps # 0 is invalid with ksft_raises(NlError) as cm: @@ -158,7 +175,7 @@ def check_down(cfg) -> None: def main() -> None: - with NetDrvEnv(__file__) as cfg: + with NetDrvEnv(__file__, queue_count=100) as cfg: ksft_run([check_pause, check_fec, pkt_byte_sum, qstat_by_ifindex, check_down], args=(cfg, )) diff --git a/tools/testing/selftests/filesystems/.gitignore b/tools/testing/selftests/filesystems/.gitignore index f0c0ff20d6cf..828b66a10c63 100644 --- a/tools/testing/selftests/filesystems/.gitignore +++ b/tools/testing/selftests/filesystems/.gitignore @@ -1,3 +1,4 @@ # SPDX-License-Identifier: GPL-2.0-only dnotify_test devpts_pts +file_stressor diff --git a/tools/testing/selftests/filesystems/Makefile b/tools/testing/selftests/filesystems/Makefile index c647fd6a0446..66305fc34c60 100644 --- a/tools/testing/selftests/filesystems/Makefile +++ b/tools/testing/selftests/filesystems/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 CFLAGS += $(KHDR_INCLUDES) -TEST_GEN_PROGS := devpts_pts +TEST_GEN_PROGS := devpts_pts file_stressor TEST_GEN_PROGS_EXTENDED := dnotify_test include ../lib.mk diff --git a/tools/testing/selftests/filesystems/binderfs/binderfs_test.c b/tools/testing/selftests/filesystems/binderfs/binderfs_test.c index 319567f0fae1..81db85a5cc16 100644 --- a/tools/testing/selftests/filesystems/binderfs/binderfs_test.c +++ b/tools/testing/selftests/filesystems/binderfs/binderfs_test.c @@ -57,7 +57,6 @@ static int __do_binderfs_test(struct __test_metadata *_metadata) { int fd, ret, saved_errno, result = 1; size_t len; - ssize_t wret; struct binderfs_device device = { 0 }; struct binder_version version = { 0 }; char binderfs_mntpt[] = P_tmpdir "/binderfs_XXXXXX", diff --git a/tools/testing/selftests/filesystems/file_stressor.c b/tools/testing/selftests/filesystems/file_stressor.c new file mode 100644 index 000000000000..1136f93a9977 --- /dev/null +++ b/tools/testing/selftests/filesystems/file_stressor.c @@ -0,0 +1,194 @@ +// SPDX-License-Identifier: GPL-2.0 +#define _GNU_SOURCE +#define __SANE_USERSPACE_TYPES__ + +#include <fcntl.h> +#include <limits.h> +#include <pthread.h> +#include <sched.h> +#include <stdio.h> +#include <string.h> +#include <sys/stat.h> +#include <sys/mount.h> +#include <unistd.h> + +#include "../kselftest_harness.h" + +#include <linux/types.h> +#include <linux/mount.h> +#include <sys/syscall.h> + +static inline int sys_fsopen(const char *fsname, unsigned int flags) +{ + return syscall(__NR_fsopen, fsname, flags); +} + +static inline int sys_fsconfig(int fd, unsigned int cmd, const char *key, + const char *value, int aux) +{ + return syscall(__NR_fsconfig, fd, cmd, key, value, aux); +} + +static inline int sys_fsmount(int fd, unsigned int flags, + unsigned int attr_flags) +{ + return syscall(__NR_fsmount, fd, flags, attr_flags); +} + +#ifndef MOVE_MOUNT_F_EMPTY_PATH +#define MOVE_MOUNT_F_EMPTY_PATH 0x00000004 /* Empty from path permitted */ +#endif + +static inline int sys_move_mount(int from_dfd, const char *from_pathname, + int to_dfd, const char *to_pathname, + unsigned int flags) +{ + return syscall(__NR_move_mount, from_dfd, from_pathname, to_dfd, + to_pathname, flags); +} + +FIXTURE(file_stressor) { + int fd_tmpfs; + int nr_procs; + int max_fds; + pid_t *pids_openers; + pid_t *pids_getdents; + int *fd_proc_pid; +}; + +FIXTURE_SETUP(file_stressor) +{ + int fd_context; + + ASSERT_EQ(unshare(CLONE_NEWNS), 0); + ASSERT_EQ(mount(NULL, "/", NULL, MS_SLAVE | MS_REC, NULL), 0); + ASSERT_EQ(mkdir("/slab_typesafe_by_rcu", 0755), 0); + + fd_context = sys_fsopen("tmpfs", 0); + ASSERT_GE(fd_context, 0); + + ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_CMD_CREATE, NULL, NULL, 0), 0); + self->fd_tmpfs = sys_fsmount(fd_context, 0, 0); + ASSERT_GE(self->fd_tmpfs, 0); + ASSERT_EQ(close(fd_context), 0); + + ASSERT_EQ(sys_move_mount(self->fd_tmpfs, "", -EBADF, "/slab_typesafe_by_rcu", MOVE_MOUNT_F_EMPTY_PATH), 0); + + self->nr_procs = sysconf(_SC_NPROCESSORS_ONLN); + self->pids_openers = malloc(sizeof(pid_t) * self->nr_procs); + ASSERT_NE(self->pids_openers, NULL); + self->pids_getdents = malloc(sizeof(pid_t) * self->nr_procs); + ASSERT_NE(self->pids_getdents, NULL); + self->fd_proc_pid = malloc(sizeof(int) * self->nr_procs); + ASSERT_NE(self->fd_proc_pid, NULL); + self->max_fds = 500; +} + +FIXTURE_TEARDOWN(file_stressor) +{ + for (int i = 0; i < self->nr_procs; i++) { + int wstatus; + pid_t pid; + + pid = waitpid(self->pids_openers[i], &wstatus, 0); + ASSERT_EQ(pid, self->pids_openers[i]); + ASSERT_TRUE(!WIFEXITED(wstatus) || !WIFSIGNALED(wstatus)); + + pid = waitpid(self->pids_getdents[i], &wstatus, 0); + ASSERT_EQ(pid, self->pids_getdents[i]); + ASSERT_TRUE(!WIFEXITED(wstatus) || !WIFSIGNALED(wstatus)); + } + free(self->pids_openers); + free(self->pids_getdents); + ASSERT_EQ(close(self->fd_tmpfs), 0); + + umount2("/slab_typesafe_by_rcu", 0); + ASSERT_EQ(rmdir("/slab_typesafe_by_rcu"), 0); +} + +TEST_F_TIMEOUT(file_stressor, slab_typesafe_by_rcu, 900 * 2) +{ + for (int i = 0; i < self->nr_procs; i++) { + pid_t pid_self; + + self->pids_openers[i] = fork(); + ASSERT_GE(self->pids_openers[i], 0); + + if (self->pids_openers[i] != 0) + continue; + + self->pids_openers[i] = getpid(); + for (;;) { + for (int i = 0; i < self->max_fds; i++) { + char path[PATH_MAX]; + int fd; + + sprintf(path, "/slab_typesafe_by_rcu/file-%d-%d", self->pids_openers[i], i); + fd = open(path, O_CREAT | O_RDONLY | O_CLOEXEC, 0644); + if (fd < 0) + continue; + } + + close_range(3, ~0U, 0); + } + + exit(0); + } + + for (int i = 0; i < self->nr_procs; i++) { + char path[PATH_MAX]; + + sprintf(path, "/proc/%d/fd/", self->pids_openers[i]); + self->fd_proc_pid[i] = open(path, O_DIRECTORY | O_RDONLY | O_CLOEXEC); + ASSERT_GE(self->fd_proc_pid[i], 0); + } + + for (int i = 0; i < self->nr_procs; i++) { + self->pids_getdents[i] = fork(); + ASSERT_GE(self->pids_getdents[i], 0); + + if (self->pids_getdents[i] != 0) + continue; + + self->pids_getdents[i] = getpid(); + for (;;) { + char ents[1024]; + ssize_t nr_read; + + /* + * Concurrently read /proc/<pid>/fd/ which rougly does: + * + * f = fget_task_next(p, &fd); + * if (!f) + * break; + * data.mode = f->f_mode; + * fput(f); + * + * Which means that it'll try to get a reference to a + * file in another task's file descriptor table. + * + * Under heavy file load it is increasingly likely that + * the other task will manage to close @file and @file + * is being recycled due to SLAB_TYPEAFE_BY_RCU + * concurrently. This will trigger various warnings in + * the file reference counting code. + */ + do { + nr_read = syscall(SYS_getdents64, self->fd_proc_pid[i], ents, sizeof(ents)); + } while (nr_read >= 0); + + lseek(self->fd_proc_pid[i], 0, SEEK_SET); + } + + exit(0); + } + + ASSERT_EQ(clock_nanosleep(CLOCK_MONOTONIC, 0, &(struct timespec){ .tv_sec = 900 /* 15 min */ }, NULL), 0); + + for (int i = 0; i < self->nr_procs; i++) { + kill(self->pids_openers[i], SIGKILL); + kill(self->pids_getdents[i], SIGKILL); + } +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/filesystems/overlayfs/.gitignore b/tools/testing/selftests/filesystems/overlayfs/.gitignore index 52ae618fdd98..e23a18c8b37f 100644 --- a/tools/testing/selftests/filesystems/overlayfs/.gitignore +++ b/tools/testing/selftests/filesystems/overlayfs/.gitignore @@ -1,2 +1,3 @@ # SPDX-License-Identifier: GPL-2.0-only dev_in_maps +set_layers_via_fds diff --git a/tools/testing/selftests/filesystems/overlayfs/Makefile b/tools/testing/selftests/filesystems/overlayfs/Makefile index 56b2b48a765b..e8d1adb021af 100644 --- a/tools/testing/selftests/filesystems/overlayfs/Makefile +++ b/tools/testing/selftests/filesystems/overlayfs/Makefile @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 -TEST_GEN_PROGS := dev_in_maps +TEST_GEN_PROGS := dev_in_maps set_layers_via_fds CFLAGS := -Wall -Werror diff --git a/tools/testing/selftests/filesystems/overlayfs/dev_in_maps.c b/tools/testing/selftests/filesystems/overlayfs/dev_in_maps.c index 2862aae58b79..3b796264223f 100644 --- a/tools/testing/selftests/filesystems/overlayfs/dev_in_maps.c +++ b/tools/testing/selftests/filesystems/overlayfs/dev_in_maps.c @@ -17,32 +17,7 @@ #include "../../kselftest.h" #include "log.h" - -static int sys_fsopen(const char *fsname, unsigned int flags) -{ - return syscall(__NR_fsopen, fsname, flags); -} - -static int sys_fsconfig(int fd, unsigned int cmd, const char *key, const char *value, int aux) -{ - return syscall(__NR_fsconfig, fd, cmd, key, value, aux); -} - -static int sys_fsmount(int fd, unsigned int flags, unsigned int attr_flags) -{ - return syscall(__NR_fsmount, fd, flags, attr_flags); -} -static int sys_mount(const char *src, const char *tgt, const char *fst, - unsigned long flags, const void *data) -{ - return syscall(__NR_mount, src, tgt, fst, flags, data); -} -static int sys_move_mount(int from_dfd, const char *from_pathname, - int to_dfd, const char *to_pathname, - unsigned int flags) -{ - return syscall(__NR_move_mount, from_dfd, from_pathname, to_dfd, to_pathname, flags); -} +#include "wrappers.h" static long get_file_dev_and_inode(void *addr, struct statx *stx) { diff --git a/tools/testing/selftests/filesystems/overlayfs/set_layers_via_fds.c b/tools/testing/selftests/filesystems/overlayfs/set_layers_via_fds.c new file mode 100644 index 000000000000..1d0ae785a667 --- /dev/null +++ b/tools/testing/selftests/filesystems/overlayfs/set_layers_via_fds.c @@ -0,0 +1,217 @@ +// SPDX-License-Identifier: GPL-2.0 +#define _GNU_SOURCE +#define __SANE_USERSPACE_TYPES__ // Use ll64 + +#include <fcntl.h> +#include <sched.h> +#include <stdio.h> +#include <string.h> +#include <sys/stat.h> +#include <sys/mount.h> +#include <unistd.h> + +#include "../../kselftest_harness.h" +#include "log.h" +#include "wrappers.h" + +FIXTURE(set_layers_via_fds) { +}; + +FIXTURE_SETUP(set_layers_via_fds) +{ + ASSERT_EQ(mkdir("/set_layers_via_fds", 0755), 0); +} + +FIXTURE_TEARDOWN(set_layers_via_fds) +{ + umount2("/set_layers_via_fds", 0); + ASSERT_EQ(rmdir("/set_layers_via_fds"), 0); +} + +TEST_F(set_layers_via_fds, set_layers_via_fds) +{ + int fd_context, fd_tmpfs, fd_overlay; + int layer_fds[] = { [0 ... 8] = -EBADF }; + bool layers_found[] = { [0 ... 8] = false }; + size_t len = 0; + char *line = NULL; + FILE *f_mountinfo; + + ASSERT_EQ(unshare(CLONE_NEWNS), 0); + ASSERT_EQ(sys_mount(NULL, "/", NULL, MS_SLAVE | MS_REC, NULL), 0); + + fd_context = sys_fsopen("tmpfs", 0); + ASSERT_GE(fd_context, 0); + + ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_CMD_CREATE, NULL, NULL, 0), 0); + fd_tmpfs = sys_fsmount(fd_context, 0, 0); + ASSERT_GE(fd_tmpfs, 0); + ASSERT_EQ(close(fd_context), 0); + + ASSERT_EQ(mkdirat(fd_tmpfs, "w", 0755), 0); + ASSERT_EQ(mkdirat(fd_tmpfs, "u", 0755), 0); + ASSERT_EQ(mkdirat(fd_tmpfs, "l1", 0755), 0); + ASSERT_EQ(mkdirat(fd_tmpfs, "l2", 0755), 0); + ASSERT_EQ(mkdirat(fd_tmpfs, "l3", 0755), 0); + ASSERT_EQ(mkdirat(fd_tmpfs, "l4", 0755), 0); + ASSERT_EQ(mkdirat(fd_tmpfs, "d1", 0755), 0); + ASSERT_EQ(mkdirat(fd_tmpfs, "d2", 0755), 0); + ASSERT_EQ(mkdirat(fd_tmpfs, "d3", 0755), 0); + + layer_fds[0] = openat(fd_tmpfs, "w", O_DIRECTORY); + ASSERT_GE(layer_fds[0], 0); + + layer_fds[1] = openat(fd_tmpfs, "u", O_DIRECTORY); + ASSERT_GE(layer_fds[1], 0); + + layer_fds[2] = openat(fd_tmpfs, "l1", O_DIRECTORY); + ASSERT_GE(layer_fds[2], 0); + + layer_fds[3] = openat(fd_tmpfs, "l2", O_DIRECTORY); + ASSERT_GE(layer_fds[3], 0); + + layer_fds[4] = openat(fd_tmpfs, "l3", O_DIRECTORY); + ASSERT_GE(layer_fds[4], 0); + + layer_fds[5] = openat(fd_tmpfs, "l4", O_DIRECTORY); + ASSERT_GE(layer_fds[5], 0); + + layer_fds[6] = openat(fd_tmpfs, "d1", O_DIRECTORY); + ASSERT_GE(layer_fds[6], 0); + + layer_fds[7] = openat(fd_tmpfs, "d2", O_DIRECTORY); + ASSERT_GE(layer_fds[7], 0); + + layer_fds[8] = openat(fd_tmpfs, "d3", O_DIRECTORY); + ASSERT_GE(layer_fds[8], 0); + + ASSERT_EQ(sys_move_mount(fd_tmpfs, "", -EBADF, "/tmp", MOVE_MOUNT_F_EMPTY_PATH), 0); + ASSERT_EQ(close(fd_tmpfs), 0); + + fd_context = sys_fsopen("overlay", 0); + ASSERT_GE(fd_context, 0); + + ASSERT_NE(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "lowerdir", NULL, layer_fds[2]), 0); + + ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "workdir", NULL, layer_fds[0]), 0); + ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "upperdir", NULL, layer_fds[1]), 0); + ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "lowerdir+", NULL, layer_fds[2]), 0); + ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "lowerdir+", NULL, layer_fds[3]), 0); + ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "lowerdir+", NULL, layer_fds[4]), 0); + ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "lowerdir+", NULL, layer_fds[5]), 0); + ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "datadir+", NULL, layer_fds[6]), 0); + ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "datadir+", NULL, layer_fds[7]), 0); + ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "datadir+", NULL, layer_fds[8]), 0); + + ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_STRING, "metacopy", "on", 0), 0); + + ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_CMD_CREATE, NULL, NULL, 0), 0); + + fd_overlay = sys_fsmount(fd_context, 0, 0); + ASSERT_GE(fd_overlay, 0); + + ASSERT_EQ(sys_move_mount(fd_overlay, "", -EBADF, "/set_layers_via_fds", MOVE_MOUNT_F_EMPTY_PATH), 0); + + f_mountinfo = fopen("/proc/self/mountinfo", "r"); + ASSERT_NE(f_mountinfo, NULL); + + while (getline(&line, &len, f_mountinfo) != -1) { + char *haystack = line; + + if (strstr(haystack, "workdir=/tmp/w")) + layers_found[0] = true; + if (strstr(haystack, "upperdir=/tmp/u")) + layers_found[1] = true; + if (strstr(haystack, "lowerdir+=/tmp/l1")) + layers_found[2] = true; + if (strstr(haystack, "lowerdir+=/tmp/l2")) + layers_found[3] = true; + if (strstr(haystack, "lowerdir+=/tmp/l3")) + layers_found[4] = true; + if (strstr(haystack, "lowerdir+=/tmp/l4")) + layers_found[5] = true; + if (strstr(haystack, "datadir+=/tmp/d1")) + layers_found[6] = true; + if (strstr(haystack, "datadir+=/tmp/d2")) + layers_found[7] = true; + if (strstr(haystack, "datadir+=/tmp/d3")) + layers_found[8] = true; + } + free(line); + + for (int i = 0; i < ARRAY_SIZE(layer_fds); i++) { + ASSERT_EQ(layers_found[i], true); + ASSERT_EQ(close(layer_fds[i]), 0); + } + + ASSERT_EQ(close(fd_context), 0); + ASSERT_EQ(close(fd_overlay), 0); + ASSERT_EQ(fclose(f_mountinfo), 0); +} + +TEST_F(set_layers_via_fds, set_500_layers_via_fds) +{ + int fd_context, fd_tmpfs, fd_overlay, fd_work, fd_upper, fd_lower; + int layer_fds[500] = { [0 ... 499] = -EBADF }; + + ASSERT_EQ(unshare(CLONE_NEWNS), 0); + ASSERT_EQ(sys_mount(NULL, "/", NULL, MS_SLAVE | MS_REC, NULL), 0); + + fd_context = sys_fsopen("tmpfs", 0); + ASSERT_GE(fd_context, 0); + + ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_CMD_CREATE, NULL, NULL, 0), 0); + fd_tmpfs = sys_fsmount(fd_context, 0, 0); + ASSERT_GE(fd_tmpfs, 0); + ASSERT_EQ(close(fd_context), 0); + + for (int i = 0; i < ARRAY_SIZE(layer_fds); i++) { + char path[100]; + + sprintf(path, "l%d", i); + ASSERT_EQ(mkdirat(fd_tmpfs, path, 0755), 0); + layer_fds[i] = openat(fd_tmpfs, path, O_DIRECTORY); + ASSERT_GE(layer_fds[i], 0); + } + + ASSERT_EQ(mkdirat(fd_tmpfs, "w", 0755), 0); + fd_work = openat(fd_tmpfs, "w", O_DIRECTORY); + ASSERT_GE(fd_work, 0); + + ASSERT_EQ(mkdirat(fd_tmpfs, "u", 0755), 0); + fd_upper = openat(fd_tmpfs, "u", O_DIRECTORY); + ASSERT_GE(fd_upper, 0); + + ASSERT_EQ(mkdirat(fd_tmpfs, "l501", 0755), 0); + fd_lower = openat(fd_tmpfs, "l501", O_DIRECTORY); + ASSERT_GE(fd_lower, 0); + + ASSERT_EQ(sys_move_mount(fd_tmpfs, "", -EBADF, "/tmp", MOVE_MOUNT_F_EMPTY_PATH), 0); + ASSERT_EQ(close(fd_tmpfs), 0); + + fd_context = sys_fsopen("overlay", 0); + ASSERT_GE(fd_context, 0); + + ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "workdir", NULL, fd_work), 0); + ASSERT_EQ(close(fd_work), 0); + + ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "upperdir", NULL, fd_upper), 0); + ASSERT_EQ(close(fd_upper), 0); + + for (int i = 0; i < ARRAY_SIZE(layer_fds); i++) { + ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "lowerdir+", NULL, layer_fds[i]), 0); + ASSERT_EQ(close(layer_fds[i]), 0); + } + + ASSERT_NE(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "lowerdir+", NULL, fd_lower), 0); + ASSERT_EQ(close(fd_lower), 0); + + ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_CMD_CREATE, NULL, NULL, 0), 0); + + fd_overlay = sys_fsmount(fd_context, 0, 0); + ASSERT_GE(fd_overlay, 0); + ASSERT_EQ(close(fd_context), 0); + ASSERT_EQ(close(fd_overlay), 0); +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/filesystems/overlayfs/wrappers.h b/tools/testing/selftests/filesystems/overlayfs/wrappers.h new file mode 100644 index 000000000000..071b95fd2ac0 --- /dev/null +++ b/tools/testing/selftests/filesystems/overlayfs/wrappers.h @@ -0,0 +1,47 @@ +// SPDX-License-Identifier: GPL-2.0 +// +#ifndef __SELFTEST_OVERLAYFS_WRAPPERS_H__ +#define __SELFTEST_OVERLAYFS_WRAPPERS_H__ + +#define _GNU_SOURCE + +#include <linux/types.h> +#include <linux/mount.h> +#include <sys/syscall.h> + +static inline int sys_fsopen(const char *fsname, unsigned int flags) +{ + return syscall(__NR_fsopen, fsname, flags); +} + +static inline int sys_fsconfig(int fd, unsigned int cmd, const char *key, + const char *value, int aux) +{ + return syscall(__NR_fsconfig, fd, cmd, key, value, aux); +} + +static inline int sys_fsmount(int fd, unsigned int flags, + unsigned int attr_flags) +{ + return syscall(__NR_fsmount, fd, flags, attr_flags); +} + +static inline int sys_mount(const char *src, const char *tgt, const char *fst, + unsigned long flags, const void *data) +{ + return syscall(__NR_mount, src, tgt, fst, flags, data); +} + +#ifndef MOVE_MOUNT_F_EMPTY_PATH +#define MOVE_MOUNT_F_EMPTY_PATH 0x00000004 /* Empty from path permitted */ +#endif + +static inline int sys_move_mount(int from_dfd, const char *from_pathname, + int to_dfd, const char *to_pathname, + unsigned int flags) +{ + return syscall(__NR_move_mount, from_dfd, from_pathname, to_dfd, + to_pathname, flags); +} + +#endif diff --git a/tools/testing/selftests/filesystems/statmount/statmount_test.c b/tools/testing/selftests/filesystems/statmount/statmount_test.c index c773334bbcc9..8eb6aa606a0d 100644 --- a/tools/testing/selftests/filesystems/statmount/statmount_test.c +++ b/tools/testing/selftests/filesystems/statmount/statmount_test.c @@ -27,7 +27,7 @@ static const char *const known_fs[] = { "ipathfs", "iso9660", "jffs2", "jfs", "minix", "mqueue", "msdos", "nfs", "nfs4", "nfsd", "nilfs2", "nsfs", "ntfs", "ntfs3", "ocfs2", "ocfs2_dlmfs", "ocxlflash", "omfs", "openpromfs", "overlay", "pipefs", - "proc", "pstore", "pvfs2", "qnx4", "qnx6", "ramfs", "reiserfs", + "proc", "pstore", "pvfs2", "qnx4", "qnx6", "ramfs", "resctrl", "romfs", "rootfs", "rpc_pipefs", "s390_hypfs", "secretmem", "securityfs", "selinuxfs", "smackfs", "smb3", "sockfs", "spufs", "squashfs", "sysfs", "sysv", "tmpfs", "tracefs", "ubifs", "udf", diff --git a/tools/testing/selftests/ftrace/test.d/00basic/mount_options.tc b/tools/testing/selftests/ftrace/test.d/00basic/mount_options.tc new file mode 100644 index 000000000000..35e8d47d6072 --- /dev/null +++ b/tools/testing/selftests/ftrace/test.d/00basic/mount_options.tc @@ -0,0 +1,101 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 +# description: Test tracefs GID mount option +# requires: "[gid=<gid>]":README + +fail() { + local msg="$1" + + echo "FAILED: $msg" + exit_fail +} + +find_alternate_gid() { + local original_gid="$1" + tac /etc/group | grep -v ":$original_gid:" | head -1 | cut -d: -f3 +} + +mount_tracefs_with_options() { + local mount_point="$1" + local options="$2" + + mount -t tracefs -o "$options" nodev "$mount_point" + + setup +} + +unmount_tracefs() { + local mount_point="$1" + + # Need to make sure the mount isn't busy so that we can umount it + (cd $mount_point; finish_ftrace;) + + cleanup +} + +create_instance() { + local mount_point="$1" + local instance="$mount_point/instances/$(mktemp -u test-XXXXXX)" + + mkdir "$instance" + echo "$instance" +} + +remove_instance() { + local instance="$1" + + rmdir "$instance" +} + +check_gid() { + local mount_point="$1" + local expected_gid="$2" + + echo "Checking permission group ..." + + cd "$mount_point" + + for file in "." "events" "events/sched" "events/sched/sched_switch" "events/sched/sched_switch/enable"; do + local gid=`stat -c "%g" $file` + if [ "$gid" -ne "$expected_gid" ]; then + cd - # Return to the previous working directory (tracefs root) + fail "$(realpath $file): Expected group $expected_gid; Got group $gid" + fi + done + + cd - # Return to the previous working directory (tracefs root) +} + +test_gid_mount_option() { + local mount_point=$(get_mount_point) + local mount_options=$(get_mnt_options "$mount_point") + local original_group=$(stat -c "%g" .) + local other_group=$(find_alternate_gid "$original_group") + + # Set up mount options with new GID for testing + local new_options=`echo "$mount_options" | sed -e "s/gid=[0-9]*/gid=$other_group/"` + if [ "$new_options" = "$mount_options" ]; then + new_options="$mount_options,gid=$other_group" + mount_options="$mount_options,gid=$original_group" + fi + + # Unmount existing tracefs instance and mount with new GID + unmount_tracefs "$mount_point" + mount_tracefs_with_options "$mount_point" "$new_options" + + check_gid "$mount_point" "$other_group" + + # Check that files created after the mount inherit the GID + local instance=$(create_instance "$mount_point") + check_gid "$instance" "$other_group" + remove_instance "$instance" + + # Unmount and remount with the original GID + unmount_tracefs "$mount_point" + mount_tracefs_with_options "$mount_point" "$mount_options" + check_gid "$mount_point" "$original_group" +} + +test_gid_mount_option + +exit 0 diff --git a/tools/testing/selftests/ftrace/test.d/00basic/test_ownership.tc b/tools/testing/selftests/ftrace/test.d/00basic/test_ownership.tc index 094419e190c2..e71cc3ad0bdf 100644 --- a/tools/testing/selftests/ftrace/test.d/00basic/test_ownership.tc +++ b/tools/testing/selftests/ftrace/test.d/00basic/test_ownership.tc @@ -1,24 +1,14 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 # description: Test file and directory ownership changes for eventfs +# requires: "[gid=<gid>]":README original_group=`stat -c "%g" .` original_owner=`stat -c "%u" .` -mount_point=`stat -c '%m' .` +local mount_point=$(get_mount_point) -# If stat -c '%m' does not work (e.g. busybox) or failed, try to use the -# current working directory (which should be a tracefs) as the mount point. -if [ ! -d "$mount_point" ]; then - if mount | grep -qw $PWD ; then - mount_point=$PWD - else - # If PWD doesn't work, that is an environmental problem. - exit_unresolved - fi -fi - -mount_options=`mount | grep "$mount_point" | sed -e 's/.*(\(.*\)).*/\1/'` +mount_options=$(get_mnt_options "$mount_point") # find another owner and group that is not the original other_group=`tac /etc/group | grep -v ":$original_group:" | head -1 | cut -d: -f3` diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-profiler.tc b/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-profiler.tc new file mode 100644 index 000000000000..ffff8646733c --- /dev/null +++ b/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-profiler.tc @@ -0,0 +1,31 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 +# description: ftrace - function profiler with function graph tracing +# requires: function_profile_enabled set_ftrace_filter function_graph:tracer + +# The function graph tracer can now be run along side of the function +# profiler. But there was a bug that caused the combination of the two +# to crash. It also required the function graph tracer to be started +# first. +# +# This test triggers that bug +# +# We need both function_graph and profiling to run this test + +fail() { # mesg + echo $1 + exit_fail +} + +echo "Enabling function graph tracer:" +echo function_graph > current_tracer +echo "enable profiler" + +# Older kernels do not allow function_profile to be enabled with +# function graph tracer. If the below fails, mark it as unsupported +echo 1 > function_profile_enabled || exit_unsupported + +# Let it run for a bit to make sure nothing explodes +sleep 1 + +exit 0 diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-retval.tc b/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-retval.tc index e34c0bdef3ed..4307d4eef417 100644 --- a/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-retval.tc +++ b/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-retval.tc @@ -29,7 +29,7 @@ set -e : "Test printing the error code in signed decimal format" echo 0 > options/funcgraph-retval-hex -count=`cat trace | grep 'proc_reg_write' | grep '= -5' | wc -l` +count=`cat trace | grep 'proc_reg_write' | grep -e '=-5 ' -e '= -5 ' | wc -l` if [ $count -eq 0 ]; then fail "Return value can not be printed in signed decimal format" fi diff --git a/tools/testing/selftests/ftrace/test.d/functions b/tools/testing/selftests/ftrace/test.d/functions index 779f3e62ec90..84d6a9c7ad67 100644 --- a/tools/testing/selftests/ftrace/test.d/functions +++ b/tools/testing/selftests/ftrace/test.d/functions @@ -193,3 +193,28 @@ ftrace_errlog_check() { # err-prefix command-with-error-pos-by-^ command-file # " Command: " and "^\n" => 13 test $(expr 13 + $pos) -eq $N } + +# Helper to get the tracefs mount point +get_mount_point() { + local mount_point=`stat -c '%m' .` + + # If stat -c '%m' does not work (e.g. busybox) or failed, try to use the + # current working directory (which should be a tracefs) as the mount point. + if [ ! -d "$mount_point" ]; then + if mount | grep -qw "$PWD"; then + mount_point=$PWD + else + # If PWD doesn't work, that is an environmental problem. + exit_unresolved + fi + fi + echo "$mount_point" +} + +# Helper function to retrieve mount options for a given mount point +get_mnt_options() { + local mnt_point="$1" + local opts=$(mount | grep -m1 "$mnt_point" | sed -e 's/.*(\(.*\)).*/\1/') + + echo "$opts" +}
\ No newline at end of file diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc index a16c6a6f6055..8f1c58f0c239 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc @@ -111,7 +111,7 @@ check_error 'p vfs_read $arg* ^$arg*' # DOUBLE_ARGS if !grep -q 'kernel return probes support:' README; then check_error 'r vfs_read ^$arg*' # NOFENTRY_ARGS fi -check_error 'p vfs_read+8 ^$arg*' # NOFENTRY_ARGS +check_error 'p vfs_read+20 ^$arg*' # NOFENTRY_ARGS check_error 'p vfs_read ^hoge' # NO_BTFARG check_error 'p kfree ^$arg10' # NO_BTFARG (exceed the number of parameters) check_error 'r kfree ^$retval' # NO_RETVAL diff --git a/tools/testing/selftests/hid/Makefile b/tools/testing/selftests/hid/Makefile index 38ae31bb07b5..0336353bd15f 100644 --- a/tools/testing/selftests/hid/Makefile +++ b/tools/testing/selftests/hid/Makefile @@ -18,6 +18,7 @@ TEST_PROGS += hid-usb_crash.sh TEST_PROGS += hid-wacom.sh TEST_FILES := run-hid-tools-tests.sh +TEST_FILES += tests CXX ?= $(CROSS_COMPILE)g++ @@ -231,7 +232,7 @@ $(BPF_SKELS): %.skel.h: %.bpf.o $(BPFTOOL) | $(OUTPUT) $(Q)$(BPFTOOL) gen object $(<:.o=.linked1.o) $< $(Q)$(BPFTOOL) gen skeleton $(<:.o=.linked1.o) name $(notdir $(<:.bpf.o=)) > $@ -$(OUTPUT)/%.o: %.c $(BPF_SKELS) +$(OUTPUT)/%.o: %.c $(BPF_SKELS) hid_common.h $(call msg,CC,,$@) $(Q)$(CC) $(CFLAGS) -c $(filter %.c,$^) $(LDLIBS) -o $@ diff --git a/tools/testing/selftests/hid/hid_bpf.c b/tools/testing/selftests/hid/hid_bpf.c index 86f4d66379f7..1e979fb3542b 100644 --- a/tools/testing/selftests/hid/hid_bpf.c +++ b/tools/testing/selftests/hid/hid_bpf.c @@ -4,13 +4,6 @@ #include "hid_common.h" #include <bpf/bpf.h> -struct attach_prog_args { - int prog_fd; - unsigned int hid; - int retval; - int insert_head; -}; - struct hid_hw_request_syscall_args { __u8 data[10]; unsigned int hid; @@ -21,11 +14,8 @@ struct hid_hw_request_syscall_args { }; FIXTURE(hid_bpf) { - int dev_id; - int uhid_fd; + struct uhid_device hid; int hidraw_fd; - int hid_id; - pthread_t tid; struct hid *skel; struct bpf_link *hid_links[3]; /* max number of programs loaded in a single test */ }; @@ -54,10 +44,10 @@ static void detach_bpf(FIXTURE_DATA(hid_bpf) * self) FIXTURE_TEARDOWN(hid_bpf) { void *uhid_err; - uhid_destroy(_metadata, self->uhid_fd); + uhid_destroy(_metadata, &self->hid); detach_bpf(self); - pthread_join(self->tid, &uhid_err); + pthread_join(self->hid.tid, &uhid_err); } #define TEARDOWN_LOG(fmt, ...) do { \ TH_LOG(fmt, ##__VA_ARGS__); \ @@ -66,23 +56,10 @@ FIXTURE_TEARDOWN(hid_bpf) { FIXTURE_SETUP(hid_bpf) { - time_t t; int err; - /* initialize random number generator */ - srand((unsigned int)time(&t)); - - self->dev_id = rand() % 1024; - - self->uhid_fd = setup_uhid(_metadata, self->dev_id); - - /* locate the uev, self, variant);ent file of the created device */ - self->hid_id = get_hid_id(self->dev_id); - ASSERT_GT(self->hid_id, 0) - TEARDOWN_LOG("Could not locate uhid device id: %d", self->hid_id); - - err = uhid_start_listener(_metadata, &self->tid, self->uhid_fd); - ASSERT_EQ(0, err) TEARDOWN_LOG("could not start udev listener: %d", err); + err = setup_uhid(_metadata, &self->hid, BUS_USB, 0x0001, 0x0a36, rdesc, sizeof(rdesc)); + ASSERT_OK(err); } struct test_program { @@ -129,7 +106,7 @@ static void load_programs(const struct test_program programs[], ops_hid_id = bpf_map__initial_value(map, NULL); ASSERT_OK_PTR(ops_hid_id) TH_LOG("unable to retrieve struct_ops data"); - *ops_hid_id = self->hid_id; + *ops_hid_id = self->hid.hid_id; } /* we disable the auto-attach feature of all maps because we @@ -157,7 +134,7 @@ static void load_programs(const struct test_program programs[], hid__attach(self->skel); - self->hidraw_fd = open_hidraw(self->dev_id); + self->hidraw_fd = open_hidraw(&self->hid); ASSERT_GE(self->hidraw_fd, 0) TH_LOG("open_hidraw"); } @@ -192,7 +169,7 @@ TEST_F(hid_bpf, raw_event) /* inject one event */ buf[0] = 1; buf[1] = 42; - uhid_send_event(_metadata, self->uhid_fd, buf, 6); + uhid_send_event(_metadata, &self->hid, buf, 6); /* check that hid_first_event() was executed */ ASSERT_EQ(self->skel->data->callback_check, 42) TH_LOG("callback_check1"); @@ -208,7 +185,7 @@ TEST_F(hid_bpf, raw_event) memset(buf, 0, sizeof(buf)); buf[0] = 1; buf[1] = 47; - uhid_send_event(_metadata, self->uhid_fd, buf, 6); + uhid_send_event(_metadata, &self->hid, buf, 6); /* check that hid_first_event() was executed */ ASSERT_EQ(self->skel->data->callback_check, 47) TH_LOG("callback_check1"); @@ -239,7 +216,7 @@ TEST_F(hid_bpf, subprog_raw_event) /* inject one event */ buf[0] = 1; buf[1] = 42; - uhid_send_event(_metadata, self->uhid_fd, buf, 6); + uhid_send_event(_metadata, &self->hid, buf, 6); /* read the data from hidraw */ memset(buf, 0, sizeof(buf)); @@ -252,7 +229,7 @@ TEST_F(hid_bpf, subprog_raw_event) memset(buf, 0, sizeof(buf)); buf[0] = 1; buf[1] = 47; - uhid_send_event(_metadata, self->uhid_fd, buf, 6); + uhid_send_event(_metadata, &self->hid, buf, 6); /* read the data from hidraw */ memset(buf, 0, sizeof(buf)); @@ -303,7 +280,7 @@ TEST_F(hid_bpf, test_attach_detach) /* inject one event */ buf[0] = 1; buf[1] = 42; - uhid_send_event(_metadata, self->uhid_fd, buf, 6); + uhid_send_event(_metadata, &self->hid, buf, 6); /* read the data from hidraw */ memset(buf, 0, sizeof(buf)); @@ -326,14 +303,14 @@ TEST_F(hid_bpf, test_attach_detach) /* detach the program */ detach_bpf(self); - self->hidraw_fd = open_hidraw(self->dev_id); + self->hidraw_fd = open_hidraw(&self->hid); ASSERT_GE(self->hidraw_fd, 0) TH_LOG("open_hidraw"); /* inject another event */ memset(buf, 0, sizeof(buf)); buf[0] = 1; buf[1] = 47; - uhid_send_event(_metadata, self->uhid_fd, buf, 6); + uhid_send_event(_metadata, &self->hid, buf, 6); /* read the data from hidraw */ memset(buf, 0, sizeof(buf)); @@ -352,7 +329,7 @@ TEST_F(hid_bpf, test_attach_detach) memset(buf, 0, sizeof(buf)); buf[0] = 1; buf[1] = 42; - uhid_send_event(_metadata, self->uhid_fd, buf, 6); + uhid_send_event(_metadata, &self->hid, buf, 6); /* read the data from hidraw */ memset(buf, 0, sizeof(buf)); @@ -382,7 +359,7 @@ TEST_F(hid_bpf, test_hid_change_report) /* inject one event */ buf[0] = 1; buf[1] = 42; - uhid_send_event(_metadata, self->uhid_fd, buf, 6); + uhid_send_event(_metadata, &self->hid, buf, 6); /* read the data from hidraw */ memset(buf, 0, sizeof(buf)); @@ -412,7 +389,7 @@ TEST_F(hid_bpf, test_hid_user_input_report_call) LOAD_BPF; - args.hid = self->hid_id; + args.hid = self->hid.hid_id; args.data[0] = 1; /* report ID */ args.data[1] = 2; /* report ID */ args.data[2] = 42; /* report ID */ @@ -458,7 +435,7 @@ TEST_F(hid_bpf, test_hid_user_output_report_call) LOAD_BPF; - args.hid = self->hid_id; + args.hid = self->hid.hid_id; args.data[0] = 1; /* report ID */ args.data[1] = 2; /* report ID */ args.data[2] = 42; /* report ID */ @@ -506,7 +483,7 @@ TEST_F(hid_bpf, test_hid_user_raw_request_call) LOAD_BPF; - args.hid = self->hid_id; + args.hid = self->hid.hid_id; args.data[0] = 1; /* report ID */ prog_fd = bpf_program__fd(self->skel->progs.hid_user_raw_request); @@ -539,7 +516,7 @@ TEST_F(hid_bpf, test_hid_filter_raw_request_call) /* inject one event */ buf[0] = 1; buf[1] = 42; - uhid_send_event(_metadata, self->uhid_fd, buf, 6); + uhid_send_event(_metadata, &self->hid, buf, 6); /* read the data from hidraw */ memset(buf, 0, sizeof(buf)); @@ -565,7 +542,7 @@ TEST_F(hid_bpf, test_hid_filter_raw_request_call) /* detach the program */ detach_bpf(self); - self->hidraw_fd = open_hidraw(self->dev_id); + self->hidraw_fd = open_hidraw(&self->hid); ASSERT_GE(self->hidraw_fd, 0) TH_LOG("open_hidraw"); err = ioctl(self->hidraw_fd, HIDIOCGFEATURE(sizeof(buf)), buf); @@ -641,7 +618,7 @@ TEST_F(hid_bpf, test_hid_filter_output_report_call) /* inject one event */ buf[0] = 1; buf[1] = 42; - uhid_send_event(_metadata, self->uhid_fd, buf, 6); + uhid_send_event(_metadata, &self->hid, buf, 6); /* read the data from hidraw */ memset(buf, 0, sizeof(buf)); @@ -667,7 +644,7 @@ TEST_F(hid_bpf, test_hid_filter_output_report_call) /* detach the program */ detach_bpf(self); - self->hidraw_fd = open_hidraw(self->dev_id); + self->hidraw_fd = open_hidraw(&self->hid); ASSERT_GE(self->hidraw_fd, 0) TH_LOG("open_hidraw"); err = write(self->hidraw_fd, buf, 3); @@ -742,7 +719,7 @@ TEST_F(hid_bpf, test_multiply_events_wq) /* inject one event */ buf[0] = 1; buf[1] = 42; - uhid_send_event(_metadata, self->uhid_fd, buf, 6); + uhid_send_event(_metadata, &self->hid, buf, 6); /* read the data from hidraw */ memset(buf, 0, sizeof(buf)); @@ -780,7 +757,7 @@ TEST_F(hid_bpf, test_multiply_events) /* inject one event */ buf[0] = 1; buf[1] = 42; - uhid_send_event(_metadata, self->uhid_fd, buf, 6); + uhid_send_event(_metadata, &self->hid, buf, 6); /* read the data from hidraw */ memset(buf, 0, sizeof(buf)); @@ -816,7 +793,7 @@ TEST_F(hid_bpf, test_hid_infinite_loop_input_report_call) buf[1] = 2; buf[2] = 42; - uhid_send_event(_metadata, self->uhid_fd, buf, 6); + uhid_send_event(_metadata, &self->hid, buf, 6); /* read the data from hidraw */ memset(buf, 0, sizeof(buf)); @@ -867,7 +844,7 @@ TEST_F(hid_bpf, test_hid_attach_flags) /* inject one event */ buf[0] = 1; - uhid_send_event(_metadata, self->uhid_fd, buf, 6); + uhid_send_event(_metadata, &self->hid, buf, 6); /* read the data from hidraw */ memset(buf, 0, sizeof(buf)); diff --git a/tools/testing/selftests/hid/hid_common.h b/tools/testing/selftests/hid/hid_common.h index f151f151a1ed..f77f69c6657d 100644 --- a/tools/testing/selftests/hid/hid_common.h +++ b/tools/testing/selftests/hid/hid_common.h @@ -19,6 +19,16 @@ __typeof__(b) _b = (b); \ _a < _b ? _a : _b; }) +struct uhid_device { + int dev_id; /* uniq (random) number to identify the device */ + int uhid_fd; + int hid_id; /* HID device id in the system */ + __u16 bus; + __u32 vid; + __u32 pid; + pthread_t tid; /* thread for reading uhid events */ +}; + static unsigned char rdesc[] = { 0x06, 0x00, 0xff, /* Usage Page (Vendor Defined Page 1) */ 0x09, 0x21, /* Usage (Vendor Usage 0x21) */ @@ -122,7 +132,9 @@ static int uhid_write(struct __test_metadata *_metadata, int fd, const struct uh } } -static int uhid_create(struct __test_metadata *_metadata, int fd, int rand_nb) +static int uhid_create(struct __test_metadata *_metadata, int fd, int rand_nb, + __u16 bus, __u32 vid, __u32 pid, __u8 *rdesc, + size_t rdesc_size) { struct uhid_event ev; char buf[25]; @@ -133,10 +145,10 @@ static int uhid_create(struct __test_metadata *_metadata, int fd, int rand_nb) ev.type = UHID_CREATE; strcpy((char *)ev.u.create.name, buf); ev.u.create.rd_data = rdesc; - ev.u.create.rd_size = sizeof(rdesc); - ev.u.create.bus = BUS_USB; - ev.u.create.vendor = 0x0001; - ev.u.create.product = 0x0a37; + ev.u.create.rd_size = rdesc_size; + ev.u.create.bus = bus; + ev.u.create.vendor = vid; + ev.u.create.product = pid; ev.u.create.version = 0; ev.u.create.country = 0; @@ -146,14 +158,14 @@ static int uhid_create(struct __test_metadata *_metadata, int fd, int rand_nb) return uhid_write(_metadata, fd, &ev); } -static void uhid_destroy(struct __test_metadata *_metadata, int fd) +static void uhid_destroy(struct __test_metadata *_metadata, struct uhid_device *hid) { struct uhid_event ev; memset(&ev, 0, sizeof(ev)); ev.type = UHID_DESTROY; - uhid_write(_metadata, fd, &ev); + uhid_write(_metadata, hid->uhid_fd, &ev); } static int uhid_event(struct __test_metadata *_metadata, int fd) @@ -281,7 +293,8 @@ static int uhid_start_listener(struct __test_metadata *_metadata, pthread_t *tid return 0; } -static int uhid_send_event(struct __test_metadata *_metadata, int fd, __u8 *buf, size_t size) +static int uhid_send_event(struct __test_metadata *_metadata, struct uhid_device *hid, + __u8 *buf, size_t size) { struct uhid_event ev; @@ -294,36 +307,20 @@ static int uhid_send_event(struct __test_metadata *_metadata, int fd, __u8 *buf, memcpy(ev.u.input2.data, buf, size); - return uhid_write(_metadata, fd, &ev); + return uhid_write(_metadata, hid->uhid_fd, &ev); } -static int setup_uhid(struct __test_metadata *_metadata, int rand_nb) +static bool match_sysfs_device(struct uhid_device *hid, const char *workdir, struct dirent *dir) { - int fd; - const char *path = "/dev/uhid"; - int ret; - - fd = open(path, O_RDWR | O_CLOEXEC); - ASSERT_GE(fd, 0) TH_LOG("open uhid-cdev failed; %d", fd); - - ret = uhid_create(_metadata, fd, rand_nb); - ASSERT_EQ(0, ret) { - TH_LOG("create uhid device failed: %d", ret); - close(fd); - } - - return fd; -} - -static bool match_sysfs_device(int dev_id, const char *workdir, struct dirent *dir) -{ - const char *target = "0003:0001:0A37.*"; + char target[20] = ""; char phys[512]; char uevent[1024]; char temp[512]; int fd, nread; bool found = false; + snprintf(target, sizeof(target), "%04X:%04X:%04X.*", hid->bus, hid->vid, hid->pid); + if (fnmatch(target, dir->d_name, 0)) return false; @@ -334,7 +331,7 @@ static bool match_sysfs_device(int dev_id, const char *workdir, struct dirent *d if (fd < 0) return false; - sprintf(phys, "PHYS=%d", dev_id); + sprintf(phys, "PHYS=%d", hid->dev_id); nread = read(fd, temp, ARRAY_SIZE(temp)); if (nread > 0 && (strstr(temp, phys)) != NULL) @@ -345,7 +342,7 @@ static bool match_sysfs_device(int dev_id, const char *workdir, struct dirent *d return found; } -static int get_hid_id(int dev_id) +static int get_hid_id(struct uhid_device *hid) { const char *workdir = "/sys/devices/virtual/misc/uhid"; const char *str_id; @@ -360,10 +357,10 @@ static int get_hid_id(int dev_id) d = opendir(workdir); if (d) { while ((dir = readdir(d)) != NULL) { - if (!match_sysfs_device(dev_id, workdir, dir)) + if (!match_sysfs_device(hid, workdir, dir)) continue; - str_id = dir->d_name + sizeof("0003:0001:0A37."); + str_id = dir->d_name + sizeof("0000:0000:0000."); found = (int)strtol(str_id, NULL, 16); break; @@ -377,7 +374,7 @@ static int get_hid_id(int dev_id) return found; } -static int get_hidraw(int dev_id) +static int get_hidraw(struct uhid_device *hid) { const char *workdir = "/sys/devices/virtual/misc/uhid"; char sysfs[1024]; @@ -394,7 +391,7 @@ static int get_hidraw(int dev_id) continue; while ((dir = readdir(d)) != NULL) { - if (!match_sysfs_device(dev_id, workdir, dir)) + if (!match_sysfs_device(hid, workdir, dir)) continue; sprintf(sysfs, "%s/%s/hidraw", workdir, dir->d_name); @@ -421,12 +418,12 @@ static int get_hidraw(int dev_id) return found; } -static int open_hidraw(int dev_id) +static int open_hidraw(struct uhid_device *hid) { int hidraw_number; char hidraw_path[64] = { 0 }; - hidraw_number = get_hidraw(dev_id); + hidraw_number = get_hidraw(hid); if (hidraw_number < 0) return hidraw_number; @@ -434,3 +431,44 @@ static int open_hidraw(int dev_id) sprintf(hidraw_path, "/dev/hidraw%d", hidraw_number); return open(hidraw_path, O_RDWR | O_NONBLOCK); } + +static int setup_uhid(struct __test_metadata *_metadata, struct uhid_device *hid, + __u16 bus, __u32 vid, __u32 pid, const __u8 *rdesc, size_t rdesc_size) +{ + const char *path = "/dev/uhid"; + time_t t; + int ret; + + /* initialize random number generator */ + srand((unsigned int)time(&t)); + + hid->dev_id = rand() % 1024; + hid->bus = bus; + hid->vid = vid; + hid->pid = pid; + + hid->uhid_fd = open(path, O_RDWR | O_CLOEXEC); + ASSERT_GE(hid->uhid_fd, 0) TH_LOG("open uhid-cdev failed; %d", hid->uhid_fd); + + ret = uhid_create(_metadata, hid->uhid_fd, hid->dev_id, bus, vid, pid, + (__u8 *)rdesc, rdesc_size); + ASSERT_EQ(0, ret) { + TH_LOG("create uhid device failed: %d", ret); + close(hid->uhid_fd); + return ret; + } + + /* locate the uevent file of the created device */ + hid->hid_id = get_hid_id(hid); + ASSERT_GT(hid->hid_id, 0) + TH_LOG("Could not locate uhid device id: %d", hid->hid_id); + + ret = uhid_start_listener(_metadata, &hid->tid, hid->uhid_fd); + ASSERT_EQ(0, ret) { + TH_LOG("could not start udev listener: %d", ret); + close(hid->uhid_fd); + return ret; + } + + return 0; +} diff --git a/tools/testing/selftests/hid/hidraw.c b/tools/testing/selftests/hid/hidraw.c index f8b4f7ff292c..821db37ba4bb 100644 --- a/tools/testing/selftests/hid/hidraw.c +++ b/tools/testing/selftests/hid/hidraw.c @@ -9,11 +9,8 @@ #endif /* HIDIOCREVOKE */ FIXTURE(hidraw) { - int dev_id; - int uhid_fd; + struct uhid_device hid; int hidraw_fd; - int hid_id; - pthread_t tid; }; static void close_hidraw(FIXTURE_DATA(hidraw) * self) { @@ -25,10 +22,10 @@ static void close_hidraw(FIXTURE_DATA(hidraw) * self) FIXTURE_TEARDOWN(hidraw) { void *uhid_err; - uhid_destroy(_metadata, self->uhid_fd); + uhid_destroy(_metadata, &self->hid); close_hidraw(self); - pthread_join(self->tid, &uhid_err); + pthread_join(self->hid.tid, &uhid_err); } #define TEARDOWN_LOG(fmt, ...) do { \ TH_LOG(fmt, ##__VA_ARGS__); \ @@ -37,25 +34,12 @@ FIXTURE_TEARDOWN(hidraw) { FIXTURE_SETUP(hidraw) { - time_t t; int err; - /* initialize random number generator */ - srand((unsigned int)time(&t)); + err = setup_uhid(_metadata, &self->hid, BUS_USB, 0x0001, 0x0a37, rdesc, sizeof(rdesc)); + ASSERT_OK(err); - self->dev_id = rand() % 1024; - - self->uhid_fd = setup_uhid(_metadata, self->dev_id); - - /* locate the uev, self, variant);ent file of the created device */ - self->hid_id = get_hid_id(self->dev_id); - ASSERT_GT(self->hid_id, 0) - TEARDOWN_LOG("Could not locate uhid device id: %d", self->hid_id); - - err = uhid_start_listener(_metadata, &self->tid, self->uhid_fd); - ASSERT_EQ(0, err) TEARDOWN_LOG("could not start udev listener: %d", err); - - self->hidraw_fd = open_hidraw(self->dev_id); + self->hidraw_fd = open_hidraw(&self->hid); ASSERT_GE(self->hidraw_fd, 0) TH_LOG("open_hidraw"); } @@ -79,7 +63,7 @@ TEST_F(hidraw, raw_event) /* inject one event */ buf[0] = 1; buf[1] = 42; - uhid_send_event(_metadata, self->uhid_fd, buf, 6); + uhid_send_event(_metadata, &self->hid, buf, 6); /* read the data from hidraw */ memset(buf, 0, sizeof(buf)); @@ -101,7 +85,7 @@ TEST_F(hidraw, raw_event_revoked) /* inject one event */ buf[0] = 1; buf[1] = 42; - uhid_send_event(_metadata, self->uhid_fd, buf, 6); + uhid_send_event(_metadata, &self->hid, buf, 6); /* read the data from hidraw */ memset(buf, 0, sizeof(buf)); @@ -117,7 +101,7 @@ TEST_F(hidraw, raw_event_revoked) /* inject one other event */ buf[0] = 1; buf[1] = 43; - uhid_send_event(_metadata, self->uhid_fd, buf, 6); + uhid_send_event(_metadata, &self->hid, buf, 6); /* read the data from hidraw */ memset(buf, 0, sizeof(buf)); @@ -161,7 +145,7 @@ TEST_F(hidraw, poll_revoked) /* inject one event */ buf[0] = 1; buf[1] = 42; - uhid_send_event(_metadata, self->uhid_fd, buf, 6); + uhid_send_event(_metadata, &self->hid, buf, 6); while (true) { ready = poll(pfds, 1, 5000); diff --git a/tools/testing/selftests/hid/progs/hid_bpf_helpers.h b/tools/testing/selftests/hid/progs/hid_bpf_helpers.h index e5db897586bb..531228b849da 100644 --- a/tools/testing/selftests/hid/progs/hid_bpf_helpers.h +++ b/tools/testing/selftests/hid/progs/hid_bpf_helpers.h @@ -22,6 +22,9 @@ #define HID_REQ_SET_IDLE HID_REQ_SET_IDLE___not_used #define HID_REQ_SET_PROTOCOL HID_REQ_SET_PROTOCOL___not_used +/* do not define kfunc through vmlinux.h as this messes up our custom hack */ +#define BPF_NO_KFUNC_PROTOTYPES + #include "vmlinux.h" #undef hid_bpf_ctx @@ -91,31 +94,31 @@ struct hid_bpf_ops { /* following are kfuncs exported by HID for HID-BPF */ extern __u8 *hid_bpf_get_data(struct hid_bpf_ctx *ctx, unsigned int offset, - const size_t __sz) __ksym; -extern struct hid_bpf_ctx *hid_bpf_allocate_context(unsigned int hid_id) __ksym; -extern void hid_bpf_release_context(struct hid_bpf_ctx *ctx) __ksym; + const size_t __sz) __weak __ksym; +extern struct hid_bpf_ctx *hid_bpf_allocate_context(unsigned int hid_id) __weak __ksym; +extern void hid_bpf_release_context(struct hid_bpf_ctx *ctx) __weak __ksym; extern int hid_bpf_hw_request(struct hid_bpf_ctx *ctx, __u8 *data, size_t buf__sz, enum hid_report_type type, - enum hid_class_request reqtype) __ksym; + enum hid_class_request reqtype) __weak __ksym; extern int hid_bpf_hw_output_report(struct hid_bpf_ctx *ctx, - __u8 *buf, size_t buf__sz) __ksym; + __u8 *buf, size_t buf__sz) __weak __ksym; extern int hid_bpf_input_report(struct hid_bpf_ctx *ctx, enum hid_report_type type, __u8 *data, - size_t buf__sz) __ksym; + size_t buf__sz) __weak __ksym; extern int hid_bpf_try_input_report(struct hid_bpf_ctx *ctx, enum hid_report_type type, __u8 *data, - size_t buf__sz) __ksym; + size_t buf__sz) __weak __ksym; /* bpf_wq implementation */ extern int bpf_wq_init(struct bpf_wq *wq, void *p__map, unsigned int flags) __weak __ksym; extern int bpf_wq_start(struct bpf_wq *wq, unsigned int flags) __weak __ksym; extern int bpf_wq_set_callback_impl(struct bpf_wq *wq, int (callback_fn)(void *map, int *key, void *wq), - unsigned int flags__k, void *aux__ign) __ksym; + unsigned int flags__k, void *aux__ign) __weak __ksym; #define bpf_wq_set_callback(timer, cb, flags) \ bpf_wq_set_callback_impl(timer, cb, flags, NULL) diff --git a/tools/testing/selftests/hid/run-hid-tools-tests.sh b/tools/testing/selftests/hid/run-hid-tools-tests.sh index bdae8464da86..af1682a53c27 100755 --- a/tools/testing/selftests/hid/run-hid-tools-tests.sh +++ b/tools/testing/selftests/hid/run-hid-tools-tests.sh @@ -2,24 +2,26 @@ # SPDX-License-Identifier: GPL-2.0 # Runs tests for the HID subsystem +KSELFTEST_SKIP_TEST=4 + if ! command -v python3 > /dev/null 2>&1; then echo "hid-tools: [SKIP] python3 not installed" - exit 77 + exit $KSELFTEST_SKIP_TEST fi if ! python3 -c "import pytest" > /dev/null 2>&1; then - echo "hid: [SKIP/ pytest module not installed" - exit 77 + echo "hid: [SKIP] pytest module not installed" + exit $KSELFTEST_SKIP_TEST fi if ! python3 -c "import pytest_tap" > /dev/null 2>&1; then - echo "hid: [SKIP/ pytest_tap module not installed" - exit 77 + echo "hid: [SKIP] pytest_tap module not installed" + exit $KSELFTEST_SKIP_TEST fi if ! python3 -c "import hidtools" > /dev/null 2>&1; then - echo "hid: [SKIP/ hid-tools module not installed" - exit 77 + echo "hid: [SKIP] hid-tools module not installed" + exit $KSELFTEST_SKIP_TEST fi TARGET=${TARGET:=.} diff --git a/tools/testing/selftests/intel_pstate/run.sh b/tools/testing/selftests/intel_pstate/run.sh index e7008f614ad7..6a3b8503264e 100755 --- a/tools/testing/selftests/intel_pstate/run.sh +++ b/tools/testing/selftests/intel_pstate/run.sh @@ -44,6 +44,11 @@ if [ $UID != 0 ] && [ $EVALUATE_ONLY == 0 ]; then exit $ksft_skip fi +if ! command -v cpupower &> /dev/null; then + echo $msg cpupower could not be found, please install it >&2 + exit $ksft_skip +fi + max_cpus=$(($(nproc)-1)) function run_test () { @@ -87,9 +92,9 @@ mkt_freq=${_mkt_freq}0 # Get the ranges from cpupower _min_freq=$(cpupower frequency-info -l | tail -1 | awk ' { print $1 } ') -min_freq=$(($_min_freq / 1000)) +min_freq=$((_min_freq / 1000)) _max_freq=$(cpupower frequency-info -l | tail -1 | awk ' { print $2 } ') -max_freq=$(($_max_freq / 1000)) +max_freq=$((_max_freq / 1000)) [ $EVALUATE_ONLY -eq 0 ] && for freq in `seq $max_freq -100 $min_freq` diff --git a/tools/testing/selftests/iommu/Makefile b/tools/testing/selftests/iommu/Makefile index fd6477911f24..84abeb2f0949 100644 --- a/tools/testing/selftests/iommu/Makefile +++ b/tools/testing/selftests/iommu/Makefile @@ -1,6 +1,7 @@ # SPDX-License-Identifier: GPL-2.0-only CFLAGS += -Wall -O2 -Wno-unused-function CFLAGS += $(KHDR_INCLUDES) +LDLIBS += -lcap TEST_GEN_PROGS := TEST_GEN_PROGS += iommufd diff --git a/tools/testing/selftests/iommu/iommufd.c b/tools/testing/selftests/iommu/iommufd.c index 4927b9add5ad..a1b2b657999d 100644 --- a/tools/testing/selftests/iommu/iommufd.c +++ b/tools/testing/selftests/iommu/iommufd.c @@ -1,6 +1,8 @@ // SPDX-License-Identifier: GPL-2.0-only /* Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES */ +#include <asm/unistd.h> #include <stdlib.h> +#include <sys/capability.h> #include <sys/mman.h> #include <sys/eventfd.h> @@ -49,6 +51,9 @@ static __attribute__((constructor)) void setup_sizes(void) vrc = mmap(buffer, BUFFER_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS | MAP_FIXED, -1, 0); assert(vrc == buffer); + + mfd_buffer = memfd_mmap(BUFFER_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED, + &mfd); } FIXTURE(iommufd) @@ -128,6 +133,11 @@ TEST_F(iommufd, cmd_length) TEST_LENGTH(iommu_ioas_unmap, IOMMU_IOAS_UNMAP, length); TEST_LENGTH(iommu_option, IOMMU_OPTION, val64); TEST_LENGTH(iommu_vfio_ioas, IOMMU_VFIO_IOAS, __reserved); + TEST_LENGTH(iommu_ioas_map_file, IOMMU_IOAS_MAP_FILE, iova); + TEST_LENGTH(iommu_viommu_alloc, IOMMU_VIOMMU_ALLOC, out_viommu_id); + TEST_LENGTH(iommu_vdevice_alloc, IOMMU_VDEVICE_ALLOC, virt_id); + TEST_LENGTH(iommu_ioas_change_process, IOMMU_IOAS_CHANGE_PROCESS, + __reserved); #undef TEST_LENGTH } @@ -186,6 +196,144 @@ TEST_F(iommufd, global_options) EXPECT_ERRNO(ENOENT, ioctl(self->fd, IOMMU_OPTION, &cmd)); } +static void drop_cap_ipc_lock(struct __test_metadata *_metadata) +{ + cap_t caps; + cap_value_t cap_list[1] = { CAP_IPC_LOCK }; + + caps = cap_get_proc(); + ASSERT_NE(caps, NULL); + ASSERT_NE(-1, + cap_set_flag(caps, CAP_EFFECTIVE, 1, cap_list, CAP_CLEAR)); + ASSERT_NE(-1, cap_set_proc(caps)); + cap_free(caps); +} + +static long get_proc_status_value(pid_t pid, const char *var) +{ + FILE *fp; + char buf[80], tag[80]; + long val = -1; + + snprintf(buf, sizeof(buf), "/proc/%d/status", pid); + fp = fopen(buf, "r"); + if (!fp) + return val; + + while (fgets(buf, sizeof(buf), fp)) + if (fscanf(fp, "%s %ld\n", tag, &val) == 2 && !strcmp(tag, var)) + break; + + fclose(fp); + return val; +} + +static long get_vm_pinned(pid_t pid) +{ + return get_proc_status_value(pid, "VmPin:"); +} + +static long get_vm_locked(pid_t pid) +{ + return get_proc_status_value(pid, "VmLck:"); +} + +FIXTURE(change_process) +{ + int fd; + uint32_t ioas_id; +}; + +FIXTURE_VARIANT(change_process) +{ + int accounting; +}; + +FIXTURE_SETUP(change_process) +{ + self->fd = open("/dev/iommu", O_RDWR); + ASSERT_NE(-1, self->fd); + + drop_cap_ipc_lock(_metadata); + if (variant->accounting != IOPT_PAGES_ACCOUNT_NONE) { + struct iommu_option set_limit_cmd = { + .size = sizeof(set_limit_cmd), + .option_id = IOMMU_OPTION_RLIMIT_MODE, + .op = IOMMU_OPTION_OP_SET, + .val64 = (variant->accounting == IOPT_PAGES_ACCOUNT_MM), + }; + ASSERT_EQ(0, ioctl(self->fd, IOMMU_OPTION, &set_limit_cmd)); + } + + test_ioctl_ioas_alloc(&self->ioas_id); + test_cmd_mock_domain(self->ioas_id, NULL, NULL, NULL); +} + +FIXTURE_TEARDOWN(change_process) +{ + teardown_iommufd(self->fd, _metadata); +} + +FIXTURE_VARIANT_ADD(change_process, account_none) +{ + .accounting = IOPT_PAGES_ACCOUNT_NONE, +}; + +FIXTURE_VARIANT_ADD(change_process, account_user) +{ + .accounting = IOPT_PAGES_ACCOUNT_USER, +}; + +FIXTURE_VARIANT_ADD(change_process, account_mm) +{ + .accounting = IOPT_PAGES_ACCOUNT_MM, +}; + +TEST_F(change_process, basic) +{ + pid_t parent = getpid(); + pid_t child; + __u64 iova; + struct iommu_ioas_change_process cmd = { + .size = sizeof(cmd), + }; + + /* Expect failure if non-file maps exist */ + test_ioctl_ioas_map(buffer, PAGE_SIZE, &iova); + EXPECT_ERRNO(EINVAL, ioctl(self->fd, IOMMU_IOAS_CHANGE_PROCESS, &cmd)); + test_ioctl_ioas_unmap(iova, PAGE_SIZE); + + /* Change process works in current process. */ + test_ioctl_ioas_map_file(mfd, 0, PAGE_SIZE, &iova); + ASSERT_EQ(0, ioctl(self->fd, IOMMU_IOAS_CHANGE_PROCESS, &cmd)); + + /* Change process works in another process */ + child = fork(); + if (!child) { + int nlock = PAGE_SIZE / 1024; + + /* Parent accounts for locked memory before */ + ASSERT_EQ(nlock, get_vm_pinned(parent)); + if (variant->accounting == IOPT_PAGES_ACCOUNT_MM) + ASSERT_EQ(nlock, get_vm_locked(parent)); + ASSERT_EQ(0, get_vm_pinned(getpid())); + ASSERT_EQ(0, get_vm_locked(getpid())); + + ASSERT_EQ(0, ioctl(self->fd, IOMMU_IOAS_CHANGE_PROCESS, &cmd)); + + /* Child accounts for locked memory after */ + ASSERT_EQ(0, get_vm_pinned(parent)); + ASSERT_EQ(0, get_vm_locked(parent)); + ASSERT_EQ(nlock, get_vm_pinned(getpid())); + if (variant->accounting == IOPT_PAGES_ACCOUNT_MM) + ASSERT_EQ(nlock, get_vm_locked(getpid())); + + exit(0); + } + ASSERT_NE(-1, child); + ASSERT_EQ(child, waitpid(child, NULL, 0)); +} + FIXTURE(iommufd_ioas) { int fd; @@ -220,6 +368,8 @@ FIXTURE_SETUP(iommufd_ioas) for (i = 0; i != variant->mock_domains; i++) { test_cmd_mock_domain(self->ioas_id, &self->stdev_id, &self->hwpt_id, &self->device_id); + test_cmd_dev_check_cache_all(self->device_id, + IOMMU_TEST_DEV_CACHE_DEFAULT); self->base_iova = MOCK_APERTURE_START; } } @@ -360,9 +510,9 @@ TEST_F(iommufd_ioas, alloc_hwpt_nested) EXPECT_ERRNO(EBUSY, _test_ioctl_destroy(self->fd, parent_hwpt_id)); - /* hwpt_invalidate only supports a user-managed hwpt (nested) */ + /* hwpt_invalidate does not support a parent hwpt */ num_inv = 1; - test_err_hwpt_invalidate(ENOENT, parent_hwpt_id, inv_reqs, + test_err_hwpt_invalidate(EINVAL, parent_hwpt_id, inv_reqs, IOMMU_HWPT_INVALIDATE_DATA_SELFTEST, sizeof(*inv_reqs), &num_inv); assert(!num_inv); @@ -1372,6 +1522,7 @@ FIXTURE_VARIANT(iommufd_mock_domain) { unsigned int mock_domains; bool hugepages; + bool file; }; FIXTURE_SETUP(iommufd_mock_domain) @@ -1384,9 +1535,12 @@ FIXTURE_SETUP(iommufd_mock_domain) ASSERT_GE(ARRAY_SIZE(self->hwpt_ids), variant->mock_domains); - for (i = 0; i != variant->mock_domains; i++) + for (i = 0; i != variant->mock_domains; i++) { test_cmd_mock_domain(self->ioas_id, &self->stdev_ids[i], &self->hwpt_ids[i], &self->idev_ids[i]); + test_cmd_dev_check_cache_all(self->idev_ids[0], + IOMMU_TEST_DEV_CACHE_DEFAULT); + } self->hwpt_id = self->hwpt_ids[0]; self->mmap_flags = MAP_SHARED | MAP_ANONYMOUS; @@ -1410,26 +1564,45 @@ FIXTURE_VARIANT_ADD(iommufd_mock_domain, one_domain) { .mock_domains = 1, .hugepages = false, + .file = false, }; FIXTURE_VARIANT_ADD(iommufd_mock_domain, two_domains) { .mock_domains = 2, .hugepages = false, + .file = false, }; FIXTURE_VARIANT_ADD(iommufd_mock_domain, one_domain_hugepage) { .mock_domains = 1, .hugepages = true, + .file = false, }; FIXTURE_VARIANT_ADD(iommufd_mock_domain, two_domains_hugepage) { .mock_domains = 2, .hugepages = true, + .file = false, }; +FIXTURE_VARIANT_ADD(iommufd_mock_domain, one_domain_file) +{ + .mock_domains = 1, + .hugepages = false, + .file = true, +}; + +FIXTURE_VARIANT_ADD(iommufd_mock_domain, one_domain_file_hugepage) +{ + .mock_domains = 1, + .hugepages = true, + .file = true, +}; + + /* Have the kernel check that the user pages made it to the iommu_domain */ #define check_mock_iova(_ptr, _iova, _length) \ ({ \ @@ -1455,7 +1628,10 @@ FIXTURE_VARIANT_ADD(iommufd_mock_domain, two_domains_hugepage) } \ }) -TEST_F(iommufd_mock_domain, basic) +static void +test_basic_mmap(struct __test_metadata *_metadata, + struct _test_data_iommufd_mock_domain *self, + const struct _fixture_variant_iommufd_mock_domain *variant) { size_t buf_size = self->mmap_buf_size; uint8_t *buf; @@ -1478,6 +1654,40 @@ TEST_F(iommufd_mock_domain, basic) test_err_ioctl_ioas_map(EFAULT, buf, buf_size, &iova); } +static void +test_basic_file(struct __test_metadata *_metadata, + struct _test_data_iommufd_mock_domain *self, + const struct _fixture_variant_iommufd_mock_domain *variant) +{ + size_t buf_size = self->mmap_buf_size; + uint8_t *buf; + __u64 iova; + int mfd_tmp; + int prot = PROT_READ | PROT_WRITE; + + /* Simple one page map */ + test_ioctl_ioas_map_file(mfd, 0, PAGE_SIZE, &iova); + check_mock_iova(mfd_buffer, iova, PAGE_SIZE); + + buf = memfd_mmap(buf_size, prot, MAP_SHARED, &mfd_tmp); + ASSERT_NE(MAP_FAILED, buf); + + test_err_ioctl_ioas_map_file(EINVAL, mfd_tmp, 0, buf_size + 1, &iova); + + ASSERT_EQ(0, ftruncate(mfd_tmp, 0)); + test_err_ioctl_ioas_map_file(EINVAL, mfd_tmp, 0, buf_size, &iova); + + close(mfd_tmp); +} + +TEST_F(iommufd_mock_domain, basic) +{ + if (variant->file) + test_basic_file(_metadata, self, variant); + else + test_basic_mmap(_metadata, self, variant); +} + TEST_F(iommufd_mock_domain, ro_unshare) { uint8_t *buf; @@ -1513,9 +1723,13 @@ TEST_F(iommufd_mock_domain, all_aligns) unsigned int start; unsigned int end; uint8_t *buf; + int prot = PROT_READ | PROT_WRITE; + int mfd; - buf = mmap(0, buf_size, PROT_READ | PROT_WRITE, self->mmap_flags, -1, - 0); + if (variant->file) + buf = memfd_mmap(buf_size, prot, MAP_SHARED, &mfd); + else + buf = mmap(0, buf_size, prot, self->mmap_flags, -1, 0); ASSERT_NE(MAP_FAILED, buf); check_refs(buf, buf_size, 0); @@ -1532,7 +1746,12 @@ TEST_F(iommufd_mock_domain, all_aligns) size_t length = end - start; __u64 iova; - test_ioctl_ioas_map(buf + start, length, &iova); + if (variant->file) { + test_ioctl_ioas_map_file(mfd, start, length, + &iova); + } else { + test_ioctl_ioas_map(buf + start, length, &iova); + } check_mock_iova(buf + start, iova, length); check_refs(buf + start / PAGE_SIZE * PAGE_SIZE, end / PAGE_SIZE * PAGE_SIZE - @@ -1544,6 +1763,8 @@ TEST_F(iommufd_mock_domain, all_aligns) } check_refs(buf, buf_size, 0); ASSERT_EQ(0, munmap(buf, buf_size)); + if (variant->file) + close(mfd); } TEST_F(iommufd_mock_domain, all_aligns_copy) @@ -1554,9 +1775,13 @@ TEST_F(iommufd_mock_domain, all_aligns_copy) unsigned int start; unsigned int end; uint8_t *buf; + int prot = PROT_READ | PROT_WRITE; + int mfd; - buf = mmap(0, buf_size, PROT_READ | PROT_WRITE, self->mmap_flags, -1, - 0); + if (variant->file) + buf = memfd_mmap(buf_size, prot, MAP_SHARED, &mfd); + else + buf = mmap(0, buf_size, prot, self->mmap_flags, -1, 0); ASSERT_NE(MAP_FAILED, buf); check_refs(buf, buf_size, 0); @@ -1575,7 +1800,12 @@ TEST_F(iommufd_mock_domain, all_aligns_copy) uint32_t mock_stdev_id; __u64 iova; - test_ioctl_ioas_map(buf + start, length, &iova); + if (variant->file) { + test_ioctl_ioas_map_file(mfd, start, length, + &iova); + } else { + test_ioctl_ioas_map(buf + start, length, &iova); + } /* Add and destroy a domain while the area exists */ old_id = self->hwpt_ids[1]; @@ -1596,15 +1826,18 @@ TEST_F(iommufd_mock_domain, all_aligns_copy) } check_refs(buf, buf_size, 0); ASSERT_EQ(0, munmap(buf, buf_size)); + if (variant->file) + close(mfd); } TEST_F(iommufd_mock_domain, user_copy) { + void *buf = variant->file ? mfd_buffer : buffer; struct iommu_test_cmd access_cmd = { .size = sizeof(access_cmd), .op = IOMMU_TEST_OP_ACCESS_PAGES, .access_pages = { .length = BUFFER_SIZE, - .uptr = (uintptr_t)buffer }, + .uptr = (uintptr_t)buf }, }; struct iommu_ioas_copy copy_cmd = { .size = sizeof(copy_cmd), @@ -1623,9 +1856,13 @@ TEST_F(iommufd_mock_domain, user_copy) /* Pin the pages in an IOAS with no domains then copy to an IOAS with domains */ test_ioctl_ioas_alloc(&ioas_id); - test_ioctl_ioas_map_id(ioas_id, buffer, BUFFER_SIZE, - ©_cmd.src_iova); - + if (variant->file) { + test_ioctl_ioas_map_id_file(ioas_id, mfd, 0, BUFFER_SIZE, + ©_cmd.src_iova); + } else { + test_ioctl_ioas_map_id(ioas_id, buf, BUFFER_SIZE, + ©_cmd.src_iova); + } test_cmd_create_access(ioas_id, &access_cmd.id, MOCK_FLAGS_ACCESS_CREATE_NEEDS_PIN_PAGES); @@ -1635,12 +1872,17 @@ TEST_F(iommufd_mock_domain, user_copy) &access_cmd)); copy_cmd.src_ioas_id = ioas_id; ASSERT_EQ(0, ioctl(self->fd, IOMMU_IOAS_COPY, ©_cmd)); - check_mock_iova(buffer, MOCK_APERTURE_START, BUFFER_SIZE); + check_mock_iova(buf, MOCK_APERTURE_START, BUFFER_SIZE); /* Now replace the ioas with a new one */ test_ioctl_ioas_alloc(&new_ioas_id); - test_ioctl_ioas_map_id(new_ioas_id, buffer, BUFFER_SIZE, - ©_cmd.src_iova); + if (variant->file) { + test_ioctl_ioas_map_id_file(new_ioas_id, mfd, 0, BUFFER_SIZE, + ©_cmd.src_iova); + } else { + test_ioctl_ioas_map_id(new_ioas_id, buf, BUFFER_SIZE, + ©_cmd.src_iova); + } test_cmd_access_replace_ioas(access_cmd.id, new_ioas_id); /* Destroy the old ioas and cleanup copied mapping */ @@ -1654,7 +1896,7 @@ TEST_F(iommufd_mock_domain, user_copy) &access_cmd)); copy_cmd.src_ioas_id = new_ioas_id; ASSERT_EQ(0, ioctl(self->fd, IOMMU_IOAS_COPY, ©_cmd)); - check_mock_iova(buffer, MOCK_APERTURE_START, BUFFER_SIZE); + check_mock_iova(buf, MOCK_APERTURE_START, BUFFER_SIZE); test_cmd_destroy_access_pages( access_cmd.id, access_cmd.access_pages.out_access_pages_id); @@ -2386,4 +2628,332 @@ TEST_F(vfio_compat_mock_domain, huge_map) } } +FIXTURE(iommufd_viommu) +{ + int fd; + uint32_t ioas_id; + uint32_t stdev_id; + uint32_t hwpt_id; + uint32_t nested_hwpt_id; + uint32_t device_id; + uint32_t viommu_id; +}; + +FIXTURE_VARIANT(iommufd_viommu) +{ + unsigned int viommu; +}; + +FIXTURE_SETUP(iommufd_viommu) +{ + self->fd = open("/dev/iommu", O_RDWR); + ASSERT_NE(-1, self->fd); + test_ioctl_ioas_alloc(&self->ioas_id); + test_ioctl_set_default_memory_limit(); + + if (variant->viommu) { + struct iommu_hwpt_selftest data = { + .iotlb = IOMMU_TEST_IOTLB_DEFAULT, + }; + + test_cmd_mock_domain(self->ioas_id, &self->stdev_id, NULL, + &self->device_id); + + /* Allocate a nesting parent hwpt */ + test_cmd_hwpt_alloc(self->device_id, self->ioas_id, + IOMMU_HWPT_ALLOC_NEST_PARENT, + &self->hwpt_id); + + /* Allocate a vIOMMU taking refcount of the parent hwpt */ + test_cmd_viommu_alloc(self->device_id, self->hwpt_id, + IOMMU_VIOMMU_TYPE_SELFTEST, + &self->viommu_id); + + /* Allocate a regular nested hwpt */ + test_cmd_hwpt_alloc_nested(self->device_id, self->viommu_id, 0, + &self->nested_hwpt_id, + IOMMU_HWPT_DATA_SELFTEST, &data, + sizeof(data)); + } +} + +FIXTURE_TEARDOWN(iommufd_viommu) +{ + teardown_iommufd(self->fd, _metadata); +} + +FIXTURE_VARIANT_ADD(iommufd_viommu, no_viommu) +{ + .viommu = 0, +}; + +FIXTURE_VARIANT_ADD(iommufd_viommu, mock_viommu) +{ + .viommu = 1, +}; + +TEST_F(iommufd_viommu, viommu_auto_destroy) +{ +} + +TEST_F(iommufd_viommu, viommu_negative_tests) +{ + uint32_t device_id = self->device_id; + uint32_t ioas_id = self->ioas_id; + uint32_t hwpt_id; + + if (self->device_id) { + /* Negative test -- invalid hwpt (hwpt_id=0) */ + test_err_viommu_alloc(ENOENT, device_id, 0, + IOMMU_VIOMMU_TYPE_SELFTEST, NULL); + + /* Negative test -- not a nesting parent hwpt */ + test_cmd_hwpt_alloc(device_id, ioas_id, 0, &hwpt_id); + test_err_viommu_alloc(EINVAL, device_id, hwpt_id, + IOMMU_VIOMMU_TYPE_SELFTEST, NULL); + test_ioctl_destroy(hwpt_id); + + /* Negative test -- unsupported viommu type */ + test_err_viommu_alloc(EOPNOTSUPP, device_id, self->hwpt_id, + 0xdead, NULL); + EXPECT_ERRNO(EBUSY, + _test_ioctl_destroy(self->fd, self->hwpt_id)); + EXPECT_ERRNO(EBUSY, + _test_ioctl_destroy(self->fd, self->viommu_id)); + } else { + test_err_viommu_alloc(ENOENT, self->device_id, self->hwpt_id, + IOMMU_VIOMMU_TYPE_SELFTEST, NULL); + } +} + +TEST_F(iommufd_viommu, viommu_alloc_nested_iopf) +{ + struct iommu_hwpt_selftest data = { + .iotlb = IOMMU_TEST_IOTLB_DEFAULT, + }; + uint32_t viommu_id = self->viommu_id; + uint32_t dev_id = self->device_id; + uint32_t iopf_hwpt_id; + uint32_t fault_id; + uint32_t fault_fd; + + if (self->device_id) { + test_ioctl_fault_alloc(&fault_id, &fault_fd); + test_err_hwpt_alloc_iopf( + ENOENT, dev_id, viommu_id, UINT32_MAX, + IOMMU_HWPT_FAULT_ID_VALID, &iopf_hwpt_id, + IOMMU_HWPT_DATA_SELFTEST, &data, sizeof(data)); + test_err_hwpt_alloc_iopf( + EOPNOTSUPP, dev_id, viommu_id, fault_id, + IOMMU_HWPT_FAULT_ID_VALID | (1 << 31), &iopf_hwpt_id, + IOMMU_HWPT_DATA_SELFTEST, &data, sizeof(data)); + test_cmd_hwpt_alloc_iopf( + dev_id, viommu_id, fault_id, IOMMU_HWPT_FAULT_ID_VALID, + &iopf_hwpt_id, IOMMU_HWPT_DATA_SELFTEST, &data, + sizeof(data)); + + test_cmd_mock_domain_replace(self->stdev_id, iopf_hwpt_id); + EXPECT_ERRNO(EBUSY, + _test_ioctl_destroy(self->fd, iopf_hwpt_id)); + test_cmd_trigger_iopf(dev_id, fault_fd); + + test_cmd_mock_domain_replace(self->stdev_id, self->ioas_id); + test_ioctl_destroy(iopf_hwpt_id); + close(fault_fd); + test_ioctl_destroy(fault_id); + } +} + +TEST_F(iommufd_viommu, vdevice_alloc) +{ + uint32_t viommu_id = self->viommu_id; + uint32_t dev_id = self->device_id; + uint32_t vdev_id = 0; + + if (dev_id) { + /* Set vdev_id to 0x99, unset it, and set to 0x88 */ + test_cmd_vdevice_alloc(viommu_id, dev_id, 0x99, &vdev_id); + test_err_vdevice_alloc(EEXIST, viommu_id, dev_id, 0x99, + &vdev_id); + test_ioctl_destroy(vdev_id); + test_cmd_vdevice_alloc(viommu_id, dev_id, 0x88, &vdev_id); + test_ioctl_destroy(vdev_id); + } else { + test_err_vdevice_alloc(ENOENT, viommu_id, dev_id, 0x99, NULL); + } +} + +TEST_F(iommufd_viommu, vdevice_cache) +{ + struct iommu_viommu_invalidate_selftest inv_reqs[2] = {}; + uint32_t viommu_id = self->viommu_id; + uint32_t dev_id = self->device_id; + uint32_t vdev_id = 0; + uint32_t num_inv; + + if (dev_id) { + test_cmd_vdevice_alloc(viommu_id, dev_id, 0x99, &vdev_id); + + test_cmd_dev_check_cache_all(dev_id, + IOMMU_TEST_DEV_CACHE_DEFAULT); + + /* Check data_type by passing zero-length array */ + num_inv = 0; + test_cmd_viommu_invalidate(viommu_id, inv_reqs, + sizeof(*inv_reqs), &num_inv); + assert(!num_inv); + + /* Negative test: Invalid data_type */ + num_inv = 1; + test_err_viommu_invalidate(EINVAL, viommu_id, inv_reqs, + IOMMU_VIOMMU_INVALIDATE_DATA_SELFTEST_INVALID, + sizeof(*inv_reqs), &num_inv); + assert(!num_inv); + + /* Negative test: structure size sanity */ + num_inv = 1; + test_err_viommu_invalidate(EINVAL, viommu_id, inv_reqs, + IOMMU_VIOMMU_INVALIDATE_DATA_SELFTEST, + sizeof(*inv_reqs) + 1, &num_inv); + assert(!num_inv); + + num_inv = 1; + test_err_viommu_invalidate(EINVAL, viommu_id, inv_reqs, + IOMMU_VIOMMU_INVALIDATE_DATA_SELFTEST, + 1, &num_inv); + assert(!num_inv); + + /* Negative test: invalid flag is passed */ + num_inv = 1; + inv_reqs[0].flags = 0xffffffff; + inv_reqs[0].vdev_id = 0x99; + test_err_viommu_invalidate(EOPNOTSUPP, viommu_id, inv_reqs, + IOMMU_VIOMMU_INVALIDATE_DATA_SELFTEST, + sizeof(*inv_reqs), &num_inv); + assert(!num_inv); + + /* Negative test: invalid data_uptr when array is not empty */ + num_inv = 1; + inv_reqs[0].flags = 0; + inv_reqs[0].vdev_id = 0x99; + test_err_viommu_invalidate(EINVAL, viommu_id, NULL, + IOMMU_VIOMMU_INVALIDATE_DATA_SELFTEST, + sizeof(*inv_reqs), &num_inv); + assert(!num_inv); + + /* Negative test: invalid entry_len when array is not empty */ + num_inv = 1; + inv_reqs[0].flags = 0; + inv_reqs[0].vdev_id = 0x99; + test_err_viommu_invalidate(EINVAL, viommu_id, inv_reqs, + IOMMU_VIOMMU_INVALIDATE_DATA_SELFTEST, + 0, &num_inv); + assert(!num_inv); + + /* Negative test: invalid cache_id */ + num_inv = 1; + inv_reqs[0].flags = 0; + inv_reqs[0].vdev_id = 0x99; + inv_reqs[0].cache_id = MOCK_DEV_CACHE_ID_MAX + 1; + test_err_viommu_invalidate(EINVAL, viommu_id, inv_reqs, + IOMMU_VIOMMU_INVALIDATE_DATA_SELFTEST, + sizeof(*inv_reqs), &num_inv); + assert(!num_inv); + + /* Negative test: invalid vdev_id */ + num_inv = 1; + inv_reqs[0].flags = 0; + inv_reqs[0].vdev_id = 0x9; + inv_reqs[0].cache_id = 0; + test_err_viommu_invalidate(EINVAL, viommu_id, inv_reqs, + IOMMU_VIOMMU_INVALIDATE_DATA_SELFTEST, + sizeof(*inv_reqs), &num_inv); + assert(!num_inv); + + /* + * Invalidate the 1st cache entry but fail the 2nd request + * due to invalid flags configuration in the 2nd request. + */ + num_inv = 2; + inv_reqs[0].flags = 0; + inv_reqs[0].vdev_id = 0x99; + inv_reqs[0].cache_id = 0; + inv_reqs[1].flags = 0xffffffff; + inv_reqs[1].vdev_id = 0x99; + inv_reqs[1].cache_id = 1; + test_err_viommu_invalidate(EOPNOTSUPP, viommu_id, inv_reqs, + IOMMU_VIOMMU_INVALIDATE_DATA_SELFTEST, + sizeof(*inv_reqs), &num_inv); + assert(num_inv == 1); + test_cmd_dev_check_cache(dev_id, 0, 0); + test_cmd_dev_check_cache(dev_id, 1, + IOMMU_TEST_DEV_CACHE_DEFAULT); + test_cmd_dev_check_cache(dev_id, 2, + IOMMU_TEST_DEV_CACHE_DEFAULT); + test_cmd_dev_check_cache(dev_id, 3, + IOMMU_TEST_DEV_CACHE_DEFAULT); + + /* + * Invalidate the 1st cache entry but fail the 2nd request + * due to invalid cache_id configuration in the 2nd request. + */ + num_inv = 2; + inv_reqs[0].flags = 0; + inv_reqs[0].vdev_id = 0x99; + inv_reqs[0].cache_id = 0; + inv_reqs[1].flags = 0; + inv_reqs[1].vdev_id = 0x99; + inv_reqs[1].cache_id = MOCK_DEV_CACHE_ID_MAX + 1; + test_err_viommu_invalidate(EINVAL, viommu_id, inv_reqs, + IOMMU_VIOMMU_INVALIDATE_DATA_SELFTEST, + sizeof(*inv_reqs), &num_inv); + assert(num_inv == 1); + test_cmd_dev_check_cache(dev_id, 0, 0); + test_cmd_dev_check_cache(dev_id, 1, + IOMMU_TEST_DEV_CACHE_DEFAULT); + test_cmd_dev_check_cache(dev_id, 2, + IOMMU_TEST_DEV_CACHE_DEFAULT); + test_cmd_dev_check_cache(dev_id, 3, + IOMMU_TEST_DEV_CACHE_DEFAULT); + + /* Invalidate the 2nd cache entry and verify */ + num_inv = 1; + inv_reqs[0].flags = 0; + inv_reqs[0].vdev_id = 0x99; + inv_reqs[0].cache_id = 1; + test_cmd_viommu_invalidate(viommu_id, inv_reqs, + sizeof(*inv_reqs), &num_inv); + assert(num_inv == 1); + test_cmd_dev_check_cache(dev_id, 0, 0); + test_cmd_dev_check_cache(dev_id, 1, 0); + test_cmd_dev_check_cache(dev_id, 2, + IOMMU_TEST_DEV_CACHE_DEFAULT); + test_cmd_dev_check_cache(dev_id, 3, + IOMMU_TEST_DEV_CACHE_DEFAULT); + + /* Invalidate the 3rd and 4th cache entries and verify */ + num_inv = 2; + inv_reqs[0].flags = 0; + inv_reqs[0].vdev_id = 0x99; + inv_reqs[0].cache_id = 2; + inv_reqs[1].flags = 0; + inv_reqs[1].vdev_id = 0x99; + inv_reqs[1].cache_id = 3; + test_cmd_viommu_invalidate(viommu_id, inv_reqs, + sizeof(*inv_reqs), &num_inv); + assert(num_inv == 2); + test_cmd_dev_check_cache_all(dev_id, 0); + + /* Invalidate all cache entries for nested_dev_id[1] and verify */ + num_inv = 1; + inv_reqs[0].vdev_id = 0x99; + inv_reqs[0].flags = IOMMU_TEST_INVALIDATE_FLAG_ALL; + test_cmd_viommu_invalidate(viommu_id, inv_reqs, + sizeof(*inv_reqs), &num_inv); + assert(num_inv == 1); + test_cmd_dev_check_cache_all(dev_id, 0); + test_ioctl_destroy(vdev_id); + } +} + TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/iommu/iommufd_fail_nth.c b/tools/testing/selftests/iommu/iommufd_fail_nth.c index c5d5e69452b0..64b1f8e1b0cf 100644 --- a/tools/testing/selftests/iommu/iommufd_fail_nth.c +++ b/tools/testing/selftests/iommu/iommufd_fail_nth.c @@ -47,6 +47,9 @@ static __attribute__((constructor)) void setup_buffer(void) buffer = mmap(0, BUFFER_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0); + + mfd_buffer = memfd_mmap(BUFFER_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED, + &mfd); } /* @@ -331,6 +334,42 @@ TEST_FAIL_NTH(basic_fail_nth, map_domain) return 0; } +/* iopt_area_fill_domains() and iopt_area_fill_domain() */ +TEST_FAIL_NTH(basic_fail_nth, map_file_domain) +{ + uint32_t ioas_id; + __u32 stdev_id; + __u32 hwpt_id; + __u64 iova; + + self->fd = open("/dev/iommu", O_RDWR); + if (self->fd == -1) + return -1; + + if (_test_ioctl_ioas_alloc(self->fd, &ioas_id)) + return -1; + + if (_test_ioctl_set_temp_memory_limit(self->fd, 32)) + return -1; + + fail_nth_enable(); + + if (_test_cmd_mock_domain(self->fd, ioas_id, &stdev_id, &hwpt_id, NULL)) + return -1; + + if (_test_ioctl_ioas_map_file(self->fd, ioas_id, mfd, 0, 262144, &iova, + IOMMU_IOAS_MAP_WRITEABLE | + IOMMU_IOAS_MAP_READABLE)) + return -1; + + if (_test_ioctl_destroy(self->fd, stdev_id)) + return -1; + + if (_test_cmd_mock_domain(self->fd, ioas_id, &stdev_id, &hwpt_id, NULL)) + return -1; + return 0; +} + TEST_FAIL_NTH(basic_fail_nth, map_two_domains) { uint32_t ioas_id; @@ -576,12 +615,19 @@ TEST_FAIL_NTH(basic_fail_nth, access_pin_domain) /* device.c */ TEST_FAIL_NTH(basic_fail_nth, device) { + struct iommu_hwpt_selftest data = { + .iotlb = IOMMU_TEST_IOTLB_DEFAULT, + }; struct iommu_test_hw_info info; + uint32_t fault_id, fault_fd; + uint32_t fault_hwpt_id; uint32_t ioas_id; uint32_t ioas_id2; uint32_t stdev_id; uint32_t idev_id; uint32_t hwpt_id; + uint32_t viommu_id; + uint32_t vdev_id; __u64 iova; self->fd = open("/dev/iommu", O_RDWR); @@ -624,6 +670,28 @@ TEST_FAIL_NTH(basic_fail_nth, device) if (_test_cmd_mock_domain_replace(self->fd, stdev_id, hwpt_id, NULL)) return -1; + + if (_test_cmd_hwpt_alloc(self->fd, idev_id, ioas_id, 0, + IOMMU_HWPT_ALLOC_NEST_PARENT, &hwpt_id, + IOMMU_HWPT_DATA_NONE, 0, 0)) + return -1; + + if (_test_cmd_viommu_alloc(self->fd, idev_id, hwpt_id, + IOMMU_VIOMMU_TYPE_SELFTEST, 0, &viommu_id)) + return -1; + + if (_test_cmd_vdevice_alloc(self->fd, viommu_id, idev_id, 0, &vdev_id)) + return -1; + + if (_test_ioctl_fault_alloc(self->fd, &fault_id, &fault_fd)) + return -1; + close(fault_fd); + + if (_test_cmd_hwpt_alloc(self->fd, idev_id, hwpt_id, fault_id, + IOMMU_HWPT_FAULT_ID_VALID, &fault_hwpt_id, + IOMMU_HWPT_DATA_SELFTEST, &data, sizeof(data))) + return -1; + return 0; } diff --git a/tools/testing/selftests/iommu/iommufd_utils.h b/tools/testing/selftests/iommu/iommufd_utils.h index 40f6f14ce136..d979f5b0efe8 100644 --- a/tools/testing/selftests/iommu/iommufd_utils.h +++ b/tools/testing/selftests/iommu/iommufd_utils.h @@ -22,6 +22,12 @@ #define BIT_MASK(nr) (1UL << ((nr) % __BITS_PER_LONG)) #define BIT_WORD(nr) ((nr) / __BITS_PER_LONG) +enum { + IOPT_PAGES_ACCOUNT_NONE = 0, + IOPT_PAGES_ACCOUNT_USER = 1, + IOPT_PAGES_ACCOUNT_MM = 2, +}; + #define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d)) static inline void set_bit(unsigned int nr, unsigned long *addr) @@ -40,12 +46,28 @@ static inline bool test_bit(unsigned int nr, unsigned long *addr) static void *buffer; static unsigned long BUFFER_SIZE; +static void *mfd_buffer; +static int mfd; + static unsigned long PAGE_SIZE; #define sizeof_field(TYPE, MEMBER) sizeof((((TYPE *)0)->MEMBER)) #define offsetofend(TYPE, MEMBER) \ (offsetof(TYPE, MEMBER) + sizeof_field(TYPE, MEMBER)) +static inline void *memfd_mmap(size_t length, int prot, int flags, int *mfd_p) +{ + int mfd_flags = (flags & MAP_HUGETLB) ? MFD_HUGETLB : 0; + int mfd = memfd_create("buffer", mfd_flags); + + if (mfd <= 0) + return MAP_FAILED; + if (ftruncate(mfd, length)) + return MAP_FAILED; + *mfd_p = mfd; + return mmap(0, length, prot, flags, mfd, 0); +} + /* * Have the kernel check the refcount on pages. I don't know why a freshly * mmap'd anon non-compound page starts out with a ref of 3 @@ -234,6 +256,30 @@ static int _test_cmd_hwpt_alloc(int fd, __u32 device_id, __u32 pt_id, __u32 ft_i test_cmd_hwpt_check_iotlb(hwpt_id, i, expected); \ }) +#define test_cmd_dev_check_cache(device_id, cache_id, expected) \ + ({ \ + struct iommu_test_cmd test_cmd = { \ + .size = sizeof(test_cmd), \ + .op = IOMMU_TEST_OP_DEV_CHECK_CACHE, \ + .id = device_id, \ + .check_dev_cache = { \ + .id = cache_id, \ + .cache = expected, \ + }, \ + }; \ + ASSERT_EQ(0, ioctl(self->fd, \ + _IOMMU_TEST_CMD( \ + IOMMU_TEST_OP_DEV_CHECK_CACHE), \ + &test_cmd)); \ + }) + +#define test_cmd_dev_check_cache_all(device_id, expected) \ + ({ \ + int c; \ + for (c = 0; c < MOCK_DEV_CACHE_NUM; c++) \ + test_cmd_dev_check_cache(device_id, c, expected); \ + }) + static int _test_cmd_hwpt_invalidate(int fd, __u32 hwpt_id, void *reqs, uint32_t data_type, uint32_t lreq, uint32_t *nreqs) @@ -265,6 +311,38 @@ static int _test_cmd_hwpt_invalidate(int fd, __u32 hwpt_id, void *reqs, data_type, lreq, nreqs)); \ }) +static int _test_cmd_viommu_invalidate(int fd, __u32 viommu_id, void *reqs, + uint32_t data_type, uint32_t lreq, + uint32_t *nreqs) +{ + struct iommu_hwpt_invalidate cmd = { + .size = sizeof(cmd), + .hwpt_id = viommu_id, + .data_type = data_type, + .data_uptr = (uint64_t)reqs, + .entry_len = lreq, + .entry_num = *nreqs, + }; + int rc = ioctl(fd, IOMMU_HWPT_INVALIDATE, &cmd); + *nreqs = cmd.entry_num; + return rc; +} + +#define test_cmd_viommu_invalidate(viommu, reqs, lreq, nreqs) \ + ({ \ + ASSERT_EQ(0, \ + _test_cmd_viommu_invalidate(self->fd, viommu, reqs, \ + IOMMU_VIOMMU_INVALIDATE_DATA_SELFTEST, \ + lreq, nreqs)); \ + }) +#define test_err_viommu_invalidate(_errno, viommu_id, reqs, data_type, lreq, \ + nreqs) \ + ({ \ + EXPECT_ERRNO(_errno, _test_cmd_viommu_invalidate( \ + self->fd, viommu_id, reqs, \ + data_type, lreq, nreqs)); \ + }) + static int _test_cmd_access_replace_ioas(int fd, __u32 access_id, unsigned int ioas_id) { @@ -589,6 +667,47 @@ static int _test_ioctl_ioas_unmap(int fd, unsigned int ioas_id, uint64_t iova, EXPECT_ERRNO(_errno, _test_ioctl_ioas_unmap(self->fd, self->ioas_id, \ iova, length, NULL)) +static int _test_ioctl_ioas_map_file(int fd, unsigned int ioas_id, int mfd, + size_t start, size_t length, __u64 *iova, + unsigned int flags) +{ + struct iommu_ioas_map_file cmd = { + .size = sizeof(cmd), + .flags = flags, + .ioas_id = ioas_id, + .fd = mfd, + .start = start, + .length = length, + }; + int ret; + + if (flags & IOMMU_IOAS_MAP_FIXED_IOVA) + cmd.iova = *iova; + + ret = ioctl(fd, IOMMU_IOAS_MAP_FILE, &cmd); + *iova = cmd.iova; + return ret; +} + +#define test_ioctl_ioas_map_file(mfd, start, length, iova_p) \ + ASSERT_EQ(0, \ + _test_ioctl_ioas_map_file( \ + self->fd, self->ioas_id, mfd, start, length, iova_p, \ + IOMMU_IOAS_MAP_WRITEABLE | IOMMU_IOAS_MAP_READABLE)) + +#define test_err_ioctl_ioas_map_file(_errno, mfd, start, length, iova_p) \ + EXPECT_ERRNO( \ + _errno, \ + _test_ioctl_ioas_map_file( \ + self->fd, self->ioas_id, mfd, start, length, iova_p, \ + IOMMU_IOAS_MAP_WRITEABLE | IOMMU_IOAS_MAP_READABLE)) + +#define test_ioctl_ioas_map_id_file(ioas_id, mfd, start, length, iova_p) \ + ASSERT_EQ(0, \ + _test_ioctl_ioas_map_file( \ + self->fd, ioas_id, mfd, start, length, iova_p, \ + IOMMU_IOAS_MAP_WRITEABLE | IOMMU_IOAS_MAP_READABLE)) + static int _test_ioctl_set_temp_memory_limit(int fd, unsigned int limit) { struct iommu_test_cmd memlimit_cmd = { @@ -762,3 +881,58 @@ static int _test_cmd_trigger_iopf(int fd, __u32 device_id, __u32 fault_fd) #define test_cmd_trigger_iopf(device_id, fault_fd) \ ASSERT_EQ(0, _test_cmd_trigger_iopf(self->fd, device_id, fault_fd)) + +static int _test_cmd_viommu_alloc(int fd, __u32 device_id, __u32 hwpt_id, + __u32 type, __u32 flags, __u32 *viommu_id) +{ + struct iommu_viommu_alloc cmd = { + .size = sizeof(cmd), + .flags = flags, + .type = type, + .dev_id = device_id, + .hwpt_id = hwpt_id, + }; + int ret; + + ret = ioctl(fd, IOMMU_VIOMMU_ALLOC, &cmd); + if (ret) + return ret; + if (viommu_id) + *viommu_id = cmd.out_viommu_id; + return 0; +} + +#define test_cmd_viommu_alloc(device_id, hwpt_id, type, viommu_id) \ + ASSERT_EQ(0, _test_cmd_viommu_alloc(self->fd, device_id, hwpt_id, \ + type, 0, viommu_id)) +#define test_err_viommu_alloc(_errno, device_id, hwpt_id, type, viommu_id) \ + EXPECT_ERRNO(_errno, \ + _test_cmd_viommu_alloc(self->fd, device_id, hwpt_id, \ + type, 0, viommu_id)) + +static int _test_cmd_vdevice_alloc(int fd, __u32 viommu_id, __u32 idev_id, + __u64 virt_id, __u32 *vdev_id) +{ + struct iommu_vdevice_alloc cmd = { + .size = sizeof(cmd), + .dev_id = idev_id, + .viommu_id = viommu_id, + .virt_id = virt_id, + }; + int ret; + + ret = ioctl(fd, IOMMU_VDEVICE_ALLOC, &cmd); + if (ret) + return ret; + if (vdev_id) + *vdev_id = cmd.out_vdevice_id; + return 0; +} + +#define test_cmd_vdevice_alloc(viommu_id, idev_id, virt_id, vdev_id) \ + ASSERT_EQ(0, _test_cmd_vdevice_alloc(self->fd, viommu_id, idev_id, \ + virt_id, vdev_id)) +#define test_err_vdevice_alloc(_errno, viommu_id, idev_id, virt_id, vdev_id) \ + EXPECT_ERRNO(_errno, \ + _test_cmd_vdevice_alloc(self->fd, viommu_id, idev_id, \ + virt_id, vdev_id)) diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index 960cf6a77198..41593d2e7de9 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -55,6 +55,7 @@ LIBKVM_aarch64 += lib/aarch64/vgic.c LIBKVM_s390x += lib/s390x/diag318_test_handler.c LIBKVM_s390x += lib/s390x/processor.c LIBKVM_s390x += lib/s390x/ucall.c +LIBKVM_s390x += lib/s390x/facility.c LIBKVM_riscv += lib/riscv/handlers.S LIBKVM_riscv += lib/riscv/processor.c @@ -67,7 +68,7 @@ TEST_PROGS_x86_64 += x86_64/nx_huge_pages_test.sh TEST_GEN_PROGS_x86_64 = x86_64/cpuid_test TEST_GEN_PROGS_x86_64 += x86_64/cr4_cpuid_sync_test TEST_GEN_PROGS_x86_64 += x86_64/dirty_log_page_splitting_test -TEST_GEN_PROGS_x86_64 += x86_64/get_msr_index_features +TEST_GEN_PROGS_x86_64 += x86_64/feature_msrs_test TEST_GEN_PROGS_x86_64 += x86_64/exit_on_emulation_failure_test TEST_GEN_PROGS_x86_64 += x86_64/fix_hypercall_test TEST_GEN_PROGS_x86_64 += x86_64/hwcr_msr_test @@ -156,6 +157,7 @@ TEST_GEN_PROGS_aarch64 += aarch64/aarch32_id_regs TEST_GEN_PROGS_aarch64 += aarch64/arch_timer_edge_cases TEST_GEN_PROGS_aarch64 += aarch64/debug-exceptions TEST_GEN_PROGS_aarch64 += aarch64/hypercalls +TEST_GEN_PROGS_aarch64 += aarch64/mmio_abort TEST_GEN_PROGS_aarch64 += aarch64/page_fault_test TEST_GEN_PROGS_aarch64 += aarch64/psci_test TEST_GEN_PROGS_aarch64 += aarch64/set_id_regs @@ -189,6 +191,7 @@ TEST_GEN_PROGS_s390x += s390x/sync_regs_test TEST_GEN_PROGS_s390x += s390x/tprot TEST_GEN_PROGS_s390x += s390x/cmma_test TEST_GEN_PROGS_s390x += s390x/debug_test +TEST_GEN_PROGS_s390x += s390x/cpumodel_subfuncs_test TEST_GEN_PROGS_s390x += s390x/shared_zeropage_test TEST_GEN_PROGS_s390x += s390x/ucontrol_test TEST_GEN_PROGS_s390x += demand_paging_test @@ -241,13 +244,18 @@ CFLAGS += -Wall -Wstrict-prototypes -Wuninitialized -O2 -g -std=gnu99 \ -Wno-gnu-variable-sized-type-not-at-end -MD -MP -DCONFIG_64BIT \ -fno-builtin-memcmp -fno-builtin-memcpy \ -fno-builtin-memset -fno-builtin-strnlen \ - -fno-stack-protector -fno-PIE -I$(LINUX_TOOL_INCLUDE) \ - -I$(LINUX_TOOL_ARCH_INCLUDE) -I$(LINUX_HDR_PATH) -Iinclude \ - -I$(<D) -Iinclude/$(ARCH_DIR) -I ../rseq -I.. $(EXTRA_CFLAGS) \ - $(KHDR_INCLUDES) + -fno-stack-protector -fno-PIE -fno-strict-aliasing \ + -I$(LINUX_TOOL_INCLUDE) -I$(LINUX_TOOL_ARCH_INCLUDE) \ + -I$(LINUX_HDR_PATH) -Iinclude -I$(<D) -Iinclude/$(ARCH_DIR) \ + -I ../rseq -I.. $(EXTRA_CFLAGS) $(KHDR_INCLUDES) ifeq ($(ARCH),s390) CFLAGS += -march=z10 endif +ifeq ($(ARCH),x86) +ifeq ($(shell echo "void foo(void) { }" | $(CC) -march=x86-64-v2 -x c - -c -o /dev/null 2>/dev/null; echo "$$?"),0) + CFLAGS += -march=x86-64-v2 +endif +endif ifeq ($(ARCH),arm64) tools_dir := $(top_srcdir)/tools arm64_tools_dir := $(tools_dir)/arch/arm64/tools/ diff --git a/tools/testing/selftests/kvm/aarch64/debug-exceptions.c b/tools/testing/selftests/kvm/aarch64/debug-exceptions.c index 2582c49e525a..ff7a949fc96a 100644 --- a/tools/testing/selftests/kvm/aarch64/debug-exceptions.c +++ b/tools/testing/selftests/kvm/aarch64/debug-exceptions.c @@ -433,15 +433,15 @@ static void test_guest_debug_exceptions(uint8_t bpn, uint8_t wpn, uint8_t ctx_bp vcpu_init_descriptor_tables(vcpu); vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT, - ESR_EC_BRK_INS, guest_sw_bp_handler); + ESR_ELx_EC_BRK64, guest_sw_bp_handler); vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT, - ESR_EC_HW_BP_CURRENT, guest_hw_bp_handler); + ESR_ELx_EC_BREAKPT_CUR, guest_hw_bp_handler); vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT, - ESR_EC_WP_CURRENT, guest_wp_handler); + ESR_ELx_EC_WATCHPT_CUR, guest_wp_handler); vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT, - ESR_EC_SSTEP_CURRENT, guest_ss_handler); + ESR_ELx_EC_SOFTSTP_CUR, guest_ss_handler); vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT, - ESR_EC_SVC64, guest_svc_handler); + ESR_ELx_EC_SVC64, guest_svc_handler); /* Specify bpn/wpn/ctx_bpn to be tested */ vcpu_args_set(vcpu, 3, bpn, wpn, ctx_bpn); diff --git a/tools/testing/selftests/kvm/aarch64/mmio_abort.c b/tools/testing/selftests/kvm/aarch64/mmio_abort.c new file mode 100644 index 000000000000..8b7a80a51b1c --- /dev/null +++ b/tools/testing/selftests/kvm/aarch64/mmio_abort.c @@ -0,0 +1,159 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * mmio_abort - Tests for userspace MMIO abort injection + * + * Copyright (c) 2024 Google LLC + */ +#include "processor.h" +#include "test_util.h" + +#define MMIO_ADDR 0x8000000ULL + +static u64 expected_abort_pc; + +static void expect_sea_handler(struct ex_regs *regs) +{ + u64 esr = read_sysreg(esr_el1); + + GUEST_ASSERT_EQ(regs->pc, expected_abort_pc); + GUEST_ASSERT_EQ(ESR_ELx_EC(esr), ESR_ELx_EC_DABT_CUR); + GUEST_ASSERT_EQ(esr & ESR_ELx_FSC_TYPE, ESR_ELx_FSC_EXTABT); + + GUEST_DONE(); +} + +static void unexpected_dabt_handler(struct ex_regs *regs) +{ + GUEST_FAIL("Unexpected data abort at PC: %lx\n", regs->pc); +} + +static struct kvm_vm *vm_create_with_dabt_handler(struct kvm_vcpu **vcpu, void *guest_code, + handler_fn dabt_handler) +{ + struct kvm_vm *vm = vm_create_with_one_vcpu(vcpu, guest_code); + + vm_init_descriptor_tables(vm); + vcpu_init_descriptor_tables(*vcpu); + vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT, ESR_ELx_EC_DABT_CUR, dabt_handler); + + virt_map(vm, MMIO_ADDR, MMIO_ADDR, 1); + + return vm; +} + +static void vcpu_inject_extabt(struct kvm_vcpu *vcpu) +{ + struct kvm_vcpu_events events = {}; + + events.exception.ext_dabt_pending = true; + vcpu_events_set(vcpu, &events); +} + +static void vcpu_run_expect_done(struct kvm_vcpu *vcpu) +{ + struct ucall uc; + + vcpu_run(vcpu); + switch (get_ucall(vcpu, &uc)) { + case UCALL_ABORT: + REPORT_GUEST_ASSERT(uc); + break; + case UCALL_DONE: + break; + default: + TEST_FAIL("Unexpected ucall: %lu", uc.cmd); + } +} + +extern char test_mmio_abort_insn; + +static void test_mmio_abort_guest(void) +{ + WRITE_ONCE(expected_abort_pc, (u64)&test_mmio_abort_insn); + + asm volatile("test_mmio_abort_insn:\n\t" + "ldr x0, [%0]\n\t" + : : "r" (MMIO_ADDR) : "x0", "memory"); + + GUEST_FAIL("MMIO instruction should not retire"); +} + +/* + * Test that KVM doesn't complete MMIO emulation when userspace has made an + * external abort pending for the instruction. + */ +static void test_mmio_abort(void) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm = vm_create_with_dabt_handler(&vcpu, test_mmio_abort_guest, + expect_sea_handler); + struct kvm_run *run = vcpu->run; + + vcpu_run(vcpu); + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_MMIO); + TEST_ASSERT_EQ(run->mmio.phys_addr, MMIO_ADDR); + TEST_ASSERT_EQ(run->mmio.len, sizeof(unsigned long)); + TEST_ASSERT(!run->mmio.is_write, "Expected MMIO read"); + + vcpu_inject_extabt(vcpu); + vcpu_run_expect_done(vcpu); + kvm_vm_free(vm); +} + +extern char test_mmio_nisv_insn; + +static void test_mmio_nisv_guest(void) +{ + WRITE_ONCE(expected_abort_pc, (u64)&test_mmio_nisv_insn); + + asm volatile("test_mmio_nisv_insn:\n\t" + "ldr x0, [%0], #8\n\t" + : : "r" (MMIO_ADDR) : "x0", "memory"); + + GUEST_FAIL("MMIO instruction should not retire"); +} + +/* + * Test that the KVM_RUN ioctl fails for ESR_EL2.ISV=0 MMIO aborts if userspace + * hasn't enabled KVM_CAP_ARM_NISV_TO_USER. + */ +static void test_mmio_nisv(void) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm = vm_create_with_dabt_handler(&vcpu, test_mmio_nisv_guest, + unexpected_dabt_handler); + + TEST_ASSERT(_vcpu_run(vcpu), "Expected nonzero return code from KVM_RUN"); + TEST_ASSERT_EQ(errno, ENOSYS); + + kvm_vm_free(vm); +} + +/* + * Test that ESR_EL2.ISV=0 MMIO aborts reach userspace and that an injected SEA + * reaches the guest. + */ +static void test_mmio_nisv_abort(void) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm = vm_create_with_dabt_handler(&vcpu, test_mmio_nisv_guest, + expect_sea_handler); + struct kvm_run *run = vcpu->run; + + vm_enable_cap(vm, KVM_CAP_ARM_NISV_TO_USER, 1); + + vcpu_run(vcpu); + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_ARM_NISV); + TEST_ASSERT_EQ(run->arm_nisv.fault_ipa, MMIO_ADDR); + + vcpu_inject_extabt(vcpu); + vcpu_run_expect_done(vcpu); + kvm_vm_free(vm); +} + +int main(void) +{ + test_mmio_abort(); + test_mmio_nisv(); + test_mmio_nisv_abort(); +} diff --git a/tools/testing/selftests/kvm/aarch64/no-vgic-v3.c b/tools/testing/selftests/kvm/aarch64/no-vgic-v3.c index 943d65fc6b0b..58304bbc2036 100644 --- a/tools/testing/selftests/kvm/aarch64/no-vgic-v3.c +++ b/tools/testing/selftests/kvm/aarch64/no-vgic-v3.c @@ -150,7 +150,7 @@ static void test_guest_no_gicv3(void) vcpu_init_descriptor_tables(vcpu); vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT, - ESR_EC_UNKNOWN, guest_undef_handler); + ESR_ELx_EC_UNKNOWN, guest_undef_handler); test_run_vcpu(vcpu); diff --git a/tools/testing/selftests/kvm/aarch64/page_fault_test.c b/tools/testing/selftests/kvm/aarch64/page_fault_test.c index d29b08198b42..ec33a8f9c908 100644 --- a/tools/testing/selftests/kvm/aarch64/page_fault_test.c +++ b/tools/testing/selftests/kvm/aarch64/page_fault_test.c @@ -544,9 +544,9 @@ static void setup_abort_handlers(struct kvm_vm *vm, struct kvm_vcpu *vcpu, vcpu_init_descriptor_tables(vcpu); vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT, - ESR_EC_DABT, no_dabt_handler); + ESR_ELx_EC_DABT_CUR, no_dabt_handler); vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT, - ESR_EC_IABT, no_iabt_handler); + ESR_ELx_EC_IABT_CUR, no_iabt_handler); } static void setup_gva_maps(struct kvm_vm *vm) diff --git a/tools/testing/selftests/kvm/aarch64/psci_test.c b/tools/testing/selftests/kvm/aarch64/psci_test.c index 61731a950def..eaa7655fefc1 100644 --- a/tools/testing/selftests/kvm/aarch64/psci_test.c +++ b/tools/testing/selftests/kvm/aarch64/psci_test.c @@ -54,6 +54,15 @@ static uint64_t psci_system_suspend(uint64_t entry_addr, uint64_t context_id) return res.a0; } +static uint64_t psci_system_off2(uint64_t type, uint64_t cookie) +{ + struct arm_smccc_res res; + + smccc_hvc(PSCI_1_3_FN64_SYSTEM_OFF2, type, cookie, 0, 0, 0, 0, 0, &res); + + return res.a0; +} + static uint64_t psci_features(uint32_t func_id) { struct arm_smccc_res res; @@ -188,11 +197,94 @@ static void host_test_system_suspend(void) kvm_vm_free(vm); } +static void guest_test_system_off2(void) +{ + uint64_t ret; + + /* assert that SYSTEM_OFF2 is discoverable */ + GUEST_ASSERT(psci_features(PSCI_1_3_FN_SYSTEM_OFF2) & + PSCI_1_3_OFF_TYPE_HIBERNATE_OFF); + GUEST_ASSERT(psci_features(PSCI_1_3_FN64_SYSTEM_OFF2) & + PSCI_1_3_OFF_TYPE_HIBERNATE_OFF); + + /* With non-zero 'cookie' field, it should fail */ + ret = psci_system_off2(PSCI_1_3_OFF_TYPE_HIBERNATE_OFF, 1); + GUEST_ASSERT(ret == PSCI_RET_INVALID_PARAMS); + + /* + * This would normally never return, so KVM sets the return value + * to PSCI_RET_INTERNAL_FAILURE. The test case *does* return, so + * that it can test both values for HIBERNATE_OFF. + */ + ret = psci_system_off2(PSCI_1_3_OFF_TYPE_HIBERNATE_OFF, 0); + GUEST_ASSERT(ret == PSCI_RET_INTERNAL_FAILURE); + + /* + * Revision F.b of the PSCI v1.3 specification documents zero as an + * alias for HIBERNATE_OFF, since that's the value used in earlier + * revisions of the spec and some implementations in the field. + */ + ret = psci_system_off2(0, 1); + GUEST_ASSERT(ret == PSCI_RET_INVALID_PARAMS); + + ret = psci_system_off2(0, 0); + GUEST_ASSERT(ret == PSCI_RET_INTERNAL_FAILURE); + + GUEST_DONE(); +} + +static void host_test_system_off2(void) +{ + struct kvm_vcpu *source, *target; + struct kvm_mp_state mps; + uint64_t psci_version = 0; + int nr_shutdowns = 0; + struct kvm_run *run; + struct ucall uc; + + setup_vm(guest_test_system_off2, &source, &target); + + vcpu_get_reg(target, KVM_REG_ARM_PSCI_VERSION, &psci_version); + + TEST_ASSERT(psci_version >= PSCI_VERSION(1, 3), + "Unexpected PSCI version %lu.%lu", + PSCI_VERSION_MAJOR(psci_version), + PSCI_VERSION_MINOR(psci_version)); + + vcpu_power_off(target); + run = source->run; + + enter_guest(source); + while (run->exit_reason == KVM_EXIT_SYSTEM_EVENT) { + TEST_ASSERT(run->system_event.type == KVM_SYSTEM_EVENT_SHUTDOWN, + "Unhandled system event: %u (expected: %u)", + run->system_event.type, KVM_SYSTEM_EVENT_SHUTDOWN); + TEST_ASSERT(run->system_event.ndata >= 1, + "Unexpected amount of system event data: %u (expected, >= 1)", + run->system_event.ndata); + TEST_ASSERT(run->system_event.data[0] & KVM_SYSTEM_EVENT_SHUTDOWN_FLAG_PSCI_OFF2, + "PSCI_OFF2 flag not set. Flags %llu (expected %llu)", + run->system_event.data[0], KVM_SYSTEM_EVENT_SHUTDOWN_FLAG_PSCI_OFF2); + + nr_shutdowns++; + + /* Restart the vCPU */ + mps.mp_state = KVM_MP_STATE_RUNNABLE; + vcpu_mp_state_set(source, &mps); + + enter_guest(source); + } + + TEST_ASSERT(get_ucall(source, &uc) == UCALL_DONE, "Guest did not exit cleanly"); + TEST_ASSERT(nr_shutdowns == 2, "Two shutdown events were expected, but saw %d", nr_shutdowns); +} + int main(void) { TEST_REQUIRE(kvm_has_cap(KVM_CAP_ARM_SYSTEM_SUSPEND)); host_test_cpu_on(); host_test_system_suspend(); + host_test_system_off2(); return 0; } diff --git a/tools/testing/selftests/kvm/aarch64/set_id_regs.c b/tools/testing/selftests/kvm/aarch64/set_id_regs.c index 2a3fe7914b72..a79b7f18452d 100644 --- a/tools/testing/selftests/kvm/aarch64/set_id_regs.c +++ b/tools/testing/selftests/kvm/aarch64/set_id_regs.c @@ -68,6 +68,8 @@ struct test_feature_reg { } static const struct reg_ftr_bits ftr_id_aa64dfr0_el1[] = { + S_REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64DFR0_EL1, DoubleLock, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64DFR0_EL1, WRPs, 0), S_REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64DFR0_EL1, PMUVer, 0), REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64DFR0_EL1, DebugVer, ID_AA64DFR0_EL1_DebugVer_IMP), REG_FTR_END, @@ -134,6 +136,13 @@ static const struct reg_ftr_bits ftr_id_aa64pfr0_el1[] = { REG_FTR_END, }; +static const struct reg_ftr_bits ftr_id_aa64pfr1_el1[] = { + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR1_EL1, CSV2_frac, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR1_EL1, SSBS, ID_AA64PFR1_EL1_SSBS_NI), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR1_EL1, BT, 0), + REG_FTR_END, +}; + static const struct reg_ftr_bits ftr_id_aa64mmfr0_el1[] = { REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, ECV, 0), REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, EXS, 0), @@ -200,6 +209,7 @@ static struct test_feature_reg test_regs[] = { TEST_REG(SYS_ID_AA64ISAR1_EL1, ftr_id_aa64isar1_el1), TEST_REG(SYS_ID_AA64ISAR2_EL1, ftr_id_aa64isar2_el1), TEST_REG(SYS_ID_AA64PFR0_EL1, ftr_id_aa64pfr0_el1), + TEST_REG(SYS_ID_AA64PFR1_EL1, ftr_id_aa64pfr1_el1), TEST_REG(SYS_ID_AA64MMFR0_EL1, ftr_id_aa64mmfr0_el1), TEST_REG(SYS_ID_AA64MMFR1_EL1, ftr_id_aa64mmfr1_el1), TEST_REG(SYS_ID_AA64MMFR2_EL1, ftr_id_aa64mmfr2_el1), @@ -433,6 +443,101 @@ static void test_vm_ftr_id_regs(struct kvm_vcpu *vcpu, bool aarch64_only) } } +#define MPAM_IDREG_TEST 6 +static void test_user_set_mpam_reg(struct kvm_vcpu *vcpu) +{ + uint64_t masks[KVM_ARM_FEATURE_ID_RANGE_SIZE]; + struct reg_mask_range range = { + .addr = (__u64)masks, + }; + uint64_t val; + int idx, err; + + /* + * If ID_AA64PFR0.MPAM is _not_ officially modifiable and is zero, + * check that if it can be set to 1, (i.e. it is supported by the + * hardware), that it can't be set to other values. + */ + + /* Get writable masks for feature ID registers */ + memset(range.reserved, 0, sizeof(range.reserved)); + vm_ioctl(vcpu->vm, KVM_ARM_GET_REG_WRITABLE_MASKS, &range); + + /* Writeable? Nothing to test! */ + idx = encoding_to_range_idx(SYS_ID_AA64PFR0_EL1); + if ((masks[idx] & ID_AA64PFR0_EL1_MPAM_MASK) == ID_AA64PFR0_EL1_MPAM_MASK) { + ksft_test_result_skip("ID_AA64PFR0_EL1.MPAM is officially writable, nothing to test\n"); + return; + } + + /* Get the id register value */ + vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1), &val); + + /* Try to set MPAM=0. This should always be possible. */ + val &= ~ID_AA64PFR0_EL1_MPAM_MASK; + val |= FIELD_PREP(ID_AA64PFR0_EL1_MPAM_MASK, 0); + err = __vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1), val); + if (err) + ksft_test_result_fail("ID_AA64PFR0_EL1.MPAM=0 was not accepted\n"); + else + ksft_test_result_pass("ID_AA64PFR0_EL1.MPAM=0 worked\n"); + + /* Try to set MPAM=1 */ + val &= ~ID_AA64PFR0_EL1_MPAM_MASK; + val |= FIELD_PREP(ID_AA64PFR0_EL1_MPAM_MASK, 1); + err = __vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1), val); + if (err) + ksft_test_result_skip("ID_AA64PFR0_EL1.MPAM is not writable, nothing to test\n"); + else + ksft_test_result_pass("ID_AA64PFR0_EL1.MPAM=1 was writable\n"); + + /* Try to set MPAM=2 */ + val &= ~ID_AA64PFR0_EL1_MPAM_MASK; + val |= FIELD_PREP(ID_AA64PFR0_EL1_MPAM_MASK, 2); + err = __vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1), val); + if (err) + ksft_test_result_pass("ID_AA64PFR0_EL1.MPAM not arbitrarily modifiable\n"); + else + ksft_test_result_fail("ID_AA64PFR0_EL1.MPAM value should not be ignored\n"); + + /* And again for ID_AA64PFR1_EL1.MPAM_frac */ + idx = encoding_to_range_idx(SYS_ID_AA64PFR1_EL1); + if ((masks[idx] & ID_AA64PFR1_EL1_MPAM_frac_MASK) == ID_AA64PFR1_EL1_MPAM_frac_MASK) { + ksft_test_result_skip("ID_AA64PFR1_EL1.MPAM_frac is officially writable, nothing to test\n"); + return; + } + + /* Get the id register value */ + vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR1_EL1), &val); + + /* Try to set MPAM_frac=0. This should always be possible. */ + val &= ~ID_AA64PFR1_EL1_MPAM_frac_MASK; + val |= FIELD_PREP(ID_AA64PFR1_EL1_MPAM_frac_MASK, 0); + err = __vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR1_EL1), val); + if (err) + ksft_test_result_fail("ID_AA64PFR0_EL1.MPAM_frac=0 was not accepted\n"); + else + ksft_test_result_pass("ID_AA64PFR0_EL1.MPAM_frac=0 worked\n"); + + /* Try to set MPAM_frac=1 */ + val &= ~ID_AA64PFR1_EL1_MPAM_frac_MASK; + val |= FIELD_PREP(ID_AA64PFR1_EL1_MPAM_frac_MASK, 1); + err = __vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR1_EL1), val); + if (err) + ksft_test_result_skip("ID_AA64PFR1_EL1.MPAM_frac is not writable, nothing to test\n"); + else + ksft_test_result_pass("ID_AA64PFR0_EL1.MPAM_frac=1 was writable\n"); + + /* Try to set MPAM_frac=2 */ + val &= ~ID_AA64PFR1_EL1_MPAM_frac_MASK; + val |= FIELD_PREP(ID_AA64PFR1_EL1_MPAM_frac_MASK, 2); + err = __vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR1_EL1), val); + if (err) + ksft_test_result_pass("ID_AA64PFR1_EL1.MPAM_frac not arbitrarily modifiable\n"); + else + ksft_test_result_fail("ID_AA64PFR1_EL1.MPAM_frac value should not be ignored\n"); +} + static void test_guest_reg_read(struct kvm_vcpu *vcpu) { bool done = false; @@ -569,14 +674,16 @@ int main(void) test_cnt = ARRAY_SIZE(ftr_id_aa64dfr0_el1) + ARRAY_SIZE(ftr_id_dfr0_el1) + ARRAY_SIZE(ftr_id_aa64isar0_el1) + ARRAY_SIZE(ftr_id_aa64isar1_el1) + ARRAY_SIZE(ftr_id_aa64isar2_el1) + ARRAY_SIZE(ftr_id_aa64pfr0_el1) + - ARRAY_SIZE(ftr_id_aa64mmfr0_el1) + ARRAY_SIZE(ftr_id_aa64mmfr1_el1) + - ARRAY_SIZE(ftr_id_aa64mmfr2_el1) + ARRAY_SIZE(ftr_id_aa64zfr0_el1) - - ARRAY_SIZE(test_regs) + 2; + ARRAY_SIZE(ftr_id_aa64pfr1_el1) + ARRAY_SIZE(ftr_id_aa64mmfr0_el1) + + ARRAY_SIZE(ftr_id_aa64mmfr1_el1) + ARRAY_SIZE(ftr_id_aa64mmfr2_el1) + + ARRAY_SIZE(ftr_id_aa64zfr0_el1) - ARRAY_SIZE(test_regs) + 2 + + MPAM_IDREG_TEST; ksft_set_plan(test_cnt); test_vm_ftr_id_regs(vcpu, aarch64_only); test_vcpu_ftr_id_regs(vcpu); + test_user_set_mpam_reg(vcpu); test_guest_reg_read(vcpu); diff --git a/tools/testing/selftests/kvm/aarch64/vpmu_counter_access.c b/tools/testing/selftests/kvm/aarch64/vpmu_counter_access.c index d31b9f64ba14..f9c0c86d7e85 100644 --- a/tools/testing/selftests/kvm/aarch64/vpmu_counter_access.c +++ b/tools/testing/selftests/kvm/aarch64/vpmu_counter_access.c @@ -300,7 +300,7 @@ static void guest_sync_handler(struct ex_regs *regs) uint64_t esr, ec; esr = read_sysreg(esr_el1); - ec = (esr >> ESR_EC_SHIFT) & ESR_EC_MASK; + ec = ESR_ELx_EC(esr); __GUEST_ASSERT(expected_ec == ec, "PC: 0x%lx; ESR: 0x%lx; EC: 0x%lx; EC expected: 0x%lx", @@ -338,10 +338,10 @@ static void test_access_invalid_pmc_regs(struct pmc_accessor *acc, int pmc_idx) * Reading/writing the event count/type registers should cause * an UNDEFINED exception. */ - TEST_EXCEPTION(ESR_EC_UNKNOWN, acc->read_cntr(pmc_idx)); - TEST_EXCEPTION(ESR_EC_UNKNOWN, acc->write_cntr(pmc_idx, 0)); - TEST_EXCEPTION(ESR_EC_UNKNOWN, acc->read_typer(pmc_idx)); - TEST_EXCEPTION(ESR_EC_UNKNOWN, acc->write_typer(pmc_idx, 0)); + TEST_EXCEPTION(ESR_ELx_EC_UNKNOWN, acc->read_cntr(pmc_idx)); + TEST_EXCEPTION(ESR_ELx_EC_UNKNOWN, acc->write_cntr(pmc_idx, 0)); + TEST_EXCEPTION(ESR_ELx_EC_UNKNOWN, acc->read_typer(pmc_idx)); + TEST_EXCEPTION(ESR_ELx_EC_UNKNOWN, acc->write_typer(pmc_idx, 0)); /* * The bit corresponding to the (unimplemented) counter in * {PMCNTEN,PMINTEN,PMOVS}{SET,CLR} registers should be RAZ. @@ -425,7 +425,7 @@ static void create_vpmu_vm(void *guest_code) vpmu_vm.vm = vm_create(1); vm_init_descriptor_tables(vpmu_vm.vm); - for (ec = 0; ec < ESR_EC_NUM; ec++) { + for (ec = 0; ec < ESR_ELx_EC_MAX + 1; ec++) { vm_install_sync_handler(vpmu_vm.vm, VECTOR_SYNC_CURRENT, ec, guest_sync_handler); } diff --git a/tools/testing/selftests/kvm/guest_memfd_test.c b/tools/testing/selftests/kvm/guest_memfd_test.c index ba0c8e996035..ce687f8d248f 100644 --- a/tools/testing/selftests/kvm/guest_memfd_test.c +++ b/tools/testing/selftests/kvm/guest_memfd_test.c @@ -134,7 +134,7 @@ static void test_create_guest_memfd_invalid(struct kvm_vm *vm) size); } - for (flag = 0; flag; flag <<= 1) { + for (flag = BIT(0); flag; flag <<= 1) { fd = __vm_create_guest_memfd(vm, page_size, flag); TEST_ASSERT(fd == -1 && errno == EINVAL, "guest_memfd() with flag '0x%lx' should fail with EINVAL", diff --git a/tools/testing/selftests/kvm/hardware_disable_test.c b/tools/testing/selftests/kvm/hardware_disable_test.c index bce73bcb973c..94bd6ed24cf3 100644 --- a/tools/testing/selftests/kvm/hardware_disable_test.c +++ b/tools/testing/selftests/kvm/hardware_disable_test.c @@ -20,7 +20,6 @@ #define SLEEPING_THREAD_NUM (1 << 4) #define FORK_NUM (1ULL << 9) #define DELAY_US_MAX 2000 -#define GUEST_CODE_PIO_PORT 4 sem_t *sem; diff --git a/tools/testing/selftests/kvm/include/aarch64/processor.h b/tools/testing/selftests/kvm/include/aarch64/processor.h index de977d131082..1e8d0d531fbd 100644 --- a/tools/testing/selftests/kvm/include/aarch64/processor.h +++ b/tools/testing/selftests/kvm/include/aarch64/processor.h @@ -12,6 +12,8 @@ #include <linux/stringify.h> #include <linux/types.h> +#include <asm/brk-imm.h> +#include <asm/esr.h> #include <asm/sysreg.h> @@ -100,19 +102,6 @@ enum { (v) == VECTOR_SYNC_LOWER_64 || \ (v) == VECTOR_SYNC_LOWER_32) -#define ESR_EC_NUM 64 -#define ESR_EC_SHIFT 26 -#define ESR_EC_MASK (ESR_EC_NUM - 1) - -#define ESR_EC_UNKNOWN 0x0 -#define ESR_EC_SVC64 0x15 -#define ESR_EC_IABT 0x21 -#define ESR_EC_DABT 0x25 -#define ESR_EC_HW_BP_CURRENT 0x31 -#define ESR_EC_SSTEP_CURRENT 0x33 -#define ESR_EC_WP_CURRENT 0x35 -#define ESR_EC_BRK_INS 0x3c - /* Access flag */ #define PTE_AF (1ULL << 10) diff --git a/tools/testing/selftests/kvm/include/s390x/facility.h b/tools/testing/selftests/kvm/include/s390x/facility.h new file mode 100644 index 000000000000..00a1ced6538b --- /dev/null +++ b/tools/testing/selftests/kvm/include/s390x/facility.h @@ -0,0 +1,50 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright IBM Corp. 2024 + * + * Authors: + * Hariharan Mari <hari55@linux.ibm.com> + * + * Get the facility bits with the STFLE instruction + */ + +#ifndef SELFTEST_KVM_FACILITY_H +#define SELFTEST_KVM_FACILITY_H + +#include <linux/bitops.h> + +/* alt_stfle_fac_list[16] + stfle_fac_list[16] */ +#define NB_STFL_DOUBLEWORDS 32 + +extern uint64_t stfl_doublewords[NB_STFL_DOUBLEWORDS]; +extern bool stfle_flag; + +static inline bool test_bit_inv(unsigned long nr, const unsigned long *ptr) +{ + return test_bit(nr ^ (BITS_PER_LONG - 1), ptr); +} + +static inline void stfle(uint64_t *fac, unsigned int nb_doublewords) +{ + register unsigned long r0 asm("0") = nb_doublewords - 1; + + asm volatile(" .insn s,0xb2b00000,0(%1)\n" + : "+d" (r0) + : "a" (fac) + : "memory", "cc"); +} + +static inline void setup_facilities(void) +{ + stfle(stfl_doublewords, NB_STFL_DOUBLEWORDS); + stfle_flag = true; +} + +static inline bool test_facility(int nr) +{ + if (!stfle_flag) + setup_facilities(); + return test_bit_inv(nr, stfl_doublewords); +} + +#endif diff --git a/tools/testing/selftests/kvm/include/s390x/processor.h b/tools/testing/selftests/kvm/include/s390x/processor.h index 481bd2fd6a32..33fef6fd9617 100644 --- a/tools/testing/selftests/kvm/include/s390x/processor.h +++ b/tools/testing/selftests/kvm/include/s390x/processor.h @@ -32,4 +32,10 @@ static inline void cpu_relax(void) barrier(); } +/* Get the instruction length */ +static inline int insn_length(unsigned char code) +{ + return ((((int)code + 64) >> 7) + 1) << 1; +} + #endif diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h index e247f99e0473..645200e95f89 100644 --- a/tools/testing/selftests/kvm/include/x86_64/processor.h +++ b/tools/testing/selftests/kvm/include/x86_64/processor.h @@ -1049,6 +1049,11 @@ static inline void vcpu_set_cpuid(struct kvm_vcpu *vcpu) vcpu_ioctl(vcpu, KVM_GET_CPUID2, vcpu->cpuid); } +static inline void vcpu_get_cpuid(struct kvm_vcpu *vcpu) +{ + vcpu_ioctl(vcpu, KVM_GET_CPUID2, vcpu->cpuid); +} + void vcpu_set_cpuid_property(struct kvm_vcpu *vcpu, struct kvm_x86_cpu_property property, uint32_t value); diff --git a/tools/testing/selftests/kvm/lib/aarch64/processor.c b/tools/testing/selftests/kvm/lib/aarch64/processor.c index fe4dc3693112..698e34f39241 100644 --- a/tools/testing/selftests/kvm/lib/aarch64/processor.c +++ b/tools/testing/selftests/kvm/lib/aarch64/processor.c @@ -450,7 +450,7 @@ void assert_on_unhandled_exception(struct kvm_vcpu *vcpu) } struct handlers { - handler_fn exception_handlers[VECTOR_NUM][ESR_EC_NUM]; + handler_fn exception_handlers[VECTOR_NUM][ESR_ELx_EC_MAX + 1]; }; void vcpu_init_descriptor_tables(struct kvm_vcpu *vcpu) @@ -469,7 +469,7 @@ void route_exception(struct ex_regs *regs, int vector) switch (vector) { case VECTOR_SYNC_CURRENT: case VECTOR_SYNC_LOWER_64: - ec = (read_sysreg(esr_el1) >> ESR_EC_SHIFT) & ESR_EC_MASK; + ec = ESR_ELx_EC(read_sysreg(esr_el1)); valid_ec = true; break; case VECTOR_IRQ_CURRENT: @@ -508,7 +508,7 @@ void vm_install_sync_handler(struct kvm_vm *vm, int vector, int ec, assert(VECTOR_IS_SYNC(vector)); assert(vector < VECTOR_NUM); - assert(ec < ESR_EC_NUM); + assert(ec <= ESR_ELx_EC_MAX); handlers->exception_handlers[vector][ec] = handler; } diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index a2b7df5f1d39..480e3a40d197 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -720,9 +720,6 @@ static void __vm_mem_region_delete(struct kvm_vm *vm, rb_erase(®ion->hva_node, &vm->regions.hva_tree); hash_del(®ion->slot_node); - region->region.memory_size = 0; - vm_ioctl(vm, KVM_SET_USER_MEMORY_REGION2, ®ion->region); - sparsebit_free(®ion->unused_phy_pages); sparsebit_free(®ion->protected_phy_pages); ret = munmap(region->mmap_start, region->mmap_size); @@ -1197,7 +1194,12 @@ void vm_mem_region_move(struct kvm_vm *vm, uint32_t slot, uint64_t new_gpa) */ void vm_mem_region_delete(struct kvm_vm *vm, uint32_t slot) { - __vm_mem_region_delete(vm, memslot2region(vm, slot)); + struct userspace_mem_region *region = memslot2region(vm, slot); + + region->region.memory_size = 0; + vm_ioctl(vm, KVM_SET_USER_MEMORY_REGION2, ®ion->region); + + __vm_mem_region_delete(vm, region); } void vm_guest_mem_fallocate(struct kvm_vm *vm, uint64_t base, uint64_t size, diff --git a/tools/testing/selftests/kvm/lib/s390x/facility.c b/tools/testing/selftests/kvm/lib/s390x/facility.c new file mode 100644 index 000000000000..d540812d911a --- /dev/null +++ b/tools/testing/selftests/kvm/lib/s390x/facility.c @@ -0,0 +1,14 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright IBM Corp. 2024 + * + * Authors: + * Hariharan Mari <hari55@linux.ibm.com> + * + * Contains the definition for the global variables to have the test facitlity feature. + */ + +#include "facility.h" + +uint64_t stfl_doublewords[NB_STFL_DOUBLEWORDS]; +bool stfle_flag; diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c index 974bcd2df6d7..636b29ba8985 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/processor.c +++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c @@ -506,6 +506,8 @@ static void vcpu_init_sregs(struct kvm_vm *vm, struct kvm_vcpu *vcpu) sregs.cr0 = X86_CR0_PE | X86_CR0_NE | X86_CR0_PG; sregs.cr4 |= X86_CR4_PAE | X86_CR4_OSFXSR; + if (kvm_cpu_has(X86_FEATURE_XSAVE)) + sregs.cr4 |= X86_CR4_OSXSAVE; sregs.efer |= (EFER_LME | EFER_LMA | EFER_NX); kvm_seg_set_unusable(&sregs.ldt); @@ -519,6 +521,20 @@ static void vcpu_init_sregs(struct kvm_vm *vm, struct kvm_vcpu *vcpu) vcpu_sregs_set(vcpu, &sregs); } +static void vcpu_init_xcrs(struct kvm_vm *vm, struct kvm_vcpu *vcpu) +{ + struct kvm_xcrs xcrs = { + .nr_xcrs = 1, + .xcrs[0].xcr = 0, + .xcrs[0].value = kvm_cpu_supported_xcr0(), + }; + + if (!kvm_cpu_has(X86_FEATURE_XSAVE)) + return; + + vcpu_xcrs_set(vcpu, &xcrs); +} + static void set_idt_entry(struct kvm_vm *vm, int vector, unsigned long addr, int dpl, unsigned short selector) { @@ -675,6 +691,7 @@ struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id) vcpu = __vm_vcpu_add(vm, vcpu_id); vcpu_init_cpuid(vcpu, kvm_get_supported_cpuid()); vcpu_init_sregs(vm, vcpu); + vcpu_init_xcrs(vm, vcpu); /* Setup guest general purpose registers */ vcpu_regs_get(vcpu, ®s); @@ -686,6 +703,13 @@ struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id) mp_state.mp_state = 0; vcpu_mp_state_set(vcpu, &mp_state); + /* + * Refresh CPUID after setting SREGS and XCR0, so that KVM's "runtime" + * updates to guest CPUID, e.g. for OSXSAVE and XSAVE state size, are + * reflected into selftests' vCPU CPUID cache, i.e. so that the cache + * is consistent with vCPU state. + */ + vcpu_get_cpuid(vcpu); return vcpu; } diff --git a/tools/testing/selftests/kvm/lib/x86_64/vmx.c b/tools/testing/selftests/kvm/lib/x86_64/vmx.c index 089b8925b6b2..d7ac122820bf 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/vmx.c +++ b/tools/testing/selftests/kvm/lib/x86_64/vmx.c @@ -200,7 +200,7 @@ static inline void init_vmcs_control_fields(struct vmx_pages *vmx) if (vmx->eptp_gpa) { uint64_t ept_paddr; struct eptPageTablePointer eptp = { - .memory_type = VMX_BASIC_MEM_TYPE_WB, + .memory_type = X86_MEMTYPE_WB, .page_walk_length = 3, /* + 1 */ .ad_enabled = ept_vpid_cap_supported(VMX_EPT_VPID_CAP_AD_BITS), .address = vmx->eptp_gpa >> PAGE_SHIFT_4K, diff --git a/tools/testing/selftests/kvm/memslot_perf_test.c b/tools/testing/selftests/kvm/memslot_perf_test.c index 989ffe0d047f..e3711beff7f3 100644 --- a/tools/testing/selftests/kvm/memslot_perf_test.c +++ b/tools/testing/selftests/kvm/memslot_perf_test.c @@ -417,7 +417,7 @@ static bool _guest_should_exit(void) */ static noinline void host_perform_sync(struct sync_area *sync) { - alarm(2); + alarm(10); atomic_store_explicit(&sync->sync_flag, true, memory_order_release); while (atomic_load_explicit(&sync->sync_flag, memory_order_acquire)) diff --git a/tools/testing/selftests/kvm/riscv/get-reg-list.c b/tools/testing/selftests/kvm/riscv/get-reg-list.c index 8e34f7fa44e9..4bc1051848e5 100644 --- a/tools/testing/selftests/kvm/riscv/get-reg-list.c +++ b/tools/testing/selftests/kvm/riscv/get-reg-list.c @@ -41,10 +41,14 @@ bool filter_reg(__u64 reg) case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_I: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_M: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_V: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_SMNPM: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_SMSTATEEN: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_SSAIA: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_SSCOFPMF: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_SSNPM: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_SSTC: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_SVADE: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_SVADU: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_SVINVAL: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_SVNAPOT: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_SVPBMT: @@ -414,10 +418,14 @@ static const char *isa_ext_single_id_to_str(__u64 reg_off) KVM_ISA_EXT_ARR(I), KVM_ISA_EXT_ARR(M), KVM_ISA_EXT_ARR(V), + KVM_ISA_EXT_ARR(SMNPM), KVM_ISA_EXT_ARR(SMSTATEEN), KVM_ISA_EXT_ARR(SSAIA), KVM_ISA_EXT_ARR(SSCOFPMF), + KVM_ISA_EXT_ARR(SSNPM), KVM_ISA_EXT_ARR(SSTC), + KVM_ISA_EXT_ARR(SVADE), + KVM_ISA_EXT_ARR(SVADU), KVM_ISA_EXT_ARR(SVINVAL), KVM_ISA_EXT_ARR(SVNAPOT), KVM_ISA_EXT_ARR(SVPBMT), @@ -946,9 +954,13 @@ KVM_ISA_EXT_SUBLIST_CONFIG(aia, AIA); KVM_ISA_EXT_SUBLIST_CONFIG(fp_f, FP_F); KVM_ISA_EXT_SUBLIST_CONFIG(fp_d, FP_D); KVM_ISA_EXT_SIMPLE_CONFIG(h, H); +KVM_ISA_EXT_SIMPLE_CONFIG(smnpm, SMNPM); KVM_ISA_EXT_SUBLIST_CONFIG(smstateen, SMSTATEEN); KVM_ISA_EXT_SIMPLE_CONFIG(sscofpmf, SSCOFPMF); +KVM_ISA_EXT_SIMPLE_CONFIG(ssnpm, SSNPM); KVM_ISA_EXT_SIMPLE_CONFIG(sstc, SSTC); +KVM_ISA_EXT_SIMPLE_CONFIG(svade, SVADE); +KVM_ISA_EXT_SIMPLE_CONFIG(svadu, SVADU); KVM_ISA_EXT_SIMPLE_CONFIG(svinval, SVINVAL); KVM_ISA_EXT_SIMPLE_CONFIG(svnapot, SVNAPOT); KVM_ISA_EXT_SIMPLE_CONFIG(svpbmt, SVPBMT); @@ -1009,9 +1021,13 @@ struct vcpu_reg_list *vcpu_configs[] = { &config_fp_f, &config_fp_d, &config_h, + &config_smnpm, &config_smstateen, &config_sscofpmf, + &config_ssnpm, &config_sstc, + &config_svade, + &config_svadu, &config_svinval, &config_svnapot, &config_svpbmt, diff --git a/tools/testing/selftests/kvm/s390x/cpumodel_subfuncs_test.c b/tools/testing/selftests/kvm/s390x/cpumodel_subfuncs_test.c new file mode 100644 index 000000000000..27255880dabd --- /dev/null +++ b/tools/testing/selftests/kvm/s390x/cpumodel_subfuncs_test.c @@ -0,0 +1,301 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright IBM Corp. 2024 + * + * Authors: + * Hariharan Mari <hari55@linux.ibm.com> + * + * The tests compare the result of the KVM ioctl for obtaining CPU subfunction data with those + * from an ASM block performing the same CPU subfunction. Currently KVM doesn't mask instruction + * query data reported via the CPU Model, allowing us to directly compare it with the data + * acquired through executing the queries in the test. + */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/ioctl.h> +#include "facility.h" + +#include "kvm_util.h" + +#define PLO_FUNCTION_MAX 256 + +/* Query available CPU subfunctions */ +struct kvm_s390_vm_cpu_subfunc cpu_subfunc; + +static void get_cpu_machine_subfuntions(struct kvm_vm *vm, + struct kvm_s390_vm_cpu_subfunc *cpu_subfunc) +{ + int r; + + r = __kvm_device_attr_get(vm->fd, KVM_S390_VM_CPU_MODEL, + KVM_S390_VM_CPU_MACHINE_SUBFUNC, cpu_subfunc); + + TEST_ASSERT(!r, "Get cpu subfunctions failed r=%d errno=%d", r, errno); +} + +static inline int plo_test_bit(unsigned char nr) +{ + unsigned long function = nr | 0x100; + int cc; + + asm volatile(" lgr 0,%[function]\n" + /* Parameter registers are ignored for "test bit" */ + " plo 0,0,0,0(0)\n" + " ipm %0\n" + " srl %0,28\n" + : "=d" (cc) + : [function] "d" (function) + : "cc", "0"); + return cc == 0; +} + +/* Testing Perform Locked Operation (PLO) CPU subfunction's ASM block */ +static void test_plo_asm_block(u8 (*query)[32]) +{ + for (int i = 0; i < PLO_FUNCTION_MAX; ++i) { + if (plo_test_bit(i)) + (*query)[i >> 3] |= 0x80 >> (i & 7); + } +} + +/* Testing Crypto Compute Message Authentication Code (KMAC) CPU subfunction's ASM block */ +static void test_kmac_asm_block(u8 (*query)[16]) +{ + asm volatile(" la %%r1,%[query]\n" + " xgr %%r0,%%r0\n" + " .insn rre,0xb91e0000,0,2\n" + : [query] "=R" (*query) + : + : "cc", "r0", "r1"); +} + +/* Testing Crypto Cipher Message with Chaining (KMC) CPU subfunction's ASM block */ +static void test_kmc_asm_block(u8 (*query)[16]) +{ + asm volatile(" la %%r1,%[query]\n" + " xgr %%r0,%%r0\n" + " .insn rre,0xb92f0000,2,4\n" + : [query] "=R" (*query) + : + : "cc", "r0", "r1"); +} + +/* Testing Crypto Cipher Message (KM) CPU subfunction's ASM block */ +static void test_km_asm_block(u8 (*query)[16]) +{ + asm volatile(" la %%r1,%[query]\n" + " xgr %%r0,%%r0\n" + " .insn rre,0xb92e0000,2,4\n" + : [query] "=R" (*query) + : + : "cc", "r0", "r1"); +} + +/* Testing Crypto Compute Intermediate Message Digest (KIMD) CPU subfunction's ASM block */ +static void test_kimd_asm_block(u8 (*query)[16]) +{ + asm volatile(" la %%r1,%[query]\n" + " xgr %%r0,%%r0\n" + " .insn rre,0xb93e0000,0,2\n" + : [query] "=R" (*query) + : + : "cc", "r0", "r1"); +} + +/* Testing Crypto Compute Last Message Digest (KLMD) CPU subfunction's ASM block */ +static void test_klmd_asm_block(u8 (*query)[16]) +{ + asm volatile(" la %%r1,%[query]\n" + " xgr %%r0,%%r0\n" + " .insn rre,0xb93f0000,0,2\n" + : [query] "=R" (*query) + : + : "cc", "r0", "r1"); +} + +/* Testing Crypto Cipher Message with Counter (KMCTR) CPU subfunction's ASM block */ +static void test_kmctr_asm_block(u8 (*query)[16]) +{ + asm volatile(" la %%r1,%[query]\n" + " xgr %%r0,%%r0\n" + " .insn rrf,0xb92d0000,2,4,6,0\n" + : [query] "=R" (*query) + : + : "cc", "r0", "r1"); +} + +/* Testing Crypto Cipher Message with Cipher Feedback (KMF) CPU subfunction's ASM block */ +static void test_kmf_asm_block(u8 (*query)[16]) +{ + asm volatile(" la %%r1,%[query]\n" + " xgr %%r0,%%r0\n" + " .insn rre,0xb92a0000,2,4\n" + : [query] "=R" (*query) + : + : "cc", "r0", "r1"); +} + +/* Testing Crypto Cipher Message with Output Feedback (KMO) CPU subfunction's ASM block */ +static void test_kmo_asm_block(u8 (*query)[16]) +{ + asm volatile(" la %%r1,%[query]\n" + " xgr %%r0,%%r0\n" + " .insn rre,0xb92b0000,2,4\n" + : [query] "=R" (*query) + : + : "cc", "r0", "r1"); +} + +/* Testing Crypto Perform Cryptographic Computation (PCC) CPU subfunction's ASM block */ +static void test_pcc_asm_block(u8 (*query)[16]) +{ + asm volatile(" la %%r1,%[query]\n" + " xgr %%r0,%%r0\n" + " .insn rre,0xb92c0000,0,0\n" + : [query] "=R" (*query) + : + : "cc", "r0", "r1"); +} + +/* Testing Crypto Perform Random Number Operation (PRNO) CPU subfunction's ASM block */ +static void test_prno_asm_block(u8 (*query)[16]) +{ + asm volatile(" la %%r1,%[query]\n" + " xgr %%r0,%%r0\n" + " .insn rre,0xb93c0000,2,4\n" + : [query] "=R" (*query) + : + : "cc", "r0", "r1"); +} + +/* Testing Crypto Cipher Message with Authentication (KMA) CPU subfunction's ASM block */ +static void test_kma_asm_block(u8 (*query)[16]) +{ + asm volatile(" la %%r1,%[query]\n" + " xgr %%r0,%%r0\n" + " .insn rrf,0xb9290000,2,4,6,0\n" + : [query] "=R" (*query) + : + : "cc", "r0", "r1"); +} + +/* Testing Crypto Compute Digital Signature Authentication (KDSA) CPU subfunction's ASM block */ +static void test_kdsa_asm_block(u8 (*query)[16]) +{ + asm volatile(" la %%r1,%[query]\n" + " xgr %%r0,%%r0\n" + " .insn rre,0xb93a0000,0,2\n" + : [query] "=R" (*query) + : + : "cc", "r0", "r1"); +} + +/* Testing Sort Lists (SORTL) CPU subfunction's ASM block */ +static void test_sortl_asm_block(u8 (*query)[32]) +{ + asm volatile(" lghi 0,0\n" + " la 1,%[query]\n" + " .insn rre,0xb9380000,2,4\n" + : [query] "=R" (*query) + : + : "cc", "0", "1"); +} + +/* Testing Deflate Conversion Call (DFLTCC) CPU subfunction's ASM block */ +static void test_dfltcc_asm_block(u8 (*query)[32]) +{ + asm volatile(" lghi 0,0\n" + " la 1,%[query]\n" + " .insn rrf,0xb9390000,2,4,6,0\n" + : [query] "=R" (*query) + : + : "cc", "0", "1"); +} + +/* + * Testing Perform Function with Concurrent Results (PFCR) + * CPU subfunctions's ASM block + */ +static void test_pfcr_asm_block(u8 (*query)[16]) +{ + asm volatile(" lghi 0,0\n" + " .insn rsy,0xeb0000000016,0,0,%[query]\n" + : [query] "=QS" (*query) + : + : "cc", "0"); +} + +typedef void (*testfunc_t)(u8 (*array)[]); + +struct testdef { + const char *subfunc_name; + u8 *subfunc_array; + size_t array_size; + testfunc_t test; + int facility_bit; +} testlist[] = { + /* + * PLO was introduced in the very first 64-bit machine generation. + * Hence it is assumed PLO is always installed in Z Arch. + */ + { "PLO", cpu_subfunc.plo, sizeof(cpu_subfunc.plo), test_plo_asm_block, 1 }, + /* MSA - Facility bit 17 */ + { "KMAC", cpu_subfunc.kmac, sizeof(cpu_subfunc.kmac), test_kmac_asm_block, 17 }, + { "KMC", cpu_subfunc.kmc, sizeof(cpu_subfunc.kmc), test_kmc_asm_block, 17 }, + { "KM", cpu_subfunc.km, sizeof(cpu_subfunc.km), test_km_asm_block, 17 }, + { "KIMD", cpu_subfunc.kimd, sizeof(cpu_subfunc.kimd), test_kimd_asm_block, 17 }, + { "KLMD", cpu_subfunc.klmd, sizeof(cpu_subfunc.klmd), test_klmd_asm_block, 17 }, + /* MSA - Facility bit 77 */ + { "KMCTR", cpu_subfunc.kmctr, sizeof(cpu_subfunc.kmctr), test_kmctr_asm_block, 77 }, + { "KMF", cpu_subfunc.kmf, sizeof(cpu_subfunc.kmf), test_kmf_asm_block, 77 }, + { "KMO", cpu_subfunc.kmo, sizeof(cpu_subfunc.kmo), test_kmo_asm_block, 77 }, + { "PCC", cpu_subfunc.pcc, sizeof(cpu_subfunc.pcc), test_pcc_asm_block, 77 }, + /* MSA5 - Facility bit 57 */ + { "PPNO", cpu_subfunc.ppno, sizeof(cpu_subfunc.ppno), test_prno_asm_block, 57 }, + /* MSA8 - Facility bit 146 */ + { "KMA", cpu_subfunc.kma, sizeof(cpu_subfunc.kma), test_kma_asm_block, 146 }, + /* MSA9 - Facility bit 155 */ + { "KDSA", cpu_subfunc.kdsa, sizeof(cpu_subfunc.kdsa), test_kdsa_asm_block, 155 }, + /* SORTL - Facility bit 150 */ + { "SORTL", cpu_subfunc.sortl, sizeof(cpu_subfunc.sortl), test_sortl_asm_block, 150 }, + /* DFLTCC - Facility bit 151 */ + { "DFLTCC", cpu_subfunc.dfltcc, sizeof(cpu_subfunc.dfltcc), test_dfltcc_asm_block, 151 }, + /* Concurrent-function facility - Facility bit 201 */ + { "PFCR", cpu_subfunc.pfcr, sizeof(cpu_subfunc.pfcr), test_pfcr_asm_block, 201 }, +}; + +int main(int argc, char *argv[]) +{ + struct kvm_vm *vm; + int idx; + + ksft_print_header(); + + vm = vm_create(1); + + memset(&cpu_subfunc, 0, sizeof(cpu_subfunc)); + get_cpu_machine_subfuntions(vm, &cpu_subfunc); + + ksft_set_plan(ARRAY_SIZE(testlist)); + for (idx = 0; idx < ARRAY_SIZE(testlist); idx++) { + if (test_facility(testlist[idx].facility_bit)) { + u8 *array = malloc(testlist[idx].array_size); + + testlist[idx].test((u8 (*)[testlist[idx].array_size])array); + + TEST_ASSERT_EQ(memcmp(testlist[idx].subfunc_array, + array, testlist[idx].array_size), 0); + + ksft_test_result_pass("%s\n", testlist[idx].subfunc_name); + free(array); + } else { + ksft_test_result_skip("%s feature is not avaialable\n", + testlist[idx].subfunc_name); + } + } + + kvm_vm_free(vm); + ksft_finished(); +} diff --git a/tools/testing/selftests/kvm/s390x/ucontrol_test.c b/tools/testing/selftests/kvm/s390x/ucontrol_test.c index f257beec1430..0c112319dab1 100644 --- a/tools/testing/selftests/kvm/s390x/ucontrol_test.c +++ b/tools/testing/selftests/kvm/s390x/ucontrol_test.c @@ -16,7 +16,11 @@ #include <linux/capability.h> #include <linux/sizes.h> +#define PGM_SEGMENT_TRANSLATION 0x10 + #define VM_MEM_SIZE (4 * SZ_1M) +#define VM_MEM_EXT_SIZE (2 * SZ_1M) +#define VM_MEM_MAX_M ((VM_MEM_SIZE + VM_MEM_EXT_SIZE) / SZ_1M) /* so directly declare capget to check caps without libcap */ int capget(cap_user_header_t header, cap_user_data_t data); @@ -58,6 +62,50 @@ asm("test_gprs_asm:\n" " j 0b\n" ); +/* Test program manipulating memory */ +extern char test_mem_asm[]; +asm("test_mem_asm:\n" + "xgr %r0, %r0\n" + + "0:\n" + " ahi %r0,1\n" + " st %r1,0(%r5,%r6)\n" + + " xgr %r1,%r1\n" + " l %r1,0(%r5,%r6)\n" + " ahi %r0,1\n" + " diag 0,0,0x44\n" + + " j 0b\n" +); + +/* Test program manipulating storage keys */ +extern char test_skey_asm[]; +asm("test_skey_asm:\n" + "xgr %r0, %r0\n" + + "0:\n" + " ahi %r0,1\n" + " st %r1,0(%r5,%r6)\n" + + " iske %r1,%r6\n" + " ahi %r0,1\n" + " diag 0,0,0x44\n" + + " sske %r1,%r6\n" + " xgr %r1,%r1\n" + " iske %r1,%r6\n" + " ahi %r0,1\n" + " diag 0,0,0x44\n" + + " rrbe %r1,%r6\n" + " iske %r1,%r6\n" + " ahi %r0,1\n" + " diag 0,0,0x44\n" + + " j 0b\n" +); + FIXTURE(uc_kvm) { struct kvm_s390_sie_block *sie_block; @@ -67,6 +115,7 @@ FIXTURE(uc_kvm) uintptr_t base_hva; uintptr_t code_hva; int kvm_run_size; + vm_paddr_t pgd; void *vm_mem; int vcpu_fd; int kvm_fd; @@ -116,7 +165,7 @@ FIXTURE_SETUP(uc_kvm) self->base_gpa = 0; self->code_gpa = self->base_gpa + (3 * SZ_1M); - self->vm_mem = aligned_alloc(SZ_1M, VM_MEM_SIZE); + self->vm_mem = aligned_alloc(SZ_1M, VM_MEM_MAX_M * SZ_1M); ASSERT_NE(NULL, self->vm_mem) TH_LOG("malloc failed %u", errno); self->base_hva = (uintptr_t)self->vm_mem; self->code_hva = self->base_hva - self->base_gpa + self->code_gpa; @@ -222,16 +271,112 @@ TEST(uc_cap_hpage) close(kvm_fd); } -/* verify SIEIC exit +/* calculate host virtual addr from guest physical addr */ +static void *gpa2hva(FIXTURE_DATA(uc_kvm) *self, u64 gpa) +{ + return (void *)(self->base_hva - self->base_gpa + gpa); +} + +/* map / make additional memory available */ +static int uc_map_ext(FIXTURE_DATA(uc_kvm) *self, u64 vcpu_addr, u64 length) +{ + struct kvm_s390_ucas_mapping map = { + .user_addr = (u64)gpa2hva(self, vcpu_addr), + .vcpu_addr = vcpu_addr, + .length = length, + }; + pr_info("ucas map %p %p 0x%llx", + (void *)map.user_addr, (void *)map.vcpu_addr, map.length); + return ioctl(self->vcpu_fd, KVM_S390_UCAS_MAP, &map); +} + +/* unmap previously mapped memory */ +static int uc_unmap_ext(FIXTURE_DATA(uc_kvm) *self, u64 vcpu_addr, u64 length) +{ + struct kvm_s390_ucas_mapping map = { + .user_addr = (u64)gpa2hva(self, vcpu_addr), + .vcpu_addr = vcpu_addr, + .length = length, + }; + pr_info("ucas unmap %p %p 0x%llx", + (void *)map.user_addr, (void *)map.vcpu_addr, map.length); + return ioctl(self->vcpu_fd, KVM_S390_UCAS_UNMAP, &map); +} + +/* handle ucontrol exit by mapping the accessed segment */ +static void uc_handle_exit_ucontrol(FIXTURE_DATA(uc_kvm) *self) +{ + struct kvm_run *run = self->run; + u64 seg_addr; + int rc; + + TEST_ASSERT_EQ(KVM_EXIT_S390_UCONTROL, run->exit_reason); + switch (run->s390_ucontrol.pgm_code) { + case PGM_SEGMENT_TRANSLATION: + seg_addr = run->s390_ucontrol.trans_exc_code & ~(SZ_1M - 1); + pr_info("ucontrol pic segment translation 0x%llx, mapping segment 0x%lx\n", + run->s390_ucontrol.trans_exc_code, seg_addr); + /* map / make additional memory available */ + rc = uc_map_ext(self, seg_addr, SZ_1M); + TEST_ASSERT_EQ(0, rc); + break; + default: + TEST_FAIL("UNEXPECTED PGM CODE %d", run->s390_ucontrol.pgm_code); + } +} + +/* + * Handle the SIEIC exit + * * fail on codes not expected in the test cases + * Returns if interception is handled / execution can be continued + */ +static void uc_skey_enable(FIXTURE_DATA(uc_kvm) *self) +{ + struct kvm_s390_sie_block *sie_block = self->sie_block; + + /* disable KSS */ + sie_block->cpuflags &= ~CPUSTAT_KSS; + /* disable skey inst interception */ + sie_block->ictl &= ~(ICTL_ISKE | ICTL_SSKE | ICTL_RRBE); +} + +/* + * Handle the instruction intercept + * Returns if interception is handled / execution can be continued + */ +static bool uc_handle_insn_ic(FIXTURE_DATA(uc_kvm) *self) +{ + struct kvm_s390_sie_block *sie_block = self->sie_block; + int ilen = insn_length(sie_block->ipa >> 8); + struct kvm_run *run = self->run; + + switch (run->s390_sieic.ipa) { + case 0xB229: /* ISKE */ + case 0xB22b: /* SSKE */ + case 0xB22a: /* RRBE */ + uc_skey_enable(self); + + /* rewind to reexecute intercepted instruction */ + run->psw_addr = run->psw_addr - ilen; + pr_info("rewind guest addr to 0x%.16llx\n", run->psw_addr); + return true; + default: + return false; + } +} + +/* + * Handle the SIEIC exit * * fail on codes not expected in the test cases + * Returns if interception is handled / execution can be continued */ -static bool uc_handle_sieic(FIXTURE_DATA(uc_kvm) * self) +static bool uc_handle_sieic(FIXTURE_DATA(uc_kvm) *self) { struct kvm_s390_sie_block *sie_block = self->sie_block; struct kvm_run *run = self->run; /* check SIE interception code */ - pr_info("sieic: 0x%.2x 0x%.4x 0x%.4x\n", + pr_info("sieic: 0x%.2x 0x%.4x 0x%.8x\n", run->s390_sieic.icptcode, run->s390_sieic.ipa, run->s390_sieic.ipb); @@ -239,7 +384,10 @@ static bool uc_handle_sieic(FIXTURE_DATA(uc_kvm) * self) case ICPT_INST: /* end execution in caller on intercepted instruction */ pr_info("sie instruction interception\n"); - return false; + return uc_handle_insn_ic(self); + case ICPT_KSS: + uc_skey_enable(self); + return true; case ICPT_OPEREXC: /* operation exception */ TEST_FAIL("sie exception on %.4x%.8x", sie_block->ipa, sie_block->ipb); @@ -250,11 +398,17 @@ static bool uc_handle_sieic(FIXTURE_DATA(uc_kvm) * self) } /* verify VM state on exit */ -static bool uc_handle_exit(FIXTURE_DATA(uc_kvm) * self) +static bool uc_handle_exit(FIXTURE_DATA(uc_kvm) *self) { struct kvm_run *run = self->run; switch (run->exit_reason) { + case KVM_EXIT_S390_UCONTROL: + /** check program interruption code + * handle page fault --> ucas map + */ + uc_handle_exit_ucontrol(self); + break; case KVM_EXIT_S390_SIEIC: return uc_handle_sieic(self); default: @@ -264,7 +418,7 @@ static bool uc_handle_exit(FIXTURE_DATA(uc_kvm) * self) } /* run the VM until interrupted */ -static int uc_run_once(FIXTURE_DATA(uc_kvm) * self) +static int uc_run_once(FIXTURE_DATA(uc_kvm) *self) { int rc; @@ -275,7 +429,7 @@ static int uc_run_once(FIXTURE_DATA(uc_kvm) * self) return rc; } -static void uc_assert_diag44(FIXTURE_DATA(uc_kvm) * self) +static void uc_assert_diag44(FIXTURE_DATA(uc_kvm) *self) { struct kvm_s390_sie_block *sie_block = self->sie_block; @@ -286,6 +440,89 @@ static void uc_assert_diag44(FIXTURE_DATA(uc_kvm) * self) TEST_ASSERT_EQ(0x440000, sie_block->ipb); } +TEST_F(uc_kvm, uc_no_user_region) +{ + struct kvm_userspace_memory_region region = { + .slot = 1, + .guest_phys_addr = self->code_gpa, + .memory_size = VM_MEM_EXT_SIZE, + .userspace_addr = (uintptr_t)self->code_hva, + }; + struct kvm_userspace_memory_region2 region2 = { + .slot = 1, + .guest_phys_addr = self->code_gpa, + .memory_size = VM_MEM_EXT_SIZE, + .userspace_addr = (uintptr_t)self->code_hva, + }; + + ASSERT_EQ(-1, ioctl(self->vm_fd, KVM_SET_USER_MEMORY_REGION, ®ion)); + ASSERT_EQ(EINVAL, errno); + + ASSERT_EQ(-1, ioctl(self->vm_fd, KVM_SET_USER_MEMORY_REGION2, ®ion2)); + ASSERT_EQ(EINVAL, errno); +} + +TEST_F(uc_kvm, uc_map_unmap) +{ + struct kvm_sync_regs *sync_regs = &self->run->s.regs; + struct kvm_run *run = self->run; + const u64 disp = 1; + int rc; + + /* copy test_mem_asm to code_hva / code_gpa */ + TH_LOG("copy code %p to vm mapped memory %p / %p", + &test_mem_asm, (void *)self->code_hva, (void *)self->code_gpa); + memcpy((void *)self->code_hva, &test_mem_asm, PAGE_SIZE); + + /* DAT disabled + 64 bit mode */ + run->psw_mask = 0x0000000180000000ULL; + run->psw_addr = self->code_gpa; + + /* set register content for test_mem_asm to access not mapped memory*/ + sync_regs->gprs[1] = 0x55; + sync_regs->gprs[5] = self->base_gpa; + sync_regs->gprs[6] = VM_MEM_SIZE + disp; + run->kvm_dirty_regs |= KVM_SYNC_GPRS; + + /* run and expect to fail with ucontrol pic segment translation */ + ASSERT_EQ(0, uc_run_once(self)); + ASSERT_EQ(1, sync_regs->gprs[0]); + ASSERT_EQ(KVM_EXIT_S390_UCONTROL, run->exit_reason); + + ASSERT_EQ(PGM_SEGMENT_TRANSLATION, run->s390_ucontrol.pgm_code); + ASSERT_EQ(self->base_gpa + VM_MEM_SIZE, run->s390_ucontrol.trans_exc_code); + + /* fail to map memory with not segment aligned address */ + rc = uc_map_ext(self, self->base_gpa + VM_MEM_SIZE + disp, VM_MEM_EXT_SIZE); + ASSERT_GT(0, rc) + TH_LOG("ucas map for non segment address should fail but didn't; " + "result %d not expected, %s", rc, strerror(errno)); + + /* map / make additional memory available */ + rc = uc_map_ext(self, self->base_gpa + VM_MEM_SIZE, VM_MEM_EXT_SIZE); + ASSERT_EQ(0, rc) + TH_LOG("ucas map result %d not expected, %s", rc, strerror(errno)); + ASSERT_EQ(0, uc_run_once(self)); + ASSERT_EQ(false, uc_handle_exit(self)); + uc_assert_diag44(self); + + /* assert registers and memory are in expected state */ + ASSERT_EQ(2, sync_regs->gprs[0]); + ASSERT_EQ(0x55, sync_regs->gprs[1]); + ASSERT_EQ(0x55, *(u32 *)gpa2hva(self, self->base_gpa + VM_MEM_SIZE + disp)); + + /* unmap and run loop again */ + rc = uc_unmap_ext(self, self->base_gpa + VM_MEM_SIZE, VM_MEM_EXT_SIZE); + ASSERT_EQ(0, rc) + TH_LOG("ucas unmap result %d not expected, %s", rc, strerror(errno)); + ASSERT_EQ(0, uc_run_once(self)); + ASSERT_EQ(3, sync_regs->gprs[0]); + ASSERT_EQ(KVM_EXIT_S390_UCONTROL, run->exit_reason); + ASSERT_EQ(PGM_SEGMENT_TRANSLATION, run->s390_ucontrol.pgm_code); + /* handle ucontrol exit and remap memory after previous map and unmap */ + ASSERT_EQ(true, uc_handle_exit(self)); +} + TEST_F(uc_kvm, uc_gprs) { struct kvm_sync_regs *sync_regs = &self->run->s.regs; @@ -329,4 +566,73 @@ TEST_F(uc_kvm, uc_gprs) ASSERT_EQ(1, sync_regs->gprs[0]); } +TEST_F(uc_kvm, uc_skey) +{ + struct kvm_s390_sie_block *sie_block = self->sie_block; + struct kvm_sync_regs *sync_regs = &self->run->s.regs; + u64 test_vaddr = VM_MEM_SIZE - (SZ_1M / 2); + struct kvm_run *run = self->run; + const u8 skeyvalue = 0x34; + + /* copy test_skey_asm to code_hva / code_gpa */ + TH_LOG("copy code %p to vm mapped memory %p / %p", + &test_skey_asm, (void *)self->code_hva, (void *)self->code_gpa); + memcpy((void *)self->code_hva, &test_skey_asm, PAGE_SIZE); + + /* set register content for test_skey_asm to access not mapped memory */ + sync_regs->gprs[1] = skeyvalue; + sync_regs->gprs[5] = self->base_gpa; + sync_regs->gprs[6] = test_vaddr; + run->kvm_dirty_regs |= KVM_SYNC_GPRS; + + /* DAT disabled + 64 bit mode */ + run->psw_mask = 0x0000000180000000ULL; + run->psw_addr = self->code_gpa; + + ASSERT_EQ(0, uc_run_once(self)); + ASSERT_EQ(true, uc_handle_exit(self)); + ASSERT_EQ(1, sync_regs->gprs[0]); + + /* ISKE */ + ASSERT_EQ(0, uc_run_once(self)); + + /* + * Bail out and skip the test after uc_skey_enable was executed but iske + * is still intercepted. Instructions are not handled by the kernel. + * Thus there is no need to test this here. + */ + TEST_ASSERT_EQ(0, sie_block->cpuflags & CPUSTAT_KSS); + TEST_ASSERT_EQ(0, sie_block->ictl & (ICTL_ISKE | ICTL_SSKE | ICTL_RRBE)); + TEST_ASSERT_EQ(KVM_EXIT_S390_SIEIC, self->run->exit_reason); + TEST_ASSERT_EQ(ICPT_INST, sie_block->icptcode); + TEST_REQUIRE(sie_block->ipa != 0xb229); + + /* ISKE contd. */ + ASSERT_EQ(false, uc_handle_exit(self)); + ASSERT_EQ(2, sync_regs->gprs[0]); + /* assert initial skey (ACC = 0, R & C = 1) */ + ASSERT_EQ(0x06, sync_regs->gprs[1]); + uc_assert_diag44(self); + + /* SSKE + ISKE */ + sync_regs->gprs[1] = skeyvalue; + run->kvm_dirty_regs |= KVM_SYNC_GPRS; + ASSERT_EQ(0, uc_run_once(self)); + ASSERT_EQ(false, uc_handle_exit(self)); + ASSERT_EQ(3, sync_regs->gprs[0]); + ASSERT_EQ(skeyvalue, sync_regs->gprs[1]); + uc_assert_diag44(self); + + /* RRBE + ISKE */ + sync_regs->gprs[1] = skeyvalue; + run->kvm_dirty_regs |= KVM_SYNC_GPRS; + ASSERT_EQ(0, uc_run_once(self)); + ASSERT_EQ(false, uc_handle_exit(self)); + ASSERT_EQ(4, sync_regs->gprs[0]); + /* assert R reset but rest of skey unchanged */ + ASSERT_EQ(skeyvalue & 0xfa, sync_regs->gprs[1]); + ASSERT_EQ(0, sync_regs->gprs[1] & 0x04); + uc_assert_diag44(self); +} + TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/kvm/x86_64/amx_test.c b/tools/testing/selftests/kvm/x86_64/amx_test.c index 903940c54d2d..f4ce5a185a7d 100644 --- a/tools/testing/selftests/kvm/x86_64/amx_test.c +++ b/tools/testing/selftests/kvm/x86_64/amx_test.c @@ -86,6 +86,8 @@ static inline void __xsavec(struct xstate *xstate, uint64_t rfbm) static void check_xtile_info(void) { + GUEST_ASSERT((xgetbv(0) & XFEATURE_MASK_XTILE) == XFEATURE_MASK_XTILE); + GUEST_ASSERT(this_cpu_has_p(X86_PROPERTY_XSTATE_MAX_SIZE_XCR0)); GUEST_ASSERT(this_cpu_property(X86_PROPERTY_XSTATE_MAX_SIZE_XCR0) <= XSAVE_SIZE); @@ -122,29 +124,12 @@ static void set_tilecfg(struct tile_config *cfg) } } -static void init_regs(void) -{ - uint64_t cr4, xcr0; - - GUEST_ASSERT(this_cpu_has(X86_FEATURE_XSAVE)); - - /* turn on CR4.OSXSAVE */ - cr4 = get_cr4(); - cr4 |= X86_CR4_OSXSAVE; - set_cr4(cr4); - GUEST_ASSERT(this_cpu_has(X86_FEATURE_OSXSAVE)); - - xcr0 = xgetbv(0); - xcr0 |= XFEATURE_MASK_XTILE; - xsetbv(0x0, xcr0); - GUEST_ASSERT((xgetbv(0) & XFEATURE_MASK_XTILE) == XFEATURE_MASK_XTILE); -} - static void __attribute__((__flatten__)) guest_code(struct tile_config *amx_cfg, struct tile_data *tiledata, struct xstate *xstate) { - init_regs(); + GUEST_ASSERT(this_cpu_has(X86_FEATURE_XSAVE) && + this_cpu_has(X86_FEATURE_OSXSAVE)); check_xtile_info(); GUEST_SYNC(1); diff --git a/tools/testing/selftests/kvm/x86_64/cpuid_test.c b/tools/testing/selftests/kvm/x86_64/cpuid_test.c index 8c579ce714e9..7b3fda6842bc 100644 --- a/tools/testing/selftests/kvm/x86_64/cpuid_test.c +++ b/tools/testing/selftests/kvm/x86_64/cpuid_test.c @@ -12,17 +12,16 @@ #include "kvm_util.h" #include "processor.h" -/* CPUIDs known to differ */ -struct { - u32 function; - u32 index; -} mangled_cpuids[] = { - /* - * These entries depend on the vCPU's XCR0 register and IA32_XSS MSR, - * which are not controlled for by this test. - */ - {.function = 0xd, .index = 0}, - {.function = 0xd, .index = 1}, +struct cpuid_mask { + union { + struct { + u32 eax; + u32 ebx; + u32 ecx; + u32 edx; + }; + u32 regs[4]; + }; }; static void test_guest_cpuids(struct kvm_cpuid2 *guest_cpuid) @@ -56,17 +55,29 @@ static void guest_main(struct kvm_cpuid2 *guest_cpuid) GUEST_DONE(); } -static bool is_cpuid_mangled(const struct kvm_cpuid_entry2 *entrie) +static struct cpuid_mask get_const_cpuid_mask(const struct kvm_cpuid_entry2 *entry) { - int i; - - for (i = 0; i < sizeof(mangled_cpuids); i++) { - if (mangled_cpuids[i].function == entrie->function && - mangled_cpuids[i].index == entrie->index) - return true; + struct cpuid_mask mask; + + memset(&mask, 0xff, sizeof(mask)); + + switch (entry->function) { + case 0x1: + mask.regs[X86_FEATURE_OSXSAVE.reg] &= ~BIT(X86_FEATURE_OSXSAVE.bit); + break; + case 0x7: + mask.regs[X86_FEATURE_OSPKE.reg] &= ~BIT(X86_FEATURE_OSPKE.bit); + break; + case 0xd: + /* + * CPUID.0xD.{0,1}.EBX enumerate XSAVE size based on the current + * XCR0 and IA32_XSS MSR values. + */ + if (entry->index < 2) + mask.ebx = 0; + break; } - - return false; + return mask; } static void compare_cpuids(const struct kvm_cpuid2 *cpuid1, @@ -79,6 +90,8 @@ static void compare_cpuids(const struct kvm_cpuid2 *cpuid1, "CPUID nent mismatch: %d vs. %d", cpuid1->nent, cpuid2->nent); for (i = 0; i < cpuid1->nent; i++) { + struct cpuid_mask mask; + e1 = &cpuid1->entries[i]; e2 = &cpuid2->entries[i]; @@ -88,15 +101,19 @@ static void compare_cpuids(const struct kvm_cpuid2 *cpuid1, i, e1->function, e1->index, e1->flags, e2->function, e2->index, e2->flags); - if (is_cpuid_mangled(e1)) - continue; + /* Mask off dynamic bits, e.g. OSXSAVE, when comparing entries. */ + mask = get_const_cpuid_mask(e1); - TEST_ASSERT(e1->eax == e2->eax && e1->ebx == e2->ebx && - e1->ecx == e2->ecx && e1->edx == e2->edx, + TEST_ASSERT((e1->eax & mask.eax) == (e2->eax & mask.eax) && + (e1->ebx & mask.ebx) == (e2->ebx & mask.ebx) && + (e1->ecx & mask.ecx) == (e2->ecx & mask.ecx) && + (e1->edx & mask.edx) == (e2->edx & mask.edx), "CPUID 0x%x.%x differ: 0x%x:0x%x:0x%x:0x%x vs 0x%x:0x%x:0x%x:0x%x", e1->function, e1->index, - e1->eax, e1->ebx, e1->ecx, e1->edx, - e2->eax, e2->ebx, e2->ecx, e2->edx); + e1->eax & mask.eax, e1->ebx & mask.ebx, + e1->ecx & mask.ecx, e1->edx & mask.edx, + e2->eax & mask.eax, e2->ebx & mask.ebx, + e2->ecx & mask.ecx, e2->edx & mask.edx); } } diff --git a/tools/testing/selftests/kvm/x86_64/cr4_cpuid_sync_test.c b/tools/testing/selftests/kvm/x86_64/cr4_cpuid_sync_test.c index 624dc725e14d..28cc66454601 100644 --- a/tools/testing/selftests/kvm/x86_64/cr4_cpuid_sync_test.c +++ b/tools/testing/selftests/kvm/x86_64/cr4_cpuid_sync_test.c @@ -19,30 +19,42 @@ #include "kvm_util.h" #include "processor.h" -static inline bool cr4_cpuid_is_sync(void) -{ - uint64_t cr4 = get_cr4(); - - return (this_cpu_has(X86_FEATURE_OSXSAVE) == !!(cr4 & X86_CR4_OSXSAVE)); -} +#define MAGIC_HYPERCALL_PORT 0x80 static void guest_code(void) { - uint64_t cr4; + u32 regs[4] = { + [KVM_CPUID_EAX] = X86_FEATURE_OSXSAVE.function, + [KVM_CPUID_ECX] = X86_FEATURE_OSXSAVE.index, + }; - /* turn on CR4.OSXSAVE */ - cr4 = get_cr4(); - cr4 |= X86_CR4_OSXSAVE; - set_cr4(cr4); + /* CR4.OSXSAVE should be enabled by default (for selftests vCPUs). */ + GUEST_ASSERT(get_cr4() & X86_CR4_OSXSAVE); /* verify CR4.OSXSAVE == CPUID.OSXSAVE */ - GUEST_ASSERT(cr4_cpuid_is_sync()); - - /* notify hypervisor to change CR4 */ - GUEST_SYNC(0); - - /* check again */ - GUEST_ASSERT(cr4_cpuid_is_sync()); + GUEST_ASSERT(this_cpu_has(X86_FEATURE_OSXSAVE)); + + /* + * Notify hypervisor to clear CR4.0SXSAVE, do CPUID and save output, + * and then restore CR4. Do this all in assembly to ensure no AVX + * instructions are executed while OSXSAVE=0. + */ + asm volatile ( + "out %%al, $" __stringify(MAGIC_HYPERCALL_PORT) "\n\t" + "cpuid\n\t" + "mov %%rdi, %%cr4\n\t" + : "+a" (regs[KVM_CPUID_EAX]), + "=b" (regs[KVM_CPUID_EBX]), + "+c" (regs[KVM_CPUID_ECX]), + "=d" (regs[KVM_CPUID_EDX]) + : "D" (get_cr4()) + ); + + /* Verify KVM cleared OSXSAVE in CPUID when it was cleared in CR4. */ + GUEST_ASSERT(!(regs[X86_FEATURE_OSXSAVE.reg] & BIT(X86_FEATURE_OSXSAVE.bit))); + + /* Verify restoring CR4 also restored OSXSAVE in CPUID. */ + GUEST_ASSERT(this_cpu_has(X86_FEATURE_OSXSAVE)); GUEST_DONE(); } @@ -62,13 +74,16 @@ int main(int argc, char *argv[]) vcpu_run(vcpu); TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO); - switch (get_ucall(vcpu, &uc)) { - case UCALL_SYNC: + if (vcpu->run->io.port == MAGIC_HYPERCALL_PORT && + vcpu->run->io.direction == KVM_EXIT_IO_OUT) { /* emulate hypervisor clearing CR4.OSXSAVE */ vcpu_sregs_get(vcpu, &sregs); sregs.cr4 &= ~X86_CR4_OSXSAVE; vcpu_sregs_set(vcpu, &sregs); - break; + continue; + } + + switch (get_ucall(vcpu, &uc)) { case UCALL_ABORT: REPORT_GUEST_ASSERT(uc); break; diff --git a/tools/testing/selftests/kvm/x86_64/debug_regs.c b/tools/testing/selftests/kvm/x86_64/debug_regs.c index 76cc2df9238a..2d814c1d1dc4 100644 --- a/tools/testing/selftests/kvm/x86_64/debug_regs.c +++ b/tools/testing/selftests/kvm/x86_64/debug_regs.c @@ -166,7 +166,7 @@ int main(void) /* Test single step */ target_rip = CAST_TO_RIP(ss_start); target_dr6 = 0xffff4ff0ULL; - for (i = 0; i < (sizeof(ss_size) / sizeof(ss_size[0])); i++) { + for (i = 0; i < ARRAY_SIZE(ss_size); i++) { target_rip += ss_size[i]; memset(&debug, 0, sizeof(debug)); debug.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP | diff --git a/tools/testing/selftests/kvm/x86_64/feature_msrs_test.c b/tools/testing/selftests/kvm/x86_64/feature_msrs_test.c new file mode 100644 index 000000000000..a72f13ae2edb --- /dev/null +++ b/tools/testing/selftests/kvm/x86_64/feature_msrs_test.c @@ -0,0 +1,113 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2020, Red Hat, Inc. + */ +#include <fcntl.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/ioctl.h> + +#include "test_util.h" +#include "kvm_util.h" +#include "processor.h" + +static bool is_kvm_controlled_msr(uint32_t msr) +{ + return msr == MSR_IA32_VMX_CR0_FIXED1 || msr == MSR_IA32_VMX_CR4_FIXED1; +} + +/* + * For VMX MSRs with a "true" variant, KVM requires userspace to set the "true" + * MSR, and doesn't allow setting the hidden version. + */ +static bool is_hidden_vmx_msr(uint32_t msr) +{ + switch (msr) { + case MSR_IA32_VMX_PINBASED_CTLS: + case MSR_IA32_VMX_PROCBASED_CTLS: + case MSR_IA32_VMX_EXIT_CTLS: + case MSR_IA32_VMX_ENTRY_CTLS: + return true; + default: + return false; + } +} + +static bool is_quirked_msr(uint32_t msr) +{ + return msr != MSR_AMD64_DE_CFG; +} + +static void test_feature_msr(uint32_t msr) +{ + const uint64_t supported_mask = kvm_get_feature_msr(msr); + uint64_t reset_value = is_quirked_msr(msr) ? supported_mask : 0; + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + + /* + * Don't bother testing KVM-controlled MSRs beyond verifying that the + * MSR can be read from userspace. Any value is effectively legal, as + * KVM is bound by x86 architecture, not by ABI. + */ + if (is_kvm_controlled_msr(msr)) + return; + + /* + * More goofy behavior. KVM reports the host CPU's actual revision ID, + * but initializes the vCPU's revision ID to an arbitrary value. + */ + if (msr == MSR_IA32_UCODE_REV) + reset_value = host_cpu_is_intel ? 0x100000000ULL : 0x01000065; + + /* + * For quirked MSRs, KVM's ABI is to initialize the vCPU's value to the + * full set of features supported by KVM. For non-quirked MSRs, and + * when the quirk is disabled, KVM must zero-initialize the MSR and let + * userspace do the configuration. + */ + vm = vm_create_with_one_vcpu(&vcpu, NULL); + TEST_ASSERT(vcpu_get_msr(vcpu, msr) == reset_value, + "Wanted 0x%lx for %squirked MSR 0x%x, got 0x%lx", + reset_value, is_quirked_msr(msr) ? "" : "non-", msr, + vcpu_get_msr(vcpu, msr)); + if (!is_hidden_vmx_msr(msr)) + vcpu_set_msr(vcpu, msr, supported_mask); + kvm_vm_free(vm); + + if (is_hidden_vmx_msr(msr)) + return; + + if (!kvm_has_cap(KVM_CAP_DISABLE_QUIRKS2) || + !(kvm_check_cap(KVM_CAP_DISABLE_QUIRKS2) & KVM_X86_QUIRK_STUFF_FEATURE_MSRS)) + return; + + vm = vm_create(1); + vm_enable_cap(vm, KVM_CAP_DISABLE_QUIRKS2, KVM_X86_QUIRK_STUFF_FEATURE_MSRS); + + vcpu = vm_vcpu_add(vm, 0, NULL); + TEST_ASSERT(!vcpu_get_msr(vcpu, msr), + "Quirk disabled, wanted '0' for MSR 0x%x, got 0x%lx", + msr, vcpu_get_msr(vcpu, msr)); + kvm_vm_free(vm); +} + +int main(int argc, char *argv[]) +{ + const struct kvm_msr_list *feature_list; + int i; + + /* + * Skip the entire test if MSR_FEATURES isn't supported, other tests + * will cover the "regular" list of MSRs, the coverage here is purely + * opportunistic and not interesting on its own. + */ + TEST_REQUIRE(kvm_has_cap(KVM_CAP_GET_MSR_FEATURES)); + + (void)kvm_get_msr_index_list(); + + feature_list = kvm_get_feature_msr_index_list(); + for (i = 0; i < feature_list->nmsrs; i++) + test_feature_msr(feature_list->indices[i]); +} diff --git a/tools/testing/selftests/kvm/x86_64/get_msr_index_features.c b/tools/testing/selftests/kvm/x86_64/get_msr_index_features.c deleted file mode 100644 index d09b3cbcadc6..000000000000 --- a/tools/testing/selftests/kvm/x86_64/get_msr_index_features.c +++ /dev/null @@ -1,35 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Test that KVM_GET_MSR_INDEX_LIST and - * KVM_GET_MSR_FEATURE_INDEX_LIST work as intended - * - * Copyright (C) 2020, Red Hat, Inc. - */ -#include <fcntl.h> -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <sys/ioctl.h> - -#include "test_util.h" -#include "kvm_util.h" -#include "processor.h" - -int main(int argc, char *argv[]) -{ - const struct kvm_msr_list *feature_list; - int i; - - /* - * Skip the entire test if MSR_FEATURES isn't supported, other tests - * will cover the "regular" list of MSRs, the coverage here is purely - * opportunistic and not interesting on its own. - */ - TEST_REQUIRE(kvm_has_cap(KVM_CAP_GET_MSR_FEATURES)); - - (void)kvm_get_msr_index_list(); - - feature_list = kvm_get_feature_msr_index_list(); - for (i = 0; i < feature_list->nmsrs; i++) - kvm_get_feature_msr(feature_list->indices[i]); -} diff --git a/tools/testing/selftests/kvm/x86_64/platform_info_test.c b/tools/testing/selftests/kvm/x86_64/platform_info_test.c index eda88080c186..9cbf283ebc55 100644 --- a/tools/testing/selftests/kvm/x86_64/platform_info_test.c +++ b/tools/testing/selftests/kvm/x86_64/platform_info_test.c @@ -72,8 +72,6 @@ int main(int argc, char *argv[]) } done: - vcpu_set_msr(vcpu, MSR_PLATFORM_INFO, msr_platform_info); - kvm_vm_free(vm); return 0; diff --git a/tools/testing/selftests/kvm/x86_64/sev_smoke_test.c b/tools/testing/selftests/kvm/x86_64/sev_smoke_test.c index 2e9197eb1652..ae77698e6e97 100644 --- a/tools/testing/selftests/kvm/x86_64/sev_smoke_test.c +++ b/tools/testing/selftests/kvm/x86_64/sev_smoke_test.c @@ -41,8 +41,8 @@ static void guest_sev_code(void) /* Stash state passed via VMSA before any compiled code runs. */ extern void guest_code_xsave(void); asm("guest_code_xsave:\n" - "mov $-1, %eax\n" - "mov $-1, %edx\n" + "mov $" __stringify(XFEATURE_MASK_X87_AVX) ", %eax\n" + "xor %edx, %edx\n" "xsave (%rdi)\n" "jmp guest_sev_es_code"); @@ -70,12 +70,6 @@ static void test_sync_vmsa(uint32_t policy) double x87val = M_PI; struct kvm_xsave __attribute__((aligned(64))) xsave = { 0 }; - struct kvm_sregs sregs; - struct kvm_xcrs xcrs = { - .nr_xcrs = 1, - .xcrs[0].xcr = 0, - .xcrs[0].value = XFEATURE_MASK_X87_AVX, - }; vm = vm_sev_create_with_one_vcpu(KVM_X86_SEV_ES_VM, guest_code_xsave, &vcpu); gva = vm_vaddr_alloc_shared(vm, PAGE_SIZE, KVM_UTIL_MIN_VADDR, @@ -84,11 +78,6 @@ static void test_sync_vmsa(uint32_t policy) vcpu_args_set(vcpu, 1, gva); - vcpu_sregs_get(vcpu, &sregs); - sregs.cr4 |= X86_CR4_OSFXSR | X86_CR4_OSXSAVE; - vcpu_sregs_set(vcpu, &sregs); - - vcpu_xcrs_set(vcpu, &xcrs); asm("fninit\n" "vpcmpeqb %%ymm4, %%ymm4, %%ymm4\n" "fldl %3\n" @@ -192,6 +181,8 @@ static void test_sev_es_shutdown(void) int main(int argc, char *argv[]) { + const u64 xf_mask = XFEATURE_MASK_X87_AVX; + TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SEV)); test_sev(guest_sev_code, SEV_POLICY_NO_DBG); @@ -204,7 +195,7 @@ int main(int argc, char *argv[]) test_sev_es_shutdown(); if (kvm_has_cap(KVM_CAP_XCRS) && - (xgetbv(0) & XFEATURE_MASK_X87_AVX) == XFEATURE_MASK_X87_AVX) { + (xgetbv(0) & kvm_cpu_supported_xcr0() & xf_mask) == xf_mask) { test_sync_vmsa(0); test_sync_vmsa(SEV_POLICY_NO_DBG); } diff --git a/tools/testing/selftests/kvm/x86_64/state_test.c b/tools/testing/selftests/kvm/x86_64/state_test.c index 1c756db329e5..141b7fc0c965 100644 --- a/tools/testing/selftests/kvm/x86_64/state_test.c +++ b/tools/testing/selftests/kvm/x86_64/state_test.c @@ -145,11 +145,6 @@ static void __attribute__((__flatten__)) guest_code(void *arg) memset(buffer, 0xcc, sizeof(buffer)); - set_cr4(get_cr4() | X86_CR4_OSXSAVE); - GUEST_ASSERT(this_cpu_has(X86_FEATURE_OSXSAVE)); - - xsetbv(0, xgetbv(0) | supported_xcr0); - /* * Modify state for all supported xfeatures to take them out of * their "init" state, i.e. to make them show up in XSTATE_BV. diff --git a/tools/testing/selftests/kvm/x86_64/vmx_pmu_caps_test.c b/tools/testing/selftests/kvm/x86_64/vmx_pmu_caps_test.c index 7c92536551cc..a1f5ff45d518 100644 --- a/tools/testing/selftests/kvm/x86_64/vmx_pmu_caps_test.c +++ b/tools/testing/selftests/kvm/x86_64/vmx_pmu_caps_test.c @@ -207,6 +207,29 @@ KVM_ONE_VCPU_TEST(vmx_pmu_caps, lbr_perf_capabilities, guest_code) TEST_ASSERT(!r, "Writing LBR_TOS should fail after disabling vPMU"); } +KVM_ONE_VCPU_TEST(vmx_pmu_caps, perf_capabilities_unsupported, guest_code) +{ + uint64_t val; + int i, r; + + vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, host_cap.capabilities); + val = vcpu_get_msr(vcpu, MSR_IA32_PERF_CAPABILITIES); + TEST_ASSERT_EQ(val, host_cap.capabilities); + + vcpu_clear_cpuid_feature(vcpu, X86_FEATURE_PDCM); + + val = vcpu_get_msr(vcpu, MSR_IA32_PERF_CAPABILITIES); + TEST_ASSERT_EQ(val, 0); + + vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, 0); + + for (i = 0; i < 64; i++) { + r = _vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, BIT_ULL(i)); + TEST_ASSERT(!r, "Setting PERF_CAPABILITIES bit %d (= 0x%llx) should fail without PDCM", + i, BIT_ULL(i)); + } +} + int main(int argc, char *argv[]) { TEST_REQUIRE(kvm_is_pmu_enabled()); diff --git a/tools/testing/selftests/kvm/x86_64/xcr0_cpuid_test.c b/tools/testing/selftests/kvm/x86_64/xcr0_cpuid_test.c index 95ce192d0753..c8a5c5e51661 100644 --- a/tools/testing/selftests/kvm/x86_64/xcr0_cpuid_test.c +++ b/tools/testing/selftests/kvm/x86_64/xcr0_cpuid_test.c @@ -48,16 +48,16 @@ do { \ static void guest_code(void) { - uint64_t xcr0_reset; + uint64_t initial_xcr0; uint64_t supported_xcr0; int i, vector; set_cr4(get_cr4() | X86_CR4_OSXSAVE); - xcr0_reset = xgetbv(0); + initial_xcr0 = xgetbv(0); supported_xcr0 = this_cpu_supported_xcr0(); - GUEST_ASSERT(xcr0_reset == XFEATURE_MASK_FP); + GUEST_ASSERT(initial_xcr0 == supported_xcr0); /* Check AVX */ ASSERT_XFEATURE_DEPENDENCIES(supported_xcr0, @@ -79,6 +79,11 @@ static void guest_code(void) ASSERT_ALL_OR_NONE_XFEATURE(supported_xcr0, XFEATURE_MASK_XTILE); + vector = xsetbv_safe(0, XFEATURE_MASK_FP); + __GUEST_ASSERT(!vector, + "Expected success on XSETBV(FP), got vector '0x%x'", + vector); + vector = xsetbv_safe(0, supported_xcr0); __GUEST_ASSERT(!vector, "Expected success on XSETBV(0x%lx), got vector '0x%x'", diff --git a/tools/testing/selftests/livepatch/Makefile b/tools/testing/selftests/livepatch/Makefile index 35418a4790be..a080eb54a215 100644 --- a/tools/testing/selftests/livepatch/Makefile +++ b/tools/testing/selftests/livepatch/Makefile @@ -10,7 +10,8 @@ TEST_PROGS := \ test-state.sh \ test-ftrace.sh \ test-sysfs.sh \ - test-syscall.sh + test-syscall.sh \ + test-kprobe.sh TEST_FILES := settings diff --git a/tools/testing/selftests/livepatch/functions.sh b/tools/testing/selftests/livepatch/functions.sh index fc4c6a016d38..e5d06fb40233 100644 --- a/tools/testing/selftests/livepatch/functions.sh +++ b/tools/testing/selftests/livepatch/functions.sh @@ -6,7 +6,10 @@ MAX_RETRIES=600 RETRY_INTERVAL=".1" # seconds -KLP_SYSFS_DIR="/sys/kernel/livepatch" +SYSFS_KERNEL_DIR="/sys/kernel" +SYSFS_KLP_DIR="$SYSFS_KERNEL_DIR/livepatch" +SYSFS_DEBUG_DIR="$SYSFS_KERNEL_DIR/debug" +SYSFS_KPROBES_DIR="$SYSFS_DEBUG_DIR/kprobes" # Kselftest framework requirement - SKIP code is 4 ksft_skip=4 @@ -55,22 +58,26 @@ function die() { } function push_config() { - DYNAMIC_DEBUG=$(grep '^kernel/livepatch' /sys/kernel/debug/dynamic_debug/control | \ + DYNAMIC_DEBUG=$(grep '^kernel/livepatch' "$SYSFS_DEBUG_DIR/dynamic_debug/control" | \ awk -F'[: ]' '{print "file " $1 " line " $2 " " $4}') FTRACE_ENABLED=$(sysctl --values kernel.ftrace_enabled) + KPROBE_ENABLED=$(cat "$SYSFS_KPROBES_DIR/enabled") } function pop_config() { if [[ -n "$DYNAMIC_DEBUG" ]]; then - echo -n "$DYNAMIC_DEBUG" > /sys/kernel/debug/dynamic_debug/control + echo -n "$DYNAMIC_DEBUG" > "$SYSFS_DEBUG_DIR/dynamic_debug/control" fi if [[ -n "$FTRACE_ENABLED" ]]; then sysctl kernel.ftrace_enabled="$FTRACE_ENABLED" &> /dev/null fi + if [[ -n "$KPROBE_ENABLED" ]]; then + echo "$KPROBE_ENABLED" > "$SYSFS_KPROBES_DIR/enabled" + fi } function set_dynamic_debug() { - cat <<-EOF > /sys/kernel/debug/dynamic_debug/control + cat <<-EOF > "$SYSFS_DEBUG_DIR/dynamic_debug/control" file kernel/livepatch/* +p func klp_try_switch_task -p EOF @@ -183,7 +190,7 @@ function load_lp_nowait() { __load_mod "$mod" "$@" # Wait for livepatch in sysfs ... - loop_until '[[ -e "/sys/kernel/livepatch/$mod" ]]' || + loop_until '[[ -e "$SYSFS_KLP_DIR/$mod" ]]' || die "failed to load module $mod (sysfs)" } @@ -196,7 +203,7 @@ function load_lp() { load_lp_nowait "$mod" "$@" # Wait until the transition finishes ... - loop_until 'grep -q '^0$' /sys/kernel/livepatch/$mod/transition' || + loop_until 'grep -q '^0$' $SYSFS_KLP_DIR/$mod/transition' || die "failed to complete transition" } @@ -246,12 +253,12 @@ function unload_lp() { function disable_lp() { local mod="$1" - log "% echo 0 > /sys/kernel/livepatch/$mod/enabled" - echo 0 > /sys/kernel/livepatch/"$mod"/enabled + log "% echo 0 > $SYSFS_KLP_DIR/$mod/enabled" + echo 0 > "$SYSFS_KLP_DIR/$mod/enabled" # Wait until the transition finishes and the livepatch gets # removed from sysfs... - loop_until '[[ ! -e "/sys/kernel/livepatch/$mod" ]]' || + loop_until '[[ ! -e "$SYSFS_KLP_DIR/$mod" ]]' || die "failed to disable livepatch $mod" } @@ -322,7 +329,7 @@ function check_sysfs_rights() { local rel_path="$1"; shift local expected_rights="$1"; shift - local path="$KLP_SYSFS_DIR/$mod/$rel_path" + local path="$SYSFS_KLP_DIR/$mod/$rel_path" local rights=$(/bin/stat --format '%A' "$path") if test "$rights" != "$expected_rights" ; then die "Unexpected access rights of $path: $expected_rights vs. $rights" @@ -338,7 +345,7 @@ function check_sysfs_value() { local rel_path="$1"; shift local expected_value="$1"; shift - local path="$KLP_SYSFS_DIR/$mod/$rel_path" + local path="$SYSFS_KLP_DIR/$mod/$rel_path" local value=`cat $path` if test "$value" != "$expected_value" ; then die "Unexpected value in $path: $expected_value vs. $value" diff --git a/tools/testing/selftests/livepatch/test-callbacks.sh b/tools/testing/selftests/livepatch/test-callbacks.sh index 32b150e25b10..37bbc3fb2780 100755 --- a/tools/testing/selftests/livepatch/test-callbacks.sh +++ b/tools/testing/selftests/livepatch/test-callbacks.sh @@ -46,7 +46,7 @@ livepatch: '$MOD_LIVEPATCH': completing patching transition $MOD_LIVEPATCH: post_patch_callback: vmlinux $MOD_LIVEPATCH: post_patch_callback: $MOD_TARGET -> [MODULE_STATE_LIVE] Normal state livepatch: '$MOD_LIVEPATCH': patching complete -% echo 0 > /sys/kernel/livepatch/$MOD_LIVEPATCH/enabled +% echo 0 > $SYSFS_KLP_DIR/$MOD_LIVEPATCH/enabled livepatch: '$MOD_LIVEPATCH': initializing unpatching transition $MOD_LIVEPATCH: pre_unpatch_callback: vmlinux $MOD_LIVEPATCH: pre_unpatch_callback: $MOD_TARGET -> [MODULE_STATE_LIVE] Normal state @@ -94,7 +94,7 @@ livepatch: applying patch '$MOD_LIVEPATCH' to loading module '$MOD_TARGET' $MOD_LIVEPATCH: pre_patch_callback: $MOD_TARGET -> [MODULE_STATE_COMING] Full formed, running module_init $MOD_LIVEPATCH: post_patch_callback: $MOD_TARGET -> [MODULE_STATE_COMING] Full formed, running module_init $MOD_TARGET: ${MOD_TARGET}_init -% echo 0 > /sys/kernel/livepatch/$MOD_LIVEPATCH/enabled +% echo 0 > $SYSFS_KLP_DIR/$MOD_LIVEPATCH/enabled livepatch: '$MOD_LIVEPATCH': initializing unpatching transition $MOD_LIVEPATCH: pre_unpatch_callback: vmlinux $MOD_LIVEPATCH: pre_unpatch_callback: $MOD_TARGET -> [MODULE_STATE_LIVE] Normal state @@ -146,7 +146,7 @@ $MOD_TARGET: ${MOD_TARGET}_exit $MOD_LIVEPATCH: pre_unpatch_callback: $MOD_TARGET -> [MODULE_STATE_GOING] Going away livepatch: reverting patch '$MOD_LIVEPATCH' on unloading module '$MOD_TARGET' $MOD_LIVEPATCH: post_unpatch_callback: $MOD_TARGET -> [MODULE_STATE_GOING] Going away -% echo 0 > /sys/kernel/livepatch/$MOD_LIVEPATCH/enabled +% echo 0 > $SYSFS_KLP_DIR/$MOD_LIVEPATCH/enabled livepatch: '$MOD_LIVEPATCH': initializing unpatching transition $MOD_LIVEPATCH: pre_unpatch_callback: vmlinux livepatch: '$MOD_LIVEPATCH': starting unpatching transition @@ -195,7 +195,7 @@ $MOD_TARGET: ${MOD_TARGET}_exit $MOD_LIVEPATCH: pre_unpatch_callback: $MOD_TARGET -> [MODULE_STATE_GOING] Going away livepatch: reverting patch '$MOD_LIVEPATCH' on unloading module '$MOD_TARGET' $MOD_LIVEPATCH: post_unpatch_callback: $MOD_TARGET -> [MODULE_STATE_GOING] Going away -% echo 0 > /sys/kernel/livepatch/$MOD_LIVEPATCH/enabled +% echo 0 > $SYSFS_KLP_DIR/$MOD_LIVEPATCH/enabled livepatch: '$MOD_LIVEPATCH': initializing unpatching transition $MOD_LIVEPATCH: pre_unpatch_callback: vmlinux livepatch: '$MOD_LIVEPATCH': starting unpatching transition @@ -227,7 +227,7 @@ livepatch: '$MOD_LIVEPATCH': starting patching transition livepatch: '$MOD_LIVEPATCH': completing patching transition $MOD_LIVEPATCH: post_patch_callback: vmlinux livepatch: '$MOD_LIVEPATCH': patching complete -% echo 0 > /sys/kernel/livepatch/$MOD_LIVEPATCH/enabled +% echo 0 > $SYSFS_KLP_DIR/$MOD_LIVEPATCH/enabled livepatch: '$MOD_LIVEPATCH': initializing unpatching transition $MOD_LIVEPATCH: pre_unpatch_callback: vmlinux livepatch: '$MOD_LIVEPATCH': starting unpatching transition @@ -310,7 +310,7 @@ $MOD_LIVEPATCH: pre_patch_callback: $MOD_TARGET -> [MODULE_STATE_COMING] Full fo livepatch: pre-patch callback failed for object '$MOD_TARGET' livepatch: patch '$MOD_LIVEPATCH' failed for module '$MOD_TARGET', refusing to load module '$MOD_TARGET' insmod: ERROR: could not insert module test_modules/$MOD_TARGET.ko: No such device -% echo 0 > /sys/kernel/livepatch/$MOD_LIVEPATCH/enabled +% echo 0 > $SYSFS_KLP_DIR/$MOD_LIVEPATCH/enabled livepatch: '$MOD_LIVEPATCH': initializing unpatching transition $MOD_LIVEPATCH: pre_unpatch_callback: vmlinux livepatch: '$MOD_LIVEPATCH': starting unpatching transition @@ -364,7 +364,7 @@ $MOD_TARGET: ${MOD_TARGET}_exit $MOD_LIVEPATCH: pre_unpatch_callback: $MOD_TARGET -> [MODULE_STATE_GOING] Going away livepatch: reverting patch '$MOD_LIVEPATCH' on unloading module '$MOD_TARGET' $MOD_LIVEPATCH: post_unpatch_callback: $MOD_TARGET -> [MODULE_STATE_GOING] Going away -% echo 0 > /sys/kernel/livepatch/$MOD_LIVEPATCH/enabled +% echo 0 > $SYSFS_KLP_DIR/$MOD_LIVEPATCH/enabled livepatch: '$MOD_LIVEPATCH': initializing unpatching transition $MOD_LIVEPATCH: pre_unpatch_callback: vmlinux $MOD_LIVEPATCH: pre_unpatch_callback: $MOD_TARGET_BUSY -> [MODULE_STATE_LIVE] Normal state @@ -412,7 +412,7 @@ load_lp_nowait $MOD_LIVEPATCH # Wait until the livepatch reports in-transition state, i.e. that it's # stalled on $MOD_TARGET_BUSY::busymod_work_func() -loop_until 'grep -q '^1$' /sys/kernel/livepatch/$MOD_LIVEPATCH/transition' || +loop_until 'grep -q '^1$' $SYSFS_KLP_DIR/$MOD_LIVEPATCH/transition' || die "failed to stall transition" load_mod $MOD_TARGET @@ -438,7 +438,7 @@ $MOD_TARGET: ${MOD_TARGET}_init $MOD_TARGET: ${MOD_TARGET}_exit livepatch: reverting patch '$MOD_LIVEPATCH' on unloading module '$MOD_TARGET' $MOD_LIVEPATCH: post_unpatch_callback: $MOD_TARGET -> [MODULE_STATE_GOING] Going away -% echo 0 > /sys/kernel/livepatch/$MOD_LIVEPATCH/enabled +% echo 0 > $SYSFS_KLP_DIR/$MOD_LIVEPATCH/enabled livepatch: '$MOD_LIVEPATCH': reversing transition from patching to unpatching livepatch: '$MOD_LIVEPATCH': starting unpatching transition livepatch: '$MOD_LIVEPATCH': completing unpatching transition @@ -483,14 +483,14 @@ livepatch: '$MOD_LIVEPATCH2': starting patching transition livepatch: '$MOD_LIVEPATCH2': completing patching transition $MOD_LIVEPATCH2: post_patch_callback: vmlinux livepatch: '$MOD_LIVEPATCH2': patching complete -% echo 0 > /sys/kernel/livepatch/$MOD_LIVEPATCH2/enabled +% echo 0 > $SYSFS_KLP_DIR/$MOD_LIVEPATCH2/enabled livepatch: '$MOD_LIVEPATCH2': initializing unpatching transition $MOD_LIVEPATCH2: pre_unpatch_callback: vmlinux livepatch: '$MOD_LIVEPATCH2': starting unpatching transition livepatch: '$MOD_LIVEPATCH2': completing unpatching transition $MOD_LIVEPATCH2: post_unpatch_callback: vmlinux livepatch: '$MOD_LIVEPATCH2': unpatching complete -% echo 0 > /sys/kernel/livepatch/$MOD_LIVEPATCH/enabled +% echo 0 > $SYSFS_KLP_DIR/$MOD_LIVEPATCH/enabled livepatch: '$MOD_LIVEPATCH': initializing unpatching transition $MOD_LIVEPATCH: pre_unpatch_callback: vmlinux livepatch: '$MOD_LIVEPATCH': starting unpatching transition @@ -539,7 +539,7 @@ livepatch: '$MOD_LIVEPATCH2': starting patching transition livepatch: '$MOD_LIVEPATCH2': completing patching transition $MOD_LIVEPATCH2: post_patch_callback: vmlinux livepatch: '$MOD_LIVEPATCH2': patching complete -% echo 0 > /sys/kernel/livepatch/$MOD_LIVEPATCH2/enabled +% echo 0 > $SYSFS_KLP_DIR/$MOD_LIVEPATCH2/enabled livepatch: '$MOD_LIVEPATCH2': initializing unpatching transition $MOD_LIVEPATCH2: pre_unpatch_callback: vmlinux livepatch: '$MOD_LIVEPATCH2': starting unpatching transition diff --git a/tools/testing/selftests/livepatch/test-ftrace.sh b/tools/testing/selftests/livepatch/test-ftrace.sh index 730218bce99c..fe14f248913a 100755 --- a/tools/testing/selftests/livepatch/test-ftrace.sh +++ b/tools/testing/selftests/livepatch/test-ftrace.sh @@ -53,7 +53,7 @@ livepatch: '$MOD_LIVEPATCH': starting patching transition livepatch: '$MOD_LIVEPATCH': completing patching transition livepatch: '$MOD_LIVEPATCH': patching complete livepatch: sysctl: setting key \"kernel.ftrace_enabled\": Device or resource busy -% echo 0 > /sys/kernel/livepatch/$MOD_LIVEPATCH/enabled +% echo 0 > $SYSFS_KLP_DIR/$MOD_LIVEPATCH/enabled livepatch: '$MOD_LIVEPATCH': initializing unpatching transition livepatch: '$MOD_LIVEPATCH': starting unpatching transition livepatch: '$MOD_LIVEPATCH': completing unpatching transition diff --git a/tools/testing/selftests/livepatch/test-kprobe.sh b/tools/testing/selftests/livepatch/test-kprobe.sh new file mode 100755 index 000000000000..115065156016 --- /dev/null +++ b/tools/testing/selftests/livepatch/test-kprobe.sh @@ -0,0 +1,62 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# Copyright (C) 2024 SUSE +# Author: Michael Vetter <mvetter@suse.com> + +. $(dirname $0)/functions.sh + +MOD_LIVEPATCH=test_klp_livepatch +MOD_KPROBE=test_klp_kprobe + +setup_config + +# Kprobe a function and verify that we can't livepatch that same function +# when it uses a post_handler since only one IPMODIFY maybe be registered +# to any given function at a time. + +start_test "livepatch interaction with kprobed function with post_handler" + +echo 1 > "$SYSFS_KPROBES_DIR/enabled" + +load_mod $MOD_KPROBE has_post_handler=true +load_failing_mod $MOD_LIVEPATCH +unload_mod $MOD_KPROBE + +check_result "% insmod test_modules/test_klp_kprobe.ko has_post_handler=true +% insmod test_modules/$MOD_LIVEPATCH.ko +livepatch: enabling patch '$MOD_LIVEPATCH' +livepatch: '$MOD_LIVEPATCH': initializing patching transition +livepatch: failed to register ftrace handler for function 'cmdline_proc_show' (-16) +livepatch: failed to patch object 'vmlinux' +livepatch: failed to enable patch '$MOD_LIVEPATCH' +livepatch: '$MOD_LIVEPATCH': canceling patching transition, going to unpatch +livepatch: '$MOD_LIVEPATCH': completing unpatching transition +livepatch: '$MOD_LIVEPATCH': unpatching complete +insmod: ERROR: could not insert module test_modules/$MOD_LIVEPATCH.ko: Device or resource busy +% rmmod test_klp_kprobe" + +start_test "livepatch interaction with kprobed function without post_handler" + +load_mod $MOD_KPROBE has_post_handler=false +load_lp $MOD_LIVEPATCH + +unload_mod $MOD_KPROBE +disable_lp $MOD_LIVEPATCH +unload_lp $MOD_LIVEPATCH + +check_result "% insmod test_modules/test_klp_kprobe.ko has_post_handler=false +% insmod test_modules/$MOD_LIVEPATCH.ko +livepatch: enabling patch '$MOD_LIVEPATCH' +livepatch: '$MOD_LIVEPATCH': initializing patching transition +livepatch: '$MOD_LIVEPATCH': starting patching transition +livepatch: '$MOD_LIVEPATCH': completing patching transition +livepatch: '$MOD_LIVEPATCH': patching complete +% rmmod test_klp_kprobe +% echo 0 > /sys/kernel/livepatch/$MOD_LIVEPATCH/enabled +livepatch: '$MOD_LIVEPATCH': initializing unpatching transition +livepatch: '$MOD_LIVEPATCH': starting unpatching transition +livepatch: '$MOD_LIVEPATCH': completing unpatching transition +livepatch: '$MOD_LIVEPATCH': unpatching complete +% rmmod $MOD_LIVEPATCH" + +exit 0 diff --git a/tools/testing/selftests/livepatch/test-livepatch.sh b/tools/testing/selftests/livepatch/test-livepatch.sh index bd13257bfdfe..6673023d2b66 100755 --- a/tools/testing/selftests/livepatch/test-livepatch.sh +++ b/tools/testing/selftests/livepatch/test-livepatch.sh @@ -39,7 +39,7 @@ livepatch: '$MOD_LIVEPATCH1': initializing patching transition livepatch: '$MOD_LIVEPATCH1': starting patching transition livepatch: '$MOD_LIVEPATCH1': completing patching transition livepatch: '$MOD_LIVEPATCH1': patching complete -% echo 0 > /sys/kernel/livepatch/$MOD_LIVEPATCH1/enabled +% echo 0 > $SYSFS_KLP_DIR/$MOD_LIVEPATCH1/enabled livepatch: '$MOD_LIVEPATCH1': initializing unpatching transition livepatch: '$MOD_LIVEPATCH1': starting unpatching transition livepatch: '$MOD_LIVEPATCH1': completing unpatching transition @@ -92,14 +92,14 @@ livepatch: '$MOD_REPLACE': completing patching transition livepatch: '$MOD_REPLACE': patching complete $MOD_LIVEPATCH1: this has been live patched $MOD_REPLACE: this has been live patched -% echo 0 > /sys/kernel/livepatch/$MOD_REPLACE/enabled +% echo 0 > $SYSFS_KLP_DIR/$MOD_REPLACE/enabled livepatch: '$MOD_REPLACE': initializing unpatching transition livepatch: '$MOD_REPLACE': starting unpatching transition livepatch: '$MOD_REPLACE': completing unpatching transition livepatch: '$MOD_REPLACE': unpatching complete % rmmod $MOD_REPLACE $MOD_LIVEPATCH1: this has been live patched -% echo 0 > /sys/kernel/livepatch/$MOD_LIVEPATCH1/enabled +% echo 0 > $SYSFS_KLP_DIR/$MOD_LIVEPATCH1/enabled livepatch: '$MOD_LIVEPATCH1': initializing unpatching transition livepatch: '$MOD_LIVEPATCH1': starting unpatching transition livepatch: '$MOD_LIVEPATCH1': completing unpatching transition @@ -128,7 +128,7 @@ for mod in $MOD_LIVEPATCH2 $MOD_LIVEPATCH3; do load_lp "$mod" done -mods=(/sys/kernel/livepatch/*) +mods=($SYSFS_KLP_DIR/*) nmods=${#mods[@]} if [ "$nmods" -ne 3 ]; then die "Expecting three modules listed, found $nmods" @@ -139,7 +139,7 @@ load_lp $MOD_REPLACE replace=1 grep 'live patched' /proc/cmdline > /dev/kmsg grep 'live patched' /proc/meminfo > /dev/kmsg -loop_until 'mods=(/sys/kernel/livepatch/*); nmods=${#mods[@]}; [[ "$nmods" -eq 1 ]]' || +loop_until 'mods=($SYSFS_KLP_DIR/*); nmods=${#mods[@]}; [[ "$nmods" -eq 1 ]]' || die "Expecting only one moduled listed, found $nmods" # These modules were disabled by the atomic replace @@ -188,7 +188,7 @@ $MOD_REPLACE: this has been live patched % rmmod $MOD_LIVEPATCH2 % rmmod $MOD_LIVEPATCH1 $MOD_REPLACE: this has been live patched -% echo 0 > /sys/kernel/livepatch/$MOD_REPLACE/enabled +% echo 0 > $SYSFS_KLP_DIR/$MOD_REPLACE/enabled livepatch: '$MOD_REPLACE': initializing unpatching transition livepatch: '$MOD_REPLACE': starting unpatching transition livepatch: '$MOD_REPLACE': completing unpatching transition diff --git a/tools/testing/selftests/livepatch/test-state.sh b/tools/testing/selftests/livepatch/test-state.sh index 10a52ac06185..04b66380f8a0 100755 --- a/tools/testing/selftests/livepatch/test-state.sh +++ b/tools/testing/selftests/livepatch/test-state.sh @@ -29,7 +29,7 @@ livepatch: '$MOD_LIVEPATCH': completing patching transition $MOD_LIVEPATCH: post_patch_callback: vmlinux $MOD_LIVEPATCH: fix_console_loglevel: fixing console_loglevel livepatch: '$MOD_LIVEPATCH': patching complete -% echo 0 > /sys/kernel/livepatch/$MOD_LIVEPATCH/enabled +% echo 0 > $SYSFS_KLP_DIR/$MOD_LIVEPATCH/enabled livepatch: '$MOD_LIVEPATCH': initializing unpatching transition $MOD_LIVEPATCH: pre_unpatch_callback: vmlinux $MOD_LIVEPATCH: restore_console_loglevel: restoring console_loglevel @@ -72,7 +72,7 @@ $MOD_LIVEPATCH2: post_patch_callback: vmlinux $MOD_LIVEPATCH2: fix_console_loglevel: taking over the console_loglevel change livepatch: '$MOD_LIVEPATCH2': patching complete % rmmod $MOD_LIVEPATCH -% echo 0 > /sys/kernel/livepatch/$MOD_LIVEPATCH2/enabled +% echo 0 > $SYSFS_KLP_DIR/$MOD_LIVEPATCH2/enabled livepatch: '$MOD_LIVEPATCH2': initializing unpatching transition $MOD_LIVEPATCH2: pre_unpatch_callback: vmlinux $MOD_LIVEPATCH2: restore_console_loglevel: restoring console_loglevel @@ -127,7 +127,7 @@ livepatch: '$MOD_LIVEPATCH2': completing patching transition $MOD_LIVEPATCH2: post_patch_callback: vmlinux $MOD_LIVEPATCH2: fix_console_loglevel: taking over the console_loglevel change livepatch: '$MOD_LIVEPATCH2': patching complete -% echo 0 > /sys/kernel/livepatch/$MOD_LIVEPATCH2/enabled +% echo 0 > $SYSFS_KLP_DIR/$MOD_LIVEPATCH2/enabled livepatch: '$MOD_LIVEPATCH2': initializing unpatching transition $MOD_LIVEPATCH2: pre_unpatch_callback: vmlinux $MOD_LIVEPATCH2: restore_console_loglevel: restoring console_loglevel @@ -162,7 +162,7 @@ livepatch: '$MOD_LIVEPATCH2': patching complete % insmod test_modules/$MOD_LIVEPATCH.ko livepatch: Livepatch patch ($MOD_LIVEPATCH) is not compatible with the already installed livepatches. insmod: ERROR: could not insert module test_modules/$MOD_LIVEPATCH.ko: Invalid parameters -% echo 0 > /sys/kernel/livepatch/$MOD_LIVEPATCH2/enabled +% echo 0 > $SYSFS_KLP_DIR/$MOD_LIVEPATCH2/enabled livepatch: '$MOD_LIVEPATCH2': initializing unpatching transition $MOD_LIVEPATCH2: pre_unpatch_callback: vmlinux $MOD_LIVEPATCH2: restore_console_loglevel: restoring console_loglevel diff --git a/tools/testing/selftests/livepatch/test-syscall.sh b/tools/testing/selftests/livepatch/test-syscall.sh index 289eb7d4c4b3..5f9344277b62 100755 --- a/tools/testing/selftests/livepatch/test-syscall.sh +++ b/tools/testing/selftests/livepatch/test-syscall.sh @@ -27,9 +27,9 @@ pid_list=$(echo ${pids[@]} | tr ' ' ',') load_lp $MOD_SYSCALL klp_pids=$pid_list # wait for all tasks to transition to patched state -loop_until 'grep -q '^0$' /sys/kernel/test_klp_syscall/npids' +loop_until 'grep -q '^0$' $SYSFS_KERNEL_DIR/$MOD_SYSCALL/npids' -pending_pids=$(cat /sys/kernel/test_klp_syscall/npids) +pending_pids=$(cat $SYSFS_KERNEL_DIR/$MOD_SYSCALL/npids) log "$MOD_SYSCALL: Remaining not livepatched processes: $pending_pids" for pid in ${pids[@]}; do @@ -46,7 +46,7 @@ livepatch: '$MOD_SYSCALL': starting patching transition livepatch: '$MOD_SYSCALL': completing patching transition livepatch: '$MOD_SYSCALL': patching complete $MOD_SYSCALL: Remaining not livepatched processes: 0 -% echo 0 > /sys/kernel/livepatch/$MOD_SYSCALL/enabled +% echo 0 > $SYSFS_KLP_DIR/$MOD_SYSCALL/enabled livepatch: '$MOD_SYSCALL': initializing unpatching transition livepatch: '$MOD_SYSCALL': starting unpatching transition livepatch: '$MOD_SYSCALL': completing unpatching transition diff --git a/tools/testing/selftests/livepatch/test-sysfs.sh b/tools/testing/selftests/livepatch/test-sysfs.sh index 05a14f5a7bfb..2c91428d2997 100755 --- a/tools/testing/selftests/livepatch/test-sysfs.sh +++ b/tools/testing/selftests/livepatch/test-sysfs.sh @@ -34,7 +34,7 @@ livepatch: '$MOD_LIVEPATCH': initializing patching transition livepatch: '$MOD_LIVEPATCH': starting patching transition livepatch: '$MOD_LIVEPATCH': completing patching transition livepatch: '$MOD_LIVEPATCH': patching complete -% echo 0 > /sys/kernel/livepatch/$MOD_LIVEPATCH/enabled +% echo 0 > $SYSFS_KLP_DIR/$MOD_LIVEPATCH/enabled livepatch: '$MOD_LIVEPATCH': initializing unpatching transition livepatch: '$MOD_LIVEPATCH': starting unpatching transition livepatch: '$MOD_LIVEPATCH': completing unpatching transition @@ -75,7 +75,7 @@ test_klp_callbacks_mod: test_klp_callbacks_mod_exit test_klp_callbacks_demo: pre_unpatch_callback: test_klp_callbacks_mod -> [MODULE_STATE_GOING] Going away livepatch: reverting patch 'test_klp_callbacks_demo' on unloading module 'test_klp_callbacks_mod' test_klp_callbacks_demo: post_unpatch_callback: test_klp_callbacks_mod -> [MODULE_STATE_GOING] Going away -% echo 0 > /sys/kernel/livepatch/test_klp_callbacks_demo/enabled +% echo 0 > $SYSFS_KLP_DIR/test_klp_callbacks_demo/enabled livepatch: 'test_klp_callbacks_demo': initializing unpatching transition test_klp_callbacks_demo: pre_unpatch_callback: vmlinux livepatch: 'test_klp_callbacks_demo': starting unpatching transition @@ -101,7 +101,7 @@ livepatch: '$MOD_LIVEPATCH': initializing patching transition livepatch: '$MOD_LIVEPATCH': starting patching transition livepatch: '$MOD_LIVEPATCH': completing patching transition livepatch: '$MOD_LIVEPATCH': patching complete -% echo 0 > /sys/kernel/livepatch/$MOD_LIVEPATCH/enabled +% echo 0 > $SYSFS_KLP_DIR/$MOD_LIVEPATCH/enabled livepatch: '$MOD_LIVEPATCH': initializing unpatching transition livepatch: '$MOD_LIVEPATCH': starting unpatching transition livepatch: '$MOD_LIVEPATCH': completing unpatching transition @@ -124,7 +124,7 @@ livepatch: '$MOD_LIVEPATCH': initializing patching transition livepatch: '$MOD_LIVEPATCH': starting patching transition livepatch: '$MOD_LIVEPATCH': completing patching transition livepatch: '$MOD_LIVEPATCH': patching complete -% echo 0 > /sys/kernel/livepatch/$MOD_LIVEPATCH/enabled +% echo 0 > $SYSFS_KLP_DIR/$MOD_LIVEPATCH/enabled livepatch: '$MOD_LIVEPATCH': initializing unpatching transition livepatch: '$MOD_LIVEPATCH': starting unpatching transition livepatch: '$MOD_LIVEPATCH': completing unpatching transition diff --git a/tools/testing/selftests/livepatch/test_modules/Makefile b/tools/testing/selftests/livepatch/test_modules/Makefile index e6e638c4bcba..939230e571f5 100644 --- a/tools/testing/selftests/livepatch/test_modules/Makefile +++ b/tools/testing/selftests/livepatch/test_modules/Makefile @@ -6,11 +6,12 @@ obj-m += test_klp_atomic_replace.o \ test_klp_callbacks_demo.o \ test_klp_callbacks_demo2.o \ test_klp_callbacks_mod.o \ + test_klp_kprobe.o \ test_klp_livepatch.o \ + test_klp_shadow_vars.o \ test_klp_state.o \ test_klp_state2.o \ test_klp_state3.o \ - test_klp_shadow_vars.o \ test_klp_syscall.o # Ensure that KDIR exists, otherwise skip the compilation diff --git a/tools/testing/selftests/livepatch/test_modules/test_klp_kprobe.c b/tools/testing/selftests/livepatch/test_modules/test_klp_kprobe.c new file mode 100644 index 000000000000..67a8d29012f6 --- /dev/null +++ b/tools/testing/selftests/livepatch/test_modules/test_klp_kprobe.c @@ -0,0 +1,38 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (C) 2024 Marcos Paulo de Souza <mpdesouza@suse.com> +// Copyright (C) 2024 Michael Vetter <mvetter@suse.com> + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/kprobes.h> + +static bool has_post_handler = true; +module_param(has_post_handler, bool, 0444); + +static void __kprobes post_handler(struct kprobe *p, struct pt_regs *regs, + unsigned long flags) +{ +} + +static struct kprobe kp = { + .symbol_name = "cmdline_proc_show", +}; + +static int __init kprobe_init(void) +{ + if (has_post_handler) + kp.post_handler = post_handler; + + return register_kprobe(&kp); +} + +static void __exit kprobe_exit(void) +{ + unregister_kprobe(&kp); +} + +module_init(kprobe_init) +module_exit(kprobe_exit) +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Michael Vetter <mvetter@suse.com>"); +MODULE_DESCRIPTION("Livepatch test: kprobe function"); diff --git a/tools/testing/selftests/memfd/memfd_test.c b/tools/testing/selftests/memfd/memfd_test.c index 95af2d78fd31..0a0b55516028 100644 --- a/tools/testing/selftests/memfd/memfd_test.c +++ b/tools/testing/selftests/memfd/memfd_test.c @@ -9,6 +9,7 @@ #include <fcntl.h> #include <linux/memfd.h> #include <sched.h> +#include <stdbool.h> #include <stdio.h> #include <stdlib.h> #include <signal.h> @@ -1557,6 +1558,11 @@ static void test_share_fork(char *banner, char *b_suffix) close(fd); } +static bool pid_ns_supported(void) +{ + return access("/proc/self/ns/pid", F_OK) == 0; +} + int main(int argc, char **argv) { pid_t pid; @@ -1591,8 +1597,12 @@ int main(int argc, char **argv) test_seal_grow(); test_seal_resize(); - test_sysctl_simple(); - test_sysctl_nested(); + if (pid_ns_supported()) { + test_sysctl_simple(); + test_sysctl_nested(); + } else { + printf("PID namespaces are not supported; skipping sysctl tests\n"); + } test_share_dup("SHARE-DUP", ""); test_share_mmap("SHARE-MMAP", ""); diff --git a/tools/testing/selftests/mm/.gitignore b/tools/testing/selftests/mm/.gitignore index da030b43e43b..8f01f4da1c0d 100644 --- a/tools/testing/selftests/mm/.gitignore +++ b/tools/testing/selftests/mm/.gitignore @@ -51,3 +51,7 @@ hugetlb_madv_vs_map mseal_test seal_elf droppable +hugetlb_dio +pkey_sighandler_tests_32 +pkey_sighandler_tests_64 +guard-pages diff --git a/tools/testing/selftests/mm/Makefile b/tools/testing/selftests/mm/Makefile index 02e1204971b0..3de23ea4663f 100644 --- a/tools/testing/selftests/mm/Makefile +++ b/tools/testing/selftests/mm/Makefile @@ -36,6 +36,17 @@ MAKEFLAGS += --no-builtin-rules CFLAGS = -Wall -I $(top_srcdir) $(EXTRA_CFLAGS) $(KHDR_INCLUDES) $(TOOLS_INCLUDES) LDLIBS = -lrt -lpthread -lm +KDIR ?= /lib/modules/$(shell uname -r)/build +ifneq (,$(wildcard $(KDIR)/Module.symvers)) +ifneq (,$(wildcard $(KDIR)/include/linux/page_frag_cache.h)) +TEST_GEN_MODS_DIR := page_frag +else +PAGE_FRAG_WARNING = "missing page_frag_cache.h, please use a newer kernel" +endif +else +PAGE_FRAG_WARNING = "missing Module.symvers, please have the kernel built first" +endif + TEST_GEN_FILES = cow TEST_GEN_FILES += compaction_test TEST_GEN_FILES += gup_longterm @@ -79,6 +90,7 @@ TEST_GEN_FILES += hugetlb_fault_after_madv TEST_GEN_FILES += hugetlb_madv_vs_map TEST_GEN_FILES += hugetlb_dio TEST_GEN_FILES += droppable +TEST_GEN_FILES += guard-pages ifneq ($(ARCH),arm64) TEST_GEN_FILES += soft-dirty @@ -105,17 +117,19 @@ endif ifeq ($(CAN_BUILD_X86_64),1) TEST_GEN_FILES += $(BINARIES_64) endif -else -ifneq (,$(filter $(ARCH),arm64 powerpc)) +else ifeq ($(ARCH),arm64) +TEST_GEN_FILES += protection_keys +TEST_GEN_FILES += pkey_sighandler_tests +else ifeq ($(ARCH),powerpc) TEST_GEN_FILES += protection_keys -endif - endif ifneq (,$(filter $(ARCH),arm64 mips64 parisc64 powerpc riscv64 s390x sparc64 x86_64 s390)) TEST_GEN_FILES += va_high_addr_switch +ifneq ($(ARCH),riscv64) TEST_GEN_FILES += virtual_address_range +endif TEST_GEN_FILES += write_to_hugetlbfs endif @@ -126,6 +140,7 @@ TEST_FILES += test_hmm.sh TEST_FILES += va_high_addr_switch.sh TEST_FILES += charge_reserved_hugetlb.sh TEST_FILES += hugetlb_reparenting_test.sh +TEST_FILES += test_page_frag.sh # required by charge_reserved_hugetlb.sh TEST_FILES += write_hugetlb_memory.sh @@ -211,3 +226,12 @@ warn_missing_liburing: echo "Warning: missing liburing support. Some tests will be skipped." ; \ echo endif + +ifneq ($(PAGE_FRAG_WARNING),) +all: warn_missing_page_frag + +warn_missing_page_frag: + @echo ; \ + echo "Warning: $(PAGE_FRAG_WARNING). page_frag test will be skipped." ; \ + echo +endif diff --git a/tools/testing/selftests/mm/guard-pages.c b/tools/testing/selftests/mm/guard-pages.c new file mode 100644 index 000000000000..7cdf815d0d63 --- /dev/null +++ b/tools/testing/selftests/mm/guard-pages.c @@ -0,0 +1,1243 @@ +// SPDX-License-Identifier: GPL-2.0-or-later + +#define _GNU_SOURCE +#include "../kselftest_harness.h" +#include <asm-generic/mman.h> /* Force the import of the tools version. */ +#include <assert.h> +#include <errno.h> +#include <fcntl.h> +#include <linux/userfaultfd.h> +#include <setjmp.h> +#include <signal.h> +#include <stdbool.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/ioctl.h> +#include <sys/mman.h> +#include <sys/syscall.h> +#include <sys/uio.h> +#include <unistd.h> + +/* + * Ignore the checkpatch warning, as per the C99 standard, section 7.14.1.1: + * + * "If the signal occurs other than as the result of calling the abort or raise + * function, the behavior is undefined if the signal handler refers to any + * object with static storage duration other than by assigning a value to an + * object declared as volatile sig_atomic_t" + */ +static volatile sig_atomic_t signal_jump_set; +static sigjmp_buf signal_jmp_buf; + +/* + * Ignore the checkpatch warning, we must read from x but don't want to do + * anything with it in order to trigger a read page fault. We therefore must use + * volatile to stop the compiler from optimising this away. + */ +#define FORCE_READ(x) (*(volatile typeof(x) *)x) + +static int userfaultfd(int flags) +{ + return syscall(SYS_userfaultfd, flags); +} + +static void handle_fatal(int c) +{ + if (!signal_jump_set) + return; + + siglongjmp(signal_jmp_buf, c); +} + +static int pidfd_open(pid_t pid, unsigned int flags) +{ + return syscall(SYS_pidfd_open, pid, flags); +} + +/* + * Enable our signal catcher and try to read/write the specified buffer. The + * return value indicates whether the read/write succeeds without a fatal + * signal. + */ +static bool try_access_buf(char *ptr, bool write) +{ + bool failed; + + /* Tell signal handler to jump back here on fatal signal. */ + signal_jump_set = true; + /* If a fatal signal arose, we will jump back here and failed is set. */ + failed = sigsetjmp(signal_jmp_buf, 0) != 0; + + if (!failed) { + if (write) + *ptr = 'x'; + else + FORCE_READ(ptr); + } + + signal_jump_set = false; + return !failed; +} + +/* Try and read from a buffer, return true if no fatal signal. */ +static bool try_read_buf(char *ptr) +{ + return try_access_buf(ptr, false); +} + +/* Try and write to a buffer, return true if no fatal signal. */ +static bool try_write_buf(char *ptr) +{ + return try_access_buf(ptr, true); +} + +/* + * Try and BOTH read from AND write to a buffer, return true if BOTH operations + * succeed. + */ +static bool try_read_write_buf(char *ptr) +{ + return try_read_buf(ptr) && try_write_buf(ptr); +} + +FIXTURE(guard_pages) +{ + unsigned long page_size; +}; + +FIXTURE_SETUP(guard_pages) +{ + struct sigaction act = { + .sa_handler = &handle_fatal, + .sa_flags = SA_NODEFER, + }; + + sigemptyset(&act.sa_mask); + if (sigaction(SIGSEGV, &act, NULL)) + ksft_exit_fail_perror("sigaction"); + + self->page_size = (unsigned long)sysconf(_SC_PAGESIZE); +}; + +FIXTURE_TEARDOWN(guard_pages) +{ + struct sigaction act = { + .sa_handler = SIG_DFL, + .sa_flags = SA_NODEFER, + }; + + sigemptyset(&act.sa_mask); + sigaction(SIGSEGV, &act, NULL); +} + +TEST_F(guard_pages, basic) +{ + const unsigned long NUM_PAGES = 10; + const unsigned long page_size = self->page_size; + char *ptr; + int i; + + ptr = mmap(NULL, NUM_PAGES * page_size, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANON, -1, 0); + ASSERT_NE(ptr, MAP_FAILED); + + /* Trivially assert we can touch the first page. */ + ASSERT_TRUE(try_read_write_buf(ptr)); + + ASSERT_EQ(madvise(ptr, page_size, MADV_GUARD_INSTALL), 0); + + /* Establish that 1st page SIGSEGV's. */ + ASSERT_FALSE(try_read_write_buf(ptr)); + + /* Ensure we can touch everything else.*/ + for (i = 1; i < NUM_PAGES; i++) { + char *curr = &ptr[i * page_size]; + + ASSERT_TRUE(try_read_write_buf(curr)); + } + + /* Establish a guard page at the end of the mapping. */ + ASSERT_EQ(madvise(&ptr[(NUM_PAGES - 1) * page_size], page_size, + MADV_GUARD_INSTALL), 0); + + /* Check that both guard pages result in SIGSEGV. */ + ASSERT_FALSE(try_read_write_buf(ptr)); + ASSERT_FALSE(try_read_write_buf(&ptr[(NUM_PAGES - 1) * page_size])); + + /* Remove the first guard page. */ + ASSERT_FALSE(madvise(ptr, page_size, MADV_GUARD_REMOVE)); + + /* Make sure we can touch it. */ + ASSERT_TRUE(try_read_write_buf(ptr)); + + /* Remove the last guard page. */ + ASSERT_FALSE(madvise(&ptr[(NUM_PAGES - 1) * page_size], page_size, + MADV_GUARD_REMOVE)); + + /* Make sure we can touch it. */ + ASSERT_TRUE(try_read_write_buf(&ptr[(NUM_PAGES - 1) * page_size])); + + /* + * Test setting a _range_ of pages, namely the first 3. The first of + * these be faulted in, so this also tests that we can install guard + * pages over backed pages. + */ + ASSERT_EQ(madvise(ptr, 3 * page_size, MADV_GUARD_INSTALL), 0); + + /* Make sure they are all guard pages. */ + for (i = 0; i < 3; i++) { + char *curr = &ptr[i * page_size]; + + ASSERT_FALSE(try_read_write_buf(curr)); + } + + /* Make sure the rest are not. */ + for (i = 3; i < NUM_PAGES; i++) { + char *curr = &ptr[i * page_size]; + + ASSERT_TRUE(try_read_write_buf(curr)); + } + + /* Remove guard pages. */ + ASSERT_EQ(madvise(ptr, NUM_PAGES * page_size, MADV_GUARD_REMOVE), 0); + + /* Now make sure we can touch everything. */ + for (i = 0; i < NUM_PAGES; i++) { + char *curr = &ptr[i * page_size]; + + ASSERT_TRUE(try_read_write_buf(curr)); + } + + /* + * Now remove all guard pages, make sure we don't remove existing + * entries. + */ + ASSERT_EQ(madvise(ptr, NUM_PAGES * page_size, MADV_GUARD_REMOVE), 0); + + for (i = 0; i < NUM_PAGES * page_size; i += page_size) { + char chr = ptr[i]; + + ASSERT_EQ(chr, 'x'); + } + + ASSERT_EQ(munmap(ptr, NUM_PAGES * page_size), 0); +} + +/* Assert that operations applied across multiple VMAs work as expected. */ +TEST_F(guard_pages, multi_vma) +{ + const unsigned long page_size = self->page_size; + char *ptr_region, *ptr, *ptr1, *ptr2, *ptr3; + int i; + + /* Reserve a 100 page region over which we can install VMAs. */ + ptr_region = mmap(NULL, 100 * page_size, PROT_NONE, + MAP_ANON | MAP_PRIVATE, -1, 0); + ASSERT_NE(ptr_region, MAP_FAILED); + + /* Place a VMA of 10 pages size at the start of the region. */ + ptr1 = mmap(ptr_region, 10 * page_size, PROT_READ | PROT_WRITE, + MAP_FIXED | MAP_ANON | MAP_PRIVATE, -1, 0); + ASSERT_NE(ptr1, MAP_FAILED); + + /* Place a VMA of 5 pages size 50 pages into the region. */ + ptr2 = mmap(&ptr_region[50 * page_size], 5 * page_size, + PROT_READ | PROT_WRITE, + MAP_FIXED | MAP_ANON | MAP_PRIVATE, -1, 0); + ASSERT_NE(ptr2, MAP_FAILED); + + /* Place a VMA of 20 pages size at the end of the region. */ + ptr3 = mmap(&ptr_region[80 * page_size], 20 * page_size, + PROT_READ | PROT_WRITE, + MAP_FIXED | MAP_ANON | MAP_PRIVATE, -1, 0); + ASSERT_NE(ptr3, MAP_FAILED); + + /* Unmap gaps. */ + ASSERT_EQ(munmap(&ptr_region[10 * page_size], 40 * page_size), 0); + ASSERT_EQ(munmap(&ptr_region[55 * page_size], 25 * page_size), 0); + + /* + * We end up with VMAs like this: + * + * 0 10 .. 50 55 .. 80 100 + * [---] [---] [---] + */ + + /* + * Now mark the whole range as guard pages and make sure all VMAs are as + * such. + */ + + /* + * madvise() is certifiable and lets you perform operations over gaps, + * everything works, but it indicates an error and errno is set to + * -ENOMEM. Also if anything runs out of memory it is set to + * -ENOMEM. You are meant to guess which is which. + */ + ASSERT_EQ(madvise(ptr_region, 100 * page_size, MADV_GUARD_INSTALL), -1); + ASSERT_EQ(errno, ENOMEM); + + for (i = 0; i < 10; i++) { + char *curr = &ptr1[i * page_size]; + + ASSERT_FALSE(try_read_write_buf(curr)); + } + + for (i = 0; i < 5; i++) { + char *curr = &ptr2[i * page_size]; + + ASSERT_FALSE(try_read_write_buf(curr)); + } + + for (i = 0; i < 20; i++) { + char *curr = &ptr3[i * page_size]; + + ASSERT_FALSE(try_read_write_buf(curr)); + } + + /* Now remove guar pages over range and assert the opposite. */ + + ASSERT_EQ(madvise(ptr_region, 100 * page_size, MADV_GUARD_REMOVE), -1); + ASSERT_EQ(errno, ENOMEM); + + for (i = 0; i < 10; i++) { + char *curr = &ptr1[i * page_size]; + + ASSERT_TRUE(try_read_write_buf(curr)); + } + + for (i = 0; i < 5; i++) { + char *curr = &ptr2[i * page_size]; + + ASSERT_TRUE(try_read_write_buf(curr)); + } + + for (i = 0; i < 20; i++) { + char *curr = &ptr3[i * page_size]; + + ASSERT_TRUE(try_read_write_buf(curr)); + } + + /* Now map incompatible VMAs in the gaps. */ + ptr = mmap(&ptr_region[10 * page_size], 40 * page_size, + PROT_READ | PROT_WRITE | PROT_EXEC, + MAP_FIXED | MAP_ANON | MAP_PRIVATE, -1, 0); + ASSERT_NE(ptr, MAP_FAILED); + ptr = mmap(&ptr_region[55 * page_size], 25 * page_size, + PROT_READ | PROT_WRITE | PROT_EXEC, + MAP_FIXED | MAP_ANON | MAP_PRIVATE, -1, 0); + ASSERT_NE(ptr, MAP_FAILED); + + /* + * We end up with VMAs like this: + * + * 0 10 .. 50 55 .. 80 100 + * [---][xxxx][---][xxxx][---] + * + * Where 'x' signifies VMAs that cannot be merged with those adjacent to + * them. + */ + + /* Multiple VMAs adjacent to one another should result in no error. */ + ASSERT_EQ(madvise(ptr_region, 100 * page_size, MADV_GUARD_INSTALL), 0); + for (i = 0; i < 100; i++) { + char *curr = &ptr_region[i * page_size]; + + ASSERT_FALSE(try_read_write_buf(curr)); + } + ASSERT_EQ(madvise(ptr_region, 100 * page_size, MADV_GUARD_REMOVE), 0); + for (i = 0; i < 100; i++) { + char *curr = &ptr_region[i * page_size]; + + ASSERT_TRUE(try_read_write_buf(curr)); + } + + /* Cleanup. */ + ASSERT_EQ(munmap(ptr_region, 100 * page_size), 0); +} + +/* + * Assert that batched operations performed using process_madvise() work as + * expected. + */ +TEST_F(guard_pages, process_madvise) +{ + const unsigned long page_size = self->page_size; + pid_t pid = getpid(); + int pidfd = pidfd_open(pid, 0); + char *ptr_region, *ptr1, *ptr2, *ptr3; + ssize_t count; + struct iovec vec[6]; + + ASSERT_NE(pidfd, -1); + + /* Reserve region to map over. */ + ptr_region = mmap(NULL, 100 * page_size, PROT_NONE, + MAP_ANON | MAP_PRIVATE, -1, 0); + ASSERT_NE(ptr_region, MAP_FAILED); + + /* + * 10 pages offset 1 page into reserve region. We MAP_POPULATE so we + * overwrite existing entries and test this code path against + * overwriting existing entries. + */ + ptr1 = mmap(&ptr_region[page_size], 10 * page_size, + PROT_READ | PROT_WRITE, + MAP_FIXED | MAP_ANON | MAP_PRIVATE | MAP_POPULATE, -1, 0); + ASSERT_NE(ptr1, MAP_FAILED); + /* We want guard markers at start/end of each VMA. */ + vec[0].iov_base = ptr1; + vec[0].iov_len = page_size; + vec[1].iov_base = &ptr1[9 * page_size]; + vec[1].iov_len = page_size; + + /* 5 pages offset 50 pages into reserve region. */ + ptr2 = mmap(&ptr_region[50 * page_size], 5 * page_size, + PROT_READ | PROT_WRITE, + MAP_FIXED | MAP_ANON | MAP_PRIVATE, -1, 0); + ASSERT_NE(ptr2, MAP_FAILED); + vec[2].iov_base = ptr2; + vec[2].iov_len = page_size; + vec[3].iov_base = &ptr2[4 * page_size]; + vec[3].iov_len = page_size; + + /* 20 pages offset 79 pages into reserve region. */ + ptr3 = mmap(&ptr_region[79 * page_size], 20 * page_size, + PROT_READ | PROT_WRITE, + MAP_FIXED | MAP_ANON | MAP_PRIVATE, -1, 0); + ASSERT_NE(ptr3, MAP_FAILED); + vec[4].iov_base = ptr3; + vec[4].iov_len = page_size; + vec[5].iov_base = &ptr3[19 * page_size]; + vec[5].iov_len = page_size; + + /* Free surrounding VMAs. */ + ASSERT_EQ(munmap(ptr_region, page_size), 0); + ASSERT_EQ(munmap(&ptr_region[11 * page_size], 39 * page_size), 0); + ASSERT_EQ(munmap(&ptr_region[55 * page_size], 24 * page_size), 0); + ASSERT_EQ(munmap(&ptr_region[99 * page_size], page_size), 0); + + /* Now guard in one step. */ + count = process_madvise(pidfd, vec, 6, MADV_GUARD_INSTALL, 0); + + /* OK we don't have permission to do this, skip. */ + if (count == -1 && errno == EPERM) + ksft_exit_skip("No process_madvise() permissions, try running as root.\n"); + + /* Returns the number of bytes advised. */ + ASSERT_EQ(count, 6 * page_size); + + /* Now make sure the guarding was applied. */ + + ASSERT_FALSE(try_read_write_buf(ptr1)); + ASSERT_FALSE(try_read_write_buf(&ptr1[9 * page_size])); + + ASSERT_FALSE(try_read_write_buf(ptr2)); + ASSERT_FALSE(try_read_write_buf(&ptr2[4 * page_size])); + + ASSERT_FALSE(try_read_write_buf(ptr3)); + ASSERT_FALSE(try_read_write_buf(&ptr3[19 * page_size])); + + /* Now do the same with unguard... */ + count = process_madvise(pidfd, vec, 6, MADV_GUARD_REMOVE, 0); + + /* ...and everything should now succeed. */ + + ASSERT_TRUE(try_read_write_buf(ptr1)); + ASSERT_TRUE(try_read_write_buf(&ptr1[9 * page_size])); + + ASSERT_TRUE(try_read_write_buf(ptr2)); + ASSERT_TRUE(try_read_write_buf(&ptr2[4 * page_size])); + + ASSERT_TRUE(try_read_write_buf(ptr3)); + ASSERT_TRUE(try_read_write_buf(&ptr3[19 * page_size])); + + /* Cleanup. */ + ASSERT_EQ(munmap(ptr1, 10 * page_size), 0); + ASSERT_EQ(munmap(ptr2, 5 * page_size), 0); + ASSERT_EQ(munmap(ptr3, 20 * page_size), 0); + close(pidfd); +} + +/* Assert that unmapping ranges does not leave guard markers behind. */ +TEST_F(guard_pages, munmap) +{ + const unsigned long page_size = self->page_size; + char *ptr, *ptr_new1, *ptr_new2; + + ptr = mmap(NULL, 10 * page_size, PROT_READ | PROT_WRITE, + MAP_ANON | MAP_PRIVATE, -1, 0); + ASSERT_NE(ptr, MAP_FAILED); + + /* Guard first and last pages. */ + ASSERT_EQ(madvise(ptr, page_size, MADV_GUARD_INSTALL), 0); + ASSERT_EQ(madvise(&ptr[9 * page_size], page_size, MADV_GUARD_INSTALL), 0); + + /* Assert that they are guarded. */ + ASSERT_FALSE(try_read_write_buf(ptr)); + ASSERT_FALSE(try_read_write_buf(&ptr[9 * page_size])); + + /* Unmap them. */ + ASSERT_EQ(munmap(ptr, page_size), 0); + ASSERT_EQ(munmap(&ptr[9 * page_size], page_size), 0); + + /* Map over them.*/ + ptr_new1 = mmap(ptr, page_size, PROT_READ | PROT_WRITE, + MAP_FIXED | MAP_ANON | MAP_PRIVATE, -1, 0); + ASSERT_NE(ptr_new1, MAP_FAILED); + ptr_new2 = mmap(&ptr[9 * page_size], page_size, PROT_READ | PROT_WRITE, + MAP_FIXED | MAP_ANON | MAP_PRIVATE, -1, 0); + ASSERT_NE(ptr_new2, MAP_FAILED); + + /* Assert that they are now not guarded. */ + ASSERT_TRUE(try_read_write_buf(ptr_new1)); + ASSERT_TRUE(try_read_write_buf(ptr_new2)); + + /* Cleanup. */ + ASSERT_EQ(munmap(ptr, 10 * page_size), 0); +} + +/* Assert that mprotect() operations have no bearing on guard markers. */ +TEST_F(guard_pages, mprotect) +{ + const unsigned long page_size = self->page_size; + char *ptr; + int i; + + ptr = mmap(NULL, 10 * page_size, PROT_READ | PROT_WRITE, + MAP_ANON | MAP_PRIVATE, -1, 0); + ASSERT_NE(ptr, MAP_FAILED); + + /* Guard the middle of the range. */ + ASSERT_EQ(madvise(&ptr[5 * page_size], 2 * page_size, + MADV_GUARD_INSTALL), 0); + + /* Assert that it is indeed guarded. */ + ASSERT_FALSE(try_read_write_buf(&ptr[5 * page_size])); + ASSERT_FALSE(try_read_write_buf(&ptr[6 * page_size])); + + /* Now make these pages read-only. */ + ASSERT_EQ(mprotect(&ptr[5 * page_size], 2 * page_size, PROT_READ), 0); + + /* Make sure the range is still guarded. */ + ASSERT_FALSE(try_read_buf(&ptr[5 * page_size])); + ASSERT_FALSE(try_read_buf(&ptr[6 * page_size])); + + /* Make sure we can guard again without issue.*/ + ASSERT_EQ(madvise(&ptr[5 * page_size], 2 * page_size, + MADV_GUARD_INSTALL), 0); + + /* Make sure the range is, yet again, still guarded. */ + ASSERT_FALSE(try_read_buf(&ptr[5 * page_size])); + ASSERT_FALSE(try_read_buf(&ptr[6 * page_size])); + + /* Now unguard the whole range. */ + ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_GUARD_REMOVE), 0); + + /* Make sure the whole range is readable. */ + for (i = 0; i < 10; i++) { + char *curr = &ptr[i * page_size]; + + ASSERT_TRUE(try_read_buf(curr)); + } + + /* Cleanup. */ + ASSERT_EQ(munmap(ptr, 10 * page_size), 0); +} + +/* Split and merge VMAs and make sure guard pages still behave. */ +TEST_F(guard_pages, split_merge) +{ + const unsigned long page_size = self->page_size; + char *ptr, *ptr_new; + int i; + + ptr = mmap(NULL, 10 * page_size, PROT_READ | PROT_WRITE, + MAP_ANON | MAP_PRIVATE, -1, 0); + ASSERT_NE(ptr, MAP_FAILED); + + /* Guard the whole range. */ + ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_GUARD_INSTALL), 0); + + /* Make sure the whole range is guarded. */ + for (i = 0; i < 10; i++) { + char *curr = &ptr[i * page_size]; + + ASSERT_FALSE(try_read_write_buf(curr)); + } + + /* Now unmap some pages in the range so we split. */ + ASSERT_EQ(munmap(&ptr[2 * page_size], page_size), 0); + ASSERT_EQ(munmap(&ptr[5 * page_size], page_size), 0); + ASSERT_EQ(munmap(&ptr[8 * page_size], page_size), 0); + + /* Make sure the remaining ranges are guarded post-split. */ + for (i = 0; i < 2; i++) { + char *curr = &ptr[i * page_size]; + + ASSERT_FALSE(try_read_write_buf(curr)); + } + for (i = 2; i < 5; i++) { + char *curr = &ptr[i * page_size]; + + ASSERT_FALSE(try_read_write_buf(curr)); + } + for (i = 6; i < 8; i++) { + char *curr = &ptr[i * page_size]; + + ASSERT_FALSE(try_read_write_buf(curr)); + } + for (i = 9; i < 10; i++) { + char *curr = &ptr[i * page_size]; + + ASSERT_FALSE(try_read_write_buf(curr)); + } + + /* Now map them again - the unmap will have cleared the guards. */ + ptr_new = mmap(&ptr[2 * page_size], page_size, PROT_READ | PROT_WRITE, + MAP_FIXED | MAP_ANON | MAP_PRIVATE, -1, 0); + ASSERT_NE(ptr_new, MAP_FAILED); + ptr_new = mmap(&ptr[5 * page_size], page_size, PROT_READ | PROT_WRITE, + MAP_FIXED | MAP_ANON | MAP_PRIVATE, -1, 0); + ASSERT_NE(ptr_new, MAP_FAILED); + ptr_new = mmap(&ptr[8 * page_size], page_size, PROT_READ | PROT_WRITE, + MAP_FIXED | MAP_ANON | MAP_PRIVATE, -1, 0); + ASSERT_NE(ptr_new, MAP_FAILED); + + /* Now make sure guard pages are established. */ + for (i = 0; i < 10; i++) { + char *curr = &ptr[i * page_size]; + bool result = try_read_write_buf(curr); + bool expect_true = i == 2 || i == 5 || i == 8; + + ASSERT_TRUE(expect_true ? result : !result); + } + + /* Now guard everything again. */ + ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_GUARD_INSTALL), 0); + + /* Make sure the whole range is guarded. */ + for (i = 0; i < 10; i++) { + char *curr = &ptr[i * page_size]; + + ASSERT_FALSE(try_read_write_buf(curr)); + } + + /* Now split the range into three. */ + ASSERT_EQ(mprotect(ptr, 3 * page_size, PROT_READ), 0); + ASSERT_EQ(mprotect(&ptr[7 * page_size], 3 * page_size, PROT_READ), 0); + + /* Make sure the whole range is guarded for read. */ + for (i = 0; i < 10; i++) { + char *curr = &ptr[i * page_size]; + + ASSERT_FALSE(try_read_buf(curr)); + } + + /* Now reset protection bits so we merge the whole thing. */ + ASSERT_EQ(mprotect(ptr, 3 * page_size, PROT_READ | PROT_WRITE), 0); + ASSERT_EQ(mprotect(&ptr[7 * page_size], 3 * page_size, + PROT_READ | PROT_WRITE), 0); + + /* Make sure the whole range is still guarded. */ + for (i = 0; i < 10; i++) { + char *curr = &ptr[i * page_size]; + + ASSERT_FALSE(try_read_write_buf(curr)); + } + + /* Split range into 3 again... */ + ASSERT_EQ(mprotect(ptr, 3 * page_size, PROT_READ), 0); + ASSERT_EQ(mprotect(&ptr[7 * page_size], 3 * page_size, PROT_READ), 0); + + /* ...and unguard the whole range. */ + ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_GUARD_REMOVE), 0); + + /* Make sure the whole range is remedied for read. */ + for (i = 0; i < 10; i++) { + char *curr = &ptr[i * page_size]; + + ASSERT_TRUE(try_read_buf(curr)); + } + + /* Merge them again. */ + ASSERT_EQ(mprotect(ptr, 3 * page_size, PROT_READ | PROT_WRITE), 0); + ASSERT_EQ(mprotect(&ptr[7 * page_size], 3 * page_size, + PROT_READ | PROT_WRITE), 0); + + /* Now ensure the merged range is remedied for read/write. */ + for (i = 0; i < 10; i++) { + char *curr = &ptr[i * page_size]; + + ASSERT_TRUE(try_read_write_buf(curr)); + } + + /* Cleanup. */ + ASSERT_EQ(munmap(ptr, 10 * page_size), 0); +} + +/* Assert that MADV_DONTNEED does not remove guard markers. */ +TEST_F(guard_pages, dontneed) +{ + const unsigned long page_size = self->page_size; + char *ptr; + int i; + + ptr = mmap(NULL, 10 * page_size, PROT_READ | PROT_WRITE, + MAP_ANON | MAP_PRIVATE, -1, 0); + ASSERT_NE(ptr, MAP_FAILED); + + /* Back the whole range. */ + for (i = 0; i < 10; i++) { + char *curr = &ptr[i * page_size]; + + *curr = 'y'; + } + + /* Guard every other page. */ + for (i = 0; i < 10; i += 2) { + char *curr = &ptr[i * page_size]; + int res = madvise(curr, page_size, MADV_GUARD_INSTALL); + + ASSERT_EQ(res, 0); + } + + /* Indicate that we don't need any of the range. */ + ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_DONTNEED), 0); + + /* Check to ensure guard markers are still in place. */ + for (i = 0; i < 10; i++) { + char *curr = &ptr[i * page_size]; + bool result = try_read_buf(curr); + + if (i % 2 == 0) { + ASSERT_FALSE(result); + } else { + ASSERT_TRUE(result); + /* Make sure we really did get reset to zero page. */ + ASSERT_EQ(*curr, '\0'); + } + + /* Now write... */ + result = try_write_buf(&ptr[i * page_size]); + + /* ...and make sure same result. */ + ASSERT_TRUE(i % 2 != 0 ? result : !result); + } + + /* Cleanup. */ + ASSERT_EQ(munmap(ptr, 10 * page_size), 0); +} + +/* Assert that mlock()'ed pages work correctly with guard markers. */ +TEST_F(guard_pages, mlock) +{ + const unsigned long page_size = self->page_size; + char *ptr; + int i; + + ptr = mmap(NULL, 10 * page_size, PROT_READ | PROT_WRITE, + MAP_ANON | MAP_PRIVATE, -1, 0); + ASSERT_NE(ptr, MAP_FAILED); + + /* Populate. */ + for (i = 0; i < 10; i++) { + char *curr = &ptr[i * page_size]; + + *curr = 'y'; + } + + /* Lock. */ + ASSERT_EQ(mlock(ptr, 10 * page_size), 0); + + /* Now try to guard, should fail with EINVAL. */ + ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_GUARD_INSTALL), -1); + ASSERT_EQ(errno, EINVAL); + + /* OK unlock. */ + ASSERT_EQ(munlock(ptr, 10 * page_size), 0); + + /* Guard first half of range, should now succeed. */ + ASSERT_EQ(madvise(ptr, 5 * page_size, MADV_GUARD_INSTALL), 0); + + /* Make sure guard works. */ + for (i = 0; i < 10; i++) { + char *curr = &ptr[i * page_size]; + bool result = try_read_write_buf(curr); + + if (i < 5) { + ASSERT_FALSE(result); + } else { + ASSERT_TRUE(result); + ASSERT_EQ(*curr, 'x'); + } + } + + /* + * Now lock the latter part of the range. We can't lock the guard pages, + * as this would result in the pages being populated and the guarding + * would cause this to error out. + */ + ASSERT_EQ(mlock(&ptr[5 * page_size], 5 * page_size), 0); + + /* + * Now remove guard pages, we permit mlock()'d ranges to have guard + * pages removed as it is a non-destructive operation. + */ + ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_GUARD_REMOVE), 0); + + /* Now check that no guard pages remain. */ + for (i = 0; i < 10; i++) { + char *curr = &ptr[i * page_size]; + + ASSERT_TRUE(try_read_write_buf(curr)); + } + + /* Cleanup. */ + ASSERT_EQ(munmap(ptr, 10 * page_size), 0); +} + +/* + * Assert that moving, extending and shrinking memory via mremap() retains + * guard markers where possible. + * + * - Moving a mapping alone should retain markers as they are. + */ +TEST_F(guard_pages, mremap_move) +{ + const unsigned long page_size = self->page_size; + char *ptr, *ptr_new; + + /* Map 5 pages. */ + ptr = mmap(NULL, 5 * page_size, PROT_READ | PROT_WRITE, + MAP_ANON | MAP_PRIVATE, -1, 0); + ASSERT_NE(ptr, MAP_FAILED); + + /* Place guard markers at both ends of the 5 page span. */ + ASSERT_EQ(madvise(ptr, page_size, MADV_GUARD_INSTALL), 0); + ASSERT_EQ(madvise(&ptr[4 * page_size], page_size, MADV_GUARD_INSTALL), 0); + + /* Make sure the guard pages are in effect. */ + ASSERT_FALSE(try_read_write_buf(ptr)); + ASSERT_FALSE(try_read_write_buf(&ptr[4 * page_size])); + + /* Map a new region we will move this range into. Doing this ensures + * that we have reserved a range to map into. + */ + ptr_new = mmap(NULL, 5 * page_size, PROT_NONE, MAP_ANON | MAP_PRIVATE, + -1, 0); + ASSERT_NE(ptr_new, MAP_FAILED); + + ASSERT_EQ(mremap(ptr, 5 * page_size, 5 * page_size, + MREMAP_MAYMOVE | MREMAP_FIXED, ptr_new), ptr_new); + + /* Make sure the guard markers are retained. */ + ASSERT_FALSE(try_read_write_buf(ptr_new)); + ASSERT_FALSE(try_read_write_buf(&ptr_new[4 * page_size])); + + /* + * Clean up - we only need reference the new pointer as we overwrote the + * PROT_NONE range and moved the existing one. + */ + munmap(ptr_new, 5 * page_size); +} + +/* + * Assert that moving, extending and shrinking memory via mremap() retains + * guard markers where possible. + * + * Expanding should retain guard pages, only now in different position. The user + * will have to remove guard pages manually to fix up (they'd have to do the + * same if it were a PROT_NONE mapping). + */ +TEST_F(guard_pages, mremap_expand) +{ + const unsigned long page_size = self->page_size; + char *ptr, *ptr_new; + + /* Map 10 pages... */ + ptr = mmap(NULL, 10 * page_size, PROT_READ | PROT_WRITE, + MAP_ANON | MAP_PRIVATE, -1, 0); + ASSERT_NE(ptr, MAP_FAILED); + /* ...But unmap the last 5 so we can ensure we can expand into them. */ + ASSERT_EQ(munmap(&ptr[5 * page_size], 5 * page_size), 0); + + /* Place guard markers at both ends of the 5 page span. */ + ASSERT_EQ(madvise(ptr, page_size, MADV_GUARD_INSTALL), 0); + ASSERT_EQ(madvise(&ptr[4 * page_size], page_size, MADV_GUARD_INSTALL), 0); + + /* Make sure the guarding is in effect. */ + ASSERT_FALSE(try_read_write_buf(ptr)); + ASSERT_FALSE(try_read_write_buf(&ptr[4 * page_size])); + + /* Now expand to 10 pages. */ + ptr = mremap(ptr, 5 * page_size, 10 * page_size, 0); + ASSERT_NE(ptr, MAP_FAILED); + + /* + * Make sure the guard markers are retained in their original positions. + */ + ASSERT_FALSE(try_read_write_buf(ptr)); + ASSERT_FALSE(try_read_write_buf(&ptr[4 * page_size])); + + /* Reserve a region which we can move to and expand into. */ + ptr_new = mmap(NULL, 20 * page_size, PROT_NONE, + MAP_ANON | MAP_PRIVATE, -1, 0); + ASSERT_NE(ptr_new, MAP_FAILED); + + /* Now move and expand into it. */ + ptr = mremap(ptr, 10 * page_size, 20 * page_size, + MREMAP_MAYMOVE | MREMAP_FIXED, ptr_new); + ASSERT_EQ(ptr, ptr_new); + + /* + * Again, make sure the guard markers are retained in their original positions. + */ + ASSERT_FALSE(try_read_write_buf(ptr)); + ASSERT_FALSE(try_read_write_buf(&ptr[4 * page_size])); + + /* + * A real user would have to remove guard markers, but would reasonably + * expect all characteristics of the mapping to be retained, including + * guard markers. + */ + + /* Cleanup. */ + munmap(ptr, 20 * page_size); +} +/* + * Assert that moving, extending and shrinking memory via mremap() retains + * guard markers where possible. + * + * Shrinking will result in markers that are shrunk over being removed. Again, + * if the user were using a PROT_NONE mapping they'd have to manually fix this + * up also so this is OK. + */ +TEST_F(guard_pages, mremap_shrink) +{ + const unsigned long page_size = self->page_size; + char *ptr; + int i; + + /* Map 5 pages. */ + ptr = mmap(NULL, 5 * page_size, PROT_READ | PROT_WRITE, + MAP_ANON | MAP_PRIVATE, -1, 0); + ASSERT_NE(ptr, MAP_FAILED); + + /* Place guard markers at both ends of the 5 page span. */ + ASSERT_EQ(madvise(ptr, page_size, MADV_GUARD_INSTALL), 0); + ASSERT_EQ(madvise(&ptr[4 * page_size], page_size, MADV_GUARD_INSTALL), 0); + + /* Make sure the guarding is in effect. */ + ASSERT_FALSE(try_read_write_buf(ptr)); + ASSERT_FALSE(try_read_write_buf(&ptr[4 * page_size])); + + /* Now shrink to 3 pages. */ + ptr = mremap(ptr, 5 * page_size, 3 * page_size, MREMAP_MAYMOVE); + ASSERT_NE(ptr, MAP_FAILED); + + /* We expect the guard marker at the start to be retained... */ + ASSERT_FALSE(try_read_write_buf(ptr)); + + /* ...But remaining pages will not have guard markers. */ + for (i = 1; i < 3; i++) { + char *curr = &ptr[i * page_size]; + + ASSERT_TRUE(try_read_write_buf(curr)); + } + + /* + * As with expansion, a real user would have to remove guard pages and + * fixup. But you'd have to do similar manual things with PROT_NONE + * mappings too. + */ + + /* + * If we expand back to the original size, the end marker will, of + * course, no longer be present. + */ + ptr = mremap(ptr, 3 * page_size, 5 * page_size, 0); + ASSERT_NE(ptr, MAP_FAILED); + + /* Again, we expect the guard marker at the start to be retained... */ + ASSERT_FALSE(try_read_write_buf(ptr)); + + /* ...But remaining pages will not have guard markers. */ + for (i = 1; i < 5; i++) { + char *curr = &ptr[i * page_size]; + + ASSERT_TRUE(try_read_write_buf(curr)); + } + + /* Cleanup. */ + munmap(ptr, 5 * page_size); +} + +/* + * Assert that forking a process with VMAs that do not have VM_WIPEONFORK set + * retain guard pages. + */ +TEST_F(guard_pages, fork) +{ + const unsigned long page_size = self->page_size; + char *ptr; + pid_t pid; + int i; + + /* Map 10 pages. */ + ptr = mmap(NULL, 10 * page_size, PROT_READ | PROT_WRITE, + MAP_ANON | MAP_PRIVATE, -1, 0); + ASSERT_NE(ptr, MAP_FAILED); + + /* Establish guard apges in the first 5 pages. */ + ASSERT_EQ(madvise(ptr, 5 * page_size, MADV_GUARD_INSTALL), 0); + + pid = fork(); + ASSERT_NE(pid, -1); + if (!pid) { + /* This is the child process now. */ + + /* Assert that the guarding is in effect. */ + for (i = 0; i < 10; i++) { + char *curr = &ptr[i * page_size]; + bool result = try_read_write_buf(curr); + + ASSERT_TRUE(i >= 5 ? result : !result); + } + + /* Now unguard the range.*/ + ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_GUARD_REMOVE), 0); + + exit(0); + } + + /* Parent process. */ + + /* Parent simply waits on child. */ + waitpid(pid, NULL, 0); + + /* Child unguard does not impact parent page table state. */ + for (i = 0; i < 10; i++) { + char *curr = &ptr[i * page_size]; + bool result = try_read_write_buf(curr); + + ASSERT_TRUE(i >= 5 ? result : !result); + } + + /* Cleanup. */ + ASSERT_EQ(munmap(ptr, 10 * page_size), 0); +} + +/* + * Assert that forking a process with VMAs that do have VM_WIPEONFORK set + * behave as expected. + */ +TEST_F(guard_pages, fork_wipeonfork) +{ + const unsigned long page_size = self->page_size; + char *ptr; + pid_t pid; + int i; + + /* Map 10 pages. */ + ptr = mmap(NULL, 10 * page_size, PROT_READ | PROT_WRITE, + MAP_ANON | MAP_PRIVATE, -1, 0); + ASSERT_NE(ptr, MAP_FAILED); + + /* Mark wipe on fork. */ + ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_WIPEONFORK), 0); + + /* Guard the first 5 pages. */ + ASSERT_EQ(madvise(ptr, 5 * page_size, MADV_GUARD_INSTALL), 0); + + pid = fork(); + ASSERT_NE(pid, -1); + if (!pid) { + /* This is the child process now. */ + + /* Guard will have been wiped. */ + for (i = 0; i < 10; i++) { + char *curr = &ptr[i * page_size]; + + ASSERT_TRUE(try_read_write_buf(curr)); + } + + exit(0); + } + + /* Parent process. */ + + waitpid(pid, NULL, 0); + + /* Guard markers should be in effect.*/ + for (i = 0; i < 10; i++) { + char *curr = &ptr[i * page_size]; + bool result = try_read_write_buf(curr); + + ASSERT_TRUE(i >= 5 ? result : !result); + } + + /* Cleanup. */ + ASSERT_EQ(munmap(ptr, 10 * page_size), 0); +} + +/* Ensure that MADV_FREE retains guard entries as expected. */ +TEST_F(guard_pages, lazyfree) +{ + const unsigned long page_size = self->page_size; + char *ptr; + int i; + + /* Map 10 pages. */ + ptr = mmap(NULL, 10 * page_size, PROT_READ | PROT_WRITE, + MAP_ANON | MAP_PRIVATE, -1, 0); + ASSERT_NE(ptr, MAP_FAILED); + + /* Guard range. */ + ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_GUARD_INSTALL), 0); + + /* Ensure guarded. */ + for (i = 0; i < 10; i++) { + char *curr = &ptr[i * page_size]; + + ASSERT_FALSE(try_read_write_buf(curr)); + } + + /* Lazyfree range. */ + ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_FREE), 0); + + /* This should leave the guard markers in place. */ + for (i = 0; i < 10; i++) { + char *curr = &ptr[i * page_size]; + + ASSERT_FALSE(try_read_write_buf(curr)); + } + + /* Cleanup. */ + ASSERT_EQ(munmap(ptr, 10 * page_size), 0); +} + +/* Ensure that MADV_POPULATE_READ, MADV_POPULATE_WRITE behave as expected. */ +TEST_F(guard_pages, populate) +{ + const unsigned long page_size = self->page_size; + char *ptr; + + /* Map 10 pages. */ + ptr = mmap(NULL, 10 * page_size, PROT_READ | PROT_WRITE, + MAP_ANON | MAP_PRIVATE, -1, 0); + ASSERT_NE(ptr, MAP_FAILED); + + /* Guard range. */ + ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_GUARD_INSTALL), 0); + + /* Populate read should error out... */ + ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_POPULATE_READ), -1); + ASSERT_EQ(errno, EFAULT); + + /* ...as should populate write. */ + ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_POPULATE_WRITE), -1); + ASSERT_EQ(errno, EFAULT); + + /* Cleanup. */ + ASSERT_EQ(munmap(ptr, 10 * page_size), 0); +} + +/* Ensure that MADV_COLD, MADV_PAGEOUT do not remove guard markers. */ +TEST_F(guard_pages, cold_pageout) +{ + const unsigned long page_size = self->page_size; + char *ptr; + int i; + + /* Map 10 pages. */ + ptr = mmap(NULL, 10 * page_size, PROT_READ | PROT_WRITE, + MAP_ANON | MAP_PRIVATE, -1, 0); + ASSERT_NE(ptr, MAP_FAILED); + + /* Guard range. */ + ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_GUARD_INSTALL), 0); + + /* Ensured guarded. */ + for (i = 0; i < 10; i++) { + char *curr = &ptr[i * page_size]; + + ASSERT_FALSE(try_read_write_buf(curr)); + } + + /* Now mark cold. This should have no impact on guard markers. */ + ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_COLD), 0); + + /* Should remain guarded. */ + for (i = 0; i < 10; i++) { + char *curr = &ptr[i * page_size]; + + ASSERT_FALSE(try_read_write_buf(curr)); + } + + /* OK, now page out. This should equally, have no effect on markers. */ + ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_PAGEOUT), 0); + + /* Should remain guarded. */ + for (i = 0; i < 10; i++) { + char *curr = &ptr[i * page_size]; + + ASSERT_FALSE(try_read_write_buf(curr)); + } + + /* Cleanup. */ + ASSERT_EQ(munmap(ptr, 10 * page_size), 0); +} + +/* Ensure that guard pages do not break userfaultd. */ +TEST_F(guard_pages, uffd) +{ + const unsigned long page_size = self->page_size; + int uffd; + char *ptr; + int i; + struct uffdio_api api = { + .api = UFFD_API, + .features = 0, + }; + struct uffdio_register reg; + struct uffdio_range range; + + /* Set up uffd. */ + uffd = userfaultfd(0); + if (uffd == -1 && errno == EPERM) + ksft_exit_skip("No userfaultfd permissions, try running as root.\n"); + ASSERT_NE(uffd, -1); + + ASSERT_EQ(ioctl(uffd, UFFDIO_API, &api), 0); + + /* Map 10 pages. */ + ptr = mmap(NULL, 10 * page_size, PROT_READ | PROT_WRITE, + MAP_ANON | MAP_PRIVATE, -1, 0); + ASSERT_NE(ptr, MAP_FAILED); + + /* Register the range with uffd. */ + range.start = (unsigned long)ptr; + range.len = 10 * page_size; + reg.range = range; + reg.mode = UFFDIO_REGISTER_MODE_MISSING; + ASSERT_EQ(ioctl(uffd, UFFDIO_REGISTER, ®), 0); + + /* Guard the range. This should not trigger the uffd. */ + ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_GUARD_INSTALL), 0); + + /* The guarding should behave as usual with no uffd intervention. */ + for (i = 0; i < 10; i++) { + char *curr = &ptr[i * page_size]; + + ASSERT_FALSE(try_read_write_buf(curr)); + } + + /* Cleanup. */ + ASSERT_EQ(ioctl(uffd, UFFDIO_UNREGISTER, &range), 0); + close(uffd); + ASSERT_EQ(munmap(ptr, 10 * page_size), 0); +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/mm/hmm-tests.c b/tools/testing/selftests/mm/hmm-tests.c index d2cfc9b494a0..141bf63cbe05 100644 --- a/tools/testing/selftests/mm/hmm-tests.c +++ b/tools/testing/selftests/mm/hmm-tests.c @@ -1657,7 +1657,7 @@ TEST_F(hmm2, double_map) buffer->fd = -1; buffer->size = size; - buffer->mirror = malloc(npages); + buffer->mirror = malloc(size); ASSERT_NE(buffer->mirror, NULL); /* Reserve a range of addresses. */ diff --git a/tools/testing/selftests/mm/hugetlb_dio.c b/tools/testing/selftests/mm/hugetlb_dio.c index f9ac20c657ec..db63abe5ee5e 100644 --- a/tools/testing/selftests/mm/hugetlb_dio.c +++ b/tools/testing/selftests/mm/hugetlb_dio.c @@ -76,26 +76,34 @@ void run_dio_using_hugetlb(unsigned int start_off, unsigned int end_off) /* Get the free huge pages after unmap*/ free_hpage_a = get_free_hugepages(); + ksft_print_msg("No. Free pages before allocation : %d\n", free_hpage_b); + ksft_print_msg("No. Free pages after munmap : %d\n", free_hpage_a); + /* * If the no. of free hugepages before allocation and after unmap does * not match - that means there could still be a page which is pinned. */ - if (free_hpage_a != free_hpage_b) { - ksft_print_msg("No. Free pages before allocation : %d\n", free_hpage_b); - ksft_print_msg("No. Free pages after munmap : %d\n", free_hpage_a); - ksft_test_result_fail(": Huge pages not freed!\n"); - } else { - ksft_print_msg("No. Free pages before allocation : %d\n", free_hpage_b); - ksft_print_msg("No. Free pages after munmap : %d\n", free_hpage_a); - ksft_test_result_pass(": Huge pages freed successfully !\n"); - } + ksft_test_result(free_hpage_a == free_hpage_b, + "free huge pages from %u-%u\n", start_off, end_off); } int main(void) { size_t pagesize = 0; + int fd; ksft_print_header(); + + /* Open the file to DIO */ + fd = open("/tmp", O_TMPFILE | O_RDWR | O_DIRECT, 0664); + if (fd < 0) + ksft_exit_skip("Unable to allocate file: %s\n", strerror(errno)); + close(fd); + + /* Check if huge pages are free */ + if (!get_free_hugepages()) + ksft_exit_skip("No free hugepage, exiting\n"); + ksft_set_plan(4); /* Get base page size */ diff --git a/tools/testing/selftests/mm/hugetlb_fault_after_madv.c b/tools/testing/selftests/mm/hugetlb_fault_after_madv.c index 73b81c632366..e2640529dbb2 100644 --- a/tools/testing/selftests/mm/hugetlb_fault_after_madv.c +++ b/tools/testing/selftests/mm/hugetlb_fault_after_madv.c @@ -5,20 +5,36 @@ #include <sys/mman.h> #include <sys/types.h> #include <unistd.h> +#include <setjmp.h> +#include <signal.h> #include "vm_util.h" #include "../kselftest.h" -#define MMAP_SIZE (1 << 21) #define INLOOP_ITER 100 -char *huge_ptr; +static char *huge_ptr; +static size_t huge_page_size; + +static sigjmp_buf sigbuf; +static bool sigbus_triggered; + +static void signal_handler(int signal) +{ + if (signal == SIGBUS) { + sigbus_triggered = true; + siglongjmp(sigbuf, 1); + } +} /* Touch the memory while it is being madvised() */ void *touch(void *unused) { char *ptr = (char *)huge_ptr; + if (sigsetjmp(sigbuf, 1)) + return NULL; + for (int i = 0; i < INLOOP_ITER; i++) ptr[0] = '.'; @@ -30,7 +46,7 @@ void *madv(void *unused) usleep(rand() % 10); for (int i = 0; i < INLOOP_ITER; i++) - madvise(huge_ptr, MMAP_SIZE, MADV_DONTNEED); + madvise(huge_ptr, huge_page_size, MADV_DONTNEED); return NULL; } @@ -44,9 +60,23 @@ int main(void) * interactions */ int max = 10000; + int err; + + ksft_print_header(); + ksft_set_plan(1); srand(getpid()); + if (signal(SIGBUS, signal_handler) == SIG_ERR) + ksft_exit_skip("Could not register signal handler."); + + huge_page_size = default_huge_page_size(); + if (!huge_page_size) + ksft_exit_skip("Could not detect default hugetlb page size."); + + ksft_print_msg("[INFO] detected default hugetlb page size: %zu KiB\n", + huge_page_size / 1024); + free_hugepages = get_free_hugepages(); if (free_hugepages != 1) { ksft_exit_skip("This test needs one and only one page to execute. Got %lu\n", @@ -54,7 +84,7 @@ int main(void) } while (max--) { - huge_ptr = mmap(NULL, MMAP_SIZE, PROT_READ | PROT_WRITE, + huge_ptr = mmap(NULL, huge_page_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, -1, 0); @@ -66,8 +96,14 @@ int main(void) pthread_join(thread1, NULL); pthread_join(thread2, NULL); - munmap(huge_ptr, MMAP_SIZE); + munmap(huge_ptr, huge_page_size); } - return KSFT_PASS; + ksft_test_result(!sigbus_triggered, "SIGBUS behavior\n"); + + err = ksft_get_fail_cnt(); + if (err) + ksft_exit_fail_msg("%d out of %d tests failed\n", + err, ksft_test_num()); + ksft_exit_pass(); } diff --git a/tools/testing/selftests/mm/khugepaged.c b/tools/testing/selftests/mm/khugepaged.c index 56d4480e8d3c..8a4d34cce36b 100644 --- a/tools/testing/selftests/mm/khugepaged.c +++ b/tools/testing/selftests/mm/khugepaged.c @@ -1091,7 +1091,7 @@ static void usage(void) fprintf(stderr, "\n\t\"file,all\" mem_type requires kernel built with\n"); fprintf(stderr, "\tCONFIG_READ_ONLY_THP_FOR_FS=y\n"); fprintf(stderr, "\n\tif [dir] is a (sub)directory of a tmpfs mount, tmpfs must be\n"); - fprintf(stderr, "\tmounted with huge=madvise option for khugepaged tests to work\n"); + fprintf(stderr, "\tmounted with huge=advise option for khugepaged tests to work\n"); fprintf(stderr, "\n\tSupported Options:\n"); fprintf(stderr, "\t\t-h: This help message.\n"); fprintf(stderr, "\t\t-s: mTHP size, expressed as page order.\n"); diff --git a/tools/testing/selftests/mm/page_frag/Makefile b/tools/testing/selftests/mm/page_frag/Makefile new file mode 100644 index 000000000000..8c8bb39ffa28 --- /dev/null +++ b/tools/testing/selftests/mm/page_frag/Makefile @@ -0,0 +1,18 @@ +PAGE_FRAG_TEST_DIR := $(realpath $(dir $(abspath $(lastword $(MAKEFILE_LIST))))) +KDIR ?= /lib/modules/$(shell uname -r)/build + +ifeq ($(V),1) +Q = +else +Q = @ +endif + +MODULES = page_frag_test.ko + +obj-m += page_frag_test.o + +all: + +$(Q)make -C $(KDIR) M=$(PAGE_FRAG_TEST_DIR) modules + +clean: + +$(Q)make -C $(KDIR) M=$(PAGE_FRAG_TEST_DIR) clean diff --git a/tools/testing/selftests/mm/page_frag/page_frag_test.c b/tools/testing/selftests/mm/page_frag/page_frag_test.c new file mode 100644 index 000000000000..e806c1866e36 --- /dev/null +++ b/tools/testing/selftests/mm/page_frag/page_frag_test.c @@ -0,0 +1,198 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * Test module for page_frag cache + * + * Copyright (C) 2024 Yunsheng Lin <linyunsheng@huawei.com> + */ + +#include <linux/module.h> +#include <linux/cpumask.h> +#include <linux/completion.h> +#include <linux/ptr_ring.h> +#include <linux/kthread.h> +#include <linux/page_frag_cache.h> + +#define TEST_FAILED_PREFIX "page_frag_test failed: " + +static struct ptr_ring ptr_ring; +static int nr_objs = 512; +static atomic_t nthreads; +static struct completion wait; +static struct page_frag_cache test_nc; +static int test_popped; +static int test_pushed; +static bool force_exit; + +static int nr_test = 2000000; +module_param(nr_test, int, 0); +MODULE_PARM_DESC(nr_test, "number of iterations to test"); + +static bool test_align; +module_param(test_align, bool, 0); +MODULE_PARM_DESC(test_align, "use align API for testing"); + +static int test_alloc_len = 2048; +module_param(test_alloc_len, int, 0); +MODULE_PARM_DESC(test_alloc_len, "alloc len for testing"); + +static int test_push_cpu; +module_param(test_push_cpu, int, 0); +MODULE_PARM_DESC(test_push_cpu, "test cpu for pushing fragment"); + +static int test_pop_cpu; +module_param(test_pop_cpu, int, 0); +MODULE_PARM_DESC(test_pop_cpu, "test cpu for popping fragment"); + +static int page_frag_pop_thread(void *arg) +{ + struct ptr_ring *ring = arg; + + pr_info("page_frag pop test thread begins on cpu %d\n", + smp_processor_id()); + + while (test_popped < nr_test) { + void *obj = __ptr_ring_consume(ring); + + if (obj) { + test_popped++; + page_frag_free(obj); + } else { + if (force_exit) + break; + + cond_resched(); + } + } + + if (atomic_dec_and_test(&nthreads)) + complete(&wait); + + pr_info("page_frag pop test thread exits on cpu %d\n", + smp_processor_id()); + + return 0; +} + +static int page_frag_push_thread(void *arg) +{ + struct ptr_ring *ring = arg; + + pr_info("page_frag push test thread begins on cpu %d\n", + smp_processor_id()); + + while (test_pushed < nr_test && !force_exit) { + void *va; + int ret; + + if (test_align) { + va = page_frag_alloc_align(&test_nc, test_alloc_len, + GFP_KERNEL, SMP_CACHE_BYTES); + + if ((unsigned long)va & (SMP_CACHE_BYTES - 1)) { + force_exit = true; + WARN_ONCE(true, TEST_FAILED_PREFIX "unaligned va returned\n"); + } + } else { + va = page_frag_alloc(&test_nc, test_alloc_len, GFP_KERNEL); + } + + if (!va) + continue; + + ret = __ptr_ring_produce(ring, va); + if (ret) { + page_frag_free(va); + cond_resched(); + } else { + test_pushed++; + } + } + + pr_info("page_frag push test thread exits on cpu %d\n", + smp_processor_id()); + + if (atomic_dec_and_test(&nthreads)) + complete(&wait); + + return 0; +} + +static int __init page_frag_test_init(void) +{ + struct task_struct *tsk_push, *tsk_pop; + int last_pushed = 0, last_popped = 0; + ktime_t start; + u64 duration; + int ret; + + page_frag_cache_init(&test_nc); + atomic_set(&nthreads, 2); + init_completion(&wait); + + if (test_alloc_len > PAGE_SIZE || test_alloc_len <= 0 || + !cpu_active(test_push_cpu) || !cpu_active(test_pop_cpu)) + return -EINVAL; + + ret = ptr_ring_init(&ptr_ring, nr_objs, GFP_KERNEL); + if (ret) + return ret; + + tsk_push = kthread_create_on_cpu(page_frag_push_thread, &ptr_ring, + test_push_cpu, "page_frag_push"); + if (IS_ERR(tsk_push)) + return PTR_ERR(tsk_push); + + tsk_pop = kthread_create_on_cpu(page_frag_pop_thread, &ptr_ring, + test_pop_cpu, "page_frag_pop"); + if (IS_ERR(tsk_pop)) { + kthread_stop(tsk_push); + return PTR_ERR(tsk_pop); + } + + start = ktime_get(); + wake_up_process(tsk_push); + wake_up_process(tsk_pop); + + pr_info("waiting for test to complete\n"); + + while (!wait_for_completion_timeout(&wait, msecs_to_jiffies(10000))) { + /* exit if there is no progress for push or pop size */ + if (last_pushed == test_pushed || last_popped == test_popped) { + WARN_ONCE(true, TEST_FAILED_PREFIX "no progress\n"); + force_exit = true; + continue; + } + + last_pushed = test_pushed; + last_popped = test_popped; + pr_info("page_frag_test progress: pushed = %d, popped = %d\n", + test_pushed, test_popped); + } + + if (force_exit) { + pr_err(TEST_FAILED_PREFIX "exit with error\n"); + goto out; + } + + duration = (u64)ktime_us_delta(ktime_get(), start); + pr_info("%d of iterations for %s testing took: %lluus\n", nr_test, + test_align ? "aligned" : "non-aligned", duration); + +out: + ptr_ring_cleanup(&ptr_ring, NULL); + page_frag_cache_drain(&test_nc); + + return -EAGAIN; +} + +static void __exit page_frag_test_exit(void) +{ +} + +module_init(page_frag_test_init); +module_exit(page_frag_test_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Yunsheng Lin <linyunsheng@huawei.com>"); +MODULE_DESCRIPTION("Test module for page_frag"); diff --git a/tools/testing/selftests/mm/pkey-arm64.h b/tools/testing/selftests/mm/pkey-arm64.h index 580e1b0bb38e..d9d2100eafc0 100644 --- a/tools/testing/selftests/mm/pkey-arm64.h +++ b/tools/testing/selftests/mm/pkey-arm64.h @@ -31,6 +31,7 @@ #define NR_RESERVED_PKEYS 1 /* pkey-0 */ #define PKEY_ALLOW_ALL 0x77777777 +#define PKEY_REG_ALLOW_NONE 0x0 #define PKEY_BITS_PER_PKEY 4 #define PAGE_SIZE sysconf(_SC_PAGESIZE) @@ -126,7 +127,7 @@ static inline u64 get_pkey_bits(u64 reg, int pkey) return 0; } -static void aarch64_write_signal_pkey(ucontext_t *uctxt, u64 pkey) +static inline void aarch64_write_signal_pkey(ucontext_t *uctxt, u64 pkey) { struct _aarch64_ctx *ctx = GET_UC_RESV_HEAD(uctxt); struct poe_context *poe_ctx = diff --git a/tools/testing/selftests/mm/pkey-helpers.h b/tools/testing/selftests/mm/pkey-helpers.h index 9ab6a3ee153b..f7cfe163b0ff 100644 --- a/tools/testing/selftests/mm/pkey-helpers.h +++ b/tools/testing/selftests/mm/pkey-helpers.h @@ -112,6 +112,13 @@ void record_pkey_malloc(void *ptr, long size, int prot); #define PKEY_MASK (PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE) #endif +/* + * FIXME: Remove once the generic PKEY_UNRESTRICTED definition is merged. + */ +#ifndef PKEY_UNRESTRICTED +#define PKEY_UNRESTRICTED 0x0 +#endif + #ifndef set_pkey_bits static inline u64 set_pkey_bits(u64 reg, int pkey, u64 flags) { diff --git a/tools/testing/selftests/mm/pkey-x86.h b/tools/testing/selftests/mm/pkey-x86.h index 5f28e26a2511..ac91777c8917 100644 --- a/tools/testing/selftests/mm/pkey-x86.h +++ b/tools/testing/selftests/mm/pkey-x86.h @@ -34,6 +34,8 @@ #define PAGE_SIZE 4096 #define MB (1<<20) +#define PKEY_REG_ALLOW_NONE 0x55555555 + static inline void __page_o_noops(void) { /* 8-bytes of instruction * 512 bytes = 1 page */ diff --git a/tools/testing/selftests/mm/pkey_sighandler_tests.c b/tools/testing/selftests/mm/pkey_sighandler_tests.c index a8088b645ad6..c593a426341c 100644 --- a/tools/testing/selftests/mm/pkey_sighandler_tests.c +++ b/tools/testing/selftests/mm/pkey_sighandler_tests.c @@ -11,6 +11,7 @@ */ #define _GNU_SOURCE #define __SANE_USERSPACE_TYPES__ +#include <linux/mman.h> #include <errno.h> #include <sys/syscall.h> #include <string.h> @@ -59,12 +60,58 @@ long syscall_raw(long n, long a1, long a2, long a3, long a4, long a5, long a6) : "=a"(ret) : "a"(n), "b"(a1), "c"(a2), "d"(a3), "S"(a4), "D"(a5) : "memory"); +#elif defined __aarch64__ + register long x0 asm("x0") = a1; + register long x1 asm("x1") = a2; + register long x2 asm("x2") = a3; + register long x3 asm("x3") = a4; + register long x4 asm("x4") = a5; + register long x5 asm("x5") = a6; + register long x8 asm("x8") = n; + asm volatile ("svc #0" + : "=r"(x0) + : "r"(x0), "r"(x1), "r"(x2), "r"(x3), "r"(x4), "r"(x5), "r"(x8) + : "memory"); + ret = x0; #else # error syscall_raw() not implemented #endif return ret; } +static inline long clone_raw(unsigned long flags, void *stack, + int *parent_tid, int *child_tid) +{ + long a1 = flags; + long a2 = (long)stack; + long a3 = (long)parent_tid; +#if defined(__x86_64__) || defined(__i386) + long a4 = (long)child_tid; + long a5 = 0; +#elif defined(__aarch64__) + long a4 = 0; + long a5 = (long)child_tid; +#else +# error clone_raw() not implemented +#endif + + return syscall_raw(SYS_clone, a1, a2, a3, a4, a5, 0); +} + +/* + * Returns the most restrictive pkey register value that can be used by the + * tests. + */ +static inline u64 pkey_reg_restrictive_default(void) +{ + /* + * Disallow everything except execution on pkey 0, so that each caller + * doesn't need to enable it explicitly (the selftest code runs with + * its code mapped with pkey 0). + */ + return set_pkey_bits(PKEY_REG_ALLOW_NONE, 0, PKEY_DISABLE_ACCESS); +} + static void sigsegv_handler(int signo, siginfo_t *info, void *ucontext) { pthread_mutex_lock(&mutex); @@ -113,7 +160,7 @@ static void raise_sigusr2(void) static void *thread_segv_with_pkey0_disabled(void *ptr) { /* Disable MPK 0 (and all others too) */ - __write_pkey_reg(0x55555555); + __write_pkey_reg(pkey_reg_restrictive_default()); /* Segfault (with SEGV_MAPERR) */ *(int *) (0x1) = 1; @@ -123,7 +170,7 @@ static void *thread_segv_with_pkey0_disabled(void *ptr) static void *thread_segv_pkuerr_stack(void *ptr) { /* Disable MPK 0 (and all others too) */ - __write_pkey_reg(0x55555555); + __write_pkey_reg(pkey_reg_restrictive_default()); /* After we disable MPK 0, we can't access the stack to return */ return NULL; @@ -133,6 +180,7 @@ static void *thread_segv_maperr_ptr(void *ptr) { stack_t *stack = ptr; int *bad = (int *)1; + u64 pkey_reg; /* * Setup alternate signal stack, which should be pkey_mprotect()ed by @@ -142,7 +190,9 @@ static void *thread_segv_maperr_ptr(void *ptr) syscall_raw(SYS_sigaltstack, (long)stack, 0, 0, 0, 0, 0); /* Disable MPK 0. Only MPK 1 is enabled. */ - __write_pkey_reg(0x55555551); + pkey_reg = pkey_reg_restrictive_default(); + pkey_reg = set_pkey_bits(pkey_reg, 1, PKEY_UNRESTRICTED); + __write_pkey_reg(pkey_reg); /* Segfault */ *bad = 1; @@ -240,6 +290,7 @@ static void test_sigsegv_handler_with_different_pkey_for_stack(void) int pkey; int parent_pid = 0; int child_pid = 0; + u64 pkey_reg; sa.sa_flags = SA_SIGINFO | SA_ONSTACK; @@ -257,7 +308,10 @@ static void test_sigsegv_handler_with_different_pkey_for_stack(void) assert(stack != MAP_FAILED); /* Allow access to MPK 0 and MPK 1 */ - __write_pkey_reg(0x55555550); + pkey_reg = pkey_reg_restrictive_default(); + pkey_reg = set_pkey_bits(pkey_reg, 0, PKEY_UNRESTRICTED); + pkey_reg = set_pkey_bits(pkey_reg, 1, PKEY_UNRESTRICTED); + __write_pkey_reg(pkey_reg); /* Protect the new stack with MPK 1 */ pkey = pkey_alloc(0, 0); @@ -272,14 +326,13 @@ static void test_sigsegv_handler_with_different_pkey_for_stack(void) memset(&siginfo, 0, sizeof(siginfo)); /* Use clone to avoid newer glibcs using rseq on new threads */ - long ret = syscall_raw(SYS_clone, - CLONE_VM | CLONE_FS | CLONE_FILES | - CLONE_SIGHAND | CLONE_THREAD | CLONE_SYSVSEM | - CLONE_PARENT_SETTID | CLONE_CHILD_CLEARTID | - CLONE_DETACHED, - (long) ((char *)(stack) + STACK_SIZE), - (long) &parent_pid, - (long) &child_pid, 0, 0); + long ret = clone_raw(CLONE_VM | CLONE_FS | CLONE_FILES | + CLONE_SIGHAND | CLONE_THREAD | CLONE_SYSVSEM | + CLONE_PARENT_SETTID | CLONE_CHILD_CLEARTID | + CLONE_DETACHED, + stack + STACK_SIZE, + &parent_pid, + &child_pid); if (ret < 0) { errno = -ret; @@ -307,7 +360,13 @@ static void test_sigsegv_handler_with_different_pkey_for_stack(void) static void test_pkru_preserved_after_sigusr1(void) { struct sigaction sa; - unsigned long pkru = 0x45454544; + u64 pkey_reg; + + /* Allow access to MPK 0 and an arbitrary set of keys */ + pkey_reg = pkey_reg_restrictive_default(); + pkey_reg = set_pkey_bits(pkey_reg, 0, PKEY_UNRESTRICTED); + pkey_reg = set_pkey_bits(pkey_reg, 3, PKEY_UNRESTRICTED); + pkey_reg = set_pkey_bits(pkey_reg, 7, PKEY_UNRESTRICTED); sa.sa_flags = SA_SIGINFO; @@ -320,7 +379,7 @@ static void test_pkru_preserved_after_sigusr1(void) memset(&siginfo, 0, sizeof(siginfo)); - __write_pkey_reg(pkru); + __write_pkey_reg(pkey_reg); raise(SIGUSR1); @@ -330,7 +389,7 @@ static void test_pkru_preserved_after_sigusr1(void) pthread_mutex_unlock(&mutex); /* Ensure the pkru value is the same after returning from signal. */ - ksft_test_result(pkru == __read_pkey_reg() && + ksft_test_result(pkey_reg == __read_pkey_reg() && siginfo.si_signo == SIGUSR1, "%s\n", __func__); } @@ -347,6 +406,7 @@ static noinline void *thread_sigusr2_self(void *ptr) 'S', 'I', 'G', 'U', 'S', 'R', '2', '.', '.', '.', '\n', '\0'}; stack_t *stack = ptr; + u64 pkey_reg; /* * Setup alternate signal stack, which should be pkey_mprotect()ed by @@ -356,7 +416,9 @@ static noinline void *thread_sigusr2_self(void *ptr) syscall(SYS_sigaltstack, (long)stack, 0, 0, 0, 0, 0); /* Disable MPK 0. Only MPK 2 is enabled. */ - __write_pkey_reg(0x55555545); + pkey_reg = pkey_reg_restrictive_default(); + pkey_reg = set_pkey_bits(pkey_reg, 2, PKEY_UNRESTRICTED); + __write_pkey_reg(pkey_reg); raise_sigusr2(); @@ -384,6 +446,7 @@ static void test_pkru_sigreturn(void) int pkey; int parent_pid = 0; int child_pid = 0; + u64 pkey_reg; sa.sa_handler = SIG_DFL; sa.sa_flags = 0; @@ -418,7 +481,10 @@ static void test_pkru_sigreturn(void) * the current thread's stack is protected by the default MPK 0. Hence * both need to be enabled. */ - __write_pkey_reg(0x55555544); + pkey_reg = pkey_reg_restrictive_default(); + pkey_reg = set_pkey_bits(pkey_reg, 0, PKEY_UNRESTRICTED); + pkey_reg = set_pkey_bits(pkey_reg, 2, PKEY_UNRESTRICTED); + __write_pkey_reg(pkey_reg); /* Protect the stack with MPK 2 */ pkey = pkey_alloc(0, 0); @@ -431,14 +497,13 @@ static void test_pkru_sigreturn(void) sigstack.ss_size = STACK_SIZE; /* Use clone to avoid newer glibcs using rseq on new threads */ - long ret = syscall_raw(SYS_clone, - CLONE_VM | CLONE_FS | CLONE_FILES | - CLONE_SIGHAND | CLONE_THREAD | CLONE_SYSVSEM | - CLONE_PARENT_SETTID | CLONE_CHILD_CLEARTID | - CLONE_DETACHED, - (long) ((char *)(stack) + STACK_SIZE), - (long) &parent_pid, - (long) &child_pid, 0, 0); + long ret = clone_raw(CLONE_VM | CLONE_FS | CLONE_FILES | + CLONE_SIGHAND | CLONE_THREAD | CLONE_SYSVSEM | + CLONE_PARENT_SETTID | CLONE_CHILD_CLEARTID | + CLONE_DETACHED, + stack + STACK_SIZE, + &parent_pid, + &child_pid); if (ret < 0) { errno = -ret; diff --git a/tools/testing/selftests/mm/run_vmtests.sh b/tools/testing/selftests/mm/run_vmtests.sh index c5797ad1d37b..2fc290d9430c 100755 --- a/tools/testing/selftests/mm/run_vmtests.sh +++ b/tools/testing/selftests/mm/run_vmtests.sh @@ -75,6 +75,8 @@ separated by spaces: read-only VMAs - mdwe test prctl(PR_SET_MDWE, ...) +- page_frag + test handling of page fragment allocation and freeing example: ./run_vmtests.sh -t "hmm mmap ksm" EOF @@ -347,10 +349,12 @@ if [ $VADDR64 -ne 0 ]; then # allows high virtual address allocation requests independent # of platform's physical memory. - prev_policy=$(cat /proc/sys/vm/overcommit_memory) - echo 1 > /proc/sys/vm/overcommit_memory - CATEGORY="hugevm" run_test ./virtual_address_range - echo $prev_policy > /proc/sys/vm/overcommit_memory + if [ -x ./virtual_address_range ]; then + prev_policy=$(cat /proc/sys/vm/overcommit_memory) + echo 1 > /proc/sys/vm/overcommit_memory + CATEGORY="hugevm" run_test ./virtual_address_range + echo $prev_policy > /proc/sys/vm/overcommit_memory + fi # va high address boundary switch test ARCH_ARM64="arm64" @@ -456,6 +460,12 @@ CATEGORY="mkdirty" run_test ./mkdirty CATEGORY="mdwe" run_test ./mdwe_test +CATEGORY="page_frag" run_test ./test_page_frag.sh smoke + +CATEGORY="page_frag" run_test ./test_page_frag.sh aligned + +CATEGORY="page_frag" run_test ./test_page_frag.sh nonaligned + echo "SUMMARY: PASS=${count_pass} SKIP=${count_skip} FAIL=${count_fail}" | tap_prefix echo "1..${count_total}" | tap_output diff --git a/tools/testing/selftests/mm/test_page_frag.sh b/tools/testing/selftests/mm/test_page_frag.sh new file mode 100755 index 000000000000..f55b105084cf --- /dev/null +++ b/tools/testing/selftests/mm/test_page_frag.sh @@ -0,0 +1,175 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Copyright (C) 2024 Yunsheng Lin <linyunsheng@huawei.com> +# Copyright (C) 2018 Uladzislau Rezki (Sony) <urezki@gmail.com> +# +# This is a test script for the kernel test driver to test the +# correctness and performance of page_frag's implementation. +# Therefore it is just a kernel module loader. You can specify +# and pass different parameters in order to: +# a) analyse performance of page fragment allocations; +# b) stressing and stability check of page_frag subsystem. + +DRIVER="./page_frag/page_frag_test.ko" +CPU_LIST=$(grep -m 2 processor /proc/cpuinfo | cut -d ' ' -f 2) +TEST_CPU_0=$(echo $CPU_LIST | awk '{print $1}') + +if [ $(echo $CPU_LIST | wc -w) -gt 1 ]; then + TEST_CPU_1=$(echo $CPU_LIST | awk '{print $2}') + NR_TEST=100000000 +else + TEST_CPU_1=$TEST_CPU_0 + NR_TEST=1000000 +fi + +# 1 if fails +exitcode=1 + +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 + +check_test_failed_prefix() { + if dmesg | grep -q 'page_frag_test failed:';then + echo "page_frag_test failed, please check dmesg" + exit $exitcode + fi +} + +# +# Static templates for testing of page_frag APIs. +# Also it is possible to pass any supported parameters manually. +# +SMOKE_PARAM="test_push_cpu=$TEST_CPU_0 test_pop_cpu=$TEST_CPU_1" +NONALIGNED_PARAM="$SMOKE_PARAM test_alloc_len=75 nr_test=$NR_TEST" +ALIGNED_PARAM="$NONALIGNED_PARAM test_align=1" + +check_test_requirements() +{ + uid=$(id -u) + if [ $uid -ne 0 ]; then + echo "$0: Must be run as root" + exit $ksft_skip + fi + + if ! which insmod > /dev/null 2>&1; then + echo "$0: You need insmod installed" + exit $ksft_skip + fi + + if [ ! -f $DRIVER ]; then + echo "$0: You need to compile page_frag_test module" + exit $ksft_skip + fi +} + +run_nonaligned_check() +{ + echo "Run performance tests to evaluate how fast nonaligned alloc API is." + + insmod $DRIVER $NONALIGNED_PARAM > /dev/null 2>&1 +} + +run_aligned_check() +{ + echo "Run performance tests to evaluate how fast aligned alloc API is." + + insmod $DRIVER $ALIGNED_PARAM > /dev/null 2>&1 +} + +run_smoke_check() +{ + echo "Run smoke test." + + insmod $DRIVER $SMOKE_PARAM > /dev/null 2>&1 +} + +usage() +{ + echo -n "Usage: $0 [ aligned ] | [ nonaligned ] | | [ smoke ] | " + echo "manual parameters" + echo + echo "Valid tests and parameters:" + echo + modinfo $DRIVER + echo + echo "Example usage:" + echo + echo "# Shows help message" + echo "$0" + echo + echo "# Smoke testing" + echo "$0 smoke" + echo + echo "# Performance testing for nonaligned alloc API" + echo "$0 nonaligned" + echo + echo "# Performance testing for aligned alloc API" + echo "$0 aligned" + echo + exit 0 +} + +function validate_passed_args() +{ + VALID_ARGS=`modinfo $DRIVER | awk '/parm:/ {print $2}' | sed 's/:.*//'` + + # + # Something has been passed, check it. + # + for passed_arg in $@; do + key=${passed_arg//=*/} + valid=0 + + for valid_arg in $VALID_ARGS; do + if [[ $key = $valid_arg ]]; then + valid=1 + break + fi + done + + if [[ $valid -ne 1 ]]; then + echo "Error: key is not correct: ${key}" + exit $exitcode + fi + done +} + +function run_manual_check() +{ + # + # Validate passed parameters. If there is wrong one, + # the script exists and does not execute further. + # + validate_passed_args $@ + + echo "Run the test with following parameters: $@" + insmod $DRIVER $@ > /dev/null 2>&1 +} + +function run_test() +{ + if [ $# -eq 0 ]; then + usage + else + if [[ "$1" = "smoke" ]]; then + run_smoke_check + elif [[ "$1" = "nonaligned" ]]; then + run_nonaligned_check + elif [[ "$1" = "aligned" ]]; then + run_aligned_check + else + run_manual_check $@ + fi + fi + + check_test_failed_prefix + + echo "Done." + echo "Check the kernel ring buffer to see the summary." +} + +check_test_requirements +run_test $@ + +exit 0 diff --git a/tools/testing/selftests/mm/uffd-unit-tests.c b/tools/testing/selftests/mm/uffd-unit-tests.c index b3d21eed203d..a2e71b1636e7 100644 --- a/tools/testing/selftests/mm/uffd-unit-tests.c +++ b/tools/testing/selftests/mm/uffd-unit-tests.c @@ -241,6 +241,8 @@ static void *fork_event_consumer(void *data) fork_event_args *args = data; struct uffd_msg msg = { 0 }; + ready_for_fork = true; + /* Read until a full msg received */ while (uffd_read_msg(args->parent_uffd, &msg)); @@ -308,8 +310,11 @@ static int pagemap_test_fork(int uffd, bool with_event, bool test_pin) /* Prepare a thread to resolve EVENT_FORK */ if (with_event) { + ready_for_fork = false; if (pthread_create(&thread, NULL, fork_event_consumer, &args)) err("pthread_create()"); + while (!ready_for_fork) + ; /* Wait for the poll_thread to start executing before forking */ } child = fork(); diff --git a/tools/testing/selftests/mm/virtual_address_range.c b/tools/testing/selftests/mm/virtual_address_range.c index 4e4c1e311247..2a2b69e91950 100644 --- a/tools/testing/selftests/mm/virtual_address_range.c +++ b/tools/testing/selftests/mm/virtual_address_range.c @@ -64,7 +64,7 @@ #define NR_CHUNKS_HIGH NR_CHUNKS_384TB #endif -static char *hind_addr(void) +static char *hint_addr(void) { int bits = HIGH_ADDR_SHIFT + rand() % (63 - HIGH_ADDR_SHIFT); @@ -185,7 +185,7 @@ int main(int argc, char *argv[]) } for (i = 0; i < NR_CHUNKS_HIGH; i++) { - hint = hind_addr(); + hint = hint_addr(); hptr[i] = mmap(hint, MAP_CHUNK_SIZE, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); diff --git a/tools/testing/selftests/module/Makefile b/tools/testing/selftests/module/Makefile new file mode 100644 index 000000000000..6132d7ddb08b --- /dev/null +++ b/tools/testing/selftests/module/Makefile @@ -0,0 +1,12 @@ +# SPDX-License-Identifier: GPL-2.0-only +# Makefile for module loading selftests + +# No binaries, but make sure arg-less "make" doesn't trigger "run_tests" +all: + +TEST_PROGS := find_symbol.sh + +include ../lib.mk + +# Nothing to clean up. +clean: diff --git a/tools/testing/selftests/module/config b/tools/testing/selftests/module/config new file mode 100644 index 000000000000..b0c206b1ad47 --- /dev/null +++ b/tools/testing/selftests/module/config @@ -0,0 +1,3 @@ +CONFIG_TEST_RUNTIME=y +CONFIG_TEST_RUNTIME_MODULE=y +CONFIG_TEST_KALLSYMS=m diff --git a/tools/testing/selftests/module/find_symbol.sh b/tools/testing/selftests/module/find_symbol.sh new file mode 100755 index 000000000000..2c56805c9b6e --- /dev/null +++ b/tools/testing/selftests/module/find_symbol.sh @@ -0,0 +1,81 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0-or-later OR copyleft-next-0.3.1 +# Copyright (C) 2023 Luis Chamberlain <mcgrof@kernel.org> +# +# This is a stress test script for kallsyms through find_symbol() + +set -e + +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 + +test_reqs() +{ + if ! which modprobe 2> /dev/null > /dev/null; then + echo "$0: You need modprobe installed" >&2 + exit $ksft_skip + fi + + if ! which kmod 2> /dev/null > /dev/null; then + echo "$0: You need kmod installed" >&2 + exit $ksft_skip + fi + + if ! which perf 2> /dev/null > /dev/null; then + echo "$0: You need perf installed" >&2 + exit $ksft_skip + fi + + uid=$(id -u) + if [ $uid -ne 0 ]; then + echo $msg must be run as root >&2 + exit $ksft_skip + fi +} + +load_mod() +{ + local STATS="-e duration_time" + STATS="$STATS -e user_time" + STATS="$STATS -e system_time" + STATS="$STATS -e page-faults" + local MOD=$1 + + local ARCH="$(uname -m)" + case "${ARCH}" in + x86_64) + perf stat $STATS $MODPROBE $MOD + ;; + *) + time $MODPROBE $MOD + exit 1 + ;; + esac +} + +remove_all() +{ + $MODPROBE -r test_kallsyms_b + for i in a b c d; do + $MODPROBE -r test_kallsyms_$i + done +} +test_reqs + +MODPROBE=$(</proc/sys/kernel/modprobe) + +remove_all +load_mod test_kallsyms_b +remove_all + +# Now pollute the namespace +$MODPROBE test_kallsyms_c +load_mod test_kallsyms_b + +# Now pollute the namespace with twice the number of symbols than the last time +remove_all +$MODPROBE test_kallsyms_c +$MODPROBE test_kallsyms_d +load_mod test_kallsyms_b + +exit 0 diff --git a/tools/testing/selftests/mount_setattr/mount_setattr_test.c b/tools/testing/selftests/mount_setattr/mount_setattr_test.c index c6a8c732b802..70f65eb320a7 100644 --- a/tools/testing/selftests/mount_setattr/mount_setattr_test.c +++ b/tools/testing/selftests/mount_setattr/mount_setattr_test.c @@ -1026,7 +1026,7 @@ FIXTURE_SETUP(mount_setattr_idmapped) "size=100000,mode=700"), 0); ASSERT_EQ(mount("testing", "/mnt", "tmpfs", MS_NOATIME | MS_NODEV, - "size=100000,mode=700"), 0); + "size=2m,mode=700"), 0); ASSERT_EQ(mkdir("/mnt/A", 0777), 0); @@ -1414,6 +1414,13 @@ TEST_F(mount_setattr_idmapped, idmap_mount_tree_invalid) ASSERT_EQ(expected_uid_gid(-EBADF, "/tmp/B/b", 0, 0, 0), 0); ASSERT_EQ(expected_uid_gid(-EBADF, "/tmp/B/BB/b", 0, 0, 0), 0); + ASSERT_EQ(mount("testing", "/mnt/A", "ramfs", MS_NOATIME | MS_NODEV, + "size=100000,mode=700"), 0); + + ASSERT_EQ(mkdir("/mnt/A/AA", 0777), 0); + + ASSERT_EQ(mount("/tmp", "/mnt/A/AA", NULL, MS_BIND | MS_REC, NULL), 0); + open_tree_fd = sys_open_tree(-EBADF, "/mnt/A", AT_RECURSIVE | AT_EMPTY_PATH | @@ -1433,6 +1440,8 @@ TEST_F(mount_setattr_idmapped, idmap_mount_tree_invalid) ASSERT_EQ(expected_uid_gid(-EBADF, "/tmp/B/BB/b", 0, 0, 0), 0); ASSERT_EQ(expected_uid_gid(open_tree_fd, "B/b", 0, 0, 0), 0); ASSERT_EQ(expected_uid_gid(open_tree_fd, "B/BB/b", 0, 0, 0), 0); + + (void)umount2("/mnt/A", MNT_DETACH); } TEST_F(mount_setattr, mount_attr_nosymfollow) diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore index 1c04c780db66..28a715a8ef2b 100644 --- a/tools/testing/selftests/net/.gitignore +++ b/tools/testing/selftests/net/.gitignore @@ -2,6 +2,7 @@ bind_bhash bind_timewait bind_wildcard +busy_poller cmsg_sender diag_uid epoll_busy_poll @@ -16,8 +17,9 @@ ipsec ipv6_flowlabel ipv6_flowlabel_mgr log.txt +msg_oob msg_zerocopy -ncdevmem +netlink-dumps nettest psock_fanout psock_snd diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index 649f1fe0dc46..cb2fc601de66 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -92,14 +92,15 @@ TEST_PROGS += test_vxlan_mdb.sh TEST_PROGS += test_bridge_neigh_suppress.sh TEST_PROGS += test_vxlan_nolocalbypass.sh TEST_PROGS += test_bridge_backup_port.sh -TEST_PROGS += fdb_flush.sh +TEST_PROGS += fdb_flush.sh fdb_notify.sh TEST_PROGS += fq_band_pktlimit.sh TEST_PROGS += vlan_hw_filter.sh TEST_PROGS += bpf_offload.py +TEST_PROGS += ipv6_route_update_soft_lockup.sh +TEST_PROGS += busy_poll_test.sh # YNL files, must be before "include ..lib.mk" -EXTRA_CLEAN += $(OUTPUT)/libynl.a -YNL_GEN_FILES := ncdevmem +YNL_GEN_FILES := busy_poller netlink-dumps TEST_GEN_FILES += $(YNL_GEN_FILES) TEST_FILES := settings diff --git a/tools/testing/selftests/net/bpf_offload.py b/tools/testing/selftests/net/bpf_offload.py index 3efe44f6e92a..d10f420e4ef6 100755 --- a/tools/testing/selftests/net/bpf_offload.py +++ b/tools/testing/selftests/net/bpf_offload.py @@ -594,8 +594,9 @@ def check_extack_nsim(output, reference, args): check_extack(output, "netdevsim: " + reference, args) def check_no_extack(res, needle): - fail((res[1] + res[2]).count(needle) or (res[1] + res[2]).count("Warning:"), - "Found '%s' in command output, leaky extack?" % (needle)) + haystack = (res[1] + res[2]).strip() + fail(haystack.count(needle) or haystack.count("Warning:"), + "Unexpected command output, leaky extack? ('%s', '%s')" % (needle, haystack)) def check_verifier_log(output, reference): lines = output.split("\n") diff --git a/tools/testing/selftests/net/busy_poll_test.sh b/tools/testing/selftests/net/busy_poll_test.sh new file mode 100755 index 000000000000..7db292ec4884 --- /dev/null +++ b/tools/testing/selftests/net/busy_poll_test.sh @@ -0,0 +1,165 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +source net_helper.sh + +NSIM_SV_ID=$((256 + RANDOM % 256)) +NSIM_SV_SYS=/sys/bus/netdevsim/devices/netdevsim$NSIM_SV_ID +NSIM_CL_ID=$((512 + RANDOM % 256)) +NSIM_CL_SYS=/sys/bus/netdevsim/devices/netdevsim$NSIM_CL_ID + +NSIM_DEV_SYS_NEW=/sys/bus/netdevsim/new_device +NSIM_DEV_SYS_DEL=/sys/bus/netdevsim/del_device +NSIM_DEV_SYS_LINK=/sys/bus/netdevsim/link_device +NSIM_DEV_SYS_UNLINK=/sys/bus/netdevsim/unlink_device + +SERVER_IP=192.168.1.1 +CLIENT_IP=192.168.1.2 +SERVER_PORT=48675 + +# busy poll config +MAX_EVENTS=8 +BUSY_POLL_USECS=0 +BUSY_POLL_BUDGET=16 +PREFER_BUSY_POLL=1 + +# IRQ deferral config +NAPI_DEFER_HARD_IRQS=100 +GRO_FLUSH_TIMEOUT=50000 +SUSPEND_TIMEOUT=20000000 + +setup_ns() +{ + set -e + ip netns add nssv + ip netns add nscl + + NSIM_SV_NAME=$(find $NSIM_SV_SYS/net -maxdepth 1 -type d ! \ + -path $NSIM_SV_SYS/net -exec basename {} \;) + NSIM_CL_NAME=$(find $NSIM_CL_SYS/net -maxdepth 1 -type d ! \ + -path $NSIM_CL_SYS/net -exec basename {} \;) + + # ensure the server has 1 queue + ethtool -L $NSIM_SV_NAME combined 1 2>/dev/null + + ip link set $NSIM_SV_NAME netns nssv + ip link set $NSIM_CL_NAME netns nscl + + ip netns exec nssv ip addr add "${SERVER_IP}/24" dev $NSIM_SV_NAME + ip netns exec nscl ip addr add "${CLIENT_IP}/24" dev $NSIM_CL_NAME + + ip netns exec nssv ip link set dev $NSIM_SV_NAME up + ip netns exec nscl ip link set dev $NSIM_CL_NAME up + + set +e +} + +cleanup_ns() +{ + ip netns del nscl + ip netns del nssv +} + +test_busypoll() +{ + suspend_value=${1:-0} + tmp_file=$(mktemp) + out_file=$(mktemp) + + # fill a test file with random data + dd if=/dev/urandom of=${tmp_file} bs=1M count=1 2> /dev/null + + timeout -k 1s 30s ip netns exec nssv ./busy_poller \ + -p${SERVER_PORT} \ + -b${SERVER_IP} \ + -m${MAX_EVENTS} \ + -u${BUSY_POLL_USECS} \ + -P${PREFER_BUSY_POLL} \ + -g${BUSY_POLL_BUDGET} \ + -i${NSIM_SV_IFIDX} \ + -s${suspend_value} \ + -o${out_file}& + + wait_local_port_listen nssv ${SERVER_PORT} tcp + + ip netns exec nscl socat -u $tmp_file TCP:${SERVER_IP}:${SERVER_PORT} + + wait + + tmp_file_md5sum=$(md5sum $tmp_file | cut -f1 -d' ') + out_file_md5sum=$(md5sum $out_file | cut -f1 -d' ') + + if [ "$tmp_file_md5sum" = "$out_file_md5sum" ]; then + res=0 + else + echo "md5sum mismatch" + echo "input file md5sum: ${tmp_file_md5sum}"; + echo "output file md5sum: ${out_file_md5sum}"; + res=1 + fi + + rm $out_file $tmp_file + + return $res +} + +test_busypoll_with_suspend() +{ + test_busypoll ${SUSPEND_TIMEOUT} + + return $? +} + +### +### Code start +### + +modprobe netdevsim + +# linking + +echo $NSIM_SV_ID > $NSIM_DEV_SYS_NEW +echo $NSIM_CL_ID > $NSIM_DEV_SYS_NEW +udevadm settle + +setup_ns + +NSIM_SV_FD=$((256 + RANDOM % 256)) +exec {NSIM_SV_FD}</var/run/netns/nssv +NSIM_SV_IFIDX=$(ip netns exec nssv cat /sys/class/net/$NSIM_SV_NAME/ifindex) + +NSIM_CL_FD=$((256 + RANDOM % 256)) +exec {NSIM_CL_FD}</var/run/netns/nscl +NSIM_CL_IFIDX=$(ip netns exec nscl cat /sys/class/net/$NSIM_CL_NAME/ifindex) + +echo "$NSIM_SV_FD:$NSIM_SV_IFIDX $NSIM_CL_FD:$NSIM_CL_IFIDX" > \ + $NSIM_DEV_SYS_LINK + +if [ $? -ne 0 ]; then + echo "linking netdevsim1 with netdevsim2 should succeed" + cleanup_ns + exit 1 +fi + +test_busypoll +if [ $? -ne 0 ]; then + echo "test_busypoll failed" + cleanup_ns + exit 1 +fi + +test_busypoll_with_suspend +if [ $? -ne 0 ]; then + echo "test_busypoll_with_suspend failed" + cleanup_ns + exit 1 +fi + +echo "$NSIM_SV_FD:$NSIM_SV_IFIDX" > $NSIM_DEV_SYS_UNLINK + +echo $NSIM_CL_ID > $NSIM_DEV_SYS_DEL + +cleanup_ns + +modprobe -r netdevsim + +exit 0 diff --git a/tools/testing/selftests/net/busy_poller.c b/tools/testing/selftests/net/busy_poller.c new file mode 100644 index 000000000000..99b0e8c17fca --- /dev/null +++ b/tools/testing/selftests/net/busy_poller.c @@ -0,0 +1,346 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <assert.h> +#include <errno.h> +#include <error.h> +#include <fcntl.h> +#include <inttypes.h> +#include <limits.h> +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <unistd.h> +#include <ynl.h> + +#include <arpa/inet.h> +#include <netinet/in.h> + +#include <sys/epoll.h> +#include <sys/ioctl.h> +#include <sys/socket.h> +#include <sys/types.h> + +#include <linux/genetlink.h> +#include <linux/netlink.h> + +#include "netdev-user.h" + +/* The below ifdef blob is required because: + * + * - sys/epoll.h does not (yet) have the ioctl definitions included. So, + * systems with older glibcs will not have them available. However, + * sys/epoll.h does include the type definition for epoll_data, which is + * needed by the user program (e.g. epoll_event.data.fd) + * + * - linux/eventpoll.h does not define the epoll_data type, it is simply an + * opaque __u64. It does, however, include the ioctl definition. + * + * Including both headers is impossible (types would be redefined), so I've + * opted instead to take sys/epoll.h, and include the blob below. + * + * Someday, when glibc is globally up to date, the blob below can be removed. + */ +#if !defined(EPOLL_IOC_TYPE) +struct epoll_params { + uint32_t busy_poll_usecs; + uint16_t busy_poll_budget; + uint8_t prefer_busy_poll; + + /* pad the struct to a multiple of 64bits */ + uint8_t __pad; +}; + +#define EPOLL_IOC_TYPE 0x8A +#define EPIOCSPARAMS _IOW(EPOLL_IOC_TYPE, 0x01, struct epoll_params) +#define EPIOCGPARAMS _IOR(EPOLL_IOC_TYPE, 0x02, struct epoll_params) +#endif + +static uint32_t cfg_port = 8000; +static struct in_addr cfg_bind_addr = { .s_addr = INADDR_ANY }; +static char *cfg_outfile; +static int cfg_max_events = 8; +static int cfg_ifindex; + +/* busy poll params */ +static uint32_t cfg_busy_poll_usecs; +static uint32_t cfg_busy_poll_budget; +static uint32_t cfg_prefer_busy_poll; + +/* IRQ params */ +static uint32_t cfg_defer_hard_irqs; +static uint64_t cfg_gro_flush_timeout; +static uint64_t cfg_irq_suspend_timeout; + +static void usage(const char *filepath) +{ + error(1, 0, + "Usage: %s -p<port> -b<addr> -m<max_events> -u<busy_poll_usecs> -P<prefer_busy_poll> -g<busy_poll_budget> -o<outfile> -d<defer_hard_irqs> -r<gro_flush_timeout> -s<irq_suspend_timeout> -i<ifindex>", + filepath); +} + +static void parse_opts(int argc, char **argv) +{ + int ret; + int c; + + if (argc <= 1) + usage(argv[0]); + + while ((c = getopt(argc, argv, "p:m:b:u:P:g:o:d:r:s:i:")) != -1) { + switch (c) { + case 'u': + cfg_busy_poll_usecs = strtoul(optarg, NULL, 0); + if (cfg_busy_poll_usecs == ULONG_MAX || + cfg_busy_poll_usecs > UINT32_MAX) + error(1, ERANGE, "busy_poll_usecs too large"); + break; + case 'P': + cfg_prefer_busy_poll = strtoul(optarg, NULL, 0); + if (cfg_prefer_busy_poll == ULONG_MAX || + cfg_prefer_busy_poll > 1) + error(1, ERANGE, + "prefer busy poll should be 0 or 1"); + break; + case 'g': + cfg_busy_poll_budget = strtoul(optarg, NULL, 0); + if (cfg_busy_poll_budget == ULONG_MAX || + cfg_busy_poll_budget > UINT16_MAX) + error(1, ERANGE, + "busy poll budget must be [0, UINT16_MAX]"); + break; + case 'p': + cfg_port = strtoul(optarg, NULL, 0); + if (cfg_port > UINT16_MAX) + error(1, ERANGE, "port must be <= 65535"); + break; + case 'b': + ret = inet_aton(optarg, &cfg_bind_addr); + if (ret == 0) + error(1, errno, + "bind address %s invalid", optarg); + break; + case 'o': + cfg_outfile = strdup(optarg); + if (!cfg_outfile) + error(1, 0, "outfile invalid"); + break; + case 'm': + cfg_max_events = strtol(optarg, NULL, 0); + + if (cfg_max_events == LONG_MIN || + cfg_max_events == LONG_MAX || + cfg_max_events <= 0) + error(1, ERANGE, + "max events must be > 0 and < LONG_MAX"); + break; + case 'd': + cfg_defer_hard_irqs = strtoul(optarg, NULL, 0); + + if (cfg_defer_hard_irqs == ULONG_MAX || + cfg_defer_hard_irqs > INT32_MAX) + error(1, ERANGE, + "defer_hard_irqs must be <= INT32_MAX"); + break; + case 'r': + cfg_gro_flush_timeout = strtoull(optarg, NULL, 0); + + if (cfg_gro_flush_timeout == ULLONG_MAX) + error(1, ERANGE, + "gro_flush_timeout must be < ULLONG_MAX"); + break; + case 's': + cfg_irq_suspend_timeout = strtoull(optarg, NULL, 0); + + if (cfg_irq_suspend_timeout == ULLONG_MAX) + error(1, ERANGE, + "irq_suspend_timeout must be < ULLONG_MAX"); + break; + case 'i': + cfg_ifindex = strtoul(optarg, NULL, 0); + if (cfg_ifindex == ULONG_MAX) + error(1, ERANGE, + "ifindex must be < ULONG_MAX"); + break; + } + } + + if (!cfg_ifindex) + usage(argv[0]); + + if (optind != argc) + usage(argv[0]); +} + +static void epoll_ctl_add(int epfd, int fd, uint32_t events) +{ + struct epoll_event ev; + + ev.events = events; + ev.data.fd = fd; + if (epoll_ctl(epfd, EPOLL_CTL_ADD, fd, &ev) == -1) + error(1, errno, "epoll_ctl add fd: %d", fd); +} + +static void setnonblock(int sockfd) +{ + int flags; + + flags = fcntl(sockfd, F_GETFL, 0); + + if (fcntl(sockfd, F_SETFL, flags | O_NONBLOCK) == -1) + error(1, errno, "unable to set socket to nonblocking mode"); +} + +static void write_chunk(int fd, char *buf, ssize_t buflen) +{ + ssize_t remaining = buflen; + char *buf_offset = buf; + ssize_t writelen = 0; + ssize_t write_result; + + while (writelen < buflen) { + write_result = write(fd, buf_offset, remaining); + if (write_result == -1) + error(1, errno, "unable to write data to outfile"); + + writelen += write_result; + remaining -= write_result; + buf_offset += write_result; + } +} + +static void setup_queue(void) +{ + struct netdev_napi_get_list *napi_list = NULL; + struct netdev_napi_get_req_dump *req = NULL; + struct netdev_napi_set_req *set_req = NULL; + struct ynl_sock *ys; + struct ynl_error yerr; + uint32_t napi_id; + + ys = ynl_sock_create(&ynl_netdev_family, &yerr); + if (!ys) + error(1, 0, "YNL: %s", yerr.msg); + + req = netdev_napi_get_req_dump_alloc(); + netdev_napi_get_req_dump_set_ifindex(req, cfg_ifindex); + napi_list = netdev_napi_get_dump(ys, req); + + /* assume there is 1 NAPI configured and take the first */ + if (napi_list->obj._present.id) + napi_id = napi_list->obj.id; + else + error(1, 0, "napi ID not present?"); + + set_req = netdev_napi_set_req_alloc(); + netdev_napi_set_req_set_id(set_req, napi_id); + netdev_napi_set_req_set_defer_hard_irqs(set_req, cfg_defer_hard_irqs); + netdev_napi_set_req_set_gro_flush_timeout(set_req, + cfg_gro_flush_timeout); + netdev_napi_set_req_set_irq_suspend_timeout(set_req, + cfg_irq_suspend_timeout); + + if (netdev_napi_set(ys, set_req)) + error(1, 0, "can't set NAPI params: %s\n", yerr.msg); + + netdev_napi_get_list_free(napi_list); + netdev_napi_get_req_dump_free(req); + netdev_napi_set_req_free(set_req); + ynl_sock_destroy(ys); +} + +static void run_poller(void) +{ + struct epoll_event events[cfg_max_events]; + struct epoll_params epoll_params = {0}; + struct sockaddr_in server_addr; + int i, epfd, nfds; + ssize_t readlen; + int outfile_fd; + char buf[1024]; + int sockfd; + int conn; + int val; + + outfile_fd = open(cfg_outfile, O_WRONLY | O_CREAT, 0644); + if (outfile_fd == -1) + error(1, errno, "unable to open outfile: %s", cfg_outfile); + + sockfd = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP); + if (sockfd == -1) + error(1, errno, "unable to create listen socket"); + + server_addr.sin_family = AF_INET; + server_addr.sin_port = htons(cfg_port); + server_addr.sin_addr = cfg_bind_addr; + + /* these values are range checked during parse_opts, so casting is safe + * here + */ + epoll_params.busy_poll_usecs = cfg_busy_poll_usecs; + epoll_params.busy_poll_budget = (uint16_t)cfg_busy_poll_budget; + epoll_params.prefer_busy_poll = (uint8_t)cfg_prefer_busy_poll; + epoll_params.__pad = 0; + + val = 1; + if (setsockopt(sockfd, SOL_SOCKET, SO_REUSEADDR, &val, sizeof(val))) + error(1, errno, "poller setsockopt reuseaddr"); + + setnonblock(sockfd); + + if (bind(sockfd, (struct sockaddr *)&server_addr, + sizeof(struct sockaddr_in))) + error(0, errno, "poller bind to port: %d\n", cfg_port); + + if (listen(sockfd, 1)) + error(1, errno, "poller listen"); + + epfd = epoll_create1(0); + if (ioctl(epfd, EPIOCSPARAMS, &epoll_params) == -1) + error(1, errno, "unable to set busy poll params"); + + epoll_ctl_add(epfd, sockfd, EPOLLIN | EPOLLOUT | EPOLLET); + + for (;;) { + nfds = epoll_wait(epfd, events, cfg_max_events, -1); + for (i = 0; i < nfds; i++) { + if (events[i].data.fd == sockfd) { + conn = accept(sockfd, NULL, NULL); + if (conn == -1) + error(1, errno, + "accepting incoming connection failed"); + + setnonblock(conn); + epoll_ctl_add(epfd, conn, + EPOLLIN | EPOLLET | EPOLLRDHUP | + EPOLLHUP); + } else if (events[i].events & EPOLLIN) { + for (;;) { + readlen = read(events[i].data.fd, buf, + sizeof(buf)); + if (readlen > 0) + write_chunk(outfile_fd, buf, + readlen); + else + break; + } + } else { + /* spurious event ? */ + } + if (events[i].events & (EPOLLRDHUP | EPOLLHUP)) { + epoll_ctl(epfd, EPOLL_CTL_DEL, + events[i].data.fd, NULL); + close(events[i].data.fd); + close(outfile_fd); + return; + } + } + } +} + +int main(int argc, char *argv[]) +{ + parse_opts(argc, argv); + setup_queue(); + run_poller(); + return 0; +} diff --git a/tools/testing/selftests/net/drop_monitor_tests.sh b/tools/testing/selftests/net/drop_monitor_tests.sh index 7c4818c971fc..507d0a82f5f0 100755 --- a/tools/testing/selftests/net/drop_monitor_tests.sh +++ b/tools/testing/selftests/net/drop_monitor_tests.sh @@ -77,7 +77,7 @@ sw_drops_test() rm ${dir}/packets.pcap - { kill %% && wait %%; } 2>/dev/null + kill_process %% timeout 5 dwdump -o sw -w ${dir}/packets.pcap (( $(tshark -r ${dir}/packets.pcap \ -Y 'ip.dst == 192.0.2.10' 2> /dev/null | wc -l) == 0)) diff --git a/tools/testing/selftests/net/fdb_notify.sh b/tools/testing/selftests/net/fdb_notify.sh new file mode 100755 index 000000000000..c03151e7791c --- /dev/null +++ b/tools/testing/selftests/net/fdb_notify.sh @@ -0,0 +1,96 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +source lib.sh + +ALL_TESTS=" + test_dup_bridge + test_dup_vxlan_self + test_dup_vxlan_master + test_dup_macvlan_self + test_dup_macvlan_master +" + +do_test_dup() +{ + local op=$1; shift + local what=$1; shift + local tmpf + + RET=0 + + tmpf=$(mktemp) + defer rm "$tmpf" + + defer_scope_push + bridge monitor fdb &> "$tmpf" & + defer kill_process $! + + sleep 0.5 + bridge fdb "$op" 00:11:22:33:44:55 vlan 1 "$@" + sleep 0.5 + defer_scope_pop + + local count=$(grep -c -e 00:11:22:33:44:55 $tmpf) + ((count == 1)) + check_err $? "Got $count notifications, expected 1" + + log_test "$what $op: Duplicate notifications" +} + +test_dup_bridge() +{ + ip_link_add br up type bridge vlan_filtering 1 + do_test_dup add "bridge" dev br self + do_test_dup del "bridge" dev br self +} + +test_dup_vxlan_self() +{ + ip_link_add br up type bridge vlan_filtering 1 + ip_link_add vx up type vxlan id 2000 dstport 4789 + ip_link_master vx br + + do_test_dup add "vxlan" dev vx self dst 192.0.2.1 + do_test_dup del "vxlan" dev vx self dst 192.0.2.1 +} + +test_dup_vxlan_master() +{ + ip_link_add br up type bridge vlan_filtering 1 + ip_link_add vx up type vxlan id 2000 dstport 4789 + ip_link_master vx br + + do_test_dup add "vxlan master" dev vx master + do_test_dup del "vxlan master" dev vx master +} + +test_dup_macvlan_self() +{ + ip_link_add dd up type dummy + ip_link_add mv up link dd type macvlan mode passthru + + do_test_dup add "macvlan self" dev mv self + do_test_dup del "macvlan self" dev mv self +} + +test_dup_macvlan_master() +{ + ip_link_add br up type bridge vlan_filtering 1 + ip_link_add dd up type dummy + ip_link_add mv up link dd type macvlan mode passthru + ip_link_master mv br + + do_test_dup add "macvlan master" dev mv self + do_test_dup del "macvlan master" dev mv self +} + +cleanup() +{ + defer_scopes_cleanup +} + +trap cleanup EXIT +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/fib_tests.sh b/tools/testing/selftests/net/fib_tests.sh index 5f3c28fc8624..3ea6f886a210 100755 --- a/tools/testing/selftests/net/fib_tests.sh +++ b/tools/testing/selftests/net/fib_tests.sh @@ -689,7 +689,7 @@ fib6_notify_test() log_test $ret 0 "ipv6 route add notify" - { kill %% && wait %%; } 2>/dev/null + kill_process %% #rm errors.txt @@ -736,7 +736,7 @@ fib_notify_test() log_test $ret 0 "ipv4 route add notify" - { kill %% && wait %%; } 2>/dev/null + kill_process %% rm errors.txt @@ -2328,7 +2328,7 @@ ipv4_mangle_test() $IP route del table 123 172.16.101.0/24 dev veth1 $IP rule del pref 100 - { kill %% && wait %%; } 2>/dev/null + kill_process %% rm $tmp_file route_cleanup @@ -2386,7 +2386,7 @@ ipv6_mangle_test() $IP -6 route del table 123 2001:db8:101::/64 dev veth1 $IP -6 rule del pref 100 - { kill %% && wait %%; } 2>/dev/null + kill_process %% rm $tmp_file route_cleanup diff --git a/tools/testing/selftests/net/forwarding/Makefile b/tools/testing/selftests/net/forwarding/Makefile index 224346426ef2..7d885cff8d79 100644 --- a/tools/testing/selftests/net/forwarding/Makefile +++ b/tools/testing/selftests/net/forwarding/Makefile @@ -126,6 +126,7 @@ TEST_FILES := devlink_lib.sh \ tc_common.sh TEST_INCLUDES := \ - ../lib.sh + ../lib.sh \ + $(wildcard ../lib/sh/*.sh) include ../../lib.mk diff --git a/tools/testing/selftests/net/forwarding/devlink_lib.sh b/tools/testing/selftests/net/forwarding/devlink_lib.sh index 62a05bca1e82..18afa89ebbcc 100644 --- a/tools/testing/selftests/net/forwarding/devlink_lib.sh +++ b/tools/testing/selftests/net/forwarding/devlink_lib.sh @@ -501,7 +501,7 @@ devlink_trap_drop_cleanup() local pref=$1; shift local handle=$1; shift - kill $mz_pid && wait $mz_pid &> /dev/null + kill_process $mz_pid tc filter del dev $dev egress protocol $proto pref $pref handle $handle flower } diff --git a/tools/testing/selftests/net/forwarding/ip6gre_flat.sh b/tools/testing/selftests/net/forwarding/ip6gre_flat.sh index 96c97064f2d3..becc7c3fc809 100755 --- a/tools/testing/selftests/net/forwarding/ip6gre_flat.sh +++ b/tools/testing/selftests/net/forwarding/ip6gre_flat.sh @@ -8,6 +8,7 @@ ALL_TESTS=" gre_flat gre_mtu_change + gre_flat_remote_change " NUM_NETIFS=6 @@ -44,6 +45,19 @@ gre_mtu_change() test_mtu_change } +gre_flat_remote_change() +{ + flat_remote_change + + test_traffic_ip4ip6 "GRE flat IPv4-in-IPv6 (new remote)" + test_traffic_ip6ip6 "GRE flat IPv6-in-IPv6 (new remote)" + + flat_remote_restore + + test_traffic_ip4ip6 "GRE flat IPv4-in-IPv6 (old remote)" + test_traffic_ip6ip6 "GRE flat IPv6-in-IPv6 (old remote)" +} + cleanup() { pre_cleanup diff --git a/tools/testing/selftests/net/forwarding/ip6gre_flat_key.sh b/tools/testing/selftests/net/forwarding/ip6gre_flat_key.sh index ff9fb0db9bd1..e5335116a2fd 100755 --- a/tools/testing/selftests/net/forwarding/ip6gre_flat_key.sh +++ b/tools/testing/selftests/net/forwarding/ip6gre_flat_key.sh @@ -8,6 +8,7 @@ ALL_TESTS=" gre_flat gre_mtu_change + gre_flat_remote_change " NUM_NETIFS=6 @@ -44,6 +45,19 @@ gre_mtu_change() test_mtu_change } +gre_flat_remote_change() +{ + flat_remote_change + + test_traffic_ip4ip6 "GRE flat IPv4-in-IPv6 with key (new remote)" + test_traffic_ip6ip6 "GRE flat IPv6-in-IPv6 with key (new remote)" + + flat_remote_restore + + test_traffic_ip4ip6 "GRE flat IPv4-in-IPv6 with key (old remote)" + test_traffic_ip6ip6 "GRE flat IPv6-in-IPv6 with key (old remote)" +} + cleanup() { pre_cleanup diff --git a/tools/testing/selftests/net/forwarding/ip6gre_flat_keys.sh b/tools/testing/selftests/net/forwarding/ip6gre_flat_keys.sh index 12c138785242..7e0cbfdefab0 100755 --- a/tools/testing/selftests/net/forwarding/ip6gre_flat_keys.sh +++ b/tools/testing/selftests/net/forwarding/ip6gre_flat_keys.sh @@ -8,6 +8,7 @@ ALL_TESTS=" gre_flat gre_mtu_change + gre_flat_remote_change " NUM_NETIFS=6 @@ -44,6 +45,19 @@ gre_mtu_change() test_mtu_change gre } +gre_flat_remote_change() +{ + flat_remote_change + + test_traffic_ip4ip6 "GRE flat IPv4-in-IPv6 with ikey/okey (new remote)" + test_traffic_ip6ip6 "GRE flat IPv6-in-IPv6 with ikey/okey (new remote)" + + flat_remote_restore + + test_traffic_ip4ip6 "GRE flat IPv4-in-IPv6 with ikey/okey (old remote)" + test_traffic_ip6ip6 "GRE flat IPv6-in-IPv6 with ikey/okey (old remote)" +} + cleanup() { pre_cleanup diff --git a/tools/testing/selftests/net/forwarding/ip6gre_hier.sh b/tools/testing/selftests/net/forwarding/ip6gre_hier.sh index 83b55c30a5c3..e0844495f3d1 100755 --- a/tools/testing/selftests/net/forwarding/ip6gre_hier.sh +++ b/tools/testing/selftests/net/forwarding/ip6gre_hier.sh @@ -8,6 +8,7 @@ ALL_TESTS=" gre_hier gre_mtu_change + gre_hier_remote_change " NUM_NETIFS=6 @@ -44,6 +45,19 @@ gre_mtu_change() test_mtu_change gre } +gre_hier_remote_change() +{ + hier_remote_change + + test_traffic_ip4ip6 "GRE hierarchical IPv4-in-IPv6 (new remote)" + test_traffic_ip6ip6 "GRE hierarchical IPv6-in-IPv6 (new remote)" + + hier_remote_restore + + test_traffic_ip4ip6 "GRE hierarchical IPv4-in-IPv6 (old remote)" + test_traffic_ip6ip6 "GRE hierarchical IPv6-in-IPv6 (old remote)" +} + cleanup() { pre_cleanup diff --git a/tools/testing/selftests/net/forwarding/ip6gre_hier_key.sh b/tools/testing/selftests/net/forwarding/ip6gre_hier_key.sh index 256607916d92..741bc9c928eb 100755 --- a/tools/testing/selftests/net/forwarding/ip6gre_hier_key.sh +++ b/tools/testing/selftests/net/forwarding/ip6gre_hier_key.sh @@ -8,6 +8,7 @@ ALL_TESTS=" gre_hier gre_mtu_change + gre_hier_remote_change " NUM_NETIFS=6 @@ -44,6 +45,19 @@ gre_mtu_change() test_mtu_change gre } +gre_hier_remote_change() +{ + hier_remote_change + + test_traffic_ip4ip6 "GRE hierarchical IPv4-in-IPv6 with key (new remote)" + test_traffic_ip6ip6 "GRE hierarchical IPv6-in-IPv6 with key (new remote)" + + hier_remote_restore + + test_traffic_ip4ip6 "GRE hierarchical IPv4-in-IPv6 with key (old remote)" + test_traffic_ip6ip6 "GRE hierarchical IPv6-in-IPv6 with key (old remote)" +} + cleanup() { pre_cleanup diff --git a/tools/testing/selftests/net/forwarding/ip6gre_hier_keys.sh b/tools/testing/selftests/net/forwarding/ip6gre_hier_keys.sh index ad1bcd6334a8..ad9eab4b1367 100755 --- a/tools/testing/selftests/net/forwarding/ip6gre_hier_keys.sh +++ b/tools/testing/selftests/net/forwarding/ip6gre_hier_keys.sh @@ -8,6 +8,7 @@ ALL_TESTS=" gre_hier gre_mtu_change + gre_hier_remote_change " NUM_NETIFS=6 @@ -44,6 +45,19 @@ gre_mtu_change() test_mtu_change gre } +gre_hier_remote_change() +{ + hier_remote_change + + test_traffic_ip4ip6 "GRE hierarchical IPv4-in-IPv6 with ikey/okey (new remote)" + test_traffic_ip6ip6 "GRE hierarchical IPv6-in-IPv6 with ikey/okey (new remote)" + + hier_remote_restore + + test_traffic_ip4ip6 "GRE hierarchical IPv4-in-IPv6 with ikey/okey (old remote)" + test_traffic_ip6ip6 "GRE hierarchical IPv6-in-IPv6 with ikey/okey (old remote)" +} + cleanup() { pre_cleanup diff --git a/tools/testing/selftests/net/forwarding/ip6gre_lib.sh b/tools/testing/selftests/net/forwarding/ip6gre_lib.sh index 24f4ab328bd2..2d91281dc5b7 100644 --- a/tools/testing/selftests/net/forwarding/ip6gre_lib.sh +++ b/tools/testing/selftests/net/forwarding/ip6gre_lib.sh @@ -436,3 +436,83 @@ test_mtu_change() check_err $? log_test "ping GRE IPv6, packet size 1800 after MTU change" } + +topo_flat_remote_change() +{ + local old1=$1; shift + local new1=$1; shift + local old2=$1; shift + local new2=$1; shift + + ip link set dev g1a type ip6gre local $new1 remote $new2 + __addr_add_del g1a add "$new1/128" + __addr_add_del g1a del "$old1/128" + ip -6 route add $new2/128 via 2001:db8:10::2 + ip -6 route del $old2/128 + + ip link set dev g2a type ip6gre local $new2 remote $new1 + __addr_add_del g2a add "$new2/128" + __addr_add_del g2a del "$old2/128" + ip -6 route add vrf v$ol2 $new1/128 via 2001:db8:10::1 + ip -6 route del vrf v$ol2 $old1/128 +} + +flat_remote_change() +{ + local old1=2001:db8:3::1 + local new1=2001:db8:3::10 + local old2=2001:db8:3::2 + local new2=2001:db8:3::20 + + topo_flat_remote_change $old1 $new1 $old2 $new2 +} + +flat_remote_restore() +{ + local old1=2001:db8:3::10 + local new1=2001:db8:3::1 + local old2=2001:db8:3::20 + local new2=2001:db8:3::2 + + topo_flat_remote_change $old1 $new1 $old2 $new2 +} + +topo_hier_remote_change() +{ + local old1=$1; shift + local new1=$1; shift + local old2=$1; shift + local new2=$1; shift + + __addr_add_del dummy1 del "$old1/64" + __addr_add_del dummy1 add "$new1/64" + ip link set dev g1a type ip6gre local $new1 remote $new2 + ip -6 route add vrf v$ul1 $new2/128 via 2001:db8:10::2 + ip -6 route del vrf v$ul1 $old2/128 + + __addr_add_del dummy2 del "$old2/64" + __addr_add_del dummy2 add "$new2/64" + ip link set dev g2a type ip6gre local $new2 remote $new1 + ip -6 route add vrf v$ul2 $new1/128 via 2001:db8:10::1 + ip -6 route del vrf v$ul2 $old1/128 +} + +hier_remote_change() +{ + local old1=2001:db8:3::1 + local new1=2001:db8:3::10 + local old2=2001:db8:3::2 + local new2=2001:db8:3::20 + + topo_hier_remote_change $old1 $new1 $old2 $new2 +} + +hier_remote_restore() +{ + local old1=2001:db8:3::10 + local new1=2001:db8:3::1 + local old2=2001:db8:3::20 + local new2=2001:db8:3::2 + + topo_hier_remote_change $old1 $new1 $old2 $new2 +} diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh index c992e385159c..7337f398f9cc 100644 --- a/tools/testing/selftests/net/forwarding/lib.sh +++ b/tools/testing/selftests/net/forwarding/lib.sh @@ -48,7 +48,6 @@ declare -A NETIFS=( : "${WAIT_TIME:=5}" # Whether to pause on, respectively, after a failure and before cleanup. -: "${PAUSE_ON_FAIL:=no}" : "${PAUSE_ON_CLEANUP:=no}" # Whether to create virtual interfaces, and what netdevice type they should be. @@ -446,191 +445,6 @@ done ############################################################################## # Helpers -# Exit status to return at the end. Set in case one of the tests fails. -EXIT_STATUS=0 -# Per-test return value. Clear at the beginning of each test. -RET=0 - -ret_set_ksft_status() -{ - local ksft_status=$1; shift - local msg=$1; shift - - RET=$(ksft_status_merge $RET $ksft_status) - if (( $? )); then - retmsg=$msg - fi -} - -# Whether FAILs should be interpreted as XFAILs. Internal. -FAIL_TO_XFAIL= - -check_err() -{ - local err=$1 - local msg=$2 - - if ((err)); then - if [[ $FAIL_TO_XFAIL = yes ]]; then - ret_set_ksft_status $ksft_xfail "$msg" - else - ret_set_ksft_status $ksft_fail "$msg" - fi - fi -} - -check_fail() -{ - local err=$1 - local msg=$2 - - check_err $((!err)) "$msg" -} - -check_err_fail() -{ - local should_fail=$1; shift - local err=$1; shift - local what=$1; shift - - if ((should_fail)); then - check_fail $err "$what succeeded, but should have failed" - else - check_err $err "$what failed" - fi -} - -xfail() -{ - FAIL_TO_XFAIL=yes "$@" -} - -xfail_on_slow() -{ - if [[ $KSFT_MACHINE_SLOW = yes ]]; then - FAIL_TO_XFAIL=yes "$@" - else - "$@" - fi -} - -omit_on_slow() -{ - if [[ $KSFT_MACHINE_SLOW != yes ]]; then - "$@" - fi -} - -xfail_on_veth() -{ - local dev=$1; shift - local kind - - kind=$(ip -j -d link show dev $dev | - jq -r '.[].linkinfo.info_kind') - if [[ $kind = veth ]]; then - FAIL_TO_XFAIL=yes "$@" - else - "$@" - fi -} - -log_test_result() -{ - local test_name=$1; shift - local opt_str=$1; shift - local result=$1; shift - local retmsg=$1; shift - - printf "TEST: %-60s [%s]\n" "$test_name $opt_str" "$result" - if [[ $retmsg ]]; then - printf "\t%s\n" "$retmsg" - fi -} - -pause_on_fail() -{ - if [[ $PAUSE_ON_FAIL == yes ]]; then - echo "Hit enter to continue, 'q' to quit" - read a - [[ $a == q ]] && exit 1 - fi -} - -handle_test_result_pass() -{ - local test_name=$1; shift - local opt_str=$1; shift - - log_test_result "$test_name" "$opt_str" " OK " -} - -handle_test_result_fail() -{ - local test_name=$1; shift - local opt_str=$1; shift - - log_test_result "$test_name" "$opt_str" FAIL "$retmsg" - pause_on_fail -} - -handle_test_result_xfail() -{ - local test_name=$1; shift - local opt_str=$1; shift - - log_test_result "$test_name" "$opt_str" XFAIL "$retmsg" - pause_on_fail -} - -handle_test_result_skip() -{ - local test_name=$1; shift - local opt_str=$1; shift - - log_test_result "$test_name" "$opt_str" SKIP "$retmsg" -} - -log_test() -{ - local test_name=$1 - local opt_str=$2 - - if [[ $# -eq 2 ]]; then - opt_str="($opt_str)" - fi - - if ((RET == ksft_pass)); then - handle_test_result_pass "$test_name" "$opt_str" - elif ((RET == ksft_xfail)); then - handle_test_result_xfail "$test_name" "$opt_str" - elif ((RET == ksft_skip)); then - handle_test_result_skip "$test_name" "$opt_str" - else - handle_test_result_fail "$test_name" "$opt_str" - fi - - EXIT_STATUS=$(ksft_exit_status_merge $EXIT_STATUS $RET) - return $RET -} - -log_test_skip() -{ - RET=$ksft_skip retmsg= log_test "$@" -} - -log_test_xfail() -{ - RET=$ksft_xfail retmsg= log_test "$@" -} - -log_info() -{ - local msg=$1 - - echo "INFO: $msg" -} - not() { "$@" @@ -1398,13 +1212,10 @@ matchall_sink_create() action drop } -tests_run() +cleanup() { - local current_test - - for current_test in ${TESTS:-$ALL_TESTS}; do - $current_test - done + pre_cleanup + defer_scopes_cleanup } multipath_eval() @@ -1761,8 +1572,9 @@ start_tcp_traffic() stop_traffic() { - # Suppress noise from killing mausezahn. - { kill %% && wait %%; } 2>/dev/null + local pid=${1-%%}; shift + + kill_process "$pid" } declare -A cappid diff --git a/tools/testing/selftests/net/forwarding/no_forwarding.sh b/tools/testing/selftests/net/forwarding/no_forwarding.sh index 9e677aa64a06..694ece9ba3a7 100755 --- a/tools/testing/selftests/net/forwarding/no_forwarding.sh +++ b/tools/testing/selftests/net/forwarding/no_forwarding.sh @@ -202,7 +202,7 @@ one_bridge_two_pvids() ip link set $swp2 master br0 bridge vlan add dev $swp1 vid 1 pvid untagged - bridge vlan add dev $swp1 vid 2 pvid untagged + bridge vlan add dev $swp2 vid 2 pvid untagged run_test "Switch ports in VLAN-aware bridge with different PVIDs" diff --git a/tools/testing/selftests/net/forwarding/sch_ets.sh b/tools/testing/selftests/net/forwarding/sch_ets.sh index e60c8b4818cc..1f6f53e284b5 100755 --- a/tools/testing/selftests/net/forwarding/sch_ets.sh +++ b/tools/testing/selftests/net/forwarding/sch_ets.sh @@ -24,15 +24,10 @@ switch_create() # Create a bottleneck so that the DWRR process can kick in. tc qdisc add dev $swp2 root handle 1: tbf \ rate 1Gbit burst 1Mbit latency 100ms + defer tc qdisc del dev $swp2 root PARENT="parent 1:" } -switch_destroy() -{ - ets_switch_destroy - tc qdisc del dev $swp2 root -} - # Callback from sch_ets_tests.sh collect_stats() { diff --git a/tools/testing/selftests/net/forwarding/sch_ets_core.sh b/tools/testing/selftests/net/forwarding/sch_ets_core.sh index f906fcc66572..8f9922c695b0 100644 --- a/tools/testing/selftests/net/forwarding/sch_ets_core.sh +++ b/tools/testing/selftests/net/forwarding/sch_ets_core.sh @@ -166,44 +166,32 @@ h1_create() local i; simple_if_init $h1 + defer simple_if_fini $h1 + mtu_set $h1 9900 + defer mtu_restore $h1 + for i in {0..2}; do vlan_create $h1 1$i v$h1 $(sip $i)/28 + defer vlan_destroy $h1 1$i ip link set dev $h1.1$i type vlan egress 0:$i done } -h1_destroy() -{ - local i - - for i in {0..2}; do - vlan_destroy $h1 1$i - done - mtu_restore $h1 - simple_if_fini $h1 -} - h2_create() { local i simple_if_init $h2 - mtu_set $h2 9900 - for i in {0..2}; do - vlan_create $h2 1$i v$h2 $(dip $i)/28 - done -} + defer simple_if_fini $h2 -h2_destroy() -{ - local i + mtu_set $h2 9900 + defer mtu_restore $h2 for i in {0..2}; do - vlan_destroy $h2 1$i + vlan_create $h2 1$i v$h2 $(dip $i)/28 + defer vlan_destroy $h2 1$i done - mtu_restore $h2 - simple_if_fini $h2 } ets_switch_create() @@ -211,44 +199,45 @@ ets_switch_create() local i ip link set dev $swp1 up + defer ip link set dev $swp1 down + mtu_set $swp1 9900 + defer mtu_restore $swp1 ip link set dev $swp2 up + defer ip link set dev $swp2 down + mtu_set $swp2 9900 + defer mtu_restore $swp2 for i in {0..2}; do vlan_create $swp1 1$i + defer vlan_destroy $swp1 1$i ip link set dev $swp1.1$i type vlan ingress 0:0 1:1 2:2 vlan_create $swp2 1$i + defer vlan_destroy $swp2 1$i ip link add dev br1$i type bridge + defer ip link del dev br1$i + ip link set dev $swp1.1$i master br1$i + defer ip link set dev $swp1.1$i nomaster + ip link set dev $swp2.1$i master br1$i + defer ip link set dev $swp2.1$i nomaster ip link set dev br1$i up - ip link set dev $swp1.1$i up - ip link set dev $swp2.1$i up - done -} + defer ip link set dev br1$i down -ets_switch_destroy() -{ - local i - - ets_delete_qdisc + ip link set dev $swp1.1$i up + defer ip link set dev $swp1.1$i down - for i in {0..2}; do - ip link del dev br1$i - vlan_destroy $swp2 1$i - vlan_destroy $swp1 1$i + ip link set dev $swp2.1$i up + defer ip link set dev $swp2.1$i down done - mtu_restore $swp2 - ip link set dev $swp2 down - - mtu_restore $swp1 - ip link set dev $swp1 down + defer ets_delete_qdisc } setup_prepare() @@ -263,23 +252,13 @@ setup_prepare() hut=$h2 vrf_prepare + defer vrf_cleanup h1_create h2_create switch_create } -cleanup() -{ - pre_cleanup - - switch_destroy - h2_destroy - h1_destroy - - vrf_cleanup -} - ping_ipv4() { ping_test $h1.10 $(dip 0) " vlan 10" diff --git a/tools/testing/selftests/net/forwarding/sch_ets_tests.sh b/tools/testing/selftests/net/forwarding/sch_ets_tests.sh index f9d26a7911bb..08240d3e3c87 100644 --- a/tools/testing/selftests/net/forwarding/sch_ets_tests.sh +++ b/tools/testing/selftests/net/forwarding/sch_ets_tests.sh @@ -90,6 +90,7 @@ __ets_dwrr_test() for stream in ${streams[@]}; do ets_start_traffic $stream + defer stop_traffic $! done sleep 10 @@ -120,25 +121,24 @@ __ets_dwrr_test() ${d[0]} ${d[$i]} fi done - - for stream in ${streams[@]}; do - stop_traffic - done } ets_dwrr_test_012() { - __ets_dwrr_test 0 1 2 + in_defer_scope \ + __ets_dwrr_test 0 1 2 } ets_dwrr_test_01() { - __ets_dwrr_test 0 1 + in_defer_scope \ + __ets_dwrr_test 0 1 } ets_dwrr_test_12() { - __ets_dwrr_test 1 2 + in_defer_scope \ + __ets_dwrr_test 1 2 } ets_qdisc_setup() diff --git a/tools/testing/selftests/net/forwarding/sch_red.sh b/tools/testing/selftests/net/forwarding/sch_red.sh index 17f28644568e..af166662b78a 100755 --- a/tools/testing/selftests/net/forwarding/sch_red.sh +++ b/tools/testing/selftests/net/forwarding/sch_red.sh @@ -53,71 +53,63 @@ PKTSZ=1400 h1_create() { simple_if_init $h1 192.0.2.1/28 + defer simple_if_fini $h1 192.0.2.1/28 + mtu_set $h1 10000 + defer mtu_restore $h1 + tc qdisc replace dev $h1 root handle 1: tbf \ rate 10Mbit burst 10K limit 1M -} - -h1_destroy() -{ - tc qdisc del dev $h1 root - mtu_restore $h1 - simple_if_fini $h1 192.0.2.1/28 + defer tc qdisc del dev $h1 root } h2_create() { simple_if_init $h2 192.0.2.2/28 - mtu_set $h2 10000 -} + defer simple_if_fini $h2 192.0.2.2/28 -h2_destroy() -{ - mtu_restore $h2 - simple_if_fini $h2 192.0.2.2/28 + mtu_set $h2 10000 + defer mtu_restore $h2 } h3_create() { simple_if_init $h3 192.0.2.3/28 - mtu_set $h3 10000 -} + defer simple_if_fini $h3 192.0.2.3/28 -h3_destroy() -{ - mtu_restore $h3 - simple_if_fini $h3 192.0.2.3/28 + mtu_set $h3 10000 + defer mtu_restore $h3 } switch_create() { ip link add dev br up type bridge + defer ip link del dev br + ip link set dev $swp1 up master br + defer ip link set dev $swp1 down nomaster + ip link set dev $swp2 up master br + defer ip link set dev $swp2 down nomaster + ip link set dev $swp3 up master br + defer ip link set dev $swp3 down nomaster mtu_set $swp1 10000 + defer mtu_restore $h1 + mtu_set $swp2 10000 + defer mtu_restore $h2 + mtu_set $swp3 10000 + defer mtu_restore $h3 tc qdisc replace dev $swp3 root handle 1: tbf \ rate 10Mbit burst 10K limit 1M - ip link add name _drop_test up type dummy -} + defer tc qdisc del dev $swp3 root -switch_destroy() -{ - ip link del dev _drop_test - tc qdisc del dev $swp3 root - - mtu_restore $h3 - mtu_restore $h2 - mtu_restore $h1 - - ip link set dev $swp3 down nomaster - ip link set dev $swp2 down nomaster - ip link set dev $swp1 down nomaster - ip link del dev br + ip link add name _drop_test up type dummy + defer ip link del dev _drop_test } setup_prepare() @@ -134,6 +126,7 @@ setup_prepare() h3_mac=$(mac_get $h3) vrf_prepare + defer vrf_cleanup h1_create h2_create @@ -141,18 +134,6 @@ setup_prepare() switch_create } -cleanup() -{ - pre_cleanup - - switch_destroy - h3_destroy - h2_destroy - h1_destroy - - vrf_cleanup -} - ping_ipv4() { ping_test $h1 192.0.2.3 " from host 1" @@ -287,6 +268,7 @@ do_ecn_test() $MZ $h1 -p $PKTSZ -A 192.0.2.1 -B 192.0.2.3 -c 0 \ -a own -b $h3_mac -t tcp -q tos=0x01 & + defer stop_traffic $! sleep 1 ecn_test_common "$name" $limit @@ -298,9 +280,6 @@ do_ecn_test() build_backlog $((2 * limit)) udp >/dev/null check_fail $? "UDP traffic went into backlog instead of being early-dropped" log_test "$name backlog > limit: UDP early-dropped" - - stop_traffic - sleep 1 } do_ecn_nodrop_test() @@ -310,6 +289,7 @@ do_ecn_nodrop_test() $MZ $h1 -p $PKTSZ -A 192.0.2.1 -B 192.0.2.3 -c 0 \ -a own -b $h3_mac -t tcp -q tos=0x01 & + defer stop_traffic $! sleep 1 ecn_test_common "$name" $limit @@ -321,9 +301,6 @@ do_ecn_nodrop_test() build_backlog $((2 * limit)) udp >/dev/null check_err $? "UDP traffic was early-dropped instead of getting into backlog" log_test "$name backlog > limit: UDP not dropped" - - stop_traffic - sleep 1 } do_red_test() @@ -336,6 +313,7 @@ do_red_test() # is above limit. $MZ $h1 -p $PKTSZ -A 192.0.2.1 -B 192.0.2.3 -c 0 \ -a own -b $h3_mac -t tcp -q tos=0x01 & + defer stop_traffic $! # Pushing below the queue limit should work. RET=0 @@ -352,9 +330,6 @@ do_red_test() pct=$(check_marking "== 0") check_err $? "backlog $backlog / $limit Got $pct% marked packets, expected == 0." log_test "RED backlog > limit" - - stop_traffic - sleep 1 } do_red_qevent_test() @@ -369,6 +344,7 @@ do_red_qevent_test() $MZ $h1 -p $PKTSZ -A 192.0.2.1 -B 192.0.2.3 -c 0 \ -a own -b $h3_mac -t udp -q & + defer stop_traffic $! sleep 1 tc filter add block 10 pref 1234 handle 102 matchall skip_hw \ @@ -396,9 +372,6 @@ do_red_qevent_test() check_err $? "Dropped packets still observed: 0 expected, $((now - base)) seen" log_test "RED early_dropped packets mirrored" - - stop_traffic - sleep 1 } do_ecn_qevent_test() @@ -410,6 +383,7 @@ do_ecn_qevent_test() $MZ $h1 -p $PKTSZ -A 192.0.2.1 -B 192.0.2.3 -c 0 \ -a own -b $h3_mac -t tcp -q tos=0x01 & + defer stop_traffic $! sleep 1 tc filter add block 10 pref 1234 handle 102 matchall skip_hw \ @@ -428,9 +402,6 @@ do_ecn_qevent_test() tc filter del block 10 pref 1234 handle 102 matchall log_test "ECN marked packets mirrored" - - stop_traffic - sleep 1 } install_qdisc() @@ -451,36 +422,36 @@ uninstall_qdisc() ecn_test() { install_qdisc ecn + defer uninstall_qdisc xfail_on_slow do_ecn_test $BACKLOG - uninstall_qdisc } ecn_nodrop_test() { install_qdisc ecn nodrop + defer uninstall_qdisc xfail_on_slow do_ecn_nodrop_test $BACKLOG - uninstall_qdisc } red_test() { install_qdisc + defer uninstall_qdisc xfail_on_slow do_red_test $BACKLOG - uninstall_qdisc } red_qevent_test() { install_qdisc qevent early_drop block 10 + defer uninstall_qdisc xfail_on_slow do_red_qevent_test $BACKLOG - uninstall_qdisc } ecn_qevent_test() { install_qdisc ecn qevent mark block 10 + defer uninstall_qdisc xfail_on_slow do_ecn_qevent_test $BACKLOG - uninstall_qdisc } trap cleanup EXIT diff --git a/tools/testing/selftests/net/forwarding/sch_tbf_core.sh b/tools/testing/selftests/net/forwarding/sch_tbf_core.sh index 9cd884d4a5de..ec309a5086bc 100644 --- a/tools/testing/selftests/net/forwarding/sch_tbf_core.sh +++ b/tools/testing/selftests/net/forwarding/sch_tbf_core.sh @@ -60,68 +60,65 @@ host_create() local host=$1; shift simple_if_init $dev + defer simple_if_fini $dev + mtu_set $dev 10000 + defer mtu_restore $dev vlan_create $dev 10 v$dev $(ipaddr $host 10)/28 + defer vlan_destroy $dev 10 ip link set dev $dev.10 type vlan egress 0:0 vlan_create $dev 11 v$dev $(ipaddr $host 11)/28 + defer vlan_destroy $dev 11 ip link set dev $dev.11 type vlan egress 0:1 } -host_destroy() -{ - local dev=$1; shift - - vlan_destroy $dev 11 - vlan_destroy $dev 10 - mtu_restore $dev - simple_if_fini $dev -} - h1_create() { host_create $h1 1 } -h1_destroy() -{ - host_destroy $h1 -} - h2_create() { host_create $h2 2 tc qdisc add dev $h2 clsact + defer tc qdisc del dev $h2 clsact + tc filter add dev $h2 ingress pref 1010 prot 802.1q \ flower $TCFLAGS vlan_id 10 action pass tc filter add dev $h2 ingress pref 1011 prot 802.1q \ flower $TCFLAGS vlan_id 11 action pass } -h2_destroy() -{ - tc qdisc del dev $h2 clsact - host_destroy $h2 -} - switch_create() { local intf local vlan ip link add dev br10 type bridge + defer ip link del dev br10 + ip link add dev br11 type bridge + defer ip link del dev br11 for intf in $swp1 $swp2; do ip link set dev $intf up + defer ip link set dev $intf down + mtu_set $intf 10000 + defer mtu_restore $intf for vlan in 10 11; do vlan_create $intf $vlan + defer vlan_destroy $intf $vlan + ip link set dev $intf.$vlan master br$vlan + defer ip link set dev $intf.$vlan nomaster + ip link set dev $intf.$vlan up + defer ip link set dev $intf.$vlan down done done @@ -130,34 +127,10 @@ switch_create() done ip link set dev br10 up - ip link set dev br11 up -} - -switch_destroy() -{ - local intf - local vlan - - # A test may have been interrupted mid-run, with Qdisc installed. Delete - # it here. - tc qdisc del dev $swp2 root 2>/dev/null - - ip link set dev br11 down - ip link set dev br10 down + defer ip link set dev br10 down - for intf in $swp2 $swp1; do - for vlan in 11 10; do - ip link set dev $intf.$vlan down - ip link set dev $intf.$vlan nomaster - vlan_destroy $intf $vlan - done - - mtu_restore $intf - ip link set dev $intf down - done - - ip link del dev br11 - ip link del dev br10 + ip link set dev br11 up + defer ip link set dev br11 down } setup_prepare() @@ -177,23 +150,13 @@ setup_prepare() h2_mac=$(mac_get $h2) vrf_prepare + defer vrf_cleanup h1_create h2_create switch_create } -cleanup() -{ - pre_cleanup - - switch_destroy - h2_destroy - h1_destroy - - vrf_cleanup -} - ping_ipv4() { ping_test $h1.10 $(ipaddr 2 10) " vlan 10" @@ -207,18 +170,18 @@ tbf_get_counter() tc_rule_stats_get $h2 10$vlan ingress .bytes } -do_tbf_test() +__tbf_test() { local vlan=$1; shift local mbit=$1; shift start_traffic $h1.$vlan $(ipaddr 1 $vlan) $(ipaddr 2 $vlan) $h2_mac + defer stop_traffic $! sleep 5 # Wait for the burst to dwindle local t2=$(busywait_for_counter 1000 +1 tbf_get_counter $vlan) sleep 10 local t3=$(tbf_get_counter $vlan) - stop_traffic RET=0 @@ -231,3 +194,9 @@ do_tbf_test() log_test "TC $((vlan - 10)): TBF rate ${mbit}Mbit" } + +do_tbf_test() +{ + in_defer_scope \ + __tbf_test "$@" +} diff --git a/tools/testing/selftests/net/forwarding/sch_tbf_etsprio.sh b/tools/testing/selftests/net/forwarding/sch_tbf_etsprio.sh index df9bcd6a811a..c182a04282bc 100644 --- a/tools/testing/selftests/net/forwarding/sch_tbf_etsprio.sh +++ b/tools/testing/selftests/net/forwarding/sch_tbf_etsprio.sh @@ -30,8 +30,9 @@ tbf_test() # This test is used for both ETS and PRIO. Even though we only need two # bands, PRIO demands a minimum of three. tc qdisc add dev $swp2 root handle 10: $QDISC 3 priomap 2 1 0 + defer tc qdisc del dev $swp2 root + tbf_test_one 128K - tc qdisc del dev $swp2 root } tbf_root_test() @@ -42,6 +43,8 @@ tbf_root_test() tc qdisc replace dev $swp2 root handle 1: \ tbf rate 400Mbit burst $bs limit 1M + defer tc qdisc del dev $swp2 root + tc qdisc replace dev $swp2 parent 1:1 handle 10: \ $QDISC 3 priomap 2 1 0 tc qdisc replace dev $swp2 parent 10:3 handle 103: \ @@ -53,8 +56,6 @@ tbf_root_test() do_tbf_test 10 400 $bs do_tbf_test 11 400 $bs - - tc qdisc del dev $swp2 root } if type -t sch_tbf_pre_hook >/dev/null; then diff --git a/tools/testing/selftests/net/forwarding/sch_tbf_root.sh b/tools/testing/selftests/net/forwarding/sch_tbf_root.sh index 96c997be0d03..9f20320f8d84 100755 --- a/tools/testing/selftests/net/forwarding/sch_tbf_root.sh +++ b/tools/testing/selftests/net/forwarding/sch_tbf_root.sh @@ -14,13 +14,14 @@ tbf_test_one() tc qdisc replace dev $swp2 root handle 108: tbf \ rate 400Mbit burst $bs limit 1M + defer tc qdisc del dev $swp2 root + do_tbf_test 10 400 $bs } tbf_test() { tbf_test_one 128K - tc qdisc del dev $swp2 root } if type -t sch_tbf_pre_hook >/dev/null; then diff --git a/tools/testing/selftests/net/forwarding/tc_police.sh b/tools/testing/selftests/net/forwarding/tc_police.sh index 5103f64a71d6..509fdedfcfa1 100755 --- a/tools/testing/selftests/net/forwarding/tc_police.sh +++ b/tools/testing/selftests/net/forwarding/tc_police.sh @@ -148,7 +148,7 @@ police_common_test() log_test "$test_name" - { kill %% && wait %%; } 2>/dev/null + kill_process %% tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower } @@ -198,7 +198,7 @@ police_shared_common_test() log_test "$test_name" - { kill %% && wait %%; } 2>/dev/null + kill_process %% } police_shared_test() @@ -278,7 +278,7 @@ police_mirror_common_test() log_test "$test_name" - { kill %% && wait %%; } 2>/dev/null + kill_process %% tc filter del dev $pol_if $dir protocol ip pref 1 handle 101 flower tc filter del dev $h3 ingress protocol ip pref 1 handle 101 flower tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower @@ -320,7 +320,7 @@ police_pps_common_test() log_test "$test_name" - { kill %% && wait %%; } 2>/dev/null + kill_process %% tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower } diff --git a/tools/testing/selftests/net/hsr/config b/tools/testing/selftests/net/hsr/config index 241542441c51..555a868743f0 100644 --- a/tools/testing/selftests/net/hsr/config +++ b/tools/testing/selftests/net/hsr/config @@ -3,3 +3,4 @@ CONFIG_NET_SCH_NETEM=m CONFIG_HSR=y CONFIG_VETH=y CONFIG_BRIDGE=y +CONFIG_VLAN_8021Q=m diff --git a/tools/testing/selftests/net/hsr/hsr_common.sh b/tools/testing/selftests/net/hsr/hsr_common.sh index 8e97b1f2e7e5..1dc882ac1c74 100644 --- a/tools/testing/selftests/net/hsr/hsr_common.sh +++ b/tools/testing/selftests/net/hsr/hsr_common.sh @@ -15,7 +15,7 @@ do_ping() { local netns="$1" local connect_addr="$2" - local ping_args="-q -c 2" + local ping_args="-q -c 2 -i 0.1" if is_v6 "${connect_addr}"; then $ipv6 || return 0 @@ -36,7 +36,7 @@ do_ping_long() { local netns="$1" local connect_addr="$2" - local ping_args="-q -c 10" + local ping_args="-q -c 10 -i 0.1" if is_v6 "${connect_addr}"; then $ipv6 || return 0 diff --git a/tools/testing/selftests/net/hsr/hsr_ping.sh b/tools/testing/selftests/net/hsr/hsr_ping.sh index f5d207fc770a..5a65f4f836be 100755 --- a/tools/testing/selftests/net/hsr/hsr_ping.sh +++ b/tools/testing/selftests/net/hsr/hsr_ping.sh @@ -175,6 +175,100 @@ setup_hsr_interfaces() ip -net "$ns3" link set hsr3 up } +setup_vlan_interfaces() { + ip -net "$ns1" link add link hsr1 name hsr1.2 type vlan id 2 + ip -net "$ns1" link add link hsr1 name hsr1.3 type vlan id 3 + ip -net "$ns1" link add link hsr1 name hsr1.4 type vlan id 4 + ip -net "$ns1" link add link hsr1 name hsr1.5 type vlan id 5 + + ip -net "$ns2" link add link hsr2 name hsr2.2 type vlan id 2 + ip -net "$ns2" link add link hsr2 name hsr2.3 type vlan id 3 + ip -net "$ns2" link add link hsr2 name hsr2.4 type vlan id 4 + ip -net "$ns2" link add link hsr2 name hsr2.5 type vlan id 5 + + ip -net "$ns3" link add link hsr3 name hsr3.2 type vlan id 2 + ip -net "$ns3" link add link hsr3 name hsr3.3 type vlan id 3 + ip -net "$ns3" link add link hsr3 name hsr3.4 type vlan id 4 + ip -net "$ns3" link add link hsr3 name hsr3.5 type vlan id 5 + + ip -net "$ns1" addr add 100.64.2.1/24 dev hsr1.2 + ip -net "$ns1" addr add 100.64.3.1/24 dev hsr1.3 + ip -net "$ns1" addr add 100.64.4.1/24 dev hsr1.4 + ip -net "$ns1" addr add 100.64.5.1/24 dev hsr1.5 + + ip -net "$ns2" addr add 100.64.2.2/24 dev hsr2.2 + ip -net "$ns2" addr add 100.64.3.2/24 dev hsr2.3 + ip -net "$ns2" addr add 100.64.4.2/24 dev hsr2.4 + ip -net "$ns2" addr add 100.64.5.2/24 dev hsr2.5 + + ip -net "$ns3" addr add 100.64.2.3/24 dev hsr3.2 + ip -net "$ns3" addr add 100.64.3.3/24 dev hsr3.3 + ip -net "$ns3" addr add 100.64.4.3/24 dev hsr3.4 + ip -net "$ns3" addr add 100.64.5.3/24 dev hsr3.5 + + ip -net "$ns1" link set dev hsr1.2 up + ip -net "$ns1" link set dev hsr1.3 up + ip -net "$ns1" link set dev hsr1.4 up + ip -net "$ns1" link set dev hsr1.5 up + + ip -net "$ns2" link set dev hsr2.2 up + ip -net "$ns2" link set dev hsr2.3 up + ip -net "$ns2" link set dev hsr2.4 up + ip -net "$ns2" link set dev hsr2.5 up + + ip -net "$ns3" link set dev hsr3.2 up + ip -net "$ns3" link set dev hsr3.3 up + ip -net "$ns3" link set dev hsr3.4 up + ip -net "$ns3" link set dev hsr3.5 up + +} + +hsr_vlan_ping() { + do_ping "$ns1" 100.64.2.2 + do_ping "$ns1" 100.64.3.2 + do_ping "$ns1" 100.64.4.2 + do_ping "$ns1" 100.64.5.2 + + do_ping "$ns1" 100.64.2.3 + do_ping "$ns1" 100.64.3.3 + do_ping "$ns1" 100.64.4.3 + do_ping "$ns1" 100.64.5.3 + + do_ping "$ns2" 100.64.2.1 + do_ping "$ns2" 100.64.3.1 + do_ping "$ns2" 100.64.4.1 + do_ping "$ns2" 100.64.5.1 + + do_ping "$ns2" 100.64.2.3 + do_ping "$ns2" 100.64.3.3 + do_ping "$ns2" 100.64.4.3 + do_ping "$ns2" 100.64.5.3 + + do_ping "$ns3" 100.64.2.1 + do_ping "$ns3" 100.64.3.1 + do_ping "$ns3" 100.64.4.1 + do_ping "$ns3" 100.64.5.1 + + do_ping "$ns3" 100.64.2.2 + do_ping "$ns3" 100.64.3.2 + do_ping "$ns3" 100.64.4.2 + do_ping "$ns3" 100.64.5.2 +} + +run_vlan_tests() { + vlan_challenged_hsr1=$(ip net exec "$ns1" ethtool -k hsr1 | grep "vlan-challenged" | awk '{print $2}') + vlan_challenged_hsr2=$(ip net exec "$ns2" ethtool -k hsr2 | grep "vlan-challenged" | awk '{print $2}') + vlan_challenged_hsr3=$(ip net exec "$ns3" ethtool -k hsr3 | grep "vlan-challenged" | awk '{print $2}') + + if [[ "$vlan_challenged_hsr1" = "off" || "$vlan_challenged_hsr2" = "off" || "$vlan_challenged_hsr3" = "off" ]]; then + echo "INFO: Running VLAN tests" + setup_vlan_interfaces + hsr_vlan_ping + else + echo "INFO: Not Running VLAN tests as the device does not support VLAN" + fi +} + check_prerequisites setup_ns ns1 ns2 ns3 @@ -183,9 +277,13 @@ trap cleanup_all_ns EXIT setup_hsr_interfaces 0 do_complete_ping_test +run_vlan_tests + setup_ns ns1 ns2 ns3 setup_hsr_interfaces 1 do_complete_ping_test +run_vlan_tests + exit $ret diff --git a/tools/testing/selftests/net/hsr/settings b/tools/testing/selftests/net/hsr/settings new file mode 100644 index 000000000000..0fbc037f2aa8 --- /dev/null +++ b/tools/testing/selftests/net/hsr/settings @@ -0,0 +1 @@ +timeout=50 diff --git a/tools/testing/selftests/net/ioam6.sh b/tools/testing/selftests/net/ioam6.sh index 12491850ae98..845c26dd01a9 100755 --- a/tools/testing/selftests/net/ioam6.sh +++ b/tools/testing/selftests/net/ioam6.sh @@ -3,119 +3,106 @@ # # Author: Justin Iurman <justin.iurman@uliege.be> # -# This script evaluates the IOAM insertion for IPv6 by checking the IOAM data -# consistency directly inside packets on the receiver side. Tests are divided -# into three categories: OUTPUT (evaluates the IOAM processing by the sender), -# INPUT (evaluates the IOAM processing by a receiver) and GLOBAL (evaluates -# wider use cases that do not fall into the other two categories). Both OUTPUT -# and INPUT tests only use a two-node topology (alpha and beta), while GLOBAL -# tests use the entire three-node topology (alpha, beta, gamma). Each test is -# documented inside its own handler in the code below. +# This script evaluates IOAM for IPv6 by checking local IOAM configurations and +# IOAM data inside packets. There are three categories of tests: LOCAL, OUTPUT, +# and INPUT. The former (LOCAL) checks all IOAM related configurations locally +# without sending packets. OUTPUT tests verify the processing of an IOAM +# encapsulating node, while INPUT tests verify the processing of an IOAM transit +# node. Both OUTPUT and INPUT tests send packets. Each test is documented inside +# its own handler. # -# An IOAM domain is configured from Alpha to Gamma but not on the reverse path. -# When either Beta or Gamma is the destination (depending on the test category), -# Alpha adds an IOAM option (Pre-allocated Trace) inside a Hop-by-hop. +# The topology used for OUTPUT and INPUT tests is made of three nodes: +# - Alpha (the IOAM encapsulating node) +# - Beta (the IOAM transit node) +# - Gamma (the receiver) ** # +# An IOAM domain is configured from Alpha to Beta, but not on the reverse path. +# Alpha adds an IOAM option (Pre-allocated Trace) inside a Hop-by-hop. # -# +-------------------+ +-------------------+ -# | | | | -# | Alpha netns | | Gamma netns | -# | | | | -# | +-------------+ | | +-------------+ | -# | | veth0 | | | | veth0 | | -# | | db01::2/64 | | | | db02::2/64 | | -# | +-------------+ | | +-------------+ | -# | . | | . | -# +-------------------+ +-------------------+ -# . . -# . . -# . . -# +----------------------------------------------------+ -# | . . | -# | +-------------+ +-------------+ | -# | | veth0 | | veth1 | | -# | | db01::1/64 | ................ | db02::1/64 | | -# | +-------------+ +-------------+ | -# | | -# | Beta netns | -# | | -# +----------------------------------------------------+ +# ** Gamma is required because ioam6_parser.c uses a packet socket and we need +# to see IOAM data inserted by the very last node (Beta), which would happen +# _after_ we get a copy of the packet on Beta. Note that using an +# IPv6 raw socket with IPV6_RECVHOPOPTS on Beta would not be enough: we also +# need to access the IPv6 header to check some fields (e.g., source and +# destination addresses), which is not possible in that case. As a +# consequence, we need Gamma as a receiver to run ioam6_parser.c which uses a +# packet socket. # # +# +-----------------------+ +-----------------------+ +# | | | | +# | Alpha netns | | Gamma netns | +# | | | | +# | +-------------------+ | | +-------------------+ | +# | | veth0 | | | | veth0 | | +# | | 2001:db8:1::2/64 | | | | 2001:db8:2::2/64 | | +# | +-------------------+ | | +-------------------+ | +# | . | | . | +# +-----------.-----------+ +-----------.-----------+ +# . . +# . . +# . . +# +-----------.----------------------------------.-----------+ +# | . . | +# | +-------------------+ +-------------------+ | +# | | veth0 | | veth1 | | +# | | 2001:db8:1::1/64 | ............ | 2001:db8:2::1/64 | | +# | +-------------------+ +-------------------+ | +# | | +# | Beta netns | +# | | +# +----------------------------------------------------------+ # -# ============================================================= -# | Alpha - IOAM configuration | -# +===========================================================+ -# | Node ID | 1 | -# +-----------------------------------------------------------+ -# | Node Wide ID | 11111111 | -# +-----------------------------------------------------------+ -# | Ingress ID | 0xffff (default value) | -# +-----------------------------------------------------------+ -# | Ingress Wide ID | 0xffffffff (default value) | -# +-----------------------------------------------------------+ -# | Egress ID | 101 | -# +-----------------------------------------------------------+ -# | Egress Wide ID | 101101 | -# +-----------------------------------------------------------+ -# | Namespace Data | 0xdeadbee0 | -# +-----------------------------------------------------------+ -# | Namespace Wide Data | 0xcafec0caf00dc0de | -# +-----------------------------------------------------------+ -# | Schema ID | 777 | -# +-----------------------------------------------------------+ -# | Schema Data | something that will be 4n-aligned | -# +-----------------------------------------------------------+ # # -# ============================================================= -# | Beta - IOAM configuration | -# +===========================================================+ -# | Node ID | 2 | -# +-----------------------------------------------------------+ -# | Node Wide ID | 22222222 | -# +-----------------------------------------------------------+ -# | Ingress ID | 201 | -# +-----------------------------------------------------------+ -# | Ingress Wide ID | 201201 | -# +-----------------------------------------------------------+ -# | Egress ID | 202 | -# +-----------------------------------------------------------+ -# | Egress Wide ID | 202202 | -# +-----------------------------------------------------------+ -# | Namespace Data | 0xdeadbee1 | -# +-----------------------------------------------------------+ -# | Namespace Wide Data | 0xcafec0caf11dc0de | -# +-----------------------------------------------------------+ -# | Schema ID | 666 | -# +-----------------------------------------------------------+ -# | Schema Data | Hello there -Obi | -# +-----------------------------------------------------------+ +# +==========================================================+ +# | Alpha - IOAM configuration | +# +=====================+====================================+ +# | Node ID | 1 | +# +---------------------+------------------------------------+ +# | Node Wide ID | 11111111 | +# +---------------------+------------------------------------+ +# | Ingress ID | 0xffff (default value) | +# +---------------------+------------------------------------+ +# | Ingress Wide ID | 0xffffffff (default value) | +# +---------------------+------------------------------------+ +# | Egress ID | 101 | +# +---------------------+------------------------------------+ +# | Egress Wide ID | 101101 | +# +---------------------+------------------------------------+ +# | Namespace Data | 0xdeadbeef | +# +---------------------+------------------------------------+ +# | Namespace Wide Data | 0xcafec0caf00dc0de | +# +---------------------+------------------------------------+ +# | Schema ID | 777 | +# +---------------------+------------------------------------+ +# | Schema Data | something that will be 4n-aligned | +# +---------------------+------------------------------------+ # # -# ============================================================= -# | Gamma - IOAM configuration | -# +===========================================================+ -# | Node ID | 3 | -# +-----------------------------------------------------------+ -# | Node Wide ID | 33333333 | -# +-----------------------------------------------------------+ -# | Ingress ID | 301 | -# +-----------------------------------------------------------+ -# | Ingress Wide ID | 301301 | -# +-----------------------------------------------------------+ -# | Egress ID | 0xffff (default value) | -# +-----------------------------------------------------------+ -# | Egress Wide ID | 0xffffffff (default value) | -# +-----------------------------------------------------------+ -# | Namespace Data | 0xdeadbee2 | -# +-----------------------------------------------------------+ -# | Namespace Wide Data | 0xcafec0caf22dc0de | -# +-----------------------------------------------------------+ -# | Schema ID | 0xffffff (= None) | -# +-----------------------------------------------------------+ -# | Schema Data | | -# +-----------------------------------------------------------+ +# +==========================================================+ +# | Beta - IOAM configuration | +# +=====================+====================================+ +# | Node ID | 2 | +# +---------------------+------------------------------------+ +# | Node Wide ID | 22222222 | +# +---------------------+------------------------------------+ +# | Ingress ID | 201 | +# +---------------------+------------------------------------+ +# | Ingress Wide ID | 201201 | +# +---------------------+------------------------------------+ +# | Egress ID | 202 | +# +---------------------+------------------------------------+ +# | Egress Wide ID | 202202 | +# +---------------------+------------------------------------+ +# | Namespace Data | 0xffffffff (default value) | +# +---------------------+------------------------------------+ +# | Namespace Wide Data | 0xffffffffffffffff (default value) | +# +---------------------+------------------------------------+ +# | Schema ID | 0xffffff (= None) | +# +---------------------+------------------------------------+ +# | Schema Data | | +# +---------------------+------------------------------------+ source lib.sh @@ -128,64 +115,69 @@ source lib.sh ################################################################################ ALPHA=( - 1 # ID - 11111111 # Wide ID - 0xffff # Ingress ID - 0xffffffff # Ingress Wide ID - 101 # Egress ID - 101101 # Egress Wide ID - 0xdeadbee0 # Namespace Data - 0xcafec0caf00dc0de # Namespace Wide Data - 777 # Schema ID (0xffffff = None) - "something that will be 4n-aligned" # Schema Data + 1 # ID + 11111111 # Wide ID + 0xffff # Ingress ID (default value) + 0xffffffff # Ingress Wide ID (default value) + 101 # Egress ID + 101101 # Egress Wide ID + 0xdeadbeef # Namespace Data + 0xcafec0caf00dc0de # Namespace Wide Data + 777 # Schema ID + "something that will be 4n-aligned" # Schema Data ) BETA=( - 2 - 22222222 - 201 - 201201 - 202 - 202202 - 0xdeadbee1 - 0xcafec0caf11dc0de - 666 - "Hello there -Obi" + 2 # ID + 22222222 # Wide ID + 201 # Ingress ID + 201201 # Ingress Wide ID + 202 # Egress ID + 202202 # Egress Wide ID + 0xffffffff # Namespace Data (empty value) + 0xffffffffffffffff # Namespace Wide Data (empty value) + 0xffffff # Schema ID (empty value) + "" # Schema Data (empty value) ) -GAMMA=( - 3 - 33333333 - 301 - 301301 - 0xffff - 0xffffffff - 0xdeadbee2 - 0xcafec0caf22dc0de - 0xffffff - "" -) +TESTS_LOCAL=" + local_sysctl_ioam_id + local_sysctl_ioam_id_wide + local_sysctl_ioam_intf_id + local_sysctl_ioam_intf_id_wide + local_sysctl_ioam_intf_enabled + local_ioam_namespace + local_ioam_schema + local_ioam_schema_namespace + local_route_ns + local_route_tunsrc + local_route_tundst + local_route_trace_type + local_route_trace_size + local_route_trace_type_bits + local_route_trace_size_values +" TESTS_OUTPUT=" - out_undef_ns - out_no_room - out_bits - out_full_supp_trace + output_undef_ns + output_no_room + output_no_room_oss + output_bits + output_sizes + output_full_supp_trace " TESTS_INPUT=" - in_undef_ns - in_no_room - in_oflag - in_bits - in_full_supp_trace + input_undef_ns + input_no_room + input_no_room_oss + input_disabled + input_oflag + input_bits + input_sizes + input_full_supp_trace " -TESTS_GLOBAL=" - fwd_full_supp_trace -" - - ################################################################################ # # # LIBRARY # @@ -194,66 +186,64 @@ TESTS_GLOBAL=" check_kernel_compatibility() { - setup_ns ioam_tmp_node - ip link add name veth0 netns $ioam_tmp_node type veth \ - peer name veth1 netns $ioam_tmp_node + setup_ns ioam_tmp_node &>/dev/null + local ret=$? - ip -netns $ioam_tmp_node link set veth0 up - ip -netns $ioam_tmp_node link set veth1 up + ip link add name veth0 netns $ioam_tmp_node type veth \ + peer name veth1 netns $ioam_tmp_node &>/dev/null + ret=$((ret + $?)) - ip -netns $ioam_tmp_node ioam namespace add 0 - ns_ad=$? + ip -netns $ioam_tmp_node link set veth0 up &>/dev/null + ret=$((ret + $?)) - ip -netns $ioam_tmp_node ioam namespace show | grep -q "namespace 0" - ns_sh=$? + ip -netns $ioam_tmp_node link set veth1 up &>/dev/null + ret=$((ret + $?)) - if [[ $ns_ad != 0 || $ns_sh != 0 ]] + if [ $ret != 0 ] then - echo "SKIP: kernel version probably too old, missing ioam support" - ip link del veth0 2>/dev/null || true - cleanup_ns $ioam_tmp_node || true + echo "SKIP: Setup failed." + cleanup_ns $ioam_tmp_node exit $ksft_skip fi - ip -netns $ioam_tmp_node route add db02::/64 encap ioam6 mode inline \ - trace prealloc type 0x800000 ns 0 size 4 dev veth0 - tr_ad=$? + ip -netns $ioam_tmp_node route add 2001:db8:2::/64 \ + encap ioam6 trace prealloc type 0x800000 ns 0 size 4 dev veth0 &>/dev/null + ret=$? - ip -netns $ioam_tmp_node -6 route | grep -q "encap ioam6" - tr_sh=$? + ip -netns $ioam_tmp_node -6 route 2>/dev/null | grep -q "encap ioam6" + ret=$((ret + $?)) - if [[ $tr_ad != 0 || $tr_sh != 0 ]] + if [ $ret != 0 ] then - echo "SKIP: cannot attach an ioam trace to a route, did you compile" \ - "without CONFIG_IPV6_IOAM6_LWTUNNEL?" - ip link del veth0 2>/dev/null || true - cleanup_ns $ioam_tmp_node || true + echo "SKIP: Cannot attach an IOAM trace to a route. Was your kernel" \ + "compiled without CONFIG_IPV6_IOAM6_LWTUNNEL? Are you running an" \ + "old kernel? Are you using an old version of iproute2?" + cleanup_ns $ioam_tmp_node exit $ksft_skip fi - ip link del veth0 2>/dev/null || true - cleanup_ns $ioam_tmp_node || true + cleanup_ns $ioam_tmp_node - lsmod | grep -q "ip6_tunnel" + lsmod 2>/dev/null | grep -q "ip6_tunnel" ip6tnl_loaded=$? - if [ $ip6tnl_loaded = 0 ] + if [ $ip6tnl_loaded == 0 ] then encap_tests=0 else modprobe ip6_tunnel &>/dev/null - lsmod | grep -q "ip6_tunnel" + lsmod 2>/dev/null | grep -q "ip6_tunnel" encap_tests=$? if [ $encap_tests != 0 ] then - ip a | grep -q "ip6tnl0" + ip a 2>/dev/null | grep -q "ip6tnl0" encap_tests=$? if [ $encap_tests != 0 ] then echo "Note: ip6_tunnel not found neither as a module nor inside the" \ - "kernel, tests that require it (encap mode) will be omitted" + "kernel. Any tests that require it will be skipped." fi fi fi @@ -261,477 +251,1400 @@ check_kernel_compatibility() cleanup() { - ip link del ioam-veth-alpha 2>/dev/null || true - ip link del ioam-veth-gamma 2>/dev/null || true - - cleanup_ns $ioam_node_alpha $ioam_node_beta $ioam_node_gamma || true + cleanup_ns $ioam_node_alpha $ioam_node_beta $ioam_node_gamma if [ $ip6tnl_loaded != 0 ] then - modprobe -r ip6_tunnel 2>/dev/null || true + modprobe -r ip6_tunnel &>/dev/null fi } setup() { - setup_ns ioam_node_alpha ioam_node_beta ioam_node_gamma + setup_ns ioam_node_alpha ioam_node_beta ioam_node_gamma &>/dev/null ip link add name ioam-veth-alpha netns $ioam_node_alpha type veth \ - peer name ioam-veth-betaL netns $ioam_node_beta + peer name ioam-veth-betaL netns $ioam_node_beta &>/dev/null ip link add name ioam-veth-betaR netns $ioam_node_beta type veth \ - peer name ioam-veth-gamma netns $ioam_node_gamma - - ip -netns $ioam_node_alpha link set ioam-veth-alpha name veth0 - ip -netns $ioam_node_beta link set ioam-veth-betaL name veth0 - ip -netns $ioam_node_beta link set ioam-veth-betaR name veth1 - ip -netns $ioam_node_gamma link set ioam-veth-gamma name veth0 - - ip -netns $ioam_node_alpha addr add db01::2/64 dev veth0 - ip -netns $ioam_node_alpha link set veth0 up - ip -netns $ioam_node_alpha link set lo up - ip -netns $ioam_node_alpha route add db02::/64 via db01::1 dev veth0 - ip -netns $ioam_node_alpha route del db01::/64 - ip -netns $ioam_node_alpha route add db01::/64 dev veth0 - - ip -netns $ioam_node_beta addr add db01::1/64 dev veth0 - ip -netns $ioam_node_beta addr add db02::1/64 dev veth1 - ip -netns $ioam_node_beta link set veth0 up - ip -netns $ioam_node_beta link set veth1 up - ip -netns $ioam_node_beta link set lo up - - ip -netns $ioam_node_gamma addr add db02::2/64 dev veth0 - ip -netns $ioam_node_gamma link set veth0 up - ip -netns $ioam_node_gamma link set lo up - ip -netns $ioam_node_gamma route add db01::/64 via db02::1 dev veth0 - - # - IOAM config - - ip netns exec $ioam_node_alpha sysctl -wq net.ipv6.ioam6_id=${ALPHA[0]} - ip netns exec $ioam_node_alpha sysctl -wq net.ipv6.ioam6_id_wide=${ALPHA[1]} - ip netns exec $ioam_node_alpha sysctl -wq net.ipv6.conf.veth0.ioam6_id=${ALPHA[4]} - ip netns exec $ioam_node_alpha sysctl -wq net.ipv6.conf.veth0.ioam6_id_wide=${ALPHA[5]} - ip -netns $ioam_node_alpha ioam namespace add 123 data ${ALPHA[6]} wide ${ALPHA[7]} - ip -netns $ioam_node_alpha ioam schema add ${ALPHA[8]} "${ALPHA[9]}" - ip -netns $ioam_node_alpha ioam namespace set 123 schema ${ALPHA[8]} - - ip netns exec $ioam_node_beta sysctl -wq net.ipv6.conf.all.forwarding=1 - ip netns exec $ioam_node_beta sysctl -wq net.ipv6.ioam6_id=${BETA[0]} - ip netns exec $ioam_node_beta sysctl -wq net.ipv6.ioam6_id_wide=${BETA[1]} - ip netns exec $ioam_node_beta sysctl -wq net.ipv6.conf.veth0.ioam6_enabled=1 - ip netns exec $ioam_node_beta sysctl -wq net.ipv6.conf.veth0.ioam6_id=${BETA[2]} - ip netns exec $ioam_node_beta sysctl -wq net.ipv6.conf.veth0.ioam6_id_wide=${BETA[3]} - ip netns exec $ioam_node_beta sysctl -wq net.ipv6.conf.veth1.ioam6_id=${BETA[4]} - ip netns exec $ioam_node_beta sysctl -wq net.ipv6.conf.veth1.ioam6_id_wide=${BETA[5]} - ip -netns $ioam_node_beta ioam namespace add 123 data ${BETA[6]} wide ${BETA[7]} - ip -netns $ioam_node_beta ioam schema add ${BETA[8]} "${BETA[9]}" - ip -netns $ioam_node_beta ioam namespace set 123 schema ${BETA[8]} - - ip netns exec $ioam_node_gamma sysctl -wq net.ipv6.ioam6_id=${GAMMA[0]} - ip netns exec $ioam_node_gamma sysctl -wq net.ipv6.ioam6_id_wide=${GAMMA[1]} - ip netns exec $ioam_node_gamma sysctl -wq net.ipv6.conf.veth0.ioam6_enabled=1 - ip netns exec $ioam_node_gamma sysctl -wq net.ipv6.conf.veth0.ioam6_id=${GAMMA[2]} - ip netns exec $ioam_node_gamma sysctl -wq net.ipv6.conf.veth0.ioam6_id_wide=${GAMMA[3]} - ip -netns $ioam_node_gamma ioam namespace add 123 data ${GAMMA[6]} wide ${GAMMA[7]} + peer name ioam-veth-gamma netns $ioam_node_gamma &>/dev/null + + ip -netns $ioam_node_alpha link set ioam-veth-alpha name veth0 &>/dev/null + ip -netns $ioam_node_beta link set ioam-veth-betaL name veth0 &>/dev/null + ip -netns $ioam_node_beta link set ioam-veth-betaR name veth1 &>/dev/null + ip -netns $ioam_node_gamma link set ioam-veth-gamma name veth0 &>/dev/null + + ip -netns $ioam_node_alpha addr add 2001:db8:1::50/64 dev veth0 &>/dev/null + ip -netns $ioam_node_alpha addr add 2001:db8:1::2/64 dev veth0 &>/dev/null + ip -netns $ioam_node_alpha link set veth0 up &>/dev/null + ip -netns $ioam_node_alpha link set lo up &>/dev/null + ip -netns $ioam_node_alpha route add 2001:db8:2::/64 \ + via 2001:db8:1::1 dev veth0 &>/dev/null + + ip -netns $ioam_node_beta addr add 2001:db8:1::1/64 dev veth0 &>/dev/null + ip -netns $ioam_node_beta addr add 2001:db8:2::1/64 dev veth1 &>/dev/null + ip -netns $ioam_node_beta link set veth0 up &>/dev/null + ip -netns $ioam_node_beta link set veth1 up &>/dev/null + ip -netns $ioam_node_beta link set lo up &>/dev/null + + ip -netns $ioam_node_gamma addr add 2001:db8:2::2/64 dev veth0 &>/dev/null + ip -netns $ioam_node_gamma link set veth0 up &>/dev/null + ip -netns $ioam_node_gamma link set lo up &>/dev/null + ip -netns $ioam_node_gamma route add 2001:db8:1::/64 \ + via 2001:db8:2::1 dev veth0 &>/dev/null + + # - Alpha: IOAM config - + ip netns exec $ioam_node_alpha \ + sysctl -wq net.ipv6.ioam6_id=${ALPHA[0]} &>/dev/null + ip netns exec $ioam_node_alpha \ + sysctl -wq net.ipv6.ioam6_id_wide=${ALPHA[1]} &>/dev/null + ip netns exec $ioam_node_alpha \ + sysctl -wq net.ipv6.conf.veth0.ioam6_id=${ALPHA[4]} &>/dev/null + ip netns exec $ioam_node_alpha \ + sysctl -wq net.ipv6.conf.veth0.ioam6_id_wide=${ALPHA[5]} &>/dev/null + ip -netns $ioam_node_alpha \ + ioam namespace add 123 data ${ALPHA[6]} wide ${ALPHA[7]} &>/dev/null + ip -netns $ioam_node_alpha \ + ioam schema add ${ALPHA[8]} "${ALPHA[9]}" &>/dev/null + ip -netns $ioam_node_alpha \ + ioam namespace set 123 schema ${ALPHA[8]} &>/dev/null + + # - Beta: IOAM config - + ip netns exec $ioam_node_beta \ + sysctl -wq net.ipv6.conf.all.forwarding=1 &>/dev/null + ip netns exec $ioam_node_beta \ + sysctl -wq net.ipv6.ioam6_id=${BETA[0]} &>/dev/null + ip netns exec $ioam_node_beta \ + sysctl -wq net.ipv6.ioam6_id_wide=${BETA[1]} &>/dev/null + ip netns exec $ioam_node_beta \ + sysctl -wq net.ipv6.conf.veth0.ioam6_enabled=1 &>/dev/null + ip netns exec $ioam_node_beta \ + sysctl -wq net.ipv6.conf.veth0.ioam6_id=${BETA[2]} &>/dev/null + ip netns exec $ioam_node_beta \ + sysctl -wq net.ipv6.conf.veth0.ioam6_id_wide=${BETA[3]} &>/dev/null + ip netns exec $ioam_node_beta \ + sysctl -wq net.ipv6.conf.veth1.ioam6_id=${BETA[4]} &>/dev/null + ip netns exec $ioam_node_beta \ + sysctl -wq net.ipv6.conf.veth1.ioam6_id_wide=${BETA[5]} &>/dev/null + ip -netns $ioam_node_beta ioam namespace add 123 &>/dev/null sleep 1 - ip netns exec $ioam_node_alpha ping6 -c 5 -W 1 db02::2 &>/dev/null + ip netns exec $ioam_node_alpha ping6 -c 5 -W 1 2001:db8:2::2 &>/dev/null if [ $? != 0 ] then - echo "Setup FAILED" - cleanup &>/dev/null - exit 0 + echo "SKIP: Setup failed." + cleanup + exit $ksft_skip fi } log_test_passed() { - local desc=$1 - printf "TEST: %-60s [ OK ]\n" "${desc}" + printf " - TEST: %-57s [ OK ]\n" "$1" + npassed=$((npassed+1)) } -log_test_failed() +log_test_skipped() { - local desc=$1 - printf "TEST: %-60s [FAIL]\n" "${desc}" + printf " - TEST: %-57s [SKIP]\n" "$1" + nskipped=$((nskipped+1)) } -log_results() +log_test_failed() { - echo "- Tests passed: ${npassed}" - echo "- Tests failed: ${nfailed}" + printf " - TEST: %-57s [FAIL]\n" "$1" + nfailed=$((nfailed+1)) } run_test() { local name=$1 local desc=$2 - local node_src=$3 - local node_dst=$4 - local ip6_dst=$5 - local trace_type=$6 - local ioam_ns=$7 - local type=$8 - - ip netns exec $node_dst ./ioam6_parser $name $trace_type $ioam_ns $type & + local ip6_src=$3 + local trace_type=$4 + local trace_size=$5 + local ioam_ns=$6 + local type=$7 + + ip netns exec $ioam_node_gamma \ + ./ioam6_parser veth0 $name $ip6_src 2001:db8:2::2 \ + $trace_type $trace_size $ioam_ns $type & local spid=$! sleep 0.1 - ip netns exec $node_src ping6 -t 64 -c 1 -W 1 $ip6_dst &>/dev/null + ip netns exec $ioam_node_alpha ping6 -t 64 -c 1 -W 1 2001:db8:2::2 &>/dev/null if [ $? != 0 ] then - nfailed=$((nfailed+1)) log_test_failed "${desc}" kill -2 $spid &>/dev/null else wait $spid - if [ $? = 0 ] - then - npassed=$((npassed+1)) - log_test_passed "${desc}" - else - nfailed=$((nfailed+1)) - log_test_failed "${desc}" - fi + [ $? == 0 ] && log_test_passed "${desc}" || log_test_failed "${desc}" fi } run() { + local test + + echo + printf "+" + printf "%0.s-" {1..72} + printf "+" + echo + printf "| %-28s LOCAL tests %-29s |" echo - printf "%0.s-" {1..74} + printf "+" + printf "%0.s-" {1..72} + printf "+" echo - echo "OUTPUT tests" - printf "%0.s-" {1..74} + + echo + echo "Global config" + for test in $TESTS_LOCAL + do + $test + done + + echo + echo "Inline mode" + for test in $TESTS_LOCAL + do + $test "inline" + done + + echo + echo "Encap mode" + for test in $TESTS_LOCAL + do + $test "encap" + done + + echo + printf "+" + printf "%0.s-" {1..72} + printf "+" + echo + printf "| %-28s OUTPUT tests %-28s |" + echo + printf "+" + printf "%0.s-" {1..72} + printf "+" echo # set OUTPUT settings - ip netns exec $ioam_node_beta sysctl -wq net.ipv6.conf.veth0.ioam6_enabled=0 + ip netns exec $ioam_node_beta \ + sysctl -wq net.ipv6.conf.veth0.ioam6_enabled=0 &>/dev/null - for t in $TESTS_OUTPUT + echo + echo "Inline mode" + for test in $TESTS_OUTPUT do - $t "inline" - [ $encap_tests = 0 ] && $t "encap" + $test "inline" done - # clean OUTPUT settings - ip netns exec $ioam_node_beta sysctl -wq net.ipv6.conf.veth0.ioam6_enabled=1 - ip -netns $ioam_node_alpha route change db01::/64 dev veth0 + echo + echo "Encap mode" + for test in $TESTS_OUTPUT + do + $test "encap" + done + echo + echo "Encap mode (with tunsrc)" + for test in $TESTS_OUTPUT + do + $test "encap" "tunsrc" + done + + # clean OUTPUT settings + ip netns exec $ioam_node_beta \ + sysctl -wq net.ipv6.conf.veth0.ioam6_enabled=1 &>/dev/null echo - printf "%0.s-" {1..74} + printf "+" + printf "%0.s-" {1..72} + printf "+" echo - echo "INPUT tests" - printf "%0.s-" {1..74} + printf "| %-28s INPUT tests %-29s |" + echo + printf "+" + printf "%0.s-" {1..72} + printf "+" echo # set INPUT settings - ip -netns $ioam_node_alpha ioam namespace del 123 + ip -netns $ioam_node_alpha ioam namespace del 123 &>/dev/null - for t in $TESTS_INPUT + echo + echo "Inline mode" + for test in $TESTS_INPUT do - $t "inline" - [ $encap_tests = 0 ] && $t "encap" + $test "inline" + done + + echo + echo "Encap mode" + for test in $TESTS_INPUT + do + $test "encap" done # clean INPUT settings - ip -netns $ioam_node_alpha ioam namespace add 123 \ - data ${ALPHA[6]} wide ${ALPHA[7]} - ip -netns $ioam_node_alpha ioam namespace set 123 schema ${ALPHA[8]} - ip -netns $ioam_node_alpha route change db01::/64 dev veth0 + ip -netns $ioam_node_alpha \ + ioam namespace add 123 data ${ALPHA[6]} wide ${ALPHA[7]} &>/dev/null + ip -netns $ioam_node_alpha \ + ioam namespace set 123 schema ${ALPHA[8]} &>/dev/null echo - printf "%0.s-" {1..74} + printf "+" + printf "%0.s-" {1..72} + printf "+" echo - echo "GLOBAL tests" - printf "%0.s-" {1..74} + printf "| %-30s Results %-31s |" + echo + printf "+" + printf "%0.s-" {1..72} + printf "+" echo - for t in $TESTS_GLOBAL - do - $t "inline" - [ $encap_tests = 0 ] && $t "encap" - done - echo - log_results + echo "- Passed: ${npassed}" + echo "- Skipped: ${nskipped}" + echo "- Failed: ${nfailed}" + echo } bit2type=( 0x800000 0x400000 0x200000 0x100000 0x080000 0x040000 0x020000 0x010000 0x008000 0x004000 0x002000 0x001000 0x000800 0x000400 0x000200 0x000100 - 0x000080 0x000040 0x000020 0x000010 0x000008 0x000004 0x000002 + 0x000080 0x000040 0x000020 0x000010 0x000008 0x000004 0x000002 0x000001 ) -bit2size=( 4 4 4 4 4 4 4 4 8 8 8 4 4 4 4 4 4 4 4 4 4 4 4 ) +bit2size=( 4 4 4 4 4 4 4 4 8 8 8 4 4 4 4 4 4 4 4 4 4 4 4 0 ) ################################################################################ # # -# OUTPUT tests # +# LOCAL tests # # # -# Two nodes (sender/receiver), IOAM disabled on ingress for the receiver. # ################################################################################ -out_undef_ns() +local_sysctl_ioam_id() +{ + ############################################################################## + # Make sure the sysctl "net.ipv6.ioam6_id" works as expected. # + ############################################################################## + local desc="Sysctl net.ipv6.ioam6_id" + + [ ! -z $1 ] && return + + ip netns exec $ioam_node_alpha \ + sysctl net.ipv6.ioam6_id 2>/dev/null | grep -wq ${ALPHA[0]} + + [ $? == 0 ] && log_test_passed "${desc}" || log_test_failed "${desc}" +} + +local_sysctl_ioam_id_wide() { ############################################################################## - # Make sure that the encap node won't fill the trace if the chosen IOAM # - # namespace is not configured locally. # + # Make sure the sysctl "net.ipv6.ioam6_id_wide" works as expected. # ############################################################################## - local desc="Unknown IOAM namespace" + local desc="Sysctl net.ipv6.ioam6_id_wide" - [ "$1" = "encap" ] && mode="$1 tundst db01::1" || mode="$1" - [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 up + [ ! -z $1 ] && return - ip -netns $ioam_node_alpha route change db01::/64 encap ioam6 mode $mode \ - trace prealloc type 0x800000 ns 0 size 4 dev veth0 + ip netns exec $ioam_node_alpha \ + sysctl net.ipv6.ioam6_id_wide 2>/dev/null | grep -wq ${ALPHA[1]} - run_test ${FUNCNAME[0]} "${desc} ($1 mode)" $ioam_node_alpha $ioam_node_beta \ - db01::1 0x800000 0 $1 + [ $? == 0 ] && log_test_passed "${desc}" || log_test_failed "${desc}" +} - [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 down +local_sysctl_ioam_intf_id() +{ + ############################################################################## + # Make sure the sysctl "net.ipv6.conf.XX.ioam6_id" works as expected. # + ############################################################################## + local desc="Sysctl net.ipv6.conf.XX.ioam6_id" + + [ ! -z $1 ] && return + + ip netns exec $ioam_node_alpha \ + sysctl net.ipv6.conf.veth0.ioam6_id 2>/dev/null | grep -wq ${ALPHA[4]} + + [ $? == 0 ] && log_test_passed "${desc}" || log_test_failed "${desc}" } -out_no_room() +local_sysctl_ioam_intf_id_wide() { ############################################################################## - # Make sure that the encap node won't fill the trace and will set the # - # Overflow flag since there is no room enough for its data. # + # Make sure the sysctl "net.ipv6.conf.XX.ioam6_id_wide" works as expected. # ############################################################################## - local desc="Missing trace room" + local desc="Sysctl net.ipv6.conf.XX.ioam6_id_wide" + + [ ! -z $1 ] && return + + ip netns exec $ioam_node_alpha \ + sysctl net.ipv6.conf.veth0.ioam6_id_wide 2>/dev/null | grep -wq ${ALPHA[5]} + + [ $? == 0 ] && log_test_passed "${desc}" || log_test_failed "${desc}" +} - [ "$1" = "encap" ] && mode="$1 tundst db01::1" || mode="$1" - [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 up +local_sysctl_ioam_intf_enabled() +{ + ############################################################################## + # Make sure the sysctl "net.ipv6.conf.XX.ioam6_enabled" works as expected. # + ############################################################################## + local desc="Sysctl net.ipv6.conf.XX.ioam6_enabled" - ip -netns $ioam_node_alpha route change db01::/64 encap ioam6 mode $mode \ - trace prealloc type 0xc00000 ns 123 size 4 dev veth0 + [ ! -z $1 ] && return - run_test ${FUNCNAME[0]} "${desc} ($1 mode)" $ioam_node_alpha $ioam_node_beta \ - db01::1 0xc00000 123 $1 + ip netns exec $ioam_node_beta \ + sysctl net.ipv6.conf.veth0.ioam6_enabled 2>/dev/null | grep -wq 1 - [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 down + [ $? == 0 ] && log_test_passed "${desc}" || log_test_failed "${desc}" } -out_bits() +local_ioam_namespace() { ############################################################################## - # Make sure that, for each trace type bit, the encap node will either: # - # (i) fill the trace with its data when it is a supported bit # - # (ii) not fill the trace with its data when it is an unsupported bit # + # Make sure the creation of an IOAM Namespace works as expected. # ############################################################################## - local desc="Trace type with bit <n> only" + local desc="Create an IOAM Namespace" - local tmp=${bit2size[22]} - bit2size[22]=$(( $tmp + ${#ALPHA[9]} + ((4 - (${#ALPHA[9]} % 4)) % 4) )) + [ ! -z $1 ] && return - [ "$1" = "encap" ] && mode="$1 tundst db01::1" || mode="$1" - [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 up + ip -netns $ioam_node_alpha \ + ioam namespace show 2>/dev/null | grep -wq 123 + local ret=$? - for i in {0..22} - do - ip -netns $ioam_node_alpha route change db01::/64 encap ioam6 mode $mode \ - trace prealloc type ${bit2type[$i]} ns 123 size ${bit2size[$i]} \ - dev veth0 &>/dev/null + ip -netns $ioam_node_alpha \ + ioam namespace show 2>/dev/null | grep -wq ${ALPHA[6]} + ret=$((ret + $?)) - local cmd_res=$? - local descr="${desc/<n>/$i}" + ip -netns $ioam_node_alpha \ + ioam namespace show 2>/dev/null | grep -wq ${ALPHA[7]} + ret=$((ret + $?)) + + [ $ret == 0 ] && log_test_passed "${desc}" || log_test_failed "${desc}" +} + +local_ioam_schema() +{ + ############################################################################## + # Make sure the creation of an IOAM Schema works as expected. # + ############################################################################## + local desc="Create an IOAM Schema" + + [ ! -z $1 ] && return + + ip -netns $ioam_node_alpha \ + ioam schema show 2>/dev/null | grep -wq ${ALPHA[8]} + local ret=$? + + local sc_data=$( + for i in `seq 0 $((${#ALPHA[9]}-1))` + do + chr=${ALPHA[9]:i:1} + printf "%x " "'${chr}" + done + ) + + ip -netns $ioam_node_alpha \ + ioam schema show 2>/dev/null | grep -q "$sc_data" + ret=$((ret + $?)) + + [ $ret == 0 ] && log_test_passed "${desc}" || log_test_failed "${desc}" +} + +local_ioam_schema_namespace() +{ + ############################################################################## + # Make sure the binding of a Schema to a Namespace works as expected. # + ############################################################################## + local desc="Bind an IOAM Schema to an IOAM Namespace" + + [ ! -z $1 ] && return + + ip -netns $ioam_node_alpha \ + ioam namespace show 2>/dev/null | grep -wq ${ALPHA[8]} + local ret=$? + + ip -netns $ioam_node_alpha \ + ioam schema show 2>/dev/null | grep -wq 123 + ret=$((ret + $?)) + + [ $ret == 0 ] && log_test_passed "${desc}" || log_test_failed "${desc}" +} + +local_route_ns() +{ + ############################################################################## + # Make sure the Namespace-ID is always provided, whatever the mode. # + ############################################################################## + local desc="Mandatory Namespace-ID" + local mode + + [ -z $1 ] && return + + [ "$1" == "encap" ] && mode="$1 tundst 2001:db8:2::2" || mode="$1" + + ip -netns $ioam_node_alpha \ + route change 2001:db8:2::/64 \ + encap ioam6 mode $mode trace prealloc type 0x800000 size 4 \ + via 2001:db8:1::1 dev veth0 &>/dev/null + local ret1=$? + + ip -netns $ioam_node_alpha \ + route change 2001:db8:2::/64 \ + encap ioam6 mode $mode trace prealloc type 0x800000 ns 0 size 4 \ + via 2001:db8:1::1 dev veth0 &>/dev/null + local ret2=$? + + [[ $ret1 == 0 || $ret2 != 0 ]] && log_test_failed "${desc}" \ + || log_test_passed "${desc}" + + ip -netns $ioam_node_alpha \ + route change 2001:db8:2::/64 via 2001:db8:1::1 dev veth0 &>/dev/null +} + +local_route_tunsrc() +{ + ############################################################################## + # Make sure the Tunnel Source is only (and possibly) used with encap mode. # + ############################################################################## + local desc + local mode + local mode_tunsrc - if [[ $i -ge 12 && $i -le 21 ]] + [ -z $1 ] && return + + if [ "$1" == "encap" ] + then + desc="Optional Tunnel Source" + mode="$1 tundst 2001:db8:2::2" + mode_tunsrc="$1 tunsrc 2001:db8:1::50 tundst 2001:db8:2::2" + else + desc="Unneeded Tunnel Source" + mode="$1" + mode_tunsrc="$1 tunsrc 2001:db8:1::50" + fi + + ip -netns $ioam_node_alpha \ + route change 2001:db8:2::/64 \ + encap ioam6 mode $mode trace prealloc type 0x800000 ns 0 size 4 \ + via 2001:db8:1::1 dev veth0 &>/dev/null + local ret1=$? + + ip -netns $ioam_node_alpha \ + route change 2001:db8:2::/64 \ + encap ioam6 mode $mode_tunsrc trace prealloc type 0x800000 ns 0 size 4 \ + via 2001:db8:1::1 dev veth0 &>/dev/null + local ret2=$? + + if [ "$1" == "encap" ] + then + [[ $ret1 != 0 || $ret2 != 0 ]] && log_test_failed "${desc}" \ + || log_test_passed "${desc}" + else + [[ $ret1 != 0 || $ret2 == 0 ]] && log_test_failed "${desc}" \ + || log_test_passed "${desc}" + fi + + ip -netns $ioam_node_alpha \ + route change 2001:db8:2::/64 via 2001:db8:1::1 dev veth0 &>/dev/null +} + +local_route_tundst() +{ + ############################################################################## + # Make sure the Tunnel Destination is only (and always) used with encap mode.# + ############################################################################## + local desc + + [ -z $1 ] && return + + [ "$1" == "encap" ] && desc="Mandatory Tunnel Destination" \ + || desc="Unneeded Tunnel Destination" + + local mode="$1" + local mode_tundst="$1 tundst 2001:db8:2::2" + + ip -netns $ioam_node_alpha \ + route change 2001:db8:2::/64 \ + encap ioam6 mode $mode trace prealloc type 0x800000 ns 0 size 4 \ + via 2001:db8:1::1 dev veth0 &>/dev/null + local ret1=$? + + ip -netns $ioam_node_alpha \ + route change 2001:db8:2::/64 \ + encap ioam6 mode $mode_tundst trace prealloc type 0x800000 ns 0 size 4 \ + via 2001:db8:1::1 dev veth0 &>/dev/null + local ret2=$? + + if [ "$1" == "encap" ] + then + [[ $ret1 == 0 || $ret2 != 0 ]] && log_test_failed "${desc}" \ + || log_test_passed "${desc}" + else + [[ $ret1 != 0 || $ret2 == 0 ]] && log_test_failed "${desc}" \ + || log_test_passed "${desc}" + fi + + ip -netns $ioam_node_alpha \ + route change 2001:db8:2::/64 via 2001:db8:1::1 dev veth0 &>/dev/null +} + +local_route_trace_type() +{ + ############################################################################## + # Make sure the Trace Type is always provided, whatever the mode. # + ############################################################################## + local desc="Mandatory Trace Type" + local mode + + [ -z $1 ] && return + + [ "$1" == "encap" ] && mode="$1 tundst 2001:db8:2::2" || mode="$1" + + ip -netns $ioam_node_alpha \ + route change 2001:db8:2::/64 \ + encap ioam6 mode $mode trace prealloc ns 0 size 4 \ + via 2001:db8:1::1 dev veth0 &>/dev/null + local ret1=$? + + ip -netns $ioam_node_alpha \ + route change 2001:db8:2::/64 \ + encap ioam6 mode $mode trace prealloc type 0x800000 ns 0 size 4 \ + via 2001:db8:1::1 dev veth0 &>/dev/null + local ret2=$? + + [[ $ret1 == 0 || $ret2 != 0 ]] && log_test_failed "${desc}" \ + || log_test_passed "${desc}" + + ip -netns $ioam_node_alpha \ + route change 2001:db8:2::/64 via 2001:db8:1::1 dev veth0 &>/dev/null +} + +local_route_trace_size() +{ + ############################################################################## + # Make sure the Trace Size is always provided, whatever the mode. # + ############################################################################## + local desc="Mandatory Trace Size" + local mode + + [ -z $1 ] && return + + [ "$1" == "encap" ] && mode="$1 tundst 2001:db8:2::2" || mode="$1" + + ip -netns $ioam_node_alpha \ + route change 2001:db8:2::/64 \ + encap ioam6 mode $mode trace prealloc type 0x800000 ns 0 \ + via 2001:db8:1::1 dev veth0 &>/dev/null + local ret1=$? + + ip -netns $ioam_node_alpha \ + route change 2001:db8:2::/64 \ + encap ioam6 mode $mode trace prealloc type 0x800000 ns 0 size 4 \ + via 2001:db8:1::1 dev veth0 &>/dev/null + local ret2=$? + + [[ $ret1 == 0 || $ret2 != 0 ]] && log_test_failed "${desc}" \ + || log_test_passed "${desc}" + + ip -netns $ioam_node_alpha \ + route change 2001:db8:2::/64 via 2001:db8:1::1 dev veth0 &>/dev/null +} + +local_route_trace_type_bits() +{ + ############################################################################## + # Make sure only allowed bits (0-11 and 22) are accepted. # + ############################################################################## + local desc="Trace Type bits" + local mode + + [ -z $1 ] && return + + [ "$1" == "encap" ] && mode="$1 tundst 2001:db8:2::2" || mode="$1" + + local i + for i in {0..23} + do + ip -netns $ioam_node_alpha \ + route change 2001:db8:2::/64 \ + encap ioam6 mode $mode trace prealloc type ${bit2type[$i]} ns 0 size 4 \ + via 2001:db8:1::1 dev veth0 &>/dev/null + + if [[ ($? == 0 && (($i -ge 12 && $i -le 21) || $i == 23)) || + ($? != 0 && (($i -ge 0 && $i -le 11) || $i == 22)) ]] then - if [ $cmd_res != 0 ] - then - npassed=$((npassed+1)) - log_test_passed "$descr ($1 mode)" - else - nfailed=$((nfailed+1)) - log_test_failed "$descr ($1 mode)" - fi - else - run_test "out_bit$i" "$descr ($1 mode)" $ioam_node_alpha \ - $ioam_node_beta db01::1 ${bit2type[$i]} 123 $1 + local err=1 + break fi done - [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 down + [ -z $err ] && log_test_passed "${desc}" || log_test_failed "${desc}" - bit2size[22]=$tmp + ip -netns $ioam_node_alpha \ + route change 2001:db8:2::/64 via 2001:db8:1::1 dev veth0 &>/dev/null } -out_full_supp_trace() +local_route_trace_size_values() { ############################################################################## - # Make sure that the encap node will correctly fill a full trace. Be careful,# - # "full trace" here does NOT mean all bits (only supported ones). # + # Make sure only allowed sizes (multiples of four in [4,244]) are accepted. # ############################################################################## - local desc="Full supported trace" + local desc="Trace Size values" + local mode - [ "$1" = "encap" ] && mode="$1 tundst db01::1" || mode="$1" - [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 up + [ -z $1 ] && return - ip -netns $ioam_node_alpha route change db01::/64 encap ioam6 mode $mode \ - trace prealloc type 0xfff002 ns 123 size 100 dev veth0 + [ "$1" == "encap" ] && mode="$1 tundst 2001:db8:2::2" || mode="$1" - run_test ${FUNCNAME[0]} "${desc} ($1 mode)" $ioam_node_alpha $ioam_node_beta \ - db01::1 0xfff002 123 $1 + # we also try the next multiple of four after the MAX to check it's refused + local i + for i in {0..248} + do + ip -netns $ioam_node_alpha \ + route change 2001:db8:2::/64 \ + encap ioam6 mode $mode trace prealloc type 0x800000 ns 0 size $i \ + via 2001:db8:1::1 dev veth0 &>/dev/null - [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 down + if [[ ($? == 0 && ($i == 0 || $i == 248 || $(( $i % 4 )) != 0)) || + ($? != 0 && $i != 0 && $i != 248 && $(( $i % 4 )) == 0) ]] + then + local err=1 + break + fi + done + + [ -z $err ] && log_test_passed "${desc}" || log_test_failed "${desc}" + + ip -netns $ioam_node_alpha \ + route change 2001:db8:2::/64 via 2001:db8:1::1 dev veth0 &>/dev/null } ################################################################################ # # -# INPUT tests # +# OUTPUT tests # # # -# Two nodes (sender/receiver), the sender MUST NOT fill the trace upon # -# insertion -> the IOAM namespace configured on the sender is removed # -# and is used in the inserted trace to force the sender not to fill it. # ################################################################################ -in_undef_ns() +output_undef_ns() { ############################################################################## - # Make sure that the receiving node won't fill the trace if the related IOAM # - # namespace is not configured locally. # + # Make sure an IOAM encapsulating node does NOT fill the trace when the # + # corresponding IOAM Namespace-ID is not configured locally. # ############################################################################## - local desc="Unknown IOAM namespace" + local desc="Unknown IOAM Namespace-ID" + local ns=0 + local tr_type=0x800000 + local tr_size=4 + local mode="$1" + local saddr="2001:db8:1::2" + + if [ "$1" == "encap" ] + then + if [ $encap_tests != 0 ] + then + log_test_skipped "${desc}" + return + fi + + if [ "$2" == "tunsrc" ] + then + saddr="2001:db8:1::50" + mode+=" tunsrc 2001:db8:1::50" + fi + + mode+=" tundst 2001:db8:2::2" + ip -netns $ioam_node_gamma link set ip6tnl0 up &>/dev/null + fi - [ "$1" = "encap" ] && mode="$1 tundst db01::1" || mode="$1" - [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 up + ip -netns $ioam_node_alpha \ + route change 2001:db8:2::/64 \ + encap ioam6 mode $mode trace prealloc type $tr_type ns $ns size $tr_size \ + via 2001:db8:1::1 dev veth0 &>/dev/null - ip -netns $ioam_node_alpha route change db01::/64 encap ioam6 mode $mode \ - trace prealloc type 0x800000 ns 0 size 4 dev veth0 + if [ $? == 0 ] + then + run_test ${FUNCNAME[0]} "${desc}" $saddr $tr_type $tr_size $ns $1 + else + log_test_failed "${desc}" + fi - run_test ${FUNCNAME[0]} "${desc} ($1 mode)" $ioam_node_alpha $ioam_node_beta \ - db01::1 0x800000 0 $1 + ip -netns $ioam_node_alpha \ + route change 2001:db8:2::/64 via 2001:db8:1::1 dev veth0 &>/dev/null - [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 down + [ "$1" == "encap" ] && ip -netns $ioam_node_gamma \ + link set ip6tnl0 down &>/dev/null } -in_no_room() +output_no_room() { ############################################################################## - # Make sure that the receiving node won't fill the trace and will set the # - # Overflow flag if there is no room enough for its data. # + # Make sure an IOAM encapsulating node does NOT fill the trace AND sets the # + # Overflow flag when there is not enough room for its data. # ############################################################################## - local desc="Missing trace room" + local desc="Missing room for data" + local ns=123 + local tr_type=0xc00000 + local tr_size=4 + local mode="$1" + local saddr="2001:db8:1::2" + + if [ "$1" == "encap" ] + then + if [ $encap_tests != 0 ] + then + log_test_skipped "${desc}" + return + fi - [ "$1" = "encap" ] && mode="$1 tundst db01::1" || mode="$1" - [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 up + if [ "$2" == "tunsrc" ] + then + saddr="2001:db8:1::50" + mode+=" tunsrc 2001:db8:1::50" + fi - ip -netns $ioam_node_alpha route change db01::/64 encap ioam6 mode $mode \ - trace prealloc type 0xc00000 ns 123 size 4 dev veth0 + mode+=" tundst 2001:db8:2::2" + ip -netns $ioam_node_gamma link set ip6tnl0 up &>/dev/null + fi - run_test ${FUNCNAME[0]} "${desc} ($1 mode)" $ioam_node_alpha $ioam_node_beta \ - db01::1 0xc00000 123 $1 + ip -netns $ioam_node_alpha \ + route change 2001:db8:2::/64 \ + encap ioam6 mode $mode trace prealloc type $tr_type ns $ns size $tr_size \ + via 2001:db8:1::1 dev veth0 &>/dev/null - [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 down + if [ $? == 0 ] + then + run_test ${FUNCNAME[0]} "${desc}" $saddr $tr_type $tr_size $ns $1 + else + log_test_failed "${desc}" + fi + + ip -netns $ioam_node_alpha \ + route change 2001:db8:2::/64 via 2001:db8:1::1 dev veth0 &>/dev/null + + [ "$1" == "encap" ] && ip -netns $ioam_node_gamma \ + link set ip6tnl0 down &>/dev/null } -in_bits() +output_no_room_oss() { ############################################################################## - # Make sure that, for each trace type bit, the receiving node will either: # - # (i) fill the trace with its data when it is a supported bit # - # (ii) not fill the trace with its data when it is an unsupported bit # + # Make sure an IOAM encapsulating node does NOT fill the trace AND sets the # + # Overflow flag when there is not enough room for the Opaque State Snapshot. # ############################################################################## - local desc="Trace type with bit <n> only" + local desc="Missing room for Opaque State Snapshot" + local ns=123 + local tr_type=0x000002 + local tr_size=4 + local mode="$1" + local saddr="2001:db8:1::2" + + if [ "$1" == "encap" ] + then + if [ $encap_tests != 0 ] + then + log_test_skipped "${desc}" + return + fi - local tmp=${bit2size[22]} - bit2size[22]=$(( $tmp + ${#BETA[9]} + ((4 - (${#BETA[9]} % 4)) % 4) )) + if [ "$2" == "tunsrc" ] + then + saddr="2001:db8:1::50" + mode+=" tunsrc 2001:db8:1::50" + fi + + mode+=" tundst 2001:db8:2::2" + ip -netns $ioam_node_gamma link set ip6tnl0 up &>/dev/null + fi + + ip -netns $ioam_node_alpha \ + route change 2001:db8:2::/64 \ + encap ioam6 mode $mode trace prealloc type $tr_type ns $ns size $tr_size \ + via 2001:db8:1::1 dev veth0 &>/dev/null + + if [ $? == 0 ] + then + run_test ${FUNCNAME[0]} "${desc}" $saddr $tr_type $tr_size $ns $1 + else + log_test_failed "${desc}" + fi + + ip -netns $ioam_node_alpha \ + route change 2001:db8:2::/64 via 2001:db8:1::1 dev veth0 &>/dev/null + + [ "$1" == "encap" ] && ip -netns $ioam_node_gamma \ + link set ip6tnl0 down &>/dev/null +} + +output_bits() +{ + ############################################################################## + # Make sure an IOAM encapsulating node implements all supported bits by # + # checking it correctly fills the trace with its data. # + ############################################################################## + local desc="Trace Type with supported bit <n> only" + local ns=123 + local mode="$1" + local saddr="2001:db8:1::2" - [ "$1" = "encap" ] && mode="$1 tundst db01::1" || mode="$1" - [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 up + if [ "$1" == "encap" ] + then + if [ "$2" == "tunsrc" ] + then + saddr="2001:db8:1::50" + mode+=" tunsrc 2001:db8:1::50" + fi + + mode+=" tundst 2001:db8:2::2" + ip -netns $ioam_node_gamma link set ip6tnl0 up &>/dev/null + fi + + local tmp=${bit2size[22]} + bit2size[22]=$(( $tmp + ${#ALPHA[9]} + ((4 - (${#ALPHA[9]} % 4)) % 4) )) + local i for i in {0..11} {22..22} do - ip -netns $ioam_node_alpha route change db01::/64 encap ioam6 mode $mode \ - trace prealloc type ${bit2type[$i]} ns 123 size ${bit2size[$i]} \ - dev veth0 + local descr="${desc/<n>/$i}" + + if [[ "$1" == "encap" && $encap_tests != 0 ]] + then + log_test_skipped "${descr}" + continue + fi - run_test "in_bit$i" "${desc/<n>/$i} ($1 mode)" $ioam_node_alpha \ - $ioam_node_beta db01::1 ${bit2type[$i]} 123 $1 + ip -netns $ioam_node_alpha \ + route change 2001:db8:2::/64 \ + encap ioam6 mode $mode trace prealloc \ + type ${bit2type[$i]} ns $ns size ${bit2size[$i]} \ + via 2001:db8:1::1 dev veth0 &>/dev/null + + if [ $? == 0 ] + then + run_test "output_bit$i" "${descr}" $saddr \ + ${bit2type[$i]} ${bit2size[$i]} $ns $1 + else + log_test_failed "${descr}" + fi done - [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 down + ip -netns $ioam_node_alpha \ + route change 2001:db8:2::/64 via 2001:db8:1::1 dev veth0 &>/dev/null + + [ "$1" == "encap" ] && ip -netns $ioam_node_gamma \ + link set ip6tnl0 down &>/dev/null bit2size[22]=$tmp } -in_oflag() +output_sizes() { ############################################################################## - # Make sure that the receiving node won't fill the trace since the Overflow # - # flag is set. # + # Make sure an IOAM encapsulating node allocates supported sizes correctly. # ############################################################################## - local desc="Overflow flag is set" + local desc="Trace Size of <n> bytes" + local ns=0 + local tr_type=0x800000 + local mode="$1" + local saddr="2001:db8:1::2" - # Exception: - # Here, we need the sender to set the Overflow flag. For that, we will add - # back the IOAM namespace that was previously configured on the sender. - ip -netns $ioam_node_alpha ioam namespace add 123 + if [ "$1" == "encap" ] + then + if [ "$2" == "tunsrc" ] + then + saddr="2001:db8:1::50" + mode+=" tunsrc 2001:db8:1::50" + fi - [ "$1" = "encap" ] && mode="$1 tundst db01::1" || mode="$1" - [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 up + mode+=" tundst 2001:db8:2::2" + ip -netns $ioam_node_gamma link set ip6tnl0 up &>/dev/null + fi - ip -netns $ioam_node_alpha route change db01::/64 encap ioam6 mode $mode \ - trace prealloc type 0xc00000 ns 123 size 4 dev veth0 + local i + for i in $(seq 4 4 244) + do + local descr="${desc/<n>/$i}" - run_test ${FUNCNAME[0]} "${desc} ($1 mode)" $ioam_node_alpha $ioam_node_beta \ - db01::1 0xc00000 123 $1 + if [[ "$1" == "encap" && $encap_tests != 0 ]] + then + log_test_skipped "${descr}" + continue + fi - [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 down + ip -netns $ioam_node_alpha \ + route change 2001:db8:2::/64 \ + encap ioam6 mode $mode trace prealloc type $tr_type ns $ns size $i \ + via 2001:db8:1::1 dev veth0 &>/dev/null - # And we clean the exception for this test to get things back to normal for - # other INPUT tests - ip -netns $ioam_node_alpha ioam namespace del 123 + if [ $? == 0 ] + then + run_test "output_size$i" "${descr}" $saddr $tr_type $i $ns $1 + else + log_test_failed "${descr}" + fi + done + + ip -netns $ioam_node_alpha \ + route change 2001:db8:2::/64 via 2001:db8:1::1 dev veth0 &>/dev/null + + [ "$1" == "encap" ] && ip -netns $ioam_node_gamma \ + link set ip6tnl0 down &>/dev/null } -in_full_supp_trace() +output_full_supp_trace() { ############################################################################## - # Make sure that the receiving node will correctly fill a full trace. Be # - # careful, "full trace" here does NOT mean all bits (only supported ones). # + # Make sure an IOAM encapsulating node correctly fills a trace when all # + # supported bits are set. # ############################################################################## local desc="Full supported trace" + local ns=123 + local tr_type=0xfff002 + local tr_size + local mode="$1" + local saddr="2001:db8:1::2" - [ "$1" = "encap" ] && mode="$1 tundst db01::1" || mode="$1" - [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 up + if [ "$1" == "encap" ] + then + if [ $encap_tests != 0 ] + then + log_test_skipped "${desc}" + return + fi - ip -netns $ioam_node_alpha route change db01::/64 encap ioam6 mode $mode \ - trace prealloc type 0xfff002 ns 123 size 80 dev veth0 + if [ "$2" == "tunsrc" ] + then + saddr="2001:db8:1::50" + mode+=" tunsrc 2001:db8:1::50" + fi - run_test ${FUNCNAME[0]} "${desc} ($1 mode)" $ioam_node_alpha $ioam_node_beta \ - db01::1 0xfff002 123 $1 + mode+=" tundst 2001:db8:2::2" + ip -netns $ioam_node_gamma link set ip6tnl0 up &>/dev/null + fi - [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 down + local i + tr_size=$(( ${#ALPHA[9]} + ((4 - (${#ALPHA[9]} % 4)) % 4) )) + for i in {0..11} {22..22} + do + tr_size=$((tr_size + bit2size[$i])) + done + + ip -netns $ioam_node_alpha \ + route change 2001:db8:2::/64 \ + encap ioam6 mode $mode trace prealloc type $tr_type ns $ns size $tr_size \ + via 2001:db8:1::1 dev veth0 &>/dev/null + + if [ $? == 0 ] + then + run_test ${FUNCNAME[0]} "${desc}" $saddr $tr_type $tr_size $ns $1 + else + log_test_failed "${desc}" + fi + + ip -netns $ioam_node_alpha \ + route change 2001:db8:2::/64 via 2001:db8:1::1 dev veth0 &>/dev/null + + [ "$1" == "encap" ] && ip -netns $ioam_node_gamma \ + link set ip6tnl0 down &>/dev/null } ################################################################################ # # -# GLOBAL tests # +# INPUT tests # # # -# Three nodes (sender/router/receiver), IOAM fully enabled on every node. # ################################################################################ -fwd_full_supp_trace() +input_undef_ns() +{ + ############################################################################## + # Make sure an IOAM node does NOT fill the trace when the corresponding IOAM # + # Namespace-ID is not configured locally. # + ############################################################################## + local desc="Unknown IOAM Namespace-ID" + local ns=0 + local tr_type=0x800000 + local tr_size=4 + local mode="$1" + + if [ "$1" == "encap" ] + then + if [ $encap_tests != 0 ] + then + log_test_skipped "${desc}" + return + fi + + mode+=" tundst 2001:db8:2::2" + ip -netns $ioam_node_gamma link set ip6tnl0 up &>/dev/null + fi + + ip -netns $ioam_node_alpha \ + route change 2001:db8:2::/64 \ + encap ioam6 mode $mode trace prealloc type $tr_type ns $ns size $tr_size \ + via 2001:db8:1::1 dev veth0 &>/dev/null + + if [ $? == 0 ] + then + run_test ${FUNCNAME[0]} "${desc}" 2001:db8:1::2 $tr_type $tr_size $ns $1 + else + log_test_failed "${desc}" + fi + + ip -netns $ioam_node_alpha \ + route change 2001:db8:2::/64 via 2001:db8:1::1 dev veth0 &>/dev/null + + [ "$1" == "encap" ] && ip -netns $ioam_node_gamma \ + link set ip6tnl0 down &>/dev/null +} + +input_no_room() +{ + ############################################################################## + # Make sure an IOAM node does NOT fill the trace AND sets the Overflow flag # + # when there is not enough room for its data. # + ############################################################################## + local desc="Missing room for data" + local ns=123 + local tr_type=0xc00000 + local tr_size=4 + local mode="$1" + + if [ "$1" == "encap" ] + then + if [ $encap_tests != 0 ] + then + log_test_skipped "${desc}" + return + fi + + mode+=" tundst 2001:db8:2::2" + ip -netns $ioam_node_gamma link set ip6tnl0 up &>/dev/null + fi + + ip -netns $ioam_node_alpha \ + route change 2001:db8:2::/64 \ + encap ioam6 mode $mode trace prealloc type $tr_type ns $ns size $tr_size \ + via 2001:db8:1::1 dev veth0 &>/dev/null + + if [ $? == 0 ] + then + run_test ${FUNCNAME[0]} "${desc}" 2001:db8:1::2 $tr_type $tr_size $ns $1 + else + log_test_failed "${desc}" + fi + + ip -netns $ioam_node_alpha \ + route change 2001:db8:2::/64 via 2001:db8:1::1 dev veth0 &>/dev/null + + [ "$1" == "encap" ] && ip -netns $ioam_node_gamma \ + link set ip6tnl0 down &>/dev/null +} + +input_no_room_oss() +{ + ############################################################################## + # Make sure an IOAM node does NOT fill the trace AND sets the Overflow flag # + # when there is not enough room for the Opaque State Snapshot. # + ############################################################################## + local desc="Missing room for Opaque State Snapshot" + local ns=123 + local tr_type=0x000002 + local tr_size=4 + local mode="$1" + + if [ "$1" == "encap" ] + then + if [ $encap_tests != 0 ] + then + log_test_skipped "${desc}" + return + fi + + mode+=" tundst 2001:db8:2::2" + ip -netns $ioam_node_gamma link set ip6tnl0 up &>/dev/null + fi + + ip -netns $ioam_node_alpha \ + route change 2001:db8:2::/64 \ + encap ioam6 mode $mode trace prealloc type $tr_type ns $ns size $tr_size \ + via 2001:db8:1::1 dev veth0 &>/dev/null + + if [ $? == 0 ] + then + run_test ${FUNCNAME[0]} "${desc}" 2001:db8:1::2 $tr_type $tr_size $ns $1 + else + log_test_failed "${desc}" + fi + + ip -netns $ioam_node_alpha \ + route change 2001:db8:2::/64 via 2001:db8:1::1 dev veth0 &>/dev/null + + [ "$1" == "encap" ] && ip -netns $ioam_node_gamma \ + link set ip6tnl0 down &>/dev/null +} + +input_disabled() +{ + ############################################################################## + # Make sure an IOAM node does NOT fill the trace when IOAM is not enabled on # + # the corresponding (ingress) interface. # + ############################################################################## + local desc="IOAM disabled on ingress interface" + local ns=123 + local tr_type=0x800000 + local tr_size=4 + local mode="$1" + + if [ "$1" == "encap" ] + then + if [ $encap_tests != 0 ] + then + log_test_skipped "${desc}" + return + fi + + mode+=" tundst 2001:db8:2::2" + ip -netns $ioam_node_gamma link set ip6tnl0 up &>/dev/null + fi + + # Exception: disable IOAM on ingress interface + ip netns exec $ioam_node_beta \ + sysctl -wq net.ipv6.conf.veth0.ioam6_enabled=0 &>/dev/null + local ret=$? + + ip -netns $ioam_node_alpha \ + route change 2001:db8:2::/64 \ + encap ioam6 mode $mode trace prealloc type $tr_type ns $ns size $tr_size \ + via 2001:db8:1::1 dev veth0 &>/dev/null + ret=$((ret + $?)) + + if [ $ret == 0 ] + then + run_test ${FUNCNAME[0]} "${desc}" 2001:db8:1::2 $tr_type $tr_size $ns $1 + else + log_test_failed "${desc}" + fi + + # Clean Exception + ip netns exec $ioam_node_beta \ + sysctl -wq net.ipv6.conf.veth0.ioam6_enabled=1 &>/dev/null + + ip -netns $ioam_node_alpha \ + route change 2001:db8:2::/64 via 2001:db8:1::1 dev veth0 &>/dev/null + + [ "$1" == "encap" ] && ip -netns $ioam_node_gamma \ + link set ip6tnl0 down &>/dev/null +} + +input_oflag() +{ + ############################################################################## + # Make sure an IOAM node does NOT fill the trace when the Overflow flag is # + # set. # + ############################################################################## + local desc="Overflow flag is set" + local ns=123 + local tr_type=0xc00000 + local tr_size=4 + local mode="$1" + + if [ "$1" == "encap" ] + then + if [ $encap_tests != 0 ] + then + log_test_skipped "${desc}" + return + fi + + mode+=" tundst 2001:db8:2::2" + ip -netns $ioam_node_gamma link set ip6tnl0 up &>/dev/null + fi + + # Exception: + # Here, we need the sender to set the Overflow flag. For that, we will add + # back the IOAM namespace that was previously configured on the sender. + ip -netns $ioam_node_alpha ioam namespace add 123 &>/dev/null + local ret=$? + + ip -netns $ioam_node_alpha \ + route change 2001:db8:2::/64 \ + encap ioam6 mode $mode trace prealloc type $tr_type ns $ns size $tr_size \ + via 2001:db8:1::1 dev veth0 &>/dev/null + ret=$((ret + $?)) + + if [ $ret == 0 ] + then + run_test ${FUNCNAME[0]} "${desc}" 2001:db8:1::2 $tr_type $tr_size $ns $1 + else + log_test_failed "${desc}" + fi + + # Clean Exception + ip -netns $ioam_node_alpha ioam namespace del 123 &>/dev/null + + ip -netns $ioam_node_alpha \ + route change 2001:db8:2::/64 via 2001:db8:1::1 dev veth0 &>/dev/null + + [ "$1" == "encap" ] && ip -netns $ioam_node_gamma \ + link set ip6tnl0 down &>/dev/null +} + +input_bits() +{ + ############################################################################## + # Make sure an IOAM node implements all supported bits by checking it # + # correctly fills the trace with its data. # + ############################################################################## + local desc="Trace Type with supported bit <n> only" + local ns=123 + local mode="$1" + + if [ "$1" == "encap" ] + then + mode+=" tundst 2001:db8:2::2" + ip -netns $ioam_node_gamma link set ip6tnl0 up &>/dev/null + fi + + local tmp=${bit2size[22]} + bit2size[22]=$(( $tmp + ${#BETA[9]} + ((4 - (${#BETA[9]} % 4)) % 4) )) + + local i + for i in {0..11} {22..22} + do + local descr="${desc/<n>/$i}" + + if [[ "$1" == "encap" && $encap_tests != 0 ]] + then + log_test_skipped "${descr}" + continue + fi + + ip -netns $ioam_node_alpha \ + route change 2001:db8:2::/64 \ + encap ioam6 mode $mode trace prealloc \ + type ${bit2type[$i]} ns $ns size ${bit2size[$i]} \ + via 2001:db8:1::1 dev veth0 &>/dev/null + + if [ $? == 0 ] + then + run_test "input_bit$i" "${descr}" 2001:db8:1::2 \ + ${bit2type[$i]} ${bit2size[$i]} $ns $1 + else + log_test_failed "${descr}" + fi + done + + ip -netns $ioam_node_alpha \ + route change 2001:db8:2::/64 via 2001:db8:1::1 dev veth0 &>/dev/null + + [ "$1" == "encap" ] && ip -netns $ioam_node_gamma \ + link set ip6tnl0 down &>/dev/null + + bit2size[22]=$tmp +} + +input_sizes() { ############################################################################## - # Make sure that all three nodes correctly filled the full supported trace # - # by checking that the trace data is consistent with the predefined config. # + # Make sure an IOAM node handles all supported sizes correctly. # ############################################################################## - local desc="Forward - Full supported trace" + local desc="Trace Size of <n> bytes" + local ns=123 + local tr_type=0x800000 + local mode="$1" + + if [ "$1" == "encap" ] + then + mode+=" tundst 2001:db8:2::2" + ip -netns $ioam_node_gamma link set ip6tnl0 up &>/dev/null + fi - [ "$1" = "encap" ] && mode="$1 tundst db02::2" || mode="$1" - [ "$1" = "encap" ] && ip -netns $ioam_node_gamma link set ip6tnl0 up + local i + for i in $(seq 4 4 244) + do + local descr="${desc/<n>/$i}" - ip -netns $ioam_node_alpha route change db02::/64 encap ioam6 mode $mode \ - trace prealloc type 0xfff002 ns 123 size 244 via db01::1 dev veth0 + if [[ "$1" == "encap" && $encap_tests != 0 ]] + then + log_test_skipped "${descr}" + continue + fi - run_test ${FUNCNAME[0]} "${desc} ($1 mode)" $ioam_node_alpha $ioam_node_gamma \ - db02::2 0xfff002 123 $1 + ip -netns $ioam_node_alpha \ + route change 2001:db8:2::/64 \ + encap ioam6 mode $mode trace prealloc type $tr_type ns $ns size $i \ + via 2001:db8:1::1 dev veth0 &>/dev/null - [ "$1" = "encap" ] && ip -netns $ioam_node_gamma link set ip6tnl0 down + if [ $? == 0 ] + then + run_test "input_size$i" "${descr}" 2001:db8:1::2 $tr_type $i $ns $1 + else + log_test_failed "${descr}" + fi + done + + ip -netns $ioam_node_alpha \ + route change 2001:db8:2::/64 via 2001:db8:1::1 dev veth0 &>/dev/null + + [ "$1" == "encap" ] && ip -netns $ioam_node_gamma \ + link set ip6tnl0 down &>/dev/null +} + +input_full_supp_trace() +{ + ############################################################################## + # Make sure an IOAM node correctly fills a trace when all supported bits are # + # set. # + ############################################################################## + local desc="Full supported trace" + local ns=123 + local tr_type=0xfff002 + local tr_size + local mode="$1" + + if [ "$1" == "encap" ] + then + if [ $encap_tests != 0 ] + then + log_test_skipped "${desc}" + return + fi + + mode+=" tundst 2001:db8:2::2" + ip -netns $ioam_node_gamma link set ip6tnl0 up &>/dev/null + fi + + local i + tr_size=$(( ${#BETA[9]} + ((4 - (${#BETA[9]} % 4)) % 4) )) + for i in {0..11} {22..22} + do + tr_size=$((tr_size + bit2size[$i])) + done + + ip -netns $ioam_node_alpha \ + route change 2001:db8:2::/64 \ + encap ioam6 mode $mode trace prealloc type $tr_type ns $ns size $tr_size \ + via 2001:db8:1::1 dev veth0 &>/dev/null + + if [ $? == 0 ] + then + run_test ${FUNCNAME[0]} "${desc}" 2001:db8:1::2 $tr_type $tr_size $ns $1 + else + log_test_failed "${desc}" + fi + + ip -netns $ioam_node_alpha \ + route change 2001:db8:2::/64 via 2001:db8:1::1 dev veth0 &>/dev/null + + [ "$1" == "encap" ] && ip -netns $ioam_node_gamma \ + link set ip6tnl0 down &>/dev/null } @@ -742,30 +1655,29 @@ fwd_full_supp_trace() ################################################################################ npassed=0 +nskipped=0 nfailed=0 if [ "$(id -u)" -ne 0 ] then - echo "SKIP: Need root privileges" + echo "SKIP: Need root privileges." exit $ksft_skip fi if [ ! -x "$(command -v ip)" ] then - echo "SKIP: Could not run test without ip tool" - exit $ksft_skip -fi - -ip ioam &>/dev/null -if [ $? = 1 ] -then - echo "SKIP: iproute2 too old, missing ioam command" + echo "SKIP: Could not run test without ip tool." exit $ksft_skip fi check_kernel_compatibility - -cleanup &>/dev/null setup run -cleanup &>/dev/null +cleanup + +if [ $nfailed != 0 ] +then + exit $ksft_fail +fi + +exit $ksft_pass diff --git a/tools/testing/selftests/net/ioam6_parser.c b/tools/testing/selftests/net/ioam6_parser.c index 895e5bb5044b..de4b5c9e8a74 100644 --- a/tools/testing/selftests/net/ioam6_parser.c +++ b/tools/testing/selftests/net/ioam6_parser.c @@ -8,8 +8,10 @@ #include <errno.h> #include <limits.h> #include <linux/const.h> +#include <linux/if_ether.h> #include <linux/ioam6.h> #include <linux/ipv6.h> +#include <stdbool.h> #include <stdlib.h> #include <string.h> #include <unistd.h> @@ -40,7 +42,7 @@ static struct ioam_config node1 = { .egr_id = 101, .ingr_wide = 0xffffffff, /* default value */ .egr_wide = 101101, - .ns_data = 0xdeadbee0, + .ns_data = 0xdeadbeef, .ns_wide = 0xcafec0caf00dc0de, .sc_id = 777, .sc_data = "something that will be 4n-aligned", @@ -54,33 +56,22 @@ static struct ioam_config node2 = { .egr_id = 202, .ingr_wide = 201201, .egr_wide = 202202, - .ns_data = 0xdeadbee1, - .ns_wide = 0xcafec0caf11dc0de, - .sc_id = 666, - .sc_data = "Hello there -Obi", - .hlim = 63, -}; - -static struct ioam_config node3 = { - .id = 3, - .wide = 33333333, - .ingr_id = 301, - .egr_id = 0xffff, /* default value */ - .ingr_wide = 301301, - .egr_wide = 0xffffffff, /* default value */ - .ns_data = 0xdeadbee2, - .ns_wide = 0xcafec0caf22dc0de, + .ns_data = 0xffffffff, /* default value */ + .ns_wide = 0xffffffffffffffff, /* default value */ .sc_id = 0xffffff, /* default value */ .sc_data = NULL, - .hlim = 62, + .hlim = 63, }; enum { /********** * OUTPUT * **********/ + __TEST_OUT_MIN, + TEST_OUT_UNDEF_NS, TEST_OUT_NO_ROOM, + TEST_OUT_NO_ROOM_OSS, TEST_OUT_BIT0, TEST_OUT_BIT1, TEST_OUT_BIT2, @@ -94,13 +85,80 @@ enum { TEST_OUT_BIT10, TEST_OUT_BIT11, TEST_OUT_BIT22, + TEST_OUT_SIZE4, + TEST_OUT_SIZE8, + TEST_OUT_SIZE12, + TEST_OUT_SIZE16, + TEST_OUT_SIZE20, + TEST_OUT_SIZE24, + TEST_OUT_SIZE28, + TEST_OUT_SIZE32, + TEST_OUT_SIZE36, + TEST_OUT_SIZE40, + TEST_OUT_SIZE44, + TEST_OUT_SIZE48, + TEST_OUT_SIZE52, + TEST_OUT_SIZE56, + TEST_OUT_SIZE60, + TEST_OUT_SIZE64, + TEST_OUT_SIZE68, + TEST_OUT_SIZE72, + TEST_OUT_SIZE76, + TEST_OUT_SIZE80, + TEST_OUT_SIZE84, + TEST_OUT_SIZE88, + TEST_OUT_SIZE92, + TEST_OUT_SIZE96, + TEST_OUT_SIZE100, + TEST_OUT_SIZE104, + TEST_OUT_SIZE108, + TEST_OUT_SIZE112, + TEST_OUT_SIZE116, + TEST_OUT_SIZE120, + TEST_OUT_SIZE124, + TEST_OUT_SIZE128, + TEST_OUT_SIZE132, + TEST_OUT_SIZE136, + TEST_OUT_SIZE140, + TEST_OUT_SIZE144, + TEST_OUT_SIZE148, + TEST_OUT_SIZE152, + TEST_OUT_SIZE156, + TEST_OUT_SIZE160, + TEST_OUT_SIZE164, + TEST_OUT_SIZE168, + TEST_OUT_SIZE172, + TEST_OUT_SIZE176, + TEST_OUT_SIZE180, + TEST_OUT_SIZE184, + TEST_OUT_SIZE188, + TEST_OUT_SIZE192, + TEST_OUT_SIZE196, + TEST_OUT_SIZE200, + TEST_OUT_SIZE204, + TEST_OUT_SIZE208, + TEST_OUT_SIZE212, + TEST_OUT_SIZE216, + TEST_OUT_SIZE220, + TEST_OUT_SIZE224, + TEST_OUT_SIZE228, + TEST_OUT_SIZE232, + TEST_OUT_SIZE236, + TEST_OUT_SIZE240, + TEST_OUT_SIZE244, TEST_OUT_FULL_SUPP_TRACE, + __TEST_OUT_MAX, + /********* * INPUT * *********/ + __TEST_IN_MIN, + TEST_IN_UNDEF_NS, TEST_IN_NO_ROOM, + TEST_IN_NO_ROOM_OSS, + TEST_IN_DISABLED, TEST_IN_OFLAG, TEST_IN_BIT0, TEST_IN_BIT1, @@ -115,36 +173,107 @@ enum { TEST_IN_BIT10, TEST_IN_BIT11, TEST_IN_BIT22, + TEST_IN_SIZE4, + TEST_IN_SIZE8, + TEST_IN_SIZE12, + TEST_IN_SIZE16, + TEST_IN_SIZE20, + TEST_IN_SIZE24, + TEST_IN_SIZE28, + TEST_IN_SIZE32, + TEST_IN_SIZE36, + TEST_IN_SIZE40, + TEST_IN_SIZE44, + TEST_IN_SIZE48, + TEST_IN_SIZE52, + TEST_IN_SIZE56, + TEST_IN_SIZE60, + TEST_IN_SIZE64, + TEST_IN_SIZE68, + TEST_IN_SIZE72, + TEST_IN_SIZE76, + TEST_IN_SIZE80, + TEST_IN_SIZE84, + TEST_IN_SIZE88, + TEST_IN_SIZE92, + TEST_IN_SIZE96, + TEST_IN_SIZE100, + TEST_IN_SIZE104, + TEST_IN_SIZE108, + TEST_IN_SIZE112, + TEST_IN_SIZE116, + TEST_IN_SIZE120, + TEST_IN_SIZE124, + TEST_IN_SIZE128, + TEST_IN_SIZE132, + TEST_IN_SIZE136, + TEST_IN_SIZE140, + TEST_IN_SIZE144, + TEST_IN_SIZE148, + TEST_IN_SIZE152, + TEST_IN_SIZE156, + TEST_IN_SIZE160, + TEST_IN_SIZE164, + TEST_IN_SIZE168, + TEST_IN_SIZE172, + TEST_IN_SIZE176, + TEST_IN_SIZE180, + TEST_IN_SIZE184, + TEST_IN_SIZE188, + TEST_IN_SIZE192, + TEST_IN_SIZE196, + TEST_IN_SIZE200, + TEST_IN_SIZE204, + TEST_IN_SIZE208, + TEST_IN_SIZE212, + TEST_IN_SIZE216, + TEST_IN_SIZE220, + TEST_IN_SIZE224, + TEST_IN_SIZE228, + TEST_IN_SIZE232, + TEST_IN_SIZE236, + TEST_IN_SIZE240, + TEST_IN_SIZE244, TEST_IN_FULL_SUPP_TRACE, - /********** - * GLOBAL * - **********/ - TEST_FWD_FULL_SUPP_TRACE, + __TEST_IN_MAX, __TEST_MAX, }; -static int check_ioam_header(int tid, struct ioam6_trace_hdr *ioam6h, - __u32 trace_type, __u16 ioam_ns) +static int check_header(int tid, struct ioam6_trace_hdr *trace, + __u32 trace_type, __u8 trace_size, __u16 ioam_ns) { - if (__be16_to_cpu(ioam6h->namespace_id) != ioam_ns || - __be32_to_cpu(ioam6h->type_be32) != (trace_type << 8)) + if (__be16_to_cpu(trace->namespace_id) != ioam_ns || + __be32_to_cpu(trace->type_be32) != (trace_type << 8)) return 1; switch (tid) { case TEST_OUT_UNDEF_NS: case TEST_IN_UNDEF_NS: - return ioam6h->overflow || - ioam6h->nodelen != 1 || - ioam6h->remlen != 1; + case TEST_IN_DISABLED: + return trace->overflow == 1 || + trace->nodelen != 1 || + trace->remlen != 1; case TEST_OUT_NO_ROOM: case TEST_IN_NO_ROOM: case TEST_IN_OFLAG: - return !ioam6h->overflow || - ioam6h->nodelen != 2 || - ioam6h->remlen != 1; + return trace->overflow == 0 || + trace->nodelen != 2 || + trace->remlen != 1; + + case TEST_OUT_NO_ROOM_OSS: + return trace->overflow == 0 || + trace->nodelen != 0 || + trace->remlen != 1; + + case TEST_IN_NO_ROOM_OSS: + case TEST_OUT_BIT22: + case TEST_IN_BIT22: + return trace->overflow == 1 || + trace->nodelen != 0 || + trace->remlen != 0; case TEST_OUT_BIT0: case TEST_IN_BIT0: @@ -164,9 +293,9 @@ static int check_ioam_header(int tid, struct ioam6_trace_hdr *ioam6h, case TEST_IN_BIT7: case TEST_OUT_BIT11: case TEST_IN_BIT11: - return ioam6h->overflow || - ioam6h->nodelen != 1 || - ioam6h->remlen; + return trace->overflow == 1 || + trace->nodelen != 1 || + trace->remlen != 0; case TEST_OUT_BIT8: case TEST_IN_BIT8: @@ -174,22 +303,145 @@ static int check_ioam_header(int tid, struct ioam6_trace_hdr *ioam6h, case TEST_IN_BIT9: case TEST_OUT_BIT10: case TEST_IN_BIT10: - return ioam6h->overflow || - ioam6h->nodelen != 2 || - ioam6h->remlen; - - case TEST_OUT_BIT22: - case TEST_IN_BIT22: - return ioam6h->overflow || - ioam6h->nodelen || - ioam6h->remlen; + return trace->overflow == 1 || + trace->nodelen != 2 || + trace->remlen != 0; + + case TEST_OUT_SIZE4: + case TEST_OUT_SIZE8: + case TEST_OUT_SIZE12: + case TEST_OUT_SIZE16: + case TEST_OUT_SIZE20: + case TEST_OUT_SIZE24: + case TEST_OUT_SIZE28: + case TEST_OUT_SIZE32: + case TEST_OUT_SIZE36: + case TEST_OUT_SIZE40: + case TEST_OUT_SIZE44: + case TEST_OUT_SIZE48: + case TEST_OUT_SIZE52: + case TEST_OUT_SIZE56: + case TEST_OUT_SIZE60: + case TEST_OUT_SIZE64: + case TEST_OUT_SIZE68: + case TEST_OUT_SIZE72: + case TEST_OUT_SIZE76: + case TEST_OUT_SIZE80: + case TEST_OUT_SIZE84: + case TEST_OUT_SIZE88: + case TEST_OUT_SIZE92: + case TEST_OUT_SIZE96: + case TEST_OUT_SIZE100: + case TEST_OUT_SIZE104: + case TEST_OUT_SIZE108: + case TEST_OUT_SIZE112: + case TEST_OUT_SIZE116: + case TEST_OUT_SIZE120: + case TEST_OUT_SIZE124: + case TEST_OUT_SIZE128: + case TEST_OUT_SIZE132: + case TEST_OUT_SIZE136: + case TEST_OUT_SIZE140: + case TEST_OUT_SIZE144: + case TEST_OUT_SIZE148: + case TEST_OUT_SIZE152: + case TEST_OUT_SIZE156: + case TEST_OUT_SIZE160: + case TEST_OUT_SIZE164: + case TEST_OUT_SIZE168: + case TEST_OUT_SIZE172: + case TEST_OUT_SIZE176: + case TEST_OUT_SIZE180: + case TEST_OUT_SIZE184: + case TEST_OUT_SIZE188: + case TEST_OUT_SIZE192: + case TEST_OUT_SIZE196: + case TEST_OUT_SIZE200: + case TEST_OUT_SIZE204: + case TEST_OUT_SIZE208: + case TEST_OUT_SIZE212: + case TEST_OUT_SIZE216: + case TEST_OUT_SIZE220: + case TEST_OUT_SIZE224: + case TEST_OUT_SIZE228: + case TEST_OUT_SIZE232: + case TEST_OUT_SIZE236: + case TEST_OUT_SIZE240: + case TEST_OUT_SIZE244: + return trace->overflow == 1 || + trace->nodelen != 1 || + trace->remlen != trace_size / 4; + + case TEST_IN_SIZE4: + case TEST_IN_SIZE8: + case TEST_IN_SIZE12: + case TEST_IN_SIZE16: + case TEST_IN_SIZE20: + case TEST_IN_SIZE24: + case TEST_IN_SIZE28: + case TEST_IN_SIZE32: + case TEST_IN_SIZE36: + case TEST_IN_SIZE40: + case TEST_IN_SIZE44: + case TEST_IN_SIZE48: + case TEST_IN_SIZE52: + case TEST_IN_SIZE56: + case TEST_IN_SIZE60: + case TEST_IN_SIZE64: + case TEST_IN_SIZE68: + case TEST_IN_SIZE72: + case TEST_IN_SIZE76: + case TEST_IN_SIZE80: + case TEST_IN_SIZE84: + case TEST_IN_SIZE88: + case TEST_IN_SIZE92: + case TEST_IN_SIZE96: + case TEST_IN_SIZE100: + case TEST_IN_SIZE104: + case TEST_IN_SIZE108: + case TEST_IN_SIZE112: + case TEST_IN_SIZE116: + case TEST_IN_SIZE120: + case TEST_IN_SIZE124: + case TEST_IN_SIZE128: + case TEST_IN_SIZE132: + case TEST_IN_SIZE136: + case TEST_IN_SIZE140: + case TEST_IN_SIZE144: + case TEST_IN_SIZE148: + case TEST_IN_SIZE152: + case TEST_IN_SIZE156: + case TEST_IN_SIZE160: + case TEST_IN_SIZE164: + case TEST_IN_SIZE168: + case TEST_IN_SIZE172: + case TEST_IN_SIZE176: + case TEST_IN_SIZE180: + case TEST_IN_SIZE184: + case TEST_IN_SIZE188: + case TEST_IN_SIZE192: + case TEST_IN_SIZE196: + case TEST_IN_SIZE200: + case TEST_IN_SIZE204: + case TEST_IN_SIZE208: + case TEST_IN_SIZE212: + case TEST_IN_SIZE216: + case TEST_IN_SIZE220: + case TEST_IN_SIZE224: + case TEST_IN_SIZE228: + case TEST_IN_SIZE232: + case TEST_IN_SIZE236: + case TEST_IN_SIZE240: + case TEST_IN_SIZE244: + return trace->overflow == 1 || + trace->nodelen != 1 || + trace->remlen != (trace_size / 4) - trace->nodelen; case TEST_OUT_FULL_SUPP_TRACE: case TEST_IN_FULL_SUPP_TRACE: - case TEST_FWD_FULL_SUPP_TRACE: - return ioam6h->overflow || - ioam6h->nodelen != 15 || - ioam6h->remlen; + return trace->overflow == 1 || + trace->nodelen != 15 || + trace->remlen != 0; default: break; @@ -198,167 +450,137 @@ static int check_ioam_header(int tid, struct ioam6_trace_hdr *ioam6h, return 1; } -static int check_ioam6_data(__u8 **p, struct ioam6_trace_hdr *ioam6h, - const struct ioam_config cnf) +static int check_data(struct ioam6_trace_hdr *trace, __u8 trace_size, + const struct ioam_config cnf, bool is_output) { - unsigned int len; + unsigned int len, i; __u8 aligned; __u64 raw64; __u32 raw32; + __u8 *p; - if (ioam6h->type.bit0) { - raw32 = __be32_to_cpu(*((__u32 *)*p)); - if (cnf.hlim != (raw32 >> 24) || cnf.id != (raw32 & 0xffffff)) - return 1; - *p += sizeof(__u32); - } - - if (ioam6h->type.bit1) { - raw32 = __be32_to_cpu(*((__u32 *)*p)); - if (cnf.ingr_id != (raw32 >> 16) || - cnf.egr_id != (raw32 & 0xffff)) - return 1; - *p += sizeof(__u32); - } - - if (ioam6h->type.bit2) - *p += sizeof(__u32); - - if (ioam6h->type.bit3) - *p += sizeof(__u32); - - if (ioam6h->type.bit4) { - if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff) - return 1; - *p += sizeof(__u32); - } - - if (ioam6h->type.bit5) { - if (__be32_to_cpu(*((__u32 *)*p)) != cnf.ns_data) - return 1; - *p += sizeof(__u32); - } - - if (ioam6h->type.bit6) - *p += sizeof(__u32); + if (trace->type.bit12 | trace->type.bit13 | trace->type.bit14 | + trace->type.bit15 | trace->type.bit16 | trace->type.bit17 | + trace->type.bit18 | trace->type.bit19 | trace->type.bit20 | + trace->type.bit21 | trace->type.bit23) + return 1; - if (ioam6h->type.bit7) { - if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff) + for (i = 0; i < trace->remlen * 4; i++) { + if (trace->data[i] != 0) return 1; - *p += sizeof(__u32); } - if (ioam6h->type.bit8) { - raw64 = __be64_to_cpu(*((__u64 *)*p)); - if (cnf.hlim != (raw64 >> 56) || - cnf.wide != (raw64 & 0xffffffffffffff)) - return 1; - *p += sizeof(__u64); - } + if (trace->remlen * 4 == trace_size) + return 0; - if (ioam6h->type.bit9) { - if (__be32_to_cpu(*((__u32 *)*p)) != cnf.ingr_wide) - return 1; - *p += sizeof(__u32); + p = trace->data + trace->remlen * 4; - if (__be32_to_cpu(*((__u32 *)*p)) != cnf.egr_wide) + if (trace->type.bit0) { + raw32 = __be32_to_cpu(*((__u32 *)p)); + if (cnf.hlim != (raw32 >> 24) || cnf.id != (raw32 & 0xffffff)) return 1; - *p += sizeof(__u32); + p += sizeof(__u32); } - if (ioam6h->type.bit10) { - if (__be64_to_cpu(*((__u64 *)*p)) != cnf.ns_wide) + if (trace->type.bit1) { + raw32 = __be32_to_cpu(*((__u32 *)p)); + if (cnf.ingr_id != (raw32 >> 16) || + cnf.egr_id != (raw32 & 0xffff)) return 1; - *p += sizeof(__u64); + p += sizeof(__u32); } - if (ioam6h->type.bit11) { - if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff) + if (trace->type.bit2) { + raw32 = __be32_to_cpu(*((__u32 *)p)); + if ((is_output && raw32 != 0xffffffff) || + (!is_output && (raw32 == 0 || raw32 == 0xffffffff))) return 1; - *p += sizeof(__u32); + p += sizeof(__u32); } - if (ioam6h->type.bit12) { - if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff) + if (trace->type.bit3) { + raw32 = __be32_to_cpu(*((__u32 *)p)); + if ((is_output && raw32 != 0xffffffff) || + (!is_output && (raw32 == 0 || raw32 == 0xffffffff))) return 1; - *p += sizeof(__u32); + p += sizeof(__u32); } - if (ioam6h->type.bit13) { - if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff) + if (trace->type.bit4) { + if (__be32_to_cpu(*((__u32 *)p)) != 0xffffffff) return 1; - *p += sizeof(__u32); + p += sizeof(__u32); } - if (ioam6h->type.bit14) { - if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff) + if (trace->type.bit5) { + if (__be32_to_cpu(*((__u32 *)p)) != cnf.ns_data) return 1; - *p += sizeof(__u32); + p += sizeof(__u32); } - if (ioam6h->type.bit15) { - if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff) + if (trace->type.bit6) { + if (__be32_to_cpu(*((__u32 *)p)) == 0xffffffff) return 1; - *p += sizeof(__u32); + p += sizeof(__u32); } - if (ioam6h->type.bit16) { - if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff) + if (trace->type.bit7) { + if (__be32_to_cpu(*((__u32 *)p)) != 0xffffffff) return 1; - *p += sizeof(__u32); + p += sizeof(__u32); } - if (ioam6h->type.bit17) { - if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff) + if (trace->type.bit8) { + raw64 = __be64_to_cpu(*((__u64 *)p)); + if (cnf.hlim != (raw64 >> 56) || + cnf.wide != (raw64 & 0xffffffffffffff)) return 1; - *p += sizeof(__u32); + p += sizeof(__u64); } - if (ioam6h->type.bit18) { - if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff) + if (trace->type.bit9) { + if (__be32_to_cpu(*((__u32 *)p)) != cnf.ingr_wide) return 1; - *p += sizeof(__u32); - } + p += sizeof(__u32); - if (ioam6h->type.bit19) { - if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff) + if (__be32_to_cpu(*((__u32 *)p)) != cnf.egr_wide) return 1; - *p += sizeof(__u32); + p += sizeof(__u32); } - if (ioam6h->type.bit20) { - if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff) + if (trace->type.bit10) { + if (__be64_to_cpu(*((__u64 *)p)) != cnf.ns_wide) return 1; - *p += sizeof(__u32); + p += sizeof(__u64); } - if (ioam6h->type.bit21) { - if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff) + if (trace->type.bit11) { + if (__be32_to_cpu(*((__u32 *)p)) != 0xffffffff) return 1; - *p += sizeof(__u32); + p += sizeof(__u32); } - if (ioam6h->type.bit22) { + if (trace->type.bit22) { len = cnf.sc_data ? strlen(cnf.sc_data) : 0; aligned = cnf.sc_data ? __ALIGN_KERNEL(len, 4) : 0; - raw32 = __be32_to_cpu(*((__u32 *)*p)); + raw32 = __be32_to_cpu(*((__u32 *)p)); if (aligned != (raw32 >> 24) * 4 || cnf.sc_id != (raw32 & 0xffffff)) return 1; - *p += sizeof(__u32); + p += sizeof(__u32); if (cnf.sc_data) { - if (strncmp((char *)*p, cnf.sc_data, len)) + if (strncmp((char *)p, cnf.sc_data, len)) return 1; - *p += len; + p += len; aligned -= len; while (aligned--) { - if (**p != '\0') + if (*p != '\0') return 1; - *p += sizeof(__u8); + p += sizeof(__u8); } } } @@ -366,151 +588,351 @@ static int check_ioam6_data(__u8 **p, struct ioam6_trace_hdr *ioam6h, return 0; } -static int check_ioam_header_and_data(int tid, struct ioam6_trace_hdr *ioam6h, - __u32 trace_type, __u16 ioam_ns) +static int check_ioam_trace(int tid, struct ioam6_trace_hdr *trace, + __u32 trace_type, __u8 trace_size, __u16 ioam_ns) { - __u8 *p; - - if (check_ioam_header(tid, ioam6h, trace_type, ioam_ns)) + if (check_header(tid, trace, trace_type, trace_size, ioam_ns)) return 1; - p = ioam6h->data + ioam6h->remlen * 4; - - switch (tid) { - case TEST_OUT_BIT0: - case TEST_OUT_BIT1: - case TEST_OUT_BIT2: - case TEST_OUT_BIT3: - case TEST_OUT_BIT4: - case TEST_OUT_BIT5: - case TEST_OUT_BIT6: - case TEST_OUT_BIT7: - case TEST_OUT_BIT8: - case TEST_OUT_BIT9: - case TEST_OUT_BIT10: - case TEST_OUT_BIT11: - case TEST_OUT_BIT22: - case TEST_OUT_FULL_SUPP_TRACE: - return check_ioam6_data(&p, ioam6h, node1); - - case TEST_IN_BIT0: - case TEST_IN_BIT1: - case TEST_IN_BIT2: - case TEST_IN_BIT3: - case TEST_IN_BIT4: - case TEST_IN_BIT5: - case TEST_IN_BIT6: - case TEST_IN_BIT7: - case TEST_IN_BIT8: - case TEST_IN_BIT9: - case TEST_IN_BIT10: - case TEST_IN_BIT11: - case TEST_IN_BIT22: - case TEST_IN_FULL_SUPP_TRACE: - { - __u32 tmp32 = node2.egr_wide; - __u16 tmp16 = node2.egr_id; - int res; - - node2.egr_id = 0xffff; - node2.egr_wide = 0xffffffff; + if (tid > __TEST_OUT_MIN && tid < __TEST_OUT_MAX) + return check_data(trace, trace_size, node1, true); - res = check_ioam6_data(&p, ioam6h, node2); - - node2.egr_id = tmp16; - node2.egr_wide = tmp32; - - return res; - } - - case TEST_FWD_FULL_SUPP_TRACE: - if (check_ioam6_data(&p, ioam6h, node3)) - return 1; - if (check_ioam6_data(&p, ioam6h, node2)) - return 1; - return check_ioam6_data(&p, ioam6h, node1); - - default: - break; - } + if (tid > __TEST_IN_MIN && tid < __TEST_IN_MAX) + return check_data(trace, trace_size, node2, false); return 1; } static int str2id(const char *tname) { - if (!strcmp("out_undef_ns", tname)) + if (!strcmp("output_undef_ns", tname)) return TEST_OUT_UNDEF_NS; - if (!strcmp("out_no_room", tname)) + if (!strcmp("output_no_room", tname)) return TEST_OUT_NO_ROOM; - if (!strcmp("out_bit0", tname)) + if (!strcmp("output_no_room_oss", tname)) + return TEST_OUT_NO_ROOM_OSS; + if (!strcmp("output_bit0", tname)) return TEST_OUT_BIT0; - if (!strcmp("out_bit1", tname)) + if (!strcmp("output_bit1", tname)) return TEST_OUT_BIT1; - if (!strcmp("out_bit2", tname)) + if (!strcmp("output_bit2", tname)) return TEST_OUT_BIT2; - if (!strcmp("out_bit3", tname)) + if (!strcmp("output_bit3", tname)) return TEST_OUT_BIT3; - if (!strcmp("out_bit4", tname)) + if (!strcmp("output_bit4", tname)) return TEST_OUT_BIT4; - if (!strcmp("out_bit5", tname)) + if (!strcmp("output_bit5", tname)) return TEST_OUT_BIT5; - if (!strcmp("out_bit6", tname)) + if (!strcmp("output_bit6", tname)) return TEST_OUT_BIT6; - if (!strcmp("out_bit7", tname)) + if (!strcmp("output_bit7", tname)) return TEST_OUT_BIT7; - if (!strcmp("out_bit8", tname)) + if (!strcmp("output_bit8", tname)) return TEST_OUT_BIT8; - if (!strcmp("out_bit9", tname)) + if (!strcmp("output_bit9", tname)) return TEST_OUT_BIT9; - if (!strcmp("out_bit10", tname)) + if (!strcmp("output_bit10", tname)) return TEST_OUT_BIT10; - if (!strcmp("out_bit11", tname)) + if (!strcmp("output_bit11", tname)) return TEST_OUT_BIT11; - if (!strcmp("out_bit22", tname)) + if (!strcmp("output_bit22", tname)) return TEST_OUT_BIT22; - if (!strcmp("out_full_supp_trace", tname)) + if (!strcmp("output_size4", tname)) + return TEST_OUT_SIZE4; + if (!strcmp("output_size8", tname)) + return TEST_OUT_SIZE8; + if (!strcmp("output_size12", tname)) + return TEST_OUT_SIZE12; + if (!strcmp("output_size16", tname)) + return TEST_OUT_SIZE16; + if (!strcmp("output_size20", tname)) + return TEST_OUT_SIZE20; + if (!strcmp("output_size24", tname)) + return TEST_OUT_SIZE24; + if (!strcmp("output_size28", tname)) + return TEST_OUT_SIZE28; + if (!strcmp("output_size32", tname)) + return TEST_OUT_SIZE32; + if (!strcmp("output_size36", tname)) + return TEST_OUT_SIZE36; + if (!strcmp("output_size40", tname)) + return TEST_OUT_SIZE40; + if (!strcmp("output_size44", tname)) + return TEST_OUT_SIZE44; + if (!strcmp("output_size48", tname)) + return TEST_OUT_SIZE48; + if (!strcmp("output_size52", tname)) + return TEST_OUT_SIZE52; + if (!strcmp("output_size56", tname)) + return TEST_OUT_SIZE56; + if (!strcmp("output_size60", tname)) + return TEST_OUT_SIZE60; + if (!strcmp("output_size64", tname)) + return TEST_OUT_SIZE64; + if (!strcmp("output_size68", tname)) + return TEST_OUT_SIZE68; + if (!strcmp("output_size72", tname)) + return TEST_OUT_SIZE72; + if (!strcmp("output_size76", tname)) + return TEST_OUT_SIZE76; + if (!strcmp("output_size80", tname)) + return TEST_OUT_SIZE80; + if (!strcmp("output_size84", tname)) + return TEST_OUT_SIZE84; + if (!strcmp("output_size88", tname)) + return TEST_OUT_SIZE88; + if (!strcmp("output_size92", tname)) + return TEST_OUT_SIZE92; + if (!strcmp("output_size96", tname)) + return TEST_OUT_SIZE96; + if (!strcmp("output_size100", tname)) + return TEST_OUT_SIZE100; + if (!strcmp("output_size104", tname)) + return TEST_OUT_SIZE104; + if (!strcmp("output_size108", tname)) + return TEST_OUT_SIZE108; + if (!strcmp("output_size112", tname)) + return TEST_OUT_SIZE112; + if (!strcmp("output_size116", tname)) + return TEST_OUT_SIZE116; + if (!strcmp("output_size120", tname)) + return TEST_OUT_SIZE120; + if (!strcmp("output_size124", tname)) + return TEST_OUT_SIZE124; + if (!strcmp("output_size128", tname)) + return TEST_OUT_SIZE128; + if (!strcmp("output_size132", tname)) + return TEST_OUT_SIZE132; + if (!strcmp("output_size136", tname)) + return TEST_OUT_SIZE136; + if (!strcmp("output_size140", tname)) + return TEST_OUT_SIZE140; + if (!strcmp("output_size144", tname)) + return TEST_OUT_SIZE144; + if (!strcmp("output_size148", tname)) + return TEST_OUT_SIZE148; + if (!strcmp("output_size152", tname)) + return TEST_OUT_SIZE152; + if (!strcmp("output_size156", tname)) + return TEST_OUT_SIZE156; + if (!strcmp("output_size160", tname)) + return TEST_OUT_SIZE160; + if (!strcmp("output_size164", tname)) + return TEST_OUT_SIZE164; + if (!strcmp("output_size168", tname)) + return TEST_OUT_SIZE168; + if (!strcmp("output_size172", tname)) + return TEST_OUT_SIZE172; + if (!strcmp("output_size176", tname)) + return TEST_OUT_SIZE176; + if (!strcmp("output_size180", tname)) + return TEST_OUT_SIZE180; + if (!strcmp("output_size184", tname)) + return TEST_OUT_SIZE184; + if (!strcmp("output_size188", tname)) + return TEST_OUT_SIZE188; + if (!strcmp("output_size192", tname)) + return TEST_OUT_SIZE192; + if (!strcmp("output_size196", tname)) + return TEST_OUT_SIZE196; + if (!strcmp("output_size200", tname)) + return TEST_OUT_SIZE200; + if (!strcmp("output_size204", tname)) + return TEST_OUT_SIZE204; + if (!strcmp("output_size208", tname)) + return TEST_OUT_SIZE208; + if (!strcmp("output_size212", tname)) + return TEST_OUT_SIZE212; + if (!strcmp("output_size216", tname)) + return TEST_OUT_SIZE216; + if (!strcmp("output_size220", tname)) + return TEST_OUT_SIZE220; + if (!strcmp("output_size224", tname)) + return TEST_OUT_SIZE224; + if (!strcmp("output_size228", tname)) + return TEST_OUT_SIZE228; + if (!strcmp("output_size232", tname)) + return TEST_OUT_SIZE232; + if (!strcmp("output_size236", tname)) + return TEST_OUT_SIZE236; + if (!strcmp("output_size240", tname)) + return TEST_OUT_SIZE240; + if (!strcmp("output_size244", tname)) + return TEST_OUT_SIZE244; + if (!strcmp("output_full_supp_trace", tname)) return TEST_OUT_FULL_SUPP_TRACE; - if (!strcmp("in_undef_ns", tname)) + if (!strcmp("input_undef_ns", tname)) return TEST_IN_UNDEF_NS; - if (!strcmp("in_no_room", tname)) + if (!strcmp("input_no_room", tname)) return TEST_IN_NO_ROOM; - if (!strcmp("in_oflag", tname)) + if (!strcmp("input_no_room_oss", tname)) + return TEST_IN_NO_ROOM_OSS; + if (!strcmp("input_disabled", tname)) + return TEST_IN_DISABLED; + if (!strcmp("input_oflag", tname)) return TEST_IN_OFLAG; - if (!strcmp("in_bit0", tname)) + if (!strcmp("input_bit0", tname)) return TEST_IN_BIT0; - if (!strcmp("in_bit1", tname)) + if (!strcmp("input_bit1", tname)) return TEST_IN_BIT1; - if (!strcmp("in_bit2", tname)) + if (!strcmp("input_bit2", tname)) return TEST_IN_BIT2; - if (!strcmp("in_bit3", tname)) + if (!strcmp("input_bit3", tname)) return TEST_IN_BIT3; - if (!strcmp("in_bit4", tname)) + if (!strcmp("input_bit4", tname)) return TEST_IN_BIT4; - if (!strcmp("in_bit5", tname)) + if (!strcmp("input_bit5", tname)) return TEST_IN_BIT5; - if (!strcmp("in_bit6", tname)) + if (!strcmp("input_bit6", tname)) return TEST_IN_BIT6; - if (!strcmp("in_bit7", tname)) + if (!strcmp("input_bit7", tname)) return TEST_IN_BIT7; - if (!strcmp("in_bit8", tname)) + if (!strcmp("input_bit8", tname)) return TEST_IN_BIT8; - if (!strcmp("in_bit9", tname)) + if (!strcmp("input_bit9", tname)) return TEST_IN_BIT9; - if (!strcmp("in_bit10", tname)) + if (!strcmp("input_bit10", tname)) return TEST_IN_BIT10; - if (!strcmp("in_bit11", tname)) + if (!strcmp("input_bit11", tname)) return TEST_IN_BIT11; - if (!strcmp("in_bit22", tname)) + if (!strcmp("input_bit22", tname)) return TEST_IN_BIT22; - if (!strcmp("in_full_supp_trace", tname)) + if (!strcmp("input_size4", tname)) + return TEST_IN_SIZE4; + if (!strcmp("input_size8", tname)) + return TEST_IN_SIZE8; + if (!strcmp("input_size12", tname)) + return TEST_IN_SIZE12; + if (!strcmp("input_size16", tname)) + return TEST_IN_SIZE16; + if (!strcmp("input_size20", tname)) + return TEST_IN_SIZE20; + if (!strcmp("input_size24", tname)) + return TEST_IN_SIZE24; + if (!strcmp("input_size28", tname)) + return TEST_IN_SIZE28; + if (!strcmp("input_size32", tname)) + return TEST_IN_SIZE32; + if (!strcmp("input_size36", tname)) + return TEST_IN_SIZE36; + if (!strcmp("input_size40", tname)) + return TEST_IN_SIZE40; + if (!strcmp("input_size44", tname)) + return TEST_IN_SIZE44; + if (!strcmp("input_size48", tname)) + return TEST_IN_SIZE48; + if (!strcmp("input_size52", tname)) + return TEST_IN_SIZE52; + if (!strcmp("input_size56", tname)) + return TEST_IN_SIZE56; + if (!strcmp("input_size60", tname)) + return TEST_IN_SIZE60; + if (!strcmp("input_size64", tname)) + return TEST_IN_SIZE64; + if (!strcmp("input_size68", tname)) + return TEST_IN_SIZE68; + if (!strcmp("input_size72", tname)) + return TEST_IN_SIZE72; + if (!strcmp("input_size76", tname)) + return TEST_IN_SIZE76; + if (!strcmp("input_size80", tname)) + return TEST_IN_SIZE80; + if (!strcmp("input_size84", tname)) + return TEST_IN_SIZE84; + if (!strcmp("input_size88", tname)) + return TEST_IN_SIZE88; + if (!strcmp("input_size92", tname)) + return TEST_IN_SIZE92; + if (!strcmp("input_size96", tname)) + return TEST_IN_SIZE96; + if (!strcmp("input_size100", tname)) + return TEST_IN_SIZE100; + if (!strcmp("input_size104", tname)) + return TEST_IN_SIZE104; + if (!strcmp("input_size108", tname)) + return TEST_IN_SIZE108; + if (!strcmp("input_size112", tname)) + return TEST_IN_SIZE112; + if (!strcmp("input_size116", tname)) + return TEST_IN_SIZE116; + if (!strcmp("input_size120", tname)) + return TEST_IN_SIZE120; + if (!strcmp("input_size124", tname)) + return TEST_IN_SIZE124; + if (!strcmp("input_size128", tname)) + return TEST_IN_SIZE128; + if (!strcmp("input_size132", tname)) + return TEST_IN_SIZE132; + if (!strcmp("input_size136", tname)) + return TEST_IN_SIZE136; + if (!strcmp("input_size140", tname)) + return TEST_IN_SIZE140; + if (!strcmp("input_size144", tname)) + return TEST_IN_SIZE144; + if (!strcmp("input_size148", tname)) + return TEST_IN_SIZE148; + if (!strcmp("input_size152", tname)) + return TEST_IN_SIZE152; + if (!strcmp("input_size156", tname)) + return TEST_IN_SIZE156; + if (!strcmp("input_size160", tname)) + return TEST_IN_SIZE160; + if (!strcmp("input_size164", tname)) + return TEST_IN_SIZE164; + if (!strcmp("input_size168", tname)) + return TEST_IN_SIZE168; + if (!strcmp("input_size172", tname)) + return TEST_IN_SIZE172; + if (!strcmp("input_size176", tname)) + return TEST_IN_SIZE176; + if (!strcmp("input_size180", tname)) + return TEST_IN_SIZE180; + if (!strcmp("input_size184", tname)) + return TEST_IN_SIZE184; + if (!strcmp("input_size188", tname)) + return TEST_IN_SIZE188; + if (!strcmp("input_size192", tname)) + return TEST_IN_SIZE192; + if (!strcmp("input_size196", tname)) + return TEST_IN_SIZE196; + if (!strcmp("input_size200", tname)) + return TEST_IN_SIZE200; + if (!strcmp("input_size204", tname)) + return TEST_IN_SIZE204; + if (!strcmp("input_size208", tname)) + return TEST_IN_SIZE208; + if (!strcmp("input_size212", tname)) + return TEST_IN_SIZE212; + if (!strcmp("input_size216", tname)) + return TEST_IN_SIZE216; + if (!strcmp("input_size220", tname)) + return TEST_IN_SIZE220; + if (!strcmp("input_size224", tname)) + return TEST_IN_SIZE224; + if (!strcmp("input_size228", tname)) + return TEST_IN_SIZE228; + if (!strcmp("input_size232", tname)) + return TEST_IN_SIZE232; + if (!strcmp("input_size236", tname)) + return TEST_IN_SIZE236; + if (!strcmp("input_size240", tname)) + return TEST_IN_SIZE240; + if (!strcmp("input_size244", tname)) + return TEST_IN_SIZE244; + if (!strcmp("input_full_supp_trace", tname)) return TEST_IN_FULL_SUPP_TRACE; - if (!strcmp("fwd_full_supp_trace", tname)) - return TEST_FWD_FULL_SUPP_TRACE; return -1; } +static int ipv6_addr_equal(const struct in6_addr *a1, const struct in6_addr *a2) +{ + return ((a1->s6_addr32[0] ^ a2->s6_addr32[0]) | + (a1->s6_addr32[1] ^ a2->s6_addr32[1]) | + (a1->s6_addr32[2] ^ a2->s6_addr32[2]) | + (a1->s6_addr32[3] ^ a2->s6_addr32[3])) == 0; +} + static int get_u32(__u32 *val, const char *arg, int base) { unsigned long res; @@ -555,119 +977,124 @@ static int get_u16(__u16 *val, const char *arg, int base) return 0; } -static int (*func[__TEST_MAX])(int, struct ioam6_trace_hdr *, __u32, __u16) = { - [TEST_OUT_UNDEF_NS] = check_ioam_header, - [TEST_OUT_NO_ROOM] = check_ioam_header, - [TEST_OUT_BIT0] = check_ioam_header_and_data, - [TEST_OUT_BIT1] = check_ioam_header_and_data, - [TEST_OUT_BIT2] = check_ioam_header_and_data, - [TEST_OUT_BIT3] = check_ioam_header_and_data, - [TEST_OUT_BIT4] = check_ioam_header_and_data, - [TEST_OUT_BIT5] = check_ioam_header_and_data, - [TEST_OUT_BIT6] = check_ioam_header_and_data, - [TEST_OUT_BIT7] = check_ioam_header_and_data, - [TEST_OUT_BIT8] = check_ioam_header_and_data, - [TEST_OUT_BIT9] = check_ioam_header_and_data, - [TEST_OUT_BIT10] = check_ioam_header_and_data, - [TEST_OUT_BIT11] = check_ioam_header_and_data, - [TEST_OUT_BIT22] = check_ioam_header_and_data, - [TEST_OUT_FULL_SUPP_TRACE] = check_ioam_header_and_data, - [TEST_IN_UNDEF_NS] = check_ioam_header, - [TEST_IN_NO_ROOM] = check_ioam_header, - [TEST_IN_OFLAG] = check_ioam_header, - [TEST_IN_BIT0] = check_ioam_header_and_data, - [TEST_IN_BIT1] = check_ioam_header_and_data, - [TEST_IN_BIT2] = check_ioam_header_and_data, - [TEST_IN_BIT3] = check_ioam_header_and_data, - [TEST_IN_BIT4] = check_ioam_header_and_data, - [TEST_IN_BIT5] = check_ioam_header_and_data, - [TEST_IN_BIT6] = check_ioam_header_and_data, - [TEST_IN_BIT7] = check_ioam_header_and_data, - [TEST_IN_BIT8] = check_ioam_header_and_data, - [TEST_IN_BIT9] = check_ioam_header_and_data, - [TEST_IN_BIT10] = check_ioam_header_and_data, - [TEST_IN_BIT11] = check_ioam_header_and_data, - [TEST_IN_BIT22] = check_ioam_header_and_data, - [TEST_IN_FULL_SUPP_TRACE] = check_ioam_header_and_data, - [TEST_FWD_FULL_SUPP_TRACE] = check_ioam_header_and_data, -}; +static int get_u8(__u8 *val, const char *arg, int base) +{ + unsigned long res; + char *ptr; + + if (!arg || !*arg) + return -1; + res = strtoul(arg, &ptr, base); + + if (!ptr || ptr == arg || *ptr) + return -1; + + if (res == ULONG_MAX && errno == ERANGE) + return -1; + + if (res > 0xFFUL) + return -1; + + *val = res; + return 0; +} int main(int argc, char **argv) { - int fd, size, hoplen, tid, ret = 1, on = 1; - struct ioam6_hdr *opt; - struct cmsghdr *cmsg; - struct msghdr msg; - struct iovec iov; - __u8 buffer[512]; + __u8 buffer[512], *ptr, nexthdr, tr_size; + struct ioam6_trace_hdr *trace; + unsigned int hoplen, ret = 1; + struct ipv6_hopopt_hdr *hbh; + int fd, size, testname_id; + struct in6_addr src, dst; + struct ioam6_hdr *ioam6; + struct timeval timeout; + struct ipv6hdr *ipv6; __u32 tr_type; __u16 ioam_ns; - __u8 *ptr; - if (argc != 5) + if (argc != 9) goto out; - tid = str2id(argv[1]); - if (tid < 0 || !func[tid]) - goto out; + testname_id = str2id(argv[2]); - if (get_u32(&tr_type, argv[2], 16) || - get_u16(&ioam_ns, argv[3], 0)) + if (testname_id < 0 || + inet_pton(AF_INET6, argv[3], &src) != 1 || + inet_pton(AF_INET6, argv[4], &dst) != 1 || + get_u32(&tr_type, argv[5], 16) || + get_u8(&tr_size, argv[6], 0) || + get_u16(&ioam_ns, argv[7], 0)) goto out; - fd = socket(PF_INET6, SOCK_RAW, - !strcmp(argv[4], "encap") ? IPPROTO_IPV6 : IPPROTO_ICMPV6); + nexthdr = (!strcmp(argv[8], "encap") ? IPPROTO_IPV6 : IPPROTO_ICMPV6); + + hoplen = sizeof(*hbh); + hoplen += 2; // 2-byte padding for alignment + hoplen += sizeof(*ioam6); // IOAM option header + hoplen += sizeof(*trace); // IOAM trace header + hoplen += tr_size; // IOAM trace size + hoplen += (tr_size % 8); // optional padding + + fd = socket(AF_PACKET, SOCK_DGRAM, __cpu_to_be16(ETH_P_IPV6)); if (fd < 0) goto out; - setsockopt(fd, IPPROTO_IPV6, IPV6_RECVHOPOPTS, &on, sizeof(on)); + if (setsockopt(fd, SOL_SOCKET, SO_BINDTODEVICE, + argv[1], strlen(argv[1]))) + goto close; - iov.iov_len = 1; - iov.iov_base = malloc(CMSG_SPACE(sizeof(buffer))); - if (!iov.iov_base) + timeout.tv_sec = 1; + timeout.tv_usec = 0; + if (setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, + (const char *)&timeout, sizeof(timeout))) goto close; recv: - memset(&msg, 0, sizeof(msg)); - msg.msg_iov = &iov; - msg.msg_iovlen = 1; - msg.msg_control = buffer; - msg.msg_controllen = CMSG_SPACE(sizeof(buffer)); - - size = recvmsg(fd, &msg, 0); + size = recv(fd, buffer, sizeof(buffer), 0); if (size <= 0) goto close; - for (cmsg = CMSG_FIRSTHDR(&msg); cmsg; cmsg = CMSG_NXTHDR(&msg, cmsg)) { - if (cmsg->cmsg_level != IPPROTO_IPV6 || - cmsg->cmsg_type != IPV6_HOPOPTS || - cmsg->cmsg_len < sizeof(struct ipv6_hopopt_hdr)) - continue; + ipv6 = (struct ipv6hdr *)buffer; + + /* Skip packets that do not have the expected src/dst address or that + * do not have a Hop-by-hop. + */ + if (!ipv6_addr_equal(&ipv6->saddr, &src) || + !ipv6_addr_equal(&ipv6->daddr, &dst) || + ipv6->nexthdr != IPPROTO_HOPOPTS) + goto recv; + + /* Check Hbh's Next Header and Size. */ + hbh = (struct ipv6_hopopt_hdr *)(buffer + sizeof(*ipv6)); + if (hbh->nexthdr != nexthdr || hbh->hdrlen != (hoplen >> 3) - 1) + goto close; - ptr = (__u8 *)CMSG_DATA(cmsg); + /* Check we have a 2-byte padding for alignment. */ + ptr = (__u8 *)hbh + sizeof(*hbh); + if (ptr[0] != IPV6_TLV_PADN && ptr[1] != 0) + goto close; - hoplen = (ptr[1] + 1) << 3; - ptr += sizeof(struct ipv6_hopopt_hdr); + /* Check we now have the IOAM option. */ + ptr += 2; + if (ptr[0] != IPV6_TLV_IOAM) + goto close; - while (hoplen > 0) { - opt = (struct ioam6_hdr *)ptr; + /* Check its size and the IOAM option type. */ + ioam6 = (struct ioam6_hdr *)ptr; + if (ioam6->opt_len != sizeof(*ioam6) - 2 + sizeof(*trace) + tr_size || + ioam6->type != IOAM6_TYPE_PREALLOC) + goto close; - if (opt->opt_type == IPV6_TLV_IOAM && - opt->type == IOAM6_TYPE_PREALLOC) { - ptr += sizeof(*opt); - ret = func[tid](tid, - (struct ioam6_trace_hdr *)ptr, - tr_type, ioam_ns); - goto close; - } + trace = (struct ioam6_trace_hdr *)(ptr + sizeof(*ioam6)); - ptr += opt->opt_len + 2; - hoplen -= opt->opt_len + 2; - } - } + /* Check the trailing 4-byte padding (potentially). */ + ptr = (__u8 *)trace + sizeof(*trace) + tr_size; + if (tr_size % 8 && ptr[0] != IPV6_TLV_PADN && ptr[1] != 2 && + ptr[2] != 0 && ptr[3] != 0) + goto close; - goto recv; + /* Check the IOAM header and data. */ + ret = check_ioam_trace(testname_id, trace, tr_type, tr_size, ioam_ns); close: - free(iov.iov_base); close(fd); out: return ret; diff --git a/tools/testing/selftests/net/ipv6_route_update_soft_lockup.sh b/tools/testing/selftests/net/ipv6_route_update_soft_lockup.sh new file mode 100755 index 000000000000..a6b2b1f9c641 --- /dev/null +++ b/tools/testing/selftests/net/ipv6_route_update_soft_lockup.sh @@ -0,0 +1,262 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Testing for potential kernel soft lockup during IPv6 routing table +# refresh under heavy outgoing IPv6 traffic. If a kernel soft lockup +# occurs, a kernel panic will be triggered to prevent associated issues. +# +# +# Test Environment Layout +# +# ┌----------------┐ ┌----------------┐ +# | SOURCE_NS | | SINK_NS | +# | NAMESPACE | | NAMESPACE | +# |(iperf3 clients)| |(iperf3 servers)| +# | | | | +# | | | | +# | ┌-----------| nexthops |---------┐ | +# | |veth_source|<--------------------------------------->|veth_sink|<┐ | +# | └-----------|2001:0DB8:1::0:1/96 2001:0DB8:1::1:1/96 |---------┘ | | +# | | ^ 2001:0DB8:1::1:2/96 | | | +# | | . . | fwd | | +# | ┌---------┐ | . . | | | +# | | IPv6 | | . . | V | +# | | routing | | . 2001:0DB8:1::1:80/96| ┌-----┐ | +# | | table | | . | | lo | | +# | | nexthop | | . └--------┴-----┴-┘ +# | | update | | ............................> 2001:0DB8:2::1:1/128 +# | └-------- ┘ | +# └----------------┘ +# +# The test script sets up two network namespaces, source_ns and sink_ns, +# connected via a veth link. Within source_ns, it continuously updates the +# IPv6 routing table by flushing and inserting IPV6_NEXTHOP_ADDR_COUNT nexthop +# IPs destined for SINK_LOOPBACK_IP_ADDR in sink_ns. This refresh occurs at a +# rate of 1/ROUTING_TABLE_REFRESH_PERIOD per second for TEST_DURATION seconds. +# +# Simultaneously, multiple iperf3 clients within source_ns generate heavy +# outgoing IPv6 traffic. Each client is assigned a unique port number starting +# at 5000 and incrementing sequentially. Each client targets a unique iperf3 +# server running in sink_ns, connected to the SINK_LOOPBACK_IFACE interface +# using the same port number. +# +# The number of iperf3 servers and clients is set to half of the total +# available cores on each machine. +# +# NOTE: We have tested this script on machines with various CPU specifications, +# ranging from lower to higher performance as listed below. The test script +# effectively triggered a kernel soft lockup on machines running an unpatched +# kernel in under a minute: +# +# - 1x Intel Xeon E-2278G 8-Core Processor @ 3.40GHz +# - 1x Intel Xeon E-2378G Processor 8-Core @ 2.80GHz +# - 1x AMD EPYC 7401P 24-Core Processor @ 2.00GHz +# - 1x AMD EPYC 7402P 24-Core Processor @ 2.80GHz +# - 2x Intel Xeon Gold 5120 14-Core Processor @ 2.20GHz +# - 1x Ampere Altra Q80-30 80-Core Processor @ 3.00GHz +# - 2x Intel Xeon Gold 5120 14-Core Processor @ 2.20GHz +# - 2x Intel Xeon Silver 4214 24-Core Processor @ 2.20GHz +# - 1x AMD EPYC 7502P 32-Core @ 2.50GHz +# - 1x Intel Xeon Gold 6314U 32-Core Processor @ 2.30GHz +# - 2x Intel Xeon Gold 6338 32-Core Processor @ 2.00GHz +# +# On less performant machines, you may need to increase the TEST_DURATION +# parameter to enhance the likelihood of encountering a race condition leading +# to a kernel soft lockup and avoid a false negative result. +# +# NOTE: The test may not produce the expected result in virtualized +# environments (e.g., qemu) due to differences in timing and CPU handling, +# which can affect the conditions needed to trigger a soft lockup. + +source lib.sh +source net_helper.sh + +TEST_DURATION=300 +ROUTING_TABLE_REFRESH_PERIOD=0.01 + +IPERF3_BITRATE="300m" + + +IPV6_NEXTHOP_ADDR_COUNT="128" +IPV6_NEXTHOP_ADDR_MASK="96" +IPV6_NEXTHOP_PREFIX="2001:0DB8:1" + + +SOURCE_TEST_IFACE="veth_source" +SOURCE_TEST_IP_ADDR="2001:0DB8:1::0:1/96" + +SINK_TEST_IFACE="veth_sink" +# ${SINK_TEST_IFACE} is populated with the following range of IPv6 addresses: +# 2001:0DB8:1::1:1 to 2001:0DB8:1::1:${IPV6_NEXTHOP_ADDR_COUNT} +SINK_LOOPBACK_IFACE="lo" +SINK_LOOPBACK_IP_MASK="128" +SINK_LOOPBACK_IP_ADDR="2001:0DB8:2::1:1" + +nexthop_ip_list="" +termination_signal="" +kernel_softlokup_panic_prev_val="" + +terminate_ns_processes_by_pattern() { + local ns=$1 + local pattern=$2 + + for pid in $(ip netns pids ${ns}); do + [ -e /proc/$pid/cmdline ] && grep -qe "${pattern}" /proc/$pid/cmdline && kill -9 $pid + done +} + +cleanup() { + echo "info: cleaning up namespaces and terminating all processes within them..." + + + # Terminate iperf3 instances running in the source_ns. To avoid race + # conditions, first iterate over the PIDs and terminate those + # associated with the bash shells running the + # `while true; do iperf3 -c ...; done` loops. In a second iteration, + # terminate the individual `iperf3 -c ...` instances. + terminate_ns_processes_by_pattern ${source_ns} while + terminate_ns_processes_by_pattern ${source_ns} iperf3 + + # Repeat the same process for sink_ns + terminate_ns_processes_by_pattern ${sink_ns} while + terminate_ns_processes_by_pattern ${sink_ns} iperf3 + + # Check if any iperf3 instances are still running. This could happen + # if a core has entered an infinite loop and the timeout for detecting + # the soft lockup has not expired, but either the test interval has + # already elapsed or the test was terminated manually (e.g., with ^C) + for pid in $(ip netns pids ${source_ns}); do + if [ -e /proc/$pid/cmdline ] && grep -qe 'iperf3' /proc/$pid/cmdline; then + echo "FAIL: unable to terminate some iperf3 instances. Soft lockup is underway. A kernel panic is on the way!" + exit ${ksft_fail} + fi + done + + if [ "$termination_signal" == "SIGINT" ]; then + echo "SKIP: Termination due to ^C (SIGINT)" + elif [ "$termination_signal" == "SIGALRM" ]; then + echo "PASS: No kernel soft lockup occurred during this ${TEST_DURATION} second test" + fi + + cleanup_ns ${source_ns} ${sink_ns} + + sysctl -qw kernel.softlockup_panic=${kernel_softlokup_panic_prev_val} +} + +setup_prepare() { + setup_ns source_ns sink_ns + + ip -n ${source_ns} link add name ${SOURCE_TEST_IFACE} type veth peer name ${SINK_TEST_IFACE} netns ${sink_ns} + + # Setting up the Source namespace + ip -n ${source_ns} addr add ${SOURCE_TEST_IP_ADDR} dev ${SOURCE_TEST_IFACE} + ip -n ${source_ns} link set dev ${SOURCE_TEST_IFACE} qlen 10000 + ip -n ${source_ns} link set dev ${SOURCE_TEST_IFACE} up + ip netns exec ${source_ns} sysctl -qw net.ipv6.fib_multipath_hash_policy=1 + + # Setting up the Sink namespace + ip -n ${sink_ns} addr add ${SINK_LOOPBACK_IP_ADDR}/${SINK_LOOPBACK_IP_MASK} dev ${SINK_LOOPBACK_IFACE} + ip -n ${sink_ns} link set dev ${SINK_LOOPBACK_IFACE} up + ip netns exec ${sink_ns} sysctl -qw net.ipv6.conf.${SINK_LOOPBACK_IFACE}.forwarding=1 + + ip -n ${sink_ns} link set ${SINK_TEST_IFACE} up + ip netns exec ${sink_ns} sysctl -qw net.ipv6.conf.${SINK_TEST_IFACE}.forwarding=1 + + + # Populate nexthop IPv6 addresses on the test interface in the sink_ns + echo "info: populating ${IPV6_NEXTHOP_ADDR_COUNT} IPv6 addresses on the ${SINK_TEST_IFACE} interface ..." + for IP in $(seq 1 ${IPV6_NEXTHOP_ADDR_COUNT}); do + ip -n ${sink_ns} addr add ${IPV6_NEXTHOP_PREFIX}::$(printf "1:%x" "${IP}")/${IPV6_NEXTHOP_ADDR_MASK} dev ${SINK_TEST_IFACE}; + done + + # Preparing list of nexthops + for IP in $(seq 1 ${IPV6_NEXTHOP_ADDR_COUNT}); do + nexthop_ip_list=$nexthop_ip_list" nexthop via ${IPV6_NEXTHOP_PREFIX}::$(printf "1:%x" $IP) dev ${SOURCE_TEST_IFACE} weight 1" + done +} + + +test_soft_lockup_during_routing_table_refresh() { + # Start num_of_iperf_servers iperf3 servers in the sink_ns namespace, + # each listening on ports starting at 5001 and incrementing + # sequentially. Since iperf3 instances may terminate unexpectedly, a + # while loop is used to automatically restart them in such cases. + echo "info: starting ${num_of_iperf_servers} iperf3 servers in the sink_ns namespace ..." + for i in $(seq 1 ${num_of_iperf_servers}); do + cmd="iperf3 --bind ${SINK_LOOPBACK_IP_ADDR} -s -p $(printf '5%03d' ${i}) --rcv-timeout 200 &>/dev/null" + ip netns exec ${sink_ns} bash -c "while true; do ${cmd}; done &" &>/dev/null + done + + # Wait for the iperf3 servers to be ready + for i in $(seq ${num_of_iperf_servers}); do + port=$(printf '5%03d' ${i}); + wait_local_port_listen ${sink_ns} ${port} tcp + done + + # Continuously refresh the routing table in the background within + # the source_ns namespace + ip netns exec ${source_ns} bash -c " + while \$(ip netns list | grep -q ${source_ns}); do + ip -6 route add ${SINK_LOOPBACK_IP_ADDR}/${SINK_LOOPBACK_IP_MASK} ${nexthop_ip_list}; + sleep ${ROUTING_TABLE_REFRESH_PERIOD}; + ip -6 route delete ${SINK_LOOPBACK_IP_ADDR}/${SINK_LOOPBACK_IP_MASK}; + done &" + + # Start num_of_iperf_servers iperf3 clients in the source_ns namespace, + # each sending TCP traffic on sequential ports starting at 5001. + # Since iperf3 instances may terminate unexpectedly (e.g., if the route + # to the server is deleted in the background during a route refresh), a + # while loop is used to automatically restart them in such cases. + echo "info: starting ${num_of_iperf_servers} iperf3 clients in the source_ns namespace ..." + for i in $(seq 1 ${num_of_iperf_servers}); do + cmd="iperf3 -c ${SINK_LOOPBACK_IP_ADDR} -p $(printf '5%03d' ${i}) --length 64 --bitrate ${IPERF3_BITRATE} -t 0 --connect-timeout 150 &>/dev/null" + ip netns exec ${source_ns} bash -c "while true; do ${cmd}; done &" &>/dev/null + done + + echo "info: IPv6 routing table is being updated at the rate of $(echo "1/${ROUTING_TABLE_REFRESH_PERIOD}" | bc)/s for ${TEST_DURATION} seconds ..." + echo "info: A kernel soft lockup, if detected, results in a kernel panic!" + + wait +} + +# Make sure 'iperf3' is installed, skip the test otherwise +if [ ! -x "$(command -v "iperf3")" ]; then + echo "SKIP: 'iperf3' is not installed. Skipping the test." + exit ${ksft_skip} +fi + +# Determine the number of cores on the machine +num_of_iperf_servers=$(( $(nproc)/2 )) + +# Check if we are running on a multi-core machine, skip the test otherwise +if [ "${num_of_iperf_servers}" -eq 0 ]; then + echo "SKIP: This test is not valid on a single core machine!" + exit ${ksft_skip} +fi + +# Since the kernel soft lockup we're testing causes at least one core to enter +# an infinite loop, destabilizing the host and likely affecting subsequent +# tests, we trigger a kernel panic instead of reporting a failure and +# continuing +kernel_softlokup_panic_prev_val=$(sysctl -n kernel.softlockup_panic) +sysctl -qw kernel.softlockup_panic=1 + +handle_sigint() { + termination_signal="SIGINT" + cleanup + exit ${ksft_skip} +} + +handle_sigalrm() { + termination_signal="SIGALRM" + cleanup + exit ${ksft_pass} +} + +trap handle_sigint SIGINT +trap handle_sigalrm SIGALRM + +(sleep ${TEST_DURATION} && kill -s SIGALRM $$)& + +setup_prepare +test_soft_lockup_during_routing_table_refresh diff --git a/tools/testing/selftests/net/lib.sh b/tools/testing/selftests/net/lib.sh index be8707bfb46e..8994fec1c38f 100644 --- a/tools/testing/selftests/net/lib.sh +++ b/tools/testing/selftests/net/lib.sh @@ -1,11 +1,17 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 +net_dir=$(dirname "$(readlink -e "${BASH_SOURCE[0]}")") +source "$net_dir/lib/sh/defer.sh" + ############################################################################## # Defines : "${WAIT_TIMEOUT:=20}" +# Whether to pause on after a failure. +: "${PAUSE_ON_FAIL:=no}" + BUSYWAIT_TIMEOUT=$((WAIT_TIMEOUT * 1000)) # ms # Kselftest framework constants. @@ -17,6 +23,11 @@ ksft_skip=4 # namespace list created by setup_ns NS_LIST=() +# Exit status to return at the end. Set in case one of the tests fails. +EXIT_STATUS=0 +# Per-test return value. Clear at the beginning of each test. +RET=0 + ############################################################################## # Helpers @@ -233,3 +244,218 @@ tc_rule_handle_stats_get() | jq ".[] | select(.options.handle == $handle) | \ .options.actions[0].stats$selector" } + +ret_set_ksft_status() +{ + local ksft_status=$1; shift + local msg=$1; shift + + RET=$(ksft_status_merge $RET $ksft_status) + if (( $? )); then + retmsg=$msg + fi +} + +log_test_result() +{ + local test_name=$1; shift + local opt_str=$1; shift + local result=$1; shift + local retmsg=$1; shift + + printf "TEST: %-60s [%s]\n" "$test_name $opt_str" "$result" + if [[ $retmsg ]]; then + printf "\t%s\n" "$retmsg" + fi +} + +pause_on_fail() +{ + if [[ $PAUSE_ON_FAIL == yes ]]; then + echo "Hit enter to continue, 'q' to quit" + read a + [[ $a == q ]] && exit 1 + fi +} + +handle_test_result_pass() +{ + local test_name=$1; shift + local opt_str=$1; shift + + log_test_result "$test_name" "$opt_str" " OK " +} + +handle_test_result_fail() +{ + local test_name=$1; shift + local opt_str=$1; shift + + log_test_result "$test_name" "$opt_str" FAIL "$retmsg" + pause_on_fail +} + +handle_test_result_xfail() +{ + local test_name=$1; shift + local opt_str=$1; shift + + log_test_result "$test_name" "$opt_str" XFAIL "$retmsg" + pause_on_fail +} + +handle_test_result_skip() +{ + local test_name=$1; shift + local opt_str=$1; shift + + log_test_result "$test_name" "$opt_str" SKIP "$retmsg" +} + +log_test() +{ + local test_name=$1 + local opt_str=$2 + + if [[ $# -eq 2 ]]; then + opt_str="($opt_str)" + fi + + if ((RET == ksft_pass)); then + handle_test_result_pass "$test_name" "$opt_str" + elif ((RET == ksft_xfail)); then + handle_test_result_xfail "$test_name" "$opt_str" + elif ((RET == ksft_skip)); then + handle_test_result_skip "$test_name" "$opt_str" + else + handle_test_result_fail "$test_name" "$opt_str" + fi + + EXIT_STATUS=$(ksft_exit_status_merge $EXIT_STATUS $RET) + return $RET +} + +log_test_skip() +{ + RET=$ksft_skip retmsg= log_test "$@" +} + +log_test_xfail() +{ + RET=$ksft_xfail retmsg= log_test "$@" +} + +log_info() +{ + local msg=$1 + + echo "INFO: $msg" +} + +tests_run() +{ + local current_test + + for current_test in ${TESTS:-$ALL_TESTS}; do + in_defer_scope \ + $current_test + done +} + +# Whether FAILs should be interpreted as XFAILs. Internal. +FAIL_TO_XFAIL= + +check_err() +{ + local err=$1 + local msg=$2 + + if ((err)); then + if [[ $FAIL_TO_XFAIL = yes ]]; then + ret_set_ksft_status $ksft_xfail "$msg" + else + ret_set_ksft_status $ksft_fail "$msg" + fi + fi +} + +check_fail() +{ + local err=$1 + local msg=$2 + + check_err $((!err)) "$msg" +} + +check_err_fail() +{ + local should_fail=$1; shift + local err=$1; shift + local what=$1; shift + + if ((should_fail)); then + check_fail $err "$what succeeded, but should have failed" + else + check_err $err "$what failed" + fi +} + +xfail() +{ + FAIL_TO_XFAIL=yes "$@" +} + +xfail_on_slow() +{ + if [[ $KSFT_MACHINE_SLOW = yes ]]; then + FAIL_TO_XFAIL=yes "$@" + else + "$@" + fi +} + +omit_on_slow() +{ + if [[ $KSFT_MACHINE_SLOW != yes ]]; then + "$@" + fi +} + +xfail_on_veth() +{ + local dev=$1; shift + local kind + + kind=$(ip -j -d link show dev $dev | + jq -r '.[].linkinfo.info_kind') + if [[ $kind = veth ]]; then + FAIL_TO_XFAIL=yes "$@" + else + "$@" + fi +} + +kill_process() +{ + local pid=$1; shift + + # Suppress noise from killing the process. + { kill $pid && wait $pid; } 2>/dev/null +} + +ip_link_add() +{ + local name=$1; shift + + ip link add name "$name" "$@" + defer ip link del dev "$name" +} + +ip_link_master() +{ + local member=$1; shift + local master=$1; shift + + ip link set dev "$member" master "$master" + defer ip link set dev "$member" nomaster +} diff --git a/tools/testing/selftests/net/lib/Makefile b/tools/testing/selftests/net/lib/Makefile index 82c3264b115e..18b9443454a9 100644 --- a/tools/testing/selftests/net/lib/Makefile +++ b/tools/testing/selftests/net/lib/Makefile @@ -10,6 +10,6 @@ TEST_FILES += ../../../../net/ynl TEST_GEN_FILES += csum -TEST_INCLUDES := $(wildcard py/*.py) +TEST_INCLUDES := $(wildcard py/*.py sh/*.sh) include ../../lib.mk diff --git a/tools/testing/selftests/net/lib/csum.c b/tools/testing/selftests/net/lib/csum.c index e0a34e5e8dd5..27437590eeb5 100644 --- a/tools/testing/selftests/net/lib/csum.c +++ b/tools/testing/selftests/net/lib/csum.c @@ -675,22 +675,20 @@ static int recv_verify_packet_ipv6(void *nh, int len) { struct ipv6hdr *ip6h = nh; uint16_t proto = cfg_encap ? IPPROTO_UDP : cfg_proto; - uint16_t ip_len; + uint16_t payload_len; if (len < sizeof(*ip6h) || ip6h->nexthdr != proto) return -1; - ip_len = ntohs(ip6h->payload_len); - if (ip_len > len - sizeof(*ip6h)) + payload_len = ntohs(ip6h->payload_len); + if (payload_len > len - sizeof(*ip6h)) return -1; - len = ip_len; iph_addr_p = &ip6h->saddr; - if (proto == IPPROTO_TCP) - return recv_verify_packet_tcp(ip6h + 1, len); + return recv_verify_packet_tcp(ip6h + 1, payload_len); else - return recv_verify_packet_udp(ip6h + 1, len); + return recv_verify_packet_udp(ip6h + 1, payload_len); } /* return whether auxdata includes TP_STATUS_CSUM_VALID */ diff --git a/tools/testing/selftests/net/lib/py/__init__.py b/tools/testing/selftests/net/lib/py/__init__.py index b6d498d125fe..54d8f5eba810 100644 --- a/tools/testing/selftests/net/lib/py/__init__.py +++ b/tools/testing/selftests/net/lib/py/__init__.py @@ -6,3 +6,4 @@ from .netns import NetNS from .nsim import * from .utils import * from .ynl import NlError, YnlFamily, EthtoolFamily, NetdevFamily, RtnlFamily +from .ynl import NetshaperFamily diff --git a/tools/testing/selftests/net/lib/py/nsim.py b/tools/testing/selftests/net/lib/py/nsim.py index f571a8b3139b..1a8cbe9acc48 100644 --- a/tools/testing/selftests/net/lib/py/nsim.py +++ b/tools/testing/selftests/net/lib/py/nsim.py @@ -1,5 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 +import errno import json import os import random diff --git a/tools/testing/selftests/net/lib/py/ynl.py b/tools/testing/selftests/net/lib/py/ynl.py index 1ace58370c06..076a7e8dc3eb 100644 --- a/tools/testing/selftests/net/lib/py/ynl.py +++ b/tools/testing/selftests/net/lib/py/ynl.py @@ -32,18 +32,23 @@ except ModuleNotFoundError as e: # Set schema='' to avoid jsonschema validation, it's slow # class EthtoolFamily(YnlFamily): - def __init__(self): + def __init__(self, recv_size=0): super().__init__((SPEC_PATH / Path('ethtool.yaml')).as_posix(), - schema='') + schema='', recv_size=recv_size) class RtnlFamily(YnlFamily): - def __init__(self): + def __init__(self, recv_size=0): super().__init__((SPEC_PATH / Path('rt_link.yaml')).as_posix(), - schema='') + schema='', recv_size=recv_size) class NetdevFamily(YnlFamily): - def __init__(self): + def __init__(self, recv_size=0): super().__init__((SPEC_PATH / Path('netdev.yaml')).as_posix(), - schema='') + schema='', recv_size=recv_size) + +class NetshaperFamily(YnlFamily): + def __init__(self, recv_size=0): + super().__init__((SPEC_PATH / Path('net_shaper.yaml')).as_posix(), + schema='', recv_size=recv_size) diff --git a/tools/testing/selftests/net/lib/sh/defer.sh b/tools/testing/selftests/net/lib/sh/defer.sh new file mode 100644 index 000000000000..082f5d38321b --- /dev/null +++ b/tools/testing/selftests/net/lib/sh/defer.sh @@ -0,0 +1,115 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# map[(scope_id,track,cleanup_id) -> cleanup_command] +# track={d=default | p=priority} +declare -A __DEFER__JOBS + +# map[(scope_id,track) -> # cleanup_commands] +declare -A __DEFER__NJOBS + +# scope_id of the topmost scope. +__DEFER__SCOPE_ID=0 + +__defer__ndefer_key() +{ + local track=$1; shift + + echo $__DEFER__SCOPE_ID,$track +} + +__defer__defer_key() +{ + local track=$1; shift + local defer_ix=$1; shift + + echo $__DEFER__SCOPE_ID,$track,$defer_ix +} + +__defer__ndefers() +{ + local track=$1; shift + + echo ${__DEFER__NJOBS[$(__defer__ndefer_key $track)]} +} + +__defer__run() +{ + local track=$1; shift + local defer_ix=$1; shift + local defer_key=$(__defer__defer_key $track $defer_ix) + + ${__DEFER__JOBS[$defer_key]} + unset __DEFER__JOBS[$defer_key] +} + +__defer__schedule() +{ + local track=$1; shift + local ndefers=$(__defer__ndefers $track) + local ndefers_key=$(__defer__ndefer_key $track) + local defer_key=$(__defer__defer_key $track $ndefers) + local defer="$@" + + __DEFER__JOBS[$defer_key]="$defer" + __DEFER__NJOBS[$ndefers_key]=$((ndefers + 1)) +} + +__defer__scope_wipe() +{ + __DEFER__NJOBS[$(__defer__ndefer_key d)]=0 + __DEFER__NJOBS[$(__defer__ndefer_key p)]=0 +} + +defer_scope_push() +{ + ((__DEFER__SCOPE_ID++)) + __defer__scope_wipe +} + +defer_scope_pop() +{ + local defer_ix + + for ((defer_ix=$(__defer__ndefers p); defer_ix-->0; )); do + __defer__run p $defer_ix + done + + for ((defer_ix=$(__defer__ndefers d); defer_ix-->0; )); do + __defer__run d $defer_ix + done + + __defer__scope_wipe + ((__DEFER__SCOPE_ID--)) +} + +defer() +{ + __defer__schedule d "$@" +} + +defer_prio() +{ + __defer__schedule p "$@" +} + +defer_scopes_cleanup() +{ + while ((__DEFER__SCOPE_ID >= 0)); do + defer_scope_pop + done +} + +in_defer_scope() +{ + local ret + + defer_scope_push + "$@" + ret=$? + defer_scope_pop + + return $ret +} + +__defer__scope_wipe diff --git a/tools/testing/selftests/net/mptcp/Makefile b/tools/testing/selftests/net/mptcp/Makefile index 5d796622e730..8e3fc05a5397 100644 --- a/tools/testing/selftests/net/mptcp/Makefile +++ b/tools/testing/selftests/net/mptcp/Makefile @@ -11,7 +11,7 @@ TEST_GEN_FILES = mptcp_connect pm_nl_ctl mptcp_sockopt mptcp_inq TEST_FILES := mptcp_lib.sh settings -TEST_INCLUDES := ../lib.sh ../net_helper.sh +TEST_INCLUDES := ../lib.sh $(wildcard ../lib/sh/*.sh) ../net_helper.sh EXTRA_CLEAN := *.pcap diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.sh b/tools/testing/selftests/net/mptcp/mptcp_connect.sh index 57325d57e4c6..b48b4e56826a 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_connect.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_connect.sh @@ -259,6 +259,15 @@ check_mptcp_disabled() mptcp_lib_ns_init disabled_ns print_larger_title "New MPTCP socket can be blocked via sysctl" + + # mainly to cover more code + if ! ip netns exec ${disabled_ns} sysctl net.mptcp >/dev/null; then + mptcp_lib_pr_fail "not able to list net.mptcp sysctl knobs" + mptcp_lib_result_fail "not able to list net.mptcp sysctl knobs" + ret=${KSFT_FAIL} + return 1 + fi + # net.mptcp.enabled should be enabled by default if [ "$(ip netns exec ${disabled_ns} sysctl net.mptcp.enabled | awk '{ print $3 }')" -ne 1 ]; then mptcp_lib_pr_fail "net.mptcp.enabled sysctl is not 1 by default" diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index e8d0a01b4144..c07e2bd3a315 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -23,6 +23,7 @@ tmpfile="" cout="" err="" capout="" +cappid="" ns1="" ns2="" iptables="iptables" @@ -887,40 +888,62 @@ check_cestab() fi } -do_transfer() +cond_start_capture() { - local listener_ns="$1" - local connector_ns="$2" - local cl_proto="$3" - local srv_proto="$4" - local connect_addr="$5" - - local port=$((10000 + MPTCP_LIB_TEST_COUNTER - 1)) - local cappid - local FAILING_LINKS=${FAILING_LINKS:-""} - local fastclose=${fastclose:-""} - local speed=${speed:-"fast"} + local ns="$1" - :> "$cout" - :> "$sout" :> "$capout" if $capture; then - local capuser - if [ -z $SUDO_USER ] ; then + local capuser capfile + if [ -z $SUDO_USER ]; then capuser="" else capuser="-Z $SUDO_USER" fi - capfile=$(printf "mp_join-%02u-%s.pcap" "$MPTCP_LIB_TEST_COUNTER" "${listener_ns}") + capfile=$(printf "mp_join-%02u-%s.pcap" "$MPTCP_LIB_TEST_COUNTER" "$ns") echo "Capturing traffic for test $MPTCP_LIB_TEST_COUNTER into $capfile" - ip netns exec ${listener_ns} tcpdump -i any -s 65535 -B 32768 $capuser -w $capfile > "$capout" 2>&1 & + ip netns exec "$ns" tcpdump -i any -s 65535 -B 32768 $capuser -w "$capfile" > "$capout" 2>&1 & cappid=$! sleep 1 fi +} + +cond_stop_capture() +{ + if $capture; then + sleep 1 + kill $cappid + cat "$capout" + fi +} + +get_port() +{ + echo "$((10000 + MPTCP_LIB_TEST_COUNTER - 1))" +} + +do_transfer() +{ + local listener_ns="$1" + local connector_ns="$2" + local cl_proto="$3" + local srv_proto="$4" + local connect_addr="$5" + local port + + local FAILING_LINKS=${FAILING_LINKS:-""} + local fastclose=${fastclose:-""} + local speed=${speed:-"fast"} + port=$(get_port) + + :> "$cout" + :> "$sout" + + cond_start_capture ${listener_ns} NSTAT_HISTORY=/tmp/${listener_ns}.nstat ip netns exec ${listener_ns} \ nstat -n @@ -1007,10 +1030,7 @@ do_transfer() wait $spid local rets=$? - if $capture; then - sleep 1 - kill $cappid - fi + cond_stop_capture NSTAT_HISTORY=/tmp/${listener_ns}.nstat ip netns exec ${listener_ns} \ nstat | grep Tcp > /tmp/${listener_ns}.out @@ -1026,7 +1046,6 @@ do_transfer() ip netns exec ${connector_ns} ss -Menita 1>&2 -o "dport = :$port" cat /tmp/${connector_ns}.out - cat "$capout" return 1 fi @@ -1043,13 +1062,7 @@ do_transfer() fi rets=$? - if [ $retc -eq 0 ] && [ $rets -eq 0 ];then - cat "$capout" - return 0 - fi - - cat "$capout" - return 1 + [ $retc -eq 0 ] && [ $rets -eq 0 ] } make_file() @@ -2873,6 +2886,32 @@ verify_listener_events() fail_test } +chk_mpc_endp_attempt() +{ + local retl=$1 + local attempts=$2 + + print_check "Connect" + + if [ ${retl} = 124 ]; then + fail_test "timeout on connect" + elif [ ${retl} = 0 ]; then + fail_test "unexpected successful connect" + else + print_ok + + print_check "Attempts" + count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMPCapableEndpAttempt") + if [ -z "$count" ]; then + print_skip + elif [ "$count" != "$attempts" ]; then + fail_test "got ${count} MPC attempt[s] on port-based endpoint, expected ${attempts}" + else + print_ok + fi + fi +} + add_addr_ports_tests() { # signal address with port @@ -2963,6 +3002,22 @@ add_addr_ports_tests() chk_join_nr 2 2 2 chk_add_nr 2 2 2 fi + + if reset "port-based signal endpoint must not accept mpc"; then + local port retl count + port=$(get_port) + + cond_start_capture ${ns1} + pm_nl_add_endpoint ${ns1} 10.0.2.1 flags signal port ${port} + mptcp_lib_wait_local_port_listen ${ns1} ${port} + + timeout 1 ip netns exec ${ns2} \ + ./mptcp_connect -t ${timeout_poll} -p $port -s MPTCP 10.0.2.1 >/dev/null 2>&1 + retl=$? + cond_stop_capture + + chk_mpc_endp_attempt ${retl} 1 + fi } syncookies_tests() diff --git a/tools/testing/selftests/net/ncdevmem.c b/tools/testing/selftests/net/ncdevmem.c deleted file mode 100644 index 64d6805381c5..000000000000 --- a/tools/testing/selftests/net/ncdevmem.c +++ /dev/null @@ -1,570 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#define _GNU_SOURCE -#define __EXPORTED_HEADERS__ - -#include <linux/uio.h> -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <stdbool.h> -#include <string.h> -#include <errno.h> -#define __iovec_defined -#include <fcntl.h> -#include <malloc.h> -#include <error.h> - -#include <arpa/inet.h> -#include <sys/socket.h> -#include <sys/mman.h> -#include <sys/ioctl.h> -#include <sys/syscall.h> - -#include <linux/memfd.h> -#include <linux/dma-buf.h> -#include <linux/udmabuf.h> -#include <libmnl/libmnl.h> -#include <linux/types.h> -#include <linux/netlink.h> -#include <linux/genetlink.h> -#include <linux/netdev.h> -#include <time.h> -#include <net/if.h> - -#include "netdev-user.h" -#include <ynl.h> - -#define PAGE_SHIFT 12 -#define TEST_PREFIX "ncdevmem" -#define NUM_PAGES 16000 - -#ifndef MSG_SOCK_DEVMEM -#define MSG_SOCK_DEVMEM 0x2000000 -#endif - -/* - * tcpdevmem netcat. Works similarly to netcat but does device memory TCP - * instead of regular TCP. Uses udmabuf to mock a dmabuf provider. - * - * Usage: - * - * On server: - * ncdevmem -s <server IP> -c <client IP> -f eth1 -l -p 5201 -v 7 - * - * On client: - * yes $(echo -e \\x01\\x02\\x03\\x04\\x05\\x06) | \ - * tr \\n \\0 | \ - * head -c 5G | \ - * nc <server IP> 5201 -p 5201 - * - * Note this is compatible with regular netcat. i.e. the sender or receiver can - * be replaced with regular netcat to test the RX or TX path in isolation. - */ - -static char *server_ip = "192.168.1.4"; -static char *client_ip = "192.168.1.2"; -static char *port = "5201"; -static size_t do_validation; -static int start_queue = 8; -static int num_queues = 8; -static char *ifname = "eth1"; -static unsigned int ifindex; -static unsigned int dmabuf_id; - -void print_bytes(void *ptr, size_t size) -{ - unsigned char *p = ptr; - int i; - - for (i = 0; i < size; i++) - printf("%02hhX ", p[i]); - printf("\n"); -} - -void print_nonzero_bytes(void *ptr, size_t size) -{ - unsigned char *p = ptr; - unsigned int i; - - for (i = 0; i < size; i++) - putchar(p[i]); - printf("\n"); -} - -void validate_buffer(void *line, size_t size) -{ - static unsigned char seed = 1; - unsigned char *ptr = line; - int errors = 0; - size_t i; - - for (i = 0; i < size; i++) { - if (ptr[i] != seed) { - fprintf(stderr, - "Failed validation: expected=%u, actual=%u, index=%lu\n", - seed, ptr[i], i); - errors++; - if (errors > 20) - error(1, 0, "validation failed."); - } - seed++; - if (seed == do_validation) - seed = 0; - } - - fprintf(stdout, "Validated buffer\n"); -} - -#define run_command(cmd, ...) \ - ({ \ - char command[256]; \ - memset(command, 0, sizeof(command)); \ - snprintf(command, sizeof(command), cmd, ##__VA_ARGS__); \ - printf("Running: %s\n", command); \ - system(command); \ - }) - -static int reset_flow_steering(void) -{ - int ret = 0; - - ret = run_command("sudo ethtool -K %s ntuple off", ifname); - if (ret) - return ret; - - return run_command("sudo ethtool -K %s ntuple on", ifname); -} - -static int configure_headersplit(bool on) -{ - return run_command("sudo ethtool -G %s tcp-data-split %s", ifname, - on ? "on" : "off"); -} - -static int configure_rss(void) -{ - return run_command("sudo ethtool -X %s equal %d", ifname, start_queue); -} - -static int configure_channels(unsigned int rx, unsigned int tx) -{ - return run_command("sudo ethtool -L %s rx %u tx %u", ifname, rx, tx); -} - -static int configure_flow_steering(void) -{ - return run_command("sudo ethtool -N %s flow-type tcp4 src-ip %s dst-ip %s src-port %s dst-port %s queue %d", - ifname, client_ip, server_ip, port, port, start_queue); -} - -static int bind_rx_queue(unsigned int ifindex, unsigned int dmabuf_fd, - struct netdev_queue_id *queues, - unsigned int n_queue_index, struct ynl_sock **ys) -{ - struct netdev_bind_rx_req *req = NULL; - struct netdev_bind_rx_rsp *rsp = NULL; - struct ynl_error yerr; - - *ys = ynl_sock_create(&ynl_netdev_family, &yerr); - if (!*ys) { - fprintf(stderr, "YNL: %s\n", yerr.msg); - return -1; - } - - req = netdev_bind_rx_req_alloc(); - netdev_bind_rx_req_set_ifindex(req, ifindex); - netdev_bind_rx_req_set_fd(req, dmabuf_fd); - __netdev_bind_rx_req_set_queues(req, queues, n_queue_index); - - rsp = netdev_bind_rx(*ys, req); - if (!rsp) { - perror("netdev_bind_rx"); - goto err_close; - } - - if (!rsp->_present.id) { - perror("id not present"); - goto err_close; - } - - printf("got dmabuf id=%d\n", rsp->id); - dmabuf_id = rsp->id; - - netdev_bind_rx_req_free(req); - netdev_bind_rx_rsp_free(rsp); - - return 0; - -err_close: - fprintf(stderr, "YNL failed: %s\n", (*ys)->err.msg); - netdev_bind_rx_req_free(req); - ynl_sock_destroy(*ys); - return -1; -} - -static void create_udmabuf(int *devfd, int *memfd, int *buf, size_t dmabuf_size) -{ - struct udmabuf_create create; - int ret; - - *devfd = open("/dev/udmabuf", O_RDWR); - if (*devfd < 0) { - error(70, 0, - "%s: [skip,no-udmabuf: Unable to access DMA buffer device file]\n", - TEST_PREFIX); - } - - *memfd = memfd_create("udmabuf-test", MFD_ALLOW_SEALING); - if (*memfd < 0) - error(70, 0, "%s: [skip,no-memfd]\n", TEST_PREFIX); - - /* Required for udmabuf */ - ret = fcntl(*memfd, F_ADD_SEALS, F_SEAL_SHRINK); - if (ret < 0) - error(73, 0, "%s: [skip,fcntl-add-seals]\n", TEST_PREFIX); - - ret = ftruncate(*memfd, dmabuf_size); - if (ret == -1) - error(74, 0, "%s: [FAIL,memfd-truncate]\n", TEST_PREFIX); - - memset(&create, 0, sizeof(create)); - - create.memfd = *memfd; - create.offset = 0; - create.size = dmabuf_size; - *buf = ioctl(*devfd, UDMABUF_CREATE, &create); - if (*buf < 0) - error(75, 0, "%s: [FAIL, create udmabuf]\n", TEST_PREFIX); -} - -int do_server(void) -{ - char ctrl_data[sizeof(int) * 20000]; - struct netdev_queue_id *queues; - size_t non_page_aligned_frags = 0; - struct sockaddr_in client_addr; - struct sockaddr_in server_sin; - size_t page_aligned_frags = 0; - int devfd, memfd, buf, ret; - size_t total_received = 0; - socklen_t client_addr_len; - bool is_devmem = false; - char *buf_mem = NULL; - struct ynl_sock *ys; - size_t dmabuf_size; - char iobuf[819200]; - char buffer[256]; - int socket_fd; - int client_fd; - size_t i = 0; - int opt = 1; - - dmabuf_size = getpagesize() * NUM_PAGES; - - create_udmabuf(&devfd, &memfd, &buf, dmabuf_size); - - if (reset_flow_steering()) - error(1, 0, "Failed to reset flow steering\n"); - - /* Configure RSS to divert all traffic from our devmem queues */ - if (configure_rss()) - error(1, 0, "Failed to configure rss\n"); - - /* Flow steer our devmem flows to start_queue */ - if (configure_flow_steering()) - error(1, 0, "Failed to configure flow steering\n"); - - sleep(1); - - queues = malloc(sizeof(*queues) * num_queues); - - for (i = 0; i < num_queues; i++) { - queues[i]._present.type = 1; - queues[i]._present.id = 1; - queues[i].type = NETDEV_QUEUE_TYPE_RX; - queues[i].id = start_queue + i; - } - - if (bind_rx_queue(ifindex, buf, queues, num_queues, &ys)) - error(1, 0, "Failed to bind\n"); - - buf_mem = mmap(NULL, dmabuf_size, PROT_READ | PROT_WRITE, MAP_SHARED, - buf, 0); - if (buf_mem == MAP_FAILED) - error(1, 0, "mmap()"); - - server_sin.sin_family = AF_INET; - server_sin.sin_port = htons(atoi(port)); - - ret = inet_pton(server_sin.sin_family, server_ip, &server_sin.sin_addr); - if (socket < 0) - error(79, 0, "%s: [FAIL, create socket]\n", TEST_PREFIX); - - socket_fd = socket(server_sin.sin_family, SOCK_STREAM, 0); - if (socket < 0) - error(errno, errno, "%s: [FAIL, create socket]\n", TEST_PREFIX); - - ret = setsockopt(socket_fd, SOL_SOCKET, SO_REUSEPORT, &opt, - sizeof(opt)); - if (ret) - error(errno, errno, "%s: [FAIL, set sock opt]\n", TEST_PREFIX); - - ret = setsockopt(socket_fd, SOL_SOCKET, SO_REUSEADDR, &opt, - sizeof(opt)); - if (ret) - error(errno, errno, "%s: [FAIL, set sock opt]\n", TEST_PREFIX); - - printf("binding to address %s:%d\n", server_ip, - ntohs(server_sin.sin_port)); - - ret = bind(socket_fd, &server_sin, sizeof(server_sin)); - if (ret) - error(errno, errno, "%s: [FAIL, bind]\n", TEST_PREFIX); - - ret = listen(socket_fd, 1); - if (ret) - error(errno, errno, "%s: [FAIL, listen]\n", TEST_PREFIX); - - client_addr_len = sizeof(client_addr); - - inet_ntop(server_sin.sin_family, &server_sin.sin_addr, buffer, - sizeof(buffer)); - printf("Waiting or connection on %s:%d\n", buffer, - ntohs(server_sin.sin_port)); - client_fd = accept(socket_fd, &client_addr, &client_addr_len); - - inet_ntop(client_addr.sin_family, &client_addr.sin_addr, buffer, - sizeof(buffer)); - printf("Got connection from %s:%d\n", buffer, - ntohs(client_addr.sin_port)); - - while (1) { - struct iovec iov = { .iov_base = iobuf, - .iov_len = sizeof(iobuf) }; - struct dmabuf_cmsg *dmabuf_cmsg = NULL; - struct dma_buf_sync sync = { 0 }; - struct cmsghdr *cm = NULL; - struct msghdr msg = { 0 }; - struct dmabuf_token token; - ssize_t ret; - - is_devmem = false; - printf("\n\n"); - - msg.msg_iov = &iov; - msg.msg_iovlen = 1; - msg.msg_control = ctrl_data; - msg.msg_controllen = sizeof(ctrl_data); - ret = recvmsg(client_fd, &msg, MSG_SOCK_DEVMEM); - printf("recvmsg ret=%ld\n", ret); - if (ret < 0 && (errno == EAGAIN || errno == EWOULDBLOCK)) - continue; - if (ret < 0) { - perror("recvmsg"); - continue; - } - if (ret == 0) { - printf("client exited\n"); - goto cleanup; - } - - i++; - for (cm = CMSG_FIRSTHDR(&msg); cm; cm = CMSG_NXTHDR(&msg, cm)) { - if (cm->cmsg_level != SOL_SOCKET || - (cm->cmsg_type != SCM_DEVMEM_DMABUF && - cm->cmsg_type != SCM_DEVMEM_LINEAR)) { - fprintf(stdout, "skipping non-devmem cmsg\n"); - continue; - } - - dmabuf_cmsg = (struct dmabuf_cmsg *)CMSG_DATA(cm); - is_devmem = true; - - if (cm->cmsg_type == SCM_DEVMEM_LINEAR) { - /* TODO: process data copied from skb's linear - * buffer. - */ - fprintf(stdout, - "SCM_DEVMEM_LINEAR. dmabuf_cmsg->frag_size=%u\n", - dmabuf_cmsg->frag_size); - - continue; - } - - token.token_start = dmabuf_cmsg->frag_token; - token.token_count = 1; - - total_received += dmabuf_cmsg->frag_size; - printf("received frag_page=%llu, in_page_offset=%llu, frag_offset=%llu, frag_size=%u, token=%u, total_received=%lu, dmabuf_id=%u\n", - dmabuf_cmsg->frag_offset >> PAGE_SHIFT, - dmabuf_cmsg->frag_offset % getpagesize(), - dmabuf_cmsg->frag_offset, dmabuf_cmsg->frag_size, - dmabuf_cmsg->frag_token, total_received, - dmabuf_cmsg->dmabuf_id); - - if (dmabuf_cmsg->dmabuf_id != dmabuf_id) - error(1, 0, - "received on wrong dmabuf_id: flow steering error\n"); - - if (dmabuf_cmsg->frag_size % getpagesize()) - non_page_aligned_frags++; - else - page_aligned_frags++; - - sync.flags = DMA_BUF_SYNC_READ | DMA_BUF_SYNC_START; - ioctl(buf, DMA_BUF_IOCTL_SYNC, &sync); - - if (do_validation) - validate_buffer( - ((unsigned char *)buf_mem) + - dmabuf_cmsg->frag_offset, - dmabuf_cmsg->frag_size); - else - print_nonzero_bytes( - ((unsigned char *)buf_mem) + - dmabuf_cmsg->frag_offset, - dmabuf_cmsg->frag_size); - - sync.flags = DMA_BUF_SYNC_READ | DMA_BUF_SYNC_END; - ioctl(buf, DMA_BUF_IOCTL_SYNC, &sync); - - ret = setsockopt(client_fd, SOL_SOCKET, - SO_DEVMEM_DONTNEED, &token, - sizeof(token)); - if (ret != 1) - error(1, 0, - "SO_DEVMEM_DONTNEED not enough tokens"); - } - if (!is_devmem) - error(1, 0, "flow steering error\n"); - - printf("total_received=%lu\n", total_received); - } - - fprintf(stdout, "%s: ok\n", TEST_PREFIX); - - fprintf(stdout, "page_aligned_frags=%lu, non_page_aligned_frags=%lu\n", - page_aligned_frags, non_page_aligned_frags); - - fprintf(stdout, "page_aligned_frags=%lu, non_page_aligned_frags=%lu\n", - page_aligned_frags, non_page_aligned_frags); - -cleanup: - - munmap(buf_mem, dmabuf_size); - close(client_fd); - close(socket_fd); - close(buf); - close(memfd); - close(devfd); - ynl_sock_destroy(ys); - - return 0; -} - -void run_devmem_tests(void) -{ - struct netdev_queue_id *queues; - int devfd, memfd, buf; - struct ynl_sock *ys; - size_t dmabuf_size; - size_t i = 0; - - dmabuf_size = getpagesize() * NUM_PAGES; - - create_udmabuf(&devfd, &memfd, &buf, dmabuf_size); - - /* Configure RSS to divert all traffic from our devmem queues */ - if (configure_rss()) - error(1, 0, "rss error\n"); - - queues = calloc(num_queues, sizeof(*queues)); - - if (configure_headersplit(1)) - error(1, 0, "Failed to configure header split\n"); - - if (!bind_rx_queue(ifindex, buf, queues, num_queues, &ys)) - error(1, 0, "Binding empty queues array should have failed\n"); - - for (i = 0; i < num_queues; i++) { - queues[i]._present.type = 1; - queues[i]._present.id = 1; - queues[i].type = NETDEV_QUEUE_TYPE_RX; - queues[i].id = start_queue + i; - } - - if (configure_headersplit(0)) - error(1, 0, "Failed to configure header split\n"); - - if (!bind_rx_queue(ifindex, buf, queues, num_queues, &ys)) - error(1, 0, "Configure dmabuf with header split off should have failed\n"); - - if (configure_headersplit(1)) - error(1, 0, "Failed to configure header split\n"); - - for (i = 0; i < num_queues; i++) { - queues[i]._present.type = 1; - queues[i]._present.id = 1; - queues[i].type = NETDEV_QUEUE_TYPE_RX; - queues[i].id = start_queue + i; - } - - if (bind_rx_queue(ifindex, buf, queues, num_queues, &ys)) - error(1, 0, "Failed to bind\n"); - - /* Deactivating a bound queue should not be legal */ - if (!configure_channels(num_queues, num_queues - 1)) - error(1, 0, "Deactivating a bound queue should be illegal.\n"); - - /* Closing the netlink socket does an implicit unbind */ - ynl_sock_destroy(ys); -} - -int main(int argc, char *argv[]) -{ - int is_server = 0, opt; - - while ((opt = getopt(argc, argv, "ls:c:p:v:q:t:f:")) != -1) { - switch (opt) { - case 'l': - is_server = 1; - break; - case 's': - server_ip = optarg; - break; - case 'c': - client_ip = optarg; - break; - case 'p': - port = optarg; - break; - case 'v': - do_validation = atoll(optarg); - break; - case 'q': - num_queues = atoi(optarg); - break; - case 't': - start_queue = atoi(optarg); - break; - case 'f': - ifname = optarg; - break; - case '?': - printf("unknown option: %c\n", optopt); - break; - } - } - - ifindex = if_nametoindex(ifname); - - for (; optind < argc; optind++) - printf("extra arguments: %s\n", argv[optind]); - - run_devmem_tests(); - - if (is_server) - return do_server(); - - return 0; -} diff --git a/tools/testing/selftests/net/netfilter/.gitignore b/tools/testing/selftests/net/netfilter/.gitignore index 0a64d6d0e29a..64c4f8d9aa6c 100644 --- a/tools/testing/selftests/net/netfilter/.gitignore +++ b/tools/testing/selftests/net/netfilter/.gitignore @@ -2,5 +2,6 @@ audit_logread connect_close conntrack_dump_flush +conntrack_reverse_clash sctp_collision nf_queue diff --git a/tools/testing/selftests/net/netfilter/Makefile b/tools/testing/selftests/net/netfilter/Makefile index e6c9e777fead..ffe161fac8b5 100644 --- a/tools/testing/selftests/net/netfilter/Makefile +++ b/tools/testing/selftests/net/netfilter/Makefile @@ -8,6 +8,7 @@ MNL_LDLIBS := $(shell $(HOSTPKG_CONFIG) --libs libmnl 2>/dev/null || echo -lmnl) TEST_PROGS := br_netfilter.sh bridge_brouter.sh TEST_PROGS += br_netfilter_queue.sh +TEST_PROGS += conntrack_dump_flush.sh TEST_PROGS += conntrack_icmp_related.sh TEST_PROGS += conntrack_ipip_mtu.sh TEST_PROGS += conntrack_tcp_unreplied.sh @@ -31,14 +32,14 @@ TEST_PROGS += nft_tproxy_tcp.sh TEST_PROGS += nft_tproxy_udp.sh TEST_PROGS += nft_zones_many.sh TEST_PROGS += rpath.sh +TEST_PROGS += vxlan_mtu_frag.sh TEST_PROGS += xt_string.sh TEST_PROGS_EXTENDED = nft_concat_range_perf.sh -TEST_GEN_PROGS = conntrack_dump_flush - TEST_GEN_FILES = audit_logread TEST_GEN_FILES += connect_close nf_queue +TEST_GEN_FILES += conntrack_dump_flush TEST_GEN_FILES += conntrack_reverse_clash TEST_GEN_FILES += sctp_collision @@ -54,4 +55,5 @@ TEST_FILES := lib.sh TEST_FILES += packetdrill TEST_INCLUDES := \ - ../lib.sh + ../lib.sh \ + $(wildcard ../lib/sh/*.sh) diff --git a/tools/testing/selftests/net/netfilter/config b/tools/testing/selftests/net/netfilter/config index c5fe7b34eaf1..43d8b500d391 100644 --- a/tools/testing/selftests/net/netfilter/config +++ b/tools/testing/selftests/net/netfilter/config @@ -7,6 +7,7 @@ CONFIG_BRIDGE_EBT_REDIRECT=m CONFIG_BRIDGE_EBT_T_FILTER=m CONFIG_BRIDGE_NETFILTER=m CONFIG_BRIDGE_NF_EBTABLES=m +CONFIG_BRIDGE_VLAN_FILTERING=y CONFIG_CGROUP_BPF=y CONFIG_DUMMY=m CONFIG_INET_ESP=m @@ -84,6 +85,7 @@ CONFIG_NFT_SYNPROXY=m CONFIG_NFT_TPROXY=m CONFIG_VETH=m CONFIG_VLAN_8021Q=m +CONFIG_VXLAN=m CONFIG_XFRM_USER=m CONFIG_XFRM_STATISTICS=y CONFIG_NET_PKTGEN=m diff --git a/tools/testing/selftests/net/netfilter/conntrack_dump_flush.c b/tools/testing/selftests/net/netfilter/conntrack_dump_flush.c index dc056fec993b..5f827e10717d 100644 --- a/tools/testing/selftests/net/netfilter/conntrack_dump_flush.c +++ b/tools/testing/selftests/net/netfilter/conntrack_dump_flush.c @@ -43,6 +43,8 @@ static int build_cta_tuple_v4(struct nlmsghdr *nlh, int type, mnl_attr_nest_end(nlh, nest_proto); mnl_attr_nest_end(nlh, nest); + + return 0; } static int build_cta_tuple_v6(struct nlmsghdr *nlh, int type, @@ -71,6 +73,8 @@ static int build_cta_tuple_v6(struct nlmsghdr *nlh, int type, mnl_attr_nest_end(nlh, nest_proto); mnl_attr_nest_end(nlh, nest); + + return 0; } static int build_cta_proto(struct nlmsghdr *nlh) @@ -90,6 +94,8 @@ static int build_cta_proto(struct nlmsghdr *nlh) mnl_attr_nest_end(nlh, nest_proto); mnl_attr_nest_end(nlh, nest); + + return 0; } static int conntrack_data_insert(struct mnl_socket *sock, struct nlmsghdr *nlh, @@ -98,7 +104,7 @@ static int conntrack_data_insert(struct mnl_socket *sock, struct nlmsghdr *nlh, char buf[MNL_SOCKET_BUFFER_SIZE]; struct nlmsghdr *rplnlh; unsigned int portid; - int err, ret; + int ret; portid = mnl_socket_get_portid(sock); @@ -217,7 +223,7 @@ static int conntracK_count_zone(struct mnl_socket *sock, uint16_t zone) struct nfgenmsg *nfh; struct nlattr *nest; unsigned int portid; - int err, ret; + int ret; portid = mnl_socket_get_portid(sock); @@ -264,7 +270,7 @@ static int conntrack_flush_zone(struct mnl_socket *sock, uint16_t zone) struct nfgenmsg *nfh; struct nlattr *nest; unsigned int portid; - int err, ret; + int ret; portid = mnl_socket_get_portid(sock); diff --git a/tools/testing/selftests/net/netfilter/conntrack_dump_flush.sh b/tools/testing/selftests/net/netfilter/conntrack_dump_flush.sh new file mode 100755 index 000000000000..8b0935385849 --- /dev/null +++ b/tools/testing/selftests/net/netfilter/conntrack_dump_flush.sh @@ -0,0 +1,3 @@ +#!/bin/bash + +exec unshare -n ./conntrack_dump_flush diff --git a/tools/testing/selftests/net/netfilter/conntrack_vrf.sh b/tools/testing/selftests/net/netfilter/conntrack_vrf.sh index 073e8e62d350..e95ecb37c2b1 100755 --- a/tools/testing/selftests/net/netfilter/conntrack_vrf.sh +++ b/tools/testing/selftests/net/netfilter/conntrack_vrf.sh @@ -32,6 +32,7 @@ source lib.sh IP0=172.30.30.1 IP1=172.30.30.2 +DUMMYNET=10.9.9 PFXL=30 ret=0 @@ -54,6 +55,7 @@ setup_ns ns0 ns1 ip netns exec "$ns0" sysctl -q -w net.ipv4.conf.default.rp_filter=0 ip netns exec "$ns0" sysctl -q -w net.ipv4.conf.all.rp_filter=0 ip netns exec "$ns0" sysctl -q -w net.ipv4.conf.all.rp_filter=0 +ip netns exec "$ns0" sysctl -q -w net.ipv4.conf.all.forwarding=1 if ! ip link add veth0 netns "$ns0" type veth peer name veth0 netns "$ns1" > /dev/null 2>&1; then echo "SKIP: Could not add veth device" @@ -65,13 +67,18 @@ if ! ip -net "$ns0" li add tvrf type vrf table 9876; then exit $ksft_skip fi +ip -net "$ns0" link add dummy0 type dummy + ip -net "$ns0" li set veth0 master tvrf +ip -net "$ns0" li set dummy0 master tvrf ip -net "$ns0" li set tvrf up ip -net "$ns0" li set veth0 up +ip -net "$ns0" li set dummy0 up ip -net "$ns1" li set veth0 up ip -net "$ns0" addr add $IP0/$PFXL dev veth0 ip -net "$ns1" addr add $IP1/$PFXL dev veth0 +ip -net "$ns0" addr add $DUMMYNET.1/$PFXL dev dummy0 listener_ready() { @@ -212,9 +219,35 @@ EOF fi } +test_fib() +{ +ip netns exec "$ns0" nft -f - <<EOF +flush ruleset +table ip t { + counter fibcount { } + + chain prerouting { + type filter hook prerouting priority 0; + meta iifname veth0 ip daddr $DUMMYNET.2 fib daddr oif dummy0 counter name fibcount notrack + } +} +EOF + ip -net "$ns1" route add 10.9.9.0/24 via "$IP0" dev veth0 + ip netns exec "$ns1" ping -q -w 1 -c 1 "$DUMMYNET".2 > /dev/null + + if ip netns exec "$ns0" nft list counter t fibcount | grep -q "packets 1"; then + echo "PASS: fib lookup returned exepected output interface" + else + echo "FAIL: fib lookup did not return exepected output interface" + ret=1 + return + fi +} + test_ct_zone_in test_masquerade_vrf "default" test_masquerade_vrf "pfifo" test_masquerade_veth +test_fib exit $ret diff --git a/tools/testing/selftests/net/netfilter/nft_flowtable.sh b/tools/testing/selftests/net/netfilter/nft_flowtable.sh index b3995550856a..a4ee5496f2a1 100755 --- a/tools/testing/selftests/net/netfilter/nft_flowtable.sh +++ b/tools/testing/selftests/net/netfilter/nft_flowtable.sh @@ -71,6 +71,8 @@ omtu=9000 lmtu=1500 rmtu=2000 +filesize=$((2 * 1024 * 1024)) + usage(){ echo "nft_flowtable.sh [OPTIONS]" echo @@ -81,12 +83,13 @@ usage(){ exit 1 } -while getopts "o:l:r:" o +while getopts "o:l:r:s:" o do case $o in o) omtu=$OPTARG;; l) lmtu=$OPTARG;; r) rmtu=$OPTARG;; + s) filesize=$OPTARG;; *) usage;; esac done @@ -217,18 +220,10 @@ ns2out=$(mktemp) make_file() { - name=$1 - - SIZE=$((RANDOM % (1024 * 128))) - SIZE=$((SIZE + (1024 * 8))) - TSIZE=$((SIZE * 1024)) - - dd if=/dev/urandom of="$name" bs=1024 count=$SIZE 2> /dev/null + name="$1" + sz="$2" - SIZE=$((RANDOM % 1024)) - SIZE=$((SIZE + 128)) - TSIZE=$((TSIZE + SIZE)) - dd if=/dev/urandom conf=notrunc of="$name" bs=1 count=$SIZE 2> /dev/null + head -c "$sz" < /dev/urandom > "$name" } check_counters() @@ -246,18 +241,18 @@ check_counters() local fs fs=$(du -sb "$nsin") local max_orig=${fs%%/*} - local max_repl=$((max_orig/4)) + local max_repl=$((max_orig)) # flowtable fastpath should bypass normal routing one, i.e. the counters in forward hook # should always be lower than the size of the transmitted file (max_orig). if [ "$orig_cnt" -gt "$max_orig" ];then - echo "FAIL: $what: original counter $orig_cnt exceeds expected value $max_orig" 1>&2 + echo "FAIL: $what: original counter $orig_cnt exceeds expected value $max_orig, reply counter $repl_cnt" 1>&2 ret=1 ok=0 fi if [ "$repl_cnt" -gt $max_repl ];then - echo "FAIL: $what: reply counter $repl_cnt exceeds expected value $max_repl" 1>&2 + echo "FAIL: $what: reply counter $repl_cnt exceeds expected value $max_repl, original counter $orig_cnt" 1>&2 ret=1 ok=0 fi @@ -455,7 +450,7 @@ test_tcp_forwarding_nat() return $lret } -make_file "$nsin" +make_file "$nsin" "$filesize" # First test: # No PMTU discovery, nsr1 is expected to fragment packets from ns1 to ns2 as needed. @@ -664,8 +659,16 @@ if [ "$1" = "" ]; then l=$(((RANDOM%mtu) + low)) r=$(((RANDOM%mtu) + low)) - echo "re-run with random mtus: -o $o -l $l -r $r" - $0 -o "$o" -l "$l" -r "$r" + MINSIZE=$((2 * 1000 * 1000)) + MAXSIZE=$((64 * 1000 * 1000)) + + filesize=$(((RANDOM * RANDOM) % MAXSIZE)) + if [ "$filesize" -lt "$MINSIZE" ]; then + filesize=$((filesize+MINSIZE)) + fi + + echo "re-run with random mtus and file size: -o $o -l $l -r $r -s $filesize" + $0 -o "$o" -l "$l" -r "$r" -s "$filesize" fi exit $ret diff --git a/tools/testing/selftests/net/netfilter/nft_queue.sh b/tools/testing/selftests/net/netfilter/nft_queue.sh index a9d109fcc15c..785e3875a6da 100755 --- a/tools/testing/selftests/net/netfilter/nft_queue.sh +++ b/tools/testing/selftests/net/netfilter/nft_queue.sh @@ -512,10 +512,10 @@ EOF :> "$TMPFILE1" :> "$TMPFILE2" - timeout 10 ip netns exec "$ns2" socat UDP-LISTEN:12345,fork OPEN:"$TMPFILE1",trunc & + timeout 10 ip netns exec "$ns2" socat UDP-LISTEN:12345,fork,pf=ipv4 OPEN:"$TMPFILE1",trunc & local rpid1=$! - timeout 10 ip netns exec "$ns3" socat UDP-LISTEN:12345,fork OPEN:"$TMPFILE2",trunc & + timeout 10 ip netns exec "$ns3" socat UDP-LISTEN:12345,fork,pf=ipv4 OPEN:"$TMPFILE2",trunc & local rpid2=$! ip netns exec "$nsrouter" ./nf_queue -q 12 -d 1000 & @@ -528,8 +528,8 @@ EOF # Send two packets, one should end up in ns1, other in ns2. # This is because nfqueue will delay packet for long enough so that # second packet will not find existing conntrack entry. - echo "Packet 1" | ip netns exec "$ns1" socat STDIN UDP-DATAGRAM:10.6.6.6:12345,bind=0.0.0.0:55221 - echo "Packet 2" | ip netns exec "$ns1" socat STDIN UDP-DATAGRAM:10.6.6.6:12345,bind=0.0.0.0:55221 + echo "Packet 1" | ip netns exec "$ns1" socat -u STDIN UDP-DATAGRAM:10.6.6.6:12345,bind=0.0.0.0:55221 + echo "Packet 2" | ip netns exec "$ns1" socat -u STDIN UDP-DATAGRAM:10.6.6.6:12345,bind=0.0.0.0:55221 busywait 10000 output_files_written "$TMPFILE1" "$TMPFILE2" diff --git a/tools/testing/selftests/net/netfilter/rpath.sh b/tools/testing/selftests/net/netfilter/rpath.sh index 4485fd7675ed..86ec4e68594d 100755 --- a/tools/testing/selftests/net/netfilter/rpath.sh +++ b/tools/testing/selftests/net/netfilter/rpath.sh @@ -61,9 +61,20 @@ ip -net "$ns2" a a 192.168.42.1/24 dev d0 ip -net "$ns1" a a fec0:42::2/64 dev v0 nodad ip -net "$ns2" a a fec0:42::1/64 dev d0 nodad +# avoid neighbor lookups and enable martian IPv6 pings +ns2_hwaddr=$(ip -net "$ns2" link show dev v0 | \ + sed -n 's, *link/ether \([^ ]*\) .*,\1,p') +ns1_hwaddr=$(ip -net "$ns1" link show dev v0 | \ + sed -n 's, *link/ether \([^ ]*\) .*,\1,p') +ip -net "$ns1" neigh add fec0:42::1 lladdr "$ns2_hwaddr" nud permanent dev v0 +ip -net "$ns1" neigh add fec0:23::1 lladdr "$ns2_hwaddr" nud permanent dev v0 +ip -net "$ns2" neigh add fec0:42::2 lladdr "$ns1_hwaddr" nud permanent dev d0 +ip -net "$ns2" neigh add fec0:23::2 lladdr "$ns1_hwaddr" nud permanent dev v0 + # firewall matches to test [ -n "$iptables" ] && { common='-t raw -A PREROUTING -s 192.168.0.0/16' + common+=' -p icmp --icmp-type echo-request' if ! ip netns exec "$ns2" "$iptables" $common -m rpfilter;then echo "Cannot add rpfilter rule" exit $ksft_skip @@ -72,6 +83,7 @@ ip -net "$ns2" a a fec0:42::1/64 dev d0 nodad } [ -n "$ip6tables" ] && { common='-t raw -A PREROUTING -s fec0::/16' + common+=' -p icmpv6 --icmpv6-type echo-request' if ! ip netns exec "$ns2" "$ip6tables" $common -m rpfilter;then echo "Cannot add rpfilter rule" exit $ksft_skip @@ -82,8 +94,10 @@ ip -net "$ns2" a a fec0:42::1/64 dev d0 nodad table inet t { chain c { type filter hook prerouting priority raw; - ip saddr 192.168.0.0/16 fib saddr . iif oif exists counter - ip6 saddr fec0::/16 fib saddr . iif oif exists counter + ip saddr 192.168.0.0/16 icmp type echo-request \ + fib saddr . iif oif exists counter + ip6 saddr fec0::/16 icmpv6 type echo-request \ + fib saddr . iif oif exists counter } } EOF diff --git a/tools/testing/selftests/net/netfilter/vxlan_mtu_frag.sh b/tools/testing/selftests/net/netfilter/vxlan_mtu_frag.sh new file mode 100755 index 000000000000..912cb9583af1 --- /dev/null +++ b/tools/testing/selftests/net/netfilter/vxlan_mtu_frag.sh @@ -0,0 +1,121 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +source lib.sh + +if ! modprobe -q -n br_netfilter 2>&1; then + echo "SKIP: Test needs br_netfilter kernel module" + exit $ksft_skip +fi + +cleanup() +{ + cleanup_all_ns +} + +trap cleanup EXIT + +setup_ns host vtep router + +create_topology() +{ + ip link add host-eth0 netns "$host" type veth peer name vtep-host netns "$vtep" + ip link add vtep-router netns "$vtep" type veth peer name router-vtep netns "$router" +} + +setup_host() +{ + # bring ports up + ip -n "$host" addr add 10.0.0.1/24 dev host-eth0 + ip -n "$host" link set host-eth0 up + + # Add VLAN 10,20 + for vid in 10 20; do + ip -n "$host" link add link host-eth0 name host-eth0.$vid type vlan id $vid + ip -n "$host" addr add 10.0.$vid.1/24 dev host-eth0.$vid + ip -n "$host" link set host-eth0.$vid up + done +} + +setup_vtep() +{ + # create bridge on vtep + ip -n "$vtep" link add name br0 type bridge + ip -n "$vtep" link set br0 type bridge vlan_filtering 1 + + # VLAN 10 is untagged PVID + ip -n "$vtep" link set dev vtep-host master br0 + bridge -n "$vtep" vlan add dev vtep-host vid 10 pvid untagged + + # VLAN 20 as other VID + ip -n "$vtep" link set dev vtep-host master br0 + bridge -n "$vtep" vlan add dev vtep-host vid 20 + + # single-vxlan device on vtep + ip -n "$vtep" address add dev vtep-router 60.0.0.1/24 + ip -n "$vtep" link add dev vxd type vxlan external \ + vnifilter local 60.0.0.1 remote 60.0.0.2 dstport 4789 ttl 64 + ip -n "$vtep" link set vxd master br0 + + # Add VLAN-VNI 1-1 mappings + bridge -n "$vtep" link set dev vxd vlan_tunnel on + for vid in 10 20; do + bridge -n "$vtep" vlan add dev vxd vid $vid + bridge -n "$vtep" vlan add dev vxd vid $vid tunnel_info id $vid + bridge -n "$vtep" vni add dev vxd vni $vid + done + + # bring ports up + ip -n "$vtep" link set vxd up + ip -n "$vtep" link set vtep-router up + ip -n "$vtep" link set vtep-host up + ip -n "$vtep" link set dev br0 up +} + +setup_router() +{ + # bring ports up + ip -n "$router" link set router-vtep up +} + +setup() +{ + modprobe -q br_netfilter + create_topology + setup_host + setup_vtep + setup_router +} + +test_large_mtu_untagged_traffic() +{ + ip -n "$vtep" link set vxd mtu 1000 + ip -n "$host" neigh add 10.0.0.2 lladdr ca:fe:ba:be:00:01 dev host-eth0 + ip netns exec "$host" \ + ping -q 10.0.0.2 -I host-eth0 -c 1 -W 0.5 -s2000 > /dev/null 2>&1 + return 0 +} + +test_large_mtu_tagged_traffic() +{ + for vid in 10 20; do + ip -n "$vtep" link set vxd mtu 1000 + ip -n "$host" neigh add 10.0.$vid.2 lladdr ca:fe:ba:be:00:01 dev host-eth0.$vid + ip netns exec "$host" \ + ping -q 10.0.$vid.2 -I host-eth0.$vid -c 1 -W 0.5 -s2000 > /dev/null 2>&1 + done + return 0 +} + +do_test() +{ + # Frames will be dropped so ping will not succeed + # If it doesn't panic, it passes + test_large_mtu_tagged_traffic + test_large_mtu_untagged_traffic +} + +setup && \ +echo "Test for VxLAN fragmentation with large MTU in br_netfilter:" && \ +do_test && echo "PASS!" +exit $? diff --git a/tools/testing/selftests/net/netlink-dumps.c b/tools/testing/selftests/net/netlink-dumps.c new file mode 100644 index 000000000000..07423f256f96 --- /dev/null +++ b/tools/testing/selftests/net/netlink-dumps.c @@ -0,0 +1,239 @@ +// SPDX-License-Identifier: GPL-2.0 + +#define _GNU_SOURCE + +#include <fcntl.h> +#include <stdio.h> +#include <string.h> +#include <sys/socket.h> +#include <sys/stat.h> +#include <sys/syscall.h> +#include <sys/types.h> +#include <unistd.h> + +#include <linux/genetlink.h> +#include <linux/neighbour.h> +#include <linux/netdevice.h> +#include <linux/netlink.h> +#include <linux/mqueue.h> +#include <linux/rtnetlink.h> + +#include "../kselftest_harness.h" + +#include <ynl.h> + +struct ext_ack { + int err; + + __u32 attr_offs; + __u32 miss_type; + __u32 miss_nest; + const char *str; +}; + +/* 0: no done, 1: done found, 2: extack found, -1: error */ +static int nl_get_extack(char *buf, size_t n, struct ext_ack *ea) +{ + const struct nlmsghdr *nlh; + const struct nlattr *attr; + ssize_t rem; + + for (rem = n; rem > 0; NLMSG_NEXT(nlh, rem)) { + nlh = (struct nlmsghdr *)&buf[n - rem]; + if (!NLMSG_OK(nlh, rem)) + return -1; + + if (nlh->nlmsg_type != NLMSG_DONE) + continue; + + ea->err = -*(int *)NLMSG_DATA(nlh); + + if (!(nlh->nlmsg_flags & NLM_F_ACK_TLVS)) + return 1; + + ynl_attr_for_each(attr, nlh, sizeof(int)) { + switch (ynl_attr_type(attr)) { + case NLMSGERR_ATTR_OFFS: + ea->attr_offs = ynl_attr_get_u32(attr); + break; + case NLMSGERR_ATTR_MISS_TYPE: + ea->miss_type = ynl_attr_get_u32(attr); + break; + case NLMSGERR_ATTR_MISS_NEST: + ea->miss_nest = ynl_attr_get_u32(attr); + break; + case NLMSGERR_ATTR_MSG: + ea->str = ynl_attr_get_str(attr); + break; + } + } + + return 2; + } + + return 0; +} + +static const struct { + struct nlmsghdr nlhdr; + struct ndmsg ndm; + struct nlattr ahdr; + __u32 val; +} dump_neigh_bad = { + .nlhdr = { + .nlmsg_len = sizeof(dump_neigh_bad), + .nlmsg_type = RTM_GETNEIGH, + .nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK | NLM_F_DUMP, + .nlmsg_seq = 1, + }, + .ndm = { + .ndm_family = 123, + }, + .ahdr = { + .nla_len = 4 + 4, + .nla_type = NDA_FLAGS_EXT, + }, + .val = -1, // should fail MASK validation +}; + +TEST(dump_extack) +{ + int netlink_sock; + char buf[8192]; + int one = 1; + int i, cnt; + ssize_t n; + + netlink_sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE); + ASSERT_GE(netlink_sock, 0); + + n = setsockopt(netlink_sock, SOL_NETLINK, NETLINK_CAP_ACK, + &one, sizeof(one)); + ASSERT_EQ(n, 0); + n = setsockopt(netlink_sock, SOL_NETLINK, NETLINK_EXT_ACK, + &one, sizeof(one)); + ASSERT_EQ(n, 0); + n = setsockopt(netlink_sock, SOL_NETLINK, NETLINK_GET_STRICT_CHK, + &one, sizeof(one)); + ASSERT_EQ(n, 0); + + /* Dump so many times we fill up the buffer */ + cnt = 64; + for (i = 0; i < cnt; i++) { + n = send(netlink_sock, &dump_neigh_bad, + sizeof(dump_neigh_bad), 0); + ASSERT_EQ(n, sizeof(dump_neigh_bad)); + } + + /* Read out the ENOBUFS */ + n = recv(netlink_sock, buf, sizeof(buf), MSG_DONTWAIT); + EXPECT_EQ(n, -1); + EXPECT_EQ(errno, ENOBUFS); + + for (i = 0; i < cnt; i++) { + struct ext_ack ea = {}; + + n = recv(netlink_sock, buf, sizeof(buf), MSG_DONTWAIT); + if (n < 0) { + ASSERT_GE(i, 10); + break; + } + ASSERT_GE(n, (ssize_t)sizeof(struct nlmsghdr)); + + EXPECT_EQ(nl_get_extack(buf, n, &ea), 2); + EXPECT_EQ(ea.attr_offs, + sizeof(struct nlmsghdr) + sizeof(struct ndmsg)); + } +} + +static const struct { + struct nlmsghdr nlhdr; + struct genlmsghdr genlhdr; + struct nlattr ahdr; + __u16 val; + __u16 pad; +} dump_policies = { + .nlhdr = { + .nlmsg_len = sizeof(dump_policies), + .nlmsg_type = GENL_ID_CTRL, + .nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK | NLM_F_DUMP, + .nlmsg_seq = 1, + }, + .genlhdr = { + .cmd = CTRL_CMD_GETPOLICY, + .version = 2, + }, + .ahdr = { + .nla_len = 6, + .nla_type = CTRL_ATTR_FAMILY_ID, + }, + .val = GENL_ID_CTRL, + .pad = 0, +}; + +// Sanity check for the test itself, make sure the dump doesn't fit in one msg +TEST(test_sanity) +{ + int netlink_sock; + char buf[8192]; + ssize_t n; + + netlink_sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_GENERIC); + ASSERT_GE(netlink_sock, 0); + + n = send(netlink_sock, &dump_policies, sizeof(dump_policies), 0); + ASSERT_EQ(n, sizeof(dump_policies)); + + n = recv(netlink_sock, buf, sizeof(buf), MSG_DONTWAIT); + ASSERT_GE(n, (ssize_t)sizeof(struct nlmsghdr)); + + n = recv(netlink_sock, buf, sizeof(buf), MSG_DONTWAIT); + ASSERT_GE(n, (ssize_t)sizeof(struct nlmsghdr)); + + close(netlink_sock); +} + +TEST(close_in_progress) +{ + int netlink_sock; + ssize_t n; + + netlink_sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_GENERIC); + ASSERT_GE(netlink_sock, 0); + + n = send(netlink_sock, &dump_policies, sizeof(dump_policies), 0); + ASSERT_EQ(n, sizeof(dump_policies)); + + close(netlink_sock); +} + +TEST(close_with_ref) +{ + char cookie[NOTIFY_COOKIE_LEN] = {}; + int netlink_sock, mq_fd; + struct sigevent sigev; + ssize_t n; + + netlink_sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_GENERIC); + ASSERT_GE(netlink_sock, 0); + + n = send(netlink_sock, &dump_policies, sizeof(dump_policies), 0); + ASSERT_EQ(n, sizeof(dump_policies)); + + mq_fd = syscall(__NR_mq_open, "sed", O_CREAT | O_WRONLY, 0600, 0); + ASSERT_GE(mq_fd, 0); + + memset(&sigev, 0, sizeof(sigev)); + sigev.sigev_notify = SIGEV_THREAD; + sigev.sigev_value.sival_ptr = cookie; + sigev.sigev_signo = netlink_sock; + + syscall(__NR_mq_notify, mq_fd, &sigev); + + close(netlink_sock); + + // give mqueue time to fire + usleep(100 * 1000); +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/net/openvswitch/openvswitch.sh b/tools/testing/selftests/net/openvswitch/openvswitch.sh index cc0bfae2bafa..960e1ab4dd04 100755 --- a/tools/testing/selftests/net/openvswitch/openvswitch.sh +++ b/tools/testing/selftests/net/openvswitch/openvswitch.sh @@ -171,8 +171,10 @@ ovs_add_netns_and_veths () { ovs_add_if "$1" "$2" "$4" -u || return 1 fi - [ $TRACING -eq 1 ] && ovs_netns_spawn_daemon "$1" "$ns" \ - tcpdump -i any -s 65535 + if [ $TRACING -eq 1 ]; then + ovs_netns_spawn_daemon "$1" "$3" tcpdump -l -i any -s 6553 + ovs_wait grep -q "listening on any" ${ovs_dir}/stderr + fi return 0 } diff --git a/tools/testing/selftests/net/pmtu.sh b/tools/testing/selftests/net/pmtu.sh index 569bce8b6383..66be7699c72c 100755 --- a/tools/testing/selftests/net/pmtu.sh +++ b/tools/testing/selftests/net/pmtu.sh @@ -197,6 +197,12 @@ # # - pmtu_ipv6_route_change # Same as above but with IPv6 +# +# - pmtu_ipv4_mp_exceptions +# Use the same topology as in pmtu_ipv4, but add routeable addresses +# on host A and B on lo reachable via both routers. Host A and B +# addresses have multipath routes to each other, b_r1 mtu = 1500. +# Check that PMTU exceptions are created for both paths. source lib.sh source net_helper.sh @@ -266,7 +272,8 @@ tests=" list_flush_ipv4_exception ipv4: list and flush cached exceptions 1 list_flush_ipv6_exception ipv6: list and flush cached exceptions 1 pmtu_ipv4_route_change ipv4: PMTU exception w/route replace 1 - pmtu_ipv6_route_change ipv6: PMTU exception w/route replace 1" + pmtu_ipv6_route_change ipv6: PMTU exception w/route replace 1 + pmtu_ipv4_mp_exceptions ipv4: PMTU multipath nh exceptions 1" # Addressing and routing for tests with routers: four network segments, with # index SEGMENT between 1 and 4, a common prefix (PREFIX4 or PREFIX6) and an @@ -343,6 +350,9 @@ tunnel6_a_addr="fd00:2::a" tunnel6_b_addr="fd00:2::b" tunnel6_mask="64" +host4_a_addr="192.168.99.99" +host4_b_addr="192.168.88.88" + dummy6_0_prefix="fc00:1000::" dummy6_1_prefix="fc00:1001::" dummy6_mask="64" @@ -984,6 +994,52 @@ setup_ovs_bridge() { run_cmd ip route add ${prefix6}:${b_r1}::1 via ${prefix6}:${a_r1}::2 } +setup_multipath_new() { + # Set up host A with multipath routes to host B host4_b_addr + run_cmd ${ns_a} ip addr add ${host4_a_addr} dev lo + run_cmd ${ns_a} ip nexthop add id 401 via ${prefix4}.${a_r1}.2 dev veth_A-R1 + run_cmd ${ns_a} ip nexthop add id 402 via ${prefix4}.${a_r2}.2 dev veth_A-R2 + run_cmd ${ns_a} ip nexthop add id 403 group 401/402 + run_cmd ${ns_a} ip route add ${host4_b_addr} src ${host4_a_addr} nhid 403 + + # Set up host B with multipath routes to host A host4_a_addr + run_cmd ${ns_b} ip addr add ${host4_b_addr} dev lo + run_cmd ${ns_b} ip nexthop add id 401 via ${prefix4}.${b_r1}.2 dev veth_B-R1 + run_cmd ${ns_b} ip nexthop add id 402 via ${prefix4}.${b_r2}.2 dev veth_B-R2 + run_cmd ${ns_b} ip nexthop add id 403 group 401/402 + run_cmd ${ns_b} ip route add ${host4_a_addr} src ${host4_b_addr} nhid 403 +} + +setup_multipath_old() { + # Set up host A with multipath routes to host B host4_b_addr + run_cmd ${ns_a} ip addr add ${host4_a_addr} dev lo + run_cmd ${ns_a} ip route add ${host4_b_addr} \ + src ${host4_a_addr} \ + nexthop via ${prefix4}.${a_r1}.2 weight 1 \ + nexthop via ${prefix4}.${a_r2}.2 weight 1 + + # Set up host B with multipath routes to host A host4_a_addr + run_cmd ${ns_b} ip addr add ${host4_b_addr} dev lo + run_cmd ${ns_b} ip route add ${host4_a_addr} \ + src ${host4_b_addr} \ + nexthop via ${prefix4}.${b_r1}.2 weight 1 \ + nexthop via ${prefix4}.${b_r2}.2 weight 1 +} + +setup_multipath() { + if [ "$USE_NH" = "yes" ]; then + setup_multipath_new + else + setup_multipath_old + fi + + # Set up routers with routes to dummies + run_cmd ${ns_r1} ip route add ${host4_a_addr} via ${prefix4}.${a_r1}.1 + run_cmd ${ns_r2} ip route add ${host4_a_addr} via ${prefix4}.${a_r2}.1 + run_cmd ${ns_r1} ip route add ${host4_b_addr} via ${prefix4}.${b_r1}.1 + run_cmd ${ns_r2} ip route add ${host4_b_addr} via ${prefix4}.${b_r2}.1 +} + setup() { [ "$(id -u)" -ne 0 ] && echo " need to run as root" && return $ksft_skip @@ -1076,23 +1132,15 @@ link_get_mtu() { } route_get_dst_exception() { - ns_cmd="${1}" - dst="${2}" - dsfield="${3}" + ns_cmd="${1}"; shift - if [ -z "${dsfield}" ]; then - dsfield=0 - fi - - ${ns_cmd} ip route get "${dst}" dsfield "${dsfield}" + ${ns_cmd} ip route get "$@" } route_get_dst_pmtu_from_exception() { - ns_cmd="${1}" - dst="${2}" - dsfield="${3}" + ns_cmd="${1}"; shift - mtu_parse "$(route_get_dst_exception "${ns_cmd}" "${dst}" "${dsfield}")" + mtu_parse "$(route_get_dst_exception "${ns_cmd}" "$@")" } check_pmtu_value() { @@ -1235,10 +1283,10 @@ test_pmtu_ipv4_dscp_icmp_exception() { run_cmd "${ns_a}" ping -q -M want -Q "${dsfield}" -c 1 -w 1 -s "${len}" "${dst2}" # Check that exceptions have been created with the correct PMTU - pmtu_1="$(route_get_dst_pmtu_from_exception "${ns_a}" "${dst1}" "${policy_mark}")" + pmtu_1="$(route_get_dst_pmtu_from_exception "${ns_a}" "${dst1}" dsfield "${policy_mark}")" check_pmtu_value "1400" "${pmtu_1}" "exceeding MTU" || return 1 - pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" "${dst2}" "${policy_mark}")" + pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" "${dst2}" dsfield "${policy_mark}")" check_pmtu_value "1500" "${pmtu_2}" "exceeding MTU" || return 1 } @@ -1285,9 +1333,9 @@ test_pmtu_ipv4_dscp_udp_exception() { UDP:"${dst2}":50000,tos="${dsfield}" # Check that exceptions have been created with the correct PMTU - pmtu_1="$(route_get_dst_pmtu_from_exception "${ns_a}" "${dst1}" "${policy_mark}")" + pmtu_1="$(route_get_dst_pmtu_from_exception "${ns_a}" "${dst1}" dsfield "${policy_mark}")" check_pmtu_value "1400" "${pmtu_1}" "exceeding MTU" || return 1 - pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" "${dst2}" "${policy_mark}")" + pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" "${dst2}" dsfield "${policy_mark}")" check_pmtu_value "1500" "${pmtu_2}" "exceeding MTU" || return 1 } @@ -2056,7 +2104,7 @@ check_running() { pid=${1} cmd=${2} - [ "$(cat /proc/${pid}/cmdline 2>/dev/null | tr -d '\0')" = "{cmd}" ] + [ "$(cat /proc/${pid}/cmdline 2>/dev/null | tr -d '\0')" = "${cmd}" ] } test_cleanup_vxlanX_exception() { @@ -2329,6 +2377,36 @@ test_pmtu_ipv6_route_change() { test_pmtu_ipvX_route_change 6 } +test_pmtu_ipv4_mp_exceptions() { + setup namespaces routing multipath || return $ksft_skip + + trace "${ns_a}" veth_A-R1 "${ns_r1}" veth_R1-A \ + "${ns_r1}" veth_R1-B "${ns_b}" veth_B-R1 \ + "${ns_a}" veth_A-R2 "${ns_r2}" veth_R2-A \ + "${ns_r2}" veth_R2-B "${ns_b}" veth_B-R2 + + # Set up initial MTU values + mtu "${ns_a}" veth_A-R1 2000 + mtu "${ns_r1}" veth_R1-A 2000 + mtu "${ns_r1}" veth_R1-B 1500 + mtu "${ns_b}" veth_B-R1 1500 + + mtu "${ns_a}" veth_A-R2 2000 + mtu "${ns_r2}" veth_R2-A 2000 + mtu "${ns_r2}" veth_R2-B 1500 + mtu "${ns_b}" veth_B-R2 1500 + + # Ping and expect two nexthop exceptions for two routes + run_cmd ${ns_a} ping -q -M want -i 0.1 -c 1 -s 1800 "${host4_b_addr}" + + # Check that exceptions have been created with the correct PMTU + pmtu_a_R1="$(route_get_dst_pmtu_from_exception "${ns_a}" "${host4_b_addr}" oif veth_A-R1)" + pmtu_a_R2="$(route_get_dst_pmtu_from_exception "${ns_a}" "${host4_b_addr}" oif veth_A-R2)" + + check_pmtu_value "1500" "${pmtu_a_R1}" "exceeding MTU (veth_A-R1)" || return 1 + check_pmtu_value "1500" "${pmtu_a_R2}" "exceeding MTU (veth_A-R2)" || return 1 +} + usage() { echo echo "$0 [OPTIONS] [TEST]..." diff --git a/tools/testing/selftests/net/psock_fanout.c b/tools/testing/selftests/net/psock_fanout.c index 4f31e92ebd96..84c524357075 100644 --- a/tools/testing/selftests/net/psock_fanout.c +++ b/tools/testing/selftests/net/psock_fanout.c @@ -48,6 +48,7 @@ #include <string.h> #include <sys/mman.h> #include <sys/socket.h> +#include <sys/ioctl.h> #include <sys/stat.h> #include <sys/types.h> #include <unistd.h> @@ -59,6 +60,33 @@ static uint32_t cfg_max_num_members; +static void loopback_set_up_down(int state_up) +{ + struct ifreq ifreq = {}; + int fd, err; + + fd = socket(AF_PACKET, SOCK_RAW, 0); + if (fd < 0) { + perror("socket loopback"); + exit(1); + } + strcpy(ifreq.ifr_name, "lo"); + err = ioctl(fd, SIOCGIFFLAGS, &ifreq); + if (err) { + perror("SIOCGIFFLAGS"); + exit(1); + } + if (state_up != !!(ifreq.ifr_flags & IFF_UP)) { + ifreq.ifr_flags ^= IFF_UP; + err = ioctl(fd, SIOCSIFFLAGS, &ifreq); + if (err) { + perror("SIOCSIFFLAGS"); + exit(1); + } + } + close(fd); +} + /* Open a socket in a given fanout mode. * @return -1 if mode is bad, a valid socket otherwise */ static int sock_fanout_open(uint16_t typeflags, uint16_t group_id) @@ -251,6 +279,41 @@ static int sock_fanout_read(int fds[], char *rings[], const int expect[]) return 0; } +/* Test that creating/joining a fanout group fails for unbound socket without + * a specified protocol + */ +static void test_unbound_fanout(void) +{ + int val, fd0, fd1, err; + + fprintf(stderr, "test: unbound fanout\n"); + fd0 = socket(PF_PACKET, SOCK_RAW, 0); + if (fd0 < 0) { + perror("socket packet"); + exit(1); + } + /* Try to create a new fanout group. Should fail. */ + val = (PACKET_FANOUT_HASH << 16) | 1; + err = setsockopt(fd0, SOL_PACKET, PACKET_FANOUT, &val, sizeof(val)); + if (!err) { + fprintf(stderr, "ERROR: unbound socket fanout create\n"); + exit(1); + } + fd1 = sock_fanout_open(PACKET_FANOUT_HASH, 1); + if (fd1 == -1) { + fprintf(stderr, "ERROR: failed to open HASH socket\n"); + exit(1); + } + /* Try to join an existing fanout group. Should fail. */ + err = setsockopt(fd0, SOL_PACKET, PACKET_FANOUT, &val, sizeof(val)); + if (!err) { + fprintf(stderr, "ERROR: unbound socket fanout join\n"); + exit(1); + } + close(fd0); + close(fd1); +} + /* Test illegal mode + flag combination */ static void test_control_single(void) { @@ -264,17 +327,22 @@ static void test_control_single(void) } /* Test illegal group with different modes or flags */ -static void test_control_group(void) +static void test_control_group(int toggle) { int fds[2]; - fprintf(stderr, "test: control multiple sockets\n"); + if (toggle) + fprintf(stderr, "test: control multiple sockets with link down toggle\n"); + else + fprintf(stderr, "test: control multiple sockets\n"); fds[0] = sock_fanout_open(PACKET_FANOUT_HASH, 0); if (fds[0] == -1) { fprintf(stderr, "ERROR: failed to open HASH socket\n"); exit(1); } + if (toggle) + loopback_set_up_down(0); if (sock_fanout_open(PACKET_FANOUT_HASH | PACKET_FANOUT_FLAG_DEFRAG, 0) != -1) { fprintf(stderr, "ERROR: joined group with wrong flag defrag\n"); @@ -294,6 +362,8 @@ static void test_control_group(void) fprintf(stderr, "ERROR: failed to join group\n"); exit(1); } + if (toggle) + loopback_set_up_down(1); if (close(fds[1]) || close(fds[0])) { fprintf(stderr, "ERROR: closing sockets\n"); exit(1); @@ -488,8 +558,10 @@ int main(int argc, char **argv) const int expect_uniqueid[2][2] = { { 20, 20}, { 20, 20 } }; int port_off = 2, tries = 20, ret; + test_unbound_fanout(); test_control_single(); - test_control_group(); + test_control_group(0); + test_control_group(1); test_control_group_max_num_members(); test_unique_fanout_group_ids(); diff --git a/tools/testing/selftests/net/rds/.gitignore b/tools/testing/selftests/net/rds/.gitignore new file mode 100644 index 000000000000..1c6f04e2aa11 --- /dev/null +++ b/tools/testing/selftests/net/rds/.gitignore @@ -0,0 +1 @@ +include.sh diff --git a/tools/testing/selftests/net/rds/Makefile b/tools/testing/selftests/net/rds/Makefile index cf30307a829b..612a7219990e 100644 --- a/tools/testing/selftests/net/rds/Makefile +++ b/tools/testing/selftests/net/rds/Makefile @@ -3,11 +3,10 @@ all: @echo mk_build_dir="$(shell pwd)" > include.sh -TEST_PROGS := run.sh \ - test.py +TEST_PROGS := run.sh -TEST_FILES := include.sh +TEST_FILES := include.sh test.py -EXTRA_CLEAN := /tmp/rds_logs +EXTRA_CLEAN := /tmp/rds_logs include.sh include ../../lib.mk diff --git a/tools/testing/selftests/net/rds/test.py b/tools/testing/selftests/net/rds/test.py index e6bb109bcead..4a7178d11193 100755 --- a/tools/testing/selftests/net/rds/test.py +++ b/tools/testing/selftests/net/rds/test.py @@ -14,8 +14,11 @@ import sys import atexit from pwd import getpwuid from os import stat -from lib.py import ip +# Allow utils module to be imported from different directory +this_dir = os.path.dirname(os.path.realpath(__file__)) +sys.path.append(os.path.join(this_dir, "../")) +from lib.py.utils import ip libc = ctypes.cdll.LoadLibrary('libc.so.6') setns = libc.setns diff --git a/tools/testing/selftests/net/rtnetlink.sh b/tools/testing/selftests/net/rtnetlink.sh index bdf6f10d0558..2e8243a65b50 100755 --- a/tools/testing/selftests/net/rtnetlink.sh +++ b/tools/testing/selftests/net/rtnetlink.sh @@ -21,14 +21,15 @@ ALL_TESTS=" kci_test_vrf kci_test_encap kci_test_macsec - kci_test_macsec_offload kci_test_ipsec kci_test_ipsec_offload kci_test_fdb_get + kci_test_fdb_del kci_test_neigh_get kci_test_bridge_parent_id kci_test_address_proto kci_test_enslave_bonding + kci_test_mngtmpaddr " devdummy="test-dummy0" @@ -44,6 +45,7 @@ check_err() if [ $ret -eq 0 ]; then ret=$1 fi + [ -n "$2" ] && echo "$2" } # same but inverted -- used when command must fail for test to pass @@ -559,73 +561,6 @@ kci_test_macsec() end_test "PASS: macsec" } -kci_test_macsec_offload() -{ - sysfsd=/sys/kernel/debug/netdevsim/netdevsim0/ports/0/ - sysfsnet=/sys/bus/netdevsim/devices/netdevsim0/net/ - probed=false - local ret=0 - run_cmd_grep "^Usage: ip macsec" ip macsec help - if [ $? -ne 0 ]; then - end_test "SKIP: macsec: iproute2 too old" - return $ksft_skip - fi - - if ! mount | grep -q debugfs; then - mount -t debugfs none /sys/kernel/debug/ &> /dev/null - fi - - # setup netdevsim since dummydev doesn't have offload support - if [ ! -w /sys/bus/netdevsim/new_device ] ; then - run_cmd modprobe -q netdevsim - - if [ $ret -ne 0 ]; then - end_test "SKIP: macsec_offload can't load netdevsim" - return $ksft_skip - fi - probed=true - fi - - echo "0" > /sys/bus/netdevsim/new_device - while [ ! -d $sysfsnet ] ; do :; done - udevadm settle - dev=`ls $sysfsnet` - - ip link set $dev up - if [ ! -d $sysfsd ] ; then - end_test "FAIL: macsec_offload can't create device $dev" - return 1 - fi - run_cmd_grep 'macsec-hw-offload: on' ethtool -k $dev - if [ $? -eq 1 ] ; then - end_test "FAIL: macsec_offload netdevsim doesn't support MACsec offload" - return 1 - fi - run_cmd ip link add link $dev kci_macsec1 type macsec port 4 offload mac - run_cmd ip link add link $dev kci_macsec2 type macsec address "aa:bb:cc:dd:ee:ff" port 5 offload mac - run_cmd ip link add link $dev kci_macsec3 type macsec sci abbacdde01020304 offload mac - run_cmd_fail ip link add link $dev kci_macsec4 type macsec port 8 offload mac - - msname=kci_macsec1 - run_cmd ip macsec add "$msname" tx sa 0 pn 1024 on key 01 12345678901234567890123456789012 - run_cmd ip macsec add "$msname" rx port 1234 address "1c:ed:de:ad:be:ef" - run_cmd ip macsec add "$msname" rx port 1234 address "1c:ed:de:ad:be:ef" sa 0 pn 1 on \ - key 00 0123456789abcdef0123456789abcdef - run_cmd_fail ip macsec add "$msname" rx port 1235 address "1c:ed:de:ad:be:ef" - # clean up any leftovers - for msdev in kci_macsec{1,2,3,4} ; do - ip link del $msdev 2> /dev/null - done - echo 0 > /sys/bus/netdevsim/del_device - $probed && rmmod netdevsim - - if [ $ret -ne 0 ]; then - end_test "FAIL: macsec_offload" - return 1 - fi - end_test "PASS: macsec_offload" -} - #------------------------------------------------------------------- # Example commands # ip x s add proto esp src 14.0.0.52 dst 14.0.0.70 \ @@ -809,10 +744,10 @@ kci_test_ipsec_offload() # does driver have correct offload info run_cmd diff $sysfsf - << EOF SA count=2 tx=3 -sa[0] tx ipaddr=0x00000000 00000000 00000000 00000000 +sa[0] tx ipaddr=$dstip sa[0] spi=0x00000009 proto=0x32 salt=0x61626364 crypt=1 sa[0] key=0x34333231 38373635 32313039 36353433 -sa[1] rx ipaddr=0x00000000 00000000 00000000 037ba8c0 +sa[1] rx ipaddr=$srcip sa[1] spi=0x00000009 proto=0x32 salt=0x61626364 crypt=1 sa[1] key=0x34333231 38373635 32313039 36353433 EOF @@ -1065,6 +1000,45 @@ kci_test_fdb_get() end_test "PASS: bridge fdb get" } +kci_test_fdb_del() +{ + local test_mac=de:ad:be:ef:13:37 + local dummydev="dummy1" + local brdev="test-br0" + local ret=0 + + run_cmd_grep 'bridge fdb get' bridge fdb help + if [ $? -ne 0 ]; then + end_test "SKIP: fdb del tests: iproute2 too old" + return $ksft_skip + fi + + setup_ns testns + if [ $? -ne 0 ]; then + end_test "SKIP fdb del tests: cannot add net namespace $testns" + return $ksft_skip + fi + IP="ip -netns $testns" + BRIDGE="bridge -netns $testns" + run_cmd $IP link add $dummydev type dummy + run_cmd $IP link add name $brdev type bridge vlan_filtering 1 + run_cmd $IP link set dev $dummydev master $brdev + run_cmd $BRIDGE fdb add $test_mac dev $dummydev master static vlan 1 + run_cmd $BRIDGE vlan del vid 1 dev $dummydev + run_cmd $BRIDGE fdb get $test_mac br $brdev vlan 1 + run_cmd $BRIDGE fdb del $test_mac dev $dummydev master vlan 1 + run_cmd_fail $BRIDGE fdb get $test_mac br $brdev vlan 1 + + ip netns del $testns &>/dev/null + + if [ $ret -ne 0 ]; then + end_test "FAIL: bridge fdb del" + return 1 + fi + + end_test "PASS: bridge fdb del" +} + kci_test_neigh_get() { dstmac=de:ad:be:ef:13:37 @@ -1267,6 +1241,99 @@ kci_test_enslave_bonding() ip netns del "$testns" } +# Called to validate the addresses on $IFNAME: +# +# 1. Every `temporary` address must have a matching `mngtmpaddr` +# 2. Every `mngtmpaddr` address must have some un`deprecated` `temporary` +# +# If the mngtmpaddr or tempaddr checking failed, return 0 and stop slowwait +validate_mngtmpaddr() +{ + local dev=$1 + local prefix="" + local addr_list=$(ip -j -n $testns addr show dev ${dev}) + local temp_addrs=$(echo ${addr_list} | \ + jq -r '.[].addr_info[] | select(.temporary == true) | .local') + local mng_prefixes=$(echo ${addr_list} | \ + jq -r '.[].addr_info[] | select(.mngtmpaddr == true) | .local' | \ + cut -d: -f1-4 | tr '\n' ' ') + local undep_prefixes=$(echo ${addr_list} | \ + jq -r '.[].addr_info[] | select(.temporary == true and .deprecated != true) | .local' | \ + cut -d: -f1-4 | tr '\n' ' ') + + # 1. All temporary addresses (temp and dep) must have a matching mngtmpaddr + for address in ${temp_addrs}; do + prefix=$(echo ${address} | cut -d: -f1-4) + if [[ ! " ${mng_prefixes} " =~ " $prefix " ]]; then + check_err 1 "FAIL: Temporary $address with no matching mngtmpaddr!"; + return 0 + fi + done + + # 2. All mngtmpaddr addresses must have a temporary address (not dep) + for prefix in ${mng_prefixes}; do + if [[ ! " ${undep_prefixes} " =~ " $prefix " ]]; then + check_err 1 "FAIL: No undeprecated temporary in $prefix!"; + return 0 + fi + done + + return 1 +} + +kci_test_mngtmpaddr() +{ + local ret=0 + + setup_ns testns + if [ $? -ne 0 ]; then + end_test "SKIP mngtmpaddr tests: cannot add net namespace $testns" + return $ksft_skip + fi + + # 1. Create a dummy Ethernet interface + run_cmd ip -n $testns link add ${devdummy} type dummy + run_cmd ip -n $testns link set ${devdummy} up + run_cmd ip netns exec $testns sysctl -w net.ipv6.conf.${devdummy}.use_tempaddr=1 + run_cmd ip netns exec $testns sysctl -w net.ipv6.conf.${devdummy}.temp_prefered_lft=10 + run_cmd ip netns exec $testns sysctl -w net.ipv6.conf.${devdummy}.temp_valid_lft=25 + run_cmd ip netns exec $testns sysctl -w net.ipv6.conf.${devdummy}.max_desync_factor=1 + + # 2. Create several mngtmpaddr addresses on that interface. + # with temp_*_lft configured to be pretty short (10 and 35 seconds + # for prefer/valid respectively) + for i in $(seq 1 9); do + run_cmd ip -n $testns addr add 2001:db8:7e57:${i}::1/64 mngtmpaddr dev ${devdummy} + done + + # 3. Confirm that a preferred temporary address exists for each mngtmpaddr + # address at all times, polling once per second for 30 seconds. + slowwait 30 validate_mngtmpaddr ${devdummy} + + # 4. Delete each mngtmpaddr address, one at a time (alternating between + # deleting and merely un-mngtmpaddr-ing), and confirm that the other + # mngtmpaddr addresses still have preferred temporaries. + for i in $(seq 1 9); do + (( $i % 4 == 0 )) && mng_flag="mngtmpaddr" || mng_flag="" + if (( $i % 2 == 0 )); then + run_cmd ip -n $testns addr del 2001:db8:7e57:${i}::1/64 $mng_flag dev ${devdummy} + else + run_cmd ip -n $testns addr change 2001:db8:7e57:${i}::1/64 dev ${devdummy} + fi + # the temp addr should be deleted + validate_mngtmpaddr ${devdummy} + done + + if [ $ret -ne 0 ]; then + end_test "FAIL: mngtmpaddr add/remove incorrect" + else + end_test "PASS: mngtmpaddr add/remove correctly" + fi + + ip netns del "$testns" + return $ret +} + kci_test_rtnl() { local current_test diff --git a/tools/testing/selftests/net/tcp_ao/lib/aolib.h b/tools/testing/selftests/net/tcp_ao/lib/aolib.h index db44e77428dd..5db2f65cddc4 100644 --- a/tools/testing/selftests/net/tcp_ao/lib/aolib.h +++ b/tools/testing/selftests/net/tcp_ao/lib/aolib.h @@ -46,6 +46,7 @@ static inline char *test_snprintf(const char *fmt, va_list vargs) va_copy(tmp, vargs); n = vsnprintf(ret, size, fmt, tmp); + va_end(tmp); if (n < 0) return NULL; diff --git a/tools/testing/selftests/net/tcp_ao/setsockopt-closed.c b/tools/testing/selftests/net/tcp_ao/setsockopt-closed.c index 084db4ecdff6..0abb9807d742 100644 --- a/tools/testing/selftests/net/tcp_ao/setsockopt-closed.c +++ b/tools/testing/selftests/net/tcp_ao/setsockopt-closed.c @@ -6,6 +6,8 @@ static union tcp_addr tcp_md5_client; +#define FILTER_TEST_NKEYS 16 + static int test_port = 7788; static void make_listen(int sk) { @@ -813,23 +815,197 @@ static void duplicate_tests(void) setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, EEXIST, "duplicate: SendID differs"); } +static void fetch_all_keys(int sk, struct tcp_ao_getsockopt *keys) +{ + socklen_t optlen = sizeof(struct tcp_ao_getsockopt); + + memset(keys, 0, sizeof(struct tcp_ao_getsockopt) * FILTER_TEST_NKEYS); + keys[0].get_all = 1; + keys[0].nkeys = FILTER_TEST_NKEYS; + if (getsockopt(sk, IPPROTO_TCP, TCP_AO_GET_KEYS, &keys[0], &optlen)) + test_error("getsockopt"); +} + +static int prepare_test_keys(struct tcp_ao_getsockopt *keys) +{ + const char *test_password = "Test password number "; + struct tcp_ao_add test_ao[FILTER_TEST_NKEYS]; + char test_password_scratch[64] = {}; + u8 rcvid = 100, sndid = 100; + int sk; + + sk = socket(test_family, SOCK_STREAM, IPPROTO_TCP); + if (sk < 0) + test_error("socket()"); + + for (int i = 0; i < FILTER_TEST_NKEYS; i++) { + snprintf(test_password_scratch, 64, "%s %d", test_password, i); + test_prepare_key(&test_ao[i], DEFAULT_TEST_ALGO, this_ip_dest, + false, false, DEFAULT_TEST_PREFIX, 0, sndid++, + rcvid++, 0, 0, strlen(test_password_scratch), + test_password_scratch); + } + test_ao[0].set_current = 1; + test_ao[1].set_rnext = 1; + /* One key with a different addr and overlapping sndid, rcvid */ + tcp_addr_to_sockaddr_in(&test_ao[2].addr, &this_ip_addr, 0); + test_ao[2].sndid = 100; + test_ao[2].rcvid = 100; + + /* Add keys in a random order */ + for (int i = 0; i < FILTER_TEST_NKEYS; i++) { + int randidx = rand() % (FILTER_TEST_NKEYS - i); + + if (setsockopt(sk, IPPROTO_TCP, TCP_AO_ADD_KEY, + &test_ao[randidx], sizeof(struct tcp_ao_add))) + test_error("setsockopt()"); + memcpy(&test_ao[randidx], &test_ao[FILTER_TEST_NKEYS - 1 - i], + sizeof(struct tcp_ao_add)); + } + + fetch_all_keys(sk, keys); + + return sk; +} + +/* Assumes passwords are unique */ +static int compare_mkts(struct tcp_ao_getsockopt *expected, int nexpected, + struct tcp_ao_getsockopt *actual, int nactual) +{ + int matches = 0; + + for (int i = 0; i < nexpected; i++) { + for (int j = 0; j < nactual; j++) { + if (memcmp(expected[i].key, actual[j].key, + TCP_AO_MAXKEYLEN) == 0) + matches++; + } + } + return nexpected - matches; +} + +static void filter_keys_checked(int sk, struct tcp_ao_getsockopt *filter, + struct tcp_ao_getsockopt *expected, + unsigned int nexpected, const char *tst) +{ + struct tcp_ao_getsockopt filtered_keys[FILTER_TEST_NKEYS] = {}; + struct tcp_ao_getsockopt all_keys[FILTER_TEST_NKEYS] = {}; + socklen_t len = sizeof(struct tcp_ao_getsockopt); + + fetch_all_keys(sk, all_keys); + memcpy(&filtered_keys[0], filter, sizeof(struct tcp_ao_getsockopt)); + filtered_keys[0].nkeys = FILTER_TEST_NKEYS; + if (getsockopt(sk, IPPROTO_TCP, TCP_AO_GET_KEYS, filtered_keys, &len)) + test_error("getsockopt"); + if (filtered_keys[0].nkeys != nexpected) { + test_fail("wrong nr of keys, expected %u got %u", nexpected, + filtered_keys[0].nkeys); + goto out_close; + } + if (compare_mkts(expected, nexpected, filtered_keys, + filtered_keys[0].nkeys)) { + test_fail("got wrong keys back"); + goto out_close; + } + test_ok("filter keys: %s", tst); + +out_close: + close(sk); + memset(filter, 0, sizeof(struct tcp_ao_getsockopt)); +} + +static void filter_tests(void) +{ + struct tcp_ao_getsockopt original_keys[FILTER_TEST_NKEYS]; + struct tcp_ao_getsockopt expected_keys[FILTER_TEST_NKEYS]; + struct tcp_ao_getsockopt filter = {}; + int sk, f, nmatches; + socklen_t len; + + f = 2; + sk = prepare_test_keys(original_keys); + filter.rcvid = original_keys[f].rcvid; + filter.sndid = original_keys[f].sndid; + memcpy(&filter.addr, &original_keys[f].addr, + sizeof(original_keys[f].addr)); + filter.prefix = original_keys[f].prefix; + filter_keys_checked(sk, &filter, &original_keys[f], 1, + "by sndid, rcvid, address"); + + f = -1; + sk = prepare_test_keys(original_keys); + for (int i = 0; i < original_keys[0].nkeys; i++) { + if (original_keys[i].is_current) { + f = i; + break; + } + } + if (f < 0) + test_error("No current key after adding one"); + filter.is_current = 1; + filter_keys_checked(sk, &filter, &original_keys[f], 1, "by is_current"); + + f = -1; + sk = prepare_test_keys(original_keys); + for (int i = 0; i < original_keys[0].nkeys; i++) { + if (original_keys[i].is_rnext) { + f = i; + break; + } + } + if (f < 0) + test_error("No rnext key after adding one"); + filter.is_rnext = 1; + filter_keys_checked(sk, &filter, &original_keys[f], 1, "by is_rnext"); + + f = -1; + nmatches = 0; + sk = prepare_test_keys(original_keys); + for (int i = 0; i < original_keys[0].nkeys; i++) { + if (original_keys[i].sndid == 100) { + f = i; + memcpy(&expected_keys[nmatches], &original_keys[i], + sizeof(struct tcp_ao_getsockopt)); + nmatches++; + } + } + if (f < 0) + test_error("No key for sndid 100"); + if (nmatches != 2) + test_error("Should have 2 keys with sndid 100"); + filter.rcvid = original_keys[f].rcvid; + filter.sndid = original_keys[f].sndid; + filter.addr.ss_family = test_family; + filter_keys_checked(sk, &filter, expected_keys, nmatches, + "by sndid, rcvid"); + + sk = prepare_test_keys(original_keys); + filter.get_all = 1; + filter.nkeys = FILTER_TEST_NKEYS / 2; + len = sizeof(struct tcp_ao_getsockopt); + if (getsockopt(sk, IPPROTO_TCP, TCP_AO_GET_KEYS, &filter, &len)) + test_error("getsockopt"); + if (filter.nkeys == FILTER_TEST_NKEYS) + test_ok("filter keys: correct nkeys when in.nkeys < matches"); + else + test_fail("filter keys: wrong nkeys, expected %u got %u", + FILTER_TEST_NKEYS, filter.nkeys); +} + static void *client_fn(void *arg) { if (inet_pton(TEST_FAMILY, __TEST_CLIENT_IP(2), &tcp_md5_client) != 1) test_error("Can't convert ip address"); extend_tests(); einval_tests(); + filter_tests(); duplicate_tests(); - /* - * TODO: check getsockopt(TCP_AO_GET_KEYS) with different filters - * returning proper nr & keys; - */ return NULL; } int main(int argc, char *argv[]) { - test_init(121, client_fn, NULL); + test_init(126, client_fn, NULL); return 0; } diff --git a/tools/testing/selftests/net/tls.c b/tools/testing/selftests/net/tls.c index f27a12d2a2c9..1a706d03bb6b 100644 --- a/tools/testing/selftests/net/tls.c +++ b/tools/testing/selftests/net/tls.c @@ -266,6 +266,25 @@ TEST_F(tls_basic, bad_cipher) EXPECT_EQ(setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12, sizeof(struct tls12_crypto_info_aes_gcm_128)), -1); } +TEST_F(tls_basic, recseq_wrap) +{ + struct tls_crypto_info_keys tls12; + char const *test_str = "test_read"; + int send_len = 10; + + if (self->notls) + SKIP(return, "no TLS support"); + + tls_crypto_info_init(TLS_1_2_VERSION, TLS_CIPHER_AES_GCM_128, &tls12); + memset(&tls12.aes128.rec_seq, 0xff, sizeof(tls12.aes128.rec_seq)); + + ASSERT_EQ(setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12, tls12.len), 0); + ASSERT_EQ(setsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12, tls12.len), 0); + + EXPECT_EQ(send(self->fd, test_str, send_len, 0), -1); + EXPECT_EQ(errno, EBADMSG); +} + FIXTURE(tls) { int fd, cfd; diff --git a/tools/testing/selftests/net/txtimestamp.c b/tools/testing/selftests/net/txtimestamp.c index d626f22f9550..dae91eb97d69 100644 --- a/tools/testing/selftests/net/txtimestamp.c +++ b/tools/testing/selftests/net/txtimestamp.c @@ -77,6 +77,8 @@ static bool cfg_epollet; static bool cfg_do_listen; static uint16_t dest_port = 9000; static bool cfg_print_nsec; +static uint32_t ts_opt_id; +static bool cfg_use_cmsg_opt_id; static struct sockaddr_in daddr; static struct sockaddr_in6 daddr6; @@ -136,12 +138,13 @@ static void validate_key(int tskey, int tstype) /* compare key for each subsequent request * must only test for one type, the first one requested */ - if (saved_tskey == -1) + if (saved_tskey == -1 || cfg_use_cmsg_opt_id) saved_tskey_type = tstype; else if (saved_tskey_type != tstype) return; stepsize = cfg_proto == SOCK_STREAM ? cfg_payload_len : 1; + stepsize = cfg_use_cmsg_opt_id ? 0 : stepsize; if (tskey != saved_tskey + stepsize) { fprintf(stderr, "ERROR: key %d, expected %d\n", tskey, saved_tskey + stepsize); @@ -484,7 +487,7 @@ static void fill_header_udp(void *p, bool is_ipv4) static void do_test(int family, unsigned int report_opt) { - char control[CMSG_SPACE(sizeof(uint32_t))]; + char control[2 * CMSG_SPACE(sizeof(uint32_t))]; struct sockaddr_ll laddr; unsigned int sock_opt; struct cmsghdr *cmsg; @@ -624,18 +627,32 @@ static void do_test(int family, unsigned int report_opt) msg.msg_iov = &iov; msg.msg_iovlen = 1; - if (cfg_use_cmsg) { + if (cfg_use_cmsg || cfg_use_cmsg_opt_id) { memset(control, 0, sizeof(control)); msg.msg_control = control; - msg.msg_controllen = sizeof(control); + msg.msg_controllen = cfg_use_cmsg * CMSG_SPACE(sizeof(uint32_t)); + msg.msg_controllen += cfg_use_cmsg_opt_id * CMSG_SPACE(sizeof(uint32_t)); - cmsg = CMSG_FIRSTHDR(&msg); - cmsg->cmsg_level = SOL_SOCKET; - cmsg->cmsg_type = SO_TIMESTAMPING; - cmsg->cmsg_len = CMSG_LEN(sizeof(uint32_t)); + cmsg = NULL; + if (cfg_use_cmsg) { + cmsg = CMSG_FIRSTHDR(&msg); + cmsg->cmsg_level = SOL_SOCKET; + cmsg->cmsg_type = SO_TIMESTAMPING; + cmsg->cmsg_len = CMSG_LEN(sizeof(uint32_t)); + + *((uint32_t *)CMSG_DATA(cmsg)) = report_opt; + } + if (cfg_use_cmsg_opt_id) { + cmsg = cmsg ? CMSG_NXTHDR(&msg, cmsg) : CMSG_FIRSTHDR(&msg); + cmsg->cmsg_level = SOL_SOCKET; + cmsg->cmsg_type = SCM_TS_OPT_ID; + cmsg->cmsg_len = CMSG_LEN(sizeof(uint32_t)); + + *((uint32_t *)CMSG_DATA(cmsg)) = ts_opt_id; + saved_tskey = ts_opt_id; + } - *((uint32_t *) CMSG_DATA(cmsg)) = report_opt; } val = sendmsg(fd, &msg, 0); @@ -685,6 +702,7 @@ static void __attribute__((noreturn)) usage(const char *filepath) " -L listen on hostname and port\n" " -n: set no-payload option\n" " -N: print timestamps and durations in nsec (instead of usec)\n" + " -o N: use SCM_TS_OPT_ID control message to provide N as tskey\n" " -p N: connect to port N\n" " -P: use PF_PACKET\n" " -r: use raw\n" @@ -705,7 +723,7 @@ static void parse_opt(int argc, char **argv) int c; while ((c = getopt(argc, argv, - "46bc:CeEFhIl:LnNp:PrRS:t:uv:V:x")) != -1) { + "46bc:CeEFhIl:LnNo:p:PrRS:t:uv:V:x")) != -1) { switch (c) { case '4': do_ipv6 = 0; @@ -746,6 +764,10 @@ static void parse_opt(int argc, char **argv) case 'N': cfg_print_nsec = true; break; + case 'o': + ts_opt_id = strtoul(optarg, NULL, 10); + cfg_use_cmsg_opt_id = true; + break; case 'p': dest_port = strtoul(optarg, NULL, 10); break; @@ -803,6 +825,8 @@ static void parse_opt(int argc, char **argv) error(1, 0, "cannot ask for pktinfo over pf_packet"); if (cfg_busy_poll && cfg_use_epoll) error(1, 0, "pass epoll or busy_poll, not both"); + if (cfg_proto == SOCK_STREAM && cfg_use_cmsg_opt_id) + error(1, 0, "TCP sockets don't support SCM_TS_OPT_ID"); if (optind != argc - 1) error(1, 0, "missing required hostname argument"); diff --git a/tools/testing/selftests/net/txtimestamp.sh b/tools/testing/selftests/net/txtimestamp.sh index 25baca4b148e..fe4649bb8786 100755 --- a/tools/testing/selftests/net/txtimestamp.sh +++ b/tools/testing/selftests/net/txtimestamp.sh @@ -37,11 +37,13 @@ run_test_v4v6() { run_test_tcpudpraw() { local -r args=$@ - run_test_v4v6 ${args} # tcp - run_test_v4v6 ${args} -u # udp - run_test_v4v6 ${args} -r # raw - run_test_v4v6 ${args} -R # raw (IPPROTO_RAW) - run_test_v4v6 ${args} -P # pf_packet + run_test_v4v6 ${args} # tcp + run_test_v4v6 ${args} -u # udp + run_test_v4v6 ${args} -u -o 42 # udp with fixed tskey + run_test_v4v6 ${args} -r # raw + run_test_v4v6 ${args} -r -o 42 # raw + run_test_v4v6 ${args} -R # raw (IPPROTO_RAW) + run_test_v4v6 ${args} -P # pf_packet } run_test_all() { diff --git a/tools/testing/selftests/net/veth.sh b/tools/testing/selftests/net/veth.sh index 4f1edbafb946..6bb7dfaa30b6 100755 --- a/tools/testing/selftests/net/veth.sh +++ b/tools/testing/selftests/net/veth.sh @@ -46,8 +46,6 @@ create_ns() { ip -n $BASE$ns addr add dev veth$ns $BM_NET_V4$ns/24 ip -n $BASE$ns addr add dev veth$ns $BM_NET_V6$ns/64 nodad done - echo "#kernel" > $BASE - chmod go-rw $BASE } __chk_flag() { diff --git a/tools/testing/selftests/net/ynl.mk b/tools/testing/selftests/net/ynl.mk index 59cb26cf3f73..d43afe243779 100644 --- a/tools/testing/selftests/net/ynl.mk +++ b/tools/testing/selftests/net/ynl.mk @@ -9,6 +9,8 @@ # YNL_GEN_FILES: TEST_GEN_FILES which need YNL YNL_OUTPUTS := $(patsubst %,$(OUTPUT)/%,$(YNL_GEN_FILES)) +YNL_SPECS := \ + $(patsubst %,$(top_srcdir)/Documentation/netlink/specs/%.yaml,$(YNL_GENS)) $(YNL_OUTPUTS): $(OUTPUT)/libynl.a $(YNL_OUTPUTS): CFLAGS += \ @@ -16,6 +18,20 @@ $(YNL_OUTPUTS): CFLAGS += \ -I$(top_srcdir)/tools/net/ynl/lib/ \ -I$(top_srcdir)/tools/net/ynl/generated/ -$(OUTPUT)/libynl.a: +# Make sure we rebuild libynl if user added a new family. We can't easily +# depend on the contents of a variable so create a fake file with a hash. +YNL_GENS_HASH := $(shell echo $(YNL_GENS) | sha1sum | cut -c1-8) +$(OUTPUT)/.libynl-$(YNL_GENS_HASH).sig: + $(Q)rm -f $(OUTPUT)/.libynl-*.sig + $(Q)touch $(OUTPUT)/.libynl-$(YNL_GENS_HASH).sig + +$(OUTPUT)/libynl.a: $(YNL_SPECS) $(OUTPUT)/.libynl-$(YNL_GENS_HASH).sig + $(Q)rm -f $(top_srcdir)/tools/net/ynl/libynl.a $(Q)$(MAKE) -C $(top_srcdir)/tools/net/ynl GENS="$(YNL_GENS)" libynl.a $(Q)cp $(top_srcdir)/tools/net/ynl/libynl.a $(OUTPUT)/libynl.a + +EXTRA_CLEAN += \ + $(top_srcdir)/tools/net/ynl/lib/__pycache__ \ + $(top_srcdir)/tools/net/ynl/lib/*.[ado] \ + $(OUTPUT)/.libynl-*.sig \ + $(OUTPUT)/libynl.a diff --git a/tools/testing/selftests/nolibc/Makefile b/tools/testing/selftests/nolibc/Makefile index 8de98ea7af80..e92e0b885861 100644 --- a/tools/testing/selftests/nolibc/Makefile +++ b/tools/testing/selftests/nolibc/Makefile @@ -130,9 +130,9 @@ QEMU_ARGS_ppc = -M g3beige -append "console=ttyS0 panic=-1 $(TEST:%=NOLIB QEMU_ARGS_ppc64 = -M powernv -append "console=hvc0 panic=-1 $(TEST:%=NOLIBC_TEST=%)" QEMU_ARGS_ppc64le = -M powernv -append "console=hvc0 panic=-1 $(TEST:%=NOLIBC_TEST=%)" QEMU_ARGS_riscv = -M virt -append "console=ttyS0 panic=-1 $(TEST:%=NOLIBC_TEST=%)" -QEMU_ARGS_s390 = -M s390-ccw-virtio -m 1G -append "console=ttyS0 panic=-1 $(TEST:%=NOLIBC_TEST=%)" +QEMU_ARGS_s390 = -M s390-ccw-virtio -append "console=ttyS0 panic=-1 $(TEST:%=NOLIBC_TEST=%)" QEMU_ARGS_loongarch = -M virt -append "console=ttyS0,115200 panic=-1 $(TEST:%=NOLIBC_TEST=%)" -QEMU_ARGS = $(QEMU_ARGS_$(XARCH)) $(QEMU_ARGS_BIOS) $(QEMU_ARGS_EXTRA) +QEMU_ARGS = -m 1G $(QEMU_ARGS_$(XARCH)) $(QEMU_ARGS_BIOS) $(QEMU_ARGS_EXTRA) # OUTPUT is only set when run from the main makefile, otherwise # it defaults to this nolibc directory. diff --git a/tools/testing/selftests/pcie_bwctrl/Makefile b/tools/testing/selftests/pcie_bwctrl/Makefile new file mode 100644 index 000000000000..3e84e26341d1 --- /dev/null +++ b/tools/testing/selftests/pcie_bwctrl/Makefile @@ -0,0 +1,2 @@ +TEST_PROGS = set_pcie_cooling_state.sh +include ../lib.mk diff --git a/tools/testing/selftests/pcie_bwctrl/set_pcie_cooling_state.sh b/tools/testing/selftests/pcie_bwctrl/set_pcie_cooling_state.sh new file mode 100755 index 000000000000..9df606552af3 --- /dev/null +++ b/tools/testing/selftests/pcie_bwctrl/set_pcie_cooling_state.sh @@ -0,0 +1,122 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0-or-later + +SYSFS= +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 +retval=0 +skipmsg="skip all tests:" + +PCIEPORTTYPE="PCIe_Port_Link_Speed" + +prerequisite() +{ + local ports + + if [ $UID != 0 ]; then + echo $skipmsg must be run as root >&2 + exit $ksft_skip + fi + + SYSFS=`mount -t sysfs | head -1 | awk '{ print $3 }'` + + if [ ! -d "$SYSFS" ]; then + echo $skipmsg sysfs is not mounted >&2 + exit $ksft_skip + fi + + if ! ls $SYSFS/class/thermal/cooling_device* > /dev/null 2>&1; then + echo $skipmsg thermal cooling devices missing >&2 + exit $ksft_skip + fi + + ports=`grep -e "^$PCIEPORTTYPE" $SYSFS/class/thermal/cooling_device*/type | wc -l` + if [ $ports -eq 0 ]; then + echo $skipmsg pcie cooling devices missing >&2 + exit $ksft_skip + fi +} + +testport= +find_pcie_port() +{ + local patt="$1" + local pcieports + local max + local cur + local delta + local bestdelta=-1 + + pcieports=`grep -l -F -e "$patt" /sys/class/thermal/cooling_device*/type` + if [ -z "$pcieports" ]; then + return + fi + pcieports=${pcieports//\/type/} + # Find the port with the highest PCIe Link Speed + for port in $pcieports; do + max=`cat $port/max_state` + cur=`cat $port/cur_state` + delta=$((max-cur)) + if [ $delta -gt $bestdelta ]; then + testport="$port" + bestdelta=$delta + fi + done +} + +sysfspcidev= +find_sysfs_pci_dev() +{ + local typefile="$1/type" + local pcidir + + pcidir="$SYSFS/bus/pci/devices/`sed -e "s|^${PCIEPORTTYPE}_||g" $typefile`" + + if [ -r "$pcidir/current_link_speed" ]; then + sysfspcidev="$pcidir/current_link_speed" + fi +} + +usage() +{ + echo "Usage $0 [ -d dev ]" + echo -e "\t-d: PCIe port BDF string (e.g., 0000:00:04.0)" +} + +pattern="$PCIEPORTTYPE" +parse_arguments() +{ + while getopts d:h opt; do + case $opt in + h) + usage "$0" + exit 0 + ;; + d) + pattern="$PCIEPORTTYPE_$OPTARG" + ;; + *) + usage "$0" + exit 0 + ;; + esac + done +} + +parse_arguments "$@" +prerequisite +find_pcie_port "$pattern" +if [ -z "$testport" ]; then + echo $skipmsg "pcie cooling device not found from sysfs" >&2 + exit $ksft_skip +fi +find_sysfs_pci_dev "$testport" +if [ -z "$sysfspcidev" ]; then + echo $skipmsg "PCIe port device not found from sysfs" >&2 + exit $ksft_skip +fi + +./set_pcie_speed.sh "$testport" "$sysfspcidev" +retval=$? + +exit $retval diff --git a/tools/testing/selftests/pcie_bwctrl/set_pcie_speed.sh b/tools/testing/selftests/pcie_bwctrl/set_pcie_speed.sh new file mode 100755 index 000000000000..584596949312 --- /dev/null +++ b/tools/testing/selftests/pcie_bwctrl/set_pcie_speed.sh @@ -0,0 +1,67 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0-or-later + +set -e + +TESTNAME=set_pcie_speed + +declare -a PCIELINKSPEED=( + "2.5 GT/s PCIe" + "5.0 GT/s PCIe" + "8.0 GT/s PCIe" + "16.0 GT/s PCIe" + "32.0 GT/s PCIe" + "64.0 GT/s PCIe" +) + +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 +retval=0 + +coolingdev="$1" +statefile="$coolingdev/cur_state" +maxfile="$coolingdev/max_state" +linkspeedfile="$2" + +oldstate=`cat $statefile` +maxstate=`cat $maxfile` + +set_state() +{ + local state=$1 + local linkspeed + local expected_linkspeed + + echo $state > $statefile + + sleep 1 + + linkspeed="`cat $linkspeedfile`" + expected_linkspeed=$((maxstate-state)) + expected_str="${PCIELINKSPEED[$expected_linkspeed]}" + if [ ! "${expected_str}" = "${linkspeed}" ]; then + echo "$TESTNAME failed: expected: ${expected_str}; got ${linkspeed}" + retval=1 + fi +} + +cleanup_skip () +{ + set_state $oldstate + exit $ksft_skip +} + +trap cleanup_skip EXIT + +echo "$TESTNAME: testing states $maxstate .. $oldstate with $coolingdev" +for i in $(seq $maxstate -1 $oldstate); do + set_state "$i" +done + +trap EXIT +if [ $retval -eq 0 ]; then + echo "$TESTNAME [PASS]" +else + echo "$TESTNAME [FAIL]" +fi +exit $retval diff --git a/tools/testing/selftests/pidfd/pidfd_open_test.c b/tools/testing/selftests/pidfd/pidfd_open_test.c index c62564c264b1..ce413a221bac 100644 --- a/tools/testing/selftests/pidfd/pidfd_open_test.c +++ b/tools/testing/selftests/pidfd/pidfd_open_test.c @@ -13,6 +13,7 @@ #include <stdlib.h> #include <string.h> #include <syscall.h> +#include <sys/ioctl.h> #include <sys/mount.h> #include <sys/prctl.h> #include <sys/wait.h> @@ -21,6 +22,32 @@ #include "pidfd.h" #include "../kselftest.h" +#ifndef PIDFS_IOCTL_MAGIC +#define PIDFS_IOCTL_MAGIC 0xFF +#endif + +#ifndef PIDFD_GET_INFO +#define PIDFD_GET_INFO _IOWR(PIDFS_IOCTL_MAGIC, 11, struct pidfd_info) +#define PIDFD_INFO_CGROUPID (1UL << 0) + +struct pidfd_info { + __u64 request_mask; + __u64 cgroupid; + __u32 pid; + __u32 tgid; + __u32 ppid; + __u32 ruid; + __u32 rgid; + __u32 euid; + __u32 egid; + __u32 suid; + __u32 sgid; + __u32 fsuid; + __u32 fsgid; + __u32 spare0[1]; +}; +#endif + static int safe_int(const char *numstr, int *converted) { char *err = NULL; @@ -120,10 +147,13 @@ out: int main(int argc, char **argv) { + struct pidfd_info info = { + .request_mask = PIDFD_INFO_CGROUPID, + }; int pidfd = -1, ret = 1; pid_t pid; - ksft_set_plan(3); + ksft_set_plan(4); pidfd = sys_pidfd_open(-1, 0); if (pidfd >= 0) { @@ -153,6 +183,56 @@ int main(int argc, char **argv) pid = get_pid_from_fdinfo_file(pidfd, "Pid:", sizeof("Pid:") - 1); ksft_print_msg("pidfd %d refers to process with pid %d\n", pidfd, pid); + if (ioctl(pidfd, PIDFD_GET_INFO, &info) < 0) { + ksft_print_msg("%s - failed to get info from pidfd\n", strerror(errno)); + goto on_error; + } + if (info.pid != pid) { + ksft_print_msg("pid from fdinfo file %d does not match pid from ioctl %d\n", + pid, info.pid); + goto on_error; + } + if (info.ppid != getppid()) { + ksft_print_msg("ppid %d does not match ppid from ioctl %d\n", + pid, info.pid); + goto on_error; + } + if (info.ruid != getuid()) { + ksft_print_msg("uid %d does not match uid from ioctl %d\n", + getuid(), info.ruid); + goto on_error; + } + if (info.rgid != getgid()) { + ksft_print_msg("gid %d does not match gid from ioctl %d\n", + getgid(), info.rgid); + goto on_error; + } + if (info.euid != geteuid()) { + ksft_print_msg("euid %d does not match euid from ioctl %d\n", + geteuid(), info.euid); + goto on_error; + } + if (info.egid != getegid()) { + ksft_print_msg("egid %d does not match egid from ioctl %d\n", + getegid(), info.egid); + goto on_error; + } + if (info.suid != geteuid()) { + ksft_print_msg("suid %d does not match suid from ioctl %d\n", + geteuid(), info.suid); + goto on_error; + } + if (info.sgid != getegid()) { + ksft_print_msg("sgid %d does not match sgid from ioctl %d\n", + getegid(), info.sgid); + goto on_error; + } + if ((info.request_mask & PIDFD_INFO_CGROUPID) && info.cgroupid == 0) { + ksft_print_msg("cgroupid should not be 0 when PIDFD_INFO_CGROUPID is set\n"); + goto on_error; + } + ksft_test_result_pass("get info from pidfd test: passed\n"); + ret = 0; on_error: diff --git a/tools/testing/selftests/powerpc/alignment/settings b/tools/testing/selftests/powerpc/alignment/settings new file mode 100644 index 000000000000..2e8566183318 --- /dev/null +++ b/tools/testing/selftests/powerpc/alignment/settings @@ -0,0 +1 @@ +timeout=130 diff --git a/tools/testing/selftests/powerpc/cache_shape/settings b/tools/testing/selftests/powerpc/cache_shape/settings new file mode 100644 index 000000000000..2e8566183318 --- /dev/null +++ b/tools/testing/selftests/powerpc/cache_shape/settings @@ -0,0 +1 @@ +timeout=130 diff --git a/tools/testing/selftests/powerpc/copyloops/settings b/tools/testing/selftests/powerpc/copyloops/settings new file mode 100644 index 000000000000..2e8566183318 --- /dev/null +++ b/tools/testing/selftests/powerpc/copyloops/settings @@ -0,0 +1 @@ +timeout=130 diff --git a/tools/testing/selftests/powerpc/dexcr/settings b/tools/testing/selftests/powerpc/dexcr/settings new file mode 100644 index 000000000000..2e8566183318 --- /dev/null +++ b/tools/testing/selftests/powerpc/dexcr/settings @@ -0,0 +1 @@ +timeout=130 diff --git a/tools/testing/selftests/powerpc/dscr/settings b/tools/testing/selftests/powerpc/dscr/settings new file mode 100644 index 000000000000..2e8566183318 --- /dev/null +++ b/tools/testing/selftests/powerpc/dscr/settings @@ -0,0 +1 @@ +timeout=130 diff --git a/tools/testing/selftests/powerpc/lib/settings b/tools/testing/selftests/powerpc/lib/settings new file mode 100644 index 000000000000..2e8566183318 --- /dev/null +++ b/tools/testing/selftests/powerpc/lib/settings @@ -0,0 +1 @@ +timeout=130 diff --git a/tools/testing/selftests/powerpc/math/settings b/tools/testing/selftests/powerpc/math/settings new file mode 100644 index 000000000000..2e8566183318 --- /dev/null +++ b/tools/testing/selftests/powerpc/math/settings @@ -0,0 +1 @@ +timeout=130 diff --git a/tools/testing/selftests/powerpc/mce/settings b/tools/testing/selftests/powerpc/mce/settings new file mode 100644 index 000000000000..2e8566183318 --- /dev/null +++ b/tools/testing/selftests/powerpc/mce/settings @@ -0,0 +1 @@ +timeout=130 diff --git a/tools/testing/selftests/powerpc/mm/settings b/tools/testing/selftests/powerpc/mm/settings new file mode 100644 index 000000000000..2e8566183318 --- /dev/null +++ b/tools/testing/selftests/powerpc/mm/settings @@ -0,0 +1 @@ +timeout=130 diff --git a/tools/testing/selftests/powerpc/mm/stack_expansion_ldst.c b/tools/testing/selftests/powerpc/mm/stack_expansion_ldst.c index ed9143990888..9c0d343d7137 100644 --- a/tools/testing/selftests/powerpc/mm/stack_expansion_ldst.c +++ b/tools/testing/selftests/powerpc/mm/stack_expansion_ldst.c @@ -175,7 +175,7 @@ static int test(void) page_size = getpagesize(); getrlimit(RLIMIT_STACK, &rlimit); - printf("Stack rlimit is 0x%lx\n", rlimit.rlim_cur); + printf("Stack rlimit is 0x%llx\n", (unsigned long long)rlimit.rlim_cur); printf("Testing loads ...\n"); test_one_type(LOAD, page_size, rlimit.rlim_cur); diff --git a/tools/testing/selftests/powerpc/mm/subpage_prot.c b/tools/testing/selftests/powerpc/mm/subpage_prot.c index 3ae77ba93208..8cf9fd5fed1c 100644 --- a/tools/testing/selftests/powerpc/mm/subpage_prot.c +++ b/tools/testing/selftests/powerpc/mm/subpage_prot.c @@ -211,8 +211,8 @@ int test_file(void) perror("failed to map file"); return 1; } - printf("allocated %s for 0x%lx bytes at %p\n", - file_name, filesize, fileblock); + printf("allocated %s for 0x%llx bytes at %p\n", + file_name, (long long)filesize, fileblock); printf("testing file map...\n"); diff --git a/tools/testing/selftests/powerpc/mm/tlbie_test.c b/tools/testing/selftests/powerpc/mm/tlbie_test.c index 48344a74b212..35f0098399cc 100644 --- a/tools/testing/selftests/powerpc/mm/tlbie_test.c +++ b/tools/testing/selftests/powerpc/mm/tlbie_test.c @@ -313,16 +313,16 @@ static inline void end_verification_log(unsigned int tid, unsigned nr_anamolies) fclose(f); - if (nr_anamolies == 0) { - remove(path); - return; - } - sprintf(logfile, logfilename, tid); strcpy(path, logdir); strcat(path, separator); strcat(path, logfile); + if (nr_anamolies == 0) { + remove(path); + return; + } + printf("Thread %02d chunk has %d corrupted words. For details check %s\n", tid, nr_anamolies, path); } diff --git a/tools/testing/selftests/powerpc/nx-gzip/settings b/tools/testing/selftests/powerpc/nx-gzip/settings new file mode 100644 index 000000000000..2e8566183318 --- /dev/null +++ b/tools/testing/selftests/powerpc/nx-gzip/settings @@ -0,0 +1 @@ +timeout=130 diff --git a/tools/testing/selftests/powerpc/papr_attributes/settings b/tools/testing/selftests/powerpc/papr_attributes/settings new file mode 100644 index 000000000000..2e8566183318 --- /dev/null +++ b/tools/testing/selftests/powerpc/papr_attributes/settings @@ -0,0 +1 @@ +timeout=130 diff --git a/tools/testing/selftests/powerpc/papr_sysparm/settings b/tools/testing/selftests/powerpc/papr_sysparm/settings new file mode 100644 index 000000000000..2e8566183318 --- /dev/null +++ b/tools/testing/selftests/powerpc/papr_sysparm/settings @@ -0,0 +1 @@ +timeout=130 diff --git a/tools/testing/selftests/powerpc/papr_vpd/settings b/tools/testing/selftests/powerpc/papr_vpd/settings new file mode 100644 index 000000000000..2e8566183318 --- /dev/null +++ b/tools/testing/selftests/powerpc/papr_vpd/settings @@ -0,0 +1 @@ +timeout=130 diff --git a/tools/testing/selftests/powerpc/pmu/count_stcx_fail.c b/tools/testing/selftests/powerpc/pmu/count_stcx_fail.c index 2070a1e2b3a5..d8dd9a9c6c1b 100644 --- a/tools/testing/selftests/powerpc/pmu/count_stcx_fail.c +++ b/tools/testing/selftests/powerpc/pmu/count_stcx_fail.c @@ -144,9 +144,6 @@ static int test_body(void) /* Run for 16Bi instructions */ FAIL_IF(do_count_loop(events, 16000000000, overhead, true)); - /* Run for 64Bi instructions */ - FAIL_IF(do_count_loop(events, 64000000000, overhead, true)); - event_close(&events[0]); event_close(&events[1]); diff --git a/tools/testing/selftests/powerpc/pmu/settings b/tools/testing/selftests/powerpc/pmu/settings new file mode 100644 index 000000000000..2e8566183318 --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/settings @@ -0,0 +1 @@ +timeout=130 diff --git a/tools/testing/selftests/powerpc/primitives/settings b/tools/testing/selftests/powerpc/primitives/settings new file mode 100644 index 000000000000..2e8566183318 --- /dev/null +++ b/tools/testing/selftests/powerpc/primitives/settings @@ -0,0 +1 @@ +timeout=130 diff --git a/tools/testing/selftests/powerpc/ptrace/settings b/tools/testing/selftests/powerpc/ptrace/settings new file mode 100644 index 000000000000..2e8566183318 --- /dev/null +++ b/tools/testing/selftests/powerpc/ptrace/settings @@ -0,0 +1 @@ +timeout=130 diff --git a/tools/testing/selftests/powerpc/scripts/settings b/tools/testing/selftests/powerpc/scripts/settings new file mode 100644 index 000000000000..2e8566183318 --- /dev/null +++ b/tools/testing/selftests/powerpc/scripts/settings @@ -0,0 +1 @@ +timeout=130 diff --git a/tools/testing/selftests/powerpc/security/mitigation-patching.sh b/tools/testing/selftests/powerpc/security/mitigation-patching.sh index f43aa4b77fba..9a4612e2e953 100755 --- a/tools/testing/selftests/powerpc/security/mitigation-patching.sh +++ b/tools/testing/selftests/powerpc/security/mitigation-patching.sh @@ -36,8 +36,7 @@ fi tainted=$(cat /proc/sys/kernel/tainted) if [[ "$tainted" -ne 0 ]]; then - echo "Error: kernel already tainted!" >&2 - exit 1 + echo "Warning: kernel already tainted! ($tainted)" >&2 fi mitigations="barrier_nospec stf_barrier count_cache_flush rfi_flush entry_flush uaccess_flush" @@ -68,9 +67,10 @@ fi echo "Waiting for timeout ..." wait +orig_tainted=$tainted tainted=$(cat /proc/sys/kernel/tainted) -if [[ "$tainted" -ne 0 ]]; then - echo "Error: kernel became tainted!" >&2 +if [[ "$tainted" != "$orig_tainted" ]]; then + echo "Error: kernel newly tainted, before ($orig_tainted) after ($tainted)" >&2 exit 1 fi diff --git a/tools/testing/selftests/powerpc/security/settings b/tools/testing/selftests/powerpc/security/settings new file mode 100644 index 000000000000..2e8566183318 --- /dev/null +++ b/tools/testing/selftests/powerpc/security/settings @@ -0,0 +1 @@ +timeout=130 diff --git a/tools/testing/selftests/powerpc/signal/sigfuz.c b/tools/testing/selftests/powerpc/signal/sigfuz.c index 08f9afe3b95c..c101b1391696 100644 --- a/tools/testing/selftests/powerpc/signal/sigfuz.c +++ b/tools/testing/selftests/powerpc/signal/sigfuz.c @@ -321,5 +321,5 @@ int main(int argc, char **argv) if (!args) args = ARG_COMPLETE; - test_harness(signal_fuzzer, "signal_fuzzer"); + return test_harness(signal_fuzzer, "signal_fuzzer"); } diff --git a/tools/testing/selftests/powerpc/stringloops/settings b/tools/testing/selftests/powerpc/stringloops/settings new file mode 100644 index 000000000000..2e8566183318 --- /dev/null +++ b/tools/testing/selftests/powerpc/stringloops/settings @@ -0,0 +1 @@ +timeout=130 diff --git a/tools/testing/selftests/powerpc/switch_endian/settings b/tools/testing/selftests/powerpc/switch_endian/settings new file mode 100644 index 000000000000..2e8566183318 --- /dev/null +++ b/tools/testing/selftests/powerpc/switch_endian/settings @@ -0,0 +1 @@ +timeout=130 diff --git a/tools/testing/selftests/powerpc/syscalls/settings b/tools/testing/selftests/powerpc/syscalls/settings new file mode 100644 index 000000000000..2e8566183318 --- /dev/null +++ b/tools/testing/selftests/powerpc/syscalls/settings @@ -0,0 +1 @@ +timeout=130 diff --git a/tools/testing/selftests/powerpc/tm/tm-signal-context-force-tm.c b/tools/testing/selftests/powerpc/tm/tm-signal-context-force-tm.c index 421cb082f6be..0a4bc479ae39 100644 --- a/tools/testing/selftests/powerpc/tm/tm-signal-context-force-tm.c +++ b/tools/testing/selftests/powerpc/tm/tm-signal-context-force-tm.c @@ -176,5 +176,5 @@ int tm_signal_context_force_tm(void) int main(int argc, char **argv) { - test_harness(tm_signal_context_force_tm, "tm_signal_context_force_tm"); + return test_harness(tm_signal_context_force_tm, "tm_signal_context_force_tm"); } diff --git a/tools/testing/selftests/powerpc/tm/tm-signal-sigreturn-nt.c b/tools/testing/selftests/powerpc/tm/tm-signal-sigreturn-nt.c index 06b801906f27..968864b052ec 100644 --- a/tools/testing/selftests/powerpc/tm/tm-signal-sigreturn-nt.c +++ b/tools/testing/selftests/powerpc/tm/tm-signal-sigreturn-nt.c @@ -46,6 +46,5 @@ int tm_signal_sigreturn_nt(void) int main(int argc, char **argv) { - test_harness(tm_signal_sigreturn_nt, "tm_signal_sigreturn_nt"); + return test_harness(tm_signal_sigreturn_nt, "tm_signal_sigreturn_nt"); } - diff --git a/tools/testing/selftests/powerpc/vphn/settings b/tools/testing/selftests/powerpc/vphn/settings new file mode 100644 index 000000000000..2e8566183318 --- /dev/null +++ b/tools/testing/selftests/powerpc/vphn/settings @@ -0,0 +1 @@ +timeout=130 diff --git a/tools/testing/selftests/ptp/testptp.c b/tools/testing/selftests/ptp/testptp.c index 011252fe238c..58064151f2c8 100644 --- a/tools/testing/selftests/ptp/testptp.c +++ b/tools/testing/selftests/ptp/testptp.c @@ -146,6 +146,7 @@ static void usage(char *progname) " -T val set the ptp clock time to 'val' seconds\n" " -x val get an extended ptp clock time with the desired number of samples (up to %d)\n" " -X get a ptp clock cross timestamp\n" + " -y val pre/post tstamp timebase to use {realtime|monotonic|monotonic-raw}\n" " -z test combinations of rising/falling external time stamp flags\n", progname, PTP_MAX_SAMPLES); } @@ -189,6 +190,7 @@ int main(int argc, char *argv[]) int seconds = 0; int settime = 0; int channel = -1; + clockid_t ext_clockid = CLOCK_REALTIME; int64_t t1, t2, tp; int64_t interval, offset; @@ -198,7 +200,7 @@ int main(int argc, char *argv[]) progname = strrchr(argv[0], '/'); progname = progname ? 1+progname : argv[0]; - while (EOF != (c = getopt(argc, argv, "cd:e:f:F:ghH:i:k:lL:n:o:p:P:sSt:T:w:x:Xz"))) { + while (EOF != (c = getopt(argc, argv, "cd:e:f:F:ghH:i:k:lL:n:o:p:P:sSt:T:w:x:Xy:z"))) { switch (c) { case 'c': capabilities = 1; @@ -278,6 +280,21 @@ int main(int argc, char *argv[]) case 'X': getcross = 1; break; + case 'y': + if (!strcasecmp(optarg, "realtime")) + ext_clockid = CLOCK_REALTIME; + else if (!strcasecmp(optarg, "monotonic")) + ext_clockid = CLOCK_MONOTONIC; + else if (!strcasecmp(optarg, "monotonic-raw")) + ext_clockid = CLOCK_MONOTONIC_RAW; + else { + fprintf(stderr, + "type needs to be realtime, monotonic or monotonic-raw; was given %s\n", + optarg); + return -1; + } + break; + case 'z': flagtest = 1; break; @@ -566,6 +583,7 @@ int main(int argc, char *argv[]) } soe->n_samples = getextended; + soe->clockid = ext_clockid; if (ioctl(fd, PTP_SYS_OFFSET_EXTENDED, soe)) { perror("PTP_SYS_OFFSET_EXTENDED"); @@ -574,12 +592,46 @@ int main(int argc, char *argv[]) getextended); for (i = 0; i < getextended; i++) { - printf("sample #%2d: system time before: %lld.%09u\n", - i, soe->ts[i][0].sec, soe->ts[i][0].nsec); + switch (ext_clockid) { + case CLOCK_REALTIME: + printf("sample #%2d: real time before: %lld.%09u\n", + i, soe->ts[i][0].sec, + soe->ts[i][0].nsec); + break; + case CLOCK_MONOTONIC: + printf("sample #%2d: monotonic time before: %lld.%09u\n", + i, soe->ts[i][0].sec, + soe->ts[i][0].nsec); + break; + case CLOCK_MONOTONIC_RAW: + printf("sample #%2d: monotonic-raw time before: %lld.%09u\n", + i, soe->ts[i][0].sec, + soe->ts[i][0].nsec); + break; + default: + break; + } printf(" phc time: %lld.%09u\n", soe->ts[i][1].sec, soe->ts[i][1].nsec); - printf(" system time after: %lld.%09u\n", - soe->ts[i][2].sec, soe->ts[i][2].nsec); + switch (ext_clockid) { + case CLOCK_REALTIME: + printf(" real time after: %lld.%09u\n", + soe->ts[i][2].sec, + soe->ts[i][2].nsec); + break; + case CLOCK_MONOTONIC: + printf(" monotonic time after: %lld.%09u\n", + soe->ts[i][2].sec, + soe->ts[i][2].nsec); + break; + case CLOCK_MONOTONIC_RAW: + printf(" monotonic-raw time after: %lld.%09u\n", + soe->ts[i][2].sec, + soe->ts[i][2].nsec); + break; + default: + break; + } } } diff --git a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run-batch.sh b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run-batch.sh index c3808c490d92..f87046b702d8 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run-batch.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run-batch.sh @@ -56,27 +56,30 @@ do echo > $i/kvm-test-1-run-qemu.sh.out export TORTURE_AFFINITY= kvm-get-cpus-script.sh $T/cpuarray.awk $T/cpubatches.awk $T/cpustate - cat << ' ___EOF___' >> $T/cpubatches.awk - END { - affinitylist = ""; - if (!gotcpus()) { - print "echo No CPU-affinity information, so no taskset command."; - } else if (cpu_count !~ /^[0-9][0-9]*$/) { - print "echo " scenario ": Bogus number of CPUs (old qemu-cmd?), so no taskset command."; - } else { - affinitylist = nextcpus(cpu_count); - if (!(affinitylist ~ /^[0-9,-][0-9,-]*$/)) - print "echo " scenario ": Bogus CPU-affinity information, so no taskset command."; - else if (!dumpcpustate()) - print "echo " scenario ": Could not dump state, so no taskset command."; - else - print "export TORTURE_AFFINITY=" affinitylist; + if test -z "${TORTURE_NO_AFFINITY}" + then + cat << ' ___EOF___' >> $T/cpubatches.awk + END { + affinitylist = ""; + if (!gotcpus()) { + print "echo No CPU-affinity information, so no taskset command."; + } else if (cpu_count !~ /^[0-9][0-9]*$/) { + print "echo " scenario ": Bogus number of CPUs (old qemu-cmd?), so no taskset command."; + } else { + affinitylist = nextcpus(cpu_count); + if (!(affinitylist ~ /^[0-9,-][0-9,-]*$/)) + print "echo " scenario ": Bogus CPU-affinity information, so no taskset command."; + else if (!dumpcpustate()) + print "echo " scenario ": Could not dump state, so no taskset command."; + else + print "export TORTURE_AFFINITY=" affinitylist; + } } - } - ___EOF___ - cpu_count="`grep '# TORTURE_CPU_COUNT=' $i/qemu-cmd | sed -e 's/^.*=//'`" - affinity_export="`awk -f $T/cpubatches.awk -v cpu_count="$cpu_count" -v scenario=$i < /dev/null`" - $affinity_export + ___EOF___ + cpu_count="`grep '# TORTURE_CPU_COUNT=' $i/qemu-cmd | sed -e 's/^.*=//'`" + affinity_export="`awk -f $T/cpubatches.awk -v cpu_count="$cpu_count" -v scenario=$i < /dev/null`" + $affinity_export + fi kvm-test-1-run-qemu.sh $i >> $i/kvm-test-1-run-qemu.sh.out 2>&1 & done for i in $runfiles diff --git a/tools/testing/selftests/rcutorture/bin/kvm.sh b/tools/testing/selftests/rcutorture/bin/kvm.sh index 7af73ddc148d..42e5e8597a1a 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm.sh @@ -42,6 +42,7 @@ TORTURE_JITTER_STOP="" TORTURE_KCONFIG_KASAN_ARG="" TORTURE_KCONFIG_KCSAN_ARG="" TORTURE_KMAKE_ARG="" +TORTURE_NO_AFFINITY="" TORTURE_QEMU_MEM=512 torture_qemu_mem_default=1 TORTURE_REMOTE= @@ -82,6 +83,7 @@ usage () { echo " --kmake-arg kernel-make-arguments" echo " --mac nn:nn:nn:nn:nn:nn" echo " --memory megabytes|nnnG" + echo " --no-affinity" echo " --no-initrd" echo " --qemu-args qemu-arguments" echo " --qemu-cmd qemu-system-..." @@ -220,6 +222,9 @@ do torture_qemu_mem_default= shift ;; + --no-affinity) + TORTURE_NO_AFFINITY="no-affinity" + ;; --no-initrd) TORTURE_INITRD=""; export TORTURE_INITRD ;; @@ -417,6 +422,7 @@ TORTURE_KCONFIG_KASAN_ARG="$TORTURE_KCONFIG_KASAN_ARG"; export TORTURE_KCONFIG_K TORTURE_KCONFIG_KCSAN_ARG="$TORTURE_KCONFIG_KCSAN_ARG"; export TORTURE_KCONFIG_KCSAN_ARG TORTURE_KMAKE_ARG="$TORTURE_KMAKE_ARG"; export TORTURE_KMAKE_ARG TORTURE_MOD="$TORTURE_MOD"; export TORTURE_MOD +TORTURE_NO_AFFINITY="$TORTURE_NO_AFFINITY"; export TORTURE_NO_AFFINITY TORTURE_QEMU_CMD="$TORTURE_QEMU_CMD"; export TORTURE_QEMU_CMD TORTURE_QEMU_INTERACTIVE="$TORTURE_QEMU_INTERACTIVE"; export TORTURE_QEMU_INTERACTIVE TORTURE_QEMU_MAC="$TORTURE_QEMU_MAC"; export TORTURE_QEMU_MAC diff --git a/tools/testing/selftests/rcutorture/configs/rcu/CFLIST b/tools/testing/selftests/rcutorture/configs/rcu/CFLIST index 98b6175e5aa0..45f572570a8c 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/CFLIST +++ b/tools/testing/selftests/rcutorture/configs/rcu/CFLIST @@ -5,6 +5,7 @@ TREE04 TREE05 TREE07 TREE09 +SRCU-L SRCU-N SRCU-P SRCU-T diff --git a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-L b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-L new file mode 100644 index 000000000000..3b4fa8dbef8a --- /dev/null +++ b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-L @@ -0,0 +1,10 @@ +CONFIG_RCU_TRACE=n +CONFIG_SMP=y +CONFIG_NR_CPUS=6 +CONFIG_HOTPLUG_CPU=y +CONFIG_PREEMPT_NONE=y +CONFIG_PREEMPT_VOLUNTARY=n +CONFIG_PREEMPT=n +#CHECK#CONFIG_RCU_EXPERT=n +CONFIG_KPROBES=n +CONFIG_FTRACE=n diff --git a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-L.boot b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-L.boot new file mode 100644 index 000000000000..0207b3138c5b --- /dev/null +++ b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-L.boot @@ -0,0 +1,3 @@ +rcutorture.torture_type=srcu +rcutorture.reader_flavor=0x4 +rcutorture.fwd_progress=3 diff --git a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-N.boot b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-N.boot index ce0694fd9b92..b54cf87dc110 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-N.boot +++ b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-N.boot @@ -1,2 +1,3 @@ rcutorture.torture_type=srcu +rcutorture.reader_flavor=0x2 rcutorture.fwd_progress=3 diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE10 b/tools/testing/selftests/rcutorture/configs/rcu/TREE10 index a323d8948b7c..759ee51d3ddc 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TREE10 +++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE10 @@ -1,5 +1,5 @@ CONFIG_SMP=y -CONFIG_NR_CPUS=56 +CONFIG_NR_CPUS=74 CONFIG_PREEMPT_NONE=y CONFIG_PREEMPT_VOLUNTARY=n CONFIG_PREEMPT=n diff --git a/tools/testing/selftests/resctrl/cmt_test.c b/tools/testing/selftests/resctrl/cmt_test.c index 0c045080d808..3bbf3042fb06 100644 --- a/tools/testing/selftests/resctrl/cmt_test.c +++ b/tools/testing/selftests/resctrl/cmt_test.c @@ -99,14 +99,13 @@ static int check_results(struct resctrl_val_param *param, size_t span, int no_of } /* Field 3 is llc occ resc value */ - if (runs > 0) - sum_llc_occu_resc += strtoul(token_array[3], NULL, 0); + sum_llc_occu_resc += strtoul(token_array[3], NULL, 0); runs++; } fclose(fp); return show_results_info(sum_llc_occu_resc, no_of_bits, span, - MAX_DIFF, MAX_DIFF_PERCENT, runs - 1, true); + MAX_DIFF, MAX_DIFF_PERCENT, runs, true); } static void cmt_test_cleanup(void) @@ -116,15 +115,13 @@ static void cmt_test_cleanup(void) static int cmt_run_test(const struct resctrl_test *test, const struct user_params *uparams) { - const char * const *cmd = uparams->benchmark_cmd; - const char *new_cmd[BENCHMARK_ARGS]; + struct fill_buf_param fill_buf = {}; unsigned long cache_total_size = 0; int n = uparams->bits ? : 5; unsigned long long_mask; - char *span_str = NULL; int count_of_bits; size_t span; - int ret, i; + int ret; ret = get_full_cbm("L3", &long_mask); if (ret) @@ -155,32 +152,26 @@ static int cmt_run_test(const struct resctrl_test *test, const struct user_param span = cache_portion_size(cache_total_size, param.mask, long_mask); - if (strcmp(cmd[0], "fill_buf") == 0) { - /* Duplicate the command to be able to replace span in it */ - for (i = 0; uparams->benchmark_cmd[i]; i++) - new_cmd[i] = uparams->benchmark_cmd[i]; - new_cmd[i] = NULL; - - ret = asprintf(&span_str, "%zu", span); - if (ret < 0) - return -1; - new_cmd[1] = span_str; - cmd = new_cmd; + if (uparams->fill_buf) { + fill_buf.buf_size = span; + fill_buf.memflush = uparams->fill_buf->memflush; + param.fill_buf = &fill_buf; + } else if (!uparams->benchmark_cmd[0]) { + fill_buf.buf_size = span; + fill_buf.memflush = true; + param.fill_buf = &fill_buf; } remove(RESULT_FILE_NAME); - ret = resctrl_val(test, uparams, cmd, ¶m); + ret = resctrl_val(test, uparams, ¶m); if (ret) - goto out; + return ret; ret = check_results(¶m, span, n); if (ret && (get_vendor() == ARCH_INTEL)) ksft_print_msg("Intel CMT may be inaccurate when Sub-NUMA Clustering is enabled. Check BIOS configuration.\n"); -out: - free(span_str); - return ret; } diff --git a/tools/testing/selftests/resctrl/fill_buf.c b/tools/testing/selftests/resctrl/fill_buf.c index ae120f1735c0..19a01a52dc1a 100644 --- a/tools/testing/selftests/resctrl/fill_buf.c +++ b/tools/testing/selftests/resctrl/fill_buf.c @@ -88,18 +88,6 @@ static int fill_one_span_read(unsigned char *buf, size_t buf_size) return sum; } -static void fill_one_span_write(unsigned char *buf, size_t buf_size) -{ - unsigned char *end_ptr = buf + buf_size; - unsigned char *p; - - p = buf; - while (p < end_ptr) { - *p = '1'; - p += (CL_SIZE / 2); - } -} - void fill_cache_read(unsigned char *buf, size_t buf_size, bool once) { int ret = 0; @@ -114,20 +102,11 @@ void fill_cache_read(unsigned char *buf, size_t buf_size, bool once) *value_sink = ret; } -static void fill_cache_write(unsigned char *buf, size_t buf_size, bool once) -{ - while (1) { - fill_one_span_write(buf, buf_size); - if (once) - break; - } -} - -unsigned char *alloc_buffer(size_t buf_size, int memflush) +unsigned char *alloc_buffer(size_t buf_size, bool memflush) { void *buf = NULL; uint64_t *p64; - size_t s64; + ssize_t s64; int ret; ret = posix_memalign(&buf, PAGE_SIZE, buf_size); @@ -151,19 +130,15 @@ unsigned char *alloc_buffer(size_t buf_size, int memflush) return buf; } -int run_fill_buf(size_t buf_size, int memflush, int op, bool once) +ssize_t get_fill_buf_size(int cpu_no, const char *cache_type) { - unsigned char *buf; - - buf = alloc_buffer(buf_size, memflush); - if (!buf) - return -1; + unsigned long cache_total_size = 0; + int ret; - if (op == 0) - fill_cache_read(buf, buf_size, once); - else - fill_cache_write(buf, buf_size, once); - free(buf); + ret = get_cache_size(cpu_no, cache_type, &cache_total_size); + if (ret) + return ret; - return 0; + return cache_total_size * 2 > MINIMUM_SPAN ? + cache_total_size * 2 : MINIMUM_SPAN; } diff --git a/tools/testing/selftests/resctrl/mba_test.c b/tools/testing/selftests/resctrl/mba_test.c index ab8496a4925b..536d9089d2f6 100644 --- a/tools/testing/selftests/resctrl/mba_test.c +++ b/tools/testing/selftests/resctrl/mba_test.c @@ -21,7 +21,7 @@ static int mba_init(const struct resctrl_val_param *param, int domain_id) { int ret; - ret = initialize_mem_bw_imc(); + ret = initialize_read_mem_bw_imc(); if (ret) return ret; @@ -39,7 +39,8 @@ static int mba_setup(const struct resctrl_test *test, const struct user_params *uparams, struct resctrl_val_param *p) { - static int runs_per_allocation, allocation = 100; + static unsigned int allocation = ALLOCATION_MIN; + static int runs_per_allocation; char allocation_str[64]; int ret; @@ -50,7 +51,7 @@ static int mba_setup(const struct resctrl_test *test, if (runs_per_allocation++ != 0) return 0; - if (allocation < ALLOCATION_MIN || allocation > ALLOCATION_MAX) + if (allocation > ALLOCATION_MAX) return END_OF_TESTS; sprintf(allocation_str, "%d", allocation); @@ -59,7 +60,7 @@ static int mba_setup(const struct resctrl_test *test, if (ret < 0) return ret; - allocation -= ALLOCATION_STEP; + allocation += ALLOCATION_STEP; return 0; } @@ -67,13 +68,14 @@ static int mba_setup(const struct resctrl_test *test, static int mba_measure(const struct user_params *uparams, struct resctrl_val_param *param, pid_t bm_pid) { - return measure_mem_bw(uparams, param, bm_pid, "reads"); + return measure_read_mem_bw(uparams, param, bm_pid); } static bool show_mba_info(unsigned long *bw_imc, unsigned long *bw_resc) { - int allocation, runs; + unsigned int allocation; bool ret = false; + int runs; ksft_print_msg("Results are displayed in (MB)\n"); /* Memory bandwidth from 100% down to 10% */ @@ -84,18 +86,21 @@ static bool show_mba_info(unsigned long *bw_imc, unsigned long *bw_resc) int avg_diff_per; float avg_diff; - /* - * The first run is discarded due to inaccurate value from - * phase transition. - */ - for (runs = NUM_OF_RUNS * allocation + 1; + for (runs = NUM_OF_RUNS * allocation; runs < NUM_OF_RUNS * allocation + NUM_OF_RUNS ; runs++) { sum_bw_imc += bw_imc[runs]; sum_bw_resc += bw_resc[runs]; } - avg_bw_imc = sum_bw_imc / (NUM_OF_RUNS - 1); - avg_bw_resc = sum_bw_resc / (NUM_OF_RUNS - 1); + avg_bw_imc = sum_bw_imc / NUM_OF_RUNS; + avg_bw_resc = sum_bw_resc / NUM_OF_RUNS; + if (avg_bw_imc < THROTTLE_THRESHOLD || avg_bw_resc < THROTTLE_THRESHOLD) { + ksft_print_msg("Bandwidth below threshold (%d MiB). Dropping results from MBA schemata %u.\n", + THROTTLE_THRESHOLD, + ALLOCATION_MIN + ALLOCATION_STEP * allocation); + continue; + } + avg_diff = (float)labs(avg_bw_resc - avg_bw_imc) / avg_bw_imc; avg_diff_per = (int)(avg_diff * 100); @@ -103,7 +108,7 @@ static bool show_mba_info(unsigned long *bw_imc, unsigned long *bw_resc) avg_diff_per > MAX_DIFF_PERCENT ? "Fail:" : "Pass:", MAX_DIFF_PERCENT, - ALLOCATION_MAX - ALLOCATION_STEP * allocation); + ALLOCATION_MIN + ALLOCATION_STEP * allocation); ksft_print_msg("avg_diff_per: %d%%\n", avg_diff_per); ksft_print_msg("avg_bw_imc: %lu\n", avg_bw_imc); @@ -122,8 +127,9 @@ static bool show_mba_info(unsigned long *bw_imc, unsigned long *bw_resc) static int check_results(void) { + unsigned long bw_resc[NUM_OF_RUNS * ALLOCATION_MAX / ALLOCATION_STEP]; + unsigned long bw_imc[NUM_OF_RUNS * ALLOCATION_MAX / ALLOCATION_STEP]; char *token_array[8], output[] = RESULT_FILE_NAME, temp[512]; - unsigned long bw_imc[1024], bw_resc[1024]; int runs; FILE *fp; @@ -170,11 +176,27 @@ static int mba_run_test(const struct resctrl_test *test, const struct user_param .setup = mba_setup, .measure = mba_measure, }; + struct fill_buf_param fill_buf = {}; int ret; remove(RESULT_FILE_NAME); - ret = resctrl_val(test, uparams, uparams->benchmark_cmd, ¶m); + if (uparams->fill_buf) { + fill_buf.buf_size = uparams->fill_buf->buf_size; + fill_buf.memflush = uparams->fill_buf->memflush; + param.fill_buf = &fill_buf; + } else if (!uparams->benchmark_cmd[0]) { + ssize_t buf_size; + + buf_size = get_fill_buf_size(uparams->cpu, "L3"); + if (buf_size < 0) + return buf_size; + fill_buf.buf_size = buf_size; + fill_buf.memflush = true; + param.fill_buf = &fill_buf; + } + + ret = resctrl_val(test, uparams, ¶m); if (ret) return ret; diff --git a/tools/testing/selftests/resctrl/mbm_test.c b/tools/testing/selftests/resctrl/mbm_test.c index 6b5a3b52d861..315b2ef3b3bc 100644 --- a/tools/testing/selftests/resctrl/mbm_test.c +++ b/tools/testing/selftests/resctrl/mbm_test.c @@ -22,17 +22,13 @@ show_bw_info(unsigned long *bw_imc, unsigned long *bw_resc, size_t span) int runs, ret, avg_diff_per; float avg_diff = 0; - /* - * Discard the first value which is inaccurate due to monitoring setup - * transition phase. - */ - for (runs = 1; runs < NUM_OF_RUNS ; runs++) { + for (runs = 0; runs < NUM_OF_RUNS; runs++) { sum_bw_imc += bw_imc[runs]; sum_bw_resc += bw_resc[runs]; } - avg_bw_imc = sum_bw_imc / 4; - avg_bw_resc = sum_bw_resc / 4; + avg_bw_imc = sum_bw_imc / NUM_OF_RUNS; + avg_bw_resc = sum_bw_resc / NUM_OF_RUNS; avg_diff = (float)labs(avg_bw_resc - avg_bw_imc) / avg_bw_imc; avg_diff_per = (int)(avg_diff * 100); @@ -40,7 +36,8 @@ show_bw_info(unsigned long *bw_imc, unsigned long *bw_resc, size_t span) ksft_print_msg("%s Check MBM diff within %d%%\n", ret ? "Fail:" : "Pass:", MAX_DIFF_PERCENT); ksft_print_msg("avg_diff_per: %d%%\n", avg_diff_per); - ksft_print_msg("Span (MB): %zu\n", span / MB); + if (span) + ksft_print_msg("Span (MB): %zu\n", span / MB); ksft_print_msg("avg_bw_imc: %lu\n", avg_bw_imc); ksft_print_msg("avg_bw_resc: %lu\n", avg_bw_resc); @@ -90,7 +87,7 @@ static int mbm_init(const struct resctrl_val_param *param, int domain_id) { int ret; - ret = initialize_mem_bw_imc(); + ret = initialize_read_mem_bw_imc(); if (ret) return ret; @@ -121,7 +118,7 @@ static int mbm_setup(const struct resctrl_test *test, static int mbm_measure(const struct user_params *uparams, struct resctrl_val_param *param, pid_t bm_pid) { - return measure_mem_bw(uparams, param, bm_pid, "reads"); + return measure_read_mem_bw(uparams, param, bm_pid); } static void mbm_test_cleanup(void) @@ -138,15 +135,31 @@ static int mbm_run_test(const struct resctrl_test *test, const struct user_param .setup = mbm_setup, .measure = mbm_measure, }; + struct fill_buf_param fill_buf = {}; int ret; remove(RESULT_FILE_NAME); - ret = resctrl_val(test, uparams, uparams->benchmark_cmd, ¶m); + if (uparams->fill_buf) { + fill_buf.buf_size = uparams->fill_buf->buf_size; + fill_buf.memflush = uparams->fill_buf->memflush; + param.fill_buf = &fill_buf; + } else if (!uparams->benchmark_cmd[0]) { + ssize_t buf_size; + + buf_size = get_fill_buf_size(uparams->cpu, "L3"); + if (buf_size < 0) + return buf_size; + fill_buf.buf_size = buf_size; + fill_buf.memflush = true; + param.fill_buf = &fill_buf; + } + + ret = resctrl_val(test, uparams, ¶m); if (ret) return ret; - ret = check_results(DEFAULT_SPAN); + ret = check_results(param.fill_buf ? param.fill_buf->buf_size : 0); if (ret && (get_vendor() == ARCH_INTEL)) ksft_print_msg("Intel MBM may be inaccurate when Sub-NUMA Clustering is enabled. Check BIOS configuration.\n"); diff --git a/tools/testing/selftests/resctrl/resctrl.h b/tools/testing/selftests/resctrl/resctrl.h index 2dda56084588..dab1953fc7a0 100644 --- a/tools/testing/selftests/resctrl/resctrl.h +++ b/tools/testing/selftests/resctrl/resctrl.h @@ -41,18 +41,48 @@ #define BENCHMARK_ARGS 64 -#define DEFAULT_SPAN (250 * MB) +#define MINIMUM_SPAN (250 * MB) + +/* + * Memory bandwidth (in MiB) below which the bandwidth comparisons + * between iMC and resctrl are considered unreliable. For example RAS + * features or memory performance features that generate memory traffic + * may drive accesses that are counted differently by performance counters + * and MBM respectively, for instance generating "overhead" traffic which + * is not counted against any specific RMID. + */ +#define THROTTLE_THRESHOLD 750 + +/* + * fill_buf_param: "fill_buf" benchmark parameters + * @buf_size: Size (in bytes) of buffer used in benchmark. + * "fill_buf" allocates and initializes buffer of + * @buf_size. User can change value via command line. + * @memflush: If false the buffer will not be flushed after + * allocation and initialization, otherwise the + * buffer will be flushed. User can change value via + * command line (via integers with 0 interpreted as + * false and anything else as true). + */ +struct fill_buf_param { + size_t buf_size; + bool memflush; +}; /* * user_params: User supplied parameters * @cpu: CPU number to which the benchmark will be bound to * @bits: Number of bits used for cache allocation size * @benchmark_cmd: Benchmark command to run during (some of the) tests + * @fill_buf: Pointer to user provided parameters for "fill_buf", + * NULL if user did not provide parameters and test + * specific defaults should be used. */ struct user_params { int cpu; int bits; const char *benchmark_cmd[BENCHMARK_ARGS]; + const struct fill_buf_param *fill_buf; }; /* @@ -87,21 +117,29 @@ struct resctrl_test { * @init: Callback function to initialize test environment * @setup: Callback function to setup per test run environment * @measure: Callback that performs the measurement (a single test) + * @fill_buf: Parameters for default "fill_buf" benchmark. + * Initialized with user provided parameters, possibly + * adapted to be relevant to the test. If user does + * not provide parameters for "fill_buf" nor a + * replacement benchmark then initialized with defaults + * appropriate for test. NULL if user provided + * benchmark. */ struct resctrl_val_param { - const char *ctrlgrp; - const char *mongrp; - char filename[64]; - unsigned long mask; - int num_of_runs; - int (*init)(const struct resctrl_val_param *param, - int domain_id); - int (*setup)(const struct resctrl_test *test, - const struct user_params *uparams, - struct resctrl_val_param *param); - int (*measure)(const struct user_params *uparams, - struct resctrl_val_param *param, - pid_t bm_pid); + const char *ctrlgrp; + const char *mongrp; + char filename[64]; + unsigned long mask; + int num_of_runs; + int (*init)(const struct resctrl_val_param *param, + int domain_id); + int (*setup)(const struct resctrl_test *test, + const struct user_params *uparams, + struct resctrl_val_param *param); + int (*measure)(const struct user_params *uparams, + struct resctrl_val_param *param, + pid_t bm_pid); + struct fill_buf_param *fill_buf; }; struct perf_event_read { @@ -126,7 +164,6 @@ int filter_dmesg(void); int get_domain_id(const char *resource, int cpu_no, int *domain_id); int mount_resctrlfs(void); int umount_resctrlfs(void); -const char *get_bw_report_type(const char *bw_report); bool resctrl_resource_exists(const char *resource); bool resctrl_mon_feature_exists(const char *resource, const char *feature); bool resource_info_file_exists(const char *resource, const char *file); @@ -139,19 +176,17 @@ int write_schemata(const char *ctrlgrp, char *schemata, int cpu_no, int write_bm_pid_to_resctrl(pid_t bm_pid, const char *ctrlgrp, const char *mongrp); int perf_event_open(struct perf_event_attr *hw_event, pid_t pid, int cpu, int group_fd, unsigned long flags); -unsigned char *alloc_buffer(size_t buf_size, int memflush); +unsigned char *alloc_buffer(size_t buf_size, bool memflush); void mem_flush(unsigned char *buf, size_t buf_size); void fill_cache_read(unsigned char *buf, size_t buf_size, bool once); -int run_fill_buf(size_t buf_size, int memflush, int op, bool once); -int initialize_mem_bw_imc(void); -int measure_mem_bw(const struct user_params *uparams, - struct resctrl_val_param *param, pid_t bm_pid, - const char *bw_report); +ssize_t get_fill_buf_size(int cpu_no, const char *cache_type); +int initialize_read_mem_bw_imc(void); +int measure_read_mem_bw(const struct user_params *uparams, + struct resctrl_val_param *param, pid_t bm_pid); void initialize_mem_bw_resctrl(const struct resctrl_val_param *param, int domain_id); int resctrl_val(const struct resctrl_test *test, const struct user_params *uparams, - const char * const *benchmark_cmd, struct resctrl_val_param *param); unsigned long create_bit_mask(unsigned int start, unsigned int len); unsigned int count_contiguous_bits(unsigned long val, unsigned int *start); diff --git a/tools/testing/selftests/resctrl/resctrl_tests.c b/tools/testing/selftests/resctrl/resctrl_tests.c index ecbb7605a981..3335af815b21 100644 --- a/tools/testing/selftests/resctrl/resctrl_tests.c +++ b/tools/testing/selftests/resctrl/resctrl_tests.c @@ -148,6 +148,78 @@ cleanup: test_cleanup(test); } +/* + * Allocate and initialize a struct fill_buf_param with user provided + * (via "-b fill_buf <fill_buf parameters>") parameters. + * + * Use defaults (that may not be appropriate for all tests) for any + * fill_buf parameters omitted by the user. + * + * Historically it may have been possible for user space to provide + * additional parameters, "operation" ("read" vs "write") in + * benchmark_cmd[3] and "once" (run "once" or until terminated) in + * benchmark_cmd[4]. Changing these parameters have never been + * supported with the default of "read" operation and running until + * terminated built into the tests. Any unsupported values for + * (original) "fill_buf" parameters are treated as failure. + * + * Return: On failure, forcibly exits the test on any parsing failure, + * returns NULL if no parsing needed (user did not actually provide + * "-b fill_buf"). + * On success, returns pointer to newly allocated and fully + * initialized struct fill_buf_param that caller must free. + */ +static struct fill_buf_param *alloc_fill_buf_param(struct user_params *uparams) +{ + struct fill_buf_param *fill_param = NULL; + char *endptr = NULL; + + if (!uparams->benchmark_cmd[0] || strcmp(uparams->benchmark_cmd[0], "fill_buf")) + return NULL; + + fill_param = malloc(sizeof(*fill_param)); + if (!fill_param) + ksft_exit_skip("Unable to allocate memory for fill_buf parameters.\n"); + + if (uparams->benchmark_cmd[1] && *uparams->benchmark_cmd[1] != '\0') { + errno = 0; + fill_param->buf_size = strtoul(uparams->benchmark_cmd[1], &endptr, 10); + if (errno || *endptr != '\0') { + free(fill_param); + ksft_exit_skip("Unable to parse benchmark buffer size.\n"); + } + } else { + fill_param->buf_size = MINIMUM_SPAN; + } + + if (uparams->benchmark_cmd[2] && *uparams->benchmark_cmd[2] != '\0') { + errno = 0; + fill_param->memflush = strtol(uparams->benchmark_cmd[2], &endptr, 10) != 0; + if (errno || *endptr != '\0') { + free(fill_param); + ksft_exit_skip("Unable to parse benchmark memflush parameter.\n"); + } + } else { + fill_param->memflush = true; + } + + if (uparams->benchmark_cmd[3] && *uparams->benchmark_cmd[3] != '\0') { + if (strcmp(uparams->benchmark_cmd[3], "0")) { + free(fill_param); + ksft_exit_skip("Only read operations supported.\n"); + } + } + + if (uparams->benchmark_cmd[4] && *uparams->benchmark_cmd[4] != '\0') { + if (strcmp(uparams->benchmark_cmd[4], "false")) { + free(fill_param); + ksft_exit_skip("fill_buf is required to run until termination.\n"); + } + } + + return fill_param; +} + static void init_user_params(struct user_params *uparams) { memset(uparams, 0, sizeof(*uparams)); @@ -158,11 +230,11 @@ static void init_user_params(struct user_params *uparams) int main(int argc, char **argv) { + struct fill_buf_param *fill_param = NULL; int tests = ARRAY_SIZE(resctrl_tests); bool test_param_seen = false; struct user_params uparams; - char *span_str = NULL; - int ret, c, i; + int c, i; init_user_params(&uparams); @@ -239,6 +311,10 @@ int main(int argc, char **argv) } last_arg: + fill_param = alloc_fill_buf_param(&uparams); + if (fill_param) + uparams.fill_buf = fill_param; + ksft_print_header(); /* @@ -257,24 +333,11 @@ last_arg: filter_dmesg(); - if (!uparams.benchmark_cmd[0]) { - /* If no benchmark is given by "-b" argument, use fill_buf. */ - uparams.benchmark_cmd[0] = "fill_buf"; - ret = asprintf(&span_str, "%u", DEFAULT_SPAN); - if (ret < 0) - ksft_exit_fail_msg("Out of memory!\n"); - uparams.benchmark_cmd[1] = span_str; - uparams.benchmark_cmd[2] = "1"; - uparams.benchmark_cmd[3] = "0"; - uparams.benchmark_cmd[4] = "false"; - uparams.benchmark_cmd[5] = NULL; - } - ksft_set_plan(tests); for (i = 0; i < ARRAY_SIZE(resctrl_tests); i++) run_single_test(resctrl_tests[i], &uparams); - free(span_str); + free(fill_param); ksft_finished(); } diff --git a/tools/testing/selftests/resctrl/resctrl_val.c b/tools/testing/selftests/resctrl/resctrl_val.c index 8c275f6b4dd7..7c08e936572d 100644 --- a/tools/testing/selftests/resctrl/resctrl_val.c +++ b/tools/testing/selftests/resctrl/resctrl_val.c @@ -12,13 +12,10 @@ #define UNCORE_IMC "uncore_imc" #define READ_FILE_NAME "events/cas_count_read" -#define WRITE_FILE_NAME "events/cas_count_write" #define DYN_PMU_PATH "/sys/bus/event_source/devices" #define SCALE 0.00006103515625 #define MAX_IMCS 20 #define MAX_TOKENS 5 -#define READ 0 -#define WRITE 1 #define CON_MBM_LOCAL_BYTES_PATH \ "%s/%s/mon_data/mon_L3_%02d/mbm_local_bytes" @@ -41,85 +38,71 @@ struct imc_counter_config { static char mbm_total_path[1024]; static int imcs; -static struct imc_counter_config imc_counters_config[MAX_IMCS][2]; +static struct imc_counter_config imc_counters_config[MAX_IMCS]; static const struct resctrl_test *current_test; -void membw_initialize_perf_event_attr(int i, int j) +static void read_mem_bw_initialize_perf_event_attr(int i) { - memset(&imc_counters_config[i][j].pe, 0, + memset(&imc_counters_config[i].pe, 0, sizeof(struct perf_event_attr)); - imc_counters_config[i][j].pe.type = imc_counters_config[i][j].type; - imc_counters_config[i][j].pe.size = sizeof(struct perf_event_attr); - imc_counters_config[i][j].pe.disabled = 1; - imc_counters_config[i][j].pe.inherit = 1; - imc_counters_config[i][j].pe.exclude_guest = 0; - imc_counters_config[i][j].pe.config = - imc_counters_config[i][j].umask << 8 | - imc_counters_config[i][j].event; - imc_counters_config[i][j].pe.sample_type = PERF_SAMPLE_IDENTIFIER; - imc_counters_config[i][j].pe.read_format = + imc_counters_config[i].pe.type = imc_counters_config[i].type; + imc_counters_config[i].pe.size = sizeof(struct perf_event_attr); + imc_counters_config[i].pe.disabled = 1; + imc_counters_config[i].pe.inherit = 1; + imc_counters_config[i].pe.exclude_guest = 0; + imc_counters_config[i].pe.config = + imc_counters_config[i].umask << 8 | + imc_counters_config[i].event; + imc_counters_config[i].pe.sample_type = PERF_SAMPLE_IDENTIFIER; + imc_counters_config[i].pe.read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | PERF_FORMAT_TOTAL_TIME_RUNNING; } -void membw_ioctl_perf_event_ioc_reset_enable(int i, int j) +static void read_mem_bw_ioctl_perf_event_ioc_reset_enable(int i) { - ioctl(imc_counters_config[i][j].fd, PERF_EVENT_IOC_RESET, 0); - ioctl(imc_counters_config[i][j].fd, PERF_EVENT_IOC_ENABLE, 0); + ioctl(imc_counters_config[i].fd, PERF_EVENT_IOC_RESET, 0); + ioctl(imc_counters_config[i].fd, PERF_EVENT_IOC_ENABLE, 0); } -void membw_ioctl_perf_event_ioc_disable(int i, int j) +static void read_mem_bw_ioctl_perf_event_ioc_disable(int i) { - ioctl(imc_counters_config[i][j].fd, PERF_EVENT_IOC_DISABLE, 0); + ioctl(imc_counters_config[i].fd, PERF_EVENT_IOC_DISABLE, 0); } /* - * get_event_and_umask: Parse config into event and umask + * get_read_event_and_umask: Parse config into event and umask * @cas_count_cfg: Config * @count: iMC number - * @op: Operation (read/write) */ -void get_event_and_umask(char *cas_count_cfg, int count, bool op) +static void get_read_event_and_umask(char *cas_count_cfg, int count) { char *token[MAX_TOKENS]; int i = 0; - strcat(cas_count_cfg, ","); token[0] = strtok(cas_count_cfg, "=,"); for (i = 1; i < MAX_TOKENS; i++) token[i] = strtok(NULL, "=,"); - for (i = 0; i < MAX_TOKENS; i++) { + for (i = 0; i < MAX_TOKENS - 1; i++) { if (!token[i]) break; - if (strcmp(token[i], "event") == 0) { - if (op == READ) - imc_counters_config[count][READ].event = - strtol(token[i + 1], NULL, 16); - else - imc_counters_config[count][WRITE].event = - strtol(token[i + 1], NULL, 16); - } - if (strcmp(token[i], "umask") == 0) { - if (op == READ) - imc_counters_config[count][READ].umask = - strtol(token[i + 1], NULL, 16); - else - imc_counters_config[count][WRITE].umask = - strtol(token[i + 1], NULL, 16); - } + if (strcmp(token[i], "event") == 0) + imc_counters_config[count].event = strtol(token[i + 1], NULL, 16); + if (strcmp(token[i], "umask") == 0) + imc_counters_config[count].umask = strtol(token[i + 1], NULL, 16); } } -static int open_perf_event(int i, int cpu_no, int j) +static int open_perf_read_event(int i, int cpu_no) { - imc_counters_config[i][j].fd = - perf_event_open(&imc_counters_config[i][j].pe, -1, cpu_no, -1, + imc_counters_config[i].fd = + perf_event_open(&imc_counters_config[i].pe, -1, cpu_no, -1, PERF_FLAG_FD_CLOEXEC); - if (imc_counters_config[i][j].fd == -1) { + if (imc_counters_config[i].fd == -1) { fprintf(stderr, "Error opening leader %llx\n", - imc_counters_config[i][j].pe.config); + imc_counters_config[i].pe.config); return -1; } @@ -127,7 +110,7 @@ static int open_perf_event(int i, int cpu_no, int j) return 0; } -/* Get type and config (read and write) of an iMC counter */ +/* Get type and config of an iMC counter's read event. */ static int read_from_imc_dir(char *imc_dir, int count) { char cas_count_cfg[1024], imc_counter_cfg[1024], imc_counter_type[1024]; @@ -141,7 +124,7 @@ static int read_from_imc_dir(char *imc_dir, int count) return -1; } - if (fscanf(fp, "%u", &imc_counters_config[count][READ].type) <= 0) { + if (fscanf(fp, "%u", &imc_counters_config[count].type) <= 0) { ksft_perror("Could not get iMC type"); fclose(fp); @@ -149,9 +132,6 @@ static int read_from_imc_dir(char *imc_dir, int count) } fclose(fp); - imc_counters_config[count][WRITE].type = - imc_counters_config[count][READ].type; - /* Get read config */ sprintf(imc_counter_cfg, "%s%s", imc_dir, READ_FILE_NAME); fp = fopen(imc_counter_cfg, "r"); @@ -160,7 +140,7 @@ static int read_from_imc_dir(char *imc_dir, int count) return -1; } - if (fscanf(fp, "%s", cas_count_cfg) <= 0) { + if (fscanf(fp, "%1023s", cas_count_cfg) <= 0) { ksft_perror("Could not get iMC cas count read"); fclose(fp); @@ -168,34 +148,19 @@ static int read_from_imc_dir(char *imc_dir, int count) } fclose(fp); - get_event_and_umask(cas_count_cfg, count, READ); - - /* Get write config */ - sprintf(imc_counter_cfg, "%s%s", imc_dir, WRITE_FILE_NAME); - fp = fopen(imc_counter_cfg, "r"); - if (!fp) { - ksft_perror("Failed to open iMC config file"); - - return -1; - } - if (fscanf(fp, "%s", cas_count_cfg) <= 0) { - ksft_perror("Could not get iMC cas count write"); - fclose(fp); - - return -1; - } - fclose(fp); - - get_event_and_umask(cas_count_cfg, count, WRITE); + get_read_event_and_umask(cas_count_cfg, count); return 0; } /* * A system can have 'n' number of iMC (Integrated Memory Controller) - * counters, get that 'n'. For each iMC counter get it's type and config. - * Also, each counter has two configs, one for read and the other for write. - * A config again has two parts, event and umask. + * counters, get that 'n'. Discover the properties of the available + * counters in support of needed performance measurement via perf. + * For each iMC counter get it's type and config. Also obtain each + * counter's event and umask for the memory read events that will be + * measured. + * * Enumerate all these details into an array of structures. * * Return: >= 0 on success. < 0 on failure. @@ -256,55 +221,46 @@ static int num_of_imcs(void) return count; } -int initialize_mem_bw_imc(void) +int initialize_read_mem_bw_imc(void) { - int imc, j; + int imc; imcs = num_of_imcs(); if (imcs <= 0) return imcs; /* Initialize perf_event_attr structures for all iMC's */ - for (imc = 0; imc < imcs; imc++) { - for (j = 0; j < 2; j++) - membw_initialize_perf_event_attr(imc, j); - } + for (imc = 0; imc < imcs; imc++) + read_mem_bw_initialize_perf_event_attr(imc); return 0; } -static void perf_close_imc_mem_bw(void) +static void perf_close_imc_read_mem_bw(void) { int mc; for (mc = 0; mc < imcs; mc++) { - if (imc_counters_config[mc][READ].fd != -1) - close(imc_counters_config[mc][READ].fd); - if (imc_counters_config[mc][WRITE].fd != -1) - close(imc_counters_config[mc][WRITE].fd); + if (imc_counters_config[mc].fd != -1) + close(imc_counters_config[mc].fd); } } /* - * perf_open_imc_mem_bw - Open perf fds for IMCs + * perf_open_imc_read_mem_bw - Open perf fds for IMCs * @cpu_no: CPU number that the benchmark PID is bound to * * Return: = 0 on success. < 0 on failure. */ -static int perf_open_imc_mem_bw(int cpu_no) +static int perf_open_imc_read_mem_bw(int cpu_no) { int imc, ret; - for (imc = 0; imc < imcs; imc++) { - imc_counters_config[imc][READ].fd = -1; - imc_counters_config[imc][WRITE].fd = -1; - } + for (imc = 0; imc < imcs; imc++) + imc_counters_config[imc].fd = -1; for (imc = 0; imc < imcs; imc++) { - ret = open_perf_event(imc, cpu_no, READ); - if (ret) - goto close_fds; - ret = open_perf_event(imc, cpu_no, WRITE); + ret = open_perf_read_event(imc, cpu_no); if (ret) goto close_fds; } @@ -312,60 +268,52 @@ static int perf_open_imc_mem_bw(int cpu_no) return 0; close_fds: - perf_close_imc_mem_bw(); + perf_close_imc_read_mem_bw(); return -1; } /* - * do_mem_bw_test - Perform memory bandwidth test + * do_imc_read_mem_bw_test - Perform memory bandwidth test * * Runs memory bandwidth test over one second period. Also, handles starting * and stopping of the IMC perf counters around the test. */ -static void do_imc_mem_bw_test(void) +static void do_imc_read_mem_bw_test(void) { int imc; - for (imc = 0; imc < imcs; imc++) { - membw_ioctl_perf_event_ioc_reset_enable(imc, READ); - membw_ioctl_perf_event_ioc_reset_enable(imc, WRITE); - } + for (imc = 0; imc < imcs; imc++) + read_mem_bw_ioctl_perf_event_ioc_reset_enable(imc); sleep(1); - /* Stop counters after a second to get results (both read and write) */ - for (imc = 0; imc < imcs; imc++) { - membw_ioctl_perf_event_ioc_disable(imc, READ); - membw_ioctl_perf_event_ioc_disable(imc, WRITE); - } + /* Stop counters after a second to get results. */ + for (imc = 0; imc < imcs; imc++) + read_mem_bw_ioctl_perf_event_ioc_disable(imc); } /* - * get_mem_bw_imc - Memory bandwidth as reported by iMC counters - * @bw_report: Bandwidth report type (reads, writes) + * get_read_mem_bw_imc - Memory read bandwidth as reported by iMC counters * - * Memory bandwidth utilized by a process on a socket can be calculated - * using iMC counters. Perf events are used to read these counters. + * Memory read bandwidth utilized by a process on a socket can be calculated + * using iMC counters' read events. Perf events are used to read these + * counters. * * Return: = 0 on success. < 0 on failure. */ -static int get_mem_bw_imc(const char *bw_report, float *bw_imc) +static int get_read_mem_bw_imc(float *bw_imc) { - float reads, writes, of_mul_read, of_mul_write; + float reads = 0, of_mul_read = 1; int imc; - /* Start all iMC counters to log values (both read and write) */ - reads = 0, writes = 0, of_mul_read = 1, of_mul_write = 1; - /* - * Get results which are stored in struct type imc_counter_config + * Log read event values from all iMC counters into + * struct imc_counter_config. * Take overflow into consideration before calculating total bandwidth. */ for (imc = 0; imc < imcs; imc++) { struct imc_counter_config *r = - &imc_counters_config[imc][READ]; - struct imc_counter_config *w = - &imc_counters_config[imc][WRITE]; + &imc_counters_config[imc]; if (read(r->fd, &r->return_value, sizeof(struct membw_read_format)) == -1) { @@ -373,12 +321,6 @@ static int get_mem_bw_imc(const char *bw_report, float *bw_imc) return -1; } - if (read(w->fd, &w->return_value, - sizeof(struct membw_read_format)) == -1) { - ksft_perror("Couldn't get write bandwidth through iMC"); - return -1; - } - __u64 r_time_enabled = r->return_value.time_enabled; __u64 r_time_running = r->return_value.time_running; @@ -386,27 +328,10 @@ static int get_mem_bw_imc(const char *bw_report, float *bw_imc) of_mul_read = (float)r_time_enabled / (float)r_time_running; - __u64 w_time_enabled = w->return_value.time_enabled; - __u64 w_time_running = w->return_value.time_running; - - if (w_time_enabled != w_time_running) - of_mul_write = (float)w_time_enabled / - (float)w_time_running; reads += r->return_value.value * of_mul_read * SCALE; - writes += w->return_value.value * of_mul_write * SCALE; } - if (strcmp(bw_report, "reads") == 0) { - *bw_imc = reads; - return 0; - } - - if (strcmp(bw_report, "writes") == 0) { - *bw_imc = writes; - return 0; - } - - *bw_imc = reads + writes; + *bw_imc = reads; return 0; } @@ -448,7 +373,7 @@ static int get_mem_bw_resctrl(FILE *fp, unsigned long *mbm_total) return 0; } -static pid_t bm_pid, ppid; +static pid_t bm_pid; void ctrlc_handler(int signum, siginfo_t *info, void *ptr) { @@ -506,13 +431,6 @@ void signal_handler_unregister(void) } } -static void parent_exit(pid_t ppid) -{ - kill(ppid, SIGKILL); - umount_resctrlfs(); - exit(EXIT_FAILURE); -} - /* * print_results_bw: the memory bandwidth results are stored in a file * @filename: file that stores the results @@ -552,35 +470,31 @@ static int print_results_bw(char *filename, pid_t bm_pid, float bw_imc, } /* - * measure_mem_bw - Measures memory bandwidth numbers while benchmark runs + * measure_read_mem_bw - Measures read memory bandwidth numbers while benchmark runs * @uparams: User supplied parameters * @param: Parameters passed to resctrl_val() * @bm_pid: PID that runs the benchmark - * @bw_report: Bandwidth report type (reads, writes) * * Measure memory bandwidth from resctrl and from another source which is * perf imc value or could be something else if perf imc event is not * available. Compare the two values to validate resctrl value. It takes * 1 sec to measure the data. + * resctrl does not distinguish between read and write operations so + * its data includes all memory operations. */ -int measure_mem_bw(const struct user_params *uparams, - struct resctrl_val_param *param, pid_t bm_pid, - const char *bw_report) +int measure_read_mem_bw(const struct user_params *uparams, + struct resctrl_val_param *param, pid_t bm_pid) { unsigned long bw_resc, bw_resc_start, bw_resc_end; FILE *mem_bw_fp; float bw_imc; int ret; - bw_report = get_bw_report_type(bw_report); - if (!bw_report) - return -1; - mem_bw_fp = open_mem_bw_resctrl(mbm_total_path); if (!mem_bw_fp) return -1; - ret = perf_open_imc_mem_bw(uparams->cpu); + ret = perf_open_imc_read_mem_bw(uparams->cpu); if (ret < 0) goto close_fp; @@ -590,17 +504,17 @@ int measure_mem_bw(const struct user_params *uparams, rewind(mem_bw_fp); - do_imc_mem_bw_test(); + do_imc_read_mem_bw_test(); ret = get_mem_bw_resctrl(mem_bw_fp, &bw_resc_end); if (ret < 0) goto close_imc; - ret = get_mem_bw_imc(bw_report, &bw_imc); + ret = get_read_mem_bw_imc(&bw_imc); if (ret < 0) goto close_imc; - perf_close_imc_mem_bw(); + perf_close_imc_read_mem_bw(); fclose(mem_bw_fp); bw_resc = (bw_resc_end - bw_resc_start) / MB; @@ -608,87 +522,30 @@ int measure_mem_bw(const struct user_params *uparams, return print_results_bw(param->filename, bm_pid, bw_imc, bw_resc); close_imc: - perf_close_imc_mem_bw(); + perf_close_imc_read_mem_bw(); close_fp: fclose(mem_bw_fp); return ret; } /* - * run_benchmark - Run a specified benchmark or fill_buf (default benchmark) - * in specified signal. Direct benchmark stdio to /dev/null. - * @signum: signal number - * @info: signal info - * @ucontext: user context in signal handling - */ -static void run_benchmark(int signum, siginfo_t *info, void *ucontext) -{ - int operation, ret, memflush; - char **benchmark_cmd; - size_t span; - bool once; - FILE *fp; - - benchmark_cmd = info->si_ptr; - - /* - * Direct stdio of child to /dev/null, so that only parent writes to - * stdio (console) - */ - fp = freopen("/dev/null", "w", stdout); - if (!fp) { - ksft_perror("Unable to direct benchmark status to /dev/null"); - parent_exit(ppid); - } - - if (strcmp(benchmark_cmd[0], "fill_buf") == 0) { - /* Execute default fill_buf benchmark */ - span = strtoul(benchmark_cmd[1], NULL, 10); - memflush = atoi(benchmark_cmd[2]); - operation = atoi(benchmark_cmd[3]); - if (!strcmp(benchmark_cmd[4], "true")) { - once = true; - } else if (!strcmp(benchmark_cmd[4], "false")) { - once = false; - } else { - ksft_print_msg("Invalid once parameter\n"); - parent_exit(ppid); - } - - if (run_fill_buf(span, memflush, operation, once)) - fprintf(stderr, "Error in running fill buffer\n"); - } else { - /* Execute specified benchmark */ - ret = execvp(benchmark_cmd[0], benchmark_cmd); - if (ret) - ksft_perror("execvp"); - } - - fclose(stdout); - ksft_print_msg("Unable to run specified benchmark\n"); - parent_exit(ppid); -} - -/* * resctrl_val: execute benchmark and measure memory bandwidth on * the benchmark * @test: test information structure * @uparams: user supplied parameters - * @benchmark_cmd: benchmark command and its arguments * @param: parameters passed to resctrl_val() * * Return: 0 when the test was run, < 0 on error. */ int resctrl_val(const struct resctrl_test *test, const struct user_params *uparams, - const char * const *benchmark_cmd, struct resctrl_val_param *param) { - struct sigaction sigact; - int ret = 0, pipefd[2]; - char pipe_message = 0; - union sigval value; + unsigned char *buf = NULL; + cpu_set_t old_affinity; int domain_id; + int ret = 0; + pid_t ppid; if (strcmp(param->filename, "") == 0) sprintf(param->filename, "stdio"); @@ -699,111 +556,65 @@ int resctrl_val(const struct resctrl_test *test, return ret; } - /* - * If benchmark wasn't successfully started by child, then child should - * kill parent, so save parent's pid - */ ppid = getpid(); - if (pipe(pipefd)) { - ksft_perror("Unable to create pipe"); + /* Taskset test to specified CPU. */ + ret = taskset_benchmark(ppid, uparams->cpu, &old_affinity); + if (ret) + return ret; - return -1; + /* Write test to specified control & monitoring group in resctrl FS. */ + ret = write_bm_pid_to_resctrl(ppid, param->ctrlgrp, param->mongrp); + if (ret) + goto reset_affinity; + + if (param->init) { + ret = param->init(param, domain_id); + if (ret) + goto reset_affinity; } /* - * Fork to start benchmark, save child's pid so that it can be killed - * when needed + * If not running user provided benchmark, run the default + * "fill_buf". First phase of "fill_buf" is to prepare the + * buffer that the benchmark will operate on. No measurements + * are needed during this phase and prepared memory will be + * passed to next part of benchmark via copy-on-write thus + * no impact on the benchmark that relies on reading from + * memory only. */ + if (param->fill_buf) { + buf = alloc_buffer(param->fill_buf->buf_size, + param->fill_buf->memflush); + if (!buf) { + ret = -ENOMEM; + goto reset_affinity; + } + } + fflush(stdout); bm_pid = fork(); if (bm_pid == -1) { + ret = -errno; ksft_perror("Unable to fork"); - - return -1; + goto free_buf; } - if (bm_pid == 0) { - /* - * Mask all signals except SIGUSR1, parent uses SIGUSR1 to - * start benchmark - */ - sigfillset(&sigact.sa_mask); - sigdelset(&sigact.sa_mask, SIGUSR1); - - sigact.sa_sigaction = run_benchmark; - sigact.sa_flags = SA_SIGINFO; - - /* Register for "SIGUSR1" signal from parent */ - if (sigaction(SIGUSR1, &sigact, NULL)) { - ksft_perror("Can't register child for signal"); - parent_exit(ppid); - } - - /* Tell parent that child is ready */ - close(pipefd[0]); - pipe_message = 1; - if (write(pipefd[1], &pipe_message, sizeof(pipe_message)) < - sizeof(pipe_message)) { - ksft_perror("Failed signaling parent process"); - close(pipefd[1]); - return -1; - } - close(pipefd[1]); - - /* Suspend child until delivery of "SIGUSR1" from parent */ - sigsuspend(&sigact.sa_mask); - - ksft_perror("Child is done"); - parent_exit(ppid); - } - - ksft_print_msg("Benchmark PID: %d\n", (int)bm_pid); - /* - * The cast removes constness but nothing mutates benchmark_cmd within - * the context of this process. At the receiving process, it becomes - * argv, which is mutable, on exec() but that's after fork() so it - * doesn't matter for the process running the tests. + * What needs to be measured runs in separate process until + * terminated. */ - value.sival_ptr = (void *)benchmark_cmd; - - /* Taskset benchmark to specified cpu */ - ret = taskset_benchmark(bm_pid, uparams->cpu, NULL); - if (ret) - goto out; - - /* Write benchmark to specified control&monitoring grp in resctrl FS */ - ret = write_bm_pid_to_resctrl(bm_pid, param->ctrlgrp, param->mongrp); - if (ret) - goto out; - - if (param->init) { - ret = param->init(param, domain_id); - if (ret) - goto out; - } - - /* Parent waits for child to be ready. */ - close(pipefd[1]); - while (pipe_message != 1) { - if (read(pipefd[0], &pipe_message, sizeof(pipe_message)) < - sizeof(pipe_message)) { - ksft_perror("Failed reading message from child process"); - close(pipefd[0]); - goto out; - } + if (bm_pid == 0) { + if (param->fill_buf) + fill_cache_read(buf, param->fill_buf->buf_size, false); + else if (uparams->benchmark_cmd[0]) + execvp(uparams->benchmark_cmd[0], (char **)uparams->benchmark_cmd); + exit(EXIT_SUCCESS); } - close(pipefd[0]); - /* Signal child to start benchmark */ - if (sigqueue(bm_pid, SIGUSR1, value) == -1) { - ksft_perror("sigqueue SIGUSR1 to child"); - ret = -1; - goto out; - } + ksft_print_msg("Benchmark PID: %d\n", (int)bm_pid); - /* Give benchmark enough time to fully run */ + /* Give benchmark enough time to fully run. */ sleep(1); /* Test runs until the callback setup() tells the test to stop. */ @@ -821,8 +632,10 @@ int resctrl_val(const struct resctrl_test *test, break; } -out: kill(bm_pid, SIGKILL); - +free_buf: + free(buf); +reset_affinity: + taskset_restore(ppid, &old_affinity); return ret; } diff --git a/tools/testing/selftests/resctrl/resctrlfs.c b/tools/testing/selftests/resctrl/resctrlfs.c index 250c320349a7..d38d6dd90be4 100644 --- a/tools/testing/selftests/resctrl/resctrlfs.c +++ b/tools/testing/selftests/resctrl/resctrlfs.c @@ -182,7 +182,7 @@ int get_cache_size(int cpu_no, const char *cache_type, unsigned long *cache_size return -1; } - if (fscanf(fp, "%s", cache_str) <= 0) { + if (fscanf(fp, "%63s", cache_str) <= 0) { ksft_perror("Could not get cache_size"); fclose(fp); @@ -831,23 +831,6 @@ int filter_dmesg(void) return 0; } -const char *get_bw_report_type(const char *bw_report) -{ - if (strcmp(bw_report, "reads") == 0) - return bw_report; - if (strcmp(bw_report, "writes") == 0) - return bw_report; - if (strcmp(bw_report, "nt-writes") == 0) { - return "writes"; - } - if (strcmp(bw_report, "total") == 0) - return bw_report; - - fprintf(stderr, "Requested iMC bandwidth report type unavailable\n"); - - return NULL; -} - int perf_event_open(struct perf_event_attr *hw_event, pid_t pid, int cpu, int group_fd, unsigned long flags) { diff --git a/tools/testing/selftests/riscv/Makefile b/tools/testing/selftests/riscv/Makefile index 7ce03d832b64..099b8c1f46f8 100644 --- a/tools/testing/selftests/riscv/Makefile +++ b/tools/testing/selftests/riscv/Makefile @@ -5,7 +5,7 @@ ARCH ?= $(shell uname -m 2>/dev/null || echo not) ifneq (,$(filter $(ARCH),riscv)) -RISCV_SUBTARGETS ?= hwprobe vector mm sigreturn +RISCV_SUBTARGETS ?= abi hwprobe mm sigreturn vector else RISCV_SUBTARGETS := endif diff --git a/tools/testing/selftests/riscv/abi/.gitignore b/tools/testing/selftests/riscv/abi/.gitignore new file mode 100644 index 000000000000..b38358f91c4d --- /dev/null +++ b/tools/testing/selftests/riscv/abi/.gitignore @@ -0,0 +1 @@ +pointer_masking diff --git a/tools/testing/selftests/riscv/abi/Makefile b/tools/testing/selftests/riscv/abi/Makefile new file mode 100644 index 000000000000..ed82ff9c664e --- /dev/null +++ b/tools/testing/selftests/riscv/abi/Makefile @@ -0,0 +1,10 @@ +# SPDX-License-Identifier: GPL-2.0 + +CFLAGS += -I$(top_srcdir)/tools/include + +TEST_GEN_PROGS := pointer_masking + +include ../../lib.mk + +$(OUTPUT)/pointer_masking: pointer_masking.c + $(CC) -static -o$@ $(CFLAGS) $(LDFLAGS) $^ diff --git a/tools/testing/selftests/riscv/abi/pointer_masking.c b/tools/testing/selftests/riscv/abi/pointer_masking.c new file mode 100644 index 000000000000..dee41b7ee3e3 --- /dev/null +++ b/tools/testing/selftests/riscv/abi/pointer_masking.c @@ -0,0 +1,332 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include <errno.h> +#include <fcntl.h> +#include <setjmp.h> +#include <signal.h> +#include <stdbool.h> +#include <sys/prctl.h> +#include <sys/wait.h> +#include <unistd.h> + +#include "../../kselftest.h" + +#ifndef PR_PMLEN_SHIFT +#define PR_PMLEN_SHIFT 24 +#endif +#ifndef PR_PMLEN_MASK +#define PR_PMLEN_MASK (0x7fUL << PR_PMLEN_SHIFT) +#endif + +static int dev_zero; + +static int pipefd[2]; + +static sigjmp_buf jmpbuf; + +static void sigsegv_handler(int sig) +{ + siglongjmp(jmpbuf, 1); +} + +static int min_pmlen; +static int max_pmlen; + +static inline bool valid_pmlen(int pmlen) +{ + return pmlen == 0 || pmlen == 7 || pmlen == 16; +} + +static void test_pmlen(void) +{ + ksft_print_msg("Testing available PMLEN values\n"); + + for (int request = 0; request <= 16; request++) { + int pmlen, ret; + + ret = prctl(PR_SET_TAGGED_ADDR_CTRL, request << PR_PMLEN_SHIFT, 0, 0, 0); + if (ret) + goto pr_set_error; + + ret = prctl(PR_GET_TAGGED_ADDR_CTRL, 0, 0, 0, 0); + ksft_test_result(ret >= 0, "PMLEN=%d PR_GET_TAGGED_ADDR_CTRL\n", request); + if (ret < 0) + goto pr_get_error; + + pmlen = (ret & PR_PMLEN_MASK) >> PR_PMLEN_SHIFT; + ksft_test_result(pmlen >= request, "PMLEN=%d constraint\n", request); + ksft_test_result(valid_pmlen(pmlen), "PMLEN=%d validity\n", request); + + if (min_pmlen == 0) + min_pmlen = pmlen; + if (max_pmlen < pmlen) + max_pmlen = pmlen; + + continue; + +pr_set_error: + ksft_test_result_skip("PMLEN=%d PR_GET_TAGGED_ADDR_CTRL\n", request); +pr_get_error: + ksft_test_result_skip("PMLEN=%d constraint\n", request); + ksft_test_result_skip("PMLEN=%d validity\n", request); + } + + if (max_pmlen == 0) + ksft_exit_fail_msg("Failed to enable pointer masking\n"); +} + +static int set_tagged_addr_ctrl(int pmlen, bool tagged_addr_abi) +{ + int arg, ret; + + arg = pmlen << PR_PMLEN_SHIFT | tagged_addr_abi; + ret = prctl(PR_SET_TAGGED_ADDR_CTRL, arg, 0, 0, 0); + if (!ret) { + ret = prctl(PR_GET_TAGGED_ADDR_CTRL, 0, 0, 0, 0); + if (ret == arg) + return 0; + } + + return ret < 0 ? -errno : -ENODATA; +} + +static void test_dereference_pmlen(int pmlen) +{ + static volatile int i; + volatile int *p; + int ret; + + ret = set_tagged_addr_ctrl(pmlen, false); + if (ret) + return ksft_test_result_error("PMLEN=%d setup (%d)\n", pmlen, ret); + + i = pmlen; + + if (pmlen) { + p = (volatile int *)((uintptr_t)&i | 1UL << (__riscv_xlen - pmlen)); + + /* These dereferences should succeed. */ + if (sigsetjmp(jmpbuf, 1)) + return ksft_test_result_fail("PMLEN=%d valid tag\n", pmlen); + if (*p != pmlen) + return ksft_test_result_fail("PMLEN=%d bad value\n", pmlen); + ++*p; + } + + p = (volatile int *)((uintptr_t)&i | 1UL << (__riscv_xlen - pmlen - 1)); + + /* These dereferences should raise SIGSEGV. */ + if (sigsetjmp(jmpbuf, 1)) + return ksft_test_result_pass("PMLEN=%d dereference\n", pmlen); + ++*p; + ksft_test_result_fail("PMLEN=%d invalid tag\n", pmlen); +} + +static void test_dereference(void) +{ + ksft_print_msg("Testing userspace pointer dereference\n"); + + signal(SIGSEGV, sigsegv_handler); + + test_dereference_pmlen(0); + test_dereference_pmlen(min_pmlen); + test_dereference_pmlen(max_pmlen); + + signal(SIGSEGV, SIG_DFL); +} + +static void execve_child_sigsegv_handler(int sig) +{ + exit(42); +} + +static int execve_child(void) +{ + static volatile int i; + volatile int *p = (volatile int *)((uintptr_t)&i | 1UL << (__riscv_xlen - 7)); + + signal(SIGSEGV, execve_child_sigsegv_handler); + + /* This dereference should raise SIGSEGV. */ + return *p; +} + +static void test_fork_exec(void) +{ + int ret, status; + + ksft_print_msg("Testing fork/exec behavior\n"); + + ret = set_tagged_addr_ctrl(min_pmlen, false); + if (ret) + return ksft_test_result_error("setup (%d)\n", ret); + + if (fork()) { + wait(&status); + ksft_test_result(WIFEXITED(status) && WEXITSTATUS(status) == 42, + "dereference after fork\n"); + } else { + static volatile int i = 42; + volatile int *p; + + p = (volatile int *)((uintptr_t)&i | 1UL << (__riscv_xlen - min_pmlen)); + + /* This dereference should succeed. */ + exit(*p); + } + + if (fork()) { + wait(&status); + ksft_test_result(WIFEXITED(status) && WEXITSTATUS(status) == 42, + "dereference after fork+exec\n"); + } else { + /* Will call execve_child(). */ + execve("/proc/self/exe", (char *const []) { "", NULL }, NULL); + } +} + +static void test_tagged_addr_abi_sysctl(void) +{ + char value; + int fd; + + ksft_print_msg("Testing tagged address ABI sysctl\n"); + + fd = open("/proc/sys/abi/tagged_addr_disabled", O_WRONLY); + if (fd < 0) { + ksft_test_result_skip("failed to open sysctl file\n"); + ksft_test_result_skip("failed to open sysctl file\n"); + return; + } + + value = '1'; + pwrite(fd, &value, 1, 0); + ksft_test_result(set_tagged_addr_ctrl(min_pmlen, true) == -EINVAL, + "sysctl disabled\n"); + + value = '0'; + pwrite(fd, &value, 1, 0); + ksft_test_result(set_tagged_addr_ctrl(min_pmlen, true) == 0, + "sysctl enabled\n"); + + set_tagged_addr_ctrl(0, false); + + close(fd); +} + +static void test_tagged_addr_abi_pmlen(int pmlen) +{ + int i, *p, ret; + + i = ~pmlen; + + if (pmlen) { + p = (int *)((uintptr_t)&i | 1UL << (__riscv_xlen - pmlen)); + + ret = set_tagged_addr_ctrl(pmlen, false); + if (ret) + return ksft_test_result_error("PMLEN=%d ABI disabled setup (%d)\n", + pmlen, ret); + + ret = write(pipefd[1], p, sizeof(*p)); + if (ret >= 0 || errno != EFAULT) + return ksft_test_result_fail("PMLEN=%d ABI disabled write\n", pmlen); + + ret = read(dev_zero, p, sizeof(*p)); + if (ret >= 0 || errno != EFAULT) + return ksft_test_result_fail("PMLEN=%d ABI disabled read\n", pmlen); + + if (i != ~pmlen) + return ksft_test_result_fail("PMLEN=%d ABI disabled value\n", pmlen); + + ret = set_tagged_addr_ctrl(pmlen, true); + if (ret) + return ksft_test_result_error("PMLEN=%d ABI enabled setup (%d)\n", + pmlen, ret); + + ret = write(pipefd[1], p, sizeof(*p)); + if (ret != sizeof(*p)) + return ksft_test_result_fail("PMLEN=%d ABI enabled write\n", pmlen); + + ret = read(dev_zero, p, sizeof(*p)); + if (ret != sizeof(*p)) + return ksft_test_result_fail("PMLEN=%d ABI enabled read\n", pmlen); + + if (i) + return ksft_test_result_fail("PMLEN=%d ABI enabled value\n", pmlen); + + i = ~pmlen; + } else { + /* The tagged address ABI cannot be enabled when PMLEN == 0. */ + ret = set_tagged_addr_ctrl(pmlen, true); + if (ret != -EINVAL) + return ksft_test_result_error("PMLEN=%d ABI setup (%d)\n", + pmlen, ret); + } + + p = (int *)((uintptr_t)&i | 1UL << (__riscv_xlen - pmlen - 1)); + + ret = write(pipefd[1], p, sizeof(*p)); + if (ret >= 0 || errno != EFAULT) + return ksft_test_result_fail("PMLEN=%d invalid tag write (%d)\n", pmlen, errno); + + ret = read(dev_zero, p, sizeof(*p)); + if (ret >= 0 || errno != EFAULT) + return ksft_test_result_fail("PMLEN=%d invalid tag read\n", pmlen); + + if (i != ~pmlen) + return ksft_test_result_fail("PMLEN=%d invalid tag value\n", pmlen); + + ksft_test_result_pass("PMLEN=%d tagged address ABI\n", pmlen); +} + +static void test_tagged_addr_abi(void) +{ + ksft_print_msg("Testing tagged address ABI\n"); + + test_tagged_addr_abi_pmlen(0); + test_tagged_addr_abi_pmlen(min_pmlen); + test_tagged_addr_abi_pmlen(max_pmlen); +} + +static struct test_info { + unsigned int nr_tests; + void (*test_fn)(void); +} tests[] = { + { .nr_tests = 17 * 3, test_pmlen }, + { .nr_tests = 3, test_dereference }, + { .nr_tests = 2, test_fork_exec }, + { .nr_tests = 2, test_tagged_addr_abi_sysctl }, + { .nr_tests = 3, test_tagged_addr_abi }, +}; + +int main(int argc, char **argv) +{ + unsigned int plan = 0; + int ret; + + /* Check if this is the child process after execve(). */ + if (!argv[0][0]) + return execve_child(); + + dev_zero = open("/dev/zero", O_RDWR); + if (dev_zero < 0) + return 1; + + /* Write to a pipe so the kernel must dereference the buffer pointer. */ + ret = pipe(pipefd); + if (ret) + return 1; + + ksft_print_header(); + + for (int i = 0; i < ARRAY_SIZE(tests); i++) + plan += tests[i].nr_tests; + + ksft_set_plan(plan); + + for (int i = 0; i < ARRAY_SIZE(tests); i++) + tests[i].test_fn(); + + ksft_finished(); +} diff --git a/tools/testing/selftests/rseq/rseq.c b/tools/testing/selftests/rseq/rseq.c index 96e812bdf8a4..5b9772cdf265 100644 --- a/tools/testing/selftests/rseq/rseq.c +++ b/tools/testing/selftests/rseq/rseq.c @@ -60,12 +60,6 @@ unsigned int rseq_size = -1U; /* Flags used during rseq registration. */ unsigned int rseq_flags; -/* - * rseq feature size supported by the kernel. 0 if the registration was - * unsuccessful. - */ -unsigned int rseq_feature_size = -1U; - static int rseq_ownership; static int rseq_reg_success; /* At least one rseq registration has succeded. */ @@ -111,6 +105,43 @@ int rseq_available(void) } } +/* The rseq areas need to be at least 32 bytes. */ +static +unsigned int get_rseq_min_alloc_size(void) +{ + unsigned int alloc_size = rseq_size; + + if (alloc_size < ORIG_RSEQ_ALLOC_SIZE) + alloc_size = ORIG_RSEQ_ALLOC_SIZE; + return alloc_size; +} + +/* + * Return the feature size supported by the kernel. + * + * Depending on the value returned by getauxval(AT_RSEQ_FEATURE_SIZE): + * + * 0: Return ORIG_RSEQ_FEATURE_SIZE (20) + * > 0: Return the value from getauxval(AT_RSEQ_FEATURE_SIZE). + * + * It should never return a value below ORIG_RSEQ_FEATURE_SIZE. + */ +static +unsigned int get_rseq_kernel_feature_size(void) +{ + unsigned long auxv_rseq_feature_size, auxv_rseq_align; + + auxv_rseq_align = getauxval(AT_RSEQ_ALIGN); + assert(!auxv_rseq_align || auxv_rseq_align <= RSEQ_THREAD_AREA_ALLOC_SIZE); + + auxv_rseq_feature_size = getauxval(AT_RSEQ_FEATURE_SIZE); + assert(!auxv_rseq_feature_size || auxv_rseq_feature_size <= RSEQ_THREAD_AREA_ALLOC_SIZE); + if (auxv_rseq_feature_size) + return auxv_rseq_feature_size; + else + return ORIG_RSEQ_FEATURE_SIZE; +} + int rseq_register_current_thread(void) { int rc; @@ -119,7 +150,7 @@ int rseq_register_current_thread(void) /* Treat libc's ownership as a successful registration. */ return 0; } - rc = sys_rseq(&__rseq_abi, rseq_size, 0, RSEQ_SIG); + rc = sys_rseq(&__rseq_abi, get_rseq_min_alloc_size(), 0, RSEQ_SIG); if (rc) { if (RSEQ_READ_ONCE(rseq_reg_success)) { /* Incoherent success/failure within process. */ @@ -140,28 +171,12 @@ int rseq_unregister_current_thread(void) /* Treat libc's ownership as a successful unregistration. */ return 0; } - rc = sys_rseq(&__rseq_abi, rseq_size, RSEQ_ABI_FLAG_UNREGISTER, RSEQ_SIG); + rc = sys_rseq(&__rseq_abi, get_rseq_min_alloc_size(), RSEQ_ABI_FLAG_UNREGISTER, RSEQ_SIG); if (rc) return -1; return 0; } -static -unsigned int get_rseq_feature_size(void) -{ - unsigned long auxv_rseq_feature_size, auxv_rseq_align; - - auxv_rseq_align = getauxval(AT_RSEQ_ALIGN); - assert(!auxv_rseq_align || auxv_rseq_align <= RSEQ_THREAD_AREA_ALLOC_SIZE); - - auxv_rseq_feature_size = getauxval(AT_RSEQ_FEATURE_SIZE); - assert(!auxv_rseq_feature_size || auxv_rseq_feature_size <= RSEQ_THREAD_AREA_ALLOC_SIZE); - if (auxv_rseq_feature_size) - return auxv_rseq_feature_size; - else - return ORIG_RSEQ_FEATURE_SIZE; -} - static __attribute__((constructor)) void rseq_init(void) { @@ -178,28 +193,54 @@ void rseq_init(void) } if (libc_rseq_size_p && libc_rseq_offset_p && libc_rseq_flags_p && *libc_rseq_size_p != 0) { + unsigned int libc_rseq_size; + /* rseq registration owned by glibc */ rseq_offset = *libc_rseq_offset_p; - rseq_size = *libc_rseq_size_p; + libc_rseq_size = *libc_rseq_size_p; rseq_flags = *libc_rseq_flags_p; - rseq_feature_size = get_rseq_feature_size(); - if (rseq_feature_size > rseq_size) - rseq_feature_size = rseq_size; + + /* + * Previous versions of glibc expose the value + * 32 even though the kernel only supported 20 + * bytes initially. Therefore treat 32 as a + * special-case. glibc 2.40 exposes a 20 bytes + * __rseq_size without using getauxval(3) to + * query the supported size, while still allocating a 32 + * bytes area. Also treat 20 as a special-case. + * + * Special-cases are handled by using the following + * value as active feature set size: + * + * rseq_size = min(32, get_rseq_kernel_feature_size()) + */ + switch (libc_rseq_size) { + case ORIG_RSEQ_FEATURE_SIZE: + fallthrough; + case ORIG_RSEQ_ALLOC_SIZE: + { + unsigned int rseq_kernel_feature_size = get_rseq_kernel_feature_size(); + + if (rseq_kernel_feature_size < ORIG_RSEQ_ALLOC_SIZE) + rseq_size = rseq_kernel_feature_size; + else + rseq_size = ORIG_RSEQ_ALLOC_SIZE; + break; + } + default: + /* Otherwise just use the __rseq_size from libc as rseq_size. */ + rseq_size = libc_rseq_size; + break; + } return; } rseq_ownership = 1; if (!rseq_available()) { rseq_size = 0; - rseq_feature_size = 0; return; } rseq_offset = (void *)&__rseq_abi - rseq_thread_pointer(); rseq_flags = 0; - rseq_feature_size = get_rseq_feature_size(); - if (rseq_feature_size == ORIG_RSEQ_FEATURE_SIZE) - rseq_size = ORIG_RSEQ_ALLOC_SIZE; - else - rseq_size = RSEQ_THREAD_AREA_ALLOC_SIZE; } static __attribute__((destructor)) @@ -209,7 +250,6 @@ void rseq_exit(void) return; rseq_offset = 0; rseq_size = -1U; - rseq_feature_size = -1U; rseq_ownership = 0; } diff --git a/tools/testing/selftests/rseq/rseq.h b/tools/testing/selftests/rseq/rseq.h index d7364ea4d201..4e217b620e0c 100644 --- a/tools/testing/selftests/rseq/rseq.h +++ b/tools/testing/selftests/rseq/rseq.h @@ -68,12 +68,6 @@ extern unsigned int rseq_size; /* Flags used during rseq registration. */ extern unsigned int rseq_flags; -/* - * rseq feature size supported by the kernel. 0 if the registration was - * unsuccessful. - */ -extern unsigned int rseq_feature_size; - enum rseq_mo { RSEQ_MO_RELAXED = 0, RSEQ_MO_CONSUME = 1, /* Unused */ @@ -193,7 +187,7 @@ static inline uint32_t rseq_current_cpu(void) static inline bool rseq_node_id_available(void) { - return (int) rseq_feature_size >= rseq_offsetofend(struct rseq_abi, node_id); + return (int) rseq_size >= rseq_offsetofend(struct rseq_abi, node_id); } /* @@ -207,7 +201,7 @@ static inline uint32_t rseq_current_node_id(void) static inline bool rseq_mm_cid_available(void) { - return (int) rseq_feature_size >= rseq_offsetofend(struct rseq_abi, mm_cid); + return (int) rseq_size >= rseq_offsetofend(struct rseq_abi, mm_cid); } static inline uint32_t rseq_current_mm_cid(void) diff --git a/tools/testing/selftests/rtc/Makefile b/tools/testing/selftests/rtc/Makefile index 55198ecc04db..9dbb395c5c79 100644 --- a/tools/testing/selftests/rtc/Makefile +++ b/tools/testing/selftests/rtc/Makefile @@ -1,5 +1,5 @@ # SPDX-License-Identifier: GPL-2.0 -CFLAGS += -O3 -Wl,-no-as-needed -Wall +CFLAGS += -O3 -Wl,-no-as-needed -Wall -I$(top_srcdir)/usr/include LDLIBS += -lrt -lpthread -lm TEST_GEN_PROGS = rtctest diff --git a/tools/testing/selftests/rtc/rtctest.c b/tools/testing/selftests/rtc/rtctest.c index 38a8e67de77d..e103097d0b5b 100644 --- a/tools/testing/selftests/rtc/rtctest.c +++ b/tools/testing/selftests/rtc/rtctest.c @@ -25,6 +25,12 @@ static char *rtc_file = "/dev/rtc0"; +enum rtc_alarm_state { + RTC_ALARM_UNKNOWN, + RTC_ALARM_ENABLED, + RTC_ALARM_DISABLED, +}; + FIXTURE(rtc) { int fd; }; @@ -82,6 +88,24 @@ static void nanosleep_with_retries(long ns) } } +static enum rtc_alarm_state get_rtc_alarm_state(int fd) +{ + struct rtc_param param = { 0 }; + int rc; + + /* Validate kernel reflects unsupported RTC alarm state */ + param.param = RTC_PARAM_FEATURES; + param.index = 0; + rc = ioctl(fd, RTC_PARAM_GET, ¶m); + if (rc < 0) + return RTC_ALARM_UNKNOWN; + + if ((param.uvalue & _BITUL(RTC_FEATURE_ALARM)) == 0) + return RTC_ALARM_DISABLED; + + return RTC_ALARM_ENABLED; +} + TEST_F_TIMEOUT(rtc, date_read_loop, READ_LOOP_DURATION_SEC + 2) { int rc; long iter_count = 0; @@ -197,11 +221,16 @@ TEST_F(rtc, alarm_alm_set) { fd_set readfds; time_t secs, new; int rc; + enum rtc_alarm_state alarm_state = RTC_ALARM_UNKNOWN; if (self->fd == -1 && errno == ENOENT) SKIP(return, "Skipping test since %s does not exist", rtc_file); ASSERT_NE(-1, self->fd); + alarm_state = get_rtc_alarm_state(self->fd); + if (alarm_state == RTC_ALARM_DISABLED) + SKIP(return, "Skipping test since alarms are not supported."); + rc = ioctl(self->fd, RTC_RD_TIME, &tm); ASSERT_NE(-1, rc); @@ -210,6 +239,11 @@ TEST_F(rtc, alarm_alm_set) { rc = ioctl(self->fd, RTC_ALM_SET, &tm); if (rc == -1) { + /* + * Report error if rtc alarm was enabled. Fallback to check ioctl + * error number if rtc alarm state is unknown. + */ + ASSERT_EQ(RTC_ALARM_UNKNOWN, alarm_state); ASSERT_EQ(EINVAL, errno); TH_LOG("skip alarms are not supported."); return; @@ -255,11 +289,16 @@ TEST_F(rtc, alarm_wkalm_set) { fd_set readfds; time_t secs, new; int rc; + enum rtc_alarm_state alarm_state = RTC_ALARM_UNKNOWN; if (self->fd == -1 && errno == ENOENT) SKIP(return, "Skipping test since %s does not exist", rtc_file); ASSERT_NE(-1, self->fd); + alarm_state = get_rtc_alarm_state(self->fd); + if (alarm_state == RTC_ALARM_DISABLED) + SKIP(return, "Skipping test since alarms are not supported."); + rc = ioctl(self->fd, RTC_RD_TIME, &alarm.time); ASSERT_NE(-1, rc); @@ -270,6 +309,11 @@ TEST_F(rtc, alarm_wkalm_set) { rc = ioctl(self->fd, RTC_WKALM_SET, &alarm); if (rc == -1) { + /* + * Report error if rtc alarm was enabled. Fallback to check ioctl + * error number if rtc alarm state is unknown. + */ + ASSERT_EQ(RTC_ALARM_UNKNOWN, alarm_state); ASSERT_EQ(EINVAL, errno); TH_LOG("skip alarms are not supported."); return; @@ -307,11 +351,16 @@ TEST_F_TIMEOUT(rtc, alarm_alm_set_minute, 65) { fd_set readfds; time_t secs, new; int rc; + enum rtc_alarm_state alarm_state = RTC_ALARM_UNKNOWN; if (self->fd == -1 && errno == ENOENT) SKIP(return, "Skipping test since %s does not exist", rtc_file); ASSERT_NE(-1, self->fd); + alarm_state = get_rtc_alarm_state(self->fd); + if (alarm_state == RTC_ALARM_DISABLED) + SKIP(return, "Skipping test since alarms are not supported."); + rc = ioctl(self->fd, RTC_RD_TIME, &tm); ASSERT_NE(-1, rc); @@ -320,6 +369,11 @@ TEST_F_TIMEOUT(rtc, alarm_alm_set_minute, 65) { rc = ioctl(self->fd, RTC_ALM_SET, &tm); if (rc == -1) { + /* + * Report error if rtc alarm was enabled. Fallback to check ioctl + * error number if rtc alarm state is unknown. + */ + ASSERT_EQ(RTC_ALARM_UNKNOWN, alarm_state); ASSERT_EQ(EINVAL, errno); TH_LOG("skip alarms are not supported."); return; @@ -365,11 +419,16 @@ TEST_F_TIMEOUT(rtc, alarm_wkalm_set_minute, 65) { fd_set readfds; time_t secs, new; int rc; + enum rtc_alarm_state alarm_state = RTC_ALARM_UNKNOWN; if (self->fd == -1 && errno == ENOENT) SKIP(return, "Skipping test since %s does not exist", rtc_file); ASSERT_NE(-1, self->fd); + alarm_state = get_rtc_alarm_state(self->fd); + if (alarm_state == RTC_ALARM_DISABLED) + SKIP(return, "Skipping test since alarms are not supported."); + rc = ioctl(self->fd, RTC_RD_TIME, &alarm.time); ASSERT_NE(-1, rc); @@ -380,6 +439,11 @@ TEST_F_TIMEOUT(rtc, alarm_wkalm_set_minute, 65) { rc = ioctl(self->fd, RTC_WKALM_SET, &alarm); if (rc == -1) { + /* + * Report error if rtc alarm was enabled. Fallback to check ioctl + * error number if rtc alarm state is unknown. + */ + ASSERT_EQ(RTC_ALARM_UNKNOWN, alarm_state); ASSERT_EQ(EINVAL, errno); TH_LOG("skip alarms are not supported."); return; diff --git a/tools/testing/selftests/sched_ext/Makefile b/tools/testing/selftests/sched_ext/Makefile index 0754a2c110a1..011762224600 100644 --- a/tools/testing/selftests/sched_ext/Makefile +++ b/tools/testing/selftests/sched_ext/Makefile @@ -3,23 +3,12 @@ include ../../../build/Build.include include ../../../scripts/Makefile.arch include ../../../scripts/Makefile.include -include ../lib.mk -ifneq ($(LLVM),) -ifneq ($(filter %/,$(LLVM)),) -LLVM_PREFIX := $(LLVM) -else ifneq ($(filter -%,$(LLVM)),) -LLVM_SUFFIX := $(LLVM) -endif - -CC := $(LLVM_PREFIX)clang$(LLVM_SUFFIX) $(CLANG_FLAGS) -fintegrated-as -else -CC := gcc -endif # LLVM +TEST_GEN_PROGS := runner -ifneq ($(CROSS_COMPILE),) -$(error CROSS_COMPILE not supported for scx selftests) -endif # CROSS_COMPILE +# override lib.mk's default rules +OVERRIDE_TARGETS := 1 +include ../lib.mk CURDIR := $(abspath .) REPOROOT := $(abspath ../../../..) @@ -34,18 +23,23 @@ GENHDR := $(GENDIR)/autoconf.h SCXTOOLSDIR := $(TOOLSDIR)/sched_ext SCXTOOLSINCDIR := $(TOOLSDIR)/sched_ext/include -OUTPUT_DIR := $(CURDIR)/build +OUTPUT_DIR := $(OUTPUT)/build OBJ_DIR := $(OUTPUT_DIR)/obj INCLUDE_DIR := $(OUTPUT_DIR)/include BPFOBJ_DIR := $(OBJ_DIR)/libbpf SCXOBJ_DIR := $(OBJ_DIR)/sched_ext BPFOBJ := $(BPFOBJ_DIR)/libbpf.a LIBBPF_OUTPUT := $(OBJ_DIR)/libbpf/libbpf.a -DEFAULT_BPFTOOL := $(OUTPUT_DIR)/sbin/bpftool -HOST_BUILD_DIR := $(OBJ_DIR) -HOST_OUTPUT_DIR := $(OUTPUT_DIR) -VMLINUX_BTF_PATHS ?= ../../../../vmlinux \ +DEFAULT_BPFTOOL := $(OUTPUT_DIR)/host/sbin/bpftool +HOST_OBJ_DIR := $(OBJ_DIR)/host/bpftool +HOST_LIBBPF_OUTPUT := $(OBJ_DIR)/host/libbpf/ +HOST_LIBBPF_DESTDIR := $(OUTPUT_DIR)/host/ +HOST_DESTDIR := $(OUTPUT_DIR)/host/ + +VMLINUX_BTF_PATHS ?= $(if $(O),$(O)/vmlinux) \ + $(if $(KBUILD_OUTPUT),$(KBUILD_OUTPUT)/vmlinux) \ + ../../../../vmlinux \ /sys/kernel/btf/vmlinux \ /boot/vmlinux-$(shell uname -r) VMLINUX_BTF ?= $(abspath $(firstword $(wildcard $(VMLINUX_BTF_PATHS)))) @@ -80,17 +74,23 @@ IS_LITTLE_ENDIAN = $(shell $(CC) -dM -E - </dev/null | \ # Use '-idirafter': Don't interfere with include mechanics except where the # build would have failed anyways. define get_sys_includes -$(shell $(1) -v -E - </dev/null 2>&1 \ +$(shell $(1) $(2) -v -E - </dev/null 2>&1 \ | sed -n '/<...> search starts here:/,/End of search list./{ s| \(/.*\)|-idirafter \1|p }') \ -$(shell $(1) -dM -E - </dev/null | grep '__riscv_xlen ' | awk '{printf("-D__riscv_xlen=%d -D__BITS_PER_LONG=%d", $$3, $$3)}') +$(shell $(1) $(2) -dM -E - </dev/null | grep '__riscv_xlen ' | awk '{printf("-D__riscv_xlen=%d -D__BITS_PER_LONG=%d", $$3, $$3)}') endef +ifneq ($(CROSS_COMPILE),) +CLANG_TARGET_ARCH = --target=$(notdir $(CROSS_COMPILE:%-=%)) +endif + +CLANG_SYS_INCLUDES = $(call get_sys_includes,$(CLANG),$(CLANG_TARGET_ARCH)) + BPF_CFLAGS = -g -D__TARGET_ARCH_$(SRCARCH) \ $(if $(IS_LITTLE_ENDIAN),-mlittle-endian,-mbig-endian) \ -I$(CURDIR)/include -I$(CURDIR)/include/bpf-compat \ -I$(INCLUDE_DIR) -I$(APIDIR) -I$(SCXTOOLSINCDIR) \ -I$(REPOROOT)/include \ - $(call get_sys_includes,$(CLANG)) \ + $(CLANG_SYS_INCLUDES) \ -Wall -Wno-compare-distinct-pointer-types \ -Wno-incompatible-function-pointer-types \ -O2 -mcpu=v3 @@ -98,7 +98,7 @@ BPF_CFLAGS = -g -D__TARGET_ARCH_$(SRCARCH) \ # sort removes libbpf duplicates when not cross-building MAKE_DIRS := $(sort $(OBJ_DIR)/libbpf $(OBJ_DIR)/libbpf \ $(OBJ_DIR)/bpftool $(OBJ_DIR)/resolve_btfids \ - $(INCLUDE_DIR) $(SCXOBJ_DIR)) + $(HOST_OBJ_DIR) $(INCLUDE_DIR) $(SCXOBJ_DIR)) $(MAKE_DIRS): $(call msg,MKDIR,,$@) @@ -108,18 +108,19 @@ $(BPFOBJ): $(wildcard $(BPFDIR)/*.[ch] $(BPFDIR)/Makefile) \ $(APIDIR)/linux/bpf.h \ | $(OBJ_DIR)/libbpf $(Q)$(MAKE) $(submake_extras) -C $(BPFDIR) OUTPUT=$(OBJ_DIR)/libbpf/ \ + ARCH=$(ARCH) CC="$(CC)" CROSS_COMPILE=$(CROSS_COMPILE) \ EXTRA_CFLAGS='-g -O0 -fPIC' \ DESTDIR=$(OUTPUT_DIR) prefix= all install_headers $(DEFAULT_BPFTOOL): $(wildcard $(BPFTOOLDIR)/*.[ch] $(BPFTOOLDIR)/Makefile) \ - $(LIBBPF_OUTPUT) | $(OBJ_DIR)/bpftool + $(LIBBPF_OUTPUT) | $(HOST_OBJ_DIR) $(Q)$(MAKE) $(submake_extras) -C $(BPFTOOLDIR) \ ARCH= CROSS_COMPILE= CC=$(HOSTCC) LD=$(HOSTLD) \ EXTRA_CFLAGS='-g -O0' \ - OUTPUT=$(OBJ_DIR)/bpftool/ \ - LIBBPF_OUTPUT=$(OBJ_DIR)/libbpf/ \ - LIBBPF_DESTDIR=$(OUTPUT_DIR)/ \ - prefix= DESTDIR=$(OUTPUT_DIR)/ install-bin + OUTPUT=$(HOST_OBJ_DIR)/ \ + LIBBPF_OUTPUT=$(HOST_LIBBPF_OUTPUT) \ + LIBBPF_DESTDIR=$(HOST_LIBBPF_DESTDIR) \ + prefix= DESTDIR=$(HOST_DESTDIR) install-bin $(INCLUDE_DIR)/vmlinux.h: $(VMLINUX_BTF) $(BPFTOOL) | $(INCLUDE_DIR) ifeq ($(VMLINUX_H),) @@ -150,9 +151,7 @@ $(INCLUDE_DIR)/%.bpf.skel.h: $(SCXOBJ_DIR)/%.bpf.o $(INCLUDE_DIR)/vmlinux.h $(BP override define CLEAN rm -rf $(OUTPUT_DIR) - rm -f *.o *.bpf.o *.bpf.skel.h *.bpf.subskel.h rm -f $(TEST_GEN_PROGS) - rm -f runner endef # Every testcase takes all of the BPF progs are dependencies by default. This @@ -185,7 +184,7 @@ auto-test-targets := \ testcase-targets := $(addsuffix .o,$(addprefix $(SCXOBJ_DIR)/,$(auto-test-targets))) -$(SCXOBJ_DIR)/runner.o: runner.c | $(SCXOBJ_DIR) +$(SCXOBJ_DIR)/runner.o: runner.c | $(SCXOBJ_DIR) $(BPFOBJ) $(CC) $(CFLAGS) -c $< -o $@ # Create all of the test targets object files, whose testcase objects will be @@ -196,21 +195,15 @@ $(SCXOBJ_DIR)/runner.o: runner.c | $(SCXOBJ_DIR) # function doesn't support using implicit rules otherwise. $(testcase-targets): $(SCXOBJ_DIR)/%.o: %.c $(SCXOBJ_DIR)/runner.o $(all_test_bpfprogs) | $(SCXOBJ_DIR) $(eval test=$(patsubst %.o,%.c,$(notdir $@))) - $(CC) $(CFLAGS) -c $< -o $@ $(SCXOBJ_DIR)/runner.o + $(CC) $(CFLAGS) -c $< -o $@ $(SCXOBJ_DIR)/util.o: util.c | $(SCXOBJ_DIR) $(CC) $(CFLAGS) -c $< -o $@ -runner: $(SCXOBJ_DIR)/runner.o $(SCXOBJ_DIR)/util.o $(BPFOBJ) $(testcase-targets) +$(OUTPUT)/runner: $(SCXOBJ_DIR)/runner.o $(SCXOBJ_DIR)/util.o $(BPFOBJ) $(testcase-targets) @echo "$(testcase-targets)" $(CC) $(CFLAGS) -o $@ $^ $(LDFLAGS) -TEST_GEN_PROGS := runner - -all: runner - -.PHONY: all clean help - .DEFAULT_GOAL := all .DELETE_ON_ERROR: diff --git a/tools/testing/selftests/sched_ext/create_dsq.bpf.c b/tools/testing/selftests/sched_ext/create_dsq.bpf.c index 23f79ed343f0..2cfc4ffd60e2 100644 --- a/tools/testing/selftests/sched_ext/create_dsq.bpf.c +++ b/tools/testing/selftests/sched_ext/create_dsq.bpf.c @@ -51,8 +51,8 @@ s32 BPF_STRUCT_OPS_SLEEPABLE(create_dsq_init) SEC(".struct_ops.link") struct sched_ext_ops create_dsq_ops = { - .init_task = create_dsq_init_task, - .exit_task = create_dsq_exit_task, - .init = create_dsq_init, + .init_task = (void *) create_dsq_init_task, + .exit_task = (void *) create_dsq_exit_task, + .init = (void *) create_dsq_init, .name = "create_dsq", }; diff --git a/tools/testing/selftests/sched_ext/ddsp_bogus_dsq_fail.bpf.c b/tools/testing/selftests/sched_ext/ddsp_bogus_dsq_fail.bpf.c index e97ad41d354a..37d9bf6fb745 100644 --- a/tools/testing/selftests/sched_ext/ddsp_bogus_dsq_fail.bpf.c +++ b/tools/testing/selftests/sched_ext/ddsp_bogus_dsq_fail.bpf.c @@ -35,8 +35,8 @@ void BPF_STRUCT_OPS(ddsp_bogus_dsq_fail_exit, struct scx_exit_info *ei) SEC(".struct_ops.link") struct sched_ext_ops ddsp_bogus_dsq_fail_ops = { - .select_cpu = ddsp_bogus_dsq_fail_select_cpu, - .exit = ddsp_bogus_dsq_fail_exit, + .select_cpu = (void *) ddsp_bogus_dsq_fail_select_cpu, + .exit = (void *) ddsp_bogus_dsq_fail_exit, .name = "ddsp_bogus_dsq_fail", .timeout_ms = 1000U, }; diff --git a/tools/testing/selftests/sched_ext/ddsp_vtimelocal_fail.bpf.c b/tools/testing/selftests/sched_ext/ddsp_vtimelocal_fail.bpf.c index dde7e7dafbfb..dffc97d9cdf1 100644 --- a/tools/testing/selftests/sched_ext/ddsp_vtimelocal_fail.bpf.c +++ b/tools/testing/selftests/sched_ext/ddsp_vtimelocal_fail.bpf.c @@ -32,8 +32,8 @@ void BPF_STRUCT_OPS(ddsp_vtimelocal_fail_exit, struct scx_exit_info *ei) SEC(".struct_ops.link") struct sched_ext_ops ddsp_vtimelocal_fail_ops = { - .select_cpu = ddsp_vtimelocal_fail_select_cpu, - .exit = ddsp_vtimelocal_fail_exit, + .select_cpu = (void *) ddsp_vtimelocal_fail_select_cpu, + .exit = (void *) ddsp_vtimelocal_fail_exit, .name = "ddsp_vtimelocal_fail", .timeout_ms = 1000U, }; diff --git a/tools/testing/selftests/sched_ext/dsp_local_on.bpf.c b/tools/testing/selftests/sched_ext/dsp_local_on.bpf.c index efb4672decb4..6a7db1502c29 100644 --- a/tools/testing/selftests/sched_ext/dsp_local_on.bpf.c +++ b/tools/testing/selftests/sched_ext/dsp_local_on.bpf.c @@ -56,10 +56,10 @@ void BPF_STRUCT_OPS(dsp_local_on_exit, struct scx_exit_info *ei) SEC(".struct_ops.link") struct sched_ext_ops dsp_local_on_ops = { - .select_cpu = dsp_local_on_select_cpu, - .enqueue = dsp_local_on_enqueue, - .dispatch = dsp_local_on_dispatch, - .exit = dsp_local_on_exit, + .select_cpu = (void *) dsp_local_on_select_cpu, + .enqueue = (void *) dsp_local_on_enqueue, + .dispatch = (void *) dsp_local_on_dispatch, + .exit = (void *) dsp_local_on_exit, .name = "dsp_local_on", .timeout_ms = 1000U, }; diff --git a/tools/testing/selftests/sched_ext/enq_last_no_enq_fails.bpf.c b/tools/testing/selftests/sched_ext/enq_last_no_enq_fails.bpf.c index b0b99531d5d5..e1bd13e48889 100644 --- a/tools/testing/selftests/sched_ext/enq_last_no_enq_fails.bpf.c +++ b/tools/testing/selftests/sched_ext/enq_last_no_enq_fails.bpf.c @@ -12,10 +12,18 @@ char _license[] SEC("license") = "GPL"; +u32 exit_kind; + +void BPF_STRUCT_OPS_SLEEPABLE(enq_last_no_enq_fails_exit, struct scx_exit_info *info) +{ + exit_kind = info->kind; +} + SEC(".struct_ops.link") struct sched_ext_ops enq_last_no_enq_fails_ops = { .name = "enq_last_no_enq_fails", /* Need to define ops.enqueue() with SCX_OPS_ENQ_LAST */ .flags = SCX_OPS_ENQ_LAST, + .exit = (void *) enq_last_no_enq_fails_exit, .timeout_ms = 1000U, }; diff --git a/tools/testing/selftests/sched_ext/enq_last_no_enq_fails.c b/tools/testing/selftests/sched_ext/enq_last_no_enq_fails.c index 2a3eda5e2c0b..73e679953e27 100644 --- a/tools/testing/selftests/sched_ext/enq_last_no_enq_fails.c +++ b/tools/testing/selftests/sched_ext/enq_last_no_enq_fails.c @@ -31,8 +31,12 @@ static enum scx_test_status run(void *ctx) struct bpf_link *link; link = bpf_map__attach_struct_ops(skel->maps.enq_last_no_enq_fails_ops); - if (link) { - SCX_ERR("Incorrectly succeeded in to attaching scheduler"); + if (!link) { + SCX_ERR("Incorrectly failed at attaching scheduler"); + return SCX_TEST_FAIL; + } + if (!skel->bss->exit_kind) { + SCX_ERR("Incorrectly stayed loaded"); return SCX_TEST_FAIL; } @@ -50,7 +54,7 @@ static void cleanup(void *ctx) struct scx_test enq_last_no_enq_fails = { .name = "enq_last_no_enq_fails", - .description = "Verify we fail to load a scheduler if we specify " + .description = "Verify we eject a scheduler if we specify " "the SCX_OPS_ENQ_LAST flag without defining " "ops.enqueue()", .setup = setup, diff --git a/tools/testing/selftests/sched_ext/enq_select_cpu_fails.bpf.c b/tools/testing/selftests/sched_ext/enq_select_cpu_fails.bpf.c index b3dfc1033cd6..1efb50d61040 100644 --- a/tools/testing/selftests/sched_ext/enq_select_cpu_fails.bpf.c +++ b/tools/testing/selftests/sched_ext/enq_select_cpu_fails.bpf.c @@ -36,8 +36,8 @@ void BPF_STRUCT_OPS(enq_select_cpu_fails_enqueue, struct task_struct *p, SEC(".struct_ops.link") struct sched_ext_ops enq_select_cpu_fails_ops = { - .select_cpu = enq_select_cpu_fails_select_cpu, - .enqueue = enq_select_cpu_fails_enqueue, + .select_cpu = (void *) enq_select_cpu_fails_select_cpu, + .enqueue = (void *) enq_select_cpu_fails_enqueue, .name = "enq_select_cpu_fails", .timeout_ms = 1000U, }; diff --git a/tools/testing/selftests/sched_ext/exit.bpf.c b/tools/testing/selftests/sched_ext/exit.bpf.c index ae12ddaac921..d75d4faf07f6 100644 --- a/tools/testing/selftests/sched_ext/exit.bpf.c +++ b/tools/testing/selftests/sched_ext/exit.bpf.c @@ -15,6 +15,8 @@ UEI_DEFINE(uei); #define EXIT_CLEANLY() scx_bpf_exit(exit_point, "%d", exit_point) +#define DSQ_ID 0 + s32 BPF_STRUCT_OPS(exit_select_cpu, struct task_struct *p, s32 prev_cpu, u64 wake_flags) { @@ -31,7 +33,7 @@ void BPF_STRUCT_OPS(exit_enqueue, struct task_struct *p, u64 enq_flags) if (exit_point == EXIT_ENQUEUE) EXIT_CLEANLY(); - scx_bpf_dispatch(p, SCX_DSQ_GLOBAL, SCX_SLICE_DFL, enq_flags); + scx_bpf_dispatch(p, DSQ_ID, SCX_SLICE_DFL, enq_flags); } void BPF_STRUCT_OPS(exit_dispatch, s32 cpu, struct task_struct *p) @@ -39,7 +41,7 @@ void BPF_STRUCT_OPS(exit_dispatch, s32 cpu, struct task_struct *p) if (exit_point == EXIT_DISPATCH) EXIT_CLEANLY(); - scx_bpf_consume(SCX_DSQ_GLOBAL); + scx_bpf_consume(DSQ_ID); } void BPF_STRUCT_OPS(exit_enable, struct task_struct *p) @@ -67,18 +69,18 @@ s32 BPF_STRUCT_OPS_SLEEPABLE(exit_init) if (exit_point == EXIT_INIT) EXIT_CLEANLY(); - return 0; + return scx_bpf_create_dsq(DSQ_ID, -1); } SEC(".struct_ops.link") struct sched_ext_ops exit_ops = { - .select_cpu = exit_select_cpu, - .enqueue = exit_enqueue, - .dispatch = exit_dispatch, - .init_task = exit_init_task, - .enable = exit_enable, - .exit = exit_exit, - .init = exit_init, + .select_cpu = (void *) exit_select_cpu, + .enqueue = (void *) exit_enqueue, + .dispatch = (void *) exit_dispatch, + .init_task = (void *) exit_init_task, + .enable = (void *) exit_enable, + .exit = (void *) exit_exit, + .init = (void *) exit_init, .name = "exit", .timeout_ms = 1000U, }; diff --git a/tools/testing/selftests/sched_ext/hotplug.bpf.c b/tools/testing/selftests/sched_ext/hotplug.bpf.c index 8f2601db39f3..6c9f25c9bf53 100644 --- a/tools/testing/selftests/sched_ext/hotplug.bpf.c +++ b/tools/testing/selftests/sched_ext/hotplug.bpf.c @@ -46,16 +46,16 @@ void BPF_STRUCT_OPS_SLEEPABLE(hotplug_cpu_offline, s32 cpu) SEC(".struct_ops.link") struct sched_ext_ops hotplug_cb_ops = { - .cpu_online = hotplug_cpu_online, - .cpu_offline = hotplug_cpu_offline, - .exit = hotplug_exit, + .cpu_online = (void *) hotplug_cpu_online, + .cpu_offline = (void *) hotplug_cpu_offline, + .exit = (void *) hotplug_exit, .name = "hotplug_cbs", .timeout_ms = 1000U, }; SEC(".struct_ops.link") struct sched_ext_ops hotplug_nocb_ops = { - .exit = hotplug_exit, + .exit = (void *) hotplug_exit, .name = "hotplug_nocbs", .timeout_ms = 1000U, }; diff --git a/tools/testing/selftests/sched_ext/init_enable_count.bpf.c b/tools/testing/selftests/sched_ext/init_enable_count.bpf.c index 47ea89a626c3..5eb9edb1837d 100644 --- a/tools/testing/selftests/sched_ext/init_enable_count.bpf.c +++ b/tools/testing/selftests/sched_ext/init_enable_count.bpf.c @@ -45,9 +45,9 @@ void BPF_STRUCT_OPS(cnt_disable, struct task_struct *p) SEC(".struct_ops.link") struct sched_ext_ops init_enable_count_ops = { - .init_task = cnt_init_task, - .exit_task = cnt_exit_task, - .enable = cnt_enable, - .disable = cnt_disable, + .init_task = (void *) cnt_init_task, + .exit_task = (void *) cnt_exit_task, + .enable = (void *) cnt_enable, + .disable = (void *) cnt_disable, .name = "init_enable_count", }; diff --git a/tools/testing/selftests/sched_ext/maximal.bpf.c b/tools/testing/selftests/sched_ext/maximal.bpf.c index 00bfa9cb95d3..4d4cd8d966db 100644 --- a/tools/testing/selftests/sched_ext/maximal.bpf.c +++ b/tools/testing/selftests/sched_ext/maximal.bpf.c @@ -131,34 +131,34 @@ void BPF_STRUCT_OPS(maximal_exit, struct scx_exit_info *info) SEC(".struct_ops.link") struct sched_ext_ops maximal_ops = { - .select_cpu = maximal_select_cpu, - .enqueue = maximal_enqueue, - .dequeue = maximal_dequeue, - .dispatch = maximal_dispatch, - .runnable = maximal_runnable, - .running = maximal_running, - .stopping = maximal_stopping, - .quiescent = maximal_quiescent, - .yield = maximal_yield, - .core_sched_before = maximal_core_sched_before, - .set_weight = maximal_set_weight, - .set_cpumask = maximal_set_cpumask, - .update_idle = maximal_update_idle, - .cpu_acquire = maximal_cpu_acquire, - .cpu_release = maximal_cpu_release, - .cpu_online = maximal_cpu_online, - .cpu_offline = maximal_cpu_offline, - .init_task = maximal_init_task, - .enable = maximal_enable, - .exit_task = maximal_exit_task, - .disable = maximal_disable, - .cgroup_init = maximal_cgroup_init, - .cgroup_exit = maximal_cgroup_exit, - .cgroup_prep_move = maximal_cgroup_prep_move, - .cgroup_move = maximal_cgroup_move, - .cgroup_cancel_move = maximal_cgroup_cancel_move, - .cgroup_set_weight = maximal_cgroup_set_weight, - .init = maximal_init, - .exit = maximal_exit, + .select_cpu = (void *) maximal_select_cpu, + .enqueue = (void *) maximal_enqueue, + .dequeue = (void *) maximal_dequeue, + .dispatch = (void *) maximal_dispatch, + .runnable = (void *) maximal_runnable, + .running = (void *) maximal_running, + .stopping = (void *) maximal_stopping, + .quiescent = (void *) maximal_quiescent, + .yield = (void *) maximal_yield, + .core_sched_before = (void *) maximal_core_sched_before, + .set_weight = (void *) maximal_set_weight, + .set_cpumask = (void *) maximal_set_cpumask, + .update_idle = (void *) maximal_update_idle, + .cpu_acquire = (void *) maximal_cpu_acquire, + .cpu_release = (void *) maximal_cpu_release, + .cpu_online = (void *) maximal_cpu_online, + .cpu_offline = (void *) maximal_cpu_offline, + .init_task = (void *) maximal_init_task, + .enable = (void *) maximal_enable, + .exit_task = (void *) maximal_exit_task, + .disable = (void *) maximal_disable, + .cgroup_init = (void *) maximal_cgroup_init, + .cgroup_exit = (void *) maximal_cgroup_exit, + .cgroup_prep_move = (void *) maximal_cgroup_prep_move, + .cgroup_move = (void *) maximal_cgroup_move, + .cgroup_cancel_move = (void *) maximal_cgroup_cancel_move, + .cgroup_set_weight = (void *) maximal_cgroup_set_weight, + .init = (void *) maximal_init, + .exit = (void *) maximal_exit, .name = "maximal", }; diff --git a/tools/testing/selftests/sched_ext/maybe_null.bpf.c b/tools/testing/selftests/sched_ext/maybe_null.bpf.c index 27d0f386acfb..cf4ae870cd4e 100644 --- a/tools/testing/selftests/sched_ext/maybe_null.bpf.c +++ b/tools/testing/selftests/sched_ext/maybe_null.bpf.c @@ -29,8 +29,8 @@ bool BPF_STRUCT_OPS(maybe_null_success_yield, struct task_struct *from, SEC(".struct_ops.link") struct sched_ext_ops maybe_null_success = { - .dispatch = maybe_null_success_dispatch, - .yield = maybe_null_success_yield, - .enable = maybe_null_running, + .dispatch = (void *) maybe_null_success_dispatch, + .yield = (void *) maybe_null_success_yield, + .enable = (void *) maybe_null_running, .name = "minimal", }; diff --git a/tools/testing/selftests/sched_ext/maybe_null_fail_dsp.bpf.c b/tools/testing/selftests/sched_ext/maybe_null_fail_dsp.bpf.c index c0641050271d..ec724d7b33d1 100644 --- a/tools/testing/selftests/sched_ext/maybe_null_fail_dsp.bpf.c +++ b/tools/testing/selftests/sched_ext/maybe_null_fail_dsp.bpf.c @@ -19,7 +19,7 @@ void BPF_STRUCT_OPS(maybe_null_fail_dispatch, s32 cpu, struct task_struct *p) SEC(".struct_ops.link") struct sched_ext_ops maybe_null_fail = { - .dispatch = maybe_null_fail_dispatch, - .enable = maybe_null_running, + .dispatch = (void *) maybe_null_fail_dispatch, + .enable = (void *) maybe_null_running, .name = "maybe_null_fail_dispatch", }; diff --git a/tools/testing/selftests/sched_ext/maybe_null_fail_yld.bpf.c b/tools/testing/selftests/sched_ext/maybe_null_fail_yld.bpf.c index 3c1740028e3b..e6552cace020 100644 --- a/tools/testing/selftests/sched_ext/maybe_null_fail_yld.bpf.c +++ b/tools/testing/selftests/sched_ext/maybe_null_fail_yld.bpf.c @@ -22,7 +22,7 @@ bool BPF_STRUCT_OPS(maybe_null_fail_yield, struct task_struct *from, SEC(".struct_ops.link") struct sched_ext_ops maybe_null_fail = { - .yield = maybe_null_fail_yield, - .enable = maybe_null_running, + .yield = (void *) maybe_null_fail_yield, + .enable = (void *) maybe_null_running, .name = "maybe_null_fail_yield", }; diff --git a/tools/testing/selftests/sched_ext/prog_run.bpf.c b/tools/testing/selftests/sched_ext/prog_run.bpf.c index 6a4d7c48e3f2..00c267626a68 100644 --- a/tools/testing/selftests/sched_ext/prog_run.bpf.c +++ b/tools/testing/selftests/sched_ext/prog_run.bpf.c @@ -28,6 +28,6 @@ void BPF_STRUCT_OPS(prog_run_exit, struct scx_exit_info *ei) SEC(".struct_ops.link") struct sched_ext_ops prog_run_ops = { - .exit = prog_run_exit, + .exit = (void *) prog_run_exit, .name = "prog_run", }; diff --git a/tools/testing/selftests/sched_ext/select_cpu_dfl.bpf.c b/tools/testing/selftests/sched_ext/select_cpu_dfl.bpf.c index 2ed2991afafe..f171ac470970 100644 --- a/tools/testing/selftests/sched_ext/select_cpu_dfl.bpf.c +++ b/tools/testing/selftests/sched_ext/select_cpu_dfl.bpf.c @@ -35,6 +35,6 @@ void BPF_STRUCT_OPS(select_cpu_dfl_enqueue, struct task_struct *p, SEC(".struct_ops.link") struct sched_ext_ops select_cpu_dfl_ops = { - .enqueue = select_cpu_dfl_enqueue, + .enqueue = (void *) select_cpu_dfl_enqueue, .name = "select_cpu_dfl", }; diff --git a/tools/testing/selftests/sched_ext/select_cpu_dfl_nodispatch.bpf.c b/tools/testing/selftests/sched_ext/select_cpu_dfl_nodispatch.bpf.c index 4bb5abb2d369..9efdbb7da928 100644 --- a/tools/testing/selftests/sched_ext/select_cpu_dfl_nodispatch.bpf.c +++ b/tools/testing/selftests/sched_ext/select_cpu_dfl_nodispatch.bpf.c @@ -82,8 +82,8 @@ s32 BPF_STRUCT_OPS(select_cpu_dfl_nodispatch_init_task, SEC(".struct_ops.link") struct sched_ext_ops select_cpu_dfl_nodispatch_ops = { - .select_cpu = select_cpu_dfl_nodispatch_select_cpu, - .enqueue = select_cpu_dfl_nodispatch_enqueue, - .init_task = select_cpu_dfl_nodispatch_init_task, + .select_cpu = (void *) select_cpu_dfl_nodispatch_select_cpu, + .enqueue = (void *) select_cpu_dfl_nodispatch_enqueue, + .init_task = (void *) select_cpu_dfl_nodispatch_init_task, .name = "select_cpu_dfl_nodispatch", }; diff --git a/tools/testing/selftests/sched_ext/select_cpu_dispatch.bpf.c b/tools/testing/selftests/sched_ext/select_cpu_dispatch.bpf.c index f0b96a4a04b2..59bfc4f36167 100644 --- a/tools/testing/selftests/sched_ext/select_cpu_dispatch.bpf.c +++ b/tools/testing/selftests/sched_ext/select_cpu_dispatch.bpf.c @@ -35,7 +35,7 @@ dispatch: SEC(".struct_ops.link") struct sched_ext_ops select_cpu_dispatch_ops = { - .select_cpu = select_cpu_dispatch_select_cpu, + .select_cpu = (void *) select_cpu_dispatch_select_cpu, .name = "select_cpu_dispatch", .timeout_ms = 1000U, }; diff --git a/tools/testing/selftests/sched_ext/select_cpu_dispatch_bad_dsq.bpf.c b/tools/testing/selftests/sched_ext/select_cpu_dispatch_bad_dsq.bpf.c index 7b42ddce0f56..3bbd5fcdfb18 100644 --- a/tools/testing/selftests/sched_ext/select_cpu_dispatch_bad_dsq.bpf.c +++ b/tools/testing/selftests/sched_ext/select_cpu_dispatch_bad_dsq.bpf.c @@ -30,8 +30,8 @@ void BPF_STRUCT_OPS(select_cpu_dispatch_bad_dsq_exit, struct scx_exit_info *ei) SEC(".struct_ops.link") struct sched_ext_ops select_cpu_dispatch_bad_dsq_ops = { - .select_cpu = select_cpu_dispatch_bad_dsq_select_cpu, - .exit = select_cpu_dispatch_bad_dsq_exit, + .select_cpu = (void *) select_cpu_dispatch_bad_dsq_select_cpu, + .exit = (void *) select_cpu_dispatch_bad_dsq_exit, .name = "select_cpu_dispatch_bad_dsq", .timeout_ms = 1000U, }; diff --git a/tools/testing/selftests/sched_ext/select_cpu_dispatch_dbl_dsp.bpf.c b/tools/testing/selftests/sched_ext/select_cpu_dispatch_dbl_dsp.bpf.c index 653e3dc0b4dc..0fda57fe0ecf 100644 --- a/tools/testing/selftests/sched_ext/select_cpu_dispatch_dbl_dsp.bpf.c +++ b/tools/testing/selftests/sched_ext/select_cpu_dispatch_dbl_dsp.bpf.c @@ -31,8 +31,8 @@ void BPF_STRUCT_OPS(select_cpu_dispatch_dbl_dsp_exit, struct scx_exit_info *ei) SEC(".struct_ops.link") struct sched_ext_ops select_cpu_dispatch_dbl_dsp_ops = { - .select_cpu = select_cpu_dispatch_dbl_dsp_select_cpu, - .exit = select_cpu_dispatch_dbl_dsp_exit, + .select_cpu = (void *) select_cpu_dispatch_dbl_dsp_select_cpu, + .exit = (void *) select_cpu_dispatch_dbl_dsp_exit, .name = "select_cpu_dispatch_dbl_dsp", .timeout_ms = 1000U, }; diff --git a/tools/testing/selftests/sched_ext/select_cpu_vtime.bpf.c b/tools/testing/selftests/sched_ext/select_cpu_vtime.bpf.c index 7f3ebf4fc2ea..e6c67bcf5e6e 100644 --- a/tools/testing/selftests/sched_ext/select_cpu_vtime.bpf.c +++ b/tools/testing/selftests/sched_ext/select_cpu_vtime.bpf.c @@ -81,12 +81,12 @@ s32 BPF_STRUCT_OPS_SLEEPABLE(select_cpu_vtime_init) SEC(".struct_ops.link") struct sched_ext_ops select_cpu_vtime_ops = { - .select_cpu = select_cpu_vtime_select_cpu, - .dispatch = select_cpu_vtime_dispatch, - .running = select_cpu_vtime_running, - .stopping = select_cpu_vtime_stopping, - .enable = select_cpu_vtime_enable, - .init = select_cpu_vtime_init, + .select_cpu = (void *) select_cpu_vtime_select_cpu, + .dispatch = (void *) select_cpu_vtime_dispatch, + .running = (void *) select_cpu_vtime_running, + .stopping = (void *) select_cpu_vtime_stopping, + .enable = (void *) select_cpu_vtime_enable, + .init = (void *) select_cpu_vtime_init, .name = "select_cpu_vtime", .timeout_ms = 1000U, }; diff --git a/tools/testing/selftests/sigaltstack/.gitignore b/tools/testing/selftests/signal/.gitignore index 50a19a8888ce..3f339865a3b6 100644 --- a/tools/testing/selftests/sigaltstack/.gitignore +++ b/tools/testing/selftests/signal/.gitignore @@ -1,2 +1,3 @@ # SPDX-License-Identifier: GPL-2.0-only +mangle_uc_sigmask sas diff --git a/tools/testing/selftests/sigaltstack/Makefile b/tools/testing/selftests/signal/Makefile index 3e96d5d47036..e0bf7058d19c 100644 --- a/tools/testing/selftests/sigaltstack/Makefile +++ b/tools/testing/selftests/signal/Makefile @@ -1,6 +1,7 @@ # SPDX-License-Identifier: GPL-2.0-only CFLAGS = -Wall -TEST_GEN_PROGS = sas +TEST_GEN_PROGS = mangle_uc_sigmask +TEST_GEN_PROGS += sas include ../lib.mk diff --git a/tools/testing/selftests/sigaltstack/current_stack_pointer.h b/tools/testing/selftests/signal/current_stack_pointer.h index 09da8f1011ce..09da8f1011ce 100644 --- a/tools/testing/selftests/sigaltstack/current_stack_pointer.h +++ b/tools/testing/selftests/signal/current_stack_pointer.h diff --git a/tools/testing/selftests/signal/mangle_uc_sigmask.c b/tools/testing/selftests/signal/mangle_uc_sigmask.c new file mode 100644 index 000000000000..b79ab92178a8 --- /dev/null +++ b/tools/testing/selftests/signal/mangle_uc_sigmask.c @@ -0,0 +1,184 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2024 ARM Ltd. + * + * Author: Dev Jain <dev.jain@arm.com> + * + * Test describing a clear distinction between signal states - delivered and + * blocked, and their relation with ucontext. + * + * A process can request blocking of a signal by masking it into its set of + * blocked signals; such a signal, when sent to the process by the kernel, + * will get blocked by the process and it may later unblock it and take an + * action. At that point, the signal will be delivered. + * + * We test the following functionalities of the kernel: + * + * ucontext_t describes the interrupted context of the thread; this implies + * that, in case of registering a handler and catching the corresponding + * signal, that state is before what was jumping into the handler. + * + * The thread's mask of blocked signals can be permanently changed, i.e, not + * just during the execution of the handler, by mangling with uc_sigmask + * from inside the handler. + * + * Assume that we block the set of signals, S1, by sigaction(), and say, the + * signal for which the handler was installed, is S2. When S2 is sent to the + * program, it will be considered "delivered", since we will act on the + * signal and jump to the handler. Any instances of S1 or S2 raised, while the + * program is executing inside the handler, will be blocked; they will be + * delivered immediately upon termination of the handler. + * + * For standard signals (also see real-time signals in the man page), multiple + * blocked instances of the same signal are not queued; such a signal will + * be delivered just once. + */ + +#include <stdio.h> +#include <stdlib.h> +#include <signal.h> +#include <ucontext.h> + +#include "../kselftest.h" + +void handler_verify_ucontext(int signo, siginfo_t *info, void *uc) +{ + int ret; + + /* Kernel dumps ucontext with USR2 blocked */ + ret = sigismember(&(((ucontext_t *)uc)->uc_sigmask), SIGUSR2); + ksft_test_result(ret == 1, "USR2 blocked in ucontext\n"); + + /* + * USR2 is blocked; can be delivered neither here, nor after + * exit from handler + */ + if (raise(SIGUSR2)) + ksft_exit_fail_perror("raise"); +} + +void handler_segv(int signo, siginfo_t *info, void *uc) +{ + /* + * Three cases possible: + * 1. Program already terminated due to segmentation fault. + * 2. SEGV was blocked even after returning from handler_usr. + * 3. SEGV was delivered on returning from handler_usr. + * The last option must happen. + */ + ksft_test_result_pass("SEGV delivered\n"); +} + +static int cnt; + +void handler_usr(int signo, siginfo_t *info, void *uc) +{ + int ret; + + /* + * Break out of infinite recursion caused by raise(SIGUSR1) invoked + * from inside the handler + */ + ++cnt; + if (cnt > 1) + return; + + /* SEGV blocked during handler execution, delivered on return */ + if (raise(SIGSEGV)) + ksft_exit_fail_perror("raise"); + + ksft_print_msg("SEGV bypassed successfully\n"); + + /* + * Signal responsible for handler invocation is blocked by default; + * delivered on return, leading to recursion + */ + if (raise(SIGUSR1)) + ksft_exit_fail_perror("raise"); + + ksft_test_result(cnt == 1, + "USR1 is blocked, cannot invoke handler right now\n"); + + /* Raise USR1 again; only one instance must be delivered upon exit */ + if (raise(SIGUSR1)) + ksft_exit_fail_perror("raise"); + + /* SEGV has been blocked in sa_mask, but ucontext is empty */ + ret = sigismember(&(((ucontext_t *)uc)->uc_sigmask), SIGSEGV); + ksft_test_result(ret == 0, "SEGV not blocked in ucontext\n"); + + /* USR1 has been blocked, but ucontext is empty */ + ret = sigismember(&(((ucontext_t *)uc)->uc_sigmask), SIGUSR1); + ksft_test_result(ret == 0, "USR1 not blocked in ucontext\n"); + + /* + * Mangle ucontext; this will be copied back into ¤t->blocked + * on return from the handler. + */ + if (sigaddset(&((ucontext_t *)uc)->uc_sigmask, SIGUSR2)) + ksft_exit_fail_perror("sigaddset"); +} + +int main(int argc, char *argv[]) +{ + struct sigaction act, act2; + sigset_t set, oldset; + + ksft_print_header(); + ksft_set_plan(7); + + act.sa_flags = SA_SIGINFO; + act.sa_sigaction = &handler_usr; + + /* Add SEGV to blocked mask */ + if (sigemptyset(&act.sa_mask) || sigaddset(&act.sa_mask, SIGSEGV) + || (sigismember(&act.sa_mask, SIGSEGV) != 1)) + ksft_exit_fail_msg("Cannot add SEGV to blocked mask\n"); + + if (sigaction(SIGUSR1, &act, NULL)) + ksft_exit_fail_perror("Cannot install handler"); + + act2.sa_flags = SA_SIGINFO; + act2.sa_sigaction = &handler_segv; + + if (sigaction(SIGSEGV, &act2, NULL)) + ksft_exit_fail_perror("Cannot install handler"); + + /* Invoke handler */ + if (raise(SIGUSR1)) + ksft_exit_fail_perror("raise"); + + /* USR1 must not be queued */ + ksft_test_result(cnt == 2, "handler invoked only twice\n"); + + /* Mangled ucontext implies USR2 is blocked for current thread */ + if (raise(SIGUSR2)) + ksft_exit_fail_perror("raise"); + + ksft_print_msg("USR2 bypassed successfully\n"); + + act.sa_sigaction = &handler_verify_ucontext; + if (sigaction(SIGUSR1, &act, NULL)) + ksft_exit_fail_perror("Cannot install handler"); + + if (raise(SIGUSR1)) + ksft_exit_fail_perror("raise"); + + /* + * Raising USR2 in handler_verify_ucontext is redundant since it + * is blocked + */ + ksft_print_msg("USR2 still blocked on return from handler\n"); + + /* Confirm USR2 blockage by sigprocmask() too */ + if (sigemptyset(&set)) + ksft_exit_fail_perror("sigemptyset"); + + if (sigprocmask(SIG_BLOCK, &set, &oldset)) + ksft_exit_fail_perror("sigprocmask"); + + ksft_test_result(sigismember(&oldset, SIGUSR2) == 1, + "USR2 present in ¤t->blocked\n"); + + ksft_finished(); +} diff --git a/tools/testing/selftests/sigaltstack/sas.c b/tools/testing/selftests/signal/sas.c index 07227fab1cc9..07227fab1cc9 100644 --- a/tools/testing/selftests/sigaltstack/sas.c +++ b/tools/testing/selftests/signal/sas.c diff --git a/tools/testing/selftests/tc-testing/tc-tests/filters/basic.json b/tools/testing/selftests/tc-testing/tc-tests/filters/basic.json index d1278de8ebc3..c9309a44a87e 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/filters/basic.json +++ b/tools/testing/selftests/tc-testing/tc-tests/filters/basic.json @@ -67,7 +67,7 @@ }, { "id": "4943", - "name": "Add basic filter with cmp ematch u32/link layer and miltiple actions", + "name": "Add basic filter with cmp ematch u32/link layer and multiple actions", "category": [ "filter", "basic" @@ -155,7 +155,7 @@ }, { "id": "32d8", - "name": "Add basic filter with cmp ematch u32/network layer and miltiple actions", + "name": "Add basic filter with cmp ematch u32/network layer and multiple actions", "category": [ "filter", "basic" @@ -243,7 +243,7 @@ }, { "id": "62d7", - "name": "Add basic filter with cmp ematch u32/transport layer and miltiple actions", + "name": "Add basic filter with cmp ematch u32/transport layer and multiple actions", "category": [ "filter", "basic" diff --git a/tools/testing/selftests/tc-testing/tc-tests/filters/cgroup.json b/tools/testing/selftests/tc-testing/tc-tests/filters/cgroup.json index 03723cf84379..35c9a7dbe1c4 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/filters/cgroup.json +++ b/tools/testing/selftests/tc-testing/tc-tests/filters/cgroup.json @@ -67,7 +67,7 @@ }, { "id": "0234", - "name": "Add cgroup filter with cmp ematch u32/link layer and miltiple actions", + "name": "Add cgroup filter with cmp ematch u32/link layer and multiple actions", "category": [ "filter", "cgroup" @@ -155,7 +155,7 @@ }, { "id": "2733", - "name": "Add cgroup filter with cmp ematch u32/network layer and miltiple actions", + "name": "Add cgroup filter with cmp ematch u32/network layer and multiple actions", "category": [ "filter", "cgroup" @@ -1189,7 +1189,7 @@ }, { "id": "4319", - "name": "Replace cgroup filter with diffferent match", + "name": "Replace cgroup filter with different match", "category": [ "filter", "cgroup" diff --git a/tools/testing/selftests/tc-testing/tc-tests/filters/flow.json b/tools/testing/selftests/tc-testing/tc-tests/filters/flow.json index 58189327f644..996448afe31b 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/filters/flow.json +++ b/tools/testing/selftests/tc-testing/tc-tests/filters/flow.json @@ -507,7 +507,7 @@ }, { "id": "4341", - "name": "Add flow filter with muliple ops", + "name": "Add flow filter with multiple ops", "category": [ "filter", "flow" diff --git a/tools/testing/selftests/tc-testing/tc-tests/filters/route.json b/tools/testing/selftests/tc-testing/tc-tests/filters/route.json index 8d8de8f65aef..05cedca67cca 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/filters/route.json +++ b/tools/testing/selftests/tc-testing/tc-tests/filters/route.json @@ -111,7 +111,7 @@ }, { "id": "7994", - "name": "Add route filter with miltiple actions", + "name": "Add route filter with multiple actions", "category": [ "filter", "route" diff --git a/tools/testing/selftests/tc-testing/tc-tests/filters/u32.json b/tools/testing/selftests/tc-testing/tc-tests/filters/u32.json index 24bd0c2a3014..b2ca9d4e991b 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/filters/u32.json +++ b/tools/testing/selftests/tc-testing/tc-tests/filters/u32.json @@ -329,5 +329,29 @@ "teardown": [ "$TC qdisc del dev $DEV1 parent root drr" ] + }, + { + "id": "1234", + "name": "Exercise IDR leaks by creating/deleting a filter many (2048) times", + "category": [ + "filter", + "u32" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$TC qdisc add dev $DEV1 parent root handle 10: drr", + "$TC filter add dev $DEV1 parent 10:0 protocol ip prio 2 u32 match ip src 0.0.0.2/32 action drop", + "$TC filter add dev $DEV1 parent 10:0 protocol ip prio 3 u32 match ip src 0.0.0.3/32 action drop" + ], + "cmdUnderTest": "bash -c 'for i in {1..2048} ;do echo filter delete dev $DEV1 pref 3;echo filter add dev $DEV1 parent 10:0 protocol ip prio 3 u32 match ip src 0.0.0.3/32 action drop;done | $TC -b -'", + "expExitCode": "0", + "verifyCmd": "$TC filter show dev $DEV1", + "matchPattern": "protocol ip pref 3 u32", + "matchCount": "3", + "teardown": [ + "$TC qdisc del dev $DEV1 parent root drr" + ] } ] diff --git a/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json new file mode 100644 index 000000000000..d3dd65b05b5f --- /dev/null +++ b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json @@ -0,0 +1,98 @@ +[ + { + "id": "ca5e", + "name": "Check class delete notification for ffff:", + "category": [ + "qdisc" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$IP link set dev $DUMMY up || true", + "$IP addr add 10.10.10.10/24 dev $DUMMY || true", + "$TC qdisc add dev $DUMMY root handle 1: drr", + "$TC filter add dev $DUMMY parent 1: basic classid 1:1", + "$TC class add dev $DUMMY parent 1: classid 1:1 drr", + "$TC qdisc add dev $DUMMY parent 1:1 handle ffff: drr", + "$TC filter add dev $DUMMY parent ffff: basic classid ffff:1", + "$TC class add dev $DUMMY parent ffff: classid ffff:1 drr", + "$TC qdisc add dev $DUMMY parent ffff:1 netem delay 1s", + "ping -c1 -W0.01 -I $DUMMY 10.10.10.1 || true", + "$TC class del dev $DUMMY classid ffff:1", + "$TC class add dev $DUMMY parent ffff: classid ffff:1 drr" + ], + "cmdUnderTest": "ping -c1 -W0.01 -I $DUMMY 10.10.10.1", + "expExitCode": "1", + "verifyCmd": "$TC -s qdisc ls dev $DUMMY", + "matchPattern": "drr 1: root", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY root handle 1: drr", + "$IP addr del 10.10.10.10/24 dev $DUMMY" + ] + }, + { + "id": "e4b7", + "name": "Check class delete notification for root ffff:", + "category": [ + "qdisc" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$IP link set dev $DUMMY up || true", + "$IP addr add 10.10.10.10/24 dev $DUMMY || true", + "$TC qdisc add dev $DUMMY root handle ffff: drr", + "$TC filter add dev $DUMMY parent ffff: basic classid ffff:1", + "$TC class add dev $DUMMY parent ffff: classid ffff:1 drr", + "$TC qdisc add dev $DUMMY parent ffff:1 netem delay 1s", + "ping -c1 -W0.01 -I $DUMMY 10.10.10.1 || true", + "$TC class del dev $DUMMY classid ffff:1", + "$TC class add dev $DUMMY parent ffff: classid ffff:1 drr" + ], + "cmdUnderTest": "ping -c1 -W0.01 -I $DUMMY 10.10.10.1", + "expExitCode": "1", + "verifyCmd": "$TC qdisc ls dev $DUMMY", + "matchPattern": "drr ffff: root", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY root handle ffff: drr", + "$IP addr del 10.10.10.10/24 dev $DUMMY" + ] + }, + { + "id": "33a9", + "name": "Check ingress is not searchable on backlog update", + "category": [ + "qdisc" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$IP link set dev $DUMMY up || true", + "$IP addr add 10.10.10.10/24 dev $DUMMY || true", + "$TC qdisc add dev $DUMMY ingress", + "$TC qdisc add dev $DUMMY root handle 1: drr", + "$TC filter add dev $DUMMY parent 1: basic classid 1:1", + "$TC class add dev $DUMMY parent 1: classid 1:1 drr", + "$TC qdisc add dev $DUMMY parent 1:1 handle 2: drr", + "$TC filter add dev $DUMMY parent 2: basic classid 2:1", + "$TC class add dev $DUMMY parent 2: classid 2:1 drr", + "$TC qdisc add dev $DUMMY parent 2:1 netem delay 1s", + "ping -c1 -W0.01 -I $DUMMY 10.10.10.1 || true" + ], + "cmdUnderTest": "$TC class del dev $DUMMY classid 2:1", + "expExitCode": "0", + "verifyCmd": "$TC qdisc ls dev $DUMMY", + "matchPattern": "drr 1: root", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY root handle 1: drr", + "$TC qdisc del dev $DUMMY ingress", + "$IP addr del 10.10.10.10/24 dev $DUMMY" + ] + } +] diff --git a/tools/testing/selftests/timers/Makefile b/tools/testing/selftests/timers/Makefile index 0e73a16874c4..32203593c62e 100644 --- a/tools/testing/selftests/timers/Makefile +++ b/tools/testing/selftests/timers/Makefile @@ -1,5 +1,5 @@ # SPDX-License-Identifier: GPL-2.0 -CFLAGS += -O3 -Wl,-no-as-needed -Wall +CFLAGS += -O3 -Wl,-no-as-needed -Wall -I $(top_srcdir) LDLIBS += -lrt -lpthread -lm # these are all "safe" tests that don't modify diff --git a/tools/testing/selftests/timers/adjtick.c b/tools/testing/selftests/timers/adjtick.c index 205b76a4abb4..777d9494b683 100644 --- a/tools/testing/selftests/timers/adjtick.c +++ b/tools/testing/selftests/timers/adjtick.c @@ -22,14 +22,10 @@ #include <sys/time.h> #include <sys/timex.h> #include <time.h> +#include <include/vdso/time64.h> #include "../kselftest.h" -#define CLOCK_MONOTONIC_RAW 4 - -#define NSEC_PER_SEC 1000000000LL -#define USEC_PER_SEC 1000000 - #define MILLION 1000000 long systick; diff --git a/tools/testing/selftests/timers/alarmtimer-suspend.c b/tools/testing/selftests/timers/alarmtimer-suspend.c index ad52e608b88e..a9ef76ea6051 100644 --- a/tools/testing/selftests/timers/alarmtimer-suspend.c +++ b/tools/testing/selftests/timers/alarmtimer-suspend.c @@ -28,24 +28,10 @@ #include <signal.h> #include <stdlib.h> #include <pthread.h> +#include <include/vdso/time64.h> +#include <errno.h> #include "../kselftest.h" -#define CLOCK_REALTIME 0 -#define CLOCK_MONOTONIC 1 -#define CLOCK_PROCESS_CPUTIME_ID 2 -#define CLOCK_THREAD_CPUTIME_ID 3 -#define CLOCK_MONOTONIC_RAW 4 -#define CLOCK_REALTIME_COARSE 5 -#define CLOCK_MONOTONIC_COARSE 6 -#define CLOCK_BOOTTIME 7 -#define CLOCK_REALTIME_ALARM 8 -#define CLOCK_BOOTTIME_ALARM 9 -#define CLOCK_HWSPECIFIC 10 -#define CLOCK_TAI 11 -#define NR_CLOCKIDS 12 - - -#define NSEC_PER_SEC 1000000000ULL #define UNREASONABLE_LAT (NSEC_PER_SEC * 5) /* hopefully we resume in 5 secs */ #define SUSPEND_SECS 15 @@ -142,8 +128,8 @@ int main(void) alarmcount = 0; if (timer_create(alarm_clock_id, &se, &tm1) == -1) { - printf("timer_create failed, %s unsupported?\n", - clockstring(alarm_clock_id)); + printf("timer_create failed, %s unsupported?: %s\n", + clockstring(alarm_clock_id), strerror(errno)); break; } diff --git a/tools/testing/selftests/timers/inconsistency-check.c b/tools/testing/selftests/timers/inconsistency-check.c index 36a49fba6c9b..9d1573769d55 100644 --- a/tools/testing/selftests/timers/inconsistency-check.c +++ b/tools/testing/selftests/timers/inconsistency-check.c @@ -28,24 +28,13 @@ #include <sys/timex.h> #include <string.h> #include <signal.h> +#include <include/vdso/time64.h> #include "../kselftest.h" -#define CALLS_PER_LOOP 64 -#define NSEC_PER_SEC 1000000000ULL - -#define CLOCK_REALTIME 0 -#define CLOCK_MONOTONIC 1 -#define CLOCK_PROCESS_CPUTIME_ID 2 -#define CLOCK_THREAD_CPUTIME_ID 3 -#define CLOCK_MONOTONIC_RAW 4 -#define CLOCK_REALTIME_COARSE 5 -#define CLOCK_MONOTONIC_COARSE 6 -#define CLOCK_BOOTTIME 7 -#define CLOCK_REALTIME_ALARM 8 -#define CLOCK_BOOTTIME_ALARM 9 +/* CLOCK_HWSPECIFIC == CLOCK_SGI_CYCLE (Deprecated) */ #define CLOCK_HWSPECIFIC 10 -#define CLOCK_TAI 11 -#define NR_CLOCKIDS 12 + +#define CALLS_PER_LOOP 64 char *clockstring(int clockid) { @@ -152,7 +141,7 @@ int main(int argc, char *argv[]) { int clockid, opt; int userclock = CLOCK_REALTIME; - int maxclocks = NR_CLOCKIDS; + int maxclocks = CLOCK_TAI + 1; int runtime = 10; struct timespec ts; diff --git a/tools/testing/selftests/timers/leap-a-day.c b/tools/testing/selftests/timers/leap-a-day.c index 986abbdb1521..04004a7c0934 100644 --- a/tools/testing/selftests/timers/leap-a-day.c +++ b/tools/testing/selftests/timers/leap-a-day.c @@ -48,9 +48,9 @@ #include <string.h> #include <signal.h> #include <unistd.h> +#include <include/vdso/time64.h> #include "../kselftest.h" -#define NSEC_PER_SEC 1000000000ULL #define CLOCK_TAI 11 time_t next_leap; diff --git a/tools/testing/selftests/timers/mqueue-lat.c b/tools/testing/selftests/timers/mqueue-lat.c index f3179a605bba..63de2334a291 100644 --- a/tools/testing/selftests/timers/mqueue-lat.c +++ b/tools/testing/selftests/timers/mqueue-lat.c @@ -29,9 +29,9 @@ #include <signal.h> #include <errno.h> #include <mqueue.h> +#include <include/vdso/time64.h> #include "../kselftest.h" -#define NSEC_PER_SEC 1000000000ULL #define TARGET_TIMEOUT 100000000 /* 100ms in nanoseconds */ #define UNRESONABLE_LATENCY 40000000 /* 40ms in nanosecs */ diff --git a/tools/testing/selftests/timers/nanosleep.c b/tools/testing/selftests/timers/nanosleep.c index df1d03516e7b..252c6308c569 100644 --- a/tools/testing/selftests/timers/nanosleep.c +++ b/tools/testing/selftests/timers/nanosleep.c @@ -27,23 +27,11 @@ #include <sys/timex.h> #include <string.h> #include <signal.h> +#include <include/vdso/time64.h> #include "../kselftest.h" -#define NSEC_PER_SEC 1000000000ULL - -#define CLOCK_REALTIME 0 -#define CLOCK_MONOTONIC 1 -#define CLOCK_PROCESS_CPUTIME_ID 2 -#define CLOCK_THREAD_CPUTIME_ID 3 -#define CLOCK_MONOTONIC_RAW 4 -#define CLOCK_REALTIME_COARSE 5 -#define CLOCK_MONOTONIC_COARSE 6 -#define CLOCK_BOOTTIME 7 -#define CLOCK_REALTIME_ALARM 8 -#define CLOCK_BOOTTIME_ALARM 9 +/* CLOCK_HWSPECIFIC == CLOCK_SGI_CYCLE (Deprecated) */ #define CLOCK_HWSPECIFIC 10 -#define CLOCK_TAI 11 -#define NR_CLOCKIDS 12 #define UNSUPPORTED 0xf00f @@ -132,11 +120,12 @@ int main(int argc, char **argv) { long long length; int clockid, ret; + int max_clocks = CLOCK_TAI + 1; ksft_print_header(); - ksft_set_plan(NR_CLOCKIDS); + ksft_set_plan(max_clocks); - for (clockid = CLOCK_REALTIME; clockid < NR_CLOCKIDS; clockid++) { + for (clockid = CLOCK_REALTIME; clockid < max_clocks; clockid++) { /* Skip cputime clockids since nanosleep won't increment cputime */ if (clockid == CLOCK_PROCESS_CPUTIME_ID || diff --git a/tools/testing/selftests/timers/nsleep-lat.c b/tools/testing/selftests/timers/nsleep-lat.c index edb5acacf214..de23dc0c9f97 100644 --- a/tools/testing/selftests/timers/nsleep-lat.c +++ b/tools/testing/selftests/timers/nsleep-lat.c @@ -24,26 +24,13 @@ #include <sys/timex.h> #include <string.h> #include <signal.h> +#include <include/vdso/time64.h> #include "../kselftest.h" -#define NSEC_PER_SEC 1000000000ULL - #define UNRESONABLE_LATENCY 40000000 /* 40ms in nanosecs */ - -#define CLOCK_REALTIME 0 -#define CLOCK_MONOTONIC 1 -#define CLOCK_PROCESS_CPUTIME_ID 2 -#define CLOCK_THREAD_CPUTIME_ID 3 -#define CLOCK_MONOTONIC_RAW 4 -#define CLOCK_REALTIME_COARSE 5 -#define CLOCK_MONOTONIC_COARSE 6 -#define CLOCK_BOOTTIME 7 -#define CLOCK_REALTIME_ALARM 8 -#define CLOCK_BOOTTIME_ALARM 9 +/* CLOCK_HWSPECIFIC == CLOCK_SGI_CYCLE (Deprecated) */ #define CLOCK_HWSPECIFIC 10 -#define CLOCK_TAI 11 -#define NR_CLOCKIDS 12 #define UNSUPPORTED 0xf00f @@ -145,11 +132,12 @@ int main(int argc, char **argv) { long long length; int clockid, ret; + int max_clocks = CLOCK_TAI + 1; ksft_print_header(); - ksft_set_plan(NR_CLOCKIDS - CLOCK_REALTIME - SKIPPED_CLOCK_COUNT); + ksft_set_plan(max_clocks - CLOCK_REALTIME - SKIPPED_CLOCK_COUNT); - for (clockid = CLOCK_REALTIME; clockid < NR_CLOCKIDS; clockid++) { + for (clockid = CLOCK_REALTIME; clockid < max_clocks; clockid++) { /* Skip cputime clockids since nanosleep won't increment cputime */ if (clockid == CLOCK_PROCESS_CPUTIME_ID || diff --git a/tools/testing/selftests/timers/posix_timers.c b/tools/testing/selftests/timers/posix_timers.c index ddb1cebc844e..9814b3a1c77d 100644 --- a/tools/testing/selftests/timers/posix_timers.c +++ b/tools/testing/selftests/timers/posix_timers.c @@ -15,13 +15,12 @@ #include <string.h> #include <unistd.h> #include <time.h> +#include <include/vdso/time64.h> #include <pthread.h> #include "../kselftest.h" #define DELAY 2 -#define USECS_PER_SEC 1000000 -#define NSECS_PER_SEC 1000000000 static void __fatal_error(const char *test, const char *name, const char *what) { @@ -86,9 +85,9 @@ static int check_diff(struct timeval start, struct timeval end) long long diff; diff = end.tv_usec - start.tv_usec; - diff += (end.tv_sec - start.tv_sec) * USECS_PER_SEC; + diff += (end.tv_sec - start.tv_sec) * USEC_PER_SEC; - if (llabs(diff - DELAY * USECS_PER_SEC) > USECS_PER_SEC / 2) { + if (llabs(diff - DELAY * USEC_PER_SEC) > USEC_PER_SEC / 2) { printf("Diff too high: %lld..", diff); return -1; } @@ -448,7 +447,7 @@ static inline int64_t calcdiff_ns(struct timespec t1, struct timespec t2) { int64_t diff; - diff = NSECS_PER_SEC * (int64_t)((int) t1.tv_sec - (int) t2.tv_sec); + diff = NSEC_PER_SEC * (int64_t)((int) t1.tv_sec - (int) t2.tv_sec); diff += ((int) t1.tv_nsec - (int) t2.tv_nsec); return diff; } @@ -479,7 +478,7 @@ static void check_sigev_none(int which, const char *name) do { if (clock_gettime(which, &now)) fatal_error(name, "clock_gettime()"); - } while (calcdiff_ns(now, start) < NSECS_PER_SEC); + } while (calcdiff_ns(now, start) < NSEC_PER_SEC); if (timer_gettime(timerid, &its)) fatal_error(name, "timer_gettime()"); @@ -536,7 +535,7 @@ static void check_gettime(int which, const char *name) wraps++; prev = its; - } while (calcdiff_ns(now, start) < NSECS_PER_SEC); + } while (calcdiff_ns(now, start) < NSEC_PER_SEC); if (timer_delete(timerid)) fatal_error(name, "timer_delete()"); @@ -587,7 +586,7 @@ static void check_overrun(int which, const char *name) do { if (clock_gettime(which, &now)) fatal_error(name, "clock_gettime()"); - } while (calcdiff_ns(now, start) < NSECS_PER_SEC); + } while (calcdiff_ns(now, start) < NSEC_PER_SEC); /* Unblock it, which should deliver a signal */ if (sigprocmask(SIG_UNBLOCK, &set, NULL)) diff --git a/tools/testing/selftests/timers/raw_skew.c b/tools/testing/selftests/timers/raw_skew.c index 030143eb09b4..957f7cd29cb1 100644 --- a/tools/testing/selftests/timers/raw_skew.c +++ b/tools/testing/selftests/timers/raw_skew.c @@ -25,11 +25,9 @@ #include <sys/time.h> #include <sys/timex.h> #include <time.h> +#include <include/vdso/time64.h> #include "../kselftest.h" -#define CLOCK_MONOTONIC_RAW 4 -#define NSEC_PER_SEC 1000000000LL - #define shift_right(x, s) ({ \ __typeof__(x) __x = (x); \ __typeof__(s) __s = (s); \ diff --git a/tools/testing/selftests/timers/set-2038.c b/tools/testing/selftests/timers/set-2038.c index f7d978721b9e..ed244315e11c 100644 --- a/tools/testing/selftests/timers/set-2038.c +++ b/tools/testing/selftests/timers/set-2038.c @@ -27,10 +27,9 @@ #include <unistd.h> #include <time.h> #include <sys/time.h> +#include <include/vdso/time64.h> #include "../kselftest.h" -#define NSEC_PER_SEC 1000000000LL - #define KTIME_MAX ((long long)~((unsigned long long)1 << 63)) #define KTIME_SEC_MAX (KTIME_MAX / NSEC_PER_SEC) diff --git a/tools/testing/selftests/timers/set-timer-lat.c b/tools/testing/selftests/timers/set-timer-lat.c index 7ce240c89b21..9d8437c13929 100644 --- a/tools/testing/selftests/timers/set-timer-lat.c +++ b/tools/testing/selftests/timers/set-timer-lat.c @@ -28,24 +28,12 @@ #include <signal.h> #include <stdlib.h> #include <pthread.h> +#include <include/vdso/time64.h> #include "../kselftest.h" -#define CLOCK_REALTIME 0 -#define CLOCK_MONOTONIC 1 -#define CLOCK_PROCESS_CPUTIME_ID 2 -#define CLOCK_THREAD_CPUTIME_ID 3 -#define CLOCK_MONOTONIC_RAW 4 -#define CLOCK_REALTIME_COARSE 5 -#define CLOCK_MONOTONIC_COARSE 6 -#define CLOCK_BOOTTIME 7 -#define CLOCK_REALTIME_ALARM 8 -#define CLOCK_BOOTTIME_ALARM 9 +/* CLOCK_HWSPECIFIC == CLOCK_SGI_CYCLE (Deprecated) */ #define CLOCK_HWSPECIFIC 10 -#define CLOCK_TAI 11 -#define NR_CLOCKIDS 12 - -#define NSEC_PER_SEC 1000000000ULL #define UNRESONABLE_LATENCY 40000000 /* 40ms in nanosecs */ #define TIMER_SECS 1 @@ -80,7 +68,7 @@ char *clockstring(int clockid) return "CLOCK_BOOTTIME_ALARM"; case CLOCK_TAI: return "CLOCK_TAI"; - }; + } return "UNKNOWN_CLOCKID"; } @@ -254,6 +242,7 @@ int main(void) struct sigaction act; int signum = SIGRTMAX; int ret = 0; + int max_clocks = CLOCK_TAI + 1; /* Set up signal handler: */ sigfillset(&act.sa_mask); @@ -262,7 +251,7 @@ int main(void) sigaction(signum, &act, NULL); printf("Setting timers for every %i seconds\n", TIMER_SECS); - for (clock_id = 0; clock_id < NR_CLOCKIDS; clock_id++) { + for (clock_id = 0; clock_id < max_clocks; clock_id++) { if ((clock_id == CLOCK_PROCESS_CPUTIME_ID) || (clock_id == CLOCK_THREAD_CPUTIME_ID) || diff --git a/tools/testing/selftests/timers/valid-adjtimex.c b/tools/testing/selftests/timers/valid-adjtimex.c index d500884801d8..6b7801055ad1 100644 --- a/tools/testing/selftests/timers/valid-adjtimex.c +++ b/tools/testing/selftests/timers/valid-adjtimex.c @@ -29,11 +29,9 @@ #include <string.h> #include <signal.h> #include <unistd.h> +#include <include/vdso/time64.h> #include "../kselftest.h" -#define NSEC_PER_SEC 1000000000LL -#define USEC_PER_SEC 1000000LL - #define ADJ_SETOFFSET 0x0100 #include <sys/syscall.h> diff --git a/tools/testing/selftests/vDSO/Makefile b/tools/testing/selftests/vDSO/Makefile index af9cedbf5357..1cf14a8da438 100644 --- a/tools/testing/selftests/vDSO/Makefile +++ b/tools/testing/selftests/vDSO/Makefile @@ -9,10 +9,8 @@ ifeq ($(ARCH),$(filter $(ARCH),x86 x86_64)) TEST_GEN_PROGS += vdso_standalone_test_x86 endif TEST_GEN_PROGS += vdso_test_correctness -ifeq ($(ARCH)$(CONFIG_X86_32),$(filter $(ARCH)$(CONFIG_X86_32),x86 x86_64 loongarch arm64 powerpc s390)) TEST_GEN_PROGS += vdso_test_getrandom TEST_GEN_PROGS += vdso_test_chacha -endif CFLAGS := -std=gnu99 -O2 @@ -37,9 +35,9 @@ $(OUTPUT)/vdso_test_getrandom: CFLAGS += -isystem $(top_srcdir)/tools/include \ $(KHDR_INCLUDES) \ -isystem $(top_srcdir)/include/uapi -$(OUTPUT)/vdso_test_chacha: $(top_srcdir)/tools/arch/$(SRCARCH)/vdso/vgetrandom-chacha.S +$(OUTPUT)/vdso_test_chacha: vgetrandom-chacha.S $(OUTPUT)/vdso_test_chacha: CFLAGS += -idirafter $(top_srcdir)/tools/include \ -idirafter $(top_srcdir)/tools/include/generated \ -idirafter $(top_srcdir)/arch/$(SRCARCH)/include \ -idirafter $(top_srcdir)/include \ - -D__ASSEMBLY__ -Wa,--noexecstack + -Wa,--noexecstack diff --git a/tools/testing/selftests/vDSO/parse_vdso.c b/tools/testing/selftests/vDSO/parse_vdso.c index 7dd5668ea8a6..28f35620c499 100644 --- a/tools/testing/selftests/vDSO/parse_vdso.c +++ b/tools/testing/selftests/vDSO/parse_vdso.c @@ -222,8 +222,7 @@ void *vdso_sym(const char *version, const char *name) ELF(Sym) *sym = &vdso_info.symtab[chain]; /* Check for a defined global or weak function w/ right name. */ - if (ELF64_ST_TYPE(sym->st_info) != STT_FUNC && - ELF64_ST_TYPE(sym->st_info) != STT_NOTYPE) + if (ELF64_ST_TYPE(sym->st_info) != STT_FUNC) continue; if (ELF64_ST_BIND(sym->st_info) != STB_GLOBAL && ELF64_ST_BIND(sym->st_info) != STB_WEAK) diff --git a/tools/testing/selftests/vDSO/vdso_test_chacha.c b/tools/testing/selftests/vDSO/vdso_test_chacha.c index b1ea532c5996..8757f738b0b1 100644 --- a/tools/testing/selftests/vDSO/vdso_test_chacha.c +++ b/tools/testing/selftests/vDSO/vdso_test_chacha.c @@ -3,6 +3,7 @@ * Copyright (C) 2022-2024 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. */ +#include <linux/compiler.h> #include <tools/le_byteshift.h> #include <sys/random.h> #include <sys/auxv.h> @@ -73,10 +74,10 @@ static void reference_chacha20_blocks(uint8_t *dst_bytes, const uint32_t *key, u counter[1] = s[13]; } -typedef uint8_t u8; -typedef uint32_t u32; -typedef uint64_t u64; -#include <vdso/getrandom.h> +void __weak __arch_chacha20_blocks_nostack(uint8_t *dst_bytes, const uint32_t *key, uint32_t *counter, size_t nblocks) +{ + ksft_exit_skip("Not implemented on architecture\n"); +} int main(int argc, char *argv[]) { @@ -90,10 +91,8 @@ int main(int argc, char *argv[]) ksft_set_plan(1); for (unsigned int trial = 0; trial < TRIALS; ++trial) { - if (getrandom(key, sizeof(key), 0) != sizeof(key)) { - printf("getrandom() failed!\n"); - return KSFT_SKIP; - } + if (getrandom(key, sizeof(key), 0) != sizeof(key)) + ksft_exit_skip("getrandom() failed unexpectedly\n"); memset(counter1, 0, sizeof(counter1)); reference_chacha20_blocks(output1, key, counter1, BLOCKS); for (unsigned int split = 0; split < BLOCKS; ++split) { @@ -102,8 +101,10 @@ int main(int argc, char *argv[]) if (split) __arch_chacha20_blocks_nostack(output2, key, counter2, split); __arch_chacha20_blocks_nostack(output2 + split * BLOCK_SIZE, key, counter2, BLOCKS - split); - if (memcmp(output1, output2, sizeof(output1)) || memcmp(counter1, counter2, sizeof(counter1))) - return KSFT_FAIL; + if (memcmp(output1, output2, sizeof(output1))) + ksft_exit_fail_msg("Main loop outputs do not match on trial %u, split %u\n", trial, split); + if (memcmp(counter1, counter2, sizeof(counter1))) + ksft_exit_fail_msg("Main loop counters do not match on trial %u, split %u\n", trial, split); } } memset(counter1, 0, sizeof(counter1)); @@ -113,14 +114,19 @@ int main(int argc, char *argv[]) reference_chacha20_blocks(output1, key, counter1, BLOCKS); __arch_chacha20_blocks_nostack(output2, key, counter2, BLOCKS); - if (memcmp(output1, output2, sizeof(output1)) || memcmp(counter1, counter2, sizeof(counter1))) - return KSFT_FAIL; + if (memcmp(output1, output2, sizeof(output1))) + ksft_exit_fail_msg("Block limit outputs do not match after first round\n"); + if (memcmp(counter1, counter2, sizeof(counter1))) + ksft_exit_fail_msg("Block limit counters do not match after first round\n"); reference_chacha20_blocks(output1, key, counter1, BLOCKS); __arch_chacha20_blocks_nostack(output2, key, counter2, BLOCKS); - if (memcmp(output1, output2, sizeof(output1)) || memcmp(counter1, counter2, sizeof(counter1))) - return KSFT_FAIL; + if (memcmp(output1, output2, sizeof(output1))) + ksft_exit_fail_msg("Block limit outputs do not match after second round\n"); + if (memcmp(counter1, counter2, sizeof(counter1))) + ksft_exit_fail_msg("Block limit counters do not match after second round\n"); ksft_test_result_pass("chacha: PASS\n"); - return KSFT_PASS; + ksft_exit_pass(); + return 0; } diff --git a/tools/testing/selftests/vDSO/vdso_test_getrandom.c b/tools/testing/selftests/vDSO/vdso_test_getrandom.c index e5e83dbec589..95057f7567db 100644 --- a/tools/testing/selftests/vDSO/vdso_test_getrandom.c +++ b/tools/testing/selftests/vDSO/vdso_test_getrandom.c @@ -11,6 +11,7 @@ #include <string.h> #include <time.h> #include <unistd.h> +#include <sched.h> #include <signal.h> #include <sys/auxv.h> #include <sys/mman.h> @@ -40,6 +41,9 @@ } while (0) #endif +#define ksft_assert(condition) \ + do { if (!(condition)) ksft_exit_fail_msg("Assertion failed: %s\n", #condition); } while (0) + static struct { pthread_mutex_t lock; void **states; @@ -111,26 +115,19 @@ static void vgetrandom_init(void) const char *version = versions[VDSO_VERSION]; const char *name = names[VDSO_NAMES][6]; unsigned long sysinfo_ehdr = getauxval(AT_SYSINFO_EHDR); - size_t ret; + ssize_t ret; - if (!sysinfo_ehdr) { - printf("AT_SYSINFO_EHDR is not present!\n"); - exit(KSFT_SKIP); - } + if (!sysinfo_ehdr) + ksft_exit_skip("AT_SYSINFO_EHDR is not present\n"); vdso_init_from_sysinfo_ehdr(sysinfo_ehdr); vgrnd.fn = (__typeof__(vgrnd.fn))vdso_sym(version, name); - if (!vgrnd.fn) { - printf("%s is missing!\n", name); - exit(KSFT_FAIL); - } + if (!vgrnd.fn) + ksft_exit_skip("%s@%s symbol is missing from vDSO\n", name, version); ret = VDSO_CALL(vgrnd.fn, 5, NULL, 0, 0, &vgrnd.params, ~0UL); - if (ret == -ENOSYS) { - printf("unsupported architecture\n"); - exit(KSFT_SKIP); - } else if (ret) { - printf("failed to fetch vgetrandom params!\n"); - exit(KSFT_FAIL); - } + if (ret == -ENOSYS) + ksft_exit_skip("CPU does not have runtime support\n"); + else if (ret) + ksft_exit_fail_msg("Failed to fetch vgetrandom params: %zd\n", ret); } static ssize_t vgetrandom(void *buf, size_t len, unsigned long flags) @@ -139,10 +136,7 @@ static ssize_t vgetrandom(void *buf, size_t len, unsigned long flags) if (!state) { state = vgetrandom_get_state(); - if (!state) { - printf("vgetrandom_get_state failed!\n"); - exit(KSFT_FAIL); - } + ksft_assert(state); } return VDSO_CALL(vgrnd.fn, 5, buf, len, flags, state, vgrnd.params.size_of_opaque_state); } @@ -154,7 +148,7 @@ static void *test_vdso_getrandom(void *ctx) for (size_t i = 0; i < TRIALS; ++i) { unsigned int val; ssize_t ret = vgetrandom(&val, sizeof(val), 0); - assert(ret == sizeof(val)); + ksft_assert(ret == sizeof(val)); } return NULL; } @@ -164,7 +158,7 @@ static void *test_libc_getrandom(void *ctx) for (size_t i = 0; i < TRIALS; ++i) { unsigned int val; ssize_t ret = getrandom(&val, sizeof(val), 0); - assert(ret == sizeof(val)); + ksft_assert(ret == sizeof(val)); } return NULL; } @@ -174,7 +168,7 @@ static void *test_syscall_getrandom(void *ctx) for (size_t i = 0; i < TRIALS; ++i) { unsigned int val; ssize_t ret = syscall(__NR_getrandom, &val, sizeof(val), 0); - assert(ret == sizeof(val)); + ksft_assert(ret == sizeof(val)); } return NULL; } @@ -209,7 +203,7 @@ static void bench_multi(void) clock_gettime(CLOCK_MONOTONIC, &start); for (size_t i = 0; i < THREADS; ++i) - assert(pthread_create(&threads[i], NULL, test_vdso_getrandom, NULL) == 0); + ksft_assert(pthread_create(&threads[i], NULL, test_vdso_getrandom, NULL) == 0); for (size_t i = 0; i < THREADS; ++i) pthread_join(threads[i], NULL); clock_gettime(CLOCK_MONOTONIC, &end); @@ -218,7 +212,7 @@ static void bench_multi(void) clock_gettime(CLOCK_MONOTONIC, &start); for (size_t i = 0; i < THREADS; ++i) - assert(pthread_create(&threads[i], NULL, test_libc_getrandom, NULL) == 0); + ksft_assert(pthread_create(&threads[i], NULL, test_libc_getrandom, NULL) == 0); for (size_t i = 0; i < THREADS; ++i) pthread_join(threads[i], NULL); clock_gettime(CLOCK_MONOTONIC, &end); @@ -227,7 +221,7 @@ static void bench_multi(void) clock_gettime(CLOCK_MONOTONIC, &start); for (size_t i = 0; i < THREADS; ++i) - assert(pthread_create(&threads[i], NULL, test_syscall_getrandom, NULL) == 0); + ksft_assert(pthread_create(&threads[i], NULL, test_syscall_getrandom, NULL) == 0); for (size_t i = 0; i < THREADS; ++i) pthread_join(threads[i], NULL); clock_gettime(CLOCK_MONOTONIC, &end); @@ -252,48 +246,46 @@ static void kselftest(void) for (size_t i = 0; i < 1000; ++i) { ssize_t ret = vgetrandom(weird_size, sizeof(weird_size), 0); - if (ret != sizeof(weird_size)) - exit(KSFT_FAIL); + ksft_assert(ret == sizeof(weird_size)); } ksft_test_result_pass("getrandom: PASS\n"); unshare(CLONE_NEWUSER); - assert(unshare(CLONE_NEWTIME) == 0); + ksft_assert(unshare(CLONE_NEWTIME) == 0); child = fork(); - assert(child >= 0); + ksft_assert(child >= 0); if (!child) { vgetrandom_init(); child = getpid(); - assert(ptrace(PTRACE_TRACEME, 0, NULL, NULL) == 0); - assert(kill(child, SIGSTOP) == 0); - assert(vgetrandom(weird_size, sizeof(weird_size), 0) == sizeof(weird_size)); + ksft_assert(ptrace(PTRACE_TRACEME, 0, NULL, NULL) == 0); + ksft_assert(kill(child, SIGSTOP) == 0); + ksft_assert(vgetrandom(weird_size, sizeof(weird_size), 0) == sizeof(weird_size)); _exit(0); } for (;;) { struct ptrace_syscall_info info = { 0 }; int status, ret; - assert(waitpid(child, &status, 0) >= 0); + ksft_assert(waitpid(child, &status, 0) >= 0); if (WIFEXITED(status)) { - if (WEXITSTATUS(status) != 0) - exit(KSFT_FAIL); + ksft_assert(WEXITSTATUS(status) == 0); break; } - assert(WIFSTOPPED(status)); + ksft_assert(WIFSTOPPED(status)); if (WSTOPSIG(status) == SIGSTOP) - assert(ptrace(PTRACE_SETOPTIONS, child, 0, PTRACE_O_TRACESYSGOOD) == 0); + ksft_assert(ptrace(PTRACE_SETOPTIONS, child, 0, PTRACE_O_TRACESYSGOOD) == 0); else if (WSTOPSIG(status) == (SIGTRAP | 0x80)) { - assert(ptrace(PTRACE_GET_SYSCALL_INFO, child, sizeof(info), &info) > 0); + ksft_assert(ptrace(PTRACE_GET_SYSCALL_INFO, child, sizeof(info), &info) > 0); if (info.op == PTRACE_SYSCALL_INFO_ENTRY && info.entry.nr == __NR_getrandom && info.entry.args[0] == (uintptr_t)weird_size && info.entry.args[1] == sizeof(weird_size)) - exit(KSFT_FAIL); + ksft_exit_fail_msg("vgetrandom passed buffer to syscall getrandom unexpectedly\n"); } - assert(ptrace(PTRACE_SYSCALL, child, 0, 0) == 0); + ksft_assert(ptrace(PTRACE_SYSCALL, child, 0, 0) == 0); } ksft_test_result_pass("getrandom timens: PASS\n"); - exit(KSFT_PASS); + ksft_exit_pass(); } static void usage(const char *argv0) diff --git a/tools/testing/selftests/vDSO/vgetrandom-chacha.S b/tools/testing/selftests/vDSO/vgetrandom-chacha.S new file mode 100644 index 000000000000..d6e09af7c0a9 --- /dev/null +++ b/tools/testing/selftests/vDSO/vgetrandom-chacha.S @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2024 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. + */ + +#define __ASSEMBLY__ + +#if defined(__aarch64__) +#include "../../../../arch/arm64/kernel/vdso/vgetrandom-chacha.S" +#elif defined(__loongarch__) +#include "../../../../arch/loongarch/vdso/vgetrandom-chacha.S" +#elif defined(__powerpc__) || defined(__powerpc64__) +#include "../../../../arch/powerpc/kernel/vdso/vgetrandom-chacha.S" +#elif defined(__s390x__) +#include "../../../../arch/s390/kernel/vdso64/vgetrandom-chacha.S" +#elif defined(__x86_64__) +#include "../../../../arch/x86/entry/vdso/vgetrandom-chacha.S" +#endif diff --git a/tools/testing/selftests/watchdog/watchdog-test.c b/tools/testing/selftests/watchdog/watchdog-test.c index bc71cbca0dde..a1f506ba5578 100644 --- a/tools/testing/selftests/watchdog/watchdog-test.c +++ b/tools/testing/selftests/watchdog/watchdog-test.c @@ -334,7 +334,13 @@ int main(int argc, char *argv[]) printf("Watchdog Ticking Away!\n"); + /* + * Register the signals + */ signal(SIGINT, term); + signal(SIGTERM, term); + signal(SIGKILL, term); + signal(SIGQUIT, term); while (1) { keep_alive(); diff --git a/tools/testing/selftests/wireguard/netns.sh b/tools/testing/selftests/wireguard/netns.sh index 405ff262ca93..55500f901fbc 100755 --- a/tools/testing/selftests/wireguard/netns.sh +++ b/tools/testing/selftests/wireguard/netns.sh @@ -332,6 +332,7 @@ waitiface $netns1 vethc waitiface $netns2 veths n0 bash -c 'printf 1 > /proc/sys/net/ipv4/ip_forward' +[[ -e /proc/sys/net/netfilter/nf_conntrack_udp_timeout ]] || modprobe nf_conntrack n0 bash -c 'printf 2 > /proc/sys/net/netfilter/nf_conntrack_udp_timeout' n0 bash -c 'printf 2 > /proc/sys/net/netfilter/nf_conntrack_udp_timeout_stream' n0 iptables -t nat -A POSTROUTING -s 192.168.1.0/24 -d 10.0.0.0/24 -j SNAT --to 10.0.0.1 diff --git a/tools/testing/selftests/wireguard/qemu/debug.config b/tools/testing/selftests/wireguard/qemu/debug.config index 9d172210e2c6..139fd9aa8b12 100644 --- a/tools/testing/selftests/wireguard/qemu/debug.config +++ b/tools/testing/selftests/wireguard/qemu/debug.config @@ -31,7 +31,6 @@ CONFIG_SCHED_DEBUG=y CONFIG_SCHED_INFO=y CONFIG_SCHEDSTATS=y CONFIG_SCHED_STACK_END_CHECK=y -CONFIG_DEBUG_TIMEKEEPING=y CONFIG_DEBUG_PREEMPT=y CONFIG_DEBUG_RT_MUTEXES=y CONFIG_DEBUG_SPINLOCK=y diff --git a/tools/testing/shared/linux.c b/tools/testing/shared/linux.c index 17263696b5d8..66dbb362385f 100644 --- a/tools/testing/shared/linux.c +++ b/tools/testing/shared/linux.c @@ -96,10 +96,13 @@ void *kmem_cache_alloc_lru(struct kmem_cache *cachep, struct list_lru *lru, p = node; } else { pthread_mutex_unlock(&cachep->lock); - if (cachep->align) - posix_memalign(&p, cachep->align, cachep->size); - else + if (cachep->align) { + if (posix_memalign(&p, cachep->align, cachep->size) < 0) + return NULL; + } else { p = malloc(cachep->size); + } + if (cachep->ctor) cachep->ctor(p); else if (gfp & __GFP_ZERO) @@ -195,8 +198,9 @@ int kmem_cache_alloc_bulk(struct kmem_cache *cachep, gfp_t gfp, size_t size, } if (cachep->align) { - posix_memalign(&p[i], cachep->align, - cachep->size); + if (posix_memalign(&p[i], cachep->align, + cachep->size) < 0) + break; } else { p[i] = malloc(cachep->size); if (!p[i]) diff --git a/tools/testing/shared/shared.mk b/tools/testing/shared/shared.mk index a6bc51d0b0bf..923ee2492256 100644 --- a/tools/testing/shared/shared.mk +++ b/tools/testing/shared/shared.mk @@ -69,6 +69,7 @@ generated/bit-length.h: FORCE @if ! grep -qws CONFIG_$(LONG_BIT)BIT generated/bit-length.h; then \ echo "Generating $@"; \ echo "#define CONFIG_$(LONG_BIT)BIT 1" > $@; \ + echo "#define CONFIG_PHYS_ADDR_T_$(LONG_BIT)BIT 1" >> $@; \ fi FORCE: ; diff --git a/tools/testing/vma/vma.c b/tools/testing/vma/vma.c index c53f220eb6cc..8fab5e13c7c3 100644 --- a/tools/testing/vma/vma.c +++ b/tools/testing/vma/vma.c @@ -4,6 +4,8 @@ #include <stdio.h> #include <stdlib.h> +#include "generated/bit-length.h" + #include "maple-shared.h" #include "vma_internal.h" @@ -1522,6 +1524,45 @@ static bool test_copy_vma(void) return true; } +static bool test_expand_only_mode(void) +{ + unsigned long flags = VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE; + struct mm_struct mm = {}; + VMA_ITERATOR(vmi, &mm, 0); + struct vm_area_struct *vma_prev, *vma; + VMG_STATE(vmg, &mm, &vmi, 0x5000, 0x9000, flags, 5); + + /* + * Place a VMA prior to the one we're expanding so we assert that we do + * not erroneously try to traverse to the previous VMA even though we + * have, through the use of VMG_FLAG_JUST_EXPAND, indicated we do not + * need to do so. + */ + alloc_and_link_vma(&mm, 0, 0x2000, 0, flags); + + /* + * We will be positioned at the prev VMA, but looking to expand to + * 0x9000. + */ + vma_iter_set(&vmi, 0x3000); + vma_prev = alloc_and_link_vma(&mm, 0x3000, 0x5000, 3, flags); + vmg.prev = vma_prev; + vmg.merge_flags = VMG_FLAG_JUST_EXPAND; + + vma = vma_merge_new_range(&vmg); + ASSERT_NE(vma, NULL); + ASSERT_EQ(vma, vma_prev); + ASSERT_EQ(vmg.state, VMA_MERGE_SUCCESS); + ASSERT_EQ(vma->vm_start, 0x3000); + ASSERT_EQ(vma->vm_end, 0x9000); + ASSERT_EQ(vma->vm_pgoff, 3); + ASSERT_TRUE(vma_write_started(vma)); + ASSERT_EQ(vma_iter_addr(&vmi), 0x3000); + + cleanup_mm(&mm, &vmi); + return true; +} + int main(void) { int num_tests = 0, num_fail = 0; @@ -1553,6 +1594,7 @@ int main(void) TEST(vmi_prealloc_fail); TEST(merge_extend); TEST(copy_vma); + TEST(expand_only_mode); #undef TEST diff --git a/tools/testing/vma/vma_internal.h b/tools/testing/vma/vma_internal.h index c5b9da034511..e76ff579e1fd 100644 --- a/tools/testing/vma/vma_internal.h +++ b/tools/testing/vma/vma_internal.h @@ -44,7 +44,9 @@ #define VM_LOCKED 0x00002000 #define VM_IO 0x00004000 #define VM_DONTEXPAND 0x00040000 +#define VM_LOCKONFAULT 0x00080000 #define VM_ACCOUNT 0x00100000 +#define VM_NORESERVE 0x00200000 #define VM_MIXEDMAP 0x10000000 #define VM_STACK VM_GROWSDOWN #define VM_SHADOW_STACK VM_NONE @@ -53,6 +55,14 @@ #define VM_ACCESS_FLAGS (VM_READ | VM_WRITE | VM_EXEC) #define VM_SPECIAL (VM_IO | VM_DONTEXPAND | VM_PFNMAP | VM_MIXEDMAP) +/* This mask represents all the VMA flag bits used by mlock */ +#define VM_LOCKED_MASK (VM_LOCKED | VM_LOCKONFAULT) + +#ifdef CONFIG_64BIT +/* VM is sealed, in vm_flags */ +#define VM_SEALED _BITUL(63) +#endif + #define FIRST_USER_ADDRESS 0UL #define USER_PGTABLES_CEILING 0UL @@ -698,8 +708,9 @@ static inline void tlb_finish_mmu(struct mmu_gather *) { } -static inline void get_file(struct file *) +static inline struct file *get_file(struct file *f) { + return f; } static inline int vma_dup_policy(struct vm_area_struct *, struct vm_area_struct *) @@ -920,4 +931,106 @@ static inline bool signal_pending(void *) return false; } +static inline bool is_file_hugepages(struct file *) +{ + return false; +} + +static inline int security_vm_enough_memory_mm(struct mm_struct *, long) +{ + return true; +} + +static inline bool may_expand_vm(struct mm_struct *, vm_flags_t, unsigned long) +{ + return true; +} + +static inline void vm_flags_init(struct vm_area_struct *vma, + vm_flags_t flags) +{ + vma->__vm_flags = flags; +} + +static inline void vm_flags_set(struct vm_area_struct *vma, + vm_flags_t flags) +{ + vma_start_write(vma); + vma->__vm_flags |= flags; +} + +static inline void vm_flags_clear(struct vm_area_struct *vma, + vm_flags_t flags) +{ + vma_start_write(vma); + vma->__vm_flags &= ~flags; +} + +static inline int call_mmap(struct file *, struct vm_area_struct *) +{ + return 0; +} + +static inline int shmem_zero_setup(struct vm_area_struct *) +{ + return 0; +} + +static inline void vma_set_anonymous(struct vm_area_struct *vma) +{ + vma->vm_ops = NULL; +} + +static inline void ksm_add_vma(struct vm_area_struct *) +{ +} + +static inline void perf_event_mmap(struct vm_area_struct *) +{ +} + +static inline bool vma_is_dax(struct vm_area_struct *) +{ + return false; +} + +static inline struct vm_area_struct *get_gate_vma(struct mm_struct *) +{ + return NULL; +} + +bool vma_wants_writenotify(struct vm_area_struct *vma, pgprot_t vm_page_prot); + +/* Update vma->vm_page_prot to reflect vma->vm_flags. */ +static inline void vma_set_page_prot(struct vm_area_struct *vma) +{ + unsigned long vm_flags = vma->vm_flags; + pgprot_t vm_page_prot; + + /* testing: we inline vm_pgprot_modify() to avoid clash with vma.h. */ + vm_page_prot = pgprot_modify(vma->vm_page_prot, vm_get_page_prot(vm_flags)); + + if (vma_wants_writenotify(vma, vm_page_prot)) { + vm_flags &= ~VM_SHARED; + /* testing: we inline vm_pgprot_modify() to avoid clash with vma.h. */ + vm_page_prot = pgprot_modify(vm_page_prot, vm_get_page_prot(vm_flags)); + } + /* remove_protection_ptes reads vma->vm_page_prot without mmap_lock */ + WRITE_ONCE(vma->vm_page_prot, vm_page_prot); +} + +static inline bool arch_validate_flags(unsigned long) +{ + return true; +} + +static inline void vma_close(struct vm_area_struct *) +{ +} + +static inline int mmap_file(struct file *, struct vm_area_struct *) +{ + return 0; +} + #endif /* __MM_VMA_INTERNAL_H */ diff --git a/tools/testing/vsock/control.c b/tools/testing/vsock/control.c index d2deb4b15b94..0066e0324d35 100644 --- a/tools/testing/vsock/control.c +++ b/tools/testing/vsock/control.c @@ -27,6 +27,7 @@ #include "timeout.h" #include "control.h" +#include "util.h" static int control_fd = -1; @@ -50,7 +51,6 @@ void control_init(const char *control_host, for (ai = result; ai; ai = ai->ai_next) { int fd; - int val = 1; fd = socket(ai->ai_family, ai->ai_socktype, ai->ai_protocol); if (fd < 0) @@ -65,11 +65,8 @@ void control_init(const char *control_host, break; } - if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, - &val, sizeof(val)) < 0) { - perror("setsockopt"); - exit(EXIT_FAILURE); - } + setsockopt_int_check(fd, SOL_SOCKET, SO_REUSEADDR, 1, + "setsockopt SO_REUSEADDR"); if (bind(fd, ai->ai_addr, ai->ai_addrlen) < 0) goto next; diff --git a/tools/testing/vsock/msg_zerocopy_common.c b/tools/testing/vsock/msg_zerocopy_common.c index 5a4bdf7b5132..8622e5a0f8b7 100644 --- a/tools/testing/vsock/msg_zerocopy_common.c +++ b/tools/testing/vsock/msg_zerocopy_common.c @@ -14,16 +14,6 @@ #include "msg_zerocopy_common.h" -void enable_so_zerocopy(int fd) -{ - int val = 1; - - if (setsockopt(fd, SOL_SOCKET, SO_ZEROCOPY, &val, sizeof(val))) { - perror("setsockopt"); - exit(EXIT_FAILURE); - } -} - void vsock_recv_completion(int fd, const bool *zerocopied) { struct sock_extended_err *serr; diff --git a/tools/testing/vsock/msg_zerocopy_common.h b/tools/testing/vsock/msg_zerocopy_common.h index 3763c5ccedb9..ad14139e93ca 100644 --- a/tools/testing/vsock/msg_zerocopy_common.h +++ b/tools/testing/vsock/msg_zerocopy_common.h @@ -12,7 +12,6 @@ #define VSOCK_RECVERR 1 #endif -void enable_so_zerocopy(int fd); void vsock_recv_completion(int fd, const bool *zerocopied); #endif /* MSG_ZEROCOPY_COMMON_H */ diff --git a/tools/testing/vsock/util.c b/tools/testing/vsock/util.c index a3d448a075e3..34e9dac0a105 100644 --- a/tools/testing/vsock/util.c +++ b/tools/testing/vsock/util.c @@ -651,3 +651,145 @@ void free_test_iovec(const struct iovec *test_iovec, free(iovec); } + +/* Set "unsigned long long" socket option and check that it's indeed set */ +void setsockopt_ull_check(int fd, int level, int optname, + unsigned long long val, char const *errmsg) +{ + unsigned long long chkval; + socklen_t chklen; + int err; + + err = setsockopt(fd, level, optname, &val, sizeof(val)); + if (err) { + fprintf(stderr, "setsockopt err: %s (%d)\n", + strerror(errno), errno); + goto fail; + } + + chkval = ~val; /* just make storage != val */ + chklen = sizeof(chkval); + + err = getsockopt(fd, level, optname, &chkval, &chklen); + if (err) { + fprintf(stderr, "getsockopt err: %s (%d)\n", + strerror(errno), errno); + goto fail; + } + + if (chklen != sizeof(chkval)) { + fprintf(stderr, "size mismatch: set %zu got %d\n", sizeof(val), + chklen); + goto fail; + } + + if (chkval != val) { + fprintf(stderr, "value mismatch: set %llu got %llu\n", val, + chkval); + goto fail; + } + return; +fail: + fprintf(stderr, "%s val %llu\n", errmsg, val); + exit(EXIT_FAILURE); +; +} + +/* Set "int" socket option and check that it's indeed set */ +void setsockopt_int_check(int fd, int level, int optname, int val, + char const *errmsg) +{ + int chkval; + socklen_t chklen; + int err; + + err = setsockopt(fd, level, optname, &val, sizeof(val)); + if (err) { + fprintf(stderr, "setsockopt err: %s (%d)\n", + strerror(errno), errno); + goto fail; + } + + chkval = ~val; /* just make storage != val */ + chklen = sizeof(chkval); + + err = getsockopt(fd, level, optname, &chkval, &chklen); + if (err) { + fprintf(stderr, "getsockopt err: %s (%d)\n", + strerror(errno), errno); + goto fail; + } + + if (chklen != sizeof(chkval)) { + fprintf(stderr, "size mismatch: set %zu got %d\n", sizeof(val), + chklen); + goto fail; + } + + if (chkval != val) { + fprintf(stderr, "value mismatch: set %d got %d\n", val, chkval); + goto fail; + } + return; +fail: + fprintf(stderr, "%s val %d\n", errmsg, val); + exit(EXIT_FAILURE); +} + +static void mem_invert(unsigned char *mem, size_t size) +{ + size_t i; + + for (i = 0; i < size; i++) + mem[i] = ~mem[i]; +} + +/* Set "timeval" socket option and check that it's indeed set */ +void setsockopt_timeval_check(int fd, int level, int optname, + struct timeval val, char const *errmsg) +{ + struct timeval chkval; + socklen_t chklen; + int err; + + err = setsockopt(fd, level, optname, &val, sizeof(val)); + if (err) { + fprintf(stderr, "setsockopt err: %s (%d)\n", + strerror(errno), errno); + goto fail; + } + + /* just make storage != val */ + chkval = val; + mem_invert((unsigned char *)&chkval, sizeof(chkval)); + chklen = sizeof(chkval); + + err = getsockopt(fd, level, optname, &chkval, &chklen); + if (err) { + fprintf(stderr, "getsockopt err: %s (%d)\n", + strerror(errno), errno); + goto fail; + } + + if (chklen != sizeof(chkval)) { + fprintf(stderr, "size mismatch: set %zu got %d\n", sizeof(val), + chklen); + goto fail; + } + + if (memcmp(&chkval, &val, sizeof(val)) != 0) { + fprintf(stderr, "value mismatch: set %ld:%ld got %ld:%ld\n", + val.tv_sec, val.tv_usec, chkval.tv_sec, chkval.tv_usec); + goto fail; + } + return; +fail: + fprintf(stderr, "%s val %ld:%ld\n", errmsg, val.tv_sec, val.tv_usec); + exit(EXIT_FAILURE); +} + +void enable_so_zerocopy_check(int fd) +{ + setsockopt_int_check(fd, SOL_SOCKET, SO_ZEROCOPY, 1, + "setsockopt SO_ZEROCOPY"); +} diff --git a/tools/testing/vsock/util.h b/tools/testing/vsock/util.h index fff22d4a14c0..ba84d296d8b7 100644 --- a/tools/testing/vsock/util.h +++ b/tools/testing/vsock/util.h @@ -68,4 +68,11 @@ unsigned long iovec_hash_djb2(const struct iovec *iov, size_t iovnum); struct iovec *alloc_test_iovec(const struct iovec *test_iovec, int iovnum); void free_test_iovec(const struct iovec *test_iovec, struct iovec *iovec, int iovnum); +void setsockopt_ull_check(int fd, int level, int optname, + unsigned long long val, char const *errmsg); +void setsockopt_int_check(int fd, int level, int optname, int val, + char const *errmsg); +void setsockopt_timeval_check(int fd, int level, int optname, + struct timeval val, char const *errmsg); +void enable_so_zerocopy_check(int fd); #endif /* UTIL_H */ diff --git a/tools/testing/vsock/vsock_perf.c b/tools/testing/vsock/vsock_perf.c index 4e8578f815e0..75971ac708c9 100644 --- a/tools/testing/vsock/vsock_perf.c +++ b/tools/testing/vsock/vsock_perf.c @@ -33,7 +33,7 @@ static unsigned int port = DEFAULT_PORT; static unsigned long buf_size_bytes = DEFAULT_BUF_SIZE_BYTES; -static unsigned long vsock_buf_bytes = DEFAULT_VSOCK_BUF_BYTES; +static unsigned long long vsock_buf_bytes = DEFAULT_VSOCK_BUF_BYTES; static bool zerocopy; static void error(const char *s) @@ -133,7 +133,7 @@ static float get_gbps(unsigned long bits, time_t ns_delta) ((float)ns_delta / NSEC_PER_SEC); } -static void run_receiver(unsigned long rcvlowat_bytes) +static void run_receiver(int rcvlowat_bytes) { unsigned int read_cnt; time_t rx_begin_ns; @@ -162,8 +162,8 @@ static void run_receiver(unsigned long rcvlowat_bytes) printf("Run as receiver\n"); printf("Listen port %u\n", port); printf("RX buffer %lu bytes\n", buf_size_bytes); - printf("vsock buffer %lu bytes\n", vsock_buf_bytes); - printf("SO_RCVLOWAT %lu bytes\n", rcvlowat_bytes); + printf("vsock buffer %llu bytes\n", vsock_buf_bytes); + printf("SO_RCVLOWAT %d bytes\n", rcvlowat_bytes); fd = socket(AF_VSOCK, SOCK_STREAM, 0); @@ -251,6 +251,16 @@ static void run_receiver(unsigned long rcvlowat_bytes) close(fd); } +static void enable_so_zerocopy(int fd) +{ + int val = 1; + + if (setsockopt(fd, SOL_SOCKET, SO_ZEROCOPY, &val, sizeof(val))) { + perror("setsockopt"); + exit(EXIT_FAILURE); + } +} + static void run_sender(int peer_cid, unsigned long to_send_bytes) { time_t tx_begin_ns; @@ -439,7 +449,7 @@ static long strtolx(const char *arg) int main(int argc, char **argv) { unsigned long to_send_bytes = DEFAULT_TO_SEND_BYTES; - unsigned long rcvlowat_bytes = DEFAULT_RCVLOWAT_BYTES; + int rcvlowat_bytes = DEFAULT_RCVLOWAT_BYTES; int peer_cid = -1; bool sender = false; diff --git a/tools/testing/vsock/vsock_test.c b/tools/testing/vsock/vsock_test.c index 8d38dbf8f41f..48f17641ca50 100644 --- a/tools/testing/vsock/vsock_test.c +++ b/tools/testing/vsock/vsock_test.c @@ -429,7 +429,7 @@ static void test_seqpacket_msg_bounds_client(const struct test_opts *opts) static void test_seqpacket_msg_bounds_server(const struct test_opts *opts) { - unsigned long sock_buf_size; + unsigned long long sock_buf_size; unsigned long remote_hash; unsigned long curr_hash; int fd; @@ -444,17 +444,13 @@ static void test_seqpacket_msg_bounds_server(const struct test_opts *opts) sock_buf_size = SOCK_BUF_SIZE; - if (setsockopt(fd, AF_VSOCK, SO_VM_SOCKETS_BUFFER_MAX_SIZE, - &sock_buf_size, sizeof(sock_buf_size))) { - perror("setsockopt(SO_VM_SOCKETS_BUFFER_MAX_SIZE)"); - exit(EXIT_FAILURE); - } + setsockopt_ull_check(fd, AF_VSOCK, SO_VM_SOCKETS_BUFFER_MAX_SIZE, + sock_buf_size, + "setsockopt(SO_VM_SOCKETS_BUFFER_MAX_SIZE)"); - if (setsockopt(fd, AF_VSOCK, SO_VM_SOCKETS_BUFFER_SIZE, - &sock_buf_size, sizeof(sock_buf_size))) { - perror("setsockopt(SO_VM_SOCKETS_BUFFER_SIZE)"); - exit(EXIT_FAILURE); - } + setsockopt_ull_check(fd, AF_VSOCK, SO_VM_SOCKETS_BUFFER_SIZE, + sock_buf_size, + "setsockopt(SO_VM_SOCKETS_BUFFER_SIZE)"); /* Ready to receive data. */ control_writeln("SRVREADY"); @@ -586,10 +582,8 @@ static void test_seqpacket_timeout_client(const struct test_opts *opts) tv.tv_sec = RCVTIMEO_TIMEOUT_SEC; tv.tv_usec = 0; - if (setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, (void *)&tv, sizeof(tv)) == -1) { - perror("setsockopt(SO_RCVTIMEO)"); - exit(EXIT_FAILURE); - } + setsockopt_timeval_check(fd, SOL_SOCKET, SO_RCVTIMEO, tv, + "setsockopt(SO_RCVTIMEO)"); read_enter_ns = current_nsec(); @@ -634,7 +628,8 @@ static void test_seqpacket_timeout_server(const struct test_opts *opts) static void test_seqpacket_bigmsg_client(const struct test_opts *opts) { - unsigned long sock_buf_size; + unsigned long long sock_buf_size; + size_t buf_size; socklen_t len; void *data; int fd; @@ -655,13 +650,20 @@ static void test_seqpacket_bigmsg_client(const struct test_opts *opts) sock_buf_size++; - data = malloc(sock_buf_size); + /* size_t can be < unsigned long long */ + buf_size = (size_t)sock_buf_size; + if (buf_size != sock_buf_size) { + fprintf(stderr, "Returned BUFFER_SIZE too large\n"); + exit(EXIT_FAILURE); + } + + data = malloc(buf_size); if (!data) { perror("malloc"); exit(EXIT_FAILURE); } - send_buf(fd, data, sock_buf_size, 0, -EMSGSIZE); + send_buf(fd, data, buf_size, 0, -EMSGSIZE); control_writeln("CLISENT"); @@ -835,7 +837,7 @@ static void test_stream_poll_rcvlowat_server(const struct test_opts *opts) static void test_stream_poll_rcvlowat_client(const struct test_opts *opts) { - unsigned long lowat_val = RCVLOWAT_BUF_SIZE; + int lowat_val = RCVLOWAT_BUF_SIZE; char buf[RCVLOWAT_BUF_SIZE]; struct pollfd fds; short poll_flags; @@ -847,11 +849,8 @@ static void test_stream_poll_rcvlowat_client(const struct test_opts *opts) exit(EXIT_FAILURE); } - if (setsockopt(fd, SOL_SOCKET, SO_RCVLOWAT, - &lowat_val, sizeof(lowat_val))) { - perror("setsockopt(SO_RCVLOWAT)"); - exit(EXIT_FAILURE); - } + setsockopt_int_check(fd, SOL_SOCKET, SO_RCVLOWAT, + lowat_val, "setsockopt(SO_RCVLOWAT)"); control_expectln("SRVSENT"); @@ -1357,9 +1356,10 @@ static void test_stream_rcvlowat_def_cred_upd_client(const struct test_opts *opt static void test_stream_credit_update_test(const struct test_opts *opts, bool low_rx_bytes_test) { - size_t recv_buf_size; + int recv_buf_size; struct pollfd fds; size_t buf_size; + unsigned long long sock_buf_size; void *buf; int fd; @@ -1371,11 +1371,12 @@ static void test_stream_credit_update_test(const struct test_opts *opts, buf_size = RCVLOWAT_CREDIT_UPD_BUF_SIZE; - if (setsockopt(fd, AF_VSOCK, SO_VM_SOCKETS_BUFFER_SIZE, - &buf_size, sizeof(buf_size))) { - perror("setsockopt(SO_VM_SOCKETS_BUFFER_SIZE)"); - exit(EXIT_FAILURE); - } + /* size_t can be < unsigned long long */ + sock_buf_size = buf_size; + + setsockopt_ull_check(fd, AF_VSOCK, SO_VM_SOCKETS_BUFFER_SIZE, + sock_buf_size, + "setsockopt(SO_VM_SOCKETS_BUFFER_SIZE)"); if (low_rx_bytes_test) { /* Set new SO_RCVLOWAT here. This enables sending credit @@ -1384,11 +1385,8 @@ static void test_stream_credit_update_test(const struct test_opts *opts, */ recv_buf_size = 1 + VIRTIO_VSOCK_MAX_PKT_BUF_SIZE; - if (setsockopt(fd, SOL_SOCKET, SO_RCVLOWAT, - &recv_buf_size, sizeof(recv_buf_size))) { - perror("setsockopt(SO_RCVLOWAT)"); - exit(EXIT_FAILURE); - } + setsockopt_int_check(fd, SOL_SOCKET, SO_RCVLOWAT, + recv_buf_size, "setsockopt(SO_RCVLOWAT)"); } /* Send one dummy byte here, because 'setsockopt()' above also @@ -1430,11 +1428,8 @@ static void test_stream_credit_update_test(const struct test_opts *opts, recv_buf_size++; /* Updating SO_RCVLOWAT will send credit update. */ - if (setsockopt(fd, SOL_SOCKET, SO_RCVLOWAT, - &recv_buf_size, sizeof(recv_buf_size))) { - perror("setsockopt(SO_RCVLOWAT)"); - exit(EXIT_FAILURE); - } + setsockopt_int_check(fd, SOL_SOCKET, SO_RCVLOWAT, + recv_buf_size, "setsockopt(SO_RCVLOWAT)"); } fds.fd = fd; diff --git a/tools/testing/vsock/vsock_test_zerocopy.c b/tools/testing/vsock/vsock_test_zerocopy.c index 04c376b6937f..9d9a6cb9614a 100644 --- a/tools/testing/vsock/vsock_test_zerocopy.c +++ b/tools/testing/vsock/vsock_test_zerocopy.c @@ -162,7 +162,7 @@ static void test_client(const struct test_opts *opts, } if (test_data->so_zerocopy) - enable_so_zerocopy(fd); + enable_so_zerocopy_check(fd); iovec = alloc_test_iovec(test_data->vecs, test_data->vecs_cnt); diff --git a/tools/testing/vsock/vsock_uring_test.c b/tools/testing/vsock/vsock_uring_test.c index 6c3e6f70c457..5c3078969659 100644 --- a/tools/testing/vsock/vsock_uring_test.c +++ b/tools/testing/vsock/vsock_uring_test.c @@ -73,7 +73,7 @@ static void vsock_io_uring_client(const struct test_opts *opts, } if (msg_zerocopy) - enable_so_zerocopy(fd); + enable_so_zerocopy_check(fd); iovec = alloc_test_iovec(test_data->vecs, test_data->vecs_cnt); diff --git a/tools/thermal/lib/Makefile b/tools/thermal/lib/Makefile index 82db451935c5..f2552f73a64c 100644 --- a/tools/thermal/lib/Makefile +++ b/tools/thermal/lib/Makefile @@ -3,7 +3,7 @@ LIBTHERMAL_TOOLS_VERSION = 0 LIBTHERMAL_TOOLS_PATCHLEVEL = 0 -LIBTHERMAL_TOOLS_EXTRAVERSION = 1 +LIBTHERMAL_TOOLS_EXTRAVERSION = 2 MAKEFLAGS += --no-print-directory diff --git a/tools/thermal/thermal-engine/thermal-engine.c b/tools/thermal/thermal-engine/thermal-engine.c index 9b1476a2680f..0764dc754771 100644 --- a/tools/thermal/thermal-engine/thermal-engine.c +++ b/tools/thermal/thermal-engine/thermal-engine.c @@ -38,6 +38,14 @@ struct thermal_data { struct thermal_handler *th; }; +static int show_threshold(struct thermal_threshold *th, __maybe_unused void *arg) +{ + INFO("threshold temp=%d, direction=%d\n", + th->temperature, th->direction); + + return 0; +} + static int show_trip(struct thermal_trip *tt, __maybe_unused void *arg) { INFO("trip id=%d, type=%d, temp=%d, hyst=%d\n", @@ -70,6 +78,8 @@ static int show_tz(struct thermal_zone *tz, __maybe_unused void *arg) for_each_thermal_trip(tz->trip, show_trip, NULL); + for_each_thermal_threshold(tz->thresholds, show_threshold, NULL); + show_temp(tz, arg); show_governor(tz, arg); @@ -77,6 +87,30 @@ static int show_tz(struct thermal_zone *tz, __maybe_unused void *arg) return 0; } +static int set_threshold(struct thermal_zone *tz, __maybe_unused void *arg) +{ + struct thermal_handler *th = arg; + int thresholds[] = { 43000, 65000, 49000, 55000, 57000 }; + size_t i; + + INFO("Setting threshold for thermal zone '%s', id=%d\n", tz->name, tz->id); + + if (thermal_cmd_threshold_flush(th, tz)) { + ERROR("Failed to flush all previous thresholds\n"); + return -1; + } + + for (i = 0; i < sizeof(thresholds) / sizeof(thresholds[0]); i++) + if (thermal_cmd_threshold_add(th, tz, thresholds[i], + THERMAL_THRESHOLD_WAY_UP | + THERMAL_THRESHOLD_WAY_DOWN)) { + ERROR("Failed to set threshold\n"); + return -1; + } + + return 0; +} + static int tz_create(const char *name, int tz_id, __maybe_unused void *arg) { INFO("Thermal zone '%s'/%d created\n", name, tz_id); @@ -197,20 +231,62 @@ static int gov_change(int tz_id, const char *name, __maybe_unused void *arg) return 0; } +static int threshold_add(int tz_id, int temp, int direction, __maybe_unused void *arg) +{ + INFO("Threshold added tz_id=%d: temp=%d, direction=%d\n", tz_id, temp, direction); + + return 0; +} + +static int threshold_delete(int tz_id, int temp, int direction, __maybe_unused void *arg) +{ + INFO("Threshold deleted tz_id=%d: temp=%d, direction=%d\n", tz_id, temp, direction); + + return 0; +} + +static int threshold_flush(int tz_id, __maybe_unused void *arg) +{ + INFO("Thresholds flushed tz_id=%d\n", tz_id); + + return 0; +} + +static int threshold_up(int tz_id, int temp, int prev_temp, __maybe_unused void *arg) +{ + INFO("Threshold crossed way up tz_id=%d: temp=%d, prev_temp=%d\n", + tz_id, temp, prev_temp); + + return 0; +} + +static int threshold_down(int tz_id, int temp, int prev_temp, __maybe_unused void *arg) +{ + INFO("Threshold crossed way down tz_id=%d: temp=%d, prev_temp=%d\n", + tz_id, temp, prev_temp); + + return 0; +} + static struct thermal_ops ops = { - .events.tz_create = tz_create, - .events.tz_delete = tz_delete, - .events.tz_disable = tz_disable, - .events.tz_enable = tz_enable, - .events.trip_high = trip_high, - .events.trip_low = trip_low, - .events.trip_add = trip_add, - .events.trip_delete = trip_delete, - .events.trip_change = trip_change, - .events.cdev_add = cdev_add, - .events.cdev_delete = cdev_delete, - .events.cdev_update = cdev_update, - .events.gov_change = gov_change + .events.tz_create = tz_create, + .events.tz_delete = tz_delete, + .events.tz_disable = tz_disable, + .events.tz_enable = tz_enable, + .events.trip_high = trip_high, + .events.trip_low = trip_low, + .events.trip_add = trip_add, + .events.trip_delete = trip_delete, + .events.trip_change = trip_change, + .events.cdev_add = cdev_add, + .events.cdev_delete = cdev_delete, + .events.cdev_update = cdev_update, + .events.gov_change = gov_change, + .events.threshold_add = threshold_add, + .events.threshold_delete = threshold_delete, + .events.threshold_flush = threshold_flush, + .events.threshold_up = threshold_up, + .events.threshold_down = threshold_down, }; static int thermal_event(__maybe_unused int fd, __maybe_unused void *arg) @@ -280,6 +356,7 @@ enum { THERMAL_ENGINE_DAEMON_ERROR, THERMAL_ENGINE_LOG_ERROR, THERMAL_ENGINE_THERMAL_ERROR, + THERMAL_ENGINE_THRESHOLD_ERROR, THERMAL_ENGINE_MAINLOOP_ERROR, }; @@ -318,6 +395,8 @@ int main(int argc, char *argv[]) return THERMAL_ENGINE_THERMAL_ERROR; } + for_each_thermal_zone(td.tz, set_threshold, td.th); + for_each_thermal_zone(td.tz, show_tz, td.th); if (mainloop_init()) { diff --git a/tools/thermal/thermometer/thermometer.c b/tools/thermal/thermometer/thermometer.c index 1a87a0a77f9f..022865da8e3c 100644 --- a/tools/thermal/thermometer/thermometer.c +++ b/tools/thermal/thermometer/thermometer.c @@ -259,6 +259,7 @@ static int thermometer_add_tz(const char *path, const char *name, int polling, { int fd; char tz_path[PATH_MAX]; + struct tz *tz; sprintf(tz_path, CLASS_THERMAL"/%s/temp", path); @@ -268,13 +269,13 @@ static int thermometer_add_tz(const char *path, const char *name, int polling, return -1; } - thermometer->tz = realloc(thermometer->tz, - sizeof(*thermometer->tz) * (thermometer->nr_tz + 1)); - if (!thermometer->tz) { + tz = realloc(thermometer->tz, sizeof(*thermometer->tz) * (thermometer->nr_tz + 1)); + if (!tz) { ERROR("Failed to allocate thermometer->tz\n"); return -1; } + thermometer->tz = tz; thermometer->tz[thermometer->nr_tz].fd_temp = fd; thermometer->tz[thermometer->nr_tz].name = strdup(name); thermometer->tz[thermometer->nr_tz].polling = polling; diff --git a/tools/tracing/rtla/Makefile b/tools/tracing/rtla/Makefile index b5878be36125..a6a7dee16622 100644 --- a/tools/tracing/rtla/Makefile +++ b/tools/tracing/rtla/Makefile @@ -32,8 +32,10 @@ DOCSRC := ../../../Documentation/tools/rtla/ FEATURE_TESTS := libtraceevent FEATURE_TESTS += libtracefs +FEATURE_TESTS += libcpupower FEATURE_DISPLAY := libtraceevent FEATURE_DISPLAY += libtracefs +FEATURE_DISPLAY += libcpupower ifeq ($(V),1) Q = diff --git a/tools/tracing/rtla/Makefile.config b/tools/tracing/rtla/Makefile.config index 5f8c286712d4..92a6e12e42d3 100644 --- a/tools/tracing/rtla/Makefile.config +++ b/tools/tracing/rtla/Makefile.config @@ -43,6 +43,16 @@ else $(info libtracefs is missing. Please install libtracefs-dev/libtracefs-devel) endif +$(call feature_check,libcpupower) +ifeq ($(feature-libcpupower), 1) + $(call detected,CONFIG_LIBCPUPOWER) + CFLAGS += -DHAVE_LIBCPUPOWER_SUPPORT + EXTLIBS += -lcpupower +else + $(info libcpupower is missing, building without --deepest-idle-state support.) + $(info Please install libcpupower-dev/kernel-tools-libs-devel) +endif + ifeq ($(STOP_ERROR),1) $(error Please, check the errors above.) endif diff --git a/tools/tracing/rtla/README.txt b/tools/tracing/rtla/README.txt index 4af3fd40f171..dd5621038c55 100644 --- a/tools/tracing/rtla/README.txt +++ b/tools/tracing/rtla/README.txt @@ -11,6 +11,7 @@ RTLA depends on the following libraries and tools: - libtracefs - libtraceevent + - libcpupower (optional, for --deepest-idle-state) It also depends on python3-docutils to compile man pages. @@ -26,6 +27,9 @@ For development, we suggest the following steps for compiling rtla: $ make $ sudo make install $ cd .. + $ cd $libcpupower_src + $ make + $ sudo make install $ cd $rtla_src $ make $ sudo make install diff --git a/tools/tracing/rtla/sample/timerlat_load.py b/tools/tracing/rtla/sample/timerlat_load.py index 8cc5eb2d2e69..a819c3588073 100644 --- a/tools/tracing/rtla/sample/timerlat_load.py +++ b/tools/tracing/rtla/sample/timerlat_load.py @@ -25,50 +25,54 @@ import sys import os parser = argparse.ArgumentParser(description='user-space timerlat thread in Python') -parser.add_argument("cpu", help='CPU to run timerlat thread') -parser.add_argument("-p", "--prio", help='FIFO priority') - +parser.add_argument("cpu", type=int, help='CPU to run timerlat thread') +parser.add_argument("-p", "--prio", type=int, help='FIFO priority') args = parser.parse_args() try: - affinity_mask = { int(args.cpu) } -except: - print("Invalid cpu: " + args.cpu) - exit(1) - -try: - os.sched_setaffinity(0, affinity_mask); -except: - print("Error setting affinity") - exit(1) + affinity_mask = {args.cpu} + os.sched_setaffinity(0, affinity_mask) +except Exception as e: + print(f"Error setting affinity: {e}") + sys.exit(1) -if (args.prio): +if args.prio: try: - param = os.sched_param(int(args.prio)) + param = os.sched_param(args.prio) os.sched_setscheduler(0, os.SCHED_FIFO, param) - except: - print("Error setting priority") - exit(1) + except Exception as e: + print(f"Error setting priority: {e}") + sys.exit(1) try: - timerlat_path = "/sys/kernel/tracing/osnoise/per_cpu/cpu" + args.cpu + "/timerlat_fd" + timerlat_path = f"/sys/kernel/tracing/osnoise/per_cpu/cpu{args.cpu}/timerlat_fd" timerlat_fd = open(timerlat_path, 'r') -except: +except PermissionError: + print("Permission denied. Please check your access rights.") + sys.exit(1) +except OSError: print("Error opening timerlat fd, did you run timerlat -U?") - exit(1) + sys.exit(1) try: - data_fd = open("/dev/full", 'r'); -except: - print("Error opening data fd") + data_fd = open("/dev/full", 'r') +except Exception as e: + print(f"Error opening data fd: {e}") + sys.exit(1) while True: try: timerlat_fd.read(1) - data_fd.read(20*1024*1024) - except: + data_fd.read(20 * 1024 * 1024) + except KeyboardInterrupt: print("Leaving") break + except IOError as e: + print(f"I/O error occurred: {e}") + break + except Exception as e: + print(f"Unexpected error: {e}") + break timerlat_fd.close() data_fd.close() diff --git a/tools/tracing/rtla/src/osnoise_top.c b/tools/tracing/rtla/src/osnoise_top.c index 30e3853076a0..45647495ce3b 100644 --- a/tools/tracing/rtla/src/osnoise_top.c +++ b/tools/tracing/rtla/src/osnoise_top.c @@ -627,7 +627,7 @@ osnoise_top_apply_config(struct osnoise_tool *tool, struct osnoise_top_params *p auto_house_keeping(¶ms->monitored_cpus); } - if (isatty(1) && !params->quiet) + if (isatty(STDOUT_FILENO) && !params->quiet) params->pretty_output = 1; return 0; diff --git a/tools/tracing/rtla/src/timerlat_hist.c b/tools/tracing/rtla/src/timerlat_hist.c index a3907c390d67..8b66387e5f35 100644 --- a/tools/tracing/rtla/src/timerlat_hist.c +++ b/tools/tracing/rtla/src/timerlat_hist.c @@ -55,6 +55,7 @@ struct timerlat_hist_params { int entries; int warmup; int buffer_size; + int deepest_idle_state; }; struct timerlat_hist_cpu { @@ -62,9 +63,9 @@ struct timerlat_hist_cpu { int *thread; int *user; - int irq_count; - int thread_count; - int user_count; + unsigned long long irq_count; + unsigned long long thread_count; + unsigned long long user_count; unsigned long long min_irq; unsigned long long sum_irq; @@ -304,15 +305,15 @@ timerlat_print_summary(struct timerlat_hist_params *params, continue; if (!params->no_irq) - trace_seq_printf(trace->seq, "%9d ", + trace_seq_printf(trace->seq, "%9llu ", data->hist[cpu].irq_count); if (!params->no_thread) - trace_seq_printf(trace->seq, "%9d ", + trace_seq_printf(trace->seq, "%9llu ", data->hist[cpu].thread_count); if (params->user_hist) - trace_seq_printf(trace->seq, "%9d ", + trace_seq_printf(trace->seq, "%9llu ", data->hist[cpu].user_count); } trace_seq_printf(trace->seq, "\n"); @@ -488,15 +489,15 @@ timerlat_print_stats_all(struct timerlat_hist_params *params, trace_seq_printf(trace->seq, "count:"); if (!params->no_irq) - trace_seq_printf(trace->seq, "%9d ", + trace_seq_printf(trace->seq, "%9llu ", sum.irq_count); if (!params->no_thread) - trace_seq_printf(trace->seq, "%9d ", + trace_seq_printf(trace->seq, "%9llu ", sum.thread_count); if (params->user_hist) - trace_seq_printf(trace->seq, "%9d ", + trace_seq_printf(trace->seq, "%9llu ", sum.user_count); trace_seq_printf(trace->seq, "\n"); @@ -655,7 +656,7 @@ static void timerlat_hist_usage(char *usage) " [-t[file]] [-e sys[:event]] [--filter <filter>] [--trigger <trigger>] [-c cpu-list] [-H cpu-list]\\", " [-P priority] [-E N] [-b N] [--no-irq] [--no-thread] [--no-header] [--no-summary] \\", " [--no-index] [--with-zeros] [--dma-latency us] [-C[=cgroup_name]] [--no-aa] [--dump-task] [-u|-k]", - " [--warm-up s]", + " [--warm-up s] [--deepest-idle-state n]", "", " -h/--help: print this menu", " -a/--auto: set automatic trace mode, stopping the session if argument in us latency is hit", @@ -695,6 +696,7 @@ static void timerlat_hist_usage(char *usage) " -U/--user-load: enable timerlat for user-defined user-space workload", " --warm-up s: let the workload run for s seconds before collecting data", " --trace-buffer-size kB: set the per-cpu trace buffer size in kB", + " --deepest-idle-state n: only go down to idle state n on cpus used by timerlat to reduce exit from idle latency", NULL, }; @@ -732,6 +734,9 @@ static struct timerlat_hist_params /* disabled by default */ params->dma_latency = -1; + /* disabled by default */ + params->deepest_idle_state = -2; + /* display data in microseconds */ params->output_divisor = 1000; params->bucket_size = 1; @@ -772,13 +777,14 @@ static struct timerlat_hist_params {"dump-task", no_argument, 0, '\1'}, {"warm-up", required_argument, 0, '\2'}, {"trace-buffer-size", required_argument, 0, '\3'}, + {"deepest-idle-state", required_argument, 0, '\4'}, {0, 0, 0, 0} }; /* getopt_long stores the option index here. */ int option_index = 0; - c = getopt_long(argc, argv, "a:c:C::b:d:e:E:DhH:i:knp:P:s:t::T:uU0123456:7:8:9\1\2:\3", + c = getopt_long(argc, argv, "a:c:C::b:d:e:E:DhH:i:knp:P:s:t::T:uU0123456:7:8:9\1\2:\3:", long_options, &option_index); /* detect the end of the options. */ @@ -960,6 +966,9 @@ static struct timerlat_hist_params case '\3': params->buffer_size = get_llong_from_str(optarg); break; + case '\4': + params->deepest_idle_state = get_llong_from_str(optarg); + break; default: timerlat_hist_usage("Invalid option"); } @@ -1064,7 +1073,7 @@ timerlat_hist_apply_config(struct osnoise_tool *tool, struct timerlat_hist_param * If the user did not specify a type of thread, try user-threads first. * Fall back to kernel threads otherwise. */ - if (!params->kernel_workload && !params->user_workload) { + if (!params->kernel_workload && !params->user_hist) { retval = tracefs_file_exists(NULL, "osnoise/per_cpu/cpu0/timerlat_fd"); if (retval) { debug_msg("User-space interface detected, setting user-threads\n"); @@ -1152,6 +1161,7 @@ int timerlat_hist_main(int argc, char *argv[]) int return_value = 1; pthread_t timerlat_u; int retval; + int nr_cpus, i; params = timerlat_hist_parse_args(argc, argv); if (!params) @@ -1201,6 +1211,28 @@ int timerlat_hist_main(int argc, char *argv[]) } } + if (params->deepest_idle_state >= -1) { + if (!have_libcpupower_support()) { + err_msg("rtla built without libcpupower, --deepest-idle-state is not supported\n"); + goto out_free; + } + + nr_cpus = sysconf(_SC_NPROCESSORS_CONF); + + for (i = 0; i < nr_cpus; i++) { + if (params->cpus && !CPU_ISSET(i, ¶ms->monitored_cpus)) + continue; + if (save_cpu_idle_disable_state(i) < 0) { + err_msg("Could not save cpu idle state.\n"); + goto out_free; + } + if (set_deepest_cpu_idle_state(i, params->deepest_idle_state) < 0) { + err_msg("Could not set deepest cpu idle state.\n"); + goto out_free; + } + } + } + if (params->trace_output) { record = osnoise_init_trace_tool("timerlat"); if (!record) { @@ -1332,6 +1364,13 @@ out_hist: timerlat_aa_destroy(); if (dma_latency_fd >= 0) close(dma_latency_fd); + if (params->deepest_idle_state >= -1) { + for (i = 0; i < nr_cpus; i++) { + if (params->cpus && !CPU_ISSET(i, ¶ms->monitored_cpus)) + continue; + restore_cpu_idle_disable_state(i); + } + } trace_events_destroy(&record->trace, params->events); params->events = NULL; out_free: @@ -1340,6 +1379,7 @@ out_free: osnoise_destroy_tool(record); osnoise_destroy_tool(tool); free(params); + free_cpu_idle_disable_states(); out_exit: exit(return_value); } diff --git a/tools/tracing/rtla/src/timerlat_top.c b/tools/tracing/rtla/src/timerlat_top.c index 210b0f533534..059b468981e4 100644 --- a/tools/tracing/rtla/src/timerlat_top.c +++ b/tools/tracing/rtla/src/timerlat_top.c @@ -48,15 +48,16 @@ struct timerlat_top_params { int pretty_output; int warmup; int buffer_size; + int deepest_idle_state; cpu_set_t hk_cpu_set; struct sched_attr sched_param; struct trace_events *events; }; struct timerlat_top_cpu { - int irq_count; - int thread_count; - int user_count; + unsigned long long irq_count; + unsigned long long thread_count; + unsigned long long user_count; unsigned long long cur_irq; unsigned long long min_irq; @@ -280,7 +281,7 @@ static void timerlat_top_print(struct osnoise_tool *top, int cpu) /* * Unless trace is being lost, IRQ counter is always the max. */ - trace_seq_printf(s, "%3d #%-9d |", cpu, cpu_data->irq_count); + trace_seq_printf(s, "%3d #%-9llu |", cpu, cpu_data->irq_count); if (!cpu_data->irq_count) { trace_seq_printf(s, "%s %s %s %s |", no_value, no_value, no_value, no_value); @@ -447,7 +448,7 @@ static void timerlat_top_usage(char *usage) "", " usage: rtla timerlat [top] [-h] [-q] [-a us] [-d s] [-D] [-n] [-p us] [-i us] [-T us] [-s us] \\", " [[-t[file]] [-e sys[:event]] [--filter <filter>] [--trigger <trigger>] [-c cpu-list] [-H cpu-list]\\", - " [-P priority] [--dma-latency us] [--aa-only us] [-C[=cgroup_name]] [-u|-k] [--warm-up s]", + " [-P priority] [--dma-latency us] [--aa-only us] [-C[=cgroup_name]] [-u|-k] [--warm-up s] [--deepest-idle-state n]", "", " -h/--help: print this menu", " -a/--auto: set automatic trace mode, stopping the session if argument in us latency is hit", @@ -481,6 +482,7 @@ static void timerlat_top_usage(char *usage) " -U/--user-load: enable timerlat for user-defined user-space workload", " --warm-up s: let the workload run for s seconds before collecting data", " --trace-buffer-size kB: set the per-cpu trace buffer size in kB", + " --deepest-idle-state n: only go down to idle state n on cpus used by timerlat to reduce exit from idle latency", NULL, }; @@ -518,6 +520,9 @@ static struct timerlat_top_params /* disabled by default */ params->dma_latency = -1; + /* disabled by default */ + params->deepest_idle_state = -2; + /* display data in microseconds */ params->output_divisor = 1000; @@ -550,6 +555,7 @@ static struct timerlat_top_params {"aa-only", required_argument, 0, '5'}, {"warm-up", required_argument, 0, '6'}, {"trace-buffer-size", required_argument, 0, '7'}, + {"deepest-idle-state", required_argument, 0, '8'}, {0, 0, 0, 0} }; @@ -726,6 +732,9 @@ static struct timerlat_top_params case '7': params->buffer_size = get_llong_from_str(optarg); break; + case '8': + params->deepest_idle_state = get_llong_from_str(optarg); + break; default: timerlat_top_usage("Invalid option"); } @@ -830,7 +839,7 @@ timerlat_top_apply_config(struct osnoise_tool *top, struct timerlat_top_params * * If the user did not specify a type of thread, try user-threads first. * Fall back to kernel threads otherwise. */ - if (!params->kernel_workload && !params->user_workload) { + if (!params->kernel_workload && !params->user_top) { retval = tracefs_file_exists(NULL, "osnoise/per_cpu/cpu0/timerlat_fd"); if (retval) { debug_msg("User-space interface detected, setting user-threads\n"); @@ -850,7 +859,7 @@ timerlat_top_apply_config(struct osnoise_tool *top, struct timerlat_top_params * } } - if (isatty(1) && !params->quiet) + if (isatty(STDOUT_FILENO) && !params->quiet) params->pretty_output = 1; return 0; @@ -922,6 +931,7 @@ int timerlat_top_main(int argc, char *argv[]) int return_value = 1; char *max_lat; int retval; + int nr_cpus, i; params = timerlat_top_parse_args(argc, argv); if (!params) @@ -971,6 +981,28 @@ int timerlat_top_main(int argc, char *argv[]) } } + if (params->deepest_idle_state >= -1) { + if (!have_libcpupower_support()) { + err_msg("rtla built without libcpupower, --deepest-idle-state is not supported\n"); + goto out_free; + } + + nr_cpus = sysconf(_SC_NPROCESSORS_CONF); + + for (i = 0; i < nr_cpus; i++) { + if (params->cpus && !CPU_ISSET(i, ¶ms->monitored_cpus)) + continue; + if (save_cpu_idle_disable_state(i) < 0) { + err_msg("Could not save cpu idle state.\n"); + goto out_free; + } + if (set_deepest_cpu_idle_state(i, params->deepest_idle_state) < 0) { + err_msg("Could not set deepest cpu idle state.\n"); + goto out_free; + } + } + } + if (params->trace_output) { record = osnoise_init_trace_tool("timerlat"); if (!record) { @@ -1125,6 +1157,13 @@ out_top: timerlat_aa_destroy(); if (dma_latency_fd >= 0) close(dma_latency_fd); + if (params->deepest_idle_state >= -1) { + for (i = 0; i < nr_cpus; i++) { + if (params->cpus && !CPU_ISSET(i, ¶ms->monitored_cpus)) + continue; + restore_cpu_idle_disable_state(i); + } + } trace_events_destroy(&record->trace, params->events); params->events = NULL; out_free: @@ -1134,6 +1173,7 @@ out_free: osnoise_destroy_tool(record); osnoise_destroy_tool(top); free(params); + free_cpu_idle_disable_states(); out_exit: exit(return_value); } diff --git a/tools/tracing/rtla/src/utils.c b/tools/tracing/rtla/src/utils.c index 9ac71a66840c..4995d35cf3ec 100644 --- a/tools/tracing/rtla/src/utils.c +++ b/tools/tracing/rtla/src/utils.c @@ -4,6 +4,9 @@ */ #define _GNU_SOURCE +#ifdef HAVE_LIBCPUPOWER_SUPPORT +#include <cpuidle.h> +#endif /* HAVE_LIBCPUPOWER_SUPPORT */ #include <dirent.h> #include <stdarg.h> #include <stdlib.h> @@ -211,29 +214,25 @@ long parse_ns_duration(char *val) /* * This is a set of helper functions to use SCHED_DEADLINE. */ -#ifdef __x86_64__ -# define __NR_sched_setattr 314 -# define __NR_sched_getattr 315 -#elif __i386__ -# define __NR_sched_setattr 351 -# define __NR_sched_getattr 352 -#elif __arm__ -# define __NR_sched_setattr 380 -# define __NR_sched_getattr 381 -#elif __aarch64__ || __riscv -# define __NR_sched_setattr 274 -# define __NR_sched_getattr 275 -#elif __powerpc__ -# define __NR_sched_setattr 355 -# define __NR_sched_getattr 356 -#elif __s390x__ -# define __NR_sched_setattr 345 -# define __NR_sched_getattr 346 +#ifndef __NR_sched_setattr +# ifdef __x86_64__ +# define __NR_sched_setattr 314 +# elif __i386__ +# define __NR_sched_setattr 351 +# elif __arm__ +# define __NR_sched_setattr 380 +# elif __aarch64__ || __riscv +# define __NR_sched_setattr 274 +# elif __powerpc__ +# define __NR_sched_setattr 355 +# elif __s390x__ +# define __NR_sched_setattr 345 +# endif #endif #define SCHED_DEADLINE 6 -static inline int sched_setattr(pid_t pid, const struct sched_attr *attr, +static inline int syscall_sched_setattr(pid_t pid, const struct sched_attr *attr, unsigned int flags) { return syscall(__NR_sched_setattr, pid, attr, flags); } @@ -243,7 +242,7 @@ int __set_sched_attr(int pid, struct sched_attr *attr) int flags = 0; int retval; - retval = sched_setattr(pid, attr, flags); + retval = syscall_sched_setattr(pid, attr, flags); if (retval < 0) { err_msg("Failed to set sched attributes to the pid %d: %s\n", pid, strerror(errno)); @@ -519,6 +518,153 @@ int set_cpu_dma_latency(int32_t latency) return fd; } +#ifdef HAVE_LIBCPUPOWER_SUPPORT +static unsigned int **saved_cpu_idle_disable_state; +static size_t saved_cpu_idle_disable_state_alloc_ctr; + +/* + * save_cpu_idle_state_disable - save disable for all idle states of a cpu + * + * Saves the current disable of all idle states of a cpu, to be subsequently + * restored via restore_cpu_idle_disable_state. + * + * Return: idle state count on success, negative on error + */ +int save_cpu_idle_disable_state(unsigned int cpu) +{ + unsigned int nr_states; + unsigned int state; + int disabled; + int nr_cpus; + + nr_states = cpuidle_state_count(cpu); + + if (nr_states == 0) + return 0; + + if (saved_cpu_idle_disable_state == NULL) { + nr_cpus = sysconf(_SC_NPROCESSORS_CONF); + saved_cpu_idle_disable_state = calloc(nr_cpus, sizeof(unsigned int *)); + if (!saved_cpu_idle_disable_state) + return -1; + } + + saved_cpu_idle_disable_state[cpu] = calloc(nr_states, sizeof(unsigned int)); + if (!saved_cpu_idle_disable_state[cpu]) + return -1; + saved_cpu_idle_disable_state_alloc_ctr++; + + for (state = 0; state < nr_states; state++) { + disabled = cpuidle_is_state_disabled(cpu, state); + if (disabled < 0) + return disabled; + saved_cpu_idle_disable_state[cpu][state] = disabled; + } + + return nr_states; +} + +/* + * restore_cpu_idle_disable_state - restore disable for all idle states of a cpu + * + * Restores the current disable state of all idle states of a cpu that was + * previously saved by save_cpu_idle_disable_state. + * + * Return: idle state count on success, negative on error + */ +int restore_cpu_idle_disable_state(unsigned int cpu) +{ + unsigned int nr_states; + unsigned int state; + int disabled; + int result; + + nr_states = cpuidle_state_count(cpu); + + if (nr_states == 0) + return 0; + + if (!saved_cpu_idle_disable_state) + return -1; + + for (state = 0; state < nr_states; state++) { + if (!saved_cpu_idle_disable_state[cpu]) + return -1; + disabled = saved_cpu_idle_disable_state[cpu][state]; + result = cpuidle_state_disable(cpu, state, disabled); + if (result < 0) + return result; + } + + free(saved_cpu_idle_disable_state[cpu]); + saved_cpu_idle_disable_state[cpu] = NULL; + saved_cpu_idle_disable_state_alloc_ctr--; + if (saved_cpu_idle_disable_state_alloc_ctr == 0) { + free(saved_cpu_idle_disable_state); + saved_cpu_idle_disable_state = NULL; + } + + return nr_states; +} + +/* + * free_cpu_idle_disable_states - free saved idle state disable for all cpus + * + * Frees the memory used for storing cpu idle state disable for all cpus + * and states. + * + * Normally, the memory is freed automatically in + * restore_cpu_idle_disable_state; this is mostly for cleaning up after an + * error. + */ +void free_cpu_idle_disable_states(void) +{ + int cpu; + int nr_cpus; + + if (!saved_cpu_idle_disable_state) + return; + + nr_cpus = sysconf(_SC_NPROCESSORS_CONF); + + for (cpu = 0; cpu < nr_cpus; cpu++) { + free(saved_cpu_idle_disable_state[cpu]); + saved_cpu_idle_disable_state[cpu] = NULL; + } + + free(saved_cpu_idle_disable_state); + saved_cpu_idle_disable_state = NULL; +} + +/* + * set_deepest_cpu_idle_state - limit idle state of cpu + * + * Disables all idle states deeper than the one given in + * deepest_state (assuming states with higher number are deeper). + * + * This is used to reduce the exit from idle latency. Unlike + * set_cpu_dma_latency, it can disable idle states per cpu. + * + * Return: idle state count on success, negative on error + */ +int set_deepest_cpu_idle_state(unsigned int cpu, unsigned int deepest_state) +{ + unsigned int nr_states; + unsigned int state; + int result; + + nr_states = cpuidle_state_count(cpu); + + for (state = deepest_state + 1; state < nr_states; state++) { + result = cpuidle_state_disable(cpu, state, 1); + if (result < 0) + return result; + } + + return nr_states; +} +#endif /* HAVE_LIBCPUPOWER_SUPPORT */ + #define _STR(x) #x #define STR(x) _STR(x) diff --git a/tools/tracing/rtla/src/utils.h b/tools/tracing/rtla/src/utils.h index d44513e6c66a..101d4799a009 100644 --- a/tools/tracing/rtla/src/utils.h +++ b/tools/tracing/rtla/src/utils.h @@ -46,6 +46,7 @@ update_sum(unsigned long long *a, unsigned long long *b) *a += *b; } +#ifndef SCHED_ATTR_SIZE_VER0 struct sched_attr { uint32_t size; uint32_t sched_policy; @@ -56,6 +57,7 @@ struct sched_attr { uint64_t sched_deadline; uint64_t sched_period; }; +#endif /* SCHED_ATTR_SIZE_VER0 */ int parse_prio(char *arg, struct sched_attr *sched_param); int parse_cpu_set(char *cpu_list, cpu_set_t *set); @@ -64,6 +66,19 @@ int set_comm_sched_attr(const char *comm_prefix, struct sched_attr *attr); int set_comm_cgroup(const char *comm_prefix, const char *cgroup); int set_pid_cgroup(pid_t pid, const char *cgroup); int set_cpu_dma_latency(int32_t latency); +#ifdef HAVE_LIBCPUPOWER_SUPPORT +int save_cpu_idle_disable_state(unsigned int cpu); +int restore_cpu_idle_disable_state(unsigned int cpu); +void free_cpu_idle_disable_states(void); +int set_deepest_cpu_idle_state(unsigned int cpu, unsigned int state); +static inline int have_libcpupower_support(void) { return 1; } +#else +static inline int save_cpu_idle_disable_state(unsigned int cpu) { return -1; } +static inline int restore_cpu_idle_disable_state(unsigned int cpu) { return -1; } +static inline void free_cpu_idle_disable_states(void) { } +static inline int set_deepest_cpu_idle_state(unsigned int cpu, unsigned int state) { return -1; } +static inline int have_libcpupower_support(void) { return 0; } +#endif /* HAVE_LIBCPUPOWER_SUPPORT */ int auto_house_keeping(cpu_set_t *monitored_cpus); #define ns_to_usf(x) (((double)x/1000)) diff --git a/tools/usb/usbip/src/usbip_detach.c b/tools/usb/usbip/src/usbip_detach.c index b29101986b5a..6b78d4a81e95 100644 --- a/tools/usb/usbip/src/usbip_detach.c +++ b/tools/usb/usbip/src/usbip_detach.c @@ -68,6 +68,7 @@ static int detach_port(char *port) } if (!found) { + ret = -1; err("Invalid port %s > maxports %d", port, vhci_driver->nports); goto call_driver_close; diff --git a/tools/verification/dot2/automata.py b/tools/verification/dot2/automata.py index baffeb960ff0..bdeb98baa8b0 100644 --- a/tools/verification/dot2/automata.py +++ b/tools/verification/dot2/automata.py @@ -29,11 +29,11 @@ class Automata: def __get_model_name(self): basename = ntpath.basename(self.__dot_path) - if basename.endswith(".dot") == False: + if not basename.endswith(".dot") and not basename.endswith(".gv"): print("not a dot file") raise Exception("not a dot file: %s" % self.__dot_path) - model_name = basename[0:-4] + model_name = ntpath.splitext(basename)[0] if model_name.__len__() == 0: raise Exception("not a dot file: %s" % self.__dot_path) @@ -68,9 +68,9 @@ class Automata: def __get_cursor_begin_events(self): cursor = 0 while self.__dot_lines[cursor].split()[0] != "{node": - cursor += 1 + cursor += 1 while self.__dot_lines[cursor].split()[0] == "{node": - cursor += 1 + cursor += 1 # skip initial state transition cursor += 1 return cursor @@ -94,11 +94,11 @@ class Automata: initial_state = state[7:] else: states.append(state) - if self.__dot_lines[cursor].__contains__("doublecircle") == True: + if "doublecircle" in self.__dot_lines[cursor]: final_states.append(state) has_final_states = True - if self.__dot_lines[cursor].__contains__("ellipse") == True: + if "ellipse" in self.__dot_lines[cursor]: final_states.append(state) has_final_states = True @@ -110,7 +110,7 @@ class Automata: # Insert the initial state at the bein og the states states.insert(0, initial_state) - if has_final_states == False: + if not has_final_states: final_states.append(initial_state) return states, initial_state, final_states @@ -120,7 +120,7 @@ class Automata: cursor = self.__get_cursor_begin_events() events = [] - while self.__dot_lines[cursor][1] == '"': + while self.__dot_lines[cursor].lstrip()[0] == '"': # transitions have the format: # "all_fired" -> "both_fired" [ label = "disable_irq" ]; # ------------ event is here ------------^^^^^ @@ -161,7 +161,7 @@ class Automata: # and we are back! Let's fill the matrix cursor = self.__get_cursor_begin_events() - while self.__dot_lines[cursor][1] == '"': + while self.__dot_lines[cursor].lstrip()[0] == '"': if self.__dot_lines[cursor].split()[1] == "->": line = self.__dot_lines[cursor].split() origin_state = line[0].replace('"','').replace(',','_') diff --git a/tools/verification/rv/src/in_kernel.c b/tools/verification/rv/src/in_kernel.c index f04479ecc96c..f2bbc75a76f4 100644 --- a/tools/verification/rv/src/in_kernel.c +++ b/tools/verification/rv/src/in_kernel.c @@ -332,7 +332,7 @@ static void ikm_print_header(struct trace_seq *s) * ikm_event_handler - callback to handle event events * * Called any time a rv:"monitor"_event events is generated. - * It parses and print event. + * It parses and prints event. */ static int ikm_event_handler(struct trace_seq *s, struct tep_record *record, @@ -384,7 +384,7 @@ ikm_event_handler(struct trace_seq *s, struct tep_record *record, * ikm_error_handler - callback to handle error events * * Called any time a rv:"monitor"_errors events is generated. - * It parses and print event. + * It parses and prints event. */ static int ikm_error_handler(struct trace_seq *s, struct tep_record *record, diff --git a/tools/verification/rv/src/trace.c b/tools/verification/rv/src/trace.c index 2c7deed47f8d..1b9f9bfa1893 100644 --- a/tools/verification/rv/src/trace.c +++ b/tools/verification/rv/src/trace.c @@ -81,7 +81,7 @@ void trace_instance_destroy(struct trace_instance *trace) } /** - * trace_instance_init - create an trace instance + * trace_instance_init - create a trace instance * * It is more than the tracefs instance, as it contains other * things required for the tracing, such as the local events and diff --git a/tools/virtio/vringh_test.c b/tools/virtio/vringh_test.c index 43d3a6aa1dcf..b9591223437a 100644 --- a/tools/virtio/vringh_test.c +++ b/tools/virtio/vringh_test.c @@ -519,7 +519,7 @@ int main(int argc, char *argv[]) errx(1, "virtqueue_add_sgs: %i", err); __kmalloc_fake = NULL; - /* Host retreives it. */ + /* Host retrieves it. */ vringh_iov_init(&riov, host_riov, ARRAY_SIZE(host_riov)); vringh_iov_init(&wiov, host_wiov, ARRAY_SIZE(host_wiov)); |