diff options
Diffstat (limited to 'tools')
136 files changed, 5456 insertions, 1159 deletions
diff --git a/tools/arch/x86/include/asm/inat.h b/tools/arch/x86/include/asm/inat.h index 877827b7c2c3..a61051400311 100644 --- a/tools/arch/x86/include/asm/inat.h +++ b/tools/arch/x86/include/asm/inat.h @@ -6,7 +6,7 @@ * * Written by Masami Hiramatsu <mhiramat@redhat.com> */ -#include "inat_types.h" +#include "inat_types.h" /* __ignore_sync_check__ */ /* * Internal bits. Don't use bitmasks directly, because these bits are diff --git a/tools/arch/x86/include/asm/insn.h b/tools/arch/x86/include/asm/insn.h index cc777c185212..dc632b41f135 100644 --- a/tools/arch/x86/include/asm/insn.h +++ b/tools/arch/x86/include/asm/insn.h @@ -9,7 +9,7 @@ #include <asm/byteorder.h> /* insn_attr_t is defined in inat.h */ -#include "inat.h" +#include "inat.h" /* __ignore_sync_check__ */ #if defined(__BYTE_ORDER) ? __BYTE_ORDER == __LITTLE_ENDIAN : defined(__LITTLE_ENDIAN) @@ -132,13 +132,25 @@ struct insn { #define X86_VEX_M_MAX 0x1f /* VEX3.M Maximum value */ extern void insn_init(struct insn *insn, const void *kaddr, int buf_len, int x86_64); -extern void insn_get_prefixes(struct insn *insn); -extern void insn_get_opcode(struct insn *insn); -extern void insn_get_modrm(struct insn *insn); -extern void insn_get_sib(struct insn *insn); -extern void insn_get_displacement(struct insn *insn); -extern void insn_get_immediate(struct insn *insn); -extern void insn_get_length(struct insn *insn); +extern int insn_get_prefixes(struct insn *insn); +extern int insn_get_opcode(struct insn *insn); +extern int insn_get_modrm(struct insn *insn); +extern int insn_get_sib(struct insn *insn); +extern int insn_get_displacement(struct insn *insn); +extern int insn_get_immediate(struct insn *insn); +extern int insn_get_length(struct insn *insn); + +enum insn_mode { + INSN_MODE_32, + INSN_MODE_64, + /* Mode is determined by the current kernel build. */ + INSN_MODE_KERN, + INSN_NUM_MODES, +}; + +extern int insn_decode(struct insn *insn, const void *kaddr, int buf_len, enum insn_mode m); + +#define insn_decode_kernel(_insn, _ptr) insn_decode((_insn), (_ptr), MAX_INSN_SIZE, INSN_MODE_KERN) /* Attribute will be determined after getting ModRM (for opcode groups) */ static inline void insn_get_attribute(struct insn *insn) @@ -149,17 +161,6 @@ static inline void insn_get_attribute(struct insn *insn) /* Instruction uses RIP-relative addressing */ extern int insn_rip_relative(struct insn *insn); -/* Init insn for kernel text */ -static inline void kernel_insn_init(struct insn *insn, - const void *kaddr, int buf_len) -{ -#ifdef CONFIG_X86_64 - insn_init(insn, kaddr, buf_len, 1); -#else /* CONFIG_X86_32 */ - insn_init(insn, kaddr, buf_len, 0); -#endif -} - static inline int insn_is_avx(struct insn *insn) { if (!insn->prefixes.got) @@ -179,13 +180,6 @@ static inline int insn_has_emulate_prefix(struct insn *insn) return !!insn->emulate_prefix_size; } -/* Ensure this instruction is decoded completely */ -static inline int insn_complete(struct insn *insn) -{ - return insn->opcode.got && insn->modrm.got && insn->sib.got && - insn->displacement.got && insn->immediate.got; -} - static inline insn_byte_t insn_vex_m_bits(struct insn *insn) { if (insn->vex_prefix.nbytes == 2) /* 2 bytes VEX */ diff --git a/tools/arch/x86/include/asm/msr-index.h b/tools/arch/x86/include/asm/msr-index.h index 546d6ecf0a35..45029354e0a8 100644 --- a/tools/arch/x86/include/asm/msr-index.h +++ b/tools/arch/x86/include/asm/msr-index.h @@ -628,8 +628,6 @@ #define MSR_IA32_APICBASE_ENABLE (1<<11) #define MSR_IA32_APICBASE_BASE (0xfffff<<12) -#define MSR_IA32_TSCDEADLINE 0x000006e0 - #define MSR_IA32_UCODE_WRITE 0x00000079 #define MSR_IA32_UCODE_REV 0x0000008b diff --git a/tools/arch/x86/include/asm/nops.h b/tools/arch/x86/include/asm/nops.h new file mode 100644 index 000000000000..c1e5e818ba16 --- /dev/null +++ b/tools/arch/x86/include/asm/nops.h @@ -0,0 +1,81 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_NOPS_H +#define _ASM_X86_NOPS_H + +/* + * Define nops for use with alternative() and for tracing. + */ + +#ifndef CONFIG_64BIT + +/* + * Generic 32bit nops from GAS: + * + * 1: nop + * 2: movl %esi,%esi + * 3: leal 0x0(%esi),%esi + * 4: leal 0x0(%esi,%eiz,1),%esi + * 5: leal %ds:0x0(%esi,%eiz,1),%esi + * 6: leal 0x0(%esi),%esi + * 7: leal 0x0(%esi,%eiz,1),%esi + * 8: leal %ds:0x0(%esi,%eiz,1),%esi + * + * Except 5 and 8, which are DS prefixed 4 and 7 resp, where GAS would emit 2 + * nop instructions. + */ +#define BYTES_NOP1 0x90 +#define BYTES_NOP2 0x89,0xf6 +#define BYTES_NOP3 0x8d,0x76,0x00 +#define BYTES_NOP4 0x8d,0x74,0x26,0x00 +#define BYTES_NOP5 0x3e,BYTES_NOP4 +#define BYTES_NOP6 0x8d,0xb6,0x00,0x00,0x00,0x00 +#define BYTES_NOP7 0x8d,0xb4,0x26,0x00,0x00,0x00,0x00 +#define BYTES_NOP8 0x3e,BYTES_NOP7 + +#else + +/* + * Generic 64bit nops from GAS: + * + * 1: nop + * 2: osp nop + * 3: nopl (%eax) + * 4: nopl 0x00(%eax) + * 5: nopl 0x00(%eax,%eax,1) + * 6: osp nopl 0x00(%eax,%eax,1) + * 7: nopl 0x00000000(%eax) + * 8: nopl 0x00000000(%eax,%eax,1) + */ +#define BYTES_NOP1 0x90 +#define BYTES_NOP2 0x66,BYTES_NOP1 +#define BYTES_NOP3 0x0f,0x1f,0x00 +#define BYTES_NOP4 0x0f,0x1f,0x40,0x00 +#define BYTES_NOP5 0x0f,0x1f,0x44,0x00,0x00 +#define BYTES_NOP6 0x66,BYTES_NOP5 +#define BYTES_NOP7 0x0f,0x1f,0x80,0x00,0x00,0x00,0x00 +#define BYTES_NOP8 0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00 + +#endif /* CONFIG_64BIT */ + +#ifdef __ASSEMBLY__ +#define _ASM_MK_NOP(x) .byte x +#else +#define _ASM_MK_NOP(x) ".byte " __stringify(x) "\n" +#endif + +#define ASM_NOP1 _ASM_MK_NOP(BYTES_NOP1) +#define ASM_NOP2 _ASM_MK_NOP(BYTES_NOP2) +#define ASM_NOP3 _ASM_MK_NOP(BYTES_NOP3) +#define ASM_NOP4 _ASM_MK_NOP(BYTES_NOP4) +#define ASM_NOP5 _ASM_MK_NOP(BYTES_NOP5) +#define ASM_NOP6 _ASM_MK_NOP(BYTES_NOP6) +#define ASM_NOP7 _ASM_MK_NOP(BYTES_NOP7) +#define ASM_NOP8 _ASM_MK_NOP(BYTES_NOP8) + +#define ASM_NOP_MAX 8 + +#ifndef __ASSEMBLY__ +extern const unsigned char * const x86_nops[]; +#endif + +#endif /* _ASM_X86_NOPS_H */ diff --git a/tools/arch/x86/kcpuid/Makefile b/tools/arch/x86/kcpuid/Makefile new file mode 100644 index 000000000000..87b554fab14b --- /dev/null +++ b/tools/arch/x86/kcpuid/Makefile @@ -0,0 +1,24 @@ +# SPDX-License-Identifier: GPL-2.0 +# Makefile for x86/kcpuid tool + +kcpuid : kcpuid.c + +CFLAGS = -Wextra + +BINDIR ?= /usr/sbin + +HWDATADIR ?= /usr/share/misc/ + +override CFLAGS += -O2 -Wall -I../../../include + +%: %.c + $(CC) $(CFLAGS) -o $@ $< $(LDFLAGS) + +.PHONY : clean +clean : + @rm -f kcpuid + +install : kcpuid + install -d $(DESTDIR)$(BINDIR) + install -m 755 -p kcpuid $(DESTDIR)$(BINDIR)/kcpuid + install -m 444 -p cpuid.csv $(HWDATADIR)/cpuid.csv diff --git a/tools/arch/x86/kcpuid/cpuid.csv b/tools/arch/x86/kcpuid/cpuid.csv new file mode 100644 index 000000000000..4f1c4b0c29e9 --- /dev/null +++ b/tools/arch/x86/kcpuid/cpuid.csv @@ -0,0 +1,400 @@ +# The basic row format is: +# LEAF, SUBLEAF, register_name, bits, short_name, long_description + +# Leaf 00H + 0, 0, EAX, 31:0, max_basic_leafs, Max input value for supported subleafs + +# Leaf 01H + 1, 0, EAX, 3:0, stepping, Stepping ID + 1, 0, EAX, 7:4, model, Model + 1, 0, EAX, 11:8, family, Family ID + 1, 0, EAX, 13:12, processor, Processor Type + 1, 0, EAX, 19:16, model_ext, Extended Model ID + 1, 0, EAX, 27:20, family_ext, Extended Family ID + + 1, 0, EBX, 7:0, brand, Brand Index + 1, 0, EBX, 15:8, clflush_size, CLFLUSH line size (value * 8) in bytes + 1, 0, EBX, 23:16, max_cpu_id, Maxim number of addressable logic cpu in this package + 1, 0, EBX, 31:24, apic_id, Initial APIC ID + + 1, 0, ECX, 0, sse3, Streaming SIMD Extensions 3(SSE3) + 1, 0, ECX, 1, pclmulqdq, PCLMULQDQ instruction supported + 1, 0, ECX, 2, dtes64, DS area uses 64-bit layout + 1, 0, ECX, 3, mwait, MONITOR/MWAIT supported + 1, 0, ECX, 4, ds_cpl, CPL Qualified Debug Store which allows for branch message storage qualified by CPL + 1, 0, ECX, 5, vmx, Virtual Machine Extensions supported + 1, 0, ECX, 6, smx, Safer Mode Extension supported + 1, 0, ECX, 7, eist, Enhanced Intel SpeedStep Technology + 1, 0, ECX, 8, tm2, Thermal Monitor 2 + 1, 0, ECX, 9, ssse3, Supplemental Streaming SIMD Extensions 3 (SSSE3) + 1, 0, ECX, 10, l1_ctx_id, L1 data cache could be set to either adaptive mode or shared mode (check IA32_MISC_ENABLE bit 24 definition) + 1, 0, ECX, 11, sdbg, IA32_DEBUG_INTERFACE MSR for silicon debug supported + 1, 0, ECX, 12, fma, FMA extensions using YMM state supported + 1, 0, ECX, 13, cmpxchg16b, 'CMPXCHG16B - Compare and Exchange Bytes' supported + 1, 0, ECX, 14, xtpr_update, xTPR Update Control supported + 1, 0, ECX, 15, pdcm, Perfmon and Debug Capability present + 1, 0, ECX, 17, pcid, Process-Context Identifiers feature present + 1, 0, ECX, 18, dca, Prefetching data from a memory mapped device supported + 1, 0, ECX, 19, sse4_1, SSE4.1 feature present + 1, 0, ECX, 20, sse4_2, SSE4.2 feature present + 1, 0, ECX, 21, x2apic, x2APIC supported + 1, 0, ECX, 22, movbe, MOVBE instruction supported + 1, 0, ECX, 23, popcnt, POPCNT instruction supported + 1, 0, ECX, 24, tsc_deadline_timer, LAPIC supports one-shot operation using a TSC deadline value + 1, 0, ECX, 25, aesni, AESNI instruction supported + 1, 0, ECX, 26, xsave, XSAVE/XRSTOR processor extended states (XSETBV/XGETBV/XCR0) + 1, 0, ECX, 27, osxsave, OS has set CR4.OSXSAVE bit to enable XSETBV/XGETBV/XCR0 + 1, 0, ECX, 28, avx, AVX instruction supported + 1, 0, ECX, 29, f16c, 16-bit floating-point conversion instruction supported + 1, 0, ECX, 30, rdrand, RDRAND instruction supported + + 1, 0, EDX, 0, fpu, x87 FPU on chip + 1, 0, EDX, 1, vme, Virtual-8086 Mode Enhancement + 1, 0, EDX, 2, de, Debugging Extensions + 1, 0, EDX, 3, pse, Page Size Extensions + 1, 0, EDX, 4, tsc, Time Stamp Counter + 1, 0, EDX, 5, msr, RDMSR and WRMSR Support + 1, 0, EDX, 6, pae, Physical Address Extensions + 1, 0, EDX, 7, mce, Machine Check Exception + 1, 0, EDX, 8, cx8, CMPXCHG8B instr + 1, 0, EDX, 9, apic, APIC on Chip + 1, 0, EDX, 11, sep, SYSENTER and SYSEXIT instrs + 1, 0, EDX, 12, mtrr, Memory Type Range Registers + 1, 0, EDX, 13, pge, Page Global Bit + 1, 0, EDX, 14, mca, Machine Check Architecture + 1, 0, EDX, 15, cmov, Conditional Move Instrs + 1, 0, EDX, 16, pat, Page Attribute Table + 1, 0, EDX, 17, pse36, 36-Bit Page Size Extension + 1, 0, EDX, 18, psn, Processor Serial Number + 1, 0, EDX, 19, clflush, CLFLUSH instr +# 1, 0, EDX, 20, + 1, 0, EDX, 21, ds, Debug Store + 1, 0, EDX, 22, acpi, Thermal Monitor and Software Controlled Clock Facilities + 1, 0, EDX, 23, mmx, Intel MMX Technology + 1, 0, EDX, 24, fxsr, XSAVE and FXRSTOR Instrs + 1, 0, EDX, 25, sse, SSE + 1, 0, EDX, 26, sse2, SSE2 + 1, 0, EDX, 27, ss, Self Snoop + 1, 0, EDX, 28, hit, Max APIC IDs + 1, 0, EDX, 29, tm, Thermal Monitor +# 1, 0, EDX, 30, + 1, 0, EDX, 31, pbe, Pending Break Enable + +# Leaf 02H +# cache and TLB descriptor info + +# Leaf 03H +# Precessor Serial Number, introduced on Pentium III, not valid for +# latest models + +# Leaf 04H +# thread/core and cache topology + 4, 0, EAX, 4:0, cache_type, Cache type like instr/data or unified + 4, 0, EAX, 7:5, cache_level, Cache Level (starts at 1) + 4, 0, EAX, 8, cache_self_init, Cache Self Initialization + 4, 0, EAX, 9, fully_associate, Fully Associative cache +# 4, 0, EAX, 13:10, resvd, resvd + 4, 0, EAX, 25:14, max_logical_id, Max number of addressable IDs for logical processors sharing the cache + 4, 0, EAX, 31:26, max_phy_id, Max number of addressable IDs for processors in phy package + + 4, 0, EBX, 11:0, cache_linesize, Size of a cache line in bytes + 4, 0, EBX, 21:12, cache_partition, Physical Line partitions + 4, 0, EBX, 31:22, cache_ways, Ways of associativity + 4, 0, ECX, 31:0, cache_sets, Number of Sets - 1 + 4, 0, EDX, 0, c_wbinvd, 1 means WBINVD/INVD is not ganranteed to act upon lower level caches of non-originating threads sharing this cache + 4, 0, EDX, 1, c_incl, Whether cache is inclusive of lower cache level + 4, 0, EDX, 2, c_comp_index, Complex Cache Indexing + +# Leaf 05H +# MONITOR/MWAIT + 5, 0, EAX, 15:0, min_mon_size, Smallest monitor line size in bytes + 5, 0, EBX, 15:0, max_mon_size, Largest monitor line size in bytes + 5, 0, ECX, 0, mwait_ext, Enum of Monitor-Mwait extensions supported + 5, 0, ECX, 1, mwait_irq_break, Largest monitor line size in bytes + 5, 0, EDX, 3:0, c0_sub_stats, Number of C0* sub C-states supported using MWAIT + 5, 0, EDX, 7:4, c1_sub_stats, Number of C1* sub C-states supported using MWAIT + 5, 0, EDX, 11:8, c2_sub_stats, Number of C2* sub C-states supported using MWAIT + 5, 0, EDX, 15:12, c3_sub_stats, Number of C3* sub C-states supported using MWAIT + 5, 0, EDX, 19:16, c4_sub_stats, Number of C4* sub C-states supported using MWAIT + 5, 0, EDX, 23:20, c5_sub_stats, Number of C5* sub C-states supported using MWAIT + 5, 0, EDX, 27:24, c6_sub_stats, Number of C6* sub C-states supported using MWAIT + 5, 0, EDX, 31:28, c7_sub_stats, Number of C7* sub C-states supported using MWAIT + +# Leaf 06H +# Thermal & Power Management + + 6, 0, EAX, 0, dig_temp, Digital temperature sensor supported + 6, 0, EAX, 1, turbo, Intel Turbo Boost + 6, 0, EAX, 2, arat, Always running APIC timer +# 6, 0, EAX, 3, resv, Reserved + 6, 0, EAX, 4, pln, Power limit notifications supported + 6, 0, EAX, 5, ecmd, Clock modulation duty cycle extension supported + 6, 0, EAX, 6, ptm, Package thermal management supported + 6, 0, EAX, 7, hwp, HWP base register + 6, 0, EAX, 8, hwp_notify, HWP notification + 6, 0, EAX, 9, hwp_act_window, HWP activity window + 6, 0, EAX, 10, hwp_energy, HWP energy performance preference + 6, 0, EAX, 11, hwp_pkg_req, HWP package level request +# 6, 0, EAX, 12, resv, Reserved + 6, 0, EAX, 13, hdc, HDC base registers supported + 6, 0, EAX, 14, turbo3, Turbo Boost Max 3.0 + 6, 0, EAX, 15, hwp_cap, Highest Performance change supported + 6, 0, EAX, 16, hwp_peci, HWP PECI override is supported + 6, 0, EAX, 17, hwp_flex, Flexible HWP is supported + 6, 0, EAX, 18, hwp_fast, Fast access mode for the IA32_HWP_REQUEST MSR is supported +# 6, 0, EAX, 19, resv, Reserved + 6, 0, EAX, 20, hwp_ignr, Ignoring Idle Logical Processor HWP request is supported + + 6, 0, EBX, 3:0, therm_irq_thresh, Number of Interrupt Thresholds in Digital Thermal Sensor + 6, 0, ECX, 0, aperfmperf, Presence of IA32_MPERF and IA32_APERF + 6, 0, ECX, 3, energ_bias, Performance-energy bias preference supported + +# Leaf 07H +# ECX == 0 +# AVX512 refers to https://en.wikipedia.org/wiki/AVX-512 +# XXX: Do we really need to enumerate each and every AVX512 sub features + + 7, 0, EBX, 0, fsgsbase, RDFSBASE/RDGSBASE/WRFSBASE/WRGSBASE supported + 7, 0, EBX, 1, tsc_adjust, TSC_ADJUST MSR supported + 7, 0, EBX, 2, sgx, Software Guard Extensions + 7, 0, EBX, 3, bmi1, BMI1 + 7, 0, EBX, 4, hle, Hardware Lock Elision + 7, 0, EBX, 5, avx2, AVX2 +# 7, 0, EBX, 6, fdp_excp_only, x87 FPU Data Pointer updated only on x87 exceptions + 7, 0, EBX, 7, smep, Supervisor-Mode Execution Prevention + 7, 0, EBX, 8, bmi2, BMI2 + 7, 0, EBX, 9, rep_movsb, Enhanced REP MOVSB/STOSB + 7, 0, EBX, 10, invpcid, INVPCID instruction + 7, 0, EBX, 11, rtm, Restricted Transactional Memory + 7, 0, EBX, 12, rdt_m, Intel RDT Monitoring capability + 7, 0, EBX, 13, depc_fpu_cs_ds, Deprecates FPU CS and FPU DS + 7, 0, EBX, 14, mpx, Memory Protection Extensions + 7, 0, EBX, 15, rdt_a, Intel RDT Allocation capability + 7, 0, EBX, 16, avx512f, AVX512 Foundation instr + 7, 0, EBX, 17, avx512dq, AVX512 Double and Quadword AVX512 instr + 7, 0, EBX, 18, rdseed, RDSEED instr + 7, 0, EBX, 19, adx, ADX instr + 7, 0, EBX, 20, smap, Supervisor Mode Access Prevention + 7, 0, EBX, 21, avx512ifma, AVX512 Integer Fused Multiply Add +# 7, 0, EBX, 22, resvd, resvd + 7, 0, EBX, 23, clflushopt, CLFLUSHOPT instr + 7, 0, EBX, 24, clwb, CLWB instr + 7, 0, EBX, 25, intel_pt, Intel Processor Trace instr + 7, 0, EBX, 26, avx512pf, Prefetch + 7, 0, EBX, 27, avx512er, AVX512 Exponent Reciproca instr + 7, 0, EBX, 28, avx512cd, AVX512 Conflict Detection instr + 7, 0, EBX, 29, sha, Intel Secure Hash Algorithm Extensions instr + 7, 0, EBX, 26, avx512bw, AVX512 Byte & Word instr + 7, 0, EBX, 28, avx512vl, AVX512 Vector Length Extentions (VL) + 7, 0, ECX, 0, prefetchwt1, X + 7, 0, ECX, 1, avx512vbmi, AVX512 Vector Byte Manipulation Instructions + 7, 0, ECX, 2, umip, User-mode Instruction Prevention + + 7, 0, ECX, 3, pku, Protection Keys for User-mode pages + 7, 0, ECX, 4, ospke, CR4 PKE set to enable protection keys +# 7, 0, ECX, 16:5, resvd, resvd + 7, 0, ECX, 21:17, mawau, The value of MAWAU used by the BNDLDX and BNDSTX instructions in 64-bit mode + 7, 0, ECX, 22, rdpid, RDPID and IA32_TSC_AUX +# 7, 0, ECX, 29:23, resvd, resvd + 7, 0, ECX, 30, sgx_lc, SGX Launch Configuration +# 7, 0, ECX, 31, resvd, resvd + +# Leaf 08H +# + + +# Leaf 09H +# Direct Cache Access (DCA) information + 9, 0, ECX, 31:0, dca_cap, The value of IA32_PLATFORM_DCA_CAP + +# Leaf 0AH +# Architectural Performance Monitoring +# +# Do we really need to print out the PMU related stuff? +# Does normal user really care about it? +# + 0xA, 0, EAX, 7:0, pmu_ver, Performance Monitoring Unit version + 0xA, 0, EAX, 15:8, pmu_gp_cnt_num, Numer of general-purose PMU counters per logical CPU + 0xA, 0, EAX, 23:16, pmu_cnt_bits, Bit wideth of PMU counter + 0xA, 0, EAX, 31:24, pmu_ebx_bits, Length of EBX bit vector to enumerate PMU events + + 0xA, 0, EBX, 0, pmu_no_core_cycle_evt, Core cycle event not available + 0xA, 0, EBX, 1, pmu_no_instr_ret_evt, Instruction retired event not available + 0xA, 0, EBX, 2, pmu_no_ref_cycle_evt, Reference cycles event not available + 0xA, 0, EBX, 3, pmu_no_llc_ref_evt, Last-level cache reference event not available + 0xA, 0, EBX, 4, pmu_no_llc_mis_evt, Last-level cache misses event not available + 0xA, 0, EBX, 5, pmu_no_br_instr_ret_evt, Branch instruction retired event not available + 0xA, 0, EBX, 6, pmu_no_br_mispredict_evt, Branch mispredict retired event not available + + 0xA, 0, ECX, 4:0, pmu_fixed_cnt_num, Performance Monitoring Unit version + 0xA, 0, ECX, 12:5, pmu_fixed_cnt_bits, Numer of PMU counters per logical CPU + +# Leaf 0BH +# Extended Topology Enumeration Leaf +# + + 0xB, 0, EAX, 4:0, id_shift, Number of bits to shift right on x2APIC ID to get a unique topology ID of the next level type + 0xB, 0, EBX, 15:0, cpu_nr, Number of logical processors at this level type + 0xB, 0, ECX, 15:8, lvl_type, 0-Invalid 1-SMT 2-Core + 0xB, 0, EDX, 31:0, x2apic_id, x2APIC ID the current logical processor + + +# Leaf 0DH +# Processor Extended State + + 0xD, 0, EAX, 0, x87, X87 state + 0xD, 0, EAX, 1, sse, SSE state + 0xD, 0, EAX, 2, avx, AVX state + 0xD, 0, EAX, 4:3, mpx, MPX state + 0xD, 0, EAX, 7:5, avx512, AVX-512 state + 0xD, 0, EAX, 9, pkru, PKRU state + + 0xD, 0, EBX, 31:0, max_sz_xcr0, Maximum size (bytes) required by enabled features in XCR0 + 0xD, 0, ECX, 31:0, max_sz_xsave, Maximum size (bytes) of the XSAVE/XRSTOR save area + + 0xD, 1, EAX, 0, xsaveopt, XSAVEOPT available + 0xD, 1, EAX, 1, xsavec, XSAVEC and compacted form supported + 0xD, 1, EAX, 2, xgetbv, XGETBV supported + 0xD, 1, EAX, 3, xsaves, XSAVES/XRSTORS and IA32_XSS supported + + 0xD, 1, EBX, 31:0, max_sz_xcr0, Maximum size (bytes) required by enabled features in XCR0 + 0xD, 1, ECX, 8, pt, PT state + 0xD, 1, ECX, 11, cet_usr, CET user state + 0xD, 1, ECX, 12, cet_supv, CET supervisor state + 0xD, 1, ECX, 13, hdc, HDC state + 0xD, 1, ECX, 16, hwp, HWP state + +# Leaf 0FH +# Intel RDT Monitoring + + 0xF, 0, EBX, 31:0, rmid_range, Maximum range (zero-based) of RMID within this physical processor of all types + 0xF, 0, EDX, 1, l3c_rdt_mon, L3 Cache RDT Monitoring supported + + 0xF, 1, ECX, 31:0, rmid_range, Maximum range (zero-based) of RMID of this types + 0xF, 1, EDX, 0, l3c_ocp_mon, L3 Cache occupancy Monitoring supported + 0xF, 1, EDX, 1, l3c_tbw_mon, L3 Cache Total Bandwidth Monitoring supported + 0xF, 1, EDX, 2, l3c_lbw_mon, L3 Cache Local Bandwidth Monitoring supported + +# Leaf 10H +# Intel RDT Allocation + + 0x10, 0, EBX, 1, l3c_rdt_alloc, L3 Cache Allocation supported + 0x10, 0, EBX, 2, l2c_rdt_alloc, L2 Cache Allocation supported + 0x10, 0, EBX, 3, mem_bw_alloc, Memory Bandwidth Allocation supported + + +# Leaf 12H +# SGX Capability +# +# Some detailed SGX features not added yet + + 0x12, 0, EAX, 0, sgx1, L3 Cache Allocation supported + 0x12, 1, EAX, 0, sgx2, L3 Cache Allocation supported + + +# Leaf 14H +# Intel Processor Tracer +# + +# Leaf 15H +# Time Stamp Counter and Nominal Core Crystal Clock Information + + 0x15, 0, EAX, 31:0, tsc_denominator, The denominator of the TSC/”core crystal clock” ratio + 0x15, 0, EBX, 31:0, tsc_numerator, The numerator of the TSC/”core crystal clock” ratio + 0x15, 0, ECX, 31:0, nom_freq, Nominal frequency of the core crystal clock in Hz + +# Leaf 16H +# Processor Frequency Information + + 0x16, 0, EAX, 15:0, cpu_base_freq, Processor Base Frequency in MHz + 0x16, 0, EBX, 15:0, cpu_max_freq, Maximum Frequency in MHz + 0x16, 0, ECX, 15:0, bus_freq, Bus (Reference) Frequency in MHz + +# Leaf 17H +# System-On-Chip Vendor Attribute + + 0x17, 0, EAX, 31:0, max_socid, Maximum input value of supported sub-leaf + 0x17, 0, EBX, 15:0, soc_vid, SOC Vendor ID + 0x17, 0, EBX, 16, std_vid, SOC Vendor ID is assigned via an industry standard scheme + 0x17, 0, ECX, 31:0, soc_pid, SOC Project ID assigned by vendor + 0x17, 0, EDX, 31:0, soc_sid, SOC Stepping ID + +# Leaf 18H +# Deterministic Address Translation Parameters + + +# Leaf 19H +# Key Locker Leaf + + +# Leaf 1AH +# Hybrid Information + + 0x1A, 0, EAX, 31:24, core_type, 20H-Intel_Atom 40H-Intel_Core + + +# Leaf 1FH +# V2 Extended Topology - A preferred superset to leaf 0BH + + +# According to SDM +# 40000000H - 4FFFFFFFH is invalid range + + +# Leaf 80000001H +# Extended Processor Signature and Feature Bits + +0x80000001, 0, ECX, 0, lahf_lm, LAHF/SAHF available in 64-bit mode +0x80000001, 0, ECX, 5, lzcnt, LZCNT +0x80000001, 0, ECX, 8, prefetchw, PREFETCHW + +0x80000001, 0, EDX, 11, sysret, SYSCALL/SYSRET supported +0x80000001, 0, EDX, 20, exec_dis, Execute Disable Bit available +0x80000001, 0, EDX, 26, 1gb_page, 1GB page supported +0x80000001, 0, EDX, 27, rdtscp, RDTSCP and IA32_TSC_AUX are available +#0x80000001, 0, EDX, 29, 64b, 64b Architecture supported + +# Leaf 80000002H/80000003H/80000004H +# Processor Brand String + +# Leaf 80000005H +# Reserved + +# Leaf 80000006H +# Extended L2 Cache Features + +0x80000006, 0, ECX, 7:0, clsize, Cache Line size in bytes +0x80000006, 0, ECX, 15:12, l2c_assoc, L2 Associativity +0x80000006, 0, ECX, 31:16, csize, Cache size in 1K units + + +# Leaf 80000007H + +0x80000007, 0, EDX, 8, nonstop_tsc, Invariant TSC available + + +# Leaf 80000008H + +0x80000008, 0, EAX, 7:0, phy_adr_bits, Physical Address Bits +0x80000008, 0, EAX, 15:8, lnr_adr_bits, Linear Address Bits +0x80000007, 0, EBX, 9, wbnoinvd, WBNOINVD + +# 0x8000001E +# EAX: Extended APIC ID +0x8000001E, 0, EAX, 31:0, extended_apic_id, Extended APIC ID +# EBX: Core Identifiers +0x8000001E, 0, EBX, 7:0, core_id, Identifies the logical core ID +0x8000001E, 0, EBX, 15:8, threads_per_core, The number of threads per core is threads_per_core + 1 +# ECX: Node Identifiers +0x8000001E, 0, ECX, 7:0, node_id, Node ID +0x8000001E, 0, ECX, 10:8, nodes_per_processor, Nodes per processor { 0: 1 node, else reserved } + +# 8000001F: AMD Secure Encryption +0x8000001F, 0, EAX, 0, sme, Secure Memory Encryption +0x8000001F, 0, EAX, 1, sev, Secure Encrypted Virtualization +0x8000001F, 0, EAX, 2, vmpgflush, VM Page Flush MSR +0x8000001F, 0, EAX, 3, seves, SEV Encrypted State +0x8000001F, 0, EBX, 5:0, c-bit, Page table bit number used to enable memory encryption +0x8000001F, 0, EBX, 11:6, mem_encrypt_physaddr_width, Reduction of physical address space in bits with SME enabled +0x8000001F, 0, ECX, 31:0, num_encrypted_guests, Maximum ASID value that may be used for an SEV-enabled guest +0x8000001F, 0, EDX, 31:0, minimum_sev_asid, Minimum ASID value that must be used for an SEV-enabled, SEV-ES-disabled guest diff --git a/tools/arch/x86/kcpuid/kcpuid.c b/tools/arch/x86/kcpuid/kcpuid.c new file mode 100644 index 000000000000..dae75511fef7 --- /dev/null +++ b/tools/arch/x86/kcpuid/kcpuid.c @@ -0,0 +1,657 @@ +// SPDX-License-Identifier: GPL-2.0 +#define _GNU_SOURCE + +#include <stdio.h> +#include <stdbool.h> +#include <stdlib.h> +#include <string.h> +#include <getopt.h> + +#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) + +typedef unsigned int u32; +typedef unsigned long long u64; + +char *def_csv = "/usr/share/misc/cpuid.csv"; +char *user_csv; + + +/* Cover both single-bit flag and multiple-bits fields */ +struct bits_desc { + /* start and end bits */ + int start, end; + /* 0 or 1 for 1-bit flag */ + int value; + char simp[32]; + char detail[256]; +}; + +/* descriptor info for eax/ebx/ecx/edx */ +struct reg_desc { + /* number of valid entries */ + int nr; + struct bits_desc descs[32]; +}; + +enum { + R_EAX = 0, + R_EBX, + R_ECX, + R_EDX, + NR_REGS +}; + +struct subleaf { + u32 index; + u32 sub; + u32 eax, ebx, ecx, edx; + struct reg_desc info[NR_REGS]; +}; + +/* Represent one leaf (basic or extended) */ +struct cpuid_func { + /* + * Array of subleafs for this func, if there is no subleafs + * then the leafs[0] is the main leaf + */ + struct subleaf *leafs; + int nr; +}; + +struct cpuid_range { + /* array of main leafs */ + struct cpuid_func *funcs; + /* number of valid leafs */ + int nr; + bool is_ext; +}; + +/* + * basic: basic functions range: [0... ] + * ext: extended functions range: [0x80000000... ] + */ +struct cpuid_range *leafs_basic, *leafs_ext; + +static int num_leafs; +static bool is_amd; +static bool show_details; +static bool show_raw; +static bool show_flags_only = true; +static u32 user_index = 0xFFFFFFFF; +static u32 user_sub = 0xFFFFFFFF; +static int flines; + +static inline void cpuid(u32 *eax, u32 *ebx, u32 *ecx, u32 *edx) +{ + /* ecx is often an input as well as an output. */ + asm volatile("cpuid" + : "=a" (*eax), + "=b" (*ebx), + "=c" (*ecx), + "=d" (*edx) + : "0" (*eax), "2" (*ecx)); +} + +static inline bool has_subleafs(u32 f) +{ + if (f == 0x7 || f == 0xd) + return true; + + if (is_amd) { + if (f == 0x8000001d) + return true; + return false; + } + + switch (f) { + case 0x4: + case 0xb: + case 0xf: + case 0x10: + case 0x14: + case 0x18: + case 0x1f: + return true; + default: + return false; + } +} + +static void leaf_print_raw(struct subleaf *leaf) +{ + if (has_subleafs(leaf->index)) { + if (leaf->sub == 0) + printf("0x%08x: subleafs:\n", leaf->index); + + printf(" %2d: EAX=0x%08x, EBX=0x%08x, ECX=0x%08x, EDX=0x%08x\n", + leaf->sub, leaf->eax, leaf->ebx, leaf->ecx, leaf->edx); + } else { + printf("0x%08x: EAX=0x%08x, EBX=0x%08x, ECX=0x%08x, EDX=0x%08x\n", + leaf->index, leaf->eax, leaf->ebx, leaf->ecx, leaf->edx); + } +} + +/* Return true is the input eax/ebx/ecx/edx are all zero */ +static bool cpuid_store(struct cpuid_range *range, u32 f, int subleaf, + u32 a, u32 b, u32 c, u32 d) +{ + struct cpuid_func *func; + struct subleaf *leaf; + int s = 0; + + if (a == 0 && b == 0 && c == 0 && d == 0) + return true; + + /* + * Cut off vendor-prefix from CPUID function as we're using it as an + * index into ->funcs. + */ + func = &range->funcs[f & 0xffff]; + + if (!func->leafs) { + func->leafs = malloc(sizeof(struct subleaf)); + if (!func->leafs) + perror("malloc func leaf"); + + func->nr = 1; + } else { + s = func->nr; + func->leafs = realloc(func->leafs, (s + 1) * sizeof(*leaf)); + if (!func->leafs) + perror("realloc f->leafs"); + + func->nr++; + } + + leaf = &func->leafs[s]; + + leaf->index = f; + leaf->sub = subleaf; + leaf->eax = a; + leaf->ebx = b; + leaf->ecx = c; + leaf->edx = d; + + return false; +} + +static void raw_dump_range(struct cpuid_range *range) +{ + u32 f; + int i; + + printf("%s Leafs :\n", range->is_ext ? "Extended" : "Basic"); + printf("================\n"); + + for (f = 0; (int)f < range->nr; f++) { + struct cpuid_func *func = &range->funcs[f]; + u32 index = f; + + if (range->is_ext) + index += 0x80000000; + + /* Skip leaf without valid items */ + if (!func->nr) + continue; + + /* First item is the main leaf, followed by all subleafs */ + for (i = 0; i < func->nr; i++) + leaf_print_raw(&func->leafs[i]); + } +} + +#define MAX_SUBLEAF_NUM 32 +struct cpuid_range *setup_cpuid_range(u32 input_eax) +{ + u32 max_func, idx_func; + int subleaf; + struct cpuid_range *range; + u32 eax, ebx, ecx, edx; + u32 f = input_eax; + int max_subleaf; + bool allzero; + + eax = input_eax; + ebx = ecx = edx = 0; + + cpuid(&eax, &ebx, &ecx, &edx); + max_func = eax; + idx_func = (max_func & 0xffff) + 1; + + range = malloc(sizeof(struct cpuid_range)); + if (!range) + perror("malloc range"); + + if (input_eax & 0x80000000) + range->is_ext = true; + else + range->is_ext = false; + + range->funcs = malloc(sizeof(struct cpuid_func) * idx_func); + if (!range->funcs) + perror("malloc range->funcs"); + + range->nr = idx_func; + memset(range->funcs, 0, sizeof(struct cpuid_func) * idx_func); + + for (; f <= max_func; f++) { + eax = f; + subleaf = ecx = 0; + + cpuid(&eax, &ebx, &ecx, &edx); + allzero = cpuid_store(range, f, subleaf, eax, ebx, ecx, edx); + if (allzero) + continue; + num_leafs++; + + if (!has_subleafs(f)) + continue; + + max_subleaf = MAX_SUBLEAF_NUM; + + /* + * Some can provide the exact number of subleafs, + * others have to be tried (0xf) + */ + if (f == 0x7 || f == 0x14 || f == 0x17 || f == 0x18) + max_subleaf = (eax & 0xff) + 1; + + if (f == 0xb) + max_subleaf = 2; + + for (subleaf = 1; subleaf < max_subleaf; subleaf++) { + eax = f; + ecx = subleaf; + + cpuid(&eax, &ebx, &ecx, &edx); + allzero = cpuid_store(range, f, subleaf, + eax, ebx, ecx, edx); + if (allzero) + continue; + num_leafs++; + } + + } + + return range; +} + +/* + * The basic row format for cpuid.csv is + * LEAF,SUBLEAF,register_name,bits,short name,long description + * + * like: + * 0, 0, EAX, 31:0, max_basic_leafs, Max input value for supported subleafs + * 1, 0, ECX, 0, sse3, Streaming SIMD Extensions 3(SSE3) + */ +static int parse_line(char *line) +{ + char *str; + int i; + struct cpuid_range *range; + struct cpuid_func *func; + struct subleaf *leaf; + u32 index; + u32 sub; + char buffer[512]; + char *buf; + /* + * Tokens: + * 1. leaf + * 2. subleaf + * 3. register + * 4. bits + * 5. short name + * 6. long detail + */ + char *tokens[6]; + struct reg_desc *reg; + struct bits_desc *bdesc; + int reg_index; + char *start, *end; + + /* Skip comments and NULL line */ + if (line[0] == '#' || line[0] == '\n') + return 0; + + strncpy(buffer, line, 511); + buffer[511] = 0; + str = buffer; + for (i = 0; i < 5; i++) { + tokens[i] = strtok(str, ","); + if (!tokens[i]) + goto err_exit; + str = NULL; + } + tokens[5] = strtok(str, "\n"); + if (!tokens[5]) + goto err_exit; + + /* index/main-leaf */ + index = strtoull(tokens[0], NULL, 0); + + if (index & 0x80000000) + range = leafs_ext; + else + range = leafs_basic; + + index &= 0x7FFFFFFF; + /* Skip line parsing for non-existing indexes */ + if ((int)index >= range->nr) + return -1; + + func = &range->funcs[index]; + + /* Return if the index has no valid item on this platform */ + if (!func->nr) + return 0; + + /* subleaf */ + sub = strtoul(tokens[1], NULL, 0); + if ((int)sub > func->nr) + return -1; + + leaf = &func->leafs[sub]; + buf = tokens[2]; + + if (strcasestr(buf, "EAX")) + reg_index = R_EAX; + else if (strcasestr(buf, "EBX")) + reg_index = R_EBX; + else if (strcasestr(buf, "ECX")) + reg_index = R_ECX; + else if (strcasestr(buf, "EDX")) + reg_index = R_EDX; + else + goto err_exit; + + reg = &leaf->info[reg_index]; + bdesc = ®->descs[reg->nr++]; + + /* bit flag or bits field */ + buf = tokens[3]; + + end = strtok(buf, ":"); + bdesc->end = strtoul(end, NULL, 0); + bdesc->start = bdesc->end; + + /* start != NULL means it is bit fields */ + start = strtok(NULL, ":"); + if (start) + bdesc->start = strtoul(start, NULL, 0); + + strcpy(bdesc->simp, tokens[4]); + strcpy(bdesc->detail, tokens[5]); + return 0; + +err_exit: + printf("Warning: wrong line format:\n"); + printf("\tline[%d]: %s\n", flines, line); + return -1; +} + +/* Parse csv file, and construct the array of all leafs and subleafs */ +static void parse_text(void) +{ + FILE *file; + char *filename, *line = NULL; + size_t len = 0; + int ret; + + if (show_raw) + return; + + filename = user_csv ? user_csv : def_csv; + file = fopen(filename, "r"); + if (!file) { + /* Fallback to a csv in the same dir */ + file = fopen("./cpuid.csv", "r"); + } + + if (!file) { + printf("Fail to open '%s'\n", filename); + return; + } + + while (1) { + ret = getline(&line, &len, file); + flines++; + if (ret > 0) + parse_line(line); + + if (feof(file)) + break; + } + + fclose(file); +} + + +/* Decode every eax/ebx/ecx/edx */ +static void decode_bits(u32 value, struct reg_desc *rdesc) +{ + struct bits_desc *bdesc; + int start, end, i; + u32 mask; + + for (i = 0; i < rdesc->nr; i++) { + bdesc = &rdesc->descs[i]; + + start = bdesc->start; + end = bdesc->end; + if (start == end) { + /* single bit flag */ + if (value & (1 << start)) + printf("\t%-20s %s%s\n", + bdesc->simp, + show_details ? "-" : "", + show_details ? bdesc->detail : "" + ); + } else { + /* bit fields */ + if (show_flags_only) + continue; + + mask = ((u64)1 << (end - start + 1)) - 1; + printf("\t%-20s\t: 0x%-8x\t%s%s\n", + bdesc->simp, + (value >> start) & mask, + show_details ? "-" : "", + show_details ? bdesc->detail : "" + ); + } + } +} + +static void show_leaf(struct subleaf *leaf) +{ + if (!leaf) + return; + + if (show_raw) + leaf_print_raw(leaf); + + decode_bits(leaf->eax, &leaf->info[R_EAX]); + decode_bits(leaf->ebx, &leaf->info[R_EBX]); + decode_bits(leaf->ecx, &leaf->info[R_ECX]); + decode_bits(leaf->edx, &leaf->info[R_EDX]); +} + +static void show_func(struct cpuid_func *func) +{ + int i; + + if (!func) + return; + + for (i = 0; i < func->nr; i++) + show_leaf(&func->leafs[i]); +} + +static void show_range(struct cpuid_range *range) +{ + int i; + + for (i = 0; i < range->nr; i++) + show_func(&range->funcs[i]); +} + +static inline struct cpuid_func *index_to_func(u32 index) +{ + struct cpuid_range *range; + + range = (index & 0x80000000) ? leafs_ext : leafs_basic; + index &= 0x7FFFFFFF; + + if (((index & 0xFFFF) + 1) > (u32)range->nr) { + printf("ERR: invalid input index (0x%x)\n", index); + return NULL; + } + return &range->funcs[index]; +} + +static void show_info(void) +{ + struct cpuid_func *func; + + if (show_raw) { + /* Show all of the raw output of 'cpuid' instr */ + raw_dump_range(leafs_basic); + raw_dump_range(leafs_ext); + return; + } + + if (user_index != 0xFFFFFFFF) { + /* Only show specific leaf/subleaf info */ + func = index_to_func(user_index); + if (!func) + return; + + /* Dump the raw data also */ + show_raw = true; + + if (user_sub != 0xFFFFFFFF) { + if (user_sub + 1 <= (u32)func->nr) { + show_leaf(&func->leafs[user_sub]); + return; + } + + printf("ERR: invalid input subleaf (0x%x)\n", user_sub); + } + + show_func(func); + return; + } + + printf("CPU features:\n=============\n\n"); + show_range(leafs_basic); + show_range(leafs_ext); +} + +static void setup_platform_cpuid(void) +{ + u32 eax, ebx, ecx, edx; + + /* Check vendor */ + eax = ebx = ecx = edx = 0; + cpuid(&eax, &ebx, &ecx, &edx); + + /* "htuA" */ + if (ebx == 0x68747541) + is_amd = true; + + /* Setup leafs for the basic and extended range */ + leafs_basic = setup_cpuid_range(0x0); + leafs_ext = setup_cpuid_range(0x80000000); +} + +static void usage(void) +{ + printf("kcpuid [-abdfhr] [-l leaf] [-s subleaf]\n" + "\t-a|--all Show both bit flags and complex bit fields info\n" + "\t-b|--bitflags Show boolean flags only\n" + "\t-d|--detail Show details of the flag/fields (default)\n" + "\t-f|--flags Specify the cpuid csv file\n" + "\t-h|--help Show usage info\n" + "\t-l|--leaf=index Specify the leaf you want to check\n" + "\t-r|--raw Show raw cpuid data\n" + "\t-s|--subleaf=sub Specify the subleaf you want to check\n" + ); +} + +static struct option opts[] = { + { "all", no_argument, NULL, 'a' }, /* show both bit flags and fields */ + { "bitflags", no_argument, NULL, 'b' }, /* only show bit flags, default on */ + { "detail", no_argument, NULL, 'd' }, /* show detail descriptions */ + { "file", required_argument, NULL, 'f' }, /* use user's cpuid file */ + { "help", no_argument, NULL, 'h'}, /* show usage */ + { "leaf", required_argument, NULL, 'l'}, /* only check a specific leaf */ + { "raw", no_argument, NULL, 'r'}, /* show raw CPUID leaf data */ + { "subleaf", required_argument, NULL, 's'}, /* check a specific subleaf */ + { NULL, 0, NULL, 0 } +}; + +static int parse_options(int argc, char *argv[]) +{ + int c; + + while ((c = getopt_long(argc, argv, "abdf:hl:rs:", + opts, NULL)) != -1) + switch (c) { + case 'a': + show_flags_only = false; + break; + case 'b': + show_flags_only = true; + break; + case 'd': + show_details = true; + break; + case 'f': + user_csv = optarg; + break; + case 'h': + usage(); + exit(1); + break; + case 'l': + /* main leaf */ + user_index = strtoul(optarg, NULL, 0); + break; + case 'r': + show_raw = true; + break; + case 's': + /* subleaf */ + user_sub = strtoul(optarg, NULL, 0); + break; + default: + printf("%s: Invalid option '%c'\n", argv[0], optopt); + return -1; + } + + return 0; +} + +/* + * Do 4 things in turn: + * 1. Parse user options + * 2. Parse and store all the CPUID leaf data supported on this platform + * 2. Parse the csv file, while skipping leafs which are not available + * on this platform + * 3. Print leafs info based on user options + */ +int main(int argc, char *argv[]) +{ + if (parse_options(argc, argv)) + return -1; + + /* Setup the cpuid leafs of current platform */ + setup_platform_cpuid(); + + /* Read and parse the 'cpuid.csv' */ + parse_text(); + + show_info(); + return 0; +} diff --git a/tools/arch/x86/lib/inat.c b/tools/arch/x86/lib/inat.c index 4f5ed49e1b4e..dfbcc6405941 100644 --- a/tools/arch/x86/lib/inat.c +++ b/tools/arch/x86/lib/inat.c @@ -4,7 +4,7 @@ * * Written by Masami Hiramatsu <mhiramat@redhat.com> */ -#include "../include/asm/insn.h" +#include "../include/asm/insn.h" /* __ignore_sync_check__ */ /* Attribute tables are generated from opcode map */ #include "inat-tables.c" diff --git a/tools/arch/x86/lib/insn.c b/tools/arch/x86/lib/insn.c index 3d9355ed1246..c41f95815480 100644 --- a/tools/arch/x86/lib/insn.c +++ b/tools/arch/x86/lib/insn.c @@ -11,10 +11,13 @@ #else #include <string.h> #endif -#include "../include/asm/inat.h" -#include "../include/asm/insn.h" +#include "../include/asm/inat.h" /* __ignore_sync_check__ */ +#include "../include/asm/insn.h" /* __ignore_sync_check__ */ -#include "../include/asm/emulate_prefix.h" +#include <linux/errno.h> +#include <linux/kconfig.h> + +#include "../include/asm/emulate_prefix.h" /* __ignore_sync_check__ */ #define leXX_to_cpu(t, r) \ ({ \ @@ -51,6 +54,7 @@ * insn_init() - initialize struct insn * @insn: &struct insn to be initialized * @kaddr: address (in kernel memory) of instruction (or copy thereof) + * @buf_len: length of the insn buffer at @kaddr * @x86_64: !0 for 64-bit kernel or 64-bit app */ void insn_init(struct insn *insn, const void *kaddr, int buf_len, int x86_64) @@ -111,8 +115,12 @@ static void insn_get_emulate_prefix(struct insn *insn) * Populates the @insn->prefixes bitmap, and updates @insn->next_byte * to point to the (first) opcode. No effect if @insn->prefixes.got * is already set. + * + * * Returns: + * 0: on success + * < 0: on error */ -void insn_get_prefixes(struct insn *insn) +int insn_get_prefixes(struct insn *insn) { struct insn_field *prefixes = &insn->prefixes; insn_attr_t attr; @@ -120,7 +128,7 @@ void insn_get_prefixes(struct insn *insn) int i, nb; if (prefixes->got) - return; + return 0; insn_get_emulate_prefix(insn); @@ -230,8 +238,10 @@ vex_end: prefixes->got = 1; + return 0; + err_out: - return; + return -ENODATA; } /** @@ -243,16 +253,25 @@ err_out: * If necessary, first collects any preceding (prefix) bytes. * Sets @insn->opcode.value = opcode1. No effect if @insn->opcode.got * is already 1. + * + * Returns: + * 0: on success + * < 0: on error */ -void insn_get_opcode(struct insn *insn) +int insn_get_opcode(struct insn *insn) { struct insn_field *opcode = &insn->opcode; + int pfx_id, ret; insn_byte_t op; - int pfx_id; + if (opcode->got) - return; - if (!insn->prefixes.got) - insn_get_prefixes(insn); + return 0; + + if (!insn->prefixes.got) { + ret = insn_get_prefixes(insn); + if (ret) + return ret; + } /* Get first opcode */ op = get_next(insn_byte_t, insn); @@ -267,9 +286,13 @@ void insn_get_opcode(struct insn *insn) insn->attr = inat_get_avx_attribute(op, m, p); if ((inat_must_evex(insn->attr) && !insn_is_evex(insn)) || (!inat_accept_vex(insn->attr) && - !inat_is_group(insn->attr))) - insn->attr = 0; /* This instruction is bad */ - goto end; /* VEX has only 1 byte for opcode */ + !inat_is_group(insn->attr))) { + /* This instruction is bad */ + insn->attr = 0; + return -EINVAL; + } + /* VEX has only 1 byte for opcode */ + goto end; } insn->attr = inat_get_opcode_attribute(op); @@ -280,13 +303,18 @@ void insn_get_opcode(struct insn *insn) pfx_id = insn_last_prefix_id(insn); insn->attr = inat_get_escape_attribute(op, pfx_id, insn->attr); } - if (inat_must_vex(insn->attr)) - insn->attr = 0; /* This instruction is bad */ + + if (inat_must_vex(insn->attr)) { + /* This instruction is bad */ + insn->attr = 0; + return -EINVAL; + } end: opcode->got = 1; + return 0; err_out: - return; + return -ENODATA; } /** @@ -296,15 +324,25 @@ err_out: * Populates @insn->modrm and updates @insn->next_byte to point past the * ModRM byte, if any. If necessary, first collects the preceding bytes * (prefixes and opcode(s)). No effect if @insn->modrm.got is already 1. + * + * Returns: + * 0: on success + * < 0: on error */ -void insn_get_modrm(struct insn *insn) +int insn_get_modrm(struct insn *insn) { struct insn_field *modrm = &insn->modrm; insn_byte_t pfx_id, mod; + int ret; + if (modrm->got) - return; - if (!insn->opcode.got) - insn_get_opcode(insn); + return 0; + + if (!insn->opcode.got) { + ret = insn_get_opcode(insn); + if (ret) + return ret; + } if (inat_has_modrm(insn->attr)) { mod = get_next(insn_byte_t, insn); @@ -313,17 +351,22 @@ void insn_get_modrm(struct insn *insn) pfx_id = insn_last_prefix_id(insn); insn->attr = inat_get_group_attribute(mod, pfx_id, insn->attr); - if (insn_is_avx(insn) && !inat_accept_vex(insn->attr)) - insn->attr = 0; /* This is bad */ + if (insn_is_avx(insn) && !inat_accept_vex(insn->attr)) { + /* Bad insn */ + insn->attr = 0; + return -EINVAL; + } } } if (insn->x86_64 && inat_is_force64(insn->attr)) insn->opnd_bytes = 8; + modrm->got = 1; + return 0; err_out: - return; + return -ENODATA; } @@ -337,11 +380,16 @@ err_out: int insn_rip_relative(struct insn *insn) { struct insn_field *modrm = &insn->modrm; + int ret; if (!insn->x86_64) return 0; - if (!modrm->got) - insn_get_modrm(insn); + + if (!modrm->got) { + ret = insn_get_modrm(insn); + if (ret) + return 0; + } /* * For rip-relative instructions, the mod field (top 2 bits) * is zero and the r/m field (bottom 3 bits) is 0x5. @@ -355,15 +403,25 @@ int insn_rip_relative(struct insn *insn) * * If necessary, first collects the instruction up to and including the * ModRM byte. + * + * Returns: + * 0: if decoding succeeded + * < 0: otherwise. */ -void insn_get_sib(struct insn *insn) +int insn_get_sib(struct insn *insn) { insn_byte_t modrm; + int ret; if (insn->sib.got) - return; - if (!insn->modrm.got) - insn_get_modrm(insn); + return 0; + + if (!insn->modrm.got) { + ret = insn_get_modrm(insn); + if (ret) + return ret; + } + if (insn->modrm.nbytes) { modrm = insn->modrm.bytes[0]; if (insn->addr_bytes != 2 && @@ -374,8 +432,10 @@ void insn_get_sib(struct insn *insn) } insn->sib.got = 1; + return 0; + err_out: - return; + return -ENODATA; } @@ -386,15 +446,25 @@ err_out: * If necessary, first collects the instruction up to and including the * SIB byte. * Displacement value is sign-expanded. + * + * * Returns: + * 0: if decoding succeeded + * < 0: otherwise. */ -void insn_get_displacement(struct insn *insn) +int insn_get_displacement(struct insn *insn) { insn_byte_t mod, rm, base; + int ret; if (insn->displacement.got) - return; - if (!insn->sib.got) - insn_get_sib(insn); + return 0; + + if (!insn->sib.got) { + ret = insn_get_sib(insn); + if (ret) + return ret; + } + if (insn->modrm.nbytes) { /* * Interpreting the modrm byte: @@ -436,9 +506,10 @@ void insn_get_displacement(struct insn *insn) } out: insn->displacement.got = 1; + return 0; err_out: - return; + return -ENODATA; } /* Decode moffset16/32/64. Return 0 if failed */ @@ -537,20 +608,30 @@ err_out: } /** - * insn_get_immediate() - Get the immediates of instruction + * insn_get_immediate() - Get the immediate in an instruction * @insn: &struct insn containing instruction * * If necessary, first collects the instruction up to and including the * displacement bytes. * Basically, most of immediates are sign-expanded. Unsigned-value can be - * get by bit masking with ((1 << (nbytes * 8)) - 1) + * computed by bit masking with ((1 << (nbytes * 8)) - 1) + * + * Returns: + * 0: on success + * < 0: on error */ -void insn_get_immediate(struct insn *insn) +int insn_get_immediate(struct insn *insn) { + int ret; + if (insn->immediate.got) - return; - if (!insn->displacement.got) - insn_get_displacement(insn); + return 0; + + if (!insn->displacement.got) { + ret = insn_get_displacement(insn); + if (ret) + return ret; + } if (inat_has_moffset(insn->attr)) { if (!__get_moffset(insn)) @@ -597,9 +678,10 @@ void insn_get_immediate(struct insn *insn) } done: insn->immediate.got = 1; + return 0; err_out: - return; + return -ENODATA; } /** @@ -608,13 +690,65 @@ err_out: * * If necessary, first collects the instruction up to and including the * immediates bytes. - */ -void insn_get_length(struct insn *insn) + * + * Returns: + * - 0 on success + * - < 0 on error +*/ +int insn_get_length(struct insn *insn) { + int ret; + if (insn->length) - return; - if (!insn->immediate.got) - insn_get_immediate(insn); + return 0; + + if (!insn->immediate.got) { + ret = insn_get_immediate(insn); + if (ret) + return ret; + } + insn->length = (unsigned char)((unsigned long)insn->next_byte - (unsigned long)insn->kaddr); + + return 0; +} + +/* Ensure this instruction is decoded completely */ +static inline int insn_complete(struct insn *insn) +{ + return insn->opcode.got && insn->modrm.got && insn->sib.got && + insn->displacement.got && insn->immediate.got; +} + +/** + * insn_decode() - Decode an x86 instruction + * @insn: &struct insn to be initialized + * @kaddr: address (in kernel memory) of instruction (or copy thereof) + * @buf_len: length of the insn buffer at @kaddr + * @m: insn mode, see enum insn_mode + * + * Returns: + * 0: if decoding succeeded + * < 0: otherwise. + */ +int insn_decode(struct insn *insn, const void *kaddr, int buf_len, enum insn_mode m) +{ + int ret; + +#define INSN_MODE_KERN (enum insn_mode)-1 /* __ignore_sync_check__ mode is only valid in the kernel */ + + if (m == INSN_MODE_KERN) + insn_init(insn, kaddr, buf_len, IS_ENABLED(CONFIG_X86_64)); + else + insn_init(insn, kaddr, buf_len, m == INSN_MODE_64); + + ret = insn_get_length(insn); + if (ret) + return ret; + + if (insn_complete(insn)) + return 0; + + return -EINVAL; } diff --git a/tools/cgroup/memcg_slabinfo.py b/tools/cgroup/memcg_slabinfo.py index c4225ed63565..1600b17dbb8a 100644 --- a/tools/cgroup/memcg_slabinfo.py +++ b/tools/cgroup/memcg_slabinfo.py @@ -128,9 +128,9 @@ def detect_kernel_config(): cfg['nr_nodes'] = prog['nr_online_nodes'].value_() - if prog.type('struct kmem_cache').members[1][1] == 'flags': + if prog.type('struct kmem_cache').members[1].name == 'flags': cfg['allocator'] = 'SLUB' - elif prog.type('struct kmem_cache').members[1][1] == 'batchcount': + elif prog.type('struct kmem_cache').members[1].name == 'batchcount': cfg['allocator'] = 'SLAB' else: err('Can\'t determine the slab allocator') @@ -193,7 +193,7 @@ def main(): # look over all slab pages, belonging to non-root memcgs # and look for objects belonging to the given memory cgroup for page in for_each_slab_page(prog): - objcg_vec_raw = page.obj_cgroups.value_() + objcg_vec_raw = page.memcg_data.value_() if objcg_vec_raw == 0: continue cache = page.slab_cache @@ -202,7 +202,7 @@ def main(): addr = cache.value_() caches[addr] = cache # clear the lowest bit to get the true obj_cgroups - objcg_vec = Object(prog, page.obj_cgroups.type_, + objcg_vec = Object(prog, 'struct obj_cgroup **', value=objcg_vec_raw & ~1) if addr not in stats: diff --git a/tools/debugging/kernel-chktaint b/tools/debugging/kernel-chktaint index 607b2b280945..719f18b1edf0 100755 --- a/tools/debugging/kernel-chktaint +++ b/tools/debugging/kernel-chktaint @@ -25,7 +25,7 @@ if [ "$1"x != "x" ]; then elif [ $1 -ge 0 ] 2>/dev/null ; then taint=$1 else - echo "Error: Parameter '$1' not a positive interger. Aborting." >&2 + echo "Error: Parameter '$1' not a positive integer. Aborting." >&2 exit 1 fi else diff --git a/tools/iio/Makefile b/tools/iio/Makefile index 3de763d9ab70..5d12ac4e7f8f 100644 --- a/tools/iio/Makefile +++ b/tools/iio/Makefile @@ -27,6 +27,7 @@ include $(srctree)/tools/build/Makefile.include # $(OUTPUT)include/linux/iio: ../../include/uapi/linux/iio mkdir -p $(OUTPUT)include/linux/iio 2>&1 || true + ln -sf $(CURDIR)/../../include/uapi/linux/iio/buffer.h $@ ln -sf $(CURDIR)/../../include/uapi/linux/iio/events.h $@ ln -sf $(CURDIR)/../../include/uapi/linux/iio/types.h $@ diff --git a/tools/iio/iio_event_monitor.c b/tools/iio/iio_event_monitor.c index bb03859db89d..0076437f6e3f 100644 --- a/tools/iio/iio_event_monitor.c +++ b/tools/iio/iio_event_monitor.c @@ -14,6 +14,7 @@ #include <unistd.h> #include <stdlib.h> +#include <dirent.h> #include <stdbool.h> #include <stdio.h> #include <errno.h> @@ -280,22 +281,69 @@ static void print_event(struct iio_event_data *event) printf("\n"); } +/* Enable or disable events in sysfs if the knob is available */ +static void enable_events(char *dev_dir, int enable) +{ + const struct dirent *ent; + char evdir[256]; + int ret; + DIR *dp; + + snprintf(evdir, sizeof(evdir), FORMAT_EVENTS_DIR, dev_dir); + evdir[sizeof(evdir)-1] = '\0'; + + dp = opendir(evdir); + if (!dp) { + fprintf(stderr, "Enabling/disabling events: can't open %s\n", + evdir); + return; + } + + while (ent = readdir(dp), ent) { + if (iioutils_check_suffix(ent->d_name, "_en")) { + printf("%sabling: %s\n", + enable ? "En" : "Dis", + ent->d_name); + ret = write_sysfs_int(ent->d_name, evdir, + enable); + if (ret < 0) + fprintf(stderr, "Failed to enable/disable %s\n", + ent->d_name); + } + } + + if (closedir(dp) == -1) { + perror("Enabling/disabling channels: " + "Failed to close directory"); + return; + } +} + int main(int argc, char **argv) { struct iio_event_data event; const char *device_name; + char *dev_dir_name = NULL; char *chrdev_name; int ret; int dev_num; int fd, event_fd; - - if (argc <= 1) { - fprintf(stderr, "Usage: %s <device_name>\n", argv[0]); + bool all_events = false; + + if (argc == 2) { + device_name = argv[1]; + } else if (argc == 3) { + device_name = argv[2]; + if (!strcmp(argv[1], "-a")) + all_events = true; + } else { + fprintf(stderr, + "Usage: iio_event_monitor [options] <device_name>\n" + "Listen and display events from IIO devices\n" + " -a Auto-activate all available events\n"); return -1; } - device_name = argv[1]; - dev_num = find_type_by_name(device_name, "iio:device"); if (dev_num >= 0) { printf("Found IIO device with name %s with device number %d\n", @@ -303,6 +351,10 @@ int main(int argc, char **argv) ret = asprintf(&chrdev_name, "/dev/iio:device%d", dev_num); if (ret < 0) return -ENOMEM; + /* Look up sysfs dir as well if we can */ + ret = asprintf(&dev_dir_name, "%siio:device%d", iio_dir, dev_num); + if (ret < 0) + return -ENOMEM; } else { /* * If we can't find an IIO device by name assume device_name is @@ -313,6 +365,9 @@ int main(int argc, char **argv) return -ENOMEM; } + if (all_events && dev_dir_name) + enable_events(dev_dir_name, 1); + fd = open(chrdev_name, 0); if (fd == -1) { ret = -errno; @@ -365,6 +420,10 @@ int main(int argc, char **argv) perror("Failed to close event file"); error_free_chrdev_name: + /* Disable events after use */ + if (all_events && dev_dir_name) + enable_events(dev_dir_name, 0); + free(chrdev_name); return ret; diff --git a/tools/iio/iio_generic_buffer.c b/tools/iio/iio_generic_buffer.c index 34d63bcebcd2..2491c54a5e4f 100644 --- a/tools/iio/iio_generic_buffer.c +++ b/tools/iio/iio_generic_buffer.c @@ -30,6 +30,8 @@ #include <inttypes.h> #include <stdbool.h> #include <signal.h> +#include <sys/ioctl.h> +#include <linux/iio/buffer.h> #include "iio_utils.h" /** @@ -49,7 +51,7 @@ enum autochan { * Has the side effect of filling the channels[i].location values used * in processing the buffer output. **/ -int size_from_channelarray(struct iio_channel_info *channels, int num_channels) +static int size_from_channelarray(struct iio_channel_info *channels, int num_channels) { int bytes = 0; int i = 0; @@ -68,7 +70,7 @@ int size_from_channelarray(struct iio_channel_info *channels, int num_channels) return bytes; } -void print1byte(uint8_t input, struct iio_channel_info *info) +static void print1byte(uint8_t input, struct iio_channel_info *info) { /* * Shift before conversion to avoid sign extension @@ -85,7 +87,7 @@ void print1byte(uint8_t input, struct iio_channel_info *info) } } -void print2byte(uint16_t input, struct iio_channel_info *info) +static void print2byte(uint16_t input, struct iio_channel_info *info) { /* First swap if incorrect endian */ if (info->be) @@ -108,7 +110,7 @@ void print2byte(uint16_t input, struct iio_channel_info *info) } } -void print4byte(uint32_t input, struct iio_channel_info *info) +static void print4byte(uint32_t input, struct iio_channel_info *info) { /* First swap if incorrect endian */ if (info->be) @@ -131,7 +133,7 @@ void print4byte(uint32_t input, struct iio_channel_info *info) } } -void print8byte(uint64_t input, struct iio_channel_info *info) +static void print8byte(uint64_t input, struct iio_channel_info *info) { /* First swap if incorrect endian */ if (info->be) @@ -167,9 +169,8 @@ void print8byte(uint64_t input, struct iio_channel_info *info) * to fill the location offsets. * @num_channels: number of channels **/ -void process_scan(char *data, - struct iio_channel_info *channels, - int num_channels) +static void process_scan(char *data, struct iio_channel_info *channels, + int num_channels) { int k; @@ -198,7 +199,7 @@ void process_scan(char *data, printf("\n"); } -static int enable_disable_all_channels(char *dev_dir_name, int enable) +static int enable_disable_all_channels(char *dev_dir_name, int buffer_idx, int enable) { const struct dirent *ent; char scanelemdir[256]; @@ -206,7 +207,7 @@ static int enable_disable_all_channels(char *dev_dir_name, int enable) int ret; snprintf(scanelemdir, sizeof(scanelemdir), - FORMAT_SCAN_ELEMENTS_DIR, dev_dir_name); + FORMAT_SCAN_ELEMENTS_DIR, dev_dir_name, buffer_idx); scanelemdir[sizeof(scanelemdir)-1] = '\0'; dp = opendir(scanelemdir); @@ -238,12 +239,13 @@ static int enable_disable_all_channels(char *dev_dir_name, int enable) return 0; } -void print_usage(void) +static void print_usage(void) { fprintf(stderr, "Usage: generic_buffer [options]...\n" "Capture, convert and output data from IIO device buffer\n" " -a Auto-activate all available channels\n" " -A Force-activate ALL channels\n" + " -b <n> The buffer which to open (by index), default 0\n" " -c <n> Do n conversions, or loop forever if n < 0\n" " -e Disable wait for event (new data)\n" " -g Use trigger-less mode\n" @@ -257,12 +259,13 @@ void print_usage(void) " -w <n> Set delay between reads in us (event-less mode)\n"); } -enum autochan autochannels = AUTOCHANNELS_DISABLED; -char *dev_dir_name = NULL; -char *buf_dir_name = NULL; -bool current_trigger_set = false; +static enum autochan autochannels = AUTOCHANNELS_DISABLED; +static char *dev_dir_name = NULL; +static char *buf_dir_name = NULL; +static int buffer_idx = 0; +static bool current_trigger_set = false; -void cleanup(void) +static void cleanup(void) { int ret; @@ -287,21 +290,21 @@ void cleanup(void) /* Disable channels if auto-enabled */ if (dev_dir_name && autochannels == AUTOCHANNELS_ACTIVE) { - ret = enable_disable_all_channels(dev_dir_name, 0); + ret = enable_disable_all_channels(dev_dir_name, buffer_idx, 0); if (ret) fprintf(stderr, "Failed to disable all channels\n"); autochannels = AUTOCHANNELS_DISABLED; } } -void sig_handler(int signum) +static void sig_handler(int signum) { fprintf(stderr, "Caught signal %d\n", signum); cleanup(); exit(-signum); } -void register_cleanup(void) +static void register_cleanup(void) { struct sigaction sa = { .sa_handler = sig_handler }; const int signums[] = { SIGINT, SIGTERM, SIGABRT }; @@ -334,7 +337,9 @@ int main(int argc, char **argv) unsigned long long j; unsigned long toread; int ret, c; - int fp = -1; + struct stat st; + int fd = -1; + int buf_fd = -1; int num_channels = 0; char *trigger_name = NULL, *device_name = NULL; @@ -353,7 +358,7 @@ int main(int argc, char **argv) register_cleanup(); - while ((c = getopt_long(argc, argv, "aAc:egl:n:N:t:T:w:?", longopts, + while ((c = getopt_long(argc, argv, "aAb:c:egl:n:N:t:T:w:?", longopts, NULL)) != -1) { switch (c) { case 'a': @@ -362,7 +367,20 @@ int main(int argc, char **argv) case 'A': autochannels = AUTOCHANNELS_ENABLED; force_autochannels = true; - break; + break; + case 'b': + errno = 0; + buffer_idx = strtoll(optarg, &dummy, 10); + if (errno) { + ret = -errno; + goto error; + } + if (buffer_idx < 0) { + ret = -ERANGE; + goto error; + } + + break; case 'c': errno = 0; num_loops = strtoll(optarg, &dummy, 10); @@ -519,7 +537,7 @@ int main(int argc, char **argv) * Parse the files in scan_elements to identify what channels are * present */ - ret = build_channel_array(dev_dir_name, &channels, &num_channels); + ret = build_channel_array(dev_dir_name, buffer_idx, &channels, &num_channels); if (ret) { fprintf(stderr, "Problem reading scan element information\n" "diag %s\n", dev_dir_name); @@ -536,7 +554,7 @@ int main(int argc, char **argv) (autochannels == AUTOCHANNELS_ENABLED && force_autochannels)) { fprintf(stderr, "Enabling all channels\n"); - ret = enable_disable_all_channels(dev_dir_name, 1); + ret = enable_disable_all_channels(dev_dir_name, buffer_idx, 1); if (ret) { fprintf(stderr, "Failed to enable all channels\n"); goto error; @@ -545,7 +563,7 @@ int main(int argc, char **argv) /* This flags that we need to disable the channels again */ autochannels = AUTOCHANNELS_ACTIVE; - ret = build_channel_array(dev_dir_name, &channels, + ret = build_channel_array(dev_dir_name, buffer_idx, &channels, &num_channels); if (ret) { fprintf(stderr, "Problem reading scan element " @@ -566,7 +584,7 @@ int main(int argc, char **argv) fprintf(stderr, "Enable channels manually in " FORMAT_SCAN_ELEMENTS_DIR "/*_en or pass -a to autoenable channels and " - "try again.\n", dev_dir_name); + "try again.\n", dev_dir_name, buffer_idx); ret = -ENOENT; goto error; } @@ -577,12 +595,25 @@ int main(int argc, char **argv) * be built rather than found. */ ret = asprintf(&buf_dir_name, - "%siio:device%d/buffer", iio_dir, dev_num); + "%siio:device%d/buffer%d", iio_dir, dev_num, buffer_idx); if (ret < 0) { ret = -ENOMEM; goto error; } + if (stat(buf_dir_name, &st)) { + fprintf(stderr, "Could not stat() '%s', got error %d: %s\n", + buf_dir_name, errno, strerror(errno)); + ret = -errno; + goto error; + } + + if (!S_ISDIR(st.st_mode)) { + fprintf(stderr, "File '%s' is not a directory\n", buf_dir_name); + ret = -EFAULT; + goto error; + } + if (!notrigger) { printf("%s %s\n", dev_dir_name, trigger_name); /* @@ -599,6 +630,35 @@ int main(int argc, char **argv) } } + ret = asprintf(&buffer_access, "/dev/iio:device%d", dev_num); + if (ret < 0) { + ret = -ENOMEM; + goto error; + } + + /* Attempt to open non blocking the access dev */ + fd = open(buffer_access, O_RDONLY | O_NONBLOCK); + if (fd == -1) { /* TODO: If it isn't there make the node */ + ret = -errno; + fprintf(stderr, "Failed to open %s\n", buffer_access); + goto error; + } + + /* specify for which buffer index we want an FD */ + buf_fd = buffer_idx; + + ret = ioctl(fd, IIO_BUFFER_GET_FD_IOCTL, &buf_fd); + if (ret == -1 || buf_fd == -1) { + ret = -errno; + if (ret == -ENODEV || ret == -EINVAL) + fprintf(stderr, + "Device does not have this many buffers\n"); + else + fprintf(stderr, "Failed to retrieve buffer fd\n"); + + goto error; + } + /* Setup ring buffer parameters */ ret = write_sysfs_int("length", buf_dir_name, buf_len); if (ret < 0) @@ -608,7 +668,8 @@ int main(int argc, char **argv) ret = write_sysfs_int("enable", buf_dir_name, 1); if (ret < 0) { fprintf(stderr, - "Failed to enable buffer: %s\n", strerror(-ret)); + "Failed to enable buffer '%s': %s\n", + buf_dir_name, strerror(-ret)); goto error; } @@ -619,24 +680,30 @@ int main(int argc, char **argv) goto error; } - ret = asprintf(&buffer_access, "/dev/iio:device%d", dev_num); - if (ret < 0) { - ret = -ENOMEM; - goto error; + /** + * This check is being done here for sanity reasons, however it + * should be omitted under normal operation. + * If this is buffer0, we check that we get EBUSY after this point. + */ + if (buffer_idx == 0) { + errno = 0; + read_size = read(fd, data, 1); + if (read_size > -1 || errno != EBUSY) { + ret = -EFAULT; + perror("Reading from '%s' should not be possible after ioctl()"); + goto error; + } } - /* Attempt to open non blocking the access dev */ - fp = open(buffer_access, O_RDONLY | O_NONBLOCK); - if (fp == -1) { /* TODO: If it isn't there make the node */ - ret = -errno; - fprintf(stderr, "Failed to open %s\n", buffer_access); - goto error; - } + /* close now the main chardev FD and let the buffer FD work */ + if (close(fd) == -1) + perror("Failed to close character device file"); + fd = -1; for (j = 0; j < num_loops || num_loops < 0; j++) { if (!noevents) { struct pollfd pfd = { - .fd = fp, + .fd = buf_fd, .events = POLLIN, }; @@ -654,7 +721,7 @@ int main(int argc, char **argv) toread = 64; } - read_size = read(fp, data, toread * scan_size); + read_size = read(buf_fd, data, toread * scan_size); if (read_size < 0) { if (errno == EAGAIN) { fprintf(stderr, "nothing available\n"); @@ -671,7 +738,9 @@ int main(int argc, char **argv) error: cleanup(); - if (fp >= 0 && close(fp) == -1) + if (fd >= 0 && close(fd) == -1) + perror("Failed to close character device"); + if (buf_fd >= 0 && close(buf_fd) == -1) perror("Failed to close buffer"); free(buffer_access); free(data); diff --git a/tools/iio/iio_utils.c b/tools/iio/iio_utils.c index 7399eb7f1378..aadee6d34c74 100644 --- a/tools/iio/iio_utils.c +++ b/tools/iio/iio_utils.c @@ -77,15 +77,17 @@ int iioutils_break_up_name(const char *full_name, char **generic_name) * @mask: output a bit mask for the raw data * @be: output if data in big endian * @device_dir: the IIO device directory + * @buffer_idx: the IIO buffer index * @name: the channel name * @generic_name: the channel type name * * Returns a value >= 0 on success, otherwise a negative error code. **/ -int iioutils_get_type(unsigned *is_signed, unsigned *bytes, unsigned *bits_used, - unsigned *shift, uint64_t *mask, unsigned *be, - const char *device_dir, const char *name, - const char *generic_name) +static int iioutils_get_type(unsigned int *is_signed, unsigned int *bytes, + unsigned int *bits_used, unsigned int *shift, + uint64_t *mask, unsigned int *be, + const char *device_dir, int buffer_idx, + const char *name, const char *generic_name) { FILE *sysfsfp; int ret; @@ -95,7 +97,7 @@ int iioutils_get_type(unsigned *is_signed, unsigned *bytes, unsigned *bits_used, unsigned padint; const struct dirent *ent; - ret = asprintf(&scan_el_dir, FORMAT_SCAN_ELEMENTS_DIR, device_dir); + ret = asprintf(&scan_el_dir, FORMAT_SCAN_ELEMENTS_DIR, device_dir, buffer_idx); if (ret < 0) return -ENOMEM; @@ -303,12 +305,13 @@ void bsort_channel_array_by_index(struct iio_channel_info *ci_array, int cnt) /** * build_channel_array() - function to figure out what channels are present * @device_dir: the IIO device directory in sysfs + * @buffer_idx: the IIO buffer for this channel array * @ci_array: output the resulting array of iio_channel_info * @counter: output the amount of array elements * * Returns 0 on success, otherwise a negative error code. **/ -int build_channel_array(const char *device_dir, +int build_channel_array(const char *device_dir, int buffer_idx, struct iio_channel_info **ci_array, int *counter) { DIR *dp; @@ -321,7 +324,7 @@ int build_channel_array(const char *device_dir, char *filename; *counter = 0; - ret = asprintf(&scan_el_dir, FORMAT_SCAN_ELEMENTS_DIR, device_dir); + ret = asprintf(&scan_el_dir, FORMAT_SCAN_ELEMENTS_DIR, device_dir, buffer_idx); if (ret < 0) return -ENOMEM; @@ -502,6 +505,7 @@ int build_channel_array(const char *device_dir, ¤t->mask, ¤t->be, device_dir, + buffer_idx, current->name, current->generic_name); if (ret < 0) diff --git a/tools/iio/iio_utils.h b/tools/iio/iio_utils.h index 74bde4fde2c8..663c94a6c705 100644 --- a/tools/iio/iio_utils.h +++ b/tools/iio/iio_utils.h @@ -12,7 +12,8 @@ /* Made up value to limit allocation sizes */ #define IIO_MAX_NAME_LENGTH 64 -#define FORMAT_SCAN_ELEMENTS_DIR "%s/scan_elements" +#define FORMAT_SCAN_ELEMENTS_DIR "%s/buffer%d" +#define FORMAT_EVENTS_DIR "%s/events" #define FORMAT_TYPE_FILE "%s_type" #define ARRAY_SIZE(arr) (sizeof(arr) / sizeof(arr[0])) @@ -57,15 +58,11 @@ static inline int iioutils_check_suffix(const char *str, const char *suffix) } int iioutils_break_up_name(const char *full_name, char **generic_name); -int iioutils_get_type(unsigned *is_signed, unsigned *bytes, unsigned *bits_used, - unsigned *shift, uint64_t *mask, unsigned *be, - const char *device_dir, const char *name, - const char *generic_name); int iioutils_get_param_float(float *output, const char *param_name, const char *device_dir, const char *name, const char *generic_name); void bsort_channel_array_by_index(struct iio_channel_info *ci_array, int cnt); -int build_channel_array(const char *device_dir, +int build_channel_array(const char *device_dir, int buffer_idx, struct iio_channel_info **ci_array, int *counter); int find_type_by_name(const char *name, const char *type); int write_sysfs_int(const char *filename, const char *basedir, int val); diff --git a/tools/include/linux/kconfig.h b/tools/include/linux/kconfig.h new file mode 100644 index 000000000000..1555a0c4f345 --- /dev/null +++ b/tools/include/linux/kconfig.h @@ -0,0 +1,73 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _TOOLS_LINUX_KCONFIG_H +#define _TOOLS_LINUX_KCONFIG_H + +/* CONFIG_CC_VERSION_TEXT (Do not delete this comment. See help in Kconfig) */ + +#ifdef CONFIG_CPU_BIG_ENDIAN +#define __BIG_ENDIAN 4321 +#else +#define __LITTLE_ENDIAN 1234 +#endif + +#define __ARG_PLACEHOLDER_1 0, +#define __take_second_arg(__ignored, val, ...) val + +/* + * The use of "&&" / "||" is limited in certain expressions. + * The following enable to calculate "and" / "or" with macro expansion only. + */ +#define __and(x, y) ___and(x, y) +#define ___and(x, y) ____and(__ARG_PLACEHOLDER_##x, y) +#define ____and(arg1_or_junk, y) __take_second_arg(arg1_or_junk y, 0) + +#define __or(x, y) ___or(x, y) +#define ___or(x, y) ____or(__ARG_PLACEHOLDER_##x, y) +#define ____or(arg1_or_junk, y) __take_second_arg(arg1_or_junk 1, y) + +/* + * Helper macros to use CONFIG_ options in C/CPP expressions. Note that + * these only work with boolean and tristate options. + */ + +/* + * Getting something that works in C and CPP for an arg that may or may + * not be defined is tricky. Here, if we have "#define CONFIG_BOOGER 1" + * we match on the placeholder define, insert the "0," for arg1 and generate + * the triplet (0, 1, 0). Then the last step cherry picks the 2nd arg (a one). + * When CONFIG_BOOGER is not defined, we generate a (... 1, 0) pair, and when + * the last step cherry picks the 2nd arg, we get a zero. + */ +#define __is_defined(x) ___is_defined(x) +#define ___is_defined(val) ____is_defined(__ARG_PLACEHOLDER_##val) +#define ____is_defined(arg1_or_junk) __take_second_arg(arg1_or_junk 1, 0) + +/* + * IS_BUILTIN(CONFIG_FOO) evaluates to 1 if CONFIG_FOO is set to 'y', 0 + * otherwise. For boolean options, this is equivalent to + * IS_ENABLED(CONFIG_FOO). + */ +#define IS_BUILTIN(option) __is_defined(option) + +/* + * IS_MODULE(CONFIG_FOO) evaluates to 1 if CONFIG_FOO is set to 'm', 0 + * otherwise. + */ +#define IS_MODULE(option) __is_defined(option##_MODULE) + +/* + * IS_REACHABLE(CONFIG_FOO) evaluates to 1 if the currently compiled + * code can call a function defined in code compiled based on CONFIG_FOO. + * This is similar to IS_ENABLED(), but returns false when invoked from + * built-in code when CONFIG_FOO is set to 'm'. + */ +#define IS_REACHABLE(option) __or(IS_BUILTIN(option), \ + __and(IS_MODULE(option), __is_defined(MODULE))) + +/* + * IS_ENABLED(CONFIG_FOO) evaluates to 1 if CONFIG_FOO is set to 'y' or 'm', + * 0 otherwise. + */ +#define IS_ENABLED(option) __or(IS_BUILTIN(option), IS_MODULE(option)) + +#endif /* _TOOLS_LINUX_KCONFIG_H */ diff --git a/tools/include/linux/static_call_types.h b/tools/include/linux/static_call_types.h index ae5662d368b9..5a00b8b2cf9f 100644 --- a/tools/include/linux/static_call_types.h +++ b/tools/include/linux/static_call_types.h @@ -58,11 +58,25 @@ struct static_call_site { __raw_static_call(name); \ }) +struct static_call_key { + void *func; + union { + /* bit 0: 0 = mods, 1 = sites */ + unsigned long type; + struct static_call_mod *mods; + struct static_call_site *sites; + }; +}; + #else /* !CONFIG_HAVE_STATIC_CALL_INLINE */ #define __STATIC_CALL_ADDRESSABLE(name) #define __static_call(name) __raw_static_call(name) +struct static_call_key { + void *func; +}; + #endif /* CONFIG_HAVE_STATIC_CALL_INLINE */ #ifdef MODULE @@ -77,6 +91,10 @@ struct static_call_site { #else +struct static_call_key { + void *func; +}; + #define static_call(name) \ ((typeof(STATIC_CALL_TRAMP(name))*)(STATIC_CALL_KEY(name).func)) diff --git a/tools/memory-model/Documentation/access-marking.txt b/tools/memory-model/Documentation/access-marking.txt new file mode 100644 index 000000000000..1ab189f51f55 --- /dev/null +++ b/tools/memory-model/Documentation/access-marking.txt @@ -0,0 +1,479 @@ +MARKING SHARED-MEMORY ACCESSES +============================== + +This document provides guidelines for marking intentionally concurrent +normal accesses to shared memory, that is "normal" as in accesses that do +not use read-modify-write atomic operations. It also describes how to +document these accesses, both with comments and with special assertions +processed by the Kernel Concurrency Sanitizer (KCSAN). This discussion +builds on an earlier LWN article [1]. + + +ACCESS-MARKING OPTIONS +====================== + +The Linux kernel provides the following access-marking options: + +1. Plain C-language accesses (unmarked), for example, "a = b;" + +2. Data-race marking, for example, "data_race(a = b);" + +3. READ_ONCE(), for example, "a = READ_ONCE(b);" + The various forms of atomic_read() also fit in here. + +4. WRITE_ONCE(), for example, "WRITE_ONCE(a, b);" + The various forms of atomic_set() also fit in here. + + +These may be used in combination, as shown in this admittedly improbable +example: + + WRITE_ONCE(a, b + data_race(c + d) + READ_ONCE(e)); + +Neither plain C-language accesses nor data_race() (#1 and #2 above) place +any sort of constraint on the compiler's choice of optimizations [2]. +In contrast, READ_ONCE() and WRITE_ONCE() (#3 and #4 above) restrict the +compiler's use of code-motion and common-subexpression optimizations. +Therefore, if a given access is involved in an intentional data race, +using READ_ONCE() for loads and WRITE_ONCE() for stores is usually +preferable to data_race(), which in turn is usually preferable to plain +C-language accesses. + +KCSAN will complain about many types of data races involving plain +C-language accesses, but marking all accesses involved in a given data +race with one of data_race(), READ_ONCE(), or WRITE_ONCE(), will prevent +KCSAN from complaining. Of course, lack of KCSAN complaints does not +imply correct code. Therefore, please take a thoughtful approach +when responding to KCSAN complaints. Churning the code base with +ill-considered additions of data_race(), READ_ONCE(), and WRITE_ONCE() +is unhelpful. + +In fact, the following sections describe situations where use of +data_race() and even plain C-language accesses is preferable to +READ_ONCE() and WRITE_ONCE(). + + +Use of the data_race() Macro +---------------------------- + +Here are some situations where data_race() should be used instead of +READ_ONCE() and WRITE_ONCE(): + +1. Data-racy loads from shared variables whose values are used only + for diagnostic purposes. + +2. Data-racy reads whose values are checked against marked reload. + +3. Reads whose values feed into error-tolerant heuristics. + +4. Writes setting values that feed into error-tolerant heuristics. + + +Data-Racy Reads for Approximate Diagnostics + +Approximate diagnostics include lockdep reports, monitoring/statistics +(including /proc and /sys output), WARN*()/BUG*() checks whose return +values are ignored, and other situations where reads from shared variables +are not an integral part of the core concurrency design. + +In fact, use of data_race() instead READ_ONCE() for these diagnostic +reads can enable better checking of the remaining accesses implementing +the core concurrency design. For example, suppose that the core design +prevents any non-diagnostic reads from shared variable x from running +concurrently with updates to x. Then using plain C-language writes +to x allows KCSAN to detect reads from x from within regions of code +that fail to exclude the updates. In this case, it is important to use +data_race() for the diagnostic reads because otherwise KCSAN would give +false-positive warnings about these diagnostic reads. + +In theory, plain C-language loads can also be used for this use case. +However, in practice this will have the disadvantage of causing KCSAN +to generate false positives because KCSAN will have no way of knowing +that the resulting data race was intentional. + + +Data-Racy Reads That Are Checked Against Marked Reload + +The values from some reads are not implicitly trusted. They are instead +fed into some operation that checks the full value against a later marked +load from memory, which means that the occasional arbitrarily bogus value +is not a problem. For example, if a bogus value is fed into cmpxchg(), +all that happens is that this cmpxchg() fails, which normally results +in a retry. Unless the race condition that resulted in the bogus value +recurs, this retry will with high probability succeed, so no harm done. + +However, please keep in mind that a data_race() load feeding into +a cmpxchg_relaxed() might still be subject to load fusing on some +architectures. Therefore, it is best to capture the return value from +the failing cmpxchg() for the next iteration of the loop, an approach +that provides the compiler much less scope for mischievous optimizations. +Capturing the return value from cmpxchg() also saves a memory reference +in many cases. + +In theory, plain C-language loads can also be used for this use case. +However, in practice this will have the disadvantage of causing KCSAN +to generate false positives because KCSAN will have no way of knowing +that the resulting data race was intentional. + + +Reads Feeding Into Error-Tolerant Heuristics + +Values from some reads feed into heuristics that can tolerate occasional +errors. Such reads can use data_race(), thus allowing KCSAN to focus on +the other accesses to the relevant shared variables. But please note +that data_race() loads are subject to load fusing, which can result in +consistent errors, which in turn are quite capable of breaking heuristics. +Therefore use of data_race() should be limited to cases where some other +code (such as a barrier() call) will force the occasional reload. + +In theory, plain C-language loads can also be used for this use case. +However, in practice this will have the disadvantage of causing KCSAN +to generate false positives because KCSAN will have no way of knowing +that the resulting data race was intentional. + + +Writes Setting Values Feeding Into Error-Tolerant Heuristics + +The values read into error-tolerant heuristics come from somewhere, +for example, from sysfs. This means that some code in sysfs writes +to this same variable, and these writes can also use data_race(). +After all, if the heuristic can tolerate the occasional bogus value +due to compiler-mangled reads, it can also tolerate the occasional +compiler-mangled write, at least assuming that the proper value is in +place once the write completes. + +Plain C-language stores can also be used for this use case. However, +in kernels built with CONFIG_KCSAN_ASSUME_PLAIN_WRITES_ATOMIC=n, this +will have the disadvantage of causing KCSAN to generate false positives +because KCSAN will have no way of knowing that the resulting data race +was intentional. + + +Use of Plain C-Language Accesses +-------------------------------- + +Here are some example situations where plain C-language accesses should +used instead of READ_ONCE(), WRITE_ONCE(), and data_race(): + +1. Accesses protected by mutual exclusion, including strict locking + and sequence locking. + +2. Initialization-time and cleanup-time accesses. This covers a + wide variety of situations, including the uniprocessor phase of + system boot, variables to be used by not-yet-spawned kthreads, + structures not yet published to reference-counted or RCU-protected + data structures, and the cleanup side of any of these situations. + +3. Per-CPU variables that are not accessed from other CPUs. + +4. Private per-task variables, including on-stack variables, some + fields in the task_struct structure, and task-private heap data. + +5. Any other loads for which there is not supposed to be a concurrent + store to that same variable. + +6. Any other stores for which there should be neither concurrent + loads nor concurrent stores to that same variable. + + But note that KCSAN makes two explicit exceptions to this rule + by default, refraining from flagging plain C-language stores: + + a. No matter what. You can override this default by building + with CONFIG_KCSAN_ASSUME_PLAIN_WRITES_ATOMIC=n. + + b. When the store writes the value already contained in + that variable. You can override this default by building + with CONFIG_KCSAN_REPORT_VALUE_CHANGE_ONLY=n. + + c. When one of the stores is in an interrupt handler and + the other in the interrupted code. You can override this + default by building with CONFIG_KCSAN_INTERRUPT_WATCHER=y. + +Note that it is important to use plain C-language accesses in these cases, +because doing otherwise prevents KCSAN from detecting violations of your +code's synchronization rules. + + +ACCESS-DOCUMENTATION OPTIONS +============================ + +It is important to comment marked accesses so that people reading your +code, yourself included, are reminded of the synchronization design. +However, it is even more important to comment plain C-language accesses +that are intentionally involved in data races. Such comments are +needed to remind people reading your code, again, yourself included, +of how the compiler has been prevented from optimizing those accesses +into concurrency bugs. + +It is also possible to tell KCSAN about your synchronization design. +For example, ASSERT_EXCLUSIVE_ACCESS(foo) tells KCSAN that any +concurrent access to variable foo by any other CPU is an error, even +if that concurrent access is marked with READ_ONCE(). In addition, +ASSERT_EXCLUSIVE_WRITER(foo) tells KCSAN that although it is OK for there +to be concurrent reads from foo from other CPUs, it is an error for some +other CPU to be concurrently writing to foo, even if that concurrent +write is marked with data_race() or WRITE_ONCE(). + +Note that although KCSAN will call out data races involving either +ASSERT_EXCLUSIVE_ACCESS() or ASSERT_EXCLUSIVE_WRITER() on the one hand +and data_race() writes on the other, KCSAN will not report the location +of these data_race() writes. + + +EXAMPLES +======== + +As noted earlier, the goal is to prevent the compiler from destroying +your concurrent algorithm, to help the human reader, and to inform +KCSAN of aspects of your concurrency design. This section looks at a +few examples showing how this can be done. + + +Lock Protection With Lockless Diagnostic Access +----------------------------------------------- + +For example, suppose a shared variable "foo" is read only while a +reader-writer spinlock is read-held, written only while that same +spinlock is write-held, except that it is also read locklessly for +diagnostic purposes. The code might look as follows: + + int foo; + DEFINE_RWLOCK(foo_rwlock); + + void update_foo(int newval) + { + write_lock(&foo_rwlock); + foo = newval; + do_something(newval); + write_unlock(&foo_rwlock); + } + + int read_foo(void) + { + int ret; + + read_lock(&foo_rwlock); + do_something_else(); + ret = foo; + read_unlock(&foo_rwlock); + return ret; + } + + int read_foo_diagnostic(void) + { + return data_race(foo); + } + +The reader-writer lock prevents the compiler from introducing concurrency +bugs into any part of the main algorithm using foo, which means that +the accesses to foo within both update_foo() and read_foo() can (and +should) be plain C-language accesses. One benefit of making them be +plain C-language accesses is that KCSAN can detect any erroneous lockless +reads from or updates to foo. The data_race() in read_foo_diagnostic() +tells KCSAN that data races are expected, and should be silently +ignored. This data_race() also tells the human reading the code that +read_foo_diagnostic() might sometimes return a bogus value. + +However, please note that your kernel must be built with +CONFIG_KCSAN_ASSUME_PLAIN_WRITES_ATOMIC=n in order for KCSAN to +detect a buggy lockless write. If you need KCSAN to detect such a +write even if that write did not change the value of foo, you also +need CONFIG_KCSAN_REPORT_VALUE_CHANGE_ONLY=n. If you need KCSAN to +detect such a write happening in an interrupt handler running on the +same CPU doing the legitimate lock-protected write, you also need +CONFIG_KCSAN_INTERRUPT_WATCHER=y. With some or all of these Kconfig +options set properly, KCSAN can be quite helpful, although it is not +necessarily a full replacement for hardware watchpoints. On the other +hand, neither are hardware watchpoints a full replacement for KCSAN +because it is not always easy to tell hardware watchpoint to conditionally +trap on accesses. + + +Lock-Protected Writes With Lockless Reads +----------------------------------------- + +For another example, suppose a shared variable "foo" is updated only +while holding a spinlock, but is read locklessly. The code might look +as follows: + + int foo; + DEFINE_SPINLOCK(foo_lock); + + void update_foo(int newval) + { + spin_lock(&foo_lock); + WRITE_ONCE(foo, newval); + ASSERT_EXCLUSIVE_WRITER(foo); + do_something(newval); + spin_unlock(&foo_wlock); + } + + int read_foo(void) + { + do_something_else(); + return READ_ONCE(foo); + } + +Because foo is read locklessly, all accesses are marked. The purpose +of the ASSERT_EXCLUSIVE_WRITER() is to allow KCSAN to check for a buggy +concurrent lockless write. + + +Lockless Reads and Writes +------------------------- + +For another example, suppose a shared variable "foo" is both read and +updated locklessly. The code might look as follows: + + int foo; + + int update_foo(int newval) + { + int ret; + + ret = xchg(&foo, newval); + do_something(newval); + return ret; + } + + int read_foo(void) + { + do_something_else(); + return READ_ONCE(foo); + } + +Because foo is accessed locklessly, all accesses are marked. It does +not make sense to use ASSERT_EXCLUSIVE_WRITER() in this case because +there really can be concurrent lockless writers. KCSAN would +flag any concurrent plain C-language reads from foo, and given +CONFIG_KCSAN_ASSUME_PLAIN_WRITES_ATOMIC=n, also any concurrent plain +C-language writes to foo. + + +Lockless Reads and Writes, But With Single-Threaded Initialization +------------------------------------------------------------------ + +For yet another example, suppose that foo is initialized in a +single-threaded manner, but that a number of kthreads are then created +that locklessly and concurrently access foo. Some snippets of this code +might look as follows: + + int foo; + + void initialize_foo(int initval, int nkthreads) + { + int i; + + foo = initval; + ASSERT_EXCLUSIVE_ACCESS(foo); + for (i = 0; i < nkthreads; i++) + kthread_run(access_foo_concurrently, ...); + } + + /* Called from access_foo_concurrently(). */ + int update_foo(int newval) + { + int ret; + + ret = xchg(&foo, newval); + do_something(newval); + return ret; + } + + /* Also called from access_foo_concurrently(). */ + int read_foo(void) + { + do_something_else(); + return READ_ONCE(foo); + } + +The initialize_foo() uses a plain C-language write to foo because there +are not supposed to be concurrent accesses during initialization. The +ASSERT_EXCLUSIVE_ACCESS() allows KCSAN to flag buggy concurrent unmarked +reads, and the ASSERT_EXCLUSIVE_ACCESS() call further allows KCSAN to +flag buggy concurrent writes, even if: (1) Those writes are marked or +(2) The kernel was built with CONFIG_KCSAN_ASSUME_PLAIN_WRITES_ATOMIC=y. + + +Checking Stress-Test Race Coverage +---------------------------------- + +When designing stress tests it is important to ensure that race conditions +of interest really do occur. For example, consider the following code +fragment: + + int foo; + + int update_foo(int newval) + { + return xchg(&foo, newval); + } + + int xor_shift_foo(int shift, int mask) + { + int old, new, newold; + + newold = data_race(foo); /* Checked by cmpxchg(). */ + do { + old = newold; + new = (old << shift) ^ mask; + newold = cmpxchg(&foo, old, new); + } while (newold != old); + return old; + } + + int read_foo(void) + { + return READ_ONCE(foo); + } + +If it is possible for update_foo(), xor_shift_foo(), and read_foo() to be +invoked concurrently, the stress test should force this concurrency to +actually happen. KCSAN can evaluate the stress test when the above code +is modified to read as follows: + + int foo; + + int update_foo(int newval) + { + ASSERT_EXCLUSIVE_ACCESS(foo); + return xchg(&foo, newval); + } + + int xor_shift_foo(int shift, int mask) + { + int old, new, newold; + + newold = data_race(foo); /* Checked by cmpxchg(). */ + do { + old = newold; + new = (old << shift) ^ mask; + ASSERT_EXCLUSIVE_ACCESS(foo); + newold = cmpxchg(&foo, old, new); + } while (newold != old); + return old; + } + + + int read_foo(void) + { + ASSERT_EXCLUSIVE_ACCESS(foo); + return READ_ONCE(foo); + } + +If a given stress-test run does not result in KCSAN complaints from +each possible pair of ASSERT_EXCLUSIVE_ACCESS() invocations, the +stress test needs improvement. If the stress test was to be evaluated +on a regular basis, it would be wise to place the above instances of +ASSERT_EXCLUSIVE_ACCESS() under #ifdef so that they did not result in +false positives when not evaluating the stress test. + + +REFERENCES +========== + +[1] "Concurrency bugs should fear the big bad data-race detector (part 2)" + https://lwn.net/Articles/816854/ + +[2] "Who's afraid of a big bad optimizing compiler?" + https://lwn.net/Articles/793253/ diff --git a/tools/memory-model/Documentation/glossary.txt b/tools/memory-model/Documentation/glossary.txt index b2da6365be63..6f3d16dbf467 100644 --- a/tools/memory-model/Documentation/glossary.txt +++ b/tools/memory-model/Documentation/glossary.txt @@ -19,7 +19,7 @@ Address Dependency: When the address of a later memory access is computed from the value returned by the rcu_dereference() on line 2, the address dependency extends from that rcu_dereference() to that "p->a". In rare cases, optimizing compilers can destroy address - dependencies. Please see Documentation/RCU/rcu_dereference.txt + dependencies. Please see Documentation/RCU/rcu_dereference.rst for more information. See also "Control Dependency" and "Data Dependency". diff --git a/tools/memory-model/Documentation/simple.txt b/tools/memory-model/Documentation/simple.txt index 81e1a0ec5342..4c789ec8334f 100644 --- a/tools/memory-model/Documentation/simple.txt +++ b/tools/memory-model/Documentation/simple.txt @@ -189,7 +189,6 @@ Additional information may be found in these files: Documentation/atomic_t.txt Documentation/atomic_bitops.txt -Documentation/core-api/atomic_ops.rst Documentation/core-api/refcount-vs-atomic.rst Reading code using these primitives is often also quite helpful. diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c index 549813cff8ab..cedf3ede7545 100644 --- a/tools/objtool/arch/x86/decode.c +++ b/tools/objtool/arch/x86/decode.c @@ -11,22 +11,15 @@ #include "../../../arch/x86/lib/inat.c" #include "../../../arch/x86/lib/insn.c" +#define CONFIG_64BIT 1 +#include <asm/nops.h> + #include <asm/orc_types.h> #include <objtool/check.h> #include <objtool/elf.h> #include <objtool/arch.h> #include <objtool/warn.h> - -static unsigned char op_to_cfi_reg[][2] = { - {CFI_AX, CFI_R8}, - {CFI_CX, CFI_R9}, - {CFI_DX, CFI_R10}, - {CFI_BX, CFI_R11}, - {CFI_SP, CFI_R12}, - {CFI_BP, CFI_R13}, - {CFI_SI, CFI_R14}, - {CFI_DI, CFI_R15}, -}; +#include <arch/elf.h> static int is_x86_64(const struct elf *elf) { @@ -83,6 +76,31 @@ unsigned long arch_jump_destination(struct instruction *insn) return -1; \ else for (list_add_tail(&op->list, ops_list); op; op = NULL) +/* + * Helpers to decode ModRM/SIB: + * + * r/m| AX CX DX BX | SP | BP | SI DI | + * | R8 R9 R10 R11 | R12 | R13 | R14 R15 | + * Mod+----------------+-----+-----+---------+ + * 00 | [r/m] |[SIB]|[IP+]| [r/m] | + * 01 | [r/m + d8] |[S+d]| [r/m + d8] | + * 10 | [r/m + d32] |[S+D]| [r/m + d32] | + * 11 | r/ m | + */ + +#define mod_is_mem() (modrm_mod != 3) +#define mod_is_reg() (modrm_mod == 3) + +#define is_RIP() ((modrm_rm & 7) == CFI_BP && modrm_mod == 0) +#define have_SIB() ((modrm_rm & 7) == CFI_SP && mod_is_mem()) + +#define rm_is(reg) (have_SIB() ? \ + sib_base == (reg) && sib_index == CFI_SP : \ + modrm_rm == (reg)) + +#define rm_is_mem(reg) (mod_is_mem() && !is_RIP() && rm_is(reg)) +#define rm_is_reg(reg) (mod_is_reg() && modrm_rm == (reg)) + int arch_decode_instruction(const struct elf *elf, const struct section *sec, unsigned long offset, unsigned int maxlen, unsigned int *len, enum insn_type *type, @@ -90,21 +108,22 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec, struct list_head *ops_list) { struct insn insn; - int x86_64, sign; - unsigned char op1, op2, rex = 0, rex_b = 0, rex_r = 0, rex_w = 0, - rex_x = 0, modrm = 0, modrm_mod = 0, modrm_rm = 0, - modrm_reg = 0, sib = 0; + int x86_64, ret; + unsigned char op1, op2, + rex = 0, rex_b = 0, rex_r = 0, rex_w = 0, rex_x = 0, + modrm = 0, modrm_mod = 0, modrm_rm = 0, modrm_reg = 0, + sib = 0, /* sib_scale = 0, */ sib_index = 0, sib_base = 0; struct stack_op *op = NULL; struct symbol *sym; + u64 imm; x86_64 = is_x86_64(elf); if (x86_64 == -1) return -1; - insn_init(&insn, sec->data->d_buf + offset, maxlen, x86_64); - insn_get_length(&insn); - - if (!insn_complete(&insn)) { + ret = insn_decode(&insn, sec->data->d_buf + offset, maxlen, + x86_64 ? INSN_MODE_64 : INSN_MODE_32); + if (ret < 0) { WARN("can't decode instruction at %s:0x%lx", sec->name, offset); return -1; } @@ -129,23 +148,27 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec, if (insn.modrm.nbytes) { modrm = insn.modrm.bytes[0]; modrm_mod = X86_MODRM_MOD(modrm); - modrm_reg = X86_MODRM_REG(modrm); - modrm_rm = X86_MODRM_RM(modrm); + modrm_reg = X86_MODRM_REG(modrm) + 8*rex_r; + modrm_rm = X86_MODRM_RM(modrm) + 8*rex_b; } - if (insn.sib.nbytes) + if (insn.sib.nbytes) { sib = insn.sib.bytes[0]; + /* sib_scale = X86_SIB_SCALE(sib); */ + sib_index = X86_SIB_INDEX(sib) + 8*rex_x; + sib_base = X86_SIB_BASE(sib) + 8*rex_b; + } switch (op1) { case 0x1: case 0x29: - if (rex_w && !rex_b && modrm_mod == 3 && modrm_rm == 4) { + if (rex_w && rm_is_reg(CFI_SP)) { /* add/sub reg, %rsp */ ADD_OP(op) { op->src.type = OP_SRC_ADD; - op->src.reg = op_to_cfi_reg[modrm_reg][rex_r]; + op->src.reg = modrm_reg; op->dest.type = OP_DEST_REG; op->dest.reg = CFI_SP; } @@ -157,7 +180,7 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec, /* push reg */ ADD_OP(op) { op->src.type = OP_SRC_REG; - op->src.reg = op_to_cfi_reg[op1 & 0x7][rex_b]; + op->src.reg = (op1 & 0x7) + 8*rex_b; op->dest.type = OP_DEST_PUSH; } @@ -169,7 +192,7 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec, ADD_OP(op) { op->src.type = OP_SRC_POP; op->dest.type = OP_DEST_REG; - op->dest.reg = op_to_cfi_reg[op1 & 0x7][rex_b]; + op->dest.reg = (op1 & 0x7) + 8*rex_b; } break; @@ -187,12 +210,54 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec, *type = INSN_JUMP_CONDITIONAL; break; - case 0x81: - case 0x83: - if (rex != 0x48) + case 0x80 ... 0x83: + /* + * 1000 00sw : mod OP r/m : immediate + * + * s - sign extend immediate + * w - imm8 / imm32 + * + * OP: 000 ADD 100 AND + * 001 OR 101 SUB + * 010 ADC 110 XOR + * 011 SBB 111 CMP + */ + + /* 64bit only */ + if (!rex_w) + break; + + /* %rsp target only */ + if (!rm_is_reg(CFI_SP)) break; - if (modrm == 0xe4) { + imm = insn.immediate.value; + if (op1 & 2) { /* sign extend */ + if (op1 & 1) { /* imm32 */ + imm <<= 32; + imm = (s64)imm >> 32; + } else { /* imm8 */ + imm <<= 56; + imm = (s64)imm >> 56; + } + } + + switch (modrm_reg & 7) { + case 5: + imm = -imm; + /* fallthrough */ + case 0: + /* add/sub imm, %rsp */ + ADD_OP(op) { + op->src.type = OP_SRC_ADD; + op->src.reg = CFI_SP; + op->src.offset = imm; + op->dest.type = OP_DEST_REG; + op->dest.reg = CFI_SP; + } + break; + + case 4: /* and imm, %rsp */ ADD_OP(op) { op->src.type = OP_SRC_AND; @@ -202,53 +267,48 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec, op->dest.reg = CFI_SP; } break; - } - if (modrm == 0xc4) - sign = 1; - else if (modrm == 0xec) - sign = -1; - else + default: + /* WARN ? */ break; - - /* add/sub imm, %rsp */ - ADD_OP(op) { - op->src.type = OP_SRC_ADD; - op->src.reg = CFI_SP; - op->src.offset = insn.immediate.value * sign; - op->dest.type = OP_DEST_REG; - op->dest.reg = CFI_SP; } + break; case 0x89: - if (rex_w && !rex_r && modrm_reg == 4) { + if (!rex_w) + break; - if (modrm_mod == 3) { + if (modrm_reg == CFI_SP) { + + if (mod_is_reg()) { /* mov %rsp, reg */ ADD_OP(op) { op->src.type = OP_SRC_REG; op->src.reg = CFI_SP; op->dest.type = OP_DEST_REG; - op->dest.reg = op_to_cfi_reg[modrm_rm][rex_b]; + op->dest.reg = modrm_rm; } break; } else { - /* skip nontrivial SIB */ - if (modrm_rm == 4 && !(sib == 0x24 && rex_b == rex_x)) - break; - /* skip RIP relative displacement */ - if (modrm_rm == 5 && modrm_mod == 0) + if (is_RIP()) break; + /* skip nontrivial SIB */ + if (have_SIB()) { + modrm_rm = sib_base; + if (sib_index != CFI_SP) + break; + } + /* mov %rsp, disp(%reg) */ ADD_OP(op) { op->src.type = OP_SRC_REG; op->src.reg = CFI_SP; op->dest.type = OP_DEST_REG_INDIRECT; - op->dest.reg = op_to_cfi_reg[modrm_rm][rex_b]; + op->dest.reg = modrm_rm; op->dest.offset = insn.displacement.value; } break; @@ -257,12 +317,12 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec, break; } - if (rex_w && !rex_b && modrm_mod == 3 && modrm_rm == 4) { + if (rm_is_reg(CFI_SP)) { /* mov reg, %rsp */ ADD_OP(op) { op->src.type = OP_SRC_REG; - op->src.reg = op_to_cfi_reg[modrm_reg][rex_r]; + op->src.reg = modrm_reg; op->dest.type = OP_DEST_REG; op->dest.reg = CFI_SP; } @@ -271,13 +331,15 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec, /* fallthrough */ case 0x88: - if (!rex_b && - (modrm_mod == 1 || modrm_mod == 2) && modrm_rm == 5) { + if (!rex_w) + break; + + if (rm_is_mem(CFI_BP)) { /* mov reg, disp(%rbp) */ ADD_OP(op) { op->src.type = OP_SRC_REG; - op->src.reg = op_to_cfi_reg[modrm_reg][rex_r]; + op->src.reg = modrm_reg; op->dest.type = OP_DEST_REG_INDIRECT; op->dest.reg = CFI_BP; op->dest.offset = insn.displacement.value; @@ -285,12 +347,12 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec, break; } - if (rex_w && !rex_b && modrm_rm == 4 && sib == 0x24) { + if (rm_is_mem(CFI_SP)) { /* mov reg, disp(%rsp) */ ADD_OP(op) { op->src.type = OP_SRC_REG; - op->src.reg = op_to_cfi_reg[modrm_reg][rex_r]; + op->src.reg = modrm_reg; op->dest.type = OP_DEST_REG_INDIRECT; op->dest.reg = CFI_SP; op->dest.offset = insn.displacement.value; @@ -301,7 +363,10 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec, break; case 0x8b: - if (rex_w && !rex_b && modrm_mod == 1 && modrm_rm == 5) { + if (!rex_w) + break; + + if (rm_is_mem(CFI_BP)) { /* mov disp(%rbp), reg */ ADD_OP(op) { @@ -309,11 +374,12 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec, op->src.reg = CFI_BP; op->src.offset = insn.displacement.value; op->dest.type = OP_DEST_REG; - op->dest.reg = op_to_cfi_reg[modrm_reg][rex_r]; + op->dest.reg = modrm_reg; } + break; + } - } else if (rex_w && !rex_b && sib == 0x24 && - modrm_mod != 3 && modrm_rm == 4) { + if (rm_is_mem(CFI_SP)) { /* mov disp(%rsp), reg */ ADD_OP(op) { @@ -321,75 +387,48 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec, op->src.reg = CFI_SP; op->src.offset = insn.displacement.value; op->dest.type = OP_DEST_REG; - op->dest.reg = op_to_cfi_reg[modrm_reg][rex_r]; + op->dest.reg = modrm_reg; } + break; } break; case 0x8d: - if (sib == 0x24 && rex_w && !rex_b && !rex_x) { - - ADD_OP(op) { - if (!insn.displacement.value) { - /* lea (%rsp), reg */ - op->src.type = OP_SRC_REG; - } else { - /* lea disp(%rsp), reg */ - op->src.type = OP_SRC_ADD; - op->src.offset = insn.displacement.value; - } - op->src.reg = CFI_SP; - op->dest.type = OP_DEST_REG; - op->dest.reg = op_to_cfi_reg[modrm_reg][rex_r]; - } - - } else if (rex == 0x48 && modrm == 0x65) { - - /* lea disp(%rbp), %rsp */ - ADD_OP(op) { - op->src.type = OP_SRC_ADD; - op->src.reg = CFI_BP; - op->src.offset = insn.displacement.value; - op->dest.type = OP_DEST_REG; - op->dest.reg = CFI_SP; - } + if (mod_is_reg()) { + WARN("invalid LEA encoding at %s:0x%lx", sec->name, offset); + break; + } - } else if (rex == 0x49 && modrm == 0x62 && - insn.displacement.value == -8) { + /* skip non 64bit ops */ + if (!rex_w) + break; - /* - * lea -0x8(%r10), %rsp - * - * Restoring rsp back to its original value after a - * stack realignment. - */ - ADD_OP(op) { - op->src.type = OP_SRC_ADD; - op->src.reg = CFI_R10; - op->src.offset = -8; - op->dest.type = OP_DEST_REG; - op->dest.reg = CFI_SP; - } + /* skip RIP relative displacement */ + if (is_RIP()) + break; - } else if (rex == 0x49 && modrm == 0x65 && - insn.displacement.value == -16) { + /* skip nontrivial SIB */ + if (have_SIB()) { + modrm_rm = sib_base; + if (sib_index != CFI_SP) + break; + } - /* - * lea -0x10(%r13), %rsp - * - * Restoring rsp back to its original value after a - * stack realignment. - */ - ADD_OP(op) { + /* lea disp(%src), %dst */ + ADD_OP(op) { + op->src.offset = insn.displacement.value; + if (!op->src.offset) { + /* lea (%src), %dst */ + op->src.type = OP_SRC_REG; + } else { + /* lea disp(%src), %dst */ op->src.type = OP_SRC_ADD; - op->src.reg = CFI_R13; - op->src.offset = -16; - op->dest.type = OP_DEST_REG; - op->dest.reg = CFI_SP; } + op->src.reg = modrm_rm; + op->dest.type = OP_DEST_REG; + op->dest.reg = modrm_reg; } - break; case 0x8f: @@ -476,9 +515,17 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec, * mov bp, sp * pop bp */ - ADD_OP(op) - op->dest.type = OP_DEST_LEAVE; - + ADD_OP(op) { + op->src.type = OP_SRC_REG; + op->src.reg = CFI_BP; + op->dest.type = OP_DEST_REG; + op->dest.reg = CFI_SP; + } + ADD_OP(op) { + op->src.type = OP_SRC_POP; + op->dest.type = OP_DEST_REG; + op->dest.reg = CFI_BP; + } break; case 0xe3: @@ -596,11 +643,11 @@ void arch_initial_func_cfi_state(struct cfi_init_state *state) const char *arch_nop_insn(int len) { static const char nops[5][5] = { - /* 1 */ { 0x90 }, - /* 2 */ { 0x66, 0x90 }, - /* 3 */ { 0x0f, 0x1f, 0x00 }, - /* 4 */ { 0x0f, 0x1f, 0x40, 0x00 }, - /* 5 */ { 0x0f, 0x1f, 0x44, 0x00, 0x00 }, + { BYTES_NOP1 }, + { BYTES_NOP2 }, + { BYTES_NOP3 }, + { BYTES_NOP4 }, + { BYTES_NOP5 }, }; if (len < 1 || len > 5) { @@ -611,6 +658,122 @@ const char *arch_nop_insn(int len) return nops[len-1]; } +/* asm/alternative.h ? */ + +#define ALTINSTR_FLAG_INV (1 << 15) +#define ALT_NOT(feat) ((feat) | ALTINSTR_FLAG_INV) + +struct alt_instr { + s32 instr_offset; /* original instruction */ + s32 repl_offset; /* offset to replacement instruction */ + u16 cpuid; /* cpuid bit set for replacement */ + u8 instrlen; /* length of original instruction */ + u8 replacementlen; /* length of new instruction */ +} __packed; + +static int elf_add_alternative(struct elf *elf, + struct instruction *orig, struct symbol *sym, + int cpuid, u8 orig_len, u8 repl_len) +{ + const int size = sizeof(struct alt_instr); + struct alt_instr *alt; + struct section *sec; + Elf_Scn *s; + + sec = find_section_by_name(elf, ".altinstructions"); + if (!sec) { + sec = elf_create_section(elf, ".altinstructions", + SHF_WRITE, size, 0); + + if (!sec) { + WARN_ELF("elf_create_section"); + return -1; + } + } + + s = elf_getscn(elf->elf, sec->idx); + if (!s) { + WARN_ELF("elf_getscn"); + return -1; + } + + sec->data = elf_newdata(s); + if (!sec->data) { + WARN_ELF("elf_newdata"); + return -1; + } + + sec->data->d_size = size; + sec->data->d_align = 1; + + alt = sec->data->d_buf = malloc(size); + if (!sec->data->d_buf) { + perror("malloc"); + return -1; + } + memset(sec->data->d_buf, 0, size); + + if (elf_add_reloc_to_insn(elf, sec, sec->sh.sh_size, + R_X86_64_PC32, orig->sec, orig->offset)) { + WARN("elf_create_reloc: alt_instr::instr_offset"); + return -1; + } + + if (elf_add_reloc(elf, sec, sec->sh.sh_size + 4, + R_X86_64_PC32, sym, 0)) { + WARN("elf_create_reloc: alt_instr::repl_offset"); + return -1; + } + + alt->cpuid = cpuid; + alt->instrlen = orig_len; + alt->replacementlen = repl_len; + + sec->sh.sh_size += size; + sec->changed = true; + + return 0; +} + +#define X86_FEATURE_RETPOLINE ( 7*32+12) + +int arch_rewrite_retpolines(struct objtool_file *file) +{ + struct instruction *insn; + struct reloc *reloc; + struct symbol *sym; + char name[32] = ""; + + list_for_each_entry(insn, &file->retpoline_call_list, call_node) { + + if (!strcmp(insn->sec->name, ".text.__x86.indirect_thunk")) + continue; + + reloc = insn->reloc; + + sprintf(name, "__x86_indirect_alt_%s_%s", + insn->type == INSN_JUMP_DYNAMIC ? "jmp" : "call", + reloc->sym->name + 21); + + sym = find_symbol_by_name(file->elf, name); + if (!sym) { + sym = elf_create_undef_symbol(file->elf, name); + if (!sym) { + WARN("elf_create_undef_symbol"); + return -1; + } + } + + if (elf_add_alternative(file->elf, insn, sym, + ALT_NOT(X86_FEATURE_RETPOLINE), 5, 5)) { + WARN("elf_add_alternative"); + return -1; + } + } + + return 0; +} + int arch_decode_hint_reg(struct instruction *insn, u8 sp_reg) { struct cfi_reg *cfa = &insn->cfi.cfa; @@ -646,3 +809,8 @@ int arch_decode_hint_reg(struct instruction *insn, u8 sp_reg) return 0; } + +bool arch_is_retpoline(struct symbol *sym) +{ + return !strncmp(sym->name, "__x86_indirect_", 15); +} diff --git a/tools/objtool/arch/x86/include/arch/cfi_regs.h b/tools/objtool/arch/x86/include/arch/cfi_regs.h index 79bc517efba8..0579d22c433c 100644 --- a/tools/objtool/arch/x86/include/arch/cfi_regs.h +++ b/tools/objtool/arch/x86/include/arch/cfi_regs.h @@ -4,13 +4,13 @@ #define _OBJTOOL_CFI_REGS_H #define CFI_AX 0 -#define CFI_DX 1 -#define CFI_CX 2 +#define CFI_CX 1 +#define CFI_DX 2 #define CFI_BX 3 -#define CFI_SI 4 -#define CFI_DI 5 -#define CFI_BP 6 -#define CFI_SP 7 +#define CFI_SP 4 +#define CFI_BP 5 +#define CFI_SI 6 +#define CFI_DI 7 #define CFI_R8 8 #define CFI_R9 9 #define CFI_R10 10 diff --git a/tools/objtool/arch/x86/include/arch/special.h b/tools/objtool/arch/x86/include/arch/special.h index d818b2bffa02..14271cca0c74 100644 --- a/tools/objtool/arch/x86/include/arch/special.h +++ b/tools/objtool/arch/x86/include/arch/special.h @@ -10,7 +10,7 @@ #define JUMP_ORIG_OFFSET 0 #define JUMP_NEW_OFFSET 4 -#define ALT_ENTRY_SIZE 13 +#define ALT_ENTRY_SIZE 12 #define ALT_ORIG_OFFSET 0 #define ALT_NEW_OFFSET 4 #define ALT_FEATURE_OFFSET 8 diff --git a/tools/objtool/builtin-check.c b/tools/objtool/builtin-check.c index c3a85d8f6c5c..8b38b5d6fec7 100644 --- a/tools/objtool/builtin-check.c +++ b/tools/objtool/builtin-check.c @@ -15,16 +15,23 @@ #include <subcmd/parse-options.h> #include <string.h> +#include <stdlib.h> #include <objtool/builtin.h> #include <objtool/objtool.h> -bool no_fp, no_unreachable, retpoline, module, backtrace, uaccess, stats, validate_dup, vmlinux, mcount, noinstr; +bool no_fp, no_unreachable, retpoline, module, backtrace, uaccess, stats, + validate_dup, vmlinux, mcount, noinstr, backup; static const char * const check_usage[] = { "objtool check [<options>] file.o", NULL, }; +static const char * const env_usage[] = { + "OBJTOOL_ARGS=\"<options>\"", + NULL, +}; + const struct option check_options[] = { OPT_BOOLEAN('f', "no-fp", &no_fp, "Skip frame pointer validation"), OPT_BOOLEAN('u', "no-unreachable", &no_unreachable, "Skip 'unreachable instruction' warnings"), @@ -37,20 +44,44 @@ const struct option check_options[] = { OPT_BOOLEAN('n', "noinstr", &noinstr, "noinstr validation for vmlinux.o"), OPT_BOOLEAN('l', "vmlinux", &vmlinux, "vmlinux.o validation"), OPT_BOOLEAN('M', "mcount", &mcount, "generate __mcount_loc"), + OPT_BOOLEAN('B', "backup", &backup, "create .orig files before modification"), OPT_END(), }; +int cmd_parse_options(int argc, const char **argv, const char * const usage[]) +{ + const char *envv[16] = { }; + char *env; + int envc; + + env = getenv("OBJTOOL_ARGS"); + if (env) { + envv[0] = "OBJTOOL_ARGS"; + for (envc = 1; envc < ARRAY_SIZE(envv); ) { + envv[envc++] = env; + env = strchr(env, ' '); + if (!env) + break; + *env = '\0'; + env++; + } + + parse_options(envc, envv, check_options, env_usage, 0); + } + + argc = parse_options(argc, argv, check_options, usage, 0); + if (argc != 1) + usage_with_options(usage, check_options); + return argc; +} + int cmd_check(int argc, const char **argv) { const char *objname; struct objtool_file *file; int ret; - argc = parse_options(argc, argv, check_options, check_usage, 0); - - if (argc != 1) - usage_with_options(check_usage, check_options); - + argc = cmd_parse_options(argc, argv, check_usage); objname = argv[0]; file = objtool_open_read(objname); diff --git a/tools/objtool/builtin-orc.c b/tools/objtool/builtin-orc.c index 8273bbf7cebb..17f8b9307738 100644 --- a/tools/objtool/builtin-orc.c +++ b/tools/objtool/builtin-orc.c @@ -34,10 +34,7 @@ int cmd_orc(int argc, const char **argv) struct objtool_file *file; int ret; - argc = parse_options(argc, argv, check_options, orc_usage, 0); - if (argc != 1) - usage_with_options(orc_usage, check_options); - + argc = cmd_parse_options(argc, argv, orc_usage); objname = argv[0]; file = objtool_open_read(objname); diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 5e5388a38e2a..9ed1a4cd00dc 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -108,6 +108,18 @@ static struct instruction *prev_insn_same_sym(struct objtool_file *file, for (insn = next_insn_same_sec(file, insn); insn; \ insn = next_insn_same_sec(file, insn)) +static bool is_jump_table_jump(struct instruction *insn) +{ + struct alt_group *alt_group = insn->alt_group; + + if (insn->jump_table) + return true; + + /* Retpoline alternative for a jump table? */ + return alt_group && alt_group->orig_group && + alt_group->orig_group->first_insn->jump_table; +} + static bool is_sibling_call(struct instruction *insn) { /* @@ -120,7 +132,7 @@ static bool is_sibling_call(struct instruction *insn) /* An indirect jump is either a sibling call or a jump to a table. */ if (insn->type == INSN_JUMP_DYNAMIC) - return list_empty(&insn->alts); + return !is_jump_table_jump(insn); /* add_jump_destinations() sets insn->call_dest for sibling calls. */ return (is_static_jump(insn) && insn->call_dest); @@ -433,8 +445,7 @@ reachable: static int create_static_call_sections(struct objtool_file *file) { - struct section *sec, *reloc_sec; - struct reloc *reloc; + struct section *sec; struct static_call_site *site; struct instruction *insn; struct symbol *key_sym; @@ -452,7 +463,7 @@ static int create_static_call_sections(struct objtool_file *file) return 0; idx = 0; - list_for_each_entry(insn, &file->static_call_list, static_call_node) + list_for_each_entry(insn, &file->static_call_list, call_node) idx++; sec = elf_create_section(file->elf, ".static_call_sites", SHF_WRITE, @@ -460,36 +471,18 @@ static int create_static_call_sections(struct objtool_file *file) if (!sec) return -1; - reloc_sec = elf_create_reloc_section(file->elf, sec, SHT_RELA); - if (!reloc_sec) - return -1; - idx = 0; - list_for_each_entry(insn, &file->static_call_list, static_call_node) { + list_for_each_entry(insn, &file->static_call_list, call_node) { site = (struct static_call_site *)sec->data->d_buf + idx; memset(site, 0, sizeof(struct static_call_site)); /* populate reloc for 'addr' */ - reloc = malloc(sizeof(*reloc)); - - if (!reloc) { - perror("malloc"); - return -1; - } - memset(reloc, 0, sizeof(*reloc)); - - insn_to_reloc_sym_addend(insn->sec, insn->offset, reloc); - if (!reloc->sym) { - WARN_FUNC("static call tramp: missing containing symbol", - insn->sec, insn->offset); + if (elf_add_reloc_to_insn(file->elf, sec, + idx * sizeof(struct static_call_site), + R_X86_64_PC32, + insn->sec, insn->offset)) return -1; - } - - reloc->type = R_X86_64_PC32; - reloc->offset = idx * sizeof(struct static_call_site); - reloc->sec = reloc_sec; - elf_add_reloc(file->elf, reloc); /* find key symbol */ key_name = strdup(insn->call_dest->name); @@ -526,32 +519,21 @@ static int create_static_call_sections(struct objtool_file *file) free(key_name); /* populate reloc for 'key' */ - reloc = malloc(sizeof(*reloc)); - if (!reloc) { - perror("malloc"); + if (elf_add_reloc(file->elf, sec, + idx * sizeof(struct static_call_site) + 4, + R_X86_64_PC32, key_sym, + is_sibling_call(insn) * STATIC_CALL_SITE_TAIL)) return -1; - } - memset(reloc, 0, sizeof(*reloc)); - reloc->sym = key_sym; - reloc->addend = is_sibling_call(insn) ? STATIC_CALL_SITE_TAIL : 0; - reloc->type = R_X86_64_PC32; - reloc->offset = idx * sizeof(struct static_call_site) + 4; - reloc->sec = reloc_sec; - elf_add_reloc(file->elf, reloc); idx++; } - if (elf_rebuild_reloc_section(file->elf, reloc_sec)) - return -1; - return 0; } static int create_mcount_loc_sections(struct objtool_file *file) { - struct section *sec, *reloc_sec; - struct reloc *reloc; + struct section *sec; unsigned long *loc; struct instruction *insn; int idx; @@ -574,49 +556,21 @@ static int create_mcount_loc_sections(struct objtool_file *file) if (!sec) return -1; - reloc_sec = elf_create_reloc_section(file->elf, sec, SHT_RELA); - if (!reloc_sec) - return -1; - idx = 0; list_for_each_entry(insn, &file->mcount_loc_list, mcount_loc_node) { loc = (unsigned long *)sec->data->d_buf + idx; memset(loc, 0, sizeof(unsigned long)); - reloc = malloc(sizeof(*reloc)); - if (!reloc) { - perror("malloc"); + if (elf_add_reloc_to_insn(file->elf, sec, + idx * sizeof(unsigned long), + R_X86_64_64, + insn->sec, insn->offset)) return -1; - } - memset(reloc, 0, sizeof(*reloc)); - - if (insn->sec->sym) { - reloc->sym = insn->sec->sym; - reloc->addend = insn->offset; - } else { - reloc->sym = find_symbol_containing(insn->sec, insn->offset); - - if (!reloc->sym) { - WARN("missing symbol for insn at offset 0x%lx\n", - insn->offset); - return -1; - } - - reloc->addend = insn->offset - reloc->sym->offset; - } - - reloc->type = R_X86_64_64; - reloc->offset = idx * sizeof(unsigned long); - reloc->sec = reloc_sec; - elf_add_reloc(file->elf, reloc); idx++; } - if (elf_rebuild_reloc_section(file->elf, reloc_sec)) - return -1; - return 0; } @@ -850,6 +804,30 @@ static int add_ignore_alternatives(struct objtool_file *file) return 0; } +__weak bool arch_is_retpoline(struct symbol *sym) +{ + return false; +} + +#define NEGATIVE_RELOC ((void *)-1L) + +static struct reloc *insn_reloc(struct objtool_file *file, struct instruction *insn) +{ + if (insn->reloc == NEGATIVE_RELOC) + return NULL; + + if (!insn->reloc) { + insn->reloc = find_reloc_by_dest_range(file->elf, insn->sec, + insn->offset, insn->len); + if (!insn->reloc) { + insn->reloc = NEGATIVE_RELOC; + return NULL; + } + } + + return insn->reloc; +} + /* * Find the destination instructions for all jumps. */ @@ -864,16 +842,14 @@ static int add_jump_destinations(struct objtool_file *file) if (!is_static_jump(insn)) continue; - reloc = find_reloc_by_dest_range(file->elf, insn->sec, - insn->offset, insn->len); + reloc = insn_reloc(file, insn); if (!reloc) { dest_sec = insn->sec; dest_off = arch_jump_destination(insn); } else if (reloc->sym->type == STT_SECTION) { dest_sec = reloc->sym->sec; dest_off = arch_dest_reloc_offset(reloc->addend); - } else if (!strncmp(reloc->sym->name, "__x86_indirect_thunk_", 21) || - !strncmp(reloc->sym->name, "__x86_retpoline_", 16)) { + } else if (arch_is_retpoline(reloc->sym)) { /* * Retpoline jumps are really dynamic jumps in * disguise, so convert them accordingly. @@ -883,13 +859,16 @@ static int add_jump_destinations(struct objtool_file *file) else insn->type = INSN_JUMP_DYNAMIC_CONDITIONAL; + list_add_tail(&insn->call_node, + &file->retpoline_call_list); + insn->retpoline_safe = true; continue; } else if (insn->func) { /* internal or external sibling call (with reloc) */ insn->call_dest = reloc->sym; if (insn->call_dest->static_call_tramp) { - list_add_tail(&insn->static_call_node, + list_add_tail(&insn->call_node, &file->static_call_list); } continue; @@ -951,7 +930,7 @@ static int add_jump_destinations(struct objtool_file *file) /* internal sibling call (without reloc) */ insn->call_dest = insn->jump_dest->func; if (insn->call_dest->static_call_tramp) { - list_add_tail(&insn->static_call_node, + list_add_tail(&insn->call_node, &file->static_call_list); } } @@ -995,8 +974,7 @@ static int add_call_destinations(struct objtool_file *file) if (insn->type != INSN_CALL) continue; - reloc = find_reloc_by_dest_range(file->elf, insn->sec, - insn->offset, insn->len); + reloc = insn_reloc(file, insn); if (!reloc) { dest_off = arch_jump_destination(insn); insn->call_dest = find_call_destination(insn->sec, dest_off); @@ -1026,9 +1004,29 @@ static int add_call_destinations(struct objtool_file *file) dest_off); return -1; } + + } else if (arch_is_retpoline(reloc->sym)) { + /* + * Retpoline calls are really dynamic calls in + * disguise, so convert them accordingly. + */ + insn->type = INSN_CALL_DYNAMIC; + insn->retpoline_safe = true; + + list_add_tail(&insn->call_node, + &file->retpoline_call_list); + + remove_insn_ops(insn); + continue; + } else insn->call_dest = reloc->sym; + if (insn->call_dest && insn->call_dest->static_call_tramp) { + list_add_tail(&insn->call_node, + &file->static_call_list); + } + /* * Many compilers cannot disable KCOV with a function attribute * so they need a little help, NOP out any KCOV calls from noinstr @@ -1175,8 +1173,7 @@ static int handle_group_alt(struct objtool_file *file, * alternatives code can adjust the relative offsets * accordingly. */ - alt_reloc = find_reloc_by_dest_range(file->elf, insn->sec, - insn->offset, insn->len); + alt_reloc = insn_reloc(file, insn); if (alt_reloc && !arch_support_alt_relocation(special_alt, insn, alt_reloc)) { @@ -1751,6 +1748,11 @@ static void mark_rodata(struct objtool_file *file) file->rodata = found; } +__weak int arch_rewrite_retpolines(struct objtool_file *file) +{ + return 0; +} + static int decode_sections(struct objtool_file *file) { int ret; @@ -1772,10 +1774,17 @@ static int decode_sections(struct objtool_file *file) if (ret) return ret; + /* + * Must be before add_{jump_call}_destination. + */ ret = read_static_call_tramps(file); if (ret) return ret; + /* + * Must be before add_special_section_alts() as that depends on + * jump_dest being set. + */ ret = add_jump_destinations(file); if (ret) return ret; @@ -1784,6 +1793,10 @@ static int decode_sections(struct objtool_file *file) if (ret) return ret; + /* + * Must be before add_call_destination(); it changes INSN_CALL to + * INSN_JUMP. + */ ret = read_intra_function_calls(file); if (ret) return ret; @@ -1808,6 +1821,15 @@ static int decode_sections(struct objtool_file *file) if (ret) return ret; + /* + * Must be after add_special_section_alts(), since this will emit + * alternatives. Must be after add_{jump,call}_destination(), since + * those create the call insn lists. + */ + ret = arch_rewrite_retpolines(file); + if (ret) + return ret; + return 0; } @@ -1959,8 +1981,9 @@ static void restore_reg(struct cfi_state *cfi, unsigned char reg) * 41 5d pop %r13 * c3 retq */ -static int update_cfi_state(struct instruction *insn, struct cfi_state *cfi, - struct stack_op *op) +static int update_cfi_state(struct instruction *insn, + struct instruction *next_insn, + struct cfi_state *cfi, struct stack_op *op) { struct cfi_reg *cfa = &cfi->cfa; struct cfi_reg *regs = cfi->regs; @@ -2019,7 +2042,7 @@ static int update_cfi_state(struct instruction *insn, struct cfi_state *cfi, } else if (op->src.reg == CFI_BP && op->dest.reg == CFI_SP && - cfa->base == CFI_BP) { + (cfa->base == CFI_BP || cfa->base == cfi->drap_reg)) { /* * mov %rbp, %rsp @@ -2161,7 +2184,7 @@ static int update_cfi_state(struct instruction *insn, struct cfi_state *cfi, break; } - if (op->dest.reg == cfi->cfa.base) { + if (op->dest.reg == cfi->cfa.base && !(next_insn && next_insn->hint)) { WARN_FUNC("unsupported stack register modification", insn->sec, insn->offset); return -1; @@ -2216,7 +2239,7 @@ static int update_cfi_state(struct instruction *insn, struct cfi_state *cfi, cfa->offset = 0; cfi->drap_offset = -1; - } else if (regs[op->dest.reg].offset == -cfi->stack_size) { + } else if (cfi->stack_size == -regs[op->dest.reg].offset) { /* pop %reg */ restore_reg(cfi, op->dest.reg); @@ -2357,26 +2380,6 @@ static int update_cfi_state(struct instruction *insn, struct cfi_state *cfi, break; - case OP_DEST_LEAVE: - if ((!cfi->drap && cfa->base != CFI_BP) || - (cfi->drap && cfa->base != cfi->drap_reg)) { - WARN_FUNC("leave instruction with modified stack frame", - insn->sec, insn->offset); - return -1; - } - - /* leave (mov %rbp, %rsp; pop %rbp) */ - - cfi->stack_size = -cfi->regs[CFI_BP].offset - 8; - restore_reg(cfi, CFI_BP); - - if (!cfi->drap) { - cfa->base = CFI_SP; - cfa->offset -= 8; - } - - break; - case OP_DEST_MEM: if (op->src.type != OP_SRC_POP && op->src.type != OP_SRC_POPF) { WARN_FUNC("unknown stack-related memory operation", @@ -2433,13 +2436,15 @@ static int propagate_alt_cfi(struct objtool_file *file, struct instruction *insn return 0; } -static int handle_insn_ops(struct instruction *insn, struct insn_state *state) +static int handle_insn_ops(struct instruction *insn, + struct instruction *next_insn, + struct insn_state *state) { struct stack_op *op; list_for_each_entry(op, &insn->stack_ops, list) { - if (update_cfi_state(insn, &state->cfi, op)) + if (update_cfi_state(insn, next_insn, &state->cfi, op)) return 1; if (!insn->alt_group) @@ -2722,7 +2727,7 @@ static int validate_branch(struct objtool_file *file, struct symbol *func, return 0; } - if (handle_insn_ops(insn, &state)) + if (handle_insn_ops(insn, next_insn, &state)) return 1; switch (insn->type) { @@ -2746,11 +2751,6 @@ static int validate_branch(struct objtool_file *file, struct symbol *func, if (dead_end_function(file, insn->call_dest)) return 0; - if (insn->type == INSN_CALL && insn->call_dest->static_call_tramp) { - list_add_tail(&insn->static_call_node, - &file->static_call_list); - } - break; case INSN_JUMP_CONDITIONAL: diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c index 93fa833a49a5..d08f5f3670f8 100644 --- a/tools/objtool/elf.c +++ b/tools/objtool/elf.c @@ -211,32 +211,6 @@ struct reloc *find_reloc_by_dest(const struct elf *elf, struct section *sec, uns return find_reloc_by_dest_range(elf, sec, offset, 1); } -void insn_to_reloc_sym_addend(struct section *sec, unsigned long offset, - struct reloc *reloc) -{ - if (sec->sym) { - reloc->sym = sec->sym; - reloc->addend = offset; - return; - } - - /* - * The Clang assembler strips section symbols, so we have to reference - * the function symbol instead: - */ - reloc->sym = find_symbol_containing(sec, offset); - if (!reloc->sym) { - /* - * Hack alert. This happens when we need to reference the NOP - * pad insn immediately after the function. - */ - reloc->sym = find_symbol_containing(sec, offset - 1); - } - - if (reloc->sym) - reloc->addend = offset - reloc->sym->offset; -} - static int read_sections(struct elf *elf) { Elf_Scn *s = NULL; @@ -316,12 +290,39 @@ static int read_sections(struct elf *elf) return 0; } +static void elf_add_symbol(struct elf *elf, struct symbol *sym) +{ + struct list_head *entry; + struct rb_node *pnode; + + sym->type = GELF_ST_TYPE(sym->sym.st_info); + sym->bind = GELF_ST_BIND(sym->sym.st_info); + + sym->offset = sym->sym.st_value; + sym->len = sym->sym.st_size; + + rb_add(&sym->node, &sym->sec->symbol_tree, symbol_to_offset); + pnode = rb_prev(&sym->node); + if (pnode) + entry = &rb_entry(pnode, struct symbol, node)->list; + else + entry = &sym->sec->symbol_list; + list_add(&sym->list, entry); + elf_hash_add(elf->symbol_hash, &sym->hash, sym->idx); + elf_hash_add(elf->symbol_name_hash, &sym->name_hash, str_hash(sym->name)); + + /* + * Don't store empty STT_NOTYPE symbols in the rbtree. They + * can exist within a function, confusing the sorting. + */ + if (!sym->len) + rb_erase(&sym->node, &sym->sec->symbol_tree); +} + static int read_symbols(struct elf *elf) { struct section *symtab, *symtab_shndx, *sec; struct symbol *sym, *pfunc; - struct list_head *entry; - struct rb_node *pnode; int symbols_nr, i; char *coldstr; Elf_Data *shndx_data = NULL; @@ -366,9 +367,6 @@ static int read_symbols(struct elf *elf) goto err; } - sym->type = GELF_ST_TYPE(sym->sym.st_info); - sym->bind = GELF_ST_BIND(sym->sym.st_info); - if ((sym->sym.st_shndx > SHN_UNDEF && sym->sym.st_shndx < SHN_LORESERVE) || (shndx_data && sym->sym.st_shndx == SHN_XINDEX)) { @@ -381,32 +379,14 @@ static int read_symbols(struct elf *elf) sym->name); goto err; } - if (sym->type == STT_SECTION) { + if (GELF_ST_TYPE(sym->sym.st_info) == STT_SECTION) { sym->name = sym->sec->name; sym->sec->sym = sym; } } else sym->sec = find_section_by_index(elf, 0); - sym->offset = sym->sym.st_value; - sym->len = sym->sym.st_size; - - rb_add(&sym->node, &sym->sec->symbol_tree, symbol_to_offset); - pnode = rb_prev(&sym->node); - if (pnode) - entry = &rb_entry(pnode, struct symbol, node)->list; - else - entry = &sym->sec->symbol_list; - list_add(&sym->list, entry); - elf_hash_add(elf->symbol_hash, &sym->hash, sym->idx); - elf_hash_add(elf->symbol_name_hash, &sym->name_hash, str_hash(sym->name)); - - /* - * Don't store empty STT_NOTYPE symbols in the rbtree. They - * can exist within a function, confusing the sorting. - */ - if (!sym->len) - rb_erase(&sym->node, &sym->sec->symbol_tree); + elf_add_symbol(elf, sym); } if (stats) @@ -473,12 +453,73 @@ err: return -1; } -void elf_add_reloc(struct elf *elf, struct reloc *reloc) +static struct section *elf_create_reloc_section(struct elf *elf, + struct section *base, + int reltype); + +int elf_add_reloc(struct elf *elf, struct section *sec, unsigned long offset, + unsigned int type, struct symbol *sym, int addend) { - struct section *sec = reloc->sec; + struct reloc *reloc; - list_add_tail(&reloc->list, &sec->reloc_list); + if (!sec->reloc && !elf_create_reloc_section(elf, sec, SHT_RELA)) + return -1; + + reloc = malloc(sizeof(*reloc)); + if (!reloc) { + perror("malloc"); + return -1; + } + memset(reloc, 0, sizeof(*reloc)); + + reloc->sec = sec->reloc; + reloc->offset = offset; + reloc->type = type; + reloc->sym = sym; + reloc->addend = addend; + + list_add_tail(&reloc->list, &sec->reloc->reloc_list); elf_hash_add(elf->reloc_hash, &reloc->hash, reloc_hash(reloc)); + + sec->reloc->changed = true; + + return 0; +} + +int elf_add_reloc_to_insn(struct elf *elf, struct section *sec, + unsigned long offset, unsigned int type, + struct section *insn_sec, unsigned long insn_off) +{ + struct symbol *sym; + int addend; + + if (insn_sec->sym) { + sym = insn_sec->sym; + addend = insn_off; + + } else { + /* + * The Clang assembler strips section symbols, so we have to + * reference the function symbol instead: + */ + sym = find_symbol_containing(insn_sec, insn_off); + if (!sym) { + /* + * Hack alert. This happens when we need to reference + * the NOP pad insn immediately after the function. + */ + sym = find_symbol_containing(insn_sec, insn_off - 1); + } + + if (!sym) { + WARN("can't find symbol containing %s+0x%lx", insn_sec->name, insn_off); + return -1; + } + + addend = insn_off - sym->offset; + } + + return elf_add_reloc(elf, sec, offset, type, sym, addend); } static int read_rel_reloc(struct section *sec, int i, struct reloc *reloc, unsigned int *symndx) @@ -558,7 +599,9 @@ static int read_relocs(struct elf *elf) return -1; } - elf_add_reloc(elf, reloc); + list_add_tail(&reloc->list, &sec->reloc_list); + elf_hash_add(elf->reloc_hash, &reloc->hash, reloc_hash(reloc)); + nr_reloc++; } max_reloc = max(max_reloc, nr_reloc); @@ -636,13 +679,108 @@ err: return NULL; } +static int elf_add_string(struct elf *elf, struct section *strtab, char *str) +{ + Elf_Data *data; + Elf_Scn *s; + int len; + + if (!strtab) + strtab = find_section_by_name(elf, ".strtab"); + if (!strtab) { + WARN("can't find .strtab section"); + return -1; + } + + s = elf_getscn(elf->elf, strtab->idx); + if (!s) { + WARN_ELF("elf_getscn"); + return -1; + } + + data = elf_newdata(s); + if (!data) { + WARN_ELF("elf_newdata"); + return -1; + } + + data->d_buf = str; + data->d_size = strlen(str) + 1; + data->d_align = 1; + + len = strtab->len; + strtab->len += data->d_size; + strtab->changed = true; + + return len; +} + +struct symbol *elf_create_undef_symbol(struct elf *elf, const char *name) +{ + struct section *symtab; + struct symbol *sym; + Elf_Data *data; + Elf_Scn *s; + + sym = malloc(sizeof(*sym)); + if (!sym) { + perror("malloc"); + return NULL; + } + memset(sym, 0, sizeof(*sym)); + + sym->name = strdup(name); + + sym->sym.st_name = elf_add_string(elf, NULL, sym->name); + if (sym->sym.st_name == -1) + return NULL; + + sym->sym.st_info = GELF_ST_INFO(STB_GLOBAL, STT_NOTYPE); + // st_other 0 + // st_shndx 0 + // st_value 0 + // st_size 0 + + symtab = find_section_by_name(elf, ".symtab"); + if (!symtab) { + WARN("can't find .symtab"); + return NULL; + } + + s = elf_getscn(elf->elf, symtab->idx); + if (!s) { + WARN_ELF("elf_getscn"); + return NULL; + } + + data = elf_newdata(s); + if (!data) { + WARN_ELF("elf_newdata"); + return NULL; + } + + data->d_buf = &sym->sym; + data->d_size = sizeof(sym->sym); + data->d_align = 1; + + sym->idx = symtab->len / sizeof(sym->sym); + + symtab->len += data->d_size; + symtab->changed = true; + + sym->sec = find_section_by_index(elf, 0); + + elf_add_symbol(elf, sym); + + return sym; +} + struct section *elf_create_section(struct elf *elf, const char *name, unsigned int sh_flags, size_t entsize, int nr) { struct section *sec, *shstrtab; size_t size = entsize * nr; Elf_Scn *s; - Elf_Data *data; sec = malloc(sizeof(*sec)); if (!sec) { @@ -699,7 +837,6 @@ struct section *elf_create_section(struct elf *elf, const char *name, sec->sh.sh_addralign = 1; sec->sh.sh_flags = SHF_ALLOC | sh_flags; - /* Add section name to .shstrtab (or .strtab for Clang) */ shstrtab = find_section_by_name(elf, ".shstrtab"); if (!shstrtab) @@ -708,27 +845,9 @@ struct section *elf_create_section(struct elf *elf, const char *name, WARN("can't find .shstrtab or .strtab section"); return NULL; } - - s = elf_getscn(elf->elf, shstrtab->idx); - if (!s) { - WARN_ELF("elf_getscn"); - return NULL; - } - - data = elf_newdata(s); - if (!data) { - WARN_ELF("elf_newdata"); + sec->sh.sh_name = elf_add_string(elf, shstrtab, sec->name); + if (sec->sh.sh_name == -1) return NULL; - } - - data->d_buf = sec->name; - data->d_size = strlen(name) + 1; - data->d_align = 1; - - sec->sh.sh_name = shstrtab->len; - - shstrtab->len += strlen(name) + 1; - shstrtab->changed = true; list_add_tail(&sec->list, &elf->sections); elf_hash_add(elf->section_hash, &sec->hash, sec->idx); @@ -799,7 +918,7 @@ static struct section *elf_create_rela_reloc_section(struct elf *elf, struct sec return sec; } -struct section *elf_create_reloc_section(struct elf *elf, +static struct section *elf_create_reloc_section(struct elf *elf, struct section *base, int reltype) { @@ -873,14 +992,11 @@ static int elf_rebuild_rela_reloc_section(struct section *sec, int nr) return 0; } -int elf_rebuild_reloc_section(struct elf *elf, struct section *sec) +static int elf_rebuild_reloc_section(struct elf *elf, struct section *sec) { struct reloc *reloc; int nr; - sec->changed = true; - elf->changed = true; - nr = 0; list_for_each_entry(reloc, &sec->reloc_list, list) nr++; @@ -944,9 +1060,15 @@ int elf_write(struct elf *elf) struct section *sec; Elf_Scn *s; - /* Update section headers for changed sections: */ + /* Update changed relocation sections and section headers: */ list_for_each_entry(sec, &elf->sections, list) { if (sec->changed) { + if (sec->base && + elf_rebuild_reloc_section(elf, sec)) { + WARN("elf_rebuild_reloc_section"); + return -1; + } + s = elf_getscn(elf->elf, sec->idx); if (!s) { WARN_ELF("elf_getscn"); @@ -958,6 +1080,7 @@ int elf_write(struct elf *elf) } sec->changed = false; + elf->changed = true; } } diff --git a/tools/objtool/include/objtool/arch.h b/tools/objtool/include/objtool/arch.h index 6ff0685f5cc5..062bb6e9b865 100644 --- a/tools/objtool/include/objtool/arch.h +++ b/tools/objtool/include/objtool/arch.h @@ -35,7 +35,6 @@ enum op_dest_type { OP_DEST_MEM, OP_DEST_PUSH, OP_DEST_PUSHF, - OP_DEST_LEAVE, }; struct op_dest { @@ -86,4 +85,8 @@ const char *arch_nop_insn(int len); int arch_decode_hint_reg(struct instruction *insn, u8 sp_reg); +bool arch_is_retpoline(struct symbol *sym); + +int arch_rewrite_retpolines(struct objtool_file *file); + #endif /* _ARCH_H */ diff --git a/tools/objtool/include/objtool/builtin.h b/tools/objtool/include/objtool/builtin.h index 2502bb27de17..15ac0b7d3d6a 100644 --- a/tools/objtool/include/objtool/builtin.h +++ b/tools/objtool/include/objtool/builtin.h @@ -8,7 +8,10 @@ #include <subcmd/parse-options.h> extern const struct option check_options[]; -extern bool no_fp, no_unreachable, retpoline, module, backtrace, uaccess, stats, validate_dup, vmlinux, mcount, noinstr; +extern bool no_fp, no_unreachable, retpoline, module, backtrace, uaccess, stats, + validate_dup, vmlinux, mcount, noinstr, backup; + +extern int cmd_parse_options(int argc, const char **argv, const char * const usage[]); extern int cmd_check(int argc, const char **argv); extern int cmd_orc(int argc, const char **argv); diff --git a/tools/objtool/include/objtool/check.h b/tools/objtool/include/objtool/check.h index f5be798107bc..56d50bc50c10 100644 --- a/tools/objtool/include/objtool/check.h +++ b/tools/objtool/include/objtool/check.h @@ -39,7 +39,7 @@ struct alt_group { struct instruction { struct list_head list; struct hlist_node hash; - struct list_head static_call_node; + struct list_head call_node; struct list_head mcount_loc_node; struct section *sec; unsigned long offset; @@ -56,6 +56,7 @@ struct instruction { struct instruction *jump_dest; struct instruction *first_jump_src; struct reloc *jump_table; + struct reloc *reloc; struct list_head alts; struct symbol *func; struct list_head stack_ops; diff --git a/tools/objtool/include/objtool/elf.h b/tools/objtool/include/objtool/elf.h index e6890cc70a25..45e5ede363b0 100644 --- a/tools/objtool/include/objtool/elf.h +++ b/tools/objtool/include/objtool/elf.h @@ -122,12 +122,18 @@ static inline u32 reloc_hash(struct reloc *reloc) struct elf *elf_open_read(const char *name, int flags); struct section *elf_create_section(struct elf *elf, const char *name, unsigned int sh_flags, size_t entsize, int nr); -struct section *elf_create_reloc_section(struct elf *elf, struct section *base, int reltype); -void elf_add_reloc(struct elf *elf, struct reloc *reloc); + +int elf_add_reloc(struct elf *elf, struct section *sec, unsigned long offset, + unsigned int type, struct symbol *sym, int addend); +int elf_add_reloc_to_insn(struct elf *elf, struct section *sec, + unsigned long offset, unsigned int type, + struct section *insn_sec, unsigned long insn_off); + int elf_write_insn(struct elf *elf, struct section *sec, unsigned long offset, unsigned int len, const char *insn); int elf_write_reloc(struct elf *elf, struct reloc *reloc); +struct symbol *elf_create_undef_symbol(struct elf *elf, const char *name); int elf_write(struct elf *elf); void elf_close(struct elf *elf); @@ -140,9 +146,6 @@ struct reloc *find_reloc_by_dest(const struct elf *elf, struct section *sec, uns struct reloc *find_reloc_by_dest_range(const struct elf *elf, struct section *sec, unsigned long offset, unsigned int len); struct symbol *find_func_containing(struct section *sec, unsigned long offset); -void insn_to_reloc_sym_addend(struct section *sec, unsigned long offset, - struct reloc *reloc); -int elf_rebuild_reloc_section(struct elf *elf, struct section *sec); #define for_each_sec(file, sec) \ list_for_each_entry(sec, &file->elf->sections, list) diff --git a/tools/objtool/include/objtool/objtool.h b/tools/objtool/include/objtool/objtool.h index e68e37476c15..e4084afb2304 100644 --- a/tools/objtool/include/objtool/objtool.h +++ b/tools/objtool/include/objtool/objtool.h @@ -18,6 +18,7 @@ struct objtool_file { struct elf *elf; struct list_head insn_list; DECLARE_HASHTABLE(insn_hash, 20); + struct list_head retpoline_call_list; struct list_head static_call_list; struct list_head mcount_loc_list; bool ignore_unreachables, c_file, hints, rodata; diff --git a/tools/objtool/objtool.c b/tools/objtool/objtool.c index 7b97ce499405..e21db8bce493 100644 --- a/tools/objtool/objtool.c +++ b/tools/objtool/objtool.c @@ -17,6 +17,7 @@ #include <stdbool.h> #include <string.h> #include <stdlib.h> +#include <unistd.h> #include <subcmd/exec-cmd.h> #include <subcmd/pager.h> #include <linux/kernel.h> @@ -44,6 +45,64 @@ bool help; const char *objname; static struct objtool_file file; +static bool objtool_create_backup(const char *_objname) +{ + int len = strlen(_objname); + char *buf, *base, *name = malloc(len+6); + int s, d, l, t; + + if (!name) { + perror("failed backup name malloc"); + return false; + } + + strcpy(name, _objname); + strcpy(name + len, ".orig"); + + d = open(name, O_CREAT|O_WRONLY|O_TRUNC, 0644); + if (d < 0) { + perror("failed to create backup file"); + return false; + } + + s = open(_objname, O_RDONLY); + if (s < 0) { + perror("failed to open orig file"); + return false; + } + + buf = malloc(4096); + if (!buf) { + perror("failed backup data malloc"); + return false; + } + + while ((l = read(s, buf, 4096)) > 0) { + base = buf; + do { + t = write(d, base, l); + if (t < 0) { + perror("failed backup write"); + return false; + } + base += t; + l -= t; + } while (l); + } + + if (l < 0) { + perror("failed backup read"); + return false; + } + + free(name); + free(buf); + close(d); + close(s); + + return true; +} + struct objtool_file *objtool_open_read(const char *_objname) { if (objname) { @@ -59,8 +118,14 @@ struct objtool_file *objtool_open_read(const char *_objname) if (!file.elf) return NULL; + if (backup && !objtool_create_backup(objname)) { + WARN("can't create backup file"); + return NULL; + } + INIT_LIST_HEAD(&file.insn_list); hash_init(file.insn_hash); + INIT_LIST_HEAD(&file.retpoline_call_list); INIT_LIST_HEAD(&file.static_call_list); INIT_LIST_HEAD(&file.mcount_loc_list); file.c_file = !vmlinux && find_section_by_name(file.elf, ".comment"); diff --git a/tools/objtool/orc_gen.c b/tools/objtool/orc_gen.c index 738aa5021bc4..dc9b7dd314b0 100644 --- a/tools/objtool/orc_gen.c +++ b/tools/objtool/orc_gen.c @@ -82,12 +82,11 @@ static int init_orc_entry(struct orc_entry *orc, struct cfi_state *cfi) } static int write_orc_entry(struct elf *elf, struct section *orc_sec, - struct section *ip_rsec, unsigned int idx, + struct section *ip_sec, unsigned int idx, struct section *insn_sec, unsigned long insn_off, struct orc_entry *o) { struct orc_entry *orc; - struct reloc *reloc; /* populate ORC data */ orc = (struct orc_entry *)orc_sec->data->d_buf + idx; @@ -96,25 +95,9 @@ static int write_orc_entry(struct elf *elf, struct section *orc_sec, orc->bp_offset = bswap_if_needed(orc->bp_offset); /* populate reloc for ip */ - reloc = malloc(sizeof(*reloc)); - if (!reloc) { - perror("malloc"); + if (elf_add_reloc_to_insn(elf, ip_sec, idx * sizeof(int), R_X86_64_PC32, + insn_sec, insn_off)) return -1; - } - memset(reloc, 0, sizeof(*reloc)); - - insn_to_reloc_sym_addend(insn_sec, insn_off, reloc); - if (!reloc->sym) { - WARN("missing symbol for insn at offset 0x%lx", - insn_off); - return -1; - } - - reloc->type = R_X86_64_PC32; - reloc->offset = idx * sizeof(int); - reloc->sec = ip_rsec; - - elf_add_reloc(elf, reloc); return 0; } @@ -153,7 +136,7 @@ static unsigned long alt_group_len(struct alt_group *alt_group) int orc_create(struct objtool_file *file) { - struct section *sec, *ip_rsec, *orc_sec; + struct section *sec, *orc_sec; unsigned int nr = 0, idx = 0; struct orc_list_entry *entry; struct list_head orc_list; @@ -242,20 +225,14 @@ int orc_create(struct objtool_file *file) sec = elf_create_section(file->elf, ".orc_unwind_ip", 0, sizeof(int), nr); if (!sec) return -1; - ip_rsec = elf_create_reloc_section(file->elf, sec, SHT_RELA); - if (!ip_rsec) - return -1; /* Write ORC entries to sections: */ list_for_each_entry(entry, &orc_list, list) { - if (write_orc_entry(file->elf, orc_sec, ip_rsec, idx++, + if (write_orc_entry(file->elf, orc_sec, sec, idx++, entry->insn_sec, entry->insn_off, &entry->orc)) return -1; } - if (elf_rebuild_reloc_section(file->elf, ip_rsec)) - return -1; - return 0; } diff --git a/tools/objtool/special.c b/tools/objtool/special.c index 2c7fbda7b055..07b21cfabf5c 100644 --- a/tools/objtool/special.c +++ b/tools/objtool/special.c @@ -106,6 +106,14 @@ static int get_alt_entry(struct elf *elf, struct special_entry *entry, return -1; } + /* + * Skip retpoline .altinstr_replacement... we already rewrite the + * instructions for retpolines anyway, see arch_is_retpoline() + * usage in add_{call,jump}_destinations(). + */ + if (arch_is_retpoline(new_reloc->sym)) + return 1; + alt->new_sec = new_reloc->sym->sec; alt->new_off = (unsigned int)new_reloc->addend; @@ -154,7 +162,9 @@ int special_get_alts(struct elf *elf, struct list_head *alts) memset(alt, 0, sizeof(*alt)); ret = get_alt_entry(elf, entry, sec, idx, alt); - if (ret) + if (ret > 0) + continue; + if (ret < 0) return ret; list_add_tail(&alt->list, alts); diff --git a/tools/objtool/sync-check.sh b/tools/objtool/sync-check.sh index 606a4b5e929f..105a291ff8e7 100755 --- a/tools/objtool/sync-check.sh +++ b/tools/objtool/sync-check.sh @@ -10,17 +10,21 @@ FILES="include/linux/objtool.h" if [ "$SRCARCH" = "x86" ]; then FILES="$FILES +arch/x86/include/asm/nops.h arch/x86/include/asm/inat_types.h arch/x86/include/asm/orc_types.h arch/x86/include/asm/emulate_prefix.h arch/x86/lib/x86-opcode-map.txt arch/x86/tools/gen-insn-attr-x86.awk include/linux/static_call_types.h -arch/x86/include/asm/inat.h -I '^#include [\"<]\(asm/\)*inat_types.h[\">]' -arch/x86/include/asm/insn.h -I '^#include [\"<]\(asm/\)*inat.h[\">]' -arch/x86/lib/inat.c -I '^#include [\"<]\(../include/\)*asm/insn.h[\">]' -arch/x86/lib/insn.c -I '^#include [\"<]\(../include/\)*asm/in\(at\|sn\).h[\">]' -I '^#include [\"<]\(../include/\)*asm/emulate_prefix.h[\">]' " + +SYNC_CHECK_FILES=' +arch/x86/include/asm/inat.h +arch/x86/include/asm/insn.h +arch/x86/lib/inat.c +arch/x86/lib/insn.c +' fi check_2 () { @@ -63,3 +67,9 @@ while read -r file_entry; do done <<EOF $FILES EOF + +if [ "$SRCARCH" = "x86" ]; then + for i in $SYNC_CHECK_FILES; do + check $i '-I "^.*\/\*.*__ignore_sync_check__.*\*\/.*$"' + done +fi diff --git a/tools/perf/arch/x86/tests/insn-x86.c b/tools/perf/arch/x86/tests/insn-x86.c index 4f75ae990140..0262b0d8ccf5 100644 --- a/tools/perf/arch/x86/tests/insn-x86.c +++ b/tools/perf/arch/x86/tests/insn-x86.c @@ -96,13 +96,12 @@ static int get_branch(const char *branch_str) static int test_data_item(struct test_data *dat, int x86_64) { struct intel_pt_insn intel_pt_insn; + int op, branch, ret; struct insn insn; - int op, branch; - insn_init(&insn, dat->data, MAX_INSN_SIZE, x86_64); - insn_get_length(&insn); - - if (!insn_complete(&insn)) { + ret = insn_decode(&insn, dat->data, MAX_INSN_SIZE, + x86_64 ? INSN_MODE_64 : INSN_MODE_32); + if (ret < 0) { pr_debug("Failed to decode: %s\n", dat->asm_rep); return -1; } diff --git a/tools/perf/arch/x86/util/archinsn.c b/tools/perf/arch/x86/util/archinsn.c index 34d600c51044..546feda08428 100644 --- a/tools/perf/arch/x86/util/archinsn.c +++ b/tools/perf/arch/x86/util/archinsn.c @@ -11,7 +11,7 @@ void arch_fetch_insn(struct perf_sample *sample, struct machine *machine) { struct insn insn; - int len; + int len, ret; bool is64bit = false; if (!sample->ip) @@ -19,8 +19,9 @@ void arch_fetch_insn(struct perf_sample *sample, len = thread__memcpy(thread, machine, sample->insn, sample->ip, sizeof(sample->insn), &is64bit); if (len <= 0) return; - insn_init(&insn, sample->insn, len, is64bit); - insn_get_length(&insn); - if (insn_complete(&insn) && insn.length <= len) + + ret = insn_decode(&insn, sample->insn, len, + is64bit ? INSN_MODE_64 : INSN_MODE_32); + if (ret >= 0 && insn.length <= len) sample->insn_len = insn.length; } diff --git a/tools/perf/arch/x86/util/intel-pt.c b/tools/perf/arch/x86/util/intel-pt.c index a6420c647959..6df0dc00d73a 100644 --- a/tools/perf/arch/x86/util/intel-pt.c +++ b/tools/perf/arch/x86/util/intel-pt.c @@ -776,6 +776,12 @@ static int intel_pt_recording_options(struct auxtrace_record *itr, } } + if (!opts->auxtrace_snapshot_mode && !opts->auxtrace_sample_mode) { + u32 aux_watermark = opts->auxtrace_mmap_pages * page_size / 4; + + intel_pt_evsel->core.attr.aux_watermark = aux_watermark; + } + intel_pt_parse_terms(intel_pt_pmu->name, &intel_pt_pmu->format, "tsc", &tsc_bit); diff --git a/tools/perf/builtin-ftrace.c b/tools/perf/builtin-ftrace.c index d49448a1060c..87cb11a7a3ee 100644 --- a/tools/perf/builtin-ftrace.c +++ b/tools/perf/builtin-ftrace.c @@ -289,7 +289,7 @@ static int set_tracing_pid(struct perf_ftrace *ftrace) for (i = 0; i < perf_thread_map__nr(ftrace->evlist->core.threads); i++) { scnprintf(buf, sizeof(buf), "%d", - ftrace->evlist->core.threads->map[i]); + perf_thread_map__pid(ftrace->evlist->core.threads, i)); if (append_tracing_file("set_ftrace_pid", buf) < 0) return -1; } diff --git a/tools/perf/check-headers.sh b/tools/perf/check-headers.sh index dded93a2bc89..07857dfb4d91 100755 --- a/tools/perf/check-headers.sh +++ b/tools/perf/check-headers.sh @@ -75,6 +75,13 @@ include/uapi/asm-generic/mman-common.h include/uapi/asm-generic/unistd.h ' +SYNC_CHECK_FILES=' +arch/x86/include/asm/inat.h +arch/x86/include/asm/insn.h +arch/x86/lib/inat.c +arch/x86/lib/insn.c +' + # These copies are under tools/perf/trace/beauty/ as they are not used to in # building object files only by scripts in tools/perf/trace/beauty/ to generate # tables that then gets included in .c files for things like id->string syscall @@ -129,6 +136,10 @@ for i in $FILES; do check $i -B done +for i in $SYNC_CHECK_FILES; do + check $i '-I "^.*\/\*.*__ignore_sync_check__.*\*\/.*$"' +done + # diff with extra ignore lines check arch/x86/lib/memcpy_64.S '-I "^EXPORT_SYMBOL" -I "^#include <asm/export.h>" -I"^SYM_FUNC_START\(_LOCAL\)*(memcpy_\(erms\|orig\))"' check arch/x86/lib/memset_64.S '-I "^EXPORT_SYMBOL" -I "^#include <asm/export.h>" -I"^SYM_FUNC_START\(_LOCAL\)*(memset_\(erms\|orig\))"' @@ -137,10 +148,6 @@ check include/uapi/linux/mman.h '-I "^#include <\(uapi/\)*asm/mman.h>"' check include/linux/build_bug.h '-I "^#\(ifndef\|endif\)\( \/\/\)* static_assert$"' check include/linux/ctype.h '-I "isdigit("' check lib/ctype.c '-I "^EXPORT_SYMBOL" -I "^#include <linux/export.h>" -B' -check arch/x86/include/asm/inat.h '-I "^#include [\"<]\(asm/\)*inat_types.h[\">]"' -check arch/x86/include/asm/insn.h '-I "^#include [\"<]\(asm/\)*inat.h[\">]"' -check arch/x86/lib/inat.c '-I "^#include [\"<]\(../include/\)*asm/insn.h[\">]"' -check arch/x86/lib/insn.c '-I "^#include [\"<]\(../include/\)*asm/in\(at\|sn\).h[\">]" -I "^#include [\"<]\(../include/\)*asm/emulate_prefix.h[\">]"' # diff non-symmetric files check_2 tools/perf/arch/x86/entry/syscalls/syscall_64.tbl arch/x86/entry/syscalls/syscall_64.tbl diff --git a/tools/perf/trace/beauty/tracepoints/x86_msr.sh b/tools/perf/trace/beauty/tracepoints/x86_msr.sh index 27ee1ea1fe94..9b0614a87831 100755 --- a/tools/perf/trace/beauty/tracepoints/x86_msr.sh +++ b/tools/perf/trace/beauty/tracepoints/x86_msr.sh @@ -15,7 +15,7 @@ x86_msr_index=${arch_x86_header_dir}/msr-index.h printf "static const char *x86_MSRs[] = {\n" regex='^[[:space:]]*#[[:space:]]*define[[:space:]]+MSR_([[:alnum:]][[:alnum:]_]+)[[:space:]]+(0x00000[[:xdigit:]]+)[[:space:]]*.*' -egrep $regex ${x86_msr_index} | egrep -v 'MSR_(ATOM|P[46]|IA32_(TSCDEADLINE|UCODE_REV)|IDT_FCR4)' | \ +egrep $regex ${x86_msr_index} | egrep -v 'MSR_(ATOM|P[46]|IA32_(TSC_DEADLINE|UCODE_REV)|IDT_FCR4)' | \ sed -r "s/$regex/\2 \1/g" | sort -n | \ xargs printf "\t[%s] = \"%s\",\n" printf "};\n\n" diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c index 5b6ccb90b397..1b4091a3b508 100644 --- a/tools/perf/util/auxtrace.c +++ b/tools/perf/util/auxtrace.c @@ -634,7 +634,7 @@ int auxtrace_parse_snapshot_options(struct auxtrace_record *itr, break; } - if (itr) + if (itr && itr->parse_snapshot_options) return itr->parse_snapshot_options(itr, opts, str); pr_err("No AUX area tracing to snapshot\n"); diff --git a/tools/perf/util/data.c b/tools/perf/util/data.c index f29af4fc3d09..8fca4779ae6a 100644 --- a/tools/perf/util/data.c +++ b/tools/perf/util/data.c @@ -35,7 +35,7 @@ void perf_data__close_dir(struct perf_data *data) int perf_data__create_dir(struct perf_data *data, int nr) { struct perf_data_file *files = NULL; - int i, ret = -1; + int i, ret; if (WARN_ON(!data->is_dir)) return -EINVAL; @@ -51,7 +51,8 @@ int perf_data__create_dir(struct perf_data *data, int nr) for (i = 0; i < nr; i++) { struct perf_data_file *file = &files[i]; - if (asprintf(&file->path, "%s/data.%d", data->path, i) < 0) + ret = asprintf(&file->path, "%s/data.%d", data->path, i); + if (ret < 0) goto out_err; ret = open(file->path, O_RDWR|O_CREAT|O_TRUNC, S_IRUSR|S_IWUSR); diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c index 2f6cc7eea251..593f20e9774c 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c +++ b/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c @@ -169,11 +169,13 @@ int intel_pt_get_insn(const unsigned char *buf, size_t len, int x86_64, struct intel_pt_insn *intel_pt_insn) { struct insn insn; + int ret; - insn_init(&insn, buf, len, x86_64); - insn_get_length(&insn); - if (!insn_complete(&insn) || insn.length > len) + ret = insn_decode(&insn, buf, len, + x86_64 ? INSN_MODE_64 : INSN_MODE_32); + if (ret < 0 || insn.length > len) return -1; + intel_pt_insn_decoder(&insn, intel_pt_insn); if (insn.length < INTEL_PT_INSN_BUF_SZ) memcpy(intel_pt_insn->buf, buf, insn.length); @@ -194,12 +196,13 @@ const char *dump_insn(struct perf_insn *x, uint64_t ip __maybe_unused, u8 *inbuf, int inlen, int *lenp) { struct insn insn; - int n, i; + int n, i, ret; int left; - insn_init(&insn, inbuf, inlen, x->is64bit); - insn_get_length(&insn); - if (!insn_complete(&insn) || insn.length > inlen) + ret = insn_decode(&insn, inbuf, inlen, + x->is64bit ? INSN_MODE_64 : INSN_MODE_32); + + if (ret < 0 || insn.length > inlen) return "<bad>"; if (lenp) *lenp = insn.length; diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c index fbc40a2c17d4..8af693d9678c 100644 --- a/tools/perf/util/map.c +++ b/tools/perf/util/map.c @@ -840,15 +840,18 @@ out: int maps__clone(struct thread *thread, struct maps *parent) { struct maps *maps = thread->maps; - int err = -ENOMEM; + int err; struct map *map; down_read(&parent->lock); maps__for_each_entry(parent, map) { struct map *new = map__clone(map); - if (new == NULL) + + if (new == NULL) { + err = -ENOMEM; goto out_unlock; + } err = unwind__prepare_access(maps, new, NULL); if (err) diff --git a/tools/power/acpi/common/cmfsize.c b/tools/power/acpi/common/cmfsize.c index 9ea2c0aeb86c..185b8c588e1d 100644 --- a/tools/power/acpi/common/cmfsize.c +++ b/tools/power/acpi/common/cmfsize.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 /****************************************************************************** * - * Module Name: cfsize - Common get file size function + * Module Name: cmfsize - Common get file size function * * Copyright (C) 2000 - 2021, Intel Corp. * diff --git a/tools/power/pm-graph/sleepgraph.py b/tools/power/pm-graph/sleepgraph.py index 81f4b8abbdf7..ffd50953a024 100755 --- a/tools/power/pm-graph/sleepgraph.py +++ b/tools/power/pm-graph/sleepgraph.py @@ -6819,7 +6819,7 @@ if __name__ == '__main__': sysvals.outdir = val sysvals.notestrun = True if(os.path.isdir(val) == False): - doError('%s is not accesible' % val) + doError('%s is not accessible' % val) elif(arg == '-filter'): try: val = next(args) diff --git a/tools/power/x86/intel-speed-select/isst-config.c b/tools/power/x86/intel-speed-select/isst-config.c index 582feb88eca3..ab940c508ef0 100644 --- a/tools/power/x86/intel-speed-select/isst-config.c +++ b/tools/power/x86/intel-speed-select/isst-config.c @@ -15,7 +15,7 @@ struct process_cmd_struct { int arg; }; -static const char *version_str = "v1.8"; +static const char *version_str = "v1.9"; static const int supported_api_ver = 1; static struct isst_if_platform_info isst_platform_info; static char *progname; @@ -381,6 +381,18 @@ static void set_cpu_online_offline(int cpu, int state) close(fd); } +static void force_all_cpus_online(void) +{ + int i; + + fprintf(stderr, "Forcing all CPUs online\n"); + + for (i = 0; i < topo_max_cpus; ++i) + set_cpu_online_offline(i, 1); + + unlink("/var/run/isst_cpu_topology.dat"); +} + #define MAX_PACKAGE_COUNT 8 #define MAX_DIE_PER_PACKAGE 2 static void for_each_online_package_in_set(void (*callback)(int, void *, void *, @@ -959,6 +971,10 @@ static void isst_print_extended_platform_info(void) fprintf(outf, "Intel(R) SST-BF (feature base-freq) is not supported\n"); ret = isst_read_pm_config(i, &cp_state, &cp_cap); + if (ret) { + fprintf(outf, "Intel(R) SST-CP (feature core-power) status is unknown\n"); + return; + } if (cp_cap) fprintf(outf, "Intel(R) SST-CP (feature core-power) is supported\n"); else @@ -2763,6 +2779,7 @@ static void usage(void) printf("\t[-f|--format] : output format [json|text]. Default: text\n"); printf("\t[-h|--help] : Print help\n"); printf("\t[-i|--info] : Print platform information\n"); + printf("\t[-a|--all-cpus-online] : Force online every CPU in the system\n"); printf("\t[-o|--out] : Output file\n"); printf("\t\t\tDefault : stderr\n"); printf("\t[-p|--pause] : Delay between two mail box commands in milliseconds\n"); @@ -2791,7 +2808,6 @@ static void usage(void) static void print_version(void) { fprintf(outf, "Version %s\n", version_str); - fprintf(outf, "Build date %s time %s\n", __DATE__, __TIME__); exit(0); } @@ -2800,11 +2816,12 @@ static void cmdline(int argc, char **argv) const char *pathname = "/dev/isst_interface"; char *ptr; FILE *fp; - int opt; + int opt, force_cpus_online = 0; int option_index = 0; int ret; static struct option long_options[] = { + { "all-cpus-online", no_argument, 0, 'a' }, { "cpu", required_argument, 0, 'c' }, { "debug", no_argument, 0, 'd' }, { "format", required_argument, 0, 'f' }, @@ -2840,9 +2857,12 @@ static void cmdline(int argc, char **argv) } progname = argv[0]; - while ((opt = getopt_long_only(argc, argv, "+c:df:hio:v", long_options, + while ((opt = getopt_long_only(argc, argv, "+c:df:hio:va", long_options, &option_index)) != -1) { switch (opt) { + case 'a': + force_cpus_online = 1; + break; case 'c': parse_cpu_command(optarg); break; @@ -2892,6 +2912,8 @@ static void cmdline(int argc, char **argv) exit(0); } set_max_cpu_num(); + if (force_cpus_online) + force_all_cpus_online(); store_cpu_topology(); set_cpu_present_cpu_mask(); set_cpu_target_cpu_mask(); diff --git a/tools/power/x86/intel-speed-select/isst-display.c b/tools/power/x86/intel-speed-select/isst-display.c index 8e54ce47648e..3bf1820c0da1 100644 --- a/tools/power/x86/intel-speed-select/isst-display.c +++ b/tools/power/x86/intel-speed-select/isst-display.c @@ -25,10 +25,14 @@ static void printcpulist(int str_len, char *str, int mask_size, index = snprintf(&str[curr_index], str_len - curr_index, ","); curr_index += index; + if (curr_index >= str_len) + break; } index = snprintf(&str[curr_index], str_len - curr_index, "%d", i); curr_index += index; + if (curr_index >= str_len) + break; first = 0; } } @@ -64,10 +68,14 @@ static void printcpumask(int str_len, char *str, int mask_size, index = snprintf(&str[curr_index], str_len - curr_index, "%08x", mask[i]); curr_index += index; + if (curr_index >= str_len) + break; if (i) { strncat(&str[curr_index], ",", str_len - curr_index); curr_index++; } + if (curr_index >= str_len) + break; } free(mask); @@ -185,7 +193,7 @@ static void _isst_pbf_display_information(int cpu, FILE *outf, int level, int disp_level) { char header[256]; - char value[256]; + char value[512]; snprintf(header, sizeof(header), "speed-select-base-freq-properties"); format_and_print(outf, disp_level, header, NULL); @@ -349,7 +357,7 @@ void isst_ctdp_display_information(int cpu, FILE *outf, int tdp_level, struct isst_pkg_ctdp *pkg_dev) { char header[256]; - char value[256]; + char value[512]; static int level; int i; diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index a7c4f0772e53..5939615265f1 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -2449,7 +2449,7 @@ dump_knl_turbo_ratio_limits(void) fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT: 0x%08llx\n", base_cpu, msr); - /** + /* * Turbo encoding in KNL is as follows: * [0] -- Reserved * [7:1] -- Base value of number of active cores of bucket 1. diff --git a/tools/scripts/Makefile.include b/tools/scripts/Makefile.include index 91130648d8e6..25adfec2cb39 100644 --- a/tools/scripts/Makefile.include +++ b/tools/scripts/Makefile.include @@ -94,7 +94,8 @@ endif # in newer systems. # Needed for the __raw_cmpxchg in tools/arch/x86/include/asm/cmpxchg.h # -# See https://lkml.org/lkml/2006/11/28/253 and https://gcc.gnu.org/gcc-4.8/changes.html, +# See https://lore.kernel.org/lkml/9a8748490611281710g78402fbeh8ff7fcc162dbcbca@mail.gmail.com/ +# and https://gcc.gnu.org/gcc-4.8/changes.html, # that takes into account Linus's comments (search for Wshadow) for the reasoning about # -Wshadow not being interesting before gcc 4.8. diff --git a/tools/spi/Makefile b/tools/spi/Makefile index ada881afb489..0aa6dbd31fb8 100644 --- a/tools/spi/Makefile +++ b/tools/spi/Makefile @@ -25,11 +25,12 @@ include $(srctree)/tools/build/Makefile.include # # We need the following to be outside of kernel tree # -$(OUTPUT)include/linux/spi/spidev.h: ../../include/uapi/linux/spi/spidev.h +$(OUTPUT)include/linux/spi: ../../include/uapi/linux/spi mkdir -p $(OUTPUT)include/linux/spi 2>&1 || true ln -sf $(CURDIR)/../../include/uapi/linux/spi/spidev.h $@ + ln -sf $(CURDIR)/../../include/uapi/linux/spi/spi.h $@ -prepare: $(OUTPUT)include/linux/spi/spidev.h +prepare: $(OUTPUT)include/linux/spi # # spidev_test diff --git a/tools/testing/kunit/kunit.py b/tools/testing/kunit/kunit.py index d5144fcb03ac..5da8fb3762f9 100755 --- a/tools/testing/kunit/kunit.py +++ b/tools/testing/kunit/kunit.py @@ -184,7 +184,9 @@ def add_common_opts(parser) -> None: help='Run all KUnit tests through allyesconfig', action='store_true') parser.add_argument('--kunitconfig', - help='Path to Kconfig fragment that enables KUnit tests', + help='Path to Kconfig fragment that enables KUnit tests.' + ' If given a directory, (e.g. lib/kunit), "/.kunitconfig" ' + 'will get automatically appended.', metavar='kunitconfig') def add_build_opts(parser) -> None: diff --git a/tools/testing/kunit/kunit_kernel.py b/tools/testing/kunit/kunit_kernel.py index f309a33256cd..89a7d4024e87 100644 --- a/tools/testing/kunit/kunit_kernel.py +++ b/tools/testing/kunit/kunit_kernel.py @@ -132,6 +132,8 @@ class LinuxSourceTree(object): return if kunitconfig_path: + if os.path.isdir(kunitconfig_path): + kunitconfig_path = os.path.join(kunitconfig_path, KUNITCONFIG_PATH) if not os.path.exists(kunitconfig_path): raise ConfigError(f'Specified kunitconfig ({kunitconfig_path}) does not exist') else: diff --git a/tools/testing/kunit/kunit_tool_test.py b/tools/testing/kunit/kunit_tool_test.py index 1ad3049e9069..2e809dd956a7 100755 --- a/tools/testing/kunit/kunit_tool_test.py +++ b/tools/testing/kunit/kunit_tool_test.py @@ -251,6 +251,12 @@ class LinuxSourceTreeTest(unittest.TestCase): with tempfile.NamedTemporaryFile('wt') as kunitconfig: tree = kunit_kernel.LinuxSourceTree('', kunitconfig_path=kunitconfig.name) + def test_dir_kunitconfig(self): + with tempfile.TemporaryDirectory('') as dir: + with open(os.path.join(dir, '.kunitconfig'), 'w') as f: + pass + tree = kunit_kernel.LinuxSourceTree('', kunitconfig_path=dir) + # TODO: add more test cases. diff --git a/tools/testing/selftests/arm64/Makefile b/tools/testing/selftests/arm64/Makefile index 2c9d012797a7..ced910fb4019 100644 --- a/tools/testing/selftests/arm64/Makefile +++ b/tools/testing/selftests/arm64/Makefile @@ -4,7 +4,7 @@ ARCH ?= $(shell uname -m 2>/dev/null || echo not) ifneq (,$(filter $(ARCH),aarch64 arm64)) -ARM64_SUBTARGETS ?= tags signal pauth fp mte +ARM64_SUBTARGETS ?= tags signal pauth fp mte bti else ARM64_SUBTARGETS := endif diff --git a/tools/testing/selftests/arm64/bti/.gitignore b/tools/testing/selftests/arm64/bti/.gitignore new file mode 100644 index 000000000000..73869fabada4 --- /dev/null +++ b/tools/testing/selftests/arm64/bti/.gitignore @@ -0,0 +1,2 @@ +btitest +nobtitest diff --git a/tools/testing/selftests/arm64/bti/Makefile b/tools/testing/selftests/arm64/bti/Makefile new file mode 100644 index 000000000000..73e013c082a6 --- /dev/null +++ b/tools/testing/selftests/arm64/bti/Makefile @@ -0,0 +1,61 @@ +# SPDX-License-Identifier: GPL-2.0 + +TEST_GEN_PROGS := btitest nobtitest + +PROGS := $(patsubst %,gen/%,$(TEST_GEN_PROGS)) + +# These tests are built as freestanding binaries since otherwise BTI +# support in ld.so is required which is not currently widespread; when +# it is available it will still be useful to test this separately as the +# cases for statically linked and dynamically lined binaries are +# slightly different. + +CFLAGS_NOBTI = -DBTI=0 +CFLAGS_BTI = -mbranch-protection=standard -DBTI=1 + +CFLAGS_COMMON = -ffreestanding -Wall -Wextra $(CFLAGS) + +BTI_CC_COMMAND = $(CC) $(CFLAGS_BTI) $(CFLAGS_COMMON) -c -o $@ $< +NOBTI_CC_COMMAND = $(CC) $(CFLAGS_NOBTI) $(CFLAGS_COMMON) -c -o $@ $< + +%-bti.o: %.c + $(BTI_CC_COMMAND) + +%-bti.o: %.S + $(BTI_CC_COMMAND) + +%-nobti.o: %.c + $(NOBTI_CC_COMMAND) + +%-nobti.o: %.S + $(NOBTI_CC_COMMAND) + +BTI_OBJS = \ + test-bti.o \ + signal-bti.o \ + start-bti.o \ + syscall-bti.o \ + system-bti.o \ + teststubs-bti.o \ + trampoline-bti.o +gen/btitest: $(BTI_OBJS) + $(CC) $(CFLAGS_BTI) $(CFLAGS_COMMON) -nostdlib -o $@ $^ + +NOBTI_OBJS = \ + test-nobti.o \ + signal-nobti.o \ + start-nobti.o \ + syscall-nobti.o \ + system-nobti.o \ + teststubs-nobti.o \ + trampoline-nobti.o +gen/nobtitest: $(NOBTI_OBJS) + $(CC) $(CFLAGS_BTI) $(CFLAGS_COMMON) -nostdlib -o $@ $^ + +# Including KSFT lib.mk here will also mangle the TEST_GEN_PROGS list +# to account for any OUTPUT target-dirs optionally provided by +# the toplevel makefile +include ../../lib.mk + +$(TEST_GEN_PROGS): $(PROGS) + cp $(PROGS) $(OUTPUT)/ diff --git a/tools/testing/selftests/arm64/bti/assembler.h b/tools/testing/selftests/arm64/bti/assembler.h new file mode 100644 index 000000000000..04e7b72880ef --- /dev/null +++ b/tools/testing/selftests/arm64/bti/assembler.h @@ -0,0 +1,80 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2019 Arm Limited + * Original author: Dave Martin <Dave.Martin@arm.com> + */ + +#ifndef ASSEMBLER_H +#define ASSEMBLER_H + +#define NT_GNU_PROPERTY_TYPE_0 5 +#define GNU_PROPERTY_AARCH64_FEATURE_1_AND 0xc0000000 + +/* Bits for GNU_PROPERTY_AARCH64_FEATURE_1_BTI */ +#define GNU_PROPERTY_AARCH64_FEATURE_1_BTI (1U << 0) +#define GNU_PROPERTY_AARCH64_FEATURE_1_PAC (1U << 1) + + +.macro startfn name:req + .globl \name +\name: + .macro endfn + .size \name, . - \name + .type \name, @function + .purgem endfn + .endm +.endm + +.macro emit_aarch64_feature_1_and + .pushsection .note.gnu.property, "a" + .align 3 + .long 2f - 1f + .long 6f - 3f + .long NT_GNU_PROPERTY_TYPE_0 +1: .string "GNU" +2: + .align 3 +3: .long GNU_PROPERTY_AARCH64_FEATURE_1_AND + .long 5f - 4f +4: +#if BTI + .long GNU_PROPERTY_AARCH64_FEATURE_1_PAC | \ + GNU_PROPERTY_AARCH64_FEATURE_1_BTI +#else + .long 0 +#endif +5: + .align 3 +6: + .popsection +.endm + +.macro paciasp + hint 0x19 +.endm + +.macro autiasp + hint 0x1d +.endm + +.macro __bti_ + hint 0x20 +.endm + +.macro __bti_c + hint 0x22 +.endm + +.macro __bti_j + hint 0x24 +.endm + +.macro __bti_jc + hint 0x26 +.endm + +.macro bti what= + __bti_\what +.endm + +#endif /* ! ASSEMBLER_H */ diff --git a/tools/testing/selftests/arm64/bti/btitest.h b/tools/testing/selftests/arm64/bti/btitest.h new file mode 100644 index 000000000000..2aff9b10336e --- /dev/null +++ b/tools/testing/selftests/arm64/bti/btitest.h @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2019 Arm Limited + * Original author: Dave Martin <Dave.Martin@arm.com> + */ + +#ifndef BTITEST_H +#define BTITEST_H + +/* Trampolines for calling the test stubs: */ +void call_using_br_x0(void (*)(void)); +void call_using_br_x16(void (*)(void)); +void call_using_blr(void (*)(void)); + +/* Test stubs: */ +void nohint_func(void); +void bti_none_func(void); +void bti_c_func(void); +void bti_j_func(void); +void bti_jc_func(void); +void paciasp_func(void); + +#endif /* !BTITEST_H */ diff --git a/tools/testing/selftests/arm64/bti/compiler.h b/tools/testing/selftests/arm64/bti/compiler.h new file mode 100644 index 000000000000..ebb6204f447a --- /dev/null +++ b/tools/testing/selftests/arm64/bti/compiler.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2019 Arm Limited + * Original author: Dave Martin <Dave.Martin@arm.com> + */ + +#ifndef COMPILER_H +#define COMPILER_H + +#define __always_unused __attribute__((__unused__)) +#define __noreturn __attribute__((__noreturn__)) +#define __unreachable() __builtin_unreachable() + +/* curse(e) has value e, but the compiler cannot assume so */ +#define curse(e) ({ \ + __typeof__(e) __curse_e = (e); \ + asm ("" : "+r" (__curse_e)); \ + __curse_e; \ +}) + +#endif /* ! COMPILER_H */ diff --git a/tools/testing/selftests/arm64/bti/gen/.gitignore b/tools/testing/selftests/arm64/bti/gen/.gitignore new file mode 100644 index 000000000000..73869fabada4 --- /dev/null +++ b/tools/testing/selftests/arm64/bti/gen/.gitignore @@ -0,0 +1,2 @@ +btitest +nobtitest diff --git a/tools/testing/selftests/arm64/bti/signal.c b/tools/testing/selftests/arm64/bti/signal.c new file mode 100644 index 000000000000..f3fd29b91141 --- /dev/null +++ b/tools/testing/selftests/arm64/bti/signal.c @@ -0,0 +1,37 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2019 Arm Limited + * Original author: Dave Martin <Dave.Martin@arm.com> + */ + +#include "system.h" +#include "signal.h" + +int sigemptyset(sigset_t *s) +{ + unsigned int i; + + for (i = 0; i < _NSIG_WORDS; ++i) + s->sig[i] = 0; + + return 0; +} + +int sigaddset(sigset_t *s, int n) +{ + if (n < 1 || n > _NSIG) + return -EINVAL; + + s->sig[(n - 1) / _NSIG_BPW] |= 1UL << (n - 1) % _NSIG_BPW; + return 0; +} + +int sigaction(int n, struct sigaction *sa, const struct sigaction *old) +{ + return syscall(__NR_rt_sigaction, n, sa, old, sizeof(sa->sa_mask)); +} + +int sigprocmask(int how, const sigset_t *mask, sigset_t *old) +{ + return syscall(__NR_rt_sigprocmask, how, mask, old, sizeof(*mask)); +} diff --git a/tools/testing/selftests/arm64/bti/signal.h b/tools/testing/selftests/arm64/bti/signal.h new file mode 100644 index 000000000000..103457dc880e --- /dev/null +++ b/tools/testing/selftests/arm64/bti/signal.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2019 Arm Limited + * Original author: Dave Martin <Dave.Martin@arm.com> + */ + +#ifndef SIGNAL_H +#define SIGNAL_H + +#include <linux/signal.h> + +#include "system.h" + +typedef __sighandler_t sighandler_t; + +int sigemptyset(sigset_t *s); +int sigaddset(sigset_t *s, int n); +int sigaction(int n, struct sigaction *sa, const struct sigaction *old); +int sigprocmask(int how, const sigset_t *mask, sigset_t *old); + +#endif /* ! SIGNAL_H */ diff --git a/tools/testing/selftests/arm64/bti/start.S b/tools/testing/selftests/arm64/bti/start.S new file mode 100644 index 000000000000..831f952e0572 --- /dev/null +++ b/tools/testing/selftests/arm64/bti/start.S @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2019 Arm Limited + * Original author: Dave Martin <Dave.Martin@arm.com> + */ + +#include "assembler.h" + +startfn _start + mov x0, sp + b start +endfn + +emit_aarch64_feature_1_and diff --git a/tools/testing/selftests/arm64/bti/syscall.S b/tools/testing/selftests/arm64/bti/syscall.S new file mode 100644 index 000000000000..8dde8b6f3db1 --- /dev/null +++ b/tools/testing/selftests/arm64/bti/syscall.S @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2019 Arm Limited + * Original author: Dave Martin <Dave.Martin@arm.com> + */ + +#include "assembler.h" + +startfn syscall + bti c + mov w8, w0 + mov x0, x1 + mov x1, x2 + mov x2, x3 + mov x3, x4 + mov x4, x5 + mov x5, x6 + mov x6, x7 + svc #0 + ret +endfn + +emit_aarch64_feature_1_and diff --git a/tools/testing/selftests/arm64/bti/system.c b/tools/testing/selftests/arm64/bti/system.c new file mode 100644 index 000000000000..6385d8d4973b --- /dev/null +++ b/tools/testing/selftests/arm64/bti/system.c @@ -0,0 +1,22 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2019 Arm Limited + * Original author: Dave Martin <Dave.Martin@arm.com> + */ + +#include "system.h" + +#include <asm/unistd.h> + +#include "compiler.h" + +void __noreturn exit(int n) +{ + syscall(__NR_exit, n); + __unreachable(); +} + +ssize_t write(int fd, const void *buf, size_t size) +{ + return syscall(__NR_write, fd, buf, size); +} diff --git a/tools/testing/selftests/arm64/bti/system.h b/tools/testing/selftests/arm64/bti/system.h new file mode 100644 index 000000000000..aca118589705 --- /dev/null +++ b/tools/testing/selftests/arm64/bti/system.h @@ -0,0 +1,28 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2019 Arm Limited + * Original author: Dave Martin <Dave.Martin@arm.com> + */ + +#ifndef SYSTEM_H +#define SYSTEM_H + +#include <linux/types.h> +#include <linux/stddef.h> + +typedef __kernel_size_t size_t; +typedef __kernel_ssize_t ssize_t; + +#include <linux/errno.h> +#include <asm/hwcap.h> +#include <asm/ptrace.h> +#include <asm/unistd.h> + +#include "compiler.h" + +long syscall(int nr, ...); + +void __noreturn exit(int n); +ssize_t write(int fd, const void *buf, size_t size); + +#endif /* ! SYSTEM_H */ diff --git a/tools/testing/selftests/arm64/bti/test.c b/tools/testing/selftests/arm64/bti/test.c new file mode 100644 index 000000000000..656b04976ccc --- /dev/null +++ b/tools/testing/selftests/arm64/bti/test.c @@ -0,0 +1,234 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2019,2021 Arm Limited + * Original author: Dave Martin <Dave.Martin@arm.com> + */ + +#include "system.h" + +#include <linux/errno.h> +#include <linux/auxvec.h> +#include <linux/signal.h> +#include <asm/sigcontext.h> +#include <asm/ucontext.h> + +typedef struct ucontext ucontext_t; + +#include "btitest.h" +#include "compiler.h" +#include "signal.h" + +#define EXPECTED_TESTS 18 + +static volatile unsigned int test_num = 1; +static unsigned int test_passed; +static unsigned int test_failed; +static unsigned int test_skipped; + +static void fdputs(int fd, const char *str) +{ + size_t len = 0; + const char *p = str; + + while (*p++) + ++len; + + write(fd, str, len); +} + +static void putstr(const char *str) +{ + fdputs(1, str); +} + +static void putnum(unsigned int num) +{ + char c; + + if (num / 10) + putnum(num / 10); + + c = '0' + (num % 10); + write(1, &c, 1); +} + +#define puttestname(test_name, trampoline_name) do { \ + putstr(test_name); \ + putstr("/"); \ + putstr(trampoline_name); \ +} while (0) + +void print_summary(void) +{ + putstr("# Totals: pass:"); + putnum(test_passed); + putstr(" fail:"); + putnum(test_failed); + putstr(" xfail:0 xpass:0 skip:"); + putnum(test_skipped); + putstr(" error:0\n"); +} + +static const char *volatile current_test_name; +static const char *volatile current_trampoline_name; +static volatile int sigill_expected, sigill_received; + +static void handler(int n, siginfo_t *si __always_unused, + void *uc_ __always_unused) +{ + ucontext_t *uc = uc_; + + putstr("# \t[SIGILL in "); + puttestname(current_test_name, current_trampoline_name); + putstr(", BTYPE="); + write(1, &"00011011"[((uc->uc_mcontext.pstate & PSR_BTYPE_MASK) + >> PSR_BTYPE_SHIFT) * 2], 2); + if (!sigill_expected) { + putstr("]\n"); + putstr("not ok "); + putnum(test_num); + putstr(" "); + puttestname(current_test_name, current_trampoline_name); + putstr("(unexpected SIGILL)\n"); + print_summary(); + exit(128 + n); + } + + putstr(" (expected)]\n"); + sigill_received = 1; + /* zap BTYPE so that resuming the faulting code will work */ + uc->uc_mcontext.pstate &= ~PSR_BTYPE_MASK; +} + +static int skip_all; + +static void __do_test(void (*trampoline)(void (*)(void)), + void (*fn)(void), + const char *trampoline_name, + const char *name, + int expect_sigill) +{ + if (skip_all) { + test_skipped++; + putstr("ok "); + putnum(test_num); + putstr(" "); + puttestname(name, trampoline_name); + putstr(" # SKIP\n"); + + return; + } + + /* Branch Target exceptions should only happen in BTI binaries: */ + if (!BTI) + expect_sigill = 0; + + sigill_expected = expect_sigill; + sigill_received = 0; + current_test_name = name; + current_trampoline_name = trampoline_name; + + trampoline(fn); + + if (expect_sigill && !sigill_received) { + putstr("not ok "); + test_failed++; + } else { + putstr("ok "); + test_passed++; + } + putnum(test_num++); + putstr(" "); + puttestname(name, trampoline_name); + putstr("\n"); +} + +#define do_test(expect_sigill_br_x0, \ + expect_sigill_br_x16, \ + expect_sigill_blr, \ + name) \ +do { \ + __do_test(call_using_br_x0, name, "call_using_br_x0", #name, \ + expect_sigill_br_x0); \ + __do_test(call_using_br_x16, name, "call_using_br_x16", #name, \ + expect_sigill_br_x16); \ + __do_test(call_using_blr, name, "call_using_blr", #name, \ + expect_sigill_blr); \ +} while (0) + +void start(int *argcp) +{ + struct sigaction sa; + void *const *p; + const struct auxv_entry { + unsigned long type; + unsigned long val; + } *auxv; + unsigned long hwcap = 0, hwcap2 = 0; + + putstr("TAP version 13\n"); + putstr("1.."); + putnum(EXPECTED_TESTS); + putstr("\n"); + + /* Gross hack for finding AT_HWCAP2 from the initial process stack: */ + p = (void *const *)argcp + 1 + *argcp + 1; /* start of environment */ + /* step over environment */ + while (*p++) + ; + for (auxv = (const struct auxv_entry *)p; auxv->type != AT_NULL; ++auxv) { + switch (auxv->type) { + case AT_HWCAP: + hwcap = auxv->val; + break; + case AT_HWCAP2: + hwcap2 = auxv->val; + break; + default: + break; + } + } + + if (hwcap & HWCAP_PACA) + putstr("# HWCAP_PACA present\n"); + else + putstr("# HWCAP_PACA not present\n"); + + if (hwcap2 & HWCAP2_BTI) { + putstr("# HWCAP2_BTI present\n"); + if (!(hwcap & HWCAP_PACA)) + putstr("# Bad hardware? Expect problems.\n"); + } else { + putstr("# HWCAP2_BTI not present\n"); + skip_all = 1; + } + + putstr("# Test binary"); + if (!BTI) + putstr(" not"); + putstr(" built for BTI\n"); + + sa.sa_handler = (sighandler_t)(void *)handler; + sa.sa_flags = SA_SIGINFO; + sigemptyset(&sa.sa_mask); + sigaction(SIGILL, &sa, NULL); + sigaddset(&sa.sa_mask, SIGILL); + sigprocmask(SIG_UNBLOCK, &sa.sa_mask, NULL); + + do_test(1, 1, 1, nohint_func); + do_test(1, 1, 1, bti_none_func); + do_test(1, 0, 0, bti_c_func); + do_test(0, 0, 1, bti_j_func); + do_test(0, 0, 0, bti_jc_func); + do_test(1, 0, 0, paciasp_func); + + print_summary(); + + if (test_num - 1 != EXPECTED_TESTS) + putstr("# WARNING - EXPECTED TEST COUNT WRONG\n"); + + if (test_failed) + exit(1); + else + exit(0); +} diff --git a/tools/testing/selftests/arm64/bti/teststubs.S b/tools/testing/selftests/arm64/bti/teststubs.S new file mode 100644 index 000000000000..b62c8c35f67e --- /dev/null +++ b/tools/testing/selftests/arm64/bti/teststubs.S @@ -0,0 +1,39 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2019 Arm Limited + * Original author: Dave Martin <Dave.Martin@arm.com> + */ + +#include "assembler.h" + +startfn bti_none_func + bti + ret +endfn + +startfn bti_c_func + bti c + ret +endfn + +startfn bti_j_func + bti j + ret +endfn + +startfn bti_jc_func + bti jc + ret +endfn + +startfn paciasp_func + paciasp + autiasp + ret +endfn + +startfn nohint_func + ret +endfn + +emit_aarch64_feature_1_and diff --git a/tools/testing/selftests/arm64/bti/trampoline.S b/tools/testing/selftests/arm64/bti/trampoline.S new file mode 100644 index 000000000000..09beb3f361f1 --- /dev/null +++ b/tools/testing/selftests/arm64/bti/trampoline.S @@ -0,0 +1,29 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2019 Arm Limited + * Original author: Dave Martin <Dave.Martin@arm.com> + */ + +#include "assembler.h" + +startfn call_using_br_x0 + bti c + br x0 +endfn + +startfn call_using_br_x16 + bti c + mov x16, x0 + br x16 +endfn + +startfn call_using_blr + paciasp + stp x29, x30, [sp, #-16]! + blr x0 + ldp x29, x30, [sp], #16 + autiasp + ret +endfn + +emit_aarch64_feature_1_and diff --git a/tools/testing/selftests/arm64/mte/Makefile b/tools/testing/selftests/arm64/mte/Makefile index 0b3af552632a..409e3e53d00a 100644 --- a/tools/testing/selftests/arm64/mte/Makefile +++ b/tools/testing/selftests/arm64/mte/Makefile @@ -1,14 +1,18 @@ # SPDX-License-Identifier: GPL-2.0 # Copyright (C) 2020 ARM Limited -CFLAGS += -std=gnu99 -I. -lpthread +# preserve CC value from top level Makefile +ifeq ($(CC),cc) +CC := $(CROSS_COMPILE)gcc +endif + +CFLAGS += -std=gnu99 -I. -pthread +LDFLAGS += -pthread SRCS := $(filter-out mte_common_util.c,$(wildcard *.c)) PROGS := $(patsubst %.c,%,$(SRCS)) #Add mte compiler option -ifneq ($(shell $(CC) --version 2>&1 | head -n 1 | grep gcc),) CFLAGS += -march=armv8.5-a+memtag -endif #check if the compiler works well mte_cc_support := $(shell if ($(CC) $(CFLAGS) -E -x c /dev/null -o /dev/null 2>&1) then echo "1"; fi) @@ -19,11 +23,14 @@ TEST_GEN_PROGS := $(PROGS) # Get Kernel headers installed and use them. KSFT_KHDR_INSTALL := 1 +else + $(warning compiler "$(CC)" does not support the ARMv8.5 MTE extension.) + $(warning test program "mte" will not be created.) endif # Include KSFT lib.mk. include ../../lib.mk ifeq ($(mte_cc_support),1) -$(TEST_GEN_PROGS): mte_common_util.c mte_common_util.h mte_helper.S +$(TEST_GEN_PROGS): mte_common_util.c mte_helper.S endif diff --git a/tools/testing/selftests/arm64/mte/check_ksm_options.c b/tools/testing/selftests/arm64/mte/check_ksm_options.c index 3b23c4d61d38..88c74bc46d4f 100644 --- a/tools/testing/selftests/arm64/mte/check_ksm_options.c +++ b/tools/testing/selftests/arm64/mte/check_ksm_options.c @@ -33,7 +33,10 @@ static unsigned long read_sysfs(char *str) ksft_print_msg("ERR: missing %s\n", str); return 0; } - fscanf(f, "%lu", &val); + if (fscanf(f, "%lu", &val) != 1) { + ksft_print_msg("ERR: parsing %s\n", str); + val = 0; + } fclose(f); return val; } diff --git a/tools/testing/selftests/arm64/mte/check_user_mem.c b/tools/testing/selftests/arm64/mte/check_user_mem.c index 4bfa80f2a8c3..1de7a0abd0ae 100644 --- a/tools/testing/selftests/arm64/mte/check_user_mem.c +++ b/tools/testing/selftests/arm64/mte/check_user_mem.c @@ -33,7 +33,8 @@ static int check_usermem_access_fault(int mem_type, int mode, int mapping) if (fd == -1) return KSFT_FAIL; for (i = 0; i < len; i++) - write(fd, &val, sizeof(val)); + if (write(fd, &val, sizeof(val)) != sizeof(val)) + return KSFT_FAIL; lseek(fd, 0, 0); ptr = mte_allocate_memory(len, mem_type, mapping, true); if (check_allocated_memory(ptr, len, mem_type, true) != KSFT_PASS) { diff --git a/tools/testing/selftests/arm64/mte/mte_common_util.c b/tools/testing/selftests/arm64/mte/mte_common_util.c index 39f8908988ea..f50ac31920d1 100644 --- a/tools/testing/selftests/arm64/mte/mte_common_util.c +++ b/tools/testing/selftests/arm64/mte/mte_common_util.c @@ -181,10 +181,17 @@ void *mte_allocate_file_memory(size_t size, int mem_type, int mapping, bool tags } /* Initialize the file for mappable size */ lseek(fd, 0, SEEK_SET); - for (index = INIT_BUFFER_SIZE; index < size; index += INIT_BUFFER_SIZE) - write(fd, buffer, INIT_BUFFER_SIZE); + for (index = INIT_BUFFER_SIZE; index < size; index += INIT_BUFFER_SIZE) { + if (write(fd, buffer, INIT_BUFFER_SIZE) != INIT_BUFFER_SIZE) { + perror("initialising buffer"); + return NULL; + } + } index -= INIT_BUFFER_SIZE; - write(fd, buffer, size - index); + if (write(fd, buffer, size - index) != size - index) { + perror("initialising buffer"); + return NULL; + } return __mte_allocate_memory_range(size, mem_type, mapping, 0, 0, tags, fd); } @@ -202,9 +209,15 @@ void *mte_allocate_file_memory_tag_range(size_t size, int mem_type, int mapping, /* Initialize the file for mappable size */ lseek(fd, 0, SEEK_SET); for (index = INIT_BUFFER_SIZE; index < map_size; index += INIT_BUFFER_SIZE) - write(fd, buffer, INIT_BUFFER_SIZE); + if (write(fd, buffer, INIT_BUFFER_SIZE) != INIT_BUFFER_SIZE) { + perror("initialising buffer"); + return NULL; + } index -= INIT_BUFFER_SIZE; - write(fd, buffer, map_size - index); + if (write(fd, buffer, map_size - index) != map_size - index) { + perror("initialising buffer"); + return NULL; + } return __mte_allocate_memory_range(size, mem_type, mapping, range_before, range_after, true, fd); } @@ -271,29 +284,20 @@ int mte_switch_mode(int mte_option, unsigned long incl_mask) en |= (incl_mask << PR_MTE_TAG_SHIFT); /* Enable address tagging ABI, mte error reporting mode and tag inclusion mask. */ - if (!prctl(PR_SET_TAGGED_ADDR_CTRL, en, 0, 0, 0) == 0) { + if (prctl(PR_SET_TAGGED_ADDR_CTRL, en, 0, 0, 0) != 0) { ksft_print_msg("FAIL:prctl PR_SET_TAGGED_ADDR_CTRL for mte mode\n"); return -EINVAL; } return 0; } -#define ID_AA64PFR1_MTE_SHIFT 8 -#define ID_AA64PFR1_MTE 2 - int mte_default_setup(void) { - unsigned long hwcaps = getauxval(AT_HWCAP); + unsigned long hwcaps2 = getauxval(AT_HWCAP2); unsigned long en = 0; int ret; - if (!(hwcaps & HWCAP_CPUID)) { - ksft_print_msg("FAIL: CPUID registers unavailable\n"); - return KSFT_FAIL; - } - /* Read ID_AA64PFR1_EL1 register */ - asm volatile("mrs %0, id_aa64pfr1_el1" : "=r"(hwcaps) : : "memory"); - if (((hwcaps >> ID_AA64PFR1_MTE_SHIFT) & MT_TAG_MASK) != ID_AA64PFR1_MTE) { + if (!(hwcaps2 & HWCAP2_MTE)) { ksft_print_msg("FAIL: MTE features unavailable\n"); return KSFT_SKIP; } @@ -333,6 +337,7 @@ int create_temp_file(void) /* Create a file in the tmpfs filesystem */ fd = mkstemp(&filename[0]); if (fd == -1) { + perror(filename); ksft_print_msg("FAIL: Unable to open temporary file\n"); return 0; } diff --git a/tools/testing/selftests/firmware/fw_namespace.c b/tools/testing/selftests/firmware/fw_namespace.c index 5ebc1aec7923..0e393cb5f42d 100644 --- a/tools/testing/selftests/firmware/fw_namespace.c +++ b/tools/testing/selftests/firmware/fw_namespace.c @@ -95,7 +95,7 @@ static bool test_fw_in_ns(const char *fw_name, const char *sys_path, bool block_ } if (block_fw_in_parent_ns) umount("/lib/firmware"); - return WEXITSTATUS(status) == EXIT_SUCCESS ? true : false; + return WEXITSTATUS(status) == EXIT_SUCCESS; } if (unshare(CLONE_NEWNS) != 0) { diff --git a/tools/testing/selftests/lib.mk b/tools/testing/selftests/lib.mk index 9a41d8bb9ff1..0af84ad48aa7 100644 --- a/tools/testing/selftests/lib.mk +++ b/tools/testing/selftests/lib.mk @@ -78,7 +78,8 @@ ifdef building_out_of_srctree rsync -aq $(TEST_PROGS) $(TEST_PROGS_EXTENDED) $(TEST_FILES) $(OUTPUT); \ fi @if [ "X$(TEST_PROGS)" != "X" ]; then \ - $(call RUN_TESTS, $(TEST_GEN_PROGS) $(TEST_CUSTOM_PROGS) $(OUTPUT)/$(TEST_PROGS)) ; \ + $(call RUN_TESTS, $(TEST_GEN_PROGS) $(TEST_CUSTOM_PROGS) \ + $(addprefix $(OUTPUT)/,$(TEST_PROGS))) ; \ else \ $(call RUN_TESTS, $(TEST_GEN_PROGS) $(TEST_CUSTOM_PROGS)); \ fi diff --git a/tools/testing/selftests/lkdtm/.gitignore b/tools/testing/selftests/lkdtm/.gitignore index f26212605b6b..d4b0be857deb 100644 --- a/tools/testing/selftests/lkdtm/.gitignore +++ b/tools/testing/selftests/lkdtm/.gitignore @@ -1,2 +1,3 @@ *.sh !run.sh +!stack-entropy.sh diff --git a/tools/testing/selftests/lkdtm/Makefile b/tools/testing/selftests/lkdtm/Makefile index 1bcc9ee990eb..c71109ceeb2d 100644 --- a/tools/testing/selftests/lkdtm/Makefile +++ b/tools/testing/selftests/lkdtm/Makefile @@ -5,6 +5,7 @@ include ../lib.mk # NOTE: $(OUTPUT) won't get default value if used before lib.mk TEST_FILES := tests.txt +TEST_PROGS := stack-entropy.sh TEST_GEN_PROGS = $(patsubst %,$(OUTPUT)/%.sh,$(shell awk '{print $$1}' tests.txt | sed -e 's/\#//')) all: $(TEST_GEN_PROGS) diff --git a/tools/testing/selftests/lkdtm/stack-entropy.sh b/tools/testing/selftests/lkdtm/stack-entropy.sh new file mode 100755 index 000000000000..b1b8a5097cbb --- /dev/null +++ b/tools/testing/selftests/lkdtm/stack-entropy.sh @@ -0,0 +1,36 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 +# +# Measure kernel stack entropy by sampling via LKDTM's REPORT_STACK test. +set -e +samples="${1:-1000}" + +# Capture dmesg continuously since it may fill up depending on sample size. +log=$(mktemp -t stack-entropy-XXXXXX) +dmesg --follow >"$log" & pid=$! +report=-1 +for i in $(seq 1 $samples); do + echo "REPORT_STACK" >/sys/kernel/debug/provoke-crash/DIRECT + if [ -t 1 ]; then + percent=$(( 100 * $i / $samples )) + if [ "$percent" -ne "$report" ]; then + /bin/echo -en "$percent%\r" + report="$percent" + fi + fi +done +kill "$pid" + +# Count unique offsets since last run. +seen=$(tac "$log" | grep -m1 -B"$samples"0 'Starting stack offset' | \ + grep 'Stack offset' | awk '{print $NF}' | sort | uniq -c | wc -l) +bits=$(echo "obase=2; $seen" | bc | wc -L) +echo "Bits of stack entropy: $bits" +rm -f "$log" + +# We would expect any functional stack randomization to be at least 5 bits. +if [ "$bits" -lt 5 ]; then + exit 1 +else + exit 0 +fi diff --git a/tools/testing/selftests/perf_events/.gitignore b/tools/testing/selftests/perf_events/.gitignore new file mode 100644 index 000000000000..790c47001e77 --- /dev/null +++ b/tools/testing/selftests/perf_events/.gitignore @@ -0,0 +1,3 @@ +# SPDX-License-Identifier: GPL-2.0-only +sigtrap_threads +remove_on_exec diff --git a/tools/testing/selftests/perf_events/Makefile b/tools/testing/selftests/perf_events/Makefile new file mode 100644 index 000000000000..fcafa5f0d34c --- /dev/null +++ b/tools/testing/selftests/perf_events/Makefile @@ -0,0 +1,6 @@ +# SPDX-License-Identifier: GPL-2.0 +CFLAGS += -Wl,-no-as-needed -Wall -I../../../../usr/include +LDFLAGS += -lpthread + +TEST_GEN_PROGS := sigtrap_threads remove_on_exec +include ../lib.mk diff --git a/tools/testing/selftests/perf_events/config b/tools/testing/selftests/perf_events/config new file mode 100644 index 000000000000..ba58ff2203e4 --- /dev/null +++ b/tools/testing/selftests/perf_events/config @@ -0,0 +1 @@ +CONFIG_PERF_EVENTS=y diff --git a/tools/testing/selftests/perf_events/remove_on_exec.c b/tools/testing/selftests/perf_events/remove_on_exec.c new file mode 100644 index 000000000000..5814611a1dc7 --- /dev/null +++ b/tools/testing/selftests/perf_events/remove_on_exec.c @@ -0,0 +1,260 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Test for remove_on_exec. + * + * Copyright (C) 2021, Google LLC. + */ + +#define _GNU_SOURCE + +/* We need the latest siginfo from the kernel repo. */ +#include <sys/types.h> +#include <asm/siginfo.h> +#define __have_siginfo_t 1 +#define __have_sigval_t 1 +#define __have_sigevent_t 1 +#define __siginfo_t_defined +#define __sigval_t_defined +#define __sigevent_t_defined +#define _BITS_SIGINFO_CONSTS_H 1 +#define _BITS_SIGEVENT_CONSTS_H 1 + +#include <stdbool.h> +#include <stddef.h> +#include <stdint.h> +#include <stdio.h> +#include <linux/perf_event.h> +#include <pthread.h> +#include <signal.h> +#include <sys/ioctl.h> +#include <sys/syscall.h> +#include <unistd.h> + +#include "../kselftest_harness.h" + +static volatile int signal_count; + +static struct perf_event_attr make_event_attr(void) +{ + struct perf_event_attr attr = { + .type = PERF_TYPE_HARDWARE, + .size = sizeof(attr), + .config = PERF_COUNT_HW_INSTRUCTIONS, + .sample_period = 1000, + .exclude_kernel = 1, + .exclude_hv = 1, + .disabled = 1, + .inherit = 1, + /* + * Children normally retain their inherited event on exec; with + * remove_on_exec, we'll remove their event, but the parent and + * any other non-exec'd children will keep their events. + */ + .remove_on_exec = 1, + .sigtrap = 1, + }; + return attr; +} + +static void sigtrap_handler(int signum, siginfo_t *info, void *ucontext) +{ + if (info->si_code != TRAP_PERF) { + fprintf(stderr, "%s: unexpected si_code %d\n", __func__, info->si_code); + return; + } + + signal_count++; +} + +FIXTURE(remove_on_exec) +{ + struct sigaction oldact; + int fd; +}; + +FIXTURE_SETUP(remove_on_exec) +{ + struct perf_event_attr attr = make_event_attr(); + struct sigaction action = {}; + + signal_count = 0; + + /* Initialize sigtrap handler. */ + action.sa_flags = SA_SIGINFO | SA_NODEFER; + action.sa_sigaction = sigtrap_handler; + sigemptyset(&action.sa_mask); + ASSERT_EQ(sigaction(SIGTRAP, &action, &self->oldact), 0); + + /* Initialize perf event. */ + self->fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, PERF_FLAG_FD_CLOEXEC); + ASSERT_NE(self->fd, -1); +} + +FIXTURE_TEARDOWN(remove_on_exec) +{ + close(self->fd); + sigaction(SIGTRAP, &self->oldact, NULL); +} + +/* Verify event propagates to fork'd child. */ +TEST_F(remove_on_exec, fork_only) +{ + int status; + pid_t pid = fork(); + + if (pid == 0) { + ASSERT_EQ(signal_count, 0); + ASSERT_EQ(ioctl(self->fd, PERF_EVENT_IOC_ENABLE, 0), 0); + while (!signal_count); + _exit(42); + } + + while (!signal_count); /* Child enables event. */ + EXPECT_EQ(waitpid(pid, &status, 0), pid); + EXPECT_EQ(WEXITSTATUS(status), 42); +} + +/* + * Verify that event does _not_ propagate to fork+exec'd child; event enabled + * after fork+exec. + */ +TEST_F(remove_on_exec, fork_exec_then_enable) +{ + pid_t pid_exec, pid_only_fork; + int pipefd[2]; + int tmp; + + /* + * Non-exec child, to ensure exec does not affect inherited events of + * other children. + */ + pid_only_fork = fork(); + if (pid_only_fork == 0) { + /* Block until parent enables event. */ + while (!signal_count); + _exit(42); + } + + ASSERT_NE(pipe(pipefd), -1); + pid_exec = fork(); + if (pid_exec == 0) { + ASSERT_NE(dup2(pipefd[1], STDOUT_FILENO), -1); + close(pipefd[0]); + execl("/proc/self/exe", "exec_child", NULL); + _exit((perror("exec failed"), 1)); + } + close(pipefd[1]); + + ASSERT_EQ(waitpid(pid_exec, &tmp, WNOHANG), 0); /* Child is running. */ + /* Wait for exec'd child to start spinning. */ + EXPECT_EQ(read(pipefd[0], &tmp, sizeof(int)), sizeof(int)); + EXPECT_EQ(tmp, 42); + close(pipefd[0]); + /* Now we can enable the event, knowing the child is doing work. */ + EXPECT_EQ(ioctl(self->fd, PERF_EVENT_IOC_ENABLE, 0), 0); + /* If the event propagated to the exec'd child, it will exit normally... */ + usleep(100000); /* ... give time for event to trigger (in case of bug). */ + EXPECT_EQ(waitpid(pid_exec, &tmp, WNOHANG), 0); /* Should still be running. */ + EXPECT_EQ(kill(pid_exec, SIGKILL), 0); + + /* Verify removal from child did not affect this task's event. */ + tmp = signal_count; + while (signal_count == tmp); /* Should not hang! */ + /* Nor should it have affected the first child. */ + EXPECT_EQ(waitpid(pid_only_fork, &tmp, 0), pid_only_fork); + EXPECT_EQ(WEXITSTATUS(tmp), 42); +} + +/* + * Verify that event does _not_ propagate to fork+exec'd child; event enabled + * before fork+exec. + */ +TEST_F(remove_on_exec, enable_then_fork_exec) +{ + pid_t pid_exec; + int tmp; + + EXPECT_EQ(ioctl(self->fd, PERF_EVENT_IOC_ENABLE, 0), 0); + + pid_exec = fork(); + if (pid_exec == 0) { + execl("/proc/self/exe", "exec_child", NULL); + _exit((perror("exec failed"), 1)); + } + + /* + * The child may exit abnormally at any time if the event propagated and + * a SIGTRAP is sent before the handler was set up. + */ + usleep(100000); /* ... give time for event to trigger (in case of bug). */ + EXPECT_EQ(waitpid(pid_exec, &tmp, WNOHANG), 0); /* Should still be running. */ + EXPECT_EQ(kill(pid_exec, SIGKILL), 0); + + /* Verify removal from child did not affect this task's event. */ + tmp = signal_count; + while (signal_count == tmp); /* Should not hang! */ +} + +TEST_F(remove_on_exec, exec_stress) +{ + pid_t pids[30]; + int i, tmp; + + for (i = 0; i < sizeof(pids) / sizeof(pids[0]); i++) { + pids[i] = fork(); + if (pids[i] == 0) { + execl("/proc/self/exe", "exec_child", NULL); + _exit((perror("exec failed"), 1)); + } + + /* Some forked with event disabled, rest with enabled. */ + if (i > 10) + EXPECT_EQ(ioctl(self->fd, PERF_EVENT_IOC_ENABLE, 0), 0); + } + + usleep(100000); /* ... give time for event to trigger (in case of bug). */ + + for (i = 0; i < sizeof(pids) / sizeof(pids[0]); i++) { + /* All children should still be running. */ + EXPECT_EQ(waitpid(pids[i], &tmp, WNOHANG), 0); + EXPECT_EQ(kill(pids[i], SIGKILL), 0); + } + + /* Verify event is still alive. */ + tmp = signal_count; + while (signal_count == tmp); +} + +/* For exec'd child. */ +static void exec_child(void) +{ + struct sigaction action = {}; + const int val = 42; + + /* Set up sigtrap handler in case we erroneously receive a trap. */ + action.sa_flags = SA_SIGINFO | SA_NODEFER; + action.sa_sigaction = sigtrap_handler; + sigemptyset(&action.sa_mask); + if (sigaction(SIGTRAP, &action, NULL)) + _exit((perror("sigaction failed"), 1)); + + /* Signal parent that we're starting to spin. */ + if (write(STDOUT_FILENO, &val, sizeof(int)) == -1) + _exit((perror("write failed"), 1)); + + /* Should hang here until killed. */ + while (!signal_count); +} + +#define main test_main +TEST_HARNESS_MAIN +#undef main +int main(int argc, char *argv[]) +{ + if (!strcmp(argv[0], "exec_child")) { + exec_child(); + return 1; + } + + return test_main(argc, argv); +} diff --git a/tools/testing/selftests/perf_events/settings b/tools/testing/selftests/perf_events/settings new file mode 100644 index 000000000000..6091b45d226b --- /dev/null +++ b/tools/testing/selftests/perf_events/settings @@ -0,0 +1 @@ +timeout=120 diff --git a/tools/testing/selftests/perf_events/sigtrap_threads.c b/tools/testing/selftests/perf_events/sigtrap_threads.c new file mode 100644 index 000000000000..78ddf5e11625 --- /dev/null +++ b/tools/testing/selftests/perf_events/sigtrap_threads.c @@ -0,0 +1,210 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Test for perf events with SIGTRAP across all threads. + * + * Copyright (C) 2021, Google LLC. + */ + +#define _GNU_SOURCE + +/* We need the latest siginfo from the kernel repo. */ +#include <sys/types.h> +#include <asm/siginfo.h> +#define __have_siginfo_t 1 +#define __have_sigval_t 1 +#define __have_sigevent_t 1 +#define __siginfo_t_defined +#define __sigval_t_defined +#define __sigevent_t_defined +#define _BITS_SIGINFO_CONSTS_H 1 +#define _BITS_SIGEVENT_CONSTS_H 1 + +#include <stdbool.h> +#include <stddef.h> +#include <stdint.h> +#include <stdio.h> +#include <linux/hw_breakpoint.h> +#include <linux/perf_event.h> +#include <pthread.h> +#include <signal.h> +#include <sys/ioctl.h> +#include <sys/syscall.h> +#include <unistd.h> + +#include "../kselftest_harness.h" + +#define NUM_THREADS 5 + +/* Data shared between test body, threads, and signal handler. */ +static struct { + int tids_want_signal; /* Which threads still want a signal. */ + int signal_count; /* Sanity check number of signals received. */ + volatile int iterate_on; /* Variable to set breakpoint on. */ + siginfo_t first_siginfo; /* First observed siginfo_t. */ +} ctx; + +/* Unique value to check si_perf is correctly set from perf_event_attr::sig_data. */ +#define TEST_SIG_DATA(addr) (~(unsigned long)(addr)) + +static struct perf_event_attr make_event_attr(bool enabled, volatile void *addr) +{ + struct perf_event_attr attr = { + .type = PERF_TYPE_BREAKPOINT, + .size = sizeof(attr), + .sample_period = 1, + .disabled = !enabled, + .bp_addr = (unsigned long)addr, + .bp_type = HW_BREAKPOINT_RW, + .bp_len = HW_BREAKPOINT_LEN_1, + .inherit = 1, /* Children inherit events ... */ + .inherit_thread = 1, /* ... but only cloned with CLONE_THREAD. */ + .remove_on_exec = 1, /* Required by sigtrap. */ + .sigtrap = 1, /* Request synchronous SIGTRAP on event. */ + .sig_data = TEST_SIG_DATA(addr), + }; + return attr; +} + +static void sigtrap_handler(int signum, siginfo_t *info, void *ucontext) +{ + if (info->si_code != TRAP_PERF) { + fprintf(stderr, "%s: unexpected si_code %d\n", __func__, info->si_code); + return; + } + + /* + * The data in siginfo_t we're interested in should all be the same + * across threads. + */ + if (!__atomic_fetch_add(&ctx.signal_count, 1, __ATOMIC_RELAXED)) + ctx.first_siginfo = *info; + __atomic_fetch_sub(&ctx.tids_want_signal, syscall(__NR_gettid), __ATOMIC_RELAXED); +} + +static void *test_thread(void *arg) +{ + pthread_barrier_t *barrier = (pthread_barrier_t *)arg; + pid_t tid = syscall(__NR_gettid); + int iter; + int i; + + pthread_barrier_wait(barrier); + + __atomic_fetch_add(&ctx.tids_want_signal, tid, __ATOMIC_RELAXED); + iter = ctx.iterate_on; /* read */ + for (i = 0; i < iter - 1; i++) { + __atomic_fetch_add(&ctx.tids_want_signal, tid, __ATOMIC_RELAXED); + ctx.iterate_on = iter; /* idempotent write */ + } + + return NULL; +} + +FIXTURE(sigtrap_threads) +{ + struct sigaction oldact; + pthread_t threads[NUM_THREADS]; + pthread_barrier_t barrier; + int fd; +}; + +FIXTURE_SETUP(sigtrap_threads) +{ + struct perf_event_attr attr = make_event_attr(false, &ctx.iterate_on); + struct sigaction action = {}; + int i; + + memset(&ctx, 0, sizeof(ctx)); + + /* Initialize sigtrap handler. */ + action.sa_flags = SA_SIGINFO | SA_NODEFER; + action.sa_sigaction = sigtrap_handler; + sigemptyset(&action.sa_mask); + ASSERT_EQ(sigaction(SIGTRAP, &action, &self->oldact), 0); + + /* Initialize perf event. */ + self->fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, PERF_FLAG_FD_CLOEXEC); + ASSERT_NE(self->fd, -1); + + /* Spawn threads inheriting perf event. */ + pthread_barrier_init(&self->barrier, NULL, NUM_THREADS + 1); + for (i = 0; i < NUM_THREADS; i++) + ASSERT_EQ(pthread_create(&self->threads[i], NULL, test_thread, &self->barrier), 0); +} + +FIXTURE_TEARDOWN(sigtrap_threads) +{ + pthread_barrier_destroy(&self->barrier); + close(self->fd); + sigaction(SIGTRAP, &self->oldact, NULL); +} + +static void run_test_threads(struct __test_metadata *_metadata, + FIXTURE_DATA(sigtrap_threads) *self) +{ + int i; + + pthread_barrier_wait(&self->barrier); + for (i = 0; i < NUM_THREADS; i++) + ASSERT_EQ(pthread_join(self->threads[i], NULL), 0); +} + +TEST_F(sigtrap_threads, remain_disabled) +{ + run_test_threads(_metadata, self); + EXPECT_EQ(ctx.signal_count, 0); + EXPECT_NE(ctx.tids_want_signal, 0); +} + +TEST_F(sigtrap_threads, enable_event) +{ + EXPECT_EQ(ioctl(self->fd, PERF_EVENT_IOC_ENABLE, 0), 0); + run_test_threads(_metadata, self); + + EXPECT_EQ(ctx.signal_count, NUM_THREADS); + EXPECT_EQ(ctx.tids_want_signal, 0); + EXPECT_EQ(ctx.first_siginfo.si_addr, &ctx.iterate_on); + EXPECT_EQ(ctx.first_siginfo.si_errno, PERF_TYPE_BREAKPOINT); + EXPECT_EQ(ctx.first_siginfo.si_perf, TEST_SIG_DATA(&ctx.iterate_on)); + + /* Check enabled for parent. */ + ctx.iterate_on = 0; + EXPECT_EQ(ctx.signal_count, NUM_THREADS + 1); +} + +/* Test that modification propagates to all inherited events. */ +TEST_F(sigtrap_threads, modify_and_enable_event) +{ + struct perf_event_attr new_attr = make_event_attr(true, &ctx.iterate_on); + + EXPECT_EQ(ioctl(self->fd, PERF_EVENT_IOC_MODIFY_ATTRIBUTES, &new_attr), 0); + run_test_threads(_metadata, self); + + EXPECT_EQ(ctx.signal_count, NUM_THREADS); + EXPECT_EQ(ctx.tids_want_signal, 0); + EXPECT_EQ(ctx.first_siginfo.si_addr, &ctx.iterate_on); + EXPECT_EQ(ctx.first_siginfo.si_errno, PERF_TYPE_BREAKPOINT); + EXPECT_EQ(ctx.first_siginfo.si_perf, TEST_SIG_DATA(&ctx.iterate_on)); + + /* Check enabled for parent. */ + ctx.iterate_on = 0; + EXPECT_EQ(ctx.signal_count, NUM_THREADS + 1); +} + +/* Stress test event + signal handling. */ +TEST_F(sigtrap_threads, signal_stress) +{ + ctx.iterate_on = 3000; + + EXPECT_EQ(ioctl(self->fd, PERF_EVENT_IOC_ENABLE, 0), 0); + run_test_threads(_metadata, self); + EXPECT_EQ(ioctl(self->fd, PERF_EVENT_IOC_DISABLE, 0), 0); + + EXPECT_EQ(ctx.signal_count, NUM_THREADS * ctx.iterate_on); + EXPECT_EQ(ctx.tids_want_signal, 0); + EXPECT_EQ(ctx.first_siginfo.si_addr, &ctx.iterate_on); + EXPECT_EQ(ctx.first_siginfo.si_errno, PERF_TYPE_BREAKPOINT); + EXPECT_EQ(ctx.first_siginfo.si_perf, TEST_SIG_DATA(&ctx.iterate_on)); +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/rcutorture/bin/cpus2use.sh b/tools/testing/selftests/rcutorture/bin/cpus2use.sh index 1dbfb62567d2..6bb993001680 100755 --- a/tools/testing/selftests/rcutorture/bin/cpus2use.sh +++ b/tools/testing/selftests/rcutorture/bin/cpus2use.sh @@ -21,7 +21,6 @@ then awk -v ncpus=$ncpus '{ print ncpus * ($7 + $NF) / 100 }'` else # No mpstat command, so use all available CPUs. - echo The mpstat command is not available, so greedily using all CPUs. idlecpus=$ncpus fi awk -v ncpus=$ncpus -v idlecpus=$idlecpus < /dev/null ' diff --git a/tools/testing/selftests/rcutorture/bin/jitter.sh b/tools/testing/selftests/rcutorture/bin/jitter.sh index 188b864bc4bf..15d937ba96ca 100755 --- a/tools/testing/selftests/rcutorture/bin/jitter.sh +++ b/tools/testing/selftests/rcutorture/bin/jitter.sh @@ -5,10 +5,11 @@ # of this script is to inflict random OS jitter on a concurrently running # test. # -# Usage: jitter.sh me duration [ sleepmax [ spinmax ] ] +# Usage: jitter.sh me jittering-path duration [ sleepmax [ spinmax ] ] # # me: Random-number-generator seed salt. # duration: Time to run in seconds. +# jittering-path: Path to file whose removal will stop this script. # sleepmax: Maximum microseconds to sleep, defaults to one second. # spinmax: Maximum microseconds to spin, defaults to one millisecond. # @@ -17,9 +18,10 @@ # Authors: Paul E. McKenney <paulmck@linux.ibm.com> me=$(($1 * 1000)) -duration=$2 -sleepmax=${3-1000000} -spinmax=${4-1000} +jittering=$2 +duration=$3 +sleepmax=${4-1000000} +spinmax=${5-1000} n=1 @@ -47,7 +49,7 @@ do fi # Check for stop request. - if test -f "$TORTURE_STOPFILE" + if ! test -f "$jittering" then exit 1; fi @@ -67,10 +69,10 @@ do srand(n + me + systime()); ncpus = split(cpus, ca); curcpu = ca[int(rand() * ncpus + 1)]; - mask = lshift(1, curcpu); - if (mask + 0 <= 0) - mask = 1; - printf("%#x\n", mask); + z = ""; + for (i = 1; 4 * i <= curcpu; i++) + z = z "0"; + print "0x" 2 ^ (curcpu % 4) z; }' < /dev/null` n=$(($n+1)) if ! taskset -p $cpumask $$ > /dev/null 2>&1 diff --git a/tools/testing/selftests/rcutorture/bin/jitterstart.sh b/tools/testing/selftests/rcutorture/bin/jitterstart.sh new file mode 100644 index 000000000000..3d710ad291c3 --- /dev/null +++ b/tools/testing/selftests/rcutorture/bin/jitterstart.sh @@ -0,0 +1,37 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0+ +# +# Start up the specified number of jitter.sh scripts in the background. +# +# Usage: . jitterstart.sh n jittering-dir duration [ sleepmax [ spinmax ] ] +# +# n: Number of jitter.sh scripts to start up. +# jittering-dir: Directory in which to put "jittering" file. +# duration: Time to run in seconds. +# sleepmax: Maximum microseconds to sleep, defaults to one second. +# spinmax: Maximum microseconds to spin, defaults to one millisecond. +# +# Copyright (C) 2021 Facebook, Inc. +# +# Authors: Paul E. McKenney <paulmck@kernel.org> + +jitter_n=$1 +if test -z "$jitter_n" +then + echo jitterstart.sh: Missing count of jitter.sh scripts to start. + exit 33 +fi +jittering_dir=$2 +if test -z "$jittering_dir" +then + echo jitterstart.sh: Missing directory in which to place jittering file. + exit 34 +fi +shift +shift + +touch ${jittering_dir}/jittering +for ((jitter_i = 1; jitter_i <= $jitter_n; jitter_i++)) +do + jitter.sh $jitter_i "${jittering_dir}/jittering" "$@" & +done diff --git a/tools/testing/selftests/rcutorture/bin/jitterstop.sh b/tools/testing/selftests/rcutorture/bin/jitterstop.sh new file mode 100644 index 000000000000..576a4cf4b79a --- /dev/null +++ b/tools/testing/selftests/rcutorture/bin/jitterstop.sh @@ -0,0 +1,23 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0+ +# +# Remove the "jittering" file, signaling the jitter.sh scripts to stop, +# then wait for them to terminate. +# +# Usage: . jitterstop.sh jittering-dir +# +# jittering-dir: Directory containing "jittering" file. +# +# Copyright (C) 2021 Facebook, Inc. +# +# Authors: Paul E. McKenney <paulmck@kernel.org> + +jittering_dir=$1 +if test -z "$jittering_dir" +then + echo jitterstop.sh: Missing directory in which to place jittering file. + exit 34 +fi + +rm -f ${jittering_dir}/jittering +wait diff --git a/tools/testing/selftests/rcutorture/bin/kvm-again.sh b/tools/testing/selftests/rcutorture/bin/kvm-again.sh new file mode 100755 index 000000000000..46e47a00a7db --- /dev/null +++ b/tools/testing/selftests/rcutorture/bin/kvm-again.sh @@ -0,0 +1,199 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0+ +# +# Rerun a series of tests under KVM. +# +# Usage: kvm-again.sh /path/to/old/run [ options ] +# +# Copyright (C) 2021 Facebook, Inc. +# +# Authors: Paul E. McKenney <paulmck@kernel.org> + +scriptname=$0 +args="$*" + +T=${TMPDIR-/tmp}/kvm-again.sh.$$ +trap 'rm -rf $T' 0 +mkdir $T + +if ! test -d tools/testing/selftests/rcutorture/bin +then + echo $scriptname must be run from top-level directory of kernel source tree. + exit 1 +fi + +oldrun=$1 +shift +if ! test -d "$oldrun" +then + echo "Usage: $scriptname /path/to/old/run [ options ]" + exit 1 +fi +if ! cp "$oldrun/batches" $T/batches.oldrun +then + # Later on, can reconstitute this from console.log files. + echo Prior run batches file does not exist: $oldrun/batches + exit 1 +fi + +if test -f "$oldrun/torture_suite" +then + torture_suite="`cat $oldrun/torture_suite`" +elif test -f "$oldrun/TORTURE_SUITE" +then + torture_suite="`cat $oldrun/TORTURE_SUITE`" +else + echo "Prior run torture_suite file does not exist: $oldrun/{torture_suite,TORTURE_SUITE}" + exit 1 +fi + +KVM="`pwd`/tools/testing/selftests/rcutorture"; export KVM +PATH=${KVM}/bin:$PATH; export PATH +. functions.sh + +dryrun= +dur= +default_link="cp -R" +rundir="`pwd`/tools/testing/selftests/rcutorture/res/`date +%Y.%m.%d-%H.%M.%S-again`" + +startdate="`date`" +starttime="`get_starttime`" + +usage () { + echo "Usage: $scriptname $oldrun [ arguments ]:" + echo " --dryrun" + echo " --duration minutes | <seconds>s | <hours>h | <days>d" + echo " --link hard|soft|copy" + echo " --remote" + echo " --rundir /new/res/path" + exit 1 +} + +while test $# -gt 0 +do + case "$1" in + --dryrun) + dryrun=1 + ;; + --duration) + checkarg --duration "(minutes)" $# "$2" '^[0-9][0-9]*\(s\|m\|h\|d\|\)$' '^error' + mult=60 + if echo "$2" | grep -q 's$' + then + mult=1 + elif echo "$2" | grep -q 'h$' + then + mult=3600 + elif echo "$2" | grep -q 'd$' + then + mult=86400 + fi + ts=`echo $2 | sed -e 's/[smhd]$//'` + dur=$(($ts*mult)) + shift + ;; + --link) + checkarg --link "hard|soft|copy" "$#" "$2" 'hard\|soft\|copy' '^--' + case "$2" in + copy) + arg_link="cp -R" + ;; + hard) + arg_link="cp -Rl" + ;; + soft) + arg_link="cp -Rs" + ;; + esac + shift + ;; + --remote) + arg_remote=1 + default_link="cp -as" + ;; + --rundir) + checkarg --rundir "(absolute pathname)" "$#" "$2" '^/' '^error' + rundir=$2 + if test -e "$rundir" + then + echo "--rundir $2: Already exists." + usage + fi + shift + ;; + *) + echo Unknown argument $1 + usage + ;; + esac + shift +done +if test -z "$arg_link" +then + arg_link="$default_link" +fi + +echo ---- Re-run results directory: $rundir + +# Copy old run directory tree over and adjust. +mkdir -p "`dirname "$rundir"`" +if ! $arg_link "$oldrun" "$rundir" +then + echo "Cannot copy from $oldrun to $rundir." + usage +fi +rm -f "$rundir"/*/{console.log,console.log.diags,qemu_pid,qemu-retval,Warnings,kvm-test-1-run.sh.out,kvm-test-1-run-qemu.sh.out,vmlinux} "$rundir"/log +echo $oldrun > "$rundir/re-run" +if ! test -d "$rundir/../../bin" +then + $arg_link "$oldrun/../../bin" "$rundir/../.." +fi +for i in $rundir/*/qemu-cmd +do + cp "$i" $T + qemu_cmd_dir="`dirname "$i"`" + kernel_dir="`echo $qemu_cmd_dir | sed -e 's/\.[0-9]\+$//'`" + jitter_dir="`dirname "$kernel_dir"`" + kvm-transform.sh "$kernel_dir/bzImage" "$qemu_cmd_dir/console.log" "$jitter_dir" $dur < $T/qemu-cmd > $i + if test -n "$arg_remote" + then + echo "# TORTURE_KCONFIG_GDB_ARG=''" >> $i + fi +done + +# Extract settings from the last qemu-cmd file transformed above. +grep '^#' $i | sed -e 's/^# //' > $T/qemu-cmd-settings +. $T/qemu-cmd-settings + +grep -v '^#' $T/batches.oldrun | awk ' +BEGIN { + oldbatch = 1; +} + +{ + if (oldbatch != $1) { + print "kvm-test-1-run-batch.sh" curbatch; + curbatch = ""; + oldbatch = $1; + } + curbatch = curbatch " " $2; +} + +END { + print "kvm-test-1-run-batch.sh" curbatch +}' > $T/runbatches.sh + +if test -n "$dryrun" +then + echo ---- Dryrun complete, directory: $rundir | tee -a "$rundir/log" +else + ( cd "$rundir"; sh $T/runbatches.sh ) + kcsan-collapse.sh "$rundir" | tee -a "$rundir/log" + echo | tee -a "$rundir/log" + echo ---- Results directory: $rundir | tee -a "$rundir/log" + kvm-recheck.sh "$rundir" > $T/kvm-recheck.sh.out 2>&1 + ret=$? + cat $T/kvm-recheck.sh.out | tee -a "$rundir/log" + echo " --- Done at `date` (`get_starttime_duration $starttime`) exitcode $ret" | tee -a "$rundir/log" + exit $ret +fi diff --git a/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh b/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh index 47cf4db10896..e01b31b87044 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh @@ -30,7 +30,7 @@ do resdir=`echo $i | sed -e 's,/$,,' -e 's,/[^/]*$,,'` head -1 $resdir/log fi - TORTURE_SUITE="`cat $i/../TORTURE_SUITE`" + TORTURE_SUITE="`cat $i/../torture_suite`" configfile=`echo $i | sed -e 's,^.*/,,'` rm -f $i/console.log.*.diags kvm-recheck-${TORTURE_SUITE}.sh $i diff --git a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run-batch.sh b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run-batch.sh new file mode 100755 index 000000000000..7ea0809e229e --- /dev/null +++ b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run-batch.sh @@ -0,0 +1,67 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0+ +# +# Carry out a kvm-based run for the specified batch of scenarios, which +# might have been built by --build-only kvm.sh run. +# +# Usage: kvm-test-1-run-batch.sh SCENARIO [ SCENARIO ... ] +# +# Each SCENARIO is the name of a directory in the current directory +# containing a ready-to-run qemu-cmd file. +# +# Copyright (C) 2021 Facebook, Inc. +# +# Authors: Paul E. McKenney <paulmck@kernel.org> + +T=${TMPDIR-/tmp}/kvm-test-1-run-batch.sh.$$ +trap 'rm -rf $T' 0 +mkdir $T + +echo ---- Running batch $* +# Check arguments +runfiles= +for i in "$@" +do + if ! echo $i | grep -q '^[^/.a-z]\+\(\.[0-9]\+\)\?$' + then + echo Bad scenario name: \"$i\" 1>&2 + exit 1 + fi + if ! test -d "$i" + then + echo Scenario name not a directory: \"$i\" 1>&2 + exit 2 + fi + if ! test -f "$i/qemu-cmd" + then + echo Scenario lacks a command file: \"$i/qemu-cmd\" 1>&2 + exit 3 + fi + rm -f $i/build.* + touch $i/build.run + runfiles="$runfiles $i/build.run" +done + +# Extract settings from the qemu-cmd file. +grep '^#' $1/qemu-cmd | sed -e 's/^# //' > $T/qemu-cmd-settings +. $T/qemu-cmd-settings + +# Start up jitter, start each scenario, wait, end jitter. +echo ---- System running test: `uname -a` +echo ---- Starting kernels. `date` | tee -a log +$TORTURE_JITTER_START +for i in "$@" +do + echo ---- System running test: `uname -a` > $i/kvm-test-1-run-qemu.sh.out + echo > $i/kvm-test-1-run-qemu.sh.out + kvm-test-1-run-qemu.sh $i >> $i/kvm-test-1-run-qemu.sh.out 2>&1 & +done +for i in $runfiles +do + while ls $i > /dev/null 2>&1 + do + : + done +done +echo ---- All kernel runs complete. `date` | tee -a log +$TORTURE_JITTER_STOP diff --git a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run-qemu.sh b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run-qemu.sh new file mode 100755 index 000000000000..5b1aa2a4f3f6 --- /dev/null +++ b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run-qemu.sh @@ -0,0 +1,176 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0+ +# +# Carry out a kvm-based run for the specified qemu-cmd file, which might +# have been generated by --build-only kvm.sh run. +# +# Usage: kvm-test-1-run-qemu.sh qemu-cmd-dir +# +# qemu-cmd-dir provides the directory containing qemu-cmd file. +# This is assumed to be of the form prefix/ds/scenario, where +# "ds" is the top-level date-stamped directory and "scenario" +# is the scenario name. Any required adjustments to this file +# must have been made by the caller. The shell-command comments +# at the end of the qemu-cmd file are not optional. +# +# Copyright (C) 2021 Facebook, Inc. +# +# Authors: Paul E. McKenney <paulmck@kernel.org> + +T=${TMPDIR-/tmp}/kvm-test-1-run-qemu.sh.$$ +trap 'rm -rf $T' 0 +mkdir $T + +resdir="$1" +if ! test -d "$resdir" +then + echo $0: Nonexistent directory: $resdir + exit 1 +fi +if ! test -f "$resdir/qemu-cmd" +then + echo $0: Nonexistent qemu-cmd file: $resdir/qemu-cmd + exit 1 +fi + +echo ' ---' `date`: Starting kernel, PID $$ + +# Obtain settings from the qemu-cmd file. +grep '^#' $resdir/qemu-cmd | sed -e 's/^# //' > $T/qemu-cmd-settings +. $T/qemu-cmd-settings + +# Decorate qemu-cmd with redirection, backgrounding, and PID capture +sed -e 's/$/ 2>\&1 \&/' < $resdir/qemu-cmd > $T/qemu-cmd +echo 'echo $! > $resdir/qemu_pid' >> $T/qemu-cmd + +# In case qemu refuses to run... +echo "NOTE: $QEMU either did not run or was interactive" > $resdir/console.log + +# Attempt to run qemu +kstarttime=`gawk 'BEGIN { print systime() }' < /dev/null` +( . $T/qemu-cmd; wait `cat $resdir/qemu_pid`; echo $? > $resdir/qemu-retval ) & +commandcompleted=0 +if test -z "$TORTURE_KCONFIG_GDB_ARG" +then + sleep 10 # Give qemu's pid a chance to reach the file + if test -s "$resdir/qemu_pid" + then + qemu_pid=`cat "$resdir/qemu_pid"` + echo Monitoring qemu job at pid $qemu_pid + else + qemu_pid="" + echo Monitoring qemu job at yet-as-unknown pid + fi +fi +if test -n "$TORTURE_KCONFIG_GDB_ARG" +then + base_resdir=`echo $resdir | sed -e 's/\.[0-9]\+$//'` + if ! test -f $base_resdir/vmlinux + then + base_resdir="`cat re-run`/$resdir" + if ! test -f $base_resdir/vmlinux + then + base_resdir=/path/to + fi + fi + echo Waiting for you to attach a debug session, for example: > /dev/tty + echo " gdb $base_resdir/vmlinux" > /dev/tty + echo 'After symbols load and the "(gdb)" prompt appears:' > /dev/tty + echo " target remote :1234" > /dev/tty + echo " continue" > /dev/tty + kstarttime=`gawk 'BEGIN { print systime() }' < /dev/null` +fi +while : +do + if test -z "$qemu_pid" -a -s "$resdir/qemu_pid" + then + qemu_pid=`cat "$resdir/qemu_pid"` + fi + kruntime=`gawk 'BEGIN { print systime() - '"$kstarttime"' }' < /dev/null` + if test -z "$qemu_pid" || kill -0 "$qemu_pid" > /dev/null 2>&1 + then + if test -n "$TORTURE_KCONFIG_GDB_ARG" + then + : + elif test $kruntime -ge $seconds || test -f "$resdir/../STOP.1" + then + break; + fi + sleep 1 + else + commandcompleted=1 + if test $kruntime -lt $seconds + then + echo Completed in $kruntime vs. $seconds >> $resdir/Warnings 2>&1 + grep "^(qemu) qemu:" $resdir/kvm-test-1-run.sh.out >> $resdir/Warnings 2>&1 + killpid="`sed -n "s/^(qemu) qemu: terminating on signal [0-9]* from pid \([0-9]*\).*$/\1/p" $resdir/Warnings`" + if test -n "$killpid" + then + echo "ps -fp $killpid" >> $resdir/Warnings 2>&1 + ps -fp $killpid >> $resdir/Warnings 2>&1 + fi + else + echo ' ---' `date`: "Kernel done" + fi + break + fi +done +if test -z "$qemu_pid" -a -s "$resdir/qemu_pid" +then + qemu_pid=`cat "$resdir/qemu_pid"` +fi +if test $commandcompleted -eq 0 -a -n "$qemu_pid" +then + if ! test -f "$resdir/../STOP.1" + then + echo Grace period for qemu job at pid $qemu_pid + fi + oldline="`tail $resdir/console.log`" + while : + do + if test -f "$resdir/../STOP.1" + then + echo "PID $qemu_pid killed due to run STOP.1 request" >> $resdir/Warnings 2>&1 + kill -KILL $qemu_pid + break + fi + kruntime=`gawk 'BEGIN { print systime() - '"$kstarttime"' }' < /dev/null` + if kill -0 $qemu_pid > /dev/null 2>&1 + then + : + else + break + fi + must_continue=no + newline="`tail $resdir/console.log`" + if test "$newline" != "$oldline" && echo $newline | grep -q ' [0-9]\+us : ' + then + must_continue=yes + fi + last_ts="`tail $resdir/console.log | grep '^\[ *[0-9]\+\.[0-9]\+]' | tail -1 | sed -e 's/^\[ *//' -e 's/\..*$//'`" + if test -z "$last_ts" + then + last_ts=0 + fi + if test "$newline" != "$oldline" -a "$last_ts" -lt $((seconds + $TORTURE_SHUTDOWN_GRACE)) + then + must_continue=yes + fi + if test $must_continue = no -a $kruntime -ge $((seconds + $TORTURE_SHUTDOWN_GRACE)) + then + echo "!!! PID $qemu_pid hung at $kruntime vs. $seconds seconds" >> $resdir/Warnings 2>&1 + kill -KILL $qemu_pid + break + fi + oldline=$newline + sleep 10 + done +elif test -z "$qemu_pid" +then + echo Unknown PID, cannot kill qemu command +fi + +# Tell the script that this run is done. +rm -f $resdir/build.run + +parse-console.sh $resdir/console.log $title diff --git a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh index 536d103ef166..420ed5ce9d32 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh @@ -7,15 +7,15 @@ # Execute this in the source tree. Do not run it as a background task # because qemu does not seem to like that much. # -# Usage: kvm-test-1-run.sh config builddir resdir seconds qemu-args boot_args +# Usage: kvm-test-1-run.sh config resdir seconds qemu-args boot_args_in # # qemu-args defaults to "-enable-kvm -nographic", along with arguments # specifying the number of CPUs and other options # generated from the underlying CPU architecture. -# boot_args defaults to value returned by the per_version_boot_params +# boot_args_in defaults to value returned by the per_version_boot_params # shell function. # -# Anything you specify for either qemu-args or boot_args is appended to +# Anything you specify for either qemu-args or boot_args_in is appended to # the default values. The "-smp" value is deduced from the contents of # the config fragment. # @@ -35,14 +35,13 @@ mkdir $T config_template=${1} config_dir=`echo $config_template | sed -e 's,/[^/]*$,,'` title=`echo $config_template | sed -e 's/^.*\///'` -builddir=${2} -resdir=${3} +resdir=${2} if test -z "$resdir" -o ! -d "$resdir" -o ! -w "$resdir" then echo "kvm-test-1-run.sh :$resdir: Not a writable directory, cannot store results into it" exit 1 fi -echo ' ---' `date`: Starting build +echo ' ---' `date`: Starting build, PID $$ echo ' ---' Kconfig fragment at: $config_template >> $resdir/log touch $resdir/ConfigFragment.input @@ -73,7 +72,7 @@ config_override_param "--kconfig argument" KcList "$TORTURE_KCONFIG_ARG" cp $T/KcList $resdir/ConfigFragment base_resdir=`echo $resdir | sed -e 's/\.[0-9]\+$//'` -if test "$base_resdir" != "$resdir" -a -f $base_resdir/bzImage -a -f $base_resdir/vmlinux +if test "$base_resdir" != "$resdir" && test -f $base_resdir/bzImage && test -f $base_resdir/vmlinux then # Rerunning previous test, so use that test's kernel. QEMU="`identify_qemu $base_resdir/vmlinux`" @@ -83,6 +82,17 @@ then ln -s $base_resdir/.config $resdir # for kvm-recheck.sh # Arch-independent indicator touch $resdir/builtkernel +elif test "$base_resdir" != "$resdir" +then + # Rerunning previous test for which build failed + ln -s $base_resdir/Make*.out $resdir # for kvm-recheck.sh + ln -s $base_resdir/.config $resdir # for kvm-recheck.sh + echo Initial build failed, not running KVM, see $resdir. + if test -f $resdir/build.wait + then + mv $resdir/build.wait $resdir/build.ready + fi + exit 1 elif kvm-build.sh $T/KcList $resdir then # Had to build a kernel for this test. @@ -107,23 +117,23 @@ else # Build failed. cp .config $resdir || : echo Build failed, not running KVM, see $resdir. - if test -f $builddir.wait + if test -f $resdir/build.wait then - mv $builddir.wait $builddir.ready + mv $resdir/build.wait $resdir/build.ready fi exit 1 fi -if test -f $builddir.wait +if test -f $resdir/build.wait then - mv $builddir.wait $builddir.ready + mv $resdir/build.wait $resdir/build.ready fi -while test -f $builddir.ready +while test -f $resdir/build.ready do sleep 1 done -seconds=$4 -qemu_args=$5 -boot_args=$6 +seconds=$3 +qemu_args=$4 +boot_args_in=$5 if test -z "$TORTURE_BUILDONLY" then @@ -133,7 +143,7 @@ fi # Generate -smp qemu argument. qemu_args="-enable-kvm -nographic $qemu_args" cpu_count=`configNR_CPUS.sh $resdir/ConfigFragment` -cpu_count=`configfrag_boot_cpus "$boot_args" "$config_template" "$cpu_count"` +cpu_count=`configfrag_boot_cpus "$boot_args_in" "$config_template" "$cpu_count"` if test "$cpu_count" -gt "$TORTURE_ALLOTED_CPUS" then echo CPU count limited from $cpu_count to $TORTURE_ALLOTED_CPUS | tee -a $resdir/Warnings @@ -149,16 +159,52 @@ qemu_args="$qemu_args `identify_qemu_args "$QEMU" "$resdir/console.log"`" qemu_append="`identify_qemu_append "$QEMU"`" # Pull in Kconfig-fragment boot parameters -boot_args="`configfrag_boot_params "$boot_args" "$config_template"`" +boot_args="`configfrag_boot_params "$boot_args_in" "$config_template"`" # Generate kernel-version-specific boot parameters boot_args="`per_version_boot_params "$boot_args" $resdir/.config $seconds`" if test -n "$TORTURE_BOOT_GDB_ARG" then boot_args="$boot_args $TORTURE_BOOT_GDB_ARG" fi + +# Give bare-metal advice +modprobe_args="`echo $boot_args | tr -s ' ' '\012' | grep "^$TORTURE_MOD\." | sed -e "s/$TORTURE_MOD\.//g"`" +kboot_args="`echo $boot_args | tr -s ' ' '\012' | grep -v "^$TORTURE_MOD\."`" +testid_txt="`dirname $resdir`/testid.txt" +touch $resdir/bare-metal +echo To run this scenario on bare metal: >> $resdir/bare-metal +echo >> $resdir/bare-metal +echo " 1." Set your bare-metal build tree to the state shown in this file: >> $resdir/bare-metal +echo " " $testid_txt >> $resdir/bare-metal +echo " 2." Update your bare-metal build tree"'"s .config based on this file: >> $resdir/bare-metal +echo " " $resdir/ConfigFragment >> $resdir/bare-metal +echo " 3." Make the bare-metal kernel"'"s build system aware of your .config updates: >> $resdir/bare-metal +echo " " $ 'yes "" | make oldconfig' >> $resdir/bare-metal +echo " 4." Build your bare-metal kernel. >> $resdir/bare-metal +echo " 5." Boot your bare-metal kernel with the following parameters: >> $resdir/bare-metal +echo " " $kboot_args >> $resdir/bare-metal +echo " 6." Start the test with the following command: >> $resdir/bare-metal +echo " " $ modprobe $TORTURE_MOD $modprobe_args >> $resdir/bare-metal +echo " 7." After some time, end the test with the following command: >> $resdir/bare-metal +echo " " $ rmmod $TORTURE_MOD >> $resdir/bare-metal +echo " 8." Copy your bare-metal kernel"'"s .config file, overwriting this file: >> $resdir/bare-metal +echo " " $resdir/.config >> $resdir/bare-metal +echo " 9." Copy the console output from just before the modprobe to just after >> $resdir/bare-metal +echo " " the rmmod into this file: >> $resdir/bare-metal +echo " " $resdir/console.log >> $resdir/bare-metal +echo "10." Check for runtime errors using the following command: >> $resdir/bare-metal +echo " " $ tools/testing/selftests/rcutorture/bin/kvm-recheck.sh `dirname $resdir` >> $resdir/bare-metal +echo >> $resdir/bare-metal +echo Some of the above steps may be skipped if you build your bare-metal >> $resdir/bare-metal +echo kernel here: `head -n 1 $testid_txt | sed -e 's/^Build directory: //'` >> $resdir/bare-metal + echo $QEMU $qemu_args -m $TORTURE_QEMU_MEM -kernel $KERNEL -append \"$qemu_append $boot_args\" $TORTURE_QEMU_GDB_ARG > $resdir/qemu-cmd echo "# TORTURE_SHUTDOWN_GRACE=$TORTURE_SHUTDOWN_GRACE" >> $resdir/qemu-cmd echo "# seconds=$seconds" >> $resdir/qemu-cmd +echo "# TORTURE_KCONFIG_GDB_ARG=\"$TORTURE_KCONFIG_GDB_ARG\"" >> $resdir/qemu-cmd +echo "# TORTURE_JITTER_START=\"$TORTURE_JITTER_START\"" >> $resdir/qemu-cmd +echo "# TORTURE_JITTER_STOP=\"$TORTURE_JITTER_STOP\"" >> $resdir/qemu-cmd +echo "# TORTURE_TRUST_MAKE=\"$TORTURE_TRUST_MAKE\"; export TORTURE_TRUST_MAKE" >> $resdir/qemu-cmd if test -n "$TORTURE_BUILDONLY" then @@ -167,140 +213,4 @@ then exit 0 fi -# Decorate qemu-cmd with redirection, backgrounding, and PID capture -sed -e 's/$/ 2>\&1 \&/' < $resdir/qemu-cmd > $T/qemu-cmd -echo 'echo $! > $resdir/qemu_pid' >> $T/qemu-cmd - -# In case qemu refuses to run... -echo "NOTE: $QEMU either did not run or was interactive" > $resdir/console.log - -# Attempt to run qemu -kstarttime=`gawk 'BEGIN { print systime() }' < /dev/null` -( . $T/qemu-cmd; wait `cat $resdir/qemu_pid`; echo $? > $resdir/qemu-retval ) & -commandcompleted=0 -if test -z "$TORTURE_KCONFIG_GDB_ARG" -then - sleep 10 # Give qemu's pid a chance to reach the file - if test -s "$resdir/qemu_pid" - then - qemu_pid=`cat "$resdir/qemu_pid"` - echo Monitoring qemu job at pid $qemu_pid - else - qemu_pid="" - echo Monitoring qemu job at yet-as-unknown pid - fi -fi -if test -n "$TORTURE_KCONFIG_GDB_ARG" -then - echo Waiting for you to attach a debug session, for example: > /dev/tty - echo " gdb $base_resdir/vmlinux" > /dev/tty - echo 'After symbols load and the "(gdb)" prompt appears:' > /dev/tty - echo " target remote :1234" > /dev/tty - echo " continue" > /dev/tty - kstarttime=`gawk 'BEGIN { print systime() }' < /dev/null` -fi -while : -do - if test -z "$qemu_pid" -a -s "$resdir/qemu_pid" - then - qemu_pid=`cat "$resdir/qemu_pid"` - fi - kruntime=`gawk 'BEGIN { print systime() - '"$kstarttime"' }' < /dev/null` - if test -z "$qemu_pid" || kill -0 "$qemu_pid" > /dev/null 2>&1 - then - if test -n "$TORTURE_KCONFIG_GDB_ARG" - then - : - elif test $kruntime -ge $seconds || test -f "$resdir/../STOP.1" - then - break; - fi - sleep 1 - else - commandcompleted=1 - if test $kruntime -lt $seconds - then - echo Completed in $kruntime vs. $seconds >> $resdir/Warnings 2>&1 - grep "^(qemu) qemu:" $resdir/kvm-test-1-run.sh.out >> $resdir/Warnings 2>&1 - killpid="`sed -n "s/^(qemu) qemu: terminating on signal [0-9]* from pid \([0-9]*\).*$/\1/p" $resdir/Warnings`" - if test -n "$killpid" - then - echo "ps -fp $killpid" >> $resdir/Warnings 2>&1 - ps -fp $killpid >> $resdir/Warnings 2>&1 - fi - # Reduce probability of PID reuse by allowing a one-minute buffer - if test $((kruntime + 60)) -lt $seconds && test -s "$resdir/../jitter_pids" - then - awk < "$resdir/../jitter_pids" ' - NF > 0 { - pidlist = pidlist " " $1; - n++; - } - END { - if (n > 0) { - print "kill " pidlist; - } - }' | sh - fi - else - echo ' ---' `date`: "Kernel done" - fi - break - fi -done -if test -z "$qemu_pid" -a -s "$resdir/qemu_pid" -then - qemu_pid=`cat "$resdir/qemu_pid"` -fi -if test $commandcompleted -eq 0 -a -n "$qemu_pid" -then - if ! test -f "$resdir/../STOP.1" - then - echo Grace period for qemu job at pid $qemu_pid - fi - oldline="`tail $resdir/console.log`" - while : - do - if test -f "$resdir/../STOP.1" - then - echo "PID $qemu_pid killed due to run STOP.1 request" >> $resdir/Warnings 2>&1 - kill -KILL $qemu_pid - break - fi - kruntime=`gawk 'BEGIN { print systime() - '"$kstarttime"' }' < /dev/null` - if kill -0 $qemu_pid > /dev/null 2>&1 - then - : - else - break - fi - must_continue=no - newline="`tail $resdir/console.log`" - if test "$newline" != "$oldline" && echo $newline | grep -q ' [0-9]\+us : ' - then - must_continue=yes - fi - last_ts="`tail $resdir/console.log | grep '^\[ *[0-9]\+\.[0-9]\+]' | tail -1 | sed -e 's/^\[ *//' -e 's/\..*$//'`" - if test -z "$last_ts" - then - last_ts=0 - fi - if test "$newline" != "$oldline" -a "$last_ts" -lt $((seconds + $TORTURE_SHUTDOWN_GRACE)) - then - must_continue=yes - fi - if test $must_continue = no -a $kruntime -ge $((seconds + $TORTURE_SHUTDOWN_GRACE)) - then - echo "!!! PID $qemu_pid hung at $kruntime vs. $seconds seconds" >> $resdir/Warnings 2>&1 - kill -KILL $qemu_pid - break - fi - oldline=$newline - sleep 10 - done -elif test -z "$qemu_pid" -then - echo Unknown PID, cannot kill qemu command -fi - -parse-console.sh $resdir/console.log $title +kvm-test-1-run-qemu.sh $resdir diff --git a/tools/testing/selftests/rcutorture/bin/kvm-transform.sh b/tools/testing/selftests/rcutorture/bin/kvm-transform.sh index c45a953ef393..d40b4e60a50c 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm-transform.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm-transform.sh @@ -3,7 +3,7 @@ # # Transform a qemu-cmd file to allow reuse. # -# Usage: kvm-transform.sh bzImage console.log < qemu-cmd-in > qemu-cmd-out +# Usage: kvm-transform.sh bzImage console.log jitter_dir [ seconds ] < qemu-cmd-in > qemu-cmd-out # # bzImage: Kernel and initrd from the same prior kvm.sh run. # console.log: File into which to place console output. @@ -29,20 +29,62 @@ then echo "Need console log file name." exit 1 fi +jitter_dir="$3" +if test -z "$jitter_dir" || ! test -d "$jitter_dir" +then + echo "Need valid jitter directory: '$jitter_dir'" + exit 1 +fi +seconds="$4" +if test -n "$seconds" && echo $seconds | grep -q '[^0-9]' +then + echo "Invalid duration, should be numeric in seconds: '$seconds'" + exit 1 +fi + +awk -v image="$image" -v consolelog="$consolelog" -v jitter_dir="$jitter_dir" \ + -v seconds="$seconds" ' +/^# seconds=/ { + if (seconds == "") + print $0; + else + print "# seconds=" seconds; + next; +} + +/^# TORTURE_JITTER_START=/ { + print "# TORTURE_JITTER_START=\". jitterstart.sh " $4 " " jitter_dir " " $6 " " $7; + next; +} + +/^# TORTURE_JITTER_STOP=/ { + print "# TORTURE_JITTER_STOP=\". jitterstop.sh " " " jitter_dir " " $5; + next; +} + +/^#/ { + print $0; + next; +} -awk -v image="$image" -v consolelog="$consolelog" ' { line = ""; for (i = 1; i <= NF; i++) { - if (line == "") + if ("" seconds != "" && $i ~ /\.shutdown_secs=[0-9]*$/) { + sub(/[0-9]*$/, seconds, $i); + if (line == "") + line = $i; + else + line = line " " $i; + } else if (line == "") { line = $i; - else + } else { line = line " " $i; + } if ($i == "-serial") { i++; line = line " file:" consolelog; - } - if ($i == "-kernel") { + } else if ($i == "-kernel") { i++; line = line " " image; } diff --git a/tools/testing/selftests/rcutorture/bin/kvm.sh b/tools/testing/selftests/rcutorture/bin/kvm.sh index 8d3c99b35e06..6bf00a003d3d 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm.sh @@ -29,17 +29,21 @@ PATH=${KVM}/bin:$PATH; export PATH TORTURE_ALLOTED_CPUS="`identify_qemu_vcpus`" TORTURE_DEFCONFIG=defconfig TORTURE_BOOT_IMAGE="" +TORTURE_BUILDONLY= TORTURE_INITRD="$KVM/initrd"; export TORTURE_INITRD TORTURE_KCONFIG_ARG="" TORTURE_KCONFIG_GDB_ARG="" TORTURE_BOOT_GDB_ARG="" TORTURE_QEMU_GDB_ARG="" +TORTURE_JITTER_START="" +TORTURE_JITTER_STOP="" TORTURE_KCONFIG_KASAN_ARG="" TORTURE_KCONFIG_KCSAN_ARG="" TORTURE_KMAKE_ARG="" TORTURE_QEMU_MEM=512 TORTURE_SHUTDOWN_GRACE=180 TORTURE_SUITE=rcu +TORTURE_MOD=rcutorture TORTURE_TRUST_MAKE="" resdir="" configs="" @@ -100,7 +104,7 @@ do TORTURE_BUILDONLY=1 ;; --configs|--config) - checkarg --configs "(list of config files)" "$#" "$2" '^[^/]\+$' '^--' + checkarg --configs "(list of config files)" "$#" "$2" '^[^/.a-z]\+$' '^--' configs="$configs $2" shift ;; @@ -116,7 +120,7 @@ do shift ;; --datestamp) - checkarg --datestamp "(relative pathname)" "$#" "$2" '^[a-zA-Z0-9._-/]*$' '^--' + checkarg --datestamp "(relative pathname)" "$#" "$2" '^[a-zA-Z0-9._/-]*$' '^--' ds=$2 shift ;; @@ -215,6 +219,7 @@ do --torture) checkarg --torture "(suite name)" "$#" "$2" '^\(lock\|rcu\|rcuscale\|refscale\|scf\)$' '^--' TORTURE_SUITE=$2 + TORTURE_MOD="`echo $TORTURE_SUITE | sed -e 's/^\(lock\|rcu\|scf\)$/\1torture/'`" shift if test "$TORTURE_SUITE" = rcuscale || test "$TORTURE_SUITE" = refscale then @@ -381,6 +386,7 @@ TORTURE_QEMU_GDB_ARG="$TORTURE_QEMU_GDB_ARG"; export TORTURE_QEMU_GDB_ARG TORTURE_KCONFIG_KASAN_ARG="$TORTURE_KCONFIG_KASAN_ARG"; export TORTURE_KCONFIG_KASAN_ARG TORTURE_KCONFIG_KCSAN_ARG="$TORTURE_KCONFIG_KCSAN_ARG"; export TORTURE_KCONFIG_KCSAN_ARG TORTURE_KMAKE_ARG="$TORTURE_KMAKE_ARG"; export TORTURE_KMAKE_ARG +TORTURE_MOD="$TORTURE_MOD"; export TORTURE_MOD TORTURE_QEMU_CMD="$TORTURE_QEMU_CMD"; export TORTURE_QEMU_CMD TORTURE_QEMU_INTERACTIVE="$TORTURE_QEMU_INTERACTIVE"; export TORTURE_QEMU_INTERACTIVE TORTURE_QEMU_MAC="$TORTURE_QEMU_MAC"; export TORTURE_QEMU_MAC @@ -399,12 +405,17 @@ echo Results directory: $resdir/$ds echo $scriptname $args touch $resdir/$ds/log echo $scriptname $args >> $resdir/$ds/log -echo ${TORTURE_SUITE} > $resdir/$ds/TORTURE_SUITE -pwd > $resdir/$ds/testid.txt +echo ${TORTURE_SUITE} > $resdir/$ds/torture_suite +echo Build directory: `pwd` > $resdir/$ds/testid.txt if test -d .git then + echo Current commit: `git rev-parse HEAD` >> $resdir/$ds/testid.txt + echo >> $resdir/$ds/testid.txt + echo ' ---' Output of "'"git status"'": >> $resdir/$ds/testid.txt git status >> $resdir/$ds/testid.txt - git rev-parse HEAD >> $resdir/$ds/testid.txt + echo >> $resdir/$ds/testid.txt + echo >> $resdir/$ds/testid.txt + echo ' ---' Output of "'"git diff HEAD"'": >> $resdir/$ds/testid.txt git diff HEAD >> $resdir/$ds/testid.txt fi ___EOF___ @@ -434,8 +445,17 @@ function dump(first, pastlast, batchnum) print "echo ----Start batch " batchnum ": `date` | tee -a " rd "log"; print "needqemurun=" jn=1 + njitter = 0; + split(jitter, ja); + if (ja[1] == -1 && ncpus == 0) + njitter = 1; + else if (ja[1] == -1) + njitter = ncpus; + else + njitter = ja[1]; + print "TORTURE_JITTER_START=\". jitterstart.sh " njitter " " rd " " dur " " ja[2] " " ja[3] "\"; export TORTURE_JITTER_START"; + print "TORTURE_JITTER_STOP=\". jitterstop.sh " rd " \"; export TORTURE_JITTER_STOP" for (j = first; j < pastlast; j++) { - builddir=KVM "/b" j - first + 1 cpusr[jn] = cpus[j]; if (cfrep[cf[j]] == "") { cfr[jn] = cf[j]; @@ -444,15 +464,15 @@ function dump(first, pastlast, batchnum) cfrep[cf[j]]++; cfr[jn] = cf[j] "." cfrep[cf[j]]; } + builddir=rd cfr[jn] "/build"; if (cpusr[jn] > ncpus && ncpus != 0) ovf = "-ovf"; else ovf = ""; print "echo ", cfr[jn], cpusr[jn] ovf ": Starting build. `date` | tee -a " rd "log"; - print "rm -f " builddir ".*"; - print "touch " builddir ".wait"; print "mkdir " rd cfr[jn] " || :"; - print "kvm-test-1-run.sh " CONFIGDIR cf[j], builddir, rd cfr[jn], dur " \"" TORTURE_QEMU_ARG "\" \"" TORTURE_BOOTARGS "\" > " rd cfr[jn] "/kvm-test-1-run.sh.out 2>&1 &" + print "touch " builddir ".wait"; + print "kvm-test-1-run.sh " CONFIGDIR cf[j], rd cfr[jn], dur " \"" TORTURE_QEMU_ARG "\" \"" TORTURE_BOOTARGS "\" > " rd cfr[jn] "/kvm-test-1-run.sh.out 2>&1 &" print "echo ", cfr[jn], cpusr[jn] ovf ": Waiting for build to complete. `date` | tee -a " rd "log"; print "while test -f " builddir ".wait" print "do" @@ -461,23 +481,21 @@ function dump(first, pastlast, batchnum) print "echo ", cfr[jn], cpusr[jn] ovf ": Build complete. `date` | tee -a " rd "log"; jn++; } + print "runfiles=" for (j = 1; j < jn; j++) { - builddir=KVM "/b" j - print "rm -f " builddir ".ready" + builddir=rd cfr[j] "/build"; + if (TORTURE_BUILDONLY) + print "rm -f " builddir ".ready" + else + print "mv " builddir ".ready " builddir ".run" + print "runfiles=\"$runfiles " builddir ".run\"" + fi print "if test -f \"" rd cfr[j] "/builtkernel\"" print "then" print "\techo ----", cfr[j], cpusr[j] ovf ": Kernel present. `date` | tee -a " rd "log"; print "\tneedqemurun=1" print "fi" } - njitter = 0; - split(jitter, ja); - if (ja[1] == -1 && ncpus == 0) - njitter = 1; - else if (ja[1] == -1) - njitter = ncpus; - else - njitter = ja[1]; if (TORTURE_BUILDONLY && njitter != 0) { njitter = 0; print "echo Build-only run, so suppressing jitter | tee -a " rd "log" @@ -488,19 +506,18 @@ function dump(first, pastlast, batchnum) print "if test -n \"$needqemurun\"" print "then" print "\techo ---- Starting kernels. `date` | tee -a " rd "log"; - print "\techo > " rd "jitter_pids" - for (j = 0; j < njitter; j++) { - print "\tjitter.sh " j " " dur " " ja[2] " " ja[3] "&" - print "\techo $! >> " rd "jitter_pids" - } - print "\twait" + print "\t$TORTURE_JITTER_START"; + print "\twhile ls $runfiles > /dev/null 2>&1" + print "\tdo" + print "\t\t:" + print "\tdone" + print "\t$TORTURE_JITTER_STOP"; print "\techo ---- All kernel runs complete. `date` | tee -a " rd "log"; print "else" print "\twait" print "\techo ---- No kernel runs. `date` | tee -a " rd "log"; print "fi" for (j = 1; j < jn; j++) { - builddir=KVM "/b" j print "echo ----", cfr[j], cpusr[j] ovf ": Build/run results: | tee -a " rd "log"; print "cat " rd cfr[j] "/kvm-test-1-run.sh.out | tee -a " rd "log"; } @@ -548,6 +565,18 @@ echo 'ret=$?' >> $T/script echo "cat $T/kvm-recheck.sh.out | tee -a $resdir/$ds/log" >> $T/script echo 'exit $ret' >> $T/script +# Extract the tests and their batches from the script. +egrep 'Start batch|Starting build\.' $T/script | grep -v ">>" | + sed -e 's/:.*$//' -e 's/^echo //' -e 's/-ovf//' | + awk ' + /^----Start/ { + batchno = $3; + next; + } + { + print batchno, $1, $2 + }' > $T/batches + if test "$dryrun" = script then cat $T/script @@ -566,21 +595,14 @@ then exit 0 elif test "$dryrun" = batches then - # Extract the tests and their batches from the script. - egrep 'Start batch|Starting build\.' $T/script | grep -v ">>" | - sed -e 's/:.*$//' -e 's/^echo //' -e 's/-ovf//' | - awk ' - /^----Start/ { - batchno = $3; - next; - } - { - print batchno, $1, $2 - }' + cat $T/batches + exit 0 else - # Not a dryrun, so run the script. + # Not a dryrun. Record the batches and the number of CPUs, then run the script. bash $T/script ret=$? + cp $T/batches $resdir/$ds/batches + echo '#' cpus=$cpus >> $resdir/$ds/batches echo " --- Done at `date` (`get_starttime_duration $starttime`) exitcode $ret" | tee -a $resdir/$ds/log exit $ret fi diff --git a/tools/testing/selftests/rcutorture/bin/torture.sh b/tools/testing/selftests/rcutorture/bin/torture.sh index ad7525b7ac29..56e2e1a42569 100755 --- a/tools/testing/selftests/rcutorture/bin/torture.sh +++ b/tools/testing/selftests/rcutorture/bin/torture.sh @@ -374,7 +374,7 @@ done if test "$do_kvfree" = "yes" then torture_bootargs="rcuscale.kfree_rcu_test=1 rcuscale.kfree_nthreads=16 rcuscale.holdoff=20 rcuscale.kfree_loops=10000 torture.disable_onoff_at_boot" - torture_set "rcuscale-kvfree" tools/testing/selftests/rcutorture/bin/kvm.sh --torture rcuscale --allcpus --duration 10 --kconfig "CONFIG_NR_CPUS=$HALF_ALLOTED_CPUS" --trust-make + torture_set "rcuscale-kvfree" tools/testing/selftests/rcutorture/bin/kvm.sh --torture rcuscale --allcpus --duration 10 --kconfig "CONFIG_NR_CPUS=$HALF_ALLOTED_CPUS" --memory 1G --trust-make fi echo " --- " $scriptname $args diff --git a/tools/testing/selftests/rcutorture/configs/rcu/CFLIST b/tools/testing/selftests/rcutorture/configs/rcu/CFLIST index f2b20db9e296..98b6175e5aa0 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/CFLIST +++ b/tools/testing/selftests/rcutorture/configs/rcu/CFLIST @@ -7,8 +7,8 @@ TREE07 TREE09 SRCU-N SRCU-P -SRCU-t -SRCU-u +SRCU-T +SRCU-U TINY01 TINY02 TASKS01 diff --git a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-t b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-T index d6557c38dfe4..d6557c38dfe4 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-t +++ b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-T diff --git a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-t.boot b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-T.boot index 238bfe3bd0cc..238bfe3bd0cc 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-t.boot +++ b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-T.boot diff --git a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-u b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-U index 6bc24e99862f..6bc24e99862f 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-u +++ b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-U diff --git a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-u.boot b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-U.boot index ce48c7b82673..ce48c7b82673 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-u.boot +++ b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-U.boot diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE03.boot b/tools/testing/selftests/rcutorture/configs/rcu/TREE03.boot index 1c218944b1e9..64f864f1f361 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TREE03.boot +++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE03.boot @@ -4,3 +4,4 @@ rcutree.gp_init_delay=3 rcutree.gp_cleanup_delay=3 rcutree.kthread_prio=2 threadirqs +tree.use_softirq=0 diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE04.boot b/tools/testing/selftests/rcutorture/configs/rcu/TREE04.boot index 5adc6756792a..a8d94caf7d2f 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TREE04.boot +++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE04.boot @@ -1 +1 @@ -rcutree.rcu_fanout_leaf=4 nohz_full=1-7 +rcutree.rcu_fanout_leaf=4 nohz_full=1-N diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE08.boot b/tools/testing/selftests/rcutorture/configs/rcu/TREE08.boot index 22478fd3a865..94d38445d393 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TREE08.boot +++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE08.boot @@ -1,3 +1,3 @@ rcupdate.rcu_self_test=1 rcutree.rcu_fanout_exact=1 -rcu_nocbs=0-7 +rcu_nocbs=all diff --git a/tools/testing/selftests/rcutorture/configs/rcuscale/ver_functions.sh b/tools/testing/selftests/rcutorture/configs/rcuscale/ver_functions.sh index 0333e9b18522..ffbe15109f0d 100644 --- a/tools/testing/selftests/rcutorture/configs/rcuscale/ver_functions.sh +++ b/tools/testing/selftests/rcutorture/configs/rcuscale/ver_functions.sh @@ -12,5 +12,5 @@ # Adds per-version torture-module parameters to kernels supporting them. per_version_boot_params () { echo $1 rcuscale.shutdown=1 \ - rcuscale.verbose=1 + rcuscale.verbose=0 } diff --git a/tools/testing/selftests/rcutorture/configs/refscale/ver_functions.sh b/tools/testing/selftests/rcutorture/configs/refscale/ver_functions.sh index 321e82641287..f81fa2c541a6 100644 --- a/tools/testing/selftests/rcutorture/configs/refscale/ver_functions.sh +++ b/tools/testing/selftests/rcutorture/configs/refscale/ver_functions.sh @@ -12,5 +12,5 @@ # Adds per-version torture-module parameters to kernels supporting them. per_version_boot_params () { echo $1 refscale.shutdown=1 \ - refscale.verbose=1 + refscale.verbose=0 } diff --git a/tools/testing/selftests/resctrl/.gitignore b/tools/testing/selftests/resctrl/.gitignore new file mode 100644 index 000000000000..ab68442b6bc8 --- /dev/null +++ b/tools/testing/selftests/resctrl/.gitignore @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only +resctrl_tests diff --git a/tools/testing/selftests/resctrl/Makefile b/tools/testing/selftests/resctrl/Makefile index d585cc1948cc..6bcee2ec91a9 100644 --- a/tools/testing/selftests/resctrl/Makefile +++ b/tools/testing/selftests/resctrl/Makefile @@ -1,5 +1,5 @@ CC = $(CROSS_COMPILE)gcc -CFLAGS = -g -Wall +CFLAGS = -g -Wall -O2 -D_FORTIFY_SOURCE=2 SRCS=$(wildcard *.c) OBJS=$(SRCS:.c=.o) diff --git a/tools/testing/selftests/resctrl/README b/tools/testing/selftests/resctrl/README index 6e5a0ffa18e8..4b36b25b6ac0 100644 --- a/tools/testing/selftests/resctrl/README +++ b/tools/testing/selftests/resctrl/README @@ -46,8 +46,8 @@ ARGUMENTS Parameter '-h' shows usage information. usage: resctrl_tests [-h] [-b "benchmark_cmd [options]"] [-t test list] [-n no_of_bits] - -b benchmark_cmd [options]: run specified benchmark for MBM, MBA and CQM default benchmark is builtin fill_buf - -t test list: run tests specified in the test list, e.g. -t mbm, mba, cqm, cat + -b benchmark_cmd [options]: run specified benchmark for MBM, MBA and CMT default benchmark is builtin fill_buf + -t test list: run tests specified in the test list, e.g. -t mbm, mba, cmt, cat -n no_of_bits: run cache tests using specified no of bits in cache bit mask -p cpu_no: specify CPU number to run the test. 1 is default -h: help diff --git a/tools/testing/selftests/resctrl/cache.c b/tools/testing/selftests/resctrl/cache.c index 38dbf4962e33..68ff856d36f0 100644 --- a/tools/testing/selftests/resctrl/cache.c +++ b/tools/testing/selftests/resctrl/cache.c @@ -111,7 +111,7 @@ static int get_llc_perf(unsigned long *llc_perf_miss) /* * Get LLC Occupancy as reported by RESCTRL FS - * For CQM, + * For CMT, * 1. If con_mon grp and mon grp given, then read from mon grp in * con_mon grp * 2. If only con_mon grp given, then read from con_mon grp @@ -182,7 +182,7 @@ int measure_cache_vals(struct resctrl_val_param *param, int bm_pid) /* * Measure cache miss from perf. */ - if (!strcmp(param->resctrl_val, "cat")) { + if (!strncmp(param->resctrl_val, CAT_STR, sizeof(CAT_STR))) { ret = get_llc_perf(&llc_perf_miss); if (ret < 0) return ret; @@ -192,7 +192,7 @@ int measure_cache_vals(struct resctrl_val_param *param, int bm_pid) /* * Measure llc occupancy from resctrl. */ - if (!strcmp(param->resctrl_val, "cqm")) { + if (!strncmp(param->resctrl_val, CMT_STR, sizeof(CMT_STR))) { ret = get_llc_occu_resctrl(&llc_occu_resc); if (ret < 0) return ret; @@ -234,7 +234,7 @@ int cat_val(struct resctrl_val_param *param) if (ret) return ret; - if ((strcmp(resctrl_val, "cat") == 0)) { + if (!strncmp(resctrl_val, CAT_STR, sizeof(CAT_STR))) { ret = initialize_llc_perf(); if (ret) return ret; @@ -242,7 +242,7 @@ int cat_val(struct resctrl_val_param *param) /* Test runs until the callback setup() tells the test to stop. */ while (1) { - if (strcmp(resctrl_val, "cat") == 0) { + if (!strncmp(resctrl_val, CAT_STR, sizeof(CAT_STR))) { ret = param->setup(1, param); if (ret) { ret = 0; @@ -270,3 +270,45 @@ int cat_val(struct resctrl_val_param *param) return ret; } + +/* + * show_cache_info: show cache test result information + * @sum_llc_val: sum of LLC cache result data + * @no_of_bits: number of bits + * @cache_span: cache span in bytes for CMT or in lines for CAT + * @max_diff: max difference + * @max_diff_percent: max difference percentage + * @num_of_runs: number of runs + * @platform: show test information on this platform + * @cmt: CMT test or CAT test + * + * Return: 0 on success. non-zero on failure. + */ +int show_cache_info(unsigned long sum_llc_val, int no_of_bits, + unsigned long cache_span, unsigned long max_diff, + unsigned long max_diff_percent, unsigned long num_of_runs, + bool platform, bool cmt) +{ + unsigned long avg_llc_val = 0; + float diff_percent; + long avg_diff = 0; + int ret; + + avg_llc_val = sum_llc_val / (num_of_runs - 1); + avg_diff = (long)abs(cache_span - avg_llc_val); + diff_percent = ((float)cache_span - avg_llc_val) / cache_span * 100; + + ret = platform && abs((int)diff_percent) > max_diff_percent && + (cmt ? (abs(avg_diff) > max_diff) : true); + + ksft_print_msg("%s Check cache miss rate within %d%%\n", + ret ? "Fail:" : "Pass:", max_diff_percent); + + ksft_print_msg("Percent diff=%d\n", abs((int)diff_percent)); + ksft_print_msg("Number of bits: %d\n", no_of_bits); + ksft_print_msg("Average LLC val: %lu\n", avg_llc_val); + ksft_print_msg("Cache span (%s): %lu\n", cmt ? "bytes" : "lines", + cache_span); + + return ret; +} diff --git a/tools/testing/selftests/resctrl/cat_test.c b/tools/testing/selftests/resctrl/cat_test.c index 5da43767b973..cd4f68388e0f 100644 --- a/tools/testing/selftests/resctrl/cat_test.c +++ b/tools/testing/selftests/resctrl/cat_test.c @@ -17,10 +17,10 @@ #define MAX_DIFF_PERCENT 4 #define MAX_DIFF 1000000 -int count_of_bits; -char cbm_mask[256]; -unsigned long long_mask; -unsigned long cache_size; +static int count_of_bits; +static char cbm_mask[256]; +static unsigned long long_mask; +static unsigned long cache_size; /* * Change schemata. Write schemata to specified @@ -52,27 +52,6 @@ static int cat_setup(int num, ...) return ret; } -static void show_cache_info(unsigned long sum_llc_perf_miss, int no_of_bits, - unsigned long span) -{ - unsigned long allocated_cache_lines = span / 64; - unsigned long avg_llc_perf_miss = 0; - float diff_percent; - - avg_llc_perf_miss = sum_llc_perf_miss / (NUM_OF_RUNS - 1); - diff_percent = ((float)allocated_cache_lines - avg_llc_perf_miss) / - allocated_cache_lines * 100; - - printf("%sok CAT: cache miss rate within %d%%\n", - !is_amd && abs((int)diff_percent) > MAX_DIFF_PERCENT ? - "not " : "", MAX_DIFF_PERCENT); - tests_run++; - printf("# Percent diff=%d\n", abs((int)diff_percent)); - printf("# Number of bits: %d\n", no_of_bits); - printf("# Avg_llc_perf_miss: %lu\n", avg_llc_perf_miss); - printf("# Allocated cache lines: %lu\n", allocated_cache_lines); -} - static int check_results(struct resctrl_val_param *param) { char *token_array[8], temp[512]; @@ -80,7 +59,7 @@ static int check_results(struct resctrl_val_param *param) int runs = 0, no_of_bits = 0; FILE *fp; - printf("# Checking for pass/fail\n"); + ksft_print_msg("Checking for pass/fail\n"); fp = fopen(param->filename, "r"); if (!fp) { perror("# Cannot open file"); @@ -108,9 +87,9 @@ static int check_results(struct resctrl_val_param *param) fclose(fp); no_of_bits = count_bits(param->mask); - show_cache_info(sum_llc_perf_miss, no_of_bits, param->span); - - return 0; + return show_cache_info(sum_llc_perf_miss, no_of_bits, param->span / 64, + MAX_DIFF, MAX_DIFF_PERCENT, NUM_OF_RUNS, + !is_amd, false); } void cat_test_cleanup(void) @@ -132,11 +111,8 @@ int cat_perf_miss_val(int cpu_no, int n, char *cache_type) if (ret) return ret; - if (!validate_resctrl_feature_request("cat")) - return -1; - /* Get default cbm mask for L3/L2 cache */ - ret = get_cbm_mask(cache_type); + ret = get_cbm_mask(cache_type, cbm_mask); if (ret) return ret; @@ -146,15 +122,18 @@ int cat_perf_miss_val(int cpu_no, int n, char *cache_type) ret = get_cache_size(cpu_no, cache_type, &cache_size); if (ret) return ret; - printf("cache size :%lu\n", cache_size); + ksft_print_msg("Cache size :%lu\n", cache_size); /* Get max number of bits from default-cabm mask */ count_of_bits = count_bits(long_mask); - if (n < 1 || n > count_of_bits - 1) { - printf("Invalid input value for no_of_bits n!\n"); - printf("Please Enter value in range 1 to %d\n", - count_of_bits - 1); + if (!n) + n = count_of_bits / 2; + + if (n > count_of_bits - 1) { + ksft_print_msg("Invalid input value for no_of_bits n!\n"); + ksft_print_msg("Please enter value in range 1 to %d\n", + count_of_bits - 1); return -1; } @@ -164,7 +143,7 @@ int cat_perf_miss_val(int cpu_no, int n, char *cache_type) return -1; struct resctrl_val_param param = { - .resctrl_val = "cat", + .resctrl_val = CAT_STR, .cpu_no = cpu_no, .mum_resctrlfs = 0, .setup = cat_setup, diff --git a/tools/testing/selftests/resctrl/cqm_test.c b/tools/testing/selftests/resctrl/cmt_test.c index c8756152bd61..8968e36db99d 100644 --- a/tools/testing/selftests/resctrl/cqm_test.c +++ b/tools/testing/selftests/resctrl/cmt_test.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* - * Cache Monitoring Technology (CQM) test + * Cache Monitoring Technology (CMT) test * * Copyright (C) 2018 Intel Corporation * @@ -11,17 +11,17 @@ #include "resctrl.h" #include <unistd.h> -#define RESULT_FILE_NAME "result_cqm" +#define RESULT_FILE_NAME "result_cmt" #define NUM_OF_RUNS 5 #define MAX_DIFF 2000000 #define MAX_DIFF_PERCENT 15 -int count_of_bits; -char cbm_mask[256]; -unsigned long long_mask; -unsigned long cache_size; +static int count_of_bits; +static char cbm_mask[256]; +static unsigned long long_mask; +static unsigned long cache_size; -static int cqm_setup(int num, ...) +static int cmt_setup(int num, ...) { struct resctrl_val_param *p; va_list param; @@ -39,38 +39,6 @@ static int cqm_setup(int num, ...) return 0; } -static void show_cache_info(unsigned long sum_llc_occu_resc, int no_of_bits, - unsigned long span) -{ - unsigned long avg_llc_occu_resc = 0; - float diff_percent; - long avg_diff = 0; - bool res; - - avg_llc_occu_resc = sum_llc_occu_resc / (NUM_OF_RUNS - 1); - avg_diff = (long)abs(span - avg_llc_occu_resc); - - diff_percent = (((float)span - avg_llc_occu_resc) / span) * 100; - - if ((abs((int)diff_percent) <= MAX_DIFF_PERCENT) || - (abs(avg_diff) <= MAX_DIFF)) - res = true; - else - res = false; - - printf("%sok CQM: diff within %d, %d\%%\n", res ? "" : "not", - MAX_DIFF, (int)MAX_DIFF_PERCENT); - - printf("# diff: %ld\n", avg_diff); - printf("# percent diff=%d\n", abs((int)diff_percent)); - printf("# Results are displayed in (Bytes)\n"); - printf("# Number of bits: %d\n", no_of_bits); - printf("# Avg_llc_occu_resc: %lu\n", avg_llc_occu_resc); - printf("# llc_occu_exp (span): %lu\n", span); - - tests_run++; -} - static int check_results(struct resctrl_val_param *param, int no_of_bits) { char *token_array[8], temp[512]; @@ -78,7 +46,7 @@ static int check_results(struct resctrl_val_param *param, int no_of_bits) int runs = 0; FILE *fp; - printf("# checking for pass/fail\n"); + ksft_print_msg("Checking for pass/fail\n"); fp = fopen(param->filename, "r"); if (!fp) { perror("# Error in opening file\n"); @@ -86,7 +54,7 @@ static int check_results(struct resctrl_val_param *param, int no_of_bits) return errno; } - while (fgets(temp, 1024, fp)) { + while (fgets(temp, sizeof(temp), fp)) { char *token = strtok(temp, ":\t"); int fields = 0; @@ -101,17 +69,18 @@ static int check_results(struct resctrl_val_param *param, int no_of_bits) runs++; } fclose(fp); - show_cache_info(sum_llc_occu_resc, no_of_bits, param->span); - return 0; + return show_cache_info(sum_llc_occu_resc, no_of_bits, param->span, + MAX_DIFF, MAX_DIFF_PERCENT, NUM_OF_RUNS, + true, true); } -void cqm_test_cleanup(void) +void cmt_test_cleanup(void) { remove(RESULT_FILE_NAME); } -int cqm_resctrl_val(int cpu_no, int n, char **benchmark_cmd) +int cmt_resctrl_val(int cpu_no, int n, char **benchmark_cmd) { int ret, mum_resctrlfs; @@ -122,10 +91,10 @@ int cqm_resctrl_val(int cpu_no, int n, char **benchmark_cmd) if (ret) return ret; - if (!validate_resctrl_feature_request("cqm")) + if (!validate_resctrl_feature_request(CMT_STR)) return -1; - ret = get_cbm_mask("L3"); + ret = get_cbm_mask("L3", cbm_mask); if (ret) return ret; @@ -134,18 +103,18 @@ int cqm_resctrl_val(int cpu_no, int n, char **benchmark_cmd) ret = get_cache_size(cpu_no, "L3", &cache_size); if (ret) return ret; - printf("cache size :%lu\n", cache_size); + ksft_print_msg("Cache size :%lu\n", cache_size); count_of_bits = count_bits(long_mask); if (n < 1 || n > count_of_bits) { - printf("Invalid input value for numbr_of_bits n!\n"); - printf("Please Enter value in range 1 to %d\n", count_of_bits); + ksft_print_msg("Invalid input value for numbr_of_bits n!\n"); + ksft_print_msg("Please enter value in range 1 to %d\n", count_of_bits); return -1; } struct resctrl_val_param param = { - .resctrl_val = "cqm", + .resctrl_val = CMT_STR, .ctrlgrp = "c1", .mongrp = "m1", .cpu_no = cpu_no, @@ -154,7 +123,7 @@ int cqm_resctrl_val(int cpu_no, int n, char **benchmark_cmd) .mask = ~(long_mask << n) & long_mask, .span = cache_size * n / count_of_bits, .num_of_runs = 0, - .setup = cqm_setup, + .setup = cmt_setup, }; if (strcmp(benchmark_cmd[0], "fill_buf") == 0) @@ -170,7 +139,7 @@ int cqm_resctrl_val(int cpu_no, int n, char **benchmark_cmd) if (ret) return ret; - cqm_test_cleanup(); + cmt_test_cleanup(); return 0; } diff --git a/tools/testing/selftests/resctrl/config b/tools/testing/selftests/resctrl/config new file mode 100644 index 000000000000..8d9f2deb56ed --- /dev/null +++ b/tools/testing/selftests/resctrl/config @@ -0,0 +1,2 @@ +CONFIG_X86_CPU_RESCTRL=y +CONFIG_PROC_CPU_RESCTRL=y diff --git a/tools/testing/selftests/resctrl/fill_buf.c b/tools/testing/selftests/resctrl/fill_buf.c index 79c611c99a3d..51e5cf22632f 100644 --- a/tools/testing/selftests/resctrl/fill_buf.c +++ b/tools/testing/selftests/resctrl/fill_buf.c @@ -115,7 +115,7 @@ static int fill_cache_read(unsigned char *start_ptr, unsigned char *end_ptr, while (1) { ret = fill_one_span_read(start_ptr, end_ptr); - if (!strcmp(resctrl_val, "cat")) + if (!strncmp(resctrl_val, CAT_STR, sizeof(CAT_STR))) break; } @@ -134,7 +134,7 @@ static int fill_cache_write(unsigned char *start_ptr, unsigned char *end_ptr, { while (1) { fill_one_span_write(start_ptr, end_ptr); - if (!strcmp(resctrl_val, "cat")) + if (!strncmp(resctrl_val, CAT_STR, sizeof(CAT_STR))) break; } diff --git a/tools/testing/selftests/resctrl/mba_test.c b/tools/testing/selftests/resctrl/mba_test.c index 7bf8eaa6204b..1a1bdb6180cf 100644 --- a/tools/testing/selftests/resctrl/mba_test.c +++ b/tools/testing/selftests/resctrl/mba_test.c @@ -12,7 +12,7 @@ #define RESULT_FILE_NAME "result_mba" #define NUM_OF_RUNS 5 -#define MAX_DIFF 300 +#define MAX_DIFF_PERCENT 5 #define ALLOCATION_MAX 100 #define ALLOCATION_MIN 10 #define ALLOCATION_STEP 10 @@ -56,13 +56,14 @@ static void show_mba_info(unsigned long *bw_imc, unsigned long *bw_resc) int allocation, runs; bool failed = false; - printf("# Results are displayed in (MB)\n"); + ksft_print_msg("Results are displayed in (MB)\n"); /* Memory bandwidth from 100% down to 10% */ for (allocation = 0; allocation < ALLOCATION_MAX / ALLOCATION_STEP; allocation++) { unsigned long avg_bw_imc, avg_bw_resc; unsigned long sum_bw_imc = 0, sum_bw_resc = 0; - unsigned long avg_diff; + int avg_diff_per; + float avg_diff; /* * The first run is discarded due to inaccurate value from @@ -76,23 +77,26 @@ static void show_mba_info(unsigned long *bw_imc, unsigned long *bw_resc) avg_bw_imc = sum_bw_imc / (NUM_OF_RUNS - 1); avg_bw_resc = sum_bw_resc / (NUM_OF_RUNS - 1); - avg_diff = labs((long)(avg_bw_resc - avg_bw_imc)); - - printf("%sok MBA schemata percentage %u smaller than %d %%\n", - avg_diff > MAX_DIFF ? "not " : "", - ALLOCATION_MAX - ALLOCATION_STEP * allocation, - MAX_DIFF); - tests_run++; - printf("# avg_diff: %lu\n", avg_diff); - printf("# avg_bw_imc: %lu\n", avg_bw_imc); - printf("# avg_bw_resc: %lu\n", avg_bw_resc); - if (avg_diff > MAX_DIFF) + avg_diff = (float)labs(avg_bw_resc - avg_bw_imc) / avg_bw_imc; + avg_diff_per = (int)(avg_diff * 100); + + ksft_print_msg("%s Check MBA diff within %d%% for schemata %u\n", + avg_diff_per > MAX_DIFF_PERCENT ? + "Fail:" : "Pass:", + MAX_DIFF_PERCENT, + ALLOCATION_MAX - ALLOCATION_STEP * allocation); + + ksft_print_msg("avg_diff_per: %d%%\n", avg_diff_per); + ksft_print_msg("avg_bw_imc: %lu\n", avg_bw_imc); + ksft_print_msg("avg_bw_resc: %lu\n", avg_bw_resc); + if (avg_diff_per > MAX_DIFF_PERCENT) failed = true; } - printf("%sok schemata change using MBA%s\n", failed ? "not " : "", - failed ? " # at least one test failed" : ""); - tests_run++; + ksft_print_msg("%s Check schemata change using MBA\n", + failed ? "Fail:" : "Pass:"); + if (failed) + ksft_print_msg("At least one test failed\n"); } static int check_results(void) @@ -141,7 +145,7 @@ void mba_test_cleanup(void) int mba_schemata_change(int cpu_no, char *bw_report, char **benchmark_cmd) { struct resctrl_val_param param = { - .resctrl_val = "mba", + .resctrl_val = MBA_STR, .ctrlgrp = "c1", .mongrp = "m1", .cpu_no = cpu_no, @@ -154,9 +158,6 @@ int mba_schemata_change(int cpu_no, char *bw_report, char **benchmark_cmd) remove(RESULT_FILE_NAME); - if (!validate_resctrl_feature_request("mba")) - return -1; - ret = resctrl_val(benchmark_cmd, ¶m); if (ret) return ret; diff --git a/tools/testing/selftests/resctrl/mbm_test.c b/tools/testing/selftests/resctrl/mbm_test.c index 4700f7453f81..8392e5c55ed0 100644 --- a/tools/testing/selftests/resctrl/mbm_test.c +++ b/tools/testing/selftests/resctrl/mbm_test.c @@ -11,16 +11,16 @@ #include "resctrl.h" #define RESULT_FILE_NAME "result_mbm" -#define MAX_DIFF 300 +#define MAX_DIFF_PERCENT 5 #define NUM_OF_RUNS 5 -static void +static int show_bw_info(unsigned long *bw_imc, unsigned long *bw_resc, int span) { unsigned long avg_bw_imc = 0, avg_bw_resc = 0; unsigned long sum_bw_imc = 0, sum_bw_resc = 0; - long avg_diff = 0; - int runs; + int runs, ret, avg_diff_per; + float avg_diff = 0; /* * Discard the first value which is inaccurate due to monitoring setup @@ -33,15 +33,18 @@ show_bw_info(unsigned long *bw_imc, unsigned long *bw_resc, int span) avg_bw_imc = sum_bw_imc / 4; avg_bw_resc = sum_bw_resc / 4; - avg_diff = avg_bw_resc - avg_bw_imc; - - printf("%sok MBM: diff within %d%%\n", - labs(avg_diff) > MAX_DIFF ? "not " : "", MAX_DIFF); - tests_run++; - printf("# avg_diff: %lu\n", labs(avg_diff)); - printf("# Span (MB): %d\n", span); - printf("# avg_bw_imc: %lu\n", avg_bw_imc); - printf("# avg_bw_resc: %lu\n", avg_bw_resc); + avg_diff = (float)labs(avg_bw_resc - avg_bw_imc) / avg_bw_imc; + avg_diff_per = (int)(avg_diff * 100); + + ret = avg_diff_per > MAX_DIFF_PERCENT; + ksft_print_msg("%s Check MBM diff within %d%%\n", + ret ? "Fail:" : "Pass:", MAX_DIFF_PERCENT); + ksft_print_msg("avg_diff_per: %d%%\n", avg_diff_per); + ksft_print_msg("Span (MB): %d\n", span); + ksft_print_msg("avg_bw_imc: %lu\n", avg_bw_imc); + ksft_print_msg("avg_bw_resc: %lu\n", avg_bw_resc); + + return ret; } static int check_results(int span) @@ -49,10 +52,10 @@ static int check_results(int span) unsigned long bw_imc[NUM_OF_RUNS], bw_resc[NUM_OF_RUNS]; char temp[1024], *token_array[8]; char output[] = RESULT_FILE_NAME; - int runs; + int runs, ret; FILE *fp; - printf("# Checking for pass/fail\n"); + ksft_print_msg("Checking for pass/fail\n"); fp = fopen(output, "r"); if (!fp) { @@ -76,11 +79,11 @@ static int check_results(int span) runs++; } - show_bw_info(bw_imc, bw_resc, span); + ret = show_bw_info(bw_imc, bw_resc, span); fclose(fp); - return 0; + return ret; } static int mbm_setup(int num, ...) @@ -114,7 +117,7 @@ void mbm_test_cleanup(void) int mbm_bw_change(int span, int cpu_no, char *bw_report, char **benchmark_cmd) { struct resctrl_val_param param = { - .resctrl_val = "mbm", + .resctrl_val = MBM_STR, .ctrlgrp = "c1", .mongrp = "m1", .span = span, @@ -128,9 +131,6 @@ int mbm_bw_change(int span, int cpu_no, char *bw_report, char **benchmark_cmd) remove(RESULT_FILE_NAME); - if (!validate_resctrl_feature_request("mbm")) - return -1; - ret = resctrl_val(benchmark_cmd, ¶m); if (ret) return ret; diff --git a/tools/testing/selftests/resctrl/resctrl.h b/tools/testing/selftests/resctrl/resctrl.h index 39bf59c6b9c5..1ad10c47e31d 100644 --- a/tools/testing/selftests/resctrl/resctrl.h +++ b/tools/testing/selftests/resctrl/resctrl.h @@ -23,11 +23,16 @@ #include <sys/eventfd.h> #include <asm/unistd.h> #include <linux/perf_event.h> +#include "../kselftest.h" #define MB (1024 * 1024) #define RESCTRL_PATH "/sys/fs/resctrl" #define PHYS_ID_PATH "/sys/devices/system/cpu/cpu" #define CBM_MASK_PATH "/sys/fs/resctrl/info" +#define L3_PATH "/sys/fs/resctrl/info/L3" +#define MB_PATH "/sys/fs/resctrl/info/MB" +#define L3_MON_PATH "/sys/fs/resctrl/info/L3_MON" +#define L3_MON_FEATURES_PATH "/sys/fs/resctrl/info/L3_MON/mon_features" #define PARENT_EXIT(err_msg) \ do { \ @@ -62,11 +67,15 @@ struct resctrl_val_param { int (*setup)(int num, ...); }; -pid_t bm_pid, ppid; -int tests_run; +#define MBM_STR "mbm" +#define MBA_STR "mba" +#define CMT_STR "cmt" +#define CAT_STR "cat" -char llc_occup_path[1024]; -bool is_amd; +extern pid_t bm_pid, ppid; + +extern char llc_occup_path[1024]; +extern bool is_amd; bool check_resctrlfs_support(void); int filter_dmesg(void); @@ -74,7 +83,7 @@ int remount_resctrlfs(bool mum_resctrlfs); int get_resource_id(int cpu_no, int *resource_id); int umount_resctrlfs(void); int validate_bw_report_request(char *bw_report); -bool validate_resctrl_feature_request(char *resctrl_val); +bool validate_resctrl_feature_request(const char *resctrl_val); char *fgrep(FILE *inf, const char *str); int taskset_benchmark(pid_t bm_pid, int cpu_no); void run_benchmark(int signum, siginfo_t *info, void *ucontext); @@ -92,16 +101,20 @@ void tests_cleanup(void); void mbm_test_cleanup(void); int mba_schemata_change(int cpu_no, char *bw_report, char **benchmark_cmd); void mba_test_cleanup(void); -int get_cbm_mask(char *cache_type); +int get_cbm_mask(char *cache_type, char *cbm_mask); int get_cache_size(int cpu_no, char *cache_type, unsigned long *cache_size); void ctrlc_handler(int signum, siginfo_t *info, void *ptr); int cat_val(struct resctrl_val_param *param); void cat_test_cleanup(void); int cat_perf_miss_val(int cpu_no, int no_of_bits, char *cache_type); -int cqm_resctrl_val(int cpu_no, int n, char **benchmark_cmd); +int cmt_resctrl_val(int cpu_no, int n, char **benchmark_cmd); unsigned int count_bits(unsigned long n); -void cqm_test_cleanup(void); +void cmt_test_cleanup(void); int get_core_sibling(int cpu_no); int measure_cache_vals(struct resctrl_val_param *param, int bm_pid); +int show_cache_info(unsigned long sum_llc_val, int no_of_bits, + unsigned long cache_span, unsigned long max_diff, + unsigned long max_diff_percent, unsigned long num_of_runs, + bool platform, bool cmt); #endif /* RESCTRL_H */ diff --git a/tools/testing/selftests/resctrl/resctrl_tests.c b/tools/testing/selftests/resctrl/resctrl_tests.c index 425cc85ac883..f51b5fc066a3 100644 --- a/tools/testing/selftests/resctrl/resctrl_tests.c +++ b/tools/testing/selftests/resctrl/resctrl_tests.c @@ -37,10 +37,10 @@ void detect_amd(void) static void cmd_help(void) { printf("usage: resctrl_tests [-h] [-b \"benchmark_cmd [options]\"] [-t test list] [-n no_of_bits]\n"); - printf("\t-b benchmark_cmd [options]: run specified benchmark for MBM, MBA and CQM"); - printf("\t default benchmark is builtin fill_buf\n"); + printf("\t-b benchmark_cmd [options]: run specified benchmark for MBM, MBA and CMT\n"); + printf("\t default benchmark is builtin fill_buf\n"); printf("\t-t test list: run tests specified in the test list, "); - printf("e.g. -t mbm, mba, cqm, cat\n"); + printf("e.g. -t mbm, mba, cmt, cat\n"); printf("\t-n no_of_bits: run cache tests using specified no of bits in cache bit mask\n"); printf("\t-p cpu_no: specify CPU number to run the test. 1 is default\n"); printf("\t-h: help\n"); @@ -50,17 +50,88 @@ void tests_cleanup(void) { mbm_test_cleanup(); mba_test_cleanup(); - cqm_test_cleanup(); + cmt_test_cleanup(); + cat_test_cleanup(); +} + +static void run_mbm_test(bool has_ben, char **benchmark_cmd, int span, + int cpu_no, char *bw_report) +{ + int res; + + ksft_print_msg("Starting MBM BW change ...\n"); + + if (!validate_resctrl_feature_request(MBM_STR)) { + ksft_test_result_skip("Hardware does not support MBM or MBM is disabled\n"); + return; + } + + if (!has_ben) + sprintf(benchmark_cmd[5], "%s", MBA_STR); + res = mbm_bw_change(span, cpu_no, bw_report, benchmark_cmd); + ksft_test_result(!res, "MBM: bw change\n"); + mbm_test_cleanup(); +} + +static void run_mba_test(bool has_ben, char **benchmark_cmd, int span, + int cpu_no, char *bw_report) +{ + int res; + + ksft_print_msg("Starting MBA Schemata change ...\n"); + + if (!validate_resctrl_feature_request(MBA_STR)) { + ksft_test_result_skip("Hardware does not support MBA or MBA is disabled\n"); + return; + } + + if (!has_ben) + sprintf(benchmark_cmd[1], "%d", span); + res = mba_schemata_change(cpu_no, bw_report, benchmark_cmd); + ksft_test_result(!res, "MBA: schemata change\n"); + mba_test_cleanup(); +} + +static void run_cmt_test(bool has_ben, char **benchmark_cmd, int cpu_no) +{ + int res; + + ksft_print_msg("Starting CMT test ...\n"); + if (!validate_resctrl_feature_request(CMT_STR)) { + ksft_test_result_skip("Hardware does not support CMT or CMT is disabled\n"); + return; + } + + if (!has_ben) + sprintf(benchmark_cmd[5], "%s", CMT_STR); + res = cmt_resctrl_val(cpu_no, 5, benchmark_cmd); + ksft_test_result(!res, "CMT: test\n"); + cmt_test_cleanup(); +} + +static void run_cat_test(int cpu_no, int no_of_bits) +{ + int res; + + ksft_print_msg("Starting CAT test ...\n"); + + if (!validate_resctrl_feature_request(CAT_STR)) { + ksft_test_result_skip("Hardware does not support CAT or CAT is disabled\n"); + return; + } + + res = cat_perf_miss_val(cpu_no, no_of_bits, "L3"); + ksft_test_result(!res, "CAT: test\n"); cat_test_cleanup(); } int main(int argc, char **argv) { - bool has_ben = false, mbm_test = true, mba_test = true, cqm_test = true; - int res, c, cpu_no = 1, span = 250, argc_new = argc, i, no_of_bits = 5; + bool has_ben = false, mbm_test = true, mba_test = true, cmt_test = true; + int c, cpu_no = 1, span = 250, argc_new = argc, i, no_of_bits = 0; char *benchmark_cmd[BENCHMARK_ARGS], bw_report[64], bm_type[64]; char benchmark_cmd_area[BENCHMARK_ARGS][BENCHMARK_ARG_SIZE]; - int ben_ind, ben_count; + int ben_ind, ben_count, tests = 0; bool cat_test = true; for (i = 0; i < argc; i++) { @@ -73,7 +144,7 @@ int main(int argc, char **argv) } } - while ((c = getopt(argc_new, argv, "ht:b:")) != -1) { + while ((c = getopt(argc_new, argv, "ht:b:n:p:")) != -1) { char *token; switch (c) { @@ -82,17 +153,21 @@ int main(int argc, char **argv) mbm_test = false; mba_test = false; - cqm_test = false; + cmt_test = false; cat_test = false; while (token) { - if (!strcmp(token, "mbm")) { + if (!strncmp(token, MBM_STR, sizeof(MBM_STR))) { mbm_test = true; - } else if (!strcmp(token, "mba")) { + tests++; + } else if (!strncmp(token, MBA_STR, sizeof(MBA_STR))) { mba_test = true; - } else if (!strcmp(token, "cqm")) { - cqm_test = true; - } else if (!strcmp(token, "cat")) { + tests++; + } else if (!strncmp(token, CMT_STR, sizeof(CMT_STR))) { + cmt_test = true; + tests++; + } else if (!strncmp(token, CAT_STR, sizeof(CAT_STR))) { cat_test = true; + tests++; } else { printf("invalid argument\n"); @@ -106,6 +181,10 @@ int main(int argc, char **argv) break; case 'n': no_of_bits = atoi(optarg); + if (no_of_bits <= 0) { + printf("Bail out! invalid argument for no_of_bits\n"); + return -1; + } break; case 'h': cmd_help(); @@ -118,7 +197,7 @@ int main(int argc, char **argv) } } - printf("TAP version 13\n"); + ksft_print_header(); /* * Typically we need root privileges, because: @@ -126,7 +205,7 @@ int main(int argc, char **argv) * 2. We execute perf commands */ if (geteuid() != 0) - printf("# WARNING: not running as root, tests may fail.\n"); + return ksft_exit_fail_msg("Not running as root, abort testing.\n"); /* Detect AMD vendor */ detect_amd(); @@ -155,48 +234,26 @@ int main(int argc, char **argv) sprintf(bw_report, "reads"); sprintf(bm_type, "fill_buf"); - check_resctrlfs_support(); + if (!check_resctrlfs_support()) + return ksft_exit_fail_msg("resctrl FS does not exist\n"); + filter_dmesg(); - if (!is_amd && mbm_test) { - printf("# Starting MBM BW change ...\n"); - if (!has_ben) - sprintf(benchmark_cmd[5], "%s", "mba"); - res = mbm_bw_change(span, cpu_no, bw_report, benchmark_cmd); - printf("%sok MBM: bw change\n", res ? "not " : ""); - mbm_test_cleanup(); - tests_run++; - } + ksft_set_plan(tests ? : 4); - if (!is_amd && mba_test) { - printf("# Starting MBA Schemata change ...\n"); - if (!has_ben) - sprintf(benchmark_cmd[1], "%d", span); - res = mba_schemata_change(cpu_no, bw_report, benchmark_cmd); - printf("%sok MBA: schemata change\n", res ? "not " : ""); - mba_test_cleanup(); - tests_run++; - } + if (!is_amd && mbm_test) + run_mbm_test(has_ben, benchmark_cmd, span, cpu_no, bw_report); - if (cqm_test) { - printf("# Starting CQM test ...\n"); - if (!has_ben) - sprintf(benchmark_cmd[5], "%s", "cqm"); - res = cqm_resctrl_val(cpu_no, no_of_bits, benchmark_cmd); - printf("%sok CQM: test\n", res ? "not " : ""); - cqm_test_cleanup(); - tests_run++; - } + if (!is_amd && mba_test) + run_mba_test(has_ben, benchmark_cmd, span, cpu_no, bw_report); - if (cat_test) { - printf("# Starting CAT test ...\n"); - res = cat_perf_miss_val(cpu_no, no_of_bits, "L3"); - printf("%sok CAT: test\n", res ? "not " : ""); - tests_run++; - cat_test_cleanup(); - } + if (cmt_test) + run_cmt_test(has_ben, benchmark_cmd, cpu_no); + + if (cat_test) + run_cat_test(cpu_no, no_of_bits); - printf("1..%d\n", tests_run); + umount_resctrlfs(); - return 0; + return ksft_exit_pass(); } diff --git a/tools/testing/selftests/resctrl/resctrl_val.c b/tools/testing/selftests/resctrl/resctrl_val.c index 520fea3606d1..95224345c78e 100644 --- a/tools/testing/selftests/resctrl/resctrl_val.c +++ b/tools/testing/selftests/resctrl/resctrl_val.c @@ -221,8 +221,8 @@ static int read_from_imc_dir(char *imc_dir, int count) */ static int num_of_imcs(void) { + char imc_dir[512], *temp; unsigned int count = 0; - char imc_dir[512]; struct dirent *ep; int ret; DIR *dp; @@ -230,7 +230,25 @@ static int num_of_imcs(void) dp = opendir(DYN_PMU_PATH); if (dp) { while ((ep = readdir(dp))) { - if (strstr(ep->d_name, UNCORE_IMC)) { + temp = strstr(ep->d_name, UNCORE_IMC); + if (!temp) + continue; + + /* + * imc counters are named as "uncore_imc_<n>", hence + * increment the pointer to point to <n>. Note that + * sizeof(UNCORE_IMC) would count for null character as + * well and hence the last underscore character in + * uncore_imc'_' need not be counted. + */ + temp = temp + sizeof(UNCORE_IMC); + + /* + * Some directories under "DYN_PMU_PATH" could have + * names like "uncore_imc_free_running", hence, check if + * first character is a numerical digit or not. + */ + if (temp[0] >= '0' && temp[0] <= '9') { sprintf(imc_dir, "%s/%s/", DYN_PMU_PATH, ep->d_name); ret = read_from_imc_dir(imc_dir, count); @@ -282,9 +300,9 @@ static int initialize_mem_bw_imc(void) * Memory B/W utilized by a process on a socket can be calculated using * iMC counters. Perf events are used to read these counters. * - * Return: >= 0 on success. < 0 on failure. + * Return: = 0 on success. < 0 on failure. */ -static float get_mem_bw_imc(int cpu_no, char *bw_report) +static int get_mem_bw_imc(int cpu_no, char *bw_report, float *bw_imc) { float reads, writes, of_mul_read, of_mul_write; int imc, j, ret; @@ -355,13 +373,18 @@ static float get_mem_bw_imc(int cpu_no, char *bw_report) close(imc_counters_config[imc][WRITE].fd); } - if (strcmp(bw_report, "reads") == 0) - return reads; + if (strcmp(bw_report, "reads") == 0) { + *bw_imc = reads; + return 0; + } - if (strcmp(bw_report, "writes") == 0) - return writes; + if (strcmp(bw_report, "writes") == 0) { + *bw_imc = writes; + return 0; + } - return (reads + writes); + *bw_imc = reads + writes; + return 0; } void set_mbm_path(const char *ctrlgrp, const char *mongrp, int resource_id) @@ -397,10 +420,10 @@ static void initialize_mem_bw_resctrl(const char *ctrlgrp, const char *mongrp, return; } - if (strcmp(resctrl_val, "mbm") == 0) + if (!strncmp(resctrl_val, MBM_STR, sizeof(MBM_STR))) set_mbm_path(ctrlgrp, mongrp, resource_id); - if ((strcmp(resctrl_val, "mba") == 0)) { + if (!strncmp(resctrl_val, MBA_STR, sizeof(MBA_STR))) { if (ctrlgrp) sprintf(mbm_total_path, CON_MBM_LOCAL_BYTES_PATH, RESCTRL_PATH, ctrlgrp, resource_id); @@ -420,9 +443,8 @@ static void initialize_mem_bw_resctrl(const char *ctrlgrp, const char *mongrp, * 1. If con_mon grp is given, then read from it * 2. If con_mon grp is not given, then read from root con_mon grp */ -static unsigned long get_mem_bw_resctrl(void) +static int get_mem_bw_resctrl(unsigned long *mbm_total) { - unsigned long mbm_total = 0; FILE *fp; fp = fopen(mbm_total_path, "r"); @@ -431,7 +453,7 @@ static unsigned long get_mem_bw_resctrl(void) return -1; } - if (fscanf(fp, "%lu", &mbm_total) <= 0) { + if (fscanf(fp, "%lu", mbm_total) <= 0) { perror("Could not get mbm local bytes"); fclose(fp); @@ -439,7 +461,7 @@ static unsigned long get_mem_bw_resctrl(void) } fclose(fp); - return mbm_total; + return 0; } pid_t bm_pid, ppid; @@ -449,7 +471,7 @@ void ctrlc_handler(int signum, siginfo_t *info, void *ptr) kill(bm_pid, SIGKILL); umount_resctrlfs(); tests_cleanup(); - printf("Ending\n\n"); + ksft_print_msg("Ending\n\n"); exit(EXIT_SUCCESS); } @@ -492,7 +514,7 @@ static int print_results_bw(char *filename, int bm_pid, float bw_imc, return 0; } -static void set_cqm_path(const char *ctrlgrp, const char *mongrp, char sock_num) +static void set_cmt_path(const char *ctrlgrp, const char *mongrp, char sock_num) { if (strlen(ctrlgrp) && strlen(mongrp)) sprintf(llc_occup_path, CON_MON_LCC_OCCUP_PATH, RESCTRL_PATH, @@ -512,7 +534,7 @@ static void set_cqm_path(const char *ctrlgrp, const char *mongrp, char sock_num) * @ctrlgrp: Name of the control monitor group (con_mon grp) * @mongrp: Name of the monitor group (mon grp) * @cpu_no: CPU number that the benchmark PID is binded to - * @resctrl_val: Resctrl feature (Eg: cat, cqm.. etc) + * @resctrl_val: Resctrl feature (Eg: cat, cmt.. etc) */ static void initialize_llc_occu_resctrl(const char *ctrlgrp, const char *mongrp, int cpu_no, char *resctrl_val) @@ -524,14 +546,15 @@ static void initialize_llc_occu_resctrl(const char *ctrlgrp, const char *mongrp, return; } - if (strcmp(resctrl_val, "cqm") == 0) - set_cqm_path(ctrlgrp, mongrp, resource_id); + if (!strncmp(resctrl_val, CMT_STR, sizeof(CMT_STR))) + set_cmt_path(ctrlgrp, mongrp, resource_id); } static int measure_vals(struct resctrl_val_param *param, unsigned long *bw_resc_start) { - unsigned long bw_imc, bw_resc, bw_resc_end; + unsigned long bw_resc, bw_resc_end; + float bw_imc; int ret; /* @@ -541,13 +564,13 @@ measure_vals(struct resctrl_val_param *param, unsigned long *bw_resc_start) * Compare the two values to validate resctrl value. * It takes 1sec to measure the data. */ - bw_imc = get_mem_bw_imc(param->cpu_no, param->bw_report); - if (bw_imc <= 0) - return bw_imc; + ret = get_mem_bw_imc(param->cpu_no, param->bw_report, &bw_imc); + if (ret < 0) + return ret; - bw_resc_end = get_mem_bw_resctrl(); - if (bw_resc_end <= 0) - return bw_resc_end; + ret = get_mem_bw_resctrl(&bw_resc_end); + if (ret < 0) + return ret; bw_resc = (bw_resc_end - *bw_resc_start) / MB; ret = print_results_bw(param->filename, bm_pid, bw_imc, bw_resc); @@ -579,8 +602,8 @@ int resctrl_val(char **benchmark_cmd, struct resctrl_val_param *param) if (strcmp(param->filename, "") == 0) sprintf(param->filename, "stdio"); - if ((strcmp(resctrl_val, "mba")) == 0 || - (strcmp(resctrl_val, "mbm")) == 0) { + if (!strncmp(resctrl_val, MBA_STR, sizeof(MBA_STR)) || + !strncmp(resctrl_val, MBM_STR, sizeof(MBM_STR))) { ret = validate_bw_report_request(param->bw_report); if (ret) return ret; @@ -645,7 +668,7 @@ int resctrl_val(char **benchmark_cmd, struct resctrl_val_param *param) PARENT_EXIT("Child is done"); } - printf("# benchmark PID: %d\n", bm_pid); + ksft_print_msg("Benchmark PID: %d\n", bm_pid); /* * Register CTRL-C handler for parent, as it has to kill benchmark @@ -674,15 +697,15 @@ int resctrl_val(char **benchmark_cmd, struct resctrl_val_param *param) if (ret) goto out; - if ((strcmp(resctrl_val, "mbm") == 0) || - (strcmp(resctrl_val, "mba") == 0)) { + if (!strncmp(resctrl_val, MBM_STR, sizeof(MBM_STR)) || + !strncmp(resctrl_val, MBA_STR, sizeof(MBA_STR))) { ret = initialize_mem_bw_imc(); if (ret) goto out; initialize_mem_bw_resctrl(param->ctrlgrp, param->mongrp, param->cpu_no, resctrl_val); - } else if (strcmp(resctrl_val, "cqm") == 0) + } else if (!strncmp(resctrl_val, CMT_STR, sizeof(CMT_STR))) initialize_llc_occu_resctrl(param->ctrlgrp, param->mongrp, param->cpu_no, resctrl_val); @@ -710,8 +733,8 @@ int resctrl_val(char **benchmark_cmd, struct resctrl_val_param *param) /* Test runs until the callback setup() tells the test to stop. */ while (1) { - if ((strcmp(resctrl_val, "mbm") == 0) || - (strcmp(resctrl_val, "mba") == 0)) { + if (!strncmp(resctrl_val, MBM_STR, sizeof(MBM_STR)) || + !strncmp(resctrl_val, MBA_STR, sizeof(MBA_STR))) { ret = param->setup(1, param); if (ret) { ret = 0; @@ -721,7 +744,7 @@ int resctrl_val(char **benchmark_cmd, struct resctrl_val_param *param) ret = measure_vals(param, &bw_resc_start); if (ret) break; - } else if (strcmp(resctrl_val, "cqm") == 0) { + } else if (!strncmp(resctrl_val, CMT_STR, sizeof(CMT_STR))) { ret = param->setup(1, param); if (ret) { ret = 0; diff --git a/tools/testing/selftests/resctrl/resctrlfs.c b/tools/testing/selftests/resctrl/resctrlfs.c index 19c0ec4045a4..5f5a166ade60 100644 --- a/tools/testing/selftests/resctrl/resctrlfs.c +++ b/tools/testing/selftests/resctrl/resctrlfs.c @@ -10,8 +10,6 @@ */ #include "resctrl.h" -int tests_run; - static int find_resctrl_mount(char *buffer) { FILE *mounts; @@ -49,8 +47,6 @@ static int find_resctrl_mount(char *buffer) return -ENOENT; } -char cbm_mask[256]; - /* * remount_resctrlfs - Remount resctrl FS at /sys/fs/resctrl * @mum_resctrlfs: Should the resctrl FS be remounted? @@ -70,28 +66,25 @@ int remount_resctrlfs(bool mum_resctrlfs) if (ret) strcpy(mountpoint, RESCTRL_PATH); - if (!ret && mum_resctrlfs && umount(mountpoint)) { - printf("not ok unmounting \"%s\"\n", mountpoint); - perror("# umount"); - tests_run++; - } + if (!ret && mum_resctrlfs && umount(mountpoint)) + ksft_print_msg("Fail: unmounting \"%s\"\n", mountpoint); if (!ret && !mum_resctrlfs) return 0; + ksft_print_msg("Mounting resctrl to \"%s\"\n", RESCTRL_PATH); ret = mount("resctrl", RESCTRL_PATH, "resctrl", 0, NULL); - printf("%sok mounting resctrl to \"%s\"\n", ret ? "not " : "", - RESCTRL_PATH); if (ret) perror("# mount"); - tests_run++; - return ret; } int umount_resctrlfs(void) { + if (find_resctrl_mount(NULL)) + return 0; + if (umount(RESCTRL_PATH)) { perror("# Unable to umount resctrl"); @@ -205,16 +198,18 @@ int get_cache_size(int cpu_no, char *cache_type, unsigned long *cache_size) /* * get_cbm_mask - Get cbm mask for given cache * @cache_type: Cache level L2/L3 - * - * Mask is stored in cbm_mask which is global variable. + * @cbm_mask: cbm_mask returned as a string * * Return: = 0 on success, < 0 on failure. */ -int get_cbm_mask(char *cache_type) +int get_cbm_mask(char *cache_type, char *cbm_mask) { char cbm_mask_path[1024]; FILE *fp; + if (!cbm_mask) + return -1; + sprintf(cbm_mask_path, "%s/%s/cbm_mask", CBM_MASK_PATH, cache_type); fp = fopen(cbm_mask_path, "r"); @@ -268,7 +263,7 @@ int get_core_sibling(int cpu_no) while (token) { sibling_cpu_no = atoi(token); /* Skipping core 0 as we don't want to run test on core 0 */ - if (sibling_cpu_no != 0) + if (sibling_cpu_no != 0 && sibling_cpu_no != cpu_no) break; token = strtok(NULL, "-,"); } @@ -334,7 +329,7 @@ void run_benchmark(int signum, siginfo_t *info, void *ucontext) operation = atoi(benchmark_cmd[4]); sprintf(resctrl_val, "%s", benchmark_cmd[5]); - if (strcmp(resctrl_val, "cqm") != 0) + if (strncmp(resctrl_val, CMT_STR, sizeof(CMT_STR))) buffer_span = span * MB; else buffer_span = span; @@ -458,9 +453,9 @@ int write_bm_pid_to_resctrl(pid_t bm_pid, char *ctrlgrp, char *mongrp, if (ret) goto out; - /* Create mon grp and write pid into it for "mbm" and "cqm" test */ - if ((strcmp(resctrl_val, "cqm") == 0) || - (strcmp(resctrl_val, "mbm") == 0)) { + /* Create mon grp and write pid into it for "mbm" and "cmt" test */ + if (!strncmp(resctrl_val, CMT_STR, sizeof(CMT_STR)) || + !strncmp(resctrl_val, MBM_STR, sizeof(MBM_STR))) { if (strlen(mongrp)) { sprintf(monitorgroup_p, "%s/mon_groups", controlgroup); sprintf(monitorgroup, "%s/%s", monitorgroup_p, mongrp); @@ -477,13 +472,10 @@ int write_bm_pid_to_resctrl(pid_t bm_pid, char *ctrlgrp, char *mongrp, } out: - printf("%sok writing benchmark parameters to resctrl FS\n", - ret ? "not " : ""); + ksft_print_msg("Writing benchmark parameters to resctrl FS\n"); if (ret) perror("# writing to resctrlfs"); - tests_run++; - return ret; } @@ -505,13 +497,13 @@ int write_schemata(char *ctrlgrp, char *schemata, int cpu_no, char *resctrl_val) int resource_id, ret = 0; FILE *fp; - if ((strcmp(resctrl_val, "mba") != 0) && - (strcmp(resctrl_val, "cat") != 0) && - (strcmp(resctrl_val, "cqm") != 0)) + if (strncmp(resctrl_val, MBA_STR, sizeof(MBA_STR)) && + strncmp(resctrl_val, CAT_STR, sizeof(CAT_STR)) && + strncmp(resctrl_val, CMT_STR, sizeof(CMT_STR))) return -ENOENT; if (!schemata) { - printf("# Skipping empty schemata update\n"); + ksft_print_msg("Skipping empty schemata update\n"); return -1; } @@ -528,9 +520,10 @@ int write_schemata(char *ctrlgrp, char *schemata, int cpu_no, char *resctrl_val) else sprintf(controlgroup, "%s/schemata", RESCTRL_PATH); - if (!strcmp(resctrl_val, "cat") || !strcmp(resctrl_val, "cqm")) + if (!strncmp(resctrl_val, CAT_STR, sizeof(CAT_STR)) || + !strncmp(resctrl_val, CMT_STR, sizeof(CMT_STR))) sprintf(schema, "%s%d%c%s", "L3:", resource_id, '=', schemata); - if (strcmp(resctrl_val, "mba") == 0) + if (!strncmp(resctrl_val, MBA_STR, sizeof(MBA_STR))) sprintf(schema, "%s%d%c%s", "MB:", resource_id, '=', schemata); fp = fopen(controlgroup, "w"); @@ -551,10 +544,9 @@ int write_schemata(char *ctrlgrp, char *schemata, int cpu_no, char *resctrl_val) fclose(fp); out: - printf("%sok Write schema \"%s\" to resctrl FS%s%s\n", - ret ? "not " : "", schema, ret ? " # " : "", - ret ? reason : ""); - tests_run++; + ksft_print_msg("Write schema \"%s\" to resctrl FS%s%s\n", + schema, ret ? " # " : "", + ret ? reason : ""); return ret; } @@ -578,18 +570,20 @@ bool check_resctrlfs_support(void) fclose(inf); - printf("%sok kernel supports resctrl filesystem\n", ret ? "" : "not "); - tests_run++; + ksft_print_msg("%s Check kernel supports resctrl filesystem\n", + ret ? "Pass:" : "Fail:"); + + if (!ret) + return ret; dp = opendir(RESCTRL_PATH); - printf("%sok resctrl mountpoint \"%s\" exists\n", - dp ? "" : "not ", RESCTRL_PATH); + ksft_print_msg("%s Check resctrl mountpoint \"%s\" exists\n", + dp ? "Pass:" : "Fail:", RESCTRL_PATH); if (dp) closedir(dp); - tests_run++; - printf("# resctrl filesystem %s mounted\n", - find_resctrl_mount(NULL) ? "not" : "is"); + ksft_print_msg("resctrl filesystem %s mounted\n", + find_resctrl_mount(NULL) ? "not" : "is"); return ret; } @@ -615,26 +609,56 @@ char *fgrep(FILE *inf, const char *str) * validate_resctrl_feature_request - Check if requested feature is valid. * @resctrl_val: Requested feature * - * Return: 0 on success, non-zero on failure + * Return: True if the feature is supported, else false */ -bool validate_resctrl_feature_request(char *resctrl_val) +bool validate_resctrl_feature_request(const char *resctrl_val) { - FILE *inf = fopen("/proc/cpuinfo", "r"); + struct stat statbuf; bool found = false; char *res; + FILE *inf; - if (!inf) + if (!resctrl_val) return false; - res = fgrep(inf, "flags"); - - if (res) { - char *s = strchr(res, ':'); + if (remount_resctrlfs(false)) + return false; - found = s && !strstr(s, resctrl_val); - free(res); + if (!strncmp(resctrl_val, CAT_STR, sizeof(CAT_STR))) { + if (!stat(L3_PATH, &statbuf)) + return true; + } else if (!strncmp(resctrl_val, MBA_STR, sizeof(MBA_STR))) { + if (!stat(MB_PATH, &statbuf)) + return true; + } else if (!strncmp(resctrl_val, MBM_STR, sizeof(MBM_STR)) || + !strncmp(resctrl_val, CMT_STR, sizeof(CMT_STR))) { + if (!stat(L3_MON_PATH, &statbuf)) { + inf = fopen(L3_MON_FEATURES_PATH, "r"); + if (!inf) + return false; + + if (!strncmp(resctrl_val, CMT_STR, sizeof(CMT_STR))) { + res = fgrep(inf, "llc_occupancy"); + if (res) { + found = true; + free(res); + } + } + + if (!strncmp(resctrl_val, MBM_STR, sizeof(MBM_STR))) { + res = fgrep(inf, "mbm_total_bytes"); + if (res) { + free(res); + res = fgrep(inf, "mbm_local_bytes"); + if (res) { + found = true; + free(res); + } + } + } + fclose(inf); + } } - fclose(inf); return found; } @@ -671,9 +695,9 @@ int filter_dmesg(void) while (fgets(line, 1024, fp)) { if (strstr(line, "intel_rdt:")) - printf("# dmesg: %s", line); + ksft_print_msg("dmesg: %s", line); if (strstr(line, "resctrl:")) - printf("# dmesg: %s", line); + ksft_print_msg("dmesg: %s", line); } fclose(fp); waitpid(pid, NULL, 0); diff --git a/tools/testing/selftests/sgx/defines.h b/tools/testing/selftests/sgx/defines.h index 592c1ccf4576..0bd73428d2f3 100644 --- a/tools/testing/selftests/sgx/defines.h +++ b/tools/testing/selftests/sgx/defines.h @@ -14,7 +14,7 @@ #define __aligned(x) __attribute__((__aligned__(x))) #define __packed __attribute__((packed)) -#include "../../../../arch/x86/kernel/cpu/sgx/arch.h" +#include "../../../../arch/x86/include/asm/sgx.h" #include "../../../../arch/x86/include/asm/enclu.h" #include "../../../../arch/x86/include/uapi/asm/sgx.h" diff --git a/tools/testing/selftests/sgx/load.c b/tools/testing/selftests/sgx/load.c index 9d43b75aaa55..f441ac34b4d4 100644 --- a/tools/testing/selftests/sgx/load.c +++ b/tools/testing/selftests/sgx/load.c @@ -45,19 +45,19 @@ static bool encl_map_bin(const char *path, struct encl *encl) fd = open(path, O_RDONLY); if (fd == -1) { - perror("open()"); + perror("enclave executable open()"); return false; } ret = stat(path, &sb); if (ret) { - perror("stat()"); + perror("enclave executable stat()"); goto err; } bin = mmap(NULL, sb.st_size, PROT_READ, MAP_PRIVATE, fd, 0); if (bin == MAP_FAILED) { - perror("mmap()"); + perror("enclave executable mmap()"); goto err; } @@ -90,8 +90,7 @@ static bool encl_ioc_create(struct encl *encl) ioc.src = (unsigned long)secs; rc = ioctl(encl->fd, SGX_IOC_ENCLAVE_CREATE, &ioc); if (rc) { - fprintf(stderr, "SGX_IOC_ENCLAVE_CREATE failed: errno=%d\n", - errno); + perror("SGX_IOC_ENCLAVE_CREATE failed"); munmap((void *)secs->base, encl->encl_size); return false; } @@ -116,31 +115,72 @@ static bool encl_ioc_add_pages(struct encl *encl, struct encl_segment *seg) rc = ioctl(encl->fd, SGX_IOC_ENCLAVE_ADD_PAGES, &ioc); if (rc < 0) { - fprintf(stderr, "SGX_IOC_ENCLAVE_ADD_PAGES failed: errno=%d.\n", - errno); + perror("SGX_IOC_ENCLAVE_ADD_PAGES failed"); return false; } return true; } + + bool encl_load(const char *path, struct encl *encl) { + const char device_path[] = "/dev/sgx_enclave"; Elf64_Phdr *phdr_tbl; off_t src_offset; Elf64_Ehdr *ehdr; + struct stat sb; + void *ptr; int i, j; int ret; + int fd = -1; memset(encl, 0, sizeof(*encl)); - ret = open("/dev/sgx_enclave", O_RDWR); - if (ret < 0) { - fprintf(stderr, "Unable to open /dev/sgx_enclave\n"); + fd = open(device_path, O_RDWR); + if (fd < 0) { + perror("Unable to open /dev/sgx_enclave"); + goto err; + } + + ret = stat(device_path, &sb); + if (ret) { + perror("device file stat()"); + goto err; + } + + /* + * This just checks if the /dev file has these permission + * bits set. It does not check that the current user is + * the owner or in the owning group. + */ + if (!(sb.st_mode & (S_IXUSR | S_IXGRP | S_IXOTH))) { + fprintf(stderr, "no execute permissions on device file %s\n", device_path); + goto err; + } + + ptr = mmap(NULL, PAGE_SIZE, PROT_READ, MAP_SHARED, fd, 0); + if (ptr == (void *)-1) { + perror("mmap for read"); + goto err; + } + munmap(ptr, PAGE_SIZE); + +#define ERR_MSG \ +"mmap() succeeded for PROT_READ, but failed for PROT_EXEC.\n" \ +" Check that current user has execute permissions on %s and \n" \ +" that /dev does not have noexec set: mount | grep \"/dev .*noexec\"\n" \ +" If so, remount it executable: mount -o remount,exec /dev\n\n" + + ptr = mmap(NULL, PAGE_SIZE, PROT_EXEC, MAP_SHARED, fd, 0); + if (ptr == (void *)-1) { + fprintf(stderr, ERR_MSG, device_path); goto err; } + munmap(ptr, PAGE_SIZE); - encl->fd = ret; + encl->fd = fd; if (!encl_map_bin(path, encl)) goto err; @@ -217,6 +257,8 @@ bool encl_load(const char *path, struct encl *encl) return true; err: + if (fd != -1) + close(fd); encl_delete(encl); return false; } @@ -229,7 +271,7 @@ static bool encl_map_area(struct encl *encl) area = mmap(NULL, encl_size * 2, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); if (area == MAP_FAILED) { - perror("mmap"); + perror("reservation mmap()"); return false; } @@ -268,8 +310,7 @@ bool encl_build(struct encl *encl) ioc.sigstruct = (uint64_t)&encl->sigstruct; ret = ioctl(encl->fd, SGX_IOC_ENCLAVE_INIT, &ioc); if (ret) { - fprintf(stderr, "SGX_IOC_ENCLAVE_INIT failed: errno=%d\n", - errno); + perror("SGX_IOC_ENCLAVE_INIT failed"); return false; } diff --git a/tools/testing/selftests/sgx/main.c b/tools/testing/selftests/sgx/main.c index 724cec700926..d304a4044eb9 100644 --- a/tools/testing/selftests/sgx/main.c +++ b/tools/testing/selftests/sgx/main.c @@ -15,6 +15,7 @@ #include <sys/stat.h> #include <sys/time.h> #include <sys/types.h> +#include <sys/auxv.h> #include "defines.h" #include "main.h" #include "../kselftest.h" @@ -28,24 +29,6 @@ struct vdso_symtab { Elf64_Word *elf_hashtab; }; -static void *vdso_get_base_addr(char *envp[]) -{ - Elf64_auxv_t *auxv; - int i; - - for (i = 0; envp[i]; i++) - ; - - auxv = (Elf64_auxv_t *)&envp[i + 1]; - - for (i = 0; auxv[i].a_type != AT_NULL; i++) { - if (auxv[i].a_type == AT_SYSINFO_EHDR) - return (void *)auxv[i].a_un.a_val; - } - - return NULL; -} - static Elf64_Dyn *vdso_get_dyntab(void *addr) { Elf64_Ehdr *ehdr = addr; @@ -162,7 +145,7 @@ static int user_handler(long rdi, long rsi, long rdx, long ursp, long r8, long r return 0; } -int main(int argc, char *argv[], char *envp[]) +int main(int argc, char *argv[]) { struct sgx_enclave_run run; struct vdso_symtab symtab; @@ -195,7 +178,7 @@ int main(int argc, char *argv[], char *envp[]) addr = mmap((void *)encl.encl_base + seg->offset, seg->size, seg->prot, MAP_SHARED | MAP_FIXED, encl.fd, 0); if (addr == MAP_FAILED) { - fprintf(stderr, "mmap() failed, errno=%d.\n", errno); + perror("mmap() segment failed"); exit(KSFT_FAIL); } } @@ -203,7 +186,8 @@ int main(int argc, char *argv[], char *envp[]) memset(&run, 0, sizeof(run)); run.tcs = encl.encl_base; - addr = vdso_get_base_addr(envp); + /* Get vDSO base address */ + addr = (void *)getauxval(AT_SYSINFO_EHDR); if (!addr) goto err; diff --git a/tools/testing/selftests/timers/clocksource-switch.c b/tools/testing/selftests/timers/clocksource-switch.c index bfc974b4572d..ef8eb3604595 100644 --- a/tools/testing/selftests/timers/clocksource-switch.c +++ b/tools/testing/selftests/timers/clocksource-switch.c @@ -3,7 +3,7 @@ * (C) Copyright IBM 2012 * Licensed under the GPLv2 * - * NOTE: This is a meta-test which quickly changes the clocksourc and + * NOTE: This is a meta-test which quickly changes the clocksource and * then uses other tests to detect problems. Thus this test requires * that the inconsistency-check and nanosleep tests be present in the * same directory it is run from. @@ -134,7 +134,7 @@ int main(int argv, char **argc) return -1; } - /* Check everything is sane before we start switching asyncrhonously */ + /* Check everything is sane before we start switching asynchronously */ for (i = 0; i < count; i++) { printf("Validating clocksource %s\n", clocksource_list[i]); if (change_clocksource(clocksource_list[i])) { diff --git a/tools/testing/selftests/timers/leap-a-day.c b/tools/testing/selftests/timers/leap-a-day.c index 19e46ed5dfb5..23eb398c8140 100644 --- a/tools/testing/selftests/timers/leap-a-day.c +++ b/tools/testing/selftests/timers/leap-a-day.c @@ -5,7 +5,7 @@ * Licensed under the GPLv2 * * This test signals the kernel to insert a leap second - * every day at midnight GMT. This allows for stessing the + * every day at midnight GMT. This allows for stressing the * kernel's leap-second behavior, as well as how well applications * handle the leap-second discontinuity. * diff --git a/tools/testing/selftests/timers/leapcrash.c b/tools/testing/selftests/timers/leapcrash.c index dc80728ed191..f70802c5dd0d 100644 --- a/tools/testing/selftests/timers/leapcrash.c +++ b/tools/testing/selftests/timers/leapcrash.c @@ -4,10 +4,10 @@ * (C) Copyright 2013, 2015 Linaro Limited * Licensed under the GPL * - * This test demonstrates leapsecond deadlock that is possibe + * This test demonstrates leapsecond deadlock that is possible * on kernels from 2.6.26 to 3.3. * - * WARNING: THIS WILL LIKELY HARDHANG SYSTEMS AND MAY LOSE DATA + * WARNING: THIS WILL LIKELY HARD HANG SYSTEMS AND MAY LOSE DATA * RUN AT YOUR OWN RISK! * To build: * $ gcc leapcrash.c -o leapcrash -lrt diff --git a/tools/testing/selftests/timers/threadtest.c b/tools/testing/selftests/timers/threadtest.c index cf3e48919874..80aed4bf06fb 100644 --- a/tools/testing/selftests/timers/threadtest.c +++ b/tools/testing/selftests/timers/threadtest.c @@ -76,7 +76,7 @@ void checklist(struct timespec *list, int size) /* The shared thread shares a global list * that each thread fills while holding the lock. - * This stresses clock syncronization across cpus. + * This stresses clock synchronization across cpus. */ void *shared_thread(void *arg) { diff --git a/tools/testing/selftests/x86/thunks_32.S b/tools/testing/selftests/x86/thunks_32.S index a71d92da8f46..f3f56e681e9f 100644 --- a/tools/testing/selftests/x86/thunks_32.S +++ b/tools/testing/selftests/x86/thunks_32.S @@ -45,3 +45,5 @@ call64_from_32: ret .size call64_from_32, .-call64_from_32 + +.section .note.GNU-stack,"",%progbits diff --git a/tools/usb/usbip/doc/usbip.8 b/tools/usb/usbip/doc/usbip.8 index a15d20063b98..1f26e4a00638 100644 --- a/tools/usb/usbip/doc/usbip.8 +++ b/tools/usb/usbip/doc/usbip.8 @@ -50,9 +50,16 @@ Attach a remote USB device. .PP .HP +\fBattach\fR \-\-remote=<\fIhost\fR> \-\-device=<\fIdev_id\fR> +.IP +Attach a remote USB gadget. +Only used when the remote usbipd is in device mode. +.PP + +.HP \fBdetach\fR \-\-port=<\fIport\fR> .IP -Detach an imported USB device. +Detach an imported USB device/gadget. .PP .HP @@ -74,11 +81,25 @@ List USB devices exported by a remote host. .PP .HP +\fBlist\fR \-\-device +.IP +List USB gadgets of local usbip-vudc. +Only used when the local usbipd is in device mode. +Note that this can not list usbip-vudc USB gadgets of the remote device mode usbipd. +.PP + +.HP \fBlist\fR \-\-local .IP List local USB devices. .PP +.HP +\fBport\fR +.IP +List imported devices/gadgets. +.PP + .SH EXAMPLES @@ -90,8 +111,27 @@ List local USB devices. client:# usbip attach --remote=server --busid=1-2 - Connect the remote USB device. + client:# usbip port + - List imported devices/gadgets. + client:# usbip detach --port=0 - Detach the usb device. +The following example shows the usage of device mode + + server:# usbip list --device + - List gadgets exported by local usbipd server. + + client:# modprobe vhci-hcd + + client:# usbip attach --remote=server --device=usbip-vudc.0 + - Connect the remote USB gadget. + + client:# usbip port + - List imported devices/gadgets. + + client:# usbip detach --port=0 + - Detach the usb gadget. + .SH "SEE ALSO" \fBusbipd\fP\fB(8)\fB\fP diff --git a/tools/usb/usbip/doc/usbipd.8 b/tools/usb/usbip/doc/usbipd.8 index fb62a756893b..d974394f86a1 100644 --- a/tools/usb/usbip/doc/usbipd.8 +++ b/tools/usb/usbip/doc/usbipd.8 @@ -30,6 +30,12 @@ Bind to IPv6. Default is both. .PP .HP +\fB\-e\fR, \fB\-\-device\fR +.IP +Run in device mode. Rather than drive an attached device, create a virtual UDC to bind gadgets to. +.PP + +.HP \fB\-D\fR, \fB\-\-daemon\fR .IP Run as a daemon process. @@ -86,6 +92,26 @@ USB/IP client can connect and use exported devices. - A usb device 1-2 is now exportable to other hosts! - Use 'usbip unbind --busid=1-2' when you want to shutdown exporting and use the device locally. +The following example shows the usage of device mode + + server:# modprobe usbip-vudc + - Use /sys/class/udc/ interface. + - usbip-host is independent of this module. + + server:# usbipd -e -D + - Start usbip daemon in device mode. + + server:# modprobe g_mass_storage file=/tmp/tmp.img + - Bind a gadget to usbip-vudc. + - in this example, a mass storage gadget is bound. + + server:# usbip list --device + - List gadgets exported by local usbipd server. + + server:# modprobe -r g_mass_storage + - Unbind a gadget from usbip-vudc. + - in this example, the previous mass storage gadget is unbound. + .SH "SEE ALSO" \fBusbip\fP\fB(8)\fB\fP diff --git a/tools/usb/usbip/libsrc/list.h b/tools/usb/usbip/libsrc/list.h index a941671e4900..9cca2425587b 100644 --- a/tools/usb/usbip/libsrc/list.h +++ b/tools/usb/usbip/libsrc/list.h @@ -77,17 +77,17 @@ static inline void __list_del(struct list_head * prev, struct list_head * next) #define LIST_POISON1 ((void *) 0x00100100 + POISON_POINTER_DELTA) #define LIST_POISON2 ((void *) 0x00200200 + POISON_POINTER_DELTA) +static inline void __list_del_entry(struct list_head *entry) +{ + __list_del(entry->prev, entry->next); +} + /** * list_del - deletes entry from list. * @entry: the element to delete from the list. * Note: list_empty() on entry does not return true after this, the entry is * in an undefined state. */ -static inline void __list_del_entry(struct list_head *entry) -{ - __list_del(entry->prev, entry->next); -} - static inline void list_del(struct list_head *entry) { __list_del(entry->prev, entry->next); diff --git a/tools/usb/usbip/src/usbip_list.c b/tools/usb/usbip/src/usbip_list.c index 8625b0f514ee..3d810bcca02f 100644 --- a/tools/usb/usbip/src/usbip_list.c +++ b/tools/usb/usbip/src/usbip_list.c @@ -33,7 +33,8 @@ static const char usbip_list_usage_string[] = "usbip list [-p|--parsable] <args>\n" " -p, --parsable Parsable list format\n" " -r, --remote=<host> List the exportable USB devices on <host>\n" - " -l, --local List the local USB devices\n"; + " -l, --local List the local USB devices\n" + " -d, --device List the local USB gadgets bound to usbip-vudc\n"; void usbip_list_usage(void) { |