// SPDX-License-Identifier: GPL-2.0-only /* Copyright (c) 2016 Facebook */ #include #include #include #include #include #include #include #include #include "percpu_freelist.h" #define STACK_CREATE_FLAG_MASK \ (BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY | \ BPF_F_STACK_BUILD_ID) struct stack_map_bucket { struct pcpu_freelist_node fnode; u32 hash; u32 nr; u64 data[]; }; struct bpf_stack_map { struct bpf_map map; void *elems; struct pcpu_freelist freelist; u32 n_buckets; struct stack_map_bucket *buckets[]; }; /* irq_work to run up_read() for build_id lookup in nmi context */ struct stack_map_irq_work { struct irq_work irq_work; struct rw_semaphore *sem; }; static void do_up_read(struct irq_work *entry) { struct stack_map_irq_work *work; if (WARN_ON_ONCE(IS_ENABLED(CONFIG_PREEMPT_RT))) return; work = container_of(entry, struct stack_map_irq_work, irq_work); up_read_non_owner(work->sem); work->sem = NULL; } static DEFINE_PER_CPU(struct stack_map_irq_work, up_read_work); static inline bool stack_map_use_build_id(struct bpf_map *map) { return (map->map_flags & BPF_F_STACK_BUILD_ID); } static inline int stack_map_data_size(struct bpf_map *map) { return stack_map_use_build_id(map) ? sizeof(struct bpf_stack_build_id) : sizeof(u64); } static int prealloc_elems_and_freelist(struct bpf_stack_map *smap) { u32 elem_size = sizeof(struct stack_map_bucket) + smap->map.value_size; int err; smap->elems = bpf_map_area_alloc(elem_size * smap->map.max_entries, smap->map.numa_node); if (!smap->elems) return -ENOMEM; err = pcpu_freelist_init(&smap->freelist); if (err) goto free_elems; pcpu_freelist_populate(&smap->freelist, smap->elems, elem_size, smap->map.max_entries); return 0; free_elems: bpf_map_area_free(smap->elems); return err; } /* Called from syscall */ static struct bpf_map *stack_map_alloc(union bpf_attr *attr) { u32 value_size = attr->value_size; struct bpf_stack_map *smap; struct bpf_map_memory mem; u64 cost, n_buckets; int err; if (!capable(CAP_SYS_ADMIN)) return ERR_PTR(-EPERM); if (attr->map_flags & ~STACK_CREATE_FLAG_MASK) return ERR_PTR(-EINVAL); /* check sanity of attributes */ if (attr->max_entries == 0 || attr->key_size != 4 || value_size < 8 || value_size % 8) return ERR_PTR(-EINVAL); BUILD_BUG_ON(sizeof(struct bpf_stack_build_id) % sizeof(u64)); if (attr->map_flags & BPF_F_STACK_BUILD_ID) { if (value_size % sizeof(struct bpf_stack_build_id) || value_size / sizeof(struct bpf_stack_build_id) > sysctl_perf_event_max_stack) return ERR_PTR(-EINVAL); } else if (value_size / 8 > sysctl_perf_event_max_stack) return ERR_PTR(-EINVAL); /* hash table size must be power of 2 */ n_buckets = roundup_pow_of_two(attr->max_entries); cost = n_buckets * sizeof(struct stack_map_bucket *) + sizeof(*smap); cost += n_buckets * (value_size + sizeof(struct stack_map_bucket)); err = bpf_map_charge_init(&mem, cost); if (err) return ERR_PTR(err); smap = bpf_map_area_alloc(cost, bpf_map_attr_numa_node(attr)); if (!smap) { bpf_map_charge_finish(&mem); return ERR_PTR(-ENOMEM); } bpf_map_init_from_attr(&smap->map, attr); smap->map.value_size = value_size; smap->n_buckets = n_buckets; err = get_callchain_buffers(sysctl_perf_event_max_stack); if (err) goto free_charge; err = prealloc_elems_and_freelist(smap); if (err) goto put_buffers; bpf_map_charge_move(&smap->map.memory, &mem); return &smap->map; put_buffers: put_callchain_buffers(); free_charge: bpf_map_charge_finish(&mem); bpf_map_area_free(smap); return ERR_PTR(err); } #define BPF_BUILD_ID 3 /* * Parse build id from the note segment. This logic can be shared between * 32-bit and 64-bit system, because Elf32_Nhdr and Elf64_Nhdr are * identical. */ static inline int stack_map_parse_build_id(void *page_addr, unsigned char *build_id, void *note_start, Elf32_Word note_size) { Elf32_Word note_offs = 0, new_offs; /* check for overflow */ if (note_start < page_addr || note_start + note_size < note_start) return -EINVAL; /* only supports note that fits in the first page */ if (note_start + note_size > page_addr + PAGE_SIZE) return -EINVAL; while (note_offs + sizeof(Elf32_Nhdr) < note_size) { Elf32_Nhdr *nhdr = (Elf32_Nhdr *)(note_start + note_offs); if (nhdr->n_type == BPF_BUILD_ID && nhdr->n_namesz == sizeof("GNU") && nhdr->n_descsz > 0 && nhdr->n_descsz <= BPF_BUILD_ID_SIZE) { memcpy(build_id, note_start + note_offs + ALIGN(sizeof("GNU"), 4) + sizeof(Elf32_Nhdr), nhdr->n_descsz); memset(build_id + nhdr->n_descsz, 0, BPF_BUILD_ID_SIZE - nhdr->n_descsz); return 0; } new_offs = note_offs + sizeof(Elf32_Nhdr) + ALIGN(nhdr->n_namesz, 4) + ALIGN(nhdr->n_descsz, 4); if (new_offs <= note_offs) /* overflow */ break; note_offs = new_offs; } return -EINVAL; } /* Parse build ID from 32-bit ELF */ static int stack_map_get_build_id_32(void *page_addr, unsigned char *build_id) { Elf32_Ehdr *ehdr = (Elf32_Ehdr *)page_addr; Elf32_Phdr *phdr; int i; /* only supports phdr that fits in one page */ if (ehdr->e_phnum > (PAGE_SIZE - sizeof(Elf32_Ehdr)) / sizeof(Elf32_Phdr)) return -EINVAL; phdr = (Elf32_Phdr *)(page_addr + sizeof(Elf32_Ehdr)); for (i = 0; i < ehdr->e_phnum; ++i) if (phdr[i].p_type == PT_NOTE) return stack_map_parse_build_id(page_addr, build_id, page_addr + phdr[i].p_offset, phdr[i].p_filesz); return -EINVAL; } /* Parse build ID from 64-bit ELF */ static int stack_map_get_build_id_64(void *page_addr, unsigned char *build_id) { Elf64_Ehdr *ehdr = (Elf64_Ehdr *)page_addr; Elf64_Phdr *phdr; int i; /* only supports phdr that fits in one page */ if (ehdr->e_phnum > (PAGE_SIZE - sizeof(Elf64_Ehdr)) / sizeof(Elf64_Phdr)) return -EINVAL; phdr = (Elf64_Phdr *)(page_addr + sizeof(Elf64_Ehdr)); for (i = 0; i < ehdr->e_phnum; ++i) if (phdr[i].p_type == PT_NOTE) return stack_map_parse_build_id(page_addr, build_id, page_addr + phdr[i].p_offset, phdr[i].p_filesz); return -EINVAL; } /* Parse build ID of ELF file mapped to vma */ static int stack_map_get_build_id(struct vm_area_struct *vma, unsigned char *build_id) { Elf32_Ehdr *ehdr; struct page *page; void *page_addr; int ret; /* only works for page backed storage */ if (!vma->vm_file) return -EINVAL; page = find_get_page(vma->vm_file->f_mapping, 0); if (!page) return -EFAULT; /* page not mapped */ ret = -EINVAL; page_addr = kmap_atomic(page); ehdr = (Elf32_Ehdr *)page_addr; /* compare magic x7f "ELF" */ if (memcmp(ehdr->e_ident, ELFMAG, SELFMAG) != 0) goto out; /* only support executable file and shared object file */ if (ehdr->e_type != ET_EXEC && ehdr->e_type != ET_DYN) goto out; if (ehdr->e_ident[EI_CLASS] == ELFCLASS32) ret = stack_map_get_build_id_32(page_addr, build_id); else if (ehdr->e_ident[EI_CLASS] == ELFCLASS64) ret = stack_map_get_build_id_64(page_addr, build_id); out: kunmap_atomic(page_addr); put_page(page); return ret; } static void stack_map_get_build_id_offset(struct bpf_stack_build_id *id_offs, u64 *ips, u32 trace_nr, bool user) { int i; struct vm_area_struct *vma; bool irq_work_busy = false; struct stack_map_irq_work *work = NULL; if (irqs_disabled()) { if (!IS_ENABLED(CONFIG_PREEMPT_RT)) { work = this_cpu_ptr(&up_read_work); if (atomic_read(&work->irq_work.flags) & IRQ_WORK_BUSY) { /* cannot queue more up_read, fallback */ irq_work_busy = true; } } else { /* * PREEMPT_RT does not allow to trylock mmap sem in * interrupt disabled context. Force the fallback code. */ irq_work_busy = true; } } /* * We cannot do up_read() when the irq is disabled, because of * risk to deadlock with rq_lock. To do build_id lookup when the * irqs are disabled, we need to run up_read() in irq_work. We use * a percpu variable to do the irq_work. If the irq_work is * already used by another lookup, we fall back to report ips. * * Same fallback is used for kernel stack (!user) on a stackmap * with build_id. */ if (!user || !current || !current->mm || irq_work_busy || down_read_trylock(¤t->mm->mmap_sem) == 0) { /* cannot access current->mm, fall back to ips */ for (i = 0; i < trace_nr; i++) { id_offs[i].status = BPF_STACK_BUILD_ID_IP; id_offs[i].ip = ips[i]; memset(id_offs[i].build_id, 0, BPF_BUILD_ID_SIZE); } return; } for (i = 0; i < trace_nr; i++) { vma = find_vma(current->mm, ips[i]); if (!vma || stack_map_get_build_id(vma, id_offs[i].build_id)) { /* per entry fall back to ips */ id_offs[i].status = BPF_STACK_BUILD_ID_IP; id_offs[i].ip = ips[i]; memset(id_offs[i].build_id, 0, BPF_BUILD_ID_SIZE); continue; } id_offs[i].offset = (vma->vm_pgoff << PAGE_SHIFT) + ips[i] - vma->vm_start; id_offs[i].status = BPF_STACK_BUILD_ID_VALID; } if (!work) { up_read(¤t->mm->mmap_sem); } else { work->sem = ¤t->mm->mmap_sem; irq_work_queue(&work->irq_work); /* * The irq_work will release the mmap_sem with * up_read_non_owner(). The rwsem_release() is called * here to release the lock from lockdep's perspective. */ rwsem_release(¤t->mm->mmap_sem.dep_map, _RET_IP_); } } BPF_CALL_3(bpf_get_stackid, struct pt_regs *, regs, struct bpf_map *, map, u64, flags) { struct bpf_stack_map *smap = container_of(map, struct bpf_stack_map, map); struct perf_callchain_entry *trace; struct stack_map_bucket *bucket, *new_bucket, *old_bucket; u32 max_depth = map->value_size / stack_map_data_size(map); /* stack_map_alloc() checks that max_depth <= sysctl_perf_event_max_stack */ u32 init_nr = sysctl_perf_event_max_stack - max_depth; u32 skip = flags & BPF_F_SKIP_FIELD_MASK; u32 hash, id, trace_nr, trace_len; bool user = flags & BPF_F_USER_STACK; bool kernel = !user; u64 *ips; bool hash_matches; if (unlikely(flags & ~(BPF_F_SKIP_FIELD_MASK | BPF_F_USER_STACK | BPF_F_FAST_STACK_CMP | BPF_F_REUSE_STACKID))) return -EINVAL; trace = get_perf_callchain(regs, init_nr, kernel, user, sysctl_perf_event_max_stack, false, false); if (unlikely(!trace)) /* couldn't fetch the stack trace */ return -EFAULT; /* get_perf_callchain() guarantees that trace->nr >= init_nr * and trace-nr <= sysctl_perf_event_max_stack, so trace_nr <= max_depth */ trace_nr = trace->nr - init_nr; if (trace_nr <= skip) /* skipping more than usable stack trace */ return -EFAULT; trace_nr -= skip; trace_len = trace_nr * sizeof(u64); ips = trace->ip + skip + init_nr; hash = jhash2((u32 *)ips, trace_len / sizeof(u32), 0); id = hash & (smap->n_buckets - 1); bucket = READ_ONCE(smap->buckets[id]); hash_matches = bucket && bucket->hash == hash; /* fast cmp */ if (hash_matches && flags & BPF_F_FAST_STACK_CMP) return id; if (stack_map_use_build_id(map)) { /* for build_id+offset, pop a bucket before slow cmp */ new_bucket = (struct stack_map_bucket *) pcpu_freelist_pop(&smap->freelist); if (unlikely(!new_bucket)) return -ENOMEM; new_bucket->nr = trace_nr; stack_map_get_build_id_offset( (struct bpf_stack_build_id *)new_bucket->data, ips, trace_nr, user); trace_len = trace_nr * sizeof(struct bpf_stack_build_id); if (hash_matches && bucket->nr == trace_nr && memcmp(bucket->data, new_bucket->data, trace_len) == 0) { pcpu_freelist_push(&smap->freelist, &new_bucket->fnode); return id; } if (bucket && !(flags & BPF_F_REUSE_STACKID)) { pcpu_freelist_push(&smap->freelist, &new_bucket->fnode); return -EEXIST; } } else { if (hash_matches && bucket->nr == trace_nr && memcmp(bucket->data, ips, trace_len) == 0) return id; if (bucket && !(flags & BPF_F_REUSE_STACKID)) return -EEXIST; new_bucket = (struct stack_map_bucket *) pcpu_freelist_pop(&smap->freelist); if (unlikely(!new_bucket)) return -ENOMEM; memcpy(new_bucket->data, ips, trace_len); } new_bucket->hash = hash; new_bucket->nr = trace_nr; old_bucket = xchg(&smap->buckets[id], new_bucket); if (old_bucket) pcpu_freelist_push(&smap->freelist, &old_bucket->fnode); return id; } const struct bpf_func_proto bpf_get_stackid_proto = { .func = bpf_get_stackid, .gpl_only = true, .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_CTX, .arg2_type = ARG_CONST_MAP_PTR, .arg3_type = ARG_ANYTHING, }; BPF_CALL_4(bpf_get_stack, struct pt_regs *, regs, void *, buf, u32, size, u64, flags) { u32 init_nr, trace_nr, copy_len, elem_size, num_elem; bool user_build_id = flags & BPF_F_USER_BUILD_ID; u32 skip = flags & BPF_F_SKIP_FIELD_MASK; bool user = flags & BPF_F_USER_STACK; struct perf_callchain_entry *trace; bool kernel = !user; int err = -EINVAL; u64 *ips; if (unlikely(flags & ~(BPF_F_SKIP_FIELD_MASK | BPF_F_USER_STACK | BPF_F_USER_BUILD_ID))) goto clear; if (kernel && user_build_id) goto clear; elem_size = (user && user_build_id) ? sizeof(struct bpf_stack_build_id) : sizeof(u64); if (unlikely(size % elem_size)) goto clear; num_elem = size / elem_size; if (sysctl_perf_event_max_stack < num_elem) init_nr = 0; else init_nr = sysctl_perf_event_max_stack - num_elem; trace = get_perf_callchain(regs, init_nr, kernel, user, sysctl_perf_event_max_stack, false, false); if (unlikely(!trace)) goto err_fault; trace_nr = trace->nr - init_nr; if (trace_nr < skip) goto err_fault; trace_nr -= skip; trace_nr = (trace_nr <= num_elem) ? trace_nr : num_elem; copy_len = trace_nr * elem_size; ips = trace->ip + skip + init_nr; if (user && user_build_id) stack_map_get_build_id_offset(buf, ips, trace_nr, user); else memcpy(buf, ips, copy_len); if (size > copy_len) memset(buf + copy_len, 0, size - copy_len); return copy_len; err_fault: err = -EFAULT; clear: memset(buf, 0, size); return err; } const struct bpf_func_proto bpf_get_stack_proto = { .func = bpf_get_stack, .gpl_only = true, .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_CTX, .arg2_type = ARG_PTR_TO_UNINIT_MEM, .arg3_type = ARG_CONST_SIZE_OR_ZERO, .arg4_type = ARG_ANYTHING, }; /* Called from eBPF program */ static void *stack_map_lookup_elem(struct bpf_map *map, void *key) { return ERR_PTR(-EOPNOTSUPP); } /* Called from syscall */ int bpf_stackmap_copy(struct bpf_map *map, void *key, void *value) { struct bpf_stack_map *smap = container_of(map, struct bpf_stack_map, map); struct stack_map_bucket *bucket, *old_bucket; u32 id = *(u32 *)key, trace_len; if (unlikely(id >= smap->n_buckets)) return -ENOENT; bucket = xchg(&smap->buckets[id], NULL); if (!bucket) return -ENOENT; trace_len = bucket->nr * stack_map_data_size(map); memcpy(value, bucket->data, trace_len); memset(value + trace_len, 0, map->value_size - trace_len); old_bucket = xchg(&smap->buckets[id], bucket); if (old_bucket) pcpu_freelist_push(&smap->freelist, &old_bucket->fnode); return 0; } static int stack_map_get_next_key(struct bpf_map *map, void *key, void *next_key) { struct bpf_stack_map *smap = container_of(map, struct bpf_stack_map, map); u32 id; WARN_ON_ONCE(!rcu_read_lock_held()); if (!key) { id = 0; } else { id = *(u32 *)key; if (id >= smap->n_buckets || !smap->buckets[id]) id = 0; else id++; } while (id < smap->n_buckets && !smap->buckets[id]) id++; if (id >= smap->n_buckets) return -ENOENT; *(u32 *)next_key = id; return 0; } static int stack_map_update_elem(struct bpf_map *map, void *key, void *value, u64 map_flags) { return -EINVAL; } /* Called from syscall or from eBPF program */ static int stack_map_delete_elem(struct bpf_map *map, void *key) { struct bpf_stack_map *smap = container_of(map, struct bpf_stack_map, map); struct stack_map_bucket *old_bucket; u32 id = *(u32 *)key; if (unlikely(id >= smap->n_buckets)) return -E2BIG; old_bucket = xchg(&smap->buckets[id], NULL); if (old_bucket) { pcpu_freelist_push(&smap->freelist, &old_bucket->fnode); return 0; } else { return -ENOENT; } } /* Called when map->refcnt goes to zero, either from workqueue or from syscall */ static void stack_map_free(struct bpf_map *map) { struct bpf_stack_map *smap = container_of(map, struct bpf_stack_map, map); /* wait for bpf programs to complete before freeing stack map */ synchronize_rcu(); bpf_map_area_free(smap->elems); pcpu_freelist_destroy(&smap->freelist); bpf_map_area_free(smap); put_callchain_buffers(); } const struct bpf_map_ops stack_trace_map_ops = { .map_alloc = stack_map_alloc, .map_free = stack_map_free, .map_get_next_key = stack_map_get_next_key, .map_lookup_elem = stack_map_lookup_elem, .map_update_elem = stack_map_update_elem, .map_delete_elem = stack_map_delete_elem, .map_check_btf = map_check_no_btf, }; static int __init stack_map_init(void) { int cpu; struct stack_map_irq_work *work; for_each_possible_cpu(cpu) { work = per_cpu_ptr(&up_read_work, cpu); init_irq_work(&work->irq_work, do_up_read); } return 0; } subsys_initcall(stack_map_init); 100%'> -rw-r--r--tools/arch/x86/include/uapi/asm/svm.h4
-rw-r--r--tools/arch/x86/include/uapi/asm/vmx.h7
-rw-r--r--tools/arch/x86/lib/inat.c13
-rw-r--r--tools/arch/x86/lib/insn.c35
-rw-r--r--tools/arch/x86/lib/x86-opcode-map.txt111
-rw-r--r--tools/arch/x86/tools/gen-cpu-feature-names-x86.awk34
-rw-r--r--tools/arch/x86/tools/gen-insn-attr-x86.awk44
-rw-r--r--tools/bootconfig/main.c4
-rw-r--r--tools/bpf/Makefile13
-rw-r--r--tools/bpf/bpftool/Documentation/bpftool-gen.rst13
-rw-r--r--tools/bpf/bpftool/Documentation/bpftool-map.rst3
-rw-r--r--tools/bpf/bpftool/Documentation/bpftool-prog.rst16
-rw-r--r--tools/bpf/bpftool/Documentation/bpftool-token.rst64
-rw-r--r--tools/bpf/bpftool/Makefile6
-rw-r--r--tools/bpf/bpftool/bash-completion/bpftool37
-rw-r--r--tools/bpf/bpftool/btf_dumper.c4
-rw-r--r--tools/bpf/bpftool/cgroup.c4
-rw-r--r--tools/bpf/bpftool/common.c93
-rw-r--r--tools/bpf/bpftool/feature.c86
-rw-r--r--tools/bpf/bpftool/gen.c68
-rw-r--r--tools/bpf/bpftool/link.c54
-rw-r--r--tools/bpf/bpftool/main.c29
-rw-r--r--tools/bpf/bpftool/main.h21
-rw-r--r--tools/bpf/bpftool/map.c3
-rw-r--r--tools/bpf/bpftool/prog.c33
-rw-r--r--tools/bpf/bpftool/sign.c217
-rw-r--r--tools/bpf/bpftool/token.c210
-rw-r--r--tools/bpf/bpftool/tracelog.c11
-rw-r--r--tools/bpf/runqslower/Makefile91
-rw-r--r--tools/bpf/runqslower/runqslower.bpf.c106
-rw-r--r--tools/bpf/runqslower/runqslower.c171
-rw-r--r--tools/bpf/runqslower/runqslower.h13
-rw-r--r--tools/build/Build2
-rw-r--r--tools/build/Makefile21
-rw-r--r--tools/build/Makefile.feature12
-rw-r--r--tools/build/feature/Makefile26
-rw-r--r--tools/build/feature/test-all.c24
-rw-r--r--tools/build/feature/test-get_cpuid.c8
-rw-r--r--tools/build/feature/test-get_current_dir_name.c11
-rw-r--r--tools/build/feature/test-libbpf-strings.c10
-rw-r--r--tools/build/feature/test-libslang-include-subdir.c7
-rw-r--r--tools/dma/.gitignore3
-rw-r--r--tools/dma/Makefile55
-rw-r--r--tools/dma/config (renamed from tools/testing/selftests/dma/config)0
-rw-r--r--tools/dma/dma_map_benchmark.c (renamed from tools/testing/selftests/dma/dma_map_benchmark.c)3
-rwxr-xr-xtools/docs/check-variable-fonts.py37
-rwxr-xr-xtools/docs/checktransupdate.py307
-rwxr-xr-xtools/docs/documentation-file-ref-check245
-rwxr-xr-xtools/docs/features-refresh.sh98
-rwxr-xr-xtools/docs/find-unused-docs.sh62
-rwxr-xr-xtools/docs/gen-redirects.py54
-rwxr-xr-xtools/docs/gen-renames.py130
-rwxr-xr-xtools/docs/get_abi.py214
-rwxr-xr-xtools/docs/get_feat.py225
-rwxr-xr-xtools/docs/list-arch.sh11
-rwxr-xr-xtools/docs/parse-headers.py60
-rwxr-xr-xtools/docs/sphinx-build-wrapper864
-rwxr-xr-xtools/docs/sphinx-pre-install1543
-rwxr-xr-xtools/docs/test_doc_build.py513
-rw-r--r--tools/gpio/Makefile2
-rw-r--r--tools/iio/iio_event_monitor.c10
-rw-r--r--tools/include/asm-generic/bitops/__fls.h2
-rw-r--r--tools/include/asm-generic/bitops/fls.h2
-rw-r--r--tools/include/asm-generic/bitops/fls64.h4
-rw-r--r--tools/include/asm-generic/io.h482
-rw-r--r--tools/include/asm/io.h11
-rw-r--r--tools/include/linux/args.h28
-rw-r--r--tools/include/linux/atomic.h22
-rw-r--r--tools/include/linux/bitmap.h1
-rw-r--r--tools/include/linux/bits.h29
-rw-r--r--tools/include/linux/cfi_types.h29
-rw-r--r--tools/include/linux/compiler.h4
-rw-r--r--tools/include/linux/gfp_types.h393
-rw-r--r--tools/include/linux/interval_tree_generic.h10
-rw-r--r--tools/include/linux/io.h4
-rw-r--r--tools/include/linux/livepatch_external.h76
-rw-r--r--tools/include/linux/objtool_types.h3
l---------tools/include/linux/pci_ids.h1
-rw-r--r--tools/include/linux/slab.h165
-rw-r--r--tools/include/linux/static_call_types.h4
-rw-r--r--tools/include/linux/string.h14
-rw-r--r--tools/include/nolibc/Makefile22
-rw-r--r--tools/include/nolibc/arch-arm.h2
-rw-r--r--tools/include/nolibc/arch-arm64.h2
-rw-r--r--tools/include/nolibc/arch-loongarch.h2
-rw-r--r--tools/include/nolibc/arch-m68k.h2
-rw-r--r--tools/include/nolibc/arch-mips.h2
-rw-r--r--tools/include/nolibc/arch-powerpc.h2
-rw-r--r--tools/include/nolibc/arch-riscv.h2
-rw-r--r--tools/include/nolibc/arch-s390.h7
-rw-r--r--tools/include/nolibc/arch-sh.h2
-rw-r--r--tools/include/nolibc/arch-sparc.h2
-rw-r--r--tools/include/nolibc/arch-x86.h10
-rw-r--r--tools/include/nolibc/arch.h11
-rw-r--r--tools/include/nolibc/compiler.h4
-rw-r--r--tools/include/nolibc/crt.h3
-rw-r--r--tools/include/nolibc/dirent.h6
-rw-r--r--tools/include/nolibc/getopt.h2
-rw-r--r--tools/include/nolibc/inttypes.h3
-rw-r--r--tools/include/nolibc/nolibc.h3
-rw-r--r--tools/include/nolibc/poll.h4
-rw-r--r--tools/include/nolibc/stackprotector.h2
-rw-r--r--tools/include/nolibc/std.h6
-rw-r--r--tools/include/nolibc/stdio.h10
-rw-r--r--tools/include/nolibc/stdlib.h2
-rw-r--r--tools/include/nolibc/string.h15
-rw-r--r--tools/include/nolibc/sys.h165
-rw-r--r--tools/include/nolibc/sys/auxv.h3
-rw-r--r--tools/include/nolibc/sys/mman.h5
-rw-r--r--tools/include/nolibc/sys/random.h4
-rw-r--r--tools/include/nolibc/sys/reboot.h2
-rw-r--r--tools/include/nolibc/sys/select.h103
-rw-r--r--tools/include/nolibc/sys/timerfd.h8
-rw-r--r--tools/include/nolibc/sys/uio.h49
-rw-r--r--tools/include/nolibc/sys/wait.h35
-rw-r--r--tools/include/nolibc/time.h29
-rw-r--r--tools/include/nolibc/types.h47
-rw-r--r--tools/include/nolibc/unistd.h8
-rw-r--r--tools/include/uapi/asm-generic/unistd.h8
-rw-r--r--tools/include/uapi/drm/drm.h63
-rw-r--r--tools/include/uapi/linux/bpf.h58
-rw-r--r--tools/include/uapi/linux/genetlink.h103
-rw-r--r--tools/include/uapi/linux/if_addr.h79
-rw-r--r--tools/include/uapi/linux/kvm.h30
-rw-r--r--tools/include/uapi/linux/neighbour.h229
-rw-r--r--tools/include/uapi/linux/netdev.h2
-rw-r--r--tools/include/uapi/linux/netfilter.h80
-rw-r--r--tools/include/uapi/linux/netfilter_arp.h23
-rw-r--r--tools/include/uapi/linux/nsfs.h87
-rw-r--r--tools/include/uapi/linux/perf_event.h23
-rw-r--r--tools/include/uapi/linux/rtnetlink.h848
-rw-r--r--tools/lib/bpf/Build2
-rw-r--r--tools/lib/bpf/bpf.c8
-rw-r--r--tools/lib/bpf/bpf.h5
-rw-r--r--tools/lib/bpf/bpf_gen_internal.h2
-rw-r--r--tools/lib/bpf/bpf_helpers.h28
-rw-r--r--tools/lib/bpf/bpf_tracing.h2
-rw-r--r--tools/lib/bpf/btf.c76
-rw-r--r--tools/lib/bpf/btf.h8
-rw-r--r--tools/lib/bpf/btf_dump.c1
-rw-r--r--tools/lib/bpf/elf.c1
-rw-r--r--tools/lib/bpf/features.c1
-rw-r--r--tools/lib/bpf/gen_loader.c50
-rw-r--r--tools/lib/bpf/libbpf.c418
-rw-r--r--tools/lib/bpf/libbpf.h79
-rw-r--r--tools/lib/bpf/libbpf.map3
-rw-r--r--tools/lib/bpf/libbpf_errno.c75
-rw-r--r--tools/lib/bpf/libbpf_internal.h21
-rw-r--r--tools/lib/bpf/libbpf_probes.c4
-rw-r--r--tools/lib/bpf/libbpf_utils.c256
-rw-r--r--tools/lib/bpf/linker.c4
-rw-r--r--tools/lib/bpf/relo_core.c1
-rw-r--r--tools/lib/bpf/ringbuf.c1
-rw-r--r--tools/lib/bpf/skel_internal.h76
-rw-r--r--tools/lib/bpf/str_error.c104
-rw-r--r--tools/lib/bpf/str_error.h19
-rw-r--r--tools/lib/bpf/usdt.bpf.h44
-rw-r--r--tools/lib/bpf/usdt.c75
-rw-r--r--tools/lib/perf/cpumap.c39
-rw-r--r--tools/lib/perf/include/perf/core.h2
-rw-r--r--tools/lib/perf/include/perf/event.h14
-rw-r--r--tools/lib/perf/mmap.c2
-rw-r--r--tools/lib/python/__init__.py0
-rw-r--r--tools/lib/python/abi/__init__.py0
-rw-r--r--tools/lib/python/abi/abi_parser.py628
-rw-r--r--tools/lib/python/abi/abi_regex.py234
-rw-r--r--tools/lib/python/abi/helpers.py38
-rw-r--r--tools/lib/python/abi/system_symbols.py378
-rwxr-xr-xtools/lib/python/feat/parse_features.py494
-rwxr-xr-xtools/lib/python/jobserver.py149
-rw-r--r--tools/lib/python/kdoc/__init__.py0
-rw-r--r--tools/lib/python/kdoc/enrich_formatter.py70
-rw-r--r--tools/lib/python/kdoc/kdoc_files.py294
-rw-r--r--tools/lib/python/kdoc/kdoc_item.py43
-rw-r--r--tools/lib/python/kdoc/kdoc_output.py824
-rw-r--r--tools/lib/python/kdoc/kdoc_parser.py1670
-rw-r--r--tools/lib/python/kdoc/kdoc_re.py270
-rwxr-xr-xtools/lib/python/kdoc/latex_fonts.py167
-rwxr-xr-xtools/lib/python/kdoc/parse_data_structs.py482
-rw-r--r--tools/lib/python/kdoc/python_version.py178
-rw-r--r--tools/lib/subcmd/help.c3
-rw-r--r--tools/lib/thermal/Makefile9
-rw-r--r--tools/lib/thermal/libthermal.map5
-rw-r--r--tools/mm/page_owner_sort.c14
-rw-r--r--tools/mm/slabinfo.c7
-rw-r--r--tools/net/sunrpc/xdrgen/generators/__init__.py11
-rw-r--r--tools/net/sunrpc/xdrgen/generators/union.py34
-rw-r--r--tools/net/sunrpc/xdrgen/templates/C/pointer/decoder/close.j22
-rw-r--r--tools/net/sunrpc/xdrgen/templates/C/pointer/encoder/close.j22
-rw-r--r--tools/net/sunrpc/xdrgen/templates/C/struct/decoder/close.j22
-rw-r--r--tools/net/sunrpc/xdrgen/templates/C/struct/decoder/variable_length_opaque.j22
-rw-r--r--tools/net/sunrpc/xdrgen/templates/C/struct/encoder/close.j22
-rw-r--r--tools/net/sunrpc/xdrgen/templates/C/typedef/decoder/basic.j22
-rw-r--r--tools/net/sunrpc/xdrgen/templates/C/typedef/decoder/fixed_length_array.j22
-rw-r--r--tools/net/sunrpc/xdrgen/templates/C/typedef/decoder/fixed_length_opaque.j24
-rw-r--r--tools/net/sunrpc/xdrgen/templates/C/typedef/decoder/string.j22
-rw-r--r--tools/net/sunrpc/xdrgen/templates/C/typedef/decoder/variable_length_array.j22
-rw-r--r--tools/net/sunrpc/xdrgen/templates/C/typedef/decoder/variable_length_opaque.j22
-rw-r--r--tools/net/sunrpc/xdrgen/templates/C/typedef/encoder/basic.j22
-rw-r--r--tools/net/sunrpc/xdrgen/templates/C/typedef/encoder/fixed_length_array.j22
-rw-r--r--tools/net/sunrpc/xdrgen/templates/C/typedef/encoder/fixed_length_opaque.j22
-rw-r--r--tools/net/sunrpc/xdrgen/templates/C/typedef/encoder/string.j22
-rw-r--r--tools/net/sunrpc/xdrgen/templates/C/typedef/encoder/variable_length_array.j22
-rw-r--r--tools/net/sunrpc/xdrgen/templates/C/typedef/encoder/variable_length_opaque.j22
-rw-r--r--tools/net/sunrpc/xdrgen/templates/C/union/declaration/close.j24
-rw-r--r--tools/net/sunrpc/xdrgen/templates/C/union/decoder/close.j22
-rw-r--r--tools/net/sunrpc/xdrgen/templates/C/union/encoder/close.j22
-rw-r--r--tools/net/sunrpc/xdrgen/templates/C/union/encoder/string.j26
-rwxr-xr-xtools/net/sunrpc/xdrgen/xdrgen5
-rw-r--r--tools/net/ynl/Makefile29
-rw-r--r--tools/net/ynl/Makefile.deps1
-rw-r--r--tools/net/ynl/lib/ynl-priv.h14
-rw-r--r--tools/net/ynl/lib/ynl.c6
-rwxr-xr-xtools/net/ynl/pyynl/cli.py100
-rwxr-xr-xtools/net/ynl/pyynl/ethtool.py17
-rw-r--r--tools/net/ynl/pyynl/lib/__init__.py4
-rw-r--r--tools/net/ynl/pyynl/lib/doc_generator.py402
-rw-r--r--tools/net/ynl/pyynl/lib/nlspec.py2
-rw-r--r--tools/net/ynl/pyynl/lib/ynl.py93
-rwxr-xr-xtools/net/ynl/pyynl/ynl_gen_c.py175
-rwxr-xr-xtools/net/ynl/pyynl/ynl_gen_rst.py384
-rw-r--r--tools/net/ynl/samples/.gitignore1
-rw-r--r--tools/net/ynl/samples/Makefile1
-rw-r--r--tools/net/ynl/samples/page-pool.c149
-rw-r--r--tools/net/ynl/samples/tc-filter-add.c335
-rw-r--r--tools/net/ynl/tests/Makefile32
-rw-r--r--tools/net/ynl/tests/config6
-rwxr-xr-xtools/net/ynl/tests/test_ynl_cli.sh327
-rwxr-xr-xtools/net/ynl/tests/test_ynl_ethtool.sh222
-rw-r--r--tools/net/ynl/ynltool/.gitignore2
-rw-r--r--tools/net/ynl/ynltool/Makefile55
-rw-r--r--tools/net/ynl/ynltool/json_writer.c288
-rw-r--r--tools/net/ynl/ynltool/json_writer.h75
-rw-r--r--tools/net/ynl/ynltool/main.c242
-rw-r--r--tools/net/ynl/ynltool/main.h66
-rw-r--r--tools/net/ynl/ynltool/page-pool.c461
-rw-r--r--tools/net/ynl/ynltool/qstats.c621
-rw-r--r--tools/objtool/.gitignore3
-rw-r--r--tools/objtool/Build8
-rw-r--r--tools/objtool/Makefile70
-rw-r--r--tools/objtool/arch/loongarch/decode.c62
-rw-r--r--tools/objtool/arch/loongarch/orc.c1
-rw-r--r--tools/objtool/arch/loongarch/special.c28
-rw-r--r--tools/objtool/arch/powerpc/decode.c31
-rw-r--r--tools/objtool/arch/powerpc/special.c5
-rw-r--r--tools/objtool/arch/x86/Build13
-rw-r--r--tools/objtool/arch/x86/decode.c123
-rw-r--r--tools/objtool/arch/x86/orc.c1
-rw-r--r--tools/objtool/arch/x86/special.c12
-rw-r--r--tools/objtool/builtin-check.c102
-rw-r--r--tools/objtool/builtin-klp.c53
-rw-r--r--tools/objtool/check.c1571
-rw-r--r--tools/objtool/disas.c1248
-rw-r--r--tools/objtool/elf.c822
-rw-r--r--tools/objtool/include/objtool/arch.h17
-rw-r--r--tools/objtool/include/objtool/builtin.h13
-rw-r--r--tools/objtool/include/objtool/check.h39
-rw-r--r--tools/objtool/include/objtool/checksum.h43
-rw-r--r--tools/objtool/include/objtool/checksum_types.h25
-rw-r--r--tools/objtool/include/objtool/disas.h81
-rw-r--r--tools/objtool/include/objtool/elf.h199
-rw-r--r--tools/objtool/include/objtool/endianness.h9
-rw-r--r--tools/objtool/include/objtool/klp.h35
-rw-r--r--tools/objtool/include/objtool/objtool.h6
-rw-r--r--tools/objtool/include/objtool/special.h4
-rw-r--r--tools/objtool/include/objtool/trace.h141
-rw-r--r--tools/objtool/include/objtool/util.h19
-rw-r--r--tools/objtool/include/objtool/warn.h66
-rw-r--r--tools/objtool/klp-diff.c1723
-rw-r--r--tools/objtool/klp-post-link.c168
-rw-r--r--tools/objtool/noreturns.h2
-rw-r--r--tools/objtool/objtool.c44
-rw-r--r--tools/objtool/orc_dump.c1
-rw-r--r--tools/objtool/orc_gen.c9
-rw-r--r--tools/objtool/signal.c135
-rw-r--r--tools/objtool/special.c16
-rwxr-xr-xtools/objtool/sync-check.sh2
-rw-r--r--tools/objtool/trace.c203
-rw-r--r--tools/objtool/weak.c7
-rw-r--r--tools/perf/Documentation/Build.txt15
-rw-r--r--tools/perf/Documentation/android.txt80
-rw-r--r--tools/perf/Documentation/intel-acr.txt53
-rw-r--r--tools/perf/Documentation/perf-annotate.txt1
-rw-r--r--tools/perf/Documentation/perf-arm-spe.txt118
-rw-r--r--tools/perf/Documentation/perf-bench.txt58
-rw-r--r--tools/perf/Documentation/perf-c2c.txt7
-rw-r--r--tools/perf/Documentation/perf-check.txt2
-rw-r--r--tools/perf/Documentation/perf-config.txt3
-rw-r--r--tools/perf/Documentation/perf-diff.txt2
-rw-r--r--tools/perf/Documentation/perf-list.txt3
-rw-r--r--tools/perf/Documentation/perf-record.txt4
-rw-r--r--tools/perf/Documentation/perf-script.txt5
-rw-r--r--tools/perf/Documentation/perf-timechart.txt3
-rw-r--r--tools/perf/Documentation/perf-trace.txt4
-rw-r--r--tools/perf/Documentation/perf.data-file-format.txt10
-rw-r--r--tools/perf/Makefile.config114
-rw-r--r--tools/perf/Makefile.perf37
-rw-r--r--tools/perf/arch/arm/annotate/instructions.c1
-rw-r--r--tools/perf/arch/arm/entry/syscalls/syscall.tbl2
-rw-r--r--tools/perf/arch/arm/util/Build2
-rw-r--r--tools/perf/arch/arm/util/auxtrace.c1
-rw-r--r--tools/perf/arch/arm/util/pmu.c2
-rw-r--r--tools/perf/arch/arm64/annotate/instructions.c1
-rw-r--r--tools/perf/arch/arm64/util/Build19
-rw-r--r--tools/perf/arch/arm64/util/arm-spe.c6
-rw-r--r--tools/perf/arch/arm64/util/arm64_exception_types.h15
-rw-r--r--tools/perf/arch/arm64/util/hisi-ptt.c1
-rw-r--r--tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl2
-rw-r--r--tools/perf/arch/powerpc/entry/syscalls/syscall.tbl2
-rw-r--r--tools/perf/arch/powerpc/util/Build1
-rw-r--r--tools/perf/arch/powerpc/util/auxtrace.c103
-rw-r--r--tools/perf/arch/s390/entry/syscalls/syscall.tbl2
-rw-r--r--tools/perf/arch/s390/util/Build2
-rw-r--r--tools/perf/arch/s390/util/auxtrace.c1
-rw-r--r--tools/perf/arch/sh/entry/syscalls/syscall.tbl2
-rw-r--r--tools/perf/arch/sparc/entry/syscalls/syscall.tbl2
-rw-r--r--tools/perf/arch/x86/annotate/instructions.c187
-rw-r--r--tools/perf/arch/x86/entry/syscalls/syscall_32.tbl2
-rw-r--r--tools/perf/arch/x86/entry/syscalls/syscall_64.tbl3
-rw-r--r--tools/perf/arch/x86/tests/Build4
-rw-r--r--tools/perf/arch/x86/tests/arch-tests.c4
-rw-r--r--tools/perf/arch/x86/tests/intel-pt-test.c6
-rw-r--r--tools/perf/arch/x86/tests/topdown.c2
-rw-r--r--tools/perf/arch/x86/util/Build6
-rw-r--r--tools/perf/arch/x86/util/evsel.c114
-rw-r--r--tools/perf/arch/x86/util/intel-pt.c6
-rw-r--r--tools/perf/arch/x86/util/kvm-stat.c51
-rw-r--r--tools/perf/arch/x86/util/pmu.c2
-rw-r--r--tools/perf/arch/x86/util/topdown.c1
-rw-r--r--tools/perf/arch/xtensa/entry/syscalls/syscall.tbl2
-rw-r--r--tools/perf/bench/bench.h1
-rw-r--r--tools/perf/bench/evlist-open-close.c1
-rw-r--r--tools/perf/bench/find-bit-bench.c2
-rw-r--r--tools/perf/bench/futex.c1
-rw-r--r--tools/perf/bench/futex.h1
-rw-r--r--tools/perf/bench/mem-functions.c390
-rw-r--r--tools/perf/bench/mem-memcpy-arch.h2
-rw-r--r--tools/perf/bench/mem-memcpy-x86-64-asm-def.h4
-rw-r--r--tools/perf/bench/mem-memset-arch.h2
-rw-r--r--tools/perf/bench/mem-memset-x86-64-asm-def.h4
-rw-r--r--tools/perf/bench/pmu-scan.c1
-rw-r--r--tools/perf/bench/synthesize.c1
-rw-r--r--tools/perf/builtin-annotate.c10
-rw-r--r--tools/perf/builtin-bench.c1
-rw-r--r--tools/perf/builtin-c2c.c195
-rw-r--r--tools/perf/builtin-check.c5
-rw-r--r--tools/perf/builtin-evlist.c3
-rw-r--r--tools/perf/builtin-inject.c48
-rw-r--r--tools/perf/builtin-kvm.c132
-rw-r--r--tools/perf/builtin-kwork.c27
-rw-r--r--tools/perf/builtin-list.c169
-rw-r--r--tools/perf/builtin-lock.c9
-rw-r--r--tools/perf/builtin-mem.c1
-rw-r--r--tools/perf/builtin-record.c163
-rw-r--r--tools/perf/builtin-report.c6
-rw-r--r--tools/perf/builtin-sched.c19
-rw-r--r--tools/perf/builtin-script.c410
-rw-r--r--tools/perf/builtin-stat.c470
-rw-r--r--tools/perf/builtin-timechart.c15
-rw-r--r--tools/perf/builtin-top.c8
-rw-r--r--tools/perf/builtin-trace.c39
-rwxr-xr-xtools/perf/check-headers.sh12
-rw-r--r--tools/perf/perf.h2
-rw-r--r--tools/perf/pmu-events/Build27
-rw-r--r--tools/perf/pmu-events/arch/arm64/ampere/ampereone/metrics.json8
-rw-r--r--tools/perf/pmu-events/arch/arm64/ampere/ampereonex/metrics.json26
-rw-r--r--tools/perf/pmu-events/arch/arm64/ampere/emag/cache.json2
-rw-r--r--tools/perf/pmu-events/arch/arm64/freescale/imx94/sys/ddrc.json9
-rw-r--r--tools/perf/pmu-events/arch/arm64/freescale/imx94/sys/metrics.json450
-rw-r--r--tools/perf/pmu-events/arch/common/common/legacy-hardware.json72
-rw-r--r--tools/perf/pmu-events/arch/common/common/metrics.json151
-rw-r--r--tools/perf/pmu-events/arch/common/common/software.json6
-rw-r--r--tools/perf/pmu-events/arch/common/common/tool.json12
-rw-r--r--tools/perf/pmu-events/arch/riscv/mapfile.csv1
-rw-r--r--tools/perf/pmu-events/arch/s390/cf_z16/transaction.json8
-rw-r--r--tools/perf/pmu-events/arch/s390/cf_z17/transaction.json8
-rw-r--r--tools/perf/pmu-events/arch/x86/alderlake/adl-metrics.json104
-rw-r--r--tools/perf/pmu-events/arch/x86/alderlake/cache.json151
-rw-r--r--tools/perf/pmu-events/arch/x86/alderlake/floating-point.json28
-rw-r--r--tools/perf/pmu-events/arch/x86/alderlake/frontend.json42
-rw-r--r--tools/perf/pmu-events/arch/x86/alderlake/memory.json12
-rw-r--r--tools/perf/pmu-events/arch/x86/alderlake/other.json8
-rw-r--r--tools/perf/pmu-events/arch/x86/alderlake/pipeline.json169
-rw-r--r--tools/perf/pmu-events/arch/x86/alderlake/uncore-interconnect.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/alderlake/virtual-memory.json40
-rw-r--r--tools/perf/pmu-events/arch/x86/alderlaken/adln-metrics.json20
-rw-r--r--tools/perf/pmu-events/arch/x86/alderlaken/cache.json16
-rw-r--r--tools/perf/pmu-events/arch/x86/alderlaken/pipeline.json6
-rw-r--r--tools/perf/pmu-events/arch/x86/alderlaken/uncore-interconnect.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/arrowlake/arl-metrics.json180
-rw-r--r--tools/perf/pmu-events/arch/x86/arrowlake/cache.json465
-rw-r--r--tools/perf/pmu-events/arch/x86/arrowlake/floating-point.json73
-rw-r--r--tools/perf/pmu-events/arch/x86/arrowlake/frontend.json112
-rw-r--r--tools/perf/pmu-events/arch/x86/arrowlake/memory.json92
-rw-r--r--tools/perf/pmu-events/arch/x86/arrowlake/other.json121
-rw-r--r--tools/perf/pmu-events/arch/x86/arrowlake/pipeline.json444
-rw-r--r--tools/perf/pmu-events/arch/x86/arrowlake/virtual-memory.json113
-rw-r--r--tools/perf/pmu-events/arch/x86/broadwell/bdw-metrics.json30
-rw-r--r--tools/perf/pmu-events/arch/x86/broadwellde/bdwde-metrics.json30
-rw-r--r--tools/perf/pmu-events/arch/x86/broadwellx/bdx-metrics.json33
-rw-r--r--tools/perf/pmu-events/arch/x86/cascadelakex/clx-metrics.json139
-rw-r--r--tools/perf/pmu-events/arch/x86/cascadelakex/uncore-cache.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/cascadelakex/uncore-memory.json12
-rw-r--r--tools/perf/pmu-events/arch/x86/emeraldrapids/cache.json163
-rw-r--r--tools/perf/pmu-events/arch/x86/emeraldrapids/emr-metrics.json143
-rw-r--r--tools/perf/pmu-events/arch/x86/emeraldrapids/floating-point.json43
-rw-r--r--tools/perf/pmu-events/arch/x86/emeraldrapids/frontend.json42
-rw-r--r--tools/perf/pmu-events/arch/x86/emeraldrapids/memory.json30
-rw-r--r--tools/perf/pmu-events/arch/x86/emeraldrapids/other.json28
-rw-r--r--tools/perf/pmu-events/arch/x86/emeraldrapids/pipeline.json167
-rw-r--r--tools/perf/pmu-events/arch/x86/emeraldrapids/uncore-cache.json11
-rw-r--r--tools/perf/pmu-events/arch/x86/emeraldrapids/uncore-memory.json104
-rw-r--r--tools/perf/pmu-events/arch/x86/emeraldrapids/uncore-power.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/emeraldrapids/virtual-memory.json40
-rw-r--r--tools/perf/pmu-events/arch/x86/grandridge/cache.json20
-rw-r--r--tools/perf/pmu-events/arch/x86/grandridge/grr-metrics.json20
-rw-r--r--tools/perf/pmu-events/arch/x86/graniterapids/cache.json231
-rw-r--r--tools/perf/pmu-events/arch/x86/graniterapids/floating-point.json43
-rw-r--r--tools/perf/pmu-events/arch/x86/graniterapids/frontend.json42
-rw-r--r--tools/perf/pmu-events/arch/x86/graniterapids/gnr-metrics.json131
-rw-r--r--tools/perf/pmu-events/arch/x86/graniterapids/memory.json33
-rw-r--r--tools/perf/pmu-events/arch/x86/graniterapids/other.json30
-rw-r--r--tools/perf/pmu-events/arch/x86/graniterapids/pipeline.json167
-rw-r--r--tools/perf/pmu-events/arch/x86/graniterapids/uncore-cache.json9
-rw-r--r--tools/perf/pmu-events/arch/x86/graniterapids/uncore-interconnect.json10
-rw-r--r--tools/perf/pmu-events/arch/x86/graniterapids/uncore-io.json1
-rw-r--r--tools/perf/pmu-events/arch/x86/graniterapids/uncore-memory.json143
-rw-r--r--tools/perf/pmu-events/arch/x86/graniterapids/virtual-memory.json40
-rw-r--r--tools/perf/pmu-events/arch/x86/haswell/hsw-metrics.json32
-rw-r--r--tools/perf/pmu-events/arch/x86/haswellx/hsx-metrics.json35
-rw-r--r--tools/perf/pmu-events/arch/x86/icelake/icl-metrics.json96
-rw-r--r--tools/perf/pmu-events/arch/x86/icelakex/icx-metrics.json155
-rw-r--r--tools/perf/pmu-events/arch/x86/icelakex/uncore-cache.json4
-rw-r--r--tools/perf/pmu-events/arch/x86/ivybridge/ivb-metrics.json30
-rw-r--r--tools/perf/pmu-events/arch/x86/ivytown/ivt-metrics.json33
-rw-r--r--tools/perf/pmu-events/arch/x86/jaketown/jkt-metrics.json20
-rw-r--r--tools/perf/pmu-events/arch/x86/lunarlake/cache.json170
-rw-r--r--tools/perf/pmu-events/arch/x86/lunarlake/frontend.json40
-rw-r--r--tools/perf/pmu-events/arch/x86/lunarlake/lnl-metrics.json216
-rw-r--r--tools/perf/pmu-events/arch/x86/lunarlake/memory.json28
-rw-r--r--tools/perf/pmu-events/arch/x86/lunarlake/other.json3
-rw-r--r--tools/perf/pmu-events/arch/x86/lunarlake/pipeline.json111
-rw-r--r--tools/perf/pmu-events/arch/x86/lunarlake/uncore-interconnect.json10
-rw-r--r--tools/perf/pmu-events/arch/x86/lunarlake/uncore-memory.json8
-rw-r--r--tools/perf/pmu-events/arch/x86/mapfile.csv24
-rw-r--r--tools/perf/pmu-events/arch/x86/meteorlake/cache.json145
-rw-r--r--tools/perf/pmu-events/arch/x86/meteorlake/floating-point.json28
-rw-r--r--tools/perf/pmu-events/arch/x86/meteorlake/frontend.json42
-rw-r--r--tools/perf/pmu-events/arch/x86/meteorlake/memory.json15
-rw-r--r--tools/perf/pmu-events/arch/x86/meteorlake/mtl-metrics.json103
-rw-r--r--tools/perf/pmu-events/arch/x86/meteorlake/other.json5
-rw-r--r--tools/perf/pmu-events/arch/x86/meteorlake/pipeline.json173
-rw-r--r--tools/perf/pmu-events/arch/x86/meteorlake/virtual-memory.json40
-rw-r--r--tools/perf/pmu-events/arch/x86/pantherlake/cache.json1163
-rw-r--r--tools/perf/pmu-events/arch/x86/pantherlake/counter.json9
-rw-r--r--tools/perf/pmu-events/arch/x86/pantherlake/floating-point.json359
-rw-r--r--tools/perf/pmu-events/arch/x86/pantherlake/frontend.json535
-rw-r--r--tools/perf/pmu-events/arch/x86/pantherlake/memory.json115
-rw-r--r--tools/perf/pmu-events/arch/x86/pantherlake/other.json44
-rw-r--r--tools/perf/pmu-events/arch/x86/pantherlake/pipeline.json1903
-rw-r--r--tools/perf/pmu-events/arch/x86/pantherlake/uncore-memory.json26
-rw-r--r--tools/perf/pmu-events/arch/x86/pantherlake/virtual-memory.json248
-rw-r--r--tools/perf/pmu-events/arch/x86/rocketlake/rkl-metrics.json97
-rw-r--r--tools/perf/pmu-events/arch/x86/sandybridge/snb-metrics.json19
-rw-r--r--tools/perf/pmu-events/arch/x86/sapphirerapids/cache.json163
-rw-r--r--tools/perf/pmu-events/arch/x86/sapphirerapids/floating-point.json43
-rw-r--r--tools/perf/pmu-events/arch/x86/sapphirerapids/frontend.json42
-rw-r--r--tools/perf/pmu-events/arch/x86/sapphirerapids/memory.json30
-rw-r--r--tools/perf/pmu-events/arch/x86/sapphirerapids/other.json28
-rw-r--r--tools/perf/pmu-events/arch/x86/sapphirerapids/pipeline.json167
-rw-r--r--tools/perf/pmu-events/arch/x86/sapphirerapids/spr-metrics.json165
-rw-r--r--tools/perf/pmu-events/arch/x86/sapphirerapids/uncore-cache.json11
-rw-r--r--tools/perf/pmu-events/arch/x86/sapphirerapids/uncore-memory.json104
-rw-r--r--tools/perf/pmu-events/arch/x86/sapphirerapids/uncore-power.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/sapphirerapids/virtual-memory.json40
-rw-r--r--tools/perf/pmu-events/arch/x86/sierraforest/cache.json41
-rw-r--r--tools/perf/pmu-events/arch/x86/sierraforest/srf-metrics.json20
-rw-r--r--tools/perf/pmu-events/arch/x86/sierraforest/uncore-cache.json9
-rw-r--r--tools/perf/pmu-events/arch/x86/sierraforest/uncore-interconnect.json10
-rw-r--r--tools/perf/pmu-events/arch/x86/sierraforest/uncore-io.json1
-rw-r--r--tools/perf/pmu-events/arch/x86/sierraforest/uncore-memory.json103
-rw-r--r--tools/perf/pmu-events/arch/x86/skylake/skl-metrics.json101
-rw-r--r--tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json101
-rw-r--r--tools/perf/pmu-events/arch/x86/tigerlake/tgl-metrics.json97
-rw-r--r--tools/perf/pmu-events/empty-pmu-events.c2854
-rwxr-xr-xtools/perf/pmu-events/jevents.py73
-rwxr-xr-xtools/perf/pmu-events/make_legacy_cache.py129
-rw-r--r--tools/perf/pmu-events/metric.py85
-rwxr-xr-xtools/perf/pmu-events/metric_test.py4
-rw-r--r--tools/perf/pmu-events/pmu-events.h3
-rwxr-xr-xtools/perf/python/ilist.py515
-rw-r--r--tools/perf/scripts/perl/Perf-Trace-Util/Build2
-rw-r--r--tools/perf/tests/Build3
-rw-r--r--tools/perf/tests/builtin-test.c5
-rw-r--r--tools/perf/tests/code-reading.c126
-rw-r--r--tools/perf/tests/hwmon_pmu.c1
-rw-r--r--tools/perf/tests/kallsyms-split.c156
-rw-r--r--tools/perf/tests/keep-tracking.c2
-rw-r--r--tools/perf/tests/make12
-rw-r--r--tools/perf/tests/maps.c82
-rw-r--r--tools/perf/tests/mmap-basic.c2
-rw-r--r--tools/perf/tests/parse-events.c2078
-rw-r--r--tools/perf/tests/parse-metric.c3
-rw-r--r--tools/perf/tests/perf-record.c40
-rw-r--r--tools/perf/tests/perf-time-to-tsc.c4
-rw-r--r--tools/perf/tests/pfm.c1
-rw-r--r--tools/perf/tests/pmu-events.c26
-rw-r--r--tools/perf/tests/pmu.c3
-rw-r--r--tools/perf/tests/python-use.c27
-rwxr-xr-xtools/perf/tests/shell/amd-ibs-swfilt.sh51
-rw-r--r--tools/perf/tests/shell/attr/test-stat-default7
-rw-r--r--tools/perf/tests/shell/attr/test-stat-detailed-17
-rw-r--r--tools/perf/tests/shell/attr/test-stat-detailed-27
-rw-r--r--tools/perf/tests/shell/attr/test-stat-detailed-37
-rwxr-xr-xtools/perf/tests/shell/base_probe/test_adding_blacklisted.sh20
-rwxr-xr-xtools/perf/tests/shell/base_probe/test_adding_kernel.sh97
-rwxr-xr-xtools/perf/tests/shell/base_probe/test_basic.sh31
-rwxr-xr-xtools/perf/tests/shell/base_probe/test_invalid_options.sh14
-rwxr-xr-xtools/perf/tests/shell/base_probe/test_line_semantics.sh7
-rwxr-xr-xtools/perf/tests/shell/base_report/setup.sh10
-rwxr-xr-xtools/perf/tests/shell/base_report/test_basic.sh103
-rwxr-xr-xtools/perf/tests/shell/buildid.sh203
-rwxr-xr-xtools/perf/tests/shell/c2c.sh62
-rw-r--r--tools/perf/tests/shell/common/init.sh4
-rw-r--r--tools/perf/tests/shell/coresight/memcpy_thread/memcpy_thread.c2
-rw-r--r--tools/perf/tests/shell/coresight/thread_loop/thread_loop.c4
-rw-r--r--tools/perf/tests/shell/coresight/unroll_loop_thread/unroll_loop_thread.c4
-rwxr-xr-xtools/perf/tests/shell/evlist.sh79
-rwxr-xr-xtools/perf/tests/shell/jitdump-python.sh81
-rwxr-xr-xtools/perf/tests/shell/kallsyms.sh56
-rwxr-xr-xtools/perf/tests/shell/kvm.sh154
-rw-r--r--tools/perf/tests/shell/lib/perf_json_output_lint.py9
-rw-r--r--tools/perf/tests/shell/lib/stat_output.sh2
-rwxr-xr-xtools/perf/tests/shell/lock_contention.sh21
-rwxr-xr-xtools/perf/tests/shell/python-use.sh36
-rwxr-xr-xtools/perf/tests/shell/record.sh40
-rwxr-xr-xtools/perf/tests/shell/record_lbr.sh26
-rwxr-xr-xtools/perf/tests/shell/record_weak_term.sh37
-rwxr-xr-xtools/perf/tests/shell/script_dlfilter.sh107
-rwxr-xr-xtools/perf/tests/shell/stat+csv_output.sh2
-rwxr-xr-xtools/perf/tests/shell/stat+event_uniquifying.sh109
-rwxr-xr-xtools/perf/tests/shell/stat+json_output.sh2
-rwxr-xr-xtools/perf/tests/shell/stat+shadow_stat.sh4
-rwxr-xr-xtools/perf/tests/shell/stat+std_output.sh10
-rwxr-xr-xtools/perf/tests/shell/stat.sh45
-rwxr-xr-xtools/perf/tests/shell/stat_all_metricgroups.sh3
-rwxr-xr-xtools/perf/tests/shell/stat_all_metrics.sh30
-rwxr-xr-xtools/perf/tests/shell/test_bpf_metadata.sh2
-rwxr-xr-xtools/perf/tests/shell/test_brstack.sh106
-rwxr-xr-xtools/perf/tests/shell/test_event_open_fallback.sh71
-rwxr-xr-xtools/perf/tests/shell/timechart.sh67
-rwxr-xr-xtools/perf/tests/shell/top.sh74
-rwxr-xr-xtools/perf/tests/shell/trace_btf_enum.sh11
-rw-r--r--tools/perf/tests/switch-tracking.c2
-rw-r--r--tools/perf/tests/tests.h5
-rw-r--r--tools/perf/tests/workloads/Build2
-rw-r--r--tools/perf/tests/workloads/thloop.c45
-rw-r--r--tools/perf/tests/workloads/traploop.c31
-rw-r--r--tools/perf/trace/beauty/include/linux/socket.h5
-rw-r--r--tools/perf/trace/beauty/include/uapi/linux/fcntl.h19
-rw-r--r--tools/perf/trace/beauty/include/uapi/linux/fs.h93
-rw-r--r--tools/perf/trace/beauty/include/uapi/linux/prctl.h19
-rw-r--r--tools/perf/trace/beauty/include/uapi/linux/vhost.h35
-rw-r--r--tools/perf/ui/browsers/annotate.c237
-rw-r--r--tools/perf/ui/browsers/hists.c2
-rw-r--r--tools/perf/ui/hist.c1
-rw-r--r--tools/perf/ui/libslang.h4
-rw-r--r--tools/perf/util/Build29
-rw-r--r--tools/perf/util/addr2line.c439
-rw-r--r--tools/perf/util/addr2line.h20
-rw-r--r--tools/perf/util/annotate-data.c97
-rw-r--r--tools/perf/util/annotate-data.h27
-rw-r--r--tools/perf/util/annotate.c205
-rw-r--r--tools/perf/util/annotate.h31
-rw-r--r--tools/perf/util/arm-spe-decoder/Build2
-rw-r--r--tools/perf/util/arm-spe-decoder/arm-spe-decoder.c93
-rw-r--r--tools/perf/util/arm-spe-decoder/arm-spe-decoder.h94
-rw-r--r--tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c67
-rw-r--r--tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h47
-rw-r--r--tools/perf/util/arm-spe.c293
-rw-r--r--tools/perf/util/arm-spe.h2
-rw-r--r--tools/perf/util/auxtrace.c34
-rw-r--r--tools/perf/util/auxtrace.h228
-rw-r--r--tools/perf/util/bpf-event.c39
-rw-r--r--tools/perf/util/bpf-filter.c5
-rw-r--r--tools/perf/util/bpf-filter.h2
-rw-r--r--tools/perf/util/bpf-trace-summary.c41
-rw-r--r--tools/perf/util/bpf-utils.c61
-rw-r--r--tools/perf/util/bpf-utils.h10
-rw-r--r--tools/perf/util/bpf_counter.c93
-rw-r--r--tools/perf/util/bpf_counter.h74
-rw-r--r--tools/perf/util/bpf_counter_cgroup.c84
-rw-r--r--tools/perf/util/bpf_ftrace.c4
-rw-r--r--tools/perf/util/bpf_lock_contention.c6
-rw-r--r--tools/perf/util/bpf_map.c1
-rw-r--r--tools/perf/util/bpf_off_cpu.c1
-rw-r--r--tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c4
-rw-r--r--tools/perf/util/bpf_skel/bperf_cgroup.bpf.c18
-rw-r--r--tools/perf/util/bpf_skel/bperf_cgroup.h15
-rw-r--r--tools/perf/util/bpf_skel/kwork_top.bpf.c2
-rw-r--r--tools/perf/util/bpf_skel/sample_filter.bpf.c2
-rw-r--r--tools/perf/util/build-id.c7
-rw-r--r--tools/perf/util/callchain.c51
-rw-r--r--tools/perf/util/callchain.h4
-rw-r--r--tools/perf/util/capstone.c471
-rw-r--r--tools/perf/util/capstone.h24
-rw-r--r--tools/perf/util/cgroup.c1
-rw-r--r--tools/perf/util/config.c5
-rw-r--r--tools/perf/util/cpumap.c9
-rw-r--r--tools/perf/util/cs-etm-decoder/Build2
-rw-r--r--tools/perf/util/cs-etm-decoder/cs-etm-decoder.c44
-rw-r--r--tools/perf/util/cs-etm.c7
-rw-r--r--tools/perf/util/debuginfo.c8
-rw-r--r--tools/perf/util/disasm.c652
-rw-r--r--tools/perf/util/disasm.h6
-rw-r--r--tools/perf/util/disasm_bpf.c195
-rw-r--r--tools/perf/util/disasm_bpf.h12
-rw-r--r--tools/perf/util/drm_pmu.c7
-rw-r--r--tools/perf/util/dso.c112
-rw-r--r--tools/perf/util/dso.h25
-rw-r--r--tools/perf/util/dwarf-aux.c69
-rw-r--r--tools/perf/util/dwarf-aux.h2
-rw-r--r--tools/perf/util/env.c22
-rw-r--r--tools/perf/util/env.h2
-rw-r--r--tools/perf/util/event.c1
-rw-r--r--tools/perf/util/event.h20
-rw-r--r--tools/perf/util/evlist.c19
-rw-r--r--tools/perf/util/evlist.h2
-rw-r--r--tools/perf/util/evsel.c244
-rw-r--r--tools/perf/util/evsel.h8
-rw-r--r--tools/perf/util/evsel_config.h1
-rw-r--r--tools/perf/util/evsel_fprintf.c5
-rw-r--r--tools/perf/util/evswitch.c1
-rw-r--r--tools/perf/util/expr.c8
-rw-r--r--tools/perf/util/genelf.c32
-rw-r--r--tools/perf/util/get_current_dir_name.c18
-rw-r--r--tools/perf/util/get_current_dir_name.h8
-rw-r--r--tools/perf/util/header.c19
-rw-r--r--tools/perf/util/header.h6
-rw-r--r--tools/perf/util/hisi-ptt-decoder/Build2
-rw-r--r--tools/perf/util/hist.c6
-rw-r--r--tools/perf/util/hist.h20
-rw-r--r--tools/perf/util/hwmon_pmu.c5
-rw-r--r--tools/perf/util/hwmon_pmu.h2
-rw-r--r--tools/perf/util/include/linux/linkage.h2
-rw-r--r--tools/perf/util/intel-bts.c4
-rw-r--r--tools/perf/util/intel-pt-decoder/Build8
-rw-r--r--tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c2
-rw-r--r--tools/perf/util/intel-pt.c4
-rw-r--r--tools/perf/util/intel-tpebs.c4
-rw-r--r--tools/perf/util/jitdump.c5
-rw-r--r--tools/perf/util/kvm-stat.h11
-rw-r--r--tools/perf/util/libbfd.c643
-rw-r--r--tools/perf/util/libbfd.h82
-rw-r--r--tools/perf/util/llvm.c273
-rw-r--r--tools/perf/util/llvm.h21
-rw-r--r--tools/perf/util/lzma.c2
-rw-r--r--tools/perf/util/machine.c1
-rw-r--r--tools/perf/util/map.c19
-rw-r--r--tools/perf/util/map.h6
-rw-r--r--tools/perf/util/maps.c31
-rw-r--r--tools/perf/util/mem-events.c5
-rw-r--r--tools/perf/util/metricgroup.c95
-rw-r--r--tools/perf/util/metricgroup.h2
-rw-r--r--tools/perf/util/mmap.c1
-rw-r--r--tools/perf/util/mutex.c14
-rw-r--r--tools/perf/util/mutex.h2
-rw-r--r--tools/perf/util/namespaces.c7
-rw-r--r--tools/perf/util/parse-events.c455
-rw-r--r--tools/perf/util/parse-events.h25
-rw-r--r--tools/perf/util/parse-events.l78
-rw-r--r--tools/perf/util/parse-events.y114
-rw-r--r--tools/perf/util/perf_api_probe.c27
-rw-r--r--tools/perf/util/perf_event_attr_fprintf.c2
-rw-r--r--tools/perf/util/pfm.c1
-rw-r--r--tools/perf/util/pmu.c322
-rw-r--r--tools/perf/util/pmu.h33
-rw-r--r--tools/perf/util/powerpc-vpadtl.c733
-rw-r--r--tools/perf/util/powerpc-vpadtl.h23
-rw-r--r--tools/perf/util/print-events.c112
-rw-r--r--tools/perf/util/print-events.h4
-rw-r--r--tools/perf/util/print_insn.c117
-rw-r--r--tools/perf/util/probe-event.c12
-rw-r--r--tools/perf/util/python.c560
-rw-r--r--tools/perf/util/s390-sample-raw.c55
-rw-r--r--tools/perf/util/sample.h2
-rw-r--r--tools/perf/util/scripting-engines/Build2
-rw-r--r--tools/perf/util/session.c184
-rw-r--r--tools/perf/util/session.h3
-rw-r--r--tools/perf/util/setup.py14
-rw-r--r--tools/perf/util/srcline.c772
-rw-r--r--tools/perf/util/srcline.h9
-rw-r--r--tools/perf/util/stat-display.c68
-rw-r--r--tools/perf/util/stat-shadow.c547
-rw-r--r--tools/perf/util/stat.c59
-rw-r--r--tools/perf/util/stat.h32
-rw-r--r--tools/perf/util/symbol-elf.c103
-rw-r--r--tools/perf/util/symbol-minimal.c62
-rw-r--r--tools/perf/util/symbol.c164
-rw-r--r--tools/perf/util/synthetic-events.c2
-rw-r--r--tools/perf/util/synthetic-events.h15
-rw-r--r--tools/perf/util/tool.c222
-rw-r--r--tools/perf/util/tool.h23
-rw-r--r--tools/perf/util/tool_pmu.c105
-rw-r--r--tools/perf/util/tool_pmu.h10
-rw-r--r--tools/perf/util/tp_pmu.c2
-rw-r--r--tools/perf/util/trace.h4
-rw-r--r--tools/perf/util/zlib.c2
-rw-r--r--tools/power/acpi/os_specific/service_layers/oslinuxtbl.c4
-rw-r--r--tools/power/acpi/tools/acpidump/apdump.c3
-rw-r--r--tools/power/acpi/tools/acpidump/apfiles.c2
-rw-r--r--tools/power/acpi/tools/pfrut/pfrut.c7
-rw-r--r--tools/power/cpupower/.gitignore3
-rw-r--r--tools/power/cpupower/Makefile32
-rw-r--r--tools/power/cpupower/lib/cpuidle.c5
-rw-r--r--tools/power/cpupower/lib/cpupower.c2
-rw-r--r--tools/power/cpupower/man/cpupower-set.17
-rw-r--r--tools/power/cpupower/utils/cpufreq-info.c16
-rw-r--r--tools/power/cpupower/utils/cpupower-set.c5
-rw-r--r--tools/power/cpupower/utils/helpers/helpers.h14
-rw-r--r--tools/power/cpupower/utils/helpers/misc.c76
-rwxr-xr-xtools/power/x86/amd_pstate_tracer/amd_pstate_trace.py2
-rw-r--r--tools/power/x86/intel-speed-select/isst-config.c2
-rw-r--r--tools/power/x86/intel-speed-select/isst-core-tpmi.c46
-rw-r--r--tools/power/x86/turbostat/turbostat.827
-rw-r--r--tools/power/x86/turbostat/turbostat.c1209
-rw-r--r--tools/power/x86/x86_energy_perf_policy/Makefile29
-rw-r--r--tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.815
-rw-r--r--tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c178
-rw-r--r--tools/sched_ext/Makefile4
-rw-r--r--tools/sched_ext/include/scx/bpf_arena_common.bpf.h175
-rw-r--r--tools/sched_ext/include/scx/bpf_arena_common.h33
-rw-r--r--tools/sched_ext/include/scx/common.bpf.h119
-rw-r--r--tools/sched_ext/include/scx/common.h5
-rw-r--r--tools/sched_ext/include/scx/compat.bpf.h330
-rw-r--r--tools/sched_ext/include/scx/compat.h14
-rw-r--r--tools/sched_ext/include/scx/user_exit_info.bpf.h40
-rw-r--r--tools/sched_ext/include/scx/user_exit_info.h49
-rw-r--r--tools/sched_ext/include/scx/user_exit_info_common.h30
-rw-r--r--tools/sched_ext/scx_central.bpf.c2
-rw-r--r--tools/sched_ext/scx_central.c1
-rw-r--r--tools/sched_ext/scx_cpu0.bpf.c88
-rw-r--r--tools/sched_ext/scx_cpu0.c106
-rw-r--r--tools/sched_ext/scx_flatcg.bpf.c12
-rw-r--r--tools/sched_ext/scx_flatcg.c2
-rw-r--r--tools/sched_ext/scx_qmap.bpf.c150
-rw-r--r--tools/sched_ext/scx_qmap.c12
-rw-r--r--tools/sched_ext/scx_simple.c2
-rw-r--r--tools/scripts/syscall.tbl2
-rw-r--r--tools/testing/cxl/Kbuild10
-rw-r--r--tools/testing/cxl/cxl_core_exports.c22
-rw-r--r--tools/testing/cxl/exports.h13
-rw-r--r--tools/testing/cxl/test/Kbuild1
-rw-r--r--tools/testing/cxl/test/cxl.c169
-rw-r--r--tools/testing/cxl/test/cxl_translate.c445
-rw-r--r--tools/testing/cxl/test/mem.c11
-rw-r--r--tools/testing/cxl/test/mock.c148
-rw-r--r--tools/testing/cxl/test/mock.h13
-rwxr-xr-xtools/testing/ktest/config-bisect.pl4
-rw-r--r--tools/testing/kunit/configs/arch_uml.config5
-rwxr-xr-xtools/testing/kunit/kunit.py4
-rw-r--r--tools/testing/kunit/kunit_parser.py8
-rw-r--r--tools/testing/kunit/qemu_configs/mips.py18
-rw-r--r--tools/testing/kunit/qemu_configs/mips64.py19
-rw-r--r--tools/testing/kunit/qemu_configs/mips64el.py19
-rw-r--r--tools/testing/kunit/qemu_configs/mipsel.py18
-rw-r--r--tools/testing/kunit/test_data/test_is_test_passed-kselftest.log3
-rw-r--r--tools/testing/nvdimm/test/ndtest.c13
-rw-r--r--tools/testing/nvdimm/test/nfit.c7
-rw-r--r--tools/testing/radix-tree/idr-test.c16
-rw-r--r--tools/testing/radix-tree/maple.c524
-rw-r--r--tools/testing/scatterlist/linux/mm.h1
-rw-r--r--tools/testing/selftests/Makefile5
-rw-r--r--tools/testing/selftests/acct/acct_syscall.c2
-rw-r--r--tools/testing/selftests/alsa/conf.c4
-rw-r--r--tools/testing/selftests/alsa/mixer-test.c10
-rw-r--r--tools/testing/selftests/alsa/pcm-test.c10
-rw-r--r--tools/testing/selftests/alsa/test-pcmtest-driver.c2
-rw-r--r--tools/testing/selftests/alsa/utimer-test.c2
-rw-r--r--tools/testing/selftests/arm64/abi/hwcap.c24
-rw-r--r--tools/testing/selftests/arm64/abi/ptrace.c2
-rw-r--r--tools/testing/selftests/arm64/abi/syscall-abi.c2
-rw-r--r--tools/testing/selftests/arm64/abi/tpidr2.c14
-rw-r--r--tools/testing/selftests/arm64/bti/assembler.h1
-rw-r--r--tools/testing/selftests/arm64/fp/fp-ptrace.c13
-rw-r--r--tools/testing/selftests/arm64/fp/fp-stress.c8
-rw-r--r--tools/testing/selftests/arm64/fp/kernel-test.c4
-rw-r--r--tools/testing/selftests/arm64/fp/sve-probe-vls.c2
-rw-r--r--tools/testing/selftests/arm64/fp/sve-ptrace.c167
-rw-r--r--tools/testing/selftests/arm64/fp/vec-syscfg.c3
-rw-r--r--tools/testing/selftests/arm64/fp/za-ptrace.c2
-rw-r--r--tools/testing/selftests/arm64/fp/zt-ptrace.c3
-rw-r--r--tools/testing/selftests/arm64/fp/zt-test.S2
-rw-r--r--tools/testing/selftests/arm64/gcs/Makefile6
-rw-r--r--tools/testing/selftests/arm64/gcs/basic-gcs.c12
-rw-r--r--tools/testing/selftests/arm64/gcs/gcs-locking.c1
-rw-r--r--tools/testing/selftests/arm64/gcs/gcs-stress.c4
-rw-r--r--tools/testing/selftests/arm64/pauth/exec_target.c7
-rw-r--r--tools/testing/selftests/arm64/pauth/pac.c2
-rw-r--r--tools/testing/selftests/arm64/tags/tags_test.c2
-rw-r--r--tools/testing/selftests/bpf/.gitignore3
-rw-r--r--tools/testing/selftests/bpf/DENYLIST.s390x1
-rw-r--r--tools/testing/selftests/bpf/Makefile85
-rw-r--r--tools/testing/selftests/bpf/bench.c22
-rw-r--r--tools/testing/selftests/bpf/bench.h1
-rw-r--r--tools/testing/selftests/bpf/benchs/bench_lpm_trie_map.c555
-rw-r--r--tools/testing/selftests/bpf/benchs/bench_ringbufs.c65
-rw-r--r--tools/testing/selftests/bpf/benchs/bench_sockmap.c5
-rw-r--r--tools/testing/selftests/bpf/benchs/bench_trigger.c65
-rwxr-xr-xtools/testing/selftests/bpf/benchs/run_bench_ringbufs.sh4
-rwxr-xr-xtools/testing/selftests/bpf/benchs/run_bench_trigger.sh4
-rw-r--r--tools/testing/selftests/bpf/bpf_arena_list.h6
-rw-r--r--tools/testing/selftests/bpf/bpf_arena_strsearch.h128
-rw-r--r--tools/testing/selftests/bpf/bpf_experimental.h56
-rw-r--r--tools/testing/selftests/bpf/bpf_kfuncs.h15
-rw-r--r--tools/testing/selftests/bpf/bpf_util.h3
-rw-r--r--tools/testing/selftests/bpf/cgroup_helpers.c20
-rw-r--r--tools/testing/selftests/bpf/cgroup_helpers.h1
-rw-r--r--tools/testing/selftests/bpf/config9
-rw-r--r--tools/testing/selftests/bpf/config.aarch6412
-rw-r--r--tools/testing/selftests/bpf/config.ppc64el1
-rw-r--r--tools/testing/selftests/bpf/config.riscv641
-rw-r--r--tools/testing/selftests/bpf/config.s390x11
-rw-r--r--tools/testing/selftests/bpf/config.x86_645
-rw-r--r--tools/testing/selftests/bpf/network_helpers.c54
-rw-r--r--tools/testing/selftests/bpf/network_helpers.h16
-rw-r--r--tools/testing/selftests/bpf/prog_tests/align.c178
-rw-r--r--tools/testing/selftests/bpf/prog_tests/arena_spin_lock.c13
-rw-r--r--tools/testing/selftests/bpf/prog_tests/arena_strsearch.c30
-rw-r--r--tools/testing/selftests/bpf/prog_tests/arg_parsing.c12
-rw-r--r--tools/testing/selftests/bpf/prog_tests/atomics.c10
-rw-r--r--tools/testing/selftests/bpf/prog_tests/attach_probe.c28
-rw-r--r--tools/testing/selftests/bpf/prog_tests/bpf_cookie.c3
-rw-r--r--tools/testing/selftests/bpf/prog_tests/bpf_gotox.c292
-rw-r--r--tools/testing/selftests/bpf/prog_tests/bpf_insn_array.c504
-rw-r--r--tools/testing/selftests/bpf/prog_tests/btf.c65
-rw-r--r--tools/testing/selftests/bpf/prog_tests/btf_dump.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/btf_split.c87
-rw-r--r--tools/testing/selftests/bpf/prog_tests/cgroup_xattr.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/cgrp_kfunc.c71
-rw-r--r--tools/testing/selftests/bpf/prog_tests/check_mtu.c23
-rw-r--r--tools/testing/selftests/bpf/prog_tests/cls_redirect.c122
-rw-r--r--tools/testing/selftests/bpf/prog_tests/dynptr.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/fd_array.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/fentry_fexit.c15
-rw-r--r--tools/testing/selftests/bpf/prog_tests/fentry_test.c9
-rw-r--r--tools/testing/selftests/bpf/prog_tests/fexit_test.c9
-rw-r--r--tools/testing/selftests/bpf/prog_tests/file_reader.c117
-rw-r--r--tools/testing/selftests/bpf/prog_tests/free_timer.c4
-rw-r--r--tools/testing/selftests/bpf/prog_tests/htab_update.c37
-rw-r--r--tools/testing/selftests/bpf/prog_tests/kernel_flag.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/kmem_cache_iter.c3
-rw-r--r--tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c247
-rw-r--r--tools/testing/selftests/bpf/prog_tests/livepatch_trampoline.c107
-rw-r--r--tools/testing/selftests/bpf/prog_tests/map_excl.c54
-rw-r--r--tools/testing/selftests/bpf/prog_tests/module_attach.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/mptcp.c140
-rw-r--r--tools/testing/selftests/bpf/prog_tests/perf_branches.c22
-rw-r--r--tools/testing/selftests/bpf/prog_tests/pinning_devmap_reuse.c50
-rw-r--r--tools/testing/selftests/bpf/prog_tests/pinning_htab.c36
-rw-r--r--tools/testing/selftests/bpf/prog_tests/prog_tests_framework.c125
-rw-r--r--tools/testing/selftests/bpf/prog_tests/rcu_read_lock.c4
-rw-r--r--tools/testing/selftests/bpf/prog_tests/refcounted_kptr.c56
-rw-r--r--tools/testing/selftests/bpf/prog_tests/reg_bounds.c4
-rw-r--r--tools/testing/selftests/bpf/prog_tests/res_spin_lock.c16
-rw-r--r--tools/testing/selftests/bpf/prog_tests/ringbuf.c65
-rw-r--r--tools/testing/selftests/bpf/prog_tests/select_reuseport.c67
-rw-r--r--tools/testing/selftests/bpf/prog_tests/send_signal.c5
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sha256.c52
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sk_bypass_prot_mem.c292
-rw-r--r--tools/testing/selftests/bpf/prog_tests/socket_helpers.h9
-rw-r--r--tools/testing/selftests/bpf/prog_tests/spin_lock.c12
-rw-r--r--tools/testing/selftests/bpf/prog_tests/stacktrace_build_id.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/stacktrace_ips.c150
-rw-r--r--tools/testing/selftests/bpf/prog_tests/stacktrace_map.c71
-rw-r--r--tools/testing/selftests/bpf/prog_tests/stacktrace_map_raw_tp.c4
-rw-r--r--tools/testing/selftests/bpf/prog_tests/stacktrace_map_skip.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/stream.c131
-rw-r--r--tools/testing/selftests/bpf/prog_tests/string_kfuncs.c3
-rw-r--r--tools/testing/selftests/bpf/prog_tests/task_local_data.h386
-rw-r--r--tools/testing/selftests/bpf/prog_tests/task_work_stress.c130
-rw-r--r--tools/testing/selftests/bpf/prog_tests/test_bpf_smc.c390
-rw-r--r--tools/testing/selftests/bpf/prog_tests/test_lsm.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/test_struct_ops_id_ops_mapping.c74
-rw-r--r--tools/testing/selftests/bpf/prog_tests/test_task_local_data.c297
-rw-r--r--tools/testing/selftests/bpf/prog_tests/test_task_work.c157
-rw-r--r--tools/testing/selftests/bpf/prog_tests/test_tc_edt.c145
-rw-r--r--tools/testing/selftests/bpf/prog_tests/test_tc_tunnel.c714
-rw-r--r--tools/testing/selftests/bpf/prog_tests/test_tunnel.c107
-rw-r--r--tools/testing/selftests/bpf/prog_tests/test_veristat.c44
-rw-r--r--tools/testing/selftests/bpf/prog_tests/test_xsk.c2596
-rw-r--r--tools/testing/selftests/bpf/prog_tests/test_xsk.h298
-rw-r--r--tools/testing/selftests/bpf/prog_tests/timer.c38
-rw-r--r--tools/testing/selftests/bpf/prog_tests/timer_crash.c4
-rw-r--r--tools/testing/selftests/bpf/prog_tests/timer_lockup.c4
-rw-r--r--tools/testing/selftests/bpf/prog_tests/timer_mim.c4
-rw-r--r--tools/testing/selftests/bpf/prog_tests/tracing_struct.c29
-rw-r--r--tools/testing/selftests/bpf/prog_tests/uprobe.c156
-rw-r--r--tools/testing/selftests/bpf/prog_tests/uprobe_syscall.c484
-rw-r--r--tools/testing/selftests/bpf/prog_tests/usdt.c121
-rw-r--r--tools/testing/selftests/bpf/prog_tests/verifier.c8
-rw-r--r--tools/testing/selftests/bpf/prog_tests/wq.c56
-rw-r--r--tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c265
-rw-r--r--tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c31
-rw-r--r--tools/testing/selftests/bpf/prog_tests/xdp_pull_data.c179
-rw-r--r--tools/testing/selftests/bpf/prog_tests/xsk.c151
-rw-r--r--tools/testing/selftests/bpf/progs/arena_atomics.c9
-rw-r--r--tools/testing/selftests/bpf/progs/arena_spin_lock.c5
-rw-r--r--tools/testing/selftests/bpf/progs/arena_strsearch.c146
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_arena_spin_lock.h4
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_cc_cubic.c11
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_cubic.c7
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_dctcp.c8
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_gotox.c448
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_setsockopt.c17
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_tcp4.c8
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_tcp6.c8
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_udp4.c3
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_udp6.c4
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_misc.h28
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_smc.c117
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_test_utils.h18
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_tracing_net.h14
-rw-r--r--tools/testing/selftests/bpf/progs/cgroup_read_xattr.c2
-rw-r--r--tools/testing/selftests/bpf/progs/cgrp_kfunc_success.c12
-rw-r--r--tools/testing/selftests/bpf/progs/connect4_prog.c21
-rw-r--r--tools/testing/selftests/bpf/progs/crypto_sanity.c46
-rw-r--r--tools/testing/selftests/bpf/progs/dynptr_fail.c258
-rw-r--r--tools/testing/selftests/bpf/progs/dynptr_success.c67
-rw-r--r--tools/testing/selftests/bpf/progs/exceptions_assert.c34
-rw-r--r--tools/testing/selftests/bpf/progs/file_reader.c145
-rw-r--r--tools/testing/selftests/bpf/progs/file_reader_fail.c52
-rw-r--r--tools/testing/selftests/bpf/progs/freplace_connect_v4_prog.c2
-rw-r--r--tools/testing/selftests/bpf/progs/htab_update.c19
-rw-r--r--tools/testing/selftests/bpf/progs/ip_check_defrag.c5
-rw-r--r--tools/testing/selftests/bpf/progs/iters_looping.c53
-rw-r--r--tools/testing/selftests/bpf/progs/iters_state_safety.c6
-rw-r--r--tools/testing/selftests/bpf/progs/iters_task_failure.c4
-rw-r--r--tools/testing/selftests/bpf/progs/iters_testmod.c46
-rw-r--r--tools/testing/selftests/bpf/progs/iters_testmod_seq.c6
-rw-r--r--tools/testing/selftests/bpf/progs/kprobe_write_ctx.c22
-rw-r--r--tools/testing/selftests/bpf/progs/linked_list_fail.c5
-rw-r--r--tools/testing/selftests/bpf/progs/livepatch_trampoline.c30
-rw-r--r--tools/testing/selftests/bpf/progs/loop1.c7
-rw-r--r--tools/testing/selftests/bpf/progs/loop2.c7
-rw-r--r--tools/testing/selftests/bpf/progs/loop3.c7
-rw-r--r--tools/testing/selftests/bpf/progs/loop6.c21
-rw-r--r--tools/testing/selftests/bpf/progs/lpm_trie.h30
-rw-r--r--tools/testing/selftests/bpf/progs/lpm_trie_bench.c230
-rw-r--r--tools/testing/selftests/bpf/progs/lpm_trie_map.c19
-rw-r--r--tools/testing/selftests/bpf/progs/lsm.c8
-rw-r--r--tools/testing/selftests/bpf/progs/lsm_tailcall.c8
-rw-r--r--tools/testing/selftests/bpf/progs/map_excl.c34
-rw-r--r--tools/testing/selftests/bpf/progs/mem_rdonly_untrusted.c4
-rw-r--r--tools/testing/selftests/bpf/progs/mptcp_sockmap.c43
-rw-r--r--tools/testing/selftests/bpf/progs/mptcp_subflow.c2
-rw-r--r--tools/testing/selftests/bpf/progs/rbtree_search.c2
-rw-r--r--tools/testing/selftests/bpf/progs/rcu_read_lock.c40
-rw-r--r--tools/testing/selftests/bpf/progs/refcounted_kptr.c60
-rw-r--r--tools/testing/selftests/bpf/progs/ringbuf_bench.c11
-rw-r--r--tools/testing/selftests/bpf/progs/sk_bypass_prot_mem.c104
-rw-r--r--tools/testing/selftests/bpf/progs/stacktrace_ips.c49
-rw-r--r--tools/testing/selftests/bpf/progs/stacktrace_map.c (renamed from tools/testing/selftests/bpf/progs/test_stacktrace_map.c)2
-rw-r--r--tools/testing/selftests/bpf/progs/stream.c158
-rw-r--r--tools/testing/selftests/bpf/progs/stream_fail.c6
-rw-r--r--tools/testing/selftests/bpf/progs/string_kfuncs_failure1.c18
-rw-r--r--tools/testing/selftests/bpf/progs/string_kfuncs_failure2.c3
-rw-r--r--tools/testing/selftests/bpf/progs/string_kfuncs_success.c23
-rw-r--r--tools/testing/selftests/bpf/progs/strobemeta.h6
-rw-r--r--tools/testing/selftests/bpf/progs/struct_ops_id_ops_mapping1.c59
-rw-r--r--tools/testing/selftests/bpf/progs/struct_ops_id_ops_mapping2.c59
-rw-r--r--tools/testing/selftests/bpf/progs/struct_ops_kptr_return.c2
-rw-r--r--tools/testing/selftests/bpf/progs/struct_ops_refcounted.c2
-rw-r--r--tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_hierarchy1.c3
-rw-r--r--tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_hierarchy2.c3
-rw-r--r--tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_hierarchy3.c3
-rw-r--r--tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_hierarchy_fentry.c3
-rw-r--r--tools/testing/selftests/bpf/progs/task_local_data.bpf.h237
-rw-r--r--tools/testing/selftests/bpf/progs/task_work.c107
-rw-r--r--tools/testing/selftests/bpf/progs/task_work_fail.c96
-rw-r--r--tools/testing/selftests/bpf/progs/task_work_stress.c73
-rw-r--r--tools/testing/selftests/bpf/progs/tcp_ca_write_sk_pacing.c2
-rw-r--r--tools/testing/selftests/bpf/progs/test_check_mtu.c12
-rw-r--r--tools/testing/selftests/bpf/progs/test_cls_redirect.c6
-rw-r--r--tools/testing/selftests/bpf/progs/test_cls_redirect_dynptr.c2
-rw-r--r--tools/testing/selftests/bpf/progs/test_overhead.c5
-rw-r--r--tools/testing/selftests/bpf/progs/test_perf_branches.c3
-rw-r--r--tools/testing/selftests/bpf/progs/test_pinning_devmap.c20
-rw-r--r--tools/testing/selftests/bpf/progs/test_pinning_htab.c25
-rw-r--r--tools/testing/selftests/bpf/progs/test_ringbuf_overwrite.c98
-rw-r--r--tools/testing/selftests/bpf/progs/test_task_local_data.c65
-rw-r--r--tools/testing/selftests/bpf/progs/test_tc_edt.c11
-rw-r--r--tools/testing/selftests/bpf/progs/test_tc_tunnel.c95
-rw-r--r--tools/testing/selftests/bpf/progs/test_tcp_hdr_options.c5
-rw-r--r--tools/testing/selftests/bpf/progs/test_tcpnotify_kern.c1
-rw-r--r--tools/testing/selftests/bpf/progs/test_uprobe.c38
-rw-r--r--tools/testing/selftests/bpf/progs/test_usdt.c31
-rw-r--r--tools/testing/selftests/bpf/progs/test_xdp_devmap_tailcall.c29
-rw-r--r--tools/testing/selftests/bpf/progs/test_xdp_meta.c637
-rw-r--r--tools/testing/selftests/bpf/progs/test_xdp_pull_data.c48
-rw-r--r--tools/testing/selftests/bpf/progs/timer_interrupt.c48
-rw-r--r--tools/testing/selftests/bpf/progs/tracing_struct.c33
-rw-r--r--tools/testing/selftests/bpf/progs/trigger_bench.c18
-rw-r--r--tools/testing/selftests/bpf/progs/uprobe_syscall.c4
-rw-r--r--tools/testing/selftests/bpf/progs/uprobe_syscall_executed.c60
-rw-r--r--tools/testing/selftests/bpf/progs/uretprobe_stack.c4
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_arena_large.c1
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_async_cb_context.c181
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_bounds.c233
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_bpf_fastcall.c27
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_ctx.c32
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_direct_packet_access.c59
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_global_ptr_args.c18
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_gotox.c389
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_ldsx.c178
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_live_stack.c344
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_loops1.c21
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_lsm.c4
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_map_ptr.c7
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_may_goto_1.c38
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_mul.c38
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_netfilter_ctx.c5
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_precision.c16
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_scalar_ids.c12
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_sock.c87
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_spill_fill.c40
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_subprog_precision.c59
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_value_illegal_alu.c47
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_var_off.c6
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_vfs_accept.c2
-rw-r--r--tools/testing/selftests/bpf/progs/wq.c17
-rw-r--r--tools/testing/selftests/bpf/progs/wq_failures.c23
-rwxr-xr-xtools/testing/selftests/bpf/test_bpftool_build.sh4
-rw-r--r--tools/testing/selftests/bpf/test_kmods/Makefile2
-rw-r--r--tools/testing/selftests/bpf/test_kmods/bpf_test_rqspinlock.c393
-rw-r--r--tools/testing/selftests/bpf/test_kmods/bpf_testmod.c196
-rw-r--r--tools/testing/selftests/bpf/test_kmods/bpf_testmod.h6
-rw-r--r--tools/testing/selftests/bpf/test_kmods/bpf_testmod_kfunc.h4
-rw-r--r--tools/testing/selftests/bpf/test_lirc_mode2_user.c2
-rw-r--r--tools/testing/selftests/bpf/test_loader.c329
-rw-r--r--tools/testing/selftests/bpf/test_maps.c3
-rw-r--r--tools/testing/selftests/bpf/test_progs.c13
-rw-r--r--tools/testing/selftests/bpf/test_progs.h17
-rw-r--r--tools/testing/selftests/bpf/test_sockmap.c2
-rw-r--r--tools/testing/selftests/bpf/test_tag.c2
-rwxr-xr-xtools/testing/selftests/bpf/test_tc_edt.sh100
-rwxr-xr-xtools/testing/selftests/bpf/test_tc_tunnel.sh320
-rw-r--r--tools/testing/selftests/bpf/test_tcpnotify_user.c20
-rwxr-xr-xtools/testing/selftests/bpf/test_xsk.sh2
-rw-r--r--tools/testing/selftests/bpf/testing_helpers.c14
-rw-r--r--tools/testing/selftests/bpf/testing_helpers.h1
-rw-r--r--tools/testing/selftests/bpf/trace_helpers.c234
-rw-r--r--tools/testing/selftests/bpf/trace_helpers.h3
-rw-r--r--tools/testing/selftests/bpf/usdt.h545
-rw-r--r--tools/testing/selftests/bpf/verifier/bpf_st_mem.c4
-rw-r--r--tools/testing/selftests/bpf/verifier/calls.c8
-rwxr-xr-xtools/testing/selftests/bpf/verify_sig_setup.sh11
-rw-r--r--tools/testing/selftests/bpf/veristat.c56
-rw-r--r--tools/testing/selftests/bpf/xdping.c2
-rw-r--r--tools/testing/selftests/bpf/xsk.h4
-rw-r--r--tools/testing/selftests/bpf/xskxceiver.c2526
-rw-r--r--tools/testing/selftests/bpf/xskxceiver.h156
-rw-r--r--tools/testing/selftests/breakpoints/breakpoint_test.c2
-rw-r--r--tools/testing/selftests/breakpoints/breakpoint_test_arm64.c2
-rw-r--r--tools/testing/selftests/breakpoints/step_after_suspend_test.c2
-rw-r--r--tools/testing/selftests/cachestat/.gitignore1
-rw-r--r--tools/testing/selftests/cachestat/test_cachestat.c6
-rw-r--r--tools/testing/selftests/capabilities/test_execve.c2
-rw-r--r--tools/testing/selftests/capabilities/validate_cap.c2
-rw-r--r--tools/testing/selftests/cgroup/lib/cgroup_util.c12
-rw-r--r--tools/testing/selftests/cgroup/lib/include/cgroup_util.h21
-rw-r--r--tools/testing/selftests/cgroup/test_core.c9
-rw-r--r--tools/testing/selftests/cgroup/test_cpu.c27
-rw-r--r--tools/testing/selftests/cgroup/test_cpuset.c9
-rw-r--r--tools/testing/selftests/cgroup/test_freezer.c672
-rw-r--r--tools/testing/selftests/cgroup/test_hugetlb_memcg.c2
-rw-r--r--tools/testing/selftests/cgroup/test_kill.c9
-rw-r--r--tools/testing/selftests/cgroup/test_kmem.c9
-rw-r--r--tools/testing/selftests/cgroup/test_memcontrol.c9
-rw-r--r--tools/testing/selftests/cgroup/test_pids.c5
-rw-r--r--tools/testing/selftests/cgroup/test_zswap.c9
-rw-r--r--tools/testing/selftests/clone3/clone3.c2
-rw-r--r--tools/testing/selftests/clone3/clone3_cap_checkpoint_restore.c2
-rw-r--r--tools/testing/selftests/clone3/clone3_clear_sighand.c2
-rw-r--r--tools/testing/selftests/clone3/clone3_selftests.h2
-rw-r--r--tools/testing/selftests/clone3/clone3_set_tid.c2
-rw-r--r--tools/testing/selftests/connector/proc_filter.c2
-rw-r--r--tools/testing/selftests/core/close_range_test.c2
-rw-r--r--tools/testing/selftests/core/unshare_test.c2
-rw-r--r--tools/testing/selftests/coredump/.gitignore4
-rw-r--r--tools/testing/selftests/coredump/Makefile8
-rw-r--r--tools/testing/selftests/coredump/coredump_socket_protocol_test.c1568
-rw-r--r--tools/testing/selftests/coredump/coredump_socket_test.c742
-rw-r--r--tools/testing/selftests/coredump/coredump_test.h59
-rw-r--r--tools/testing/selftests/coredump/coredump_test_helpers.c383
-rw-r--r--tools/testing/selftests/coredump/stackdump_test.c1667
-rw-r--r--tools/testing/selftests/damon/Makefile3
-rw-r--r--tools/testing/selftests/damon/_damon_sysfs.py11
-rw-r--r--tools/testing/selftests/damon/access_memory_even.c1
-rwxr-xr-xtools/testing/selftests/damon/drgn_dump_damon_status.py9
-rwxr-xr-xtools/testing/selftests/damon/sysfs.py71
-rwxr-xr-xtools/testing/selftests/damon/sysfs_no_op_commit_break.py72
-rw-r--r--tools/testing/selftests/dma/Makefile7
-rw-r--r--tools/testing/selftests/dmabuf-heaps/dmabuf-heap.c2
-rw-r--r--tools/testing/selftests/drivers/dma-buf/udmabuf.c2
-rw-r--r--tools/testing/selftests/drivers/net/.gitignore2
-rw-r--r--tools/testing/selftests/drivers/net/Makefile18
-rw-r--r--tools/testing/selftests/drivers/net/bonding/Makefile20
-rwxr-xr-xtools/testing/selftests/drivers/net/bonding/bond_ipsec_offload.sh156
-rwxr-xr-xtools/testing/selftests/drivers/net/bonding/bond_lacp_prio.sh108
-rwxr-xr-xtools/testing/selftests/drivers/net/bonding/bond_macvlan_ipvlan.sh1
-rwxr-xr-xtools/testing/selftests/drivers/net/bonding/bond_options.sh197
-rwxr-xr-xtools/testing/selftests/drivers/net/bonding/bond_passive_lacp.sh105
-rw-r--r--tools/testing/selftests/drivers/net/bonding/bond_topo_2d1c.sh3
-rw-r--r--tools/testing/selftests/drivers/net/bonding/bond_topo_3d1c.sh2
-rw-r--r--tools/testing/selftests/drivers/net/bonding/config12
-rwxr-xr-xtools/testing/selftests/drivers/net/bonding/netcons_over_bonding.sh361
-rw-r--r--tools/testing/selftests/drivers/net/config7
-rw-r--r--tools/testing/selftests/drivers/net/dsa/Makefile12
-rw-r--r--tools/testing/selftests/drivers/net/gro.c (renamed from tools/testing/selftests/net/gro.c)75
-rwxr-xr-xtools/testing/selftests/drivers/net/gro.py164
-rwxr-xr-xtools/testing/selftests/drivers/net/hds.py42
-rw-r--r--tools/testing/selftests/drivers/net/hw/.gitignore1
-rw-r--r--tools/testing/selftests/drivers/net/hw/Makefile36
-rw-r--r--tools/testing/selftests/drivers/net/hw/config6
-rwxr-xr-xtools/testing/selftests/drivers/net/hw/csum.py4
-rwxr-xr-xtools/testing/selftests/drivers/net/hw/devlink_rate_tc_bw.py174
-rwxr-xr-xtools/testing/selftests/drivers/net/hw/devmem.py14
-rw-r--r--tools/testing/selftests/drivers/net/hw/lib/py/__init__.py47
-rw-r--r--tools/testing/selftests/drivers/net/hw/ncdevmem.c856
-rwxr-xr-xtools/testing/selftests/drivers/net/hw/nic_timestamp.py113
-rwxr-xr-xtools/testing/selftests/drivers/net/hw/pp_alloc_fail.py36
-rwxr-xr-xtools/testing/selftests/drivers/net/hw/rss_ctx.py18
-rwxr-xr-xtools/testing/selftests/drivers/net/hw/rss_flow_label.py167
-rw-r--r--tools/testing/selftests/drivers/net/hw/toeplitz.c (renamed from tools/testing/selftests/net/toeplitz.c)72
-rwxr-xr-xtools/testing/selftests/drivers/net/hw/toeplitz.py211
-rwxr-xr-xtools/testing/selftests/drivers/net/hw/tso.py11
-rw-r--r--tools/testing/selftests/drivers/net/lib/py/__init__.py50
-rw-r--r--tools/testing/selftests/drivers/net/lib/py/env.py47
-rw-r--r--tools/testing/selftests/drivers/net/lib/py/load.py84
-rw-r--r--tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh90
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/devlink_trap_policer.sh9
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/qos_ets_strict.sh12
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/qos_max_descriptors.sh9
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh12
-rw-r--r--tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh6
-rwxr-xr-xtools/testing/selftests/drivers/net/napi_threaded.py34
-rwxr-xr-xtools/testing/selftests/drivers/net/netcons_basic.sh5
-rwxr-xr-xtools/testing/selftests/drivers/net/netcons_cmdline.sh55
-rwxr-xr-xtools/testing/selftests/drivers/net/netcons_overflow.sh2
-rwxr-xr-xtools/testing/selftests/drivers/net/netcons_torture.sh130
-rw-r--r--tools/testing/selftests/drivers/net/netdevsim/Makefile9
-rwxr-xr-xtools/testing/selftests/drivers/net/netdevsim/devlink.sh116
-rwxr-xr-xtools/testing/selftests/drivers/net/netdevsim/ethtool-ring.sh85
-rwxr-xr-xtools/testing/selftests/drivers/net/psp.py640
-rw-r--r--tools/testing/selftests/drivers/net/psp_responder.c483
-rwxr-xr-xtools/testing/selftests/drivers/net/ring_reconfig.py167
-rwxr-xr-xtools/testing/selftests/drivers/net/stats.py40
-rw-r--r--tools/testing/selftests/drivers/net/team/Makefile11
-rw-r--r--tools/testing/selftests/drivers/net/team/config1
-rwxr-xr-xtools/testing/selftests/drivers/net/team/options.sh188
-rw-r--r--tools/testing/selftests/drivers/net/virtio_net/Makefile13
-rwxr-xr-xtools/testing/selftests/drivers/net/xdp.py163
-rw-r--r--tools/testing/selftests/drivers/ntsync/ntsync.c2
-rw-r--r--tools/testing/selftests/drivers/s390x/uvdevice/test_uvdevice.c2
-rw-r--r--tools/testing/selftests/exec/check-exec.c2
-rw-r--r--tools/testing/selftests/exec/execveat.c2
-rw-r--r--tools/testing/selftests/exec/load_address.c2
-rw-r--r--tools/testing/selftests/exec/non-regular.c2
-rw-r--r--tools/testing/selftests/exec/null-argv.c2
-rw-r--r--tools/testing/selftests/exec/recursion-depth.c2
-rw-r--r--tools/testing/selftests/fchmodat2/fchmodat2_test.c2
-rw-r--r--tools/testing/selftests/filelock/ofdlocks.c2
-rw-r--r--tools/testing/selftests/filesystems/.gitignore1
-rw-r--r--tools/testing/selftests/filesystems/Makefile2
-rw-r--r--tools/testing/selftests/filesystems/anon_inode_test.c2
-rw-r--r--tools/testing/selftests/filesystems/binderfs/binderfs_test.c3
-rw-r--r--tools/testing/selftests/filesystems/devpts_pts.c2
-rw-r--r--tools/testing/selftests/filesystems/epoll/epoll_wakeup_test.c2
-rw-r--r--tools/testing/selftests/filesystems/eventfd/eventfd_test.c2
-rw-r--r--tools/testing/selftests/filesystems/fclog.c130
-rw-r--r--tools/testing/selftests/filesystems/file_stressor.c2
-rw-r--r--tools/testing/selftests/filesystems/fuse/.gitignore3
-rw-r--r--tools/testing/selftests/filesystems/fuse/Makefile21
-rw-r--r--tools/testing/selftests/filesystems/fuse/fuse_mnt.c146
-rw-r--r--tools/testing/selftests/filesystems/fuse/fusectl_test.c140
-rw-r--r--tools/testing/selftests/filesystems/kernfs_test.c2
-rw-r--r--tools/testing/selftests/filesystems/mount-notify/mount-notify_test.c19
-rw-r--r--tools/testing/selftests/filesystems/mount-notify/mount-notify_test_ns.c20
-rw-r--r--tools/testing/selftests/filesystems/nsfs/iterate_mntns.c2
-rw-r--r--tools/testing/selftests/filesystems/overlayfs/dev_in_maps.c2
-rw-r--r--tools/testing/selftests/filesystems/overlayfs/set_layers_via_fds.c2
-rw-r--r--tools/testing/selftests/filesystems/statmount/listmount_test.c2
-rw-r--r--tools/testing/selftests/filesystems/statmount/statmount_test.c2
-rw-r--r--tools/testing/selftests/filesystems/statmount/statmount_test_ns.c2
-rw-r--r--tools/testing/selftests/filesystems/utils.c4
-rwxr-xr-xtools/testing/selftests/ftrace/ftracetest34
-rw-r--r--tools/testing/selftests/ftrace/test.d/00basic/mount_options.tc2
-rw-r--r--tools/testing/selftests/ftrace/test.d/00basic/trace_marker_raw.tc107
-rw-r--r--tools/testing/selftests/ftrace/test.d/dynevent/add_remove_fprobe.tc18
-rw-r--r--tools/testing/selftests/ftrace/test.d/dynevent/enable_disable_tprobe.tc40
-rw-r--r--tools/testing/selftests/ftrace/test.d/filter/event-filter-function.tc4
-rw-r--r--tools/testing/selftests/ftrace/test.d/functions6
-rw-r--r--tools/testing/selftests/futex/functional/Makefile8
-rw-r--r--tools/testing/selftests/futex/functional/futex_numa.c3
-rw-r--r--tools/testing/selftests/futex/functional/futex_numa_mpol.c100
-rw-r--r--tools/testing/selftests/futex/functional/futex_priv_hash.c67
-rw-r--r--tools/testing/selftests/futex/functional/futex_requeue.c76
-rw-r--r--tools/testing/selftests/futex/functional/futex_requeue_pi.c266
-rw-r--r--tools/testing/selftests/futex/functional/futex_requeue_pi_mismatched_ops.c86
-rw-r--r--tools/testing/selftests/futex/functional/futex_requeue_pi_signal_restart.c129
-rw-r--r--tools/testing/selftests/futex/functional/futex_wait.c103
-rw-r--r--tools/testing/selftests/futex/functional/futex_wait_private_mapped_file.c83
-rw-r--r--tools/testing/selftests/futex/functional/futex_wait_timeout.c139
-rw-r--r--tools/testing/selftests/futex/functional/futex_wait_uninitialized_heap.c76
-rw-r--r--tools/testing/selftests/futex/functional/futex_wait_wouldblock.c76
-rw-r--r--tools/testing/selftests/futex/functional/futex_waitv.c99
-rwxr-xr-xtools/testing/selftests/futex/functional/run.sh62
-rw-r--r--tools/testing/selftests/futex/include/futextest.h11
-rw-r--r--tools/testing/selftests/futex/include/logging.h148
-rw-r--r--tools/testing/selftests/hid/hid_common.h8
-rw-r--r--tools/testing/selftests/hid/hidraw.c473
-rw-r--r--tools/testing/selftests/hid/tests/test_multitouch.py55
-rw-r--r--tools/testing/selftests/hid/tests/test_tablet.py71
-rwxr-xr-xtools/testing/selftests/hid/vmtest.sh668
-rw-r--r--tools/testing/selftests/intel_pstate/aperf.c2
-rw-r--r--tools/testing/selftests/iommu/iommufd.c105
-rw-r--r--tools/testing/selftests/iommu/iommufd_fail_nth.c2
-rw-r--r--tools/testing/selftests/iommu/iommufd_utils.h66
-rw-r--r--tools/testing/selftests/ipc/msgque.c2
-rw-r--r--tools/testing/selftests/ir/ir_loopback.c2
-rw-r--r--tools/testing/selftests/kcmp/kcmp_test.c2
-rw-r--r--tools/testing/selftests/kexec/.gitignore2
-rw-r--r--tools/testing/selftests/kho/init.c13
-rwxr-xr-xtools/testing/selftests/kho/vmtest.sh29
-rw-r--r--tools/testing/selftests/kselftest.h22
-rw-r--r--tools/testing/selftests/kselftest/runner.sh14
-rw-r--r--tools/testing/selftests/kselftest_harness.h19
-rw-r--r--tools/testing/selftests/kselftest_harness/Makefile1
-rw-r--r--tools/testing/selftests/kselftest_harness/harness-selftest.c2
-rw-r--r--tools/testing/selftests/kvm/Makefile2
-rw-r--r--tools/testing/selftests/kvm/Makefile.kvm22
-rw-r--r--tools/testing/selftests/kvm/access_tracking_perf_test.c1
-rw-r--r--tools/testing/selftests/kvm/arm64/aarch32_id_regs.c2
-rw-r--r--tools/testing/selftests/kvm/arm64/arch_timer.c13
-rw-r--r--tools/testing/selftests/kvm/arm64/arch_timer_edge_cases.c15
-rw-r--r--tools/testing/selftests/kvm/arm64/at.c166
-rw-r--r--tools/testing/selftests/kvm/arm64/debug-exceptions.c12
-rw-r--r--tools/testing/selftests/kvm/arm64/external_aborts.c85
-rw-r--r--tools/testing/selftests/kvm/arm64/get-reg-list.c102
-rw-r--r--tools/testing/selftests/kvm/arm64/hello_el2.c71
-rw-r--r--tools/testing/selftests/kvm/arm64/hypercalls.c2
-rw-r--r--tools/testing/selftests/kvm/arm64/kvm-uuid.c70
-rw-r--r--tools/testing/selftests/kvm/arm64/no-vgic-v3.c6
-rw-r--r--tools/testing/selftests/kvm/arm64/page_fault_test.c6
-rw-r--r--tools/testing/selftests/kvm/arm64/psci_test.c13
-rw-r--r--tools/testing/selftests/kvm/arm64/sea_to_user.c331
-rw-r--r--tools/testing/selftests/kvm/arm64/set_id_regs.c64
-rw-r--r--tools/testing/selftests/kvm/arm64/smccc_filter.c17
-rw-r--r--tools/testing/selftests/kvm/arm64/vgic_init.c2
-rw-r--r--tools/testing/selftests/kvm/arm64/vgic_irq.c291
-rw-r--r--tools/testing/selftests/kvm/arm64/vgic_lpi_stress.c15
-rw-r--r--tools/testing/selftests/kvm/arm64/vpmu_counter_access.c77
-rw-r--r--tools/testing/selftests/kvm/dirty_log_perf_test.c35
-rw-r--r--tools/testing/selftests/kvm/dirty_log_test.c1
-rw-r--r--tools/testing/selftests/kvm/get-reg-list.c9
-rw-r--r--tools/testing/selftests/kvm/guest_memfd_test.c367
-rw-r--r--tools/testing/selftests/kvm/include/arm64/arch_timer.h24
-rw-r--r--tools/testing/selftests/kvm/include/arm64/gic.h1
-rw-r--r--tools/testing/selftests/kvm/include/arm64/gic_v3_its.h1
-rw-r--r--tools/testing/selftests/kvm/include/arm64/kvm_util_arch.h5
-rw-r--r--tools/testing/selftests/kvm/include/arm64/processor.h84
-rw-r--r--tools/testing/selftests/kvm/include/arm64/vgic.h3
-rw-r--r--tools/testing/selftests/kvm/include/kvm_syscalls.h81
-rw-r--r--tools/testing/selftests/kvm/include/kvm_util.h46
-rw-r--r--tools/testing/selftests/kvm/include/loongarch/arch_timer.h85
-rw-r--r--tools/testing/selftests/kvm/include/loongarch/processor.h81
-rw-r--r--tools/testing/selftests/kvm/include/numaif.h110
-rw-r--r--tools/testing/selftests/kvm/include/riscv/processor.h1
-rw-r--r--tools/testing/selftests/kvm/include/test_util.h19
-rw-r--r--tools/testing/selftests/kvm/include/x86/pmu.h26
-rw-r--r--tools/testing/selftests/kvm/include/x86/processor.h42
-rw-r--r--tools/testing/selftests/kvm/include/x86/vmx.h3
-rw-r--r--tools/testing/selftests/kvm/irqfd_test.c14
-rw-r--r--tools/testing/selftests/kvm/kvm_binary_stats_test.c4
-rw-r--r--tools/testing/selftests/kvm/lib/arm64/gic.c6
-rw-r--r--tools/testing/selftests/kvm/lib/arm64/gic_private.h1
-rw-r--r--tools/testing/selftests/kvm/lib/arm64/gic_v3.c22
-rw-r--r--tools/testing/selftests/kvm/lib/arm64/gic_v3_its.c19
-rw-r--r--tools/testing/selftests/kvm/lib/arm64/processor.c117
-rw-r--r--tools/testing/selftests/kvm/lib/arm64/vgic.c64
-rw-r--r--tools/testing/selftests/kvm/lib/kvm_util.c251
-rw-r--r--tools/testing/selftests/kvm/lib/loongarch/exception.S6
-rw-r--r--tools/testing/selftests/kvm/lib/loongarch/processor.c47
-rw-r--r--tools/testing/selftests/kvm/lib/s390/processor.c5
-rw-r--r--tools/testing/selftests/kvm/lib/test_util.c7
-rw-r--r--tools/testing/selftests/kvm/lib/x86/memstress.c2
-rw-r--r--tools/testing/selftests/kvm/lib/x86/pmu.c49
-rw-r--r--tools/testing/selftests/kvm/lib/x86/processor.c128
-rw-r--r--tools/testing/selftests/kvm/lib/x86/vmx.c9
-rw-r--r--tools/testing/selftests/kvm/loongarch/arch_timer.c200
-rw-r--r--tools/testing/selftests/kvm/memslot_modification_stress_test.c1
-rw-r--r--tools/testing/selftests/kvm/memslot_perf_test.c1
-rw-r--r--tools/testing/selftests/kvm/mmu_stress_test.c15
-rw-r--r--tools/testing/selftests/kvm/pre_fault_memory_test.c157
-rw-r--r--tools/testing/selftests/kvm/riscv/get-reg-list.c64
-rw-r--r--tools/testing/selftests/kvm/s390/cmma_test.c2
-rw-r--r--tools/testing/selftests/kvm/s390/cpumodel_subfuncs_test.c2
-rw-r--r--tools/testing/selftests/kvm/s390/ucontrol_test.c16
-rw-r--r--tools/testing/selftests/kvm/s390/user_operexec.c140
-rw-r--r--tools/testing/selftests/kvm/set_memory_region_test.c17
-rw-r--r--tools/testing/selftests/kvm/steal_time.c2
-rw-r--r--tools/testing/selftests/kvm/x86/fastops_test.c82
-rw-r--r--tools/testing/selftests/kvm/x86/hyperv_cpuid.c2
-rw-r--r--tools/testing/selftests/kvm/x86/hyperv_features.c18
-rw-r--r--tools/testing/selftests/kvm/x86/hyperv_ipi.c18
-rw-r--r--tools/testing/selftests/kvm/x86/hyperv_tlb_flush.c2
-rw-r--r--tools/testing/selftests/kvm/x86/monitor_mwait_test.c8
-rw-r--r--tools/testing/selftests/kvm/x86/msrs_test.c489
-rw-r--r--tools/testing/selftests/kvm/x86/nested_close_kvm_test.c (renamed from tools/testing/selftests/kvm/x86/vmx_close_while_nested_test.c)42
-rw-r--r--tools/testing/selftests/kvm/x86/nested_invalid_cr3_test.c116
-rw-r--r--tools/testing/selftests/kvm/x86/nested_tsc_adjust_test.c (renamed from tools/testing/selftests/kvm/x86/vmx_tsc_adjust_test.c)79
-rw-r--r--tools/testing/selftests/kvm/x86/nested_tsc_scaling_test.c (renamed from tools/testing/selftests/kvm/x86/vmx_nested_tsc_scaling_test.c)48
-rw-r--r--tools/testing/selftests/kvm/x86/pmu_counters_test.c75
-rw-r--r--tools/testing/selftests/kvm/x86/pmu_event_filter_test.c4
-rw-r--r--tools/testing/selftests/kvm/x86/private_mem_conversions_test.c9
-rw-r--r--tools/testing/selftests/kvm/x86/sev_smoke_test.c2
-rw-r--r--tools/testing/selftests/kvm/x86/state_test.c2
-rw-r--r--tools/testing/selftests/kvm/x86/userspace_io_test.c2
-rw-r--r--tools/testing/selftests/kvm/x86/vmx_dirty_log_test.c12
-rw-r--r--tools/testing/selftests/kvm/x86/vmx_nested_la57_state_test.c132
-rw-r--r--tools/testing/selftests/kvm/x86/vmx_pmu_caps_test.c7
-rw-r--r--tools/testing/selftests/kvm/x86/xapic_ipi_test.c5
-rw-r--r--tools/testing/selftests/kvm/x86/xapic_state_test.c4
-rw-r--r--tools/testing/selftests/kvm/x86/xcr0_cpuid_test.c12
-rw-r--r--tools/testing/selftests/landlock/Makefile2
-rw-r--r--tools/testing/selftests/landlock/audit.h6
-rw-r--r--tools/testing/selftests/landlock/common.h6
-rw-r--r--tools/testing/selftests/landlock/fs_test.c1474
-rw-r--r--tools/testing/selftests/lib.mk8
-rw-r--r--tools/testing/selftests/livepatch/functions.sh6
-rw-r--r--tools/testing/selftests/liveupdate/.gitignore9
-rw-r--r--tools/testing/selftests/liveupdate/Makefile34
-rw-r--r--tools/testing/selftests/liveupdate/config11
-rwxr-xr-xtools/testing/selftests/liveupdate/do_kexec.sh16
-rw-r--r--tools/testing/selftests/liveupdate/liveupdate.c348
-rw-r--r--tools/testing/selftests/liveupdate/luo_kexec_simple.c89
-rw-r--r--tools/testing/selftests/liveupdate/luo_multi_session.c162
-rw-r--r--tools/testing/selftests/liveupdate/luo_test_utils.c266
-rw-r--r--tools/testing/selftests/liveupdate/luo_test_utils.h44
-rw-r--r--tools/testing/selftests/lsm/lsm_get_self_attr_test.c2
-rw-r--r--tools/testing/selftests/lsm/lsm_list_modules_test.c2
-rw-r--r--tools/testing/selftests/lsm/lsm_set_self_attr_test.c2
-rw-r--r--tools/testing/selftests/media_tests/media_device_open.c2
-rw-r--r--tools/testing/selftests/media_tests/media_device_test.c2
-rw-r--r--tools/testing/selftests/membarrier/membarrier_test_impl.h2
-rw-r--r--tools/testing/selftests/mincore/mincore_selftest.c4
-rw-r--r--tools/testing/selftests/mm/.gitignore2
-rw-r--r--tools/testing/selftests/mm/Makefile5
-rw-r--r--tools/testing/selftests/mm/compaction_test.c2
-rw-r--r--tools/testing/selftests/mm/cow.c19
-rw-r--r--tools/testing/selftests/mm/droppable.c2
-rw-r--r--tools/testing/selftests/mm/guard-regions.c189
-rw-r--r--tools/testing/selftests/mm/gup_longterm.c2
-rw-r--r--tools/testing/selftests/mm/gup_test.c28
-rw-r--r--tools/testing/selftests/mm/hmm-tests.c926
-rw-r--r--tools/testing/selftests/mm/hugepage-mmap.c2
-rw-r--r--tools/testing/selftests/mm/hugepage-mremap.c18
-rw-r--r--tools/testing/selftests/mm/hugetlb-madvise.c6
-rw-r--r--tools/testing/selftests/mm/hugetlb-read-hwpoison.c2
-rw-r--r--tools/testing/selftests/mm/hugetlb-soft-offline.c2
-rw-r--r--tools/testing/selftests/mm/hugetlb_dio.c2
-rw-r--r--tools/testing/selftests/mm/hugetlb_fault_after_madv.c2
-rw-r--r--tools/testing/selftests/mm/hugetlb_madv_vs_map.c2
-rw-r--r--tools/testing/selftests/mm/khugepaged.c2
-rw-r--r--tools/testing/selftests/mm/ksm_functional_tests.c258
-rw-r--r--tools/testing/selftests/mm/ksm_tests.c2
-rw-r--r--tools/testing/selftests/mm/madv_populate.c23
-rw-r--r--tools/testing/selftests/mm/map_fixed_noreplace.c2
-rw-r--r--tools/testing/selftests/mm/map_hugetlb.c2
-rw-r--r--tools/testing/selftests/mm/map_populate.c2
-rw-r--r--tools/testing/selftests/mm/mdwe_test.c2
-rw-r--r--tools/testing/selftests/mm/memfd_secret.c2
-rw-r--r--tools/testing/selftests/mm/merge.c2
-rw-r--r--tools/testing/selftests/mm/migration.c4
-rw-r--r--tools/testing/selftests/mm/mkdirty.c2
-rw-r--r--tools/testing/selftests/mm/mlock-random-test.c2
-rw-r--r--tools/testing/selftests/mm/mlock2-tests.c2
-rw-r--r--tools/testing/selftests/mm/mrelease_test.c2
-rw-r--r--tools/testing/selftests/mm/mremap_dontunmap.c2
-rw-r--r--tools/testing/selftests/mm/mremap_test.c271
-rw-r--r--tools/testing/selftests/mm/mseal_test.c2
-rw-r--r--tools/testing/selftests/mm/on-fault-limit.c2
-rw-r--r--tools/testing/selftests/mm/pagemap_ioctl.c28
-rw-r--r--tools/testing/selftests/mm/pfnmap.c50
-rw-r--r--tools/testing/selftests/mm/pkey-helpers.h5
-rw-r--r--tools/testing/selftests/mm/pkey_sighandler_tests.c2
-rw-r--r--tools/testing/selftests/mm/prctl_thp_disable.c291
-rw-r--r--tools/testing/selftests/mm/process_madv.c2
-rw-r--r--tools/testing/selftests/mm/protection_keys.c6
-rw-r--r--tools/testing/selftests/mm/rmap.c433
-rwxr-xr-xtools/testing/selftests/mm/run_vmtests.sh31
-rw-r--r--tools/testing/selftests/mm/soft-dirty.c132
-rw-r--r--tools/testing/selftests/mm/split_huge_page_test.c478
-rwxr-xr-xtools/testing/selftests/mm/test_vmalloc.sh6
-rw-r--r--tools/testing/selftests/mm/thp_settings.c9
-rw-r--r--tools/testing/selftests/mm/thp_settings.h1
-rw-r--r--tools/testing/selftests/mm/thuge-gen.c13
-rw-r--r--tools/testing/selftests/mm/transhuge-stress.c2
-rw-r--r--tools/testing/selftests/mm/uffd-common.c293
-rw-r--r--tools/testing/selftests/mm/uffd-common.h80
-rw-r--r--tools/testing/selftests/mm/uffd-stress.c245
-rw-r--r--tools/testing/selftests/mm/uffd-unit-tests.c578
-rw-r--r--tools/testing/selftests/mm/uffd-wp-mremap.c31
-rw-r--r--tools/testing/selftests/mm/va_high_addr_switch.c6
-rwxr-xr-xtools/testing/selftests/mm/va_high_addr_switch.sh37
-rw-r--r--tools/testing/selftests/mm/virtual_address_range.c15
-rw-r--r--tools/testing/selftests/mm/vm_util.c174
-rw-r--r--tools/testing/selftests/mm/vm_util.h23
-rw-r--r--tools/testing/selftests/mount_setattr/mount_setattr_test.c79
-rw-r--r--tools/testing/selftests/move_mount_set_group/move_mount_set_group_test.c2
-rw-r--r--tools/testing/selftests/mqueue/mq_open_tests.c2
-rw-r--r--tools/testing/selftests/mqueue/mq_perf_tests.c2
-rw-r--r--tools/testing/selftests/mseal_system_mappings/sysmap_is_sealed.c4
-rw-r--r--tools/testing/selftests/namespaces/.gitignore12
-rw-r--r--tools/testing/selftests/namespaces/Makefile29
-rw-r--r--tools/testing/selftests/namespaces/config7
-rw-r--r--tools/testing/selftests/namespaces/cred_change_test.c814
-rw-r--r--tools/testing/selftests/namespaces/file_handle_test.c1429
-rw-r--r--tools/testing/selftests/namespaces/init_ino_test.c61
-rw-r--r--tools/testing/selftests/namespaces/listns_efault_test.c530
-rw-r--r--tools/testing/selftests/namespaces/listns_pagination_bug.c138
-rw-r--r--tools/testing/selftests/namespaces/listns_permissions_test.c759
-rw-r--r--tools/testing/selftests/namespaces/listns_test.c679
-rw-r--r--tools/testing/selftests/namespaces/ns_active_ref_test.c2672
-rw-r--r--tools/testing/selftests/namespaces/nsid_test.c981
-rw-r--r--tools/testing/selftests/namespaces/regression_pidfd_setns_test.c113
-rw-r--r--tools/testing/selftests/namespaces/siocgskns_test.c1824
-rw-r--r--tools/testing/selftests/namespaces/stress_test.c626
-rw-r--r--tools/testing/selftests/namespaces/wrappers.h35
-rw-r--r--tools/testing/selftests/nci/nci_dev.c2
-rw-r--r--tools/testing/selftests/net/.gitignore10
-rw-r--r--tools/testing/selftests/net/Makefile303
-rw-r--r--tools/testing/selftests/net/af_unix/.gitignore8
-rw-r--r--tools/testing/selftests/net/af_unix/Makefile14
-rw-r--r--tools/testing/selftests/net/af_unix/config2
-rw-r--r--tools/testing/selftests/net/af_unix/diag_uid.c2
-rw-r--r--tools/testing/selftests/net/af_unix/msg_oob.c2
-rw-r--r--tools/testing/selftests/net/af_unix/scm_inq.c28
-rw-r--r--tools/testing/selftests/net/af_unix/scm_pidfd.c4
-rw-r--r--tools/testing/selftests/net/af_unix/scm_rights.c30
-rw-r--r--tools/testing/selftests/net/af_unix/so_peek_off.c162
-rw-r--r--tools/testing/selftests/net/af_unix/unix_connect.c2
-rw-r--r--tools/testing/selftests/net/af_unix/unix_connreset.c180
-rwxr-xr-xtools/testing/selftests/net/arp_ndisc_evict_nocarrier.sh2
-rwxr-xr-xtools/testing/selftests/net/bareudp.sh2
-rw-r--r--tools/testing/selftests/net/bind_bhash.c4
-rw-r--r--tools/testing/selftests/net/bind_timewait.c2
-rw-r--r--tools/testing/selftests/net/bind_wildcard.c2
-rwxr-xr-xtools/testing/selftests/net/bpf_offload.py4
-rwxr-xr-xtools/testing/selftests/net/broadcast_ether_dst.sh83
-rwxr-xr-xtools/testing/selftests/net/busy_poll_test.sh24
-rw-r--r--tools/testing/selftests/net/busy_poller.c16
-rw-r--r--tools/testing/selftests/net/can/config3
-rw-r--r--tools/testing/selftests/net/can/test_raw_filter.c2
-rw-r--r--tools/testing/selftests/net/cmsg_sender.c12
-rw-r--r--tools/testing/selftests/net/config139
-rw-r--r--tools/testing/selftests/net/epoll_busy_poll.c2
-rwxr-xr-xtools/testing/selftests/net/fcnal-ipv4.sh2
-rwxr-xr-xtools/testing/selftests/net/fcnal-ipv6.sh2
-rwxr-xr-xtools/testing/selftests/net/fcnal-other.sh2
-rwxr-xr-xtools/testing/selftests/net/fcnal-test.sh435
-rwxr-xr-xtools/testing/selftests/net/fdb_notify.sh26
-rwxr-xr-xtools/testing/selftests/net/fib_nexthops.sh52
-rwxr-xr-xtools/testing/selftests/net/fib_tests.sh66
-rw-r--r--tools/testing/selftests/net/forwarding/Makefile57
-rw-r--r--tools/testing/selftests/net/forwarding/README15
-rwxr-xr-xtools/testing/selftests/net/forwarding/bridge_activity_notify.sh170
-rwxr-xr-xtools/testing/selftests/net/forwarding/bridge_fdb_local_vlan_0.sh387
-rwxr-xr-xtools/testing/selftests/net/forwarding/bridge_mdb.sh100
-rw-r--r--tools/testing/selftests/net/forwarding/config30
-rwxr-xr-xtools/testing/selftests/net/forwarding/custom_multipath_hash.sh2
-rwxr-xr-xtools/testing/selftests/net/forwarding/gre_custom_multipath_hash.sh2
-rwxr-xr-xtools/testing/selftests/net/forwarding/ip6_forward_instats_vrf.sh6
-rwxr-xr-xtools/testing/selftests/net/forwarding/ip6gre_custom_multipath_hash.sh2
-rw-r--r--tools/testing/selftests/net/forwarding/lib.sh50
-rwxr-xr-xtools/testing/selftests/net/forwarding/lib_sh_test.sh7
-rwxr-xr-xtools/testing/selftests/net/forwarding/local_termination.sh2
-rwxr-xr-xtools/testing/selftests/net/forwarding/mirror_gre_bridge_1q_lag.sh2
-rwxr-xr-xtools/testing/selftests/net/forwarding/mirror_gre_vlan_bridge_1q.sh4
-rwxr-xr-xtools/testing/selftests/net/forwarding/router.sh29
-rw-r--r--tools/testing/selftests/net/forwarding/sch_ets_core.sh9
-rwxr-xr-xtools/testing/selftests/net/forwarding/sch_red.sh12
-rw-r--r--tools/testing/selftests/net/forwarding/sch_tbf_core.sh6
-rwxr-xr-xtools/testing/selftests/net/forwarding/vxlan_bridge_1q_mc_ul.sh141
-rwxr-xr-xtools/testing/selftests/net/forwarding/vxlan_reserved.sh33
-rwxr-xr-xtools/testing/selftests/net/gro.sh105
-rw-r--r--tools/testing/selftests/net/hsr/Makefile6
-rw-r--r--tools/testing/selftests/net/hsr/config4
-rw-r--r--tools/testing/selftests/net/io_uring_zerocopy_tx.c24
-rw-r--r--tools/testing/selftests/net/ip_local_port_range.c2
-rw-r--r--tools/testing/selftests/net/ipsec.c2
-rw-r--r--tools/testing/selftests/net/ipv6_fragmentation.c114
-rw-r--r--tools/testing/selftests/net/lib.sh74
-rw-r--r--tools/testing/selftests/net/lib/Makefile15
-rwxr-xr-xtools/testing/selftests/net/lib/ksft_setup_loopback.sh111
-rw-r--r--tools/testing/selftests/net/lib/py/__init__.py32
-rw-r--r--tools/testing/selftests/net/lib/py/ksft.py115
-rw-r--r--tools/testing/selftests/net/lib/py/nsim.py2
-rw-r--r--tools/testing/selftests/net/lib/py/utils.py65
-rw-r--r--tools/testing/selftests/net/lib/py/ynl.py5
-rw-r--r--tools/testing/selftests/net/lib/sh/defer.sh20
-rw-r--r--tools/testing/selftests/net/lib/xdp_native.bpf.c103
-rw-r--r--tools/testing/selftests/net/mptcp/Makefile32
-rw-r--r--tools/testing/selftests/net/mptcp/config44
-rwxr-xr-xtools/testing/selftests/net/mptcp/diag.sh2
-rw-r--r--tools/testing/selftests/net/mptcp/mptcp_connect.c44
-rwxr-xr-xtools/testing/selftests/net/mptcp/mptcp_connect.sh154
-rw-r--r--tools/testing/selftests/net/mptcp/mptcp_inq.c14
-rwxr-xr-xtools/testing/selftests/net/mptcp/mptcp_join.sh582
-rw-r--r--tools/testing/selftests/net/mptcp/mptcp_lib.sh81
-rw-r--r--tools/testing/selftests/net/mptcp/mptcp_sockopt.c30
-rwxr-xr-xtools/testing/selftests/net/mptcp/mptcp_sockopt.sh45
-rwxr-xr-xtools/testing/selftests/net/mptcp/pm_netlink.sh6
-rw-r--r--tools/testing/selftests/net/mptcp/pm_nl_ctl.c25
-rwxr-xr-xtools/testing/selftests/net/mptcp/simult_flows.sh46
-rwxr-xr-xtools/testing/selftests/net/mptcp/userspace_pm.sh19
-rw-r--r--tools/testing/selftests/net/netfilter/Makefile89
-rw-r--r--tools/testing/selftests/net/netfilter/config51
-rwxr-xr-xtools/testing/selftests/net/netfilter/conntrack_clash.sh2
-rw-r--r--tools/testing/selftests/net/netfilter/conntrack_dump_flush.c2
-rwxr-xr-xtools/testing/selftests/net/netfilter/conntrack_resize.sh5
-rwxr-xr-xtools/testing/selftests/net/netfilter/nf_nat_edemux.sh58
-rwxr-xr-xtools/testing/selftests/net/netfilter/nft_concat_range.sh56
-rwxr-xr-xtools/testing/selftests/net/netfilter/nft_fib.sh13
-rwxr-xr-xtools/testing/selftests/net/netfilter/nft_flowtable.sh213
-rwxr-xr-xtools/testing/selftests/net/netfilter/nft_nat.sh4
-rw-r--r--tools/testing/selftests/net/netfilter/sctp_collision.c3
-rw-r--r--tools/testing/selftests/net/netfilter/udpclash.c2
-rw-r--r--tools/testing/selftests/net/netlink-dumps.c46
-rwxr-xr-xtools/testing/selftests/net/openvswitch/openvswitch.sh88
-rw-r--r--tools/testing/selftests/net/openvswitch/ovs-dpctl.py2
-rw-r--r--tools/testing/selftests/net/ovpn/Makefile12
-rw-r--r--tools/testing/selftests/net/ovpn/config12
-rw-r--r--tools/testing/selftests/net/ovpn/ovpn-cli.c5
-rw-r--r--tools/testing/selftests/net/packetdrill/Makefile10
-rw-r--r--tools/testing/selftests/net/packetdrill/config4
-rwxr-xr-xtools/testing/selftests/net/packetdrill/defaults.sh3
-rwxr-xr-xtools/testing/selftests/net/packetdrill/ksft_runner.sh53
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_close_no_rst.pkt32
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_fastopen_server_basic-cookie-not-reqd.pkt32
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_fastopen_server_basic-no-setsockopt.pkt21
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_fastopen_server_basic-non-tfo-listener.pkt26
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_fastopen_server_basic-pure-syn-data.pkt50
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_fastopen_server_basic-rw.pkt23
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_fastopen_server_basic-zero-payload.pkt26
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_fastopen_server_client-ack-dropped-then-recovery-ms-timestamps.pkt46
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_fastopen_server_experimental_option.pkt37
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_fastopen_server_fin-close-socket.pkt30
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_fastopen_server_icmp-before-accept.pkt49
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_fastopen_server_reset-after-accept.pkt37
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_fastopen_server_reset-before-accept.pkt32
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_fastopen_server_reset-close-with-unread-data.pkt32
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_fastopen_server_reset-non-tfo-socket.pkt37
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_fastopen_server_sockopt-fastopen-key.pkt74
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_fastopen_server_trigger-rst-listener-closed.pkt21
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_fastopen_server_trigger-rst-reconnect.pkt30
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_fastopen_server_trigger-rst-unread-data-closed.pkt23
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_rto_synack_rto_max.pkt54
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_syscall_bad_arg_sendmsg-empty-iov.pkt4
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_user_timeout_user-timeout-probe.pkt6
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_zerocopy_basic.pkt2
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_zerocopy_batch.pkt2
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_zerocopy_client.pkt2
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_zerocopy_closed.pkt2
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_zerocopy_epoll_edge.pkt3
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_zerocopy_epoll_exclusive.pkt3
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_zerocopy_epoll_oneshot.pkt3
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_zerocopy_fastopen-client.pkt2
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_zerocopy_fastopen-server.pkt2
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_zerocopy_maxfrags.pkt2
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_zerocopy_small.pkt2
-rwxr-xr-xtools/testing/selftests/net/pmtu.sh9
-rw-r--r--tools/testing/selftests/net/proc_net_pktgen.c2
-rw-r--r--tools/testing/selftests/net/psock_fanout.c2
-rw-r--r--tools/testing/selftests/net/psock_lib.h4
-rw-r--r--tools/testing/selftests/net/psock_tpacket.c6
-rw-r--r--tools/testing/selftests/net/rds/Makefile10
-rw-r--r--tools/testing/selftests/net/reuseaddr_ports_exhausted.c2
-rw-r--r--tools/testing/selftests/net/reuseport_bpf.c2
-rw-r--r--tools/testing/selftests/net/reuseport_bpf_numa.c2
-rwxr-xr-xtools/testing/selftests/net/route_hint.sh79
-rwxr-xr-xtools/testing/selftests/net/rps_default_mask.sh12
-rwxr-xr-xtools/testing/selftests/net/rtnetlink.sh37
-rw-r--r--tools/testing/selftests/net/rxtimestamp.c2
-rw-r--r--tools/testing/selftests/net/sctp_hello.c17
-rwxr-xr-xtools/testing/selftests/net/sctp_vrf.sh73
-rw-r--r--tools/testing/selftests/net/setup_loopback.sh120
-rw-r--r--tools/testing/selftests/net/setup_veth.sh45
-rw-r--r--tools/testing/selftests/net/sk_so_peek_off.c2
-rw-r--r--tools/testing/selftests/net/so_incoming_cpu.c2
-rw-r--r--tools/testing/selftests/net/so_txtime.c2
-rw-r--r--tools/testing/selftests/net/socket.c13
-rw-r--r--tools/testing/selftests/net/tap.c2
-rw-r--r--tools/testing/selftests/net/tcp_ao/config2
-rw-r--r--tools/testing/selftests/net/tcp_ao/lib/setup.c2
-rw-r--r--tools/testing/selftests/net/tcp_fastopen_backup_key.c2
-rw-r--r--tools/testing/selftests/net/tcp_port_share.c258
-rwxr-xr-xtools/testing/selftests/net/test_bridge_backup_port.sh31
-rwxr-xr-xtools/testing/selftests/net/test_vxlan_fdb_changelink.sh8
-rwxr-xr-xtools/testing/selftests/net/test_vxlan_nh.sh223
-rwxr-xr-xtools/testing/selftests/net/tfo_passive.sh2
-rw-r--r--tools/testing/selftests/net/tls.c541
-rwxr-xr-xtools/testing/selftests/net/toeplitz.sh199
-rwxr-xr-xtools/testing/selftests/net/toeplitz_client.sh28
-rwxr-xr-xtools/testing/selftests/net/traceroute.sh561
-rw-r--r--tools/testing/selftests/net/tun.c2
-rw-r--r--tools/testing/selftests/net/txtimestamp.c2
-rw-r--r--tools/testing/selftests/net/udpgso_bench_tx.c2
-rwxr-xr-xtools/testing/selftests/net/vlan_bridge_binding.sh46
-rw-r--r--tools/testing/selftests/net/ynl.mk5
-rw-r--r--tools/testing/selftests/nolibc/Makefile.nolibc23
-rw-r--r--tools/testing/selftests/nolibc/nolibc-test.c19
-rwxr-xr-xtools/testing/selftests/nolibc/run-tests.sh8
-rw-r--r--tools/testing/selftests/openat2/helpers.h2
-rw-r--r--tools/testing/selftests/openat2/openat2_test.c2
-rw-r--r--tools/testing/selftests/openat2/rename_attack_test.c2
-rw-r--r--tools/testing/selftests/openat2/resolve_test.c2
-rw-r--r--tools/testing/selftests/pci_endpoint/pci_endpoint_test.c6
-rw-r--r--tools/testing/selftests/perf_events/mmap.c2
-rw-r--r--tools/testing/selftests/perf_events/remove_on_exec.c2
-rw-r--r--tools/testing/selftests/perf_events/sigtrap_threads.c2
-rw-r--r--tools/testing/selftests/perf_events/watermark_signal.c4
-rw-r--r--tools/testing/selftests/pid_namespace/pid_max.c2
-rw-r--r--tools/testing/selftests/pid_namespace/regression_enomem.c2
-rw-r--r--tools/testing/selftests/pidfd/config1
-rw-r--r--tools/testing/selftests/pidfd/pidfd.h17
-rw-r--r--tools/testing/selftests/pidfd/pidfd_bind_mount.c2
-rw-r--r--tools/testing/selftests/pidfd/pidfd_fdinfo_test.c2
-rw-r--r--tools/testing/selftests/pidfd/pidfd_file_handle_test.c2
-rw-r--r--tools/testing/selftests/pidfd/pidfd_getfd_test.c2
-rw-r--r--tools/testing/selftests/pidfd/pidfd_info_test.c75
-rw-r--r--tools/testing/selftests/pidfd/pidfd_open_test.c2
-rw-r--r--tools/testing/selftests/pidfd/pidfd_poll_test.c2
-rw-r--r--tools/testing/selftests/pidfd/pidfd_setattr_test.c2
-rw-r--r--tools/testing/selftests/pidfd/pidfd_setns_test.c2
-rw-r--r--tools/testing/selftests/pidfd/pidfd_test.c2
-rw-r--r--tools/testing/selftests/pidfd/pidfd_wait.c2
-rw-r--r--tools/testing/selftests/pidfd/pidfd_xattr_test.c2
-rw-r--r--tools/testing/selftests/powerpc/include/instructions.h2
-rw-r--r--tools/testing/selftests/prctl/set-anon-vma-name-test.c2
-rw-r--r--tools/testing/selftests/prctl/set-process-name.c2
-rw-r--r--tools/testing/selftests/proc/.gitignore2
-rw-r--r--tools/testing/selftests/proc/Makefile2
-rw-r--r--tools/testing/selftests/proc/proc-maps-race.c67
-rw-r--r--tools/testing/selftests/proc/proc-net-dev-lseek.c68
-rw-r--r--tools/testing/selftests/proc/proc-pid-vm.c14
-rw-r--r--tools/testing/selftests/proc/proc-pidns.c211
-rw-r--r--tools/testing/selftests/ptrace/get_set_sud.c2
-rw-r--r--tools/testing/selftests/ptrace/get_syscall_info.c2
-rw-r--r--tools/testing/selftests/ptrace/set_syscall_info.c2
-rw-r--r--tools/testing/selftests/ptrace/vmaccess.c2
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/jitter.sh27
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-again.sh56
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-series.sh116
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm.sh2
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/torture.sh1
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TREE041
-rw-r--r--tools/testing/selftests/resctrl/resctrl.h2
-rw-r--r--tools/testing/selftests/ring-buffer/map_test.c2
-rw-r--r--tools/testing/selftests/riscv/README24
-rw-r--r--tools/testing/selftests/riscv/abi/pointer_masking.c2
-rw-r--r--tools/testing/selftests/riscv/hwprobe/cbo.c167
-rw-r--r--tools/testing/selftests/riscv/hwprobe/hwprobe.c2
-rw-r--r--tools/testing/selftests/riscv/hwprobe/which-cpus.c2
-rw-r--r--tools/testing/selftests/riscv/mm/mmap_bottomup.c2
-rw-r--r--tools/testing/selftests/riscv/mm/mmap_default.c2
-rw-r--r--tools/testing/selftests/riscv/mm/mmap_test.h2
-rw-r--r--tools/testing/selftests/riscv/sigreturn/sigreturn.c2
-rw-r--r--tools/testing/selftests/riscv/vector/Makefile5
-rw-r--r--tools/testing/selftests/riscv/vector/v_initval.c2
-rw-r--r--tools/testing/selftests/riscv/vector/vstate_prctl.c2
-rw-r--r--tools/testing/selftests/riscv/vector/vstate_ptrace.c134
-rw-r--r--tools/testing/selftests/rseq/basic_percpu_ops_test.c2
-rw-r--r--tools/testing/selftests/rseq/rseq-riscv.h3
-rw-r--r--tools/testing/selftests/rseq/rseq-s390.h39
-rw-r--r--tools/testing/selftests/rseq/rseq.c10
-rw-r--r--tools/testing/selftests/rtc/rtctest.c2
-rwxr-xr-xtools/testing/selftests/run_kselftest.sh14
-rw-r--r--tools/testing/selftests/sched_ext/Makefile1
-rw-r--r--tools/testing/selftests/sched_ext/hotplug.c1
-rw-r--r--tools/testing/selftests/sched_ext/peek_dsq.bpf.c251
-rw-r--r--tools/testing/selftests/sched_ext/peek_dsq.c224
-rw-r--r--tools/testing/selftests/seccomp/seccomp_benchmark.c2
-rw-r--r--tools/testing/selftests/seccomp/seccomp_bpf.c240
-rw-r--r--tools/testing/selftests/sgx/main.c2
-rw-r--r--tools/testing/selftests/signal/mangle_uc_sigmask.c2
-rw-r--r--tools/testing/selftests/signal/sas.c2
-rw-r--r--tools/testing/selftests/sparc64/drivers/adi-test.c2
-rw-r--r--tools/testing/selftests/sync/sync_test.c2
-rw-r--r--tools/testing/selftests/syscall_user_dispatch/sud_test.c2
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/actions/police.json2
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json270
-rw-r--r--tools/testing/selftests/tdx/tdx_guest_test.c2
-rw-r--r--tools/testing/selftests/thermal/intel/workload_hint/workload_hint_test.c2
-rw-r--r--tools/testing/selftests/timens/timens.h2
-rw-r--r--tools/testing/selftests/timers/adjtick.c2
-rw-r--r--tools/testing/selftests/timers/alarmtimer-suspend.c2
-rw-r--r--tools/testing/selftests/timers/change_skew.c2
-rw-r--r--tools/testing/selftests/timers/clocksource-switch.c2
-rw-r--r--tools/testing/selftests/timers/freq-step.c2
-rw-r--r--tools/testing/selftests/timers/inconsistency-check.c2
-rw-r--r--tools/testing/selftests/timers/leap-a-day.c2
-rw-r--r--tools/testing/selftests/timers/leapcrash.c2
-rw-r--r--tools/testing/selftests/timers/mqueue-lat.c2
-rw-r--r--tools/testing/selftests/timers/nanosleep.c57
-rw-r--r--tools/testing/selftests/timers/nsleep-lat.c2
-rw-r--r--tools/testing/selftests/timers/posix_timers.c34
-rw-r--r--tools/testing/selftests/timers/raw_skew.c2
-rw-r--r--tools/testing/selftests/timers/rtcpie.c2
-rw-r--r--tools/testing/selftests/timers/set-2038.c2
-rw-r--r--tools/testing/selftests/timers/set-tai.c2
-rw-r--r--tools/testing/selftests/timers/set-timer-lat.c2
-rw-r--r--tools/testing/selftests/timers/set-tz.c2
-rw-r--r--tools/testing/selftests/timers/skew_consistency.c2
-rw-r--r--tools/testing/selftests/timers/threadtest.c2
-rw-r--r--tools/testing/selftests/timers/valid-adjtimex.c2
-rw-r--r--tools/testing/selftests/tmpfs/bug-link-o-tmpfile.c2
-rw-r--r--tools/testing/selftests/tpm2/tpm2.py4
-rw-r--r--tools/testing/selftests/tty/.gitignore1
-rw-r--r--tools/testing/selftests/tty/Makefile6
-rw-r--r--tools/testing/selftests/tty/config1
-rw-r--r--tools/testing/selftests/tty/tty_tiocsti_test.c650
-rw-r--r--tools/testing/selftests/tty/tty_tstamp_update.c2
-rw-r--r--tools/testing/selftests/ublk/Makefile1
-rw-r--r--tools/testing/selftests/ublk/file_backed.c10
-rw-r--r--tools/testing/selftests/ublk/kublk.c142
-rw-r--r--tools/testing/selftests/ublk/kublk.h54
-rw-r--r--tools/testing/selftests/ublk/null.c4
-rw-r--r--tools/testing/selftests/ublk/stripe.c4
-rwxr-xr-xtools/testing/selftests/ublk/test_generic_01.sh4
-rwxr-xr-xtools/testing/selftests/ublk/test_generic_02.sh4
-rwxr-xr-xtools/testing/selftests/ublk/test_generic_12.sh4
-rwxr-xr-xtools/testing/selftests/ublk/test_generic_13.sh20
-rwxr-xr-xtools/testing/selftests/ublk/test_null_01.sh4
-rwxr-xr-xtools/testing/selftests/ublk/test_null_02.sh4
-rwxr-xr-xtools/testing/selftests/ublk/test_stress_04.sh6
-rwxr-xr-xtools/testing/selftests/ublk/test_stress_05.sh4
-rw-r--r--tools/testing/selftests/ublk/utils.h2
-rw-r--r--tools/testing/selftests/uevent/uevent_filtering.c2
-rw-r--r--tools/testing/selftests/user_events/abi_test.c2
-rw-r--r--tools/testing/selftests/user_events/dyn_test.c2
-rw-r--r--tools/testing/selftests/user_events/ftrace_test.c2
-rw-r--r--tools/testing/selftests/user_events/perf_test.c4
-rw-r--r--tools/testing/selftests/user_events/user_events_selftests.h2
-rw-r--r--tools/testing/selftests/vDSO/.gitignore1
-rw-r--r--tools/testing/selftests/vDSO/Makefile2
-rw-r--r--tools/testing/selftests/vDSO/vdso_call.h7
-rw-r--r--tools/testing/selftests/vDSO/vdso_config.h4
-rw-r--r--tools/testing/selftests/vDSO/vdso_test_abi.c103
-rw-r--r--tools/testing/selftests/vDSO/vdso_test_chacha.c2
-rw-r--r--tools/testing/selftests/vDSO/vdso_test_clock_getres.c123
-rw-r--r--tools/testing/selftests/vDSO/vdso_test_correctness.c2
-rw-r--r--tools/testing/selftests/vDSO/vdso_test_getcpu.c2
-rw-r--r--tools/testing/selftests/vDSO/vdso_test_getrandom.c2
-rw-r--r--tools/testing/selftests/vDSO/vdso_test_gettimeofday.c2
-rw-r--r--tools/testing/selftests/verification/.gitignore (renamed from tools/bpf/runqslower/.gitignore)2
-rw-r--r--tools/testing/selftests/verification/Makefile8
-rw-r--r--tools/testing/selftests/verification/config1
-rw-r--r--tools/testing/selftests/verification/settings1
-rw-r--r--tools/testing/selftests/verification/test.d/functions39
-rw-r--r--tools/testing/selftests/verification/test.d/rv_monitor_enable_disable.tc75
-rw-r--r--tools/testing/selftests/verification/test.d/rv_monitor_reactor.tc68
-rw-r--r--tools/testing/selftests/verification/test.d/rv_monitors_available.tc18
-rw-r--r--tools/testing/selftests/verification/test.d/rv_wwnr_printk.tc30
-rwxr-xr-xtools/testing/selftests/verification/verificationtest-ktap8
-rw-r--r--tools/testing/selftests/vfio/.gitignore10
-rw-r--r--tools/testing/selftests/vfio/Makefile29
-rw-r--r--tools/testing/selftests/vfio/lib/drivers/dsa/dsa.c416
l---------tools/testing/selftests/vfio/lib/drivers/dsa/registers.h1
l---------tools/testing/selftests/vfio/lib/drivers/ioat/hw.h1
-rw-r--r--tools/testing/selftests/vfio/lib/drivers/ioat/ioat.c235
l---------tools/testing/selftests/vfio/lib/drivers/ioat/registers.h1
-rw-r--r--tools/testing/selftests/vfio/lib/include/libvfio.h26
-rw-r--r--tools/testing/selftests/vfio/lib/include/libvfio/assert.h54
-rw-r--r--tools/testing/selftests/vfio/lib/include/libvfio/iommu.h76
-rw-r--r--tools/testing/selftests/vfio/lib/include/libvfio/iova_allocator.h23
-rw-r--r--tools/testing/selftests/vfio/lib/include/libvfio/vfio_pci_device.h125
-rw-r--r--tools/testing/selftests/vfio/lib/include/libvfio/vfio_pci_driver.h97
-rw-r--r--tools/testing/selftests/vfio/lib/iommu.c465
-rw-r--r--tools/testing/selftests/vfio/lib/iova_allocator.c94
-rw-r--r--tools/testing/selftests/vfio/lib/libvfio.c78
-rw-r--r--tools/testing/selftests/vfio/lib/libvfio.mk29
-rw-r--r--tools/testing/selftests/vfio/lib/vfio_pci_device.c378
-rw-r--r--tools/testing/selftests/vfio/lib/vfio_pci_driver.c112
-rwxr-xr-xtools/testing/selftests/vfio/scripts/cleanup.sh41
-rwxr-xr-xtools/testing/selftests/vfio/scripts/lib.sh42
-rwxr-xr-xtools/testing/selftests/vfio/scripts/run.sh16
-rwxr-xr-xtools/testing/selftests/vfio/scripts/setup.sh48
-rw-r--r--tools/testing/selftests/vfio/vfio_dma_mapping_test.c312
-rw-r--r--tools/testing/selftests/vfio/vfio_iommufd_setup_test.c127
-rw-r--r--tools/testing/selftests/vfio/vfio_pci_device_init_perf_test.c168
-rw-r--r--tools/testing/selftests/vfio/vfio_pci_device_test.c182
-rw-r--r--tools/testing/selftests/vfio/vfio_pci_driver_test.c263
-rwxr-xr-xtools/testing/selftests/vsock/vmtest.sh354
-rw-r--r--tools/testing/selftests/watchdog/watchdog-test.c6
-rw-r--r--tools/testing/selftests/wireguard/qemu/kernel.config10
-rw-r--r--tools/testing/selftests/x86/corrupt_xstate_header.c2
-rw-r--r--tools/testing/selftests/x86/helpers.h2
-rw-r--r--tools/testing/selftests/x86/lam.c2
-rw-r--r--tools/testing/selftests/x86/syscall_numbering.c2
-rw-r--r--tools/testing/selftests/x86/test_mremap_vdso.c2
-rw-r--r--tools/testing/selftests/x86/test_vsyscall.c23
-rw-r--r--tools/testing/selftests/x86/xstate.h2
-rw-r--r--tools/testing/selftests/zram/README1
-rw-r--r--tools/testing/shared/linux.c120
-rw-r--r--tools/testing/shared/linux/idr.h4
-rw-r--r--tools/testing/shared/linux/maple_tree.h6
-rw-r--r--tools/testing/shared/maple-shared.h11
-rw-r--r--tools/testing/shared/maple-shim.c7
-rw-r--r--tools/testing/shared/shared.mk6
-rw-r--r--tools/testing/vma/linux/atomic.h17
-rw-r--r--tools/testing/vma/vma.c112
-rw-r--r--tools/testing/vma/vma_internal.h933
-rw-r--r--tools/testing/vsock/util.c1
-rw-r--r--tools/testing/vsock/vsock_test.c7
-rw-r--r--tools/thermal/thermal-engine/thermal-engine.c2
-rw-r--r--tools/tracing/latency/Makefile.config8
-rw-r--r--tools/tracing/latency/latency-collector.c2
-rw-r--r--tools/tracing/rtla/Makefile.config8
-rw-r--r--tools/tracing/rtla/Makefile.rtla2
-rw-r--r--tools/tracing/rtla/src/Build1
-rw-r--r--tools/tracing/rtla/src/actions.c12
-rw-r--r--tools/tracing/rtla/src/actions.h2
-rw-r--r--tools/tracing/rtla/src/common.c350
-rw-r--r--tools/tracing/rtla/src/common.h158
-rw-r--r--tools/tracing/rtla/src/osnoise.c101
-rw-r--r--tools/tracing/rtla/src/osnoise.h114
-rw-r--r--tools/tracing/rtla/src/osnoise_hist.c463
-rw-r--r--tools/tracing/rtla/src/osnoise_top.c379
-rw-r--r--tools/tracing/rtla/src/timerlat.bpf.c3
-rw-r--r--tools/tracing/rtla/src/timerlat.c206
-rw-r--r--tools/tracing/rtla/src/timerlat.h55
-rw-r--r--tools/tracing/rtla/src/timerlat_bpf.c22
-rw-r--r--tools/tracing/rtla/src/timerlat_hist.c768
-rw-r--r--tools/tracing/rtla/src/timerlat_top.c678
-rw-r--r--tools/tracing/rtla/src/timerlat_u.c12
-rw-r--r--tools/tracing/rtla/src/trace.h3
-rw-r--r--tools/tracing/rtla/src/utils.c41
-rw-r--r--tools/tracing/rtla/src/utils.h2
-rw-r--r--tools/tracing/rtla/tests/engine.sh26
-rw-r--r--tools/tracing/rtla/tests/osnoise.t27
-rw-r--r--tools/tracing/rtla/tests/timerlat.t10
-rw-r--r--tools/usb/usbip/src/usbipd.c4
-rw-r--r--tools/virtio/linux/compiler.h2
-rw-r--r--tools/virtio/linux/kmsan.h2
1880 files changed, 121127 insertions, 29420 deletions
diff --git a/tools/Makefile b/tools/Makefile
index c31cbbd12c45..cb40961a740f 100644
--- a/tools/Makefile
+++ b/tools/Makefile
@@ -14,6 +14,7 @@ help:
@echo ' counter - counter tools'
@echo ' cpupower - a tool for all things x86 CPU power'
@echo ' debugging - tools for debugging'
+ @echo ' dma - tools for DMA mapping'
@echo ' firewire - the userspace part of nosy, an IEEE-1394 traffic sniffer'
@echo ' firmware - Firmware tools'
@echo ' freefall - laptop accelerometer program for disk protection'
@@ -69,7 +70,7 @@ acpi: FORCE
cpupower: FORCE
$(call descend,power/$@)
-counter firewire hv guest bootconfig spi usb virtio mm bpf iio gpio objtool leds wmi firmware debugging tracing: FORCE
+counter dma firewire hv guest bootconfig spi usb virtio mm bpf iio gpio objtool leds wmi firmware debugging tracing: FORCE
$(call descend,$@)
bpf/%: FORCE
@@ -122,7 +123,7 @@ kvm_stat: FORCE
ynl: FORCE
$(call descend,net/ynl)
-all: acpi counter cpupower gpio hv firewire \
+all: acpi counter cpupower dma gpio hv firewire \
perf selftests bootconfig spi turbostat usb \
virtio mm bpf x86_energy_perf_policy \
tmon freefall iio objtool kvm_stat wmi \
@@ -134,7 +135,7 @@ acpi_install:
cpupower_install:
$(call descend,power/$(@:_install=),install)
-counter_install firewire_install gpio_install hv_install iio_install perf_install bootconfig_install spi_install usb_install virtio_install mm_install bpf_install objtool_install wmi_install debugging_install tracing_install:
+counter_install dma_install firewire_install gpio_install hv_install iio_install perf_install bootconfig_install spi_install usb_install virtio_install mm_install bpf_install objtool_install wmi_install debugging_install tracing_install:
$(call descend,$(@:_install=),install)
selftests_install:
@@ -164,7 +165,7 @@ kvm_stat_install:
ynl_install:
$(call descend,net/$(@:_install=),install)
-install: acpi_install counter_install cpupower_install gpio_install \
+install: acpi_install counter_install cpupower_install dma_install gpio_install \
hv_install firewire_install iio_install \
perf_install selftests_install turbostat_install usb_install \
virtio_install mm_install bpf_install x86_energy_perf_policy_install \
@@ -178,7 +179,7 @@ acpi_clean:
cpupower_clean:
$(call descend,power/cpupower,clean)
-counter_clean hv_clean firewire_clean bootconfig_clean spi_clean usb_clean virtio_clean mm_clean wmi_clean bpf_clean iio_clean gpio_clean objtool_clean leds_clean firmware_clean debugging_clean tracing_clean:
+counter_clean dma_clean hv_clean firewire_clean bootconfig_clean spi_clean usb_clean virtio_clean mm_clean wmi_clean bpf_clean iio_clean gpio_clean objtool_clean leds_clean firmware_clean debugging_clean tracing_clean:
$(call descend,$(@:_clean=),clean)
libapi_clean:
@@ -224,7 +225,7 @@ build_clean:
ynl_clean:
$(call descend,net/$(@:_clean=),clean)
-clean: acpi_clean counter_clean cpupower_clean hv_clean firewire_clean \
+clean: acpi_clean counter_clean cpupower_clean dma_clean hv_clean firewire_clean \
perf_clean selftests_clean turbostat_clean bootconfig_clean spi_clean usb_clean virtio_clean \
mm_clean bpf_clean iio_clean x86_energy_perf_policy_clean tmon_clean \
freefall_clean build_clean libbpf_clean libsubcmd_clean \
diff --git a/tools/accounting/delaytop.c b/tools/accounting/delaytop.c
index 9afb1ffc00ba..72cc500b44b1 100644
--- a/tools/accounting/delaytop.c
+++ b/tools/accounting/delaytop.c
@@ -42,14 +42,13 @@
#include <linux/genetlink.h>
#include <linux/taskstats.h>
#include <linux/cgroupstats.h>
+#include <stddef.h>
-#define PSI_CPU_SOME "/proc/pressure/cpu"
-#define PSI_CPU_FULL "/proc/pressure/cpu"
-#define PSI_MEMORY_SOME "/proc/pressure/memory"
-#define PSI_MEMORY_FULL "/proc/pressure/memory"
-#define PSI_IO_SOME "/proc/pressure/io"
-#define PSI_IO_FULL "/proc/pressure/io"
-#define PSI_IRQ_FULL "/proc/pressure/irq"
+#define PSI_PATH "/proc/pressure"
+#define PSI_CPU_PATH "/proc/pressure/cpu"
+#define PSI_MEMORY_PATH "/proc/pressure/memory"
+#define PSI_IO_PATH "/proc/pressure/io"
+#define PSI_IRQ_PATH "/proc/pressure/irq"
#define NLA_NEXT(na) ((struct nlattr *)((char *)(na) + NLA_ALIGN((na)->nla_len)))
#define NLA_DATA(na) ((void *)((char *)(na) + NLA_HDRLEN))
@@ -61,24 +60,28 @@
#define TASK_COMM_LEN 16
#define MAX_MSG_SIZE 1024
#define MAX_TASKS 1000
+#define MAX_BUF_LEN 256
#define SET_TASK_STAT(task_count, field) tasks[task_count].field = stats.field
#define BOOL_FPRINT(stream, fmt, ...) \
({ \
int ret = fprintf(stream, fmt, ##__VA_ARGS__); \
ret >= 0; \
})
+#define TASK_AVG(task, field) average_ms((task).field##_delay_total, (task).field##_count)
#define PSI_LINE_FORMAT "%-12s %6.1f%%/%6.1f%%/%6.1f%%/%8llu(ms)\n"
-
-/* Program settings structure */
-struct config {
- int delay; /* Update interval in seconds */
- int iterations; /* Number of iterations, 0 == infinite */
- int max_processes; /* Maximum number of processes to show */
- char sort_field; /* Field to sort by */
- int output_one_time; /* Output once and exit */
- int monitor_pid; /* Monitor specific PID */
- char *container_path; /* Path to container cgroup */
-};
+#define DELAY_FMT_DEFAULT "%8.2f %8.2f %8.2f %8.2f\n"
+#define DELAY_FMT_MEMVERBOSE "%8.2f %8.2f %8.2f %8.2f %8.2f %8.2f\n"
+#define SORT_FIELD(name, cmd, modes) \
+ {#name, #cmd, \
+ offsetof(struct task_info, name##_delay_total), \
+ offsetof(struct task_info, name##_count), \
+ modes}
+#define END_FIELD {NULL, 0, 0}
+
+/* Display mode types */
+#define MODE_TYPE_ALL (0xFFFFFFFF)
+#define MODE_DEFAULT (1 << 0)
+#define MODE_MEMVERBOSE (1 << 1)
/* PSI statistics structure */
struct psi_stats {
@@ -119,6 +122,8 @@ struct task_info {
unsigned long long wpcopy_delay_total;
unsigned long long irq_count;
unsigned long long irq_delay_total;
+ unsigned long long mem_count;
+ unsigned long long mem_delay_total;
};
/* Container statistics structure */
@@ -130,6 +135,27 @@ struct container_stats {
int nr_io_wait; /* Number of processes in IO wait */
};
+/* Delay field structure */
+struct field_desc {
+ const char *name; /* Field name for cmdline argument */
+ const char *cmd_char; /* Interactive command */
+ unsigned long total_offset; /* Offset of total delay in task_info */
+ unsigned long count_offset; /* Offset of count in task_info */
+ size_t supported_modes; /* Supported display modes */
+};
+
+/* Program settings structure */
+struct config {
+ int delay; /* Update interval in seconds */
+ int iterations; /* Number of iterations, 0 == infinite */
+ int max_processes; /* Maximum number of processes to show */
+ int output_one_time; /* Output once and exit */
+ int monitor_pid; /* Monitor specific PID */
+ char *container_path; /* Path to container cgroup */
+ const struct field_desc *sort_field; /* Current sort field */
+ size_t display_mode; /* Current display mode */
+};
+
/* Global variables */
static struct config cfg;
static struct psi_stats psi;
@@ -137,6 +163,19 @@ static struct task_info tasks[MAX_TASKS];
static int task_count;
static int running = 1;
static struct container_stats container_stats;
+static const struct field_desc sort_fields[] = {
+ SORT_FIELD(cpu, c, MODE_DEFAULT),
+ SORT_FIELD(blkio, i, MODE_DEFAULT),
+ SORT_FIELD(irq, q, MODE_DEFAULT),
+ SORT_FIELD(mem, m, MODE_DEFAULT | MODE_MEMVERBOSE),
+ SORT_FIELD(swapin, s, MODE_MEMVERBOSE),
+ SORT_FIELD(freepages, r, MODE_MEMVERBOSE),
+ SORT_FIELD(thrashing, t, MODE_MEMVERBOSE),
+ SORT_FIELD(compact, p, MODE_MEMVERBOSE),
+ SORT_FIELD(wpcopy, w, MODE_MEMVERBOSE),
+ END_FIELD
+};
+static int sort_selected;
/* Netlink socket variables */
static int nl_sd = -1;
@@ -158,18 +197,75 @@ static void disable_raw_mode(void)
tcsetattr(STDIN_FILENO, TCSAFLUSH, &orig_termios);
}
+/* Find field descriptor by command line */
+static const struct field_desc *get_field_by_cmd_char(char ch)
+{
+ const struct field_desc *field;
+
+ for (field = sort_fields; field->name != NULL; field++) {
+ if (field->cmd_char[0] == ch)
+ return field;
+ }
+
+ return NULL;
+}
+
+/* Find field descriptor by name with string comparison */
+static const struct field_desc *get_field_by_name(const char *name)
+{
+ const struct field_desc *field;
+ size_t field_len;
+
+ for (field = sort_fields; field->name != NULL; field++) {
+ field_len = strlen(field->name);
+ if (field_len != strlen(name))
+ continue;
+ if (strncmp(field->name, name, field_len) == 0)
+ return field;
+ }
+
+ return NULL;
+}
+
+/* Find display name for a field descriptor */
+static const char *get_name_by_field(const struct field_desc *field)
+{
+ return field ? field->name : "UNKNOWN";
+}
+
+/* Generate string of available field names */
+static void display_available_fields(size_t mode)
+{
+ const struct field_desc *field;
+ char buf[MAX_BUF_LEN];
+
+ buf[0] = '\0';
+
+ for (field = sort_fields; field->name != NULL; field++) {
+ if (!(field->supported_modes & mode))
+ continue;
+ strncat(buf, "|", MAX_BUF_LEN - strlen(buf) - 1);
+ strncat(buf, field->name, MAX_BUF_LEN - strlen(buf) - 1);
+ buf[MAX_BUF_LEN - 1] = '\0';
+ }
+
+ fprintf(stderr, "Available fields: %s\n", buf);
+}
+
/* Display usage information and command line options */
static void usage(void)
{
printf("Usage: delaytop [Options]\n"
"Options:\n"
- " -h, --help Show this help message and exit\n"
- " -d, --delay=SECONDS Set refresh interval (default: 2 seconds, min: 1)\n"
- " -n, --iterations=COUNT Set number of updates (default: 0 = infinite)\n"
- " -P, --processes=NUMBER Set maximum number of processes to show (default: 20, max: 1000)\n"
- " -o, --once Display once and exit\n"
- " -p, --pid=PID Monitor only the specified PID\n"
- " -C, --container=PATH Monitor the container at specified cgroup path\n");
+ " -h, --help Show this help message and exit\n"
+ " -d, --delay=SECONDS Set refresh interval (default: 2 seconds, min: 1)\n"
+ " -n, --iterations=COUNT Set number of updates (default: 0 = infinite)\n"
+ " -P, --processes=NUMBER Set maximum number of processes to show (default: 20, max: 1000)\n"
+ " -o, --once Display once and exit\n"
+ " -p, --pid=PID Monitor only the specified PID\n"
+ " -C, --container=PATH Monitor the container at specified cgroup path\n"
+ " -s, --sort=FIELD Sort by delay field (default: cpu)\n"
+ " -M, --memverbose Display memory detailed information\n");
exit(0);
}
@@ -177,6 +273,7 @@ static void usage(void)
static void parse_args(int argc, char **argv)
{
int c;
+ const struct field_desc *field;
struct option long_options[] = {
{"help", no_argument, 0, 'h'},
{"delay", required_argument, 0, 'd'},
@@ -184,7 +281,9 @@ static void parse_args(int argc, char **argv)
{"pid", required_argument, 0, 'p'},
{"once", no_argument, 0, 'o'},
{"processes", required_argument, 0, 'P'},
+ {"sort", required_argument, 0, 's'},
{"container", required_argument, 0, 'C'},
+ {"memverbose", no_argument, 0, 'M'},
{0, 0, 0, 0}
};
@@ -192,15 +291,16 @@ static void parse_args(int argc, char **argv)
cfg.delay = 2;
cfg.iterations = 0;
cfg.max_processes = 20;
- cfg.sort_field = 'c'; /* Default sort by CPU delay */
+ cfg.sort_field = &sort_fields[0]; /* Default sorted by CPU delay */
cfg.output_one_time = 0;
cfg.monitor_pid = 0; /* 0 means monitor all PIDs */
cfg.container_path = NULL;
+ cfg.display_mode = MODE_DEFAULT;
while (1) {
int option_index = 0;
- c = getopt_long(argc, argv, "hd:n:p:oP:C:", long_options, &option_index);
+ c = getopt_long(argc, argv, "hd:n:p:oP:C:s:M", long_options, &option_index);
if (c == -1)
break;
@@ -247,6 +347,26 @@ static void parse_args(int argc, char **argv)
case 'C':
cfg.container_path = strdup(optarg);
break;
+ case 's':
+ if (strlen(optarg) == 0) {
+ fprintf(stderr, "Error: empty sort field\n");
+ exit(1);
+ }
+
+ field = get_field_by_name(optarg);
+ /* Show available fields if invalid option provided */
+ if (!field) {
+ fprintf(stderr, "Error: invalid sort field '%s'\n", optarg);
+ display_available_fields(MODE_TYPE_ALL);
+ exit(1);
+ }
+
+ cfg.sort_field = field;
+ break;
+ case 'M':
+ cfg.display_mode = MODE_MEMVERBOSE;
+ cfg.sort_field = get_field_by_name("mem");
+ break;
default:
fprintf(stderr, "Try 'delaytop --help' for more information.\n");
exit(1);
@@ -254,6 +374,25 @@ static void parse_args(int argc, char **argv)
}
}
+/* Calculate average delay in milliseconds for overall memory */
+static void set_mem_delay_total(struct task_info *t)
+{
+ t->mem_delay_total = t->swapin_delay_total +
+ t->freepages_delay_total +
+ t->thrashing_delay_total +
+ t->compact_delay_total +
+ t->wpcopy_delay_total;
+}
+
+static void set_mem_count(struct task_info *t)
+{
+ t->mem_count = t->swapin_count +
+ t->freepages_count +
+ t->thrashing_count +
+ t->compact_count +
+ t->wpcopy_count;
+}
+
/* Create a raw netlink socket and bind */
static int create_nl_socket(void)
{
@@ -358,87 +497,134 @@ static int get_family_id(int sd)
return id;
}
-static void read_psi_stats(void)
+static int read_psi_stats(void)
{
FILE *fp;
char line[256];
int ret = 0;
+ int error_count = 0;
+
+ /* Check if PSI path exists */
+ if (access(PSI_PATH, F_OK) != 0) {
+ fprintf(stderr, "Error: PSI interface not found at %s\n", PSI_PATH);
+ fprintf(stderr, "Please ensure your kernel supports PSI (Pressure Stall Information)\n");
+ return -1;
+ }
+
/* Zero all fields */
memset(&psi, 0, sizeof(psi));
+
/* CPU pressure */
- fp = fopen(PSI_CPU_SOME, "r");
+ fp = fopen(PSI_CPU_PATH, "r");
if (fp) {
while (fgets(line, sizeof(line), fp)) {
if (strncmp(line, "some", 4) == 0) {
ret = sscanf(line, "some avg10=%lf avg60=%lf avg300=%lf total=%llu",
&psi.cpu_some_avg10, &psi.cpu_some_avg60,
&psi.cpu_some_avg300, &psi.cpu_some_total);
- if (ret != 4)
+ if (ret != 4) {
fprintf(stderr, "Failed to parse CPU some PSI data\n");
+ error_count++;
+ }
} else if (strncmp(line, "full", 4) == 0) {
ret = sscanf(line, "full avg10=%lf avg60=%lf avg300=%lf total=%llu",
&psi.cpu_full_avg10, &psi.cpu_full_avg60,
&psi.cpu_full_avg300, &psi.cpu_full_total);
- if (ret != 4)
+ if (ret != 4) {
fprintf(stderr, "Failed to parse CPU full PSI data\n");
+ error_count++;
+ }
}
}
fclose(fp);
+ } else {
+ fprintf(stderr, "Warning: Failed to open %s\n", PSI_CPU_PATH);
+ error_count++;
}
+
/* Memory pressure */
- fp = fopen(PSI_MEMORY_SOME, "r");
+ fp = fopen(PSI_MEMORY_PATH, "r");
if (fp) {
while (fgets(line, sizeof(line), fp)) {
if (strncmp(line, "some", 4) == 0) {
ret = sscanf(line, "some avg10=%lf avg60=%lf avg300=%lf total=%llu",
&psi.memory_some_avg10, &psi.memory_some_avg60,
&psi.memory_some_avg300, &psi.memory_some_total);
- if (ret != 4)
+ if (ret != 4) {
fprintf(stderr, "Failed to parse Memory some PSI data\n");
+ error_count++;
+ }
} else if (strncmp(line, "full", 4) == 0) {
ret = sscanf(line, "full avg10=%lf avg60=%lf avg300=%lf total=%llu",
&psi.memory_full_avg10, &psi.memory_full_avg60,
&psi.memory_full_avg300, &psi.memory_full_total);
- }
- if (ret != 4)
+ if (ret != 4) {
fprintf(stderr, "Failed to parse Memory full PSI data\n");
+ error_count++;
+ }
+ }
}
fclose(fp);
+ } else {
+ fprintf(stderr, "Warning: Failed to open %s\n", PSI_MEMORY_PATH);
+ error_count++;
}
+
/* IO pressure */
- fp = fopen(PSI_IO_SOME, "r");
+ fp = fopen(PSI_IO_PATH, "r");
if (fp) {
while (fgets(line, sizeof(line), fp)) {
if (strncmp(line, "some", 4) == 0) {
ret = sscanf(line, "some avg10=%lf avg60=%lf avg300=%lf total=%llu",
&psi.io_some_avg10, &psi.io_some_avg60,
&psi.io_some_avg300, &psi.io_some_total);
- if (ret != 4)
+ if (ret != 4) {
fprintf(stderr, "Failed to parse IO some PSI data\n");
+ error_count++;
+ }
} else if (strncmp(line, "full", 4) == 0) {
ret = sscanf(line, "full avg10=%lf avg60=%lf avg300=%lf total=%llu",
&psi.io_full_avg10, &psi.io_full_avg60,
&psi.io_full_avg300, &psi.io_full_total);
- if (ret != 4)
+ if (ret != 4) {
fprintf(stderr, "Failed to parse IO full PSI data\n");
+ error_count++;
+ }
}
}
fclose(fp);
+ } else {
+ fprintf(stderr, "Warning: Failed to open %s\n", PSI_IO_PATH);
+ error_count++;
}
+
/* IRQ pressure (only full) */
- fp = fopen(PSI_IRQ_FULL, "r");
+ fp = fopen(PSI_IRQ_PATH, "r");
if (fp) {
while (fgets(line, sizeof(line), fp)) {
if (strncmp(line, "full", 4) == 0) {
ret = sscanf(line, "full avg10=%lf avg60=%lf avg300=%lf total=%llu",
&psi.irq_full_avg10, &psi.irq_full_avg60,
&psi.irq_full_avg300, &psi.irq_full_total);
- if (ret != 4)
+ if (ret != 4) {
fprintf(stderr, "Failed to parse IRQ full PSI data\n");
+ error_count++;
+ }
}
}
fclose(fp);
+ } else {
+ fprintf(stderr, "Warning: Failed to open %s\n", PSI_IRQ_PATH);
+ error_count++;
+ }
+
+ /* Return error count: 0 means success, >0 means warnings, -1 means fatal error */
+ if (error_count > 0) {
+ fprintf(stderr, "PSI stats reading completed with %d warnings\n", error_count);
+ return error_count;
}
+
+ return 0;
}
static int read_comm(int pid, char *comm_buf, size_t buf_size)
@@ -527,6 +713,8 @@ static void fetch_and_fill_task_info(int pid, const char *comm)
SET_TASK_STAT(task_count, wpcopy_delay_total);
SET_TASK_STAT(task_count, irq_count);
SET_TASK_STAT(task_count, irq_delay_total);
+ set_mem_count(&tasks[task_count]);
+ set_mem_delay_total(&tasks[task_count]);
task_count++;
}
break;
@@ -587,19 +775,23 @@ static int compare_tasks(const void *a, const void *b)
{
const struct task_info *t1 = (const struct task_info *)a;
const struct task_info *t2 = (const struct task_info *)b;
+ unsigned long long total1;
+ unsigned long long total2;
+ unsigned long count1;
+ unsigned long count2;
double avg1, avg2;
- switch (cfg.sort_field) {
- case 'c': /* CPU */
- avg1 = average_ms(t1->cpu_delay_total, t1->cpu_count);
- avg2 = average_ms(t2->cpu_delay_total, t2->cpu_count);
- if (avg1 != avg2)
- return avg2 > avg1 ? 1 : -1;
- return t2->cpu_delay_total > t1->cpu_delay_total ? 1 : -1;
+ total1 = *(unsigned long long *)((char *)t1 + cfg.sort_field->total_offset);
+ total2 = *(unsigned long long *)((char *)t2 + cfg.sort_field->total_offset);
+ count1 = *(unsigned long *)((char *)t1 + cfg.sort_field->count_offset);
+ count2 = *(unsigned long *)((char *)t2 + cfg.sort_field->count_offset);
- default:
- return t2->cpu_delay_total > t1->cpu_delay_total ? 1 : -1;
- }
+ avg1 = average_ms(total1, count1);
+ avg2 = average_ms(total2, count2);
+ if (avg1 != avg2)
+ return avg2 > avg1 ? 1 : -1;
+
+ return 0;
}
/* Sort tasks by selected field */
@@ -673,7 +865,7 @@ static void get_container_stats(void)
}
/* Display results to stdout or log file */
-static void display_results(void)
+static void display_results(int psi_ret)
{
time_t now = time(NULL);
struct tm *tm_now = localtime(&now);
@@ -686,49 +878,53 @@ static void display_results(void)
suc &= BOOL_FPRINT(out, "\033[H\033[J");
/* PSI output (one-line, no cat style) */
- suc &= BOOL_FPRINT(out, "System Pressure Information: (avg10/avg60/avg300/total)\n");
- suc &= BOOL_FPRINT(out, PSI_LINE_FORMAT,
- "CPU some:",
- psi.cpu_some_avg10,
- psi.cpu_some_avg60,
- psi.cpu_some_avg300,
- psi.cpu_some_total / 1000);
- suc &= BOOL_FPRINT(out, PSI_LINE_FORMAT,
- "CPU full:",
- psi.cpu_full_avg10,
- psi.cpu_full_avg60,
- psi.cpu_full_avg300,
- psi.cpu_full_total / 1000);
- suc &= BOOL_FPRINT(out, PSI_LINE_FORMAT,
- "Memory full:",
- psi.memory_full_avg10,
- psi.memory_full_avg60,
- psi.memory_full_avg300,
- psi.memory_full_total / 1000);
- suc &= BOOL_FPRINT(out, PSI_LINE_FORMAT,
- "Memory some:",
- psi.memory_some_avg10,
- psi.memory_some_avg60,
- psi.memory_some_avg300,
- psi.memory_some_total / 1000);
- suc &= BOOL_FPRINT(out, PSI_LINE_FORMAT,
- "IO full:",
- psi.io_full_avg10,
- psi.io_full_avg60,
- psi.io_full_avg300,
- psi.io_full_total / 1000);
- suc &= BOOL_FPRINT(out, PSI_LINE_FORMAT,
- "IO some:",
- psi.io_some_avg10,
- psi.io_some_avg60,
- psi.io_some_avg300,
- psi.io_some_total / 1000);
- suc &= BOOL_FPRINT(out, PSI_LINE_FORMAT,
- "IRQ full:",
- psi.irq_full_avg10,
- psi.irq_full_avg60,
- psi.irq_full_avg300,
- psi.irq_full_total / 1000);
+ suc &= BOOL_FPRINT(out, "System Pressure Information: (avg10/avg60vg300/total)\n");
+ if (psi_ret) {
+ suc &= BOOL_FPRINT(out, " PSI not found: check if psi=1 enabled in cmdline\n");
+ } else {
+ suc &= BOOL_FPRINT(out, PSI_LINE_FORMAT,
+ "CPU some:",
+ psi.cpu_some_avg10,
+ psi.cpu_some_avg60,
+ psi.cpu_some_avg300,
+ psi.cpu_some_total / 1000);
+ suc &= BOOL_FPRINT(out, PSI_LINE_FORMAT,
+ "CPU full:",
+ psi.cpu_full_avg10,
+ psi.cpu_full_avg60,
+ psi.cpu_full_avg300,
+ psi.cpu_full_total / 1000);
+ suc &= BOOL_FPRINT(out, PSI_LINE_FORMAT,
+ "Memory full:",
+ psi.memory_full_avg10,
+ psi.memory_full_avg60,
+ psi.memory_full_avg300,
+ psi.memory_full_total / 1000);
+ suc &= BOOL_FPRINT(out, PSI_LINE_FORMAT,
+ "Memory some:",
+ psi.memory_some_avg10,
+ psi.memory_some_avg60,
+ psi.memory_some_avg300,
+ psi.memory_some_total / 1000);
+ suc &= BOOL_FPRINT(out, PSI_LINE_FORMAT,
+ "IO full:",
+ psi.io_full_avg10,
+ psi.io_full_avg60,
+ psi.io_full_avg300,
+ psi.io_full_total / 1000);
+ suc &= BOOL_FPRINT(out, PSI_LINE_FORMAT,
+ "IO some:",
+ psi.io_some_avg10,
+ psi.io_some_avg60,
+ psi.io_some_avg300,
+ psi.io_some_total / 1000);
+ suc &= BOOL_FPRINT(out, PSI_LINE_FORMAT,
+ "IRQ full:",
+ psi.irq_full_avg10,
+ psi.irq_full_avg60,
+ psi.irq_full_avg300,
+ psi.irq_full_total / 1000);
+ }
if (cfg.container_path) {
suc &= BOOL_FPRINT(out, "Container Information (%s):\n", cfg.container_path);
@@ -738,29 +934,59 @@ static void display_results(void)
container_stats.nr_stopped, container_stats.nr_uninterruptible,
container_stats.nr_io_wait);
}
- suc &= BOOL_FPRINT(out, "Top %d processes (sorted by CPU delay):\n",
- cfg.max_processes);
- suc &= BOOL_FPRINT(out, "%5s %5s %-17s", "PID", "TGID", "COMMAND");
- suc &= BOOL_FPRINT(out, "%7s %7s %7s %7s %7s %7s %7s %7s\n",
- "CPU(ms)", "IO(ms)", "SWAP(ms)", "RCL(ms)",
- "THR(ms)", "CMP(ms)", "WP(ms)", "IRQ(ms)");
- suc &= BOOL_FPRINT(out, "-----------------------------------------------");
- suc &= BOOL_FPRINT(out, "----------------------------------------------\n");
+ /* Interacive command */
+ suc &= BOOL_FPRINT(out, "[o]sort [M]memverbose [q]quit\n");
+ if (sort_selected) {
+ if (cfg.display_mode == MODE_MEMVERBOSE)
+ suc &= BOOL_FPRINT(out,
+ "sort selection: [m]MEM [r]RCL [t]THR [p]CMP [w]WP\n");
+ else
+ suc &= BOOL_FPRINT(out,
+ "sort selection: [c]CPU [i]IO [m]MEM [q]IRQ\n");
+ }
+
+ /* Task delay output */
+ suc &= BOOL_FPRINT(out, "Top %d processes (sorted by %s delay):\n",
+ cfg.max_processes, get_name_by_field(cfg.sort_field));
+
+ suc &= BOOL_FPRINT(out, "%8s %8s %-17s", "PID", "TGID", "COMMAND");
+ if (cfg.display_mode == MODE_MEMVERBOSE) {
+ suc &= BOOL_FPRINT(out, "%8s %8s %8s %8s %8s %8s\n",
+ "MEM(ms)", "SWAP(ms)", "RCL(ms)",
+ "THR(ms)", "CMP(ms)", "WP(ms)");
+ suc &= BOOL_FPRINT(out, "-----------------------");
+ suc &= BOOL_FPRINT(out, "-----------------------");
+ suc &= BOOL_FPRINT(out, "-----------------------");
+ suc &= BOOL_FPRINT(out, "---------------------\n");
+ } else {
+ suc &= BOOL_FPRINT(out, "%8s %8s %8s %8s\n",
+ "CPU(ms)", "IO(ms)", "IRQ(ms)", "MEM(ms)");
+ suc &= BOOL_FPRINT(out, "-----------------------");
+ suc &= BOOL_FPRINT(out, "-----------------------");
+ suc &= BOOL_FPRINT(out, "--------------------------\n");
+ }
+
count = task_count < cfg.max_processes ? task_count : cfg.max_processes;
for (i = 0; i < count; i++) {
- suc &= BOOL_FPRINT(out, "%5d %5d %-15s",
+ suc &= BOOL_FPRINT(out, "%8d %8d %-15s",
tasks[i].pid, tasks[i].tgid, tasks[i].command);
- suc &= BOOL_FPRINT(out, "%7.2f %7.2f %7.2f %7.2f %7.2f %7.2f %7.2f %7.2f\n",
- average_ms(tasks[i].cpu_delay_total, tasks[i].cpu_count),
- average_ms(tasks[i].blkio_delay_total, tasks[i].blkio_count),
- average_ms(tasks[i].swapin_delay_total, tasks[i].swapin_count),
- average_ms(tasks[i].freepages_delay_total, tasks[i].freepages_count),
- average_ms(tasks[i].thrashing_delay_total, tasks[i].thrashing_count),
- average_ms(tasks[i].compact_delay_total, tasks[i].compact_count),
- average_ms(tasks[i].wpcopy_delay_total, tasks[i].wpcopy_count),
- average_ms(tasks[i].irq_delay_total, tasks[i].irq_count));
+ if (cfg.display_mode == MODE_MEMVERBOSE) {
+ suc &= BOOL_FPRINT(out, DELAY_FMT_MEMVERBOSE,
+ TASK_AVG(tasks[i], mem),
+ TASK_AVG(tasks[i], swapin),
+ TASK_AVG(tasks[i], freepages),
+ TASK_AVG(tasks[i], thrashing),
+ TASK_AVG(tasks[i], compact),
+ TASK_AVG(tasks[i], wpcopy));
+ } else {
+ suc &= BOOL_FPRINT(out, DELAY_FMT_DEFAULT,
+ TASK_AVG(tasks[i], cpu),
+ TASK_AVG(tasks[i], blkio),
+ TASK_AVG(tasks[i], irq),
+ TASK_AVG(tasks[i], mem));
+ }
}
suc &= BOOL_FPRINT(out, "\n");
@@ -769,11 +995,79 @@ static void display_results(void)
perror("Error writing to output");
}
+/* Check for keyboard input with timeout based on cfg.delay */
+static char check_for_keypress(void)
+{
+ struct timeval tv = {cfg.delay, 0};
+ fd_set readfds;
+ char ch = 0;
+
+ FD_ZERO(&readfds);
+ FD_SET(STDIN_FILENO, &readfds);
+ int r = select(STDIN_FILENO + 1, &readfds, NULL, NULL, &tv);
+
+ if (r > 0 && FD_ISSET(STDIN_FILENO, &readfds)) {
+ read(STDIN_FILENO, &ch, 1);
+ return ch;
+ }
+
+ return 0;
+}
+
+#define MAX_MODE_SIZE 2
+static void toggle_display_mode(void)
+{
+ static const size_t modes[MAX_MODE_SIZE] = {MODE_DEFAULT, MODE_MEMVERBOSE};
+ static size_t cur_index;
+
+ cur_index = (cur_index + 1) % MAX_MODE_SIZE;
+ cfg.display_mode = modes[cur_index];
+}
+
+/* Handle keyboard input: sorting selection, mode toggle, or quit */
+static void handle_keypress(char ch, int *running)
+{
+ const struct field_desc *field;
+
+ /* Change sort field */
+ if (sort_selected) {
+ field = get_field_by_cmd_char(ch);
+ if (field && (field->supported_modes & cfg.display_mode))
+ cfg.sort_field = field;
+
+ sort_selected = 0;
+ /* Handle mode changes or quit */
+ } else {
+ switch (ch) {
+ case 'o':
+ sort_selected = 1;
+ break;
+ case 'M':
+ toggle_display_mode();
+ for (field = sort_fields; field->name != NULL; field++) {
+ if (field->supported_modes & cfg.display_mode) {
+ cfg.sort_field = field;
+ break;
+ }
+ }
+ break;
+ case 'q':
+ case 'Q':
+ *running = 0;
+ break;
+ default:
+ break;
+ }
+ }
+}
+
/* Main function */
int main(int argc, char **argv)
{
+ const struct field_desc *field;
int iterations = 0;
- int use_q_quit = 0;
+ int psi_ret = 0;
+ char keypress;
/* Parse command line arguments */
parse_args(argc, argv);
@@ -793,17 +1087,24 @@ int main(int argc, char **argv)
exit(1);
}
- if (!cfg.output_one_time) {
- use_q_quit = 1;
- enable_raw_mode();
- printf("Press 'q' to quit.\n");
- fflush(stdout);
- }
+ /* Set terminal to non-canonical mode for interaction */
+ enable_raw_mode();
/* Main loop */
while (running) {
+ /* Auto-switch sort field when not matching display mode */
+ if (!(cfg.sort_field->supported_modes & cfg.display_mode)) {
+ for (field = sort_fields; field->name != NULL; field++) {
+ if (field->supported_modes & cfg.display_mode) {
+ cfg.sort_field = field;
+ printf("Auto-switched sort field to: %s\n", field->name);
+ break;
+ }
+ }
+ }
+
/* Read PSI statistics */
- read_psi_stats();
+ psi_ret = read_psi_stats();
/* Get container stats if container path provided */
if (cfg.container_path)
@@ -816,7 +1117,7 @@ int main(int argc, char **argv)
sort_tasks();
/* Display results to stdout or log file */
- display_results();
+ display_results(psi_ret);
/* Check for iterations */
if (cfg.iterations > 0 && ++iterations >= cfg.iterations)
@@ -826,32 +1127,14 @@ int main(int argc, char **argv)
if (cfg.output_one_time)
break;
- /* Check for 'q' key to quit */
- if (use_q_quit) {
- struct timeval tv = {cfg.delay, 0};
- fd_set readfds;
-
- FD_ZERO(&readfds);
- FD_SET(STDIN_FILENO, &readfds);
- int r = select(STDIN_FILENO+1, &readfds, NULL, NULL, &tv);
-
- if (r > 0 && FD_ISSET(STDIN_FILENO, &readfds)) {
- char ch = 0;
-
- read(STDIN_FILENO, &ch, 1);
- if (ch == 'q' || ch == 'Q') {
- running = 0;
- break;
- }
- }
- } else {
- sleep(cfg.delay);
- }
+ /* Keypress for interactive usage */
+ keypress = check_for_keypress();
+ if (keypress)
+ handle_keypress(keypress, &running);
}
/* Restore terminal mode */
- if (use_q_quit)
- disable_raw_mode();
+ disable_raw_mode();
/* Cleanup */
close(nl_sd);
diff --git a/tools/arch/arm/include/uapi/asm/kvm.h b/tools/arch/arm/include/uapi/asm/kvm.h
deleted file mode 100644
index d5dd96902817..000000000000
--- a/tools/arch/arm/include/uapi/asm/kvm.h
+++ /dev/null
@@ -1,315 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
-/*
- * Copyright (C) 2012 - Virtual Open Systems and Columbia University
- * Author: Christoffer Dall <c.dall@virtualopensystems.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License, version 2, as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
- */
-
-#ifndef __ARM_KVM_H__
-#define __ARM_KVM_H__
-
-#include <linux/types.h>
-#include <linux/psci.h>
-#include <asm/ptrace.h>
-
-#define __KVM_HAVE_GUEST_DEBUG
-#define __KVM_HAVE_IRQ_LINE
-#define __KVM_HAVE_READONLY_MEM
-#define __KVM_HAVE_VCPU_EVENTS
-
-#define KVM_COALESCED_MMIO_PAGE_OFFSET 1
-
-#define KVM_REG_SIZE(id) \
- (1U << (((id) & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT))
-
-/* Valid for svc_regs, abt_regs, und_regs, irq_regs in struct kvm_regs */
-#define KVM_ARM_SVC_sp svc_regs[0]
-#define KVM_ARM_SVC_lr svc_regs[1]
-#define KVM_ARM_SVC_spsr svc_regs[2]
-#define KVM_ARM_ABT_sp abt_regs[0]
-#define KVM_ARM_ABT_lr abt_regs[1]
-#define KVM_ARM_ABT_spsr abt_regs[2]
-#define KVM_ARM_UND_sp und_regs[0]
-#define KVM_ARM_UND_lr und_regs[1]
-#define KVM_ARM_UND_spsr und_regs[2]
-#define KVM_ARM_IRQ_sp irq_regs[0]
-#define KVM_ARM_IRQ_lr irq_regs[1]
-#define KVM_ARM_IRQ_spsr irq_regs[2]
-
-/* Valid only for fiq_regs in struct kvm_regs */
-#define KVM_ARM_FIQ_r8 fiq_regs[0]
-#define KVM_ARM_FIQ_r9 fiq_regs[1]
-#define KVM_ARM_FIQ_r10 fiq_regs[2]
-#define KVM_ARM_FIQ_fp fiq_regs[3]
-#define KVM_ARM_FIQ_ip fiq_regs[4]
-#define KVM_ARM_FIQ_sp fiq_regs[5]
-#define KVM_ARM_FIQ_lr fiq_regs[6]
-#define KVM_ARM_FIQ_spsr fiq_regs[7]
-
-struct kvm_regs {
- struct pt_regs usr_regs; /* R0_usr - R14_usr, PC, CPSR */
- unsigned long svc_regs[3]; /* SP_svc, LR_svc, SPSR_svc */
- unsigned long abt_regs[3]; /* SP_abt, LR_abt, SPSR_abt */
- unsigned long und_regs[3]; /* SP_und, LR_und, SPSR_und */
- unsigned long irq_regs[3]; /* SP_irq, LR_irq, SPSR_irq */
- unsigned long fiq_regs[8]; /* R8_fiq - R14_fiq, SPSR_fiq */
-};
-
-/* Supported Processor Types */
-#define KVM_ARM_TARGET_CORTEX_A15 0
-#define KVM_ARM_TARGET_CORTEX_A7 1
-#define KVM_ARM_NUM_TARGETS 2
-
-/* KVM_ARM_SET_DEVICE_ADDR ioctl id encoding */
-#define KVM_ARM_DEVICE_TYPE_SHIFT 0
-#define KVM_ARM_DEVICE_TYPE_MASK (0xffff << KVM_ARM_DEVICE_TYPE_SHIFT)
-#define KVM_ARM_DEVICE_ID_SHIFT 16
-#define KVM_ARM_DEVICE_ID_MASK (0xffff << KVM_ARM_DEVICE_ID_SHIFT)
-
-/* Supported device IDs */
-#define KVM_ARM_DEVICE_VGIC_V2 0
-
-/* Supported VGIC address types */
-#define KVM_VGIC_V2_ADDR_TYPE_DIST 0
-#define KVM_VGIC_V2_ADDR_TYPE_CPU 1
-
-#define KVM_VGIC_V2_DIST_SIZE 0x1000
-#define KVM_VGIC_V2_CPU_SIZE 0x2000
-
-/* Supported VGICv3 address types */
-#define KVM_VGIC_V3_ADDR_TYPE_DIST 2
-#define KVM_VGIC_V3_ADDR_TYPE_REDIST 3
-#define KVM_VGIC_ITS_ADDR_TYPE 4
-#define KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION 5
-
-#define KVM_VGIC_V3_DIST_SIZE SZ_64K
-#define KVM_VGIC_V3_REDIST_SIZE (2 * SZ_64K)
-#define KVM_VGIC_V3_ITS_SIZE (2 * SZ_64K)
-
-#define KVM_ARM_VCPU_POWER_OFF 0 /* CPU is started in OFF state */
-#define KVM_ARM_VCPU_PSCI_0_2 1 /* CPU uses PSCI v0.2 */
-
-struct kvm_vcpu_init {
- __u32 target;
- __u32 features[7];
-};
-
-struct kvm_sregs {
-};
-
-struct kvm_fpu {
-};
-
-struct kvm_guest_debug_arch {
-};
-
-struct kvm_debug_exit_arch {
-};
-
-struct kvm_sync_regs {
- /* Used with KVM_CAP_ARM_USER_IRQ */
- __u64 device_irq_level;
-};
-
-struct kvm_arch_memory_slot {
-};
-
-/* for KVM_GET/SET_VCPU_EVENTS */
-struct kvm_vcpu_events {
- struct {
- __u8 serror_pending;
- __u8 serror_has_esr;
- __u8 ext_dabt_pending;
- /* Align it to 8 bytes */
- __u8 pad[5];
- __u64 serror_esr;
- } exception;
- __u32 reserved[12];
-};
-
-/* If you need to interpret the index values, here is the key: */
-#define KVM_REG_ARM_COPROC_MASK 0x000000000FFF0000
-#define KVM_REG_ARM_COPROC_SHIFT 16
-#define KVM_REG_ARM_32_OPC2_MASK 0x0000000000000007
-#define KVM_REG_ARM_32_OPC2_SHIFT 0
-#define KVM_REG_ARM_OPC1_MASK 0x0000000000000078
-#define KVM_REG_ARM_OPC1_SHIFT 3
-#define KVM_REG_ARM_CRM_MASK 0x0000000000000780
-#define KVM_REG_ARM_CRM_SHIFT 7
-#define KVM_REG_ARM_32_CRN_MASK 0x0000000000007800
-#define KVM_REG_ARM_32_CRN_SHIFT 11
-/*
- * For KVM currently all guest registers are nonsecure, but we reserve a bit
- * in the encoding to distinguish secure from nonsecure for AArch32 system
- * registers that are banked by security. This is 1 for the secure banked
- * register, and 0 for the nonsecure banked register or if the register is
- * not banked by security.
- */
-#define KVM_REG_ARM_SECURE_MASK 0x0000000010000000
-#define KVM_REG_ARM_SECURE_SHIFT 28
-
-#define ARM_CP15_REG_SHIFT_MASK(x,n) \
- (((x) << KVM_REG_ARM_ ## n ## _SHIFT) & KVM_REG_ARM_ ## n ## _MASK)
-
-#define __ARM_CP15_REG(op1,crn,crm,op2) \
- (KVM_REG_ARM | (15 << KVM_REG_ARM_COPROC_SHIFT) | \
- ARM_CP15_REG_SHIFT_MASK(op1, OPC1) | \
- ARM_CP15_REG_SHIFT_MASK(crn, 32_CRN) | \
- ARM_CP15_REG_SHIFT_MASK(crm, CRM) | \
- ARM_CP15_REG_SHIFT_MASK(op2, 32_OPC2))
-
-#define ARM_CP15_REG32(...) (__ARM_CP15_REG(__VA_ARGS__) | KVM_REG_SIZE_U32)
-
-#define __ARM_CP15_REG64(op1,crm) \
- (__ARM_CP15_REG(op1, 0, crm, 0) | KVM_REG_SIZE_U64)
-#define ARM_CP15_REG64(...) __ARM_CP15_REG64(__VA_ARGS__)
-
-/* PL1 Physical Timer Registers */
-#define KVM_REG_ARM_PTIMER_CTL ARM_CP15_REG32(0, 14, 2, 1)
-#define KVM_REG_ARM_PTIMER_CNT ARM_CP15_REG64(0, 14)
-#define KVM_REG_ARM_PTIMER_CVAL ARM_CP15_REG64(2, 14)
-
-/* Virtual Timer Registers */
-#define KVM_REG_ARM_TIMER_CTL ARM_CP15_REG32(0, 14, 3, 1)
-#define KVM_REG_ARM_TIMER_CNT ARM_CP15_REG64(1, 14)
-#define KVM_REG_ARM_TIMER_CVAL ARM_CP15_REG64(3, 14)
-
-/* Normal registers are mapped as coprocessor 16. */
-#define KVM_REG_ARM_CORE (0x0010 << KVM_REG_ARM_COPROC_SHIFT)
-#define KVM_REG_ARM_CORE_REG(name) (offsetof(struct kvm_regs, name) / 4)
-
-/* Some registers need more space to represent values. */
-#define KVM_REG_ARM_DEMUX (0x0011 << KVM_REG_ARM_COPROC_SHIFT)
-#define KVM_REG_ARM_DEMUX_ID_MASK 0x000000000000FF00
-#define KVM_REG_ARM_DEMUX_ID_SHIFT 8
-#define KVM_REG_ARM_DEMUX_ID_CCSIDR (0x00 << KVM_REG_ARM_DEMUX_ID_SHIFT)
-#define KVM_REG_ARM_DEMUX_VAL_MASK 0x00000000000000FF
-#define KVM_REG_ARM_DEMUX_VAL_SHIFT 0
-
-/* VFP registers: we could overload CP10 like ARM does, but that's ugly. */
-#define KVM_REG_ARM_VFP (0x0012 << KVM_REG_ARM_COPROC_SHIFT)
-#define KVM_REG_ARM_VFP_MASK 0x000000000000FFFF
-#define KVM_REG_ARM_VFP_BASE_REG 0x0
-#define KVM_REG_ARM_VFP_FPSID 0x1000
-#define KVM_REG_ARM_VFP_FPSCR 0x1001
-#define KVM_REG_ARM_VFP_MVFR1 0x1006
-#define KVM_REG_ARM_VFP_MVFR0 0x1007
-#define KVM_REG_ARM_VFP_FPEXC 0x1008
-#define KVM_REG_ARM_VFP_FPINST 0x1009
-#define KVM_REG_ARM_VFP_FPINST2 0x100A
-
-/* KVM-as-firmware specific pseudo-registers */
-#define KVM_REG_ARM_FW (0x0014 << KVM_REG_ARM_COPROC_SHIFT)
-#define KVM_REG_ARM_FW_REG(r) (KVM_REG_ARM | KVM_REG_SIZE_U64 | \
- KVM_REG_ARM_FW | ((r) & 0xffff))
-#define KVM_REG_ARM_PSCI_VERSION KVM_REG_ARM_FW_REG(0)
-#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1 KVM_REG_ARM_FW_REG(1)
- /* Higher values mean better protection. */
-#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_AVAIL 0
-#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_AVAIL 1
-#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_REQUIRED 2
-#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2 KVM_REG_ARM_FW_REG(2)
- /* Higher values mean better protection. */
-#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_AVAIL 0
-#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_UNKNOWN 1
-#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_AVAIL 2
-#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_REQUIRED 3
-#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_ENABLED (1U << 4)
-
-/* Device Control API: ARM VGIC */
-#define KVM_DEV_ARM_VGIC_GRP_ADDR 0
-#define KVM_DEV_ARM_VGIC_GRP_DIST_REGS 1
-#define KVM_DEV_ARM_VGIC_GRP_CPU_REGS 2
-#define KVM_DEV_ARM_VGIC_CPUID_SHIFT 32
-#define KVM_DEV_ARM_VGIC_CPUID_MASK (0xffULL << KVM_DEV_ARM_VGIC_CPUID_SHIFT)
-#define KVM_DEV_ARM_VGIC_V3_MPIDR_SHIFT 32
-#define KVM_DEV_ARM_VGIC_V3_MPIDR_MASK \
- (0xffffffffULL << KVM_DEV_ARM_VGIC_V3_MPIDR_SHIFT)
-#define KVM_DEV_ARM_VGIC_OFFSET_SHIFT 0
-#define KVM_DEV_ARM_VGIC_OFFSET_MASK (0xffffffffULL << KVM_DEV_ARM_VGIC_OFFSET_SHIFT)
-#define KVM_DEV_ARM_VGIC_SYSREG_INSTR_MASK (0xffff)
-#define KVM_DEV_ARM_VGIC_GRP_NR_IRQS 3
-#define KVM_DEV_ARM_VGIC_GRP_CTRL 4
-#define KVM_DEV_ARM_VGIC_GRP_REDIST_REGS 5
-#define KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS 6
-#define KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO 7
-#define KVM_DEV_ARM_VGIC_GRP_ITS_REGS 8
-#define KVM_DEV_ARM_VGIC_GRP_MAINT_IRQ 9
-#define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT 10
-#define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_MASK \
- (0x3fffffULL << KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT)
-#define KVM_DEV_ARM_VGIC_LINE_LEVEL_INTID_MASK 0x3ff
-#define VGIC_LEVEL_INFO_LINE_LEVEL 0
-
-/* Device Control API on vcpu fd */
-#define KVM_ARM_VCPU_PMU_V3_CTRL 0
-#define KVM_ARM_VCPU_PMU_V3_IRQ 0
-#define KVM_ARM_VCPU_PMU_V3_INIT 1
-#define KVM_ARM_VCPU_TIMER_CTRL 1
-#define KVM_ARM_VCPU_TIMER_IRQ_VTIMER 0
-#define KVM_ARM_VCPU_TIMER_IRQ_PTIMER 1
-
-#define KVM_DEV_ARM_VGIC_CTRL_INIT 0
-#define KVM_DEV_ARM_ITS_SAVE_TABLES 1
-#define KVM_DEV_ARM_ITS_RESTORE_TABLES 2
-#define KVM_DEV_ARM_VGIC_SAVE_PENDING_TABLES 3
-#define KVM_DEV_ARM_ITS_CTRL_RESET 4
-
-/* KVM_IRQ_LINE irq field index values */
-#define KVM_ARM_IRQ_VCPU2_SHIFT 28
-#define KVM_ARM_IRQ_VCPU2_MASK 0xf
-#define KVM_ARM_IRQ_TYPE_SHIFT 24
-#define KVM_ARM_IRQ_TYPE_MASK 0xf
-#define KVM_ARM_IRQ_VCPU_SHIFT 16
-#define KVM_ARM_IRQ_VCPU_MASK 0xff
-#define KVM_ARM_IRQ_NUM_SHIFT 0
-#define KVM_ARM_IRQ_NUM_MASK 0xffff
-
-/* irq_type field */
-#define KVM_ARM_IRQ_TYPE_CPU 0
-#define KVM_ARM_IRQ_TYPE_SPI 1
-#define KVM_ARM_IRQ_TYPE_PPI 2
-
-/* out-of-kernel GIC cpu interrupt injection irq_number field */
-#define KVM_ARM_IRQ_CPU_IRQ 0
-#define KVM_ARM_IRQ_CPU_FIQ 1
-
-/*
- * This used to hold the highest supported SPI, but it is now obsolete
- * and only here to provide source code level compatibility with older
- * userland. The highest SPI number can be set via KVM_DEV_ARM_VGIC_GRP_NR_IRQS.
- */
-#ifndef __KERNEL__
-#define KVM_ARM_IRQ_GIC_MAX 127
-#endif
-
-/* One single KVM irqchip, ie. the VGIC */
-#define KVM_NR_IRQCHIPS 1
-
-/* PSCI interface */
-#define KVM_PSCI_FN_BASE 0x95c1ba5e
-#define KVM_PSCI_FN(n) (KVM_PSCI_FN_BASE + (n))
-
-#define KVM_PSCI_FN_CPU_SUSPEND KVM_PSCI_FN(0)
-#define KVM_PSCI_FN_CPU_OFF KVM_PSCI_FN(1)
-#define KVM_PSCI_FN_CPU_ON KVM_PSCI_FN(2)
-#define KVM_PSCI_FN_MIGRATE KVM_PSCI_FN(3)
-
-#define KVM_PSCI_RET_SUCCESS PSCI_RET_SUCCESS
-#define KVM_PSCI_RET_NI PSCI_RET_NOT_SUPPORTED
-#define KVM_PSCI_RET_INVAL PSCI_RET_INVALID_PARAMS
-#define KVM_PSCI_RET_DENIED PSCI_RET_DENIED
-
-#endif /* __ARM_KVM_H__ */
diff --git a/tools/arch/arm64/include/asm/cputype.h b/tools/arch/arm64/include/asm/cputype.h
index 9a5d85cfd1fb..f898c47e551f 100644
--- a/tools/arch/arm64/include/asm/cputype.h
+++ b/tools/arch/arm64/include/asm/cputype.h
@@ -75,11 +75,13 @@
#define ARM_CPU_PART_CORTEX_A76 0xD0B
#define ARM_CPU_PART_NEOVERSE_N1 0xD0C
#define ARM_CPU_PART_CORTEX_A77 0xD0D
+#define ARM_CPU_PART_CORTEX_A76AE 0xD0E
#define ARM_CPU_PART_NEOVERSE_V1 0xD40
#define ARM_CPU_PART_CORTEX_A78 0xD41
#define ARM_CPU_PART_CORTEX_A78AE 0xD42
#define ARM_CPU_PART_CORTEX_X1 0xD44
#define ARM_CPU_PART_CORTEX_A510 0xD46
+#define ARM_CPU_PART_CORTEX_X1C 0xD4C
#define ARM_CPU_PART_CORTEX_A520 0xD80
#define ARM_CPU_PART_CORTEX_A710 0xD47
#define ARM_CPU_PART_CORTEX_A715 0xD4D
@@ -94,6 +96,7 @@
#define ARM_CPU_PART_NEOVERSE_V3 0xD84
#define ARM_CPU_PART_CORTEX_X925 0xD85
#define ARM_CPU_PART_CORTEX_A725 0xD87
+#define ARM_CPU_PART_CORTEX_A720AE 0xD89
#define ARM_CPU_PART_NEOVERSE_N3 0xD8E
#define APM_CPU_PART_XGENE 0x000
@@ -119,9 +122,11 @@
#define QCOM_CPU_PART_KRYO 0x200
#define QCOM_CPU_PART_KRYO_2XX_GOLD 0x800
#define QCOM_CPU_PART_KRYO_2XX_SILVER 0x801
+#define QCOM_CPU_PART_KRYO_3XX_GOLD 0x802
#define QCOM_CPU_PART_KRYO_3XX_SILVER 0x803
#define QCOM_CPU_PART_KRYO_4XX_GOLD 0x804
#define QCOM_CPU_PART_KRYO_4XX_SILVER 0x805
+#define QCOM_CPU_PART_ORYON_X1 0x001
#define NVIDIA_CPU_PART_DENVER 0x003
#define NVIDIA_CPU_PART_CARMEL 0x004
@@ -129,6 +134,7 @@
#define FUJITSU_CPU_PART_A64FX 0x001
#define HISI_CPU_PART_TSV110 0xD01
+#define HISI_CPU_PART_HIP09 0xD02
#define HISI_CPU_PART_HIP12 0xD06
#define APPLE_CPU_PART_M1_ICESTORM 0x022
@@ -159,11 +165,13 @@
#define MIDR_CORTEX_A76 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A76)
#define MIDR_NEOVERSE_N1 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_N1)
#define MIDR_CORTEX_A77 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A77)
+#define MIDR_CORTEX_A76AE MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A76AE)
#define MIDR_NEOVERSE_V1 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_V1)
#define MIDR_CORTEX_A78 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A78)
#define MIDR_CORTEX_A78AE MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A78AE)
#define MIDR_CORTEX_X1 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X1)
#define MIDR_CORTEX_A510 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A510)
+#define MIDR_CORTEX_X1C MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X1C)
#define MIDR_CORTEX_A520 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A520)
#define MIDR_CORTEX_A710 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A710)
#define MIDR_CORTEX_A715 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A715)
@@ -178,6 +186,7 @@
#define MIDR_NEOVERSE_V3 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_V3)
#define MIDR_CORTEX_X925 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X925)
#define MIDR_CORTEX_A725 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A725)
+#define MIDR_CORTEX_A720AE MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A720AE)
#define MIDR_NEOVERSE_N3 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_N3)
#define MIDR_THUNDERX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX)
#define MIDR_THUNDERX_81XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_81XX)
@@ -196,13 +205,26 @@
#define MIDR_QCOM_KRYO MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO)
#define MIDR_QCOM_KRYO_2XX_GOLD MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO_2XX_GOLD)
#define MIDR_QCOM_KRYO_2XX_SILVER MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO_2XX_SILVER)
+#define MIDR_QCOM_KRYO_3XX_GOLD MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO_3XX_GOLD)
#define MIDR_QCOM_KRYO_3XX_SILVER MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO_3XX_SILVER)
#define MIDR_QCOM_KRYO_4XX_GOLD MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO_4XX_GOLD)
#define MIDR_QCOM_KRYO_4XX_SILVER MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO_4XX_SILVER)
+#define MIDR_QCOM_ORYON_X1 MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_ORYON_X1)
+
+/*
+ * NOTES:
+ * - Qualcomm Kryo 5XX Prime / Gold ID themselves as MIDR_CORTEX_A77
+ * - Qualcomm Kryo 5XX Silver IDs itself as MIDR_QCOM_KRYO_4XX_SILVER
+ * - Qualcomm Kryo 6XX Prime IDs itself as MIDR_CORTEX_X1
+ * - Qualcomm Kryo 6XX Gold IDs itself as ARM_CPU_PART_CORTEX_A78
+ * - Qualcomm Kryo 6XX Silver IDs itself as MIDR_CORTEX_A55
+ */
+
#define MIDR_NVIDIA_DENVER MIDR_CPU_MODEL(ARM_CPU_IMP_NVIDIA, NVIDIA_CPU_PART_DENVER)
#define MIDR_NVIDIA_CARMEL MIDR_CPU_MODEL(ARM_CPU_IMP_NVIDIA, NVIDIA_CPU_PART_CARMEL)
#define MIDR_FUJITSU_A64FX MIDR_CPU_MODEL(ARM_CPU_IMP_FUJITSU, FUJITSU_CPU_PART_A64FX)
#define MIDR_HISI_TSV110 MIDR_CPU_MODEL(ARM_CPU_IMP_HISI, HISI_CPU_PART_TSV110)
+#define MIDR_HISI_HIP09 MIDR_CPU_MODEL(ARM_CPU_IMP_HISI, HISI_CPU_PART_HIP09)
#define MIDR_HISI_HIP12 MIDR_CPU_MODEL(ARM_CPU_IMP_HISI, HISI_CPU_PART_HIP12)
#define MIDR_APPLE_M1_ICESTORM MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_ICESTORM)
#define MIDR_APPLE_M1_FIRESTORM MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_FIRESTORM)
@@ -225,7 +247,7 @@
#define MIDR_FUJITSU_ERRATUM_010001_MASK (~MIDR_CPU_VAR_REV(1, 0))
#define TCR_CLEAR_FUJITSU_ERRATUM_010001 (TCR_NFD1 | TCR_NFD0)
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <asm/sysreg.h>
@@ -291,6 +313,14 @@ static inline u32 __attribute_const__ read_cpuid_id(void)
return read_cpuid(MIDR_EL1);
}
+struct target_impl_cpu {
+ u64 midr;
+ u64 revidr;
+ u64 aidr;
+};
+
+bool cpu_errata_set_target_impl(u64 num, void *impl_cpus);
+
static inline u64 __attribute_const__ read_cpuid_mpidr(void)
{
return read_cpuid(MPIDR_EL1);
@@ -310,6 +340,6 @@ static inline u32 __attribute_const__ read_cpuid_cachetype(void)
{
return read_cpuid(CTR_EL0);
}
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif
diff --git a/tools/arch/arm64/include/asm/esr.h b/tools/arch/arm64/include/asm/esr.h
index bd592ca81571..f3c6403e5ef2 100644
--- a/tools/arch/arm64/include/asm/esr.h
+++ b/tools/arch/arm64/include/asm/esr.h
@@ -141,6 +141,8 @@
#define ESR_ELx_SF (UL(1) << ESR_ELx_SF_SHIFT)
#define ESR_ELx_AR_SHIFT (14)
#define ESR_ELx_AR (UL(1) << ESR_ELx_AR_SHIFT)
+#define ESR_ELx_VNCR_SHIFT (13)
+#define ESR_ELx_VNCR (UL(1) << ESR_ELx_VNCR_SHIFT)
#define ESR_ELx_CM_SHIFT (8)
#define ESR_ELx_CM (UL(1) << ESR_ELx_CM_SHIFT)
@@ -385,7 +387,7 @@
#define ESR_ELx_MOPS_ISS_SRCREG(esr) (((esr) & (UL(0x1f) << 5)) >> 5)
#define ESR_ELx_MOPS_ISS_SIZEREG(esr) (((esr) & (UL(0x1f) << 0)) >> 0)
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <asm/types.h>
static inline unsigned long esr_brk_comment(unsigned long esr)
@@ -450,6 +452,6 @@ static inline bool esr_iss_is_eretab(unsigned long esr)
}
const char *esr_get_class_string(unsigned long esr);
-#endif /* __ASSEMBLY */
+#endif /* __ASSEMBLER__ */
#endif /* __ASM_ESR_H */
diff --git a/tools/arch/arm64/include/asm/gpr-num.h b/tools/arch/arm64/include/asm/gpr-num.h
index 05da4a7c5788..a114e4f8209b 100644
--- a/tools/arch/arm64/include/asm/gpr-num.h
+++ b/tools/arch/arm64/include/asm/gpr-num.h
@@ -2,7 +2,7 @@
#ifndef __ASM_GPR_NUM_H
#define __ASM_GPR_NUM_H
-#ifdef __ASSEMBLY__
+#ifdef __ASSEMBLER__
.irp num,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30
.equ .L__gpr_num_x\num, \num
@@ -11,7 +11,7 @@
.equ .L__gpr_num_xzr, 31
.equ .L__gpr_num_wzr, 31
-#else /* __ASSEMBLY__ */
+#else /* __ASSEMBLER__ */
#define __DEFINE_ASM_GPR_NUMS \
" .irp num,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30\n" \
@@ -21,6 +21,6 @@
" .equ .L__gpr_num_xzr, 31\n" \
" .equ .L__gpr_num_wzr, 31\n"
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* __ASM_GPR_NUM_H */
diff --git a/tools/arch/arm64/include/asm/sysreg.h b/tools/arch/arm64/include/asm/sysreg.h
index 690b6ebd118f..178b7322bf04 100644
--- a/tools/arch/arm64/include/asm/sysreg.h
+++ b/tools/arch/arm64/include/asm/sysreg.h
@@ -51,7 +51,7 @@
#ifndef CONFIG_BROKEN_GAS_INST
-#ifdef __ASSEMBLY__
+#ifdef __ASSEMBLER__
// The space separator is omitted so that __emit_inst(x) can be parsed as
// either an assembler directive or an assembler macro argument.
#define __emit_inst(x) .inst(x)
@@ -70,11 +70,11 @@
(((x) >> 24) & 0x000000ff))
#endif /* CONFIG_CPU_BIG_ENDIAN */
-#ifdef __ASSEMBLY__
+#ifdef __ASSEMBLER__
#define __emit_inst(x) .long __INSTR_BSWAP(x)
-#else /* __ASSEMBLY__ */
+#else /* __ASSEMBLER__ */
#define __emit_inst(x) ".long " __stringify(__INSTR_BSWAP(x)) "\n\t"
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* CONFIG_BROKEN_GAS_INST */
@@ -1078,12 +1078,7 @@
#define GCS_CAP(x) ((((unsigned long)x) & GCS_CAP_ADDR_MASK) | \
GCS_CAP_VALID_TOKEN)
-#define ARM64_FEATURE_FIELD_BITS 4
-
-/* Defined for compatibility only, do not add new users. */
-#define ARM64_FEATURE_MASK(x) (x##_MASK)
-
-#ifdef __ASSEMBLY__
+#ifdef __ASSEMBLER__
.macro mrs_s, rt, sreg
__emit_inst(0xd5200000|(\sreg)|(.L__gpr_num_\rt))
diff --git a/tools/arch/arm64/include/uapi/asm/kvm.h b/tools/arch/arm64/include/uapi/asm/kvm.h
index ed5f3892674c..a792a599b9d6 100644
--- a/tools/arch/arm64/include/uapi/asm/kvm.h
+++ b/tools/arch/arm64/include/uapi/asm/kvm.h
@@ -31,7 +31,7 @@
#define KVM_SPSR_FIQ 4
#define KVM_NR_SPSR 5
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <linux/psci.h>
#include <linux/types.h>
#include <asm/ptrace.h>
diff --git a/tools/arch/loongarch/include/asm/inst.h b/tools/arch/loongarch/include/asm/inst.h
index c25b5853181d..d68fad63c8b7 100644
--- a/tools/arch/loongarch/include/asm/inst.h
+++ b/tools/arch/loongarch/include/asm/inst.h
@@ -51,6 +51,10 @@ enum reg2i16_op {
bgeu_op = 0x1b,
};
+enum reg3_op {
+ amswapw_op = 0x70c0,
+};
+
struct reg0i15_format {
unsigned int immediate : 15;
unsigned int opcode : 17;
@@ -96,6 +100,13 @@ struct reg2i16_format {
unsigned int opcode : 6;
};
+struct reg3_format {
+ unsigned int rd : 5;
+ unsigned int rj : 5;
+ unsigned int rk : 5;
+ unsigned int opcode : 17;
+};
+
union loongarch_instruction {
unsigned int word;
struct reg0i15_format reg0i15_format;
@@ -105,6 +116,7 @@ union loongarch_instruction {
struct reg2i12_format reg2i12_format;
struct reg2i14_format reg2i14_format;
struct reg2i16_format reg2i16_format;
+ struct reg3_format reg3_format;
};
#define LOONGARCH_INSN_SIZE sizeof(union loongarch_instruction)
diff --git a/tools/arch/powerpc/include/uapi/asm/kvm.h b/tools/arch/powerpc/include/uapi/asm/kvm.h
index eaeda001784e..077c5437f521 100644
--- a/tools/arch/powerpc/include/uapi/asm/kvm.h
+++ b/tools/arch/powerpc/include/uapi/asm/kvm.h
@@ -1,18 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License, version 2, as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
- *
* Copyright IBM Corp. 2007
*
* Authors: Hollis Blanchard <hollisb@us.ibm.com>
diff --git a/tools/arch/riscv/include/asm/csr.h b/tools/arch/riscv/include/asm/csr.h
index 0dfc09254f99..21d8cee04638 100644
--- a/tools/arch/riscv/include/asm/csr.h
+++ b/tools/arch/riscv/include/asm/csr.h
@@ -167,7 +167,8 @@
#define VSIP_TO_HVIP_SHIFT (IRQ_VS_SOFT - IRQ_S_SOFT)
#define VSIP_VALID_MASK ((_AC(1, UL) << IRQ_S_SOFT) | \
(_AC(1, UL) << IRQ_S_TIMER) | \
- (_AC(1, UL) << IRQ_S_EXT))
+ (_AC(1, UL) << IRQ_S_EXT) | \
+ (_AC(1, UL) << IRQ_PMU_OVF))
/* AIA CSR bits */
#define TOPI_IID_SHIFT 16
@@ -280,7 +281,7 @@
#define CSR_HPMCOUNTER30H 0xc9e
#define CSR_HPMCOUNTER31H 0xc9f
-#define CSR_SSCOUNTOVF 0xda0
+#define CSR_SCOUNTOVF 0xda0
#define CSR_SSTATUS 0x100
#define CSR_SIE 0x104
@@ -468,13 +469,13 @@
#define IE_TIE (_AC(0x1, UL) << RV_IRQ_TIMER)
#define IE_EIE (_AC(0x1, UL) << RV_IRQ_EXT)
-#ifdef __ASSEMBLY__
+#ifdef __ASSEMBLER__
#define __ASM_STR(x) x
#else
#define __ASM_STR(x) #x
#endif
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#define csr_swap(csr, val) \
({ \
@@ -536,6 +537,6 @@
: "memory"); \
})
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* _ASM_RISCV_CSR_H */
diff --git a/tools/arch/riscv/include/asm/vdso/processor.h b/tools/arch/riscv/include/asm/vdso/processor.h
index 662aca039848..0665b117f30f 100644
--- a/tools/arch/riscv/include/asm/vdso/processor.h
+++ b/tools/arch/riscv/include/asm/vdso/processor.h
@@ -2,7 +2,7 @@
#ifndef __ASM_VDSO_PROCESSOR_H
#define __ASM_VDSO_PROCESSOR_H
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <asm-generic/barrier.h>
@@ -27,6 +27,6 @@ static inline void cpu_relax(void)
barrier();
}
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* __ASM_VDSO_PROCESSOR_H */
diff --git a/tools/arch/s390/include/uapi/asm/bitsperlong.h b/tools/arch/s390/include/uapi/asm/bitsperlong.h
index d2bb620119bf..a226a1686a53 100644
--- a/tools/arch/s390/include/uapi/asm/bitsperlong.h
+++ b/tools/arch/s390/include/uapi/asm/bitsperlong.h
@@ -2,11 +2,7 @@
#ifndef __ASM_S390_BITSPERLONG_H
#define __ASM_S390_BITSPERLONG_H
-#ifndef __s390x__
-#define __BITS_PER_LONG 32
-#else
#define __BITS_PER_LONG 64
-#endif
#include <asm-generic/bitsperlong.h>
diff --git a/tools/arch/s390/include/uapi/asm/kvm_perf.h b/tools/arch/s390/include/uapi/asm/kvm_perf.h
deleted file mode 100644
index 84606b8cc49e..000000000000
--- a/tools/arch/s390/include/uapi/asm/kvm_perf.h
+++ /dev/null
@@ -1,22 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
-/*
- * Definitions for perf-kvm on s390
- *
- * Copyright 2014 IBM Corp.
- * Author(s): Alexander Yarygin <yarygin@linux.vnet.ibm.com>
- */
-
-#ifndef __LINUX_KVM_PERF_S390_H
-#define __LINUX_KVM_PERF_S390_H
-
-#include <asm/sie.h>
-
-#define DECODE_STR_LEN 40
-
-#define VCPU_ID "id"
-
-#define KVM_ENTRY_TRACE "kvm:kvm_s390_sie_enter"
-#define KVM_EXIT_TRACE "kvm:kvm_s390_sie_exit"
-#define KVM_EXIT_REASON "icptcode"
-
-#endif
diff --git a/tools/arch/x86/include/asm/asm.h b/tools/arch/x86/include/asm/asm.h
index dbe39b44256b..6e1b357c374b 100644
--- a/tools/arch/x86/include/asm/asm.h
+++ b/tools/arch/x86/include/asm/asm.h
@@ -108,18 +108,6 @@
#endif
-/*
- * Macros to generate condition code outputs from inline assembly,
- * The output operand must be type "bool".
- */
-#ifdef __GCC_ASM_FLAG_OUTPUTS__
-# define CC_SET(c) "\n\t/* output condition code " #c "*/\n"
-# define CC_OUT(c) "=@cc" #c
-#else
-# define CC_SET(c) "\n\tset" #c " %[_cc_" #c "]\n"
-# define CC_OUT(c) [_cc_ ## c] "=qm"
-#endif
-
#ifdef __KERNEL__
/* Exception table entry */
diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h
index ee176236c2be..ccc01ad6ff7c 100644
--- a/tools/arch/x86/include/asm/cpufeatures.h
+++ b/tools/arch/x86/include/asm/cpufeatures.h
@@ -218,6 +218,7 @@
#define X86_FEATURE_FLEXPRIORITY ( 8*32+ 1) /* "flexpriority" Intel FlexPriority */
#define X86_FEATURE_EPT ( 8*32+ 2) /* "ept" Intel Extended Page Table */
#define X86_FEATURE_VPID ( 8*32+ 3) /* "vpid" Intel Virtual Processor ID */
+#define X86_FEATURE_COHERENCY_SFW_NO ( 8*32+ 4) /* SNP cache coherency software work around not needed */
#define X86_FEATURE_VMMCALL ( 8*32+15) /* "vmmcall" Prefer VMMCALL to VMCALL */
#define X86_FEATURE_XENPV ( 8*32+16) /* Xen paravirtual guest */
@@ -319,7 +320,7 @@
#define X86_FEATURE_FSRS (12*32+11) /* Fast short REP STOSB */
#define X86_FEATURE_FSRC (12*32+12) /* Fast short REP {CMPSB,SCASB} */
#define X86_FEATURE_FRED (12*32+17) /* "fred" Flexible Return and Event Delivery */
-#define X86_FEATURE_LKGS (12*32+18) /* Load "kernel" (userspace) GS */
+#define X86_FEATURE_LKGS (12*32+18) /* Like MOV_GS except MSR_KERNEL_GS_BASE = GS.base */
#define X86_FEATURE_WRMSRNS (12*32+19) /* Non-serializing WRMSR */
#define X86_FEATURE_AMX_FP16 (12*32+21) /* AMX fp16 Support */
#define X86_FEATURE_AVX_IFMA (12*32+23) /* Support for VPMADD52[H,L]UQ */
@@ -406,9 +407,12 @@
#define X86_FEATURE_ENQCMD (16*32+29) /* "enqcmd" ENQCMD and ENQCMDS instructions */
#define X86_FEATURE_SGX_LC (16*32+30) /* "sgx_lc" Software Guard Extensions Launch Control */
-/* AMD-defined CPU features, CPUID level 0x80000007 (EBX), word 17 */
+/*
+ * Linux-defined word for use with scattered/synthetic bits.
+ */
#define X86_FEATURE_OVERFLOW_RECOV (17*32+ 0) /* "overflow_recov" MCA overflow recovery support */
#define X86_FEATURE_SUCCOR (17*32+ 1) /* "succor" Uncorrectable error containment and recovery */
+
#define X86_FEATURE_SMCA (17*32+ 3) /* "smca" Scalable MCA */
/* Intel-defined CPU features, CPUID level 0x00000007:0 (EDX), word 18 */
@@ -443,6 +447,7 @@
#define X86_FEATURE_VM_PAGE_FLUSH (19*32+ 2) /* VM Page Flush MSR is supported */
#define X86_FEATURE_SEV_ES (19*32+ 3) /* "sev_es" Secure Encrypted Virtualization - Encrypted State */
#define X86_FEATURE_SEV_SNP (19*32+ 4) /* "sev_snp" Secure Encrypted Virtualization - Secure Nested Paging */
+#define X86_FEATURE_SNP_SECURE_TSC (19*32+ 8) /* SEV-SNP Secure TSC */
#define X86_FEATURE_V_TSC_AUX (19*32+ 9) /* Virtual TSC_AUX */
#define X86_FEATURE_SME_COHERENT (19*32+10) /* hardware-enforced cache coherency */
#define X86_FEATURE_DEBUG_SWAP (19*32+14) /* "debug_swap" SEV-ES full debug state swap support */
@@ -456,10 +461,14 @@
#define X86_FEATURE_NO_NESTED_DATA_BP (20*32+ 0) /* No Nested Data Breakpoints */
#define X86_FEATURE_WRMSR_XX_BASE_NS (20*32+ 1) /* WRMSR to {FS,GS,KERNEL_GS}_BASE is non-serializing */
#define X86_FEATURE_LFENCE_RDTSC (20*32+ 2) /* LFENCE always serializing / synchronizes RDTSC */
+#define X86_FEATURE_VERW_CLEAR (20*32+ 5) /* The memory form of VERW mitigates TSA */
#define X86_FEATURE_NULL_SEL_CLR_BASE (20*32+ 6) /* Null Selector Clears Base */
+
#define X86_FEATURE_AUTOIBRS (20*32+ 8) /* Automatic IBRS */
#define X86_FEATURE_NO_SMM_CTL_MSR (20*32+ 9) /* SMM_CTL MSR is not present */
+#define X86_FEATURE_GP_ON_USER_CPUID (20*32+17) /* User CPUID faulting */
+
#define X86_FEATURE_PREFETCHI (20*32+20) /* Prefetch Data/Instruction to Cache Level */
#define X86_FEATURE_SBPB (20*32+27) /* Selective Branch Prediction Barrier */
#define X86_FEATURE_IBPB_BRTYPE (20*32+28) /* MSR_PRED_CMD[IBPB] flushes all branch type predictions */
@@ -487,6 +496,12 @@
#define X86_FEATURE_PREFER_YMM (21*32+ 8) /* Avoid ZMM registers due to downclocking */
#define X86_FEATURE_APX (21*32+ 9) /* Advanced Performance Extensions */
#define X86_FEATURE_INDIRECT_THUNK_ITS (21*32+10) /* Use thunk for indirect branches in lower half of cacheline */
+#define X86_FEATURE_TSA_SQ_NO (21*32+11) /* AMD CPU not vulnerable to TSA-SQ */
+#define X86_FEATURE_TSA_L1_NO (21*32+12) /* AMD CPU not vulnerable to TSA-L1 */
+#define X86_FEATURE_CLEAR_CPU_BUF_VM (21*32+13) /* Clear CPU buffers using VERW before VMRUN */
+#define X86_FEATURE_IBPB_EXIT_TO_USER (21*32+14) /* Use IBPB on exit-to-userspace, see VMSCAPE bug */
+#define X86_FEATURE_ABMC (21*32+15) /* Assignable Bandwidth Monitoring Counters */
+#define X86_FEATURE_MSR_IMM (21*32+16) /* MSR immediate form instructions */
/*
* BUG word(s)
@@ -542,5 +557,6 @@
#define X86_BUG_OLD_MICROCODE X86_BUG( 1*32+ 6) /* "old_microcode" CPU has old microcode, it is surely vulnerable to something */
#define X86_BUG_ITS X86_BUG( 1*32+ 7) /* "its" CPU is affected by Indirect Target Selection */
#define X86_BUG_ITS_NATIVE_ONLY X86_BUG( 1*32+ 8) /* "its_native_only" CPU is affected by ITS, VMX is not affected */
-
+#define X86_BUG_TSA X86_BUG( 1*32+ 9) /* "tsa" CPU is affected by Transient Scheduler Attacks */
+#define X86_BUG_VMSCAPE X86_BUG( 1*32+10) /* "vmscape" CPU is affected by VMSCAPE attacks from guests */
#endif /* _ASM_X86_CPUFEATURES_H */
diff --git a/tools/arch/x86/include/asm/inat.h b/tools/arch/x86/include/asm/inat.h
index 183aa662b165..099e926595bd 100644
--- a/tools/arch/x86/include/asm/inat.h
+++ b/tools/arch/x86/include/asm/inat.h
@@ -37,6 +37,8 @@
#define INAT_PFX_EVEX 15 /* EVEX prefix */
/* x86-64 REX2 prefix */
#define INAT_PFX_REX2 16 /* 0xD5 */
+/* AMD XOP prefix */
+#define INAT_PFX_XOP 17 /* 0x8F */
#define INAT_LSTPFX_MAX 3
#define INAT_LGCPFX_MAX 11
@@ -77,6 +79,7 @@
#define INAT_MOFFSET (1 << (INAT_FLAG_OFFS + 3))
#define INAT_VARIANT (1 << (INAT_FLAG_OFFS + 4))
#define INAT_VEXOK (1 << (INAT_FLAG_OFFS + 5))
+#define INAT_XOPOK INAT_VEXOK
#define INAT_VEXONLY (1 << (INAT_FLAG_OFFS + 6))
#define INAT_EVEXONLY (1 << (INAT_FLAG_OFFS + 7))
#define INAT_NO_REX2 (1 << (INAT_FLAG_OFFS + 8))
@@ -111,6 +114,8 @@ extern insn_attr_t inat_get_group_attribute(insn_byte_t modrm,
extern insn_attr_t inat_get_avx_attribute(insn_byte_t opcode,
insn_byte_t vex_m,
insn_byte_t vex_pp);
+extern insn_attr_t inat_get_xop_attribute(insn_byte_t opcode,
+ insn_byte_t map_select);
/* Attribute checking functions */
static inline int inat_is_legacy_prefix(insn_attr_t attr)
@@ -164,6 +169,11 @@ static inline int inat_is_vex3_prefix(insn_attr_t attr)
return (attr & INAT_PFX_MASK) == INAT_PFX_VEX3;
}
+static inline int inat_is_xop_prefix(insn_attr_t attr)
+{
+ return (attr & INAT_PFX_MASK) == INAT_PFX_XOP;
+}
+
static inline int inat_is_escape(insn_attr_t attr)
{
return attr & INAT_ESC_MASK;
@@ -229,6 +239,11 @@ static inline int inat_accept_vex(insn_attr_t attr)
return attr & INAT_VEXOK;
}
+static inline int inat_accept_xop(insn_attr_t attr)
+{
+ return attr & INAT_XOPOK;
+}
+
static inline int inat_must_vex(insn_attr_t attr)
{
return attr & (INAT_VEXONLY | INAT_EVEXONLY);
diff --git a/tools/arch/x86/include/asm/insn.h b/tools/arch/x86/include/asm/insn.h
index 0e5abd896ad4..8f10f2943370 100644
--- a/tools/arch/x86/include/asm/insn.h
+++ b/tools/arch/x86/include/asm/insn.h
@@ -71,7 +71,10 @@ struct insn {
* prefixes.bytes[3]: last prefix
*/
struct insn_field rex_prefix; /* REX prefix */
- struct insn_field vex_prefix; /* VEX prefix */
+ union {
+ struct insn_field vex_prefix; /* VEX prefix */
+ struct insn_field xop_prefix; /* XOP prefix */
+ };
struct insn_field opcode; /*
* opcode.bytes[0]: opcode1
* opcode.bytes[1]: opcode2
@@ -135,6 +138,17 @@ struct insn {
#define X86_VEX_V(vex) (((vex) & 0x78) >> 3) /* VEX3 Byte2, VEX2 Byte1 */
#define X86_VEX_P(vex) ((vex) & 0x03) /* VEX3 Byte2, VEX2 Byte1 */
#define X86_VEX_M_MAX 0x1f /* VEX3.M Maximum value */
+/* XOP bit fields */
+#define X86_XOP_R(xop) ((xop) & 0x80) /* XOP Byte2 */
+#define X86_XOP_X(xop) ((xop) & 0x40) /* XOP Byte2 */
+#define X86_XOP_B(xop) ((xop) & 0x20) /* XOP Byte2 */
+#define X86_XOP_M(xop) ((xop) & 0x1f) /* XOP Byte2 */
+#define X86_XOP_W(xop) ((xop) & 0x80) /* XOP Byte3 */
+#define X86_XOP_V(xop) ((xop) & 0x78) /* XOP Byte3 */
+#define X86_XOP_L(xop) ((xop) & 0x04) /* XOP Byte3 */
+#define X86_XOP_P(xop) ((xop) & 0x03) /* XOP Byte3 */
+#define X86_XOP_M_MIN 0x08 /* Min of XOP.M */
+#define X86_XOP_M_MAX 0x1f /* Max of XOP.M */
extern void insn_init(struct insn *insn, const void *kaddr, int buf_len, int x86_64);
extern int insn_get_prefixes(struct insn *insn);
@@ -178,7 +192,7 @@ static inline insn_byte_t insn_rex2_m_bit(struct insn *insn)
return X86_REX2_M(insn->rex_prefix.bytes[1]);
}
-static inline int insn_is_avx(struct insn *insn)
+static inline int insn_is_avx_or_xop(struct insn *insn)
{
if (!insn->prefixes.got)
insn_get_prefixes(insn);
@@ -192,6 +206,22 @@ static inline int insn_is_evex(struct insn *insn)
return (insn->vex_prefix.nbytes == 4);
}
+/* If we already know this is AVX/XOP encoded */
+static inline int avx_insn_is_xop(struct insn *insn)
+{
+ insn_attr_t attr = inat_get_opcode_attribute(insn->vex_prefix.bytes[0]);
+
+ return inat_is_xop_prefix(attr);
+}
+
+static inline int insn_is_xop(struct insn *insn)
+{
+ if (!insn_is_avx_or_xop(insn))
+ return 0;
+
+ return avx_insn_is_xop(insn);
+}
+
static inline int insn_has_emulate_prefix(struct insn *insn)
{
return !!insn->emulate_prefix_size;
@@ -222,11 +252,26 @@ static inline insn_byte_t insn_vex_w_bit(struct insn *insn)
return X86_VEX_W(insn->vex_prefix.bytes[2]);
}
+static inline insn_byte_t insn_xop_map_bits(struct insn *insn)
+{
+ if (insn->xop_prefix.nbytes < 3) /* XOP is 3 bytes */
+ return 0;
+ return X86_XOP_M(insn->xop_prefix.bytes[1]);
+}
+
+static inline insn_byte_t insn_xop_p_bits(struct insn *insn)
+{
+ return X86_XOP_P(insn->vex_prefix.bytes[2]);
+}
+
/* Get the last prefix id from last prefix or VEX prefix */
static inline int insn_last_prefix_id(struct insn *insn)
{
- if (insn_is_avx(insn))
+ if (insn_is_avx_or_xop(insn)) {
+ if (avx_insn_is_xop(insn))
+ return insn_xop_p_bits(insn);
return insn_vex_p_bits(insn); /* VEX_p is a SIMD prefix id */
+ }
if (insn->prefixes.bytes[3])
return inat_get_last_prefix_id(insn->prefixes.bytes[3]);
@@ -267,7 +312,6 @@ static inline int insn_offset_immediate(struct insn *insn)
/**
* for_each_insn_prefix() -- Iterate prefixes in the instruction
* @insn: Pointer to struct insn.
- * @idx: Index storage.
* @prefix: Prefix byte.
*
* Iterate prefix bytes of given @insn. Each prefix byte is stored in @prefix
@@ -276,8 +320,8 @@ static inline int insn_offset_immediate(struct insn *insn)
* Since prefixes.nbytes can be bigger than 4 if some prefixes
* are repeated, it cannot be used for looping over the prefixes.
*/
-#define for_each_insn_prefix(insn, idx, prefix) \
- for (idx = 0; idx < ARRAY_SIZE(insn->prefixes.bytes) && (prefix = insn->prefixes.bytes[idx]) != 0; idx++)
+#define for_each_insn_prefix(insn, prefix) \
+ for (int idx = 0; idx < ARRAY_SIZE(insn->prefixes.bytes) && (prefix = insn->prefixes.bytes[idx]) != 0; idx++)
#define POP_SS_OPCODE 0x1f
#define MOV_SREG_OPCODE 0x8e
diff --git a/tools/arch/x86/include/asm/io.h b/tools/arch/x86/include/asm/io.h
new file mode 100644
index 000000000000..ecad61a3ea52
--- /dev/null
+++ b/tools/arch/x86/include/asm/io.h
@@ -0,0 +1,101 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _TOOLS_ASM_X86_IO_H
+#define _TOOLS_ASM_X86_IO_H
+
+#include <linux/compiler.h>
+#include <linux/types.h>
+#include "special_insns.h"
+
+#define build_mmio_read(name, size, type, reg, barrier) \
+static inline type name(const volatile void __iomem *addr) \
+{ type ret; asm volatile("mov" size " %1,%0":reg (ret) \
+:"m" (*(volatile type __force *)addr) barrier); return ret; }
+
+#define build_mmio_write(name, size, type, reg, barrier) \
+static inline void name(type val, volatile void __iomem *addr) \
+{ asm volatile("mov" size " %0,%1": :reg (val), \
+"m" (*(volatile type __force *)addr) barrier); }
+
+build_mmio_read(readb, "b", unsigned char, "=q", :"memory")
+build_mmio_read(readw, "w", unsigned short, "=r", :"memory")
+build_mmio_read(readl, "l", unsigned int, "=r", :"memory")
+
+build_mmio_read(__readb, "b", unsigned char, "=q", )
+build_mmio_read(__readw, "w", unsigned short, "=r", )
+build_mmio_read(__readl, "l", unsigned int, "=r", )
+
+build_mmio_write(writeb, "b", unsigned char, "q", :"memory")
+build_mmio_write(writew, "w", unsigned short, "r", :"memory")
+build_mmio_write(writel, "l", unsigned int, "r", :"memory")
+
+build_mmio_write(__writeb, "b", unsigned char, "q", )
+build_mmio_write(__writew, "w", unsigned short, "r", )
+build_mmio_write(__writel, "l", unsigned int, "r", )
+
+#define readb readb
+#define readw readw
+#define readl readl
+#define readb_relaxed(a) __readb(a)
+#define readw_relaxed(a) __readw(a)
+#define readl_relaxed(a) __readl(a)
+#define __raw_readb __readb
+#define __raw_readw __readw
+#define __raw_readl __readl
+
+#define writeb writeb
+#define writew writew
+#define writel writel
+#define writeb_relaxed(v, a) __writeb(v, a)
+#define writew_relaxed(v, a) __writew(v, a)
+#define writel_relaxed(v, a) __writel(v, a)
+#define __raw_writeb __writeb
+#define __raw_writew __writew
+#define __raw_writel __writel
+
+#ifdef __x86_64__
+
+build_mmio_read(readq, "q", u64, "=r", :"memory")
+build_mmio_read(__readq, "q", u64, "=r", )
+build_mmio_write(writeq, "q", u64, "r", :"memory")
+build_mmio_write(__writeq, "q", u64, "r", )
+
+#define readq_relaxed(a) __readq(a)
+#define writeq_relaxed(v, a) __writeq(v, a)
+
+#define __raw_readq __readq
+#define __raw_writeq __writeq
+
+/* Let people know that we have them */
+#define readq readq
+#define writeq writeq
+
+#endif /* __x86_64__ */
+
+#include <asm-generic/io.h>
+
+/**
+ * iosubmit_cmds512 - copy data to single MMIO location, in 512-bit units
+ * @dst: destination, in MMIO space (must be 512-bit aligned)
+ * @src: source
+ * @count: number of 512 bits quantities to submit
+ *
+ * Submit data from kernel space to MMIO space, in units of 512 bits at a
+ * time. Order of access is not guaranteed, nor is a memory barrier
+ * performed afterwards.
+ *
+ * Warning: Do not use this helper unless your driver has checked that the CPU
+ * instruction is supported on the platform.
+ */
+static inline void iosubmit_cmds512(void __iomem *dst, const void *src,
+ size_t count)
+{
+ const u8 *from = src;
+ const u8 *end = from + count * 64;
+
+ while (from < end) {
+ movdir64b(dst, from);
+ from += 64;
+ }
+}
+
+#endif /* _TOOLS_ASM_X86_IO_H */
diff --git a/tools/arch/x86/include/asm/msr-index.h b/tools/arch/x86/include/asm/msr-index.h
index 5cfb5d74dd5f..9e1720d73244 100644
--- a/tools/arch/x86/include/asm/msr-index.h
+++ b/tools/arch/x86/include/asm/msr-index.h
@@ -315,12 +315,17 @@
#define PERF_CAP_PT_IDX 16
#define MSR_PEBS_LD_LAT_THRESHOLD 0x000003f6
-#define PERF_CAP_PEBS_TRAP BIT_ULL(6)
-#define PERF_CAP_ARCH_REG BIT_ULL(7)
-#define PERF_CAP_PEBS_FORMAT 0xf00
-#define PERF_CAP_PEBS_BASELINE BIT_ULL(14)
-#define PERF_CAP_PEBS_MASK (PERF_CAP_PEBS_TRAP | PERF_CAP_ARCH_REG | \
- PERF_CAP_PEBS_FORMAT | PERF_CAP_PEBS_BASELINE)
+
+#define PERF_CAP_LBR_FMT 0x3f
+#define PERF_CAP_PEBS_TRAP BIT_ULL(6)
+#define PERF_CAP_ARCH_REG BIT_ULL(7)
+#define PERF_CAP_PEBS_FORMAT 0xf00
+#define PERF_CAP_FW_WRITES BIT_ULL(13)
+#define PERF_CAP_PEBS_BASELINE BIT_ULL(14)
+#define PERF_CAP_PEBS_TIMING_INFO BIT_ULL(17)
+#define PERF_CAP_PEBS_MASK (PERF_CAP_PEBS_TRAP | PERF_CAP_ARCH_REG | \
+ PERF_CAP_PEBS_FORMAT | PERF_CAP_PEBS_BASELINE | \
+ PERF_CAP_PEBS_TIMING_INFO)
#define MSR_IA32_RTIT_CTL 0x00000570
#define RTIT_CTL_TRACEEN BIT(0)
@@ -419,6 +424,7 @@
#define DEBUGCTLMSR_FREEZE_PERFMON_ON_PMI (1UL << 12)
#define DEBUGCTLMSR_FREEZE_IN_SMM_BIT 14
#define DEBUGCTLMSR_FREEZE_IN_SMM (1UL << DEBUGCTLMSR_FREEZE_IN_SMM_BIT)
+#define DEBUGCTLMSR_RTM_DEBUG BIT(15)
#define MSR_PEBS_FRONTEND 0x000003f7
@@ -630,6 +636,11 @@
#define MSR_AMD_PPIN 0xc00102f1
#define MSR_AMD64_CPUID_FN_7 0xc0011002
#define MSR_AMD64_CPUID_FN_1 0xc0011004
+
+#define MSR_AMD64_CPUID_EXT_FEAT 0xc0011005
+#define MSR_AMD64_CPUID_EXT_FEAT_TOPOEXT_BIT 54
+#define MSR_AMD64_CPUID_EXT_FEAT_TOPOEXT BIT_ULL(MSR_AMD64_CPUID_EXT_FEAT_TOPOEXT_BIT)
+
#define MSR_AMD64_LS_CFG 0xc0011020
#define MSR_AMD64_DC_CFG 0xc0011022
#define MSR_AMD64_TW_CFG 0xc0011023
@@ -698,8 +709,15 @@
#define MSR_AMD64_SNP_VMSA_REG_PROT BIT_ULL(MSR_AMD64_SNP_VMSA_REG_PROT_BIT)
#define MSR_AMD64_SNP_SMT_PROT_BIT 17
#define MSR_AMD64_SNP_SMT_PROT BIT_ULL(MSR_AMD64_SNP_SMT_PROT_BIT)
-#define MSR_AMD64_SNP_RESV_BIT 18
+#define MSR_AMD64_SNP_SECURE_AVIC_BIT 18
+#define MSR_AMD64_SNP_SECURE_AVIC BIT_ULL(MSR_AMD64_SNP_SECURE_AVIC_BIT)
+#define MSR_AMD64_SNP_RESV_BIT 19
#define MSR_AMD64_SNP_RESERVED_MASK GENMASK_ULL(63, MSR_AMD64_SNP_RESV_BIT)
+#define MSR_AMD64_SAVIC_CONTROL 0xc0010138
+#define MSR_AMD64_SAVIC_EN_BIT 0
+#define MSR_AMD64_SAVIC_EN BIT_ULL(MSR_AMD64_SAVIC_EN_BIT)
+#define MSR_AMD64_SAVIC_ALLOWEDNMI_BIT 1
+#define MSR_AMD64_SAVIC_ALLOWEDNMI BIT_ULL(MSR_AMD64_SAVIC_ALLOWEDNMI_BIT)
#define MSR_AMD64_RMP_BASE 0xc0010132
#define MSR_AMD64_RMP_END 0xc0010133
#define MSR_AMD64_RMP_CFG 0xc0010136
@@ -732,6 +750,12 @@
#define MSR_AMD64_PERF_CNTR_GLOBAL_STATUS 0xc0000300
#define MSR_AMD64_PERF_CNTR_GLOBAL_CTL 0xc0000301
#define MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR 0xc0000302
+#define MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_SET 0xc0000303
+
+/* AMD Hardware Feedback Support MSRs */
+#define MSR_AMD_WORKLOAD_CLASS_CONFIG 0xc0000500
+#define MSR_AMD_WORKLOAD_CLASS_ID 0xc0000501
+#define MSR_AMD_WORKLOAD_HRST 0xc0000502
/* AMD Last Branch Record MSRs */
#define MSR_AMD64_LBR_SELECT 0xc000010e
@@ -831,6 +855,7 @@
#define MSR_K7_HWCR_SMMLOCK BIT_ULL(MSR_K7_HWCR_SMMLOCK_BIT)
#define MSR_K7_HWCR_IRPERF_EN_BIT 30
#define MSR_K7_HWCR_IRPERF_EN BIT_ULL(MSR_K7_HWCR_IRPERF_EN_BIT)
+#define MSR_K7_HWCR_CPUID_USER_DIS_BIT 35
#define MSR_K7_FID_VID_CTL 0xc0010041
#define MSR_K7_FID_VID_STATUS 0xc0010042
#define MSR_K7_HWCR_CPB_DIS_BIT 25
@@ -1216,6 +1241,8 @@
/* - AMD: */
#define MSR_IA32_MBA_BW_BASE 0xc0000200
#define MSR_IA32_SMBA_BW_BASE 0xc0000280
+#define MSR_IA32_L3_QOS_ABMC_CFG 0xc00003fd
+#define MSR_IA32_L3_QOS_EXT_CFG 0xc00003ff
#define MSR_IA32_EVT_CFG_BASE 0xc0000400
/* AMD-V MSRs */
diff --git a/tools/arch/x86/include/asm/special_insns.h b/tools/arch/x86/include/asm/special_insns.h
new file mode 100644
index 000000000000..04af42a99c38
--- /dev/null
+++ b/tools/arch/x86/include/asm/special_insns.h
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _TOOLS_ASM_X86_SPECIAL_INSNS_H
+#define _TOOLS_ASM_X86_SPECIAL_INSNS_H
+
+/* The dst parameter must be 64-bytes aligned */
+static inline void movdir64b(void *dst, const void *src)
+{
+ const struct { char _[64]; } *__src = src;
+ struct { char _[64]; } *__dst = dst;
+
+ /*
+ * MOVDIR64B %(rdx), rax.
+ *
+ * Both __src and __dst must be memory constraints in order to tell the
+ * compiler that no other memory accesses should be reordered around
+ * this one.
+ *
+ * Also, both must be supplied as lvalues because this tells
+ * the compiler what the object is (its size) the instruction accesses.
+ * I.e., not the pointers but what they point to, thus the deref'ing '*'.
+ */
+ asm volatile(".byte 0x66, 0x0f, 0x38, 0xf8, 0x02"
+ : "+m" (*__dst)
+ : "m" (*__src), "a" (__dst), "d" (__src));
+}
+
+#endif /* _TOOLS_ASM_X86_SPECIAL_INSNS_H */
diff --git a/tools/arch/x86/include/uapi/asm/kvm.h b/tools/arch/x86/include/uapi/asm/kvm.h
index 6f3499507c5e..d420c9c066d4 100644
--- a/tools/arch/x86/include/uapi/asm/kvm.h
+++ b/tools/arch/x86/include/uapi/asm/kvm.h
@@ -35,6 +35,11 @@
#define MC_VECTOR 18
#define XM_VECTOR 19
#define VE_VECTOR 20
+#define CP_VECTOR 21
+
+#define HV_VECTOR 28
+#define VC_VECTOR 29
+#define SX_VECTOR 30
/* Select x86 specific features in <linux/kvm.h> */
#define __KVM_HAVE_PIT
@@ -411,6 +416,35 @@ struct kvm_xcrs {
__u64 padding[16];
};
+#define KVM_X86_REG_TYPE_MSR 2
+#define KVM_X86_REG_TYPE_KVM 3
+
+#define KVM_X86_KVM_REG_SIZE(reg) \
+({ \
+ reg == KVM_REG_GUEST_SSP ? KVM_REG_SIZE_U64 : 0; \
+})
+
+#define KVM_X86_REG_TYPE_SIZE(type, reg) \
+({ \
+ __u64 type_size = (__u64)type << 32; \
+ \
+ type_size |= type == KVM_X86_REG_TYPE_MSR ? KVM_REG_SIZE_U64 : \
+ type == KVM_X86_REG_TYPE_KVM ? KVM_X86_KVM_REG_SIZE(reg) : \
+ 0; \
+ type_size; \
+})
+
+#define KVM_X86_REG_ID(type, index) \
+ (KVM_REG_X86 | KVM_X86_REG_TYPE_SIZE(type, index) | index)
+
+#define KVM_X86_REG_MSR(index) \
+ KVM_X86_REG_ID(KVM_X86_REG_TYPE_MSR, index)
+#define KVM_X86_REG_KVM(index) \
+ KVM_X86_REG_ID(KVM_X86_REG_TYPE_KVM, index)
+
+/* KVM-defined registers starting from 0 */
+#define KVM_REG_GUEST_SSP 0
+
#define KVM_SYNC_X86_REGS (1UL << 0)
#define KVM_SYNC_X86_SREGS (1UL << 1)
#define KVM_SYNC_X86_EVENTS (1UL << 2)
@@ -965,7 +999,13 @@ struct kvm_tdx_cmd {
struct kvm_tdx_capabilities {
__u64 supported_attrs;
__u64 supported_xfam;
- __u64 reserved[254];
+
+ __u64 kernel_tdvmcallinfo_1_r11;
+ __u64 user_tdvmcallinfo_1_r11;
+ __u64 kernel_tdvmcallinfo_1_r12;
+ __u64 user_tdvmcallinfo_1_r12;
+
+ __u64 reserved[250];
/* Configurable CPUID bits for userspace */
struct kvm_cpuid2 cpuid;
diff --git a/tools/arch/x86/include/uapi/asm/kvm_perf.h b/tools/arch/x86/include/uapi/asm/kvm_perf.h
deleted file mode 100644
index 125cf5cdf6c5..000000000000
--- a/tools/arch/x86/include/uapi/asm/kvm_perf.h
+++ /dev/null
@@ -1,17 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
-#ifndef _ASM_X86_KVM_PERF_H
-#define _ASM_X86_KVM_PERF_H
-
-#include <asm/svm.h>
-#include <asm/vmx.h>
-#include <asm/kvm.h>
-
-#define DECODE_STR_LEN 20
-
-#define VCPU_ID "vcpu_id"
-
-#define KVM_ENTRY_TRACE "kvm:kvm_entry"
-#define KVM_EXIT_TRACE "kvm:kvm_exit"
-#define KVM_EXIT_REASON "exit_reason"
-
-#endif /* _ASM_X86_KVM_PERF_H */
diff --git a/tools/arch/x86/include/uapi/asm/svm.h b/tools/arch/x86/include/uapi/asm/svm.h
index 9c640a521a67..650e3256ea7d 100644
--- a/tools/arch/x86/include/uapi/asm/svm.h
+++ b/tools/arch/x86/include/uapi/asm/svm.h
@@ -118,6 +118,10 @@
#define SVM_VMGEXIT_AP_CREATE 1
#define SVM_VMGEXIT_AP_DESTROY 2
#define SVM_VMGEXIT_SNP_RUN_VMPL 0x80000018
+#define SVM_VMGEXIT_SAVIC 0x8000001a
+#define SVM_VMGEXIT_SAVIC_REGISTER_GPA 0
+#define SVM_VMGEXIT_SAVIC_UNREGISTER_GPA 1
+#define SVM_VMGEXIT_SAVIC_SELF_GPA ~0ULL
#define SVM_VMGEXIT_HV_FEATURES 0x8000fffd
#define SVM_VMGEXIT_TERM_REQUEST 0x8000fffe
#define SVM_VMGEXIT_TERM_REASON(reason_set, reason_code) \
diff --git a/tools/arch/x86/include/uapi/asm/vmx.h b/tools/arch/x86/include/uapi/asm/vmx.h
index f0f4a4cf84a7..1baa86dfe029 100644
--- a/tools/arch/x86/include/uapi/asm/vmx.h
+++ b/tools/arch/x86/include/uapi/asm/vmx.h
@@ -93,7 +93,10 @@
#define EXIT_REASON_TPAUSE 68
#define EXIT_REASON_BUS_LOCK 74
#define EXIT_REASON_NOTIFY 75
+#define EXIT_REASON_SEAMCALL 76
#define EXIT_REASON_TDCALL 77
+#define EXIT_REASON_MSR_READ_IMM 84
+#define EXIT_REASON_MSR_WRITE_IMM 85
#define VMX_EXIT_REASONS \
{ EXIT_REASON_EXCEPTION_NMI, "EXCEPTION_NMI" }, \
@@ -158,7 +161,9 @@
{ EXIT_REASON_TPAUSE, "TPAUSE" }, \
{ EXIT_REASON_BUS_LOCK, "BUS_LOCK" }, \
{ EXIT_REASON_NOTIFY, "NOTIFY" }, \
- { EXIT_REASON_TDCALL, "TDCALL" }
+ { EXIT_REASON_TDCALL, "TDCALL" }, \
+ { EXIT_REASON_MSR_READ_IMM, "MSR_READ_IMM" }, \
+ { EXIT_REASON_MSR_WRITE_IMM, "MSR_WRITE_IMM" }
#define VMX_EXIT_REASON_FLAGS \
{ VMX_EXIT_REASONS_FAILED_VMENTRY, "FAILED_VMENTRY" }
diff --git a/tools/arch/x86/lib/inat.c b/tools/arch/x86/lib/inat.c
index dfbcc6405941..ffcb0e27453b 100644
--- a/tools/arch/x86/lib/inat.c
+++ b/tools/arch/x86/lib/inat.c
@@ -81,3 +81,16 @@ insn_attr_t inat_get_avx_attribute(insn_byte_t opcode, insn_byte_t vex_m,
return table[opcode];
}
+insn_attr_t inat_get_xop_attribute(insn_byte_t opcode, insn_byte_t map_select)
+{
+ const insn_attr_t *table;
+
+ if (map_select < X86_XOP_M_MIN || map_select > X86_XOP_M_MAX)
+ return 0;
+ map_select -= X86_XOP_M_MIN;
+ /* At first, this checks the master table */
+ table = inat_xop_tables[map_select];
+ if (!table)
+ return 0;
+ return table[opcode];
+}
diff --git a/tools/arch/x86/lib/insn.c b/tools/arch/x86/lib/insn.c
index bce69c6bfa69..1d1c57c74d1f 100644
--- a/tools/arch/x86/lib/insn.c
+++ b/tools/arch/x86/lib/insn.c
@@ -200,12 +200,15 @@ found:
}
insn->rex_prefix.got = 1;
- /* Decode VEX prefix */
+ /* Decode VEX/XOP prefix */
b = peek_next(insn_byte_t, insn);
- attr = inat_get_opcode_attribute(b);
- if (inat_is_vex_prefix(attr)) {
+ if (inat_is_vex_prefix(attr) || inat_is_xop_prefix(attr)) {
insn_byte_t b2 = peek_nbyte_next(insn_byte_t, insn, 1);
- if (!insn->x86_64) {
+
+ if (inat_is_xop_prefix(attr) && X86_MODRM_REG(b2) == 0) {
+ /* Grp1A.0 is always POP Ev */
+ goto vex_end;
+ } else if (!insn->x86_64) {
/*
* In 32-bits mode, if the [7:6] bits (mod bits of
* ModRM) on the second byte are not 11b, it is
@@ -226,13 +229,13 @@ found:
if (insn->x86_64 && X86_VEX_W(b2))
/* VEX.W overrides opnd_size */
insn->opnd_bytes = 8;
- } else if (inat_is_vex3_prefix(attr)) {
+ } else if (inat_is_vex3_prefix(attr) || inat_is_xop_prefix(attr)) {
b2 = peek_nbyte_next(insn_byte_t, insn, 2);
insn_set_byte(&insn->vex_prefix, 2, b2);
insn->vex_prefix.nbytes = 3;
insn->next_byte += 3;
if (insn->x86_64 && X86_VEX_W(b2))
- /* VEX.W overrides opnd_size */
+ /* VEX.W/XOP.W overrides opnd_size */
insn->opnd_bytes = 8;
} else {
/*
@@ -288,9 +291,22 @@ int insn_get_opcode(struct insn *insn)
insn_set_byte(opcode, 0, op);
opcode->nbytes = 1;
- /* Check if there is VEX prefix or not */
- if (insn_is_avx(insn)) {
+ /* Check if there is VEX/XOP prefix or not */
+ if (insn_is_avx_or_xop(insn)) {
insn_byte_t m, p;
+
+ /* XOP prefix has different encoding */
+ if (unlikely(avx_insn_is_xop(insn))) {
+ m = insn_xop_map_bits(insn);
+ insn->attr = inat_get_xop_attribute(op, m);
+ if (!inat_accept_xop(insn->attr)) {
+ insn->attr = 0;
+ return -EINVAL;
+ }
+ /* XOP has only 1 byte for opcode */
+ goto end;
+ }
+
m = insn_vex_m_bits(insn);
p = insn_vex_p_bits(insn);
insn->attr = inat_get_avx_attribute(op, m, p);
@@ -383,7 +399,8 @@ int insn_get_modrm(struct insn *insn)
pfx_id = insn_last_prefix_id(insn);
insn->attr = inat_get_group_attribute(mod, pfx_id,
insn->attr);
- if (insn_is_avx(insn) && !inat_accept_vex(insn->attr)) {
+ if (insn_is_avx_or_xop(insn) && !inat_accept_vex(insn->attr) &&
+ !inat_accept_xop(insn->attr)) {
/* Bad insn */
insn->attr = 0;
return -EINVAL;
diff --git a/tools/arch/x86/lib/x86-opcode-map.txt b/tools/arch/x86/lib/x86-opcode-map.txt
index 262f7ca1fb95..2a4e69ecc2de 100644
--- a/tools/arch/x86/lib/x86-opcode-map.txt
+++ b/tools/arch/x86/lib/x86-opcode-map.txt
@@ -27,6 +27,11 @@
# (evo): this opcode is changed by EVEX prefix (EVEX opcode)
# (v): this opcode requires VEX prefix.
# (v1): this opcode only supports 128bit VEX.
+# (xop): this opcode accepts XOP prefix.
+#
+# XOP Superscripts
+# (W=0): this opcode requires XOP.W == 0
+# (W=1): this opcode requires XOP.W == 1
#
# Last Prefix Superscripts
# - (66): the last prefix is 0x66
@@ -194,7 +199,7 @@ AVXcode:
8c: MOV Ev,Sw
8d: LEA Gv,M
8e: MOV Sw,Ew
-8f: Grp1A (1A) | POP Ev (d64)
+8f: Grp1A (1A) | POP Ev (d64) | XOP (Prefix)
# 0x90 - 0x9f
90: NOP | PAUSE (F3) | XCHG r8,rAX
91: XCHG rCX/r9,rAX
@@ -1106,6 +1111,84 @@ AVXcode: 7
f8: URDMSR Rq,Id (F2),(v1),(11B) | UWRMSR Id,Rq (F3),(v1),(11B)
EndTable
+# From AMD64 Architecture Programmer's Manual Vol3, Appendix A.1.5
+Table: XOP map 8h
+Referrer:
+XOPcode: 0
+85: VPMACSSWW Vo,Ho,Wo,Lo
+86: VPMACSSWD Vo,Ho,Wo,Lo
+87: VPMACSSDQL Vo,Ho,Wo,Lo
+8e: VPMACSSDD Vo,Ho,Wo,Lo
+8f: VPMACSSDQH Vo,Ho,Wo,Lo
+95: VPMACSWW Vo,Ho,Wo,Lo
+96: VPMACSWD Vo,Ho,Wo,Lo
+97: VPMACSDQL Vo,Ho,Wo,Lo
+9e: VPMACSDD Vo,Ho,Wo,Lo
+9f: VPMACSDQH Vo,Ho,Wo,Lo
+a2: VPCMOV Vx,Hx,Wx,Lx (W=0) | VPCMOV Vx,Hx,Lx,Wx (W=1)
+a3: VPPERM Vo,Ho,Wo,Lo (W=0) | VPPERM Vo,Ho,Lo,Wo (W=1)
+a6: VPMADCSSWD Vo,Ho,Wo,Lo
+b6: VPMADCSWD Vo,Ho,Wo,Lo
+c0: VPROTB Vo,Wo,Ib
+c1: VPROTW Vo,Wo,Ib
+c2: VPROTD Vo,Wo,Ib
+c3: VPROTQ Vo,Wo,Ib
+cc: VPCOMccB Vo,Ho,Wo,Ib
+cd: VPCOMccW Vo,Ho,Wo,Ib
+ce: VPCOMccD Vo,Ho,Wo,Ib
+cf: VPCOMccQ Vo,Ho,Wo,Ib
+ec: VPCOMccUB Vo,Ho,Wo,Ib
+ed: VPCOMccUW Vo,Ho,Wo,Ib
+ee: VPCOMccUD Vo,Ho,Wo,Ib
+ef: VPCOMccUQ Vo,Ho,Wo,Ib
+EndTable
+
+Table: XOP map 9h
+Referrer:
+XOPcode: 1
+01: GrpXOP1
+02: GrpXOP2
+12: GrpXOP3
+80: VFRCZPS Vx,Wx
+81: VFRCZPD Vx,Wx
+82: VFRCZSS Vq,Wss
+83: VFRCZSD Vq,Wsd
+90: VPROTB Vo,Wo,Ho (W=0) | VPROTB Vo,Ho,Wo (W=1)
+91: VPROTW Vo,Wo,Ho (W=0) | VPROTB Vo,Ho,Wo (W=1)
+92: VPROTD Vo,Wo,Ho (W=0) | VPROTB Vo,Ho,Wo (W=1)
+93: VPROTQ Vo,Wo,Ho (W=0) | VPROTB Vo,Ho,Wo (W=1)
+94: VPSHLB Vo,Wo,Ho (W=0) | VPSHLB Vo,Ho,Wo (W=1)
+95: VPSHLW Vo,Wo,Ho (W=0) | VPSHLW Vo,Ho,Wo (W=1)
+96: VPSHLD Vo,Wo,Ho (W=0) | VPSHLD Vo,Ho,Wo (W=1)
+97: VPSHLQ Vo,Wo,Ho (W=0) | VPSHLQ Vo,Ho,Wo (W=1)
+98: VPSHAB Vo,Wo,Ho (W=0) | VPSHAB Vo,Ho,Wo (W=1)
+99: VPSHAW Vo,Wo,Ho (W=0) | VPSHAW Vo,Ho,Wo (W=1)
+9a: VPSHAD Vo,Wo,Ho (W=0) | VPSHAD Vo,Ho,Wo (W=1)
+9b: VPSHAQ Vo,Wo,Ho (W=0) | VPSHAQ Vo,Ho,Wo (W=1)
+c1: VPHADDBW Vo,Wo
+c2: VPHADDBD Vo,Wo
+c3: VPHADDBQ Vo,Wo
+c6: VPHADDWD Vo,Wo
+c7: VPHADDWQ Vo,Wo
+cb: VPHADDDQ Vo,Wo
+d1: VPHADDUBWD Vo,Wo
+d2: VPHADDUBD Vo,Wo
+d3: VPHADDUBQ Vo,Wo
+d6: VPHADDUWD Vo,Wo
+d7: VPHADDUWQ Vo,Wo
+db: VPHADDUDQ Vo,Wo
+e1: VPHSUBBW Vo,Wo
+e2: VPHSUBWD Vo,Wo
+e3: VPHSUBDQ Vo,Wo
+EndTable
+
+Table: XOP map Ah
+Referrer:
+XOPcode: 2
+10: BEXTR Gy,Ey,Id
+12: GrpXOP4
+EndTable
+
GrpTable: Grp1
0: ADD
1: OR
@@ -1320,3 +1403,29 @@ GrpTable: GrpRNG
4: xcrypt-cfb
5: xcrypt-ofb
EndTable
+
+# GrpXOP1-4 is shown in AMD APM Vol.3 Appendix A as XOP group #1-4
+GrpTable: GrpXOP1
+1: BLCFILL By,Ey (xop)
+2: BLSFILL By,Ey (xop)
+3: BLCS By,Ey (xop)
+4: TZMSK By,Ey (xop)
+5: BLCIC By,Ey (xop)
+6: BLSIC By,Ey (xop)
+7: T1MSKC By,Ey (xop)
+EndTable
+
+GrpTable: GrpXOP2
+1: BLCMSK By,Ey (xop)
+6: BLCI By,Ey (xop)
+EndTable
+
+GrpTable: GrpXOP3
+0: LLWPCB Ry (xop)
+1: SLWPCB Ry (xop)
+EndTable
+
+GrpTable: GrpXOP4
+0: LWPINS By,Ed,Id (xop)
+1: LWPVAL By,Ed,Id (xop)
+EndTable
diff --git a/tools/arch/x86/tools/gen-cpu-feature-names-x86.awk b/tools/arch/x86/tools/gen-cpu-feature-names-x86.awk
new file mode 100644
index 000000000000..cc4c7a3e6c2e
--- /dev/null
+++ b/tools/arch/x86/tools/gen-cpu-feature-names-x86.awk
@@ -0,0 +1,34 @@
+#!/bin/awk -f
+# SPDX-License-Identifier: GPL-2.0
+#
+# Copyright (c) 2025, Oracle and/or its affiliates.
+#
+# Usage: awk -f gen-cpu-feature-names-x86.awk cpufeatures.h > cpu-feature-names.c
+#
+
+BEGIN {
+ print "/* cpu feature name array generated from cpufeatures.h */"
+ print "/* Do not change this code. */"
+ print
+ print "static const char *cpu_feature_names[(NCAPINTS+NBUGINTS)*32] = {"
+
+ value_expr = "\\([0-9*+ ]+\\)"
+}
+
+/^#define X86_FEATURE_/ {
+ if (match($0, value_expr)) {
+ value = substr($0, RSTART + 1, RLENGTH - 2)
+ print "\t[" value "] = \"" $2 "\","
+ }
+}
+
+/^#define X86_BUG_/ {
+ if (match($0, value_expr)) {
+ value = substr($0, RSTART + 1, RLENGTH - 2)
+ print "\t[NCAPINTS*32+(" value ")] = \"" $2 "\","
+ }
+}
+
+END {
+ print "};"
+}
diff --git a/tools/arch/x86/tools/gen-insn-attr-x86.awk b/tools/arch/x86/tools/gen-insn-attr-x86.awk
index 2c19d7fc8a85..7ea1b75e59b7 100644
--- a/tools/arch/x86/tools/gen-insn-attr-x86.awk
+++ b/tools/arch/x86/tools/gen-insn-attr-x86.awk
@@ -21,6 +21,7 @@ function clear_vars() {
eid = -1 # escape id
gid = -1 # group id
aid = -1 # AVX id
+ xopid = -1 # XOP id
tname = ""
}
@@ -39,9 +40,11 @@ BEGIN {
ggid = 1
geid = 1
gaid = 0
+ gxopid = 0
delete etable
delete gtable
delete atable
+ delete xoptable
opnd_expr = "^[A-Za-z/]"
ext_expr = "^\\("
@@ -61,6 +64,7 @@ BEGIN {
imm_flag["Ob"] = "INAT_MOFFSET"
imm_flag["Ov"] = "INAT_MOFFSET"
imm_flag["Lx"] = "INAT_MAKE_IMM(INAT_IMM_BYTE)"
+ imm_flag["Lo"] = "INAT_MAKE_IMM(INAT_IMM_BYTE)"
modrm_expr = "^([CDEGMNPQRSUVW/][a-z]+|NTA|T[012])"
force64_expr = "\\([df]64\\)"
@@ -87,6 +91,8 @@ BEGIN {
evexonly_expr = "\\(ev\\)"
# (es) is the same as (ev) but also "SCALABLE" i.e. W and pp determine operand size
evex_scalable_expr = "\\(es\\)"
+ # All opcodes in XOP table or with (xop) superscript accept XOP prefix
+ xopok_expr = "\\(xop\\)"
prefix_expr = "\\(Prefix\\)"
prefix_num["Operand-Size"] = "INAT_PFX_OPNDSZ"
@@ -106,6 +112,7 @@ BEGIN {
prefix_num["VEX+2byte"] = "INAT_PFX_VEX3"
prefix_num["EVEX"] = "INAT_PFX_EVEX"
prefix_num["REX2"] = "INAT_PFX_REX2"
+ prefix_num["XOP"] = "INAT_PFX_XOP"
clear_vars()
}
@@ -147,6 +154,7 @@ function array_size(arr, i,c) {
if (NF != 1) {
# AVX/escape opcode table
aid = $2
+ xopid = -1
if (gaid <= aid)
gaid = aid + 1
if (tname == "") # AVX only opcode table
@@ -156,6 +164,20 @@ function array_size(arr, i,c) {
tname = "inat_primary_table"
}
+/^XOPcode:/ {
+ if (NF != 1) {
+ # XOP opcode table
+ xopid = $2
+ aid = -1
+ if (gxopid <= xopid)
+ gxopid = xopid + 1
+ if (tname == "") # XOP only opcode table
+ tname = sprintf("inat_xop_table_%d", $2)
+ }
+ if (xopid == -1 && eid == -1) # primary opcode table
+ tname = "inat_primary_table"
+}
+
/^GrpTable:/ {
print "/* " $0 " */"
if (!($2 in group))
@@ -206,6 +228,8 @@ function print_table(tbl,name,fmt,n)
etable[eid,0] = tname
if (aid >= 0)
atable[aid,0] = tname
+ else if (xopid >= 0)
+ xoptable[xopid] = tname
}
if (array_size(lptable1) != 0) {
print_table(lptable1,tname "_1[INAT_OPCODE_TABLE_SIZE]",
@@ -347,6 +371,8 @@ function convert_operands(count,opnd, i,j,imm,mod)
flags = add_flags(flags, "INAT_VEXOK | INAT_VEXONLY")
else if (match(ext, vexok_expr) || match(opcode, vexok_opcode_expr))
flags = add_flags(flags, "INAT_VEXOK")
+ else if (match(ext, xopok_expr) || xopid >= 0)
+ flags = add_flags(flags, "INAT_XOPOK")
# check prefixes
if (match(ext, prefix_expr)) {
@@ -413,6 +439,14 @@ END {
print " ["i"]["j"] = "atable[i,j]","
print "};\n"
+ print "/* XOP opcode map array */"
+ print "const insn_attr_t * const inat_xop_tables[X86_XOP_M_MAX - X86_XOP_M_MIN + 1]" \
+ " = {"
+ for (i = 0; i < gxopid; i++)
+ if (xoptable[i])
+ print " ["i"] = "xoptable[i]","
+ print "};"
+
print "#else /* !__BOOT_COMPRESSED */\n"
print "/* Escape opcode map array */"
@@ -430,6 +464,10 @@ END {
"[INAT_LSTPFX_MAX + 1];"
print ""
+ print "/* XOP opcode map array */"
+ print "static const insn_attr_t *inat_xop_tables[X86_XOP_M_MAX - X86_XOP_M_MIN + 1];"
+ print ""
+
print "static void inat_init_tables(void)"
print "{"
@@ -455,6 +493,12 @@ END {
if (atable[i,j])
print "\tinat_avx_tables["i"]["j"] = "atable[i,j]";"
+ print ""
+ print "\t/* Print XOP opcode map array */"
+ for (i = 0; i < gxopid; i++)
+ if (xoptable[i])
+ print "\tinat_xop_tables["i"] = "xoptable[i]";"
+
print "}"
print "#endif"
}
diff --git a/tools/bootconfig/main.c b/tools/bootconfig/main.c
index 57c669d2aa90..55d59ed507d5 100644
--- a/tools/bootconfig/main.c
+++ b/tools/bootconfig/main.c
@@ -193,7 +193,7 @@ static int load_xbc_from_initrd(int fd, char **buf)
if (stat.st_size < BOOTCONFIG_FOOTER_SIZE)
return 0;
- if (lseek(fd, -BOOTCONFIG_MAGIC_LEN, SEEK_END) < 0)
+ if (lseek(fd, -(off_t)BOOTCONFIG_MAGIC_LEN, SEEK_END) < 0)
return pr_errno("Failed to lseek for magic", -errno);
if (read(fd, magic, BOOTCONFIG_MAGIC_LEN) < 0)
@@ -203,7 +203,7 @@ static int load_xbc_from_initrd(int fd, char **buf)
if (memcmp(magic, BOOTCONFIG_MAGIC, BOOTCONFIG_MAGIC_LEN) != 0)
return 0;
- if (lseek(fd, -BOOTCONFIG_FOOTER_SIZE, SEEK_END) < 0)
+ if (lseek(fd, -(off_t)BOOTCONFIG_FOOTER_SIZE, SEEK_END) < 0)
return pr_errno("Failed to lseek for size", -errno);
if (read(fd, &size, sizeof(uint32_t)) < 0)
diff --git a/tools/bpf/Makefile b/tools/bpf/Makefile
index 062bbd6cd048..fd2585af1252 100644
--- a/tools/bpf/Makefile
+++ b/tools/bpf/Makefile
@@ -32,7 +32,7 @@ FEATURE_TESTS = libbfd disassembler-four-args disassembler-init-styled
FEATURE_DISPLAY = libbfd
check_feat := 1
-NON_CHECK_FEAT_TARGETS := clean bpftool_clean runqslower_clean resolve_btfids_clean
+NON_CHECK_FEAT_TARGETS := clean bpftool_clean resolve_btfids_clean
ifdef MAKECMDGOALS
ifeq ($(filter-out $(NON_CHECK_FEAT_TARGETS),$(MAKECMDGOALS)),)
check_feat := 0
@@ -70,7 +70,7 @@ $(OUTPUT)%.lex.o: $(OUTPUT)%.lex.c
PROGS = $(OUTPUT)bpf_jit_disasm $(OUTPUT)bpf_dbg $(OUTPUT)bpf_asm
-all: $(PROGS) bpftool runqslower
+all: $(PROGS) bpftool
$(OUTPUT)bpf_jit_disasm: CFLAGS += -DPACKAGE='bpf_jit_disasm'
$(OUTPUT)bpf_jit_disasm: $(OUTPUT)bpf_jit_disasm.o
@@ -86,7 +86,7 @@ $(OUTPUT)bpf_exp.lex.c: $(OUTPUT)bpf_exp.yacc.c
$(OUTPUT)bpf_exp.yacc.o: $(OUTPUT)bpf_exp.yacc.c
$(OUTPUT)bpf_exp.lex.o: $(OUTPUT)bpf_exp.lex.c
-clean: bpftool_clean runqslower_clean resolve_btfids_clean
+clean: bpftool_clean resolve_btfids_clean
$(call QUIET_CLEAN, bpf-progs)
$(Q)$(RM) -r -- $(OUTPUT)*.o $(OUTPUT)bpf_jit_disasm $(OUTPUT)bpf_dbg \
$(OUTPUT)bpf_asm $(OUTPUT)bpf_exp.yacc.* $(OUTPUT)bpf_exp.lex.*
@@ -112,12 +112,6 @@ bpftool_install:
bpftool_clean:
$(call descend,bpftool,clean)
-runqslower:
- $(call descend,runqslower)
-
-runqslower_clean:
- $(call descend,runqslower,clean)
-
resolve_btfids:
$(call descend,resolve_btfids)
@@ -125,5 +119,4 @@ resolve_btfids_clean:
$(call descend,resolve_btfids,clean)
.PHONY: all install clean bpftool bpftool_install bpftool_clean \
- runqslower runqslower_clean \
resolve_btfids resolve_btfids_clean
diff --git a/tools/bpf/bpftool/Documentation/bpftool-gen.rst b/tools/bpf/bpftool/Documentation/bpftool-gen.rst
index ca860fd97d8d..d0a36f442db7 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-gen.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-gen.rst
@@ -16,7 +16,7 @@ SYNOPSIS
**bpftool** [*OPTIONS*] **gen** *COMMAND*
-*OPTIONS* := { |COMMON_OPTIONS| | { **-L** | **--use-loader** } }
+*OPTIONS* := { |COMMON_OPTIONS| | { **-L** | **--use-loader** } | [ { **-S** | **--sign** } {**-k** <private_key.pem>} **-i** <certificate.x509> ] }
*COMMAND* := { **object** | **skeleton** | **help** }
@@ -186,6 +186,17 @@ OPTIONS
skeleton). A light skeleton contains a loader eBPF program. It does not use
the majority of the libbpf infrastructure, and does not need libelf.
+-S, --sign
+ For skeletons, generate a signed skeleton. This option must be used with
+ **-k** and **-i**. Using this flag implicitly enables **--use-loader**.
+
+-k <private_key.pem>
+ Path to the private key file in PEM format, required for signing.
+
+-i <certificate.x509>
+ Path to the X.509 certificate file in PEM or DER format, required for
+ signing.
+
EXAMPLES
========
**$ cat example1.bpf.c**
diff --git a/tools/bpf/bpftool/Documentation/bpftool-map.rst b/tools/bpf/bpftool/Documentation/bpftool-map.rst
index 252e4c538edb..1af3305ea2b2 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-map.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-map.rst
@@ -55,7 +55,8 @@ MAP COMMANDS
| | **devmap** | **devmap_hash** | **sockmap** | **cpumap** | **xskmap** | **sockhash**
| | **cgroup_storage** | **reuseport_sockarray** | **percpu_cgroup_storage**
| | **queue** | **stack** | **sk_storage** | **struct_ops** | **ringbuf** | **inode_storage**
-| | **task_storage** | **bloom_filter** | **user_ringbuf** | **cgrp_storage** | **arena** }
+| | **task_storage** | **bloom_filter** | **user_ringbuf** | **cgrp_storage** | **arena**
+| | **insn_array** }
DESCRIPTION
===========
diff --git a/tools/bpf/bpftool/Documentation/bpftool-prog.rst b/tools/bpf/bpftool/Documentation/bpftool-prog.rst
index f69fd92df8d8..35aeeaf5f711 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-prog.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-prog.rst
@@ -18,7 +18,7 @@ SYNOPSIS
*OPTIONS* := { |COMMON_OPTIONS| |
{ **-f** | **--bpffs** } | { **-m** | **--mapcompat** } | { **-n** | **--nomount** } |
-{ **-L** | **--use-loader** } }
+{ **-L** | **--use-loader** } | [ { **-S** | **--sign** } **-k** <private_key.pem> **-i** <certificate.x509> ] }
*COMMANDS* :=
{ **show** | **list** | **dump xlated** | **dump jited** | **pin** | **load** |
@@ -182,7 +182,7 @@ bpftool prog tracelog
bpftool prog tracelog { stdout | stderr } *PROG*
Dump the BPF stream of the program. BPF programs can write to these streams
- at runtime with the **bpf_stream_vprintk**\ () kfunc. The kernel may write
+ at runtime with the **bpf_stream_vprintk_impl**\ () kfunc. The kernel may write
error messages to the standard error stream. This facility should be used
only for debugging purposes.
@@ -248,6 +248,18 @@ OPTIONS
creating the maps, and loading the programs (see **bpftool prog tracelog**
as a way to dump those messages).
+-S, --sign
+ Enable signing of the BPF program before loading. This option must be
+ used with **-k** and **-i**. Using this flag implicitly enables
+ **--use-loader**.
+
+-k <private_key.pem>
+ Path to the private key file in PEM format, required when signing.
+
+-i <certificate.x509>
+ Path to the X.509 certificate file in PEM or DER format, required when
+ signing.
+
EXAMPLES
========
**# bpftool prog show**
diff --git a/tools/bpf/bpftool/Documentation/bpftool-token.rst b/tools/bpf/bpftool/Documentation/bpftool-token.rst
new file mode 100644
index 000000000000..d082c499cfe3
--- /dev/null
+++ b/tools/bpf/bpftool/Documentation/bpftool-token.rst
@@ -0,0 +1,64 @@
+.. SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+
+================
+bpftool-token
+================
+-------------------------------------------------------------------------------
+tool for inspection and simple manipulation of eBPF tokens
+-------------------------------------------------------------------------------
+
+:Manual section: 8
+
+.. include:: substitutions.rst
+
+SYNOPSIS
+========
+
+**bpftool** [*OPTIONS*] **token** *COMMAND*
+
+*OPTIONS* := { |COMMON_OPTIONS| }
+
+*COMMANDS* := { **show** | **list** | **help** }
+
+TOKEN COMMANDS
+===============
+
+| **bpftool** **token** { **show** | **list** }
+| **bpftool** **token help**
+|
+
+DESCRIPTION
+===========
+bpftool token { show | list }
+ List BPF token information for each *bpffs* mount point containing token
+ information on the system. Information include mount point path, allowed
+ **bpf**\ () system call commands, maps, programs, and attach types for the
+ token.
+
+bpftool prog help
+ Print short help message.
+
+OPTIONS
+========
+.. include:: common_options.rst
+
+EXAMPLES
+========
+|
+| **# mkdir -p /sys/fs/bpf/token**
+| **# mount -t bpf bpffs /sys/fs/bpf/token** \
+| **-o delegate_cmds=prog_load:map_create** \
+| **-o delegate_progs=kprobe** \
+| **-o delegate_attachs=xdp**
+| **# bpftool token list**
+
+::
+
+ token_info /sys/fs/bpf/token
+ allowed_cmds:
+ map_create prog_load
+ allowed_maps:
+ allowed_progs:
+ kprobe
+ allowed_attachs:
+ xdp
diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile
index 9e9a5f006cd2..586d1b2595d1 100644
--- a/tools/bpf/bpftool/Makefile
+++ b/tools/bpf/bpftool/Makefile
@@ -130,8 +130,8 @@ include $(FEATURES_DUMP)
endif
endif
-LIBS = $(LIBBPF) -lelf -lz
-LIBS_BOOTSTRAP = $(LIBBPF_BOOTSTRAP) -lelf -lz
+LIBS = $(LIBBPF) -lelf -lz -lcrypto
+LIBS_BOOTSTRAP = $(LIBBPF_BOOTSTRAP) -lelf -lz -lcrypto
ifeq ($(feature-libelf-zstd),1)
LIBS += -lzstd
@@ -194,7 +194,7 @@ endif
BPFTOOL_BOOTSTRAP := $(BOOTSTRAP_OUTPUT)bpftool
-BOOTSTRAP_OBJS = $(addprefix $(BOOTSTRAP_OUTPUT),main.o common.o json_writer.o gen.o btf.o)
+BOOTSTRAP_OBJS = $(addprefix $(BOOTSTRAP_OUTPUT),main.o common.o json_writer.o gen.o btf.o sign.o)
$(BOOTSTRAP_OBJS): $(LIBBPF_BOOTSTRAP)
OBJS = $(patsubst %.c,$(OUTPUT)%.o,$(SRCS)) $(OUTPUT)disasm.o
diff --git a/tools/bpf/bpftool/bash-completion/bpftool b/tools/bpf/bpftool/bash-completion/bpftool
index a759ba24471d..53bcfeb1a76e 100644
--- a/tools/bpf/bpftool/bash-completion/bpftool
+++ b/tools/bpf/bpftool/bash-completion/bpftool
@@ -262,7 +262,7 @@ _bpftool()
# Deal with options
if [[ ${words[cword]} == -* ]]; then
local c='--version --json --pretty --bpffs --mapcompat --debug \
- --use-loader --base-btf'
+ --use-loader --base-btf --sign -i -k'
COMPREPLY=( $( compgen -W "$c" -- "$cur" ) )
return 0
fi
@@ -283,7 +283,7 @@ _bpftool()
_sysfs_get_netdevs
return 0
;;
- file|pinned|-B|--base-btf)
+ file|pinned|-B|--base-btf|-i|-k)
_filedir
return 0
;;
@@ -296,13 +296,21 @@ _bpftool()
# Remove all options so completions don't have to deal with them.
local i pprev
for (( i=1; i < ${#words[@]}; )); do
- if [[ ${words[i]::1} == - ]] &&
- [[ ${words[i]} != "-B" ]] && [[ ${words[i]} != "--base-btf" ]]; then
- words=( "${words[@]:0:i}" "${words[@]:i+1}" )
- [[ $i -le $cword ]] && cword=$(( cword - 1 ))
- else
- i=$(( ++i ))
- fi
+ case ${words[i]} in
+ # Remove option and its argument
+ -B|--base-btf|-i|-k)
+ words=( "${words[@]:0:i}" "${words[@]:i+2}" )
+ [[ $i -le $(($cword + 1)) ]] && cword=$(( cword - 2 ))
+ ;;
+ # No argument, remove option only
+ -*)
+ words=( "${words[@]:0:i}" "${words[@]:i+1}" )
+ [[ $i -le $cword ]] && cword=$(( cword - 1 ))
+ ;;
+ *)
+ i=$(( ++i ))
+ ;;
+ esac
done
cur=${words[cword]}
prev=${words[cword - 1]}
@@ -1215,6 +1223,17 @@ _bpftool()
;;
esac
;;
+ token)
+ case $command in
+ show|list)
+ return 0
+ ;;
+ *)
+ [[ $prev == $object ]] && \
+ COMPREPLY=( $( compgen -W 'help show list' -- "$cur" ) )
+ ;;
+ esac
+ ;;
esac
} &&
complete -F _bpftool bpftool
diff --git a/tools/bpf/bpftool/btf_dumper.c b/tools/bpf/bpftool/btf_dumper.c
index 4e896d8a2416..def297e879f4 100644
--- a/tools/bpf/bpftool/btf_dumper.c
+++ b/tools/bpf/bpftool/btf_dumper.c
@@ -38,7 +38,7 @@ static int dump_prog_id_as_func_ptr(const struct btf_dumper *d,
__u32 info_len = sizeof(info);
const char *prog_name = NULL;
struct btf *prog_btf = NULL;
- struct bpf_func_info finfo;
+ struct bpf_func_info finfo = {};
__u32 finfo_rec_size;
char prog_str[1024];
int err;
@@ -590,7 +590,7 @@ static int btf_dumper_do_type(const struct btf_dumper *d, __u32 type_id,
case BTF_KIND_DATASEC:
return btf_dumper_datasec(d, type_id, data);
default:
- jsonw_printf(d->jw, "(unsupported-kind");
+ jsonw_printf(d->jw, "(unsupported-kind)");
return -EINVAL;
}
}
diff --git a/tools/bpf/bpftool/cgroup.c b/tools/bpf/bpftool/cgroup.c
index 944ebe21a216..ec356deb27c9 100644
--- a/tools/bpf/bpftool/cgroup.c
+++ b/tools/bpf/bpftool/cgroup.c
@@ -2,6 +2,10 @@
// Copyright (C) 2017 Facebook
// Author: Roman Gushchin <guro@fb.com>
+#undef GCC_VERSION
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
#define _XOPEN_SOURCE 500
#include <errno.h>
#include <fcntl.h>
diff --git a/tools/bpf/bpftool/common.c b/tools/bpf/bpftool/common.c
index b07317d2842f..e8daf963ecef 100644
--- a/tools/bpf/bpftool/common.c
+++ b/tools/bpf/bpftool/common.c
@@ -21,6 +21,7 @@
#include <sys/resource.h>
#include <sys/stat.h>
#include <sys/vfs.h>
+#include <sys/utsname.h>
#include <linux/filter.h>
#include <linux/limits.h>
@@ -31,6 +32,7 @@
#include <bpf/hashmap.h>
#include <bpf/libbpf.h> /* libbpf_num_possible_cpus */
#include <bpf/btf.h>
+#include <zlib.h>
#include "main.h"
@@ -1208,3 +1210,94 @@ int pathname_concat(char *buf, int buf_sz, const char *path,
return 0;
}
+
+static bool read_next_kernel_config_option(gzFile file, char *buf, size_t n,
+ char **value)
+{
+ char *sep;
+
+ while (gzgets(file, buf, n)) {
+ if (strncmp(buf, "CONFIG_", 7))
+ continue;
+
+ sep = strchr(buf, '=');
+ if (!sep)
+ continue;
+
+ /* Trim ending '\n' */
+ buf[strlen(buf) - 1] = '\0';
+
+ /* Split on '=' and ensure that a value is present. */
+ *sep = '\0';
+ if (!sep[1])
+ continue;
+
+ *value = sep + 1;
+ return true;
+ }
+
+ return false;
+}
+
+int read_kernel_config(const struct kernel_config_option *requested_options,
+ size_t num_options, char **out_values,
+ const char *define_prefix)
+{
+ struct utsname utsn;
+ char path[PATH_MAX];
+ gzFile file = NULL;
+ char buf[4096];
+ char *value;
+ size_t i;
+ int ret = 0;
+
+ if (!requested_options || !out_values || num_options == 0)
+ return -1;
+
+ if (!uname(&utsn)) {
+ snprintf(path, sizeof(path), "/boot/config-%s", utsn.release);
+
+ /* gzopen also accepts uncompressed files. */
+ file = gzopen(path, "r");
+ }
+
+ if (!file) {
+ /* Some distributions build with CONFIG_IKCONFIG=y and put the
+ * config file at /proc/config.gz.
+ */
+ file = gzopen("/proc/config.gz", "r");
+ }
+
+ if (!file) {
+ p_info("skipping kernel config, can't open file: %s",
+ strerror(errno));
+ return -1;
+ }
+
+ if (!gzgets(file, buf, sizeof(buf)) || !gzgets(file, buf, sizeof(buf))) {
+ p_info("skipping kernel config, can't read from file: %s",
+ strerror(errno));
+ ret = -1;
+ goto end_parse;
+ }
+
+ if (strcmp(buf, "# Automatically generated file; DO NOT EDIT.\n")) {
+ p_info("skipping kernel config, can't find correct file");
+ ret = -1;
+ goto end_parse;
+ }
+
+ while (read_next_kernel_config_option(file, buf, sizeof(buf), &value)) {
+ for (i = 0; i < num_options; i++) {
+ if ((define_prefix && !requested_options[i].macro_dump) ||
+ out_values[i] || strcmp(buf, requested_options[i].name))
+ continue;
+
+ out_values[i] = strdup(value);
+ }
+ }
+
+end_parse:
+ gzclose(file);
+ return ret;
+}
diff --git a/tools/bpf/bpftool/feature.c b/tools/bpf/bpftool/feature.c
index 24fecdf8e430..0f6070a0c8e7 100644
--- a/tools/bpf/bpftool/feature.c
+++ b/tools/bpf/bpftool/feature.c
@@ -10,7 +10,6 @@
#ifdef USE_LIBCAP
#include <sys/capability.h>
#endif
-#include <sys/utsname.h>
#include <sys/vfs.h>
#include <linux/filter.h>
@@ -18,7 +17,6 @@
#include <bpf/bpf.h>
#include <bpf/libbpf.h>
-#include <zlib.h>
#include "main.h"
@@ -327,40 +325,9 @@ static void probe_jit_limit(void)
}
}
-static bool read_next_kernel_config_option(gzFile file, char *buf, size_t n,
- char **value)
-{
- char *sep;
-
- while (gzgets(file, buf, n)) {
- if (strncmp(buf, "CONFIG_", 7))
- continue;
-
- sep = strchr(buf, '=');
- if (!sep)
- continue;
-
- /* Trim ending '\n' */
- buf[strlen(buf) - 1] = '\0';
-
- /* Split on '=' and ensure that a value is present. */
- *sep = '\0';
- if (!sep[1])
- continue;
-
- *value = sep + 1;
- return true;
- }
-
- return false;
-}
-
static void probe_kernel_image_config(const char *define_prefix)
{
- static const struct {
- const char * const name;
- bool macro_dump;
- } options[] = {
+ struct kernel_config_option options[] = {
/* Enable BPF */
{ "CONFIG_BPF", },
/* Enable bpf() syscall */
@@ -435,52 +402,11 @@ static void probe_kernel_image_config(const char *define_prefix)
{ "CONFIG_HZ", true, }
};
char *values[ARRAY_SIZE(options)] = { };
- struct utsname utsn;
- char path[PATH_MAX];
- gzFile file = NULL;
- char buf[4096];
- char *value;
size_t i;
- if (!uname(&utsn)) {
- snprintf(path, sizeof(path), "/boot/config-%s", utsn.release);
-
- /* gzopen also accepts uncompressed files. */
- file = gzopen(path, "r");
- }
-
- if (!file) {
- /* Some distributions build with CONFIG_IKCONFIG=y and put the
- * config file at /proc/config.gz.
- */
- file = gzopen("/proc/config.gz", "r");
- }
- if (!file) {
- p_info("skipping kernel config, can't open file: %s",
- strerror(errno));
- goto end_parse;
- }
- /* Sanity checks */
- if (!gzgets(file, buf, sizeof(buf)) ||
- !gzgets(file, buf, sizeof(buf))) {
- p_info("skipping kernel config, can't read from file: %s",
- strerror(errno));
- goto end_parse;
- }
- if (strcmp(buf, "# Automatically generated file; DO NOT EDIT.\n")) {
- p_info("skipping kernel config, can't find correct file");
- goto end_parse;
- }
-
- while (read_next_kernel_config_option(file, buf, sizeof(buf), &value)) {
- for (i = 0; i < ARRAY_SIZE(options); i++) {
- if ((define_prefix && !options[i].macro_dump) ||
- values[i] || strcmp(buf, options[i].name))
- continue;
-
- values[i] = strdup(value);
- }
- }
+ if (read_kernel_config(options, ARRAY_SIZE(options), values,
+ define_prefix))
+ return;
for (i = 0; i < ARRAY_SIZE(options); i++) {
if (define_prefix && !options[i].macro_dump)
@@ -488,10 +414,6 @@ static void probe_kernel_image_config(const char *define_prefix)
print_kernel_option(options[i].name, values[i], define_prefix);
free(values[i]);
}
-
-end_parse:
- if (file)
- gzclose(file);
}
static bool probe_bpf_syscall(const char *define_prefix)
diff --git a/tools/bpf/bpftool/gen.c b/tools/bpf/bpftool/gen.c
index 67a60114368f..993c7d9484a4 100644
--- a/tools/bpf/bpftool/gen.c
+++ b/tools/bpf/bpftool/gen.c
@@ -688,10 +688,17 @@ static void codegen_destroy(struct bpf_object *obj, const char *obj_name)
static int gen_trace(struct bpf_object *obj, const char *obj_name, const char *header_guard)
{
DECLARE_LIBBPF_OPTS(gen_loader_opts, opts);
+ struct bpf_load_and_run_opts sopts = {};
+ char sig_buf[MAX_SIG_SIZE];
+ __u8 prog_sha[SHA256_DIGEST_LENGTH];
struct bpf_map *map;
+
char ident[256];
int err = 0;
+ if (sign_progs)
+ opts.gen_hash = true;
+
err = bpf_object__gen_loader(obj, &opts);
if (err)
return err;
@@ -701,6 +708,7 @@ static int gen_trace(struct bpf_object *obj, const char *obj_name, const char *h
p_err("failed to load object file");
goto out;
}
+
/* If there was no error during load then gen_loader_opts
* are populated with the loader program.
*/
@@ -780,8 +788,52 @@ static int gen_trace(struct bpf_object *obj, const char *obj_name, const char *h
print_hex(opts.insns, opts.insns_sz);
codegen("\
\n\
- \"; \n\
- \n\
+ \";\n");
+
+ if (sign_progs) {
+ sopts.insns = opts.insns;
+ sopts.insns_sz = opts.insns_sz;
+ sopts.excl_prog_hash = prog_sha;
+ sopts.excl_prog_hash_sz = sizeof(prog_sha);
+ sopts.signature = sig_buf;
+ sopts.signature_sz = MAX_SIG_SIZE;
+
+ err = bpftool_prog_sign(&sopts);
+ if (err < 0) {
+ p_err("failed to sign program");
+ goto out;
+ }
+
+ codegen("\
+ \n\
+ static const char opts_sig[] __attribute__((__aligned__(8))) = \"\\\n\
+ ");
+ print_hex((const void *)sig_buf, sopts.signature_sz);
+ codegen("\
+ \n\
+ \";\n");
+
+ codegen("\
+ \n\
+ static const char opts_excl_hash[] __attribute__((__aligned__(8))) = \"\\\n\
+ ");
+ print_hex((const void *)prog_sha, sizeof(prog_sha));
+ codegen("\
+ \n\
+ \";\n");
+
+ codegen("\
+ \n\
+ opts.signature = (void *)opts_sig; \n\
+ opts.signature_sz = sizeof(opts_sig) - 1; \n\
+ opts.excl_prog_hash = (void *)opts_excl_hash; \n\
+ opts.excl_prog_hash_sz = sizeof(opts_excl_hash) - 1; \n\
+ opts.keyring_id = skel->keyring_id; \n\
+ ");
+ }
+
+ codegen("\
+ \n\
opts.ctx = (struct bpf_loader_ctx *)skel; \n\
opts.data_sz = sizeof(opts_data) - 1; \n\
opts.data = (void *)opts_data; \n\
@@ -1240,7 +1292,7 @@ static int do_skeleton(int argc, char **argv)
err = -errno;
libbpf_strerror(err, err_buf, sizeof(err_buf));
p_err("failed to open BPF object file: %s", err_buf);
- goto out;
+ goto out_obj;
}
bpf_object__for_each_map(map, obj) {
@@ -1355,6 +1407,13 @@ static int do_skeleton(int argc, char **argv)
printf("\t} links;\n");
}
+ if (sign_progs) {
+ codegen("\
+ \n\
+ __s32 keyring_id; \n\
+ ");
+ }
+
if (btf) {
err = codegen_datasecs(obj, obj_name);
if (err)
@@ -1552,6 +1611,7 @@ static int do_skeleton(int argc, char **argv)
err = 0;
out:
bpf_object__close(obj);
+out_obj:
if (obj_data)
munmap(obj_data, mmap_sz);
close(fd);
@@ -1930,7 +1990,7 @@ static int do_help(int argc, char **argv)
" %1$s %2$s help\n"
"\n"
" " HELP_SPEC_OPTIONS " |\n"
- " {-L|--use-loader} }\n"
+ " {-L|--use-loader} | [ {-S|--sign } {-k} <private_key.pem> {-i} <certificate.x509> ]}\n"
"",
bin_name, "gen");
diff --git a/tools/bpf/bpftool/link.c b/tools/bpf/bpftool/link.c
index a773e05d5ade..bdcd717b0348 100644
--- a/tools/bpf/bpftool/link.c
+++ b/tools/bpf/bpftool/link.c
@@ -282,11 +282,52 @@ get_addr_cookie_array(__u64 *addrs, __u64 *cookies, __u32 count)
return data;
}
+static bool is_x86_ibt_enabled(void)
+{
+#if defined(__x86_64__)
+ struct kernel_config_option options[] = {
+ { "CONFIG_X86_KERNEL_IBT", },
+ };
+ char *values[ARRAY_SIZE(options)] = { };
+ bool ret;
+
+ if (read_kernel_config(options, ARRAY_SIZE(options), values, NULL))
+ return false;
+
+ ret = !!values[0];
+ free(values[0]);
+ return ret;
+#else
+ return false;
+#endif
+}
+
+static bool
+symbol_matches_target(__u64 sym_addr, __u64 target_addr, bool is_ibt_enabled)
+{
+ if (sym_addr == target_addr)
+ return true;
+
+ /*
+ * On x86_64 architectures with CET (Control-flow Enforcement Technology),
+ * function entry points have a 4-byte 'endbr' instruction prefix.
+ * This causes kprobe hooks to target the address *after* 'endbr'
+ * (symbol address + 4), preserving the CET instruction.
+ * Here we check if the symbol address matches the hook target address
+ * minus 4, indicating a CET-enabled function entry point.
+ */
+ if (is_ibt_enabled && sym_addr == target_addr - 4)
+ return true;
+
+ return false;
+}
+
static void
show_kprobe_multi_json(struct bpf_link_info *info, json_writer_t *wtr)
{
struct addr_cookie *data;
__u32 i, j = 0;
+ bool is_ibt_enabled;
jsonw_bool_field(json_wtr, "retprobe",
info->kprobe_multi.flags & BPF_F_KPROBE_MULTI_RETURN);
@@ -306,11 +347,13 @@ show_kprobe_multi_json(struct bpf_link_info *info, json_writer_t *wtr)
if (!dd.sym_count)
goto error;
+ is_ibt_enabled = is_x86_ibt_enabled();
for (i = 0; i < dd.sym_count; i++) {
- if (dd.sym_mapping[i].address != data[j].addr)
+ if (!symbol_matches_target(dd.sym_mapping[i].address,
+ data[j].addr, is_ibt_enabled))
continue;
jsonw_start_object(json_wtr);
- jsonw_uint_field(json_wtr, "addr", dd.sym_mapping[i].address);
+ jsonw_uint_field(json_wtr, "addr", (unsigned long)data[j].addr);
jsonw_string_field(json_wtr, "func", dd.sym_mapping[i].name);
/* Print null if it is vmlinux */
if (dd.sym_mapping[i].module[0] == '\0') {
@@ -719,6 +762,7 @@ static void show_kprobe_multi_plain(struct bpf_link_info *info)
{
struct addr_cookie *data;
__u32 i, j = 0;
+ bool is_ibt_enabled;
if (!info->kprobe_multi.count)
return;
@@ -742,12 +786,14 @@ static void show_kprobe_multi_plain(struct bpf_link_info *info)
if (!dd.sym_count)
goto error;
+ is_ibt_enabled = is_x86_ibt_enabled();
printf("\n\t%-16s %-16s %s", "addr", "cookie", "func [module]");
for (i = 0; i < dd.sym_count; i++) {
- if (dd.sym_mapping[i].address != data[j].addr)
+ if (!symbol_matches_target(dd.sym_mapping[i].address,
+ data[j].addr, is_ibt_enabled))
continue;
printf("\n\t%016lx %-16llx %s",
- dd.sym_mapping[i].address, data[j].cookie, dd.sym_mapping[i].name);
+ (unsigned long)data[j].addr, data[j].cookie, dd.sym_mapping[i].name);
if (dd.sym_mapping[i].module[0] != '\0')
printf(" [%s] ", dd.sym_mapping[i].module);
else
diff --git a/tools/bpf/bpftool/main.c b/tools/bpf/bpftool/main.c
index 2b7f2bd3a7db..a829a6a49037 100644
--- a/tools/bpf/bpftool/main.c
+++ b/tools/bpf/bpftool/main.c
@@ -33,6 +33,9 @@ bool relaxed_maps;
bool use_loader;
struct btf *base_btf;
struct hashmap *refs_table;
+bool sign_progs;
+const char *private_key_path;
+const char *cert_path;
static void __noreturn clean_and_exit(int i)
{
@@ -61,7 +64,7 @@ static int do_help(int argc, char **argv)
" %s batch file FILE\n"
" %s version\n"
"\n"
- " OBJECT := { prog | map | link | cgroup | perf | net | feature | btf | gen | struct_ops | iter }\n"
+ " OBJECT := { prog | map | link | cgroup | perf | net | feature | btf | gen | struct_ops | iter | token }\n"
" " HELP_SPEC_OPTIONS " |\n"
" {-V|--version} }\n"
"",
@@ -87,6 +90,7 @@ static const struct cmd commands[] = {
{ "gen", do_gen },
{ "struct_ops", do_struct_ops },
{ "iter", do_iter },
+ { "token", do_token },
{ "version", do_version },
{ 0 }
};
@@ -447,6 +451,7 @@ int main(int argc, char **argv)
{ "nomount", no_argument, NULL, 'n' },
{ "debug", no_argument, NULL, 'd' },
{ "use-loader", no_argument, NULL, 'L' },
+ { "sign", no_argument, NULL, 'S' },
{ "base-btf", required_argument, NULL, 'B' },
{ 0 }
};
@@ -473,7 +478,7 @@ int main(int argc, char **argv)
bin_name = "bpftool";
opterr = 0;
- while ((opt = getopt_long(argc, argv, "VhpjfLmndB:l",
+ while ((opt = getopt_long(argc, argv, "VhpjfLmndSi:k:B:l",
options, NULL)) >= 0) {
switch (opt) {
case 'V':
@@ -519,6 +524,16 @@ int main(int argc, char **argv)
case 'L':
use_loader = true;
break;
+ case 'S':
+ sign_progs = true;
+ use_loader = true;
+ break;
+ case 'k':
+ private_key_path = optarg;
+ break;
+ case 'i':
+ cert_path = optarg;
+ break;
default:
p_err("unrecognized option '%s'", argv[optind - 1]);
if (json_output)
@@ -533,6 +548,16 @@ int main(int argc, char **argv)
if (argc < 0)
usage();
+ if (sign_progs && (private_key_path == NULL || cert_path == NULL)) {
+ p_err("-i <identity_x509_cert> and -k <private_key> must be supplied with -S for signing");
+ return -EINVAL;
+ }
+
+ if (!sign_progs && (private_key_path != NULL || cert_path != NULL)) {
+ p_err("--sign (or -S) must be explicitly passed with -i <identity_x509_cert> and -k <private_key> to sign the programs");
+ return -EINVAL;
+ }
+
if (version_requested)
ret = do_version(argc, argv);
else
diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h
index 6db704fda5c0..1130299cede0 100644
--- a/tools/bpf/bpftool/main.h
+++ b/tools/bpf/bpftool/main.h
@@ -6,9 +6,14 @@
/* BFD and kernel.h both define GCC_VERSION, differently */
#undef GCC_VERSION
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
#include <stdbool.h>
#include <stdio.h>
+#include <errno.h>
#include <stdlib.h>
+#include <bpf/skel_internal.h>
#include <linux/bpf.h>
#include <linux/compiler.h>
#include <linux/kernel.h>
@@ -52,6 +57,7 @@ static inline void *u64_to_ptr(__u64 ptr)
})
#define ERR_MAX_LEN 1024
+#define MAX_SIG_SIZE 4096
#define BPF_TAG_FMT "%02hhx%02hhx%02hhx%02hhx%02hhx%02hhx%02hhx%02hhx"
@@ -85,6 +91,9 @@ extern bool relaxed_maps;
extern bool use_loader;
extern struct btf *base_btf;
extern struct hashmap *refs_table;
+extern bool sign_progs;
+extern const char *private_key_path;
+extern const char *cert_path;
void __printf(1, 2) p_err(const char *fmt, ...);
void __printf(1, 2) p_info(const char *fmt, ...);
@@ -166,6 +175,7 @@ int do_tracelog(int argc, char **arg) __weak;
int do_feature(int argc, char **argv) __weak;
int do_struct_ops(int argc, char **argv) __weak;
int do_iter(int argc, char **argv) __weak;
+int do_token(int argc, char **argv) __weak;
int parse_u32_arg(int *argc, char ***argv, __u32 *val, const char *what);
int prog_parse_fd(int *argc, char ***argv);
@@ -274,4 +284,15 @@ int pathname_concat(char *buf, int buf_sz, const char *path,
/* print netfilter bpf_link info */
void netfilter_dump_plain(const struct bpf_link_info *info);
void netfilter_dump_json(const struct bpf_link_info *info, json_writer_t *wtr);
+
+struct kernel_config_option {
+ const char *name;
+ bool macro_dump;
+};
+
+int read_kernel_config(const struct kernel_config_option *requested_options,
+ size_t num_options, char **out_values,
+ const char *define_prefix);
+int bpftool_prog_sign(struct bpf_load_and_run_opts *opts);
+__u32 register_session_key(const char *key_der_path);
#endif
diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c
index c9de44a45778..7ebf7dbcfba4 100644
--- a/tools/bpf/bpftool/map.c
+++ b/tools/bpf/bpftool/map.c
@@ -1477,7 +1477,8 @@ static int do_help(int argc, char **argv)
" devmap | devmap_hash | sockmap | cpumap | xskmap | sockhash |\n"
" cgroup_storage | reuseport_sockarray | percpu_cgroup_storage |\n"
" queue | stack | sk_storage | struct_ops | ringbuf | inode_storage |\n"
- " task_storage | bloom_filter | user_ringbuf | cgrp_storage | arena }\n"
+ " task_storage | bloom_filter | user_ringbuf | cgrp_storage | arena |\n"
+ " insn_array }\n"
" " HELP_SPEC_OPTIONS " |\n"
" {-f|--bpffs} | {-n|--nomount} }\n"
"",
diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c
index 9722d841abc0..6daf19809ca4 100644
--- a/tools/bpf/bpftool/prog.c
+++ b/tools/bpf/bpftool/prog.c
@@ -23,6 +23,7 @@
#include <linux/err.h>
#include <linux/perf_event.h>
#include <linux/sizes.h>
+#include <linux/keyctl.h>
#include <bpf/bpf.h>
#include <bpf/btf.h>
@@ -714,7 +715,7 @@ prog_dump(struct bpf_prog_info *info, enum dump_mode mode,
if (mode == DUMP_JITED) {
if (info->jited_prog_len == 0 || !info->jited_prog_insns) {
- p_info("no instructions returned");
+ p_err("error retrieving jit dump: no instructions returned or kernel.kptr_restrict set?");
return -1;
}
buf = u64_to_ptr(info->jited_prog_insns);
@@ -1930,6 +1931,8 @@ static int try_loader(struct gen_loader_opts *gen)
{
struct bpf_load_and_run_opts opts = {};
struct bpf_loader_ctx *ctx;
+ char sig_buf[MAX_SIG_SIZE];
+ __u8 prog_sha[SHA256_DIGEST_LENGTH];
int ctx_sz = sizeof(*ctx) + 64 * max(sizeof(struct bpf_map_desc),
sizeof(struct bpf_prog_desc));
int log_buf_sz = (1u << 24) - 1;
@@ -1953,6 +1956,26 @@ static int try_loader(struct gen_loader_opts *gen)
opts.insns = gen->insns;
opts.insns_sz = gen->insns_sz;
fds_before = count_open_fds();
+
+ if (sign_progs) {
+ opts.excl_prog_hash = prog_sha;
+ opts.excl_prog_hash_sz = sizeof(prog_sha);
+ opts.signature = sig_buf;
+ opts.signature_sz = MAX_SIG_SIZE;
+ opts.keyring_id = KEY_SPEC_SESSION_KEYRING;
+
+ err = bpftool_prog_sign(&opts);
+ if (err < 0) {
+ p_err("failed to sign program");
+ goto out;
+ }
+
+ err = register_session_key(cert_path);
+ if (err < 0) {
+ p_err("failed to add session key");
+ goto out;
+ }
+ }
err = bpf_load_and_run(&opts);
fd_delta = count_open_fds() - fds_before;
if (err < 0 || verifier_logs) {
@@ -1961,6 +1984,7 @@ static int try_loader(struct gen_loader_opts *gen)
fprintf(stderr, "loader prog leaked %d FDs\n",
fd_delta);
}
+out:
free(log_buf);
return err;
}
@@ -1988,6 +2012,9 @@ static int do_loader(int argc, char **argv)
goto err_close_obj;
}
+ if (sign_progs)
+ gen.gen_hash = true;
+
err = bpf_object__gen_loader(obj, &gen);
if (err)
goto err_close_obj;
@@ -2262,7 +2289,7 @@ static void profile_print_readings(void)
static char *profile_target_name(int tgt_fd)
{
- struct bpf_func_info func_info;
+ struct bpf_func_info func_info = {};
struct bpf_prog_info info = {};
__u32 info_len = sizeof(info);
const struct btf_type *t;
@@ -2562,7 +2589,7 @@ static int do_help(int argc, char **argv)
" METRIC := { cycles | instructions | l1d_loads | llc_misses | itlb_misses | dtlb_misses }\n"
" " HELP_SPEC_OPTIONS " |\n"
" {-f|--bpffs} | {-m|--mapcompat} | {-n|--nomount} |\n"
- " {-L|--use-loader} }\n"
+ " {-L|--use-loader} | [ {-S|--sign } {-k} <private_key.pem> {-i} <certificate.x509> ] \n"
"",
bin_name, argv[-2]);
diff --git a/tools/bpf/bpftool/sign.c b/tools/bpf/bpftool/sign.c
new file mode 100644
index 000000000000..f9b742f4bb10
--- /dev/null
+++ b/tools/bpf/bpftool/sign.c
@@ -0,0 +1,217 @@
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+/*
+ * Copyright (C) 2025 Google LLC.
+ */
+
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <stdbool.h>
+#include <string.h>
+#include <getopt.h>
+#include <err.h>
+#include <openssl/opensslv.h>
+#include <openssl/bio.h>
+#include <openssl/evp.h>
+#include <openssl/pem.h>
+#include <openssl/err.h>
+#include <openssl/cms.h>
+#include <linux/keyctl.h>
+#include <errno.h>
+
+#include <bpf/skel_internal.h>
+
+#include "main.h"
+
+#define OPEN_SSL_ERR_BUF_LEN 256
+
+/* Use deprecated in 3.0 ERR_get_error_line_data for openssl < 3 */
+#if !defined(OPENSSL_VERSION_MAJOR) || (OPENSSL_VERSION_MAJOR < 3)
+#define ERR_get_error_all(file, line, func, data, flags) \
+ ERR_get_error_line_data(file, line, data, flags)
+#endif
+
+static void display_openssl_errors(int l)
+{
+ char buf[OPEN_SSL_ERR_BUF_LEN];
+ const char *file;
+ const char *data;
+ unsigned long e;
+ int flags;
+ int line;
+
+ while ((e = ERR_get_error_all(&file, &line, NULL, &data, &flags))) {
+ ERR_error_string_n(e, buf, sizeof(buf));
+ if (data && (flags & ERR_TXT_STRING)) {
+ p_err("OpenSSL %s: %s:%d: %s", buf, file, line, data);
+ } else {
+ p_err("OpenSSL %s: %s:%d", buf, file, line);
+ }
+ }
+}
+
+#define DISPLAY_OSSL_ERR(cond) \
+ do { \
+ bool __cond = (cond); \
+ if (__cond && ERR_peek_error()) \
+ display_openssl_errors(__LINE__);\
+ } while (0)
+
+static EVP_PKEY *read_private_key(const char *pkey_path)
+{
+ EVP_PKEY *private_key = NULL;
+ BIO *b;
+
+ b = BIO_new_file(pkey_path, "rb");
+ private_key = PEM_read_bio_PrivateKey(b, NULL, NULL, NULL);
+ BIO_free(b);
+ DISPLAY_OSSL_ERR(!private_key);
+ return private_key;
+}
+
+static X509 *read_x509(const char *x509_name)
+{
+ unsigned char buf[2];
+ X509 *x509 = NULL;
+ BIO *b;
+ int n;
+
+ b = BIO_new_file(x509_name, "rb");
+ if (!b)
+ goto cleanup;
+
+ /* Look at the first two bytes of the file to determine the encoding */
+ n = BIO_read(b, buf, 2);
+ if (n != 2)
+ goto cleanup;
+
+ if (BIO_reset(b) != 0)
+ goto cleanup;
+
+ if (buf[0] == 0x30 && buf[1] >= 0x81 && buf[1] <= 0x84)
+ /* Assume raw DER encoded X.509 */
+ x509 = d2i_X509_bio(b, NULL);
+ else
+ /* Assume PEM encoded X.509 */
+ x509 = PEM_read_bio_X509(b, NULL, NULL, NULL);
+
+cleanup:
+ BIO_free(b);
+ DISPLAY_OSSL_ERR(!x509);
+ return x509;
+}
+
+__u32 register_session_key(const char *key_der_path)
+{
+ unsigned char *der_buf = NULL;
+ X509 *x509 = NULL;
+ int key_id = -1;
+ int der_len;
+
+ if (!key_der_path)
+ return key_id;
+ x509 = read_x509(key_der_path);
+ if (!x509)
+ goto cleanup;
+ der_len = i2d_X509(x509, &der_buf);
+ if (der_len < 0)
+ goto cleanup;
+ key_id = syscall(__NR_add_key, "asymmetric", key_der_path, der_buf,
+ (size_t)der_len, KEY_SPEC_SESSION_KEYRING);
+cleanup:
+ X509_free(x509);
+ OPENSSL_free(der_buf);
+ DISPLAY_OSSL_ERR(key_id == -1);
+ return key_id;
+}
+
+int bpftool_prog_sign(struct bpf_load_and_run_opts *opts)
+{
+ BIO *bd_in = NULL, *bd_out = NULL;
+ EVP_PKEY *private_key = NULL;
+ CMS_ContentInfo *cms = NULL;
+ long actual_sig_len = 0;
+ X509 *x509 = NULL;
+ int err = 0;
+
+ bd_in = BIO_new_mem_buf(opts->insns, opts->insns_sz);
+ if (!bd_in) {
+ err = -ENOMEM;
+ goto cleanup;
+ }
+
+ private_key = read_private_key(private_key_path);
+ if (!private_key) {
+ err = -EINVAL;
+ goto cleanup;
+ }
+
+ x509 = read_x509(cert_path);
+ if (!x509) {
+ err = -EINVAL;
+ goto cleanup;
+ }
+
+ cms = CMS_sign(NULL, NULL, NULL, NULL,
+ CMS_NOCERTS | CMS_PARTIAL | CMS_BINARY | CMS_DETACHED |
+ CMS_STREAM);
+ if (!cms) {
+ err = -EINVAL;
+ goto cleanup;
+ }
+
+ if (!CMS_add1_signer(cms, x509, private_key, EVP_sha256(),
+ CMS_NOCERTS | CMS_BINARY | CMS_NOSMIMECAP |
+ CMS_USE_KEYID | CMS_NOATTR)) {
+ err = -EINVAL;
+ goto cleanup;
+ }
+
+ if (CMS_final(cms, bd_in, NULL, CMS_NOCERTS | CMS_BINARY) != 1) {
+ err = -EIO;
+ goto cleanup;
+ }
+
+ EVP_Digest(opts->insns, opts->insns_sz, opts->excl_prog_hash,
+ &opts->excl_prog_hash_sz, EVP_sha256(), NULL);
+
+ bd_out = BIO_new(BIO_s_mem());
+ if (!bd_out) {
+ err = -ENOMEM;
+ goto cleanup;
+ }
+
+ if (!i2d_CMS_bio_stream(bd_out, cms, NULL, 0)) {
+ err = -EIO;
+ goto cleanup;
+ }
+
+ actual_sig_len = BIO_get_mem_data(bd_out, NULL);
+ if (actual_sig_len <= 0) {
+ err = -EIO;
+ goto cleanup;
+ }
+
+ if ((size_t)actual_sig_len > opts->signature_sz) {
+ err = -ENOSPC;
+ goto cleanup;
+ }
+
+ if (BIO_read(bd_out, opts->signature, actual_sig_len) != actual_sig_len) {
+ err = -EIO;
+ goto cleanup;
+ }
+
+ opts->signature_sz = actual_sig_len;
+cleanup:
+ BIO_free(bd_out);
+ CMS_ContentInfo_free(cms);
+ X509_free(x509);
+ EVP_PKEY_free(private_key);
+ BIO_free(bd_in);
+ DISPLAY_OSSL_ERR(err < 0);
+ return err;
+}
diff --git a/tools/bpf/bpftool/token.c b/tools/bpf/bpftool/token.c
new file mode 100644
index 000000000000..c08f34b9d51b
--- /dev/null
+++ b/tools/bpf/bpftool/token.c
@@ -0,0 +1,210 @@
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+/* Copyright (C) 2025 Didi Technology Co., Tao Chen */
+
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
+#include <errno.h>
+#include <fcntl.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <mntent.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+
+#include "json_writer.h"
+#include "main.h"
+
+#define MOUNTS_FILE "/proc/mounts"
+
+static struct {
+ const char *header;
+ const char *key;
+} sets[] = {
+ {"allowed_cmds", "delegate_cmds"},
+ {"allowed_maps", "delegate_maps"},
+ {"allowed_progs", "delegate_progs"},
+ {"allowed_attachs", "delegate_attachs"},
+};
+
+static bool has_delegate_options(const char *mnt_ops)
+{
+ return strstr(mnt_ops, "delegate_cmds") ||
+ strstr(mnt_ops, "delegate_maps") ||
+ strstr(mnt_ops, "delegate_progs") ||
+ strstr(mnt_ops, "delegate_attachs");
+}
+
+static char *get_delegate_value(char *opts, const char *key)
+{
+ char *token, *rest, *ret = NULL;
+
+ if (!opts)
+ return NULL;
+
+ for (token = strtok_r(opts, ",", &rest); token;
+ token = strtok_r(NULL, ",", &rest)) {
+ if (strncmp(token, key, strlen(key)) == 0 &&
+ token[strlen(key)] == '=') {
+ ret = token + strlen(key) + 1;
+ break;
+ }
+ }
+
+ return ret;
+}
+
+static void print_items_per_line(char *input, int items_per_line)
+{
+ char *str, *rest;
+ int cnt = 0;
+
+ if (!input)
+ return;
+
+ for (str = strtok_r(input, ":", &rest); str;
+ str = strtok_r(NULL, ":", &rest)) {
+ if (cnt % items_per_line == 0)
+ printf("\n\t ");
+
+ printf("%-20s", str);
+ cnt++;
+ }
+}
+
+#define ITEMS_PER_LINE 4
+static void show_token_info_plain(struct mntent *mntent)
+{
+ size_t i;
+
+ printf("token_info %s", mntent->mnt_dir);
+
+ for (i = 0; i < ARRAY_SIZE(sets); i++) {
+ char *opts, *value;
+
+ printf("\n\t%s:", sets[i].header);
+ opts = strdup(mntent->mnt_opts);
+ value = get_delegate_value(opts, sets[i].key);
+ print_items_per_line(value, ITEMS_PER_LINE);
+ free(opts);
+ }
+
+ printf("\n");
+}
+
+static void split_json_array_str(char *input)
+{
+ char *str, *rest;
+
+ if (!input) {
+ jsonw_start_array(json_wtr);
+ jsonw_end_array(json_wtr);
+ return;
+ }
+
+ jsonw_start_array(json_wtr);
+ for (str = strtok_r(input, ":", &rest); str;
+ str = strtok_r(NULL, ":", &rest)) {
+ jsonw_string(json_wtr, str);
+ }
+ jsonw_end_array(json_wtr);
+}
+
+static void show_token_info_json(struct mntent *mntent)
+{
+ size_t i;
+
+ jsonw_start_object(json_wtr);
+ jsonw_string_field(json_wtr, "token_info", mntent->mnt_dir);
+
+ for (i = 0; i < ARRAY_SIZE(sets); i++) {
+ char *opts, *value;
+
+ jsonw_name(json_wtr, sets[i].header);
+ opts = strdup(mntent->mnt_opts);
+ value = get_delegate_value(opts, sets[i].key);
+ split_json_array_str(value);
+ free(opts);
+ }
+
+ jsonw_end_object(json_wtr);
+}
+
+static int __show_token_info(struct mntent *mntent)
+{
+ if (json_output)
+ show_token_info_json(mntent);
+ else
+ show_token_info_plain(mntent);
+
+ return 0;
+}
+
+static int show_token_info(void)
+{
+ FILE *fp;
+ struct mntent *ent;
+
+ fp = setmntent(MOUNTS_FILE, "r");
+ if (!fp) {
+ p_err("Failed to open: %s", MOUNTS_FILE);
+ return -1;
+ }
+
+ if (json_output)
+ jsonw_start_array(json_wtr);
+
+ while ((ent = getmntent(fp)) != NULL) {
+ if (strncmp(ent->mnt_type, "bpf", 3) == 0) {
+ if (has_delegate_options(ent->mnt_opts))
+ __show_token_info(ent);
+ }
+ }
+
+ if (json_output)
+ jsonw_end_array(json_wtr);
+
+ endmntent(fp);
+
+ return 0;
+}
+
+static int do_show(int argc, char **argv)
+{
+ if (argc)
+ return BAD_ARG();
+
+ return show_token_info();
+}
+
+static int do_help(int argc, char **argv)
+{
+ if (json_output) {
+ jsonw_null(json_wtr);
+ return 0;
+ }
+
+ fprintf(stderr,
+ "Usage: %1$s %2$s { show | list }\n"
+ " %1$s %2$s help\n"
+ " " HELP_SPEC_OPTIONS " }\n"
+ "\n"
+ "",
+ bin_name, argv[-2]);
+ return 0;
+}
+
+static const struct cmd cmds[] = {
+ { "show", do_show },
+ { "list", do_show },
+ { "help", do_help },
+ { 0 }
+};
+
+int do_token(int argc, char **argv)
+{
+ return cmd_select(cmds, argc, argv, do_help);
+}
diff --git a/tools/bpf/bpftool/tracelog.c b/tools/bpf/bpftool/tracelog.c
index 31d806e3bdaa..573a8d99f009 100644
--- a/tools/bpf/bpftool/tracelog.c
+++ b/tools/bpf/bpftool/tracelog.c
@@ -57,10 +57,8 @@ find_tracefs_mnt_single(unsigned long magic, char *mnt, const char *mntpt)
static bool get_tracefs_pipe(char *mnt)
{
static const char * const known_mnts[] = {
- "/sys/kernel/debug/tracing",
"/sys/kernel/tracing",
- "/tracing",
- "/trace",
+ "/sys/kernel/debug/tracing",
};
const char *pipe_name = "/trace_pipe";
const char *fstype = "tracefs";
@@ -95,12 +93,7 @@ static bool get_tracefs_pipe(char *mnt)
return false;
p_info("could not find tracefs, attempting to mount it now");
- /* Most of the time, tracefs is automatically mounted by debugfs at
- * /sys/kernel/debug/tracing when we try to access it. If we could not
- * find it, it is likely that debugfs is not mounted. Let's give one
- * attempt at mounting just tracefs at /sys/kernel/tracing.
- */
- strcpy(mnt, known_mnts[1]);
+ strcpy(mnt, known_mnts[0]);
if (mount_tracefs(mnt))
return false;
diff --git a/tools/bpf/runqslower/Makefile b/tools/bpf/runqslower/Makefile
deleted file mode 100644
index 78a436c4072e..000000000000
--- a/tools/bpf/runqslower/Makefile
+++ /dev/null
@@ -1,91 +0,0 @@
-# SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
-include ../../scripts/Makefile.include
-
-OUTPUT ?= $(abspath .output)/
-
-BPFTOOL_OUTPUT := $(OUTPUT)bpftool/
-DEFAULT_BPFTOOL := $(BPFTOOL_OUTPUT)bootstrap/bpftool
-BPFTOOL ?= $(DEFAULT_BPFTOOL)
-BPF_TARGET_ENDIAN ?= --target=bpf
-LIBBPF_SRC := $(abspath ../../lib/bpf)
-BPFOBJ_OUTPUT := $(OUTPUT)libbpf/
-BPFOBJ := $(BPFOBJ_OUTPUT)libbpf.a
-BPF_DESTDIR := $(BPFOBJ_OUTPUT)
-BPF_INCLUDE := $(BPF_DESTDIR)/include
-INCLUDES := -I$(OUTPUT) -I$(BPF_INCLUDE) -I$(abspath ../../include/uapi)
-CFLAGS := -g -Wall $(CLANG_CROSS_FLAGS)
-CFLAGS += $(EXTRA_CFLAGS)
-LDFLAGS += $(EXTRA_LDFLAGS)
-LDLIBS += -lelf -lz
-
-# Try to detect best kernel BTF source
-KERNEL_REL := $(shell uname -r)
-VMLINUX_BTF_PATHS := $(if $(O),$(O)/vmlinux) \
- $(if $(KBUILD_OUTPUT),$(KBUILD_OUTPUT)/vmlinux) \
- ../../../vmlinux /sys/kernel/btf/vmlinux \
- /boot/vmlinux-$(KERNEL_REL)
-VMLINUX_BTF_PATH := $(or $(VMLINUX_BTF),$(firstword \
- $(wildcard $(VMLINUX_BTF_PATHS))))
-
-ifneq ($(V),1)
-MAKEFLAGS += --no-print-directory
-submake_extras := feature_display=0
-endif
-
-.DELETE_ON_ERROR:
-
-.PHONY: all clean runqslower libbpf_hdrs
-all: runqslower
-
-runqslower: $(OUTPUT)/runqslower
-
-clean:
- $(call QUIET_CLEAN, runqslower)
- $(Q)$(RM) -r $(BPFOBJ_OUTPUT) $(BPFTOOL_OUTPUT)
- $(Q)$(RM) $(OUTPUT)*.o $(OUTPUT)*.d
- $(Q)$(RM) $(OUTPUT)*.skel.h $(OUTPUT)vmlinux.h
- $(Q)$(RM) $(OUTPUT)runqslower
- $(Q)$(RM) -r .output
-
-libbpf_hdrs: $(BPFOBJ)
-
-$(OUTPUT)/runqslower: $(OUTPUT)/runqslower.o $(BPFOBJ)
- $(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) $^ $(LDLIBS) -o $@
-
-$(OUTPUT)/runqslower.o: runqslower.h $(OUTPUT)/runqslower.skel.h \
- $(OUTPUT)/runqslower.bpf.o | libbpf_hdrs
-
-$(OUTPUT)/runqslower.bpf.o: $(OUTPUT)/vmlinux.h runqslower.h | libbpf_hdrs
-
-$(OUTPUT)/%.skel.h: $(OUTPUT)/%.bpf.o | $(BPFTOOL)
- $(QUIET_GEN)$(BPFTOOL) gen skeleton $< > $@
-
-$(OUTPUT)/%.bpf.o: %.bpf.c $(BPFOBJ) | $(OUTPUT)
- $(QUIET_GEN)$(CLANG) -g -O2 $(BPF_TARGET_ENDIAN) $(INCLUDES) \
- -c $(filter %.c,$^) -o $@ && \
- $(LLVM_STRIP) -g $@
-
-$(OUTPUT)/%.o: %.c | $(OUTPUT)
- $(QUIET_CC)$(CC) $(CFLAGS) $(INCLUDES) -c $(filter %.c,$^) -o $@
-
-$(OUTPUT) $(BPFOBJ_OUTPUT) $(BPFTOOL_OUTPUT):
- $(QUIET_MKDIR)mkdir -p $@
-
-$(OUTPUT)/vmlinux.h: $(VMLINUX_BTF_PATH) | $(OUTPUT) $(BPFTOOL)
-ifeq ($(VMLINUX_H),)
- $(Q)if [ ! -e "$(VMLINUX_BTF_PATH)" ] ; then \
- echo "Couldn't find kernel BTF; set VMLINUX_BTF to" \
- "specify its location." >&2; \
- exit 1;\
- fi
- $(QUIET_GEN)$(BPFTOOL) btf dump file $(VMLINUX_BTF_PATH) format c > $@
-else
- $(Q)cp "$(VMLINUX_H)" $@
-endif
-
-$(BPFOBJ): $(wildcard $(LIBBPF_SRC)/*.[ch] $(LIBBPF_SRC)/Makefile) | $(BPFOBJ_OUTPUT)
- $(Q)$(MAKE) $(submake_extras) -C $(LIBBPF_SRC) OUTPUT=$(BPFOBJ_OUTPUT) \
- DESTDIR=$(BPFOBJ_OUTPUT) prefix= $(abspath $@) install_headers
-
-$(DEFAULT_BPFTOOL): | $(BPFTOOL_OUTPUT)
- $(Q)$(MAKE) $(submake_extras) -C ../bpftool OUTPUT=$(BPFTOOL_OUTPUT) bootstrap
diff --git a/tools/bpf/runqslower/runqslower.bpf.c b/tools/bpf/runqslower/runqslower.bpf.c
deleted file mode 100644
index fced54a3adf6..000000000000
--- a/tools/bpf/runqslower/runqslower.bpf.c
+++ /dev/null
@@ -1,106 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-// Copyright (c) 2019 Facebook
-#include "vmlinux.h"
-#include <bpf/bpf_helpers.h>
-#include "runqslower.h"
-
-#define TASK_RUNNING 0
-#define BPF_F_CURRENT_CPU 0xffffffffULL
-
-const volatile __u64 min_us = 0;
-const volatile pid_t targ_pid = 0;
-
-struct {
- __uint(type, BPF_MAP_TYPE_TASK_STORAGE);
- __uint(map_flags, BPF_F_NO_PREALLOC);
- __type(key, int);
- __type(value, u64);
-} start SEC(".maps");
-
-struct {
- __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
- __uint(key_size, sizeof(u32));
- __uint(value_size, sizeof(u32));
-} events SEC(".maps");
-
-/* record enqueue timestamp */
-__always_inline
-static int trace_enqueue(struct task_struct *t)
-{
- u32 pid = t->pid;
- u64 *ptr;
-
- if (!pid || (targ_pid && targ_pid != pid))
- return 0;
-
- ptr = bpf_task_storage_get(&start, t, 0,
- BPF_LOCAL_STORAGE_GET_F_CREATE);
- if (!ptr)
- return 0;
-
- *ptr = bpf_ktime_get_ns();
- return 0;
-}
-
-SEC("tp_btf/sched_wakeup")
-int handle__sched_wakeup(u64 *ctx)
-{
- /* TP_PROTO(struct task_struct *p) */
- struct task_struct *p = (void *)ctx[0];
-
- return trace_enqueue(p);
-}
-
-SEC("tp_btf/sched_wakeup_new")
-int handle__sched_wakeup_new(u64 *ctx)
-{
- /* TP_PROTO(struct task_struct *p) */
- struct task_struct *p = (void *)ctx[0];
-
- return trace_enqueue(p);
-}
-
-SEC("tp_btf/sched_switch")
-int handle__sched_switch(u64 *ctx)
-{
- /* TP_PROTO(bool preempt, struct task_struct *prev,
- * struct task_struct *next)
- */
- struct task_struct *prev = (struct task_struct *)ctx[1];
- struct task_struct *next = (struct task_struct *)ctx[2];
- struct runq_event event = {};
- u64 *tsp, delta_us;
- u32 pid;
-
- /* ivcsw: treat like an enqueue event and store timestamp */
- if (prev->__state == TASK_RUNNING)
- trace_enqueue(prev);
-
- pid = next->pid;
-
- /* For pid mismatch, save a bpf_task_storage_get */
- if (!pid || (targ_pid && targ_pid != pid))
- return 0;
-
- /* fetch timestamp and calculate delta */
- tsp = bpf_task_storage_get(&start, next, 0, 0);
- if (!tsp)
- return 0; /* missed enqueue */
-
- delta_us = (bpf_ktime_get_ns() - *tsp) / 1000;
- if (min_us && delta_us <= min_us)
- return 0;
-
- event.pid = pid;
- event.delta_us = delta_us;
- bpf_get_current_comm(&event.task, sizeof(event.task));
-
- /* output */
- bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU,
- &event, sizeof(event));
-
- bpf_task_storage_delete(&start, next);
- return 0;
-}
-
-char LICENSE[] SEC("license") = "GPL";
diff --git a/tools/bpf/runqslower/runqslower.c b/tools/bpf/runqslower/runqslower.c
deleted file mode 100644
index 83c5993a139a..000000000000
--- a/tools/bpf/runqslower/runqslower.c
+++ /dev/null
@@ -1,171 +0,0 @@
-// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
-// Copyright (c) 2019 Facebook
-#include <argp.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <time.h>
-#include <bpf/libbpf.h>
-#include <bpf/bpf.h>
-#include "runqslower.h"
-#include "runqslower.skel.h"
-
-struct env {
- pid_t pid;
- __u64 min_us;
- bool verbose;
-} env = {
- .min_us = 10000,
-};
-
-const char *argp_program_version = "runqslower 0.1";
-const char *argp_program_bug_address = "<bpf@vger.kernel.org>";
-const char argp_program_doc[] =
-"runqslower Trace long process scheduling delays.\n"
-" For Linux, uses eBPF, BPF CO-RE, libbpf, BTF.\n"
-"\n"
-"This script traces high scheduling delays between tasks being\n"
-"ready to run and them running on CPU after that.\n"
-"\n"
-"USAGE: runqslower [-p PID] [min_us]\n"
-"\n"
-"EXAMPLES:\n"
-" runqslower # trace run queue latency higher than 10000 us (default)\n"
-" runqslower 1000 # trace run queue latency higher than 1000 us\n"
-" runqslower -p 123 # trace pid 123 only\n";
-
-static const struct argp_option opts[] = {
- { "pid", 'p', "PID", 0, "Process PID to trace"},
- { "verbose", 'v', NULL, 0, "Verbose debug output" },
- {},
-};
-
-static error_t parse_arg(int key, char *arg, struct argp_state *state)
-{
- static int pos_args;
- int pid;
- long long min_us;
-
- switch (key) {
- case 'v':
- env.verbose = true;
- break;
- case 'p':
- errno = 0;
- pid = strtol(arg, NULL, 10);
- if (errno || pid <= 0) {
- fprintf(stderr, "Invalid PID: %s\n", arg);
- argp_usage(state);
- }
- env.pid = pid;
- break;
- case ARGP_KEY_ARG:
- if (pos_args++) {
- fprintf(stderr,
- "Unrecognized positional argument: %s\n", arg);
- argp_usage(state);
- }
- errno = 0;
- min_us = strtoll(arg, NULL, 10);
- if (errno || min_us <= 0) {
- fprintf(stderr, "Invalid delay (in us): %s\n", arg);
- argp_usage(state);
- }
- env.min_us = min_us;
- break;
- default:
- return ARGP_ERR_UNKNOWN;
- }
- return 0;
-}
-
-int libbpf_print_fn(enum libbpf_print_level level,
- const char *format, va_list args)
-{
- if (level == LIBBPF_DEBUG && !env.verbose)
- return 0;
- return vfprintf(stderr, format, args);
-}
-
-void handle_event(void *ctx, int cpu, void *data, __u32 data_sz)
-{
- const struct runq_event *e = data;
- struct tm *tm;
- char ts[32];
- time_t t;
-
- time(&t);
- tm = localtime(&t);
- strftime(ts, sizeof(ts), "%H:%M:%S", tm);
- printf("%-8s %-16s %-6d %14llu\n", ts, e->task, e->pid, e->delta_us);
-}
-
-void handle_lost_events(void *ctx, int cpu, __u64 lost_cnt)
-{
- printf("Lost %llu events on CPU #%d!\n", lost_cnt, cpu);
-}
-
-int main(int argc, char **argv)
-{
- static const struct argp argp = {
- .options = opts,
- .parser = parse_arg,
- .doc = argp_program_doc,
- };
- struct perf_buffer *pb = NULL;
- struct runqslower_bpf *obj;
- int err;
-
- err = argp_parse(&argp, argc, argv, 0, NULL, NULL);
- if (err)
- return err;
-
- libbpf_set_print(libbpf_print_fn);
-
- /* Use libbpf 1.0 API mode */
- libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
-
- obj = runqslower_bpf__open();
- if (!obj) {
- fprintf(stderr, "failed to open and/or load BPF object\n");
- return 1;
- }
-
- /* initialize global data (filtering options) */
- obj->rodata->targ_pid = env.pid;
- obj->rodata->min_us = env.min_us;
-
- err = runqslower_bpf__load(obj);
- if (err) {
- fprintf(stderr, "failed to load BPF object: %d\n", err);
- goto cleanup;
- }
-
- err = runqslower_bpf__attach(obj);
- if (err) {
- fprintf(stderr, "failed to attach BPF programs\n");
- goto cleanup;
- }
-
- printf("Tracing run queue latency higher than %llu us\n", env.min_us);
- printf("%-8s %-16s %-6s %14s\n", "TIME", "COMM", "PID", "LAT(us)");
-
- pb = perf_buffer__new(bpf_map__fd(obj->maps.events), 64,
- handle_event, handle_lost_events, NULL, NULL);
- err = libbpf_get_error(pb);
- if (err) {
- pb = NULL;
- fprintf(stderr, "failed to open perf buffer: %d\n", err);
- goto cleanup;
- }
-
- while ((err = perf_buffer__poll(pb, 100)) >= 0)
- ;
- printf("Error polling perf buffer: %d\n", err);
-
-cleanup:
- perf_buffer__free(pb);
- runqslower_bpf__destroy(obj);
-
- return err != 0;
-}
diff --git a/tools/bpf/runqslower/runqslower.h b/tools/bpf/runqslower/runqslower.h
deleted file mode 100644
index 4f70f07200c2..000000000000
--- a/tools/bpf/runqslower/runqslower.h
+++ /dev/null
@@ -1,13 +0,0 @@
-/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
-#ifndef __RUNQSLOWER_H
-#define __RUNQSLOWER_H
-
-#define TASK_COMM_LEN 16
-
-struct runq_event {
- char task[TASK_COMM_LEN];
- __u64 delta_us;
- pid_t pid;
-};
-
-#endif /* __RUNQSLOWER_H */
diff --git a/tools/build/Build b/tools/build/Build
new file mode 100644
index 000000000000..1c7e598e9f59
--- /dev/null
+++ b/tools/build/Build
@@ -0,0 +1,2 @@
+hostprogs := fixdep
+fixdep-y := fixdep.o
diff --git a/tools/build/Makefile b/tools/build/Makefile
index 63ef21878761..3a5a3808ab2a 100644
--- a/tools/build/Makefile
+++ b/tools/build/Makefile
@@ -37,5 +37,22 @@ ifneq ($(wildcard $(TMP_O)),)
$(Q)$(MAKE) -C feature OUTPUT=$(TMP_O) clean >/dev/null
endif
-$(OUTPUT)fixdep: $(srctree)/tools/build/fixdep.c
- $(QUIET_CC)$(HOSTCC) $(KBUILD_HOSTCFLAGS) $(KBUILD_HOSTLDFLAGS) -o $@ $<
+FIXDEP := $(OUTPUT)fixdep
+FIXDEP_IN := $(OUTPUT)fixdep-in.o
+
+# To track fixdep's dependencies properly, fixdep needs to run on itself.
+# Build it twice the first time.
+$(FIXDEP_IN): FORCE
+ $(Q)if [ ! -f $(FIXDEP) ]; then \
+ $(MAKE) $(build)=fixdep HOSTCFLAGS="$(KBUILD_HOSTCFLAGS)"; \
+ rm -f $(FIXDEP).o; \
+ fi
+ $(Q)$(MAKE) $(build)=fixdep HOSTCFLAGS="$(KBUILD_HOSTCFLAGS)"
+
+
+$(FIXDEP): $(FIXDEP_IN)
+ $(QUIET_LINK)$(HOSTCC) $(FIXDEP_IN) $(KBUILD_HOSTLDFLAGS) -o $@
+
+FORCE:
+
+.PHONY: FORCE
diff --git a/tools/build/Makefile.feature b/tools/build/Makefile.feature
index 649c5ab8e8f2..a7f030fc5e83 100644
--- a/tools/build/Makefile.feature
+++ b/tools/build/Makefile.feature
@@ -68,7 +68,6 @@ FEATURE_TESTS_BASIC := \
libdw \
eventfd \
fortify-source \
- get_current_dir_name \
gettid \
glibc \
libbfd \
@@ -80,11 +79,9 @@ FEATURE_TESTS_BASIC := \
libelf-zstd \
libnuma \
numa_num_possible_cpus \
- libperl \
libpython \
libslang \
libtraceevent \
- libtracefs \
libcpupower \
pthread-attr-setaffinity-np \
pthread-barrier \
@@ -93,7 +90,6 @@ FEATURE_TESTS_BASIC := \
timerfd \
zlib \
lzma \
- get_cpuid \
bpf \
scandirat \
sched_getcpu \
@@ -121,11 +117,11 @@ FEATURE_TESTS_EXTRA := \
libbfd-liberty \
libbfd-liberty-z \
libopencsd \
+ libperl \
cxx \
llvm \
clang \
libbpf \
- libbpf-strings \
libpfm4 \
libdebuginfod \
clang-bpf-co-re \
@@ -144,13 +140,11 @@ FEATURE_DISPLAY ?= \
libelf \
libnuma \
numa_num_possible_cpus \
- libperl \
libpython \
libcapstone \
llvm-perf \
zlib \
lzma \
- get_cpuid \
bpf \
libaio \
libzstd
@@ -319,5 +313,7 @@ endef
ifeq ($(FEATURE_DISPLAY_DEFERRED),)
$(call feature_display_entries)
- $(info )
+ ifeq ($(feature_display),1)
+ $(info )
+ endif
endif
diff --git a/tools/build/feature/Makefile b/tools/build/feature/Makefile
index b41a42818d8a..87a5a908d6fa 100644
--- a/tools/build/feature/Makefile
+++ b/tools/build/feature/Makefile
@@ -8,7 +8,6 @@ FILES= \
test-libdw.bin \
test-eventfd.bin \
test-fortify-source.bin \
- test-get_current_dir_name.bin \
test-glibc.bin \
test-gtk2.bin \
test-gtk2-infobar.bin \
@@ -34,7 +33,6 @@ FILES= \
test-libperl.bin \
test-libpython.bin \
test-libslang.bin \
- test-libslang-include-subdir.bin \
test-libtraceevent.bin \
test-libcpupower.bin \
test-libtracefs.bin \
@@ -58,8 +56,6 @@ FILES= \
test-lzma.bin \
test-bpf.bin \
test-libbpf.bin \
- test-libbpf-strings.bin \
- test-get_cpuid.bin \
test-sdt.bin \
test-cxx.bin \
test-gettid.bin \
@@ -94,7 +90,7 @@ else
# paths are used instead.
ifdef CROSS_COMPILE
ifeq ($(PKG_CONFIG_LIBDIR)$(PKG_CONFIG_PATH)$(PKG_CONFIG_SYSROOT_DIR),)
- CROSS_ARCH = $(shell $(CC) -dumpmachine)
+ CROSS_ARCH = $(notdir $(CROSS_COMPILE:%-=%))
PKG_CONFIG_LIBDIR := /usr/local/$(CROSS_ARCH)/lib/pkgconfig/
PKG_CONFIG_LIBDIR := $(PKG_CONFIG_LIBDIR):/usr/local/lib/$(CROSS_ARCH)/pkgconfig/
PKG_CONFIG_LIBDIR := $(PKG_CONFIG_LIBDIR):/usr/lib/$(CROSS_ARCH)/pkgconfig/
@@ -110,7 +106,7 @@ all: $(FILES)
__BUILD = $(CC) $(CFLAGS) -MD -Wall -Werror -o $@ $(patsubst %.bin,%.c,$(@F)) $(LDFLAGS)
BUILD = $(__BUILD) > $(@:.bin=.make.output) 2>&1
BUILD_BFD = $(BUILD) -DPACKAGE='"perf"' -lbfd -ldl
- BUILD_ALL = $(BUILD) -fstack-protector-all -O2 -D_FORTIFY_SOURCE=2 -ldw -lelf -lnuma -lelf -lslang $(FLAGS_PERL_EMBED) $(FLAGS_PYTHON_EMBED) -DPACKAGE='"perf"' -lbfd -ldl -lz -llzma -lzstd
+ BUILD_ALL = $(BUILD) -fstack-protector-all -O2 -D_FORTIFY_SOURCE=2 -ldw -lelf -lnuma -lelf -lslang $(FLAGS_PERL_EMBED) $(FLAGS_PYTHON_EMBED) -ldl -lz -llzma -lzstd
__BUILDXX = $(CXX) $(CXXFLAGS) -MD -Wall -Werror -o $@ $(patsubst %.bin,%.cpp,$(@F)) $(LDFLAGS)
BUILDXX = $(__BUILDXX) > $(@:.bin=.make.output) 2>&1
@@ -118,7 +114,7 @@ __BUILDXX = $(CXX) $(CXXFLAGS) -MD -Wall -Werror -o $@ $(patsubst %.bin,%.cpp,$(
###############################
$(OUTPUT)test-all.bin:
- $(BUILD_ALL) || $(BUILD_ALL) -lopcodes -liberty
+ $(BUILD_ALL)
$(OUTPUT)test-hello.bin:
$(BUILD)
@@ -147,9 +143,6 @@ $(OUTPUT)test-libelf.bin:
$(OUTPUT)test-eventfd.bin:
$(BUILD)
-$(OUTPUT)test-get_current_dir_name.bin:
- $(BUILD)
-
$(OUTPUT)test-glibc.bin:
$(BUILD)
@@ -234,9 +227,6 @@ $(OUTPUT)test-libunwind-debug-frame-aarch64.bin:
$(OUTPUT)test-libslang.bin:
$(BUILD) -lslang
-$(OUTPUT)test-libslang-include-subdir.bin:
- $(BUILD) -lslang
-
$(OUTPUT)test-libtraceevent.bin:
$(BUILD) -ltraceevent
@@ -316,10 +306,10 @@ $(OUTPUT)test-libcapstone.bin:
$(BUILD) # -lcapstone provided by $(FEATURE_CHECK_LDFLAGS-libcapstone)
$(OUTPUT)test-compile-32.bin:
- $(CC) -m32 -o $@ test-compile.c
+ $(CC) -m32 -Wall -Werror -o $@ test-compile.c
$(OUTPUT)test-compile-x32.bin:
- $(CC) -mx32 -o $@ test-compile.c
+ $(CC) -mx32 -Wall -Werror -o $@ test-compile.c
$(OUTPUT)test-zlib.bin:
$(BUILD) -lz
@@ -327,18 +317,12 @@ $(OUTPUT)test-zlib.bin:
$(OUTPUT)test-lzma.bin:
$(BUILD) -llzma
-$(OUTPUT)test-get_cpuid.bin:
- $(BUILD)
-
$(OUTPUT)test-bpf.bin:
$(BUILD)
$(OUTPUT)test-libbpf.bin:
$(BUILD) -lbpf
-$(OUTPUT)test-libbpf-strings.bin:
- $(BUILD)
-
$(OUTPUT)test-sdt.bin:
$(BUILD)
diff --git a/tools/build/feature/test-all.c b/tools/build/feature/test-all.c
index 4419fb4710bd..eb346160d0ba 100644
--- a/tools/build/feature/test-all.c
+++ b/tools/build/feature/test-all.c
@@ -7,17 +7,13 @@
*/
/*
- * Quirk: Python and Perl headers cannot be in arbitrary places, so keep
- * these 3 testcases at the top:
+ * Quirk: Python headers cannot be in arbitrary places, so keep this testcase at
+ * the top:
*/
#define main main_test_libpython
# include "test-libpython.c"
#undef main
-#define main main_test_libperl
-# include "test-libperl.c"
-#undef main
-
#define main main_test_hello
# include "test-hello.c"
#undef main
@@ -26,10 +22,6 @@
# include "test-libelf.c"
#undef main
-#define main main_test_get_current_dir_name
-# include "test-get_current_dir_name.c"
-#undef main
-
#define main main_test_gettid
# include "test-gettid.c"
#undef main
@@ -122,10 +114,6 @@
# include "test-lzma.c"
#undef main
-#define main main_test_get_cpuid
-# include "test-get_cpuid.c"
-#undef main
-
#define main main_test_bpf
# include "test-bpf.c"
#undef main
@@ -154,17 +142,11 @@
# include "test-libtraceevent.c"
#undef main
-#define main main_test_libtracefs
-# include "test-libtracefs.c"
-#undef main
-
int main(int argc, char *argv[])
{
main_test_libpython();
- main_test_libperl();
main_test_hello();
main_test_libelf();
- main_test_get_current_dir_name();
main_test_gettid();
main_test_glibc();
main_test_libdw();
@@ -182,7 +164,6 @@ int main(int argc, char *argv[])
main_test_pthread_attr_setaffinity_np();
main_test_pthread_barrier();
main_test_lzma();
- main_test_get_cpuid();
main_test_bpf();
main_test_scandirat();
main_test_sched_getcpu();
@@ -192,7 +173,6 @@ int main(int argc, char *argv[])
main_test_reallocarray();
main_test_libzstd();
main_test_libtraceevent();
- main_test_libtracefs();
return 0;
}
diff --git a/tools/build/feature/test-get_cpuid.c b/tools/build/feature/test-get_cpuid.c
deleted file mode 100644
index bb4f065f28a6..000000000000
--- a/tools/build/feature/test-get_cpuid.c
+++ /dev/null
@@ -1,8 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <cpuid.h>
-
-int main(void)
-{
- unsigned int eax = 0, ebx = 0, ecx = 0, edx = 0;
- return __get_cpuid(0x15, &eax, &ebx, &ecx, &edx);
-}
diff --git a/tools/build/feature/test-get_current_dir_name.c b/tools/build/feature/test-get_current_dir_name.c
deleted file mode 100644
index c3c201691b4f..000000000000
--- a/tools/build/feature/test-get_current_dir_name.c
+++ /dev/null
@@ -1,11 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#define _GNU_SOURCE
-#include <unistd.h>
-#include <stdlib.h>
-
-int main(void)
-{
- free(get_current_dir_name());
- return 0;
-}
-#undef _GNU_SOURCE
diff --git a/tools/build/feature/test-libbpf-strings.c b/tools/build/feature/test-libbpf-strings.c
deleted file mode 100644
index 83e6c45f5c85..000000000000
--- a/tools/build/feature/test-libbpf-strings.c
+++ /dev/null
@@ -1,10 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <bpf/btf.h>
-
-int main(void)
-{
- struct btf_dump_type_data_opts opts;
-
- opts.emit_strings = 0;
- return opts.emit_strings;
-}
diff --git a/tools/build/feature/test-libslang-include-subdir.c b/tools/build/feature/test-libslang-include-subdir.c
deleted file mode 100644
index 3ea47ec7590e..000000000000
--- a/tools/build/feature/test-libslang-include-subdir.c
+++ /dev/null
@@ -1,7 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <slang/slang.h>
-
-int main(void)
-{
- return SLsmg_init_smg();
-}
diff --git a/tools/dma/.gitignore b/tools/dma/.gitignore
new file mode 100644
index 000000000000..94b68cf4147b
--- /dev/null
+++ b/tools/dma/.gitignore
@@ -0,0 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0-only
+dma_map_benchmark
+include/linux/map_benchmark.h
diff --git a/tools/dma/Makefile b/tools/dma/Makefile
new file mode 100644
index 000000000000..e4abf37bf020
--- /dev/null
+++ b/tools/dma/Makefile
@@ -0,0 +1,55 @@
+# SPDX-License-Identifier: GPL-2.0
+include ../scripts/Makefile.include
+
+bindir ?= /usr/bin
+
+# This will work when dma is built in tools env. where srctree
+# isn't set and when invoked from selftests build, where srctree
+# is set to ".". building_out_of_srctree is undefined for in srctree
+# builds
+ifndef building_out_of_srctree
+srctree := $(patsubst %/,%,$(dir $(CURDIR)))
+srctree := $(patsubst %/,%,$(dir $(srctree)))
+endif
+
+# Do not use make's built-in rules
+# (this improves performance and avoids hard-to-debug behaviour);
+MAKEFLAGS += -r
+
+override CFLAGS += -O2 -Wall -g -D_GNU_SOURCE -I$(OUTPUT)include
+
+ALL_TARGETS := dma_map_benchmark
+ALL_PROGRAMS := $(patsubst %,$(OUTPUT)%,$(ALL_TARGETS))
+
+all: $(ALL_PROGRAMS)
+
+export srctree OUTPUT CC LD CFLAGS
+include $(srctree)/tools/build/Makefile.include
+
+#
+# We need the following to be outside of kernel tree
+#
+$(OUTPUT)include/linux/map_benchmark.h: ../../include/uapi/linux/map_benchmark.h
+ mkdir -p $(OUTPUT)include/linux 2>&1 || true
+ ln -sf $(CURDIR)/../../include/uapi/linux/map_benchmark.h $@
+
+prepare: $(OUTPUT)include/linux/map_benchmark.h
+
+FORCE:
+
+DMA_MAP_BENCHMARK = dma_map_benchmark
+$(DMA_MAP_BENCHMARK): prepare FORCE
+ $(CC) $(CFLAGS) $(DMA_MAP_BENCHMARK).c -o $(DMA_MAP_BENCHMARK)
+
+clean:
+ rm -f $(ALL_PROGRAMS)
+ rm -rf $(OUTPUT)include
+ find $(or $(OUTPUT),.) -name '*.o' -delete -o -name '\.*.d' -delete -o -name '\.*.cmd' -delete
+
+install: $(ALL_PROGRAMS)
+ install -d -m 755 $(DESTDIR)$(bindir); \
+ for program in $(ALL_PROGRAMS); do \
+ install $$program $(DESTDIR)$(bindir); \
+ done
+
+.PHONY: all install clean prepare FORCE
diff --git a/tools/testing/selftests/dma/config b/tools/dma/config
index 6102ee3c43cd..6102ee3c43cd 100644
--- a/tools/testing/selftests/dma/config
+++ b/tools/dma/config
diff --git a/tools/testing/selftests/dma/dma_map_benchmark.c b/tools/dma/dma_map_benchmark.c
index b12f1f9babf8..dd0ed528e6df 100644
--- a/tools/testing/selftests/dma/dma_map_benchmark.c
+++ b/tools/dma/dma_map_benchmark.c
@@ -10,7 +10,6 @@
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
-#include <linux/types.h>
#include <linux/map_benchmark.h>
#define NSEC_PER_MSEC 1000000L
@@ -118,7 +117,7 @@ int main(int argc, char **argv)
}
printf("dma mapping benchmark: threads:%d seconds:%d node:%d dir:%s granule: %d\n",
- threads, seconds, node, dir[directions], granule);
+ threads, seconds, node, directions[dir], granule);
printf("average map latency(us):%.1f standard deviation:%.1f\n",
map.avg_map_100ns/10.0, map.map_stddev/10.0);
printf("average unmap latency(us):%.1f standard deviation:%.1f\n",
diff --git a/tools/docs/check-variable-fonts.py b/tools/docs/check-variable-fonts.py
new file mode 100755
index 000000000000..958d5a745724
--- /dev/null
+++ b/tools/docs/check-variable-fonts.py
@@ -0,0 +1,37 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0-only
+# Copyright (C) Akira Yokosawa, 2024
+#
+# Ported to Python by (c) Mauro Carvalho Chehab, 2025
+#
+# pylint: disable=C0103
+
+"""
+Detect problematic Noto CJK variable fonts.
+
+or more details, see .../tools/lib/python/kdoc/latex_fonts.py.
+"""
+
+import argparse
+import sys
+import os.path
+
+src_dir = os.path.dirname(os.path.realpath(__file__))
+sys.path.insert(0, os.path.join(src_dir, '../lib/python'))
+
+from kdoc.latex_fonts import LatexFontChecker
+
+checker = LatexFontChecker()
+
+parser=argparse.ArgumentParser(description=checker.description(),
+ formatter_class=argparse.RawTextHelpFormatter)
+parser.add_argument("--deny-vf",
+ help="XDG_CONFIG_HOME dir containing fontconfig/fonts.conf file")
+
+args=parser.parse_args()
+
+msg = LatexFontChecker(args.deny_vf).check()
+if msg:
+ print(msg)
+
+sys.exit(1)
diff --git a/tools/docs/checktransupdate.py b/tools/docs/checktransupdate.py
new file mode 100755
index 000000000000..e894652369a5
--- /dev/null
+++ b/tools/docs/checktransupdate.py
@@ -0,0 +1,307 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+"""
+This script helps track the translation status of the documentation
+in different locales, e.g., zh_CN. More specially, it uses `git log`
+commit to find the latest english commit from the translation commit
+(order by author date) and the latest english commits from HEAD. If
+differences occur, report the file and commits that need to be updated.
+
+The usage is as follows:
+- tools/docs/checktransupdate.py -l zh_CN
+This will print all the files that need to be updated or translated in the zh_CN locale.
+- tools/docs/checktransupdate.py Documentation/translations/zh_CN/dev-tools/testing-overview.rst
+This will only print the status of the specified file.
+
+The output is something like:
+Documentation/dev-tools/kfence.rst
+No translation in the locale of zh_CN
+
+Documentation/translations/zh_CN/dev-tools/testing-overview.rst
+commit 42fb9cfd5b18 ("Documentation: dev-tools: Add link to RV docs")
+1 commits needs resolving in total
+"""
+
+import os
+import re
+import time
+import logging
+from argparse import ArgumentParser, ArgumentTypeError, BooleanOptionalAction
+from datetime import datetime
+
+
+def get_origin_path(file_path):
+ """Get the origin path from the translation path"""
+ paths = file_path.split("/")
+ tidx = paths.index("translations")
+ opaths = paths[:tidx]
+ opaths += paths[tidx + 2 :]
+ return "/".join(opaths)
+
+
+def get_latest_commit_from(file_path, commit):
+ """Get the latest commit from the specified commit for the specified file"""
+ command = f"git log --pretty=format:%H%n%aD%n%cD%n%n%B {commit} -1 -- {file_path}"
+ logging.debug(command)
+ pipe = os.popen(command)
+ result = pipe.read()
+ result = result.split("\n")
+ if len(result) <= 1:
+ return None
+
+ logging.debug("Result: %s", result[0])
+
+ return {
+ "hash": result[0],
+ "author_date": datetime.strptime(result[1], "%a, %d %b %Y %H:%M:%S %z"),
+ "commit_date": datetime.strptime(result[2], "%a, %d %b %Y %H:%M:%S %z"),
+ "message": result[4:],
+ }
+
+
+def get_origin_from_trans(origin_path, t_from_head):
+ """Get the latest origin commit from the translation commit"""
+ o_from_t = get_latest_commit_from(origin_path, t_from_head["hash"])
+ while o_from_t is not None and o_from_t["author_date"] > t_from_head["author_date"]:
+ o_from_t = get_latest_commit_from(origin_path, o_from_t["hash"] + "^")
+ if o_from_t is not None:
+ logging.debug("tracked origin commit id: %s", o_from_t["hash"])
+ return o_from_t
+
+
+def get_origin_from_trans_smartly(origin_path, t_from_head):
+ """Get the latest origin commit from the formatted translation commit:
+ (1) update to commit HASH (TITLE)
+ (2) Update the translation through commit HASH (TITLE)
+ """
+ # catch flag for 12-bit commit hash
+ HASH = r'([0-9a-f]{12})'
+ # pattern 1: contains "update to commit HASH"
+ pat_update_to = re.compile(rf'update to commit {HASH}')
+ # pattern 2: contains "Update the translation through commit HASH"
+ pat_update_translation = re.compile(rf'Update the translation through commit {HASH}')
+
+ origin_commit_hash = None
+ for line in t_from_head["message"]:
+ # check if the line matches the first pattern
+ match = pat_update_to.search(line)
+ if match:
+ origin_commit_hash = match.group(1)
+ break
+ # check if the line matches the second pattern
+ match = pat_update_translation.search(line)
+ if match:
+ origin_commit_hash = match.group(1)
+ break
+ if origin_commit_hash is None:
+ return None
+ o_from_t = get_latest_commit_from(origin_path, origin_commit_hash)
+ if o_from_t is not None:
+ logging.debug("tracked origin commit id: %s", o_from_t["hash"])
+ return o_from_t
+
+
+def get_commits_count_between(opath, commit1, commit2):
+ """Get the commits count between two commits for the specified file"""
+ command = f"git log --pretty=format:%H {commit1}...{commit2} -- {opath}"
+ logging.debug(command)
+ pipe = os.popen(command)
+ result = pipe.read().split("\n")
+ # filter out empty lines
+ result = list(filter(lambda x: x != "", result))
+ return result
+
+
+def pretty_output(commit):
+ """Pretty print the commit message"""
+ command = f"git log --pretty='format:%h (\"%s\")' -1 {commit}"
+ logging.debug(command)
+ pipe = os.popen(command)
+ return pipe.read()
+
+
+def valid_commit(commit):
+ """Check if the commit is valid or not"""
+ msg = pretty_output(commit)
+ return "Merge tag" not in msg
+
+def check_per_file(file_path):
+ """Check the translation status for the specified file"""
+ opath = get_origin_path(file_path)
+
+ if not os.path.isfile(opath):
+ logging.error("Cannot find the origin path for {file_path}")
+ return
+
+ o_from_head = get_latest_commit_from(opath, "HEAD")
+ t_from_head = get_latest_commit_from(file_path, "HEAD")
+
+ if o_from_head is None or t_from_head is None:
+ logging.error("Cannot find the latest commit for %s", file_path)
+ return
+
+ o_from_t = get_origin_from_trans_smartly(opath, t_from_head)
+ # notice, o_from_t from get_*_smartly() is always more accurate than from get_*()
+ if o_from_t is None:
+ o_from_t = get_origin_from_trans(opath, t_from_head)
+
+ if o_from_t is None:
+ logging.error("Error: Cannot find the latest origin commit for %s", file_path)
+ return
+
+ if o_from_head["hash"] == o_from_t["hash"]:
+ logging.debug("No update needed for %s", file_path)
+ else:
+ logging.info(file_path)
+ commits = get_commits_count_between(
+ opath, o_from_t["hash"], o_from_head["hash"]
+ )
+ count = 0
+ for commit in commits:
+ if valid_commit(commit):
+ logging.info("commit %s", pretty_output(commit))
+ count += 1
+ logging.info("%d commits needs resolving in total\n", count)
+
+
+def valid_locales(locale):
+ """Check if the locale is valid or not"""
+ script_path = os.path.dirname(os.path.abspath(__file__))
+ linux_path = os.path.join(script_path, "../..")
+ if not os.path.isdir(f"{linux_path}/Documentation/translations/{locale}"):
+ raise ArgumentTypeError("Invalid locale: {locale}")
+ return locale
+
+
+def list_files_with_excluding_folders(folder, exclude_folders, include_suffix):
+ """List all files with the specified suffix in the folder and its subfolders"""
+ files = []
+ stack = [folder]
+
+ while stack:
+ pwd = stack.pop()
+ # filter out the exclude folders
+ if os.path.basename(pwd) in exclude_folders:
+ continue
+ # list all files and folders
+ for item in os.listdir(pwd):
+ ab_item = os.path.join(pwd, item)
+ if os.path.isdir(ab_item):
+ stack.append(ab_item)
+ else:
+ if ab_item.endswith(include_suffix):
+ files.append(ab_item)
+
+ return files
+
+
+class DmesgFormatter(logging.Formatter):
+ """Custom dmesg logging formatter"""
+ def format(self, record):
+ timestamp = time.time()
+ formatted_time = f"[{timestamp:>10.6f}]"
+ log_message = f"{formatted_time} {record.getMessage()}"
+ return log_message
+
+
+def config_logging(log_level, log_file="checktransupdate.log"):
+ """configure logging based on the log level"""
+ # set up the root logger
+ logger = logging.getLogger()
+ logger.setLevel(log_level)
+
+ # Create console handler
+ console_handler = logging.StreamHandler()
+ console_handler.setLevel(log_level)
+
+ # Create file handler
+ file_handler = logging.FileHandler(log_file)
+ file_handler.setLevel(log_level)
+
+ # Create formatter and add it to the handlers
+ formatter = DmesgFormatter()
+ console_handler.setFormatter(formatter)
+ file_handler.setFormatter(formatter)
+
+ # Add the handler to the logger
+ logger.addHandler(console_handler)
+ logger.addHandler(file_handler)
+
+
+def main():
+ """Main function of the script"""
+ script_path = os.path.dirname(os.path.abspath(__file__))
+ linux_path = os.path.join(script_path, "../..")
+
+ parser = ArgumentParser(description="Check the translation update")
+ parser.add_argument(
+ "-l",
+ "--locale",
+ default="zh_CN",
+ type=valid_locales,
+ help="Locale to check when files are not specified",
+ )
+
+ parser.add_argument(
+ "--print-missing-translations",
+ action=BooleanOptionalAction,
+ default=True,
+ help="Print files that do not have translations",
+ )
+
+ parser.add_argument(
+ '--log',
+ default='INFO',
+ choices=['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'],
+ help='Set the logging level')
+
+ parser.add_argument(
+ '--logfile',
+ default='checktransupdate.log',
+ help='Set the logging file (default: checktransupdate.log)')
+
+ parser.add_argument(
+ "files", nargs="*", help="Files to check, if not specified, check all files"
+ )
+ args = parser.parse_args()
+
+ # Configure logging based on the --log argument
+ log_level = getattr(logging, args.log.upper(), logging.INFO)
+ config_logging(log_level)
+
+ # Get files related to linux path
+ files = args.files
+ if len(files) == 0:
+ offical_files = list_files_with_excluding_folders(
+ os.path.join(linux_path, "Documentation"), ["translations", "output"], "rst"
+ )
+
+ for file in offical_files:
+ # split the path into parts
+ path_parts = file.split(os.sep)
+ # find the index of the "Documentation" directory
+ kindex = path_parts.index("Documentation")
+ # insert the translations and locale after the Documentation directory
+ new_path_parts = path_parts[:kindex + 1] + ["translations", args.locale] \
+ + path_parts[kindex + 1 :]
+ # join the path parts back together
+ new_file = os.sep.join(new_path_parts)
+ if os.path.isfile(new_file):
+ files.append(new_file)
+ else:
+ if args.print_missing_translations:
+ logging.info(os.path.relpath(os.path.abspath(file), linux_path))
+ logging.info("No translation in the locale of %s\n", args.locale)
+
+ files = list(map(lambda x: os.path.relpath(os.path.abspath(x), linux_path), files))
+
+ # cd to linux root directory
+ os.chdir(linux_path)
+
+ for file in files:
+ check_per_file(file)
+
+
+if __name__ == "__main__":
+ main()
diff --git a/tools/docs/documentation-file-ref-check b/tools/docs/documentation-file-ref-check
new file mode 100755
index 000000000000..0cad42f6943b
--- /dev/null
+++ b/tools/docs/documentation-file-ref-check
@@ -0,0 +1,245 @@
+#!/usr/bin/env perl
+# SPDX-License-Identifier: GPL-2.0
+#
+# Treewide grep for references to files under Documentation, and report
+# non-existing files in stderr.
+
+use warnings;
+use strict;
+use Getopt::Long qw(:config no_auto_abbrev);
+
+# NOTE: only add things here when the file was gone, but the text wants
+# to mention a past documentation file, for example, to give credits for
+# the original work.
+my %false_positives = (
+ "Documentation/scsi/scsi_mid_low_api.rst" => "Documentation/Configure.help",
+ "drivers/vhost/vhost.c" => "Documentation/virtual/lguest/lguest.c",
+);
+
+my $scriptname = $0;
+$scriptname =~ s,tools/docs/([^/]+/),$1,;
+
+# Parse arguments
+my $help = 0;
+my $fix = 0;
+my $warn = 0;
+
+if (! -e ".git") {
+ printf "Warning: can't check if file exists, as this is not a git tree\n";
+ exit 0;
+}
+
+GetOptions(
+ 'fix' => \$fix,
+ 'warn' => \$warn,
+ 'h|help|usage' => \$help,
+);
+
+if ($help != 0) {
+ print "$scriptname [--help] [--fix]\n";
+ exit -1;
+}
+
+# Step 1: find broken references
+print "Finding broken references. This may take a while... " if ($fix);
+
+my %broken_ref;
+
+my $doc_fix = 0;
+
+open IN, "git grep ':doc:\`' Documentation/|"
+ or die "Failed to run git grep";
+while (<IN>) {
+ next if (!m,^([^:]+):.*\:doc\:\`([^\`]+)\`,);
+ next if (m,sphinx/,);
+
+ my $file = $1;
+ my $d = $1;
+ my $doc_ref = $2;
+
+ my $f = $doc_ref;
+
+ $d =~ s,(.*/).*,$1,;
+ $f =~ s,.*\<([^\>]+)\>,$1,;
+
+ if ($f =~ m,^/,) {
+ $f = "$f.rst";
+ $f =~ s,^/,Documentation/,;
+ } else {
+ $f = "$d$f.rst";
+ }
+
+ next if (grep -e, glob("$f"));
+
+ if ($fix && !$doc_fix) {
+ print STDERR "\nWARNING: Currently, can't fix broken :doc:`` fields\n";
+ }
+ $doc_fix++;
+
+ print STDERR "$file: :doc:`$doc_ref`\n";
+}
+close IN;
+
+open IN, "git grep 'Documentation/'|"
+ or die "Failed to run git grep";
+while (<IN>) {
+ next if (!m/^([^:]+):(.*)/);
+
+ my $f = $1;
+ my $ln = $2;
+
+ # On linux-next, discard the Next/ directory
+ next if ($f =~ m,^Next/,);
+
+ # Makefiles and scripts contain nasty expressions to parse docs
+ next if ($f =~ m/Makefile/ || $f =~ m/\.(sh|py|pl|~|rej|org|orig)$/);
+
+ # It doesn't make sense to parse hidden files
+ next if ($f =~ m#/\.#);
+
+ # Skip this script
+ next if ($f eq $scriptname);
+
+ # Ignore the dir where documentation will be built
+ next if ($ln =~ m,\b(\S*)Documentation/output,);
+
+ if ($ln =~ m,\b(\S*)(Documentation/[A-Za-z0-9\_\.\,\~/\*\[\]\?+-]*)(.*),) {
+ my $prefix = $1;
+ my $ref = $2;
+ my $base = $2;
+ my $extra = $3;
+
+ # some file references are like:
+ # /usr/src/linux/Documentation/DMA-{API,mapping}.txt
+ # For now, ignore them
+ next if ($extra =~ m/^{/);
+
+ # Remove footnotes at the end like:
+ # Documentation/devicetree/dt-object-internal.txt[1]
+ $ref =~ s/(txt|rst)\[\d+]$/$1/;
+
+ # Remove ending ']' without any '['
+ $ref =~ s/\].*// if (!($ref =~ m/\[/));
+
+ # Remove puntuation marks at the end
+ $ref =~ s/[\,\.]+$//;
+
+ my $fulref = "$prefix$ref";
+
+ $fulref =~ s/^(\<file|ref)://;
+ $fulref =~ s/^[\'\`]+//;
+ $fulref =~ s,^\$\(.*\)/,,;
+ $base =~ s,.*/,,;
+
+ # Remove URL false-positives
+ next if ($fulref =~ m/^http/);
+
+ # Remove sched-pelt false-positive
+ next if ($fulref =~ m,^Documentation/scheduler/sched-pelt$,);
+
+ # Discard some build examples from Documentation/target/tcm_mod_builder.rst
+ next if ($fulref =~ m,mnt/sdb/lio-core-2.6.git/Documentation/target,);
+
+ # Check if exists, evaluating wildcards
+ next if (grep -e, glob("$ref $fulref"));
+
+ # Accept relative Documentation patches for tools/
+ if ($f =~ m/tools/) {
+ my $path = $f;
+ $path =~ s,(.*)/.*,$1,;
+ $path =~ s,testing/selftests/bpf,bpf/bpftool,;
+ next if (grep -e, glob("$path/$ref $path/../$ref $path/$fulref"));
+ }
+
+ # Discard known false-positives
+ if (defined($false_positives{$f})) {
+ next if ($false_positives{$f} eq $fulref);
+ }
+
+ if ($fix) {
+ if (!($ref =~ m/(scripts|Kconfig|Kbuild)/)) {
+ $broken_ref{$ref}++;
+ }
+ } elsif ($warn) {
+ print STDERR "Warning: $f references a file that doesn't exist: $fulref\n";
+ } else {
+ print STDERR "$f: $fulref\n";
+ }
+ }
+}
+close IN;
+
+exit 0 if (!$fix);
+
+# Step 2: Seek for file name alternatives
+print "Auto-fixing broken references. Please double-check the results\n";
+
+foreach my $ref (keys %broken_ref) {
+ my $new =$ref;
+
+ my $basedir = ".";
+ # On translations, only seek inside the translations directory
+ $basedir = $1 if ($ref =~ m,(Documentation/translations/[^/]+),);
+
+ # get just the basename
+ $new =~ s,.*/,,;
+
+ my $f="";
+
+ # usual reason for breakage: DT file moved around
+ if ($ref =~ /devicetree/) {
+ # usual reason for breakage: DT file renamed to .yaml
+ if (!$f) {
+ my $new_ref = $ref;
+ $new_ref =~ s/\.txt$/.yaml/;
+ $f=$new_ref if (-f $new_ref);
+ }
+
+ if (!$f) {
+ my $search = $new;
+ $search =~ s,^.*/,,;
+ $f = qx(find Documentation/devicetree/ -iname "*$search*") if ($search);
+ if (!$f) {
+ # Manufacturer name may have changed
+ $search =~ s/^.*,//;
+ $f = qx(find Documentation/devicetree/ -iname "*$search*") if ($search);
+ }
+ }
+ }
+
+ # usual reason for breakage: file renamed to .rst
+ if (!$f) {
+ $new =~ s/\.txt$/.rst/;
+ $f=qx(find $basedir -iname $new) if ($new);
+ }
+
+ # usual reason for breakage: use dash or underline
+ if (!$f) {
+ $new =~ s/[-_]/[-_]/g;
+ $f=qx(find $basedir -iname $new) if ($new);
+ }
+
+ # Wild guess: seek for the same name on another place
+ if (!$f) {
+ $f = qx(find $basedir -iname $new) if ($new);
+ }
+
+ my @find = split /\s+/, $f;
+
+ if (!$f) {
+ print STDERR "ERROR: Didn't find a replacement for $ref\n";
+ } elsif (scalar(@find) > 1) {
+ print STDERR "WARNING: Won't auto-replace, as found multiple files close to $ref:\n";
+ foreach my $j (@find) {
+ $j =~ s,^./,,;
+ print STDERR " $j\n";
+ }
+ } else {
+ $f = $find[0];
+ $f =~ s,^./,,;
+ print "INFO: Replacing $ref to $f\n";
+ foreach my $j (qx(git grep -l $ref)) {
+ qx(sed "s\@$ref\@$f\@g" -i $j);
+ }
+ }
+}
diff --git a/tools/docs/features-refresh.sh b/tools/docs/features-refresh.sh
new file mode 100755
index 000000000000..c2288124e94a
--- /dev/null
+++ b/tools/docs/features-refresh.sh
@@ -0,0 +1,98 @@
+#
+# Small script that refreshes the kernel feature support status in place.
+#
+
+for F_FILE in Documentation/features/*/*/arch-support.txt; do
+ F=$(grep "^# Kconfig:" "$F_FILE" | cut -c26-)
+
+ #
+ # Each feature F is identified by a pair (O, K), where 'O' can
+ # be either the empty string (for 'nop') or "not" (the logical
+ # negation operator '!'); other operators are not supported.
+ #
+ O=""
+ K=$F
+ if [[ "$F" == !* ]]; then
+ O="not"
+ K=$(echo $F | sed -e 's/^!//g')
+ fi
+
+ #
+ # F := (O, K) is 'valid' iff there is a Kconfig file (for some
+ # arch) which contains K.
+ #
+ # Notice that this definition entails an 'asymmetry' between
+ # the case 'O = ""' and the case 'O = "not"'. E.g., F may be
+ # _invalid_ if:
+ #
+ # [case 'O = ""']
+ # 1) no arch provides support for F,
+ # 2) K does not exist (e.g., it was renamed/mis-typed);
+ #
+ # [case 'O = "not"']
+ # 3) all archs provide support for F,
+ # 4) as in (2).
+ #
+ # The rationale for adopting this definition (and, thus, for
+ # keeping the asymmetry) is:
+ #
+ # We want to be able to 'detect' (2) (or (4)).
+ #
+ # (1) and (3) may further warn the developers about the fact
+ # that K can be removed.
+ #
+ F_VALID="false"
+ for ARCH_DIR in arch/*/; do
+ K_FILES=$(find $ARCH_DIR -name "Kconfig*")
+ K_GREP=$(grep "$K" $K_FILES)
+ if [ ! -z "$K_GREP" ]; then
+ F_VALID="true"
+ break
+ fi
+ done
+ if [ "$F_VALID" = "false" ]; then
+ printf "WARNING: '%s' is not a valid Kconfig\n" "$F"
+ fi
+
+ T_FILE="$F_FILE.tmp"
+ grep "^#" $F_FILE > $T_FILE
+ echo " -----------------------" >> $T_FILE
+ echo " | arch |status|" >> $T_FILE
+ echo " -----------------------" >> $T_FILE
+ for ARCH_DIR in arch/*/; do
+ ARCH=$(echo $ARCH_DIR | sed -e 's/^arch//g' | sed -e 's/\///g')
+ K_FILES=$(find $ARCH_DIR -name "Kconfig*")
+ K_GREP=$(grep "$K" $K_FILES)
+ #
+ # Arch support status values for (O, K) are updated according
+ # to the following rules.
+ #
+ # - ("", K) is 'supported by a given arch', if there is a
+ # Kconfig file for that arch which contains K;
+ #
+ # - ("not", K) is 'supported by a given arch', if there is
+ # no Kconfig file for that arch which contains K;
+ #
+ # - otherwise: preserve the previous status value (if any),
+ # default to 'not yet supported'.
+ #
+ # Notice that, according these rules, invalid features may be
+ # updated/modified.
+ #
+ if [ "$O" = "" ] && [ ! -z "$K_GREP" ]; then
+ printf " |%12s: | ok |\n" "$ARCH" >> $T_FILE
+ elif [ "$O" = "not" ] && [ -z "$K_GREP" ]; then
+ printf " |%12s: | ok |\n" "$ARCH" >> $T_FILE
+ else
+ S=$(grep -v "^#" "$F_FILE" | grep " $ARCH:")
+ if [ ! -z "$S" ]; then
+ echo "$S" >> $T_FILE
+ else
+ printf " |%12s: | TODO |\n" "$ARCH" \
+ >> $T_FILE
+ fi
+ fi
+ done
+ echo " -----------------------" >> $T_FILE
+ mv $T_FILE $F_FILE
+done
diff --git a/tools/docs/find-unused-docs.sh b/tools/docs/find-unused-docs.sh
new file mode 100755
index 000000000000..05552dbda5bc
--- /dev/null
+++ b/tools/docs/find-unused-docs.sh
@@ -0,0 +1,62 @@
+#!/bin/bash
+# (c) 2017, Jonathan Corbet <corbet@lwn.net>
+# sayli karnik <karniksayli1995@gmail.com>
+#
+# This script detects files with kernel-doc comments for exported functions
+# that are not included in documentation.
+#
+# usage: Run 'tools/docs/find-unused-docs.sh directory' from top level of kernel
+# tree.
+#
+# example: $tools/docs/find-unused-docs.sh drivers/scsi
+#
+# Licensed under the terms of the GNU GPL License
+
+if ! [ -d "Documentation" ]; then
+ echo "Run from top level of kernel tree"
+ exit 1
+fi
+
+if [ "$#" -ne 1 ]; then
+ echo "Usage: tools/docs/find-unused-docs.sh directory"
+ exit 1
+fi
+
+if ! [ -d "$1" ]; then
+ echo "Directory $1 doesn't exist"
+ exit 1
+fi
+
+cd "$( dirname "${BASH_SOURCE[0]}" )"
+cd ..
+
+cd Documentation/
+
+echo "The following files contain kerneldoc comments for exported functions \
+that are not used in the formatted documentation"
+
+# FILES INCLUDED
+
+files_included=($(grep -rHR ".. kernel-doc" --include \*.rst | cut -d " " -f 3))
+
+declare -A FILES_INCLUDED
+
+for each in "${files_included[@]}"; do
+ FILES_INCLUDED[$each]="$each"
+ done
+
+cd ..
+
+# FILES NOT INCLUDED
+
+for file in `find $1 -name '*.c'`; do
+
+ if [[ ${FILES_INCLUDED[$file]+_} ]]; then
+ continue;
+ fi
+ str=$(PYTHONDONTWRITEBYTECODE=1 scripts/kernel-doc -export "$file" 2>/dev/null)
+ if [[ -n "$str" ]]; then
+ echo "$file"
+ fi
+ done
+
diff --git a/tools/docs/gen-redirects.py b/tools/docs/gen-redirects.py
new file mode 100755
index 000000000000..6a6ebf6f42dc
--- /dev/null
+++ b/tools/docs/gen-redirects.py
@@ -0,0 +1,54 @@
+#! /usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+#
+# Copyright © 2025, Oracle and/or its affiliates.
+# Author: Vegard Nossum <vegard.nossum@oracle.com>
+
+"""Generate HTML redirects for renamed Documentation/**.rst files using
+the output of tools/docs/gen-renames.py.
+
+Example:
+
+ tools/docs/gen-redirects.py --output Documentation/output/ < Documentation/.renames.txt
+"""
+
+import argparse
+import os
+import sys
+
+parser = argparse.ArgumentParser(description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter)
+parser.add_argument('-o', '--output', help='output directory')
+
+args = parser.parse_args()
+
+for line in sys.stdin:
+ line = line.rstrip('\n')
+
+ old_name, new_name = line.split(' ', 2)
+
+ old_html_path = os.path.join(args.output, old_name + '.html')
+ new_html_path = os.path.join(args.output, new_name + '.html')
+
+ if not os.path.exists(new_html_path):
+ print(f"warning: target does not exist: {new_html_path} (redirect from {old_html_path})")
+ continue
+
+ old_html_dir = os.path.dirname(old_html_path)
+ if not os.path.exists(old_html_dir):
+ os.makedirs(old_html_dir)
+
+ relpath = os.path.relpath(new_name, os.path.dirname(old_name)) + '.html'
+
+ with open(old_html_path, 'w') as f:
+ print(f"""\
+<!DOCTYPE html>
+
+<html lang="en">
+<head>
+ <title>This page has moved</title>
+ <meta http-equiv="refresh" content="0; url={relpath}">
+</head>
+<body>
+<p>This page has moved to <a href="{relpath}">{new_name}</a>.</p>
+</body>
+</html>""", file=f)
diff --git a/tools/docs/gen-renames.py b/tools/docs/gen-renames.py
new file mode 100755
index 000000000000..8cb3b2157d83
--- /dev/null
+++ b/tools/docs/gen-renames.py
@@ -0,0 +1,130 @@
+#! /usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+#
+# Copyright © 2025, Oracle and/or its affiliates.
+# Author: Vegard Nossum <vegard.nossum@oracle.com>
+
+"""Trawl repository history for renames of Documentation/**.rst files.
+
+Example:
+
+ tools/docs/gen-renames.py --rev HEAD > Documentation/.renames.txt
+"""
+
+import argparse
+import itertools
+import os
+import subprocess
+import sys
+
+parser = argparse.ArgumentParser(description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter)
+parser.add_argument('--rev', default='HEAD', help='generate renames up to this revision')
+
+args = parser.parse_args()
+
+def normalize(path):
+ prefix = 'Documentation/'
+ suffix = '.rst'
+
+ assert path.startswith(prefix)
+ assert path.endswith(suffix)
+
+ return path[len(prefix):-len(suffix)]
+
+class Name(object):
+ def __init__(self, name):
+ self.names = [name]
+
+ def rename(self, new_name):
+ self.names.append(new_name)
+
+names = {
+}
+
+for line in subprocess.check_output([
+ 'git', 'log',
+ '--reverse',
+ '--oneline',
+ '--find-renames',
+ '--diff-filter=RD',
+ '--name-status',
+ '--format=commit %H',
+ # ~v4.8-ish is when Sphinx/.rst was added in the first place
+ f'v4.8..{args.rev}',
+ '--',
+ 'Documentation/'
+], text=True).splitlines():
+ # rename
+ if line.startswith('R'):
+ _, old, new = line[1:].split('\t', 2)
+
+ if old.endswith('.rst') and new.endswith('.rst'):
+ old = normalize(old)
+ new = normalize(new)
+
+ name = names.get(old)
+ if name is None:
+ name = Name(old)
+ else:
+ del names[old]
+
+ name.rename(new)
+ names[new] = name
+
+ continue
+
+ # delete
+ if line.startswith('D'):
+ _, old = line.split('\t', 1)
+
+ if old.endswith('.rst'):
+ old = normalize(old)
+
+ # TODO: we could save added/modified files as well and propose
+ # them as alternatives
+ name = names.get(old)
+ if name is None:
+ pass
+ else:
+ del names[old]
+
+ continue
+
+#
+# Get the set of current files so we can sanity check that we aren't
+# redirecting any of those
+#
+
+current_files = set()
+for line in subprocess.check_output([
+ 'git', 'ls-tree',
+ '-r',
+ '--name-only',
+ args.rev,
+ 'Documentation/',
+], text=True).splitlines():
+ if line.endswith('.rst'):
+ current_files.add(normalize(line))
+
+#
+# Format/group/output result
+#
+
+result = []
+for _, v in names.items():
+ old_names = v.names[:-1]
+ new_name = v.names[-1]
+
+ for old_name in old_names:
+ if old_name == new_name:
+ # A file was renamed to its new name twice; don't redirect that
+ continue
+
+ if old_name in current_files:
+ # A file was recreated with a former name; don't redirect those
+ continue
+
+ result.append((old_name, new_name))
+
+for old_name, new_name in sorted(result):
+ print(f"{old_name} {new_name}")
diff --git a/tools/docs/get_abi.py b/tools/docs/get_abi.py
new file mode 100755
index 000000000000..2f0b99401f26
--- /dev/null
+++ b/tools/docs/get_abi.py
@@ -0,0 +1,214 @@
+#!/usr/bin/env python3
+# pylint: disable=R0903
+# Copyright(c) 2025: Mauro Carvalho Chehab <mchehab@kernel.org>.
+# SPDX-License-Identifier: GPL-2.0
+
+"""
+Parse ABI documentation and produce results from it.
+"""
+
+import argparse
+import logging
+import os
+import sys
+
+# Import Python modules
+
+LIB_DIR = "../lib/python"
+SRC_DIR = os.path.dirname(os.path.realpath(__file__))
+
+sys.path.insert(0, os.path.join(SRC_DIR, LIB_DIR))
+
+from abi.abi_parser import AbiParser # pylint: disable=C0413
+from abi.abi_regex import AbiRegex # pylint: disable=C0413
+from abi.helpers import ABI_DIR, DEBUG_HELP # pylint: disable=C0413
+from abi.system_symbols import SystemSymbols # pylint: disable=C0413
+
+# Command line classes
+
+
+REST_DESC = """
+Produce output in ReST format.
+
+The output is done on two sections:
+
+- Symbols: show all parsed symbols in alphabetic order;
+- Files: cross reference the content of each file with the symbols on it.
+"""
+
+class AbiRest:
+ """Initialize an argparse subparser for rest output"""
+
+ def __init__(self, subparsers):
+ """Initialize argparse subparsers"""
+
+ parser = subparsers.add_parser("rest",
+ formatter_class=argparse.RawTextHelpFormatter,
+ description=REST_DESC)
+
+ parser.add_argument("--enable-lineno", action="store_true",
+ help="enable lineno")
+ parser.add_argument("--raw", action="store_true",
+ help="output text as contained in the ABI files. "
+ "It not used, output will contain dynamically"
+ " generated cross references when possible.")
+ parser.add_argument("--no-file", action="store_true",
+ help="Don't the files section")
+ parser.add_argument("--show-hints", help="Show-hints")
+
+ parser.set_defaults(func=self.run)
+
+ def run(self, args):
+ """Run subparser"""
+
+ parser = AbiParser(args.dir, debug=args.debug)
+ parser.parse_abi()
+ parser.check_issues()
+
+ for t in parser.doc(args.raw, not args.no_file):
+ if args.enable_lineno:
+ print (f".. LINENO {t[1]}#{t[2]}\n\n")
+
+ print(t[0])
+
+class AbiValidate:
+ """Initialize an argparse subparser for ABI validation"""
+
+ def __init__(self, subparsers):
+ """Initialize argparse subparsers"""
+
+ parser = subparsers.add_parser("validate",
+ formatter_class=argparse.ArgumentDefaultsHelpFormatter,
+ description="list events")
+
+ parser.set_defaults(func=self.run)
+
+ def run(self, args):
+ """Run subparser"""
+
+ parser = AbiParser(args.dir, debug=args.debug)
+ parser.parse_abi()
+ parser.check_issues()
+
+
+class AbiSearch:
+ """Initialize an argparse subparser for ABI search"""
+
+ def __init__(self, subparsers):
+ """Initialize argparse subparsers"""
+
+ parser = subparsers.add_parser("search",
+ formatter_class=argparse.ArgumentDefaultsHelpFormatter,
+ description="Search ABI using a regular expression")
+
+ parser.add_argument("expression",
+ help="Case-insensitive search pattern for the ABI symbol")
+
+ parser.set_defaults(func=self.run)
+
+ def run(self, args):
+ """Run subparser"""
+
+ parser = AbiParser(args.dir, debug=args.debug)
+ parser.parse_abi()
+ parser.search_symbols(args.expression)
+
+UNDEFINED_DESC="""
+Check undefined ABIs on local machine.
+
+Read sysfs devnodes and check if the devnodes there are defined inside
+ABI documentation.
+
+The search logic tries to minimize the number of regular expressions to
+search per each symbol.
+
+By default, it runs on a single CPU, as Python support for CPU threads
+is still experimental, and multi-process runs on Python is very slow.
+
+On experimental tests, if the number of ABI symbols to search per devnode
+is contained on a limit of ~150 regular expressions, using a single CPU
+is a lot faster than using multiple processes. However, if the number of
+regular expressions to check is at the order of ~30000, using multiple
+CPUs speeds up the check.
+"""
+
+class AbiUndefined:
+ """
+ Initialize an argparse subparser for logic to check undefined ABI at
+ the current machine's sysfs
+ """
+
+ def __init__(self, subparsers):
+ """Initialize argparse subparsers"""
+
+ parser = subparsers.add_parser("undefined",
+ formatter_class=argparse.RawTextHelpFormatter,
+ description=UNDEFINED_DESC)
+
+ parser.add_argument("-S", "--sysfs-dir", default="/sys",
+ help="directory where sysfs is mounted")
+ parser.add_argument("-s", "--search-string",
+ help="search string regular expression to limit symbol search")
+ parser.add_argument("-H", "--show-hints", action="store_true",
+ help="Hints about definitions for missing ABI symbols.")
+ parser.add_argument("-j", "--jobs", "--max-workers", type=int, default=1,
+ help="If bigger than one, enables multiprocessing.")
+ parser.add_argument("-c", "--max-chunk-size", type=int, default=50,
+ help="Maximum number of chunk size")
+ parser.add_argument("-f", "--found", action="store_true",
+ help="Also show found items. "
+ "Helpful to debug the parser."),
+ parser.add_argument("-d", "--dry-run", action="store_true",
+ help="Don't actually search for undefined. "
+ "Helpful to debug the parser."),
+
+ parser.set_defaults(func=self.run)
+
+ def run(self, args):
+ """Run subparser"""
+
+ abi = AbiRegex(args.dir, debug=args.debug,
+ search_string=args.search_string)
+
+ abi_symbols = SystemSymbols(abi=abi, hints=args.show_hints,
+ sysfs=args.sysfs_dir)
+
+ abi_symbols.check_undefined_symbols(dry_run=args.dry_run,
+ found=args.found,
+ max_workers=args.jobs,
+ chunk_size=args.max_chunk_size)
+
+
+def main():
+ """Main program"""
+
+ parser = argparse.ArgumentParser(formatter_class=argparse.RawTextHelpFormatter)
+
+ parser.add_argument("-d", "--debug", type=int, default=0, help="debug level")
+ parser.add_argument("-D", "--dir", default=ABI_DIR, help=DEBUG_HELP)
+
+ subparsers = parser.add_subparsers()
+
+ AbiRest(subparsers)
+ AbiValidate(subparsers)
+ AbiSearch(subparsers)
+ AbiUndefined(subparsers)
+
+ args = parser.parse_args()
+
+ if args.debug:
+ level = logging.DEBUG
+ else:
+ level = logging.INFO
+
+ logging.basicConfig(level=level, format="[%(levelname)s] %(message)s")
+
+ if "func" in args:
+ args.func(args)
+ else:
+ sys.exit(f"Please specify a valid command for {sys.argv[0]}")
+
+
+# Call main method
+if __name__ == "__main__":
+ main()
diff --git a/tools/docs/get_feat.py b/tools/docs/get_feat.py
new file mode 100755
index 000000000000..2b5155a1f134
--- /dev/null
+++ b/tools/docs/get_feat.py
@@ -0,0 +1,225 @@
+#!/usr/bin/env python3
+# pylint: disable=R0902,R0911,R0912,R0914,R0915
+# Copyright(c) 2025: Mauro Carvalho Chehab <mchehab@kernel.org>.
+# SPDX-License-Identifier: GPL-2.0
+
+
+"""
+Parse the Linux Feature files and produce a ReST book.
+"""
+
+import argparse
+import os
+import subprocess
+import sys
+
+from pprint import pprint
+
+LIB_DIR = "../../tools/lib/python"
+SRC_DIR = os.path.dirname(os.path.realpath(__file__))
+
+sys.path.insert(0, os.path.join(SRC_DIR, LIB_DIR))
+
+from feat.parse_features import ParseFeature # pylint: disable=C0413
+
+SRCTREE = os.path.join(os.path.dirname(os.path.realpath(__file__)), "../..")
+DEFAULT_DIR = "Documentation/features"
+
+
+class GetFeature:
+ """Helper class to parse feature parsing parameters"""
+
+ @staticmethod
+ def get_current_arch():
+ """Detects the current architecture"""
+
+ proc = subprocess.run(["uname", "-m"], check=True,
+ capture_output=True, text=True)
+
+ arch = proc.stdout.strip()
+ if arch in ["x86_64", "i386"]:
+ arch = "x86"
+ elif arch == "s390x":
+ arch = "s390"
+
+ return arch
+
+ def run_parser(self, args):
+ """Execute the feature parser"""
+
+ feat = ParseFeature(args.directory, args.debug, args.enable_fname)
+ data = feat.parse()
+
+ if args.debug > 2:
+ pprint(data)
+
+ return feat
+
+ def run_rest(self, args):
+ """
+ Generate tables in ReST format. Three types of tables are
+ supported, depending on the calling arguments:
+
+ - neither feature nor arch is passed: generates a full matrix;
+ - arch provided: generates a table of supported tables for the
+ guiven architecture, eventually filtered by feature;
+ - only feature provided: generates a table with feature details,
+ showing what architectures it is implemented.
+ """
+
+ feat = self.run_parser(args)
+
+ if args.arch:
+ rst = feat.output_arch_table(args.arch, args.feat)
+ elif args.feat:
+ rst = feat.output_feature(args.feat)
+ else:
+ rst = feat.output_matrix()
+
+ print(rst)
+
+ def run_current(self, args):
+ """
+ Instead of using a --arch parameter, get feature for the current
+ architecture.
+ """
+
+ args.arch = self.get_current_arch()
+
+ self.run_rest(args)
+
+ def run_list(self, args):
+ """
+ Generate a list of features for a given architecture, in a format
+ parseable by other scripts. The output format is not ReST.
+ """
+
+ if not args.arch:
+ args.arch = self.get_current_arch()
+
+ feat = self.run_parser(args)
+ msg = feat.list_arch_features(args.arch, args.feat)
+
+ print(msg)
+
+ def parse_arch(self, parser):
+ """Add a --arch parsing argument"""
+
+ parser.add_argument("--arch",
+ help="Output features for an specific"
+ " architecture, optionally filtering for a "
+ "single specific feature.")
+
+ def parse_feat(self, parser):
+ """Add a --feat parsing argument"""
+
+ parser.add_argument("--feat", "--feature",
+ help="Output features for a single specific "
+ "feature.")
+
+
+ def current_args(self, subparsers):
+ """Implementscurrent argparse subparser"""
+
+ parser = subparsers.add_parser("current",
+ formatter_class=argparse.RawTextHelpFormatter,
+ description="Output table in ReST "
+ "compatible ASCII format "
+ "with features for this "
+ "machine's architecture")
+
+ self.parse_feat(parser)
+ parser.set_defaults(func=self.run_current)
+
+ def rest_args(self, subparsers):
+ """Implement rest argparse subparser"""
+
+ parser = subparsers.add_parser("rest",
+ formatter_class=argparse.RawTextHelpFormatter,
+ description="Output table(s) in ReST "
+ "compatible ASCII format "
+ "with features in ReST "
+ "markup language. The "
+ "output is affected by "
+ "--arch or --feat/--feature"
+ " flags.")
+
+ self.parse_arch(parser)
+ self.parse_feat(parser)
+ parser.set_defaults(func=self.run_rest)
+
+ def list_args(self, subparsers):
+ """Implement list argparse subparser"""
+
+ parser = subparsers.add_parser("list",
+ formatter_class=argparse.RawTextHelpFormatter,
+ description="List features for this "
+ "machine's architecture, "
+ "using an easier to parse "
+ "format. The output is "
+ "affected by --arch flag.")
+
+ self.parse_arch(parser)
+ self.parse_feat(parser)
+ parser.set_defaults(func=self.run_list)
+
+ def validate_args(self, subparsers):
+ """Implement validate argparse subparser"""
+
+ parser = subparsers.add_parser("validate",
+ formatter_class=argparse.RawTextHelpFormatter,
+ description="Validate the contents of "
+ "the files under "
+ f"{DEFAULT_DIR}.")
+
+ parser.set_defaults(func=self.run_parser)
+
+ def parser(self):
+ """
+ Create an arparse with common options and several subparsers
+ """
+ parser = argparse.ArgumentParser(formatter_class=argparse.RawTextHelpFormatter)
+
+ parser.add_argument("-d", "--debug", action="count", default=0,
+ help="Put the script in verbose mode, useful for "
+ "debugging. Can be called multiple times, to "
+ "increase verbosity.")
+
+ parser.add_argument("--directory", "--dir", default=DEFAULT_DIR,
+ help="Changes the location of the Feature files. "
+ f"By default, it uses the {DEFAULT_DIR} "
+ "directory.")
+
+ parser.add_argument("--enable-fname", action="store_true",
+ help="Prints the file name of the feature files. "
+ "This can be used in order to track "
+ "dependencies during documentation build.")
+
+ subparsers = parser.add_subparsers()
+
+ self.current_args(subparsers)
+ self.rest_args(subparsers)
+ self.list_args(subparsers)
+ self.validate_args(subparsers)
+
+ args = parser.parse_args()
+
+ return args
+
+
+def main():
+ """Main program"""
+
+ feat = GetFeature()
+
+ args = feat.parser()
+
+ if "func" in args:
+ args.func(args)
+ else:
+ sys.exit(f"Please specify a valid command for {sys.argv[0]}")
+
+
+# Call main method
+if __name__ == "__main__":
+ main()
diff --git a/tools/docs/list-arch.sh b/tools/docs/list-arch.sh
new file mode 100755
index 000000000000..96fe83b7058b
--- /dev/null
+++ b/tools/docs/list-arch.sh
@@ -0,0 +1,11 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Small script that visualizes the kernel feature support status
+# of an architecture.
+#
+# (If no arguments are given then it will print the host architecture's status.)
+#
+
+ARCH=${1:-$(uname -m | sed 's/x86_64/x86/' | sed 's/i386/x86/' | sed 's/s390x/s390/')}
+
+$(dirname $0)/get_feat.pl list --arch $ARCH
diff --git a/tools/docs/parse-headers.py b/tools/docs/parse-headers.py
new file mode 100755
index 000000000000..436acea4c6ca
--- /dev/null
+++ b/tools/docs/parse-headers.py
@@ -0,0 +1,60 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (c) 2016, 2025 by Mauro Carvalho Chehab <mchehab@kernel.org>.
+# pylint: disable=C0103
+
+"""
+Convert a C header or source file ``FILE_IN``, into a ReStructured Text
+included via ..parsed-literal block with cross-references for the
+documentation files that describe the API. It accepts an optional
+``FILE_RULES`` file to describes what elements will be either ignored or
+be pointed to a non-default reference type/name.
+
+The output is written at ``FILE_OUT``.
+
+It is capable of identifying defines, functions, structs, typedefs,
+enums and enum symbols and create cross-references for all of them.
+It is also capable of distinguish #define used for specifying a Linux
+ioctl.
+
+The optional ``FILE_RULES`` contains a set of rules like:
+
+ ignore ioctl VIDIOC_ENUM_FMT
+ replace ioctl VIDIOC_DQBUF vidioc_qbuf
+ replace define V4L2_EVENT_MD_FL_HAVE_FRAME_SEQ :c:type:`v4l2_event_motion_det`
+"""
+
+import argparse, sys
+import os.path
+
+src_dir = os.path.dirname(os.path.realpath(__file__))
+sys.path.insert(0, os.path.join(src_dir, '../lib/python'))
+from kdoc.parse_data_structs import ParseDataStructs
+from kdoc.enrich_formatter import EnrichFormatter
+
+def main():
+ """Main function"""
+ parser = argparse.ArgumentParser(description=__doc__,
+ formatter_class=EnrichFormatter)
+
+ parser.add_argument("-d", "--debug", action="count", default=0,
+ help="Increase debug level. Can be used multiple times")
+ parser.add_argument("-t", "--toc", action="store_true",
+ help="instead of a literal block, outputs a TOC table at the RST file")
+
+ parser.add_argument("file_in", help="Input C file")
+ parser.add_argument("file_out", help="Output RST file")
+ parser.add_argument("file_rules", nargs="?",
+ help="Exceptions file (optional)")
+
+ args = parser.parse_args()
+
+ parser = ParseDataStructs(debug=args.debug)
+ parser.parse_file(args.file_in, args.file_rules)
+
+ parser.debug_print()
+ parser.write_output(args.file_in, args.file_out, args.toc)
+
+
+if __name__ == "__main__":
+ main()
diff --git a/tools/docs/sphinx-build-wrapper b/tools/docs/sphinx-build-wrapper
new file mode 100755
index 000000000000..7a5fcef25429
--- /dev/null
+++ b/tools/docs/sphinx-build-wrapper
@@ -0,0 +1,864 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (C) 2025 Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
+#
+# pylint: disable=R0902, R0912, R0913, R0914, R0915, R0917, C0103
+#
+# Converted from docs Makefile and parallel-wrapper.sh, both under
+# GPLv2, copyrighted since 2008 by the following authors:
+#
+# Akira Yokosawa <akiyks@gmail.com>
+# Arnd Bergmann <arnd@arndb.de>
+# Breno Leitao <leitao@debian.org>
+# Carlos Bilbao <carlos.bilbao@amd.com>
+# Dave Young <dyoung@redhat.com>
+# Donald Hunter <donald.hunter@gmail.com>
+# Geert Uytterhoeven <geert+renesas@glider.be>
+# Jani Nikula <jani.nikula@intel.com>
+# Jan Stancek <jstancek@redhat.com>
+# Jonathan Corbet <corbet@lwn.net>
+# Joshua Clayton <stillcompiling@gmail.com>
+# Kees Cook <keescook@chromium.org>
+# Linus Torvalds <torvalds@linux-foundation.org>
+# Magnus Damm <damm+renesas@opensource.se>
+# Masahiro Yamada <masahiroy@kernel.org>
+# Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
+# Maxim Cournoyer <maxim.cournoyer@gmail.com>
+# Peter Foley <pefoley2@pefoley.com>
+# Randy Dunlap <rdunlap@infradead.org>
+# Rob Herring <robh@kernel.org>
+# Shuah Khan <shuahkh@osg.samsung.com>
+# Thorsten Blum <thorsten.blum@toblux.com>
+# Tomas Winkler <tomas.winkler@intel.com>
+
+
+"""
+Sphinx build wrapper that handles Kernel-specific business rules:
+
+- it gets the Kernel build environment vars;
+- it determines what's the best parallelism;
+- it handles SPHINXDIRS
+
+This tool ensures that MIN_PYTHON_VERSION is satisfied. If version is
+below that, it seeks for a new Python version. If found, it re-runs using
+the newer version.
+"""
+
+import argparse
+import locale
+import os
+import re
+import shlex
+import shutil
+import subprocess
+import sys
+
+from concurrent import futures
+from glob import glob
+
+
+LIB_DIR = "../lib/python"
+SRC_DIR = os.path.dirname(os.path.realpath(__file__))
+
+sys.path.insert(0, os.path.join(SRC_DIR, LIB_DIR))
+
+from kdoc.python_version import PythonVersion
+from kdoc.latex_fonts import LatexFontChecker
+from jobserver import JobserverExec # pylint: disable=C0413,C0411,E0401
+
+#
+# Some constants
+#
+VENV_DEFAULT = "sphinx_latest"
+MIN_PYTHON_VERSION = PythonVersion("3.7").version
+PAPER = ["", "a4", "letter"]
+
+TARGETS = {
+ "cleandocs": { "builder": "clean" },
+ "linkcheckdocs": { "builder": "linkcheck" },
+ "htmldocs": { "builder": "html" },
+ "epubdocs": { "builder": "epub", "out_dir": "epub" },
+ "texinfodocs": { "builder": "texinfo", "out_dir": "texinfo" },
+ "infodocs": { "builder": "texinfo", "out_dir": "texinfo" },
+ "mandocs": { "builder": "man", "out_dir": "man" },
+ "latexdocs": { "builder": "latex", "out_dir": "latex" },
+ "pdfdocs": { "builder": "latex", "out_dir": "latex" },
+ "xmldocs": { "builder": "xml", "out_dir": "xml" },
+}
+
+
+#
+# SphinxBuilder class
+#
+
+class SphinxBuilder:
+ """
+ Handles a sphinx-build target, adding needed arguments to build
+ with the Kernel.
+ """
+
+ def get_path(self, path, use_cwd=False, abs_path=False):
+ """
+ Ancillary routine to handle patches the right way, as shell does.
+
+ It first expands "~" and "~user". Then, if patch is not absolute,
+ join self.srctree. Finally, if requested, convert to abspath.
+ """
+
+ path = os.path.expanduser(path)
+ if not path.startswith("/"):
+ if use_cwd:
+ base = os.getcwd()
+ else:
+ base = self.srctree
+
+ path = os.path.join(base, path)
+
+ if abs_path:
+ return os.path.abspath(path)
+
+ return path
+
+ def check_rust(self):
+ """
+ Checks if Rust is enabled
+ """
+ self.rustdoc = False
+
+ config = os.path.join(self.srctree, ".config")
+
+ if not os.path.isfile(config):
+ return
+
+ re_rust = re.compile(r"CONFIG_RUST=(m|y)")
+
+ try:
+ with open(config, "r", encoding="utf-8") as fp:
+ for line in fp:
+ if re_rust.match(line):
+ self.rustdoc = True
+ return
+
+ except OSError as e:
+ print(f"Failed to open {config}", file=sys.stderr)
+
+ def get_sphinx_extra_opts(self, n_jobs):
+ """
+ Get the number of jobs to be used for docs build passed via command
+ line and desired sphinx verbosity.
+
+ The number of jobs can be on different places:
+
+ 1) It can be passed via "-j" argument;
+ 2) The SPHINXOPTS="-j8" env var may have "-j";
+ 3) if called via GNU make, -j specifies the desired number of jobs.
+ with GNU makefile, this number is available via POSIX jobserver;
+ 4) if none of the above is available, it should default to "-jauto",
+ and let sphinx decide the best value.
+ """
+
+ #
+ # SPHINXOPTS env var, if used, contains extra arguments to be used
+ # by sphinx-build time. Among them, it may contain sphinx verbosity
+ # and desired number of parallel jobs.
+ #
+ parser = argparse.ArgumentParser()
+ parser.add_argument('-j', '--jobs', type=int)
+ parser.add_argument('-q', '--quiet', action='store_true')
+
+ #
+ # Other sphinx-build arguments go as-is, so place them
+ # at self.sphinxopts, using shell parser
+ #
+ sphinxopts = shlex.split(os.environ.get("SPHINXOPTS", ""))
+
+ #
+ # Build a list of sphinx args, honoring verbosity here if specified
+ #
+
+ verbose = self.verbose
+ sphinx_args, self.sphinxopts = parser.parse_known_args(sphinxopts)
+ if sphinx_args.quiet is True:
+ verbose = False
+
+ #
+ # If the user explicitly sets "-j" at command line, use it.
+ # Otherwise, pick it from SPHINXOPTS args
+ #
+ if n_jobs:
+ self.n_jobs = n_jobs
+ elif sphinx_args.jobs:
+ self.n_jobs = sphinx_args.jobs
+ else:
+ self.n_jobs = None
+
+ if not verbose:
+ self.sphinxopts += ["-q"]
+
+ def __init__(self, builddir, venv=None, verbose=False, n_jobs=None,
+ interactive=None):
+ """Initialize internal variables"""
+ self.venv = venv
+ self.verbose = None
+
+ #
+ # Normal variables passed from Kernel's makefile
+ #
+ self.kernelversion = os.environ.get("KERNELVERSION", "unknown")
+ self.kernelrelease = os.environ.get("KERNELRELEASE", "unknown")
+ self.pdflatex = os.environ.get("PDFLATEX", "xelatex")
+
+ #
+ # Kernel main Makefile defines a PYTHON3 variable whose default is
+ # "python3". When set to a different value, it allows running a
+ # diferent version than the default official python3 package.
+ # Several distros package python3xx-sphinx packages with newer
+ # versions of Python and sphinx-build.
+ #
+ # Honor such variable different than default
+ #
+ self.python = os.environ.get("PYTHON3")
+ if self.python == "python3":
+ self.python = None
+
+ if not interactive:
+ self.latexopts = os.environ.get("LATEXOPTS", "-interaction=batchmode -no-shell-escape")
+ else:
+ self.latexopts = os.environ.get("LATEXOPTS", "")
+
+ if not verbose:
+ verbose = bool(os.environ.get("KBUILD_VERBOSE", "") != "")
+
+ if verbose is not None:
+ self.verbose = verbose
+
+ #
+ # Source tree directory. This needs to be at os.environ, as
+ # Sphinx extensions use it
+ #
+ self.srctree = os.environ.get("srctree")
+ if not self.srctree:
+ self.srctree = "."
+ os.environ["srctree"] = self.srctree
+
+ #
+ # Now that we can expand srctree, get other directories as well
+ #
+ self.sphinxbuild = os.environ.get("SPHINXBUILD", "sphinx-build")
+ self.kerneldoc = self.get_path(os.environ.get("KERNELDOC",
+ "scripts/kernel-doc.py"))
+ self.builddir = self.get_path(builddir, use_cwd=True, abs_path=True)
+
+ #
+ # Get directory locations for LaTeX build toolchain
+ #
+ self.pdflatex_cmd = shutil.which(self.pdflatex)
+ self.latexmk_cmd = shutil.which("latexmk")
+
+ self.env = os.environ.copy()
+
+ self.get_sphinx_extra_opts(n_jobs)
+
+ self.check_rust()
+
+ #
+ # If venv command line argument is specified, run Sphinx from venv
+ #
+ if venv:
+ bin_dir = os.path.join(venv, "bin")
+ if not os.path.isfile(os.path.join(bin_dir, "activate")):
+ sys.exit(f"Venv {venv} not found.")
+
+ # "activate" virtual env
+ self.env["PATH"] = bin_dir + ":" + self.env["PATH"]
+ self.env["VIRTUAL_ENV"] = venv
+ if "PYTHONHOME" in self.env:
+ del self.env["PYTHONHOME"]
+ print(f"Setting venv to {venv}")
+
+ def run_sphinx(self, sphinx_build, build_args, *args, **pwargs):
+ """
+ Executes sphinx-build using current python3 command.
+
+ When calling via GNU make, POSIX jobserver is used to tell how
+ many jobs are still available from a job pool. claim all remaining
+ jobs, as we don't want sphinx-build to run in parallel with other
+ jobs.
+
+ Despite that, the user may actually force a different value than
+ the number of available jobs via command line.
+
+ The "with" logic here is used to ensure that the claimed jobs will
+ be freed once subprocess finishes
+ """
+
+ with JobserverExec() as jobserver:
+ if jobserver.claim:
+ #
+ # when GNU make is used, claim available jobs from jobserver
+ #
+ n_jobs = str(jobserver.claim)
+ else:
+ #
+ # Otherwise, let sphinx decide by default
+ #
+ n_jobs = "auto"
+
+ #
+ # If explicitly requested via command line, override default
+ #
+ if self.n_jobs:
+ n_jobs = str(self.n_jobs)
+
+ #
+ # We can't simply call python3 sphinx-build, as OpenSUSE
+ # Tumbleweed uses an ELF binary file (/usr/bin/alts) to switch
+ # between different versions of sphinx-build. So, only call it
+ # prepending "python3.xx" when PYTHON3 variable is not default.
+ #
+ if self.python:
+ cmd = [self.python]
+ else:
+ cmd = []
+
+ cmd += [sphinx_build]
+ cmd += [f"-j{n_jobs}"]
+ cmd += build_args
+ cmd += self.sphinxopts
+
+ if self.verbose:
+ print(" ".join(cmd))
+
+ return subprocess.call(cmd, *args, **pwargs)
+
+ def handle_html(self, css, output_dir):
+ """
+ Extra steps for HTML and epub output.
+
+ For such targets, we need to ensure that CSS will be properly
+ copied to the output _static directory
+ """
+
+ if css:
+ css = os.path.expanduser(css)
+ if not css.startswith("/"):
+ css = os.path.join(self.srctree, css)
+
+ static_dir = os.path.join(output_dir, "_static")
+ os.makedirs(static_dir, exist_ok=True)
+
+ try:
+ shutil.copy2(css, static_dir)
+ except (OSError, IOError) as e:
+ print(f"Warning: Failed to copy CSS: {e}", file=sys.stderr)
+
+ if self.rustdoc:
+ print("Building rust docs")
+ if "MAKE" in self.env:
+ cmd = [self.env["MAKE"]]
+ else:
+ cmd = ["make", "LLVM=1"]
+
+ cmd += [ "rustdoc"]
+ if self.verbose:
+ print(" ".join(cmd))
+
+ try:
+ subprocess.run(cmd, check=True)
+ except subprocess.CalledProcessError as e:
+ print(f"Ignored errors when building rustdoc: {e}. Is RUST enabled?",
+ file=sys.stderr)
+
+ def build_pdf_file(self, latex_cmd, from_dir, path):
+ """Builds a single pdf file using latex_cmd"""
+ try:
+ subprocess.run(latex_cmd + [path],
+ cwd=from_dir, check=True, env=self.env)
+
+ return True
+ except subprocess.CalledProcessError:
+ return False
+
+ def pdf_parallel_build(self, tex_suffix, latex_cmd, tex_files, n_jobs):
+ """Build PDF files in parallel if possible"""
+ builds = {}
+ build_failed = False
+ max_len = 0
+ has_tex = False
+
+ #
+ # LaTeX PDF error code is almost useless for us:
+ # any warning makes it non-zero. For kernel doc builds it always return
+ # non-zero even when build succeeds. So, let's do the best next thing:
+ # Ignore build errors. At the end, check if all PDF files were built,
+ # printing a summary with the built ones and returning 0 if all of
+ # them were actually built.
+ #
+ with futures.ThreadPoolExecutor(max_workers=n_jobs) as executor:
+ jobs = {}
+
+ for from_dir, pdf_dir, entry in tex_files:
+ name = entry.name
+
+ if not name.endswith(tex_suffix):
+ continue
+
+ name = name[:-len(tex_suffix)]
+ has_tex = True
+
+ future = executor.submit(self.build_pdf_file, latex_cmd,
+ from_dir, entry.path)
+ jobs[future] = (from_dir, pdf_dir, name)
+
+ for future in futures.as_completed(jobs):
+ from_dir, pdf_dir, name = jobs[future]
+
+ pdf_name = name + ".pdf"
+ pdf_from = os.path.join(from_dir, pdf_name)
+ pdf_to = os.path.join(pdf_dir, pdf_name)
+ out_name = os.path.relpath(pdf_to, self.builddir)
+ max_len = max(max_len, len(out_name))
+
+ try:
+ success = future.result()
+
+ if success and os.path.exists(pdf_from):
+ os.rename(pdf_from, pdf_to)
+
+ #
+ # if verbose, get the name of built PDF file
+ #
+ if self.verbose:
+ builds[out_name] = "SUCCESS"
+ else:
+ builds[out_name] = "FAILED"
+ build_failed = True
+ except futures.Error as e:
+ builds[out_name] = f"FAILED ({repr(e)})"
+ build_failed = True
+
+ #
+ # Handle case where no .tex files were found
+ #
+ if not has_tex:
+ out_name = "LaTeX files"
+ max_len = max(max_len, len(out_name))
+ builds[out_name] = "FAILED: no .tex files were generated"
+ build_failed = True
+
+ return builds, build_failed, max_len
+
+ def handle_pdf(self, output_dirs, deny_vf):
+ """
+ Extra steps for PDF output.
+
+ As PDF is handled via a LaTeX output, after building the .tex file,
+ a new build is needed to create the PDF output from the latex
+ directory.
+ """
+ builds = {}
+ max_len = 0
+ tex_suffix = ".tex"
+ tex_files = []
+
+ #
+ # Since early 2024, Fedora and openSUSE tumbleweed have started
+ # deploying variable-font format of "Noto CJK", causing LaTeX
+ # to break with CJK. Work around it, by denying the variable font
+ # usage during xelatex build by passing the location of a config
+ # file with a deny list.
+ #
+ # See tools/docs/lib/latex_fonts.py for more details.
+ #
+ if deny_vf:
+ deny_vf = os.path.expanduser(deny_vf)
+ if os.path.isdir(deny_vf):
+ self.env["XDG_CONFIG_HOME"] = deny_vf
+
+ for from_dir in output_dirs:
+ pdf_dir = os.path.join(from_dir, "../pdf")
+ os.makedirs(pdf_dir, exist_ok=True)
+
+ if self.latexmk_cmd:
+ latex_cmd = [self.latexmk_cmd, f"-{self.pdflatex}"]
+ else:
+ latex_cmd = [self.pdflatex]
+
+ latex_cmd.extend(shlex.split(self.latexopts))
+
+ # Get a list of tex files to process
+ with os.scandir(from_dir) as it:
+ for entry in it:
+ if entry.name.endswith(tex_suffix):
+ tex_files.append((from_dir, pdf_dir, entry))
+
+ #
+ # When using make, this won't be used, as the number of jobs comes
+ # from POSIX jobserver. So, this covers the case where build comes
+ # from command line. On such case, serialize by default, except if
+ # the user explicitly sets the number of jobs.
+ #
+ n_jobs = 1
+
+ # n_jobs is either an integer or "auto". Only use it if it is a number
+ if self.n_jobs:
+ try:
+ n_jobs = int(self.n_jobs)
+ except ValueError:
+ pass
+
+ #
+ # When using make, jobserver.claim is the number of jobs that were
+ # used with "-j" and that aren't used by other make targets
+ #
+ with JobserverExec() as jobserver:
+ n_jobs = 1
+
+ #
+ # Handle the case when a parameter is passed via command line,
+ # using it as default, if jobserver doesn't claim anything
+ #
+ if self.n_jobs:
+ try:
+ n_jobs = int(self.n_jobs)
+ except ValueError:
+ pass
+
+ if jobserver.claim:
+ n_jobs = jobserver.claim
+
+ builds, build_failed, max_len = self.pdf_parallel_build(tex_suffix,
+ latex_cmd,
+ tex_files,
+ n_jobs)
+
+ #
+ # In verbose mode, print a summary with the build results per file.
+ # Otherwise, print a single line with all failures, if any.
+ # On both cases, return code 1 indicates build failures,
+ #
+ if self.verbose:
+ msg = "Summary"
+ msg += "\n" + "=" * len(msg)
+ print()
+ print(msg)
+
+ for pdf_name, pdf_file in builds.items():
+ print(f"{pdf_name:<{max_len}}: {pdf_file}")
+
+ print()
+ if build_failed:
+ msg = LatexFontChecker().check()
+ if msg:
+ print(msg)
+
+ sys.exit("Error: not all PDF files were created.")
+
+ elif build_failed:
+ n_failures = len(builds)
+ failures = ", ".join(builds.keys())
+
+ msg = LatexFontChecker().check()
+ if msg:
+ print(msg)
+
+ sys.exit(f"Error: Can't build {n_failures} PDF file(s): {failures}")
+
+ def handle_info(self, output_dirs):
+ """
+ Extra steps for Info output.
+
+ For texinfo generation, an additional make is needed from the
+ texinfo directory.
+ """
+
+ for output_dir in output_dirs:
+ try:
+ subprocess.run(["make", "info"], cwd=output_dir, check=True)
+ except subprocess.CalledProcessError as e:
+ sys.exit(f"Error generating info docs: {e}")
+
+ def handle_man(self, kerneldoc, docs_dir, src_dir, output_dir):
+ """
+ Create man pages from kernel-doc output
+ """
+
+ re_kernel_doc = re.compile(r"^\.\.\s+kernel-doc::\s*(\S+)")
+ re_man = re.compile(r'^\.TH "[^"]*" (\d+) "([^"]*)"')
+
+ if docs_dir == src_dir:
+ #
+ # Pick the entire set of kernel-doc markups from the entire tree
+ #
+ kdoc_files = set([self.srctree])
+ else:
+ kdoc_files = set()
+
+ for fname in glob(os.path.join(src_dir, "**"), recursive=True):
+ if os.path.isfile(fname) and fname.endswith(".rst"):
+ with open(fname, "r", encoding="utf-8") as in_fp:
+ data = in_fp.read()
+
+ for line in data.split("\n"):
+ match = re_kernel_doc.match(line)
+ if match:
+ if os.path.isfile(match.group(1)):
+ kdoc_files.add(match.group(1))
+
+ if not kdoc_files:
+ sys.exit(f"Directory {src_dir} doesn't contain kernel-doc tags")
+
+ cmd = [ kerneldoc, "-m" ] + sorted(kdoc_files)
+ try:
+ if self.verbose:
+ print(" ".join(cmd))
+
+ result = subprocess.run(cmd, stdout=subprocess.PIPE, text= True)
+
+ if result.returncode:
+ print(f"Warning: kernel-doc returned {result.returncode} warnings")
+
+ except (OSError, ValueError, subprocess.SubprocessError) as e:
+ sys.exit(f"Failed to create man pages for {src_dir}: {repr(e)}")
+
+ fp = None
+ try:
+ for line in result.stdout.split("\n"):
+ match = re_man.match(line)
+ if not match:
+ if fp:
+ fp.write(line + '\n')
+ continue
+
+ if fp:
+ fp.close()
+
+ fname = f"{output_dir}/{match.group(2)}.{match.group(1)}"
+
+ if self.verbose:
+ print(f"Creating {fname}")
+ fp = open(fname, "w", encoding="utf-8")
+ fp.write(line + '\n')
+ finally:
+ if fp:
+ fp.close()
+
+ def cleandocs(self, builder): # pylint: disable=W0613
+ """Remove documentation output directory"""
+ shutil.rmtree(self.builddir, ignore_errors=True)
+
+ def build(self, target, sphinxdirs=None,
+ theme=None, css=None, paper=None, deny_vf=None,
+ skip_sphinx=False):
+ """
+ Build documentation using Sphinx. This is the core function of this
+ module. It prepares all arguments required by sphinx-build.
+ """
+
+ builder = TARGETS[target]["builder"]
+ out_dir = TARGETS[target].get("out_dir", "")
+
+ #
+ # Cleandocs doesn't require sphinx-build
+ #
+ if target == "cleandocs":
+ self.cleandocs(builder)
+ return
+
+ if theme:
+ os.environ["DOCS_THEME"] = theme
+
+ #
+ # Other targets require sphinx-build, so check if it exists
+ #
+ if not skip_sphinx:
+ sphinxbuild = shutil.which(self.sphinxbuild, path=self.env["PATH"])
+ if not sphinxbuild and target != "mandocs":
+ sys.exit(f"Error: {self.sphinxbuild} not found in PATH.\n")
+
+ if target == "pdfdocs":
+ if not self.pdflatex_cmd and not self.latexmk_cmd:
+ sys.exit("Error: pdflatex or latexmk required for PDF generation")
+
+ docs_dir = os.path.abspath(os.path.join(self.srctree, "Documentation"))
+
+ #
+ # Fill in base arguments for Sphinx build
+ #
+ kerneldoc = self.kerneldoc
+ if kerneldoc.startswith(self.srctree):
+ kerneldoc = os.path.relpath(kerneldoc, self.srctree)
+
+ args = [ "-b", builder, "-c", docs_dir ]
+
+ if builder == "latex":
+ if not paper:
+ paper = PAPER[1]
+
+ args.extend(["-D", f"latex_elements.papersize={paper}paper"])
+
+ if self.rustdoc:
+ args.extend(["-t", "rustdoc"])
+
+ if not sphinxdirs:
+ sphinxdirs = os.environ.get("SPHINXDIRS", ".")
+
+ #
+ # The sphinx-build tool has a bug: internally, it tries to set
+ # locale with locale.setlocale(locale.LC_ALL, ''). This causes a
+ # crash if language is not set. Detect and fix it.
+ #
+ try:
+ locale.setlocale(locale.LC_ALL, '')
+ except locale.Error:
+ self.env["LC_ALL"] = "C"
+
+ #
+ # sphinxdirs can be a list or a whitespace-separated string
+ #
+ sphinxdirs_list = []
+ for sphinxdir in sphinxdirs:
+ if isinstance(sphinxdir, list):
+ sphinxdirs_list += sphinxdir
+ else:
+ sphinxdirs_list += sphinxdir.split()
+
+ #
+ # Step 1: Build each directory in separate.
+ #
+ # This is not the best way of handling it, as cross-references between
+ # them will be broken, but this is what we've been doing since
+ # the beginning.
+ #
+ output_dirs = []
+ for sphinxdir in sphinxdirs_list:
+ src_dir = os.path.join(docs_dir, sphinxdir)
+ doctree_dir = os.path.join(self.builddir, ".doctrees")
+ output_dir = os.path.join(self.builddir, sphinxdir, out_dir)
+
+ #
+ # Make directory names canonical
+ #
+ src_dir = os.path.normpath(src_dir)
+ doctree_dir = os.path.normpath(doctree_dir)
+ output_dir = os.path.normpath(output_dir)
+
+ os.makedirs(doctree_dir, exist_ok=True)
+ os.makedirs(output_dir, exist_ok=True)
+
+ output_dirs.append(output_dir)
+
+ build_args = args + [
+ "-d", doctree_dir,
+ "-D", f"kerneldoc_bin={kerneldoc}",
+ "-D", f"version={self.kernelversion}",
+ "-D", f"release={self.kernelrelease}",
+ "-D", f"kerneldoc_srctree={self.srctree}",
+ src_dir,
+ output_dir,
+ ]
+
+ if target == "mandocs":
+ self.handle_man(kerneldoc, docs_dir, src_dir, output_dir)
+ elif not skip_sphinx:
+ try:
+ result = self.run_sphinx(sphinxbuild, build_args,
+ env=self.env)
+
+ if result:
+ sys.exit(f"Build failed: return code: {result}")
+
+ except (OSError, ValueError, subprocess.SubprocessError) as e:
+ sys.exit(f"Build failed: {repr(e)}")
+
+ #
+ # Ensure that each html/epub output will have needed static files
+ #
+ if target in ["htmldocs", "epubdocs"]:
+ self.handle_html(css, output_dir)
+
+ #
+ # Step 2: Some targets (PDF and info) require an extra step once
+ # sphinx-build finishes
+ #
+ if target == "pdfdocs":
+ self.handle_pdf(output_dirs, deny_vf)
+ elif target == "infodocs":
+ self.handle_info(output_dirs)
+
+def jobs_type(value):
+ """
+ Handle valid values for -j. Accepts Sphinx "-jauto", plus a number
+ equal or bigger than one.
+ """
+ if value is None:
+ return None
+
+ if value.lower() == 'auto':
+ return value.lower()
+
+ try:
+ if int(value) >= 1:
+ return value
+
+ raise argparse.ArgumentTypeError(f"Minimum jobs is 1, got {value}")
+ except ValueError:
+ raise argparse.ArgumentTypeError(f"Must be 'auto' or positive integer, got {value}") # pylint: disable=W0707
+
+def main():
+ """
+ Main function. The only mandatory argument is the target. If not
+ specified, the other arguments will use default values if not
+ specified at os.environ.
+ """
+ parser = argparse.ArgumentParser(description="Kernel documentation builder")
+
+ parser.add_argument("target", choices=list(TARGETS.keys()),
+ help="Documentation target to build")
+ parser.add_argument("--sphinxdirs", nargs="+",
+ help="Specific directories to build")
+ parser.add_argument("--builddir", default="output",
+ help="Sphinx configuration file")
+
+ parser.add_argument("--theme", help="Sphinx theme to use")
+
+ parser.add_argument("--css", help="Custom CSS file for HTML/EPUB")
+
+ parser.add_argument("--paper", choices=PAPER, default=PAPER[0],
+ help="Paper size for LaTeX/PDF output")
+
+ parser.add_argument('--deny-vf',
+ help="Configuration to deny variable fonts on pdf builds")
+
+ parser.add_argument("-v", "--verbose", action='store_true',
+ help="place build in verbose mode")
+
+ parser.add_argument('-j', '--jobs', type=jobs_type,
+ help="Sets number of jobs to use with sphinx-build")
+
+ parser.add_argument('-i', '--interactive', action='store_true',
+ help="Change latex default to run in interactive mode")
+
+ parser.add_argument('-s', '--skip-sphinx-build', action='store_true',
+ help="Skip sphinx-build step")
+
+ parser.add_argument("-V", "--venv", nargs='?', const=f'{VENV_DEFAULT}',
+ default=None,
+ help=f'If used, run Sphinx from a venv dir (default dir: {VENV_DEFAULT})')
+
+ args = parser.parse_args()
+
+ PythonVersion.check_python(MIN_PYTHON_VERSION, show_alternatives=True,
+ bail_out=True)
+
+ builder = SphinxBuilder(builddir=args.builddir, venv=args.venv,
+ verbose=args.verbose, n_jobs=args.jobs,
+ interactive=args.interactive)
+
+ builder.build(args.target, sphinxdirs=args.sphinxdirs,
+ theme=args.theme, css=args.css, paper=args.paper,
+ deny_vf=args.deny_vf,
+ skip_sphinx=args.skip_sphinx_build)
+
+if __name__ == "__main__":
+ main()
diff --git a/tools/docs/sphinx-pre-install b/tools/docs/sphinx-pre-install
new file mode 100755
index 000000000000..965c9b093a41
--- /dev/null
+++ b/tools/docs/sphinx-pre-install
@@ -0,0 +1,1543 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0-or-later
+# Copyright (c) 2017-2025 Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
+#
+# pylint: disable=C0103,C0114,C0115,C0116,C0301,C0302
+# pylint: disable=R0902,R0904,R0911,R0912,R0914,R0915,R1705,R1710,E1121
+
+# Note: this script requires at least Python 3.6 to run.
+# Don't add changes not compatible with it, it is meant to report
+# incompatible python versions.
+
+"""
+Dependency checker for Sphinx documentation Kernel build.
+
+This module provides tools to check for all required dependencies needed to
+build documentation using Sphinx, including system packages, Python modules
+and LaTeX packages for PDF generation.
+
+It detect packages for a subset of Linux distributions used by Kernel
+maintainers, showing hints and missing dependencies.
+
+The main class SphinxDependencyChecker handles the dependency checking logic
+and provides recommendations for installing missing packages. It supports both
+system package installations and Python virtual environments. By default,
+system pacage install is recommended.
+"""
+
+import argparse
+import locale
+import os
+import re
+import subprocess
+import sys
+from glob import glob
+import os.path
+
+src_dir = os.path.dirname(os.path.realpath(__file__))
+sys.path.insert(0, os.path.join(src_dir, '../lib/python'))
+from kdoc.python_version import PythonVersion
+
+RECOMMENDED_VERSION = PythonVersion("3.4.3").version
+MIN_PYTHON_VERSION = PythonVersion("3.7").version
+
+
+class DepManager:
+ """
+ Manage package dependencies. There are three types of dependencies:
+
+ - System: dependencies required for docs build;
+ - Python: python dependencies for a native distro Sphinx install;
+ - PDF: dependencies needed by PDF builds.
+
+ Each dependency can be mandatory or optional. Not installing an optional
+ dependency won't break the build, but will cause degradation at the
+ docs output.
+ """
+
+ # Internal types of dependencies. Don't use them outside DepManager class.
+ _SYS_TYPE = 0
+ _PHY_TYPE = 1
+ _PDF_TYPE = 2
+
+ # Dependencies visible outside the class.
+ # The keys are tuple with: (type, is_mandatory flag).
+ #
+ # Currently we're not using all optional dep types. Yet, we'll keep all
+ # possible combinations here. They're not many, and that makes easier
+ # if later needed and for the name() method below
+
+ SYSTEM_MANDATORY = (_SYS_TYPE, True)
+ PYTHON_MANDATORY = (_PHY_TYPE, True)
+ PDF_MANDATORY = (_PDF_TYPE, True)
+
+ SYSTEM_OPTIONAL = (_SYS_TYPE, False)
+ PYTHON_OPTIONAL = (_PHY_TYPE, False)
+ PDF_OPTIONAL = (_PDF_TYPE, True)
+
+ def __init__(self, pdf):
+ """
+ Initialize internal vars:
+
+ - missing: missing dependencies list, containing a distro-independent
+ name for a missing dependency and its type.
+ - missing_pkg: ancillary dict containing missing dependencies in
+ distro namespace, organized by type.
+ - need: total number of needed dependencies. Never cleaned.
+ - optional: total number of optional dependencies. Never cleaned.
+ - pdf: Is PDF support enabled?
+ """
+ self.missing = {}
+ self.missing_pkg = {}
+ self.need = 0
+ self.optional = 0
+ self.pdf = pdf
+
+ @staticmethod
+ def name(dtype):
+ """
+ Ancillary routine to output a warn/error message reporting
+ missing dependencies.
+ """
+ if dtype[0] == DepManager._SYS_TYPE:
+ msg = "build"
+ elif dtype[0] == DepManager._PHY_TYPE:
+ msg = "Python"
+ else:
+ msg = "PDF"
+
+ if dtype[1]:
+ return f"ERROR: {msg} mandatory deps missing"
+ else:
+ return f"Warning: {msg} optional deps missing"
+
+ @staticmethod
+ def is_optional(dtype):
+ """Ancillary routine to report if a dependency is optional"""
+ return not dtype[1]
+
+ @staticmethod
+ def is_pdf(dtype):
+ """Ancillary routine to report if a dependency is for PDF generation"""
+ if dtype[0] == DepManager._PDF_TYPE:
+ return True
+
+ return False
+
+ def add_package(self, package, dtype):
+ """
+ Add a package at the self.missing() dictionary.
+ Doesn't update missing_pkg.
+ """
+ is_optional = DepManager.is_optional(dtype)
+ self.missing[package] = dtype
+ if is_optional:
+ self.optional += 1
+ else:
+ self.need += 1
+
+ def del_package(self, package):
+ """
+ Remove a package at the self.missing() dictionary.
+ Doesn't update missing_pkg.
+ """
+ if package in self.missing:
+ del self.missing[package]
+
+ def clear_deps(self):
+ """
+ Clear dependencies without changing needed/optional.
+
+ This is an ackward way to have a separate section to recommend
+ a package after system main dependencies.
+
+ TODO: rework the logic to prevent needing it.
+ """
+
+ self.missing = {}
+ self.missing_pkg = {}
+
+ def check_missing(self, progs):
+ """
+ Update self.missing_pkg, using progs dict to convert from the
+ agnostic package name to distro-specific one.
+
+ Returns an string with the packages to be installed, sorted and
+ with eventual duplicates removed.
+ """
+
+ self.missing_pkg = {}
+
+ for prog, dtype in sorted(self.missing.items()):
+ # At least on some LTS distros like CentOS 7, texlive doesn't
+ # provide all packages we need. When such distros are
+ # detected, we have to disable PDF output.
+ #
+ # So, we need to ignore the packages that distros would
+ # need for LaTeX to work
+ if DepManager.is_pdf(dtype) and not self.pdf:
+ self.optional -= 1
+ continue
+
+ if not dtype in self.missing_pkg:
+ self.missing_pkg[dtype] = []
+
+ self.missing_pkg[dtype].append(progs.get(prog, prog))
+
+ install = []
+ for dtype, pkgs in self.missing_pkg.items():
+ install += pkgs
+
+ return " ".join(sorted(set(install)))
+
+ def warn_install(self):
+ """
+ Emit warnings/errors related to missing packages.
+ """
+
+ output_msg = ""
+
+ for dtype in sorted(self.missing_pkg.keys()):
+ progs = " ".join(sorted(set(self.missing_pkg[dtype])))
+
+ try:
+ name = DepManager.name(dtype)
+ output_msg += f'{name}:\t{progs}\n'
+ except KeyError:
+ raise KeyError(f"ERROR!!!: invalid dtype for {progs}: {dtype}")
+
+ if output_msg:
+ print(f"\n{output_msg}")
+
+class AncillaryMethods:
+ """
+ Ancillary methods that checks for missing dependencies for different
+ types of types, like binaries, python modules, rpm deps, etc.
+ """
+
+ @staticmethod
+ def which(prog):
+ """
+ Our own implementation of which(). We could instead use
+ shutil.which(), but this function is simple enough.
+ Probably faster to use this implementation than to import shutil.
+ """
+ for path in os.environ.get("PATH", "").split(":"):
+ full_path = os.path.join(path, prog)
+ if os.access(full_path, os.X_OK):
+ return full_path
+
+ return None
+
+ @staticmethod
+ def run(*args, **kwargs):
+ """
+ Excecute a command, hiding its output by default.
+ Preserve compatibility with older Python versions.
+ """
+
+ capture_output = kwargs.pop('capture_output', False)
+
+ if capture_output:
+ if 'stdout' not in kwargs:
+ kwargs['stdout'] = subprocess.PIPE
+ if 'stderr' not in kwargs:
+ kwargs['stderr'] = subprocess.PIPE
+ else:
+ if 'stdout' not in kwargs:
+ kwargs['stdout'] = subprocess.DEVNULL
+ if 'stderr' not in kwargs:
+ kwargs['stderr'] = subprocess.DEVNULL
+
+ # Don't break with older Python versions
+ if 'text' in kwargs and sys.version_info < (3, 7):
+ kwargs['universal_newlines'] = kwargs.pop('text')
+
+ return subprocess.run(*args, **kwargs)
+
+class MissingCheckers(AncillaryMethods):
+ """
+ Contains some ancillary checkers for different types of binaries and
+ package managers.
+ """
+
+ def __init__(self, args, texlive):
+ """
+ Initialize its internal variables
+ """
+ self.pdf = args.pdf
+ self.virtualenv = args.virtualenv
+ self.version_check = args.version_check
+ self.texlive = texlive
+
+ self.min_version = (0, 0, 0)
+ self.cur_version = (0, 0, 0)
+
+ self.deps = DepManager(self.pdf)
+
+ self.need_symlink = 0
+ self.need_sphinx = 0
+
+ self.verbose_warn_install = 1
+
+ self.virtenv_dir = ""
+ self.install = ""
+ self.python_cmd = ""
+
+ self.virtenv_prefix = ["sphinx_", "Sphinx_" ]
+
+ def check_missing_file(self, files, package, dtype):
+ """
+ Does the file exists? If not, add it to missing dependencies.
+ """
+ for f in files:
+ if os.path.exists(f):
+ return
+ self.deps.add_package(package, dtype)
+
+ def check_program(self, prog, dtype):
+ """
+ Does the program exists and it is at the PATH?
+ If not, add it to missing dependencies.
+ """
+ found = self.which(prog)
+ if found:
+ return found
+
+ self.deps.add_package(prog, dtype)
+
+ return None
+
+ def check_perl_module(self, prog, dtype):
+ """
+ Does perl have a dependency? Is it available?
+ If not, add it to missing dependencies.
+
+ Right now, we still need Perl for doc build, as it is required
+ by some tools called at docs or kernel build time, like:
+
+ tools/docs/documentation-file-ref-check
+
+ Also, checkpatch is on Perl.
+ """
+
+ # While testing with lxc download template, one of the
+ # distros (Oracle) didn't have perl - nor even an option to install
+ # before installing oraclelinux-release-el9 package.
+ #
+ # Check it before running an error. If perl is not there,
+ # add it as a mandatory package, as some parts of the doc builder
+ # needs it.
+ if not self.which("perl"):
+ self.deps.add_package("perl", DepManager.SYSTEM_MANDATORY)
+ self.deps.add_package(prog, dtype)
+ return
+
+ try:
+ self.run(["perl", f"-M{prog}", "-e", "1"], check=True)
+ except subprocess.CalledProcessError:
+ self.deps.add_package(prog, dtype)
+
+ def check_python_module(self, module, is_optional=False):
+ """
+ Does a python module exists outside venv? If not, add it to missing
+ dependencies.
+ """
+ if is_optional:
+ dtype = DepManager.PYTHON_OPTIONAL
+ else:
+ dtype = DepManager.PYTHON_MANDATORY
+
+ try:
+ self.run([self.python_cmd, "-c", f"import {module}"], check=True)
+ except subprocess.CalledProcessError:
+ self.deps.add_package(module, dtype)
+
+ def check_rpm_missing(self, pkgs, dtype):
+ """
+ Does a rpm package exists? If not, add it to missing dependencies.
+ """
+ for prog in pkgs:
+ try:
+ self.run(["rpm", "-q", prog], check=True)
+ except subprocess.CalledProcessError:
+ self.deps.add_package(prog, dtype)
+
+ def check_pacman_missing(self, pkgs, dtype):
+ """
+ Does a pacman package exists? If not, add it to missing dependencies.
+ """
+ for prog in pkgs:
+ try:
+ self.run(["pacman", "-Q", prog], check=True)
+ except subprocess.CalledProcessError:
+ self.deps.add_package(prog, dtype)
+
+ def check_missing_tex(self, is_optional=False):
+ """
+ Does a LaTeX package exists? If not, add it to missing dependencies.
+ """
+ if is_optional:
+ dtype = DepManager.PDF_OPTIONAL
+ else:
+ dtype = DepManager.PDF_MANDATORY
+
+ kpsewhich = self.which("kpsewhich")
+ for prog, package in self.texlive.items():
+
+ # If kpsewhich is not there, just add it to deps
+ if not kpsewhich:
+ self.deps.add_package(package, dtype)
+ continue
+
+ # Check if the package is needed
+ try:
+ result = self.run(
+ [kpsewhich, prog], stdout=subprocess.PIPE, text=True, check=True
+ )
+
+ # Didn't find. Add it
+ if not result.stdout.strip():
+ self.deps.add_package(package, dtype)
+
+ except subprocess.CalledProcessError:
+ # kpsewhich returned an error. Add it, just in case
+ self.deps.add_package(package, dtype)
+
+ def get_sphinx_fname(self):
+ """
+ Gets the binary filename for sphinx-build.
+ """
+ if "SPHINXBUILD" in os.environ:
+ return os.environ["SPHINXBUILD"]
+
+ fname = "sphinx-build"
+ if self.which(fname):
+ return fname
+
+ fname = "sphinx-build-3"
+ if self.which(fname):
+ self.need_symlink = 1
+ return fname
+
+ return ""
+
+ def get_sphinx_version(self, cmd):
+ """
+ Gets sphinx-build version.
+ """
+ env = os.environ.copy()
+
+ # The sphinx-build tool has a bug: internally, it tries to set
+ # locale with locale.setlocale(locale.LC_ALL, ''). This causes a
+ # crash if language is not set. Detect and fix it.
+ try:
+ locale.setlocale(locale.LC_ALL, '')
+ except Exception:
+ env["LC_ALL"] = "C"
+ env["LANG"] = "C"
+
+ try:
+ result = self.run([cmd, "--version"], env=env,
+ stdout=subprocess.PIPE,
+ stderr=subprocess.STDOUT,
+ text=True, check=True)
+ except (subprocess.CalledProcessError, FileNotFoundError):
+ return None
+
+ for line in result.stdout.split("\n"):
+ match = re.match(r"^sphinx-build\s+([\d\.]+)(?:\+(?:/[\da-f]+)|b\d+)?\s*$", line)
+ if match:
+ return PythonVersion.parse_version(match.group(1))
+
+ match = re.match(r"^Sphinx.*\s+([\d\.]+)\s*$", line)
+ if match:
+ return PythonVersion.parse_version(match.group(1))
+
+ def check_sphinx(self, conf):
+ """
+ Checks Sphinx minimal requirements
+ """
+ try:
+ with open(conf, "r", encoding="utf-8") as f:
+ for line in f:
+ match = re.match(r"^\s*needs_sphinx\s*=\s*[\'\"]([\d\.]+)[\'\"]", line)
+ if match:
+ self.min_version = PythonVersion.parse_version(match.group(1))
+ break
+ except IOError:
+ sys.exit(f"Can't open {conf}")
+
+ if not self.min_version:
+ sys.exit(f"Can't get needs_sphinx version from {conf}")
+
+ self.virtenv_dir = self.virtenv_prefix[0] + "latest"
+
+ sphinx = self.get_sphinx_fname()
+ if not sphinx:
+ self.need_sphinx = 1
+ return
+
+ self.cur_version = self.get_sphinx_version(sphinx)
+ if not self.cur_version:
+ sys.exit(f"{sphinx} didn't return its version")
+
+ if self.cur_version < self.min_version:
+ curver = PythonVersion.ver_str(self.cur_version)
+ minver = PythonVersion.ver_str(self.min_version)
+
+ print(f"ERROR: Sphinx version is {curver}. It should be >= {minver}")
+ self.need_sphinx = 1
+ return
+
+ # On version check mode, just assume Sphinx has all mandatory deps
+ if self.version_check and self.cur_version >= RECOMMENDED_VERSION:
+ sys.exit(0)
+
+ def catcheck(self, filename):
+ """
+ Reads a file if it exists, returning as string.
+ If not found, returns an empty string.
+ """
+ if os.path.exists(filename):
+ with open(filename, "r", encoding="utf-8") as f:
+ return f.read().strip()
+ return ""
+
+ def get_system_release(self):
+ """
+ Determine the system type. There's no unique way that would work
+ with all distros with a minimal package install. So, several
+ methods are used here.
+
+ By default, it will use lsb_release function. If not available, it will
+ fail back to reading the known different places where the distro name
+ is stored.
+
+ Several modern distros now have /etc/os-release, which usually have
+ a decent coverage.
+ """
+
+ system_release = ""
+
+ if self.which("lsb_release"):
+ result = self.run(["lsb_release", "-d"], capture_output=True, text=True)
+ system_release = result.stdout.replace("Description:", "").strip()
+
+ release_files = [
+ "/etc/system-release",
+ "/etc/redhat-release",
+ "/etc/lsb-release",
+ "/etc/gentoo-release",
+ ]
+
+ if not system_release:
+ for f in release_files:
+ system_release = self.catcheck(f)
+ if system_release:
+ break
+
+ # This seems more common than LSB these days
+ if not system_release:
+ os_var = {}
+ try:
+ with open("/etc/os-release", "r", encoding="utf-8") as f:
+ for line in f:
+ match = re.match(r"^([\w\d\_]+)=\"?([^\"]*)\"?\n", line)
+ if match:
+ os_var[match.group(1)] = match.group(2)
+
+ system_release = os_var.get("NAME", "")
+ if "VERSION_ID" in os_var:
+ system_release += " " + os_var["VERSION_ID"]
+ elif "VERSION" in os_var:
+ system_release += " " + os_var["VERSION"]
+ except IOError:
+ pass
+
+ if not system_release:
+ system_release = self.catcheck("/etc/issue")
+
+ system_release = system_release.strip()
+
+ return system_release
+
+class SphinxDependencyChecker(MissingCheckers):
+ """
+ Main class for checking Sphinx documentation build dependencies.
+
+ - Check for missing system packages;
+ - Check for missing Python modules;
+ - Check for missing LaTeX packages needed by PDF generation;
+ - Propose Sphinx install via Python Virtual environment;
+ - Propose Sphinx install via distro-specific package install.
+ """
+ def __init__(self, args):
+ """Initialize checker variables"""
+
+ # List of required texlive packages on Fedora and OpenSuse
+ texlive = {
+ "amsfonts.sty": "texlive-amsfonts",
+ "amsmath.sty": "texlive-amsmath",
+ "amssymb.sty": "texlive-amsfonts",
+ "amsthm.sty": "texlive-amscls",
+ "anyfontsize.sty": "texlive-anyfontsize",
+ "atbegshi.sty": "texlive-oberdiek",
+ "bm.sty": "texlive-tools",
+ "capt-of.sty": "texlive-capt-of",
+ "cmap.sty": "texlive-cmap",
+ "ctexhook.sty": "texlive-ctex",
+ "ecrm1000.tfm": "texlive-ec",
+ "eqparbox.sty": "texlive-eqparbox",
+ "eu1enc.def": "texlive-euenc",
+ "fancybox.sty": "texlive-fancybox",
+ "fancyvrb.sty": "texlive-fancyvrb",
+ "float.sty": "texlive-float",
+ "fncychap.sty": "texlive-fncychap",
+ "footnote.sty": "texlive-mdwtools",
+ "framed.sty": "texlive-framed",
+ "luatex85.sty": "texlive-luatex85",
+ "multirow.sty": "texlive-multirow",
+ "needspace.sty": "texlive-needspace",
+ "palatino.sty": "texlive-psnfss",
+ "parskip.sty": "texlive-parskip",
+ "polyglossia.sty": "texlive-polyglossia",
+ "tabulary.sty": "texlive-tabulary",
+ "threeparttable.sty": "texlive-threeparttable",
+ "titlesec.sty": "texlive-titlesec",
+ "ucs.sty": "texlive-ucs",
+ "upquote.sty": "texlive-upquote",
+ "wrapfig.sty": "texlive-wrapfig",
+ }
+
+ super().__init__(args, texlive)
+
+ self.need_pip = False
+ self.rec_sphinx_upgrade = 0
+
+ self.system_release = self.get_system_release()
+ self.activate_cmd = ""
+
+ # Some distros may not have a Sphinx shipped package compatible with
+ # our minimal requirements
+ self.package_supported = True
+
+ # Recommend a new python version
+ self.recommend_python = None
+
+ # Certain hints are meant to be shown only once
+ self.distro_msg = None
+
+ self.latest_avail_ver = (0, 0, 0)
+ self.venv_ver = (0, 0, 0)
+
+ prefix = os.environ.get("srctree", ".") + "/"
+
+ self.conf = prefix + "Documentation/conf.py"
+ self.requirement_file = prefix + "Documentation/sphinx/requirements.txt"
+
+ def get_install_progs(self, progs, cmd, extra=None):
+ """
+ Check for missing dependencies using the provided program mapping.
+
+ The actual distro-specific programs are mapped via progs argument.
+ """
+ install = self.deps.check_missing(progs)
+
+ if self.verbose_warn_install:
+ self.deps.warn_install()
+
+ if not install:
+ return
+
+ if cmd:
+ if self.verbose_warn_install:
+ msg = "You should run:"
+ else:
+ msg = ""
+
+ if extra:
+ msg += "\n\t" + extra.replace("\n", "\n\t")
+
+ return(msg + "\n\tsudo " + cmd + " " + install)
+
+ return None
+
+ #
+ # Distro-specific hints methods
+ #
+
+ def give_debian_hints(self):
+ """
+ Provide package installation hints for Debian-based distros.
+ """
+ progs = {
+ "Pod::Usage": "perl-modules",
+ "convert": "imagemagick",
+ "dot": "graphviz",
+ "ensurepip": "python3-venv",
+ "python-sphinx": "python3-sphinx",
+ "rsvg-convert": "librsvg2-bin",
+ "virtualenv": "virtualenv",
+ "xelatex": "texlive-xetex",
+ "yaml": "python3-yaml",
+ }
+
+ if self.pdf:
+ pdf_pkgs = {
+ "fonts-dejavu": [
+ "/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf",
+ ],
+ "fonts-noto-cjk": [
+ "/usr/share/fonts/noto-cjk/NotoSansCJK-Regular.ttc",
+ "/usr/share/fonts/opentype/noto/NotoSansCJK-Regular.ttc",
+ "/usr/share/fonts/opentype/noto/NotoSerifCJK-Regular.ttc",
+ ],
+ "tex-gyre": [
+ "/usr/share/texmf/tex/latex/tex-gyre/tgtermes.sty"
+ ],
+ "texlive-fonts-recommended": [
+ "/usr/share/texlive/texmf-dist/fonts/tfm/adobe/zapfding/pzdr.tfm",
+ ],
+ "texlive-lang-chinese": [
+ "/usr/share/texlive/texmf-dist/tex/latex/ctex/ctexhook.sty",
+ ],
+ }
+
+ for package, files in pdf_pkgs.items():
+ self.check_missing_file(files, package, DepManager.PDF_MANDATORY)
+
+ self.check_program("dvipng", DepManager.PDF_MANDATORY)
+
+ if not self.distro_msg:
+ self.distro_msg = \
+ "Note: ImageMagick is broken on some distros, affecting PDF output. For more details:\n" \
+ "\thttps://askubuntu.com/questions/1158894/imagemagick-still-broken-using-with-usr-bin-convert"
+
+ return self.get_install_progs(progs, "apt-get install")
+
+ def give_redhat_hints(self):
+ """
+ Provide package installation hints for RedHat-based distros
+ (Fedora, RHEL and RHEL-based variants).
+ """
+ progs = {
+ "Pod::Usage": "perl-Pod-Usage",
+ "convert": "ImageMagick",
+ "dot": "graphviz",
+ "python-sphinx": "python3-sphinx",
+ "rsvg-convert": "librsvg2-tools",
+ "virtualenv": "python3-virtualenv",
+ "xelatex": "texlive-xetex-bin",
+ "yaml": "python3-pyyaml",
+ }
+
+ fedora_tex_pkgs = [
+ "dejavu-sans-fonts",
+ "dejavu-sans-mono-fonts",
+ "dejavu-serif-fonts",
+ "texlive-collection-fontsrecommended",
+ "texlive-collection-latex",
+ "texlive-xecjk",
+ ]
+
+ fedora = False
+ rel = None
+
+ match = re.search(r"(release|Linux)\s+(\d+)", self.system_release)
+ if match:
+ rel = int(match.group(2))
+
+ if not rel:
+ print("Couldn't identify release number")
+ noto_sans_redhat = None
+ self.pdf = False
+ elif re.search("Fedora", self.system_release):
+ # Fedora 38 and upper use this CJK font
+
+ noto_sans_redhat = "google-noto-sans-cjk-fonts"
+ fedora = True
+ else:
+ # Almalinux, CentOS, RHEL, ...
+
+ # at least up to version 9 (and Fedora < 38), that's the CJK font
+ noto_sans_redhat = "google-noto-sans-cjk-ttc-fonts"
+
+ progs["virtualenv"] = "python-virtualenv"
+
+ if not rel or rel < 8:
+ print("ERROR: Distro not supported. Too old?")
+ return
+
+ # RHEL 8 uses Python 3.6, which is not compatible with
+ # the build system anymore. Suggest Python 3.11
+ if rel == 8:
+ self.check_program("python3.9", DepManager.SYSTEM_MANDATORY)
+ progs["python3.9"] = "python39"
+ progs["yaml"] = "python39-pyyaml"
+
+ self.recommend_python = True
+
+ # There's no python39-sphinx package. Only pip is supported
+ self.package_supported = False
+
+ if not self.distro_msg:
+ self.distro_msg = \
+ "Note: RHEL-based distros typically require extra repositories.\n" \
+ "For most, enabling epel and crb are enough:\n" \
+ "\tsudo dnf install -y epel-release\n" \
+ "\tsudo dnf config-manager --set-enabled crb\n" \
+ "Yet, some may have other required repositories. Those commands could be useful:\n" \
+ "\tsudo dnf repolist all\n" \
+ "\tsudo dnf repoquery --available --info <pkgs>\n" \
+ "\tsudo dnf config-manager --set-enabled '*' # enable all - probably not what you want"
+
+ if self.pdf:
+ pdf_pkgs = [
+ "/usr/share/fonts/google-noto-cjk/NotoSansCJK-Regular.ttc",
+ "/usr/share/fonts/google-noto-sans-cjk-fonts/NotoSansCJK-Regular.ttc",
+ ]
+
+ self.check_missing_file(pdf_pkgs, noto_sans_redhat, DepManager.PDF_MANDATORY)
+
+ self.check_rpm_missing(fedora_tex_pkgs, DepManager.PDF_MANDATORY)
+
+ self.check_missing_tex(DepManager.PDF_MANDATORY)
+
+ # There's no texlive-ctex on RHEL 8 repositories. This will
+ # likely affect CJK pdf build only.
+ if not fedora and rel == 8:
+ self.deps.del_package("texlive-ctex")
+
+ return self.get_install_progs(progs, "dnf install")
+
+ def give_opensuse_hints(self):
+ """
+ Provide package installation hints for openSUSE-based distros
+ (Leap and Tumbleweed).
+ """
+ progs = {
+ "Pod::Usage": "perl-Pod-Usage",
+ "convert": "ImageMagick",
+ "dot": "graphviz",
+ "python-sphinx": "python3-sphinx",
+ "virtualenv": "python3-virtualenv",
+ "xelatex": "texlive-xetex-bin texlive-dejavu",
+ "yaml": "python3-pyyaml",
+ }
+
+ suse_tex_pkgs = [
+ "texlive-babel-english",
+ "texlive-caption",
+ "texlive-colortbl",
+ "texlive-courier",
+ "texlive-dvips",
+ "texlive-helvetic",
+ "texlive-makeindex",
+ "texlive-metafont",
+ "texlive-metapost",
+ "texlive-palatino",
+ "texlive-preview",
+ "texlive-times",
+ "texlive-zapfchan",
+ "texlive-zapfding",
+ ]
+
+ progs["latexmk"] = "texlive-latexmk-bin"
+
+ match = re.search(r"(Leap)\s+(\d+).(\d)", self.system_release)
+ if match:
+ rel = int(match.group(2))
+
+ # Leap 15.x uses Python 3.6, which is not compatible with
+ # the build system anymore. Suggest Python 3.11
+ if rel == 15:
+ if not self.which(self.python_cmd):
+ self.check_program("python3.11", DepManager.SYSTEM_MANDATORY)
+ progs["python3.11"] = "python311"
+ self.recommend_python = True
+
+ progs.update({
+ "python-sphinx": "python311-Sphinx python311-Sphinx-latex",
+ "virtualenv": "python311-virtualenv",
+ "yaml": "python311-PyYAML",
+ })
+ else:
+ # Tumbleweed defaults to Python 3.11
+
+ progs.update({
+ "python-sphinx": "python313-Sphinx python313-Sphinx-latex",
+ "virtualenv": "python313-virtualenv",
+ "yaml": "python313-PyYAML",
+ })
+
+ # FIXME: add support for installing CJK fonts
+ #
+ # I tried hard, but was unable to find a way to install
+ # "Noto Sans CJK SC" on openSUSE
+
+ if self.pdf:
+ self.check_rpm_missing(suse_tex_pkgs, DepManager.PDF_MANDATORY)
+ if self.pdf:
+ self.check_missing_tex()
+
+ return self.get_install_progs(progs, "zypper install --no-recommends")
+
+ def give_mageia_hints(self):
+ """
+ Provide package installation hints for Mageia and OpenMandriva.
+ """
+ progs = {
+ "Pod::Usage": "perl-Pod-Usage",
+ "convert": "ImageMagick",
+ "dot": "graphviz",
+ "python-sphinx": "python3-sphinx",
+ "rsvg-convert": "librsvg2",
+ "virtualenv": "python3-virtualenv",
+ "xelatex": "texlive",
+ "yaml": "python3-yaml",
+ }
+
+ tex_pkgs = [
+ "texlive-fontsextra",
+ "texlive-fonts-asian",
+ "fonts-ttf-dejavu",
+ ]
+
+ if re.search(r"OpenMandriva", self.system_release):
+ packager_cmd = "dnf install"
+ noto_sans = "noto-sans-cjk-fonts"
+ tex_pkgs = [
+ "texlive-collection-basic",
+ "texlive-collection-langcjk",
+ "texlive-collection-fontsextra",
+ "texlive-collection-fontsrecommended"
+ ]
+
+ # Tested on OpenMandriva Lx 4.3
+ progs["convert"] = "imagemagick"
+ progs["yaml"] = "python-pyyaml"
+ progs["python-virtualenv"] = "python-virtualenv"
+ progs["python-sphinx"] = "python-sphinx"
+ progs["xelatex"] = "texlive"
+
+ self.check_program("python-virtualenv", DepManager.PYTHON_MANDATORY)
+
+ # On my tests with openMandriva LX 4.0 docker image, upgraded
+ # to 4.3, python-virtualenv package is broken: it is missing
+ # ensurepip. Without it, the alternative would be to run:
+ # python3 -m venv --without-pip ~/sphinx_latest, but running
+ # pip there won't install sphinx at venv.
+ #
+ # Add a note about that.
+
+ if not self.distro_msg:
+ self.distro_msg = \
+ "Notes:\n"\
+ "1. for venv, ensurepip could be broken, preventing its install method.\n" \
+ "2. at least on OpenMandriva LX 4.3, texlive packages seem broken"
+
+ else:
+ packager_cmd = "urpmi"
+ noto_sans = "google-noto-sans-cjk-ttc-fonts"
+
+ progs["latexmk"] = "texlive-collection-basic"
+
+ if self.pdf:
+ pdf_pkgs = [
+ "/usr/share/fonts/google-noto-cjk/NotoSansCJK-Regular.ttc",
+ "/usr/share/fonts/TTF/NotoSans-Regular.ttf",
+ ]
+
+ self.check_missing_file(pdf_pkgs, noto_sans, DepManager.PDF_MANDATORY)
+ self.check_rpm_missing(tex_pkgs, DepManager.PDF_MANDATORY)
+
+ return self.get_install_progs(progs, packager_cmd)
+
+ def give_arch_linux_hints(self):
+ """
+ Provide package installation hints for ArchLinux.
+ """
+ progs = {
+ "convert": "imagemagick",
+ "dot": "graphviz",
+ "latexmk": "texlive-core",
+ "rsvg-convert": "extra/librsvg",
+ "virtualenv": "python-virtualenv",
+ "xelatex": "texlive-xetex",
+ "yaml": "python-yaml",
+ }
+
+ archlinux_tex_pkgs = [
+ "texlive-basic",
+ "texlive-binextra",
+ "texlive-core",
+ "texlive-fontsrecommended",
+ "texlive-langchinese",
+ "texlive-langcjk",
+ "texlive-latexextra",
+ "ttf-dejavu",
+ ]
+
+ if self.pdf:
+ self.check_pacman_missing(archlinux_tex_pkgs,
+ DepManager.PDF_MANDATORY)
+
+ self.check_missing_file(["/usr/share/fonts/noto-cjk/NotoSansCJK-Regular.ttc"],
+ "noto-fonts-cjk",
+ DepManager.PDF_MANDATORY)
+
+
+ return self.get_install_progs(progs, "pacman -S")
+
+ def give_gentoo_hints(self):
+ """
+ Provide package installation hints for Gentoo.
+ """
+ texlive_deps = [
+ "dev-texlive/texlive-fontsrecommended",
+ "dev-texlive/texlive-latexextra",
+ "dev-texlive/texlive-xetex",
+ "media-fonts/dejavu",
+ ]
+
+ progs = {
+ "convert": "media-gfx/imagemagick",
+ "dot": "media-gfx/graphviz",
+ "rsvg-convert": "gnome-base/librsvg",
+ "virtualenv": "dev-python/virtualenv",
+ "xelatex": " ".join(texlive_deps),
+ "yaml": "dev-python/pyyaml",
+ "python-sphinx": "dev-python/sphinx",
+ }
+
+ if self.pdf:
+ pdf_pkgs = {
+ "media-fonts/dejavu": [
+ "/usr/share/fonts/dejavu/DejaVuSans.ttf",
+ ],
+ "media-fonts/noto-cjk": [
+ "/usr/share/fonts/noto-cjk/NotoSansCJKsc-Regular.otf",
+ "/usr/share/fonts/noto-cjk/NotoSerifCJK-Regular.ttc",
+ ],
+ }
+ for package, files in pdf_pkgs.items():
+ self.check_missing_file(files, package, DepManager.PDF_MANDATORY)
+
+ # Handling dependencies is a nightmare, as Gentoo refuses to emerge
+ # some packages if there's no package.use file describing them.
+ # To make it worse, compilation flags shall also be present there
+ # for some packages. If USE is not perfect, error/warning messages
+ # like those are shown:
+ #
+ # !!! The following binary packages have been ignored due to non matching USE:
+ #
+ # =media-gfx/graphviz-12.2.1-r1 X pdf -python_single_target_python3_13 qt6 svg
+ # =media-gfx/graphviz-12.2.1-r1 X pdf python_single_target_python3_12 -python_single_target_python3_13 qt6 svg
+ # =media-gfx/graphviz-12.2.1-r1 X pdf qt6 svg
+ # =media-gfx/graphviz-12.2.1-r1 X pdf -python_single_target_python3_10 qt6 svg
+ # =media-gfx/graphviz-12.2.1-r1 X pdf -python_single_target_python3_10 python_single_target_python3_12 -python_single_target_python3_13 qt6 svg
+ # =media-fonts/noto-cjk-20190416 X
+ # =app-text/texlive-core-2024-r1 X cjk -xetex
+ # =app-text/texlive-core-2024-r1 X -xetex
+ # =app-text/texlive-core-2024-r1 -xetex
+ # =dev-libs/zziplib-0.13.79-r1 sdl
+ #
+ # And will ignore such packages, installing the remaining ones. That
+ # affects mostly the image extension and PDF generation.
+
+ # Package dependencies and the minimal needed args:
+ portages = {
+ "graphviz": "media-gfx/graphviz",
+ "imagemagick": "media-gfx/imagemagick",
+ "media-libs": "media-libs/harfbuzz icu",
+ "media-fonts": "media-fonts/noto-cjk",
+ "texlive": "app-text/texlive-core xetex",
+ "zziblib": "dev-libs/zziplib sdl",
+ }
+
+ extra_cmds = ""
+ if not self.distro_msg:
+ self.distro_msg = "Note: Gentoo requires package.use to be adjusted before emerging packages"
+
+ use_base = "/etc/portage/package.use"
+ files = glob(f"{use_base}/*")
+
+ for fname, portage in portages.items():
+ install = False
+
+ while install is False:
+ if not files:
+ # No files under package.usage. Install all
+ install = True
+ break
+
+ args = portage.split(" ")
+
+ name = args.pop(0)
+
+ cmd = ["grep", "-l", "-E", rf"^{name}\b" ] + files
+ result = self.run(cmd, stdout=subprocess.PIPE, text=True)
+ if result.returncode or not result.stdout.strip():
+ # File containing portage name not found
+ install = True
+ break
+
+ # Ensure that needed USE flags are present
+ if args:
+ match_fname = result.stdout.strip()
+ with open(match_fname, 'r', encoding='utf8',
+ errors='backslashreplace') as fp:
+ for line in fp:
+ for arg in args:
+ if arg.startswith("-"):
+ continue
+
+ if not re.search(rf"\s*{arg}\b", line):
+ # Needed file argument not found
+ install = True
+ break
+
+ # Everything looks ok, don't install
+ break
+
+ # emit a code to setup missing USE
+ if install:
+ extra_cmds += (f"sudo su -c 'echo \"{portage}\" > {use_base}/{fname}'\n")
+
+ # Now, we can use emerge and let it respect USE
+ return self.get_install_progs(progs,
+ "emerge --ask --changed-use --binpkg-respect-use=y",
+ extra_cmds)
+
+ def get_install(self):
+ """
+ OS-specific hints logic. Seeks for a hinter. If found, use it to
+ provide package-manager specific install commands.
+
+ Otherwise, outputs install instructions for the meta-packages.
+
+ Returns a string with the command to be executed to install the
+ the needed packages, if distro found. Otherwise, return just a
+ list of packages that require installation.
+ """
+ os_hints = {
+ re.compile("Red Hat Enterprise Linux"): self.give_redhat_hints,
+ re.compile("Fedora"): self.give_redhat_hints,
+ re.compile("AlmaLinux"): self.give_redhat_hints,
+ re.compile("Amazon Linux"): self.give_redhat_hints,
+ re.compile("CentOS"): self.give_redhat_hints,
+ re.compile("openEuler"): self.give_redhat_hints,
+ re.compile("Oracle Linux Server"): self.give_redhat_hints,
+ re.compile("Rocky Linux"): self.give_redhat_hints,
+ re.compile("Springdale Open Enterprise"): self.give_redhat_hints,
+
+ re.compile("Ubuntu"): self.give_debian_hints,
+ re.compile("Debian"): self.give_debian_hints,
+ re.compile("Devuan"): self.give_debian_hints,
+ re.compile("Kali"): self.give_debian_hints,
+ re.compile("Mint"): self.give_debian_hints,
+
+ re.compile("openSUSE"): self.give_opensuse_hints,
+
+ re.compile("Mageia"): self.give_mageia_hints,
+ re.compile("OpenMandriva"): self.give_mageia_hints,
+
+ re.compile("Arch Linux"): self.give_arch_linux_hints,
+ re.compile("Gentoo"): self.give_gentoo_hints,
+ }
+
+ # If the OS is detected, use per-OS hint logic
+ for regex, os_hint in os_hints.items():
+ if regex.search(self.system_release):
+ return os_hint()
+
+ #
+ # Fall-back to generic hint code for other distros
+ # That's far from ideal, specially for LaTeX dependencies.
+ #
+ progs = {"sphinx-build": "sphinx"}
+ if self.pdf:
+ self.check_missing_tex()
+
+ self.distro_msg = \
+ f"I don't know distro {self.system_release}.\n" \
+ "So, I can't provide you a hint with the install procedure.\n" \
+ "There are likely missing dependencies."
+
+ return self.get_install_progs(progs, None)
+
+ #
+ # Common dependencies
+ #
+ def deactivate_help(self):
+ """
+ Print a helper message to disable a virtual environment.
+ """
+
+ print("\n If you want to exit the virtualenv, you can use:")
+ print("\tdeactivate")
+
+ def get_virtenv(self):
+ """
+ Give a hint about how to activate an already-existing virtual
+ environment containing sphinx-build.
+
+ Returns a tuble with (activate_cmd_path, sphinx_version) with
+ the newest available virtual env.
+ """
+
+ cwd = os.getcwd()
+
+ activates = []
+
+ # Add all sphinx prefixes with possible version numbers
+ for p in self.virtenv_prefix:
+ activates += glob(f"{cwd}/{p}[0-9]*/bin/activate")
+
+ activates.sort(reverse=True, key=str.lower)
+
+ # Place sphinx_latest first, if it exists
+ for p in self.virtenv_prefix:
+ activates = glob(f"{cwd}/{p}*latest/bin/activate") + activates
+
+ ver = (0, 0, 0)
+ for f in activates:
+ # Discard too old Sphinx virtual environments
+ match = re.search(r"(\d+)\.(\d+)\.(\d+)", f)
+ if match:
+ ver = (int(match.group(1)), int(match.group(2)), int(match.group(3)))
+
+ if ver < self.min_version:
+ continue
+
+ sphinx_cmd = f.replace("activate", "sphinx-build")
+ if not os.path.isfile(sphinx_cmd):
+ continue
+
+ ver = self.get_sphinx_version(sphinx_cmd)
+
+ if not ver:
+ venv_dir = f.replace("/bin/activate", "")
+ print(f"Warning: virtual environment {venv_dir} is not working.\n" \
+ "Python version upgrade? Remove it with:\n\n" \
+ "\trm -rf {venv_dir}\n\n")
+ else:
+ if self.need_sphinx and ver >= self.min_version:
+ return (f, ver)
+ elif PythonVersion.parse_version(ver) > self.cur_version:
+ return (f, ver)
+
+ return ("", ver)
+
+ def recommend_sphinx_upgrade(self):
+ """
+ Check if Sphinx needs to be upgraded.
+
+ Returns a tuple with the higest available Sphinx version if found.
+ Otherwise, returns None to indicate either that no upgrade is needed
+ or no venv was found.
+ """
+
+ # Avoid running sphinx-builds from venv if cur_version is good
+ if self.cur_version and self.cur_version >= RECOMMENDED_VERSION:
+ self.latest_avail_ver = self.cur_version
+ return None
+
+ # Get the highest version from sphinx_*/bin/sphinx-build and the
+ # corresponding command to activate the venv/virtenv
+ self.activate_cmd, self.venv_ver = self.get_virtenv()
+
+ # Store the highest version from Sphinx existing virtualenvs
+ if self.activate_cmd and self.venv_ver > self.cur_version:
+ self.latest_avail_ver = self.venv_ver
+ else:
+ if self.cur_version:
+ self.latest_avail_ver = self.cur_version
+ else:
+ self.latest_avail_ver = (0, 0, 0)
+
+ # As we don't know package version of Sphinx, and there's no
+ # virtual environments, don't check if upgrades are needed
+ if not self.virtualenv:
+ if not self.latest_avail_ver:
+ return None
+
+ return self.latest_avail_ver
+
+ # Either there are already a virtual env or a new one should be created
+ self.need_pip = True
+
+ if not self.latest_avail_ver:
+ return None
+
+ # Return if the reason is due to an upgrade or not
+ if self.latest_avail_ver != (0, 0, 0):
+ if self.latest_avail_ver < RECOMMENDED_VERSION:
+ self.rec_sphinx_upgrade = 1
+
+ return self.latest_avail_ver
+
+ def recommend_package(self):
+ """
+ Recommend installing Sphinx as a distro-specific package.
+ """
+
+ print("\n2) As a package with:")
+
+ old_need = self.deps.need
+ old_optional = self.deps.optional
+
+ self.pdf = False
+ self.deps.optional = 0
+ old_verbose = self.verbose_warn_install
+ self.verbose_warn_install = 0
+
+ self.deps.clear_deps()
+
+ self.deps.add_package("python-sphinx", DepManager.PYTHON_MANDATORY)
+
+ cmd = self.get_install()
+ if cmd:
+ print(cmd)
+
+ self.deps.need = old_need
+ self.deps.optional = old_optional
+ self.verbose_warn_install = old_verbose
+
+ def recommend_sphinx_version(self, virtualenv_cmd):
+ """
+ Provide recommendations for installing or upgrading Sphinx based
+ on current version.
+
+ The logic here is complex, as it have to deal with different versions:
+
+ - minimal supported version;
+ - minimal PDF version;
+ - recommended version.
+
+ It also needs to work fine with both distro's package and
+ venv/virtualenv
+ """
+
+ if self.recommend_python:
+ cur_ver = sys.version_info[:3]
+ if cur_ver < MIN_PYTHON_VERSION:
+ print(f"\nPython version {cur_ver} is incompatible with doc build.\n" \
+ "Please upgrade it and re-run.\n")
+ return
+
+ # Version is OK. Nothing to do.
+ if self.cur_version != (0, 0, 0) and self.cur_version >= RECOMMENDED_VERSION:
+ return
+
+ if self.latest_avail_ver:
+ latest_avail_ver = PythonVersion.ver_str(self.latest_avail_ver)
+
+ if not self.need_sphinx:
+ # sphinx-build is present and its version is >= $min_version
+
+ # only recommend enabling a newer virtenv version if makes sense.
+ if self.latest_avail_ver and self.latest_avail_ver > self.cur_version:
+ print(f"\nYou may also use the newer Sphinx version {latest_avail_ver} with:")
+ if f"{self.virtenv_prefix}" in os.getcwd():
+ print("\tdeactivate")
+ print(f"\t. {self.activate_cmd}")
+ self.deactivate_help()
+ return
+
+ if self.latest_avail_ver and self.latest_avail_ver >= RECOMMENDED_VERSION:
+ return
+
+ if not self.virtualenv:
+ # No sphinx either via package or via virtenv. As we can't
+ # Compare the versions here, just return, recommending the
+ # user to install it from the package distro.
+ if not self.latest_avail_ver or self.latest_avail_ver == (0, 0, 0):
+ return
+
+ # User doesn't want a virtenv recommendation, but he already
+ # installed one via virtenv with a newer version.
+ # So, print commands to enable it
+ if self.latest_avail_ver > self.cur_version:
+ print(f"\nYou may also use the Sphinx virtualenv version {latest_avail_ver} with:")
+ if f"{self.virtenv_prefix}" in os.getcwd():
+ print("\tdeactivate")
+ print(f"\t. {self.activate_cmd}")
+ self.deactivate_help()
+ return
+ print("\n")
+ else:
+ if self.need_sphinx:
+ self.deps.need += 1
+
+ # Suggest newer versions if current ones are too old
+ if self.latest_avail_ver and self.latest_avail_ver >= self.min_version:
+ if self.latest_avail_ver >= RECOMMENDED_VERSION:
+ print(f"\nNeed to activate Sphinx (version {latest_avail_ver}) on virtualenv with:")
+ print(f"\t. {self.activate_cmd}")
+ self.deactivate_help()
+ return
+
+ # Version is above the minimal required one, but may be
+ # below the recommended one. So, print warnings/notes
+ if self.latest_avail_ver < RECOMMENDED_VERSION:
+ print(f"Warning: It is recommended at least Sphinx version {RECOMMENDED_VERSION}.")
+
+ # At this point, either it needs Sphinx or upgrade is recommended,
+ # both via pip
+
+ if self.rec_sphinx_upgrade:
+ if not self.virtualenv:
+ print("Instead of install/upgrade Python Sphinx pkg, you could use pip/pypi with:\n\n")
+ else:
+ print("To upgrade Sphinx, use:\n\n")
+ else:
+ print("\nSphinx needs to be installed either:\n1) via pip/pypi with:\n")
+
+ if not virtualenv_cmd:
+ print(" Currently not possible.\n")
+ print(" Please upgrade Python to a newer version and run this script again")
+ else:
+ print(f"\t{virtualenv_cmd} {self.virtenv_dir}")
+ print(f"\t. {self.virtenv_dir}/bin/activate")
+ print(f"\tpip install -r {self.requirement_file}")
+ self.deactivate_help()
+
+ if self.package_supported:
+ self.recommend_package()
+
+ print("\n" \
+ " Please note that Sphinx currentlys produce false-positive\n" \
+ " warnings when the same name is used for more than one type (functions,\n" \
+ " structs, enums,...). This is known Sphinx bug. For more details, see:\n" \
+ "\thttps://github.com/sphinx-doc/sphinx/pull/8313")
+
+ def check_needs(self):
+ """
+ Main method that checks needed dependencies and provides
+ recommendations.
+ """
+ self.python_cmd = sys.executable
+
+ # Check if Sphinx is already accessible from current environment
+ self.check_sphinx(self.conf)
+
+ if self.system_release:
+ print(f"Detected OS: {self.system_release}.")
+ else:
+ print("Unknown OS")
+ if self.cur_version != (0, 0, 0):
+ ver = PythonVersion.ver_str(self.cur_version)
+ print(f"Sphinx version: {ver}\n")
+
+ # Check the type of virtual env, depending on Python version
+ virtualenv_cmd = None
+
+ if sys.version_info < MIN_PYTHON_VERSION:
+ min_ver = ver_str(MIN_PYTHON_VERSION)
+ print(f"ERROR: at least python {min_ver} is required to build the kernel docs")
+ self.need_sphinx = 1
+
+ self.venv_ver = self.recommend_sphinx_upgrade()
+
+ if self.need_pip:
+ if sys.version_info < MIN_PYTHON_VERSION:
+ self.need_pip = False
+ print("Warning: python version is not supported.")
+ else:
+ virtualenv_cmd = f"{self.python_cmd} -m venv"
+ self.check_python_module("ensurepip")
+
+ # Check for needed programs/tools
+ self.check_perl_module("Pod::Usage", DepManager.SYSTEM_MANDATORY)
+
+ self.check_program("make", DepManager.SYSTEM_MANDATORY)
+ self.check_program("which", DepManager.SYSTEM_MANDATORY)
+
+ self.check_program("dot", DepManager.SYSTEM_OPTIONAL)
+ self.check_program("convert", DepManager.SYSTEM_OPTIONAL)
+
+ self.check_python_module("yaml")
+
+ if self.pdf:
+ self.check_program("xelatex", DepManager.PDF_MANDATORY)
+ self.check_program("rsvg-convert", DepManager.PDF_MANDATORY)
+ self.check_program("latexmk", DepManager.PDF_MANDATORY)
+
+ # Do distro-specific checks and output distro-install commands
+ cmd = self.get_install()
+ if cmd:
+ print(cmd)
+
+ # If distro requires some special instructions, print here.
+ # Please notice that get_install() needs to be called first.
+ if self.distro_msg:
+ print("\n" + self.distro_msg)
+
+ if not self.python_cmd:
+ if self.need == 1:
+ sys.exit("Can't build as 1 mandatory dependency is missing")
+ elif self.need:
+ sys.exit(f"Can't build as {self.need} mandatory dependencies are missing")
+
+ # Check if sphinx-build is called sphinx-build-3
+ if self.need_symlink:
+ sphinx_path = self.which("sphinx-build-3")
+ if sphinx_path:
+ print(f"\tsudo ln -sf {sphinx_path} /usr/bin/sphinx-build\n")
+
+ self.recommend_sphinx_version(virtualenv_cmd)
+ print("")
+
+ if not self.deps.optional:
+ print("All optional dependencies are met.")
+
+ if self.deps.need == 1:
+ sys.exit("Can't build as 1 mandatory dependency is missing")
+ elif self.deps.need:
+ sys.exit(f"Can't build as {self.deps.need} mandatory dependencies are missing")
+
+ print("Needed package dependencies are met.")
+
+DESCRIPTION = """
+Process some flags related to Sphinx installation and documentation build.
+"""
+
+
+def main():
+ """Main function"""
+ parser = argparse.ArgumentParser(description=DESCRIPTION)
+
+ parser.add_argument(
+ "--no-virtualenv",
+ action="store_false",
+ dest="virtualenv",
+ help="Recommend installing Sphinx instead of using a virtualenv",
+ )
+
+ parser.add_argument(
+ "--no-pdf",
+ action="store_false",
+ dest="pdf",
+ help="Don't check for dependencies required to build PDF docs",
+ )
+
+ parser.add_argument(
+ "--version-check",
+ action="store_true",
+ dest="version_check",
+ help="If version is compatible, don't check for missing dependencies",
+ )
+
+ args = parser.parse_args()
+
+ checker = SphinxDependencyChecker(args)
+
+ PythonVersion.check_python(MIN_PYTHON_VERSION,
+ bail_out=True, success_on_error=True)
+ checker.check_needs()
+
+# Call main if not used as module
+if __name__ == "__main__":
+ main()
diff --git a/tools/docs/test_doc_build.py b/tools/docs/test_doc_build.py
new file mode 100755
index 000000000000..47b4606569f9
--- /dev/null
+++ b/tools/docs/test_doc_build.py
@@ -0,0 +1,513 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+# Copyright(c) 2025: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
+#
+# pylint: disable=R0903,R0912,R0913,R0914,R0917,C0301
+
+"""
+Install minimal supported requirements for different Sphinx versions
+and optionally test the build.
+"""
+
+import argparse
+import asyncio
+import os.path
+import shutil
+import sys
+import time
+import subprocess
+
+# Minimal python version supported by the building system.
+
+PYTHON = os.path.basename(sys.executable)
+
+min_python_bin = None
+
+for i in range(9, 13):
+ p = f"python3.{i}"
+ if shutil.which(p):
+ min_python_bin = p
+ break
+
+if not min_python_bin:
+ min_python_bin = PYTHON
+
+# Starting from 8.0, Python 3.9 is not supported anymore.
+PYTHON_VER_CHANGES = {(8, 0, 0): PYTHON}
+
+DEFAULT_VERSIONS_TO_TEST = [
+ (3, 4, 3), # Minimal supported version
+ (5, 3, 0), # CentOS Stream 9 / AlmaLinux 9
+ (6, 1, 1), # Debian 12
+ (7, 2, 1), # openSUSE Leap 15.6
+ (7, 2, 6), # Ubuntu 24.04 LTS
+ (7, 4, 7), # Ubuntu 24.10
+ (7, 3, 0), # openSUSE Tumbleweed
+ (8, 1, 3), # Fedora 42
+ (8, 2, 3) # Latest version - covers rolling distros
+]
+
+# Sphinx versions to be installed and their incremental requirements
+SPHINX_REQUIREMENTS = {
+ # Oldest versions we support for each package required by Sphinx 3.4.3
+ (3, 4, 3): {
+ "docutils": "0.16",
+ "alabaster": "0.7.12",
+ "babel": "2.8.0",
+ "certifi": "2020.6.20",
+ "docutils": "0.16",
+ "idna": "2.10",
+ "imagesize": "1.2.0",
+ "Jinja2": "2.11.2",
+ "MarkupSafe": "1.1.1",
+ "packaging": "20.4",
+ "Pygments": "2.6.1",
+ "PyYAML": "5.1",
+ "requests": "2.24.0",
+ "snowballstemmer": "2.0.0",
+ "sphinxcontrib-applehelp": "1.0.2",
+ "sphinxcontrib-devhelp": "1.0.2",
+ "sphinxcontrib-htmlhelp": "1.0.3",
+ "sphinxcontrib-jsmath": "1.0.1",
+ "sphinxcontrib-qthelp": "1.0.3",
+ "sphinxcontrib-serializinghtml": "1.1.4",
+ "urllib3": "1.25.9",
+ },
+
+ # Update package dependencies to a more modern base. The goal here
+ # is to avoid to many incremental changes for the next entries
+ (3, 5, 0): {
+ "alabaster": "0.7.13",
+ "babel": "2.17.0",
+ "certifi": "2025.6.15",
+ "idna": "3.10",
+ "imagesize": "1.4.1",
+ "packaging": "25.0",
+ "Pygments": "2.8.1",
+ "requests": "2.32.4",
+ "snowballstemmer": "3.0.1",
+ "sphinxcontrib-applehelp": "1.0.4",
+ "sphinxcontrib-htmlhelp": "2.0.1",
+ "sphinxcontrib-serializinghtml": "1.1.5",
+ "urllib3": "2.0.0",
+ },
+
+ # Starting from here, ensure all docutils versions are covered with
+ # supported Sphinx versions. Other packages are upgraded only when
+ # required by pip
+ (4, 0, 0): {
+ "PyYAML": "5.1",
+ },
+ (4, 1, 0): {
+ "docutils": "0.17",
+ "Pygments": "2.19.1",
+ "Jinja2": "3.0.3",
+ "MarkupSafe": "2.0",
+ },
+ (4, 3, 0): {},
+ (4, 4, 0): {},
+ (4, 5, 0): {
+ "docutils": "0.17.1",
+ },
+ (5, 0, 0): {},
+ (5, 1, 0): {},
+ (5, 2, 0): {
+ "docutils": "0.18",
+ "Jinja2": "3.1.2",
+ "MarkupSafe": "2.0",
+ "PyYAML": "5.3.1",
+ },
+ (5, 3, 0): {
+ "docutils": "0.18.1",
+ },
+ (6, 0, 0): {},
+ (6, 1, 0): {},
+ (6, 2, 0): {
+ "PyYAML": "5.4.1",
+ },
+ (7, 0, 0): {},
+ (7, 1, 0): {},
+ (7, 2, 0): {
+ "docutils": "0.19",
+ "PyYAML": "6.0.1",
+ "sphinxcontrib-serializinghtml": "1.1.9",
+ },
+ (7, 2, 6): {
+ "docutils": "0.20",
+ },
+ (7, 3, 0): {
+ "alabaster": "0.7.14",
+ "PyYAML": "6.0.1",
+ "tomli": "2.0.1",
+ },
+ (7, 4, 0): {
+ "docutils": "0.20.1",
+ "PyYAML": "6.0.1",
+ },
+ (8, 0, 0): {
+ "docutils": "0.21",
+ },
+ (8, 1, 0): {
+ "docutils": "0.21.1",
+ "PyYAML": "6.0.1",
+ "sphinxcontrib-applehelp": "1.0.7",
+ "sphinxcontrib-devhelp": "1.0.6",
+ "sphinxcontrib-htmlhelp": "2.0.6",
+ "sphinxcontrib-qthelp": "1.0.6",
+ },
+ (8, 2, 0): {
+ "docutils": "0.21.2",
+ "PyYAML": "6.0.1",
+ "sphinxcontrib-serializinghtml": "1.1.9",
+ },
+}
+
+
+class AsyncCommands:
+ """Excecute command synchronously"""
+
+ def __init__(self, fp=None):
+
+ self.stdout = None
+ self.stderr = None
+ self.output = None
+ self.fp = fp
+
+ def log(self, out, verbose, is_info=True):
+ out = out.removesuffix('\n')
+
+ if verbose:
+ if is_info:
+ print(out)
+ else:
+ print(out, file=sys.stderr)
+
+ if self.fp:
+ self.fp.write(out + "\n")
+
+ async def _read(self, stream, verbose, is_info):
+ """Ancillary routine to capture while displaying"""
+
+ while stream is not None:
+ line = await stream.readline()
+ if line:
+ out = line.decode("utf-8", errors="backslashreplace")
+ self.log(out, verbose, is_info)
+ if is_info:
+ self.stdout += out
+ else:
+ self.stderr += out
+ else:
+ break
+
+ async def run(self, cmd, capture_output=False, check=False,
+ env=None, verbose=True):
+
+ """
+ Execute an arbitrary command, handling errors.
+
+ Please notice that this class is not thread safe
+ """
+
+ self.stdout = ""
+ self.stderr = ""
+
+ self.log("$ " + " ".join(cmd), verbose)
+
+ proc = await asyncio.create_subprocess_exec(cmd[0],
+ *cmd[1:],
+ env=env,
+ stdout=asyncio.subprocess.PIPE,
+ stderr=asyncio.subprocess.PIPE)
+
+ # Handle input and output in realtime
+ await asyncio.gather(
+ self._read(proc.stdout, verbose, True),
+ self._read(proc.stderr, verbose, False),
+ )
+
+ await proc.wait()
+
+ if check and proc.returncode > 0:
+ raise subprocess.CalledProcessError(returncode=proc.returncode,
+ cmd=" ".join(cmd),
+ output=self.stdout,
+ stderr=self.stderr)
+
+ if capture_output:
+ if proc.returncode > 0:
+ self.log(f"Error {proc.returncode}", verbose=True, is_info=False)
+ return ""
+
+ return self.output
+
+ ret = subprocess.CompletedProcess(args=cmd,
+ returncode=proc.returncode,
+ stdout=self.stdout,
+ stderr=self.stderr)
+
+ return ret
+
+
+class SphinxVenv:
+ """
+ Installs Sphinx on one virtual env per Sphinx version with a minimal
+ set of dependencies, adjusting them to each specific version.
+ """
+
+ def __init__(self):
+ """Initialize instance variables"""
+
+ self.built_time = {}
+ self.first_run = True
+
+ async def _handle_version(self, args, fp,
+ cur_ver, cur_requirements, python_bin):
+ """Handle a single Sphinx version"""
+
+ cmd = AsyncCommands(fp)
+
+ ver = ".".join(map(str, cur_ver))
+
+ if not self.first_run and args.wait_input and args.build:
+ ret = input("Press Enter to continue or 'a' to abort: ").strip().lower()
+ if ret == "a":
+ print("Aborted.")
+ sys.exit()
+ else:
+ self.first_run = False
+
+ venv_dir = f"Sphinx_{ver}"
+ req_file = f"requirements_{ver}.txt"
+
+ cmd.log(f"\nSphinx {ver} with {python_bin}", verbose=True)
+
+ # Create venv
+ await cmd.run([python_bin, "-m", "venv", venv_dir],
+ verbose=args.verbose, check=True)
+ pip = os.path.join(venv_dir, "bin/pip")
+
+ # Create install list
+ reqs = []
+ for pkg, verstr in cur_requirements.items():
+ reqs.append(f"{pkg}=={verstr}")
+
+ reqs.append(f"Sphinx=={ver}")
+
+ await cmd.run([pip, "install"] + reqs, check=True, verbose=args.verbose)
+
+ # Freeze environment
+ result = await cmd.run([pip, "freeze"], verbose=False, check=True)
+
+ # Pip install succeeded. Write requirements file
+ if args.req_file:
+ with open(req_file, "w", encoding="utf-8") as fp:
+ fp.write(result.stdout)
+
+ if args.build:
+ start_time = time.time()
+
+ # Prepare a venv environment
+ env = os.environ.copy()
+ bin_dir = os.path.join(venv_dir, "bin")
+ env["PATH"] = bin_dir + ":" + env["PATH"]
+ env["VIRTUAL_ENV"] = venv_dir
+ if "PYTHONHOME" in env:
+ del env["PYTHONHOME"]
+
+ # Test doc build
+ await cmd.run(["make", "cleandocs"], env=env, check=True)
+ make = ["make"]
+
+ if args.output:
+ sphinx_build = os.path.realpath(f"{bin_dir}/sphinx-build")
+ make += [f"O={args.output}", f"SPHINXBUILD={sphinx_build}"]
+
+ if args.make_args:
+ make += args.make_args
+
+ make += args.targets
+
+ if args.verbose:
+ cmd.log(f". {bin_dir}/activate", verbose=True)
+ await cmd.run(make, env=env, check=True, verbose=True)
+ if args.verbose:
+ cmd.log("deactivate", verbose=True)
+
+ end_time = time.time()
+ elapsed_time = end_time - start_time
+ hours, minutes = divmod(elapsed_time, 3600)
+ minutes, seconds = divmod(minutes, 60)
+
+ hours = int(hours)
+ minutes = int(minutes)
+ seconds = int(seconds)
+
+ self.built_time[ver] = f"{hours:02d}:{minutes:02d}:{seconds:02d}"
+
+ cmd.log(f"Finished doc build for Sphinx {ver}. Elapsed time: {self.built_time[ver]}", verbose=True)
+
+ async def run(self, args):
+ """
+ Navigate though multiple Sphinx versions, handling each of them
+ on a loop.
+ """
+
+ if args.log:
+ fp = open(args.log, "w", encoding="utf-8")
+ if not args.verbose:
+ args.verbose = False
+ else:
+ fp = None
+ if not args.verbose:
+ args.verbose = True
+
+ cur_requirements = {}
+ python_bin = min_python_bin
+
+ vers = set(SPHINX_REQUIREMENTS.keys()) | set(args.versions)
+
+ for cur_ver in sorted(vers):
+ if cur_ver in SPHINX_REQUIREMENTS:
+ new_reqs = SPHINX_REQUIREMENTS[cur_ver]
+ cur_requirements.update(new_reqs)
+
+ if cur_ver in PYTHON_VER_CHANGES: # pylint: disable=R1715
+ python_bin = PYTHON_VER_CHANGES[cur_ver]
+
+ if cur_ver not in args.versions:
+ continue
+
+ if args.min_version:
+ if cur_ver < args.min_version:
+ continue
+
+ if args.max_version:
+ if cur_ver > args.max_version:
+ break
+
+ await self._handle_version(args, fp, cur_ver, cur_requirements,
+ python_bin)
+
+ if args.build:
+ cmd = AsyncCommands(fp)
+ cmd.log("\nSummary:", verbose=True)
+ for ver, elapsed_time in sorted(self.built_time.items()):
+ cmd.log(f"\tSphinx {ver} elapsed time: {elapsed_time}",
+ verbose=True)
+
+ if fp:
+ fp.close()
+
+def parse_version(ver_str):
+ """Convert a version string into a tuple."""
+
+ return tuple(map(int, ver_str.split(".")))
+
+
+DEFAULT_VERS = " - "
+DEFAULT_VERS += "\n - ".join(map(lambda v: f"{v[0]}.{v[1]}.{v[2]}",
+ DEFAULT_VERSIONS_TO_TEST))
+
+SCRIPT = os.path.relpath(__file__)
+
+DESCRIPTION = f"""
+This tool allows creating Python virtual environments for different
+Sphinx versions that are supported by the Linux Kernel build system.
+
+Besides creating the virtual environment, it can also test building
+the documentation using "make htmldocs" (and/or other doc targets).
+
+If called without "--versions" argument, it covers the versions shipped
+on major distros, plus the lowest supported version:
+
+{DEFAULT_VERS}
+
+A typical usage is to run:
+
+ {SCRIPT} -m -l sphinx_builds.log
+
+This will create one virtual env for the default version set and run
+"make htmldocs" for each version, creating a log file with the
+excecuted commands on it.
+
+NOTE: The build time can be very long, specially on old versions. Also, there
+is a known bug with Sphinx version 6.0.x: each subprocess uses a lot of
+memory. That, together with "-jauto" may cause OOM killer to cause
+failures at the doc generation. To minimize the risk, you may use the
+"-a" command line parameter to constrain the built directories and/or
+reduce the number of threads from "-jauto" to, for instance, "-j4":
+
+ {SCRIPT} -m -V 6.0.1 -a "SPHINXDIRS=process" "SPHINXOPTS='-j4'"
+
+"""
+
+MAKE_TARGETS = [
+ "htmldocs",
+ "texinfodocs",
+ "infodocs",
+ "latexdocs",
+ "pdfdocs",
+ "epubdocs",
+ "xmldocs",
+]
+
+async def main():
+ """Main program"""
+
+ parser = argparse.ArgumentParser(description=DESCRIPTION,
+ formatter_class=argparse.RawDescriptionHelpFormatter)
+
+ ver_group = parser.add_argument_group("Version range options")
+
+ ver_group.add_argument('-V', '--versions', nargs="*",
+ default=DEFAULT_VERSIONS_TO_TEST,type=parse_version,
+ help='Sphinx versions to test')
+ ver_group.add_argument('--min-version', "--min", type=parse_version,
+ help='Sphinx minimal version')
+ ver_group.add_argument('--max-version', "--max", type=parse_version,
+ help='Sphinx maximum version')
+ ver_group.add_argument('-f', '--full', action='store_true',
+ help='Add all Sphinx (major,minor) supported versions to the version range')
+
+ build_group = parser.add_argument_group("Build options")
+
+ build_group.add_argument('-b', '--build', action='store_true',
+ help='Build documentation')
+ build_group.add_argument('-a', '--make-args', nargs="*",
+ help='extra arguments for make, like SPHINXDIRS=netlink/specs',
+ )
+ build_group.add_argument('-t', '--targets', nargs="+", choices=MAKE_TARGETS,
+ default=[MAKE_TARGETS[0]],
+ help="make build targets. Default: htmldocs.")
+ build_group.add_argument("-o", '--output',
+ help="output directory for the make O=OUTPUT")
+
+ other_group = parser.add_argument_group("Other options")
+
+ other_group.add_argument('-r', '--req-file', action='store_true',
+ help='write a requirements.txt file')
+ other_group.add_argument('-l', '--log',
+ help='Log command output on a file')
+ other_group.add_argument('-v', '--verbose', action='store_true',
+ help='Verbose all commands')
+ other_group.add_argument('-i', '--wait-input', action='store_true',
+ help='Wait for an enter before going to the next version')
+
+ args = parser.parse_args()
+
+ if not args.make_args:
+ args.make_args = []
+
+ sphinx_versions = sorted(list(SPHINX_REQUIREMENTS.keys()))
+
+ if args.full:
+ args.versions += list(SPHINX_REQUIREMENTS.keys())
+
+ venv = SphinxVenv()
+ await venv.run(args)
+
+
+# Call main method
+if __name__ == "__main__":
+ asyncio.run(main())
diff --git a/tools/gpio/Makefile b/tools/gpio/Makefile
index ed565eb52275..342e056c8c66 100644
--- a/tools/gpio/Makefile
+++ b/tools/gpio/Makefile
@@ -77,7 +77,7 @@ $(OUTPUT)gpio-watch: $(GPIO_WATCH_IN)
clean:
rm -f $(ALL_PROGRAMS)
- rm -f $(OUTPUT)include/linux/gpio.h
+ rm -rf $(OUTPUT)include
find $(or $(OUTPUT),.) -name '*.o' -delete -o -name '\.*.d' -delete -o -name '\.*.cmd' -delete
install: $(ALL_PROGRAMS)
diff --git a/tools/iio/iio_event_monitor.c b/tools/iio/iio_event_monitor.c
index eab7b082f19d..03ca33869ce8 100644
--- a/tools/iio/iio_event_monitor.c
+++ b/tools/iio/iio_event_monitor.c
@@ -64,6 +64,7 @@ static const char * const iio_chan_type_name_spec[] = {
[IIO_COLORTEMP] = "colortemp",
[IIO_CHROMATICITY] = "chromaticity",
[IIO_ATTENTION] = "attention",
+ [IIO_ALTCURRENT] = "altcurrent",
};
static const char * const iio_ev_type_text[] = {
@@ -140,6 +141,10 @@ static const char * const iio_modifier_names[] = {
[IIO_MOD_PITCH] = "pitch",
[IIO_MOD_YAW] = "yaw",
[IIO_MOD_ROLL] = "roll",
+ [IIO_MOD_RMS] = "rms",
+ [IIO_MOD_ACTIVE] = "active",
+ [IIO_MOD_REACTIVE] = "reactive",
+ [IIO_MOD_APPARENT] = "apparent",
};
static bool event_is_known(struct iio_event_data *event)
@@ -187,6 +192,7 @@ static bool event_is_known(struct iio_event_data *event)
case IIO_COLORTEMP:
case IIO_CHROMATICITY:
case IIO_ATTENTION:
+ case IIO_ALTCURRENT:
break;
default:
return false;
@@ -238,6 +244,10 @@ static bool event_is_known(struct iio_event_data *event)
case IIO_MOD_PM4:
case IIO_MOD_PM10:
case IIO_MOD_O2:
+ case IIO_MOD_RMS:
+ case IIO_MOD_ACTIVE:
+ case IIO_MOD_REACTIVE:
+ case IIO_MOD_APPARENT:
break;
default:
return false;
diff --git a/tools/include/asm-generic/bitops/__fls.h b/tools/include/asm-generic/bitops/__fls.h
index e974ec932ec1..35f33780ca6c 100644
--- a/tools/include/asm-generic/bitops/__fls.h
+++ b/tools/include/asm-generic/bitops/__fls.h
@@ -10,7 +10,7 @@
*
* Undefined if no set bit exists, so code should check against 0 first.
*/
-static __always_inline unsigned int generic___fls(unsigned long word)
+static __always_inline __attribute_const__ unsigned int generic___fls(unsigned long word)
{
unsigned int num = BITS_PER_LONG - 1;
diff --git a/tools/include/asm-generic/bitops/fls.h b/tools/include/asm-generic/bitops/fls.h
index 26f3ce1dd6e4..8eed3437edb9 100644
--- a/tools/include/asm-generic/bitops/fls.h
+++ b/tools/include/asm-generic/bitops/fls.h
@@ -10,7 +10,7 @@
* Note fls(0) = 0, fls(1) = 1, fls(0x80000000) = 32.
*/
-static __always_inline int generic_fls(unsigned int x)
+static __always_inline __attribute_const__ int generic_fls(unsigned int x)
{
int r = 32;
diff --git a/tools/include/asm-generic/bitops/fls64.h b/tools/include/asm-generic/bitops/fls64.h
index 866f2b2304ff..b5f58dd261a3 100644
--- a/tools/include/asm-generic/bitops/fls64.h
+++ b/tools/include/asm-generic/bitops/fls64.h
@@ -16,7 +16,7 @@
* at position 64.
*/
#if BITS_PER_LONG == 32
-static __always_inline int fls64(__u64 x)
+static __always_inline __attribute_const__ int fls64(__u64 x)
{
__u32 h = x >> 32;
if (h)
@@ -24,7 +24,7 @@ static __always_inline int fls64(__u64 x)
return fls(x);
}
#elif BITS_PER_LONG == 64
-static __always_inline int fls64(__u64 x)
+static __always_inline __attribute_const__ int fls64(__u64 x)
{
if (x == 0)
return 0;
diff --git a/tools/include/asm-generic/io.h b/tools/include/asm-generic/io.h
new file mode 100644
index 000000000000..e5a0b07ad452
--- /dev/null
+++ b/tools/include/asm-generic/io.h
@@ -0,0 +1,482 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _TOOLS_ASM_GENERIC_IO_H
+#define _TOOLS_ASM_GENERIC_IO_H
+
+#include <asm/barrier.h>
+#include <asm/byteorder.h>
+
+#include <linux/compiler.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+
+#ifndef mmiowb_set_pending
+#define mmiowb_set_pending() do { } while (0)
+#endif
+
+#ifndef __io_br
+#define __io_br() barrier()
+#endif
+
+/* prevent prefetching of coherent DMA data ahead of a dma-complete */
+#ifndef __io_ar
+#ifdef rmb
+#define __io_ar(v) rmb()
+#else
+#define __io_ar(v) barrier()
+#endif
+#endif
+
+/* flush writes to coherent DMA data before possibly triggering a DMA read */
+#ifndef __io_bw
+#ifdef wmb
+#define __io_bw() wmb()
+#else
+#define __io_bw() barrier()
+#endif
+#endif
+
+/* serialize device access against a spin_unlock, usually handled there. */
+#ifndef __io_aw
+#define __io_aw() mmiowb_set_pending()
+#endif
+
+#ifndef __io_pbw
+#define __io_pbw() __io_bw()
+#endif
+
+#ifndef __io_paw
+#define __io_paw() __io_aw()
+#endif
+
+#ifndef __io_pbr
+#define __io_pbr() __io_br()
+#endif
+
+#ifndef __io_par
+#define __io_par(v) __io_ar(v)
+#endif
+
+#ifndef _THIS_IP_
+#define _THIS_IP_ 0
+#endif
+
+static inline void log_write_mmio(u64 val, u8 width, volatile void __iomem *addr,
+ unsigned long caller_addr, unsigned long caller_addr0) {}
+static inline void log_post_write_mmio(u64 val, u8 width, volatile void __iomem *addr,
+ unsigned long caller_addr, unsigned long caller_addr0) {}
+static inline void log_read_mmio(u8 width, const volatile void __iomem *addr,
+ unsigned long caller_addr, unsigned long caller_addr0) {}
+static inline void log_post_read_mmio(u64 val, u8 width, const volatile void __iomem *addr,
+ unsigned long caller_addr, unsigned long caller_addr0) {}
+
+/*
+ * __raw_{read,write}{b,w,l,q}() access memory in native endianness.
+ *
+ * On some architectures memory mapped IO needs to be accessed differently.
+ * On the simple architectures, we just read/write the memory location
+ * directly.
+ */
+
+#ifndef __raw_readb
+#define __raw_readb __raw_readb
+static inline u8 __raw_readb(const volatile void __iomem *addr)
+{
+ return *(const volatile u8 __force *)addr;
+}
+#endif
+
+#ifndef __raw_readw
+#define __raw_readw __raw_readw
+static inline u16 __raw_readw(const volatile void __iomem *addr)
+{
+ return *(const volatile u16 __force *)addr;
+}
+#endif
+
+#ifndef __raw_readl
+#define __raw_readl __raw_readl
+static inline u32 __raw_readl(const volatile void __iomem *addr)
+{
+ return *(const volatile u32 __force *)addr;
+}
+#endif
+
+#ifndef __raw_readq
+#define __raw_readq __raw_readq
+static inline u64 __raw_readq(const volatile void __iomem *addr)
+{
+ return *(const volatile u64 __force *)addr;
+}
+#endif
+
+#ifndef __raw_writeb
+#define __raw_writeb __raw_writeb
+static inline void __raw_writeb(u8 value, volatile void __iomem *addr)
+{
+ *(volatile u8 __force *)addr = value;
+}
+#endif
+
+#ifndef __raw_writew
+#define __raw_writew __raw_writew
+static inline void __raw_writew(u16 value, volatile void __iomem *addr)
+{
+ *(volatile u16 __force *)addr = value;
+}
+#endif
+
+#ifndef __raw_writel
+#define __raw_writel __raw_writel
+static inline void __raw_writel(u32 value, volatile void __iomem *addr)
+{
+ *(volatile u32 __force *)addr = value;
+}
+#endif
+
+#ifndef __raw_writeq
+#define __raw_writeq __raw_writeq
+static inline void __raw_writeq(u64 value, volatile void __iomem *addr)
+{
+ *(volatile u64 __force *)addr = value;
+}
+#endif
+
+/*
+ * {read,write}{b,w,l,q}() access little endian memory and return result in
+ * native endianness.
+ */
+
+#ifndef readb
+#define readb readb
+static inline u8 readb(const volatile void __iomem *addr)
+{
+ u8 val;
+
+ log_read_mmio(8, addr, _THIS_IP_, _RET_IP_);
+ __io_br();
+ val = __raw_readb(addr);
+ __io_ar(val);
+ log_post_read_mmio(val, 8, addr, _THIS_IP_, _RET_IP_);
+ return val;
+}
+#endif
+
+#ifndef readw
+#define readw readw
+static inline u16 readw(const volatile void __iomem *addr)
+{
+ u16 val;
+
+ log_read_mmio(16, addr, _THIS_IP_, _RET_IP_);
+ __io_br();
+ val = __le16_to_cpu((__le16 __force)__raw_readw(addr));
+ __io_ar(val);
+ log_post_read_mmio(val, 16, addr, _THIS_IP_, _RET_IP_);
+ return val;
+}
+#endif
+
+#ifndef readl
+#define readl readl
+static inline u32 readl(const volatile void __iomem *addr)
+{
+ u32 val;
+
+ log_read_mmio(32, addr, _THIS_IP_, _RET_IP_);
+ __io_br();
+ val = __le32_to_cpu((__le32 __force)__raw_readl(addr));
+ __io_ar(val);
+ log_post_read_mmio(val, 32, addr, _THIS_IP_, _RET_IP_);
+ return val;
+}
+#endif
+
+#ifndef readq
+#define readq readq
+static inline u64 readq(const volatile void __iomem *addr)
+{
+ u64 val;
+
+ log_read_mmio(64, addr, _THIS_IP_, _RET_IP_);
+ __io_br();
+ val = __le64_to_cpu((__le64 __force)__raw_readq(addr));
+ __io_ar(val);
+ log_post_read_mmio(val, 64, addr, _THIS_IP_, _RET_IP_);
+ return val;
+}
+#endif
+
+#ifndef writeb
+#define writeb writeb
+static inline void writeb(u8 value, volatile void __iomem *addr)
+{
+ log_write_mmio(value, 8, addr, _THIS_IP_, _RET_IP_);
+ __io_bw();
+ __raw_writeb(value, addr);
+ __io_aw();
+ log_post_write_mmio(value, 8, addr, _THIS_IP_, _RET_IP_);
+}
+#endif
+
+#ifndef writew
+#define writew writew
+static inline void writew(u16 value, volatile void __iomem *addr)
+{
+ log_write_mmio(value, 16, addr, _THIS_IP_, _RET_IP_);
+ __io_bw();
+ __raw_writew((u16 __force)cpu_to_le16(value), addr);
+ __io_aw();
+ log_post_write_mmio(value, 16, addr, _THIS_IP_, _RET_IP_);
+}
+#endif
+
+#ifndef writel
+#define writel writel
+static inline void writel(u32 value, volatile void __iomem *addr)
+{
+ log_write_mmio(value, 32, addr, _THIS_IP_, _RET_IP_);
+ __io_bw();
+ __raw_writel((u32 __force)__cpu_to_le32(value), addr);
+ __io_aw();
+ log_post_write_mmio(value, 32, addr, _THIS_IP_, _RET_IP_);
+}
+#endif
+
+#ifndef writeq
+#define writeq writeq
+static inline void writeq(u64 value, volatile void __iomem *addr)
+{
+ log_write_mmio(value, 64, addr, _THIS_IP_, _RET_IP_);
+ __io_bw();
+ __raw_writeq((u64 __force)__cpu_to_le64(value), addr);
+ __io_aw();
+ log_post_write_mmio(value, 64, addr, _THIS_IP_, _RET_IP_);
+}
+#endif
+
+/*
+ * {read,write}{b,w,l,q}_relaxed() are like the regular version, but
+ * are not guaranteed to provide ordering against spinlocks or memory
+ * accesses.
+ */
+#ifndef readb_relaxed
+#define readb_relaxed readb_relaxed
+static inline u8 readb_relaxed(const volatile void __iomem *addr)
+{
+ u8 val;
+
+ log_read_mmio(8, addr, _THIS_IP_, _RET_IP_);
+ val = __raw_readb(addr);
+ log_post_read_mmio(val, 8, addr, _THIS_IP_, _RET_IP_);
+ return val;
+}
+#endif
+
+#ifndef readw_relaxed
+#define readw_relaxed readw_relaxed
+static inline u16 readw_relaxed(const volatile void __iomem *addr)
+{
+ u16 val;
+
+ log_read_mmio(16, addr, _THIS_IP_, _RET_IP_);
+ val = __le16_to_cpu((__le16 __force)__raw_readw(addr));
+ log_post_read_mmio(val, 16, addr, _THIS_IP_, _RET_IP_);
+ return val;
+}
+#endif
+
+#ifndef readl_relaxed
+#define readl_relaxed readl_relaxed
+static inline u32 readl_relaxed(const volatile void __iomem *addr)
+{
+ u32 val;
+
+ log_read_mmio(32, addr, _THIS_IP_, _RET_IP_);
+ val = __le32_to_cpu((__le32 __force)__raw_readl(addr));
+ log_post_read_mmio(val, 32, addr, _THIS_IP_, _RET_IP_);
+ return val;
+}
+#endif
+
+#if defined(readq) && !defined(readq_relaxed)
+#define readq_relaxed readq_relaxed
+static inline u64 readq_relaxed(const volatile void __iomem *addr)
+{
+ u64 val;
+
+ log_read_mmio(64, addr, _THIS_IP_, _RET_IP_);
+ val = __le64_to_cpu((__le64 __force)__raw_readq(addr));
+ log_post_read_mmio(val, 64, addr, _THIS_IP_, _RET_IP_);
+ return val;
+}
+#endif
+
+#ifndef writeb_relaxed
+#define writeb_relaxed writeb_relaxed
+static inline void writeb_relaxed(u8 value, volatile void __iomem *addr)
+{
+ log_write_mmio(value, 8, addr, _THIS_IP_, _RET_IP_);
+ __raw_writeb(value, addr);
+ log_post_write_mmio(value, 8, addr, _THIS_IP_, _RET_IP_);
+}
+#endif
+
+#ifndef writew_relaxed
+#define writew_relaxed writew_relaxed
+static inline void writew_relaxed(u16 value, volatile void __iomem *addr)
+{
+ log_write_mmio(value, 16, addr, _THIS_IP_, _RET_IP_);
+ __raw_writew((u16 __force)cpu_to_le16(value), addr);
+ log_post_write_mmio(value, 16, addr, _THIS_IP_, _RET_IP_);
+}
+#endif
+
+#ifndef writel_relaxed
+#define writel_relaxed writel_relaxed
+static inline void writel_relaxed(u32 value, volatile void __iomem *addr)
+{
+ log_write_mmio(value, 32, addr, _THIS_IP_, _RET_IP_);
+ __raw_writel((u32 __force)__cpu_to_le32(value), addr);
+ log_post_write_mmio(value, 32, addr, _THIS_IP_, _RET_IP_);
+}
+#endif
+
+#if defined(writeq) && !defined(writeq_relaxed)
+#define writeq_relaxed writeq_relaxed
+static inline void writeq_relaxed(u64 value, volatile void __iomem *addr)
+{
+ log_write_mmio(value, 64, addr, _THIS_IP_, _RET_IP_);
+ __raw_writeq((u64 __force)__cpu_to_le64(value), addr);
+ log_post_write_mmio(value, 64, addr, _THIS_IP_, _RET_IP_);
+}
+#endif
+
+/*
+ * {read,write}s{b,w,l,q}() repeatedly access the same memory address in
+ * native endianness in 8-, 16-, 32- or 64-bit chunks (@count times).
+ */
+#ifndef readsb
+#define readsb readsb
+static inline void readsb(const volatile void __iomem *addr, void *buffer,
+ unsigned int count)
+{
+ if (count) {
+ u8 *buf = buffer;
+
+ do {
+ u8 x = __raw_readb(addr);
+ *buf++ = x;
+ } while (--count);
+ }
+}
+#endif
+
+#ifndef readsw
+#define readsw readsw
+static inline void readsw(const volatile void __iomem *addr, void *buffer,
+ unsigned int count)
+{
+ if (count) {
+ u16 *buf = buffer;
+
+ do {
+ u16 x = __raw_readw(addr);
+ *buf++ = x;
+ } while (--count);
+ }
+}
+#endif
+
+#ifndef readsl
+#define readsl readsl
+static inline void readsl(const volatile void __iomem *addr, void *buffer,
+ unsigned int count)
+{
+ if (count) {
+ u32 *buf = buffer;
+
+ do {
+ u32 x = __raw_readl(addr);
+ *buf++ = x;
+ } while (--count);
+ }
+}
+#endif
+
+#ifndef readsq
+#define readsq readsq
+static inline void readsq(const volatile void __iomem *addr, void *buffer,
+ unsigned int count)
+{
+ if (count) {
+ u64 *buf = buffer;
+
+ do {
+ u64 x = __raw_readq(addr);
+ *buf++ = x;
+ } while (--count);
+ }
+}
+#endif
+
+#ifndef writesb
+#define writesb writesb
+static inline void writesb(volatile void __iomem *addr, const void *buffer,
+ unsigned int count)
+{
+ if (count) {
+ const u8 *buf = buffer;
+
+ do {
+ __raw_writeb(*buf++, addr);
+ } while (--count);
+ }
+}
+#endif
+
+#ifndef writesw
+#define writesw writesw
+static inline void writesw(volatile void __iomem *addr, const void *buffer,
+ unsigned int count)
+{
+ if (count) {
+ const u16 *buf = buffer;
+
+ do {
+ __raw_writew(*buf++, addr);
+ } while (--count);
+ }
+}
+#endif
+
+#ifndef writesl
+#define writesl writesl
+static inline void writesl(volatile void __iomem *addr, const void *buffer,
+ unsigned int count)
+{
+ if (count) {
+ const u32 *buf = buffer;
+
+ do {
+ __raw_writel(*buf++, addr);
+ } while (--count);
+ }
+}
+#endif
+
+#ifndef writesq
+#define writesq writesq
+static inline void writesq(volatile void __iomem *addr, const void *buffer,
+ unsigned int count)
+{
+ if (count) {
+ const u64 *buf = buffer;
+
+ do {
+ __raw_writeq(*buf++, addr);
+ } while (--count);
+ }
+}
+#endif
+
+#endif /* _TOOLS_ASM_GENERIC_IO_H */
diff --git a/tools/include/asm/io.h b/tools/include/asm/io.h
new file mode 100644
index 000000000000..eed5066f25c4
--- /dev/null
+++ b/tools/include/asm/io.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _TOOLS_ASM_IO_H
+#define _TOOLS_ASM_IO_H
+
+#if defined(__i386__) || defined(__x86_64__)
+#include "../../arch/x86/include/asm/io.h"
+#else
+#include <asm-generic/io.h>
+#endif
+
+#endif /* _TOOLS_ASM_IO_H */
diff --git a/tools/include/linux/args.h b/tools/include/linux/args.h
new file mode 100644
index 000000000000..2e8e65d975c7
--- /dev/null
+++ b/tools/include/linux/args.h
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef _LINUX_ARGS_H
+#define _LINUX_ARGS_H
+
+/*
+ * How do these macros work?
+ *
+ * In __COUNT_ARGS() _0 to _12 are just placeholders from the start
+ * in order to make sure _n is positioned over the correct number
+ * from 12 to 0 (depending on X, which is a variadic argument list).
+ * They serve no purpose other than occupying a position. Since each
+ * macro parameter must have a distinct identifier, those identifiers
+ * are as good as any.
+ *
+ * In COUNT_ARGS() we use actual integers, so __COUNT_ARGS() returns
+ * that as _n.
+ */
+
+/* This counts to 15. Any more, it will return 16th argument. */
+#define __COUNT_ARGS(_0, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _13, _14, _15, _n, X...) _n
+#define COUNT_ARGS(X...) __COUNT_ARGS(, ##X, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0)
+
+/* Concatenate two parameters, but allow them to be expanded beforehand. */
+#define __CONCAT(a, b) a ## b
+#define CONCATENATE(a, b) __CONCAT(a, b)
+
+#endif /* _LINUX_ARGS_H */
diff --git a/tools/include/linux/atomic.h b/tools/include/linux/atomic.h
index 01907b33537e..50c66ba9ada5 100644
--- a/tools/include/linux/atomic.h
+++ b/tools/include/linux/atomic.h
@@ -12,4 +12,26 @@ void atomic_long_set(atomic_long_t *v, long i);
#define atomic_cmpxchg_release atomic_cmpxchg
#endif /* atomic_cmpxchg_relaxed */
+static inline bool atomic_try_cmpxchg(atomic_t *ptr, int *oldp, int new)
+{
+ int ret, old = *oldp;
+
+ ret = atomic_cmpxchg(ptr, old, new);
+ if (ret != old)
+ *oldp = ret;
+ return ret == old;
+}
+
+static inline bool atomic_inc_unless_negative(atomic_t *v)
+{
+ int c = atomic_read(v);
+
+ do {
+ if (unlikely(c < 0))
+ return false;
+ } while (!atomic_try_cmpxchg(v, &c, c + 1));
+
+ return true;
+}
+
#endif /* __TOOLS_LINUX_ATOMIC_H */
diff --git a/tools/include/linux/bitmap.h b/tools/include/linux/bitmap.h
index d4d300040d01..0d992245c600 100644
--- a/tools/include/linux/bitmap.h
+++ b/tools/include/linux/bitmap.h
@@ -3,6 +3,7 @@
#define _TOOLS_LINUX_BITMAP_H
#include <string.h>
+#include <asm-generic/bitsperlong.h>
#include <linux/align.h>
#include <linux/bitops.h>
#include <linux/find.h>
diff --git a/tools/include/linux/bits.h b/tools/include/linux/bits.h
index 7ad056219115..a40cc861b3a7 100644
--- a/tools/include/linux/bits.h
+++ b/tools/include/linux/bits.h
@@ -2,10 +2,8 @@
#ifndef __LINUX_BITS_H
#define __LINUX_BITS_H
-#include <linux/const.h>
#include <vdso/bits.h>
#include <uapi/linux/bits.h>
-#include <asm/bitsperlong.h>
#define BIT_MASK(nr) (UL(1) << ((nr) % BITS_PER_LONG))
#define BIT_WORD(nr) ((nr) / BITS_PER_LONG)
@@ -50,10 +48,14 @@
(type_max(t) << (l) & \
type_max(t) >> (BITS_PER_TYPE(t) - 1 - (h)))))
+#define GENMASK(h, l) GENMASK_TYPE(unsigned long, h, l)
+#define GENMASK_ULL(h, l) GENMASK_TYPE(unsigned long long, h, l)
+
#define GENMASK_U8(h, l) GENMASK_TYPE(u8, h, l)
#define GENMASK_U16(h, l) GENMASK_TYPE(u16, h, l)
#define GENMASK_U32(h, l) GENMASK_TYPE(u32, h, l)
#define GENMASK_U64(h, l) GENMASK_TYPE(u64, h, l)
+#define GENMASK_U128(h, l) GENMASK_TYPE(u128, h, l)
/*
* Fixed-type variants of BIT(), with additional checks like GENMASK_TYPE(). The
@@ -79,28 +81,9 @@
* BUILD_BUG_ON_ZERO is not available in h files included from asm files,
* disable the input check if that is the case.
*/
-#define GENMASK_INPUT_CHECK(h, l) 0
+#define GENMASK(h, l) __GENMASK(h, l)
+#define GENMASK_ULL(h, l) __GENMASK_ULL(h, l)
#endif /* !defined(__ASSEMBLY__) */
-#define GENMASK(h, l) \
- (GENMASK_INPUT_CHECK(h, l) + __GENMASK(h, l))
-#define GENMASK_ULL(h, l) \
- (GENMASK_INPUT_CHECK(h, l) + __GENMASK_ULL(h, l))
-
-#if !defined(__ASSEMBLY__)
-/*
- * Missing asm support
- *
- * __GENMASK_U128() depends on _BIT128() which would not work
- * in the asm code, as it shifts an 'unsigned __int128' data
- * type instead of direct representation of 128 bit constants
- * such as long and unsigned long. The fundamental problem is
- * that a 128 bit constant will get silently truncated by the
- * gcc compiler.
- */
-#define GENMASK_U128(h, l) \
- (GENMASK_INPUT_CHECK(h, l) + __GENMASK_U128(h, l))
-#endif
-
#endif /* __LINUX_BITS_H */
diff --git a/tools/include/linux/cfi_types.h b/tools/include/linux/cfi_types.h
index 6b8713675765..a86af9bc8bdc 100644
--- a/tools/include/linux/cfi_types.h
+++ b/tools/include/linux/cfi_types.h
@@ -8,7 +8,7 @@
#ifdef __ASSEMBLY__
#include <linux/linkage.h>
-#ifdef CONFIG_CFI_CLANG
+#ifdef CONFIG_CFI
/*
* Use the __kcfi_typeid_<function> type identifier symbol to
* annotate indirectly called assembly functions. The compiler emits
@@ -29,17 +29,40 @@
#define SYM_TYPED_START(name, linkage, align...) \
SYM_TYPED_ENTRY(name, linkage, align)
-#else /* CONFIG_CFI_CLANG */
+#else /* CONFIG_CFI */
#define SYM_TYPED_START(name, linkage, align...) \
SYM_START(name, linkage, align)
-#endif /* CONFIG_CFI_CLANG */
+#endif /* CONFIG_CFI */
#ifndef SYM_TYPED_FUNC_START
#define SYM_TYPED_FUNC_START(name) \
SYM_TYPED_START(name, SYM_L_GLOBAL, SYM_A_ALIGN)
#endif
+#else /* __ASSEMBLY__ */
+
+#ifdef CONFIG_CFI
+#define DEFINE_CFI_TYPE(name, func) \
+ /* \
+ * Force a reference to the function so the compiler generates \
+ * __kcfi_typeid_<func>. \
+ */ \
+ __ADDRESSABLE(func); \
+ /* u32 name __ro_after_init = __kcfi_typeid_<func> */ \
+ extern u32 name; \
+ asm ( \
+ " .pushsection .data..ro_after_init,\"aw\",\%progbits \n" \
+ " .type " #name ",\%object \n" \
+ " .globl " #name " \n" \
+ " .p2align 2, 0x0 \n" \
+ #name ": \n" \
+ " .4byte __kcfi_typeid_" #func " \n" \
+ " .size " #name ", 4 \n" \
+ " .popsection \n" \
+ );
+#endif
+
#endif /* __ASSEMBLY__ */
#endif /* _LINUX_CFI_TYPES_H */
diff --git a/tools/include/linux/compiler.h b/tools/include/linux/compiler.h
index 33411ca0cc90..f40bd2b04c29 100644
--- a/tools/include/linux/compiler.h
+++ b/tools/include/linux/compiler.h
@@ -138,6 +138,10 @@
# define __force
#endif
+#ifndef __iomem
+# define __iomem
+#endif
+
#ifndef __weak
# define __weak __attribute__((weak))
#endif
diff --git a/tools/include/linux/gfp_types.h b/tools/include/linux/gfp_types.h
index 5f9f1ed190a0..65db9349f905 100644
--- a/tools/include/linux/gfp_types.h
+++ b/tools/include/linux/gfp_types.h
@@ -1 +1,392 @@
-#include "../../../include/linux/gfp_types.h"
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __LINUX_GFP_TYPES_H
+#define __LINUX_GFP_TYPES_H
+
+#include <linux/bits.h>
+
+/* The typedef is in types.h but we want the documentation here */
+#if 0
+/**
+ * typedef gfp_t - Memory allocation flags.
+ *
+ * GFP flags are commonly used throughout Linux to indicate how memory
+ * should be allocated. The GFP acronym stands for get_free_pages(),
+ * the underlying memory allocation function. Not every GFP flag is
+ * supported by every function which may allocate memory. Most users
+ * will want to use a plain ``GFP_KERNEL``.
+ */
+typedef unsigned int __bitwise gfp_t;
+#endif
+
+/*
+ * In case of changes, please don't forget to update
+ * include/trace/events/mmflags.h and tools/perf/builtin-kmem.c
+ */
+
+enum {
+ ___GFP_DMA_BIT,
+ ___GFP_HIGHMEM_BIT,
+ ___GFP_DMA32_BIT,
+ ___GFP_MOVABLE_BIT,
+ ___GFP_RECLAIMABLE_BIT,
+ ___GFP_HIGH_BIT,
+ ___GFP_IO_BIT,
+ ___GFP_FS_BIT,
+ ___GFP_ZERO_BIT,
+ ___GFP_UNUSED_BIT, /* 0x200u unused */
+ ___GFP_DIRECT_RECLAIM_BIT,
+ ___GFP_KSWAPD_RECLAIM_BIT,
+ ___GFP_WRITE_BIT,
+ ___GFP_NOWARN_BIT,
+ ___GFP_RETRY_MAYFAIL_BIT,
+ ___GFP_NOFAIL_BIT,
+ ___GFP_NORETRY_BIT,
+ ___GFP_MEMALLOC_BIT,
+ ___GFP_COMP_BIT,
+ ___GFP_NOMEMALLOC_BIT,
+ ___GFP_HARDWALL_BIT,
+ ___GFP_THISNODE_BIT,
+ ___GFP_ACCOUNT_BIT,
+ ___GFP_ZEROTAGS_BIT,
+#ifdef CONFIG_KASAN_HW_TAGS
+ ___GFP_SKIP_ZERO_BIT,
+ ___GFP_SKIP_KASAN_BIT,
+#endif
+#ifdef CONFIG_LOCKDEP
+ ___GFP_NOLOCKDEP_BIT,
+#endif
+#ifdef CONFIG_SLAB_OBJ_EXT
+ ___GFP_NO_OBJ_EXT_BIT,
+#endif
+ ___GFP_LAST_BIT
+};
+
+/* Plain integer GFP bitmasks. Do not use this directly. */
+#define ___GFP_DMA BIT(___GFP_DMA_BIT)
+#define ___GFP_HIGHMEM BIT(___GFP_HIGHMEM_BIT)
+#define ___GFP_DMA32 BIT(___GFP_DMA32_BIT)
+#define ___GFP_MOVABLE BIT(___GFP_MOVABLE_BIT)
+#define ___GFP_RECLAIMABLE BIT(___GFP_RECLAIMABLE_BIT)
+#define ___GFP_HIGH BIT(___GFP_HIGH_BIT)
+#define ___GFP_IO BIT(___GFP_IO_BIT)
+#define ___GFP_FS BIT(___GFP_FS_BIT)
+#define ___GFP_ZERO BIT(___GFP_ZERO_BIT)
+/* 0x200u unused */
+#define ___GFP_DIRECT_RECLAIM BIT(___GFP_DIRECT_RECLAIM_BIT)
+#define ___GFP_KSWAPD_RECLAIM BIT(___GFP_KSWAPD_RECLAIM_BIT)
+#define ___GFP_WRITE BIT(___GFP_WRITE_BIT)
+#define ___GFP_NOWARN BIT(___GFP_NOWARN_BIT)
+#define ___GFP_RETRY_MAYFAIL BIT(___GFP_RETRY_MAYFAIL_BIT)
+#define ___GFP_NOFAIL BIT(___GFP_NOFAIL_BIT)
+#define ___GFP_NORETRY BIT(___GFP_NORETRY_BIT)
+#define ___GFP_MEMALLOC BIT(___GFP_MEMALLOC_BIT)
+#define ___GFP_COMP BIT(___GFP_COMP_BIT)
+#define ___GFP_NOMEMALLOC BIT(___GFP_NOMEMALLOC_BIT)
+#define ___GFP_HARDWALL BIT(___GFP_HARDWALL_BIT)
+#define ___GFP_THISNODE BIT(___GFP_THISNODE_BIT)
+#define ___GFP_ACCOUNT BIT(___GFP_ACCOUNT_BIT)
+#define ___GFP_ZEROTAGS BIT(___GFP_ZEROTAGS_BIT)
+#ifdef CONFIG_KASAN_HW_TAGS
+#define ___GFP_SKIP_ZERO BIT(___GFP_SKIP_ZERO_BIT)
+#define ___GFP_SKIP_KASAN BIT(___GFP_SKIP_KASAN_BIT)
+#else
+#define ___GFP_SKIP_ZERO 0
+#define ___GFP_SKIP_KASAN 0
+#endif
+#ifdef CONFIG_LOCKDEP
+#define ___GFP_NOLOCKDEP BIT(___GFP_NOLOCKDEP_BIT)
+#else
+#define ___GFP_NOLOCKDEP 0
+#endif
+#ifdef CONFIG_SLAB_OBJ_EXT
+#define ___GFP_NO_OBJ_EXT BIT(___GFP_NO_OBJ_EXT_BIT)
+#else
+#define ___GFP_NO_OBJ_EXT 0
+#endif
+
+/*
+ * Physical address zone modifiers (see linux/mmzone.h - low four bits)
+ *
+ * Do not put any conditional on these. If necessary modify the definitions
+ * without the underscores and use them consistently. The definitions here may
+ * be used in bit comparisons.
+ */
+#define __GFP_DMA ((__force gfp_t)___GFP_DMA)
+#define __GFP_HIGHMEM ((__force gfp_t)___GFP_HIGHMEM)
+#define __GFP_DMA32 ((__force gfp_t)___GFP_DMA32)
+#define __GFP_MOVABLE ((__force gfp_t)___GFP_MOVABLE) /* ZONE_MOVABLE allowed */
+#define GFP_ZONEMASK (__GFP_DMA|__GFP_HIGHMEM|__GFP_DMA32|__GFP_MOVABLE)
+
+/**
+ * DOC: Page mobility and placement hints
+ *
+ * Page mobility and placement hints
+ * ---------------------------------
+ *
+ * These flags provide hints about how mobile the page is. Pages with similar
+ * mobility are placed within the same pageblocks to minimise problems due
+ * to external fragmentation.
+ *
+ * %__GFP_MOVABLE (also a zone modifier) indicates that the page can be
+ * moved by page migration during memory compaction or can be reclaimed.
+ *
+ * %__GFP_RECLAIMABLE is used for slab allocations that specify
+ * SLAB_RECLAIM_ACCOUNT and whose pages can be freed via shrinkers.
+ *
+ * %__GFP_WRITE indicates the caller intends to dirty the page. Where possible,
+ * these pages will be spread between local zones to avoid all the dirty
+ * pages being in one zone (fair zone allocation policy).
+ *
+ * %__GFP_HARDWALL enforces the cpuset memory allocation policy.
+ *
+ * %__GFP_THISNODE forces the allocation to be satisfied from the requested
+ * node with no fallbacks or placement policy enforcements.
+ *
+ * %__GFP_ACCOUNT causes the allocation to be accounted to kmemcg.
+ *
+ * %__GFP_NO_OBJ_EXT causes slab allocation to have no object extension.
+ */
+#define __GFP_RECLAIMABLE ((__force gfp_t)___GFP_RECLAIMABLE)
+#define __GFP_WRITE ((__force gfp_t)___GFP_WRITE)
+#define __GFP_HARDWALL ((__force gfp_t)___GFP_HARDWALL)
+#define __GFP_THISNODE ((__force gfp_t)___GFP_THISNODE)
+#define __GFP_ACCOUNT ((__force gfp_t)___GFP_ACCOUNT)
+#define __GFP_NO_OBJ_EXT ((__force gfp_t)___GFP_NO_OBJ_EXT)
+
+/**
+ * DOC: Watermark modifiers
+ *
+ * Watermark modifiers -- controls access to emergency reserves
+ * ------------------------------------------------------------
+ *
+ * %__GFP_HIGH indicates that the caller is high-priority and that granting
+ * the request is necessary before the system can make forward progress.
+ * For example creating an IO context to clean pages and requests
+ * from atomic context.
+ *
+ * %__GFP_MEMALLOC allows access to all memory. This should only be used when
+ * the caller guarantees the allocation will allow more memory to be freed
+ * very shortly e.g. process exiting or swapping. Users either should
+ * be the MM or co-ordinating closely with the VM (e.g. swap over NFS).
+ * Users of this flag have to be extremely careful to not deplete the reserve
+ * completely and implement a throttling mechanism which controls the
+ * consumption of the reserve based on the amount of freed memory.
+ * Usage of a pre-allocated pool (e.g. mempool) should be always considered
+ * before using this flag.
+ *
+ * %__GFP_NOMEMALLOC is used to explicitly forbid access to emergency reserves.
+ * This takes precedence over the %__GFP_MEMALLOC flag if both are set.
+ */
+#define __GFP_HIGH ((__force gfp_t)___GFP_HIGH)
+#define __GFP_MEMALLOC ((__force gfp_t)___GFP_MEMALLOC)
+#define __GFP_NOMEMALLOC ((__force gfp_t)___GFP_NOMEMALLOC)
+
+/**
+ * DOC: Reclaim modifiers
+ *
+ * Reclaim modifiers
+ * -----------------
+ * Please note that all the following flags are only applicable to sleepable
+ * allocations (e.g. %GFP_NOWAIT and %GFP_ATOMIC will ignore them).
+ *
+ * %__GFP_IO can start physical IO.
+ *
+ * %__GFP_FS can call down to the low-level FS. Clearing the flag avoids the
+ * allocator recursing into the filesystem which might already be holding
+ * locks.
+ *
+ * %__GFP_DIRECT_RECLAIM indicates that the caller may enter direct reclaim.
+ * This flag can be cleared to avoid unnecessary delays when a fallback
+ * option is available.
+ *
+ * %__GFP_KSWAPD_RECLAIM indicates that the caller wants to wake kswapd when
+ * the low watermark is reached and have it reclaim pages until the high
+ * watermark is reached. A caller may wish to clear this flag when fallback
+ * options are available and the reclaim is likely to disrupt the system. The
+ * canonical example is THP allocation where a fallback is cheap but
+ * reclaim/compaction may cause indirect stalls.
+ *
+ * %__GFP_RECLAIM is shorthand to allow/forbid both direct and kswapd reclaim.
+ *
+ * The default allocator behavior depends on the request size. We have a concept
+ * of so-called costly allocations (with order > %PAGE_ALLOC_COSTLY_ORDER).
+ * !costly allocations are too essential to fail so they are implicitly
+ * non-failing by default (with some exceptions like OOM victims might fail so
+ * the caller still has to check for failures) while costly requests try to be
+ * not disruptive and back off even without invoking the OOM killer.
+ * The following three modifiers might be used to override some of these
+ * implicit rules. Please note that all of them must be used along with
+ * %__GFP_DIRECT_RECLAIM flag.
+ *
+ * %__GFP_NORETRY: The VM implementation will try only very lightweight
+ * memory direct reclaim to get some memory under memory pressure (thus
+ * it can sleep). It will avoid disruptive actions like OOM killer. The
+ * caller must handle the failure which is quite likely to happen under
+ * heavy memory pressure. The flag is suitable when failure can easily be
+ * handled at small cost, such as reduced throughput.
+ *
+ * %__GFP_RETRY_MAYFAIL: The VM implementation will retry memory reclaim
+ * procedures that have previously failed if there is some indication
+ * that progress has been made elsewhere. It can wait for other
+ * tasks to attempt high-level approaches to freeing memory such as
+ * compaction (which removes fragmentation) and page-out.
+ * There is still a definite limit to the number of retries, but it is
+ * a larger limit than with %__GFP_NORETRY.
+ * Allocations with this flag may fail, but only when there is
+ * genuinely little unused memory. While these allocations do not
+ * directly trigger the OOM killer, their failure indicates that
+ * the system is likely to need to use the OOM killer soon. The
+ * caller must handle failure, but can reasonably do so by failing
+ * a higher-level request, or completing it only in a much less
+ * efficient manner.
+ * If the allocation does fail, and the caller is in a position to
+ * free some non-essential memory, doing so could benefit the system
+ * as a whole.
+ *
+ * %__GFP_NOFAIL: The VM implementation _must_ retry infinitely: the caller
+ * cannot handle allocation failures. The allocation could block
+ * indefinitely but will never return with failure. Testing for
+ * failure is pointless.
+ * It _must_ be blockable and used together with __GFP_DIRECT_RECLAIM.
+ * It should _never_ be used in non-sleepable contexts.
+ * New users should be evaluated carefully (and the flag should be
+ * used only when there is no reasonable failure policy) but it is
+ * definitely preferable to use the flag rather than opencode endless
+ * loop around allocator.
+ * Allocating pages from the buddy with __GFP_NOFAIL and order > 1 is
+ * not supported. Please consider using kvmalloc() instead.
+ */
+#define __GFP_IO ((__force gfp_t)___GFP_IO)
+#define __GFP_FS ((__force gfp_t)___GFP_FS)
+#define __GFP_DIRECT_RECLAIM ((__force gfp_t)___GFP_DIRECT_RECLAIM) /* Caller can reclaim */
+#define __GFP_KSWAPD_RECLAIM ((__force gfp_t)___GFP_KSWAPD_RECLAIM) /* kswapd can wake */
+#define __GFP_RECLAIM ((__force gfp_t)(___GFP_DIRECT_RECLAIM|___GFP_KSWAPD_RECLAIM))
+#define __GFP_RETRY_MAYFAIL ((__force gfp_t)___GFP_RETRY_MAYFAIL)
+#define __GFP_NOFAIL ((__force gfp_t)___GFP_NOFAIL)
+#define __GFP_NORETRY ((__force gfp_t)___GFP_NORETRY)
+
+/**
+ * DOC: Action modifiers
+ *
+ * Action modifiers
+ * ----------------
+ *
+ * %__GFP_NOWARN suppresses allocation failure reports.
+ *
+ * %__GFP_COMP address compound page metadata.
+ *
+ * %__GFP_ZERO returns a zeroed page on success.
+ *
+ * %__GFP_ZEROTAGS zeroes memory tags at allocation time if the memory itself
+ * is being zeroed (either via __GFP_ZERO or via init_on_alloc, provided that
+ * __GFP_SKIP_ZERO is not set). This flag is intended for optimization: setting
+ * memory tags at the same time as zeroing memory has minimal additional
+ * performance impact.
+ *
+ * %__GFP_SKIP_KASAN makes KASAN skip unpoisoning on page allocation.
+ * Used for userspace and vmalloc pages; the latter are unpoisoned by
+ * kasan_unpoison_vmalloc instead. For userspace pages, results in
+ * poisoning being skipped as well, see should_skip_kasan_poison for
+ * details. Only effective in HW_TAGS mode.
+ */
+#define __GFP_NOWARN ((__force gfp_t)___GFP_NOWARN)
+#define __GFP_COMP ((__force gfp_t)___GFP_COMP)
+#define __GFP_ZERO ((__force gfp_t)___GFP_ZERO)
+#define __GFP_ZEROTAGS ((__force gfp_t)___GFP_ZEROTAGS)
+#define __GFP_SKIP_ZERO ((__force gfp_t)___GFP_SKIP_ZERO)
+#define __GFP_SKIP_KASAN ((__force gfp_t)___GFP_SKIP_KASAN)
+
+/* Disable lockdep for GFP context tracking */
+#define __GFP_NOLOCKDEP ((__force gfp_t)___GFP_NOLOCKDEP)
+
+/* Room for N __GFP_FOO bits */
+#define __GFP_BITS_SHIFT ___GFP_LAST_BIT
+#define __GFP_BITS_MASK ((__force gfp_t)((1 << __GFP_BITS_SHIFT) - 1))
+
+/**
+ * DOC: Useful GFP flag combinations
+ *
+ * Useful GFP flag combinations
+ * ----------------------------
+ *
+ * Useful GFP flag combinations that are commonly used. It is recommended
+ * that subsystems start with one of these combinations and then set/clear
+ * %__GFP_FOO flags as necessary.
+ *
+ * %GFP_ATOMIC users can not sleep and need the allocation to succeed. A lower
+ * watermark is applied to allow access to "atomic reserves".
+ * The current implementation doesn't support NMI and few other strict
+ * non-preemptive contexts (e.g. raw_spin_lock). The same applies to %GFP_NOWAIT.
+ *
+ * %GFP_KERNEL is typical for kernel-internal allocations. The caller requires
+ * %ZONE_NORMAL or a lower zone for direct access but can direct reclaim.
+ *
+ * %GFP_KERNEL_ACCOUNT is the same as GFP_KERNEL, except the allocation is
+ * accounted to kmemcg.
+ *
+ * %GFP_NOWAIT is for kernel allocations that should not stall for direct
+ * reclaim, start physical IO or use any filesystem callback. It is very
+ * likely to fail to allocate memory, even for very small allocations.
+ *
+ * %GFP_NOIO will use direct reclaim to discard clean pages or slab pages
+ * that do not require the starting of any physical IO.
+ * Please try to avoid using this flag directly and instead use
+ * memalloc_noio_{save,restore} to mark the whole scope which cannot
+ * perform any IO with a short explanation why. All allocation requests
+ * will inherit GFP_NOIO implicitly.
+ *
+ * %GFP_NOFS will use direct reclaim but will not use any filesystem interfaces.
+ * Please try to avoid using this flag directly and instead use
+ * memalloc_nofs_{save,restore} to mark the whole scope which cannot/shouldn't
+ * recurse into the FS layer with a short explanation why. All allocation
+ * requests will inherit GFP_NOFS implicitly.
+ *
+ * %GFP_USER is for userspace allocations that also need to be directly
+ * accessibly by the kernel or hardware. It is typically used by hardware
+ * for buffers that are mapped to userspace (e.g. graphics) that hardware
+ * still must DMA to. cpuset limits are enforced for these allocations.
+ *
+ * %GFP_DMA exists for historical reasons and should be avoided where possible.
+ * The flags indicates that the caller requires that the lowest zone be
+ * used (%ZONE_DMA or 16M on x86-64). Ideally, this would be removed but
+ * it would require careful auditing as some users really require it and
+ * others use the flag to avoid lowmem reserves in %ZONE_DMA and treat the
+ * lowest zone as a type of emergency reserve.
+ *
+ * %GFP_DMA32 is similar to %GFP_DMA except that the caller requires a 32-bit
+ * address. Note that kmalloc(..., GFP_DMA32) does not return DMA32 memory
+ * because the DMA32 kmalloc cache array is not implemented.
+ * (Reason: there is no such user in kernel).
+ *
+ * %GFP_HIGHUSER is for userspace allocations that may be mapped to userspace,
+ * do not need to be directly accessible by the kernel but that cannot
+ * move once in use. An example may be a hardware allocation that maps
+ * data directly into userspace but has no addressing limitations.
+ *
+ * %GFP_HIGHUSER_MOVABLE is for userspace allocations that the kernel does not
+ * need direct access to but can use kmap() when access is required. They
+ * are expected to be movable via page reclaim or page migration. Typically,
+ * pages on the LRU would also be allocated with %GFP_HIGHUSER_MOVABLE.
+ *
+ * %GFP_TRANSHUGE and %GFP_TRANSHUGE_LIGHT are used for THP allocations. They
+ * are compound allocations that will generally fail quickly if memory is not
+ * available and will not wake kswapd/kcompactd on failure. The _LIGHT
+ * version does not attempt reclaim/compaction at all and is by default used
+ * in page fault path, while the non-light is used by khugepaged.
+ */
+#define GFP_ATOMIC (__GFP_HIGH|__GFP_KSWAPD_RECLAIM)
+#define GFP_KERNEL (__GFP_RECLAIM | __GFP_IO | __GFP_FS)
+#define GFP_KERNEL_ACCOUNT (GFP_KERNEL | __GFP_ACCOUNT)
+#define GFP_NOWAIT (__GFP_KSWAPD_RECLAIM | __GFP_NOWARN)
+#define GFP_NOIO (__GFP_RECLAIM)
+#define GFP_NOFS (__GFP_RECLAIM | __GFP_IO)
+#define GFP_USER (__GFP_RECLAIM | __GFP_IO | __GFP_FS | __GFP_HARDWALL)
+#define GFP_DMA __GFP_DMA
+#define GFP_DMA32 __GFP_DMA32
+#define GFP_HIGHUSER (GFP_USER | __GFP_HIGHMEM)
+#define GFP_HIGHUSER_MOVABLE (GFP_HIGHUSER | __GFP_MOVABLE | __GFP_SKIP_KASAN)
+#define GFP_TRANSHUGE_LIGHT ((GFP_HIGHUSER_MOVABLE | __GFP_COMP | \
+ __GFP_NOMEMALLOC | __GFP_NOWARN) & ~__GFP_RECLAIM)
+#define GFP_TRANSHUGE (GFP_TRANSHUGE_LIGHT | __GFP_DIRECT_RECLAIM)
+
+#endif /* __LINUX_GFP_TYPES_H */
diff --git a/tools/include/linux/interval_tree_generic.h b/tools/include/linux/interval_tree_generic.h
index aaa8a0767aa3..c5a2fed49eb0 100644
--- a/tools/include/linux/interval_tree_generic.h
+++ b/tools/include/linux/interval_tree_generic.h
@@ -77,7 +77,7 @@ ITSTATIC void ITPREFIX ## _remove(ITSTRUCT *node, \
* Cond2: start <= ITLAST(node) \
*/ \
\
-static ITSTRUCT * \
+ITSTATIC ITSTRUCT * \
ITPREFIX ## _subtree_search(ITSTRUCT *node, ITTYPE start, ITTYPE last) \
{ \
while (true) { \
@@ -104,12 +104,8 @@ ITPREFIX ## _subtree_search(ITSTRUCT *node, ITTYPE start, ITTYPE last) \
if (ITSTART(node) <= last) { /* Cond1 */ \
if (start <= ITLAST(node)) /* Cond2 */ \
return node; /* node is leftmost match */ \
- if (node->ITRB.rb_right) { \
- node = rb_entry(node->ITRB.rb_right, \
- ITSTRUCT, ITRB); \
- if (start <= node->ITSUBTREE) \
- continue; \
- } \
+ node = rb_entry(node->ITRB.rb_right, ITSTRUCT, ITRB); \
+ continue; \
} \
return NULL; /* No match */ \
} \
diff --git a/tools/include/linux/io.h b/tools/include/linux/io.h
index e129871fe661..4b94b84160b8 100644
--- a/tools/include/linux/io.h
+++ b/tools/include/linux/io.h
@@ -2,4 +2,6 @@
#ifndef _TOOLS_IO_H
#define _TOOLS_IO_H
-#endif
+#include <asm/io.h>
+
+#endif /* _TOOLS_IO_H */
diff --git a/tools/include/linux/livepatch_external.h b/tools/include/linux/livepatch_external.h
new file mode 100644
index 000000000000..138af19b0f5c
--- /dev/null
+++ b/tools/include/linux/livepatch_external.h
@@ -0,0 +1,76 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * External livepatch interfaces for patch creation tooling
+ */
+
+#ifndef _LINUX_LIVEPATCH_EXTERNAL_H_
+#define _LINUX_LIVEPATCH_EXTERNAL_H_
+
+#include <linux/types.h>
+
+#define KLP_RELOC_SEC_PREFIX ".klp.rela."
+#define KLP_SYM_PREFIX ".klp.sym."
+
+#define __KLP_PRE_PATCH_PREFIX __klp_pre_patch_callback_
+#define __KLP_POST_PATCH_PREFIX __klp_post_patch_callback_
+#define __KLP_PRE_UNPATCH_PREFIX __klp_pre_unpatch_callback_
+#define __KLP_POST_UNPATCH_PREFIX __klp_post_unpatch_callback_
+
+#define KLP_PRE_PATCH_PREFIX __stringify(__KLP_PRE_PATCH_PREFIX)
+#define KLP_POST_PATCH_PREFIX __stringify(__KLP_POST_PATCH_PREFIX)
+#define KLP_PRE_UNPATCH_PREFIX __stringify(__KLP_PRE_UNPATCH_PREFIX)
+#define KLP_POST_UNPATCH_PREFIX __stringify(__KLP_POST_UNPATCH_PREFIX)
+
+struct klp_object;
+
+typedef int (*klp_pre_patch_t)(struct klp_object *obj);
+typedef void (*klp_post_patch_t)(struct klp_object *obj);
+typedef void (*klp_pre_unpatch_t)(struct klp_object *obj);
+typedef void (*klp_post_unpatch_t)(struct klp_object *obj);
+
+/**
+ * struct klp_callbacks - pre/post live-(un)patch callback structure
+ * @pre_patch: executed before code patching
+ * @post_patch: executed after code patching
+ * @pre_unpatch: executed before code unpatching
+ * @post_unpatch: executed after code unpatching
+ * @post_unpatch_enabled: flag indicating if post-unpatch callback
+ * should run
+ *
+ * All callbacks are optional. Only the pre-patch callback, if provided,
+ * will be unconditionally executed. If the parent klp_object fails to
+ * patch for any reason, including a non-zero error status returned from
+ * the pre-patch callback, no further callbacks will be executed.
+ */
+struct klp_callbacks {
+ klp_pre_patch_t pre_patch;
+ klp_post_patch_t post_patch;
+ klp_pre_unpatch_t pre_unpatch;
+ klp_post_unpatch_t post_unpatch;
+ bool post_unpatch_enabled;
+};
+
+/*
+ * 'struct klp_{func,object}_ext' are compact "external" representations of
+ * 'struct klp_{func,object}'. They are used by objtool for livepatch
+ * generation. The structs are then read by the livepatch module and converted
+ * to the real structs before calling klp_enable_patch().
+ *
+ * TODO make these the official API for klp_enable_patch(). That should
+ * simplify livepatch's interface as well as its data structure lifetime
+ * management.
+ */
+struct klp_func_ext {
+ const char *old_name;
+ void *new_func;
+ unsigned long sympos;
+};
+
+struct klp_object_ext {
+ const char *name;
+ struct klp_func_ext *funcs;
+ struct klp_callbacks callbacks;
+ unsigned int nr_funcs;
+};
+
+#endif /* _LINUX_LIVEPATCH_EXTERNAL_H_ */
diff --git a/tools/include/linux/objtool_types.h b/tools/include/linux/objtool_types.h
index df5d9fa84dba..c6def4049b1a 100644
--- a/tools/include/linux/objtool_types.h
+++ b/tools/include/linux/objtool_types.h
@@ -65,5 +65,8 @@ struct unwind_hint {
#define ANNOTYPE_IGNORE_ALTS 6
#define ANNOTYPE_INTRA_FUNCTION_CALL 7
#define ANNOTYPE_REACHABLE 8
+#define ANNOTYPE_NOCFI 9
+
+#define ANNOTYPE_DATA_SPECIAL 1
#endif /* _LINUX_OBJTOOL_TYPES_H */
diff --git a/tools/include/linux/pci_ids.h b/tools/include/linux/pci_ids.h
new file mode 120000
index 000000000000..1c9e88f41261
--- /dev/null
+++ b/tools/include/linux/pci_ids.h
@@ -0,0 +1 @@
+../../../include/linux/pci_ids.h \ No newline at end of file
diff --git a/tools/include/linux/slab.h b/tools/include/linux/slab.h
index c87051e2b26f..94937a699402 100644
--- a/tools/include/linux/slab.h
+++ b/tools/include/linux/slab.h
@@ -4,11 +4,31 @@
#include <linux/types.h>
#include <linux/gfp.h>
+#include <pthread.h>
-#define SLAB_PANIC 2
#define SLAB_RECLAIM_ACCOUNT 0x00020000UL /* Objects are reclaimable */
#define kzalloc_node(size, flags, node) kmalloc(size, flags)
+enum _slab_flag_bits {
+ _SLAB_KMALLOC,
+ _SLAB_HWCACHE_ALIGN,
+ _SLAB_PANIC,
+ _SLAB_TYPESAFE_BY_RCU,
+ _SLAB_ACCOUNT,
+ _SLAB_FLAGS_LAST_BIT
+};
+
+#define __SLAB_FLAG_BIT(nr) ((unsigned int __force)(1U << (nr)))
+#define __SLAB_FLAG_UNUSED ((unsigned int __force)(0U))
+
+#define SLAB_HWCACHE_ALIGN __SLAB_FLAG_BIT(_SLAB_HWCACHE_ALIGN)
+#define SLAB_PANIC __SLAB_FLAG_BIT(_SLAB_PANIC)
+#define SLAB_TYPESAFE_BY_RCU __SLAB_FLAG_BIT(_SLAB_TYPESAFE_BY_RCU)
+#ifdef CONFIG_MEMCG
+# define SLAB_ACCOUNT __SLAB_FLAG_BIT(_SLAB_ACCOUNT)
+#else
+# define SLAB_ACCOUNT __SLAB_FLAG_UNUSED
+#endif
void *kmalloc(size_t size, gfp_t gfp);
void kfree(void *p);
@@ -23,6 +43,98 @@ enum slab_state {
FULL
};
+struct kmem_cache {
+ pthread_mutex_t lock;
+ unsigned int size;
+ unsigned int align;
+ unsigned int sheaf_capacity;
+ int nr_objs;
+ void *objs;
+ void (*ctor)(void *);
+ bool non_kernel_enabled;
+ unsigned int non_kernel;
+ unsigned long nr_allocated;
+ unsigned long nr_tallocated;
+ bool exec_callback;
+ void (*callback)(void *);
+ void *private;
+};
+
+struct kmem_cache_args {
+ /**
+ * @align: The required alignment for the objects.
+ *
+ * %0 means no specific alignment is requested.
+ */
+ unsigned int align;
+ /**
+ * @sheaf_capacity: The maximum size of the sheaf.
+ */
+ unsigned int sheaf_capacity;
+ /**
+ * @useroffset: Usercopy region offset.
+ *
+ * %0 is a valid offset, when @usersize is non-%0
+ */
+ unsigned int useroffset;
+ /**
+ * @usersize: Usercopy region size.
+ *
+ * %0 means no usercopy region is specified.
+ */
+ unsigned int usersize;
+ /**
+ * @freeptr_offset: Custom offset for the free pointer
+ * in &SLAB_TYPESAFE_BY_RCU caches
+ *
+ * By default &SLAB_TYPESAFE_BY_RCU caches place the free pointer
+ * outside of the object. This might cause the object to grow in size.
+ * Cache creators that have a reason to avoid this can specify a custom
+ * free pointer offset in their struct where the free pointer will be
+ * placed.
+ *
+ * Note that placing the free pointer inside the object requires the
+ * caller to ensure that no fields are invalidated that are required to
+ * guard against object recycling (See &SLAB_TYPESAFE_BY_RCU for
+ * details).
+ *
+ * Using %0 as a value for @freeptr_offset is valid. If @freeptr_offset
+ * is specified, %use_freeptr_offset must be set %true.
+ *
+ * Note that @ctor currently isn't supported with custom free pointers
+ * as a @ctor requires an external free pointer.
+ */
+ unsigned int freeptr_offset;
+ /**
+ * @use_freeptr_offset: Whether a @freeptr_offset is used.
+ */
+ bool use_freeptr_offset;
+ /**
+ * @ctor: A constructor for the objects.
+ *
+ * The constructor is invoked for each object in a newly allocated slab
+ * page. It is the cache user's responsibility to free object in the
+ * same state as after calling the constructor, or deal appropriately
+ * with any differences between a freshly constructed and a reallocated
+ * object.
+ *
+ * %NULL means no constructor.
+ */
+ void (*ctor)(void *);
+};
+
+struct slab_sheaf {
+ union {
+ struct list_head barn_list;
+ /* only used for prefilled sheafs */
+ unsigned int capacity;
+ };
+ struct kmem_cache *cache;
+ unsigned int size;
+ int node; /* only used for rcu_sheaf */
+ void *objects[];
+};
+
static inline void *kzalloc(size_t size, gfp_t gfp)
{
return kmalloc(size, gfp | __GFP_ZERO);
@@ -37,12 +149,57 @@ static inline void *kmem_cache_alloc(struct kmem_cache *cachep, int flags)
}
void kmem_cache_free(struct kmem_cache *cachep, void *objp);
-struct kmem_cache *kmem_cache_create(const char *name, unsigned int size,
- unsigned int align, unsigned int flags,
- void (*ctor)(void *));
+
+struct kmem_cache *
+__kmem_cache_create_args(const char *name, unsigned int size,
+ struct kmem_cache_args *args, unsigned int flags);
+
+/* If NULL is passed for @args, use this variant with default arguments. */
+static inline struct kmem_cache *
+__kmem_cache_default_args(const char *name, unsigned int size,
+ struct kmem_cache_args *args, unsigned int flags)
+{
+ struct kmem_cache_args kmem_default_args = {};
+
+ return __kmem_cache_create_args(name, size, &kmem_default_args, flags);
+}
+
+static inline struct kmem_cache *
+__kmem_cache_create(const char *name, unsigned int size, unsigned int align,
+ unsigned int flags, void (*ctor)(void *))
+{
+ struct kmem_cache_args kmem_args = {
+ .align = align,
+ .ctor = ctor,
+ };
+
+ return __kmem_cache_create_args(name, size, &kmem_args, flags);
+}
+
+#define kmem_cache_create(__name, __object_size, __args, ...) \
+ _Generic((__args), \
+ struct kmem_cache_args *: __kmem_cache_create_args, \
+ void *: __kmem_cache_default_args, \
+ default: __kmem_cache_create)(__name, __object_size, __args, __VA_ARGS__)
void kmem_cache_free_bulk(struct kmem_cache *cachep, size_t size, void **list);
int kmem_cache_alloc_bulk(struct kmem_cache *cachep, gfp_t gfp, size_t size,
void **list);
+struct slab_sheaf *
+kmem_cache_prefill_sheaf(struct kmem_cache *s, gfp_t gfp, unsigned int size);
+
+void *
+kmem_cache_alloc_from_sheaf(struct kmem_cache *s, gfp_t gfp,
+ struct slab_sheaf *sheaf);
+
+void kmem_cache_return_sheaf(struct kmem_cache *s, gfp_t gfp,
+ struct slab_sheaf *sheaf);
+int kmem_cache_refill_sheaf(struct kmem_cache *s, gfp_t gfp,
+ struct slab_sheaf **sheafp, unsigned int size);
+
+static inline unsigned int kmem_cache_sheaf_size(struct slab_sheaf *sheaf)
+{
+ return sheaf->size;
+}
#endif /* _TOOLS_SLAB_H */
diff --git a/tools/include/linux/static_call_types.h b/tools/include/linux/static_call_types.h
index 5a00b8b2cf9f..cfb6ddeb292b 100644
--- a/tools/include/linux/static_call_types.h
+++ b/tools/include/linux/static_call_types.h
@@ -25,6 +25,8 @@
#define STATIC_CALL_SITE_INIT 2UL /* init section */
#define STATIC_CALL_SITE_FLAGS 3UL
+#ifndef __ASSEMBLY__
+
/*
* The static call site table needs to be created by external tooling (objtool
* or a compiler plugin).
@@ -100,4 +102,6 @@ struct static_call_key {
#endif /* CONFIG_HAVE_STATIC_CALL */
+#endif /* __ASSEMBLY__ */
+
#endif /* _STATIC_CALL_TYPES_H */
diff --git a/tools/include/linux/string.h b/tools/include/linux/string.h
index 8499f509f03e..51ad3cf4fa82 100644
--- a/tools/include/linux/string.h
+++ b/tools/include/linux/string.h
@@ -44,6 +44,20 @@ static inline bool strstarts(const char *str, const char *prefix)
return strncmp(str, prefix, strlen(prefix)) == 0;
}
+/*
+ * Checks if a string ends with another.
+ */
+static inline bool str_ends_with(const char *str, const char *substr)
+{
+ size_t len = strlen(str);
+ size_t sublen = strlen(substr);
+
+ if (sublen > len)
+ return false;
+
+ return !strcmp(str + len - sublen, substr);
+}
+
extern char * __must_check skip_spaces(const char *);
extern char *strim(char *);
diff --git a/tools/include/nolibc/Makefile b/tools/include/nolibc/Makefile
index 143c2d2c2ba6..8118e22844f1 100644
--- a/tools/include/nolibc/Makefile
+++ b/tools/include/nolibc/Makefile
@@ -23,7 +23,7 @@ else
Q=@
endif
-arch_file := arch-$(ARCH).h
+arch_files := arch.h $(wildcard arch-*.h)
all_files := \
compiler.h \
crt.h \
@@ -33,6 +33,7 @@ all_files := \
errno.h \
fcntl.h \
getopt.h \
+ inttypes.h \
limits.h \
math.h \
nolibc.h \
@@ -56,12 +57,14 @@ all_files := \
sys/random.h \
sys/reboot.h \
sys/resource.h \
+ sys/select.h \
sys/stat.h \
sys/syscall.h \
sys/sysmacros.h \
sys/time.h \
sys/timerfd.h \
sys/types.h \
+ sys/uio.h \
sys/utsname.h \
sys/wait.h \
time.h \
@@ -79,7 +82,7 @@ help:
@echo "Supported targets under nolibc:"
@echo " all call \"headers\""
@echo " clean clean the sysroot"
- @echo " headers prepare a sysroot in tools/include/nolibc/sysroot"
+ @echo " headers prepare a multi-arch sysroot in \$${OUTPUT}sysroot"
@echo " headers_standalone like \"headers\", and also install kernel headers"
@echo " help this help"
@echo ""
@@ -90,18 +93,11 @@ help:
@echo " OUTPUT = $(OUTPUT)"
@echo ""
+# installs headers for all archs at once.
headers:
- $(Q)mkdir -p $(OUTPUT)sysroot
- $(Q)mkdir -p $(OUTPUT)sysroot/include
- $(Q)cp --parents $(all_files) $(OUTPUT)sysroot/include/
- $(Q)if [ "$(ARCH)" = "i386" -o "$(ARCH)" = "x86_64" ]; then \
- cat arch-x86.h; \
- elif [ -e "$(arch_file)" ]; then \
- cat $(arch_file); \
- else \
- echo "Fatal: architecture $(ARCH) not yet supported by nolibc." >&2; \
- exit 1; \
- fi > $(OUTPUT)sysroot/include/arch.h
+ $(Q)mkdir -p "$(OUTPUT)sysroot"
+ $(Q)mkdir -p "$(OUTPUT)sysroot/include"
+ $(Q)cp --parents $(arch_files) $(all_files) "$(OUTPUT)sysroot/include/"
headers_standalone: headers
$(Q)$(MAKE) -C $(srctree) headers
diff --git a/tools/include/nolibc/arch-arm.h b/tools/include/nolibc/arch-arm.h
index 1f66e7e5a444..251c42579028 100644
--- a/tools/include/nolibc/arch-arm.h
+++ b/tools/include/nolibc/arch-arm.h
@@ -184,6 +184,7 @@
_arg1; \
})
+#ifndef NOLIBC_NO_RUNTIME
/* startup code */
void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector _start(void)
{
@@ -193,5 +194,6 @@ void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector _s
);
__nolibc_entrypoint_epilogue();
}
+#endif /* NOLIBC_NO_RUNTIME */
#endif /* _NOLIBC_ARCH_ARM_H */
diff --git a/tools/include/nolibc/arch-arm64.h b/tools/include/nolibc/arch-arm64.h
index 02a3f74c8ec8..080a55a7144e 100644
--- a/tools/include/nolibc/arch-arm64.h
+++ b/tools/include/nolibc/arch-arm64.h
@@ -141,6 +141,7 @@
_arg1; \
})
+#ifndef NOLIBC_NO_RUNTIME
/* startup code */
void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector _start(void)
{
@@ -150,4 +151,5 @@ void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector _s
);
__nolibc_entrypoint_epilogue();
}
+#endif /* NOLIBC_NO_RUNTIME */
#endif /* _NOLIBC_ARCH_ARM64_H */
diff --git a/tools/include/nolibc/arch-loongarch.h b/tools/include/nolibc/arch-loongarch.h
index 5511705303ea..c894176c3f89 100644
--- a/tools/include/nolibc/arch-loongarch.h
+++ b/tools/include/nolibc/arch-loongarch.h
@@ -142,6 +142,7 @@
_arg1; \
})
+#ifndef NOLIBC_NO_RUNTIME
/* startup code */
void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector _start(void)
{
@@ -151,5 +152,6 @@ void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector _s
);
__nolibc_entrypoint_epilogue();
}
+#endif /* NOLIBC_NO_RUNTIME */
#endif /* _NOLIBC_ARCH_LOONGARCH_H */
diff --git a/tools/include/nolibc/arch-m68k.h b/tools/include/nolibc/arch-m68k.h
index 6dac1845f298..2a4fbada5e79 100644
--- a/tools/include/nolibc/arch-m68k.h
+++ b/tools/include/nolibc/arch-m68k.h
@@ -128,6 +128,7 @@
_num; \
})
+#ifndef NOLIBC_NO_RUNTIME
void _start(void);
void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector _start(void)
{
@@ -137,5 +138,6 @@ void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector _s
);
__nolibc_entrypoint_epilogue();
}
+#endif /* NOLIBC_NO_RUNTIME */
#endif /* _NOLIBC_ARCH_M68K_H */
diff --git a/tools/include/nolibc/arch-mips.h b/tools/include/nolibc/arch-mips.h
index 0cbac63b249a..a72506ceec6b 100644
--- a/tools/include/nolibc/arch-mips.h
+++ b/tools/include/nolibc/arch-mips.h
@@ -245,6 +245,7 @@
#endif /* _ABIO32 */
+#ifndef NOLIBC_NO_RUNTIME
/* startup code, note that it's called __start on MIPS */
void __start(void);
void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector __start(void)
@@ -266,5 +267,6 @@ void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector __
);
__nolibc_entrypoint_epilogue();
}
+#endif /* NOLIBC_NO_RUNTIME */
#endif /* _NOLIBC_ARCH_MIPS_H */
diff --git a/tools/include/nolibc/arch-powerpc.h b/tools/include/nolibc/arch-powerpc.h
index 204564bbcd32..e0c7e0b81f7c 100644
--- a/tools/include/nolibc/arch-powerpc.h
+++ b/tools/include/nolibc/arch-powerpc.h
@@ -183,6 +183,7 @@
#endif
#endif /* !__powerpc64__ */
+#ifndef NOLIBC_NO_RUNTIME
/* startup code */
void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector _start(void)
{
@@ -215,5 +216,6 @@ void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector _s
#endif
__nolibc_entrypoint_epilogue();
}
+#endif /* NOLIBC_NO_RUNTIME */
#endif /* _NOLIBC_ARCH_POWERPC_H */
diff --git a/tools/include/nolibc/arch-riscv.h b/tools/include/nolibc/arch-riscv.h
index 885383a86c38..1c00cacf57e1 100644
--- a/tools/include/nolibc/arch-riscv.h
+++ b/tools/include/nolibc/arch-riscv.h
@@ -139,6 +139,7 @@
_arg1; \
})
+#ifndef NOLIBC_NO_RUNTIME
/* startup code */
void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector _start(void)
{
@@ -152,5 +153,6 @@ void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector _s
);
__nolibc_entrypoint_epilogue();
}
+#endif /* NOLIBC_NO_RUNTIME */
#endif /* _NOLIBC_ARCH_RISCV_H */
diff --git a/tools/include/nolibc/arch-s390.h b/tools/include/nolibc/arch-s390.h
index df4c3cc713ac..74125a254ce3 100644
--- a/tools/include/nolibc/arch-s390.h
+++ b/tools/include/nolibc/arch-s390.h
@@ -139,22 +139,19 @@
_arg1; \
})
+#ifndef NOLIBC_NO_RUNTIME
/* startup code */
void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector _start(void)
{
__asm__ volatile (
-#ifdef __s390x__
"lgr %r2, %r15\n" /* save stack pointer to %r2, as arg1 of _start_c */
"aghi %r15, -160\n" /* allocate new stackframe */
-#else
- "lr %r2, %r15\n"
- "ahi %r15, -96\n"
-#endif
"xc 0(8,%r15), 0(%r15)\n" /* clear backchain */
"brasl %r14, _start_c\n" /* transfer to c runtime */
);
__nolibc_entrypoint_epilogue();
}
+#endif /* NOLIBC_NO_RUNTIME */
struct s390_mmap_arg_struct {
unsigned long addr;
diff --git a/tools/include/nolibc/arch-sh.h b/tools/include/nolibc/arch-sh.h
index a96b8914607e..7a421197d104 100644
--- a/tools/include/nolibc/arch-sh.h
+++ b/tools/include/nolibc/arch-sh.h
@@ -140,6 +140,7 @@
_ret; \
})
+#ifndef NOLIBC_NO_RUNTIME
/* startup code */
void _start_wrapper(void);
void __attribute__((weak,noreturn)) __nolibc_entrypoint __no_stack_protector _start_wrapper(void)
@@ -158,5 +159,6 @@ void __attribute__((weak,noreturn)) __nolibc_entrypoint __no_stack_protector _st
);
__nolibc_entrypoint_epilogue();
}
+#endif /* NOLIBC_NO_RUNTIME */
#endif /* _NOLIBC_ARCH_SH_H */
diff --git a/tools/include/nolibc/arch-sparc.h b/tools/include/nolibc/arch-sparc.h
index ca420d843e25..2ebb5686e105 100644
--- a/tools/include/nolibc/arch-sparc.h
+++ b/tools/include/nolibc/arch-sparc.h
@@ -152,6 +152,7 @@
_arg1; \
})
+#ifndef NOLIBC_NO_RUNTIME
/* startup code */
void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector _start(void)
{
@@ -169,6 +170,7 @@ void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector _s
);
__nolibc_entrypoint_epilogue();
}
+#endif /* NOLIBC_NO_RUNTIME */
static pid_t getpid(void);
diff --git a/tools/include/nolibc/arch-x86.h b/tools/include/nolibc/arch-x86.h
index d3efc0c3b8ad..f6c43ac5377b 100644
--- a/tools/include/nolibc/arch-x86.h
+++ b/tools/include/nolibc/arch-x86.h
@@ -157,6 +157,7 @@
_eax; \
})
+#ifndef NOLIBC_NO_RUNTIME
/* startup code */
/*
* i386 System V ABI mandates:
@@ -176,6 +177,7 @@ void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector _s
);
__nolibc_entrypoint_epilogue();
}
+#endif /* NOLIBC_NO_RUNTIME */
#else /* !defined(__x86_64__) */
@@ -323,6 +325,7 @@ void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector _s
_ret; \
})
+#ifndef NOLIBC_NO_RUNTIME
/* startup code */
/*
* x86-64 System V ABI mandates:
@@ -340,6 +343,7 @@ void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector _s
);
__nolibc_entrypoint_epilogue();
}
+#endif /* NOLIBC_NO_RUNTIME */
#define NOLIBC_ARCH_HAS_MEMMOVE
void *memmove(void *dst, const void *src, size_t len);
@@ -351,7 +355,7 @@ void *memcpy(void *dst, const void *src, size_t len);
void *memset(void *dst, int c, size_t len);
__asm__ (
-".section .text.nolibc_memmove_memcpy\n"
+".pushsection .text.nolibc_memmove_memcpy\n"
".weak memmove\n"
".weak memcpy\n"
"memmove:\n"
@@ -371,8 +375,9 @@ __asm__ (
"rep movsb\n\t"
"cld\n\t"
"retq\n"
+".popsection\n"
-".section .text.nolibc_memset\n"
+".pushsection .text.nolibc_memset\n"
".weak memset\n"
"memset:\n"
"xchgl %eax, %esi\n\t"
@@ -381,6 +386,7 @@ __asm__ (
"rep stosb\n\t"
"popq %rax\n\t"
"retq\n"
+".popsection\n"
);
#endif /* !defined(__x86_64__) */
diff --git a/tools/include/nolibc/arch.h b/tools/include/nolibc/arch.h
index 426c89198135..a3adaf433f2c 100644
--- a/tools/include/nolibc/arch.h
+++ b/tools/include/nolibc/arch.h
@@ -3,15 +3,6 @@
* Copyright (C) 2017-2022 Willy Tarreau <w@1wt.eu>
*/
-/* Below comes the architecture-specific code. For each architecture, we have
- * the syscall declarations and the _start code definition. This is the only
- * global part. On all architectures the kernel puts everything in the stack
- * before jumping to _start just above us, without any return address (_start
- * is not a function but an entry point). So at the stack pointer we find argc.
- * Then argv[] begins, and ends at the first NULL. Then we have envp which
- * starts and ends with a NULL as well. So envp=argv+argc+1.
- */
-
#ifndef _NOLIBC_ARCH_H
#define _NOLIBC_ARCH_H
@@ -27,7 +18,7 @@
#include "arch-powerpc.h"
#elif defined(__riscv)
#include "arch-riscv.h"
-#elif defined(__s390x__) || defined(__s390__)
+#elif defined(__s390x__)
#include "arch-s390.h"
#elif defined(__loongarch__)
#include "arch-loongarch.h"
diff --git a/tools/include/nolibc/compiler.h b/tools/include/nolibc/compiler.h
index 369cfb5a0e78..87090bbc53e0 100644
--- a/tools/include/nolibc/compiler.h
+++ b/tools/include/nolibc/compiler.h
@@ -41,8 +41,8 @@
# define __no_stack_protector __attribute__((__optimize__("-fno-stack-protector")))
#endif /* __nolibc_has_attribute(no_stack_protector) */
-#if __nolibc_has_attribute(fallthrough)
-# define __nolibc_fallthrough do { } while (0); __attribute__((fallthrough))
+#if __nolibc_has_attribute(__fallthrough__)
+# define __nolibc_fallthrough do { } while (0); __attribute__((__fallthrough__))
#else
# define __nolibc_fallthrough do { } while (0)
#endif /* __nolibc_has_attribute(fallthrough) */
diff --git a/tools/include/nolibc/crt.h b/tools/include/nolibc/crt.h
index 961cfe777c35..d9262998dae9 100644
--- a/tools/include/nolibc/crt.h
+++ b/tools/include/nolibc/crt.h
@@ -7,6 +7,8 @@
#ifndef _NOLIBC_CRT_H
#define _NOLIBC_CRT_H
+#ifndef NOLIBC_NO_RUNTIME
+
#include "compiler.h"
char **environ __attribute__((weak));
@@ -88,4 +90,5 @@ void _start_c(long *sp)
exit(exitcode);
}
+#endif /* NOLIBC_NO_RUNTIME */
#endif /* _NOLIBC_CRT_H */
diff --git a/tools/include/nolibc/dirent.h b/tools/include/nolibc/dirent.h
index 758b95c48e7a..61a122a60327 100644
--- a/tools/include/nolibc/dirent.h
+++ b/tools/include/nolibc/dirent.h
@@ -86,9 +86,9 @@ int readdir_r(DIR *dirp, struct dirent *entry, struct dirent **result)
* readdir() can only return one entry at a time.
* Make sure the non-returned ones are not skipped.
*/
- ret = lseek(fd, ldir->d_off, SEEK_SET);
- if (ret == -1)
- return errno;
+ ret = sys_lseek(fd, ldir->d_off, SEEK_SET);
+ if (ret < 0)
+ return -ret;
entry->d_ino = ldir->d_ino;
/* the destination should always be big enough */
diff --git a/tools/include/nolibc/getopt.h b/tools/include/nolibc/getopt.h
index 217abb95264b..87565e3b6a33 100644
--- a/tools/include/nolibc/getopt.h
+++ b/tools/include/nolibc/getopt.h
@@ -78,7 +78,7 @@ int getopt(int argc, char * const argv[], const char *optstring)
return '?';
}
if (optstring[i] == ':') {
- optarg = 0;
+ optarg = NULL;
if (optstring[i + 1] != ':' || __optpos) {
optarg = argv[optind++];
if (__optpos)
diff --git a/tools/include/nolibc/inttypes.h b/tools/include/nolibc/inttypes.h
new file mode 100644
index 000000000000..1977bd74bfeb
--- /dev/null
+++ b/tools/include/nolibc/inttypes.h
@@ -0,0 +1,3 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+
+#include "nolibc.h"
diff --git a/tools/include/nolibc/nolibc.h b/tools/include/nolibc/nolibc.h
index c199ade200c2..272dfc961158 100644
--- a/tools/include/nolibc/nolibc.h
+++ b/tools/include/nolibc/nolibc.h
@@ -104,11 +104,13 @@
#include "sys/random.h"
#include "sys/reboot.h"
#include "sys/resource.h"
+#include "sys/select.h"
#include "sys/stat.h"
#include "sys/syscall.h"
#include "sys/sysmacros.h"
#include "sys/time.h"
#include "sys/timerfd.h"
+#include "sys/uio.h"
#include "sys/utsname.h"
#include "sys/wait.h"
#include "ctype.h"
@@ -116,6 +118,7 @@
#include "sched.h"
#include "signal.h"
#include "unistd.h"
+#include "stdbool.h"
#include "stdio.h"
#include "stdlib.h"
#include "string.h"
diff --git a/tools/include/nolibc/poll.h b/tools/include/nolibc/poll.h
index 1765acb17ea0..0d053f93ea99 100644
--- a/tools/include/nolibc/poll.h
+++ b/tools/include/nolibc/poll.h
@@ -39,10 +39,8 @@ int sys_poll(struct pollfd *fds, int nfds, int timeout)
t.tv_nsec = (timeout % 1000) * 1000000;
}
return my_syscall5(__NR_ppoll_time64, fds, nfds, (timeout >= 0) ? &t : NULL, NULL, 0);
-#elif defined(__NR_poll)
- return my_syscall3(__NR_poll, fds, nfds, timeout);
#else
- return __nolibc_enosys(__func__, fds, nfds, timeout);
+ return my_syscall3(__NR_poll, fds, nfds, timeout);
#endif
}
diff --git a/tools/include/nolibc/stackprotector.h b/tools/include/nolibc/stackprotector.h
index c71a2c257177..7123aa056cb0 100644
--- a/tools/include/nolibc/stackprotector.h
+++ b/tools/include/nolibc/stackprotector.h
@@ -9,6 +9,7 @@
#include "compiler.h"
+#ifndef NOLIBC_NO_RUNTIME
#if defined(_NOLIBC_STACKPROTECTOR)
#include "sys.h"
@@ -49,5 +50,6 @@ static __no_stack_protector void __stack_chk_init(void)
#else /* !defined(_NOLIBC_STACKPROTECTOR) */
static void __stack_chk_init(void) {}
#endif /* defined(_NOLIBC_STACKPROTECTOR) */
+#endif /* NOLIBC_NO_RUNTIME */
#endif /* _NOLIBC_STACKPROTECTOR_H */
diff --git a/tools/include/nolibc/std.h b/tools/include/nolibc/std.h
index ba950f0e7338..392f4dd94158 100644
--- a/tools/include/nolibc/std.h
+++ b/tools/include/nolibc/std.h
@@ -20,15 +20,15 @@
/* those are commonly provided by sys/types.h */
typedef unsigned int dev_t;
-typedef unsigned long ino_t;
+typedef uint64_t ino_t;
typedef unsigned int mode_t;
typedef signed int pid_t;
typedef unsigned int uid_t;
typedef unsigned int gid_t;
typedef unsigned long nlink_t;
-typedef signed long off_t;
+typedef int64_t off_t;
typedef signed long blksize_t;
typedef signed long blkcnt_t;
-typedef __kernel_old_time_t time_t;
+typedef __kernel_time_t time_t;
#endif /* _NOLIBC_STD_H */
diff --git a/tools/include/nolibc/stdio.h b/tools/include/nolibc/stdio.h
index 7630234408c5..1f16dab2ac88 100644
--- a/tools/include/nolibc/stdio.h
+++ b/tools/include/nolibc/stdio.h
@@ -321,11 +321,13 @@ int __nolibc_printf(__nolibc_printf_cb cb, intptr_t state, size_t n, const char
if (!outstr)
outstr="(null)";
}
-#ifndef NOLIBC_IGNORE_ERRNO
else if (c == 'm') {
+#ifdef NOLIBC_IGNORE_ERRNO
+ outstr = "unknown error";
+#else
outstr = strerror(errno);
- }
#endif /* NOLIBC_IGNORE_ERRNO */
+ }
else if (c == '%') {
/* queue it verbatim */
continue;
@@ -600,7 +602,11 @@ int sscanf(const char *str, const char *format, ...)
static __attribute__((unused))
void perror(const char *msg)
{
+#ifdef NOLIBC_IGNORE_ERRNO
+ fprintf(stderr, "%s%sunknown error\n", (msg && *msg) ? msg : "", (msg && *msg) ? ": " : "");
+#else
fprintf(stderr, "%s%serrno=%d\n", (msg && *msg) ? msg : "", (msg && *msg) ? ": " : "", errno);
+#endif
}
static __attribute__((unused))
diff --git a/tools/include/nolibc/stdlib.h b/tools/include/nolibc/stdlib.h
index 5fd99a480f82..f184e108ed0a 100644
--- a/tools/include/nolibc/stdlib.h
+++ b/tools/include/nolibc/stdlib.h
@@ -100,6 +100,7 @@ void free(void *ptr)
munmap(heap, heap->len);
}
+#ifndef NOLIBC_NO_RUNTIME
/* getenv() tries to find the environment variable named <name> in the
* environment array pointed to by global variable "environ" which must be
* declared as a char **, and must be terminated by a NULL (it is recommended
@@ -122,6 +123,7 @@ char *getenv(const char *name)
}
return NULL;
}
+#endif /* NOLIBC_NO_RUNTIME */
static __attribute__((unused))
void *malloc(size_t len)
diff --git a/tools/include/nolibc/string.h b/tools/include/nolibc/string.h
index 163a17e7dd38..4000926f44ac 100644
--- a/tools/include/nolibc/string.h
+++ b/tools/include/nolibc/string.h
@@ -93,6 +93,21 @@ void *memset(void *dst, int b, size_t len)
}
#endif /* #ifndef NOLIBC_ARCH_HAS_MEMSET */
+#ifndef NOLIBC_ARCH_HAS_MEMCHR
+static __attribute__((unused))
+void *memchr(const void *s, int c, size_t len)
+{
+ char *p = (char *)s;
+
+ while (len--) {
+ if (*p == (char)c)
+ return p;
+ p++;
+ }
+ return NULL;
+}
+#endif /* #ifndef NOLIBC_ARCH_HAS_MEMCHR */
+
static __attribute__((unused))
char *strchr(const char *s, int c)
{
diff --git a/tools/include/nolibc/sys.h b/tools/include/nolibc/sys.h
index 295e71d34aba..847af1ccbdc9 100644
--- a/tools/include/nolibc/sys.h
+++ b/tools/include/nolibc/sys.h
@@ -106,7 +106,7 @@ static __attribute__((unused))
void *sbrk(intptr_t inc)
{
/* first call to find current end */
- void *ret = sys_brk(0);
+ void *ret = sys_brk(NULL);
if (ret && sys_brk(ret + inc) == ret + inc)
return ret + inc;
@@ -118,6 +118,7 @@ void *sbrk(intptr_t inc)
/*
* int chdir(const char *path);
+ * int fchdir(int fildes);
*/
static __attribute__((unused))
@@ -132,6 +133,18 @@ int chdir(const char *path)
return __sysret(sys_chdir(path));
}
+static __attribute__((unused))
+int sys_fchdir(int fildes)
+{
+ return my_syscall1(__NR_fchdir, fildes);
+}
+
+static __attribute__((unused))
+int fchdir(int fildes)
+{
+ return __sysret(sys_fchdir(fildes));
+}
+
/*
* int chmod(const char *path, mode_t mode);
@@ -142,10 +155,8 @@ int sys_chmod(const char *path, mode_t mode)
{
#if defined(__NR_fchmodat)
return my_syscall4(__NR_fchmodat, AT_FDCWD, path, mode, 0);
-#elif defined(__NR_chmod)
- return my_syscall2(__NR_chmod, path, mode);
#else
- return __nolibc_enosys(__func__, path, mode);
+ return my_syscall2(__NR_chmod, path, mode);
#endif
}
@@ -165,10 +176,8 @@ int sys_chown(const char *path, uid_t owner, gid_t group)
{
#if defined(__NR_fchownat)
return my_syscall5(__NR_fchownat, AT_FDCWD, path, owner, group, 0);
-#elif defined(__NR_chown)
- return my_syscall3(__NR_chown, path, owner, group);
#else
- return __nolibc_enosys(__func__, path, owner, group);
+ return my_syscall3(__NR_chown, path, owner, group);
#endif
}
@@ -238,11 +247,22 @@ static __attribute__((unused))
int sys_dup2(int old, int new)
{
#if defined(__NR_dup3)
+ int ret, nr_fcntl;
+
+#ifdef __NR_fcntl64
+ nr_fcntl = __NR_fcntl64;
+#else
+ nr_fcntl = __NR_fcntl;
+#endif
+
+ if (old == new) {
+ ret = my_syscall2(nr_fcntl, old, F_GETFD);
+ return ret < 0 ? ret : old;
+ }
+
return my_syscall3(__NR_dup3, old, new, 0);
-#elif defined(__NR_dup2)
- return my_syscall2(__NR_dup2, old, new);
#else
- return __nolibc_enosys(__func__, old, new);
+ return my_syscall2(__NR_dup2, old, new);
#endif
}
@@ -327,10 +347,8 @@ pid_t sys_fork(void)
* will not use the rest with no other flag.
*/
return my_syscall5(__NR_clone, SIGCHLD, 0, 0, 0, 0);
-#elif defined(__NR_fork)
- return my_syscall0(__NR_fork);
#else
- return __nolibc_enosys(__func__);
+ return my_syscall0(__NR_fork);
#endif
}
#endif
@@ -347,7 +365,7 @@ pid_t sys_vfork(void)
{
#if defined(__NR_vfork)
return my_syscall0(__NR_vfork);
-#elif defined(__NR_clone3)
+#else
/*
* clone() could be used but has different argument orders per
* architecture.
@@ -358,8 +376,6 @@ pid_t sys_vfork(void)
};
return my_syscall2(__NR_clone3, &args, sizeof(args));
-#else
- return __nolibc_enosys(__func__);
#endif
}
#endif
@@ -509,6 +525,7 @@ pid_t gettid(void)
return sys_gettid();
}
+#ifndef NOLIBC_NO_RUNTIME
static unsigned long getauxval(unsigned long key);
/*
@@ -520,7 +537,7 @@ int getpagesize(void)
{
return __sysret((int)getauxval(AT_PAGESZ) ?: -ENOENT);
}
-
+#endif /* NOLIBC_NO_RUNTIME */
/*
* uid_t getuid(void);
@@ -569,10 +586,8 @@ int sys_link(const char *old, const char *new)
{
#if defined(__NR_linkat)
return my_syscall5(__NR_linkat, AT_FDCWD, old, AT_FDCWD, new, 0);
-#elif defined(__NR_link)
- return my_syscall2(__NR_link, old, new);
#else
- return __nolibc_enosys(__func__, old, new);
+ return my_syscall2(__NR_link, old, new);
#endif
}
@@ -590,44 +605,27 @@ int link(const char *old, const char *new)
static __attribute__((unused))
off_t sys_lseek(int fd, off_t offset, int whence)
{
-#if defined(__NR_lseek)
- return my_syscall3(__NR_lseek, fd, offset, whence);
-#else
- return __nolibc_enosys(__func__, fd, offset, whence);
-#endif
-}
-
-static __attribute__((unused))
-int sys_llseek(int fd, unsigned long offset_high, unsigned long offset_low,
- __kernel_loff_t *result, int whence)
-{
#if defined(__NR_llseek)
- return my_syscall5(__NR_llseek, fd, offset_high, offset_low, result, whence);
+ __kernel_loff_t loff = 0;
+ off_t result;
+ int ret;
+
+ ret = my_syscall5(__NR_llseek, fd, offset >> 32, (uint32_t)offset, &loff, whence);
+ if (ret < 0)
+ result = ret;
+ else
+ result = loff;
+
+ return result;
#else
- return __nolibc_enosys(__func__, fd, offset_high, offset_low, result, whence);
+ return my_syscall3(__NR_lseek, fd, offset, whence);
#endif
}
static __attribute__((unused))
off_t lseek(int fd, off_t offset, int whence)
{
- __kernel_loff_t loff = 0;
- off_t result;
- int ret;
-
- result = sys_lseek(fd, offset, whence);
- if (result == -ENOSYS) {
- /* Only exists on 32bit where nolibc off_t is also 32bit */
- ret = sys_llseek(fd, 0, offset, &loff, whence);
- if (ret < 0)
- result = ret;
- else if (loff != (off_t)loff)
- result = -EOVERFLOW;
- else
- result = loff;
- }
-
- return __sysret(result);
+ return __sysret(sys_lseek(fd, offset, whence));
}
@@ -640,10 +638,8 @@ int sys_mkdir(const char *path, mode_t mode)
{
#if defined(__NR_mkdirat)
return my_syscall3(__NR_mkdirat, AT_FDCWD, path, mode);
-#elif defined(__NR_mkdir)
- return my_syscall2(__NR_mkdir, path, mode);
#else
- return __nolibc_enosys(__func__, path, mode);
+ return my_syscall2(__NR_mkdir, path, mode);
#endif
}
@@ -662,10 +658,8 @@ int sys_rmdir(const char *path)
{
#if defined(__NR_rmdir)
return my_syscall1(__NR_rmdir, path);
-#elif defined(__NR_unlinkat)
- return my_syscall3(__NR_unlinkat, AT_FDCWD, path, AT_REMOVEDIR);
#else
- return __nolibc_enosys(__func__, path);
+ return my_syscall3(__NR_unlinkat, AT_FDCWD, path, AT_REMOVEDIR);
#endif
}
@@ -685,10 +679,8 @@ long sys_mknod(const char *path, mode_t mode, dev_t dev)
{
#if defined(__NR_mknodat)
return my_syscall4(__NR_mknodat, AT_FDCWD, path, mode, dev);
-#elif defined(__NR_mknod)
- return my_syscall3(__NR_mknod, path, mode, dev);
#else
- return __nolibc_enosys(__func__, path, mode, dev);
+ return my_syscall3(__NR_mknod, path, mode, dev);
#endif
}
@@ -775,53 +767,6 @@ int sched_yield(void)
/*
- * int select(int nfds, fd_set *read_fds, fd_set *write_fds,
- * fd_set *except_fds, struct timeval *timeout);
- */
-
-static __attribute__((unused))
-int sys_select(int nfds, fd_set *rfds, fd_set *wfds, fd_set *efds, struct timeval *timeout)
-{
-#if defined(__ARCH_WANT_SYS_OLD_SELECT) && !defined(__NR__newselect)
- struct sel_arg_struct {
- unsigned long n;
- fd_set *r, *w, *e;
- struct timeval *t;
- } arg = { .n = nfds, .r = rfds, .w = wfds, .e = efds, .t = timeout };
- return my_syscall1(__NR_select, &arg);
-#elif defined(__NR__newselect)
- return my_syscall5(__NR__newselect, nfds, rfds, wfds, efds, timeout);
-#elif defined(__NR_select)
- return my_syscall5(__NR_select, nfds, rfds, wfds, efds, timeout);
-#elif defined(__NR_pselect6)
- struct timespec t;
-
- if (timeout) {
- t.tv_sec = timeout->tv_sec;
- t.tv_nsec = timeout->tv_usec * 1000;
- }
- return my_syscall6(__NR_pselect6, nfds, rfds, wfds, efds, timeout ? &t : NULL, NULL);
-#elif defined(__NR_pselect6_time64)
- struct __kernel_timespec t;
-
- if (timeout) {
- t.tv_sec = timeout->tv_sec;
- t.tv_nsec = timeout->tv_usec * 1000;
- }
- return my_syscall6(__NR_pselect6_time64, nfds, rfds, wfds, efds, timeout ? &t : NULL, NULL);
-#else
- return __nolibc_enosys(__func__, nfds, rfds, wfds, efds, timeout);
-#endif
-}
-
-static __attribute__((unused))
-int select(int nfds, fd_set *rfds, fd_set *wfds, fd_set *efds, struct timeval *timeout)
-{
- return __sysret(sys_select(nfds, rfds, wfds, efds, timeout));
-}
-
-
-/*
* int setpgid(pid_t pid, pid_t pgid);
*/
@@ -874,10 +819,8 @@ int sys_symlink(const char *old, const char *new)
{
#if defined(__NR_symlinkat)
return my_syscall3(__NR_symlinkat, old, AT_FDCWD, new);
-#elif defined(__NR_symlink)
- return my_syscall2(__NR_symlink, old, new);
#else
- return __nolibc_enosys(__func__, old, new);
+ return my_syscall2(__NR_symlink, old, new);
#endif
}
@@ -931,10 +874,8 @@ int sys_unlink(const char *path)
{
#if defined(__NR_unlinkat)
return my_syscall3(__NR_unlinkat, AT_FDCWD, path, 0);
-#elif defined(__NR_unlink)
- return my_syscall1(__NR_unlink, path);
#else
- return __nolibc_enosys(__func__, path);
+ return my_syscall1(__NR_unlink, path);
#endif
}
diff --git a/tools/include/nolibc/sys/auxv.h b/tools/include/nolibc/sys/auxv.h
index c52463d6c18d..0e98325e7347 100644
--- a/tools/include/nolibc/sys/auxv.h
+++ b/tools/include/nolibc/sys/auxv.h
@@ -10,6 +10,8 @@
#ifndef _NOLIBC_SYS_AUXV_H
#define _NOLIBC_SYS_AUXV_H
+#ifndef NOLIBC_NO_RUNTIME
+
#include "../crt.h"
static __attribute__((unused))
@@ -38,4 +40,5 @@ unsigned long getauxval(unsigned long type)
return ret;
}
+#endif /* NOLIBC_NO_RUNTIME */
#endif /* _NOLIBC_SYS_AUXV_H */
diff --git a/tools/include/nolibc/sys/mman.h b/tools/include/nolibc/sys/mman.h
index 5228751b458c..77084ac3405a 100644
--- a/tools/include/nolibc/sys/mman.h
+++ b/tools/include/nolibc/sys/mman.h
@@ -31,11 +31,6 @@ void *sys_mmap(void *addr, size_t length, int prot, int flags, int fd,
}
#endif
-/* Note that on Linux, MAP_FAILED is -1 so we can use the generic __sysret()
- * which returns -1 upon error and still satisfy user land that checks for
- * MAP_FAILED.
- */
-
static __attribute__((unused))
void *mmap(void *addr, size_t length, int prot, int flags, int fd, off_t offset)
{
diff --git a/tools/include/nolibc/sys/random.h b/tools/include/nolibc/sys/random.h
index 8d9749f1c845..cd5d25c571a8 100644
--- a/tools/include/nolibc/sys/random.h
+++ b/tools/include/nolibc/sys/random.h
@@ -22,13 +22,13 @@
static __attribute__((unused))
ssize_t sys_getrandom(void *buf, size_t buflen, unsigned int flags)
{
- return my_syscall3(__NR_getrandom, buf, buflen, flags);
+ return my_syscall3(__NR_getrandom, buf, buflen, flags);
}
static __attribute__((unused))
ssize_t getrandom(void *buf, size_t buflen, unsigned int flags)
{
- return __sysret(sys_getrandom(buf, buflen, flags));
+ return __sysret(sys_getrandom(buf, buflen, flags));
}
#endif /* _NOLIBC_SYS_RANDOM_H */
diff --git a/tools/include/nolibc/sys/reboot.h b/tools/include/nolibc/sys/reboot.h
index 4a1e435be669..38274c64a722 100644
--- a/tools/include/nolibc/sys/reboot.h
+++ b/tools/include/nolibc/sys/reboot.h
@@ -28,7 +28,7 @@ ssize_t sys_reboot(int magic1, int magic2, int cmd, void *arg)
static __attribute__((unused))
int reboot(int cmd)
{
- return __sysret(sys_reboot(LINUX_REBOOT_MAGIC1, LINUX_REBOOT_MAGIC2, cmd, 0));
+ return __sysret(sys_reboot(LINUX_REBOOT_MAGIC1, LINUX_REBOOT_MAGIC2, cmd, NULL));
}
#endif /* _NOLIBC_SYS_REBOOT_H */
diff --git a/tools/include/nolibc/sys/select.h b/tools/include/nolibc/sys/select.h
new file mode 100644
index 000000000000..2a5619c01277
--- /dev/null
+++ b/tools/include/nolibc/sys/select.h
@@ -0,0 +1,103 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+
+#include "../nolibc.h"
+
+#ifndef _NOLIBC_SYS_SELECT_H
+#define _NOLIBC_SYS_SELECT_H
+
+#include <linux/time.h>
+#include <linux/unistd.h>
+
+/* commonly an fd_set represents 256 FDs */
+#ifndef FD_SETSIZE
+#define FD_SETSIZE 256
+#endif
+
+#define FD_SETIDXMASK (8 * sizeof(unsigned long))
+#define FD_SETBITMASK (8 * sizeof(unsigned long)-1)
+
+/* for select() */
+typedef struct {
+ unsigned long fds[(FD_SETSIZE + FD_SETBITMASK) / FD_SETIDXMASK];
+} fd_set;
+
+#define FD_CLR(fd, set) do { \
+ fd_set *__set = (set); \
+ int __fd = (fd); \
+ if (__fd >= 0) \
+ __set->fds[__fd / FD_SETIDXMASK] &= \
+ ~(1U << (__fd & FD_SETBITMASK)); \
+ } while (0)
+
+#define FD_SET(fd, set) do { \
+ fd_set *__set = (set); \
+ int __fd = (fd); \
+ if (__fd >= 0) \
+ __set->fds[__fd / FD_SETIDXMASK] |= \
+ 1 << (__fd & FD_SETBITMASK); \
+ } while (0)
+
+#define FD_ISSET(fd, set) ({ \
+ fd_set *__set = (set); \
+ int __fd = (fd); \
+ int __r = 0; \
+ if (__fd >= 0) \
+ __r = !!(__set->fds[__fd / FD_SETIDXMASK] & \
+1U << (__fd & FD_SETBITMASK)); \
+ __r; \
+ })
+
+#define FD_ZERO(set) do { \
+ fd_set *__set = (set); \
+ int __idx; \
+ int __size = (FD_SETSIZE+FD_SETBITMASK) / FD_SETIDXMASK;\
+ for (__idx = 0; __idx < __size; __idx++) \
+ __set->fds[__idx] = 0; \
+ } while (0)
+
+/*
+ * int select(int nfds, fd_set *read_fds, fd_set *write_fds,
+ * fd_set *except_fds, struct timeval *timeout);
+ */
+
+static __attribute__((unused))
+int sys_select(int nfds, fd_set *rfds, fd_set *wfds, fd_set *efds, struct timeval *timeout)
+{
+#if defined(__ARCH_WANT_SYS_OLD_SELECT) && !defined(__NR__newselect)
+ struct sel_arg_struct {
+ unsigned long n;
+ fd_set *r, *w, *e;
+ struct timeval *t;
+ } arg = { .n = nfds, .r = rfds, .w = wfds, .e = efds, .t = timeout };
+ return my_syscall1(__NR_select, &arg);
+#elif defined(__NR__newselect)
+ return my_syscall5(__NR__newselect, nfds, rfds, wfds, efds, timeout);
+#elif defined(__NR_select)
+ return my_syscall5(__NR_select, nfds, rfds, wfds, efds, timeout);
+#elif defined(__NR_pselect6)
+ struct timespec t;
+
+ if (timeout) {
+ t.tv_sec = timeout->tv_sec;
+ t.tv_nsec = timeout->tv_usec * 1000;
+ }
+ return my_syscall6(__NR_pselect6, nfds, rfds, wfds, efds, timeout ? &t : NULL, NULL);
+#else
+ struct __kernel_timespec t;
+
+ if (timeout) {
+ t.tv_sec = timeout->tv_sec;
+ t.tv_nsec = timeout->tv_usec * 1000;
+ }
+ return my_syscall6(__NR_pselect6_time64, nfds, rfds, wfds, efds, timeout ? &t : NULL, NULL);
+#endif
+}
+
+static __attribute__((unused))
+int select(int nfds, fd_set *rfds, fd_set *wfds, fd_set *efds, struct timeval *timeout)
+{
+ return __sysret(sys_select(nfds, rfds, wfds, efds, timeout));
+}
+
+
+#endif /* _NOLIBC_SYS_SELECT_H */
diff --git a/tools/include/nolibc/sys/timerfd.h b/tools/include/nolibc/sys/timerfd.h
index 4375d546ba58..5dd61030c991 100644
--- a/tools/include/nolibc/sys/timerfd.h
+++ b/tools/include/nolibc/sys/timerfd.h
@@ -34,7 +34,7 @@ int sys_timerfd_gettime(int fd, struct itimerspec *curr_value)
{
#if defined(__NR_timerfd_gettime)
return my_syscall2(__NR_timerfd_gettime, fd, curr_value);
-#elif defined(__NR_timerfd_gettime64)
+#else
struct __kernel_itimerspec kcurr_value;
int ret;
@@ -42,8 +42,6 @@ int sys_timerfd_gettime(int fd, struct itimerspec *curr_value)
__nolibc_timespec_kernel_to_user(&kcurr_value.it_interval, &curr_value->it_interval);
__nolibc_timespec_kernel_to_user(&kcurr_value.it_value, &curr_value->it_value);
return ret;
-#else
- return __nolibc_enosys(__func__, fd, curr_value);
#endif
}
@@ -60,7 +58,7 @@ int sys_timerfd_settime(int fd, int flags,
{
#if defined(__NR_timerfd_settime)
return my_syscall4(__NR_timerfd_settime, fd, flags, new_value, old_value);
-#elif defined(__NR_timerfd_settime64)
+#else
struct __kernel_itimerspec knew_value, kold_value;
int ret;
@@ -72,8 +70,6 @@ int sys_timerfd_settime(int fd, int flags,
__nolibc_timespec_kernel_to_user(&kold_value.it_value, &old_value->it_value);
}
return ret;
-#else
- return __nolibc_enosys(__func__, fd, flags, new_value, old_value);
#endif
}
diff --git a/tools/include/nolibc/sys/uio.h b/tools/include/nolibc/sys/uio.h
new file mode 100644
index 000000000000..7ad42b927d2f
--- /dev/null
+++ b/tools/include/nolibc/sys/uio.h
@@ -0,0 +1,49 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * uio for NOLIBC
+ * Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu>
+ * Copyright (C) 2025 Intel Corporation
+ */
+
+/* make sure to include all global symbols */
+#include "../nolibc.h"
+
+#ifndef _NOLIBC_SYS_UIO_H
+#define _NOLIBC_SYS_UIO_H
+
+#include "../sys.h"
+#include <linux/uio.h>
+
+
+/*
+ * ssize_t readv(int fd, const struct iovec *iovec, int count);
+ */
+static __attribute__((unused))
+ssize_t sys_readv(int fd, const struct iovec *iovec, int count)
+{
+ return my_syscall3(__NR_readv, fd, iovec, count);
+}
+
+static __attribute__((unused))
+ssize_t readv(int fd, const struct iovec *iovec, int count)
+{
+ return __sysret(sys_readv(fd, iovec, count));
+}
+
+/*
+ * ssize_t writev(int fd, const struct iovec *iovec, int count);
+ */
+static __attribute__((unused))
+ssize_t sys_writev(int fd, const struct iovec *iovec, int count)
+{
+ return my_syscall3(__NR_writev, fd, iovec, count);
+}
+
+static __attribute__((unused))
+ssize_t writev(int fd, const struct iovec *iovec, int count)
+{
+ return __sysret(sys_writev(fd, iovec, count));
+}
+
+
+#endif /* _NOLIBC_SYS_UIO_H */
diff --git a/tools/include/nolibc/sys/wait.h b/tools/include/nolibc/sys/wait.h
index 56ddb806da7f..9d9319ba92cb 100644
--- a/tools/include/nolibc/sys/wait.h
+++ b/tools/include/nolibc/sys/wait.h
@@ -16,28 +16,11 @@
/*
* pid_t wait(int *status);
- * pid_t wait4(pid_t pid, int *status, int options, struct rusage *rusage);
* pid_t waitpid(pid_t pid, int *status, int options);
* int waitid(idtype_t idtype, id_t id, siginfo_t *infop, int options);
*/
static __attribute__((unused))
-pid_t sys_wait4(pid_t pid, int *status, int options, struct rusage *rusage)
-{
-#ifdef __NR_wait4
- return my_syscall4(__NR_wait4, pid, status, options, rusage);
-#else
- return __nolibc_enosys(__func__, pid, status, options, rusage);
-#endif
-}
-
-static __attribute__((unused))
-pid_t wait4(pid_t pid, int *status, int options, struct rusage *rusage)
-{
- return __sysret(sys_wait4(pid, status, options, rusage));
-}
-
-static __attribute__((unused))
int sys_waitid(int which, pid_t pid, siginfo_t *infop, int options, struct rusage *rusage)
{
return my_syscall5(__NR_waitid, which, pid, infop, options, rusage);
@@ -82,23 +65,29 @@ pid_t waitpid(pid_t pid, int *status, int options)
switch (info.si_code) {
case 0:
- *status = 0;
+ if (status)
+ *status = 0;
break;
case CLD_EXITED:
- *status = (info.si_status & 0xff) << 8;
+ if (status)
+ *status = (info.si_status & 0xff) << 8;
break;
case CLD_KILLED:
- *status = info.si_status & 0x7f;
+ if (status)
+ *status = info.si_status & 0x7f;
break;
case CLD_DUMPED:
- *status = (info.si_status & 0x7f) | 0x80;
+ if (status)
+ *status = (info.si_status & 0x7f) | 0x80;
break;
case CLD_STOPPED:
case CLD_TRAPPED:
- *status = (info.si_status << 8) + 0x7f;
+ if (status)
+ *status = (info.si_status << 8) + 0x7f;
break;
case CLD_CONTINUED:
- *status = 0xffff;
+ if (status)
+ *status = 0xffff;
break;
default:
return -1;
diff --git a/tools/include/nolibc/time.h b/tools/include/nolibc/time.h
index d02bc44d2643..48e78f8becf9 100644
--- a/tools/include/nolibc/time.h
+++ b/tools/include/nolibc/time.h
@@ -45,7 +45,7 @@ int sys_clock_getres(clockid_t clockid, struct timespec *res)
{
#if defined(__NR_clock_getres)
return my_syscall2(__NR_clock_getres, clockid, res);
-#elif defined(__NR_clock_getres_time64)
+#else
struct __kernel_timespec kres;
int ret;
@@ -53,8 +53,6 @@ int sys_clock_getres(clockid_t clockid, struct timespec *res)
if (res)
__nolibc_timespec_kernel_to_user(&kres, res);
return ret;
-#else
- return __nolibc_enosys(__func__, clockid, res);
#endif
}
@@ -69,7 +67,7 @@ int sys_clock_gettime(clockid_t clockid, struct timespec *tp)
{
#if defined(__NR_clock_gettime)
return my_syscall2(__NR_clock_gettime, clockid, tp);
-#elif defined(__NR_clock_gettime64)
+#else
struct __kernel_timespec ktp;
int ret;
@@ -77,8 +75,6 @@ int sys_clock_gettime(clockid_t clockid, struct timespec *tp)
if (tp)
__nolibc_timespec_kernel_to_user(&ktp, tp);
return ret;
-#else
- return __nolibc_enosys(__func__, clockid, tp);
#endif
}
@@ -93,13 +89,11 @@ int sys_clock_settime(clockid_t clockid, struct timespec *tp)
{
#if defined(__NR_clock_settime)
return my_syscall2(__NR_clock_settime, clockid, tp);
-#elif defined(__NR_clock_settime64)
+#else
struct __kernel_timespec ktp;
__nolibc_timespec_user_to_kernel(tp, &ktp);
return my_syscall2(__NR_clock_settime64, clockid, &ktp);
-#else
- return __nolibc_enosys(__func__, clockid, tp);
#endif
}
@@ -115,7 +109,7 @@ int sys_clock_nanosleep(clockid_t clockid, int flags, const struct timespec *rqt
{
#if defined(__NR_clock_nanosleep)
return my_syscall4(__NR_clock_nanosleep, clockid, flags, rqtp, rmtp);
-#elif defined(__NR_clock_nanosleep_time64)
+#else
struct __kernel_timespec krqtp, krmtp;
int ret;
@@ -124,8 +118,6 @@ int sys_clock_nanosleep(clockid_t clockid, int flags, const struct timespec *rqt
if (rmtp)
__nolibc_timespec_kernel_to_user(&krmtp, rmtp);
return ret;
-#else
- return __nolibc_enosys(__func__, clockid, flags, rqtp, rmtp);
#endif
}
@@ -133,7 +125,8 @@ static __attribute__((unused))
int clock_nanosleep(clockid_t clockid, int flags, const struct timespec *rqtp,
struct timespec *rmtp)
{
- return __sysret(sys_clock_nanosleep(clockid, flags, rqtp, rmtp));
+ /* Directly return a positive error number */
+ return -sys_clock_nanosleep(clockid, flags, rqtp, rmtp);
}
static __inline__
@@ -145,7 +138,7 @@ double difftime(time_t time1, time_t time2)
static __inline__
int nanosleep(const struct timespec *rqtp, struct timespec *rmtp)
{
- return clock_nanosleep(CLOCK_REALTIME, 0, rqtp, rmtp);
+ return __sysret(sys_clock_nanosleep(CLOCK_REALTIME, 0, rqtp, rmtp));
}
@@ -198,7 +191,7 @@ int sys_timer_gettime(timer_t timerid, struct itimerspec *curr_value)
{
#if defined(__NR_timer_gettime)
return my_syscall2(__NR_timer_gettime, timerid, curr_value);
-#elif defined(__NR_timer_gettime64)
+#else
struct __kernel_itimerspec kcurr_value;
int ret;
@@ -206,8 +199,6 @@ int sys_timer_gettime(timer_t timerid, struct itimerspec *curr_value)
__nolibc_timespec_kernel_to_user(&kcurr_value.it_interval, &curr_value->it_interval);
__nolibc_timespec_kernel_to_user(&kcurr_value.it_value, &curr_value->it_value);
return ret;
-#else
- return __nolibc_enosys(__func__, timerid, curr_value);
#endif
}
@@ -223,7 +214,7 @@ int sys_timer_settime(timer_t timerid, int flags,
{
#if defined(__NR_timer_settime)
return my_syscall4(__NR_timer_settime, timerid, flags, new_value, old_value);
-#elif defined(__NR_timer_settime64)
+#else
struct __kernel_itimerspec knew_value, kold_value;
int ret;
@@ -235,8 +226,6 @@ int sys_timer_settime(timer_t timerid, int flags,
__nolibc_timespec_kernel_to_user(&kold_value.it_value, &old_value->it_value);
}
return ret;
-#else
- return __nolibc_enosys(__func__, timerid, flags, new_value, old_value);
#endif
}
diff --git a/tools/include/nolibc/types.h b/tools/include/nolibc/types.h
index 16c6e9ec9451..470a5f77bc0f 100644
--- a/tools/include/nolibc/types.h
+++ b/tools/include/nolibc/types.h
@@ -70,11 +70,6 @@
#define DT_LNK 0xa
#define DT_SOCK 0xc
-/* commonly an fd_set represents 256 FDs */
-#ifndef FD_SETSIZE
-#define FD_SETSIZE 256
-#endif
-
/* PATH_MAX and MAXPATHLEN are often used and found with plenty of different
* values.
*/
@@ -115,48 +110,6 @@
#define EXIT_SUCCESS 0
#define EXIT_FAILURE 1
-#define FD_SETIDXMASK (8 * sizeof(unsigned long))
-#define FD_SETBITMASK (8 * sizeof(unsigned long)-1)
-
-/* for select() */
-typedef struct {
- unsigned long fds[(FD_SETSIZE + FD_SETBITMASK) / FD_SETIDXMASK];
-} fd_set;
-
-#define FD_CLR(fd, set) do { \
- fd_set *__set = (set); \
- int __fd = (fd); \
- if (__fd >= 0) \
- __set->fds[__fd / FD_SETIDXMASK] &= \
- ~(1U << (__fd & FD_SETBITMASK)); \
- } while (0)
-
-#define FD_SET(fd, set) do { \
- fd_set *__set = (set); \
- int __fd = (fd); \
- if (__fd >= 0) \
- __set->fds[__fd / FD_SETIDXMASK] |= \
- 1 << (__fd & FD_SETBITMASK); \
- } while (0)
-
-#define FD_ISSET(fd, set) ({ \
- fd_set *__set = (set); \
- int __fd = (fd); \
- int __r = 0; \
- if (__fd >= 0) \
- __r = !!(__set->fds[__fd / FD_SETIDXMASK] & \
-1U << (__fd & FD_SETBITMASK)); \
- __r; \
- })
-
-#define FD_ZERO(set) do { \
- fd_set *__set = (set); \
- int __idx; \
- int __size = (FD_SETSIZE+FD_SETBITMASK) / FD_SETIDXMASK;\
- for (__idx = 0; __idx < __size; __idx++) \
- __set->fds[__idx] = 0; \
- } while (0)
-
/* for getdents64() */
struct linux_dirent64 {
uint64_t d_ino;
diff --git a/tools/include/nolibc/unistd.h b/tools/include/nolibc/unistd.h
index 25bfc7732ec7..bb5e80f3f05d 100644
--- a/tools/include/nolibc/unistd.h
+++ b/tools/include/nolibc/unistd.h
@@ -33,7 +33,7 @@
static __attribute__((unused))
int sys_faccessat(int fd, const char *path, int amode, int flag)
{
- return my_syscall4(__NR_faccessat, fd, path, amode, flag);
+ return my_syscall4(__NR_faccessat, fd, path, amode, flag);
}
static __attribute__((unused))
@@ -54,7 +54,7 @@ int msleep(unsigned int msecs)
{
struct timeval my_timeval = { msecs / 1000, (msecs % 1000) * 1000 };
- if (sys_select(0, 0, 0, 0, &my_timeval) < 0)
+ if (sys_select(0, NULL, NULL, NULL, &my_timeval) < 0)
return (my_timeval.tv_sec * 1000) +
(my_timeval.tv_usec / 1000) +
!!(my_timeval.tv_usec % 1000);
@@ -67,7 +67,7 @@ unsigned int sleep(unsigned int seconds)
{
struct timeval my_timeval = { seconds, 0 };
- if (sys_select(0, 0, 0, 0, &my_timeval) < 0)
+ if (sys_select(0, NULL, NULL, NULL, &my_timeval) < 0)
return my_timeval.tv_sec + !!my_timeval.tv_usec;
else
return 0;
@@ -78,7 +78,7 @@ int usleep(unsigned int usecs)
{
struct timeval my_timeval = { usecs / 1000000, usecs % 1000000 };
- return sys_select(0, 0, 0, 0, &my_timeval);
+ return sys_select(0, NULL, NULL, NULL, &my_timeval);
}
static __attribute__((unused))
diff --git a/tools/include/uapi/asm-generic/unistd.h b/tools/include/uapi/asm-generic/unistd.h
index 2892a45023af..04e0077fb4c9 100644
--- a/tools/include/uapi/asm-generic/unistd.h
+++ b/tools/include/uapi/asm-generic/unistd.h
@@ -852,8 +852,14 @@ __SYSCALL(__NR_removexattrat, sys_removexattrat)
#define __NR_open_tree_attr 467
__SYSCALL(__NR_open_tree_attr, sys_open_tree_attr)
+/* fs/inode.c */
+#define __NR_file_getattr 468
+__SYSCALL(__NR_file_getattr, sys_file_getattr)
+#define __NR_file_setattr 469
+__SYSCALL(__NR_file_setattr, sys_file_setattr)
+
#undef __NR_syscalls
-#define __NR_syscalls 468
+#define __NR_syscalls 470
/*
* 32 bit systems traditionally used different
diff --git a/tools/include/uapi/drm/drm.h b/tools/include/uapi/drm/drm.h
index e63a71d3c607..3cd5cf15e3c9 100644
--- a/tools/include/uapi/drm/drm.h
+++ b/tools/include/uapi/drm/drm.h
@@ -597,35 +597,66 @@ struct drm_set_version {
int drm_dd_minor;
};
-/* DRM_IOCTL_GEM_CLOSE ioctl argument type */
+/**
+ * struct drm_gem_close - Argument for &DRM_IOCTL_GEM_CLOSE ioctl.
+ * @handle: Handle of the object to be closed.
+ * @pad: Padding.
+ *
+ * Releases the handle to an mm object.
+ */
struct drm_gem_close {
- /** Handle of the object to be closed. */
__u32 handle;
__u32 pad;
};
-/* DRM_IOCTL_GEM_FLINK ioctl argument type */
+/**
+ * struct drm_gem_flink - Argument for &DRM_IOCTL_GEM_FLINK ioctl.
+ * @handle: Handle for the object being named.
+ * @name: Returned global name.
+ *
+ * Create a global name for an object, returning the name.
+ *
+ * Note that the name does not hold a reference; when the object
+ * is freed, the name goes away.
+ */
struct drm_gem_flink {
- /** Handle for the object being named */
__u32 handle;
-
- /** Returned global name */
__u32 name;
};
-/* DRM_IOCTL_GEM_OPEN ioctl argument type */
+/**
+ * struct drm_gem_open - Argument for &DRM_IOCTL_GEM_OPEN ioctl.
+ * @name: Name of object being opened.
+ * @handle: Returned handle for the object.
+ * @size: Returned size of the object
+ *
+ * Open an object using the global name, returning a handle and the size.
+ *
+ * This handle (of course) holds a reference to the object, so the object
+ * will not go away until the handle is deleted.
+ */
struct drm_gem_open {
- /** Name of object being opened */
__u32 name;
-
- /** Returned handle for the object */
__u32 handle;
-
- /** Returned size of the object */
__u64 size;
};
/**
+ * struct drm_gem_change_handle - Argument for &DRM_IOCTL_GEM_CHANGE_HANDLE ioctl.
+ * @handle: The handle of a gem object.
+ * @new_handle: An available gem handle.
+ *
+ * This ioctl changes the handle of a GEM object to the specified one.
+ * The new handle must be unused. On success the old handle is closed
+ * and all further IOCTL should refer to the new handle only.
+ * Calls to DRM_IOCTL_PRIME_FD_TO_HANDLE will return the new handle.
+ */
+struct drm_gem_change_handle {
+ __u32 handle;
+ __u32 new_handle;
+};
+
+/**
* DRM_CAP_DUMB_BUFFER
*
* If set to 1, the driver supports creating dumb buffers via the
@@ -1309,6 +1340,14 @@ extern "C" {
*/
#define DRM_IOCTL_SET_CLIENT_NAME DRM_IOWR(0xD1, struct drm_set_client_name)
+/**
+ * DRM_IOCTL_GEM_CHANGE_HANDLE - Move an object to a different handle
+ *
+ * Some applications (notably CRIU) need objects to have specific gem handles.
+ * This ioctl changes the object at one gem handle to use a new gem handle.
+ */
+#define DRM_IOCTL_GEM_CHANGE_HANDLE DRM_IOWR(0xD2, struct drm_gem_change_handle)
+
/*
* Device specific ioctls should only be in their respective headers
* The device specific ioctl range is from 0x40 to 0x9f.
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 233de8677382..be7d8e060e10 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -1026,6 +1026,7 @@ enum bpf_map_type {
BPF_MAP_TYPE_USER_RINGBUF,
BPF_MAP_TYPE_CGRP_STORAGE,
BPF_MAP_TYPE_ARENA,
+ BPF_MAP_TYPE_INSN_ARRAY,
__MAX_BPF_MAP_TYPE
};
@@ -1430,6 +1431,9 @@ enum {
/* Do not translate kernel bpf_arena pointers to user pointers */
BPF_F_NO_USER_CONV = (1U << 18),
+
+/* Enable BPF ringbuf overwrite mode */
+ BPF_F_RB_OVERWRITE = (1U << 19),
};
/* Flags for BPF_PROG_QUERY. */
@@ -1522,6 +1526,12 @@ union bpf_attr {
* If provided, map_flags should have BPF_F_TOKEN_FD flag set.
*/
__s32 map_token_fd;
+
+ /* Hash of the program that has exclusive access to the map.
+ */
+ __aligned_u64 excl_prog_hash;
+ /* Size of the passed excl_prog_hash. */
+ __u32 excl_prog_hash_size;
};
struct { /* anonymous struct used by BPF_MAP_*_ELEM and BPF_MAP_FREEZE commands */
@@ -1605,6 +1615,16 @@ union bpf_attr {
* continuous.
*/
__u32 fd_array_cnt;
+ /* Pointer to a buffer containing the signature of the BPF
+ * program.
+ */
+ __aligned_u64 signature;
+ /* Size of the signature buffer in bytes. */
+ __u32 signature_size;
+ /* ID of the kernel keyring to be used for signature
+ * verification.
+ */
+ __s32 keyring_id;
};
struct { /* anonymous struct used by BPF_OBJ_* commands */
@@ -4875,7 +4895,7 @@ union bpf_attr {
*
* **-ENOENT** if the bpf_local_storage cannot be found.
*
- * long bpf_d_path(struct path *path, char *buf, u32 sz)
+ * long bpf_d_path(const struct path *path, char *buf, u32 sz)
* Description
* Return full path for given **struct path** object, which
* needs to be the kernel BTF *path* object. The path is
@@ -5602,7 +5622,7 @@ union bpf_attr {
* Return
* *sk* if casting is valid, or **NULL** otherwise.
*
- * long bpf_dynptr_from_mem(void *data, u32 size, u64 flags, struct bpf_dynptr *ptr)
+ * long bpf_dynptr_from_mem(void *data, u64 size, u64 flags, struct bpf_dynptr *ptr)
* Description
* Get a dynptr to local memory *data*.
*
@@ -5645,7 +5665,7 @@ union bpf_attr {
* Return
* Nothing. Always succeeds.
*
- * long bpf_dynptr_read(void *dst, u32 len, const struct bpf_dynptr *src, u32 offset, u64 flags)
+ * long bpf_dynptr_read(void *dst, u64 len, const struct bpf_dynptr *src, u64 offset, u64 flags)
* Description
* Read *len* bytes from *src* into *dst*, starting from *offset*
* into *src*.
@@ -5655,7 +5675,7 @@ union bpf_attr {
* of *src*'s data, -EINVAL if *src* is an invalid dynptr or if
* *flags* is not 0.
*
- * long bpf_dynptr_write(const struct bpf_dynptr *dst, u32 offset, void *src, u32 len, u64 flags)
+ * long bpf_dynptr_write(const struct bpf_dynptr *dst, u64 offset, void *src, u64 len, u64 flags)
* Description
* Write *len* bytes from *src* into *dst*, starting from *offset*
* into *dst*.
@@ -5676,7 +5696,7 @@ union bpf_attr {
* is a read-only dynptr or if *flags* is not correct. For skb-type dynptrs,
* other errors correspond to errors returned by **bpf_skb_store_bytes**\ ().
*
- * void *bpf_dynptr_data(const struct bpf_dynptr *ptr, u32 offset, u32 len)
+ * void *bpf_dynptr_data(const struct bpf_dynptr *ptr, u64 offset, u64 len)
* Description
* Get a pointer to the underlying dynptr data.
*
@@ -6215,6 +6235,7 @@ enum {
BPF_RB_RING_SIZE = 1,
BPF_RB_CONS_POS = 2,
BPF_RB_PROD_POS = 3,
+ BPF_RB_OVERWRITE_POS = 4,
};
/* BPF ring buffer constants */
@@ -6666,6 +6687,8 @@ struct bpf_map_info {
__u32 btf_value_type_id;
__u32 btf_vmlinux_id;
__u64 map_extra;
+ __aligned_u64 hash;
+ __u32 hash_size;
} __attribute__((aligned(8)));
struct bpf_btf_info {
@@ -7182,6 +7205,7 @@ enum {
TCP_BPF_SYN_MAC = 1007, /* Copy the MAC, IP[46], and TCP header */
TCP_BPF_SOCK_OPS_CB_FLAGS = 1008, /* Get or Set TCP sock ops flags */
SK_BPF_CB_FLAGS = 1009, /* Get or set sock ops flags in socket */
+ SK_BPF_BYPASS_PROT_MEM = 1010, /* Get or Set sk->sk_bypass_prot_mem */
};
enum {
@@ -7418,6 +7442,10 @@ struct bpf_timer {
__u64 __opaque[2];
} __attribute__((aligned(8)));
+struct bpf_task_work {
+ __u64 __opaque;
+} __attribute__((aligned(8)));
+
struct bpf_wq {
__u64 __opaque[2];
} __attribute__((aligned(8)));
@@ -7623,4 +7651,24 @@ enum bpf_kfunc_flags {
BPF_F_PAD_ZEROS = (1ULL << 0),
};
+/*
+ * Values of a BPF_MAP_TYPE_INSN_ARRAY entry must be of this type.
+ *
+ * Before the map is used the orig_off field should point to an
+ * instruction inside the program being loaded. The other fields
+ * must be set to 0.
+ *
+ * After the program is loaded, the xlated_off will be adjusted
+ * by the verifier to point to the index of the original instruction
+ * in the xlated program. If the instruction is deleted, it will
+ * be set to (u32)-1. The jitted_off will be set to the corresponding
+ * offset in the jitted image of the program.
+ */
+struct bpf_insn_array_value {
+ __u32 orig_off;
+ __u32 xlated_off;
+ __u32 jitted_off;
+ __u32 :32;
+};
+
#endif /* _UAPI__LINUX_BPF_H__ */
diff --git a/tools/include/uapi/linux/genetlink.h b/tools/include/uapi/linux/genetlink.h
new file mode 100644
index 000000000000..ddba3ca01e39
--- /dev/null
+++ b/tools/include/uapi/linux/genetlink.h
@@ -0,0 +1,103 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _UAPI__LINUX_GENERIC_NETLINK_H
+#define _UAPI__LINUX_GENERIC_NETLINK_H
+
+#include <linux/types.h>
+#include <linux/netlink.h>
+
+#define GENL_NAMSIZ 16 /* length of family name */
+
+#define GENL_MIN_ID NLMSG_MIN_TYPE
+#define GENL_MAX_ID 1023
+
+struct genlmsghdr {
+ __u8 cmd;
+ __u8 version;
+ __u16 reserved;
+};
+
+#define GENL_HDRLEN NLMSG_ALIGN(sizeof(struct genlmsghdr))
+
+#define GENL_ADMIN_PERM 0x01
+#define GENL_CMD_CAP_DO 0x02
+#define GENL_CMD_CAP_DUMP 0x04
+#define GENL_CMD_CAP_HASPOL 0x08
+#define GENL_UNS_ADMIN_PERM 0x10
+
+/*
+ * List of reserved static generic netlink identifiers:
+ */
+#define GENL_ID_CTRL NLMSG_MIN_TYPE
+#define GENL_ID_VFS_DQUOT (NLMSG_MIN_TYPE + 1)
+#define GENL_ID_PMCRAID (NLMSG_MIN_TYPE + 2)
+/* must be last reserved + 1 */
+#define GENL_START_ALLOC (NLMSG_MIN_TYPE + 3)
+
+/**************************************************************************
+ * Controller
+ **************************************************************************/
+
+enum {
+ CTRL_CMD_UNSPEC,
+ CTRL_CMD_NEWFAMILY,
+ CTRL_CMD_DELFAMILY,
+ CTRL_CMD_GETFAMILY,
+ CTRL_CMD_NEWOPS,
+ CTRL_CMD_DELOPS,
+ CTRL_CMD_GETOPS,
+ CTRL_CMD_NEWMCAST_GRP,
+ CTRL_CMD_DELMCAST_GRP,
+ CTRL_CMD_GETMCAST_GRP, /* unused */
+ CTRL_CMD_GETPOLICY,
+ __CTRL_CMD_MAX,
+};
+
+#define CTRL_CMD_MAX (__CTRL_CMD_MAX - 1)
+
+enum {
+ CTRL_ATTR_UNSPEC,
+ CTRL_ATTR_FAMILY_ID,
+ CTRL_ATTR_FAMILY_NAME,
+ CTRL_ATTR_VERSION,
+ CTRL_ATTR_HDRSIZE,
+ CTRL_ATTR_MAXATTR,
+ CTRL_ATTR_OPS,
+ CTRL_ATTR_MCAST_GROUPS,
+ CTRL_ATTR_POLICY,
+ CTRL_ATTR_OP_POLICY,
+ CTRL_ATTR_OP,
+ __CTRL_ATTR_MAX,
+};
+
+#define CTRL_ATTR_MAX (__CTRL_ATTR_MAX - 1)
+
+enum {
+ CTRL_ATTR_OP_UNSPEC,
+ CTRL_ATTR_OP_ID,
+ CTRL_ATTR_OP_FLAGS,
+ __CTRL_ATTR_OP_MAX,
+};
+
+#define CTRL_ATTR_OP_MAX (__CTRL_ATTR_OP_MAX - 1)
+
+enum {
+ CTRL_ATTR_MCAST_GRP_UNSPEC,
+ CTRL_ATTR_MCAST_GRP_NAME,
+ CTRL_ATTR_MCAST_GRP_ID,
+ __CTRL_ATTR_MCAST_GRP_MAX,
+};
+
+#define CTRL_ATTR_MCAST_GRP_MAX (__CTRL_ATTR_MCAST_GRP_MAX - 1)
+
+enum {
+ CTRL_ATTR_POLICY_UNSPEC,
+ CTRL_ATTR_POLICY_DO,
+ CTRL_ATTR_POLICY_DUMP,
+
+ __CTRL_ATTR_POLICY_DUMP_MAX,
+ CTRL_ATTR_POLICY_DUMP_MAX = __CTRL_ATTR_POLICY_DUMP_MAX - 1
+};
+
+#define CTRL_ATTR_POLICY_MAX (__CTRL_ATTR_POLICY_DUMP_MAX - 1)
+
+#endif /* _UAPI__LINUX_GENERIC_NETLINK_H */
diff --git a/tools/include/uapi/linux/if_addr.h b/tools/include/uapi/linux/if_addr.h
new file mode 100644
index 000000000000..aa7958b4e41d
--- /dev/null
+++ b/tools/include/uapi/linux/if_addr.h
@@ -0,0 +1,79 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _UAPI__LINUX_IF_ADDR_H
+#define _UAPI__LINUX_IF_ADDR_H
+
+#include <linux/types.h>
+#include <linux/netlink.h>
+
+struct ifaddrmsg {
+ __u8 ifa_family;
+ __u8 ifa_prefixlen; /* The prefix length */
+ __u8 ifa_flags; /* Flags */
+ __u8 ifa_scope; /* Address scope */
+ __u32 ifa_index; /* Link index */
+};
+
+/*
+ * Important comment:
+ * IFA_ADDRESS is prefix address, rather than local interface address.
+ * It makes no difference for normally configured broadcast interfaces,
+ * but for point-to-point IFA_ADDRESS is DESTINATION address,
+ * local address is supplied in IFA_LOCAL attribute.
+ *
+ * IFA_FLAGS is a u32 attribute that extends the u8 field ifa_flags.
+ * If present, the value from struct ifaddrmsg will be ignored.
+ */
+enum {
+ IFA_UNSPEC,
+ IFA_ADDRESS,
+ IFA_LOCAL,
+ IFA_LABEL,
+ IFA_BROADCAST,
+ IFA_ANYCAST,
+ IFA_CACHEINFO,
+ IFA_MULTICAST,
+ IFA_FLAGS,
+ IFA_RT_PRIORITY, /* u32, priority/metric for prefix route */
+ IFA_TARGET_NETNSID,
+ IFA_PROTO, /* u8, address protocol */
+ __IFA_MAX,
+};
+
+#define IFA_MAX (__IFA_MAX - 1)
+
+/* ifa_flags */
+#define IFA_F_SECONDARY 0x01
+#define IFA_F_TEMPORARY IFA_F_SECONDARY
+
+#define IFA_F_NODAD 0x02
+#define IFA_F_OPTIMISTIC 0x04
+#define IFA_F_DADFAILED 0x08
+#define IFA_F_HOMEADDRESS 0x10
+#define IFA_F_DEPRECATED 0x20
+#define IFA_F_TENTATIVE 0x40
+#define IFA_F_PERMANENT 0x80
+#define IFA_F_MANAGETEMPADDR 0x100
+#define IFA_F_NOPREFIXROUTE 0x200
+#define IFA_F_MCAUTOJOIN 0x400
+#define IFA_F_STABLE_PRIVACY 0x800
+
+struct ifa_cacheinfo {
+ __u32 ifa_prefered;
+ __u32 ifa_valid;
+ __u32 cstamp; /* created timestamp, hundredths of seconds */
+ __u32 tstamp; /* updated timestamp, hundredths of seconds */
+};
+
+/* backwards compatibility for userspace */
+#ifndef __KERNEL__
+#define IFA_RTA(r) ((struct rtattr*)(((char*)(r)) + NLMSG_ALIGN(sizeof(struct ifaddrmsg))))
+#define IFA_PAYLOAD(n) NLMSG_PAYLOAD(n,sizeof(struct ifaddrmsg))
+#endif
+
+/* ifa_proto */
+#define IFAPROT_UNSPEC 0
+#define IFAPROT_KERNEL_LO 1 /* loopback */
+#define IFAPROT_KERNEL_RA 2 /* set by kernel from router announcement */
+#define IFAPROT_KERNEL_LL 3 /* link-local set by kernel */
+
+#endif
diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h
index 7415a3863891..52f6000ab020 100644
--- a/tools/include/uapi/linux/kvm.h
+++ b/tools/include/uapi/linux/kvm.h
@@ -178,6 +178,7 @@ struct kvm_xen_exit {
#define KVM_EXIT_NOTIFY 37
#define KVM_EXIT_LOONGARCH_IOCSR 38
#define KVM_EXIT_MEMORY_FAULT 39
+#define KVM_EXIT_TDX 40
/* For KVM_EXIT_INTERNAL_ERROR */
/* Emulate instruction failed. */
@@ -447,6 +448,31 @@ struct kvm_run {
__u64 gpa;
__u64 size;
} memory_fault;
+ /* KVM_EXIT_TDX */
+ struct {
+ __u64 flags;
+ __u64 nr;
+ union {
+ struct {
+ __u64 ret;
+ __u64 data[5];
+ } unknown;
+ struct {
+ __u64 ret;
+ __u64 gpa;
+ __u64 size;
+ } get_quote;
+ struct {
+ __u64 ret;
+ __u64 leaf;
+ __u64 r11, r12, r13, r14;
+ } get_tdvmcall_info;
+ struct {
+ __u64 ret;
+ __u64 vector;
+ } setup_event_notify;
+ };
+ } tdx;
/* Fix the size of the union. */
char padding[256];
};
@@ -935,6 +961,8 @@ struct kvm_enable_cap {
#define KVM_CAP_ARM_EL2 240
#define KVM_CAP_ARM_EL2_E2H0 241
#define KVM_CAP_RISCV_MP_STATE_RESET 242
+#define KVM_CAP_ARM_CACHEABLE_PFNMAP_SUPPORTED 243
+#define KVM_CAP_GUEST_MEMFD_FLAGS 244
struct kvm_irq_routing_irqchip {
__u32 irqchip;
@@ -1571,6 +1599,8 @@ struct kvm_memory_attributes {
#define KVM_MEMORY_ATTRIBUTE_PRIVATE (1ULL << 3)
#define KVM_CREATE_GUEST_MEMFD _IOWR(KVMIO, 0xd4, struct kvm_create_guest_memfd)
+#define GUEST_MEMFD_FLAG_MMAP (1ULL << 0)
+#define GUEST_MEMFD_FLAG_INIT_SHARED (1ULL << 1)
struct kvm_create_guest_memfd {
__u64 size;
diff --git a/tools/include/uapi/linux/neighbour.h b/tools/include/uapi/linux/neighbour.h
new file mode 100644
index 000000000000..c34a81245f87
--- /dev/null
+++ b/tools/include/uapi/linux/neighbour.h
@@ -0,0 +1,229 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _UAPI__LINUX_NEIGHBOUR_H
+#define _UAPI__LINUX_NEIGHBOUR_H
+
+#include <linux/types.h>
+#include <linux/netlink.h>
+
+struct ndmsg {
+ __u8 ndm_family;
+ __u8 ndm_pad1;
+ __u16 ndm_pad2;
+ __s32 ndm_ifindex;
+ __u16 ndm_state;
+ __u8 ndm_flags;
+ __u8 ndm_type;
+};
+
+enum {
+ NDA_UNSPEC,
+ NDA_DST,
+ NDA_LLADDR,
+ NDA_CACHEINFO,
+ NDA_PROBES,
+ NDA_VLAN,
+ NDA_PORT,
+ NDA_VNI,
+ NDA_IFINDEX,
+ NDA_MASTER,
+ NDA_LINK_NETNSID,
+ NDA_SRC_VNI,
+ NDA_PROTOCOL, /* Originator of entry */
+ NDA_NH_ID,
+ NDA_FDB_EXT_ATTRS,
+ NDA_FLAGS_EXT,
+ NDA_NDM_STATE_MASK,
+ NDA_NDM_FLAGS_MASK,
+ __NDA_MAX
+};
+
+#define NDA_MAX (__NDA_MAX - 1)
+
+/*
+ * Neighbor Cache Entry Flags
+ */
+
+#define NTF_USE (1 << 0)
+#define NTF_SELF (1 << 1)
+#define NTF_MASTER (1 << 2)
+#define NTF_PROXY (1 << 3) /* == ATF_PUBL */
+#define NTF_EXT_LEARNED (1 << 4)
+#define NTF_OFFLOADED (1 << 5)
+#define NTF_STICKY (1 << 6)
+#define NTF_ROUTER (1 << 7)
+/* Extended flags under NDA_FLAGS_EXT: */
+#define NTF_EXT_MANAGED (1 << 0)
+#define NTF_EXT_LOCKED (1 << 1)
+#define NTF_EXT_EXT_VALIDATED (1 << 2)
+
+/*
+ * Neighbor Cache Entry States.
+ */
+
+#define NUD_INCOMPLETE 0x01
+#define NUD_REACHABLE 0x02
+#define NUD_STALE 0x04
+#define NUD_DELAY 0x08
+#define NUD_PROBE 0x10
+#define NUD_FAILED 0x20
+
+/* Dummy states */
+#define NUD_NOARP 0x40
+#define NUD_PERMANENT 0x80
+#define NUD_NONE 0x00
+
+/* NUD_NOARP & NUD_PERMANENT are pseudostates, they never change and make no
+ * address resolution or NUD.
+ *
+ * NUD_PERMANENT also cannot be deleted by garbage collectors. This holds true
+ * for dynamic entries with NTF_EXT_LEARNED flag as well. However, upon carrier
+ * down event, NUD_PERMANENT entries are not flushed whereas NTF_EXT_LEARNED
+ * flagged entries explicitly are (which is also consistent with the routing
+ * subsystem).
+ *
+ * When NTF_EXT_LEARNED is set for a bridge fdb entry the different cache entry
+ * states don't make sense and thus are ignored. Such entries don't age and
+ * can roam.
+ *
+ * NTF_EXT_MANAGED flagged neigbor entries are managed by the kernel on behalf
+ * of a user space control plane, and automatically refreshed so that (if
+ * possible) they remain in NUD_REACHABLE state.
+ *
+ * NTF_EXT_LOCKED flagged bridge FDB entries are entries generated by the
+ * bridge in response to a host trying to communicate via a locked bridge port
+ * with MAB enabled. Their purpose is to notify user space that a host requires
+ * authentication.
+ *
+ * NTF_EXT_EXT_VALIDATED flagged neighbor entries were externally validated by
+ * a user space control plane. The kernel will not remove or invalidate them,
+ * but it can probe them and notify user space when they become reachable.
+ */
+
+struct nda_cacheinfo {
+ __u32 ndm_confirmed;
+ __u32 ndm_used;
+ __u32 ndm_updated;
+ __u32 ndm_refcnt;
+};
+
+/*****************************************************************
+ * Neighbour tables specific messages.
+ *
+ * To retrieve the neighbour tables send RTM_GETNEIGHTBL with the
+ * NLM_F_DUMP flag set. Every neighbour table configuration is
+ * spread over multiple messages to avoid running into message
+ * size limits on systems with many interfaces. The first message
+ * in the sequence transports all not device specific data such as
+ * statistics, configuration, and the default parameter set.
+ * This message is followed by 0..n messages carrying device
+ * specific parameter sets.
+ * Although the ordering should be sufficient, NDTA_NAME can be
+ * used to identify sequences. The initial message can be identified
+ * by checking for NDTA_CONFIG. The device specific messages do
+ * not contain this TLV but have NDTPA_IFINDEX set to the
+ * corresponding interface index.
+ *
+ * To change neighbour table attributes, send RTM_SETNEIGHTBL
+ * with NDTA_NAME set. Changeable attribute include NDTA_THRESH[1-3],
+ * NDTA_GC_INTERVAL, and all TLVs in NDTA_PARMS unless marked
+ * otherwise. Device specific parameter sets can be changed by
+ * setting NDTPA_IFINDEX to the interface index of the corresponding
+ * device.
+ ****/
+
+struct ndt_stats {
+ __u64 ndts_allocs;
+ __u64 ndts_destroys;
+ __u64 ndts_hash_grows;
+ __u64 ndts_res_failed;
+ __u64 ndts_lookups;
+ __u64 ndts_hits;
+ __u64 ndts_rcv_probes_mcast;
+ __u64 ndts_rcv_probes_ucast;
+ __u64 ndts_periodic_gc_runs;
+ __u64 ndts_forced_gc_runs;
+ __u64 ndts_table_fulls;
+};
+
+enum {
+ NDTPA_UNSPEC,
+ NDTPA_IFINDEX, /* u32, unchangeable */
+ NDTPA_REFCNT, /* u32, read-only */
+ NDTPA_REACHABLE_TIME, /* u64, read-only, msecs */
+ NDTPA_BASE_REACHABLE_TIME, /* u64, msecs */
+ NDTPA_RETRANS_TIME, /* u64, msecs */
+ NDTPA_GC_STALETIME, /* u64, msecs */
+ NDTPA_DELAY_PROBE_TIME, /* u64, msecs */
+ NDTPA_QUEUE_LEN, /* u32 */
+ NDTPA_APP_PROBES, /* u32 */
+ NDTPA_UCAST_PROBES, /* u32 */
+ NDTPA_MCAST_PROBES, /* u32 */
+ NDTPA_ANYCAST_DELAY, /* u64, msecs */
+ NDTPA_PROXY_DELAY, /* u64, msecs */
+ NDTPA_PROXY_QLEN, /* u32 */
+ NDTPA_LOCKTIME, /* u64, msecs */
+ NDTPA_QUEUE_LENBYTES, /* u32 */
+ NDTPA_MCAST_REPROBES, /* u32 */
+ NDTPA_PAD,
+ NDTPA_INTERVAL_PROBE_TIME_MS, /* u64, msecs */
+ __NDTPA_MAX
+};
+#define NDTPA_MAX (__NDTPA_MAX - 1)
+
+struct ndtmsg {
+ __u8 ndtm_family;
+ __u8 ndtm_pad1;
+ __u16 ndtm_pad2;
+};
+
+struct ndt_config {
+ __u16 ndtc_key_len;
+ __u16 ndtc_entry_size;
+ __u32 ndtc_entries;
+ __u32 ndtc_last_flush; /* delta to now in msecs */
+ __u32 ndtc_last_rand; /* delta to now in msecs */
+ __u32 ndtc_hash_rnd;
+ __u32 ndtc_hash_mask;
+ __u32 ndtc_hash_chain_gc;
+ __u32 ndtc_proxy_qlen;
+};
+
+enum {
+ NDTA_UNSPEC,
+ NDTA_NAME, /* char *, unchangeable */
+ NDTA_THRESH1, /* u32 */
+ NDTA_THRESH2, /* u32 */
+ NDTA_THRESH3, /* u32 */
+ NDTA_CONFIG, /* struct ndt_config, read-only */
+ NDTA_PARMS, /* nested TLV NDTPA_* */
+ NDTA_STATS, /* struct ndt_stats, read-only */
+ NDTA_GC_INTERVAL, /* u64, msecs */
+ NDTA_PAD,
+ __NDTA_MAX
+};
+#define NDTA_MAX (__NDTA_MAX - 1)
+
+ /* FDB activity notification bits used in NFEA_ACTIVITY_NOTIFY:
+ * - FDB_NOTIFY_BIT - notify on activity/expire for any entry
+ * - FDB_NOTIFY_INACTIVE_BIT - mark as inactive to avoid multiple notifications
+ */
+enum {
+ FDB_NOTIFY_BIT = (1 << 0),
+ FDB_NOTIFY_INACTIVE_BIT = (1 << 1)
+};
+
+/* embedded into NDA_FDB_EXT_ATTRS:
+ * [NDA_FDB_EXT_ATTRS] = {
+ * [NFEA_ACTIVITY_NOTIFY]
+ * ...
+ * }
+ */
+enum {
+ NFEA_UNSPEC,
+ NFEA_ACTIVITY_NOTIFY,
+ NFEA_DONT_REFRESH,
+ __NFEA_MAX
+};
+#define NFEA_MAX (__NFEA_MAX - 1)
+
+#endif
diff --git a/tools/include/uapi/linux/netdev.h b/tools/include/uapi/linux/netdev.h
index 48eb49aa03d4..e0b579a1df4f 100644
--- a/tools/include/uapi/linux/netdev.h
+++ b/tools/include/uapi/linux/netdev.h
@@ -2,6 +2,7 @@
/* Do not edit directly, auto-generated from: */
/* Documentation/netlink/specs/netdev.yaml */
/* YNL-GEN uapi header */
+/* To regenerate run: tools/net/ynl/ynl-regen.sh */
#ifndef _UAPI_LINUX_NETDEV_H
#define _UAPI_LINUX_NETDEV_H
@@ -80,6 +81,7 @@ enum netdev_qstats_scope {
enum netdev_napi_threaded {
NETDEV_NAPI_THREADED_DISABLED,
NETDEV_NAPI_THREADED_ENABLED,
+ NETDEV_NAPI_THREADED_BUSY_POLL,
};
enum {
diff --git a/tools/include/uapi/linux/netfilter.h b/tools/include/uapi/linux/netfilter.h
new file mode 100644
index 000000000000..5a79ccb76701
--- /dev/null
+++ b/tools/include/uapi/linux/netfilter.h
@@ -0,0 +1,80 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _UAPI__LINUX_NETFILTER_H
+#define _UAPI__LINUX_NETFILTER_H
+
+#include <linux/types.h>
+#include <linux/compiler.h>
+#include <linux/in.h>
+#include <linux/in6.h>
+
+/* Responses from hook functions. */
+#define NF_DROP 0
+#define NF_ACCEPT 1
+#define NF_STOLEN 2
+#define NF_QUEUE 3
+#define NF_REPEAT 4
+#define NF_STOP 5 /* Deprecated, for userspace nf_queue compatibility. */
+#define NF_MAX_VERDICT NF_STOP
+
+/* we overload the higher bits for encoding auxiliary data such as the queue
+ * number or errno values. Not nice, but better than additional function
+ * arguments. */
+#define NF_VERDICT_MASK 0x000000ff
+
+/* extra verdict flags have mask 0x0000ff00 */
+#define NF_VERDICT_FLAG_QUEUE_BYPASS 0x00008000
+
+/* queue number (NF_QUEUE) or errno (NF_DROP) */
+#define NF_VERDICT_QMASK 0xffff0000
+#define NF_VERDICT_QBITS 16
+
+#define NF_QUEUE_NR(x) ((((x) << 16) & NF_VERDICT_QMASK) | NF_QUEUE)
+
+#define NF_DROP_ERR(x) (((-x) << 16) | NF_DROP)
+
+/* only for userspace compatibility */
+#ifndef __KERNEL__
+
+/* NF_VERDICT_BITS should be 8 now, but userspace might break if this changes */
+#define NF_VERDICT_BITS 16
+#endif
+
+enum nf_inet_hooks {
+ NF_INET_PRE_ROUTING,
+ NF_INET_LOCAL_IN,
+ NF_INET_FORWARD,
+ NF_INET_LOCAL_OUT,
+ NF_INET_POST_ROUTING,
+ NF_INET_NUMHOOKS,
+ NF_INET_INGRESS = NF_INET_NUMHOOKS,
+};
+
+enum nf_dev_hooks {
+ NF_NETDEV_INGRESS,
+ NF_NETDEV_EGRESS,
+ NF_NETDEV_NUMHOOKS
+};
+
+enum {
+ NFPROTO_UNSPEC = 0,
+ NFPROTO_INET = 1,
+ NFPROTO_IPV4 = 2,
+ NFPROTO_ARP = 3,
+ NFPROTO_NETDEV = 5,
+ NFPROTO_BRIDGE = 7,
+ NFPROTO_IPV6 = 10,
+#ifndef __KERNEL__ /* no longer supported by kernel */
+ NFPROTO_DECNET = 12,
+#endif
+ NFPROTO_NUMPROTO,
+};
+
+union nf_inet_addr {
+ __u32 all[4];
+ __be32 ip;
+ __be32 ip6[4];
+ struct in_addr in;
+ struct in6_addr in6;
+};
+
+#endif /* _UAPI__LINUX_NETFILTER_H */
diff --git a/tools/include/uapi/linux/netfilter_arp.h b/tools/include/uapi/linux/netfilter_arp.h
new file mode 100644
index 000000000000..791dfc5ae907
--- /dev/null
+++ b/tools/include/uapi/linux/netfilter_arp.h
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: GPL-1.0+ WITH Linux-syscall-note */
+#ifndef __LINUX_ARP_NETFILTER_H
+#define __LINUX_ARP_NETFILTER_H
+
+/* ARP-specific defines for netfilter.
+ * (C)2002 Rusty Russell IBM -- This code is GPL.
+ */
+
+#include <linux/netfilter.h>
+
+/* There is no PF_ARP. */
+#define NF_ARP 0
+
+/* ARP Hooks */
+#define NF_ARP_IN 0
+#define NF_ARP_OUT 1
+#define NF_ARP_FORWARD 2
+
+#ifndef __KERNEL__
+#define NF_ARP_NUMHOOKS 3
+#endif
+
+#endif /* __LINUX_ARP_NETFILTER_H */
diff --git a/tools/include/uapi/linux/nsfs.h b/tools/include/uapi/linux/nsfs.h
index 34127653fd00..a25e38d1c874 100644
--- a/tools/include/uapi/linux/nsfs.h
+++ b/tools/include/uapi/linux/nsfs.h
@@ -16,8 +16,6 @@
#define NS_GET_NSTYPE _IO(NSIO, 0x3)
/* Get owner UID (in the caller's user namespace) for a user namespace */
#define NS_GET_OWNER_UID _IO(NSIO, 0x4)
-/* Get the id for a mount namespace */
-#define NS_GET_MNTNS_ID _IOR(NSIO, 0x5, __u64)
/* Translate pid from target pid namespace into the caller's pid namespace. */
#define NS_GET_PID_FROM_PIDNS _IOR(NSIO, 0x6, int)
/* Return thread-group leader id of pid in the callers pid namespace. */
@@ -42,4 +40,89 @@ struct mnt_ns_info {
/* Get previous namespace. */
#define NS_MNT_GET_PREV _IOR(NSIO, 12, struct mnt_ns_info)
+/* Retrieve namespace identifiers. */
+#define NS_GET_MNTNS_ID _IOR(NSIO, 5, __u64)
+#define NS_GET_ID _IOR(NSIO, 13, __u64)
+
+enum init_ns_ino {
+ IPC_NS_INIT_INO = 0xEFFFFFFFU,
+ UTS_NS_INIT_INO = 0xEFFFFFFEU,
+ USER_NS_INIT_INO = 0xEFFFFFFDU,
+ PID_NS_INIT_INO = 0xEFFFFFFCU,
+ CGROUP_NS_INIT_INO = 0xEFFFFFFBU,
+ TIME_NS_INIT_INO = 0xEFFFFFFAU,
+ NET_NS_INIT_INO = 0xEFFFFFF9U,
+ MNT_NS_INIT_INO = 0xEFFFFFF8U,
+#ifdef __KERNEL__
+ MNT_NS_ANON_INO = 0xEFFFFFF7U,
+#endif
+};
+
+struct nsfs_file_handle {
+ __u64 ns_id;
+ __u32 ns_type;
+ __u32 ns_inum;
+};
+
+#define NSFS_FILE_HANDLE_SIZE_VER0 16 /* sizeof first published struct */
+#define NSFS_FILE_HANDLE_SIZE_LATEST sizeof(struct nsfs_file_handle) /* sizeof latest published struct */
+
+enum init_ns_id {
+ IPC_NS_INIT_ID = 1ULL,
+ UTS_NS_INIT_ID = 2ULL,
+ USER_NS_INIT_ID = 3ULL,
+ PID_NS_INIT_ID = 4ULL,
+ CGROUP_NS_INIT_ID = 5ULL,
+ TIME_NS_INIT_ID = 6ULL,
+ NET_NS_INIT_ID = 7ULL,
+ MNT_NS_INIT_ID = 8ULL,
+#ifdef __KERNEL__
+ NS_LAST_INIT_ID = MNT_NS_INIT_ID,
+#endif
+};
+
+enum ns_type {
+ TIME_NS = (1ULL << 7), /* CLONE_NEWTIME */
+ MNT_NS = (1ULL << 17), /* CLONE_NEWNS */
+ CGROUP_NS = (1ULL << 25), /* CLONE_NEWCGROUP */
+ UTS_NS = (1ULL << 26), /* CLONE_NEWUTS */
+ IPC_NS = (1ULL << 27), /* CLONE_NEWIPC */
+ USER_NS = (1ULL << 28), /* CLONE_NEWUSER */
+ PID_NS = (1ULL << 29), /* CLONE_NEWPID */
+ NET_NS = (1ULL << 30), /* CLONE_NEWNET */
+};
+
+/**
+ * struct ns_id_req - namespace ID request structure
+ * @size: size of this structure
+ * @spare: reserved for future use
+ * @filter: filter mask
+ * @ns_id: last namespace id
+ * @user_ns_id: owning user namespace ID
+ *
+ * Structure for passing namespace ID and miscellaneous parameters to
+ * statns(2) and listns(2).
+ *
+ * For statns(2) @param represents the request mask.
+ * For listns(2) @param represents the last listed mount id (or zero).
+ */
+struct ns_id_req {
+ __u32 size;
+ __u32 spare;
+ __u64 ns_id;
+ struct /* listns */ {
+ __u32 ns_type;
+ __u32 spare2;
+ __u64 user_ns_id;
+ };
+};
+
+/*
+ * Special @user_ns_id value that can be passed to listns()
+ */
+#define LISTNS_CURRENT_USER 0xffffffffffffffff /* Caller's userns */
+
+/* List of all ns_id_req versions. */
+#define NS_ID_REQ_SIZE_VER0 32 /* sizeof first published struct */
+
#endif /* __LINUX_NSFS_H */
diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h
index 78a362b80027..c44a8fb3e418 100644
--- a/tools/include/uapi/linux/perf_event.h
+++ b/tools/include/uapi/linux/perf_event.h
@@ -382,6 +382,7 @@ enum perf_event_read_format {
#define PERF_ATTR_SIZE_VER6 120 /* Add: aux_sample_size */
#define PERF_ATTR_SIZE_VER7 128 /* Add: sig_data */
#define PERF_ATTR_SIZE_VER8 136 /* Add: config3 */
+#define PERF_ATTR_SIZE_VER9 144 /* add: config4 */
/*
* 'struct perf_event_attr' contains various attributes that define
@@ -463,7 +464,9 @@ struct perf_event_attr {
inherit_thread : 1, /* children only inherit if cloned with CLONE_THREAD */
remove_on_exec : 1, /* event is removed from task on exec */
sigtrap : 1, /* send synchronous SIGTRAP on event */
- __reserved_1 : 26;
+ defer_callchain: 1, /* request PERF_RECORD_CALLCHAIN_DEFERRED records */
+ defer_output : 1, /* output PERF_RECORD_CALLCHAIN_DEFERRED records */
+ __reserved_1 : 24;
union {
__u32 wakeup_events; /* wake up every n events */
@@ -543,6 +546,7 @@ struct perf_event_attr {
__u64 sig_data;
__u64 config3; /* extension of config2 */
+ __u64 config4; /* extension of config3 */
};
/*
@@ -1239,6 +1243,22 @@ enum perf_event_type {
*/
PERF_RECORD_AUX_OUTPUT_HW_ID = 21,
+ /*
+ * This user callchain capture was deferred until shortly before
+ * returning to user space. Previous samples would have kernel
+ * callchains only and they need to be stitched with this to make full
+ * callchains.
+ *
+ * struct {
+ * struct perf_event_header header;
+ * u64 cookie;
+ * u64 nr;
+ * u64 ips[nr];
+ * struct sample_id sample_id;
+ * };
+ */
+ PERF_RECORD_CALLCHAIN_DEFERRED = 22,
+
PERF_RECORD_MAX, /* non-ABI */
};
@@ -1269,6 +1289,7 @@ enum perf_callchain_context {
PERF_CONTEXT_HV = (__u64)-32,
PERF_CONTEXT_KERNEL = (__u64)-128,
PERF_CONTEXT_USER = (__u64)-512,
+ PERF_CONTEXT_USER_DEFERRED = (__u64)-640,
PERF_CONTEXT_GUEST = (__u64)-2048,
PERF_CONTEXT_GUEST_KERNEL = (__u64)-2176,
diff --git a/tools/include/uapi/linux/rtnetlink.h b/tools/include/uapi/linux/rtnetlink.h
new file mode 100644
index 000000000000..dab9493c791b
--- /dev/null
+++ b/tools/include/uapi/linux/rtnetlink.h
@@ -0,0 +1,848 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _UAPI__LINUX_RTNETLINK_H
+#define _UAPI__LINUX_RTNETLINK_H
+
+#include <linux/types.h>
+#include <linux/netlink.h>
+#include <linux/if_link.h>
+#include <linux/if_addr.h>
+#include <linux/neighbour.h>
+
+/* rtnetlink families. Values up to 127 are reserved for real address
+ * families, values above 128 may be used arbitrarily.
+ */
+#define RTNL_FAMILY_IPMR 128
+#define RTNL_FAMILY_IP6MR 129
+#define RTNL_FAMILY_MAX 129
+
+/****
+ * Routing/neighbour discovery messages.
+ ****/
+
+/* Types of messages */
+
+enum {
+ RTM_BASE = 16,
+#define RTM_BASE RTM_BASE
+
+ RTM_NEWLINK = 16,
+#define RTM_NEWLINK RTM_NEWLINK
+ RTM_DELLINK,
+#define RTM_DELLINK RTM_DELLINK
+ RTM_GETLINK,
+#define RTM_GETLINK RTM_GETLINK
+ RTM_SETLINK,
+#define RTM_SETLINK RTM_SETLINK
+
+ RTM_NEWADDR = 20,
+#define RTM_NEWADDR RTM_NEWADDR
+ RTM_DELADDR,
+#define RTM_DELADDR RTM_DELADDR
+ RTM_GETADDR,
+#define RTM_GETADDR RTM_GETADDR
+
+ RTM_NEWROUTE = 24,
+#define RTM_NEWROUTE RTM_NEWROUTE
+ RTM_DELROUTE,
+#define RTM_DELROUTE RTM_DELROUTE
+ RTM_GETROUTE,
+#define RTM_GETROUTE RTM_GETROUTE
+
+ RTM_NEWNEIGH = 28,
+#define RTM_NEWNEIGH RTM_NEWNEIGH
+ RTM_DELNEIGH,
+#define RTM_DELNEIGH RTM_DELNEIGH
+ RTM_GETNEIGH,
+#define RTM_GETNEIGH RTM_GETNEIGH
+
+ RTM_NEWRULE = 32,
+#define RTM_NEWRULE RTM_NEWRULE
+ RTM_DELRULE,
+#define RTM_DELRULE RTM_DELRULE
+ RTM_GETRULE,
+#define RTM_GETRULE RTM_GETRULE
+
+ RTM_NEWQDISC = 36,
+#define RTM_NEWQDISC RTM_NEWQDISC
+ RTM_DELQDISC,
+#define RTM_DELQDISC RTM_DELQDISC
+ RTM_GETQDISC,
+#define RTM_GETQDISC RTM_GETQDISC
+
+ RTM_NEWTCLASS = 40,
+#define RTM_NEWTCLASS RTM_NEWTCLASS
+ RTM_DELTCLASS,
+#define RTM_DELTCLASS RTM_DELTCLASS
+ RTM_GETTCLASS,
+#define RTM_GETTCLASS RTM_GETTCLASS
+
+ RTM_NEWTFILTER = 44,
+#define RTM_NEWTFILTER RTM_NEWTFILTER
+ RTM_DELTFILTER,
+#define RTM_DELTFILTER RTM_DELTFILTER
+ RTM_GETTFILTER,
+#define RTM_GETTFILTER RTM_GETTFILTER
+
+ RTM_NEWACTION = 48,
+#define RTM_NEWACTION RTM_NEWACTION
+ RTM_DELACTION,
+#define RTM_DELACTION RTM_DELACTION
+ RTM_GETACTION,
+#define RTM_GETACTION RTM_GETACTION
+
+ RTM_NEWPREFIX = 52,
+#define RTM_NEWPREFIX RTM_NEWPREFIX
+
+ RTM_NEWMULTICAST = 56,
+#define RTM_NEWMULTICAST RTM_NEWMULTICAST
+ RTM_DELMULTICAST,
+#define RTM_DELMULTICAST RTM_DELMULTICAST
+ RTM_GETMULTICAST,
+#define RTM_GETMULTICAST RTM_GETMULTICAST
+
+ RTM_NEWANYCAST = 60,
+#define RTM_NEWANYCAST RTM_NEWANYCAST
+ RTM_DELANYCAST,
+#define RTM_DELANYCAST RTM_DELANYCAST
+ RTM_GETANYCAST,
+#define RTM_GETANYCAST RTM_GETANYCAST
+
+ RTM_NEWNEIGHTBL = 64,
+#define RTM_NEWNEIGHTBL RTM_NEWNEIGHTBL
+ RTM_GETNEIGHTBL = 66,
+#define RTM_GETNEIGHTBL RTM_GETNEIGHTBL
+ RTM_SETNEIGHTBL,
+#define RTM_SETNEIGHTBL RTM_SETNEIGHTBL
+
+ RTM_NEWNDUSEROPT = 68,
+#define RTM_NEWNDUSEROPT RTM_NEWNDUSEROPT
+
+ RTM_NEWADDRLABEL = 72,
+#define RTM_NEWADDRLABEL RTM_NEWADDRLABEL
+ RTM_DELADDRLABEL,
+#define RTM_DELADDRLABEL RTM_DELADDRLABEL
+ RTM_GETADDRLABEL,
+#define RTM_GETADDRLABEL RTM_GETADDRLABEL
+
+ RTM_GETDCB = 78,
+#define RTM_GETDCB RTM_GETDCB
+ RTM_SETDCB,
+#define RTM_SETDCB RTM_SETDCB
+
+ RTM_NEWNETCONF = 80,
+#define RTM_NEWNETCONF RTM_NEWNETCONF
+ RTM_DELNETCONF,
+#define RTM_DELNETCONF RTM_DELNETCONF
+ RTM_GETNETCONF = 82,
+#define RTM_GETNETCONF RTM_GETNETCONF
+
+ RTM_NEWMDB = 84,
+#define RTM_NEWMDB RTM_NEWMDB
+ RTM_DELMDB = 85,
+#define RTM_DELMDB RTM_DELMDB
+ RTM_GETMDB = 86,
+#define RTM_GETMDB RTM_GETMDB
+
+ RTM_NEWNSID = 88,
+#define RTM_NEWNSID RTM_NEWNSID
+ RTM_DELNSID = 89,
+#define RTM_DELNSID RTM_DELNSID
+ RTM_GETNSID = 90,
+#define RTM_GETNSID RTM_GETNSID
+
+ RTM_NEWSTATS = 92,
+#define RTM_NEWSTATS RTM_NEWSTATS
+ RTM_GETSTATS = 94,
+#define RTM_GETSTATS RTM_GETSTATS
+ RTM_SETSTATS,
+#define RTM_SETSTATS RTM_SETSTATS
+
+ RTM_NEWCACHEREPORT = 96,
+#define RTM_NEWCACHEREPORT RTM_NEWCACHEREPORT
+
+ RTM_NEWCHAIN = 100,
+#define RTM_NEWCHAIN RTM_NEWCHAIN
+ RTM_DELCHAIN,
+#define RTM_DELCHAIN RTM_DELCHAIN
+ RTM_GETCHAIN,
+#define RTM_GETCHAIN RTM_GETCHAIN
+
+ RTM_NEWNEXTHOP = 104,
+#define RTM_NEWNEXTHOP RTM_NEWNEXTHOP
+ RTM_DELNEXTHOP,
+#define RTM_DELNEXTHOP RTM_DELNEXTHOP
+ RTM_GETNEXTHOP,
+#define RTM_GETNEXTHOP RTM_GETNEXTHOP
+
+ RTM_NEWLINKPROP = 108,
+#define RTM_NEWLINKPROP RTM_NEWLINKPROP
+ RTM_DELLINKPROP,
+#define RTM_DELLINKPROP RTM_DELLINKPROP
+ RTM_GETLINKPROP,
+#define RTM_GETLINKPROP RTM_GETLINKPROP
+
+ RTM_NEWVLAN = 112,
+#define RTM_NEWVLAN RTM_NEWVLAN
+ RTM_DELVLAN,
+#define RTM_DELVLAN RTM_DELVLAN
+ RTM_GETVLAN,
+#define RTM_GETVLAN RTM_GETVLAN
+
+ RTM_NEWNEXTHOPBUCKET = 116,
+#define RTM_NEWNEXTHOPBUCKET RTM_NEWNEXTHOPBUCKET
+ RTM_DELNEXTHOPBUCKET,
+#define RTM_DELNEXTHOPBUCKET RTM_DELNEXTHOPBUCKET
+ RTM_GETNEXTHOPBUCKET,
+#define RTM_GETNEXTHOPBUCKET RTM_GETNEXTHOPBUCKET
+
+ RTM_NEWTUNNEL = 120,
+#define RTM_NEWTUNNEL RTM_NEWTUNNEL
+ RTM_DELTUNNEL,
+#define RTM_DELTUNNEL RTM_DELTUNNEL
+ RTM_GETTUNNEL,
+#define RTM_GETTUNNEL RTM_GETTUNNEL
+
+ __RTM_MAX,
+#define RTM_MAX (((__RTM_MAX + 3) & ~3) - 1)
+};
+
+#define RTM_NR_MSGTYPES (RTM_MAX + 1 - RTM_BASE)
+#define RTM_NR_FAMILIES (RTM_NR_MSGTYPES >> 2)
+#define RTM_FAM(cmd) (((cmd) - RTM_BASE) >> 2)
+
+/*
+ Generic structure for encapsulation of optional route information.
+ It is reminiscent of sockaddr, but with sa_family replaced
+ with attribute type.
+ */
+
+struct rtattr {
+ unsigned short rta_len;
+ unsigned short rta_type;
+};
+
+/* Macros to handle rtattributes */
+
+#define RTA_ALIGNTO 4U
+#define RTA_ALIGN(len) ( ((len)+RTA_ALIGNTO-1) & ~(RTA_ALIGNTO-1) )
+#define RTA_OK(rta,len) ((len) >= (int)sizeof(struct rtattr) && \
+ (rta)->rta_len >= sizeof(struct rtattr) && \
+ (rta)->rta_len <= (len))
+#define RTA_NEXT(rta,attrlen) ((attrlen) -= RTA_ALIGN((rta)->rta_len), \
+ (struct rtattr*)(((char*)(rta)) + RTA_ALIGN((rta)->rta_len)))
+#define RTA_LENGTH(len) (RTA_ALIGN(sizeof(struct rtattr)) + (len))
+#define RTA_SPACE(len) RTA_ALIGN(RTA_LENGTH(len))
+#define RTA_DATA(rta) ((void*)(((char*)(rta)) + RTA_LENGTH(0)))
+#define RTA_PAYLOAD(rta) ((int)((rta)->rta_len) - RTA_LENGTH(0))
+
+
+
+
+/******************************************************************************
+ * Definitions used in routing table administration.
+ ****/
+
+struct rtmsg {
+ unsigned char rtm_family;
+ unsigned char rtm_dst_len;
+ unsigned char rtm_src_len;
+ unsigned char rtm_tos;
+
+ unsigned char rtm_table; /* Routing table id */
+ unsigned char rtm_protocol; /* Routing protocol; see below */
+ unsigned char rtm_scope; /* See below */
+ unsigned char rtm_type; /* See below */
+
+ unsigned rtm_flags;
+};
+
+/* rtm_type */
+
+enum {
+ RTN_UNSPEC,
+ RTN_UNICAST, /* Gateway or direct route */
+ RTN_LOCAL, /* Accept locally */
+ RTN_BROADCAST, /* Accept locally as broadcast,
+ send as broadcast */
+ RTN_ANYCAST, /* Accept locally as broadcast,
+ but send as unicast */
+ RTN_MULTICAST, /* Multicast route */
+ RTN_BLACKHOLE, /* Drop */
+ RTN_UNREACHABLE, /* Destination is unreachable */
+ RTN_PROHIBIT, /* Administratively prohibited */
+ RTN_THROW, /* Not in this table */
+ RTN_NAT, /* Translate this address */
+ RTN_XRESOLVE, /* Use external resolver */
+ __RTN_MAX
+};
+
+#define RTN_MAX (__RTN_MAX - 1)
+
+
+/* rtm_protocol */
+
+#define RTPROT_UNSPEC 0
+#define RTPROT_REDIRECT 1 /* Route installed by ICMP redirects;
+ not used by current IPv4 */
+#define RTPROT_KERNEL 2 /* Route installed by kernel */
+#define RTPROT_BOOT 3 /* Route installed during boot */
+#define RTPROT_STATIC 4 /* Route installed by administrator */
+
+/* Values of protocol >= RTPROT_STATIC are not interpreted by kernel;
+ they are just passed from user and back as is.
+ It will be used by hypothetical multiple routing daemons.
+ Note that protocol values should be standardized in order to
+ avoid conflicts.
+ */
+
+#define RTPROT_GATED 8 /* Apparently, GateD */
+#define RTPROT_RA 9 /* RDISC/ND router advertisements */
+#define RTPROT_MRT 10 /* Merit MRT */
+#define RTPROT_ZEBRA 11 /* Zebra */
+#define RTPROT_BIRD 12 /* BIRD */
+#define RTPROT_DNROUTED 13 /* DECnet routing daemon */
+#define RTPROT_XORP 14 /* XORP */
+#define RTPROT_NTK 15 /* Netsukuku */
+#define RTPROT_DHCP 16 /* DHCP client */
+#define RTPROT_MROUTED 17 /* Multicast daemon */
+#define RTPROT_KEEPALIVED 18 /* Keepalived daemon */
+#define RTPROT_BABEL 42 /* Babel daemon */
+#define RTPROT_OVN 84 /* OVN daemon */
+#define RTPROT_OPENR 99 /* Open Routing (Open/R) Routes */
+#define RTPROT_BGP 186 /* BGP Routes */
+#define RTPROT_ISIS 187 /* ISIS Routes */
+#define RTPROT_OSPF 188 /* OSPF Routes */
+#define RTPROT_RIP 189 /* RIP Routes */
+#define RTPROT_EIGRP 192 /* EIGRP Routes */
+
+/* rtm_scope
+
+ Really it is not scope, but sort of distance to the destination.
+ NOWHERE are reserved for not existing destinations, HOST is our
+ local addresses, LINK are destinations, located on directly attached
+ link and UNIVERSE is everywhere in the Universe.
+
+ Intermediate values are also possible f.e. interior routes
+ could be assigned a value between UNIVERSE and LINK.
+*/
+
+enum rt_scope_t {
+ RT_SCOPE_UNIVERSE=0,
+/* User defined values */
+ RT_SCOPE_SITE=200,
+ RT_SCOPE_LINK=253,
+ RT_SCOPE_HOST=254,
+ RT_SCOPE_NOWHERE=255
+};
+
+/* rtm_flags */
+
+#define RTM_F_NOTIFY 0x100 /* Notify user of route change */
+#define RTM_F_CLONED 0x200 /* This route is cloned */
+#define RTM_F_EQUALIZE 0x400 /* Multipath equalizer: NI */
+#define RTM_F_PREFIX 0x800 /* Prefix addresses */
+#define RTM_F_LOOKUP_TABLE 0x1000 /* set rtm_table to FIB lookup result */
+#define RTM_F_FIB_MATCH 0x2000 /* return full fib lookup match */
+#define RTM_F_OFFLOAD 0x4000 /* route is offloaded */
+#define RTM_F_TRAP 0x8000 /* route is trapping packets */
+#define RTM_F_OFFLOAD_FAILED 0x20000000 /* route offload failed, this value
+ * is chosen to avoid conflicts with
+ * other flags defined in
+ * include/uapi/linux/ipv6_route.h
+ */
+
+/* Reserved table identifiers */
+
+enum rt_class_t {
+ RT_TABLE_UNSPEC=0,
+/* User defined values */
+ RT_TABLE_COMPAT=252,
+ RT_TABLE_DEFAULT=253,
+ RT_TABLE_MAIN=254,
+ RT_TABLE_LOCAL=255,
+ RT_TABLE_MAX=0xFFFFFFFF
+};
+
+
+/* Routing message attributes */
+
+enum rtattr_type_t {
+ RTA_UNSPEC,
+ RTA_DST,
+ RTA_SRC,
+ RTA_IIF,
+ RTA_OIF,
+ RTA_GATEWAY,
+ RTA_PRIORITY,
+ RTA_PREFSRC,
+ RTA_METRICS,
+ RTA_MULTIPATH,
+ RTA_PROTOINFO, /* no longer used */
+ RTA_FLOW,
+ RTA_CACHEINFO,
+ RTA_SESSION, /* no longer used */
+ RTA_MP_ALGO, /* no longer used */
+ RTA_TABLE,
+ RTA_MARK,
+ RTA_MFC_STATS,
+ RTA_VIA,
+ RTA_NEWDST,
+ RTA_PREF,
+ RTA_ENCAP_TYPE,
+ RTA_ENCAP,
+ RTA_EXPIRES,
+ RTA_PAD,
+ RTA_UID,
+ RTA_TTL_PROPAGATE,
+ RTA_IP_PROTO,
+ RTA_SPORT,
+ RTA_DPORT,
+ RTA_NH_ID,
+ RTA_FLOWLABEL,
+ __RTA_MAX
+};
+
+#define RTA_MAX (__RTA_MAX - 1)
+
+#define RTM_RTA(r) ((struct rtattr*)(((char*)(r)) + NLMSG_ALIGN(sizeof(struct rtmsg))))
+#define RTM_PAYLOAD(n) NLMSG_PAYLOAD(n,sizeof(struct rtmsg))
+
+/* RTM_MULTIPATH --- array of struct rtnexthop.
+ *
+ * "struct rtnexthop" describes all necessary nexthop information,
+ * i.e. parameters of path to a destination via this nexthop.
+ *
+ * At the moment it is impossible to set different prefsrc, mtu, window
+ * and rtt for different paths from multipath.
+ */
+
+struct rtnexthop {
+ unsigned short rtnh_len;
+ unsigned char rtnh_flags;
+ unsigned char rtnh_hops;
+ int rtnh_ifindex;
+};
+
+/* rtnh_flags */
+
+#define RTNH_F_DEAD 1 /* Nexthop is dead (used by multipath) */
+#define RTNH_F_PERVASIVE 2 /* Do recursive gateway lookup */
+#define RTNH_F_ONLINK 4 /* Gateway is forced on link */
+#define RTNH_F_OFFLOAD 8 /* Nexthop is offloaded */
+#define RTNH_F_LINKDOWN 16 /* carrier-down on nexthop */
+#define RTNH_F_UNRESOLVED 32 /* The entry is unresolved (ipmr) */
+#define RTNH_F_TRAP 64 /* Nexthop is trapping packets */
+
+#define RTNH_COMPARE_MASK (RTNH_F_DEAD | RTNH_F_LINKDOWN | \
+ RTNH_F_OFFLOAD | RTNH_F_TRAP)
+
+/* Macros to handle hexthops */
+
+#define RTNH_ALIGNTO 4
+#define RTNH_ALIGN(len) ( ((len)+RTNH_ALIGNTO-1) & ~(RTNH_ALIGNTO-1) )
+#define RTNH_OK(rtnh,len) ((rtnh)->rtnh_len >= sizeof(struct rtnexthop) && \
+ ((int)(rtnh)->rtnh_len) <= (len))
+#define RTNH_NEXT(rtnh) ((struct rtnexthop*)(((char*)(rtnh)) + RTNH_ALIGN((rtnh)->rtnh_len)))
+#define RTNH_LENGTH(len) (RTNH_ALIGN(sizeof(struct rtnexthop)) + (len))
+#define RTNH_SPACE(len) RTNH_ALIGN(RTNH_LENGTH(len))
+#define RTNH_DATA(rtnh) ((struct rtattr*)(((char*)(rtnh)) + RTNH_LENGTH(0)))
+
+/* RTA_VIA */
+struct rtvia {
+ __kernel_sa_family_t rtvia_family;
+ __u8 rtvia_addr[];
+};
+
+/* RTM_CACHEINFO */
+
+struct rta_cacheinfo {
+ __u32 rta_clntref;
+ __u32 rta_lastuse;
+ __s32 rta_expires;
+ __u32 rta_error;
+ __u32 rta_used;
+
+#define RTNETLINK_HAVE_PEERINFO 1
+ __u32 rta_id;
+ __u32 rta_ts;
+ __u32 rta_tsage;
+};
+
+/* RTM_METRICS --- array of struct rtattr with types of RTAX_* */
+
+enum {
+ RTAX_UNSPEC,
+#define RTAX_UNSPEC RTAX_UNSPEC
+ RTAX_LOCK,
+#define RTAX_LOCK RTAX_LOCK
+ RTAX_MTU,
+#define RTAX_MTU RTAX_MTU
+ RTAX_WINDOW,
+#define RTAX_WINDOW RTAX_WINDOW
+ RTAX_RTT,
+#define RTAX_RTT RTAX_RTT
+ RTAX_RTTVAR,
+#define RTAX_RTTVAR RTAX_RTTVAR
+ RTAX_SSTHRESH,
+#define RTAX_SSTHRESH RTAX_SSTHRESH
+ RTAX_CWND,
+#define RTAX_CWND RTAX_CWND
+ RTAX_ADVMSS,
+#define RTAX_ADVMSS RTAX_ADVMSS
+ RTAX_REORDERING,
+#define RTAX_REORDERING RTAX_REORDERING
+ RTAX_HOPLIMIT,
+#define RTAX_HOPLIMIT RTAX_HOPLIMIT
+ RTAX_INITCWND,
+#define RTAX_INITCWND RTAX_INITCWND
+ RTAX_FEATURES,
+#define RTAX_FEATURES RTAX_FEATURES
+ RTAX_RTO_MIN,
+#define RTAX_RTO_MIN RTAX_RTO_MIN
+ RTAX_INITRWND,
+#define RTAX_INITRWND RTAX_INITRWND
+ RTAX_QUICKACK,
+#define RTAX_QUICKACK RTAX_QUICKACK
+ RTAX_CC_ALGO,
+#define RTAX_CC_ALGO RTAX_CC_ALGO
+ RTAX_FASTOPEN_NO_COOKIE,
+#define RTAX_FASTOPEN_NO_COOKIE RTAX_FASTOPEN_NO_COOKIE
+ __RTAX_MAX
+};
+
+#define RTAX_MAX (__RTAX_MAX - 1)
+
+#define RTAX_FEATURE_ECN (1 << 0)
+#define RTAX_FEATURE_SACK (1 << 1) /* unused */
+#define RTAX_FEATURE_TIMESTAMP (1 << 2) /* unused */
+#define RTAX_FEATURE_ALLFRAG (1 << 3) /* unused */
+#define RTAX_FEATURE_TCP_USEC_TS (1 << 4)
+
+#define RTAX_FEATURE_MASK (RTAX_FEATURE_ECN | \
+ RTAX_FEATURE_SACK | \
+ RTAX_FEATURE_TIMESTAMP | \
+ RTAX_FEATURE_ALLFRAG | \
+ RTAX_FEATURE_TCP_USEC_TS)
+
+struct rta_session {
+ __u8 proto;
+ __u8 pad1;
+ __u16 pad2;
+
+ union {
+ struct {
+ __u16 sport;
+ __u16 dport;
+ } ports;
+
+ struct {
+ __u8 type;
+ __u8 code;
+ __u16 ident;
+ } icmpt;
+
+ __u32 spi;
+ } u;
+};
+
+struct rta_mfc_stats {
+ __u64 mfcs_packets;
+ __u64 mfcs_bytes;
+ __u64 mfcs_wrong_if;
+};
+
+/****
+ * General form of address family dependent message.
+ ****/
+
+struct rtgenmsg {
+ unsigned char rtgen_family;
+};
+
+/*****************************************************************
+ * Link layer specific messages.
+ ****/
+
+/* struct ifinfomsg
+ * passes link level specific information, not dependent
+ * on network protocol.
+ */
+
+struct ifinfomsg {
+ unsigned char ifi_family;
+ unsigned char __ifi_pad;
+ unsigned short ifi_type; /* ARPHRD_* */
+ int ifi_index; /* Link index */
+ unsigned ifi_flags; /* IFF_* flags */
+ unsigned ifi_change; /* IFF_* change mask */
+};
+
+/********************************************************************
+ * prefix information
+ ****/
+
+struct prefixmsg {
+ unsigned char prefix_family;
+ unsigned char prefix_pad1;
+ unsigned short prefix_pad2;
+ int prefix_ifindex;
+ unsigned char prefix_type;
+ unsigned char prefix_len;
+ unsigned char prefix_flags;
+ unsigned char prefix_pad3;
+};
+
+enum
+{
+ PREFIX_UNSPEC,
+ PREFIX_ADDRESS,
+ PREFIX_CACHEINFO,
+ __PREFIX_MAX
+};
+
+#define PREFIX_MAX (__PREFIX_MAX - 1)
+
+struct prefix_cacheinfo {
+ __u32 preferred_time;
+ __u32 valid_time;
+};
+
+
+/*****************************************************************
+ * Traffic control messages.
+ ****/
+
+struct tcmsg {
+ unsigned char tcm_family;
+ unsigned char tcm__pad1;
+ unsigned short tcm__pad2;
+ int tcm_ifindex;
+ __u32 tcm_handle;
+ __u32 tcm_parent;
+/* tcm_block_index is used instead of tcm_parent
+ * in case tcm_ifindex == TCM_IFINDEX_MAGIC_BLOCK
+ */
+#define tcm_block_index tcm_parent
+ __u32 tcm_info;
+};
+
+/* For manipulation of filters in shared block, tcm_ifindex is set to
+ * TCM_IFINDEX_MAGIC_BLOCK, and tcm_parent is aliased to tcm_block_index
+ * which is the block index.
+ */
+#define TCM_IFINDEX_MAGIC_BLOCK (0xFFFFFFFFU)
+
+enum {
+ TCA_UNSPEC,
+ TCA_KIND,
+ TCA_OPTIONS,
+ TCA_STATS,
+ TCA_XSTATS,
+ TCA_RATE,
+ TCA_FCNT,
+ TCA_STATS2,
+ TCA_STAB,
+ TCA_PAD,
+ TCA_DUMP_INVISIBLE,
+ TCA_CHAIN,
+ TCA_HW_OFFLOAD,
+ TCA_INGRESS_BLOCK,
+ TCA_EGRESS_BLOCK,
+ TCA_DUMP_FLAGS,
+ TCA_EXT_WARN_MSG,
+ __TCA_MAX
+};
+
+#define TCA_MAX (__TCA_MAX - 1)
+
+#define TCA_DUMP_FLAGS_TERSE (1 << 0) /* Means that in dump user gets only basic
+ * data necessary to identify the objects
+ * (handle, cookie, etc.) and stats.
+ */
+
+#define TCA_RTA(r) ((struct rtattr*)(((char*)(r)) + NLMSG_ALIGN(sizeof(struct tcmsg))))
+#define TCA_PAYLOAD(n) NLMSG_PAYLOAD(n,sizeof(struct tcmsg))
+
+/********************************************************************
+ * Neighbor Discovery userland options
+ ****/
+
+struct nduseroptmsg {
+ unsigned char nduseropt_family;
+ unsigned char nduseropt_pad1;
+ unsigned short nduseropt_opts_len; /* Total length of options */
+ int nduseropt_ifindex;
+ __u8 nduseropt_icmp_type;
+ __u8 nduseropt_icmp_code;
+ unsigned short nduseropt_pad2;
+ unsigned int nduseropt_pad3;
+ /* Followed by one or more ND options */
+};
+
+enum {
+ NDUSEROPT_UNSPEC,
+ NDUSEROPT_SRCADDR,
+ __NDUSEROPT_MAX
+};
+
+#define NDUSEROPT_MAX (__NDUSEROPT_MAX - 1)
+
+#ifndef __KERNEL__
+/* RTnetlink multicast groups - backwards compatibility for userspace */
+#define RTMGRP_LINK 1
+#define RTMGRP_NOTIFY 2
+#define RTMGRP_NEIGH 4
+#define RTMGRP_TC 8
+
+#define RTMGRP_IPV4_IFADDR 0x10
+#define RTMGRP_IPV4_MROUTE 0x20
+#define RTMGRP_IPV4_ROUTE 0x40
+#define RTMGRP_IPV4_RULE 0x80
+
+#define RTMGRP_IPV6_IFADDR 0x100
+#define RTMGRP_IPV6_MROUTE 0x200
+#define RTMGRP_IPV6_ROUTE 0x400
+#define RTMGRP_IPV6_IFINFO 0x800
+
+#define RTMGRP_DECnet_IFADDR 0x1000
+#define RTMGRP_DECnet_ROUTE 0x4000
+
+#define RTMGRP_IPV6_PREFIX 0x20000
+#endif
+
+/* RTnetlink multicast groups */
+enum rtnetlink_groups {
+ RTNLGRP_NONE,
+#define RTNLGRP_NONE RTNLGRP_NONE
+ RTNLGRP_LINK,
+#define RTNLGRP_LINK RTNLGRP_LINK
+ RTNLGRP_NOTIFY,
+#define RTNLGRP_NOTIFY RTNLGRP_NOTIFY
+ RTNLGRP_NEIGH,
+#define RTNLGRP_NEIGH RTNLGRP_NEIGH
+ RTNLGRP_TC,
+#define RTNLGRP_TC RTNLGRP_TC
+ RTNLGRP_IPV4_IFADDR,
+#define RTNLGRP_IPV4_IFADDR RTNLGRP_IPV4_IFADDR
+ RTNLGRP_IPV4_MROUTE,
+#define RTNLGRP_IPV4_MROUTE RTNLGRP_IPV4_MROUTE
+ RTNLGRP_IPV4_ROUTE,
+#define RTNLGRP_IPV4_ROUTE RTNLGRP_IPV4_ROUTE
+ RTNLGRP_IPV4_RULE,
+#define RTNLGRP_IPV4_RULE RTNLGRP_IPV4_RULE
+ RTNLGRP_IPV6_IFADDR,
+#define RTNLGRP_IPV6_IFADDR RTNLGRP_IPV6_IFADDR
+ RTNLGRP_IPV6_MROUTE,
+#define RTNLGRP_IPV6_MROUTE RTNLGRP_IPV6_MROUTE
+ RTNLGRP_IPV6_ROUTE,
+#define RTNLGRP_IPV6_ROUTE RTNLGRP_IPV6_ROUTE
+ RTNLGRP_IPV6_IFINFO,
+#define RTNLGRP_IPV6_IFINFO RTNLGRP_IPV6_IFINFO
+ RTNLGRP_DECnet_IFADDR,
+#define RTNLGRP_DECnet_IFADDR RTNLGRP_DECnet_IFADDR
+ RTNLGRP_NOP2,
+ RTNLGRP_DECnet_ROUTE,
+#define RTNLGRP_DECnet_ROUTE RTNLGRP_DECnet_ROUTE
+ RTNLGRP_DECnet_RULE,
+#define RTNLGRP_DECnet_RULE RTNLGRP_DECnet_RULE
+ RTNLGRP_NOP4,
+ RTNLGRP_IPV6_PREFIX,
+#define RTNLGRP_IPV6_PREFIX RTNLGRP_IPV6_PREFIX
+ RTNLGRP_IPV6_RULE,
+#define RTNLGRP_IPV6_RULE RTNLGRP_IPV6_RULE
+ RTNLGRP_ND_USEROPT,
+#define RTNLGRP_ND_USEROPT RTNLGRP_ND_USEROPT
+ RTNLGRP_PHONET_IFADDR,
+#define RTNLGRP_PHONET_IFADDR RTNLGRP_PHONET_IFADDR
+ RTNLGRP_PHONET_ROUTE,
+#define RTNLGRP_PHONET_ROUTE RTNLGRP_PHONET_ROUTE
+ RTNLGRP_DCB,
+#define RTNLGRP_DCB RTNLGRP_DCB
+ RTNLGRP_IPV4_NETCONF,
+#define RTNLGRP_IPV4_NETCONF RTNLGRP_IPV4_NETCONF
+ RTNLGRP_IPV6_NETCONF,
+#define RTNLGRP_IPV6_NETCONF RTNLGRP_IPV6_NETCONF
+ RTNLGRP_MDB,
+#define RTNLGRP_MDB RTNLGRP_MDB
+ RTNLGRP_MPLS_ROUTE,
+#define RTNLGRP_MPLS_ROUTE RTNLGRP_MPLS_ROUTE
+ RTNLGRP_NSID,
+#define RTNLGRP_NSID RTNLGRP_NSID
+ RTNLGRP_MPLS_NETCONF,
+#define RTNLGRP_MPLS_NETCONF RTNLGRP_MPLS_NETCONF
+ RTNLGRP_IPV4_MROUTE_R,
+#define RTNLGRP_IPV4_MROUTE_R RTNLGRP_IPV4_MROUTE_R
+ RTNLGRP_IPV6_MROUTE_R,
+#define RTNLGRP_IPV6_MROUTE_R RTNLGRP_IPV6_MROUTE_R
+ RTNLGRP_NEXTHOP,
+#define RTNLGRP_NEXTHOP RTNLGRP_NEXTHOP
+ RTNLGRP_BRVLAN,
+#define RTNLGRP_BRVLAN RTNLGRP_BRVLAN
+ RTNLGRP_MCTP_IFADDR,
+#define RTNLGRP_MCTP_IFADDR RTNLGRP_MCTP_IFADDR
+ RTNLGRP_TUNNEL,
+#define RTNLGRP_TUNNEL RTNLGRP_TUNNEL
+ RTNLGRP_STATS,
+#define RTNLGRP_STATS RTNLGRP_STATS
+ RTNLGRP_IPV4_MCADDR,
+#define RTNLGRP_IPV4_MCADDR RTNLGRP_IPV4_MCADDR
+ RTNLGRP_IPV6_MCADDR,
+#define RTNLGRP_IPV6_MCADDR RTNLGRP_IPV6_MCADDR
+ RTNLGRP_IPV6_ACADDR,
+#define RTNLGRP_IPV6_ACADDR RTNLGRP_IPV6_ACADDR
+ __RTNLGRP_MAX
+};
+#define RTNLGRP_MAX (__RTNLGRP_MAX - 1)
+
+/* TC action piece */
+struct tcamsg {
+ unsigned char tca_family;
+ unsigned char tca__pad1;
+ unsigned short tca__pad2;
+};
+
+enum {
+ TCA_ROOT_UNSPEC,
+ TCA_ROOT_TAB,
+#define TCA_ACT_TAB TCA_ROOT_TAB
+#define TCAA_MAX TCA_ROOT_TAB
+ TCA_ROOT_FLAGS,
+ TCA_ROOT_COUNT,
+ TCA_ROOT_TIME_DELTA, /* in msecs */
+ TCA_ROOT_EXT_WARN_MSG,
+ __TCA_ROOT_MAX,
+#define TCA_ROOT_MAX (__TCA_ROOT_MAX - 1)
+};
+
+#define TA_RTA(r) ((struct rtattr*)(((char*)(r)) + NLMSG_ALIGN(sizeof(struct tcamsg))))
+#define TA_PAYLOAD(n) NLMSG_PAYLOAD(n,sizeof(struct tcamsg))
+/* tcamsg flags stored in attribute TCA_ROOT_FLAGS
+ *
+ * TCA_ACT_FLAG_LARGE_DUMP_ON user->kernel to request for larger than
+ * TCA_ACT_MAX_PRIO actions in a dump. All dump responses will contain the
+ * number of actions being dumped stored in for user app's consumption in
+ * TCA_ROOT_COUNT
+ *
+ * TCA_ACT_FLAG_TERSE_DUMP user->kernel to request terse (brief) dump that only
+ * includes essential action info (kind, index, etc.)
+ *
+ */
+#define TCA_FLAG_LARGE_DUMP_ON (1 << 0)
+#define TCA_ACT_FLAG_LARGE_DUMP_ON TCA_FLAG_LARGE_DUMP_ON
+#define TCA_ACT_FLAG_TERSE_DUMP (1 << 1)
+
+/* New extended info filters for IFLA_EXT_MASK */
+#define RTEXT_FILTER_VF (1 << 0)
+#define RTEXT_FILTER_BRVLAN (1 << 1)
+#define RTEXT_FILTER_BRVLAN_COMPRESSED (1 << 2)
+#define RTEXT_FILTER_SKIP_STATS (1 << 3)
+#define RTEXT_FILTER_MRP (1 << 4)
+#define RTEXT_FILTER_CFM_CONFIG (1 << 5)
+#define RTEXT_FILTER_CFM_STATUS (1 << 6)
+#define RTEXT_FILTER_MST (1 << 7)
+
+/* End of information exported to user level */
+
+
+
+#endif /* _UAPI__LINUX_RTNETLINK_H */
diff --git a/tools/lib/bpf/Build b/tools/lib/bpf/Build
index e2cd558ca0b4..c80204bb72a2 100644
--- a/tools/lib/bpf/Build
+++ b/tools/lib/bpf/Build
@@ -1,4 +1,4 @@
-libbpf-y := libbpf.o bpf.o nlattr.o btf.o libbpf_errno.o str_error.o \
+libbpf-y := libbpf.o bpf.o nlattr.o btf.o libbpf_utils.o \
netlink.o bpf_prog_linfo.o libbpf_probes.o hashmap.o \
btf_dump.o ringbuf.o strset.o linker.o gen_loader.o relo_core.o \
usdt.o zip.o elf.o features.o btf_iter.o btf_relocate.o
diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c
index ab40dbf9f020..b66f5fbfbbb2 100644
--- a/tools/lib/bpf/bpf.c
+++ b/tools/lib/bpf/bpf.c
@@ -154,7 +154,7 @@ int bump_rlimit_memlock(void)
memlock_bumped = true;
- /* zero memlock_rlim_max disables auto-bumping RLIMIT_MEMLOCK */
+ /* zero memlock_rlim disables auto-bumping RLIMIT_MEMLOCK */
if (memlock_rlim == 0)
return 0;
@@ -172,7 +172,7 @@ int bpf_map_create(enum bpf_map_type map_type,
__u32 max_entries,
const struct bpf_map_create_opts *opts)
{
- const size_t attr_sz = offsetofend(union bpf_attr, map_token_fd);
+ const size_t attr_sz = offsetofend(union bpf_attr, excl_prog_hash_size);
union bpf_attr attr;
int fd;
@@ -203,6 +203,8 @@ int bpf_map_create(enum bpf_map_type map_type,
attr.map_ifindex = OPTS_GET(opts, map_ifindex, 0);
attr.map_token_fd = OPTS_GET(opts, token_fd, 0);
+ attr.excl_prog_hash = ptr_to_u64(OPTS_GET(opts, excl_prog_hash, NULL));
+ attr.excl_prog_hash_size = OPTS_GET(opts, excl_prog_hash_size, 0);
fd = sys_bpf_fd(BPF_MAP_CREATE, &attr, attr_sz);
return libbpf_err_errno(fd);
@@ -238,7 +240,7 @@ int bpf_prog_load(enum bpf_prog_type prog_type,
const struct bpf_insn *insns, size_t insn_cnt,
struct bpf_prog_load_opts *opts)
{
- const size_t attr_sz = offsetofend(union bpf_attr, fd_array_cnt);
+ const size_t attr_sz = offsetofend(union bpf_attr, keyring_id);
void *finfo = NULL, *linfo = NULL;
const char *func_info, *line_info;
__u32 log_size, log_level, attach_prog_fd, attach_btf_obj_fd;
diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h
index 7252150e7ad3..e983a3e40d61 100644
--- a/tools/lib/bpf/bpf.h
+++ b/tools/lib/bpf/bpf.h
@@ -54,9 +54,12 @@ struct bpf_map_create_opts {
__s32 value_type_btf_obj_fd;
__u32 token_fd;
+
+ const void *excl_prog_hash;
+ __u32 excl_prog_hash_size;
size_t :0;
};
-#define bpf_map_create_opts__last_field token_fd
+#define bpf_map_create_opts__last_field excl_prog_hash_size
LIBBPF_API int bpf_map_create(enum bpf_map_type map_type,
const char *map_name,
diff --git a/tools/lib/bpf/bpf_gen_internal.h b/tools/lib/bpf/bpf_gen_internal.h
index 6ff963a491d9..49af4260b8e6 100644
--- a/tools/lib/bpf/bpf_gen_internal.h
+++ b/tools/lib/bpf/bpf_gen_internal.h
@@ -4,6 +4,7 @@
#define __BPF_GEN_INTERNAL_H
#include "bpf.h"
+#include "libbpf_internal.h"
struct ksym_relo_desc {
const char *name;
@@ -50,6 +51,7 @@ struct bpf_gen {
__u32 nr_ksyms;
int fd_array;
int nr_fd_array;
+ int hash_insn_offset[SHA256_DWORD_SIZE];
};
void bpf_gen__init(struct bpf_gen *gen, int log_level, int nr_progs, int nr_maps);
diff --git a/tools/lib/bpf/bpf_helpers.h b/tools/lib/bpf/bpf_helpers.h
index 80c028540656..d4e4e388e625 100644
--- a/tools/lib/bpf/bpf_helpers.h
+++ b/tools/lib/bpf/bpf_helpers.h
@@ -315,20 +315,20 @@ enum libbpf_tristate {
___param, sizeof(___param)); \
})
-extern int bpf_stream_vprintk(int stream_id, const char *fmt__str, const void *args,
- __u32 len__sz, void *aux__prog) __weak __ksym;
-
-#define bpf_stream_printk(stream_id, fmt, args...) \
-({ \
- static const char ___fmt[] = fmt; \
- unsigned long long ___param[___bpf_narg(args)]; \
- \
- _Pragma("GCC diagnostic push") \
- _Pragma("GCC diagnostic ignored \"-Wint-conversion\"") \
- ___bpf_fill(___param, args); \
- _Pragma("GCC diagnostic pop") \
- \
- bpf_stream_vprintk(stream_id, ___fmt, ___param, sizeof(___param), NULL);\
+extern int bpf_stream_vprintk_impl(int stream_id, const char *fmt__str, const void *args,
+ __u32 len__sz, void *aux__prog) __weak __ksym;
+
+#define bpf_stream_printk(stream_id, fmt, args...) \
+({ \
+ static const char ___fmt[] = fmt; \
+ unsigned long long ___param[___bpf_narg(args)]; \
+ \
+ _Pragma("GCC diagnostic push") \
+ _Pragma("GCC diagnostic ignored \"-Wint-conversion\"") \
+ ___bpf_fill(___param, args); \
+ _Pragma("GCC diagnostic pop") \
+ \
+ bpf_stream_vprintk_impl(stream_id, ___fmt, ___param, sizeof(___param), NULL); \
})
/* Use __bpf_printk when bpf_printk call has 3 or fewer fmt args
diff --git a/tools/lib/bpf/bpf_tracing.h b/tools/lib/bpf/bpf_tracing.h
index a8f6cd4841b0..dbe32a5d02cd 100644
--- a/tools/lib/bpf/bpf_tracing.h
+++ b/tools/lib/bpf/bpf_tracing.h
@@ -311,7 +311,7 @@ struct pt_regs___arm64 {
#define __PT_RET_REG regs[31]
#define __PT_FP_REG __unsupported__
#define __PT_RC_REG gpr[3]
-#define __PT_SP_REG sp
+#define __PT_SP_REG gpr[1]
#define __PT_IP_REG nip
#elif defined(bpf_target_sparc)
diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c
index 37682908cb0f..84a4b0abc8be 100644
--- a/tools/lib/bpf/btf.c
+++ b/tools/lib/bpf/btf.c
@@ -23,7 +23,6 @@
#include "libbpf_internal.h"
#include "hashmap.h"
#include "strset.h"
-#include "str_error.h"
#define BTF_MAX_NR_TYPES 0x7fffffffU
#define BTF_MAX_STR_OFFSET 0x7fffffffU
@@ -1062,7 +1061,7 @@ static struct btf *btf_new(const void *data, __u32 size, struct btf *base_btf, b
if (base_btf) {
btf->base_btf = base_btf;
btf->start_id = btf__type_cnt(base_btf);
- btf->start_str_off = base_btf->hdr->str_len;
+ btf->start_str_off = base_btf->hdr->str_len + base_btf->start_str_off;
}
if (is_mmap) {
@@ -3902,6 +3901,20 @@ err_out:
return err;
}
+/*
+ * Calculate type signature hash of TYPEDEF, ignoring referenced type IDs,
+ * as referenced type IDs equivalence is established separately during type
+ * graph equivalence check algorithm.
+ */
+static long btf_hash_typedef(struct btf_type *t)
+{
+ long h;
+
+ h = hash_combine(0, t->name_off);
+ h = hash_combine(h, t->info);
+ return h;
+}
+
static long btf_hash_common(struct btf_type *t)
{
long h;
@@ -3919,6 +3932,13 @@ static bool btf_equal_common(struct btf_type *t1, struct btf_type *t2)
t1->size == t2->size;
}
+/* Check structural compatibility of two TYPEDEF. */
+static bool btf_equal_typedef(struct btf_type *t1, struct btf_type *t2)
+{
+ return t1->name_off == t2->name_off &&
+ t1->info == t2->info;
+}
+
/* Calculate type signature hash of INT or TAG. */
static long btf_hash_int_decl_tag(struct btf_type *t)
{
@@ -4845,13 +4865,30 @@ static void btf_dedup_merge_hypot_map(struct btf_dedup *d)
}
}
+static inline long btf_hash_by_kind(struct btf_type *t, __u16 kind)
+{
+ if (kind == BTF_KIND_TYPEDEF)
+ return btf_hash_typedef(t);
+ else
+ return btf_hash_struct(t);
+}
+
+static inline bool btf_equal_by_kind(struct btf_type *t1, struct btf_type *t2, __u16 kind)
+{
+ if (kind == BTF_KIND_TYPEDEF)
+ return btf_equal_typedef(t1, t2);
+ else
+ return btf_shallow_equal_struct(t1, t2);
+}
+
/*
- * Deduplicate struct/union types.
+ * Deduplicate struct/union and typedef types.
*
* For each struct/union type its type signature hash is calculated, taking
* into account type's name, size, number, order and names of fields, but
* ignoring type ID's referenced from fields, because they might not be deduped
- * completely until after reference types deduplication phase. This type hash
+ * completely until after reference types deduplication phase. For each typedef
+ * type, the hash is computed based on the type’s name and size. This type hash
* is used to iterate over all potential canonical types, sharing same hash.
* For each canonical candidate we check whether type graphs that they form
* (through referenced types in fields and so on) are equivalent using algorithm
@@ -4883,18 +4920,20 @@ static int btf_dedup_struct_type(struct btf_dedup *d, __u32 type_id)
t = btf_type_by_id(d->btf, type_id);
kind = btf_kind(t);
- if (kind != BTF_KIND_STRUCT && kind != BTF_KIND_UNION)
+ if (kind != BTF_KIND_STRUCT &&
+ kind != BTF_KIND_UNION &&
+ kind != BTF_KIND_TYPEDEF)
return 0;
- h = btf_hash_struct(t);
+ h = btf_hash_by_kind(t, kind);
for_each_dedup_cand(d, hash_entry, h) {
__u32 cand_id = hash_entry->value;
int eq;
/*
* Even though btf_dedup_is_equiv() checks for
- * btf_shallow_equal_struct() internally when checking two
- * structs (unions) for equivalence, we need to guard here
+ * btf_equal_by_kind() internally when checking two
+ * structs (unions) or typedefs for equivalence, we need to guard here
* from picking matching FWD type as a dedup candidate.
* This can happen due to hash collision. In such case just
* relying on btf_dedup_is_equiv() would lead to potentially
@@ -4902,7 +4941,7 @@ static int btf_dedup_struct_type(struct btf_dedup *d, __u32 type_id)
* FWD and compatible STRUCT/UNION are considered equivalent.
*/
cand_type = btf_type_by_id(d->btf, cand_id);
- if (!btf_shallow_equal_struct(t, cand_type))
+ if (!btf_equal_by_kind(t, cand_type, kind))
continue;
btf_dedup_clear_hypot_map(d);
@@ -4940,18 +4979,18 @@ static int btf_dedup_struct_types(struct btf_dedup *d)
/*
* Deduplicate reference type.
*
- * Once all primitive and struct/union types got deduplicated, we can easily
+ * Once all primitive, struct/union and typedef types got deduplicated, we can easily
* deduplicate all other (reference) BTF types. This is done in two steps:
*
* 1. Resolve all referenced type IDs into their canonical type IDs. This
- * resolution can be done either immediately for primitive or struct/union types
- * (because they were deduped in previous two phases) or recursively for
+ * resolution can be done either immediately for primitive, struct/union, and typedef
+ * types (because they were deduped in previous two phases) or recursively for
* reference types. Recursion will always terminate at either primitive or
- * struct/union type, at which point we can "unwind" chain of reference types
- * one by one. There is no danger of encountering cycles because in C type
- * system the only way to form type cycle is through struct/union, so any chain
- * of reference types, even those taking part in a type cycle, will inevitably
- * reach struct/union at some point.
+ * struct/union and typedef types, at which point we can "unwind" chain of reference
+ * types one by one. There is no danger of encountering cycles in C, as the only way to
+ * form a type cycle is through struct or union types. Go can form such cycles through
+ * typedef. Thus, any chain of reference types, even those taking part in a type cycle,
+ * will inevitably reach a struct/union or typedef type at some point.
*
* 2. Once all referenced type IDs are resolved into canonical ones, BTF type
* becomes "stable", in the sense that no further deduplication will cause
@@ -4983,7 +5022,6 @@ static int btf_dedup_ref_type(struct btf_dedup *d, __u32 type_id)
case BTF_KIND_VOLATILE:
case BTF_KIND_RESTRICT:
case BTF_KIND_PTR:
- case BTF_KIND_TYPEDEF:
case BTF_KIND_FUNC:
case BTF_KIND_TYPE_TAG:
ref_type_id = btf_dedup_ref_type(d, t->type);
@@ -5819,7 +5857,7 @@ void btf_set_base_btf(struct btf *btf, const struct btf *base_btf)
{
btf->base_btf = (struct btf *)base_btf;
btf->start_id = btf__type_cnt(base_btf);
- btf->start_str_off = base_btf->hdr->str_len;
+ btf->start_str_off = base_btf->hdr->str_len + base_btf->start_str_off;
}
int btf__relocate(struct btf *btf, const struct btf *base_btf)
diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h
index ccfd905f03df..cc01494d6210 100644
--- a/tools/lib/bpf/btf.h
+++ b/tools/lib/bpf/btf.h
@@ -94,6 +94,7 @@ LIBBPF_API struct btf *btf__new_empty(void);
* @brief **btf__new_empty_split()** creates an unpopulated BTF object from an
* ELF BTF section except with a base BTF on top of which split BTF should be
* based
+ * @param base_btf base BTF object
* @return new BTF object instance which has to be eventually freed with
* **btf__free()**
*
@@ -115,6 +116,10 @@ LIBBPF_API struct btf *btf__new_empty_split(struct btf *base_btf);
* When that split BTF is loaded against a (possibly changed) base, this
* distilled base BTF will help update references to that (possibly changed)
* base BTF.
+ * @param src_btf source split BTF object
+ * @param new_base_btf pointer to where the new base BTF object pointer will be stored
+ * @param new_split_btf pointer to where the new split BTF object pointer will be stored
+ * @return 0 on success; negative error code, otherwise
*
* Both the new split and its associated new base BTF must be freed by
* the caller.
@@ -264,6 +269,9 @@ LIBBPF_API int btf__dedup(struct btf *btf, const struct btf_dedup_opts *opts);
* to base BTF kinds, and verify those references are compatible with
* *base_btf*; if they are, *btf* is adjusted such that is re-parented to
* *base_btf* and type ids and strings are adjusted to accommodate this.
+ * @param btf split BTF object to relocate
+ * @param base_btf base BTF object
+ * @return 0 on success; negative error code, otherwise
*
* If successful, 0 is returned and **btf** now has **base_btf** as its
* base.
diff --git a/tools/lib/bpf/btf_dump.c b/tools/lib/bpf/btf_dump.c
index f09f25eccf3c..6388392f49a0 100644
--- a/tools/lib/bpf/btf_dump.c
+++ b/tools/lib/bpf/btf_dump.c
@@ -21,7 +21,6 @@
#include "hashmap.h"
#include "libbpf.h"
#include "libbpf_internal.h"
-#include "str_error.h"
static const char PREFIXES[] = "\t\t\t\t\t\t\t\t\t\t\t\t\t";
static const size_t PREFIX_CNT = sizeof(PREFIXES) - 1;
diff --git a/tools/lib/bpf/elf.c b/tools/lib/bpf/elf.c
index 823f83ad819c..295dbda24580 100644
--- a/tools/lib/bpf/elf.c
+++ b/tools/lib/bpf/elf.c
@@ -9,7 +9,6 @@
#include <linux/kernel.h>
#include "libbpf_internal.h"
-#include "str_error.h"
/* A SHT_GNU_versym section holds 16-bit words. This bit is set if
* the symbol is hidden and can only be seen when referenced using an
diff --git a/tools/lib/bpf/features.c b/tools/lib/bpf/features.c
index 760657f5224c..b842b83e2480 100644
--- a/tools/lib/bpf/features.c
+++ b/tools/lib/bpf/features.c
@@ -6,7 +6,6 @@
#include "libbpf.h"
#include "libbpf_common.h"
#include "libbpf_internal.h"
-#include "str_error.h"
static inline __u64 ptr_to_u64(const void *ptr)
{
diff --git a/tools/lib/bpf/gen_loader.c b/tools/lib/bpf/gen_loader.c
index 113ae4abd345..cd5c2543f54d 100644
--- a/tools/lib/bpf/gen_loader.c
+++ b/tools/lib/bpf/gen_loader.c
@@ -4,6 +4,7 @@
#include <stdlib.h>
#include <string.h>
#include <errno.h>
+#include <asm/byteorder.h>
#include <linux/filter.h>
#include <sys/param.h>
#include "btf.h"
@@ -13,8 +14,6 @@
#include "hashmap.h"
#include "bpf_gen_internal.h"
#include "skel_internal.h"
-#include <asm/byteorder.h>
-#include "str_error.h"
#define MAX_USED_MAPS 64
#define MAX_USED_PROGS 32
@@ -110,6 +109,7 @@ static void emit2(struct bpf_gen *gen, struct bpf_insn insn1, struct bpf_insn in
static int add_data(struct bpf_gen *gen, const void *data, __u32 size);
static void emit_sys_close_blob(struct bpf_gen *gen, int blob_off);
+static void emit_signature_match(struct bpf_gen *gen);
void bpf_gen__init(struct bpf_gen *gen, int log_level, int nr_progs, int nr_maps)
{
@@ -152,6 +152,8 @@ void bpf_gen__init(struct bpf_gen *gen, int log_level, int nr_progs, int nr_maps
/* R7 contains the error code from sys_bpf. Copy it into R0 and exit. */
emit(gen, BPF_MOV64_REG(BPF_REG_0, BPF_REG_7));
emit(gen, BPF_EXIT_INSN());
+ if (OPTS_GET(gen->opts, gen_hash, false))
+ emit_signature_match(gen);
}
static int add_data(struct bpf_gen *gen, const void *data, __u32 size)
@@ -368,6 +370,8 @@ static void emit_sys_close_blob(struct bpf_gen *gen, int blob_off)
__emit_sys_close(gen);
}
+static void compute_sha_update_offsets(struct bpf_gen *gen);
+
int bpf_gen__finish(struct bpf_gen *gen, int nr_progs, int nr_maps)
{
int i;
@@ -394,6 +398,9 @@ int bpf_gen__finish(struct bpf_gen *gen, int nr_progs, int nr_maps)
blob_fd_array_off(gen, i));
emit(gen, BPF_MOV64_IMM(BPF_REG_0, 0));
emit(gen, BPF_EXIT_INSN());
+ if (OPTS_GET(gen->opts, gen_hash, false))
+ compute_sha_update_offsets(gen);
+
pr_debug("gen: finish %s\n", errstr(gen->error));
if (!gen->error) {
struct gen_loader_opts *opts = gen->opts;
@@ -446,6 +453,22 @@ void bpf_gen__free(struct bpf_gen *gen)
_val; \
})
+static void compute_sha_update_offsets(struct bpf_gen *gen)
+{
+ __u64 sha[SHA256_DWORD_SIZE];
+ __u64 sha_dw;
+ int i;
+
+ libbpf_sha256(gen->data_start, gen->data_cur - gen->data_start, (__u8 *)sha);
+ for (i = 0; i < SHA256_DWORD_SIZE; i++) {
+ struct bpf_insn *insn =
+ (struct bpf_insn *)(gen->insn_start + gen->hash_insn_offset[i]);
+ sha_dw = tgt_endian(sha[i]);
+ insn[0].imm = (__u32)sha_dw;
+ insn[1].imm = sha_dw >> 32;
+ }
+}
+
void bpf_gen__load_btf(struct bpf_gen *gen, const void *btf_raw_data,
__u32 btf_raw_size)
{
@@ -557,6 +580,29 @@ void bpf_gen__map_create(struct bpf_gen *gen,
emit_sys_close_stack(gen, stack_off(inner_map_fd));
}
+static void emit_signature_match(struct bpf_gen *gen)
+{
+ __s64 off;
+ int i;
+
+ for (i = 0; i < SHA256_DWORD_SIZE; i++) {
+ emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_1, BPF_PSEUDO_MAP_IDX,
+ 0, 0, 0, 0));
+ emit(gen, BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, i * sizeof(__u64)));
+ gen->hash_insn_offset[i] = gen->insn_cur - gen->insn_start;
+ emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_3, 0, 0, 0, 0, 0));
+
+ off = -(gen->insn_cur - gen->insn_start - gen->cleanup_label) / 8 - 1;
+ if (is_simm16(off)) {
+ emit(gen, BPF_MOV64_IMM(BPF_REG_7, -EINVAL));
+ emit(gen, BPF_JMP_REG(BPF_JNE, BPF_REG_2, BPF_REG_3, off));
+ } else {
+ gen->error = -ERANGE;
+ emit(gen, BPF_JMP_IMM(BPF_JA, 0, 0, -1));
+ }
+ }
+}
+
void bpf_gen__record_attach_target(struct bpf_gen *gen, const char *attach_name,
enum bpf_attach_type type)
{
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 8f5a81b672e1..3dc8a8078815 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -50,7 +50,6 @@
#include "libbpf.h"
#include "bpf.h"
#include "btf.h"
-#include "str_error.h"
#include "libbpf_internal.h"
#include "hashmap.h"
#include "bpf_gen_internal.h"
@@ -191,6 +190,7 @@ static const char * const map_type_name[] = {
[BPF_MAP_TYPE_USER_RINGBUF] = "user_ringbuf",
[BPF_MAP_TYPE_CGRP_STORAGE] = "cgrp_storage",
[BPF_MAP_TYPE_ARENA] = "arena",
+ [BPF_MAP_TYPE_INSN_ARRAY] = "insn_array",
};
static const char * const prog_type_name[] = {
@@ -318,8 +318,6 @@ static void pr_perm_msg(int err)
buf);
}
-#define STRERR_BUFSIZE 128
-
/* Copied from tools/perf/util/util.h */
#ifndef zfree
# define zfree(ptr) ({ free(*ptr); *ptr = NULL; })
@@ -372,6 +370,7 @@ enum reloc_type {
RELO_EXTERN_CALL,
RELO_SUBPROG_ADDR,
RELO_CORE,
+ RELO_INSN_ARRAY,
};
struct reloc_desc {
@@ -382,7 +381,16 @@ struct reloc_desc {
struct {
int map_idx;
int sym_off;
- int ext_idx;
+ /*
+ * The following two fields can be unionized, as the
+ * ext_idx field is used for extern symbols, and the
+ * sym_size is used for jump tables, which are never
+ * extern
+ */
+ union {
+ int ext_idx;
+ int sym_size;
+ };
};
};
};
@@ -424,6 +432,11 @@ struct bpf_sec_def {
libbpf_prog_attach_fn_t prog_attach_fn;
};
+struct bpf_light_subprog {
+ __u32 sec_insn_off;
+ __u32 sub_insn_off;
+};
+
/*
* bpf_prog should be a better name but it has been used in
* linux/filter.h.
@@ -496,6 +509,10 @@ struct bpf_program {
__u32 line_info_rec_size;
__u32 line_info_cnt;
__u32 prog_flags;
+ __u8 hash[SHA256_DIGEST_LENGTH];
+
+ struct bpf_light_subprog *subprogs;
+ __u32 subprog_cnt;
};
struct bpf_struct_ops {
@@ -575,6 +592,7 @@ struct bpf_map {
bool autocreate;
bool autoattach;
__u64 map_extra;
+ struct bpf_program *excl_prog;
};
enum extern_type {
@@ -668,6 +686,7 @@ struct elf_state {
int symbols_shndx;
bool has_st_ops;
int arena_data_shndx;
+ int jumptables_data_shndx;
};
struct usdt_manager;
@@ -739,6 +758,16 @@ struct bpf_object {
void *arena_data;
size_t arena_data_sz;
+ void *jumptables_data;
+ size_t jumptables_data_sz;
+
+ struct {
+ struct bpf_program *prog;
+ int sym_off;
+ int fd;
+ } *jumptable_maps;
+ size_t jumptable_map_cnt;
+
struct kern_feature_cache *feat_cache;
char *token_path;
int token_fd;
@@ -765,6 +794,7 @@ void bpf_program__unload(struct bpf_program *prog)
zfree(&prog->func_info);
zfree(&prog->line_info);
+ zfree(&prog->subprogs);
}
static void bpf_program__exit(struct bpf_program *prog)
@@ -1013,35 +1043,33 @@ find_struct_ops_kern_types(struct bpf_object *obj, const char *tname_raw,
const struct btf_member *kern_data_member;
struct btf *btf = NULL;
__s32 kern_vtype_id, kern_type_id;
- char tname[256];
+ char tname[192], stname[256];
__u32 i;
snprintf(tname, sizeof(tname), "%.*s",
(int)bpf_core_essential_name_len(tname_raw), tname_raw);
- kern_type_id = find_ksym_btf_id(obj, tname, BTF_KIND_STRUCT,
- &btf, mod_btf);
- if (kern_type_id < 0) {
- pr_warn("struct_ops init_kern: struct %s is not found in kernel BTF\n",
- tname);
- return kern_type_id;
- }
- kern_type = btf__type_by_id(btf, kern_type_id);
+ snprintf(stname, sizeof(stname), "%s%s", STRUCT_OPS_VALUE_PREFIX, tname);
- /* Find the corresponding "map_value" type that will be used
- * in map_update(BPF_MAP_TYPE_STRUCT_OPS). For example,
- * find "struct bpf_struct_ops_tcp_congestion_ops" from the
- * btf_vmlinux.
+ /* Look for the corresponding "map_value" type that will be used
+ * in map_update(BPF_MAP_TYPE_STRUCT_OPS) first, figure out the btf
+ * and the mod_btf.
+ * For example, find "struct bpf_struct_ops_tcp_congestion_ops".
*/
- kern_vtype_id = find_btf_by_prefix_kind(btf, STRUCT_OPS_VALUE_PREFIX,
- tname, BTF_KIND_STRUCT);
+ kern_vtype_id = find_ksym_btf_id(obj, stname, BTF_KIND_STRUCT, &btf, mod_btf);
if (kern_vtype_id < 0) {
- pr_warn("struct_ops init_kern: struct %s%s is not found in kernel BTF\n",
- STRUCT_OPS_VALUE_PREFIX, tname);
+ pr_warn("struct_ops init_kern: struct %s is not found in kernel BTF\n", stname);
return kern_vtype_id;
}
kern_vtype = btf__type_by_id(btf, kern_vtype_id);
+ kern_type_id = btf__find_by_name_kind(btf, tname, BTF_KIND_STRUCT);
+ if (kern_type_id < 0) {
+ pr_warn("struct_ops init_kern: struct %s is not found in kernel BTF\n", tname);
+ return kern_type_id;
+ }
+ kern_type = btf__type_by_id(btf, kern_type_id);
+
/* Find "struct tcp_congestion_ops" from
* struct bpf_struct_ops_tcp_congestion_ops {
* [ ... ]
@@ -1054,8 +1082,8 @@ find_struct_ops_kern_types(struct bpf_object *obj, const char *tname_raw,
break;
}
if (i == btf_vlen(kern_vtype)) {
- pr_warn("struct_ops init_kern: struct %s data is not found in struct %s%s\n",
- tname, STRUCT_OPS_VALUE_PREFIX, tname);
+ pr_warn("struct_ops init_kern: struct %s data is not found in struct %s\n",
+ tname, stname);
return -EINVAL;
}
@@ -2999,7 +3027,7 @@ static int bpf_object__init_user_btf_maps(struct bpf_object *obj, bool strict,
scn = elf_sec_by_idx(obj, obj->efile.btf_maps_shndx);
data = elf_sec_data(obj, scn);
- if (!scn || !data) {
+ if (!data) {
pr_warn("elf: failed to get %s map definitions for %s\n",
MAPS_ELF_SEC, obj->path);
return -EINVAL;
@@ -3945,6 +3973,13 @@ static int bpf_object__elf_collect(struct bpf_object *obj)
} else if (strcmp(name, ARENA_SEC) == 0) {
obj->efile.arena_data = data;
obj->efile.arena_data_shndx = idx;
+ } else if (strcmp(name, JUMPTABLES_SEC) == 0) {
+ obj->jumptables_data = malloc(data->d_size);
+ if (!obj->jumptables_data)
+ return -ENOMEM;
+ memcpy(obj->jumptables_data, data->d_buf, data->d_size);
+ obj->jumptables_data_sz = data->d_size;
+ obj->efile.jumptables_data_shndx = idx;
} else {
pr_info("elf: skipping unrecognized data section(%d) %s\n",
idx, name);
@@ -4485,6 +4520,44 @@ bpf_object__section_to_libbpf_map_type(const struct bpf_object *obj, int shndx)
}
}
+static int bpf_prog_compute_hash(struct bpf_program *prog)
+{
+ struct bpf_insn *purged;
+ int i, err = 0;
+
+ purged = calloc(prog->insns_cnt, BPF_INSN_SZ);
+ if (!purged)
+ return -ENOMEM;
+
+ /* If relocations have been done, the map_fd needs to be
+ * discarded for the digest calculation.
+ */
+ for (i = 0; i < prog->insns_cnt; i++) {
+ purged[i] = prog->insns[i];
+ if (purged[i].code == (BPF_LD | BPF_IMM | BPF_DW) &&
+ (purged[i].src_reg == BPF_PSEUDO_MAP_FD ||
+ purged[i].src_reg == BPF_PSEUDO_MAP_VALUE)) {
+ purged[i].imm = 0;
+ i++;
+ if (i >= prog->insns_cnt ||
+ prog->insns[i].code != 0 ||
+ prog->insns[i].dst_reg != 0 ||
+ prog->insns[i].src_reg != 0 ||
+ prog->insns[i].off != 0) {
+ err = -EINVAL;
+ goto out;
+ }
+ purged[i] = prog->insns[i];
+ purged[i].imm = 0;
+ }
+ }
+ libbpf_sha256(purged, prog->insns_cnt * sizeof(struct bpf_insn),
+ prog->hash);
+out:
+ free(purged);
+ return err;
+}
+
static int bpf_program__record_reloc(struct bpf_program *prog,
struct reloc_desc *reloc_desc,
__u32 insn_idx, const char *sym_name,
@@ -4599,6 +4672,16 @@ static int bpf_program__record_reloc(struct bpf_program *prog,
return 0;
}
+ /* jump table data relocation */
+ if (shdr_idx == obj->efile.jumptables_data_shndx) {
+ reloc_desc->type = RELO_INSN_ARRAY;
+ reloc_desc->insn_idx = insn_idx;
+ reloc_desc->map_idx = -1;
+ reloc_desc->sym_off = sym->st_value;
+ reloc_desc->sym_size = sym->st_size;
+ return 0;
+ }
+
/* generic map reference relocation */
if (type == LIBBPF_MAP_UNSPEC) {
if (!bpf_object__shndx_is_maps(obj, shdr_idx)) {
@@ -5093,6 +5176,16 @@ static bool map_is_reuse_compat(const struct bpf_map *map, int map_fd)
return false;
}
+ /*
+ * bpf_get_map_info_by_fd() for DEVMAP will always return flags with
+ * BPF_F_RDONLY_PROG set, but it generally is not set at map creation time.
+ * Thus, ignore the BPF_F_RDONLY_PROG flag in the flags returned from
+ * bpf_get_map_info_by_fd() when checking for compatibility with an
+ * existing DEVMAP.
+ */
+ if (map->def.type == BPF_MAP_TYPE_DEVMAP || map->def.type == BPF_MAP_TYPE_DEVMAP_HASH)
+ map_info.map_flags &= ~BPF_F_RDONLY_PROG;
+
return (map_info.type == map->def.type &&
map_info.key_size == map->def.key_size &&
map_info.value_size == map->def.value_size &&
@@ -5224,6 +5317,14 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, b
create_attr.token_fd = obj->token_fd;
if (obj->token_fd)
create_attr.map_flags |= BPF_F_TOKEN_FD;
+ if (map->excl_prog) {
+ err = bpf_prog_compute_hash(map->excl_prog);
+ if (err)
+ return err;
+
+ create_attr.excl_prog_hash = map->excl_prog->hash;
+ create_attr.excl_prog_hash_size = SHA256_DIGEST_LENGTH;
+ }
if (bpf_map__is_struct_ops(map)) {
create_attr.btf_vmlinux_value_type_id = map->btf_vmlinux_value_type_id;
@@ -6091,6 +6192,157 @@ static void poison_kfunc_call(struct bpf_program *prog, int relo_idx,
insn->imm = POISON_CALL_KFUNC_BASE + ext_idx;
}
+static int find_jt_map(struct bpf_object *obj, struct bpf_program *prog, int sym_off)
+{
+ size_t i;
+
+ for (i = 0; i < obj->jumptable_map_cnt; i++) {
+ /*
+ * This might happen that same offset is used for two different
+ * programs (as jump tables can be the same). However, for
+ * different programs different maps should be created.
+ */
+ if (obj->jumptable_maps[i].sym_off == sym_off &&
+ obj->jumptable_maps[i].prog == prog)
+ return obj->jumptable_maps[i].fd;
+ }
+
+ return -ENOENT;
+}
+
+static int add_jt_map(struct bpf_object *obj, struct bpf_program *prog, int sym_off, int map_fd)
+{
+ size_t cnt = obj->jumptable_map_cnt;
+ size_t size = sizeof(obj->jumptable_maps[0]);
+ void *tmp;
+
+ tmp = libbpf_reallocarray(obj->jumptable_maps, cnt + 1, size);
+ if (!tmp)
+ return -ENOMEM;
+
+ obj->jumptable_maps = tmp;
+ obj->jumptable_maps[cnt].prog = prog;
+ obj->jumptable_maps[cnt].sym_off = sym_off;
+ obj->jumptable_maps[cnt].fd = map_fd;
+ obj->jumptable_map_cnt++;
+
+ return 0;
+}
+
+static int find_subprog_idx(struct bpf_program *prog, int insn_idx)
+{
+ int i;
+
+ for (i = prog->subprog_cnt - 1; i >= 0; i--) {
+ if (insn_idx >= prog->subprogs[i].sub_insn_off)
+ return i;
+ }
+
+ return -1;
+}
+
+static int create_jt_map(struct bpf_object *obj, struct bpf_program *prog, struct reloc_desc *relo)
+{
+ const __u32 jt_entry_size = 8;
+ int sym_off = relo->sym_off;
+ int jt_size = relo->sym_size;
+ __u32 max_entries = jt_size / jt_entry_size;
+ __u32 value_size = sizeof(struct bpf_insn_array_value);
+ struct bpf_insn_array_value val = {};
+ int subprog_idx;
+ int map_fd, err;
+ __u64 insn_off;
+ __u64 *jt;
+ __u32 i;
+
+ map_fd = find_jt_map(obj, prog, sym_off);
+ if (map_fd >= 0)
+ return map_fd;
+
+ if (sym_off % jt_entry_size) {
+ pr_warn("map '.jumptables': jumptable start %d should be multiple of %u\n",
+ sym_off, jt_entry_size);
+ return -EINVAL;
+ }
+
+ if (jt_size % jt_entry_size) {
+ pr_warn("map '.jumptables': jumptable size %d should be multiple of %u\n",
+ jt_size, jt_entry_size);
+ return -EINVAL;
+ }
+
+ map_fd = bpf_map_create(BPF_MAP_TYPE_INSN_ARRAY, ".jumptables",
+ 4, value_size, max_entries, NULL);
+ if (map_fd < 0)
+ return map_fd;
+
+ if (!obj->jumptables_data) {
+ pr_warn("map '.jumptables': ELF file is missing jump table data\n");
+ err = -EINVAL;
+ goto err_close;
+ }
+ if (sym_off + jt_size > obj->jumptables_data_sz) {
+ pr_warn("map '.jumptables': jumptables_data size is %zd, trying to access %d\n",
+ obj->jumptables_data_sz, sym_off + jt_size);
+ err = -EINVAL;
+ goto err_close;
+ }
+
+ subprog_idx = -1; /* main program */
+ if (relo->insn_idx < 0 || relo->insn_idx >= prog->insns_cnt) {
+ pr_warn("map '.jumptables': invalid instruction index %d\n", relo->insn_idx);
+ err = -EINVAL;
+ goto err_close;
+ }
+ if (prog->subprogs)
+ subprog_idx = find_subprog_idx(prog, relo->insn_idx);
+
+ jt = (__u64 *)(obj->jumptables_data + sym_off);
+ for (i = 0; i < max_entries; i++) {
+ /*
+ * The offset should be made to be relative to the beginning of
+ * the main function, not the subfunction.
+ */
+ insn_off = jt[i]/sizeof(struct bpf_insn);
+ if (subprog_idx >= 0) {
+ insn_off -= prog->subprogs[subprog_idx].sec_insn_off;
+ insn_off += prog->subprogs[subprog_idx].sub_insn_off;
+ } else {
+ insn_off -= prog->sec_insn_off;
+ }
+
+ /*
+ * LLVM-generated jump tables contain u64 records, however
+ * should contain values that fit in u32.
+ */
+ if (insn_off > UINT32_MAX) {
+ pr_warn("map '.jumptables': invalid jump table value 0x%llx at offset %d\n",
+ (long long)jt[i], sym_off + i * jt_entry_size);
+ err = -EINVAL;
+ goto err_close;
+ }
+
+ val.orig_off = insn_off;
+ err = bpf_map_update_elem(map_fd, &i, &val, 0);
+ if (err)
+ goto err_close;
+ }
+
+ err = bpf_map_freeze(map_fd);
+ if (err)
+ goto err_close;
+
+ err = add_jt_map(obj, prog, sym_off, map_fd);
+ if (err)
+ goto err_close;
+
+ return map_fd;
+
+err_close:
+ close(map_fd);
+ return err;
+}
+
/* Relocate data references within program code:
* - map references;
* - global variable references;
@@ -6182,6 +6434,20 @@ bpf_object__relocate_data(struct bpf_object *obj, struct bpf_program *prog)
case RELO_CORE:
/* will be handled by bpf_program_record_relos() */
break;
+ case RELO_INSN_ARRAY: {
+ int map_fd;
+
+ map_fd = create_jt_map(obj, prog, relo);
+ if (map_fd < 0) {
+ pr_warn("prog '%s': relo #%d: can't create jump table: sym_off %u\n",
+ prog->name, i, relo->sym_off);
+ return map_fd;
+ }
+ insn[0].src_reg = BPF_PSEUDO_MAP_VALUE;
+ insn->imm = map_fd;
+ insn->off = 0;
+ }
+ break;
default:
pr_warn("prog '%s': relo #%d: bad relo type %d\n",
prog->name, i, relo->type);
@@ -6379,36 +6645,62 @@ static int append_subprog_relos(struct bpf_program *main_prog, struct bpf_progra
return 0;
}
+static int save_subprog_offsets(struct bpf_program *main_prog, struct bpf_program *subprog)
+{
+ size_t size = sizeof(main_prog->subprogs[0]);
+ int cnt = main_prog->subprog_cnt;
+ void *tmp;
+
+ tmp = libbpf_reallocarray(main_prog->subprogs, cnt + 1, size);
+ if (!tmp)
+ return -ENOMEM;
+
+ main_prog->subprogs = tmp;
+ main_prog->subprogs[cnt].sec_insn_off = subprog->sec_insn_off;
+ main_prog->subprogs[cnt].sub_insn_off = subprog->sub_insn_off;
+ main_prog->subprog_cnt++;
+
+ return 0;
+}
+
static int
bpf_object__append_subprog_code(struct bpf_object *obj, struct bpf_program *main_prog,
struct bpf_program *subprog)
{
- struct bpf_insn *insns;
- size_t new_cnt;
- int err;
+ struct bpf_insn *insns;
+ size_t new_cnt;
+ int err;
+
+ subprog->sub_insn_off = main_prog->insns_cnt;
- subprog->sub_insn_off = main_prog->insns_cnt;
+ new_cnt = main_prog->insns_cnt + subprog->insns_cnt;
+ insns = libbpf_reallocarray(main_prog->insns, new_cnt, sizeof(*insns));
+ if (!insns) {
+ pr_warn("prog '%s': failed to realloc prog code\n", main_prog->name);
+ return -ENOMEM;
+ }
+ main_prog->insns = insns;
+ main_prog->insns_cnt = new_cnt;
- new_cnt = main_prog->insns_cnt + subprog->insns_cnt;
- insns = libbpf_reallocarray(main_prog->insns, new_cnt, sizeof(*insns));
- if (!insns) {
- pr_warn("prog '%s': failed to realloc prog code\n", main_prog->name);
- return -ENOMEM;
- }
- main_prog->insns = insns;
- main_prog->insns_cnt = new_cnt;
+ memcpy(main_prog->insns + subprog->sub_insn_off, subprog->insns,
+ subprog->insns_cnt * sizeof(*insns));
- memcpy(main_prog->insns + subprog->sub_insn_off, subprog->insns,
- subprog->insns_cnt * sizeof(*insns));
+ pr_debug("prog '%s': added %zu insns from sub-prog '%s'\n",
+ main_prog->name, subprog->insns_cnt, subprog->name);
- pr_debug("prog '%s': added %zu insns from sub-prog '%s'\n",
- main_prog->name, subprog->insns_cnt, subprog->name);
+ /* The subprog insns are now appended. Append its relos too. */
+ err = append_subprog_relos(main_prog, subprog);
+ if (err)
+ return err;
- /* The subprog insns are now appended. Append its relos too. */
- err = append_subprog_relos(main_prog, subprog);
- if (err)
- return err;
- return 0;
+ err = save_subprog_offsets(main_prog, subprog);
+ if (err) {
+ pr_warn("prog '%s': failed to add subprog offsets: %s\n",
+ main_prog->name, errstr(err));
+ return err;
+ }
+
+ return 0;
}
static int
@@ -9175,6 +9467,13 @@ void bpf_object__close(struct bpf_object *obj)
zfree(&obj->arena_data);
+ zfree(&obj->jumptables_data);
+ obj->jumptables_data_sz = 0;
+
+ for (i = 0; i < obj->jumptable_map_cnt; i++)
+ close(obj->jumptable_maps[i].fd);
+ zfree(&obj->jumptable_maps);
+
free(obj);
}
@@ -10514,6 +10813,27 @@ int bpf_map__set_inner_map_fd(struct bpf_map *map, int fd)
return 0;
}
+int bpf_map__set_exclusive_program(struct bpf_map *map, struct bpf_program *prog)
+{
+ if (map_is_created(map)) {
+ pr_warn("exclusive programs must be set before map creation\n");
+ return libbpf_err(-EINVAL);
+ }
+
+ if (map->obj != prog->obj) {
+ pr_warn("excl_prog and map must be from the same bpf object\n");
+ return libbpf_err(-EINVAL);
+ }
+
+ map->excl_prog = prog;
+ return 0;
+}
+
+struct bpf_program *bpf_map__exclusive_program(struct bpf_map *map)
+{
+ return map->excl_prog;
+}
+
static struct bpf_map *
__bpf_map__iter(const struct bpf_map *m, const struct bpf_object *obj, int i)
{
@@ -11251,8 +11571,6 @@ static const char *arch_specific_syscall_pfx(void)
return "ia32";
#elif defined(__s390x__)
return "s390x";
-#elif defined(__s390__)
- return "s390";
#elif defined(__arm__)
return "arm";
#elif defined(__aarch64__)
@@ -12039,8 +12357,6 @@ static const char *arch_specific_lib_paths(void)
return "/lib/i386-linux-gnu";
#elif defined(__s390x__)
return "/lib/s390x-linux-gnu";
-#elif defined(__s390__)
- return "/lib/s390-linux-gnu";
#elif defined(__arm__) && defined(__SOFTFP__)
return "/lib/arm-linux-gnueabi";
#elif defined(__arm__) && !defined(__SOFTFP__)
@@ -13784,8 +14100,8 @@ int bpf_program__set_attach_target(struct bpf_program *prog,
return libbpf_err(-EINVAL);
if (attach_prog_fd && !attach_func_name) {
- /* remember attach_prog_fd and let bpf_program__load() find
- * BTF ID during the program load
+ /* Store attach_prog_fd. The BTF ID will be resolved later during
+ * the normal object/program load phase.
*/
prog->attach_prog_fd = attach_prog_fd;
return 0;
diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
index 455a957cb702..65e68e964b89 100644
--- a/tools/lib/bpf/libbpf.h
+++ b/tools/lib/bpf/libbpf.h
@@ -24,8 +24,25 @@
extern "C" {
#endif
+/**
+ * @brief **libbpf_major_version()** provides the major version of libbpf.
+ * @return An integer, the major version number
+ */
LIBBPF_API __u32 libbpf_major_version(void);
+
+/**
+ * @brief **libbpf_minor_version()** provides the minor version of libbpf.
+ * @return An integer, the minor version number
+ */
LIBBPF_API __u32 libbpf_minor_version(void);
+
+/**
+ * @brief **libbpf_version_string()** provides the version of libbpf in a
+ * human-readable form, e.g., "v1.7".
+ * @return Pointer to a static string containing the version
+ *
+ * The format is *not* a part of a stable API and may change in the future.
+ */
LIBBPF_API const char *libbpf_version_string(void);
enum libbpf_errno {
@@ -49,6 +66,14 @@ enum libbpf_errno {
__LIBBPF_ERRNO__END,
};
+/**
+ * @brief **libbpf_strerror()** converts the provided error code into a
+ * human-readable string.
+ * @param err The error code to convert
+ * @param buf Pointer to a buffer where the error message will be stored
+ * @param size The number of bytes in the buffer
+ * @return 0, on success; negative error code, otherwise
+ */
LIBBPF_API int libbpf_strerror(int err, char *buf, size_t size);
/**
@@ -252,7 +277,7 @@ bpf_object__open_mem(const void *obj_buf, size_t obj_buf_sz,
* @return 0, on success; negative error code, otherwise, error code is
* stored in errno
*/
-int bpf_object__prepare(struct bpf_object *obj);
+LIBBPF_API int bpf_object__prepare(struct bpf_object *obj);
/**
* @brief **bpf_object__load()** loads BPF object into kernel.
@@ -423,7 +448,7 @@ LIBBPF_API int bpf_program__pin(struct bpf_program *prog, const char *path);
/**
* @brief **bpf_program__unpin()** unpins the BPF program from a file
- * in the BPFFS specified by a path. This decrements the programs
+ * in the BPFFS specified by a path. This decrements program's in-kernel
* reference count.
*
* The file pinning the BPF program can also be unlinked by a different
@@ -456,14 +481,12 @@ LIBBPF_API int bpf_link__pin(struct bpf_link *link, const char *path);
/**
* @brief **bpf_link__unpin()** unpins the BPF link from a file
- * in the BPFFS specified by a path. This decrements the links
- * reference count.
+ * in the BPFFS. This decrements link's in-kernel reference count.
*
* The file pinning the BPF link can also be unlinked by a different
* process in which case this function will return an error.
*
- * @param prog BPF program to unpin
- * @param path file path to the pin in a BPF file system
+ * @param link BPF link to unpin
* @return 0, on success; negative error code, otherwise
*/
LIBBPF_API int bpf_link__unpin(struct bpf_link *link);
@@ -970,8 +993,13 @@ LIBBPF_API __u32 bpf_program__line_info_cnt(const struct bpf_program *prog);
* - fentry/fexit/fmod_ret;
* - lsm;
* - freplace.
- * @param prog BPF program to set the attach type for
- * @param type attach type to set the BPF map to have
+ * @param prog BPF program to configure; must be not yet loaded.
+ * @param attach_prog_fd FD of target BPF program (for freplace/extension).
+ * If >0 and func name omitted, defers BTF ID resolution.
+ * @param attach_func_name Target function name. Used either with
+ * attach_prog_fd to find destination BTF type ID in that BPF program, or
+ * alone (no attach_prog_fd) to resolve kernel (vmlinux/module) BTF ID.
+ * Must be provided if attach_prog_fd is 0.
* @return error code; or 0 if no error occurred.
*/
LIBBPF_API int
@@ -1073,6 +1101,7 @@ LIBBPF_API __u32 bpf_map__value_size(const struct bpf_map *map);
/**
* @brief **bpf_map__set_value_size()** sets map value size.
* @param map the BPF map instance
+ * @param size the new value size
* @return 0, on success; negative error, otherwise
*
* There is a special case for maps with associated memory-mapped regions, like
@@ -1177,7 +1206,7 @@ LIBBPF_API struct bpf_map *bpf_map__inner_map(struct bpf_map *map);
* per-CPU values value size has to be aligned up to closest 8 bytes for
* alignment reasons, so expected size is: `round_up(value_size, 8)
* * libbpf_num_possible_cpus()`.
- * @flags extra flags passed to kernel for this operation
+ * @param flags extra flags passed to kernel for this operation
* @return 0, on success; negative error, otherwise
*
* **bpf_map__lookup_elem()** is high-level equivalent of
@@ -1201,7 +1230,7 @@ LIBBPF_API int bpf_map__lookup_elem(const struct bpf_map *map,
* per-CPU values value size has to be aligned up to closest 8 bytes for
* alignment reasons, so expected size is: `round_up(value_size, 8)
* * libbpf_num_possible_cpus()`.
- * @flags extra flags passed to kernel for this operation
+ * @param flags extra flags passed to kernel for this operation
* @return 0, on success; negative error, otherwise
*
* **bpf_map__update_elem()** is high-level equivalent of
@@ -1217,7 +1246,7 @@ LIBBPF_API int bpf_map__update_elem(const struct bpf_map *map,
* @param map BPF map to delete element from
* @param key pointer to memory containing bytes of the key
* @param key_sz size in bytes of key data, needs to match BPF map definition's **key_size**
- * @flags extra flags passed to kernel for this operation
+ * @param flags extra flags passed to kernel for this operation
* @return 0, on success; negative error, otherwise
*
* **bpf_map__delete_elem()** is high-level equivalent of
@@ -1240,7 +1269,7 @@ LIBBPF_API int bpf_map__delete_elem(const struct bpf_map *map,
* per-CPU values value size has to be aligned up to closest 8 bytes for
* alignment reasons, so expected size is: `round_up(value_size, 8)
* * libbpf_num_possible_cpus()`.
- * @flags extra flags passed to kernel for this operation
+ * @param flags extra flags passed to kernel for this operation
* @return 0, on success; negative error, otherwise
*
* **bpf_map__lookup_and_delete_elem()** is high-level equivalent of
@@ -1266,6 +1295,28 @@ LIBBPF_API int bpf_map__lookup_and_delete_elem(const struct bpf_map *map,
*/
LIBBPF_API int bpf_map__get_next_key(const struct bpf_map *map,
const void *cur_key, void *next_key, size_t key_sz);
+/**
+ * @brief **bpf_map__set_exclusive_program()** sets a map to be exclusive to the
+ * specified program. This must be called *before* the map is created.
+ *
+ * @param map BPF map to make exclusive.
+ * @param prog BPF program to be the exclusive user of the map. Must belong
+ * to the same bpf_object as the map.
+ * @return 0 on success; a negative error code otherwise.
+ *
+ * This function must be called after the BPF object is opened but before
+ * it is loaded. Once the object is loaded, only the specified program
+ * will be able to access the map's contents.
+ */
+LIBBPF_API int bpf_map__set_exclusive_program(struct bpf_map *map, struct bpf_program *prog);
+
+/**
+ * @brief **bpf_map__exclusive_program()** returns the exclusive program
+ * that is registered with the map (if any).
+ * @param map BPF map to which the exclusive program is registered.
+ * @return the registered exclusive program.
+ */
+LIBBPF_API struct bpf_program *bpf_map__exclusive_program(struct bpf_map *map);
struct bpf_xdp_set_link_opts {
size_t sz;
@@ -1590,6 +1641,7 @@ struct perf_buffer_opts {
* @param sample_cb function called on each received data record
* @param lost_cb function called when record loss has occurred
* @param ctx user-provided extra context passed into *sample_cb* and *lost_cb*
+ * @param opts optional parameters for the perf buffer, can be null
* @return a new instance of struct perf_buffer on success, NULL on error with
* *errno* containing an error code
*/
@@ -1810,9 +1862,10 @@ struct gen_loader_opts {
const char *insns;
__u32 data_sz;
__u32 insns_sz;
+ bool gen_hash;
};
-#define gen_loader_opts__last_field insns_sz
+#define gen_loader_opts__last_field gen_hash
LIBBPF_API int bpf_object__gen_loader(struct bpf_object *obj,
struct gen_loader_opts *opts);
diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map
index d7bd463e7017..8ed8749907d4 100644
--- a/tools/lib/bpf/libbpf.map
+++ b/tools/lib/bpf/libbpf.map
@@ -448,4 +448,7 @@ LIBBPF_1.6.0 {
} LIBBPF_1.5.0;
LIBBPF_1.7.0 {
+ global:
+ bpf_map__set_exclusive_program;
+ bpf_map__exclusive_program;
} LIBBPF_1.6.0;
diff --git a/tools/lib/bpf/libbpf_errno.c b/tools/lib/bpf/libbpf_errno.c
deleted file mode 100644
index 6b180172ec6b..000000000000
--- a/tools/lib/bpf/libbpf_errno.c
+++ /dev/null
@@ -1,75 +0,0 @@
-// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
-
-/*
- * Copyright (C) 2013-2015 Alexei Starovoitov <ast@kernel.org>
- * Copyright (C) 2015 Wang Nan <wangnan0@huawei.com>
- * Copyright (C) 2015 Huawei Inc.
- * Copyright (C) 2017 Nicira, Inc.
- */
-
-#undef _GNU_SOURCE
-#include <stdio.h>
-#include <string.h>
-
-#include "libbpf.h"
-#include "libbpf_internal.h"
-
-/* make sure libbpf doesn't use kernel-only integer typedefs */
-#pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64
-
-#define ERRNO_OFFSET(e) ((e) - __LIBBPF_ERRNO__START)
-#define ERRCODE_OFFSET(c) ERRNO_OFFSET(LIBBPF_ERRNO__##c)
-#define NR_ERRNO (__LIBBPF_ERRNO__END - __LIBBPF_ERRNO__START)
-
-static const char *libbpf_strerror_table[NR_ERRNO] = {
- [ERRCODE_OFFSET(LIBELF)] = "Something wrong in libelf",
- [ERRCODE_OFFSET(FORMAT)] = "BPF object format invalid",
- [ERRCODE_OFFSET(KVERSION)] = "'version' section incorrect or lost",
- [ERRCODE_OFFSET(ENDIAN)] = "Endian mismatch",
- [ERRCODE_OFFSET(INTERNAL)] = "Internal error in libbpf",
- [ERRCODE_OFFSET(RELOC)] = "Relocation failed",
- [ERRCODE_OFFSET(VERIFY)] = "Kernel verifier blocks program loading",
- [ERRCODE_OFFSET(PROG2BIG)] = "Program too big",
- [ERRCODE_OFFSET(KVER)] = "Incorrect kernel version",
- [ERRCODE_OFFSET(PROGTYPE)] = "Kernel doesn't support this program type",
- [ERRCODE_OFFSET(WRNGPID)] = "Wrong pid in netlink message",
- [ERRCODE_OFFSET(INVSEQ)] = "Invalid netlink sequence",
- [ERRCODE_OFFSET(NLPARSE)] = "Incorrect netlink message parsing",
-};
-
-int libbpf_strerror(int err, char *buf, size_t size)
-{
- int ret;
-
- if (!buf || !size)
- return libbpf_err(-EINVAL);
-
- err = err > 0 ? err : -err;
-
- if (err < __LIBBPF_ERRNO__START) {
- ret = strerror_r(err, buf, size);
- buf[size - 1] = '\0';
- return libbpf_err_errno(ret);
- }
-
- if (err < __LIBBPF_ERRNO__END) {
- const char *msg;
-
- msg = libbpf_strerror_table[ERRNO_OFFSET(err)];
- ret = snprintf(buf, size, "%s", msg);
- buf[size - 1] = '\0';
- /* The length of the buf and msg is positive.
- * A negative number may be returned only when the
- * size exceeds INT_MAX. Not likely to appear.
- */
- if (ret >= size)
- return libbpf_err(-ERANGE);
- return 0;
- }
-
- ret = snprintf(buf, size, "Unknown libbpf error %d", err);
- buf[size - 1] = '\0';
- if (ret >= size)
- return libbpf_err(-ERANGE);
- return libbpf_err(-ENOENT);
-}
diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h
index 477a3b3389a0..fc59b21b51b5 100644
--- a/tools/lib/bpf/libbpf_internal.h
+++ b/tools/lib/bpf/libbpf_internal.h
@@ -74,6 +74,8 @@
#define ELF64_ST_VISIBILITY(o) ((o) & 0x03)
#endif
+#define JUMPTABLES_SEC ".jumptables"
+
#define BTF_INFO_ENC(kind, kind_flag, vlen) \
((!!(kind_flag) << 31) | ((kind) << 24) | ((vlen) & BTF_MAX_VLEN))
#define BTF_TYPE_ENC(name, info, size_or_type) (name), (info), (size_or_type)
@@ -172,6 +174,16 @@ do { \
#define pr_info(fmt, ...) __pr(LIBBPF_INFO, fmt, ##__VA_ARGS__)
#define pr_debug(fmt, ...) __pr(LIBBPF_DEBUG, fmt, ##__VA_ARGS__)
+/**
+ * @brief **libbpf_errstr()** returns string corresponding to numeric errno
+ * @param err negative numeric errno
+ * @return pointer to string representation of the errno, that is invalidated
+ * upon the next call.
+ */
+const char *libbpf_errstr(int err);
+
+#define errstr(err) libbpf_errstr(err)
+
#ifndef __has_builtin
#define __has_builtin(x) 0
#endif
@@ -712,6 +724,11 @@ static inline bool is_pow_of_2(size_t x)
return x && (x & (x - 1)) == 0;
}
+static inline __u32 ror32(__u32 v, int bits)
+{
+ return (v >> bits) | (v << (32 - bits));
+}
+
#define PROG_LOAD_ATTEMPTS 5
int sys_bpf_prog_load(union bpf_attr *attr, unsigned int size, int attempts);
@@ -736,4 +753,8 @@ int elf_resolve_pattern_offsets(const char *binary_path, const char *pattern,
int probe_fd(int fd);
+#define SHA256_DIGEST_LENGTH 32
+#define SHA256_DWORD_SIZE SHA256_DIGEST_LENGTH / sizeof(__u64)
+
+void libbpf_sha256(const void *data, size_t len, __u8 out[SHA256_DIGEST_LENGTH]);
#endif /* __LIBBPF_LIBBPF_INTERNAL_H */
diff --git a/tools/lib/bpf/libbpf_probes.c b/tools/lib/bpf/libbpf_probes.c
index 9dfbe7750f56..bccf4bb747e1 100644
--- a/tools/lib/bpf/libbpf_probes.c
+++ b/tools/lib/bpf/libbpf_probes.c
@@ -364,6 +364,10 @@ static int probe_map_create(enum bpf_map_type map_type)
case BPF_MAP_TYPE_SOCKHASH:
case BPF_MAP_TYPE_REUSEPORT_SOCKARRAY:
break;
+ case BPF_MAP_TYPE_INSN_ARRAY:
+ key_size = sizeof(__u32);
+ value_size = sizeof(struct bpf_insn_array_value);
+ break;
case BPF_MAP_TYPE_UNSPEC:
default:
return -EOPNOTSUPP;
diff --git a/tools/lib/bpf/libbpf_utils.c b/tools/lib/bpf/libbpf_utils.c
new file mode 100644
index 000000000000..ac3beae54cf6
--- /dev/null
+++ b/tools/lib/bpf/libbpf_utils.c
@@ -0,0 +1,256 @@
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+
+/*
+ * Copyright (C) 2013-2015 Alexei Starovoitov <ast@kernel.org>
+ * Copyright (C) 2015 Wang Nan <wangnan0@huawei.com>
+ * Copyright (C) 2015 Huawei Inc.
+ * Copyright (C) 2017 Nicira, Inc.
+ */
+
+#undef _GNU_SOURCE
+#include <stdio.h>
+#include <string.h>
+#include <errno.h>
+#include <inttypes.h>
+#include <linux/kernel.h>
+
+#include "libbpf.h"
+#include "libbpf_internal.h"
+
+#ifndef ENOTSUPP
+#define ENOTSUPP 524
+#endif
+
+/* make sure libbpf doesn't use kernel-only integer typedefs */
+#pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64
+
+#define ERRNO_OFFSET(e) ((e) - __LIBBPF_ERRNO__START)
+#define ERRCODE_OFFSET(c) ERRNO_OFFSET(LIBBPF_ERRNO__##c)
+#define NR_ERRNO (__LIBBPF_ERRNO__END - __LIBBPF_ERRNO__START)
+
+static const char *libbpf_strerror_table[NR_ERRNO] = {
+ [ERRCODE_OFFSET(LIBELF)] = "Something wrong in libelf",
+ [ERRCODE_OFFSET(FORMAT)] = "BPF object format invalid",
+ [ERRCODE_OFFSET(KVERSION)] = "'version' section incorrect or lost",
+ [ERRCODE_OFFSET(ENDIAN)] = "Endian mismatch",
+ [ERRCODE_OFFSET(INTERNAL)] = "Internal error in libbpf",
+ [ERRCODE_OFFSET(RELOC)] = "Relocation failed",
+ [ERRCODE_OFFSET(VERIFY)] = "Kernel verifier blocks program loading",
+ [ERRCODE_OFFSET(PROG2BIG)] = "Program too big",
+ [ERRCODE_OFFSET(KVER)] = "Incorrect kernel version",
+ [ERRCODE_OFFSET(PROGTYPE)] = "Kernel doesn't support this program type",
+ [ERRCODE_OFFSET(WRNGPID)] = "Wrong pid in netlink message",
+ [ERRCODE_OFFSET(INVSEQ)] = "Invalid netlink sequence",
+ [ERRCODE_OFFSET(NLPARSE)] = "Incorrect netlink message parsing",
+};
+
+int libbpf_strerror(int err, char *buf, size_t size)
+{
+ int ret;
+
+ if (!buf || !size)
+ return libbpf_err(-EINVAL);
+
+ err = err > 0 ? err : -err;
+
+ if (err < __LIBBPF_ERRNO__START) {
+ ret = strerror_r(err, buf, size);
+ buf[size - 1] = '\0';
+ return libbpf_err_errno(ret);
+ }
+
+ if (err < __LIBBPF_ERRNO__END) {
+ const char *msg;
+
+ msg = libbpf_strerror_table[ERRNO_OFFSET(err)];
+ ret = snprintf(buf, size, "%s", msg);
+ buf[size - 1] = '\0';
+ /* The length of the buf and msg is positive.
+ * A negative number may be returned only when the
+ * size exceeds INT_MAX. Not likely to appear.
+ */
+ if (ret >= size)
+ return libbpf_err(-ERANGE);
+ return 0;
+ }
+
+ ret = snprintf(buf, size, "Unknown libbpf error %d", err);
+ buf[size - 1] = '\0';
+ if (ret >= size)
+ return libbpf_err(-ERANGE);
+ return libbpf_err(-ENOENT);
+}
+
+const char *libbpf_errstr(int err)
+{
+ static __thread char buf[12];
+
+ if (err > 0)
+ err = -err;
+
+ switch (err) {
+ case -E2BIG: return "-E2BIG";
+ case -EACCES: return "-EACCES";
+ case -EADDRINUSE: return "-EADDRINUSE";
+ case -EADDRNOTAVAIL: return "-EADDRNOTAVAIL";
+ case -EAGAIN: return "-EAGAIN";
+ case -EALREADY: return "-EALREADY";
+ case -EBADF: return "-EBADF";
+ case -EBADFD: return "-EBADFD";
+ case -EBUSY: return "-EBUSY";
+ case -ECANCELED: return "-ECANCELED";
+ case -ECHILD: return "-ECHILD";
+ case -EDEADLK: return "-EDEADLK";
+ case -EDOM: return "-EDOM";
+ case -EEXIST: return "-EEXIST";
+ case -EFAULT: return "-EFAULT";
+ case -EFBIG: return "-EFBIG";
+ case -EILSEQ: return "-EILSEQ";
+ case -EINPROGRESS: return "-EINPROGRESS";
+ case -EINTR: return "-EINTR";
+ case -EINVAL: return "-EINVAL";
+ case -EIO: return "-EIO";
+ case -EISDIR: return "-EISDIR";
+ case -ELOOP: return "-ELOOP";
+ case -EMFILE: return "-EMFILE";
+ case -EMLINK: return "-EMLINK";
+ case -EMSGSIZE: return "-EMSGSIZE";
+ case -ENAMETOOLONG: return "-ENAMETOOLONG";
+ case -ENFILE: return "-ENFILE";
+ case -ENODATA: return "-ENODATA";
+ case -ENODEV: return "-ENODEV";
+ case -ENOENT: return "-ENOENT";
+ case -ENOEXEC: return "-ENOEXEC";
+ case -ENOLINK: return "-ENOLINK";
+ case -ENOMEM: return "-ENOMEM";
+ case -ENOSPC: return "-ENOSPC";
+ case -ENOTBLK: return "-ENOTBLK";
+ case -ENOTDIR: return "-ENOTDIR";
+ case -ENOTSUPP: return "-ENOTSUPP";
+ case -ENOTTY: return "-ENOTTY";
+ case -ENXIO: return "-ENXIO";
+ case -EOPNOTSUPP: return "-EOPNOTSUPP";
+ case -EOVERFLOW: return "-EOVERFLOW";
+ case -EPERM: return "-EPERM";
+ case -EPIPE: return "-EPIPE";
+ case -EPROTO: return "-EPROTO";
+ case -EPROTONOSUPPORT: return "-EPROTONOSUPPORT";
+ case -ERANGE: return "-ERANGE";
+ case -EROFS: return "-EROFS";
+ case -ESPIPE: return "-ESPIPE";
+ case -ESRCH: return "-ESRCH";
+ case -ETXTBSY: return "-ETXTBSY";
+ case -EUCLEAN: return "-EUCLEAN";
+ case -EXDEV: return "-EXDEV";
+ default:
+ snprintf(buf, sizeof(buf), "%d", err);
+ return buf;
+ }
+}
+
+static inline __u32 get_unaligned_be32(const void *p)
+{
+ __be32 val;
+
+ memcpy(&val, p, sizeof(val));
+ return be32_to_cpu(val);
+}
+
+static inline void put_unaligned_be32(__u32 val, void *p)
+{
+ __be32 be_val = cpu_to_be32(val);
+
+ memcpy(p, &be_val, sizeof(be_val));
+}
+
+#define SHA256_BLOCK_LENGTH 64
+#define Ch(x, y, z) (((x) & (y)) ^ (~(x) & (z)))
+#define Maj(x, y, z) (((x) & (y)) ^ ((x) & (z)) ^ ((y) & (z)))
+#define Sigma_0(x) (ror32((x), 2) ^ ror32((x), 13) ^ ror32((x), 22))
+#define Sigma_1(x) (ror32((x), 6) ^ ror32((x), 11) ^ ror32((x), 25))
+#define sigma_0(x) (ror32((x), 7) ^ ror32((x), 18) ^ ((x) >> 3))
+#define sigma_1(x) (ror32((x), 17) ^ ror32((x), 19) ^ ((x) >> 10))
+
+static const __u32 sha256_K[64] = {
+ 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, 0x3956c25b, 0x59f111f1,
+ 0x923f82a4, 0xab1c5ed5, 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3,
+ 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174, 0xe49b69c1, 0xefbe4786,
+ 0x0fc19dc6, 0x240ca1cc, 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
+ 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, 0xc6e00bf3, 0xd5a79147,
+ 0x06ca6351, 0x14292967, 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13,
+ 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85, 0xa2bfe8a1, 0xa81a664b,
+ 0xc24b8b70, 0xc76c51a3, 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
+ 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, 0x391c0cb3, 0x4ed8aa4a,
+ 0x5b9cca4f, 0x682e6ff3, 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208,
+ 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2,
+};
+
+#define SHA256_ROUND(i, a, b, c, d, e, f, g, h) \
+ { \
+ __u32 tmp = h + Sigma_1(e) + Ch(e, f, g) + sha256_K[i] + w[i]; \
+ d += tmp; \
+ h = tmp + Sigma_0(a) + Maj(a, b, c); \
+ }
+
+static void sha256_blocks(__u32 state[8], const __u8 *data, size_t nblocks)
+{
+ while (nblocks--) {
+ __u32 a = state[0];
+ __u32 b = state[1];
+ __u32 c = state[2];
+ __u32 d = state[3];
+ __u32 e = state[4];
+ __u32 f = state[5];
+ __u32 g = state[6];
+ __u32 h = state[7];
+ __u32 w[64];
+ int i;
+
+ for (i = 0; i < 16; i++)
+ w[i] = get_unaligned_be32(&data[4 * i]);
+ for (; i < ARRAY_SIZE(w); i++)
+ w[i] = sigma_1(w[i - 2]) + w[i - 7] +
+ sigma_0(w[i - 15]) + w[i - 16];
+ for (i = 0; i < ARRAY_SIZE(w); i += 8) {
+ SHA256_ROUND(i + 0, a, b, c, d, e, f, g, h);
+ SHA256_ROUND(i + 1, h, a, b, c, d, e, f, g);
+ SHA256_ROUND(i + 2, g, h, a, b, c, d, e, f);
+ SHA256_ROUND(i + 3, f, g, h, a, b, c, d, e);
+ SHA256_ROUND(i + 4, e, f, g, h, a, b, c, d);
+ SHA256_ROUND(i + 5, d, e, f, g, h, a, b, c);
+ SHA256_ROUND(i + 6, c, d, e, f, g, h, a, b);
+ SHA256_ROUND(i + 7, b, c, d, e, f, g, h, a);
+ }
+ state[0] += a;
+ state[1] += b;
+ state[2] += c;
+ state[3] += d;
+ state[4] += e;
+ state[5] += f;
+ state[6] += g;
+ state[7] += h;
+ data += SHA256_BLOCK_LENGTH;
+ }
+}
+
+void libbpf_sha256(const void *data, size_t len, __u8 out[SHA256_DIGEST_LENGTH])
+{
+ __u32 state[8] = { 0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a,
+ 0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19 };
+ const __be64 bitcount = cpu_to_be64((__u64)len * 8);
+ __u8 final_data[2 * SHA256_BLOCK_LENGTH] = { 0 };
+ size_t final_len = len % SHA256_BLOCK_LENGTH;
+ int i;
+
+ sha256_blocks(state, data, len / SHA256_BLOCK_LENGTH);
+
+ memcpy(final_data, data + len - final_len, final_len);
+ final_data[final_len] = 0x80;
+ final_len = roundup(final_len + 9, SHA256_BLOCK_LENGTH);
+ memcpy(&final_data[final_len - 8], &bitcount, 8);
+
+ sha256_blocks(state, final_data, final_len / SHA256_BLOCK_LENGTH);
+
+ for (i = 0; i < ARRAY_SIZE(state); i++)
+ put_unaligned_be32(state[i], &out[4 * i]);
+}
diff --git a/tools/lib/bpf/linker.c b/tools/lib/bpf/linker.c
index a469e5d4fee7..f4403e3cf994 100644
--- a/tools/lib/bpf/linker.c
+++ b/tools/lib/bpf/linker.c
@@ -25,7 +25,6 @@
#include "btf.h"
#include "libbpf_internal.h"
#include "strset.h"
-#include "str_error.h"
#define BTF_EXTERN_SEC ".extern"
@@ -2026,6 +2025,9 @@ static int linker_append_elf_sym(struct bpf_linker *linker, struct src_obj *obj,
obj->sym_map[src_sym_idx] = dst_sec->sec_sym_idx;
return 0;
}
+
+ if (strcmp(src_sec->sec_name, JUMPTABLES_SEC) == 0)
+ goto add_sym;
}
if (sym_bind == STB_LOCAL)
diff --git a/tools/lib/bpf/relo_core.c b/tools/lib/bpf/relo_core.c
index 2b83c98a1137..6eea5edba58a 100644
--- a/tools/lib/bpf/relo_core.c
+++ b/tools/lib/bpf/relo_core.c
@@ -64,7 +64,6 @@ enum libbpf_print_level {
#include "libbpf.h"
#include "bpf.h"
#include "btf.h"
-#include "str_error.h"
#include "libbpf_internal.h"
#endif
diff --git a/tools/lib/bpf/ringbuf.c b/tools/lib/bpf/ringbuf.c
index 9702b70da444..00ec4837a06d 100644
--- a/tools/lib/bpf/ringbuf.c
+++ b/tools/lib/bpf/ringbuf.c
@@ -21,7 +21,6 @@
#include "libbpf.h"
#include "libbpf_internal.h"
#include "bpf.h"
-#include "str_error.h"
struct ring {
ring_buffer_sample_fn sample_cb;
diff --git a/tools/lib/bpf/skel_internal.h b/tools/lib/bpf/skel_internal.h
index 4d5fa079b5d6..6a8f5c7a02eb 100644
--- a/tools/lib/bpf/skel_internal.h
+++ b/tools/lib/bpf/skel_internal.h
@@ -13,10 +13,15 @@
#include <unistd.h>
#include <sys/syscall.h>
#include <sys/mman.h>
+#include <linux/keyctl.h>
#include <stdlib.h>
#include "bpf.h"
#endif
+#ifndef SHA256_DIGEST_LENGTH
+#define SHA256_DIGEST_LENGTH 32
+#endif
+
#ifndef __NR_bpf
# if defined(__mips__) && defined(_ABIO32)
# define __NR_bpf 4355
@@ -64,6 +69,11 @@ struct bpf_load_and_run_opts {
__u32 data_sz;
__u32 insns_sz;
const char *errstr;
+ void *signature;
+ __u32 signature_sz;
+ __s32 keyring_id;
+ void *excl_prog_hash;
+ __u32 excl_prog_hash_sz;
};
long kern_sys_bpf(__u32 cmd, void *attr, __u32 attr_size);
@@ -220,14 +230,19 @@ static inline int skel_map_create(enum bpf_map_type map_type,
const char *map_name,
__u32 key_size,
__u32 value_size,
- __u32 max_entries)
+ __u32 max_entries,
+ const void *excl_prog_hash,
+ __u32 excl_prog_hash_sz)
{
- const size_t attr_sz = offsetofend(union bpf_attr, map_extra);
+ const size_t attr_sz = offsetofend(union bpf_attr, excl_prog_hash_size);
union bpf_attr attr;
memset(&attr, 0, attr_sz);
attr.map_type = map_type;
+ attr.excl_prog_hash = (unsigned long) excl_prog_hash;
+ attr.excl_prog_hash_size = excl_prog_hash_sz;
+
strncpy(attr.map_name, map_name, sizeof(attr.map_name));
attr.key_size = key_size;
attr.value_size = value_size;
@@ -300,6 +315,35 @@ static inline int skel_link_create(int prog_fd, int target_fd,
return skel_sys_bpf(BPF_LINK_CREATE, &attr, attr_sz);
}
+static inline int skel_obj_get_info_by_fd(int fd)
+{
+ const size_t attr_sz = offsetofend(union bpf_attr, info);
+ __u8 sha[SHA256_DIGEST_LENGTH];
+ struct bpf_map_info info;
+ __u32 info_len = sizeof(info);
+ union bpf_attr attr;
+
+ memset(&info, 0, sizeof(info));
+ info.hash = (long) &sha;
+ info.hash_size = SHA256_DIGEST_LENGTH;
+
+ memset(&attr, 0, attr_sz);
+ attr.info.bpf_fd = fd;
+ attr.info.info = (long) &info;
+ attr.info.info_len = info_len;
+ return skel_sys_bpf(BPF_OBJ_GET_INFO_BY_FD, &attr, attr_sz);
+}
+
+static inline int skel_map_freeze(int fd)
+{
+ const size_t attr_sz = offsetofend(union bpf_attr, map_fd);
+ union bpf_attr attr;
+
+ memset(&attr, 0, attr_sz);
+ attr.map_fd = fd;
+
+ return skel_sys_bpf(BPF_MAP_FREEZE, &attr, attr_sz);
+}
#ifdef __KERNEL__
#define set_err
#else
@@ -308,12 +352,13 @@ static inline int skel_link_create(int prog_fd, int target_fd,
static inline int bpf_load_and_run(struct bpf_load_and_run_opts *opts)
{
- const size_t prog_load_attr_sz = offsetofend(union bpf_attr, fd_array);
+ const size_t prog_load_attr_sz = offsetofend(union bpf_attr, keyring_id);
const size_t test_run_attr_sz = offsetofend(union bpf_attr, test);
int map_fd = -1, prog_fd = -1, key = 0, err;
union bpf_attr attr;
- err = map_fd = skel_map_create(BPF_MAP_TYPE_ARRAY, "__loader.map", 4, opts->data_sz, 1);
+ err = map_fd = skel_map_create(BPF_MAP_TYPE_ARRAY, "__loader.map", 4, opts->data_sz, 1,
+ opts->excl_prog_hash, opts->excl_prog_hash_sz);
if (map_fd < 0) {
opts->errstr = "failed to create loader map";
set_err;
@@ -327,11 +372,34 @@ static inline int bpf_load_and_run(struct bpf_load_and_run_opts *opts)
goto out;
}
+#ifndef __KERNEL__
+ err = skel_map_freeze(map_fd);
+ if (err < 0) {
+ opts->errstr = "failed to freeze map";
+ set_err;
+ goto out;
+ }
+ err = skel_obj_get_info_by_fd(map_fd);
+ if (err < 0) {
+ opts->errstr = "failed to fetch obj info";
+ set_err;
+ goto out;
+ }
+#endif
+
memset(&attr, 0, prog_load_attr_sz);
attr.prog_type = BPF_PROG_TYPE_SYSCALL;
attr.insns = (long) opts->insns;
attr.insn_cnt = opts->insns_sz / sizeof(struct bpf_insn);
attr.license = (long) "Dual BSD/GPL";
+#ifndef __KERNEL__
+ attr.signature = (long) opts->signature;
+ attr.signature_size = opts->signature_sz;
+#else
+ if (opts->signature || opts->signature_sz)
+ pr_warn("signatures are not supported from bpf_preload\n");
+#endif
+ attr.keyring_id = opts->keyring_id;
memcpy(attr.prog_name, "__loader.prog", sizeof("__loader.prog"));
attr.fd_array = (long) &map_fd;
attr.log_level = opts->ctx->log_level;
diff --git a/tools/lib/bpf/str_error.c b/tools/lib/bpf/str_error.c
deleted file mode 100644
index 9a541762f54c..000000000000
--- a/tools/lib/bpf/str_error.c
+++ /dev/null
@@ -1,104 +0,0 @@
-// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
-#undef _GNU_SOURCE
-#include <string.h>
-#include <stdio.h>
-#include <errno.h>
-#include "str_error.h"
-
-#ifndef ENOTSUPP
-#define ENOTSUPP 524
-#endif
-
-/* make sure libbpf doesn't use kernel-only integer typedefs */
-#pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64
-
-/*
- * Wrapper to allow for building in non-GNU systems such as Alpine Linux's musl
- * libc, while checking strerror_r() return to avoid having to check this in
- * all places calling it.
- */
-char *libbpf_strerror_r(int err, char *dst, int len)
-{
- int ret = strerror_r(err < 0 ? -err : err, dst, len);
- /* on glibc <2.13, ret == -1 and errno is set, if strerror_r() can't
- * handle the error, on glibc >=2.13 *positive* (errno-like) error
- * code is returned directly
- */
- if (ret == -1)
- ret = errno;
- if (ret) {
- if (ret == EINVAL)
- /* strerror_r() doesn't recognize this specific error */
- snprintf(dst, len, "unknown error (%d)", err < 0 ? err : -err);
- else
- snprintf(dst, len, "ERROR: strerror_r(%d)=%d", err, ret);
- }
- return dst;
-}
-
-const char *libbpf_errstr(int err)
-{
- static __thread char buf[12];
-
- if (err > 0)
- err = -err;
-
- switch (err) {
- case -E2BIG: return "-E2BIG";
- case -EACCES: return "-EACCES";
- case -EADDRINUSE: return "-EADDRINUSE";
- case -EADDRNOTAVAIL: return "-EADDRNOTAVAIL";
- case -EAGAIN: return "-EAGAIN";
- case -EALREADY: return "-EALREADY";
- case -EBADF: return "-EBADF";
- case -EBADFD: return "-EBADFD";
- case -EBUSY: return "-EBUSY";
- case -ECANCELED: return "-ECANCELED";
- case -ECHILD: return "-ECHILD";
- case -EDEADLK: return "-EDEADLK";
- case -EDOM: return "-EDOM";
- case -EEXIST: return "-EEXIST";
- case -EFAULT: return "-EFAULT";
- case -EFBIG: return "-EFBIG";
- case -EILSEQ: return "-EILSEQ";
- case -EINPROGRESS: return "-EINPROGRESS";
- case -EINTR: return "-EINTR";
- case -EINVAL: return "-EINVAL";
- case -EIO: return "-EIO";
- case -EISDIR: return "-EISDIR";
- case -ELOOP: return "-ELOOP";
- case -EMFILE: return "-EMFILE";
- case -EMLINK: return "-EMLINK";
- case -EMSGSIZE: return "-EMSGSIZE";
- case -ENAMETOOLONG: return "-ENAMETOOLONG";
- case -ENFILE: return "-ENFILE";
- case -ENODATA: return "-ENODATA";
- case -ENODEV: return "-ENODEV";
- case -ENOENT: return "-ENOENT";
- case -ENOEXEC: return "-ENOEXEC";
- case -ENOLINK: return "-ENOLINK";
- case -ENOMEM: return "-ENOMEM";
- case -ENOSPC: return "-ENOSPC";
- case -ENOTBLK: return "-ENOTBLK";
- case -ENOTDIR: return "-ENOTDIR";
- case -ENOTSUPP: return "-ENOTSUPP";
- case -ENOTTY: return "-ENOTTY";
- case -ENXIO: return "-ENXIO";
- case -EOPNOTSUPP: return "-EOPNOTSUPP";
- case -EOVERFLOW: return "-EOVERFLOW";
- case -EPERM: return "-EPERM";
- case -EPIPE: return "-EPIPE";
- case -EPROTO: return "-EPROTO";
- case -EPROTONOSUPPORT: return "-EPROTONOSUPPORT";
- case -ERANGE: return "-ERANGE";
- case -EROFS: return "-EROFS";
- case -ESPIPE: return "-ESPIPE";
- case -ESRCH: return "-ESRCH";
- case -ETXTBSY: return "-ETXTBSY";
- case -EUCLEAN: return "-EUCLEAN";
- case -EXDEV: return "-EXDEV";
- default:
- snprintf(buf, sizeof(buf), "%d", err);
- return buf;
- }
-}
diff --git a/tools/lib/bpf/str_error.h b/tools/lib/bpf/str_error.h
deleted file mode 100644
index 53e7fbffc13e..000000000000
--- a/tools/lib/bpf/str_error.h
+++ /dev/null
@@ -1,19 +0,0 @@
-/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
-#ifndef __LIBBPF_STR_ERROR_H
-#define __LIBBPF_STR_ERROR_H
-
-#define STRERR_BUFSIZE 128
-
-char *libbpf_strerror_r(int err, char *dst, int len);
-
-/**
- * @brief **libbpf_errstr()** returns string corresponding to numeric errno
- * @param err negative numeric errno
- * @return pointer to string representation of the errno, that is invalidated
- * upon the next call.
- */
-const char *libbpf_errstr(int err);
-
-#define errstr(err) libbpf_errstr(err)
-
-#endif /* __LIBBPF_STR_ERROR_H */
diff --git a/tools/lib/bpf/usdt.bpf.h b/tools/lib/bpf/usdt.bpf.h
index 2a7865c8e3fe..43deb05a5197 100644
--- a/tools/lib/bpf/usdt.bpf.h
+++ b/tools/lib/bpf/usdt.bpf.h
@@ -34,13 +34,32 @@ enum __bpf_usdt_arg_type {
BPF_USDT_ARG_CONST,
BPF_USDT_ARG_REG,
BPF_USDT_ARG_REG_DEREF,
+ BPF_USDT_ARG_SIB,
};
+/*
+ * This struct layout is designed specifically to be backwards/forward
+ * compatible between libbpf versions for ARG_CONST, ARG_REG, and
+ * ARG_REG_DEREF modes. ARG_SIB requires libbpf v1.7+.
+ */
struct __bpf_usdt_arg_spec {
/* u64 scalar interpreted depending on arg_type, see below */
__u64 val_off;
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
/* arg location case, see bpf_usdt_arg() for details */
- enum __bpf_usdt_arg_type arg_type;
+ enum __bpf_usdt_arg_type arg_type: 8;
+ /* index register offset within struct pt_regs */
+ __u16 idx_reg_off: 12;
+ /* scale factor for index register (1, 2, 4, or 8) */
+ __u16 scale_bitshift: 4;
+ /* reserved for future use, keeps reg_off offset stable */
+ __u8 __reserved: 8;
+#else
+ __u8 __reserved: 8;
+ __u16 idx_reg_off: 12;
+ __u16 scale_bitshift: 4;
+ enum __bpf_usdt_arg_type arg_type: 8;
+#endif
/* offset of referenced register within struct pt_regs */
short reg_off;
/* whether arg should be interpreted as signed value */
@@ -149,7 +168,7 @@ int bpf_usdt_arg(struct pt_regs *ctx, __u64 arg_num, long *res)
{
struct __bpf_usdt_spec *spec;
struct __bpf_usdt_arg_spec *arg_spec;
- unsigned long val;
+ unsigned long val, idx;
int err, spec_id;
*res = 0;
@@ -204,6 +223,27 @@ int bpf_usdt_arg(struct pt_regs *ctx, __u64 arg_num, long *res)
val >>= arg_spec->arg_bitshift;
#endif
break;
+ case BPF_USDT_ARG_SIB:
+ /* Arg is in memory addressed by SIB (Scale-Index-Base) mode
+ * (e.g., "-1@-96(%rbp,%rax,8)" in USDT arg spec). We first
+ * fetch the base register contents and the index register
+ * contents from pt_regs. Then we calculate the final address
+ * as base + (index * scale) + offset, and do a user-space
+ * probe read to fetch the argument value.
+ */
+ err = bpf_probe_read_kernel(&val, sizeof(val), (void *)ctx + arg_spec->reg_off);
+ if (err)
+ return err;
+ err = bpf_probe_read_kernel(&idx, sizeof(idx), (void *)ctx + arg_spec->idx_reg_off);
+ if (err)
+ return err;
+ err = bpf_probe_read_user(&val, sizeof(val), (void *)(val + (idx << arg_spec->scale_bitshift) + arg_spec->val_off));
+ if (err)
+ return err;
+#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+ val >>= arg_spec->arg_bitshift;
+#endif
+ break;
default:
return -EINVAL;
}
diff --git a/tools/lib/bpf/usdt.c b/tools/lib/bpf/usdt.c
index 3373b9d45ac4..d1524f6f54ae 100644
--- a/tools/lib/bpf/usdt.c
+++ b/tools/lib/bpf/usdt.c
@@ -20,7 +20,6 @@
#include "libbpf_common.h"
#include "libbpf_internal.h"
#include "hashmap.h"
-#include "str_error.h"
/* libbpf's USDT support consists of BPF-side state/code and user-space
* state/code working together in concert. BPF-side parts are defined in
@@ -200,12 +199,23 @@ enum usdt_arg_type {
USDT_ARG_CONST,
USDT_ARG_REG,
USDT_ARG_REG_DEREF,
+ USDT_ARG_SIB,
};
/* should match exactly struct __bpf_usdt_arg_spec from usdt.bpf.h */
struct usdt_arg_spec {
__u64 val_off;
- enum usdt_arg_type arg_type;
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+ enum usdt_arg_type arg_type: 8;
+ __u16 idx_reg_off: 12;
+ __u16 scale_bitshift: 4;
+ __u8 __reserved: 8; /* keep reg_off offset stable */
+#else
+ __u8 __reserved: 8; /* keep reg_off offset stable */
+ __u16 idx_reg_off: 12;
+ __u16 scale_bitshift: 4;
+ enum usdt_arg_type arg_type: 8;
+#endif
short reg_off;
bool arg_signed;
char arg_bitshift;
@@ -570,9 +580,8 @@ static struct elf_seg *find_vma_seg(struct elf_seg *segs, size_t seg_cnt, long o
return NULL;
}
-static int parse_usdt_note(Elf *elf, const char *path, GElf_Nhdr *nhdr,
- const char *data, size_t name_off, size_t desc_off,
- struct usdt_note *usdt_note);
+static int parse_usdt_note(GElf_Nhdr *nhdr, const char *data, size_t name_off,
+ size_t desc_off, struct usdt_note *usdt_note);
static int parse_usdt_spec(struct usdt_spec *spec, const struct usdt_note *note, __u64 usdt_cookie);
@@ -626,7 +635,7 @@ static int collect_usdt_targets(struct usdt_manager *man, Elf *elf, const char *
struct elf_seg *seg = NULL;
void *tmp;
- err = parse_usdt_note(elf, path, &nhdr, data->d_buf, name_off, desc_off, &note);
+ err = parse_usdt_note(&nhdr, data->d_buf, name_off, desc_off, &note);
if (err)
goto err_out;
@@ -1132,8 +1141,7 @@ err_out:
/* Parse out USDT ELF note from '.note.stapsdt' section.
* Logic inspired by perf's code.
*/
-static int parse_usdt_note(Elf *elf, const char *path, GElf_Nhdr *nhdr,
- const char *data, size_t name_off, size_t desc_off,
+static int parse_usdt_note(GElf_Nhdr *nhdr, const char *data, size_t name_off, size_t desc_off,
struct usdt_note *note)
{
const char *provider, *name, *args;
@@ -1283,11 +1291,51 @@ static int calc_pt_regs_off(const char *reg_name)
static int parse_usdt_arg(const char *arg_str, int arg_num, struct usdt_arg_spec *arg, int *arg_sz)
{
- char reg_name[16];
- int len, reg_off;
- long off;
+ char reg_name[16] = {0}, idx_reg_name[16] = {0};
+ int len, reg_off, idx_reg_off, scale = 1;
+ long off = 0;
+
+ if (sscanf(arg_str, " %d @ %ld ( %%%15[^,] , %%%15[^,] , %d ) %n",
+ arg_sz, &off, reg_name, idx_reg_name, &scale, &len) == 5 ||
+ sscanf(arg_str, " %d @ ( %%%15[^,] , %%%15[^,] , %d ) %n",
+ arg_sz, reg_name, idx_reg_name, &scale, &len) == 4 ||
+ sscanf(arg_str, " %d @ %ld ( %%%15[^,] , %%%15[^)] ) %n",
+ arg_sz, &off, reg_name, idx_reg_name, &len) == 4 ||
+ sscanf(arg_str, " %d @ ( %%%15[^,] , %%%15[^)] ) %n",
+ arg_sz, reg_name, idx_reg_name, &len) == 3
+ ) {
+ /*
+ * Scale Index Base case:
+ * 1@-96(%rbp,%rax,8)
+ * 1@(%rbp,%rax,8)
+ * 1@-96(%rbp,%rax)
+ * 1@(%rbp,%rax)
+ */
+ arg->arg_type = USDT_ARG_SIB;
+ arg->val_off = off;
- if (sscanf(arg_str, " %d @ %ld ( %%%15[^)] ) %n", arg_sz, &off, reg_name, &len) == 3) {
+ reg_off = calc_pt_regs_off(reg_name);
+ if (reg_off < 0)
+ return reg_off;
+ arg->reg_off = reg_off;
+
+ idx_reg_off = calc_pt_regs_off(idx_reg_name);
+ if (idx_reg_off < 0)
+ return idx_reg_off;
+ arg->idx_reg_off = idx_reg_off;
+
+ /* validate scale factor and set fields directly */
+ switch (scale) {
+ case 1: arg->scale_bitshift = 0; break;
+ case 2: arg->scale_bitshift = 1; break;
+ case 4: arg->scale_bitshift = 2; break;
+ case 8: arg->scale_bitshift = 3; break;
+ default:
+ pr_warn("usdt: invalid SIB scale %d, expected 1, 2, 4, 8\n", scale);
+ return -EINVAL;
+ }
+ } else if (sscanf(arg_str, " %d @ %ld ( %%%15[^)] ) %n",
+ arg_sz, &off, reg_name, &len) == 3) {
/* Memory dereference case, e.g., -4@-20(%rbp) */
arg->arg_type = USDT_ARG_REG_DEREF;
arg->val_off = off;
@@ -1306,6 +1354,7 @@ static int parse_usdt_arg(const char *arg_str, int arg_num, struct usdt_arg_spec
} else if (sscanf(arg_str, " %d @ %%%15s %n", arg_sz, reg_name, &len) == 2) {
/* Register read case, e.g., -4@%eax */
arg->arg_type = USDT_ARG_REG;
+ /* register read has no memory offset */
arg->val_off = 0;
reg_off = calc_pt_regs_off(reg_name);
@@ -1327,8 +1376,6 @@ static int parse_usdt_arg(const char *arg_str, int arg_num, struct usdt_arg_spec
#elif defined(__s390x__)
-/* Do not support __s390__ for now, since user_pt_regs is broken with -m31. */
-
static int parse_usdt_arg(const char *arg_str, int arg_num, struct usdt_arg_spec *arg, int *arg_sz)
{
unsigned int reg;
diff --git a/tools/lib/perf/cpumap.c b/tools/lib/perf/cpumap.c
index b20a5280f2b3..4160e7d2e120 100644
--- a/tools/lib/perf/cpumap.c
+++ b/tools/lib/perf/cpumap.c
@@ -368,10 +368,12 @@ struct perf_cpu perf_cpu_map__max(const struct perf_cpu_map *map)
.cpu = -1
};
- // cpu_map__trim_new() qsort()s it, cpu_map__default_new() sorts it as well.
- return __perf_cpu_map__nr(map) > 0
- ? __perf_cpu_map__cpu(map, __perf_cpu_map__nr(map) - 1)
- : result;
+ if (!map)
+ return result;
+
+ // The CPUs are always sorted and nr is always > 0 as 0 length map is
+ // encoded as NULL.
+ return __perf_cpu_map__cpu(map, __perf_cpu_map__nr(map) - 1);
}
/** Is 'b' a subset of 'a'. */
@@ -453,21 +455,33 @@ int perf_cpu_map__merge(struct perf_cpu_map **orig, struct perf_cpu_map *other)
struct perf_cpu_map *perf_cpu_map__intersect(struct perf_cpu_map *orig,
struct perf_cpu_map *other)
{
- struct perf_cpu *tmp_cpus;
- int tmp_len;
int i, j, k;
- struct perf_cpu_map *merged = NULL;
+ struct perf_cpu_map *merged;
if (perf_cpu_map__is_subset(other, orig))
return perf_cpu_map__get(orig);
if (perf_cpu_map__is_subset(orig, other))
return perf_cpu_map__get(other);
- tmp_len = max(__perf_cpu_map__nr(orig), __perf_cpu_map__nr(other));
- tmp_cpus = malloc(tmp_len * sizeof(struct perf_cpu));
- if (!tmp_cpus)
+ i = j = k = 0;
+ while (i < __perf_cpu_map__nr(orig) && j < __perf_cpu_map__nr(other)) {
+ if (__perf_cpu_map__cpu(orig, i).cpu < __perf_cpu_map__cpu(other, j).cpu)
+ i++;
+ else if (__perf_cpu_map__cpu(orig, i).cpu > __perf_cpu_map__cpu(other, j).cpu)
+ j++;
+ else { /* CPUs match. */
+ i++;
+ j++;
+ k++;
+ }
+ }
+ if (k == 0) /* Maps are completely disjoint. */
return NULL;
+ merged = perf_cpu_map__alloc(k);
+ if (!merged)
+ return NULL;
+ /* Entries are added to merged in sorted order, so no need to sort again. */
i = j = k = 0;
while (i < __perf_cpu_map__nr(orig) && j < __perf_cpu_map__nr(other)) {
if (__perf_cpu_map__cpu(orig, i).cpu < __perf_cpu_map__cpu(other, j).cpu)
@@ -476,11 +490,8 @@ struct perf_cpu_map *perf_cpu_map__intersect(struct perf_cpu_map *orig,
j++;
else {
j++;
- tmp_cpus[k++] = __perf_cpu_map__cpu(orig, i++);
+ RC_CHK_ACCESS(merged)->map[k++] = __perf_cpu_map__cpu(orig, i++);
}
}
- if (k)
- merged = cpu_map__trim_new(k, tmp_cpus);
- free(tmp_cpus);
return merged;
}
diff --git a/tools/lib/perf/include/perf/core.h b/tools/lib/perf/include/perf/core.h
index a3f6d68edad7..06cc132d88cf 100644
--- a/tools/lib/perf/include/perf/core.h
+++ b/tools/lib/perf/include/perf/core.h
@@ -5,7 +5,7 @@
#include <stdarg.h>
#ifndef LIBPERF_API
-#define LIBPERF_API __attribute__((visibility("default")))
+#define LIBPERF_API extern __attribute__((visibility("default")))
#endif
enum libperf_print_level {
diff --git a/tools/lib/perf/include/perf/event.h b/tools/lib/perf/include/perf/event.h
index 6608f1e3701b..43a8cb04994f 100644
--- a/tools/lib/perf/include/perf/event.h
+++ b/tools/lib/perf/include/perf/event.h
@@ -151,6 +151,18 @@ struct perf_record_switch {
__u32 next_prev_tid;
};
+struct perf_record_callchain_deferred {
+ struct perf_event_header header;
+ /*
+ * This is to match kernel and (deferred) user stacks together.
+ * The kernel part will be in the sample callchain array after
+ * the PERF_CONTEXT_USER_DEFERRED entry.
+ */
+ __u64 cookie;
+ __u64 nr;
+ __u64 ips[];
+};
+
struct perf_record_header_attr {
struct perf_event_header header;
struct perf_event_attr attr;
@@ -291,6 +303,7 @@ struct perf_record_header_event_type {
struct perf_record_header_tracing_data {
struct perf_event_header header;
__u32 size;
+ __u32 pad;
};
#define PERF_RECORD_MISC_BUILD_ID_SIZE (1 << 15)
@@ -522,6 +535,7 @@ union perf_event {
struct perf_record_read read;
struct perf_record_throttle throttle;
struct perf_record_sample sample;
+ struct perf_record_callchain_deferred callchain_deferred;
struct perf_record_bpf_event bpf;
struct perf_record_ksymbol ksymbol;
struct perf_record_text_poke_event text_poke;
diff --git a/tools/lib/perf/mmap.c b/tools/lib/perf/mmap.c
index c1a51d925e0e..ec124eb0ec0a 100644
--- a/tools/lib/perf/mmap.c
+++ b/tools/lib/perf/mmap.c
@@ -508,7 +508,7 @@ int perf_mmap__read_self(struct perf_mmap *map, struct perf_counts_values *count
idx = READ_ONCE(pc->index);
cnt = READ_ONCE(pc->offset);
if (pc->cap_user_rdpmc && idx) {
- s64 evcnt = read_perf_counter(idx - 1);
+ u64 evcnt = read_perf_counter(idx - 1);
u16 width = READ_ONCE(pc->pmc_width);
evcnt <<= 64 - width;
diff --git a/tools/lib/python/__init__.py b/tools/lib/python/__init__.py
new file mode 100644
index 000000000000..e69de29bb2d1
--- /dev/null
+++ b/tools/lib/python/__init__.py
diff --git a/tools/lib/python/abi/__init__.py b/tools/lib/python/abi/__init__.py
new file mode 100644
index 000000000000..e69de29bb2d1
--- /dev/null
+++ b/tools/lib/python/abi/__init__.py
diff --git a/tools/lib/python/abi/abi_parser.py b/tools/lib/python/abi/abi_parser.py
new file mode 100644
index 000000000000..9b8db70067ef
--- /dev/null
+++ b/tools/lib/python/abi/abi_parser.py
@@ -0,0 +1,628 @@
+#!/usr/bin/env python3
+# pylint: disable=R0902,R0903,R0911,R0912,R0913,R0914,R0915,R0917,C0302
+# Copyright(c) 2025: Mauro Carvalho Chehab <mchehab@kernel.org>.
+# SPDX-License-Identifier: GPL-2.0
+
+"""
+Parse ABI documentation and produce results from it.
+"""
+
+from argparse import Namespace
+import logging
+import os
+import re
+
+from pprint import pformat
+from random import randrange, seed
+
+# Import Python modules
+
+from abi.helpers import AbiDebug, ABI_DIR
+
+
+class AbiParser:
+ """Main class to parse ABI files"""
+
+ TAGS = r"(what|where|date|kernelversion|contact|description|users)"
+ XREF = r"(?:^|\s|\()(\/(?:sys|config|proc|dev|kvd)\/[^,.:;\)\s]+)(?:[,.:;\)\s]|\Z)"
+
+ def __init__(self, directory, logger=None,
+ enable_lineno=False, show_warnings=True, debug=0):
+ """Stores arguments for the class and initialize class vars"""
+
+ self.directory = directory
+ self.enable_lineno = enable_lineno
+ self.show_warnings = show_warnings
+ self.debug = debug
+
+ if not logger:
+ self.log = logging.getLogger("get_abi")
+ else:
+ self.log = logger
+
+ self.data = {}
+ self.what_symbols = {}
+ self.file_refs = {}
+ self.what_refs = {}
+
+ # Ignore files that contain such suffixes
+ self.ignore_suffixes = (".rej", ".org", ".orig", ".bak", "~")
+
+ # Regular expressions used on parser
+ self.re_abi_dir = re.compile(r"(.*)" + ABI_DIR)
+ self.re_tag = re.compile(r"(\S+)(:\s*)(.*)", re.I)
+ self.re_valid = re.compile(self.TAGS)
+ self.re_start_spc = re.compile(r"(\s*)(\S.*)")
+ self.re_whitespace = re.compile(r"^\s+")
+
+ # Regular used on print
+ self.re_what = re.compile(r"(\/?(?:[\w\-]+\/?){1,2})")
+ self.re_escape = re.compile(r"([\.\x01-\x08\x0e-\x1f\x21-\x2f\x3a-\x40\x7b-\xff])")
+ self.re_unprintable = re.compile(r"([\x00-\x2f\x3a-\x40\x5b-\x60\x7b-\xff]+)")
+ self.re_title_mark = re.compile(r"\n[\-\*\=\^\~]+\n")
+ self.re_doc = re.compile(r"Documentation/(?!devicetree)(\S+)\.rst")
+ self.re_abi = re.compile(r"(Documentation/ABI/)([\w\/\-]+)")
+ self.re_xref_node = re.compile(self.XREF)
+
+ def warn(self, fdata, msg, extra=None):
+ """Displays a parse error if warning is enabled"""
+
+ if not self.show_warnings:
+ return
+
+ msg = f"{fdata.fname}:{fdata.ln}: {msg}"
+ if extra:
+ msg += "\n\t\t" + extra
+
+ self.log.warning(msg)
+
+ def add_symbol(self, what, fname, ln=None, xref=None):
+ """Create a reference table describing where each 'what' is located"""
+
+ if what not in self.what_symbols:
+ self.what_symbols[what] = {"file": {}}
+
+ if fname not in self.what_symbols[what]["file"]:
+ self.what_symbols[what]["file"][fname] = []
+
+ if ln and ln not in self.what_symbols[what]["file"][fname]:
+ self.what_symbols[what]["file"][fname].append(ln)
+
+ if xref:
+ self.what_symbols[what]["xref"] = xref
+
+ def _parse_line(self, fdata, line):
+ """Parse a single line of an ABI file"""
+
+ new_what = False
+ new_tag = False
+ content = None
+
+ match = self.re_tag.match(line)
+ if match:
+ new = match.group(1).lower()
+ sep = match.group(2)
+ content = match.group(3)
+
+ match = self.re_valid.search(new)
+ if match:
+ new_tag = match.group(1)
+ else:
+ if fdata.tag == "description":
+ # New "tag" is actually part of description.
+ # Don't consider it a tag
+ new_tag = False
+ elif fdata.tag != "":
+ self.warn(fdata, f"tag '{fdata.tag}' is invalid", line)
+
+ if new_tag:
+ # "where" is Invalid, but was a common mistake. Warn if found
+ if new_tag == "where":
+ self.warn(fdata, "tag 'Where' is invalid. Should be 'What:' instead")
+ new_tag = "what"
+
+ if new_tag == "what":
+ fdata.space = None
+
+ if content not in self.what_symbols:
+ self.add_symbol(what=content, fname=fdata.fname, ln=fdata.ln)
+
+ if fdata.tag == "what":
+ fdata.what.append(content.strip("\n"))
+ else:
+ if fdata.key:
+ if "description" not in self.data.get(fdata.key, {}):
+ self.warn(fdata, f"{fdata.key} doesn't have a description")
+
+ for w in fdata.what:
+ self.add_symbol(what=w, fname=fdata.fname,
+ ln=fdata.what_ln, xref=fdata.key)
+
+ fdata.label = content
+ new_what = True
+
+ key = "abi_" + content.lower()
+ fdata.key = self.re_unprintable.sub("_", key).strip("_")
+
+ # Avoid duplicated keys but using a defined seed, to make
+ # the namespace identical if there aren't changes at the
+ # ABI symbols
+ seed(42)
+
+ while fdata.key in self.data:
+ char = randrange(0, 51) + ord("A")
+ if char > ord("Z"):
+ char += ord("a") - ord("Z") - 1
+
+ fdata.key += chr(char)
+
+ if fdata.key and fdata.key not in self.data:
+ self.data[fdata.key] = {
+ "what": [content],
+ "file": [fdata.file_ref],
+ "path": fdata.ftype,
+ "line_no": fdata.ln,
+ }
+
+ fdata.what = self.data[fdata.key]["what"]
+
+ self.what_refs[content] = fdata.key
+ fdata.tag = new_tag
+ fdata.what_ln = fdata.ln
+
+ if fdata.nametag["what"]:
+ t = (content, fdata.key)
+ if t not in fdata.nametag["symbols"]:
+ fdata.nametag["symbols"].append(t)
+
+ return
+
+ if fdata.tag and new_tag:
+ fdata.tag = new_tag
+
+ if new_what:
+ fdata.label = ""
+
+ if "description" in self.data[fdata.key]:
+ self.data[fdata.key]["description"] += "\n\n"
+
+ if fdata.file_ref not in self.data[fdata.key]["file"]:
+ self.data[fdata.key]["file"].append(fdata.file_ref)
+
+ if self.debug == AbiDebug.WHAT_PARSING:
+ self.log.debug("what: %s", fdata.what)
+
+ if not fdata.what:
+ self.warn(fdata, "'What:' should come first:", line)
+ return
+
+ if new_tag == "description":
+ fdata.space = None
+
+ if content:
+ sep = sep.replace(":", " ")
+
+ c = " " * len(new_tag) + sep + content
+ c = c.expandtabs()
+
+ match = self.re_start_spc.match(c)
+ if match:
+ # Preserve initial spaces for the first line
+ fdata.space = match.group(1)
+ content = match.group(2) + "\n"
+
+ self.data[fdata.key][fdata.tag] = content
+
+ return
+
+ # Store any contents before tags at the database
+ if not fdata.tag and "what" in fdata.nametag:
+ fdata.nametag["description"] += line
+ return
+
+ if fdata.tag == "description":
+ content = line.expandtabs()
+
+ if self.re_whitespace.sub("", content) == "":
+ self.data[fdata.key][fdata.tag] += "\n"
+ return
+
+ if fdata.space is None:
+ match = self.re_start_spc.match(content)
+ if match:
+ # Preserve initial spaces for the first line
+ fdata.space = match.group(1)
+
+ content = match.group(2) + "\n"
+ else:
+ if content.startswith(fdata.space):
+ content = content[len(fdata.space):]
+
+ else:
+ fdata.space = ""
+
+ if fdata.tag == "what":
+ w = content.strip("\n")
+ if w:
+ self.data[fdata.key][fdata.tag].append(w)
+ else:
+ self.data[fdata.key][fdata.tag] += content
+ return
+
+ content = line.strip()
+ if fdata.tag:
+ if fdata.tag == "what":
+ w = content.strip("\n")
+ if w:
+ self.data[fdata.key][fdata.tag].append(w)
+ else:
+ self.data[fdata.key][fdata.tag] += "\n" + content.rstrip("\n")
+ return
+
+ # Everything else is error
+ if content:
+ self.warn(fdata, "Unexpected content", line)
+
+ def parse_readme(self, nametag, fname):
+ """Parse ABI README file"""
+
+ nametag["what"] = ["Introduction"]
+ nametag["path"] = "README"
+ with open(fname, "r", encoding="utf8", errors="backslashreplace") as fp:
+ for line in fp:
+ match = self.re_tag.match(line)
+ if match:
+ new = match.group(1).lower()
+
+ match = self.re_valid.search(new)
+ if match:
+ nametag["description"] += "\n:" + line
+ continue
+
+ nametag["description"] += line
+
+ def parse_file(self, fname, path, basename):
+ """Parse a single file"""
+
+ ref = f"abi_file_{path}_{basename}"
+ ref = self.re_unprintable.sub("_", ref).strip("_")
+
+ # Store per-file state into a namespace variable. This will be used
+ # by the per-line parser state machine and by the warning function.
+ fdata = Namespace
+
+ fdata.fname = fname
+ fdata.name = basename
+
+ pos = fname.find(ABI_DIR)
+ if pos > 0:
+ f = fname[pos:]
+ else:
+ f = fname
+
+ fdata.file_ref = (f, ref)
+ self.file_refs[f] = ref
+
+ fdata.ln = 0
+ fdata.what_ln = 0
+ fdata.tag = ""
+ fdata.label = ""
+ fdata.what = []
+ fdata.key = None
+ fdata.xrefs = None
+ fdata.space = None
+ fdata.ftype = path.split("/")[0]
+
+ fdata.nametag = {}
+ fdata.nametag["what"] = [f"ABI file {path}/{basename}"]
+ fdata.nametag["type"] = "File"
+ fdata.nametag["path"] = fdata.ftype
+ fdata.nametag["file"] = [fdata.file_ref]
+ fdata.nametag["line_no"] = 1
+ fdata.nametag["description"] = ""
+ fdata.nametag["symbols"] = []
+
+ self.data[ref] = fdata.nametag
+
+ if self.debug & AbiDebug.WHAT_OPEN:
+ self.log.debug("Opening file %s", fname)
+
+ if basename == "README":
+ self.parse_readme(fdata.nametag, fname)
+ return
+
+ with open(fname, "r", encoding="utf8", errors="backslashreplace") as fp:
+ for line in fp:
+ fdata.ln += 1
+
+ self._parse_line(fdata, line)
+
+ if "description" in fdata.nametag:
+ fdata.nametag["description"] = fdata.nametag["description"].lstrip("\n")
+
+ if fdata.key:
+ if "description" not in self.data.get(fdata.key, {}):
+ self.warn(fdata, f"{fdata.key} doesn't have a description")
+
+ for w in fdata.what:
+ self.add_symbol(what=w, fname=fname, xref=fdata.key)
+
+ def _parse_abi(self, root=None):
+ """Internal function to parse documentation ABI recursively"""
+
+ if not root:
+ root = self.directory
+
+ with os.scandir(root) as obj:
+ for entry in obj:
+ name = os.path.join(root, entry.name)
+
+ if entry.is_dir():
+ self._parse_abi(name)
+ continue
+
+ if not entry.is_file():
+ continue
+
+ basename = os.path.basename(name)
+
+ if basename.startswith("."):
+ continue
+
+ if basename.endswith(self.ignore_suffixes):
+ continue
+
+ path = self.re_abi_dir.sub("", os.path.dirname(name))
+
+ self.parse_file(name, path, basename)
+
+ def parse_abi(self, root=None):
+ """Parse documentation ABI"""
+
+ self._parse_abi(root)
+
+ if self.debug & AbiDebug.DUMP_ABI_STRUCTS:
+ self.log.debug(pformat(self.data))
+
+ def desc_txt(self, desc):
+ """Print description as found inside ABI files"""
+
+ desc = desc.strip(" \t\n")
+
+ return desc + "\n\n"
+
+ def xref(self, fname):
+ """
+ Converts a Documentation/ABI + basename into a ReST cross-reference
+ """
+
+ xref = self.file_refs.get(fname)
+ if not xref:
+ return None
+ else:
+ return xref
+
+ def desc_rst(self, desc):
+ """Enrich ReST output by creating cross-references"""
+
+ # Remove title markups from the description
+ # Having titles inside ABI files will only work if extra
+ # care would be taken in order to strictly follow the same
+ # level order for each markup.
+ desc = self.re_title_mark.sub("\n\n", "\n" + desc)
+ desc = desc.rstrip(" \t\n").lstrip("\n")
+
+ # Python's regex performance for non-compiled expressions is a lot
+ # than Perl, as Perl automatically caches them at their
+ # first usage. Here, we'll need to do the same, as otherwise the
+ # performance penalty is be high
+
+ new_desc = ""
+ for d in desc.split("\n"):
+ if d == "":
+ new_desc += "\n"
+ continue
+
+ # Use cross-references for doc files where needed
+ d = self.re_doc.sub(r":doc:`/\1`", d)
+
+ # Use cross-references for ABI generated docs where needed
+ matches = self.re_abi.findall(d)
+ for m in matches:
+ abi = m[0] + m[1]
+
+ xref = self.file_refs.get(abi)
+ if not xref:
+ # This may happen if ABI is on a separate directory,
+ # like parsing ABI testing and symbol is at stable.
+ # The proper solution is to move this part of the code
+ # for it to be inside sphinx/kernel_abi.py
+ self.log.info("Didn't find ABI reference for '%s'", abi)
+ else:
+ new = self.re_escape.sub(r"\\\1", m[1])
+ d = re.sub(fr"\b{abi}\b", f":ref:`{new} <{xref}>`", d)
+
+ # Seek for cross reference symbols like /sys/...
+ # Need to be careful to avoid doing it on a code block
+ if d[0] not in [" ", "\t"]:
+ matches = self.re_xref_node.findall(d)
+ for m in matches:
+ # Finding ABI here is more complex due to wildcards
+ xref = self.what_refs.get(m)
+ if xref:
+ new = self.re_escape.sub(r"\\\1", m)
+ d = re.sub(fr"\b{m}\b", f":ref:`{new} <{xref}>`", d)
+
+ new_desc += d + "\n"
+
+ return new_desc + "\n\n"
+
+ def doc(self, output_in_txt=False, show_symbols=True, show_file=True,
+ filter_path=None):
+ """Print ABI at stdout"""
+
+ part = None
+ for key, v in sorted(self.data.items(),
+ key=lambda x: (x[1].get("type", ""),
+ x[1].get("what"))):
+
+ wtype = v.get("type", "Symbol")
+ file_ref = v.get("file")
+ names = v.get("what", [""])
+
+ if wtype == "File":
+ if not show_file:
+ continue
+ else:
+ if not show_symbols:
+ continue
+
+ if filter_path:
+ if v.get("path") != filter_path:
+ continue
+
+ msg = ""
+
+ if wtype != "File":
+ cur_part = names[0]
+ if cur_part.find("/") >= 0:
+ match = self.re_what.match(cur_part)
+ if match:
+ symbol = match.group(1).rstrip("/")
+ cur_part = "Symbols under " + symbol
+
+ if cur_part and cur_part != part:
+ part = cur_part
+ msg += part + "\n"+ "-" * len(part) +"\n\n"
+
+ msg += f".. _{key}:\n\n"
+
+ max_len = 0
+ for i in range(0, len(names)): # pylint: disable=C0200
+ names[i] = "**" + self.re_escape.sub(r"\\\1", names[i]) + "**"
+
+ max_len = max(max_len, len(names[i]))
+
+ msg += "+-" + "-" * max_len + "-+\n"
+ for name in names:
+ msg += f"| {name}" + " " * (max_len - len(name)) + " |\n"
+ msg += "+-" + "-" * max_len + "-+\n"
+ msg += "\n"
+
+ for ref in file_ref:
+ if wtype == "File":
+ msg += f".. _{ref[1]}:\n\n"
+ else:
+ base = os.path.basename(ref[0])
+ msg += f"Defined on file :ref:`{base} <{ref[1]}>`\n\n"
+
+ if wtype == "File":
+ msg += names[0] +"\n" + "-" * len(names[0]) +"\n\n"
+
+ desc = v.get("description")
+ if not desc and wtype != "File":
+ msg += f"DESCRIPTION MISSING for {names[0]}\n\n"
+
+ if desc:
+ if output_in_txt:
+ msg += self.desc_txt(desc)
+ else:
+ msg += self.desc_rst(desc)
+
+ symbols = v.get("symbols")
+ if symbols:
+ msg += "Has the following ABI:\n\n"
+
+ for w, label in symbols:
+ # Escape special chars from content
+ content = self.re_escape.sub(r"\\\1", w)
+
+ msg += f"- :ref:`{content} <{label}>`\n\n"
+
+ users = v.get("users")
+ if users and users.strip(" \t\n"):
+ users = users.strip("\n").replace('\n', '\n\t')
+ msg += f"Users:\n\t{users}\n\n"
+
+ ln = v.get("line_no", 1)
+
+ yield (msg, file_ref[0][0], ln)
+
+ def check_issues(self):
+ """Warn about duplicated ABI entries"""
+
+ for what, v in self.what_symbols.items():
+ files = v.get("file")
+ if not files:
+ # Should never happen if the parser works properly
+ self.log.warning("%s doesn't have a file associated", what)
+ continue
+
+ if len(files) == 1:
+ continue
+
+ f = []
+ for fname, lines in sorted(files.items()):
+ if not lines:
+ f.append(f"{fname}")
+ elif len(lines) == 1:
+ f.append(f"{fname}:{lines[0]}")
+ else:
+ m = fname + "lines "
+ m += ", ".join(str(x) for x in lines)
+ f.append(m)
+
+ self.log.warning("%s is defined %d times: %s", what, len(f), "; ".join(f))
+
+ def search_symbols(self, expr):
+ """ Searches for ABI symbols """
+
+ regex = re.compile(expr, re.I)
+
+ found_keys = 0
+ for t in sorted(self.data.items(), key=lambda x: [0]):
+ v = t[1]
+
+ wtype = v.get("type", "")
+ if wtype == "File":
+ continue
+
+ for what in v.get("what", [""]):
+ if regex.search(what):
+ found_keys += 1
+
+ kernelversion = v.get("kernelversion", "").strip(" \t\n")
+ date = v.get("date", "").strip(" \t\n")
+ contact = v.get("contact", "").strip(" \t\n")
+ users = v.get("users", "").strip(" \t\n")
+ desc = v.get("description", "").strip(" \t\n")
+
+ files = []
+ for f in v.get("file", ()):
+ files.append(f[0])
+
+ what = str(found_keys) + ". " + what
+ title_tag = "-" * len(what)
+
+ print(f"\n{what}\n{title_tag}\n")
+
+ if kernelversion:
+ print(f"Kernel version:\t\t{kernelversion}")
+
+ if date:
+ print(f"Date:\t\t\t{date}")
+
+ if contact:
+ print(f"Contact:\t\t{contact}")
+
+ if users:
+ print(f"Users:\t\t\t{users}")
+
+ print("Defined on file(s):\t" + ", ".join(files))
+
+ if desc:
+ desc = desc.strip("\n")
+ print(f"\n{desc}\n")
+
+ if not found_keys:
+ print(f"Regular expression /{expr}/ not found.")
diff --git a/tools/lib/python/abi/abi_regex.py b/tools/lib/python/abi/abi_regex.py
new file mode 100644
index 000000000000..d5553206de3c
--- /dev/null
+++ b/tools/lib/python/abi/abi_regex.py
@@ -0,0 +1,234 @@
+#!/usr/bin/env python3
+# xxpylint: disable=R0903
+# Copyright(c) 2025: Mauro Carvalho Chehab <mchehab@kernel.org>.
+# SPDX-License-Identifier: GPL-2.0
+
+"""
+Convert ABI what into regular expressions
+"""
+
+import re
+import sys
+
+from pprint import pformat
+
+from abi.abi_parser import AbiParser
+from abi.helpers import AbiDebug
+
+class AbiRegex(AbiParser):
+ """Extends AbiParser to search ABI nodes with regular expressions"""
+
+ # Escape only ASCII visible characters
+ escape_symbols = r"([\x21-\x29\x2b-\x2d\x3a-\x40\x5c\x60\x7b-\x7e])"
+ leave_others = "others"
+
+ # Tuples with regular expressions to be compiled and replacement data
+ re_whats = [
+ # Drop escape characters that might exist
+ (re.compile("\\\\"), ""),
+
+ # Temporarily escape dot characters
+ (re.compile(r"\."), "\xf6"),
+
+ # Temporarily change [0-9]+ type of patterns
+ (re.compile(r"\[0\-9\]\+"), "\xff"),
+
+ # Temporarily change [\d+-\d+] type of patterns
+ (re.compile(r"\[0\-\d+\]"), "\xff"),
+ (re.compile(r"\[0:\d+\]"), "\xff"),
+ (re.compile(r"\[(\d+)\]"), "\xf4\\\\d+\xf5"),
+
+ # Temporarily change [0-9] type of patterns
+ (re.compile(r"\[(\d)\-(\d)\]"), "\xf4\1-\2\xf5"),
+
+ # Handle multiple option patterns
+ (re.compile(r"[\{\<\[]([\w_]+)(?:[,|]+([\w_]+)){1,}[\}\>\]]"), r"(\1|\2)"),
+
+ # Handle wildcards
+ (re.compile(r"([^\/])\*"), "\\1\\\\w\xf7"),
+ (re.compile(r"/\*/"), "/.*/"),
+ (re.compile(r"/\xf6\xf6\xf6"), "/.*"),
+ (re.compile(r"\<[^\>]+\>"), "\\\\w\xf7"),
+ (re.compile(r"\{[^\}]+\}"), "\\\\w\xf7"),
+ (re.compile(r"\[[^\]]+\]"), "\\\\w\xf7"),
+
+ (re.compile(r"XX+"), "\\\\w\xf7"),
+ (re.compile(r"([^A-Z])[XYZ]([^A-Z])"), "\\1\\\\w\xf7\\2"),
+ (re.compile(r"([^A-Z])[XYZ]$"), "\\1\\\\w\xf7"),
+ (re.compile(r"_[AB]_"), "_\\\\w\xf7_"),
+
+ # Recover [0-9] type of patterns
+ (re.compile(r"\xf4"), "["),
+ (re.compile(r"\xf5"), "]"),
+
+ # Remove duplicated spaces
+ (re.compile(r"\s+"), r" "),
+
+ # Special case: drop comparison as in:
+ # What: foo = <something>
+ # (this happens on a few IIO definitions)
+ (re.compile(r"\s*\=.*$"), ""),
+
+ # Escape all other symbols
+ (re.compile(escape_symbols), r"\\\1"),
+ (re.compile(r"\\\\"), r"\\"),
+ (re.compile(r"\\([\[\]\(\)\|])"), r"\1"),
+ (re.compile(r"(\d+)\\(-\d+)"), r"\1\2"),
+
+ (re.compile(r"\xff"), r"\\d+"),
+
+ # Special case: IIO ABI which a parenthesis.
+ (re.compile(r"sqrt(.*)"), r"sqrt(.*)"),
+
+ # Simplify regexes with multiple .*
+ (re.compile(r"(?:\.\*){2,}"), ""),
+
+ # Recover dot characters
+ (re.compile(r"\xf6"), "\\."),
+ # Recover plus characters
+ (re.compile(r"\xf7"), "+"),
+ ]
+ re_has_num = re.compile(r"\\d")
+
+ # Symbol name after escape_chars that are considered a devnode basename
+ re_symbol_name = re.compile(r"(\w|\\[\.\-\:])+$")
+
+ # List of popular group names to be skipped to minimize regex group size
+ # Use AbiDebug.SUBGROUP_SIZE to detect those
+ skip_names = set(["devices", "hwmon"])
+
+ def regex_append(self, what, new):
+ """
+ Get a search group for a subset of regular expressions.
+
+ As ABI may have thousands of symbols, using a for to search all
+ regular expressions is at least O(n^2). When there are wildcards,
+ the complexity increases substantially, eventually becoming exponential.
+
+ To avoid spending too much time on them, use a logic to split
+ them into groups. The smaller the group, the better, as it would
+ mean that searches will be confined to a small number of regular
+ expressions.
+
+ The conversion to a regex subset is tricky, as we need something
+ that can be easily obtained from the sysfs symbol and from the
+ regular expression. So, we need to discard nodes that have
+ wildcards.
+
+ If it can't obtain a subgroup, place the regular expression inside
+ a special group (self.leave_others).
+ """
+
+ search_group = None
+
+ for search_group in reversed(new.split("/")):
+ if not search_group or search_group in self.skip_names:
+ continue
+ if self.re_symbol_name.match(search_group):
+ break
+
+ if not search_group:
+ search_group = self.leave_others
+
+ if self.debug & AbiDebug.SUBGROUP_MAP:
+ self.log.debug("%s: mapped as %s", what, search_group)
+
+ try:
+ if search_group not in self.regex_group:
+ self.regex_group[search_group] = []
+
+ self.regex_group[search_group].append(re.compile(new))
+ if self.search_string:
+ if what.find(self.search_string) >= 0:
+ print(f"What: {what}")
+ except re.PatternError:
+ self.log.warning("Ignoring '%s' as it produced an invalid regex:\n"
+ " '%s'", what, new)
+
+ def get_regexes(self, what):
+ """
+ Given an ABI devnode, return a list of all regular expressions that
+ may match it, based on the sub-groups created by regex_append()
+ """
+
+ re_list = []
+
+ patches = what.split("/")
+ patches.reverse()
+ patches.append(self.leave_others)
+
+ for search_group in patches:
+ if search_group in self.regex_group:
+ re_list += self.regex_group[search_group]
+
+ return re_list
+
+ def __init__(self, *args, **kwargs):
+ """
+ Override init method to get verbose argument
+ """
+
+ self.regex_group = None
+ self.search_string = None
+ self.re_string = None
+
+ if "search_string" in kwargs:
+ self.search_string = kwargs.get("search_string")
+ del kwargs["search_string"]
+
+ if self.search_string:
+
+ try:
+ self.re_string = re.compile(self.search_string)
+ except re.PatternError as e:
+ msg = f"{self.search_string} is not a valid regular expression"
+ raise ValueError(msg) from e
+
+ super().__init__(*args, **kwargs)
+
+ def parse_abi(self, *args, **kwargs):
+
+ super().parse_abi(*args, **kwargs)
+
+ self.regex_group = {}
+
+ print("Converting ABI What fields into regexes...", file=sys.stderr)
+
+ for t in sorted(self.data.items(), key=lambda x: x[0]):
+ v = t[1]
+ if v.get("type") == "File":
+ continue
+
+ v["regex"] = []
+
+ for what in v.get("what", []):
+ if not what.startswith("/sys"):
+ continue
+
+ new = what
+ for r, s in self.re_whats:
+ try:
+ new = r.sub(s, new)
+ except re.PatternError as e:
+ # Help debugging troubles with new regexes
+ raise re.PatternError(f"{e}\nwhile re.sub('{r.pattern}', {s}, str)") from e
+
+ v["regex"].append(new)
+
+ if self.debug & AbiDebug.REGEX:
+ self.log.debug("%-90s <== %s", new, what)
+
+ # Store regex into a subgroup to speedup searches
+ self.regex_append(what, new)
+
+ if self.debug & AbiDebug.SUBGROUP_DICT:
+ self.log.debug("%s", pformat(self.regex_group))
+
+ if self.debug & AbiDebug.SUBGROUP_SIZE:
+ biggestd_keys = sorted(self.regex_group.keys(),
+ key= lambda k: len(self.regex_group[k]),
+ reverse=True)
+
+ print("Top regex subgroups:", file=sys.stderr)
+ for k in biggestd_keys[:10]:
+ print(f"{k} has {len(self.regex_group[k])} elements", file=sys.stderr)
diff --git a/tools/lib/python/abi/helpers.py b/tools/lib/python/abi/helpers.py
new file mode 100644
index 000000000000..639b23e4ca33
--- /dev/null
+++ b/tools/lib/python/abi/helpers.py
@@ -0,0 +1,38 @@
+#!/usr/bin/env python3
+# Copyright(c) 2025: Mauro Carvalho Chehab <mchehab@kernel.org>.
+# pylint: disable=R0903
+# SPDX-License-Identifier: GPL-2.0
+
+"""
+Helper classes for ABI parser
+"""
+
+ABI_DIR = "Documentation/ABI/"
+
+
+class AbiDebug:
+ """Debug levels"""
+
+ WHAT_PARSING = 1
+ WHAT_OPEN = 2
+ DUMP_ABI_STRUCTS = 4
+ UNDEFINED = 8
+ REGEX = 16
+ SUBGROUP_MAP = 32
+ SUBGROUP_DICT = 64
+ SUBGROUP_SIZE = 128
+ GRAPH = 256
+
+
+DEBUG_HELP = """
+1 - enable debug parsing logic
+2 - enable debug messages on file open
+4 - enable debug for ABI parse data
+8 - enable extra debug information to identify troubles
+ with ABI symbols found at the local machine that
+ weren't found on ABI documentation (used only for
+ undefined subcommand)
+16 - enable debug for what to regex conversion
+32 - enable debug for symbol regex subgroups
+64 - enable debug for sysfs graph tree variable
+"""
diff --git a/tools/lib/python/abi/system_symbols.py b/tools/lib/python/abi/system_symbols.py
new file mode 100644
index 000000000000..4a2554da217b
--- /dev/null
+++ b/tools/lib/python/abi/system_symbols.py
@@ -0,0 +1,378 @@
+#!/usr/bin/env python3
+# pylint: disable=R0902,R0912,R0914,R0915,R1702
+# Copyright(c) 2025: Mauro Carvalho Chehab <mchehab@kernel.org>.
+# SPDX-License-Identifier: GPL-2.0
+
+"""
+Parse ABI documentation and produce results from it.
+"""
+
+import os
+import re
+import sys
+
+from concurrent import futures
+from datetime import datetime
+from random import shuffle
+
+from abi.helpers import AbiDebug
+
+class SystemSymbols:
+ """Stores arguments for the class and initialize class vars"""
+
+ def graph_add_file(self, path, link=None):
+ """
+ add a file path to the sysfs graph stored at self.root
+ """
+
+ if path in self.files:
+ return
+
+ name = ""
+ ref = self.root
+ for edge in path.split("/"):
+ name += edge + "/"
+ if edge not in ref:
+ ref[edge] = {"__name": [name.rstrip("/")]}
+
+ ref = ref[edge]
+
+ if link and link not in ref["__name"]:
+ ref["__name"].append(link.rstrip("/"))
+
+ self.files.add(path)
+
+ def print_graph(self, root_prefix="", root=None, level=0):
+ """Prints a reference tree graph using UTF-8 characters"""
+
+ if not root:
+ root = self.root
+ level = 0
+
+ # Prevent endless traverse
+ if level > 5:
+ return
+
+ if level > 0:
+ prefix = "├──"
+ last_prefix = "└──"
+ else:
+ prefix = ""
+ last_prefix = ""
+
+ items = list(root.items())
+
+ names = root.get("__name", [])
+ for k, edge in items:
+ if k == "__name":
+ continue
+
+ if not k:
+ k = "/"
+
+ if len(names) > 1:
+ k += " links: " + ",".join(names[1:])
+
+ if edge == items[-1][1]:
+ print(root_prefix + last_prefix + k)
+ p = root_prefix
+ if level > 0:
+ p += " "
+ self.print_graph(p, edge, level + 1)
+ else:
+ print(root_prefix + prefix + k)
+ p = root_prefix + "│ "
+ self.print_graph(p, edge, level + 1)
+
+ def _walk(self, root):
+ """
+ Walk through sysfs to get all devnodes that aren't ignored.
+
+ By default, uses /sys as sysfs mounting point. If another
+ directory is used, it replaces them to /sys at the patches.
+ """
+
+ with os.scandir(root) as obj:
+ for entry in obj:
+ path = os.path.join(root, entry.name)
+ if self.sysfs:
+ p = path.replace(self.sysfs, "/sys", count=1)
+ else:
+ p = path
+
+ if self.re_ignore.search(p):
+ return
+
+ # Handle link first to avoid directory recursion
+ if entry.is_symlink():
+ real = os.path.realpath(path)
+ if not self.sysfs:
+ self.aliases[path] = real
+ else:
+ real = real.replace(self.sysfs, "/sys", count=1)
+
+ # Add absfile location to graph if it doesn't exist
+ if not self.re_ignore.search(real):
+ # Add link to the graph
+ self.graph_add_file(real, p)
+
+ elif entry.is_file():
+ self.graph_add_file(p)
+
+ elif entry.is_dir():
+ self._walk(path)
+
+ def __init__(self, abi, sysfs="/sys", hints=False):
+ """
+ Initialize internal variables and get a list of all files inside
+ sysfs that can currently be parsed.
+
+ Please notice that there are several entries on sysfs that aren't
+ documented as ABI. Ignore those.
+
+ The real paths will be stored under self.files. Aliases will be
+ stored in separate, as self.aliases.
+ """
+
+ self.abi = abi
+ self.log = abi.log
+
+ if sysfs != "/sys":
+ self.sysfs = sysfs.rstrip("/")
+ else:
+ self.sysfs = None
+
+ self.hints = hints
+
+ self.root = {}
+ self.aliases = {}
+ self.files = set()
+
+ dont_walk = [
+ # Those require root access and aren't documented at ABI
+ f"^{sysfs}/kernel/debug",
+ f"^{sysfs}/kernel/tracing",
+ f"^{sysfs}/fs/pstore",
+ f"^{sysfs}/fs/bpf",
+ f"^{sysfs}/fs/fuse",
+
+ # This is not documented at ABI
+ f"^{sysfs}/module",
+
+ f"^{sysfs}/fs/cgroup", # this is big and has zero docs under ABI
+ f"^{sysfs}/firmware", # documented elsewhere: ACPI, DT bindings
+ "sections|notes", # aren't actually part of ABI
+
+ # kernel-parameters.txt - not easy to parse
+ "parameters",
+ ]
+
+ self.re_ignore = re.compile("|".join(dont_walk))
+
+ print(f"Reading {sysfs} directory contents...", file=sys.stderr)
+ self._walk(sysfs)
+
+ def check_file(self, refs, found):
+ """Check missing ABI symbols for a given sysfs file"""
+
+ res_list = []
+
+ try:
+ for names in refs:
+ fname = names[0]
+
+ res = {
+ "found": False,
+ "fname": fname,
+ "msg": "",
+ }
+ res_list.append(res)
+
+ re_what = self.abi.get_regexes(fname)
+ if not re_what:
+ self.abi.log.warning(f"missing rules for {fname}")
+ continue
+
+ for name in names:
+ for r in re_what:
+ if self.abi.debug & AbiDebug.UNDEFINED:
+ self.log.debug("check if %s matches '%s'", name, r.pattern)
+ if r.match(name):
+ res["found"] = True
+ if found:
+ res["msg"] += f" {fname}: regex:\n\t"
+ continue
+
+ if self.hints and not res["found"]:
+ res["msg"] += f" {fname} not found. Tested regexes:\n"
+ for r in re_what:
+ res["msg"] += " " + r.pattern + "\n"
+
+ except KeyboardInterrupt:
+ pass
+
+ return res_list
+
+ def _ref_interactor(self, root):
+ """Recursive function to interact over the sysfs tree"""
+
+ for k, v in root.items():
+ if isinstance(v, dict):
+ yield from self._ref_interactor(v)
+
+ if root == self.root or k == "__name":
+ continue
+
+ if self.abi.re_string:
+ fname = v["__name"][0]
+ if self.abi.re_string.search(fname):
+ yield v
+ else:
+ yield v
+
+
+ def get_fileref(self, all_refs, chunk_size):
+ """Interactor to group refs into chunks"""
+
+ n = 0
+ refs = []
+
+ for ref in all_refs:
+ refs.append(ref)
+
+ n += 1
+ if n >= chunk_size:
+ yield refs
+ n = 0
+ refs = []
+
+ yield refs
+
+ def check_undefined_symbols(self, max_workers=None, chunk_size=50,
+ found=None, dry_run=None):
+ """Seach ABI for sysfs symbols missing documentation"""
+
+ self.abi.parse_abi()
+
+ if self.abi.debug & AbiDebug.GRAPH:
+ self.print_graph()
+
+ all_refs = []
+ for ref in self._ref_interactor(self.root):
+ all_refs.append(ref["__name"])
+
+ if dry_run:
+ print("Would check", file=sys.stderr)
+ for ref in all_refs:
+ print(", ".join(ref))
+
+ return
+
+ print("Starting to search symbols (it may take several minutes):",
+ file=sys.stderr)
+ start = datetime.now()
+ old_elapsed = None
+
+ # Python doesn't support multithreading due to limitations on its
+ # global lock (GIL). While Python 3.13 finally made GIL optional,
+ # there are still issues related to it. Also, we want to have
+ # backward compatibility with older versions of Python.
+ #
+ # So, use instead multiprocess. However, Python is very slow passing
+ # data from/to multiple processes. Also, it may consume lots of memory
+ # if the data to be shared is not small. So, we need to group workload
+ # in chunks that are big enough to generate performance gains while
+ # not being so big that would cause out-of-memory.
+
+ num_refs = len(all_refs)
+ print(f"Number of references to parse: {num_refs}", file=sys.stderr)
+
+ if not max_workers:
+ max_workers = os.cpu_count()
+ elif max_workers > os.cpu_count():
+ max_workers = os.cpu_count()
+
+ max_workers = max(max_workers, 1)
+
+ max_chunk_size = int((num_refs + max_workers - 1) / max_workers)
+ chunk_size = min(chunk_size, max_chunk_size)
+ chunk_size = max(1, chunk_size)
+
+ if max_workers > 1:
+ executor = futures.ProcessPoolExecutor
+
+ # Place references in a random order. This may help improving
+ # performance, by mixing complex/simple expressions when creating
+ # chunks
+ shuffle(all_refs)
+ else:
+ # Python has a high overhead with processes. When there's just
+ # one worker, it is faster to not create a new process.
+ # Yet, User still deserves to have a progress print. So, use
+ # python's "thread", which is actually a single process, using
+ # an internal schedule to switch between tasks. No performance
+ # gains for non-IO tasks, but still it can be quickly interrupted
+ # from time to time to display progress.
+ executor = futures.ThreadPoolExecutor
+
+ not_found = []
+ f_list = []
+ with executor(max_workers=max_workers) as exe:
+ for refs in self.get_fileref(all_refs, chunk_size):
+ if refs:
+ try:
+ f_list.append(exe.submit(self.check_file, refs, found))
+
+ except KeyboardInterrupt:
+ return
+
+ total = len(f_list)
+
+ if not total:
+ if self.abi.re_string:
+ print(f"No ABI symbol matches {self.abi.search_string}")
+ else:
+ self.abi.log.warning("No ABI symbols found")
+ return
+
+ print(f"{len(f_list):6d} jobs queued on {max_workers} workers",
+ file=sys.stderr)
+
+ while f_list:
+ try:
+ t = futures.wait(f_list, timeout=1,
+ return_when=futures.FIRST_COMPLETED)
+
+ done = t[0]
+
+ for fut in done:
+ res_list = fut.result()
+
+ for res in res_list:
+ if not res["found"]:
+ not_found.append(res["fname"])
+ if res["msg"]:
+ print(res["msg"])
+
+ f_list.remove(fut)
+ except KeyboardInterrupt:
+ return
+
+ except RuntimeError as e:
+ self.abi.log.warning(f"Future: {e}")
+ break
+
+ if sys.stderr.isatty():
+ elapsed = str(datetime.now() - start).split(".", maxsplit=1)[0]
+ if len(f_list) < total:
+ elapsed += f" ({total - len(f_list)}/{total} jobs completed). "
+ if elapsed != old_elapsed:
+ print(elapsed + "\r", end="", flush=True,
+ file=sys.stderr)
+ old_elapsed = elapsed
+
+ elapsed = str(datetime.now() - start).split(".", maxsplit=1)[0]
+ print(elapsed, file=sys.stderr)
+
+ for f in sorted(not_found):
+ print(f"{f} not found.")
diff --git a/tools/lib/python/feat/parse_features.py b/tools/lib/python/feat/parse_features.py
new file mode 100755
index 000000000000..b88c04d3e2fe
--- /dev/null
+++ b/tools/lib/python/feat/parse_features.py
@@ -0,0 +1,494 @@
+#!/usr/bin/env python3
+# pylint: disable=R0902,R0911,R0912,R0914,R0915
+# Copyright(c) 2025: Mauro Carvalho Chehab <mchehab@kernel.org>.
+# SPDX-License-Identifier: GPL-2.0
+
+
+"""
+Library to parse the Linux Feature files and produce a ReST book.
+"""
+
+import os
+import re
+import sys
+
+from glob import iglob
+
+
+class ParseFeature:
+ """
+ Parses Documentation/features, allowing to generate ReST documentation
+ from it.
+ """
+
+ h_name = "Feature"
+ h_kconfig = "Kconfig"
+ h_description = "Description"
+ h_subsys = "Subsystem"
+ h_status = "Status"
+ h_arch = "Architecture"
+
+ # Sort order for status. Others will be mapped at the end.
+ status_map = {
+ "ok": 0,
+ "TODO": 1,
+ "N/A": 2,
+ # The only missing status is "..", which was mapped as "---",
+ # as this is an special ReST cell value. Let it get the
+ # default order (99).
+ }
+
+ def __init__(self, prefix, debug=0, enable_fname=False):
+ """
+ Sets internal variables
+ """
+
+ self.prefix = prefix
+ self.debug = debug
+ self.enable_fname = enable_fname
+
+ self.data = {}
+
+ # Initial maximum values use just the headers
+ self.max_size_name = len(self.h_name)
+ self.max_size_kconfig = len(self.h_kconfig)
+ self.max_size_description = len(self.h_description)
+ self.max_size_desc_word = 0
+ self.max_size_subsys = len(self.h_subsys)
+ self.max_size_status = len(self.h_status)
+ self.max_size_arch = len(self.h_arch)
+ self.max_size_arch_with_header = self.max_size_arch + self.max_size_arch
+ self.description_size = 1
+
+ self.msg = ""
+
+ def emit(self, msg="", end="\n"):
+ self.msg += msg + end
+
+ def parse_error(self, fname, ln, msg, data=None):
+ """
+ Displays an error message, printing file name and line
+ """
+
+ if ln:
+ fname += f"#{ln}"
+
+ print(f"Warning: file {fname}: {msg}", file=sys.stderr, end="")
+
+ if data:
+ data = data.rstrip()
+ print(f":\n\t{data}", file=sys.stderr)
+ else:
+ print("", file=sys.stderr)
+
+ def parse_feat_file(self, fname):
+ """Parses a single arch-support.txt feature file"""
+
+ if os.path.isdir(fname):
+ return
+
+ base = os.path.basename(fname)
+
+ if base != "arch-support.txt":
+ if self.debug:
+ print(f"ignoring {fname}", file=sys.stderr)
+ return
+
+ subsys = os.path.dirname(fname).split("/")[-2]
+ self.max_size_subsys = max(self.max_size_subsys, len(subsys))
+
+ feature_name = ""
+ kconfig = ""
+ description = ""
+ comments = ""
+ arch_table = {}
+
+ if self.debug > 1:
+ print(f"Opening {fname}", file=sys.stderr)
+
+ if self.enable_fname:
+ full_fname = os.path.abspath(fname)
+ self.emit(f".. FILE {full_fname}")
+
+ with open(fname, encoding="utf-8") as f:
+ for ln, line in enumerate(f, start=1):
+ line = line.strip()
+
+ match = re.match(r"^\#\s+Feature\s+name:\s*(.*\S)", line)
+ if match:
+ feature_name = match.group(1)
+
+ self.max_size_name = max(self.max_size_name,
+ len(feature_name))
+ continue
+
+ match = re.match(r"^\#\s+Kconfig:\s*(.*\S)", line)
+ if match:
+ kconfig = match.group(1)
+
+ self.max_size_kconfig = max(self.max_size_kconfig,
+ len(kconfig))
+ continue
+
+ match = re.match(r"^\#\s+description:\s*(.*\S)", line)
+ if match:
+ description = match.group(1)
+
+ self.max_size_description = max(self.max_size_description,
+ len(description))
+
+ words = re.split(r"\s+", line)[1:]
+ for word in words:
+ self.max_size_desc_word = max(self.max_size_desc_word,
+ len(word))
+
+ continue
+
+ if re.search(r"^\\s*$", line):
+ continue
+
+ if re.match(r"^\s*\-+\s*$", line):
+ continue
+
+ if re.search(r"^\s*\|\s*arch\s*\|\s*status\s*\|\s*$", line):
+ continue
+
+ match = re.match(r"^\#\s*(.*)$", line)
+ if match:
+ comments += match.group(1)
+ continue
+
+ match = re.match(r"^\s*\|\s*(\S+):\s*\|\s*(\S+)\s*\|\s*$", line)
+ if match:
+ arch = match.group(1)
+ status = match.group(2)
+
+ self.max_size_status = max(self.max_size_status,
+ len(status))
+ self.max_size_arch = max(self.max_size_arch, len(arch))
+
+ if status == "..":
+ status = "---"
+
+ arch_table[arch] = status
+
+ continue
+
+ self.parse_error(fname, ln, "Line is invalid", line)
+
+ if not feature_name:
+ self.parse_error(fname, 0, "Feature name not found")
+ return
+ if not subsys:
+ self.parse_error(fname, 0, "Subsystem not found")
+ return
+ if not kconfig:
+ self.parse_error(fname, 0, "Kconfig not found")
+ return
+ if not description:
+ self.parse_error(fname, 0, "Description not found")
+ return
+ if not arch_table:
+ self.parse_error(fname, 0, "Architecture table not found")
+ return
+
+ self.data[feature_name] = {
+ "where": fname,
+ "subsys": subsys,
+ "kconfig": kconfig,
+ "description": description,
+ "comments": comments,
+ "table": arch_table,
+ }
+
+ self.max_size_arch_with_header = self.max_size_arch + len(self.h_arch)
+
+ def parse(self):
+ """Parses all arch-support.txt feature files inside self.prefix"""
+
+ path = os.path.expanduser(self.prefix)
+
+ if self.debug > 2:
+ print(f"Running parser for {path}")
+
+ example_path = os.path.join(path, "arch-support.txt")
+
+ for fname in iglob(os.path.join(path, "**"), recursive=True):
+ if fname != example_path:
+ self.parse_feat_file(fname)
+
+ return self.data
+
+ def output_arch_table(self, arch, feat=None):
+ """
+ Output feature(s) for a given architecture.
+ """
+
+ title = f"Feature status on {arch} architecture"
+
+ self.emit("=" * len(title))
+ self.emit(title)
+ self.emit("=" * len(title))
+ self.emit()
+
+ self.emit("=" * self.max_size_subsys + " ", end="")
+ self.emit("=" * self.max_size_name + " ", end="")
+ self.emit("=" * self.max_size_kconfig + " ", end="")
+ self.emit("=" * self.max_size_status + " ", end="")
+ self.emit("=" * self.max_size_description)
+
+ self.emit(f"{self.h_subsys:<{self.max_size_subsys}} ", end="")
+ self.emit(f"{self.h_name:<{self.max_size_name}} ", end="")
+ self.emit(f"{self.h_kconfig:<{self.max_size_kconfig}} ", end="")
+ self.emit(f"{self.h_status:<{self.max_size_status}} ", end="")
+ self.emit(f"{self.h_description:<{self.max_size_description}}")
+
+ self.emit("=" * self.max_size_subsys + " ", end="")
+ self.emit("=" * self.max_size_name + " ", end="")
+ self.emit("=" * self.max_size_kconfig + " ", end="")
+ self.emit("=" * self.max_size_status + " ", end="")
+ self.emit("=" * self.max_size_description)
+
+ sorted_features = sorted(self.data.keys(),
+ key=lambda x: (self.data[x]["subsys"],
+ x.lower()))
+
+ for name in sorted_features:
+ if feat and name != feat:
+ continue
+
+ arch_table = self.data[name]["table"]
+
+ if not arch in arch_table:
+ continue
+
+ self.emit(f"{self.data[name]['subsys']:<{self.max_size_subsys}} ",
+ end="")
+ self.emit(f"{name:<{self.max_size_name}} ", end="")
+ self.emit(f"{self.data[name]['kconfig']:<{self.max_size_kconfig}} ",
+ end="")
+ self.emit(f"{arch_table[arch]:<{self.max_size_status}} ",
+ end="")
+ self.emit(f"{self.data[name]['description']}")
+
+ self.emit("=" * self.max_size_subsys + " ", end="")
+ self.emit("=" * self.max_size_name + " ", end="")
+ self.emit("=" * self.max_size_kconfig + " ", end="")
+ self.emit("=" * self.max_size_status + " ", end="")
+ self.emit("=" * self.max_size_description)
+
+ return self.msg
+
+ def output_feature(self, feat):
+ """
+ Output a feature on all architectures
+ """
+
+ title = f"Feature {feat}"
+
+ self.emit("=" * len(title))
+ self.emit(title)
+ self.emit("=" * len(title))
+ self.emit()
+
+ if not feat in self.data:
+ return
+
+ if self.data[feat]["subsys"]:
+ self.emit(f":Subsystem: {self.data[feat]['subsys']}")
+ if self.data[feat]["kconfig"]:
+ self.emit(f":Kconfig: {self.data[feat]['kconfig']}")
+
+ desc = self.data[feat]["description"]
+ desc = desc[0].upper() + desc[1:]
+ desc = desc.rstrip(". \t")
+ self.emit(f"\n{desc}.\n")
+
+ com = self.data[feat]["comments"].strip()
+ if com:
+ self.emit("Comments")
+ self.emit("--------")
+ self.emit(f"\n{com}\n")
+
+ self.emit("=" * self.max_size_arch + " ", end="")
+ self.emit("=" * self.max_size_status)
+
+ self.emit(f"{self.h_arch:<{self.max_size_arch}} ", end="")
+ self.emit(f"{self.h_status:<{self.max_size_status}}")
+
+ self.emit("=" * self.max_size_arch + " ", end="")
+ self.emit("=" * self.max_size_status)
+
+ arch_table = self.data[feat]["table"]
+ for arch in sorted(arch_table.keys()):
+ self.emit(f"{arch:<{self.max_size_arch}} ", end="")
+ self.emit(f"{arch_table[arch]:<{self.max_size_status}}")
+
+ self.emit("=" * self.max_size_arch + " ", end="")
+ self.emit("=" * self.max_size_status)
+
+ return self.msg
+
+ def matrix_lines(self, desc_size, max_size_status, header):
+ """
+ Helper function to split element tables at the output matrix
+ """
+
+ if header:
+ ln_marker = "="
+ else:
+ ln_marker = "-"
+
+ self.emit("+" + ln_marker * self.max_size_name + "+", end="")
+ self.emit(ln_marker * desc_size, end="")
+ self.emit("+" + ln_marker * max_size_status + "+")
+
+ def output_matrix(self):
+ """
+ Generates a set of tables, groped by subsystem, containing
+ what's the feature state on each architecture.
+ """
+
+ title = "Feature status on all architectures"
+
+ self.emit("=" * len(title))
+ self.emit(title)
+ self.emit("=" * len(title))
+ self.emit()
+
+ desc_title = f"{self.h_kconfig} / {self.h_description}"
+
+ desc_size = self.max_size_kconfig + 4
+ if not self.description_size:
+ desc_size = max(self.max_size_description, desc_size)
+ else:
+ desc_size = max(self.description_size, desc_size)
+
+ desc_size = max(self.max_size_desc_word, desc_size, len(desc_title))
+
+ notcompat = "Not compatible"
+ self.max_size_status = max(self.max_size_status, len(notcompat))
+
+ min_status_size = self.max_size_status + self.max_size_arch + 4
+ max_size_status = max(min_status_size, self.max_size_status)
+
+ h_status_per_arch = "Status per architecture"
+ max_size_status = max(max_size_status, len(h_status_per_arch))
+
+ cur_subsys = None
+ for name in sorted(self.data.keys(),
+ key=lambda x: (self.data[x]["subsys"], x.lower())):
+ if not cur_subsys or cur_subsys != self.data[name]["subsys"]:
+ if cur_subsys:
+ self.emit()
+
+ cur_subsys = self.data[name]["subsys"]
+
+ title = f"Subsystem: {cur_subsys}"
+ self.emit(title)
+ self.emit("=" * len(title))
+ self.emit()
+
+ self.matrix_lines(desc_size, max_size_status, 0)
+
+ self.emit(f"|{self.h_name:<{self.max_size_name}}", end="")
+ self.emit(f"|{desc_title:<{desc_size}}", end="")
+ self.emit(f"|{h_status_per_arch:<{max_size_status}}|")
+
+ self.matrix_lines(desc_size, max_size_status, 1)
+
+ lines = []
+ descs = []
+ cur_status = ""
+ line = ""
+
+ arch_table = sorted(self.data[name]["table"].items(),
+ key=lambda x: (self.status_map.get(x[1], 99),
+ x[0].lower()))
+
+ for arch, status in arch_table:
+ if status == "---":
+ status = notcompat
+
+ if status != cur_status:
+ if line != "":
+ lines.append(line)
+ line = ""
+ line = f"- **{status}**: {arch}"
+ elif len(line) + len(arch) + 2 < max_size_status:
+ line += f", {arch}"
+ else:
+ lines.append(line)
+ line = f" {arch}"
+ cur_status = status
+
+ if line != "":
+ lines.append(line)
+
+ description = self.data[name]["description"]
+ while len(description) > desc_size:
+ desc_line = description[:desc_size]
+
+ last_space = desc_line.rfind(" ")
+ if last_space != -1:
+ desc_line = desc_line[:last_space]
+ descs.append(desc_line)
+ description = description[last_space + 1:]
+ else:
+ desc_line = desc_line[:-1]
+ descs.append(desc_line + "\\")
+ description = description[len(desc_line):]
+
+ if description:
+ descs.append(description)
+
+ while len(lines) < 2 + len(descs):
+ lines.append("")
+
+ for ln, line in enumerate(lines):
+ col = ["", ""]
+
+ if not ln:
+ col[0] = name
+ col[1] = f"``{self.data[name]['kconfig']}``"
+ else:
+ if ln >= 2 and descs:
+ col[1] = descs.pop(0)
+
+ self.emit(f"|{col[0]:<{self.max_size_name}}", end="")
+ self.emit(f"|{col[1]:<{desc_size}}", end="")
+ self.emit(f"|{line:<{max_size_status}}|")
+
+ self.matrix_lines(desc_size, max_size_status, 0)
+
+ return self.msg
+
+ def list_arch_features(self, arch, feat):
+ """
+ Print a matrix of kernel feature support for the chosen architecture.
+ """
+ self.emit("#")
+ self.emit(f"# Kernel feature support matrix of the '{arch}' architecture:")
+ self.emit("#")
+
+ # Sort by subsystem, then by feature name (case‑insensitive)
+ for name in sorted(self.data.keys(),
+ key=lambda n: (self.data[n]["subsys"].lower(),
+ n.lower())):
+ if feat and name != feat:
+ continue
+
+ feature = self.data[name]
+ arch_table = feature["table"]
+ status = arch_table.get(arch, "")
+ status = " " * ((4 - len(status)) // 2) + status
+
+ self.emit(f"{feature['subsys']:>{self.max_size_subsys + 1}}/ ",
+ end="")
+ self.emit(f"{name:<{self.max_size_name}}: ", end="")
+ self.emit(f"{status:<5}| ", end="")
+ self.emit(f"{feature['kconfig']:>{self.max_size_kconfig}} ",
+ end="")
+ self.emit(f"# {feature['description']}")
+
+ return self.msg
diff --git a/tools/lib/python/jobserver.py b/tools/lib/python/jobserver.py
new file mode 100755
index 000000000000..a24f30ef4fa8
--- /dev/null
+++ b/tools/lib/python/jobserver.py
@@ -0,0 +1,149 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0+
+#
+# pylint: disable=C0103,C0209
+#
+#
+
+"""
+Interacts with the POSIX jobserver during the Kernel build time.
+
+A "normal" jobserver task, like the one initiated by a make subrocess would do:
+
+ - open read/write file descriptors to communicate with the job server;
+ - ask for one slot by calling:
+ claim = os.read(reader, 1)
+ - when the job finshes, call:
+ os.write(writer, b"+") # os.write(writer, claim)
+
+Here, the goal is different: This script aims to get the remaining number
+of slots available, using all of them to run a command which handle tasks in
+parallel. To to that, it has a loop that ends only after there are no
+slots left. It then increments the number by one, in order to allow a
+call equivalent to make -j$((claim+1)), e.g. having a parent make creating
+$claim child to do the actual work.
+
+The end goal here is to keep the total number of build tasks under the
+limit established by the initial make -j$n_proc call.
+
+See:
+ https://www.gnu.org/software/make/manual/html_node/POSIX-Jobserver.html#POSIX-Jobserver
+"""
+
+import errno
+import os
+import subprocess
+import sys
+
+class JobserverExec:
+ """
+ Claim all slots from make using POSIX Jobserver.
+
+ The main methods here are:
+ - open(): reserves all slots;
+ - close(): method returns all used slots back to make;
+ - run(): executes a command setting PARALLELISM=<available slots jobs + 1>
+ """
+
+ def __init__(self):
+ """Initialize internal vars"""
+ self.claim = 0
+ self.jobs = b""
+ self.reader = None
+ self.writer = None
+ self.is_open = False
+
+ def open(self):
+ """Reserve all available slots to be claimed later on"""
+
+ if self.is_open:
+ return
+
+ try:
+ # Fetch the make environment options.
+ flags = os.environ["MAKEFLAGS"]
+ # Look for "--jobserver=R,W"
+ # Note that GNU Make has used --jobserver-fds and --jobserver-auth
+ # so this handles all of them.
+ opts = [x for x in flags.split(" ") if x.startswith("--jobserver")]
+
+ # Parse out R,W file descriptor numbers and set them nonblocking.
+ # If the MAKEFLAGS variable contains multiple instances of the
+ # --jobserver-auth= option, the last one is relevant.
+ fds = opts[-1].split("=", 1)[1]
+
+ # Starting with GNU Make 4.4, named pipes are used for reader
+ # and writer.
+ # Example argument: --jobserver-auth=fifo:/tmp/GMfifo8134
+ _, _, path = fds.partition("fifo:")
+
+ if path:
+ self.reader = os.open(path, os.O_RDONLY | os.O_NONBLOCK)
+ self.writer = os.open(path, os.O_WRONLY)
+ else:
+ self.reader, self.writer = [int(x) for x in fds.split(",", 1)]
+ # Open a private copy of reader to avoid setting nonblocking
+ # on an unexpecting process with the same reader fd.
+ self.reader = os.open("/proc/self/fd/%d" % (self.reader),
+ os.O_RDONLY | os.O_NONBLOCK)
+
+ # Read out as many jobserver slots as possible
+ while True:
+ try:
+ slot = os.read(self.reader, 8)
+ self.jobs += slot
+ except (OSError, IOError) as e:
+ if e.errno == errno.EWOULDBLOCK:
+ # Stop at the end of the jobserver queue.
+ break
+ # If something went wrong, give back the jobs.
+ if self.jobs:
+ os.write(self.writer, self.jobs)
+ raise e
+
+ # Add a bump for our caller's reserveration, since we're just going
+ # to sit here blocked on our child.
+ self.claim = len(self.jobs) + 1
+
+ except (KeyError, IndexError, ValueError, OSError, IOError):
+ # Any missing environment strings or bad fds should result in just
+ # not being parallel.
+ self.claim = None
+
+ self.is_open = True
+
+ def close(self):
+ """Return all reserved slots to Jobserver"""
+
+ if not self.is_open:
+ return
+
+ # Return all the reserved slots.
+ if len(self.jobs):
+ os.write(self.writer, self.jobs)
+
+ self.is_open = False
+
+ def __enter__(self):
+ self.open()
+ return self
+
+ def __exit__(self, exc_type, exc_value, exc_traceback):
+ self.close()
+
+ def run(self, cmd, *args, **pwargs):
+ """
+ Run a command setting PARALLELISM env variable to the number of
+ available job slots (claim) + 1, e.g. it will reserve claim slots
+ to do the actual build work, plus one to monitor its children.
+ """
+ self.open() # Ensure that self.claim is set
+
+ # We can only claim parallelism if there was a jobserver (i.e. a
+ # top-level "-jN" argument) and there were no other failures. Otherwise
+ # leave out the environment variable and let the child figure out what
+ # is best.
+ if self.claim:
+ os.environ["PARALLELISM"] = str(self.claim)
+
+ return subprocess.call(cmd, *args, **pwargs)
diff --git a/tools/lib/python/kdoc/__init__.py b/tools/lib/python/kdoc/__init__.py
new file mode 100644
index 000000000000..e69de29bb2d1
--- /dev/null
+++ b/tools/lib/python/kdoc/__init__.py
diff --git a/tools/lib/python/kdoc/enrich_formatter.py b/tools/lib/python/kdoc/enrich_formatter.py
new file mode 100644
index 000000000000..bb171567a4ca
--- /dev/null
+++ b/tools/lib/python/kdoc/enrich_formatter.py
@@ -0,0 +1,70 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (c) 2025 by Mauro Carvalho Chehab <mchehab@kernel.org>.
+
+"""
+Ancillary argparse HelpFormatter class that works on a similar way as
+argparse.RawDescriptionHelpFormatter, e.g. description maintains line
+breaks, but it also implement transformations to the help text. The
+actual transformations ar given by enrich_text(), if the output is tty.
+
+Currently, the follow transformations are done:
+
+ - Positional arguments are shown in upper cases;
+ - if output is TTY, ``var`` and positional arguments are shown prepended
+ by an ANSI SGR code. This is usually translated to bold. On some
+ terminals, like, konsole, this is translated into a colored bold text.
+"""
+
+import argparse
+import re
+import sys
+
+class EnrichFormatter(argparse.HelpFormatter):
+ """
+ Better format the output, making easier to identify the positional args
+ and how they're used at the __doc__ description.
+ """
+ def __init__(self, *args, **kwargs):
+ """Initialize class and check if is TTY"""
+ super().__init__(*args, **kwargs)
+ self._tty = sys.stdout.isatty()
+
+ def enrich_text(self, text):
+ """Handle ReST markups (currently, only ``foo``)"""
+ if self._tty and text:
+ # Replace ``text`` with ANSI SGR (bold)
+ return re.sub(r'\`\`(.+?)\`\`',
+ lambda m: f'\033[1m{m.group(1)}\033[0m', text)
+ return text
+
+ def _fill_text(self, text, width, indent):
+ """Enrich descriptions with markups on it"""
+ enriched = self.enrich_text(text)
+ return "\n".join(indent + line for line in enriched.splitlines())
+
+ def _format_usage(self, usage, actions, groups, prefix):
+ """Enrich positional arguments at usage: line"""
+
+ prog = self._prog
+ parts = []
+
+ for action in actions:
+ if action.option_strings:
+ opt = action.option_strings[0]
+ if action.nargs != 0:
+ opt += f" {action.dest.upper()}"
+ parts.append(f"[{opt}]")
+ else:
+ # Positional argument
+ parts.append(self.enrich_text(f"``{action.dest.upper()}``"))
+
+ usage_text = f"{prefix or 'usage: '} {prog} {' '.join(parts)}\n"
+ return usage_text
+
+ def _format_action_invocation(self, action):
+ """Enrich argument names"""
+ if not action.option_strings:
+ return self.enrich_text(f"``{action.dest.upper()}``")
+
+ return ", ".join(action.option_strings)
diff --git a/tools/lib/python/kdoc/kdoc_files.py b/tools/lib/python/kdoc/kdoc_files.py
new file mode 100644
index 000000000000..bfe02baf1606
--- /dev/null
+++ b/tools/lib/python/kdoc/kdoc_files.py
@@ -0,0 +1,294 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+# Copyright(c) 2025: Mauro Carvalho Chehab <mchehab@kernel.org>.
+#
+# pylint: disable=R0903,R0913,R0914,R0917
+
+"""
+Parse lernel-doc tags on multiple kernel source files.
+"""
+
+import argparse
+import logging
+import os
+import re
+
+from kdoc.kdoc_parser import KernelDoc
+from kdoc.kdoc_output import OutputFormat
+
+
+class GlobSourceFiles:
+ """
+ Parse C source code file names and directories via an Interactor.
+ """
+
+ def __init__(self, srctree=None, valid_extensions=None):
+ """
+ Initialize valid extensions with a tuple.
+
+ If not defined, assume default C extensions (.c and .h)
+
+ It would be possible to use python's glob function, but it is
+ very slow, and it is not interactive. So, it would wait to read all
+ directories before actually do something.
+
+ So, let's use our own implementation.
+ """
+
+ if not valid_extensions:
+ self.extensions = (".c", ".h")
+ else:
+ self.extensions = valid_extensions
+
+ self.srctree = srctree
+
+ def _parse_dir(self, dirname):
+ """Internal function to parse files recursively"""
+
+ with os.scandir(dirname) as obj:
+ for entry in obj:
+ name = os.path.join(dirname, entry.name)
+
+ if entry.is_dir(follow_symlinks=False):
+ yield from self._parse_dir(name)
+
+ if not entry.is_file():
+ continue
+
+ basename = os.path.basename(name)
+
+ if not basename.endswith(self.extensions):
+ continue
+
+ yield name
+
+ def parse_files(self, file_list, file_not_found_cb):
+ """
+ Define an iterator to parse all source files from file_list,
+ handling directories if any
+ """
+
+ if not file_list:
+ return
+
+ for fname in file_list:
+ if self.srctree:
+ f = os.path.join(self.srctree, fname)
+ else:
+ f = fname
+
+ if os.path.isdir(f):
+ yield from self._parse_dir(f)
+ elif os.path.isfile(f):
+ yield f
+ elif file_not_found_cb:
+ file_not_found_cb(fname)
+
+
+class KernelFiles():
+ """
+ Parse kernel-doc tags on multiple kernel source files.
+
+ There are two type of parsers defined here:
+ - self.parse_file(): parses both kernel-doc markups and
+ EXPORT_SYMBOL* macros;
+ - self.process_export_file(): parses only EXPORT_SYMBOL* macros.
+ """
+
+ def warning(self, msg):
+ """Ancillary routine to output a warning and increment error count"""
+
+ self.config.log.warning(msg)
+ self.errors += 1
+
+ def error(self, msg):
+ """Ancillary routine to output an error and increment error count"""
+
+ self.config.log.error(msg)
+ self.errors += 1
+
+ def parse_file(self, fname):
+ """
+ Parse a single Kernel source.
+ """
+
+ # Prevent parsing the same file twice if results are cached
+ if fname in self.files:
+ return
+
+ doc = KernelDoc(self.config, fname)
+ export_table, entries = doc.parse_kdoc()
+
+ self.export_table[fname] = export_table
+
+ self.files.add(fname)
+ self.export_files.add(fname) # parse_kdoc() already check exports
+
+ self.results[fname] = entries
+
+ def process_export_file(self, fname):
+ """
+ Parses EXPORT_SYMBOL* macros from a single Kernel source file.
+ """
+
+ # Prevent parsing the same file twice if results are cached
+ if fname in self.export_files:
+ return
+
+ doc = KernelDoc(self.config, fname)
+ export_table = doc.parse_export()
+
+ if not export_table:
+ self.error(f"Error: Cannot check EXPORT_SYMBOL* on {fname}")
+ export_table = set()
+
+ self.export_table[fname] = export_table
+ self.export_files.add(fname)
+
+ def file_not_found_cb(self, fname):
+ """
+ Callback to warn if a file was not found.
+ """
+
+ self.error(f"Cannot find file {fname}")
+
+ def __init__(self, verbose=False, out_style=None,
+ werror=False, wreturn=False, wshort_desc=False,
+ wcontents_before_sections=False,
+ logger=None):
+ """
+ Initialize startup variables and parse all files
+ """
+
+ if not verbose:
+ verbose = bool(os.environ.get("KBUILD_VERBOSE", 0))
+
+ if out_style is None:
+ out_style = OutputFormat()
+
+ if not werror:
+ kcflags = os.environ.get("KCFLAGS", None)
+ if kcflags:
+ match = re.search(r"(\s|^)-Werror(\s|$)/", kcflags)
+ if match:
+ werror = True
+
+ # reading this variable is for backwards compat just in case
+ # someone was calling it with the variable from outside the
+ # kernel's build system
+ kdoc_werror = os.environ.get("KDOC_WERROR", None)
+ if kdoc_werror:
+ werror = kdoc_werror
+
+ # Some variables are global to the parser logic as a whole as they are
+ # used to send control configuration to KernelDoc class. As such,
+ # those variables are read-only inside the KernelDoc.
+ self.config = argparse.Namespace
+
+ self.config.verbose = verbose
+ self.config.werror = werror
+ self.config.wreturn = wreturn
+ self.config.wshort_desc = wshort_desc
+ self.config.wcontents_before_sections = wcontents_before_sections
+
+ if not logger:
+ self.config.log = logging.getLogger("kernel-doc")
+ else:
+ self.config.log = logger
+
+ self.config.warning = self.warning
+
+ self.config.src_tree = os.environ.get("SRCTREE", None)
+
+ # Initialize variables that are internal to KernelFiles
+
+ self.out_style = out_style
+
+ self.errors = 0
+ self.results = {}
+
+ self.files = set()
+ self.export_files = set()
+ self.export_table = {}
+
+ def parse(self, file_list, export_file=None):
+ """
+ Parse all files
+ """
+
+ glob = GlobSourceFiles(srctree=self.config.src_tree)
+
+ for fname in glob.parse_files(file_list, self.file_not_found_cb):
+ self.parse_file(fname)
+
+ for fname in glob.parse_files(export_file, self.file_not_found_cb):
+ self.process_export_file(fname)
+
+ def out_msg(self, fname, name, arg):
+ """
+ Return output messages from a file name using the output style
+ filtering.
+
+ If output type was not handled by the styler, return None.
+ """
+
+ # NOTE: we can add rules here to filter out unwanted parts,
+ # although OutputFormat.msg already does that.
+
+ return self.out_style.msg(fname, name, arg)
+
+ def msg(self, enable_lineno=False, export=False, internal=False,
+ symbol=None, nosymbol=None, no_doc_sections=False,
+ filenames=None, export_file=None):
+ """
+ Interacts over the kernel-doc results and output messages,
+ returning kernel-doc markups on each interaction
+ """
+
+ self.out_style.set_config(self.config)
+
+ if not filenames:
+ filenames = sorted(self.results.keys())
+
+ glob = GlobSourceFiles(srctree=self.config.src_tree)
+
+ for fname in filenames:
+ function_table = set()
+
+ if internal or export:
+ if not export_file:
+ export_file = [fname]
+
+ for f in glob.parse_files(export_file, self.file_not_found_cb):
+ function_table |= self.export_table[f]
+
+ if symbol:
+ for s in symbol:
+ function_table.add(s)
+
+ self.out_style.set_filter(export, internal, symbol, nosymbol,
+ function_table, enable_lineno,
+ no_doc_sections)
+
+ msg = ""
+ if fname not in self.results:
+ self.config.log.warning("No kernel-doc for file %s", fname)
+ continue
+
+ symbols = self.results[fname]
+ self.out_style.set_symbols(symbols)
+
+ for arg in symbols:
+ m = self.out_msg(fname, arg.name, arg)
+
+ if m is None:
+ ln = arg.get("ln", 0)
+ dtype = arg.get('type', "")
+
+ self.config.log.warning("%s:%d Can't handle %s",
+ fname, ln, dtype)
+ else:
+ msg += m
+
+ if msg:
+ yield fname, msg
diff --git a/tools/lib/python/kdoc/kdoc_item.py b/tools/lib/python/kdoc/kdoc_item.py
new file mode 100644
index 000000000000..19805301cb2c
--- /dev/null
+++ b/tools/lib/python/kdoc/kdoc_item.py
@@ -0,0 +1,43 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# A class that will, eventually, encapsulate all of the parsed data that we
+# then pass into the output modules.
+#
+
+class KdocItem:
+ def __init__(self, name, fname, type, start_line, **other_stuff):
+ self.name = name
+ self.fname = fname
+ self.type = type
+ self.declaration_start_line = start_line
+ self.sections = {}
+ self.sections_start_lines = {}
+ self.parameterlist = []
+ self.parameterdesc_start_lines = []
+ self.parameterdescs = {}
+ self.parametertypes = {}
+ #
+ # Just save everything else into our own dict so that the output
+ # side can grab it directly as before. As we move things into more
+ # structured data, this will, hopefully, fade away.
+ #
+ self.other_stuff = other_stuff
+
+ def get(self, key, default = None):
+ return self.other_stuff.get(key, default)
+
+ def __getitem__(self, key):
+ return self.get(key)
+
+ #
+ # Tracking of section and parameter information.
+ #
+ def set_sections(self, sections, start_lines):
+ self.sections = sections
+ self.section_start_lines = start_lines
+
+ def set_params(self, names, descs, types, starts):
+ self.parameterlist = names
+ self.parameterdescs = descs
+ self.parametertypes = types
+ self.parameterdesc_start_lines = starts
diff --git a/tools/lib/python/kdoc/kdoc_output.py b/tools/lib/python/kdoc/kdoc_output.py
new file mode 100644
index 000000000000..b1aaa7fc3604
--- /dev/null
+++ b/tools/lib/python/kdoc/kdoc_output.py
@@ -0,0 +1,824 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+# Copyright(c) 2025: Mauro Carvalho Chehab <mchehab@kernel.org>.
+#
+# pylint: disable=C0301,R0902,R0911,R0912,R0913,R0914,R0915,R0917
+
+"""
+Implement output filters to print kernel-doc documentation.
+
+The implementation uses a virtual base class (OutputFormat) which
+contains dispatches to virtual methods, and some code to filter
+out output messages.
+
+The actual implementation is done on one separate class per each type
+of output. Currently, there are output classes for ReST and man/troff.
+"""
+
+import os
+import re
+from datetime import datetime
+
+from kdoc.kdoc_parser import KernelDoc, type_param
+from kdoc.kdoc_re import KernRe
+
+
+function_pointer = KernRe(r"([^\(]*\(\*)\s*\)\s*\(([^\)]*)\)", cache=False)
+
+# match expressions used to find embedded type information
+type_constant = KernRe(r"\b``([^\`]+)``\b", cache=False)
+type_constant2 = KernRe(r"\%([-_*\w]+)", cache=False)
+type_func = KernRe(r"(\w+)\(\)", cache=False)
+type_param_ref = KernRe(r"([\!~\*]?)\@(\w*((\.\w+)|(->\w+))*(\.\.\.)?)", cache=False)
+
+# Special RST handling for func ptr params
+type_fp_param = KernRe(r"\@(\w+)\(\)", cache=False)
+
+# Special RST handling for structs with func ptr params
+type_fp_param2 = KernRe(r"\@(\w+->\S+)\(\)", cache=False)
+
+type_env = KernRe(r"(\$\w+)", cache=False)
+type_enum = KernRe(r"\&(enum\s*([_\w]+))", cache=False)
+type_struct = KernRe(r"\&(struct\s*([_\w]+))", cache=False)
+type_typedef = KernRe(r"\&(typedef\s*([_\w]+))", cache=False)
+type_union = KernRe(r"\&(union\s*([_\w]+))", cache=False)
+type_member = KernRe(r"\&([_\w]+)(\.|->)([_\w]+)", cache=False)
+type_fallback = KernRe(r"\&([_\w]+)", cache=False)
+type_member_func = type_member + KernRe(r"\(\)", cache=False)
+
+
+class OutputFormat:
+ """
+ Base class for OutputFormat. If used as-is, it means that only
+ warnings will be displayed.
+ """
+
+ # output mode.
+ OUTPUT_ALL = 0 # output all symbols and doc sections
+ OUTPUT_INCLUDE = 1 # output only specified symbols
+ OUTPUT_EXPORTED = 2 # output exported symbols
+ OUTPUT_INTERNAL = 3 # output non-exported symbols
+
+ # Virtual member to be overridden at the inherited classes
+ highlights = []
+
+ def __init__(self):
+ """Declare internal vars and set mode to OUTPUT_ALL"""
+
+ self.out_mode = self.OUTPUT_ALL
+ self.enable_lineno = None
+ self.nosymbol = {}
+ self.symbol = None
+ self.function_table = None
+ self.config = None
+ self.no_doc_sections = False
+
+ self.data = ""
+
+ def set_config(self, config):
+ """
+ Setup global config variables used by both parser and output.
+ """
+
+ self.config = config
+
+ def set_filter(self, export, internal, symbol, nosymbol, function_table,
+ enable_lineno, no_doc_sections):
+ """
+ Initialize filter variables according to the requested mode.
+
+ Only one choice is valid between export, internal and symbol.
+
+ The nosymbol filter can be used on all modes.
+ """
+
+ self.enable_lineno = enable_lineno
+ self.no_doc_sections = no_doc_sections
+ self.function_table = function_table
+
+ if symbol:
+ self.out_mode = self.OUTPUT_INCLUDE
+ elif export:
+ self.out_mode = self.OUTPUT_EXPORTED
+ elif internal:
+ self.out_mode = self.OUTPUT_INTERNAL
+ else:
+ self.out_mode = self.OUTPUT_ALL
+
+ if nosymbol:
+ self.nosymbol = set(nosymbol)
+
+
+ def highlight_block(self, block):
+ """
+ Apply the RST highlights to a sub-block of text.
+ """
+
+ for r, sub in self.highlights:
+ block = r.sub(sub, block)
+
+ return block
+
+ def out_warnings(self, args):
+ """
+ Output warnings for identifiers that will be displayed.
+ """
+
+ for log_msg in args.warnings:
+ self.config.warning(log_msg)
+
+ def check_doc(self, name, args):
+ """Check if DOC should be output"""
+
+ if self.no_doc_sections:
+ return False
+
+ if name in self.nosymbol:
+ return False
+
+ if self.out_mode == self.OUTPUT_ALL:
+ self.out_warnings(args)
+ return True
+
+ if self.out_mode == self.OUTPUT_INCLUDE:
+ if name in self.function_table:
+ self.out_warnings(args)
+ return True
+
+ return False
+
+ def check_declaration(self, dtype, name, args):
+ """
+ Checks if a declaration should be output or not based on the
+ filtering criteria.
+ """
+
+ if name in self.nosymbol:
+ return False
+
+ if self.out_mode == self.OUTPUT_ALL:
+ self.out_warnings(args)
+ return True
+
+ if self.out_mode in [self.OUTPUT_INCLUDE, self.OUTPUT_EXPORTED]:
+ if name in self.function_table:
+ return True
+
+ if self.out_mode == self.OUTPUT_INTERNAL:
+ if dtype != "function":
+ self.out_warnings(args)
+ return True
+
+ if name not in self.function_table:
+ self.out_warnings(args)
+ return True
+
+ return False
+
+ def msg(self, fname, name, args):
+ """
+ Handles a single entry from kernel-doc parser
+ """
+
+ self.data = ""
+
+ dtype = args.type
+
+ if dtype == "doc":
+ self.out_doc(fname, name, args)
+ return self.data
+
+ if not self.check_declaration(dtype, name, args):
+ return self.data
+
+ if dtype == "function":
+ self.out_function(fname, name, args)
+ return self.data
+
+ if dtype == "enum":
+ self.out_enum(fname, name, args)
+ return self.data
+
+ if dtype == "typedef":
+ self.out_typedef(fname, name, args)
+ return self.data
+
+ if dtype in ["struct", "union"]:
+ self.out_struct(fname, name, args)
+ return self.data
+
+ # Warn if some type requires an output logic
+ self.config.log.warning("doesn't know how to output '%s' block",
+ dtype)
+
+ return None
+
+ # Virtual methods to be overridden by inherited classes
+ # At the base class, those do nothing.
+ def set_symbols(self, symbols):
+ """Get a list of all symbols from kernel_doc"""
+
+ def out_doc(self, fname, name, args):
+ """Outputs a DOC block"""
+
+ def out_function(self, fname, name, args):
+ """Outputs a function"""
+
+ def out_enum(self, fname, name, args):
+ """Outputs an enum"""
+
+ def out_typedef(self, fname, name, args):
+ """Outputs a typedef"""
+
+ def out_struct(self, fname, name, args):
+ """Outputs a struct"""
+
+
+class RestFormat(OutputFormat):
+ """Consts and functions used by ReST output"""
+
+ highlights = [
+ (type_constant, r"``\1``"),
+ (type_constant2, r"``\1``"),
+
+ # Note: need to escape () to avoid func matching later
+ (type_member_func, r":c:type:`\1\2\3\\(\\) <\1>`"),
+ (type_member, r":c:type:`\1\2\3 <\1>`"),
+ (type_fp_param, r"**\1\\(\\)**"),
+ (type_fp_param2, r"**\1\\(\\)**"),
+ (type_func, r"\1()"),
+ (type_enum, r":c:type:`\1 <\2>`"),
+ (type_struct, r":c:type:`\1 <\2>`"),
+ (type_typedef, r":c:type:`\1 <\2>`"),
+ (type_union, r":c:type:`\1 <\2>`"),
+
+ # in rst this can refer to any type
+ (type_fallback, r":c:type:`\1`"),
+ (type_param_ref, r"**\1\2**")
+ ]
+ blankline = "\n"
+
+ sphinx_literal = KernRe(r'^[^.].*::$', cache=False)
+ sphinx_cblock = KernRe(r'^\.\.\ +code-block::', cache=False)
+
+ def __init__(self):
+ """
+ Creates class variables.
+
+ Not really mandatory, but it is a good coding style and makes
+ pylint happy.
+ """
+
+ super().__init__()
+ self.lineprefix = ""
+
+ def print_lineno(self, ln):
+ """Outputs a line number"""
+
+ if self.enable_lineno and ln is not None:
+ ln += 1
+ self.data += f".. LINENO {ln}\n"
+
+ def output_highlight(self, args):
+ """
+ Outputs a C symbol that may require being converted to ReST using
+ the self.highlights variable
+ """
+
+ input_text = args
+ output = ""
+ in_literal = False
+ litprefix = ""
+ block = ""
+
+ for line in input_text.strip("\n").split("\n"):
+
+ # If we're in a literal block, see if we should drop out of it.
+ # Otherwise, pass the line straight through unmunged.
+ if in_literal:
+ if line.strip(): # If the line is not blank
+ # If this is the first non-blank line in a literal block,
+ # figure out the proper indent.
+ if not litprefix:
+ r = KernRe(r'^(\s*)')
+ if r.match(line):
+ litprefix = '^' + r.group(1)
+ else:
+ litprefix = ""
+
+ output += line + "\n"
+ elif not KernRe(litprefix).match(line):
+ in_literal = False
+ else:
+ output += line + "\n"
+ else:
+ output += line + "\n"
+
+ # Not in a literal block (or just dropped out)
+ if not in_literal:
+ block += line + "\n"
+ if self.sphinx_literal.match(line) or self.sphinx_cblock.match(line):
+ in_literal = True
+ litprefix = ""
+ output += self.highlight_block(block)
+ block = ""
+
+ # Handle any remaining block
+ if block:
+ output += self.highlight_block(block)
+
+ # Print the output with the line prefix
+ for line in output.strip("\n").split("\n"):
+ self.data += self.lineprefix + line + "\n"
+
+ def out_section(self, args, out_docblock=False):
+ """
+ Outputs a block section.
+
+ This could use some work; it's used to output the DOC: sections, and
+ starts by putting out the name of the doc section itself, but that
+ tends to duplicate a header already in the template file.
+ """
+ for section, text in args.sections.items():
+ # Skip sections that are in the nosymbol_table
+ if section in self.nosymbol:
+ continue
+
+ if out_docblock:
+ if not self.out_mode == self.OUTPUT_INCLUDE:
+ self.data += f".. _{section}:\n\n"
+ self.data += f'{self.lineprefix}**{section}**\n\n'
+ else:
+ self.data += f'{self.lineprefix}**{section}**\n\n'
+
+ self.print_lineno(args.section_start_lines.get(section, 0))
+ self.output_highlight(text)
+ self.data += "\n"
+ self.data += "\n"
+
+ def out_doc(self, fname, name, args):
+ if not self.check_doc(name, args):
+ return
+ self.out_section(args, out_docblock=True)
+
+ def out_function(self, fname, name, args):
+
+ oldprefix = self.lineprefix
+ signature = ""
+
+ func_macro = args.get('func_macro', False)
+ if func_macro:
+ signature = name
+ else:
+ if args.get('functiontype'):
+ signature = args['functiontype'] + " "
+ signature += name + " ("
+
+ ln = args.declaration_start_line
+ count = 0
+ for parameter in args.parameterlist:
+ if count != 0:
+ signature += ", "
+ count += 1
+ dtype = args.parametertypes.get(parameter, "")
+
+ if function_pointer.search(dtype):
+ signature += function_pointer.group(1) + parameter + function_pointer.group(3)
+ else:
+ signature += dtype
+
+ if not func_macro:
+ signature += ")"
+
+ self.print_lineno(ln)
+ if args.get('typedef') or not args.get('functiontype'):
+ self.data += f".. c:macro:: {name}\n\n"
+
+ if args.get('typedef'):
+ self.data += " **Typedef**: "
+ self.lineprefix = ""
+ self.output_highlight(args.get('purpose', ""))
+ self.data += "\n\n**Syntax**\n\n"
+ self.data += f" ``{signature}``\n\n"
+ else:
+ self.data += f"``{signature}``\n\n"
+ else:
+ self.data += f".. c:function:: {signature}\n\n"
+
+ if not args.get('typedef'):
+ self.print_lineno(ln)
+ self.lineprefix = " "
+ self.output_highlight(args.get('purpose', ""))
+ self.data += "\n"
+
+ # Put descriptive text into a container (HTML <div>) to help set
+ # function prototypes apart
+ self.lineprefix = " "
+
+ if args.parameterlist:
+ self.data += ".. container:: kernelindent\n\n"
+ self.data += f"{self.lineprefix}**Parameters**\n\n"
+
+ for parameter in args.parameterlist:
+ parameter_name = KernRe(r'\[.*').sub('', parameter)
+ dtype = args.parametertypes.get(parameter, "")
+
+ if dtype:
+ self.data += f"{self.lineprefix}``{dtype}``\n"
+ else:
+ self.data += f"{self.lineprefix}``{parameter}``\n"
+
+ self.print_lineno(args.parameterdesc_start_lines.get(parameter_name, 0))
+
+ self.lineprefix = " "
+ if parameter_name in args.parameterdescs and \
+ args.parameterdescs[parameter_name] != KernelDoc.undescribed:
+
+ self.output_highlight(args.parameterdescs[parameter_name])
+ self.data += "\n"
+ else:
+ self.data += f"{self.lineprefix}*undescribed*\n\n"
+ self.lineprefix = " "
+
+ self.out_section(args)
+ self.lineprefix = oldprefix
+
+ def out_enum(self, fname, name, args):
+
+ oldprefix = self.lineprefix
+ ln = args.declaration_start_line
+
+ self.data += f"\n\n.. c:enum:: {name}\n\n"
+
+ self.print_lineno(ln)
+ self.lineprefix = " "
+ self.output_highlight(args.get('purpose', ''))
+ self.data += "\n"
+
+ self.data += ".. container:: kernelindent\n\n"
+ outer = self.lineprefix + " "
+ self.lineprefix = outer + " "
+ self.data += f"{outer}**Constants**\n\n"
+
+ for parameter in args.parameterlist:
+ self.data += f"{outer}``{parameter}``\n"
+
+ if args.parameterdescs.get(parameter, '') != KernelDoc.undescribed:
+ self.output_highlight(args.parameterdescs[parameter])
+ else:
+ self.data += f"{self.lineprefix}*undescribed*\n\n"
+ self.data += "\n"
+
+ self.lineprefix = oldprefix
+ self.out_section(args)
+
+ def out_typedef(self, fname, name, args):
+
+ oldprefix = self.lineprefix
+ ln = args.declaration_start_line
+
+ self.data += f"\n\n.. c:type:: {name}\n\n"
+
+ self.print_lineno(ln)
+ self.lineprefix = " "
+
+ self.output_highlight(args.get('purpose', ''))
+
+ self.data += "\n"
+
+ self.lineprefix = oldprefix
+ self.out_section(args)
+
+ def out_struct(self, fname, name, args):
+
+ purpose = args.get('purpose', "")
+ declaration = args.get('definition', "")
+ dtype = args.type
+ ln = args.declaration_start_line
+
+ self.data += f"\n\n.. c:{dtype}:: {name}\n\n"
+
+ self.print_lineno(ln)
+
+ oldprefix = self.lineprefix
+ self.lineprefix += " "
+
+ self.output_highlight(purpose)
+ self.data += "\n"
+
+ self.data += ".. container:: kernelindent\n\n"
+ self.data += f"{self.lineprefix}**Definition**::\n\n"
+
+ self.lineprefix = self.lineprefix + " "
+
+ declaration = declaration.replace("\t", self.lineprefix)
+
+ self.data += f"{self.lineprefix}{dtype} {name}" + ' {' + "\n"
+ self.data += f"{declaration}{self.lineprefix}" + "};\n\n"
+
+ self.lineprefix = " "
+ self.data += f"{self.lineprefix}**Members**\n\n"
+ for parameter in args.parameterlist:
+ if not parameter or parameter.startswith("#"):
+ continue
+
+ parameter_name = parameter.split("[", maxsplit=1)[0]
+
+ if args.parameterdescs.get(parameter_name) == KernelDoc.undescribed:
+ continue
+
+ self.print_lineno(args.parameterdesc_start_lines.get(parameter_name, 0))
+
+ self.data += f"{self.lineprefix}``{parameter}``\n"
+
+ self.lineprefix = " "
+ self.output_highlight(args.parameterdescs[parameter_name])
+ self.lineprefix = " "
+
+ self.data += "\n"
+
+ self.data += "\n"
+
+ self.lineprefix = oldprefix
+ self.out_section(args)
+
+
+class ManFormat(OutputFormat):
+ """Consts and functions used by man pages output"""
+
+ highlights = (
+ (type_constant, r"\1"),
+ (type_constant2, r"\1"),
+ (type_func, r"\\fB\1\\fP"),
+ (type_enum, r"\\fI\1\\fP"),
+ (type_struct, r"\\fI\1\\fP"),
+ (type_typedef, r"\\fI\1\\fP"),
+ (type_union, r"\\fI\1\\fP"),
+ (type_param, r"\\fI\1\\fP"),
+ (type_param_ref, r"\\fI\1\2\\fP"),
+ (type_member, r"\\fI\1\2\3\\fP"),
+ (type_fallback, r"\\fI\1\\fP")
+ )
+ blankline = ""
+
+ date_formats = [
+ "%a %b %d %H:%M:%S %Z %Y",
+ "%a %b %d %H:%M:%S %Y",
+ "%Y-%m-%d",
+ "%b %d %Y",
+ "%B %d %Y",
+ "%m %d %Y",
+ ]
+
+ def __init__(self, modulename):
+ """
+ Creates class variables.
+
+ Not really mandatory, but it is a good coding style and makes
+ pylint happy.
+ """
+
+ super().__init__()
+ self.modulename = modulename
+ self.symbols = []
+
+ dt = None
+ tstamp = os.environ.get("KBUILD_BUILD_TIMESTAMP")
+ if tstamp:
+ for fmt in self.date_formats:
+ try:
+ dt = datetime.strptime(tstamp, fmt)
+ break
+ except ValueError:
+ pass
+
+ if not dt:
+ dt = datetime.now()
+
+ self.man_date = dt.strftime("%B %Y")
+
+ def arg_name(self, args, name):
+ """
+ Return the name that will be used for the man page.
+
+ As we may have the same name on different namespaces,
+ prepend the data type for all types except functions and typedefs.
+
+ The doc section is special: it uses the modulename.
+ """
+
+ dtype = args.type
+
+ if dtype == "doc":
+ return self.modulename
+
+ if dtype in ["function", "typedef"]:
+ return name
+
+ return f"{dtype} {name}"
+
+ def set_symbols(self, symbols):
+ """
+ Get a list of all symbols from kernel_doc.
+
+ Man pages will uses it to add a SEE ALSO section with other
+ symbols at the same file.
+ """
+ self.symbols = symbols
+
+ def out_tail(self, fname, name, args):
+ """Adds a tail for all man pages"""
+
+ # SEE ALSO section
+ self.data += f'.SH "SEE ALSO"' + "\n.PP\n"
+ self.data += (f"Kernel file \\fB{args.fname}\\fR\n")
+ if len(self.symbols) >= 2:
+ cur_name = self.arg_name(args, name)
+
+ related = []
+ for arg in self.symbols:
+ out_name = self.arg_name(arg, arg.name)
+
+ if cur_name == out_name:
+ continue
+
+ related.append(f"\\fB{out_name}\\fR(9)")
+
+ self.data += ",\n".join(related) + "\n"
+
+ # TODO: does it make sense to add other sections? Maybe
+ # REPORTING ISSUES? LICENSE?
+
+ def msg(self, fname, name, args):
+ """
+ Handles a single entry from kernel-doc parser.
+
+ Add a tail at the end of man pages output.
+ """
+ super().msg(fname, name, args)
+ self.out_tail(fname, name, args)
+
+ return self.data
+
+ def output_highlight(self, block):
+ """
+ Outputs a C symbol that may require being highlighted with
+ self.highlights variable using troff syntax
+ """
+
+ contents = self.highlight_block(block)
+
+ if isinstance(contents, list):
+ contents = "\n".join(contents)
+
+ for line in contents.strip("\n").split("\n"):
+ line = KernRe(r"^\s*").sub("", line)
+ if not line:
+ continue
+
+ if line[0] == ".":
+ self.data += "\\&" + line + "\n"
+ else:
+ self.data += line + "\n"
+
+ def out_doc(self, fname, name, args):
+ if not self.check_doc(name, args):
+ return
+
+ out_name = self.arg_name(args, name)
+
+ self.data += f'.TH "{self.modulename}" 9 "{out_name}" "{self.man_date}" "API Manual" LINUX' + "\n"
+
+ for section, text in args.sections.items():
+ self.data += f'.SH "{section}"' + "\n"
+ self.output_highlight(text)
+
+ def out_function(self, fname, name, args):
+ """output function in man"""
+
+ out_name = self.arg_name(args, name)
+
+ self.data += f'.TH "{name}" 9 "{out_name}" "{self.man_date}" "Kernel Hacker\'s Manual" LINUX' + "\n"
+
+ self.data += ".SH NAME\n"
+ self.data += f"{name} \\- {args['purpose']}\n"
+
+ self.data += ".SH SYNOPSIS\n"
+ if args.get('functiontype', ''):
+ self.data += f'.B "{args["functiontype"]}" {name}' + "\n"
+ else:
+ self.data += f'.B "{name}' + "\n"
+
+ count = 0
+ parenth = "("
+ post = ","
+
+ for parameter in args.parameterlist:
+ if count == len(args.parameterlist) - 1:
+ post = ");"
+
+ dtype = args.parametertypes.get(parameter, "")
+ if function_pointer.match(dtype):
+ # Pointer-to-function
+ self.data += f'".BI "{parenth}{function_pointer.group(1)}" " ") ({function_pointer.group(2)}){post}"' + "\n"
+ else:
+ dtype = KernRe(r'([^\*])$').sub(r'\1 ', dtype)
+
+ self.data += f'.BI "{parenth}{dtype}" "{post}"' + "\n"
+ count += 1
+ parenth = ""
+
+ if args.parameterlist:
+ self.data += ".SH ARGUMENTS\n"
+
+ for parameter in args.parameterlist:
+ parameter_name = re.sub(r'\[.*', '', parameter)
+
+ self.data += f'.IP "{parameter}" 12' + "\n"
+ self.output_highlight(args.parameterdescs.get(parameter_name, ""))
+
+ for section, text in args.sections.items():
+ self.data += f'.SH "{section.upper()}"' + "\n"
+ self.output_highlight(text)
+
+ def out_enum(self, fname, name, args):
+ out_name = self.arg_name(args, name)
+
+ self.data += f'.TH "{self.modulename}" 9 "{out_name}" "{self.man_date}" "API Manual" LINUX' + "\n"
+
+ self.data += ".SH NAME\n"
+ self.data += f"enum {name} \\- {args['purpose']}\n"
+
+ self.data += ".SH SYNOPSIS\n"
+ self.data += f"enum {name}" + " {\n"
+
+ count = 0
+ for parameter in args.parameterlist:
+ self.data += f'.br\n.BI " {parameter}"' + "\n"
+ if count == len(args.parameterlist) - 1:
+ self.data += "\n};\n"
+ else:
+ self.data += ", \n.br\n"
+
+ count += 1
+
+ self.data += ".SH Constants\n"
+
+ for parameter in args.parameterlist:
+ parameter_name = KernRe(r'\[.*').sub('', parameter)
+ self.data += f'.IP "{parameter}" 12' + "\n"
+ self.output_highlight(args.parameterdescs.get(parameter_name, ""))
+
+ for section, text in args.sections.items():
+ self.data += f'.SH "{section}"' + "\n"
+ self.output_highlight(text)
+
+ def out_typedef(self, fname, name, args):
+ module = self.modulename
+ purpose = args.get('purpose')
+ out_name = self.arg_name(args, name)
+
+ self.data += f'.TH "{module}" 9 "{out_name}" "{self.man_date}" "API Manual" LINUX' + "\n"
+
+ self.data += ".SH NAME\n"
+ self.data += f"typedef {name} \\- {purpose}\n"
+
+ for section, text in args.sections.items():
+ self.data += f'.SH "{section}"' + "\n"
+ self.output_highlight(text)
+
+ def out_struct(self, fname, name, args):
+ module = self.modulename
+ purpose = args.get('purpose')
+ definition = args.get('definition')
+ out_name = self.arg_name(args, name)
+
+ self.data += f'.TH "{module}" 9 "{out_name}" "{self.man_date}" "API Manual" LINUX' + "\n"
+
+ self.data += ".SH NAME\n"
+ self.data += f"{args.type} {name} \\- {purpose}\n"
+
+ # Replace tabs with two spaces and handle newlines
+ declaration = definition.replace("\t", " ")
+ declaration = KernRe(r"\n").sub('"\n.br\n.BI "', declaration)
+
+ self.data += ".SH SYNOPSIS\n"
+ self.data += f"{args.type} {name} " + "{" + "\n.br\n"
+ self.data += f'.BI "{declaration}\n' + "};\n.br\n\n"
+
+ self.data += ".SH Members\n"
+ for parameter in args.parameterlist:
+ if parameter.startswith("#"):
+ continue
+
+ parameter_name = re.sub(r"\[.*", "", parameter)
+
+ if args.parameterdescs.get(parameter_name) == KernelDoc.undescribed:
+ continue
+
+ self.data += f'.IP "{parameter}" 12' + "\n"
+ self.output_highlight(args.parameterdescs.get(parameter_name))
+
+ for section, text in args.sections.items():
+ self.data += f'.SH "{section}"' + "\n"
+ self.output_highlight(text)
diff --git a/tools/lib/python/kdoc/kdoc_parser.py b/tools/lib/python/kdoc/kdoc_parser.py
new file mode 100644
index 000000000000..500aafc50032
--- /dev/null
+++ b/tools/lib/python/kdoc/kdoc_parser.py
@@ -0,0 +1,1670 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+# Copyright(c) 2025: Mauro Carvalho Chehab <mchehab@kernel.org>.
+#
+# pylint: disable=C0301,C0302,R0904,R0912,R0913,R0914,R0915,R0917,R1702
+
+"""
+kdoc_parser
+===========
+
+Read a C language source or header FILE and extract embedded
+documentation comments
+"""
+
+import sys
+import re
+from pprint import pformat
+
+from kdoc.kdoc_re import NestedMatch, KernRe
+from kdoc.kdoc_item import KdocItem
+
+#
+# Regular expressions used to parse kernel-doc markups at KernelDoc class.
+#
+# Let's declare them in lowercase outside any class to make it easier to
+# convert from the Perl script.
+#
+# As those are evaluated at the beginning, no need to cache them
+#
+
+# Allow whitespace at end of comment start.
+doc_start = KernRe(r'^/\*\*\s*$', cache=False)
+
+doc_end = KernRe(r'\*/', cache=False)
+doc_com = KernRe(r'\s*\*\s*', cache=False)
+doc_com_body = KernRe(r'\s*\* ?', cache=False)
+doc_decl = doc_com + KernRe(r'(\w+)', cache=False)
+
+# @params and a strictly limited set of supported section names
+# Specifically:
+# Match @word:
+# @...:
+# @{section-name}:
+# while trying to not match literal block starts like "example::"
+#
+known_section_names = 'description|context|returns?|notes?|examples?'
+known_sections = KernRe(known_section_names, flags = re.I)
+doc_sect = doc_com + \
+ KernRe(r'\s*(@[.\w]+|@\.\.\.|' + known_section_names + r')\s*:([^:].*)?$',
+ flags=re.I, cache=False)
+
+doc_content = doc_com_body + KernRe(r'(.*)', cache=False)
+doc_inline_start = KernRe(r'^\s*/\*\*\s*$', cache=False)
+doc_inline_sect = KernRe(r'\s*\*\s*(@\s*[\w][\w\.]*\s*):(.*)', cache=False)
+doc_inline_end = KernRe(r'^\s*\*/\s*$', cache=False)
+doc_inline_oneline = KernRe(r'^\s*/\*\*\s*(@[\w\s]+):\s*(.*)\s*\*/\s*$', cache=False)
+
+export_symbol = KernRe(r'^\s*EXPORT_SYMBOL(_GPL)?\s*\(\s*(\w+)\s*\)\s*', cache=False)
+export_symbol_ns = KernRe(r'^\s*EXPORT_SYMBOL_NS(_GPL)?\s*\(\s*(\w+)\s*,\s*"\S+"\)\s*', cache=False)
+
+type_param = KernRe(r"@(\w*((\.\w+)|(->\w+))*(\.\.\.)?)", cache=False)
+
+#
+# Tests for the beginning of a kerneldoc block in its various forms.
+#
+doc_block = doc_com + KernRe(r'DOC:\s*(.*)?', cache=False)
+doc_begin_data = KernRe(r"^\s*\*?\s*(struct|union|enum|typedef)\b\s*(\w*)", cache = False)
+doc_begin_func = KernRe(str(doc_com) + # initial " * '
+ r"(?:\w+\s*\*\s*)?" + # type (not captured)
+ r'(?:define\s+)?' + # possible "define" (not captured)
+ r'(\w+)\s*(?:\(\w*\))?\s*' + # name and optional "(...)"
+ r'(?:[-:].*)?$', # description (not captured)
+ cache = False)
+
+#
+# Here begins a long set of transformations to turn structure member prefixes
+# and macro invocations into something we can parse and generate kdoc for.
+#
+struct_args_pattern = r'([^,)]+)'
+
+struct_xforms = [
+ # Strip attributes
+ (KernRe(r"__attribute__\s*\(\([a-z0-9,_\*\s\(\)]*\)\)", flags=re.I | re.S, cache=False), ' '),
+ (KernRe(r'\s*__aligned\s*\([^;]*\)', re.S), ' '),
+ (KernRe(r'\s*__counted_by\s*\([^;]*\)', re.S), ' '),
+ (KernRe(r'\s*__counted_by_(le|be)\s*\([^;]*\)', re.S), ' '),
+ (KernRe(r'\s*__packed\s*', re.S), ' '),
+ (KernRe(r'\s*CRYPTO_MINALIGN_ATTR', re.S), ' '),
+ (KernRe(r'\s*__private', re.S), ' '),
+ (KernRe(r'\s*__rcu', re.S), ' '),
+ (KernRe(r'\s*____cacheline_aligned_in_smp', re.S), ' '),
+ (KernRe(r'\s*____cacheline_aligned', re.S), ' '),
+ (KernRe(r'\s*__cacheline_group_(begin|end)\([^\)]+\);'), ''),
+ #
+ # Unwrap struct_group macros based on this definition:
+ # __struct_group(TAG, NAME, ATTRS, MEMBERS...)
+ # which has variants like: struct_group(NAME, MEMBERS...)
+ # Only MEMBERS arguments require documentation.
+ #
+ # Parsing them happens on two steps:
+ #
+ # 1. drop struct group arguments that aren't at MEMBERS,
+ # storing them as STRUCT_GROUP(MEMBERS)
+ #
+ # 2. remove STRUCT_GROUP() ancillary macro.
+ #
+ # The original logic used to remove STRUCT_GROUP() using an
+ # advanced regex:
+ #
+ # \bSTRUCT_GROUP(\(((?:(?>[^)(]+)|(?1))*)\))[^;]*;
+ #
+ # with two patterns that are incompatible with
+ # Python re module, as it has:
+ #
+ # - a recursive pattern: (?1)
+ # - an atomic grouping: (?>...)
+ #
+ # I tried a simpler version: but it didn't work either:
+ # \bSTRUCT_GROUP\(([^\)]+)\)[^;]*;
+ #
+ # As it doesn't properly match the end parenthesis on some cases.
+ #
+ # So, a better solution was crafted: there's now a NestedMatch
+ # class that ensures that delimiters after a search are properly
+ # matched. So, the implementation to drop STRUCT_GROUP() will be
+ # handled in separate.
+ #
+ (KernRe(r'\bstruct_group\s*\(([^,]*,)', re.S), r'STRUCT_GROUP('),
+ (KernRe(r'\bstruct_group_attr\s*\(([^,]*,){2}', re.S), r'STRUCT_GROUP('),
+ (KernRe(r'\bstruct_group_tagged\s*\(([^,]*),([^,]*),', re.S), r'struct \1 \2; STRUCT_GROUP('),
+ (KernRe(r'\b__struct_group\s*\(([^,]*,){3}', re.S), r'STRUCT_GROUP('),
+ #
+ # Replace macros
+ #
+ # TODO: use NestedMatch for FOO($1, $2, ...) matches
+ #
+ # it is better to also move those to the NestedMatch logic,
+ # to ensure that parentheses will be properly matched.
+ #
+ (KernRe(r'__ETHTOOL_DECLARE_LINK_MODE_MASK\s*\(([^\)]+)\)', re.S),
+ r'DECLARE_BITMAP(\1, __ETHTOOL_LINK_MODE_MASK_NBITS)'),
+ (KernRe(r'DECLARE_PHY_INTERFACE_MASK\s*\(([^\)]+)\)', re.S),
+ r'DECLARE_BITMAP(\1, PHY_INTERFACE_MODE_MAX)'),
+ (KernRe(r'DECLARE_BITMAP\s*\(' + struct_args_pattern + r',\s*' + struct_args_pattern + r'\)',
+ re.S), r'unsigned long \1[BITS_TO_LONGS(\2)]'),
+ (KernRe(r'DECLARE_HASHTABLE\s*\(' + struct_args_pattern + r',\s*' + struct_args_pattern + r'\)',
+ re.S), r'unsigned long \1[1 << ((\2) - 1)]'),
+ (KernRe(r'DECLARE_KFIFO\s*\(' + struct_args_pattern + r',\s*' + struct_args_pattern +
+ r',\s*' + struct_args_pattern + r'\)', re.S), r'\2 *\1'),
+ (KernRe(r'DECLARE_KFIFO_PTR\s*\(' + struct_args_pattern + r',\s*' +
+ struct_args_pattern + r'\)', re.S), r'\2 *\1'),
+ (KernRe(r'(?:__)?DECLARE_FLEX_ARRAY\s*\(' + struct_args_pattern + r',\s*' +
+ struct_args_pattern + r'\)', re.S), r'\1 \2[]'),
+ (KernRe(r'DEFINE_DMA_UNMAP_ADDR\s*\(' + struct_args_pattern + r'\)', re.S), r'dma_addr_t \1'),
+ (KernRe(r'DEFINE_DMA_UNMAP_LEN\s*\(' + struct_args_pattern + r'\)', re.S), r'__u32 \1'),
+]
+#
+# Regexes here are guaranteed to have the end delimiter matching
+# the start delimiter. Yet, right now, only one replace group
+# is allowed.
+#
+struct_nested_prefixes = [
+ (re.compile(r'\bSTRUCT_GROUP\('), r'\1'),
+]
+
+#
+# Transforms for function prototypes
+#
+function_xforms = [
+ (KernRe(r"^static +"), ""),
+ (KernRe(r"^extern +"), ""),
+ (KernRe(r"^asmlinkage +"), ""),
+ (KernRe(r"^inline +"), ""),
+ (KernRe(r"^__inline__ +"), ""),
+ (KernRe(r"^__inline +"), ""),
+ (KernRe(r"^__always_inline +"), ""),
+ (KernRe(r"^noinline +"), ""),
+ (KernRe(r"^__FORTIFY_INLINE +"), ""),
+ (KernRe(r"__init +"), ""),
+ (KernRe(r"__init_or_module +"), ""),
+ (KernRe(r"__deprecated +"), ""),
+ (KernRe(r"__flatten +"), ""),
+ (KernRe(r"__meminit +"), ""),
+ (KernRe(r"__must_check +"), ""),
+ (KernRe(r"__weak +"), ""),
+ (KernRe(r"__sched +"), ""),
+ (KernRe(r"_noprof"), ""),
+ (KernRe(r"__always_unused *"), ""),
+ (KernRe(r"__printf\s*\(\s*\d*\s*,\s*\d*\s*\) +"), ""),
+ (KernRe(r"__(?:re)?alloc_size\s*\(\s*\d+\s*(?:,\s*\d+\s*)?\) +"), ""),
+ (KernRe(r"__diagnose_as\s*\(\s*\S+\s*(?:,\s*\d+\s*)*\) +"), ""),
+ (KernRe(r"DECL_BUCKET_PARAMS\s*\(\s*(\S+)\s*,\s*(\S+)\s*\)"), r"\1, \2"),
+ (KernRe(r"__attribute_const__ +"), ""),
+ (KernRe(r"__attribute__\s*\(\((?:[\w\s]+(?:\([^)]*\))?\s*,?)+\)\)\s+"), ""),
+]
+
+#
+# Apply a set of transforms to a block of text.
+#
+def apply_transforms(xforms, text):
+ for search, subst in xforms:
+ text = search.sub(subst, text)
+ return text
+
+#
+# A little helper to get rid of excess white space
+#
+multi_space = KernRe(r'\s\s+')
+def trim_whitespace(s):
+ return multi_space.sub(' ', s.strip())
+
+#
+# Remove struct/enum members that have been marked "private".
+#
+def trim_private_members(text):
+ #
+ # First look for a "public:" block that ends a private region, then
+ # handle the "private until the end" case.
+ #
+ text = KernRe(r'/\*\s*private:.*?/\*\s*public:.*?\*/', flags=re.S).sub('', text)
+ text = KernRe(r'/\*\s*private:.*', flags=re.S).sub('', text)
+ #
+ # We needed the comments to do the above, but now we can take them out.
+ #
+ return KernRe(r'\s*/\*.*?\*/\s*', flags=re.S).sub('', text).strip()
+
+class state:
+ """
+ State machine enums
+ """
+
+ # Parser states
+ NORMAL = 0 # normal code
+ NAME = 1 # looking for function name
+ DECLARATION = 2 # We have seen a declaration which might not be done
+ BODY = 3 # the body of the comment
+ SPECIAL_SECTION = 4 # doc section ending with a blank line
+ PROTO = 5 # scanning prototype
+ DOCBLOCK = 6 # documentation block
+ INLINE_NAME = 7 # gathering doc outside main block
+ INLINE_TEXT = 8 # reading the body of inline docs
+
+ name = [
+ "NORMAL",
+ "NAME",
+ "DECLARATION",
+ "BODY",
+ "SPECIAL_SECTION",
+ "PROTO",
+ "DOCBLOCK",
+ "INLINE_NAME",
+ "INLINE_TEXT",
+ ]
+
+
+SECTION_DEFAULT = "Description" # default section
+
+class KernelEntry:
+
+ def __init__(self, config, fname, ln):
+ self.config = config
+ self.fname = fname
+
+ self._contents = []
+ self.prototype = ""
+
+ self.warnings = []
+
+ self.parameterlist = []
+ self.parameterdescs = {}
+ self.parametertypes = {}
+ self.parameterdesc_start_lines = {}
+
+ self.section_start_lines = {}
+ self.sections = {}
+
+ self.anon_struct_union = False
+
+ self.leading_space = None
+
+ self.fname = fname
+
+ # State flags
+ self.brcount = 0
+ self.declaration_start_line = ln + 1
+
+ #
+ # Management of section contents
+ #
+ def add_text(self, text):
+ self._contents.append(text)
+
+ def contents(self):
+ return '\n'.join(self._contents) + '\n'
+
+ # TODO: rename to emit_message after removal of kernel-doc.pl
+ def emit_msg(self, ln, msg, *, warning=True):
+ """Emit a message"""
+
+ log_msg = f"{self.fname}:{ln} {msg}"
+
+ if not warning:
+ self.config.log.info(log_msg)
+ return
+
+ # Delegate warning output to output logic, as this way it
+ # will report warnings/info only for symbols that are output
+
+ self.warnings.append(log_msg)
+ return
+
+ #
+ # Begin a new section.
+ #
+ def begin_section(self, line_no, title = SECTION_DEFAULT, dump = False):
+ if dump:
+ self.dump_section(start_new = True)
+ self.section = title
+ self.new_start_line = line_no
+
+ def dump_section(self, start_new=True):
+ """
+ Dumps section contents to arrays/hashes intended for that purpose.
+ """
+ #
+ # If we have accumulated no contents in the default ("description")
+ # section, don't bother.
+ #
+ if self.section == SECTION_DEFAULT and not self._contents:
+ return
+ name = self.section
+ contents = self.contents()
+
+ if type_param.match(name):
+ name = type_param.group(1)
+
+ self.parameterdescs[name] = contents
+ self.parameterdesc_start_lines[name] = self.new_start_line
+
+ self.new_start_line = 0
+
+ else:
+ if name in self.sections and self.sections[name] != "":
+ # Only warn on user-specified duplicate section names
+ if name != SECTION_DEFAULT:
+ self.emit_msg(self.new_start_line,
+ f"duplicate section name '{name}'")
+ # Treat as a new paragraph - add a blank line
+ self.sections[name] += '\n' + contents
+ else:
+ self.sections[name] = contents
+ self.section_start_lines[name] = self.new_start_line
+ self.new_start_line = 0
+
+# self.config.log.debug("Section: %s : %s", name, pformat(vars(self)))
+
+ if start_new:
+ self.section = SECTION_DEFAULT
+ self._contents = []
+
+python_warning = False
+
+class KernelDoc:
+ """
+ Read a C language source or header FILE and extract embedded
+ documentation comments.
+ """
+
+ # Section names
+
+ section_context = "Context"
+ section_return = "Return"
+
+ undescribed = "-- undescribed --"
+
+ def __init__(self, config, fname):
+ """Initialize internal variables"""
+
+ self.fname = fname
+ self.config = config
+
+ # Initial state for the state machines
+ self.state = state.NORMAL
+
+ # Store entry currently being processed
+ self.entry = None
+
+ # Place all potential outputs into an array
+ self.entries = []
+
+ #
+ # We need Python 3.7 for its "dicts remember the insertion
+ # order" guarantee
+ #
+ global python_warning
+ if (not python_warning and
+ sys.version_info.major == 3 and sys.version_info.minor < 7):
+
+ self.emit_msg(0,
+ 'Python 3.7 or later is required for correct results')
+ python_warning = True
+
+ def emit_msg(self, ln, msg, *, warning=True):
+ """Emit a message"""
+
+ if self.entry:
+ self.entry.emit_msg(ln, msg, warning=warning)
+ return
+
+ log_msg = f"{self.fname}:{ln} {msg}"
+
+ if warning:
+ self.config.log.warning(log_msg)
+ else:
+ self.config.log.info(log_msg)
+
+ def dump_section(self, start_new=True):
+ """
+ Dumps section contents to arrays/hashes intended for that purpose.
+ """
+
+ if self.entry:
+ self.entry.dump_section(start_new)
+
+ # TODO: rename it to store_declaration after removal of kernel-doc.pl
+ def output_declaration(self, dtype, name, **args):
+ """
+ Stores the entry into an entry array.
+
+ The actual output and output filters will be handled elsewhere
+ """
+
+ item = KdocItem(name, self.fname, dtype,
+ self.entry.declaration_start_line, **args)
+ item.warnings = self.entry.warnings
+
+ # Drop empty sections
+ # TODO: improve empty sections logic to emit warnings
+ sections = self.entry.sections
+ for section in ["Description", "Return"]:
+ if section in sections and not sections[section].rstrip():
+ del sections[section]
+ item.set_sections(sections, self.entry.section_start_lines)
+ item.set_params(self.entry.parameterlist, self.entry.parameterdescs,
+ self.entry.parametertypes,
+ self.entry.parameterdesc_start_lines)
+ self.entries.append(item)
+
+ self.config.log.debug("Output: %s:%s = %s", dtype, name, pformat(args))
+
+ def reset_state(self, ln):
+ """
+ Ancillary routine to create a new entry. It initializes all
+ variables used by the state machine.
+ """
+
+ #
+ # Flush the warnings out before we proceed further
+ #
+ if self.entry and self.entry not in self.entries:
+ for log_msg in self.entry.warnings:
+ self.config.log.warning(log_msg)
+
+ self.entry = KernelEntry(self.config, self.fname, ln)
+
+ # State flags
+ self.state = state.NORMAL
+
+ def push_parameter(self, ln, decl_type, param, dtype,
+ org_arg, declaration_name):
+ """
+ Store parameters and their descriptions at self.entry.
+ """
+
+ if self.entry.anon_struct_union and dtype == "" and param == "}":
+ return # Ignore the ending }; from anonymous struct/union
+
+ self.entry.anon_struct_union = False
+
+ param = KernRe(r'[\[\)].*').sub('', param, count=1)
+
+ #
+ # Look at various "anonymous type" cases.
+ #
+ if dtype == '':
+ if param.endswith("..."):
+ if len(param) > 3: # there is a name provided, use that
+ param = param[:-3]
+ if not self.entry.parameterdescs.get(param):
+ self.entry.parameterdescs[param] = "variable arguments"
+
+ elif (not param) or param == "void":
+ param = "void"
+ self.entry.parameterdescs[param] = "no arguments"
+
+ elif param in ["struct", "union"]:
+ # Handle unnamed (anonymous) union or struct
+ dtype = param
+ param = "{unnamed_" + param + "}"
+ self.entry.parameterdescs[param] = "anonymous\n"
+ self.entry.anon_struct_union = True
+
+ # Warn if parameter has no description
+ # (but ignore ones starting with # as these are not parameters
+ # but inline preprocessor statements)
+ if param not in self.entry.parameterdescs and not param.startswith("#"):
+ self.entry.parameterdescs[param] = self.undescribed
+
+ if "." not in param:
+ if decl_type == 'function':
+ dname = f"{decl_type} parameter"
+ else:
+ dname = f"{decl_type} member"
+
+ self.emit_msg(ln,
+ f"{dname} '{param}' not described in '{declaration_name}'")
+
+ # Strip spaces from param so that it is one continuous string on
+ # parameterlist. This fixes a problem where check_sections()
+ # cannot find a parameter like "addr[6 + 2]" because it actually
+ # appears as "addr[6", "+", "2]" on the parameter list.
+ # However, it's better to maintain the param string unchanged for
+ # output, so just weaken the string compare in check_sections()
+ # to ignore "[blah" in a parameter string.
+
+ self.entry.parameterlist.append(param)
+ org_arg = KernRe(r'\s\s+').sub(' ', org_arg)
+ self.entry.parametertypes[param] = org_arg
+
+
+ def create_parameter_list(self, ln, decl_type, args,
+ splitter, declaration_name):
+ """
+ Creates a list of parameters, storing them at self.entry.
+ """
+
+ # temporarily replace all commas inside function pointer definition
+ arg_expr = KernRe(r'(\([^\),]+),')
+ while arg_expr.search(args):
+ args = arg_expr.sub(r"\1#", args)
+
+ for arg in args.split(splitter):
+ # Ignore argument attributes
+ arg = KernRe(r'\sPOS0?\s').sub(' ', arg)
+
+ # Strip leading/trailing spaces
+ arg = arg.strip()
+ arg = KernRe(r'\s+').sub(' ', arg, count=1)
+
+ if arg.startswith('#'):
+ # Treat preprocessor directive as a typeless variable just to fill
+ # corresponding data structures "correctly". Catch it later in
+ # output_* subs.
+
+ # Treat preprocessor directive as a typeless variable
+ self.push_parameter(ln, decl_type, arg, "",
+ "", declaration_name)
+ #
+ # The pointer-to-function case.
+ #
+ elif KernRe(r'\(.+\)\s*\(').search(arg):
+ arg = arg.replace('#', ',')
+ r = KernRe(r'[^\(]+\(\*?\s*' # Everything up to "(*"
+ r'([\w\[\].]*)' # Capture the name and possible [array]
+ r'\s*\)') # Make sure the trailing ")" is there
+ if r.match(arg):
+ param = r.group(1)
+ else:
+ self.emit_msg(ln, f"Invalid param: {arg}")
+ param = arg
+ dtype = arg.replace(param, '')
+ self.push_parameter(ln, decl_type, param, dtype, arg, declaration_name)
+ #
+ # The array-of-pointers case. Dig the parameter name out from the middle
+ # of the declaration.
+ #
+ elif KernRe(r'\(.+\)\s*\[').search(arg):
+ r = KernRe(r'[^\(]+\(\s*\*\s*' # Up to "(" and maybe "*"
+ r'([\w.]*?)' # The actual pointer name
+ r'\s*(\[\s*\w+\s*\]\s*)*\)') # The [array portion]
+ if r.match(arg):
+ param = r.group(1)
+ else:
+ self.emit_msg(ln, f"Invalid param: {arg}")
+ param = arg
+ dtype = arg.replace(param, '')
+ self.push_parameter(ln, decl_type, param, dtype, arg, declaration_name)
+ elif arg:
+ #
+ # Clean up extraneous spaces and split the string at commas; the first
+ # element of the resulting list will also include the type information.
+ #
+ arg = KernRe(r'\s*:\s*').sub(":", arg)
+ arg = KernRe(r'\s*\[').sub('[', arg)
+ args = KernRe(r'\s*,\s*').split(arg)
+ args[0] = re.sub(r'(\*+)\s*', r' \1', args[0])
+ #
+ # args[0] has a string of "type a". If "a" includes an [array]
+ # declaration, we want to not be fooled by any white space inside
+ # the brackets, so detect and handle that case specially.
+ #
+ r = KernRe(r'^([^[\]]*\s+)(.*)$')
+ if r.match(args[0]):
+ args[0] = r.group(2)
+ dtype = r.group(1)
+ else:
+ # No space in args[0]; this seems wrong but preserves previous behavior
+ dtype = ''
+
+ bitfield_re = KernRe(r'(.*?):(\w+)')
+ for param in args:
+ #
+ # For pointers, shift the star(s) from the variable name to the
+ # type declaration.
+ #
+ r = KernRe(r'^(\*+)\s*(.*)')
+ if r.match(param):
+ self.push_parameter(ln, decl_type, r.group(2),
+ f"{dtype} {r.group(1)}",
+ arg, declaration_name)
+ #
+ # Perform a similar shift for bitfields.
+ #
+ elif bitfield_re.search(param):
+ if dtype != "": # Skip unnamed bit-fields
+ self.push_parameter(ln, decl_type, bitfield_re.group(1),
+ f"{dtype}:{bitfield_re.group(2)}",
+ arg, declaration_name)
+ else:
+ self.push_parameter(ln, decl_type, param, dtype,
+ arg, declaration_name)
+
+ def check_sections(self, ln, decl_name, decl_type):
+ """
+ Check for errors inside sections, emitting warnings if not found
+ parameters are described.
+ """
+ for section in self.entry.sections:
+ if section not in self.entry.parameterlist and \
+ not known_sections.search(section):
+ if decl_type == 'function':
+ dname = f"{decl_type} parameter"
+ else:
+ dname = f"{decl_type} member"
+ self.emit_msg(ln,
+ f"Excess {dname} '{section}' description in '{decl_name}'")
+
+ def check_return_section(self, ln, declaration_name, return_type):
+ """
+ If the function doesn't return void, warns about the lack of a
+ return description.
+ """
+
+ if not self.config.wreturn:
+ return
+
+ # Ignore an empty return type (It's a macro)
+ # Ignore functions with a "void" return type (but not "void *")
+ if not return_type or KernRe(r'void\s*\w*\s*$').search(return_type):
+ return
+
+ if not self.entry.sections.get("Return", None):
+ self.emit_msg(ln,
+ f"No description found for return value of '{declaration_name}'")
+
+ #
+ # Split apart a structure prototype; returns (struct|union, name, members) or None
+ #
+ def split_struct_proto(self, proto):
+ type_pattern = r'(struct|union)'
+ qualifiers = [
+ "__attribute__",
+ "__packed",
+ "__aligned",
+ "____cacheline_aligned_in_smp",
+ "____cacheline_aligned",
+ ]
+ definition_body = r'\{(.*)\}\s*' + "(?:" + '|'.join(qualifiers) + ")?"
+
+ r = KernRe(type_pattern + r'\s+(\w+)\s*' + definition_body)
+ if r.search(proto):
+ return (r.group(1), r.group(2), r.group(3))
+ else:
+ r = KernRe(r'typedef\s+' + type_pattern + r'\s*' + definition_body + r'\s*(\w+)\s*;')
+ if r.search(proto):
+ return (r.group(1), r.group(3), r.group(2))
+ return None
+ #
+ # Rewrite the members of a structure or union for easier formatting later on.
+ # Among other things, this function will turn a member like:
+ #
+ # struct { inner_members; } foo;
+ #
+ # into:
+ #
+ # struct foo; inner_members;
+ #
+ def rewrite_struct_members(self, members):
+ #
+ # Process struct/union members from the most deeply nested outward. The
+ # trick is in the ^{ below - it prevents a match of an outer struct/union
+ # until the inner one has been munged (removing the "{" in the process).
+ #
+ struct_members = KernRe(r'(struct|union)' # 0: declaration type
+ r'([^\{\};]+)' # 1: possible name
+ r'(\{)'
+ r'([^\{\}]*)' # 3: Contents of declaration
+ r'(\})'
+ r'([^\{\};]*)(;)') # 5: Remaining stuff after declaration
+ tuples = struct_members.findall(members)
+ while tuples:
+ for t in tuples:
+ newmember = ""
+ oldmember = "".join(t) # Reconstruct the original formatting
+ dtype, name, lbr, content, rbr, rest, semi = t
+ #
+ # Pass through each field name, normalizing the form and formatting.
+ #
+ for s_id in rest.split(','):
+ s_id = s_id.strip()
+ newmember += f"{dtype} {s_id}; "
+ #
+ # Remove bitfield/array/pointer info, getting the bare name.
+ #
+ s_id = KernRe(r'[:\[].*').sub('', s_id)
+ s_id = KernRe(r'^\s*\**(\S+)\s*').sub(r'\1', s_id)
+ #
+ # Pass through the members of this inner structure/union.
+ #
+ for arg in content.split(';'):
+ arg = arg.strip()
+ #
+ # Look for (type)(*name)(args) - pointer to function
+ #
+ r = KernRe(r'^([^\(]+\(\*?\s*)([\w.]*)(\s*\).*)')
+ if r.match(arg):
+ dtype, name, extra = r.group(1), r.group(2), r.group(3)
+ # Pointer-to-function
+ if not s_id:
+ # Anonymous struct/union
+ newmember += f"{dtype}{name}{extra}; "
+ else:
+ newmember += f"{dtype}{s_id}.{name}{extra}; "
+ #
+ # Otherwise a non-function member.
+ #
+ else:
+ #
+ # Remove bitmap and array portions and spaces around commas
+ #
+ arg = KernRe(r':\s*\d+\s*').sub('', arg)
+ arg = KernRe(r'\[.*\]').sub('', arg)
+ arg = KernRe(r'\s*,\s*').sub(',', arg)
+ #
+ # Look for a normal decl - "type name[,name...]"
+ #
+ r = KernRe(r'(.*)\s+([\S+,]+)')
+ if r.search(arg):
+ for name in r.group(2).split(','):
+ name = KernRe(r'^\s*\**(\S+)\s*').sub(r'\1', name)
+ if not s_id:
+ # Anonymous struct/union
+ newmember += f"{r.group(1)} {name}; "
+ else:
+ newmember += f"{r.group(1)} {s_id}.{name}; "
+ else:
+ newmember += f"{arg}; "
+ #
+ # At the end of the s_id loop, replace the original declaration with
+ # the munged version.
+ #
+ members = members.replace(oldmember, newmember)
+ #
+ # End of the tuple loop - search again and see if there are outer members
+ # that now turn up.
+ #
+ tuples = struct_members.findall(members)
+ return members
+
+ #
+ # Format the struct declaration into a standard form for inclusion in the
+ # resulting docs.
+ #
+ def format_struct_decl(self, declaration):
+ #
+ # Insert newlines, get rid of extra spaces.
+ #
+ declaration = KernRe(r'([\{;])').sub(r'\1\n', declaration)
+ declaration = KernRe(r'\}\s+;').sub('};', declaration)
+ #
+ # Format inline enums with each member on its own line.
+ #
+ r = KernRe(r'(enum\s+\{[^\}]+),([^\n])')
+ while r.search(declaration):
+ declaration = r.sub(r'\1,\n\2', declaration)
+ #
+ # Now go through and supply the right number of tabs
+ # for each line.
+ #
+ def_args = declaration.split('\n')
+ level = 1
+ declaration = ""
+ for clause in def_args:
+ clause = KernRe(r'\s+').sub(' ', clause.strip(), count=1)
+ if clause:
+ if '}' in clause and level > 1:
+ level -= 1
+ if not clause.startswith('#'):
+ declaration += "\t" * level
+ declaration += "\t" + clause + "\n"
+ if "{" in clause and "}" not in clause:
+ level += 1
+ return declaration
+
+
+ def dump_struct(self, ln, proto):
+ """
+ Store an entry for a struct or union
+ """
+ #
+ # Do the basic parse to get the pieces of the declaration.
+ #
+ struct_parts = self.split_struct_proto(proto)
+ if not struct_parts:
+ self.emit_msg(ln, f"{proto} error: Cannot parse struct or union!")
+ return
+ decl_type, declaration_name, members = struct_parts
+
+ if self.entry.identifier != declaration_name:
+ self.emit_msg(ln, f"expecting prototype for {decl_type} {self.entry.identifier}. "
+ f"Prototype was for {decl_type} {declaration_name} instead\n")
+ return
+ #
+ # Go through the list of members applying all of our transformations.
+ #
+ members = trim_private_members(members)
+ members = apply_transforms(struct_xforms, members)
+
+ nested = NestedMatch()
+ for search, sub in struct_nested_prefixes:
+ members = nested.sub(search, sub, members)
+ #
+ # Deal with embedded struct and union members, and drop enums entirely.
+ #
+ declaration = members
+ members = self.rewrite_struct_members(members)
+ members = re.sub(r'(\{[^\{\}]*\})', '', members)
+ #
+ # Output the result and we are done.
+ #
+ self.create_parameter_list(ln, decl_type, members, ';',
+ declaration_name)
+ self.check_sections(ln, declaration_name, decl_type)
+ self.output_declaration(decl_type, declaration_name,
+ definition=self.format_struct_decl(declaration),
+ purpose=self.entry.declaration_purpose)
+
+ def dump_enum(self, ln, proto):
+ """
+ Stores an enum inside self.entries array.
+ """
+ #
+ # Strip preprocessor directives. Note that this depends on the
+ # trailing semicolon we added in process_proto_type().
+ #
+ proto = KernRe(r'#\s*((define|ifdef|if)\s+|endif)[^;]*;', flags=re.S).sub('', proto)
+ #
+ # Parse out the name and members of the enum. Typedef form first.
+ #
+ r = KernRe(r'typedef\s+enum\s*\{(.*)\}\s*(\w*)\s*;')
+ if r.search(proto):
+ declaration_name = r.group(2)
+ members = trim_private_members(r.group(1))
+ #
+ # Failing that, look for a straight enum
+ #
+ else:
+ r = KernRe(r'enum\s+(\w*)\s*\{(.*)\}')
+ if r.match(proto):
+ declaration_name = r.group(1)
+ members = trim_private_members(r.group(2))
+ #
+ # OK, this isn't going to work.
+ #
+ else:
+ self.emit_msg(ln, f"{proto}: error: Cannot parse enum!")
+ return
+ #
+ # Make sure we found what we were expecting.
+ #
+ if self.entry.identifier != declaration_name:
+ if self.entry.identifier == "":
+ self.emit_msg(ln,
+ f"{proto}: wrong kernel-doc identifier on prototype")
+ else:
+ self.emit_msg(ln,
+ f"expecting prototype for enum {self.entry.identifier}. "
+ f"Prototype was for enum {declaration_name} instead")
+ return
+
+ if not declaration_name:
+ declaration_name = "(anonymous)"
+ #
+ # Parse out the name of each enum member, and verify that we
+ # have a description for it.
+ #
+ member_set = set()
+ members = KernRe(r'\([^;)]*\)').sub('', members)
+ for arg in members.split(','):
+ if not arg:
+ continue
+ arg = KernRe(r'^\s*(\w+).*').sub(r'\1', arg)
+ self.entry.parameterlist.append(arg)
+ if arg not in self.entry.parameterdescs:
+ self.entry.parameterdescs[arg] = self.undescribed
+ self.emit_msg(ln,
+ f"Enum value '{arg}' not described in enum '{declaration_name}'")
+ member_set.add(arg)
+ #
+ # Ensure that every described member actually exists in the enum.
+ #
+ for k in self.entry.parameterdescs:
+ if k not in member_set:
+ self.emit_msg(ln,
+ f"Excess enum value '@{k}' description in '{declaration_name}'")
+
+ self.output_declaration('enum', declaration_name,
+ purpose=self.entry.declaration_purpose)
+
+ def dump_declaration(self, ln, prototype):
+ """
+ Stores a data declaration inside self.entries array.
+ """
+
+ if self.entry.decl_type == "enum":
+ self.dump_enum(ln, prototype)
+ elif self.entry.decl_type == "typedef":
+ self.dump_typedef(ln, prototype)
+ elif self.entry.decl_type in ["union", "struct"]:
+ self.dump_struct(ln, prototype)
+ else:
+ # This would be a bug
+ self.emit_message(ln, f'Unknown declaration type: {self.entry.decl_type}')
+
+ def dump_function(self, ln, prototype):
+ """
+ Stores a function or function macro inside self.entries array.
+ """
+
+ found = func_macro = False
+ return_type = ''
+ decl_type = 'function'
+ #
+ # Apply the initial transformations.
+ #
+ prototype = apply_transforms(function_xforms, prototype)
+ #
+ # If we have a macro, remove the "#define" at the front.
+ #
+ new_proto = KernRe(r"^#\s*define\s+").sub("", prototype)
+ if new_proto != prototype:
+ prototype = new_proto
+ #
+ # Dispense with the simple "#define A B" case here; the key
+ # is the space after the name of the symbol being defined.
+ # NOTE that the seemingly misnamed "func_macro" indicates a
+ # macro *without* arguments.
+ #
+ r = KernRe(r'^(\w+)\s+')
+ if r.search(prototype):
+ return_type = ''
+ declaration_name = r.group(1)
+ func_macro = True
+ found = True
+
+ # Yes, this truly is vile. We are looking for:
+ # 1. Return type (may be nothing if we're looking at a macro)
+ # 2. Function name
+ # 3. Function parameters.
+ #
+ # All the while we have to watch out for function pointer parameters
+ # (which IIRC is what the two sections are for), C types (these
+ # regexps don't even start to express all the possibilities), and
+ # so on.
+ #
+ # If you mess with these regexps, it's a good idea to check that
+ # the following functions' documentation still comes out right:
+ # - parport_register_device (function pointer parameters)
+ # - atomic_set (macro)
+ # - pci_match_device, __copy_to_user (long return type)
+
+ name = r'\w+'
+ type1 = r'(?:[\w\s]+)?'
+ type2 = r'(?:[\w\s]+\*+)+'
+ #
+ # Attempt to match first on (args) with no internal parentheses; this
+ # lets us easily filter out __acquires() and other post-args stuff. If
+ # that fails, just grab the rest of the line to the last closing
+ # parenthesis.
+ #
+ proto_args = r'\(([^\(]*|.*)\)'
+ #
+ # (Except for the simple macro case) attempt to split up the prototype
+ # in the various ways we understand.
+ #
+ if not found:
+ patterns = [
+ rf'^()({name})\s*{proto_args}',
+ rf'^({type1})\s+({name})\s*{proto_args}',
+ rf'^({type2})\s*({name})\s*{proto_args}',
+ ]
+
+ for p in patterns:
+ r = KernRe(p)
+ if r.match(prototype):
+ return_type = r.group(1)
+ declaration_name = r.group(2)
+ args = r.group(3)
+ self.create_parameter_list(ln, decl_type, args, ',',
+ declaration_name)
+ found = True
+ break
+ #
+ # Parsing done; make sure that things are as we expect.
+ #
+ if not found:
+ self.emit_msg(ln,
+ f"cannot understand function prototype: '{prototype}'")
+ return
+ if self.entry.identifier != declaration_name:
+ self.emit_msg(ln, f"expecting prototype for {self.entry.identifier}(). "
+ f"Prototype was for {declaration_name}() instead")
+ return
+ self.check_sections(ln, declaration_name, "function")
+ self.check_return_section(ln, declaration_name, return_type)
+ #
+ # Store the result.
+ #
+ self.output_declaration(decl_type, declaration_name,
+ typedef=('typedef' in return_type),
+ functiontype=return_type,
+ purpose=self.entry.declaration_purpose,
+ func_macro=func_macro)
+
+
+ def dump_typedef(self, ln, proto):
+ """
+ Stores a typedef inside self.entries array.
+ """
+ #
+ # We start by looking for function typedefs.
+ #
+ typedef_type = r'typedef((?:\s+[\w*]+\b){0,7}\s+(?:\w+\b|\*+))\s*'
+ typedef_ident = r'\*?\s*(\w\S+)\s*'
+ typedef_args = r'\s*\((.*)\);'
+
+ typedef1 = KernRe(typedef_type + r'\(' + typedef_ident + r'\)' + typedef_args)
+ typedef2 = KernRe(typedef_type + typedef_ident + typedef_args)
+
+ # Parse function typedef prototypes
+ for r in [typedef1, typedef2]:
+ if not r.match(proto):
+ continue
+
+ return_type = r.group(1).strip()
+ declaration_name = r.group(2)
+ args = r.group(3)
+
+ if self.entry.identifier != declaration_name:
+ self.emit_msg(ln,
+ f"expecting prototype for typedef {self.entry.identifier}. Prototype was for typedef {declaration_name} instead\n")
+ return
+
+ self.create_parameter_list(ln, 'function', args, ',', declaration_name)
+
+ self.output_declaration('function', declaration_name,
+ typedef=True,
+ functiontype=return_type,
+ purpose=self.entry.declaration_purpose)
+ return
+ #
+ # Not a function, try to parse a simple typedef.
+ #
+ r = KernRe(r'typedef.*\s+(\w+)\s*;')
+ if r.match(proto):
+ declaration_name = r.group(1)
+
+ if self.entry.identifier != declaration_name:
+ self.emit_msg(ln,
+ f"expecting prototype for typedef {self.entry.identifier}. Prototype was for typedef {declaration_name} instead\n")
+ return
+
+ self.output_declaration('typedef', declaration_name,
+ purpose=self.entry.declaration_purpose)
+ return
+
+ self.emit_msg(ln, "error: Cannot parse typedef!")
+
+ @staticmethod
+ def process_export(function_set, line):
+ """
+ process EXPORT_SYMBOL* tags
+
+ This method doesn't use any variable from the class, so declare it
+ with a staticmethod decorator.
+ """
+
+ # We support documenting some exported symbols with different
+ # names. A horrible hack.
+ suffixes = [ '_noprof' ]
+
+ # Note: it accepts only one EXPORT_SYMBOL* per line, as having
+ # multiple export lines would violate Kernel coding style.
+
+ if export_symbol.search(line):
+ symbol = export_symbol.group(2)
+ elif export_symbol_ns.search(line):
+ symbol = export_symbol_ns.group(2)
+ else:
+ return False
+ #
+ # Found an export, trim out any special suffixes
+ #
+ for suffix in suffixes:
+ # Be backward compatible with Python < 3.9
+ if symbol.endswith(suffix):
+ symbol = symbol[:-len(suffix)]
+ function_set.add(symbol)
+ return True
+
+ def process_normal(self, ln, line):
+ """
+ STATE_NORMAL: looking for the /** to begin everything.
+ """
+
+ if not doc_start.match(line):
+ return
+
+ # start a new entry
+ self.reset_state(ln)
+
+ # next line is always the function name
+ self.state = state.NAME
+
+ def process_name(self, ln, line):
+ """
+ STATE_NAME: Looking for the "name - description" line
+ """
+ #
+ # Check for a DOC: block and handle them specially.
+ #
+ if doc_block.search(line):
+
+ if not doc_block.group(1):
+ self.entry.begin_section(ln, "Introduction")
+ else:
+ self.entry.begin_section(ln, doc_block.group(1))
+
+ self.entry.identifier = self.entry.section
+ self.state = state.DOCBLOCK
+ #
+ # Otherwise we're looking for a normal kerneldoc declaration line.
+ #
+ elif doc_decl.search(line):
+ self.entry.identifier = doc_decl.group(1)
+
+ # Test for data declaration
+ if doc_begin_data.search(line):
+ self.entry.decl_type = doc_begin_data.group(1)
+ self.entry.identifier = doc_begin_data.group(2)
+ #
+ # Look for a function description
+ #
+ elif doc_begin_func.search(line):
+ self.entry.identifier = doc_begin_func.group(1)
+ self.entry.decl_type = "function"
+ #
+ # We struck out.
+ #
+ else:
+ self.emit_msg(ln,
+ f"This comment starts with '/**', but isn't a kernel-doc comment. Refer to Documentation/doc-guide/kernel-doc.rst\n{line}")
+ self.state = state.NORMAL
+ return
+ #
+ # OK, set up for a new kerneldoc entry.
+ #
+ self.state = state.BODY
+ self.entry.identifier = self.entry.identifier.strip(" ")
+ # if there's no @param blocks need to set up default section here
+ self.entry.begin_section(ln + 1)
+ #
+ # Find the description portion, which *should* be there but
+ # isn't always.
+ # (We should be able to capture this from the previous parsing - someday)
+ #
+ r = KernRe("[-:](.*)")
+ if r.search(line):
+ self.entry.declaration_purpose = trim_whitespace(r.group(1))
+ self.state = state.DECLARATION
+ else:
+ self.entry.declaration_purpose = ""
+
+ if not self.entry.declaration_purpose and self.config.wshort_desc:
+ self.emit_msg(ln,
+ f"missing initial short description on line:\n{line}")
+
+ if not self.entry.identifier and self.entry.decl_type != "enum":
+ self.emit_msg(ln,
+ f"wrong kernel-doc identifier on line:\n{line}")
+ self.state = state.NORMAL
+
+ if self.config.verbose:
+ self.emit_msg(ln,
+ f"Scanning doc for {self.entry.decl_type} {self.entry.identifier}",
+ warning=False)
+ #
+ # Failed to find an identifier. Emit a warning
+ #
+ else:
+ self.emit_msg(ln, f"Cannot find identifier on line:\n{line}")
+
+ #
+ # Helper function to determine if a new section is being started.
+ #
+ def is_new_section(self, ln, line):
+ if doc_sect.search(line):
+ self.state = state.BODY
+ #
+ # Pick out the name of our new section, tweaking it if need be.
+ #
+ newsection = doc_sect.group(1)
+ if newsection.lower() == 'description':
+ newsection = 'Description'
+ elif newsection.lower() == 'context':
+ newsection = 'Context'
+ self.state = state.SPECIAL_SECTION
+ elif newsection.lower() in ["@return", "@returns",
+ "return", "returns"]:
+ newsection = "Return"
+ self.state = state.SPECIAL_SECTION
+ elif newsection[0] == '@':
+ self.state = state.SPECIAL_SECTION
+ #
+ # Initialize the contents, and get the new section going.
+ #
+ newcontents = doc_sect.group(2)
+ if not newcontents:
+ newcontents = ""
+ self.dump_section()
+ self.entry.begin_section(ln, newsection)
+ self.entry.leading_space = None
+
+ self.entry.add_text(newcontents.lstrip())
+ return True
+ return False
+
+ #
+ # Helper function to detect (and effect) the end of a kerneldoc comment.
+ #
+ def is_comment_end(self, ln, line):
+ if doc_end.search(line):
+ self.dump_section()
+
+ # Look for doc_com + <text> + doc_end:
+ r = KernRe(r'\s*\*\s*[a-zA-Z_0-9:.]+\*/')
+ if r.match(line):
+ self.emit_msg(ln, f"suspicious ending line: {line}")
+
+ self.entry.prototype = ""
+ self.entry.new_start_line = ln + 1
+
+ self.state = state.PROTO
+ return True
+ return False
+
+
+ def process_decl(self, ln, line):
+ """
+ STATE_DECLARATION: We've seen the beginning of a declaration
+ """
+ if self.is_new_section(ln, line) or self.is_comment_end(ln, line):
+ return
+ #
+ # Look for anything with the " * " line beginning.
+ #
+ if doc_content.search(line):
+ cont = doc_content.group(1)
+ #
+ # A blank line means that we have moved out of the declaration
+ # part of the comment (without any "special section" parameter
+ # descriptions).
+ #
+ if cont == "":
+ self.state = state.BODY
+ #
+ # Otherwise we have more of the declaration section to soak up.
+ #
+ else:
+ self.entry.declaration_purpose = \
+ trim_whitespace(self.entry.declaration_purpose + ' ' + cont)
+ else:
+ # Unknown line, ignore
+ self.emit_msg(ln, f"bad line: {line}")
+
+
+ def process_special(self, ln, line):
+ """
+ STATE_SPECIAL_SECTION: a section ending with a blank line
+ """
+ #
+ # If we have hit a blank line (only the " * " marker), then this
+ # section is done.
+ #
+ if KernRe(r"\s*\*\s*$").match(line):
+ self.entry.begin_section(ln, dump = True)
+ self.state = state.BODY
+ return
+ #
+ # Not a blank line, look for the other ways to end the section.
+ #
+ if self.is_new_section(ln, line) or self.is_comment_end(ln, line):
+ return
+ #
+ # OK, we should have a continuation of the text for this section.
+ #
+ if doc_content.search(line):
+ cont = doc_content.group(1)
+ #
+ # If the lines of text after the first in a special section have
+ # leading white space, we need to trim it out or Sphinx will get
+ # confused. For the second line (the None case), see what we
+ # find there and remember it.
+ #
+ if self.entry.leading_space is None:
+ r = KernRe(r'^(\s+)')
+ if r.match(cont):
+ self.entry.leading_space = len(r.group(1))
+ else:
+ self.entry.leading_space = 0
+ #
+ # Otherwise, before trimming any leading chars, be *sure*
+ # that they are white space. We should maybe warn if this
+ # isn't the case.
+ #
+ for i in range(0, self.entry.leading_space):
+ if cont[i] != " ":
+ self.entry.leading_space = i
+ break
+ #
+ # Add the trimmed result to the section and we're done.
+ #
+ self.entry.add_text(cont[self.entry.leading_space:])
+ else:
+ # Unknown line, ignore
+ self.emit_msg(ln, f"bad line: {line}")
+
+ def process_body(self, ln, line):
+ """
+ STATE_BODY: the bulk of a kerneldoc comment.
+ """
+ if self.is_new_section(ln, line) or self.is_comment_end(ln, line):
+ return
+
+ if doc_content.search(line):
+ cont = doc_content.group(1)
+ self.entry.add_text(cont)
+ else:
+ # Unknown line, ignore
+ self.emit_msg(ln, f"bad line: {line}")
+
+ def process_inline_name(self, ln, line):
+ """STATE_INLINE_NAME: beginning of docbook comments within a prototype."""
+
+ if doc_inline_sect.search(line):
+ self.entry.begin_section(ln, doc_inline_sect.group(1))
+ self.entry.add_text(doc_inline_sect.group(2).lstrip())
+ self.state = state.INLINE_TEXT
+ elif doc_inline_end.search(line):
+ self.dump_section()
+ self.state = state.PROTO
+ elif doc_content.search(line):
+ self.emit_msg(ln, f"Incorrect use of kernel-doc format: {line}")
+ self.state = state.PROTO
+ # else ... ??
+
+ def process_inline_text(self, ln, line):
+ """STATE_INLINE_TEXT: docbook comments within a prototype."""
+
+ if doc_inline_end.search(line):
+ self.dump_section()
+ self.state = state.PROTO
+ elif doc_content.search(line):
+ self.entry.add_text(doc_content.group(1))
+ # else ... ??
+
+ def syscall_munge(self, ln, proto): # pylint: disable=W0613
+ """
+ Handle syscall definitions
+ """
+
+ is_void = False
+
+ # Strip newlines/CR's
+ proto = re.sub(r'[\r\n]+', ' ', proto)
+
+ # Check if it's a SYSCALL_DEFINE0
+ if 'SYSCALL_DEFINE0' in proto:
+ is_void = True
+
+ # Replace SYSCALL_DEFINE with correct return type & function name
+ proto = KernRe(r'SYSCALL_DEFINE.*\(').sub('long sys_', proto)
+
+ r = KernRe(r'long\s+(sys_.*?),')
+ if r.search(proto):
+ proto = KernRe(',').sub('(', proto, count=1)
+ elif is_void:
+ proto = KernRe(r'\)').sub('(void)', proto, count=1)
+
+ # Now delete all of the odd-numbered commas in the proto
+ # so that argument types & names don't have a comma between them
+ count = 0
+ length = len(proto)
+
+ if is_void:
+ length = 0 # skip the loop if is_void
+
+ for ix in range(length):
+ if proto[ix] == ',':
+ count += 1
+ if count % 2 == 1:
+ proto = proto[:ix] + ' ' + proto[ix + 1:]
+
+ return proto
+
+ def tracepoint_munge(self, ln, proto):
+ """
+ Handle tracepoint definitions
+ """
+
+ tracepointname = None
+ tracepointargs = None
+
+ # Match tracepoint name based on different patterns
+ r = KernRe(r'TRACE_EVENT\((.*?),')
+ if r.search(proto):
+ tracepointname = r.group(1)
+
+ r = KernRe(r'DEFINE_SINGLE_EVENT\((.*?),')
+ if r.search(proto):
+ tracepointname = r.group(1)
+
+ r = KernRe(r'DEFINE_EVENT\((.*?),(.*?),')
+ if r.search(proto):
+ tracepointname = r.group(2)
+
+ if tracepointname:
+ tracepointname = tracepointname.lstrip()
+
+ r = KernRe(r'TP_PROTO\((.*?)\)')
+ if r.search(proto):
+ tracepointargs = r.group(1)
+
+ if not tracepointname or not tracepointargs:
+ self.emit_msg(ln,
+ f"Unrecognized tracepoint format:\n{proto}\n")
+ else:
+ proto = f"static inline void trace_{tracepointname}({tracepointargs})"
+ self.entry.identifier = f"trace_{self.entry.identifier}"
+
+ return proto
+
+ def process_proto_function(self, ln, line):
+ """Ancillary routine to process a function prototype"""
+
+ # strip C99-style comments to end of line
+ line = KernRe(r"//.*$", re.S).sub('', line)
+ #
+ # Soak up the line's worth of prototype text, stopping at { or ; if present.
+ #
+ if KernRe(r'\s*#\s*define').match(line):
+ self.entry.prototype = line
+ elif not line.startswith('#'): # skip other preprocessor stuff
+ r = KernRe(r'([^\{]*)')
+ if r.match(line):
+ self.entry.prototype += r.group(1) + " "
+ #
+ # If we now have the whole prototype, clean it up and declare victory.
+ #
+ if '{' in line or ';' in line or KernRe(r'\s*#\s*define').match(line):
+ # strip comments and surrounding spaces
+ self.entry.prototype = KernRe(r'/\*.*\*/').sub('', self.entry.prototype).strip()
+ #
+ # Handle self.entry.prototypes for function pointers like:
+ # int (*pcs_config)(struct foo)
+ # by turning it into
+ # int pcs_config(struct foo)
+ #
+ r = KernRe(r'^(\S+\s+)\(\s*\*(\S+)\)')
+ self.entry.prototype = r.sub(r'\1\2', self.entry.prototype)
+ #
+ # Handle special declaration syntaxes
+ #
+ if 'SYSCALL_DEFINE' in self.entry.prototype:
+ self.entry.prototype = self.syscall_munge(ln,
+ self.entry.prototype)
+ else:
+ r = KernRe(r'TRACE_EVENT|DEFINE_EVENT|DEFINE_SINGLE_EVENT')
+ if r.search(self.entry.prototype):
+ self.entry.prototype = self.tracepoint_munge(ln,
+ self.entry.prototype)
+ #
+ # ... and we're done
+ #
+ self.dump_function(ln, self.entry.prototype)
+ self.reset_state(ln)
+
+ def process_proto_type(self, ln, line):
+ """Ancillary routine to process a type"""
+
+ # Strip C99-style comments and surrounding whitespace
+ line = KernRe(r"//.*$", re.S).sub('', line).strip()
+ if not line:
+ return # nothing to see here
+
+ # To distinguish preprocessor directive from regular declaration later.
+ if line.startswith('#'):
+ line += ";"
+ #
+ # Split the declaration on any of { } or ;, and accumulate pieces
+ # until we hit a semicolon while not inside {brackets}
+ #
+ r = KernRe(r'(.*?)([{};])')
+ for chunk in r.split(line):
+ if chunk: # Ignore empty matches
+ self.entry.prototype += chunk
+ #
+ # This cries out for a match statement ... someday after we can
+ # drop Python 3.9 ...
+ #
+ if chunk == '{':
+ self.entry.brcount += 1
+ elif chunk == '}':
+ self.entry.brcount -= 1
+ elif chunk == ';' and self.entry.brcount <= 0:
+ self.dump_declaration(ln, self.entry.prototype)
+ self.reset_state(ln)
+ return
+ #
+ # We hit the end of the line while still in the declaration; put
+ # in a space to represent the newline.
+ #
+ self.entry.prototype += ' '
+
+ def process_proto(self, ln, line):
+ """STATE_PROTO: reading a function/whatever prototype."""
+
+ if doc_inline_oneline.search(line):
+ self.entry.begin_section(ln, doc_inline_oneline.group(1))
+ self.entry.add_text(doc_inline_oneline.group(2))
+ self.dump_section()
+
+ elif doc_inline_start.search(line):
+ self.state = state.INLINE_NAME
+
+ elif self.entry.decl_type == 'function':
+ self.process_proto_function(ln, line)
+
+ else:
+ self.process_proto_type(ln, line)
+
+ def process_docblock(self, ln, line):
+ """STATE_DOCBLOCK: within a DOC: block."""
+
+ if doc_end.search(line):
+ self.dump_section()
+ self.output_declaration("doc", self.entry.identifier)
+ self.reset_state(ln)
+
+ elif doc_content.search(line):
+ self.entry.add_text(doc_content.group(1))
+
+ def parse_export(self):
+ """
+ Parses EXPORT_SYMBOL* macros from a single Kernel source file.
+ """
+
+ export_table = set()
+
+ try:
+ with open(self.fname, "r", encoding="utf8",
+ errors="backslashreplace") as fp:
+
+ for line in fp:
+ self.process_export(export_table, line)
+
+ except IOError:
+ return None
+
+ return export_table
+
+ #
+ # The state/action table telling us which function to invoke in
+ # each state.
+ #
+ state_actions = {
+ state.NORMAL: process_normal,
+ state.NAME: process_name,
+ state.BODY: process_body,
+ state.DECLARATION: process_decl,
+ state.SPECIAL_SECTION: process_special,
+ state.INLINE_NAME: process_inline_name,
+ state.INLINE_TEXT: process_inline_text,
+ state.PROTO: process_proto,
+ state.DOCBLOCK: process_docblock,
+ }
+
+ def parse_kdoc(self):
+ """
+ Open and process each line of a C source file.
+ The parsing is controlled via a state machine, and the line is passed
+ to a different process function depending on the state. The process
+ function may update the state as needed.
+
+ Besides parsing kernel-doc tags, it also parses export symbols.
+ """
+
+ prev = ""
+ prev_ln = None
+ export_table = set()
+
+ try:
+ with open(self.fname, "r", encoding="utf8",
+ errors="backslashreplace") as fp:
+ for ln, line in enumerate(fp):
+
+ line = line.expandtabs().strip("\n")
+
+ # Group continuation lines on prototypes
+ if self.state == state.PROTO:
+ if line.endswith("\\"):
+ prev += line.rstrip("\\")
+ if not prev_ln:
+ prev_ln = ln
+ continue
+
+ if prev:
+ ln = prev_ln
+ line = prev + line
+ prev = ""
+ prev_ln = None
+
+ self.config.log.debug("%d %s: %s",
+ ln, state.name[self.state],
+ line)
+
+ # This is an optimization over the original script.
+ # There, when export_file was used for the same file,
+ # it was read twice. Here, we use the already-existing
+ # loop to parse exported symbols as well.
+ #
+ if (self.state != state.NORMAL) or \
+ not self.process_export(export_table, line):
+ # Hand this line to the appropriate state handler
+ self.state_actions[self.state](self, ln, line)
+
+ except OSError:
+ self.config.log.error(f"Error: Cannot open file {self.fname}")
+
+ return export_table, self.entries
diff --git a/tools/lib/python/kdoc/kdoc_re.py b/tools/lib/python/kdoc/kdoc_re.py
new file mode 100644
index 000000000000..2dfa1bf83d64
--- /dev/null
+++ b/tools/lib/python/kdoc/kdoc_re.py
@@ -0,0 +1,270 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+# Copyright(c) 2025: Mauro Carvalho Chehab <mchehab@kernel.org>.
+
+"""
+Regular expression ancillary classes.
+
+Those help caching regular expressions and do matching for kernel-doc.
+"""
+
+import re
+
+# Local cache for regular expressions
+re_cache = {}
+
+
+class KernRe:
+ """
+ Helper class to simplify regex declaration and usage.
+
+ It calls re.compile for a given pattern. It also allows adding
+ regular expressions and define sub at class init time.
+
+ Regular expressions can be cached via an argument, helping to speedup
+ searches.
+ """
+
+ def _add_regex(self, string, flags):
+ """
+ Adds a new regex or reuses it from the cache.
+ """
+ self.regex = re_cache.get(string, None)
+ if not self.regex:
+ self.regex = re.compile(string, flags=flags)
+ if self.cache:
+ re_cache[string] = self.regex
+
+ def __init__(self, string, cache=True, flags=0):
+ """
+ Compile a regular expression and initialize internal vars.
+ """
+
+ self.cache = cache
+ self.last_match = None
+
+ self._add_regex(string, flags)
+
+ def __str__(self):
+ """
+ Return the regular expression pattern.
+ """
+ return self.regex.pattern
+
+ def __add__(self, other):
+ """
+ Allows adding two regular expressions into one.
+ """
+
+ return KernRe(str(self) + str(other), cache=self.cache or other.cache,
+ flags=self.regex.flags | other.regex.flags)
+
+ def match(self, string):
+ """
+ Handles a re.match storing its results
+ """
+
+ self.last_match = self.regex.match(string)
+ return self.last_match
+
+ def search(self, string):
+ """
+ Handles a re.search storing its results
+ """
+
+ self.last_match = self.regex.search(string)
+ return self.last_match
+
+ def findall(self, string):
+ """
+ Alias to re.findall
+ """
+
+ return self.regex.findall(string)
+
+ def split(self, string):
+ """
+ Alias to re.split
+ """
+
+ return self.regex.split(string)
+
+ def sub(self, sub, string, count=0):
+ """
+ Alias to re.sub
+ """
+
+ return self.regex.sub(sub, string, count=count)
+
+ def group(self, num):
+ """
+ Returns the group results of the last match
+ """
+
+ return self.last_match.group(num)
+
+
+class NestedMatch:
+ """
+ Finding nested delimiters is hard with regular expressions. It is
+ even harder on Python with its normal re module, as there are several
+ advanced regular expressions that are missing.
+
+ This is the case of this pattern:
+
+ '\\bSTRUCT_GROUP(\\(((?:(?>[^)(]+)|(?1))*)\\))[^;]*;'
+
+ which is used to properly match open/close parentheses of the
+ string search STRUCT_GROUP(),
+
+ Add a class that counts pairs of delimiters, using it to match and
+ replace nested expressions.
+
+ The original approach was suggested by:
+ https://stackoverflow.com/questions/5454322/python-how-to-match-nested-parentheses-with-regex
+
+ Although I re-implemented it to make it more generic and match 3 types
+ of delimiters. The logic checks if delimiters are paired. If not, it
+ will ignore the search string.
+ """
+
+ # TODO: make NestedMatch handle multiple match groups
+ #
+ # Right now, regular expressions to match it are defined only up to
+ # the start delimiter, e.g.:
+ #
+ # \bSTRUCT_GROUP\(
+ #
+ # is similar to: STRUCT_GROUP\((.*)\)
+ # except that the content inside the match group is delimiter-aligned.
+ #
+ # The content inside parentheses is converted into a single replace
+ # group (e.g. r`\1').
+ #
+ # It would be nice to change such definition to support multiple
+ # match groups, allowing a regex equivalent to:
+ #
+ # FOO\((.*), (.*), (.*)\)
+ #
+ # it is probably easier to define it not as a regular expression, but
+ # with some lexical definition like:
+ #
+ # FOO(arg1, arg2, arg3)
+
+ DELIMITER_PAIRS = {
+ '{': '}',
+ '(': ')',
+ '[': ']',
+ }
+
+ RE_DELIM = re.compile(r'[\{\}\[\]\(\)]')
+
+ def _search(self, regex, line):
+ """
+ Finds paired blocks for a regex that ends with a delimiter.
+
+ The suggestion of using finditer to match pairs came from:
+ https://stackoverflow.com/questions/5454322/python-how-to-match-nested-parentheses-with-regex
+ but I ended using a different implementation to align all three types
+ of delimiters and seek for an initial regular expression.
+
+ The algorithm seeks for open/close paired delimiters and places them
+ into a stack, yielding a start/stop position of each match when the
+ stack is zeroed.
+
+ The algorithm should work fine for properly paired lines, but will
+ silently ignore end delimiters that precede a start delimiter.
+ This should be OK for kernel-doc parser, as unaligned delimiters
+ would cause compilation errors. So, we don't need to raise exceptions
+ to cover such issues.
+ """
+
+ stack = []
+
+ for match_re in regex.finditer(line):
+ start = match_re.start()
+ offset = match_re.end()
+
+ d = line[offset - 1]
+ if d not in self.DELIMITER_PAIRS:
+ continue
+
+ end = self.DELIMITER_PAIRS[d]
+ stack.append(end)
+
+ for match in self.RE_DELIM.finditer(line[offset:]):
+ pos = match.start() + offset
+
+ d = line[pos]
+
+ if d in self.DELIMITER_PAIRS:
+ end = self.DELIMITER_PAIRS[d]
+
+ stack.append(end)
+ continue
+
+ # Does the end delimiter match what is expected?
+ if stack and d == stack[-1]:
+ stack.pop()
+
+ if not stack:
+ yield start, offset, pos + 1
+ break
+
+ def search(self, regex, line):
+ """
+ This is similar to re.search:
+
+ It matches a regex that it is followed by a delimiter,
+ returning occurrences only if all delimiters are paired.
+ """
+
+ for t in self._search(regex, line):
+
+ yield line[t[0]:t[2]]
+
+ def sub(self, regex, sub, line, count=0):
+ """
+ This is similar to re.sub:
+
+ It matches a regex that it is followed by a delimiter,
+ replacing occurrences only if all delimiters are paired.
+
+ if r'\1' is used, it works just like re: it places there the
+ matched paired data with the delimiter stripped.
+
+ If count is different than zero, it will replace at most count
+ items.
+ """
+ out = ""
+
+ cur_pos = 0
+ n = 0
+
+ for start, end, pos in self._search(regex, line):
+ out += line[cur_pos:start]
+
+ # Value, ignoring start/end delimiters
+ value = line[end:pos - 1]
+
+ # replaces \1 at the sub string, if \1 is used there
+ new_sub = sub
+ new_sub = new_sub.replace(r'\1', value)
+
+ out += new_sub
+
+ # Drop end ';' if any
+ if line[pos] == ';':
+ pos += 1
+
+ cur_pos = pos
+ n += 1
+
+ if count and count >= n:
+ break
+
+ # Append the remaining string
+ l = len(line)
+ out += line[cur_pos:l]
+
+ return out
diff --git a/tools/lib/python/kdoc/latex_fonts.py b/tools/lib/python/kdoc/latex_fonts.py
new file mode 100755
index 000000000000..29317f8006ea
--- /dev/null
+++ b/tools/lib/python/kdoc/latex_fonts.py
@@ -0,0 +1,167 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0-only
+# Copyright (C) Akira Yokosawa, 2024
+#
+# Ported to Python by (c) Mauro Carvalho Chehab, 2025
+
+"""
+Detect problematic Noto CJK variable fonts.
+
+For "make pdfdocs", reports of build errors of translations.pdf started
+arriving early 2024 [1, 2]. It turned out that Fedora and openSUSE
+tumbleweed have started deploying variable-font [3] format of "Noto CJK"
+fonts [4, 5]. For PDF, a LaTeX package named xeCJK is used for CJK
+(Chinese, Japanese, Korean) pages. xeCJK requires XeLaTeX/XeTeX, which
+does not (and likely never will) understand variable fonts for historical
+reasons.
+
+The build error happens even when both of variable- and non-variable-format
+fonts are found on the build system. To make matters worse, Fedora enlists
+variable "Noto CJK" fonts in the requirements of langpacks-ja, -ko, -zh_CN,
+-zh_TW, etc. Hence developers who have interest in CJK pages are more
+likely to encounter the build errors.
+
+This script is invoked from the error path of "make pdfdocs" and emits
+suggestions if variable-font files of "Noto CJK" fonts are in the list of
+fonts accessible from XeTeX.
+
+References:
+[1]: https://lore.kernel.org/r/8734tqsrt7.fsf@meer.lwn.net/
+[2]: https://lore.kernel.org/r/1708585803.600323099@f111.i.mail.ru/
+[3]: https://en.wikipedia.org/wiki/Variable_font
+[4]: https://fedoraproject.org/wiki/Changes/Noto_CJK_Variable_Fonts
+[5]: https://build.opensuse.org/request/show/1157217
+
+#===========================================================================
+Workarounds for building translations.pdf
+#===========================================================================
+
+* Denylist "variable font" Noto CJK fonts.
+ - Create $HOME/deny-vf/fontconfig/fonts.conf from template below, with
+ tweaks if necessary. Remove leading "".
+ - Path of fontconfig/fonts.conf can be overridden by setting an env
+ variable FONTS_CONF_DENY_VF.
+
+ * Template:
+-----------------------------------------------------------------
+<?xml version="1.0"?>
+<!DOCTYPE fontconfig SYSTEM "urn:fontconfig:fonts.dtd">
+<fontconfig>
+<!--
+ Ignore variable-font glob (not to break xetex)
+-->
+ <selectfont>
+ <rejectfont>
+ <!--
+ for Fedora
+ -->
+ <glob>/usr/share/fonts/google-noto-*-cjk-vf-fonts</glob>
+ <!--
+ for openSUSE tumbleweed
+ -->
+ <glob>/usr/share/fonts/truetype/Noto*CJK*-VF.otf</glob>
+ </rejectfont>
+ </selectfont>
+</fontconfig>
+-----------------------------------------------------------------
+
+ The denylisting is activated for "make pdfdocs".
+
+* For skipping CJK pages in PDF
+ - Uninstall texlive-xecjk.
+ Denylisting is not needed in this case.
+
+* For printing CJK pages in PDF
+ - Need non-variable "Noto CJK" fonts.
+ * Fedora
+ - google-noto-sans-cjk-fonts
+ - google-noto-serif-cjk-fonts
+ * openSUSE tumbleweed
+ - Non-variable "Noto CJK" fonts are not available as distro packages
+ as of April, 2024. Fetch a set of font files from upstream Noto
+ CJK Font released at:
+ https://github.com/notofonts/noto-cjk/tree/main/Sans#super-otc
+ and at:
+ https://github.com/notofonts/noto-cjk/tree/main/Serif#super-otc
+ , then uncompress and deploy them.
+ - Remember to update fontconfig cache by running fc-cache.
+
+!!! Caution !!!
+ Uninstalling "variable font" packages can be dangerous.
+ They might be depended upon by other packages important for your work.
+ Denylisting should be less invasive, as it is effective only while
+ XeLaTeX runs in "make pdfdocs".
+"""
+
+import os
+import re
+import subprocess
+import textwrap
+import sys
+
+class LatexFontChecker:
+ """
+ Detect problems with CJK variable fonts that affect PDF builds for
+ translations.
+ """
+
+ def __init__(self, deny_vf=None):
+ if not deny_vf:
+ deny_vf = os.environ.get('FONTS_CONF_DENY_VF', "~/deny-vf")
+
+ self.environ = os.environ.copy()
+ self.environ['XDG_CONFIG_HOME'] = os.path.expanduser(deny_vf)
+
+ self.re_cjk = re.compile(r"([^:]+):\s*Noto\s+(Sans|Sans Mono|Serif) CJK")
+
+ def description(self):
+ return __doc__
+
+ def get_noto_cjk_vf_fonts(self):
+ """Get Noto CJK fonts"""
+
+ cjk_fonts = set()
+ cmd = ["fc-list", ":", "file", "family", "variable"]
+ try:
+ result = subprocess.run(cmd,stdout=subprocess.PIPE,
+ stderr=subprocess.PIPE,
+ universal_newlines=True,
+ env=self.environ,
+ check=True)
+
+ except subprocess.CalledProcessError as exc:
+ sys.exit(f"Error running fc-list: {repr(exc)}")
+
+ for line in result.stdout.splitlines():
+ if 'variable=True' not in line:
+ continue
+
+ match = self.re_cjk.search(line)
+ if match:
+ cjk_fonts.add(match.group(1))
+
+ return sorted(cjk_fonts)
+
+ def check(self):
+ """Check for problems with CJK fonts"""
+
+ fonts = textwrap.indent("\n".join(self.get_noto_cjk_vf_fonts()), " ")
+ if not fonts:
+ return None
+
+ rel_file = os.path.relpath(__file__, os.getcwd())
+
+ msg = "=" * 77 + "\n"
+ msg += 'XeTeX is confused by "variable font" files listed below:\n'
+ msg += fonts + "\n"
+ msg += textwrap.dedent(f"""
+ For CJK pages in PDF, they need to be hidden from XeTeX by denylisting.
+ Or, CJK pages can be skipped by uninstalling texlive-xecjk.
+
+ For more info on denylisting, other options, and variable font, run:
+
+ tools/docs/check-variable-fonts.py -h
+ """)
+ msg += "=" * 77
+
+ return msg
diff --git a/tools/lib/python/kdoc/parse_data_structs.py b/tools/lib/python/kdoc/parse_data_structs.py
new file mode 100755
index 000000000000..25361996cd20
--- /dev/null
+++ b/tools/lib/python/kdoc/parse_data_structs.py
@@ -0,0 +1,482 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (c) 2016-2025 by Mauro Carvalho Chehab <mchehab@kernel.org>.
+# pylint: disable=R0912,R0915
+
+"""
+Parse a source file or header, creating ReStructured Text cross references.
+
+It accepts an optional file to change the default symbol reference or to
+suppress symbols from the output.
+
+It is capable of identifying defines, functions, structs, typedefs,
+enums and enum symbols and create cross-references for all of them.
+It is also capable of distinguish #define used for specifying a Linux
+ioctl.
+
+The optional rules file contains a set of rules like:
+
+ ignore ioctl VIDIOC_ENUM_FMT
+ replace ioctl VIDIOC_DQBUF vidioc_qbuf
+ replace define V4L2_EVENT_MD_FL_HAVE_FRAME_SEQ :c:type:`v4l2_event_motion_det`
+"""
+
+import os
+import re
+import sys
+
+
+class ParseDataStructs:
+ """
+ Creates an enriched version of a Kernel header file with cross-links
+ to each C data structure type.
+
+ It is meant to allow having a more comprehensive documentation, where
+ uAPI headers will create cross-reference links to the code.
+
+ It is capable of identifying defines, functions, structs, typedefs,
+ enums and enum symbols and create cross-references for all of them.
+ It is also capable of distinguish #define used for specifying a Linux
+ ioctl.
+
+ By default, it create rules for all symbols and defines, but it also
+ allows parsing an exception file. Such file contains a set of rules
+ using the syntax below:
+
+ 1. Ignore rules:
+
+ ignore <type> <symbol>`
+
+ Removes the symbol from reference generation.
+
+ 2. Replace rules:
+
+ replace <type> <old_symbol> <new_reference>
+
+ Replaces how old_symbol with a new reference. The new_reference can be:
+
+ - A simple symbol name;
+ - A full Sphinx reference.
+
+ 3. Namespace rules
+
+ namespace <namespace>
+
+ Sets C namespace to be used during cross-reference generation. Can
+ be overridden by replace rules.
+
+ On ignore and replace rules, <type> can be:
+ - ioctl: for defines that end with _IO*, e.g. ioctl definitions
+ - define: for other defines
+ - symbol: for symbols defined within enums;
+ - typedef: for typedefs;
+ - enum: for the name of a non-anonymous enum;
+ - struct: for structs.
+
+ Examples:
+
+ ignore define __LINUX_MEDIA_H
+ ignore ioctl VIDIOC_ENUM_FMT
+ replace ioctl VIDIOC_DQBUF vidioc_qbuf
+ replace define V4L2_EVENT_MD_FL_HAVE_FRAME_SEQ :c:type:`v4l2_event_motion_det`
+
+ namespace MC
+ """
+
+ # Parser regexes with multiple ways to capture enums and structs
+ RE_ENUMS = [
+ re.compile(r"^\s*enum\s+([\w_]+)\s*\{"),
+ re.compile(r"^\s*enum\s+([\w_]+)\s*$"),
+ re.compile(r"^\s*typedef\s*enum\s+([\w_]+)\s*\{"),
+ re.compile(r"^\s*typedef\s*enum\s+([\w_]+)\s*$"),
+ ]
+ RE_STRUCTS = [
+ re.compile(r"^\s*struct\s+([_\w][\w\d_]+)\s*\{"),
+ re.compile(r"^\s*struct\s+([_\w][\w\d_]+)$"),
+ re.compile(r"^\s*typedef\s*struct\s+([_\w][\w\d_]+)\s*\{"),
+ re.compile(r"^\s*typedef\s*struct\s+([_\w][\w\d_]+)$"),
+ ]
+
+ # FIXME: the original code was written a long time before Sphinx C
+ # domain to have multiple namespaces. To avoid to much turn at the
+ # existing hyperlinks, the code kept using "c:type" instead of the
+ # right types. To change that, we need to change the types not only
+ # here, but also at the uAPI media documentation.
+ DEF_SYMBOL_TYPES = {
+ "ioctl": {
+ "prefix": "\\ ",
+ "suffix": "\\ ",
+ "ref_type": ":ref",
+ "description": "IOCTL Commands",
+ },
+ "define": {
+ "prefix": "\\ ",
+ "suffix": "\\ ",
+ "ref_type": ":ref",
+ "description": "Macros and Definitions",
+ },
+ # We're calling each definition inside an enum as "symbol"
+ "symbol": {
+ "prefix": "\\ ",
+ "suffix": "\\ ",
+ "ref_type": ":ref",
+ "description": "Enumeration values",
+ },
+ "typedef": {
+ "prefix": "\\ ",
+ "suffix": "\\ ",
+ "ref_type": ":c:type",
+ "description": "Type Definitions",
+ },
+ # This is the description of the enum itself
+ "enum": {
+ "prefix": "\\ ",
+ "suffix": "\\ ",
+ "ref_type": ":c:type",
+ "description": "Enumerations",
+ },
+ "struct": {
+ "prefix": "\\ ",
+ "suffix": "\\ ",
+ "ref_type": ":c:type",
+ "description": "Structures",
+ },
+ }
+
+ def __init__(self, debug: bool = False):
+ """Initialize internal vars"""
+ self.debug = debug
+ self.data = ""
+
+ self.symbols = {}
+
+ self.namespace = None
+ self.ignore = []
+ self.replace = []
+
+ for symbol_type in self.DEF_SYMBOL_TYPES:
+ self.symbols[symbol_type] = {}
+
+ def read_exceptions(self, fname: str):
+ if not fname:
+ return
+
+ name = os.path.basename(fname)
+
+ with open(fname, "r", encoding="utf-8", errors="backslashreplace") as f:
+ for ln, line in enumerate(f):
+ ln += 1
+ line = line.strip()
+ if not line or line.startswith("#"):
+ continue
+
+ # ignore rules
+ match = re.match(r"^ignore\s+(\w+)\s+(\S+)", line)
+
+ if match:
+ self.ignore.append((ln, match.group(1), match.group(2)))
+ continue
+
+ # replace rules
+ match = re.match(r"^replace\s+(\S+)\s+(\S+)\s+(\S+)", line)
+ if match:
+ self.replace.append((ln, match.group(1), match.group(2),
+ match.group(3)))
+ continue
+
+ match = re.match(r"^namespace\s+(\S+)", line)
+ if match:
+ self.namespace = match.group(1)
+ continue
+
+ sys.exit(f"{name}:{ln}: invalid line: {line}")
+
+ def apply_exceptions(self):
+ """
+ Process exceptions file with rules to ignore or replace references.
+ """
+
+ # Handle ignore rules
+ for ln, c_type, symbol in self.ignore:
+ if c_type not in self.DEF_SYMBOL_TYPES:
+ sys.exit(f"{name}:{ln}: {c_type} is invalid")
+
+ d = self.symbols[c_type]
+ if symbol in d:
+ del d[symbol]
+
+ # Handle replace rules
+ for ln, c_type, old, new in self.replace:
+ if c_type not in self.DEF_SYMBOL_TYPES:
+ sys.exit(f"{name}:{ln}: {c_type} is invalid")
+
+ reftype = None
+
+ # Parse reference type when the type is specified
+
+ match = re.match(r"^\:c\:(\w+)\:\`(.+)\`", new)
+ if match:
+ reftype = f":c:{match.group(1)}"
+ new = match.group(2)
+ else:
+ match = re.search(r"(\:ref)\:\`(.+)\`", new)
+ if match:
+ reftype = match.group(1)
+ new = match.group(2)
+
+ # If the replacement rule doesn't have a type, get default
+ if not reftype:
+ reftype = self.DEF_SYMBOL_TYPES[c_type].get("ref_type")
+ if not reftype:
+ reftype = self.DEF_SYMBOL_TYPES[c_type].get("real_type")
+
+ new_ref = f"{reftype}:`{old} <{new}>`"
+
+ # Change self.symbols to use the replacement rule
+ if old in self.symbols[c_type]:
+ (_, ln) = self.symbols[c_type][old]
+ self.symbols[c_type][old] = (new_ref, ln)
+ else:
+ print(f"{name}:{ln}: Warning: can't find {old} {c_type}")
+
+ def store_type(self, ln, symbol_type: str, symbol: str,
+ ref_name: str = None, replace_underscores: bool = True):
+ """
+ Stores a new symbol at self.symbols under symbol_type.
+
+ By default, underscores are replaced by "-"
+ """
+ defs = self.DEF_SYMBOL_TYPES[symbol_type]
+
+ prefix = defs.get("prefix", "")
+ suffix = defs.get("suffix", "")
+ ref_type = defs.get("ref_type")
+
+ # Determine ref_link based on symbol type
+ if ref_type or self.namespace:
+ if not ref_name:
+ ref_name = symbol.lower()
+
+ # c-type references don't support hash
+ if ref_type == ":ref" and replace_underscores:
+ ref_name = ref_name.replace("_", "-")
+
+ # C domain references may have namespaces
+ if ref_type.startswith(":c:"):
+ if self.namespace:
+ ref_name = f"{self.namespace}.{ref_name}"
+
+ if ref_type:
+ ref_link = f"{ref_type}:`{symbol} <{ref_name}>`"
+ else:
+ ref_link = f"`{symbol} <{ref_name}>`"
+ else:
+ ref_link = symbol
+
+ self.symbols[symbol_type][symbol] = (f"{prefix}{ref_link}{suffix}", ln)
+
+ def store_line(self, line):
+ """Stores a line at self.data, properly indented"""
+ line = " " + line.expandtabs()
+ self.data += line.rstrip(" ")
+
+ def parse_file(self, file_in: str, exceptions: str = None):
+ """Reads a C source file and get identifiers"""
+ self.data = ""
+ is_enum = False
+ is_comment = False
+ multiline = ""
+
+ self.read_exceptions(exceptions)
+
+ with open(file_in, "r",
+ encoding="utf-8", errors="backslashreplace") as f:
+ for line_no, line in enumerate(f):
+ self.store_line(line)
+ line = line.strip("\n")
+
+ # Handle continuation lines
+ if line.endswith(r"\\"):
+ multiline += line[-1]
+ continue
+
+ if multiline:
+ line = multiline + line
+ multiline = ""
+
+ # Handle comments. They can be multilined
+ if not is_comment:
+ if re.search(r"/\*.*", line):
+ is_comment = True
+ else:
+ # Strip C99-style comments
+ line = re.sub(r"(//.*)", "", line)
+
+ if is_comment:
+ if re.search(r".*\*/", line):
+ is_comment = False
+ else:
+ multiline = line
+ continue
+
+ # At this point, line variable may be a multilined statement,
+ # if lines end with \ or if they have multi-line comments
+ # With that, it can safely remove the entire comments,
+ # and there's no need to use re.DOTALL for the logic below
+
+ line = re.sub(r"(/\*.*\*/)", "", line)
+ if not line.strip():
+ continue
+
+ # It can be useful for debug purposes to print the file after
+ # having comments stripped and multi-lines grouped.
+ if self.debug > 1:
+ print(f"line {line_no + 1}: {line}")
+
+ # Now the fun begins: parse each type and store it.
+
+ # We opted for a two parsing logic here due to:
+ # 1. it makes easier to debug issues not-parsed symbols;
+ # 2. we want symbol replacement at the entire content, not
+ # just when the symbol is detected.
+
+ if is_enum:
+ match = re.match(r"^\s*([_\w][\w\d_]+)\s*[\,=]?", line)
+ if match:
+ self.store_type(line_no, "symbol", match.group(1))
+ if "}" in line:
+ is_enum = False
+ continue
+
+ match = re.match(r"^\s*#\s*define\s+([\w_]+)\s+_IO", line)
+ if match:
+ self.store_type(line_no, "ioctl", match.group(1),
+ replace_underscores=False)
+ continue
+
+ match = re.match(r"^\s*#\s*define\s+([\w_]+)(\s+|$)", line)
+ if match:
+ self.store_type(line_no, "define", match.group(1))
+ continue
+
+ match = re.match(r"^\s*typedef\s+([_\w][\w\d_]+)\s+(.*)\s+([_\w][\w\d_]+);",
+ line)
+ if match:
+ name = match.group(2).strip()
+ symbol = match.group(3)
+ self.store_type(line_no, "typedef", symbol, ref_name=name)
+ continue
+
+ for re_enum in self.RE_ENUMS:
+ match = re_enum.match(line)
+ if match:
+ self.store_type(line_no, "enum", match.group(1))
+ is_enum = True
+ break
+
+ for re_struct in self.RE_STRUCTS:
+ match = re_struct.match(line)
+ if match:
+ self.store_type(line_no, "struct", match.group(1))
+ break
+
+ self.apply_exceptions()
+
+ def debug_print(self):
+ """
+ Print debug information containing the replacement rules per symbol.
+ To make easier to check, group them per type.
+ """
+ if not self.debug:
+ return
+
+ for c_type, refs in self.symbols.items():
+ if not refs: # Skip empty dictionaries
+ continue
+
+ print(f"{c_type}:")
+
+ for symbol, (ref, ln) in sorted(refs.items()):
+ print(f" #{ln:<5d} {symbol} -> {ref}")
+
+ print()
+
+ def gen_output(self):
+ """Write the formatted output to a file."""
+
+ # Avoid extra blank lines
+ text = re.sub(r"\s+$", "", self.data) + "\n"
+ text = re.sub(r"\n\s+\n", "\n\n", text)
+
+ # Escape Sphinx special characters
+ text = re.sub(r"([\_\`\*\<\>\&\\\\:\/\|\%\$\#\{\}\~\^])", r"\\\1", text)
+
+ # Source uAPI files may have special notes. Use bold font for them
+ text = re.sub(r"DEPRECATED", "**DEPRECATED**", text)
+
+ # Delimiters to catch the entire symbol after escaped
+ start_delim = r"([ \n\t\(=\*\@])"
+ end_delim = r"(\s|,|\\=|\\:|\;|\)|\}|\{)"
+
+ # Process all reference types
+ for ref_dict in self.symbols.values():
+ for symbol, (replacement, _) in ref_dict.items():
+ symbol = re.escape(re.sub(r"([\_\`\*\<\>\&\\\\:\/])", r"\\\1", symbol))
+ text = re.sub(fr'{start_delim}{symbol}{end_delim}',
+ fr'\1{replacement}\2', text)
+
+ # Remove "\ " where not needed: before spaces and at the end of lines
+ text = re.sub(r"\\ ([\n ])", r"\1", text)
+ text = re.sub(r" \\ ", " ", text)
+
+ return text
+
+ def gen_toc(self):
+ """
+ Create a list of symbols to be part of a TOC contents table
+ """
+ text = []
+
+ # Sort symbol types per description
+ symbol_descriptions = []
+ for k, v in self.DEF_SYMBOL_TYPES.items():
+ symbol_descriptions.append((v['description'], k))
+
+ symbol_descriptions.sort()
+
+ # Process each category
+ for description, c_type in symbol_descriptions:
+
+ refs = self.symbols[c_type]
+ if not refs: # Skip empty categories
+ continue
+
+ text.append(f"{description}")
+ text.append("-" * len(description))
+ text.append("")
+
+ # Sort symbols alphabetically
+ for symbol, (ref, ln) in sorted(refs.items()):
+ text.append(f"- LINENO_{ln}: {ref}")
+
+ text.append("") # Add empty line between categories
+
+ return "\n".join(text)
+
+ def write_output(self, file_in: str, file_out: str, toc: bool):
+ title = os.path.basename(file_in)
+
+ if toc:
+ text = self.gen_toc()
+ else:
+ text = self.gen_output()
+
+ with open(file_out, "w", encoding="utf-8", errors="backslashreplace") as f:
+ f.write(".. -*- coding: utf-8; mode: rst -*-\n\n")
+ f.write(f"{title}\n")
+ f.write("=" * len(title) + "\n\n")
+
+ if not toc:
+ f.write(".. parsed-literal::\n\n")
+
+ f.write(text)
diff --git a/tools/lib/python/kdoc/python_version.py b/tools/lib/python/kdoc/python_version.py
new file mode 100644
index 000000000000..e83088013db2
--- /dev/null
+++ b/tools/lib/python/kdoc/python_version.py
@@ -0,0 +1,178 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0-or-later
+# Copyright (c) 2017-2025 Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
+
+"""
+Handle Python version check logic.
+
+Not all Python versions are supported by scripts. Yet, on some cases,
+like during documentation build, a newer version of python could be
+available.
+
+This class allows checking if the minimal requirements are followed.
+
+Better than that, PythonVersion.check_python() not only checks the minimal
+requirements, but it automatically switches to a the newest available
+Python version if present.
+
+"""
+
+import os
+import re
+import subprocess
+import shlex
+import sys
+
+from glob import glob
+from textwrap import indent
+
+class PythonVersion:
+ """
+ Ancillary methods that checks for missing dependencies for different
+ types of types, like binaries, python modules, rpm deps, etc.
+ """
+
+ def __init__(self, version):
+ """Ïnitialize self.version tuple from a version string"""
+ self.version = self.parse_version(version)
+
+ @staticmethod
+ def parse_version(version):
+ """Convert a major.minor.patch version into a tuple"""
+ return tuple(int(x) for x in version.split("."))
+
+ @staticmethod
+ def ver_str(version):
+ """Returns a version tuple as major.minor.patch"""
+ return ".".join([str(x) for x in version])
+
+ @staticmethod
+ def cmd_print(cmd, max_len=80):
+ cmd_line = []
+
+ for w in cmd:
+ w = shlex.quote(w)
+
+ if cmd_line:
+ if not max_len or len(cmd_line[-1]) + len(w) < max_len:
+ cmd_line[-1] += " " + w
+ continue
+ else:
+ cmd_line[-1] += " \\"
+ cmd_line.append(w)
+ else:
+ cmd_line.append(w)
+
+ return "\n ".join(cmd_line)
+
+ def __str__(self):
+ """Returns a version tuple as major.minor.patch from self.version"""
+ return self.ver_str(self.version)
+
+ @staticmethod
+ def get_python_version(cmd):
+ """
+ Get python version from a Python binary. As we need to detect if
+ are out there newer python binaries, we can't rely on sys.release here.
+ """
+
+ kwargs = {}
+ if sys.version_info < (3, 7):
+ kwargs['universal_newlines'] = True
+ else:
+ kwargs['text'] = True
+
+ result = subprocess.run([cmd, "--version"],
+ stdout = subprocess.PIPE,
+ stderr = subprocess.PIPE,
+ **kwargs, check=False)
+
+ version = result.stdout.strip()
+
+ match = re.search(r"(\d+\.\d+\.\d+)", version)
+ if match:
+ return PythonVersion.parse_version(match.group(1))
+
+ print(f"Can't parse version {version}")
+ return (0, 0, 0)
+
+ @staticmethod
+ def find_python(min_version):
+ """
+ Detect if are out there any python 3.xy version newer than the
+ current one.
+
+ Note: this routine is limited to up to 2 digits for python3. We
+ may need to update it one day, hopefully on a distant future.
+ """
+ patterns = [
+ "python3.[0-9][0-9]",
+ "python3.[0-9]",
+ ]
+
+ python_cmd = []
+
+ # Seek for a python binary newer than min_version
+ for path in os.getenv("PATH", "").split(":"):
+ for pattern in patterns:
+ for cmd in glob(os.path.join(path, pattern)):
+ if os.path.isfile(cmd) and os.access(cmd, os.X_OK):
+ version = PythonVersion.get_python_version(cmd)
+ if version >= min_version:
+ python_cmd.append((version, cmd))
+
+ return sorted(python_cmd, reverse=True)
+
+ @staticmethod
+ def check_python(min_version, show_alternatives=False, bail_out=False,
+ success_on_error=False):
+ """
+ Check if the current python binary satisfies our minimal requirement
+ for Sphinx build. If not, re-run with a newer version if found.
+ """
+ cur_ver = sys.version_info[:3]
+ if cur_ver >= min_version:
+ ver = PythonVersion.ver_str(cur_ver)
+ return
+
+ python_ver = PythonVersion.ver_str(cur_ver)
+
+ available_versions = PythonVersion.find_python(min_version)
+ if not available_versions:
+ print(f"ERROR: Python version {python_ver} is not supported anymore\n")
+ print(" Can't find a new version. This script may fail")
+ return
+
+ script_path = os.path.abspath(sys.argv[0])
+
+ # Check possible alternatives
+ if available_versions:
+ new_python_cmd = available_versions[0][1]
+ else:
+ new_python_cmd = None
+
+ if show_alternatives and available_versions:
+ print("You could run, instead:")
+ for _, cmd in available_versions:
+ args = [cmd, script_path] + sys.argv[1:]
+
+ cmd_str = indent(PythonVersion.cmd_print(args), " ")
+ print(f"{cmd_str}\n")
+
+ if bail_out:
+ msg = f"Python {python_ver} not supported. Bailing out"
+ if success_on_error:
+ print(msg, file=sys.stderr)
+ sys.exit(0)
+ else:
+ sys.exit(msg)
+
+ print(f"Python {python_ver} not supported. Changing to {new_python_cmd}")
+
+ # Restart script using the newer version
+ args = [new_python_cmd, script_path] + sys.argv[1:]
+
+ try:
+ os.execv(new_python_cmd, args)
+ except OSError as e:
+ sys.exit(f"Failed to restart with {new_python_cmd}: {e}")
diff --git a/tools/lib/subcmd/help.c b/tools/lib/subcmd/help.c
index 9ef569492560..ddaeb4eb3e24 100644
--- a/tools/lib/subcmd/help.c
+++ b/tools/lib/subcmd/help.c
@@ -75,6 +75,9 @@ void exclude_cmds(struct cmdnames *cmds, struct cmdnames *excludes)
size_t ci, cj, ei;
int cmp;
+ if (!excludes->cnt)
+ return;
+
ci = cj = ei = 0;
while (ci < cmds->cnt && ei < excludes->cnt) {
cmp = strcmp(cmds->names[ci]->name, excludes->names[ei]->name);
diff --git a/tools/lib/thermal/Makefile b/tools/lib/thermal/Makefile
index a1f5e388644d..41aa7a324ff4 100644
--- a/tools/lib/thermal/Makefile
+++ b/tools/lib/thermal/Makefile
@@ -46,8 +46,12 @@ else
CFLAGS := -g -Wall
endif
+NL3_CFLAGS = $(shell pkg-config --cflags libnl-3.0 2>/dev/null)
+ifeq ($(NL3_CFLAGS),)
+NL3_CFLAGS = -I/usr/include/libnl3
+endif
+
INCLUDES = \
--I/usr/include/libnl3 \
-I$(srctree)/tools/lib/thermal/include \
-I$(srctree)/tools/lib/ \
-I$(srctree)/tools/include \
@@ -59,6 +63,7 @@ INCLUDES = \
override CFLAGS += $(EXTRA_WARNINGS)
override CFLAGS += -Werror -Wall
override CFLAGS += -fPIC
+override CFLAGS += $(NL3_CFLAGS)
override CFLAGS += $(INCLUDES)
override CFLAGS += -fvisibility=hidden
override CFGLAS += -Wl,-L.
@@ -134,7 +139,7 @@ endef
install_lib: libs
$(call QUIET_INSTALL, $(LIBTHERMAL_ALL)) \
$(call do_install_mkdir,$(libdir_SQ)); \
- cp -fpR $(LIBTHERMAL_ALL) $(DESTDIR)$(libdir_SQ)
+ cp -fR --preserve=mode,timestamp $(LIBTHERMAL_ALL) $(DESTDIR)$(libdir_SQ)
install_headers:
$(call QUIET_INSTALL, headers) \
diff --git a/tools/lib/thermal/libthermal.map b/tools/lib/thermal/libthermal.map
index d657176aa47f..1d3d0c04e4b6 100644
--- a/tools/lib/thermal/libthermal.map
+++ b/tools/lib/thermal/libthermal.map
@@ -1,6 +1,5 @@
LIBTHERMAL_0.0.1 {
global:
- thermal_init;
for_each_thermal_zone;
for_each_thermal_trip;
for_each_thermal_cdev;
@@ -9,9 +8,12 @@ LIBTHERMAL_0.0.1 {
thermal_zone_find_by_id;
thermal_zone_discover;
thermal_init;
+ thermal_exit;
+ thermal_events_exit;
thermal_events_init;
thermal_events_handle;
thermal_events_fd;
+ thermal_cmd_exit;
thermal_cmd_init;
thermal_cmd_get_tz;
thermal_cmd_get_cdev;
@@ -22,6 +24,7 @@ LIBTHERMAL_0.0.1 {
thermal_cmd_threshold_add;
thermal_cmd_threshold_delete;
thermal_cmd_threshold_flush;
+ thermal_sampling_exit;
thermal_sampling_init;
thermal_sampling_handle;
thermal_sampling_fd;
diff --git a/tools/mm/page_owner_sort.c b/tools/mm/page_owner_sort.c
index 880e36df0c11..14c67e9e84c4 100644
--- a/tools/mm/page_owner_sort.c
+++ b/tools/mm/page_owner_sort.c
@@ -13,6 +13,7 @@
#include <stdio.h>
#include <stdlib.h>
+#include <stdbool.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
@@ -23,9 +24,6 @@
#include <linux/types.h>
#include <getopt.h>
-#define bool int
-#define true 1
-#define false 0
#define TASK_COMM_LEN 16
struct block_list {
@@ -669,14 +667,15 @@ int main(int argc, char **argv)
{ "pid", required_argument, NULL, 1 },
{ "tgid", required_argument, NULL, 2 },
{ "name", required_argument, NULL, 3 },
- { "cull", required_argument, NULL, 4 },
- { "sort", required_argument, NULL, 5 },
+ { "cull", required_argument, NULL, 4 },
+ { "sort", required_argument, NULL, 5 },
+ { "help", no_argument, NULL, 'h' },
{ 0, 0, 0, 0},
};
compare_flag = COMP_NO_FLAG;
- while ((opt = getopt_long(argc, argv, "admnpstP", longopts, NULL)) != -1)
+ while ((opt = getopt_long(argc, argv, "admnpstPh", longopts, NULL)) != -1)
switch (opt) {
case 'a':
compare_flag |= COMP_ALLOC;
@@ -702,6 +701,9 @@ int main(int argc, char **argv)
case 'n':
compare_flag |= COMP_COMM;
break;
+ case 'h':
+ usage();
+ exit(0);
case 1:
filter = filter | FILTER_PID;
fc.pids = parse_nums_list(optarg, &fc.pids_size);
diff --git a/tools/mm/slabinfo.c b/tools/mm/slabinfo.c
index 1433eff99feb..80cdbd3db82d 100644
--- a/tools/mm/slabinfo.c
+++ b/tools/mm/slabinfo.c
@@ -155,6 +155,7 @@ static void usage(void)
static unsigned long read_obj(const char *name)
{
+ size_t len;
FILE *f = fopen(name, "r");
if (!f) {
@@ -165,8 +166,10 @@ static unsigned long read_obj(const char *name)
if (!fgets(buffer, sizeof(buffer), f))
buffer[0] = 0;
fclose(f);
- if (buffer[strlen(buffer)] == '\n')
- buffer[strlen(buffer)] = 0;
+ len = strlen(buffer);
+
+ if (len > 0 && buffer[len - 1] == '\n')
+ buffer[len - 1] = 0;
}
return strlen(buffer);
}
diff --git a/tools/net/sunrpc/xdrgen/generators/__init__.py b/tools/net/sunrpc/xdrgen/generators/__init__.py
index b98574a36a4a..e22632cf38fb 100644
--- a/tools/net/sunrpc/xdrgen/generators/__init__.py
+++ b/tools/net/sunrpc/xdrgen/generators/__init__.py
@@ -2,7 +2,7 @@
"""Define a base code generator class"""
-import sys
+from pathlib import Path
from jinja2 import Environment, FileSystemLoader, Template
from xdr_ast import _XdrAst, Specification, _RpcProgram, _XdrTypeSpecifier
@@ -14,8 +14,11 @@ def create_jinja2_environment(language: str, xdr_type: str) -> Environment:
"""Open a set of templates based on output language"""
match language:
case "C":
+ templates_dir = (
+ Path(__file__).parent.parent / "templates" / language / xdr_type
+ )
environment = Environment(
- loader=FileSystemLoader(sys.path[0] + "/templates/C/" + xdr_type + "/"),
+ loader=FileSystemLoader(templates_dir),
trim_blocks=True,
lstrip_blocks=True,
)
@@ -48,9 +51,7 @@ def find_xdr_program_name(root: Specification) -> str:
def header_guard_infix(filename: str) -> str:
"""Extract the header guard infix from the specification filename"""
- basename = filename.split("/")[-1]
- program = basename.replace(".x", "")
- return program.upper()
+ return Path(filename).stem.upper()
def kernel_c_type(spec: _XdrTypeSpecifier) -> str:
diff --git a/tools/net/sunrpc/xdrgen/generators/union.py b/tools/net/sunrpc/xdrgen/generators/union.py
index 2cca00e279cd..ad1f214ef22a 100644
--- a/tools/net/sunrpc/xdrgen/generators/union.py
+++ b/tools/net/sunrpc/xdrgen/generators/union.py
@@ -8,7 +8,7 @@ from jinja2 import Environment
from generators import SourceGenerator
from generators import create_jinja2_environment, get_jinja2_template
-from xdr_ast import _XdrBasic, _XdrUnion, _XdrVoid, get_header_name
+from xdr_ast import _XdrBasic, _XdrUnion, _XdrVoid, _XdrString, get_header_name
from xdr_ast import _XdrDeclaration, _XdrCaseSpec, public_apis, big_endian
@@ -40,13 +40,20 @@ def emit_union_case_spec_definition(
"""Emit a definition for an XDR union's case arm"""
if isinstance(node.arm, _XdrVoid):
return
- assert isinstance(node.arm, _XdrBasic)
+ if isinstance(node.arm, _XdrString):
+ type_name = "char *"
+ classifier = ""
+ else:
+ type_name = node.arm.spec.type_name
+ classifier = node.arm.spec.c_classifier
+
+ assert isinstance(node.arm, (_XdrBasic, _XdrString))
template = get_jinja2_template(environment, "definition", "case_spec")
print(
template.render(
name=node.arm.name,
- type=node.arm.spec.type_name,
- classifier=node.arm.spec.c_classifier,
+ type=type_name,
+ classifier=classifier,
)
)
@@ -84,6 +91,12 @@ def emit_union_case_spec_decoder(
if isinstance(node.arm, _XdrVoid):
return
+ if isinstance(node.arm, _XdrString):
+ type_name = "char *"
+ classifier = ""
+ else:
+ type_name = node.arm.spec.type_name
+ classifier = node.arm.spec.c_classifier
if big_endian_discriminant:
template = get_jinja2_template(environment, "decoder", "case_spec_be")
@@ -92,13 +105,13 @@ def emit_union_case_spec_decoder(
for case in node.values:
print(template.render(case=case))
- assert isinstance(node.arm, _XdrBasic)
+ assert isinstance(node.arm, (_XdrBasic, _XdrString))
template = get_jinja2_template(environment, "decoder", node.arm.template)
print(
template.render(
name=node.arm.name,
- type=node.arm.spec.type_name,
- classifier=node.arm.spec.c_classifier,
+ type=type_name,
+ classifier=classifier,
)
)
@@ -169,7 +182,10 @@ def emit_union_case_spec_encoder(
if isinstance(node.arm, _XdrVoid):
return
-
+ if isinstance(node.arm, _XdrString):
+ type_name = "char *"
+ else:
+ type_name = node.arm.spec.type_name
if big_endian_discriminant:
template = get_jinja2_template(environment, "encoder", "case_spec_be")
else:
@@ -181,7 +197,7 @@ def emit_union_case_spec_encoder(
print(
template.render(
name=node.arm.name,
- type=node.arm.spec.type_name,
+ type=type_name,
)
)
diff --git a/tools/net/sunrpc/xdrgen/templates/C/pointer/decoder/close.j2 b/tools/net/sunrpc/xdrgen/templates/C/pointer/decoder/close.j2
index 5bf010665f84..3dbd724d7f17 100644
--- a/tools/net/sunrpc/xdrgen/templates/C/pointer/decoder/close.j2
+++ b/tools/net/sunrpc/xdrgen/templates/C/pointer/decoder/close.j2
@@ -1,3 +1,3 @@
{# SPDX-License-Identifier: GPL-2.0 #}
return true;
-};
+}
diff --git a/tools/net/sunrpc/xdrgen/templates/C/pointer/encoder/close.j2 b/tools/net/sunrpc/xdrgen/templates/C/pointer/encoder/close.j2
index 5bf010665f84..3dbd724d7f17 100644
--- a/tools/net/sunrpc/xdrgen/templates/C/pointer/encoder/close.j2
+++ b/tools/net/sunrpc/xdrgen/templates/C/pointer/encoder/close.j2
@@ -1,3 +1,3 @@
{# SPDX-License-Identifier: GPL-2.0 #}
return true;
-};
+}
diff --git a/tools/net/sunrpc/xdrgen/templates/C/struct/decoder/close.j2 b/tools/net/sunrpc/xdrgen/templates/C/struct/decoder/close.j2
index 5bf010665f84..3dbd724d7f17 100644
--- a/tools/net/sunrpc/xdrgen/templates/C/struct/decoder/close.j2
+++ b/tools/net/sunrpc/xdrgen/templates/C/struct/decoder/close.j2
@@ -1,3 +1,3 @@
{# SPDX-License-Identifier: GPL-2.0 #}
return true;
-};
+}
diff --git a/tools/net/sunrpc/xdrgen/templates/C/struct/decoder/variable_length_opaque.j2 b/tools/net/sunrpc/xdrgen/templates/C/struct/decoder/variable_length_opaque.j2
index 9a814de54ae8..65698e20d8cd 100644
--- a/tools/net/sunrpc/xdrgen/templates/C/struct/decoder/variable_length_opaque.j2
+++ b/tools/net/sunrpc/xdrgen/templates/C/struct/decoder/variable_length_opaque.j2
@@ -2,5 +2,5 @@
{% if annotate %}
/* member {{ name }} (variable-length opaque) */
{% endif %}
- if (!xdrgen_decode_opaque(xdr, (opaque *)ptr, {{ maxsize }}))
+ if (!xdrgen_decode_opaque(xdr, &ptr->{{ name }}, {{ maxsize }}))
return false;
diff --git a/tools/net/sunrpc/xdrgen/templates/C/struct/encoder/close.j2 b/tools/net/sunrpc/xdrgen/templates/C/struct/encoder/close.j2
index 5bf010665f84..3dbd724d7f17 100644
--- a/tools/net/sunrpc/xdrgen/templates/C/struct/encoder/close.j2
+++ b/tools/net/sunrpc/xdrgen/templates/C/struct/encoder/close.j2
@@ -1,3 +1,3 @@
{# SPDX-License-Identifier: GPL-2.0 #}
return true;
-};
+}
diff --git a/tools/net/sunrpc/xdrgen/templates/C/typedef/decoder/basic.j2 b/tools/net/sunrpc/xdrgen/templates/C/typedef/decoder/basic.j2
index da4709403dc9..b215e157dfa7 100644
--- a/tools/net/sunrpc/xdrgen/templates/C/typedef/decoder/basic.j2
+++ b/tools/net/sunrpc/xdrgen/templates/C/typedef/decoder/basic.j2
@@ -14,4 +14,4 @@ xdrgen_decode_{{ name }}(struct xdr_stream *xdr, {{ name }} *ptr)
/* (basic) */
{% endif %}
return xdrgen_decode_{{ type }}(xdr, ptr);
-};
+}
diff --git a/tools/net/sunrpc/xdrgen/templates/C/typedef/decoder/fixed_length_array.j2 b/tools/net/sunrpc/xdrgen/templates/C/typedef/decoder/fixed_length_array.j2
index d7c80e472fe3..c8953719e626 100644
--- a/tools/net/sunrpc/xdrgen/templates/C/typedef/decoder/fixed_length_array.j2
+++ b/tools/net/sunrpc/xdrgen/templates/C/typedef/decoder/fixed_length_array.j2
@@ -22,4 +22,4 @@ xdrgen_decode_{{ name }}(struct xdr_stream *xdr, {{ classifier }}{{ name }} *ptr
return false;
}
return true;
-};
+}
diff --git a/tools/net/sunrpc/xdrgen/templates/C/typedef/decoder/fixed_length_opaque.j2 b/tools/net/sunrpc/xdrgen/templates/C/typedef/decoder/fixed_length_opaque.j2
index 8b4ff08c49e5..c854fc8c74e3 100644
--- a/tools/net/sunrpc/xdrgen/templates/C/typedef/decoder/fixed_length_opaque.j2
+++ b/tools/net/sunrpc/xdrgen/templates/C/typedef/decoder/fixed_length_opaque.j2
@@ -13,5 +13,5 @@ xdrgen_decode_{{ name }}(struct xdr_stream *xdr, {{ classifier }}{{ name }} *ptr
{% if annotate %}
/* (fixed-length opaque) */
{% endif %}
- return xdr_stream_decode_opaque_fixed(xdr, ptr, {{ size }}) >= 0;
-};
+ return xdr_stream_decode_opaque_fixed(xdr, ptr, {{ size }}) == 0;
+}
diff --git a/tools/net/sunrpc/xdrgen/templates/C/typedef/decoder/string.j2 b/tools/net/sunrpc/xdrgen/templates/C/typedef/decoder/string.j2
index 56c5a17d6a70..bcbc1758aae9 100644
--- a/tools/net/sunrpc/xdrgen/templates/C/typedef/decoder/string.j2
+++ b/tools/net/sunrpc/xdrgen/templates/C/typedef/decoder/string.j2
@@ -14,4 +14,4 @@ xdrgen_decode_{{ name }}(struct xdr_stream *xdr, {{ classifier }}{{ name }} *ptr
/* (variable-length string) */
{% endif %}
return xdrgen_decode_string(xdr, ptr, {{ maxsize }});
-};
+}
diff --git a/tools/net/sunrpc/xdrgen/templates/C/typedef/decoder/variable_length_array.j2 b/tools/net/sunrpc/xdrgen/templates/C/typedef/decoder/variable_length_array.j2
index e74ffdd98463..a59cc1f38eed 100644
--- a/tools/net/sunrpc/xdrgen/templates/C/typedef/decoder/variable_length_array.j2
+++ b/tools/net/sunrpc/xdrgen/templates/C/typedef/decoder/variable_length_array.j2
@@ -23,4 +23,4 @@ xdrgen_decode_{{ name }}(struct xdr_stream *xdr, {{ classifier }}{{ name }} *ptr
if (!xdrgen_decode_{{ type }}(xdr, &ptr->element[i]))
return false;
return true;
-};
+}
diff --git a/tools/net/sunrpc/xdrgen/templates/C/typedef/decoder/variable_length_opaque.j2 b/tools/net/sunrpc/xdrgen/templates/C/typedef/decoder/variable_length_opaque.j2
index f28f8b228ad5..eb05f53e1041 100644
--- a/tools/net/sunrpc/xdrgen/templates/C/typedef/decoder/variable_length_opaque.j2
+++ b/tools/net/sunrpc/xdrgen/templates/C/typedef/decoder/variable_length_opaque.j2
@@ -14,4 +14,4 @@ xdrgen_decode_{{ name }}(struct xdr_stream *xdr, {{ classifier }}{{ name }} *ptr
/* (variable-length opaque) */
{% endif %}
return xdrgen_decode_opaque(xdr, ptr, {{ maxsize }});
-};
+}
diff --git a/tools/net/sunrpc/xdrgen/templates/C/typedef/encoder/basic.j2 b/tools/net/sunrpc/xdrgen/templates/C/typedef/encoder/basic.j2
index 35effe67e4ef..0d21dd0b723a 100644
--- a/tools/net/sunrpc/xdrgen/templates/C/typedef/encoder/basic.j2
+++ b/tools/net/sunrpc/xdrgen/templates/C/typedef/encoder/basic.j2
@@ -18,4 +18,4 @@ xdrgen_encode_{{ name }}(struct xdr_stream *xdr, const {{ classifier }}{{ name }
/* (basic) */
{% endif %}
return xdrgen_encode_{{ type }}(xdr, value);
-};
+}
diff --git a/tools/net/sunrpc/xdrgen/templates/C/typedef/encoder/fixed_length_array.j2 b/tools/net/sunrpc/xdrgen/templates/C/typedef/encoder/fixed_length_array.j2
index 95202ad5ad2d..ec8cd6509514 100644
--- a/tools/net/sunrpc/xdrgen/templates/C/typedef/encoder/fixed_length_array.j2
+++ b/tools/net/sunrpc/xdrgen/templates/C/typedef/encoder/fixed_length_array.j2
@@ -22,4 +22,4 @@ xdrgen_encode_{{ name }}(struct xdr_stream *xdr, const {{ classifier }}{{ name }
return false;
}
return true;
-};
+}
diff --git a/tools/net/sunrpc/xdrgen/templates/C/typedef/encoder/fixed_length_opaque.j2 b/tools/net/sunrpc/xdrgen/templates/C/typedef/encoder/fixed_length_opaque.j2
index 9c66a11b9912..b53fa87e1858 100644
--- a/tools/net/sunrpc/xdrgen/templates/C/typedef/encoder/fixed_length_opaque.j2
+++ b/tools/net/sunrpc/xdrgen/templates/C/typedef/encoder/fixed_length_opaque.j2
@@ -14,4 +14,4 @@ xdrgen_encode_{{ name }}(struct xdr_stream *xdr, const {{ classifier }}{{ name }
/* (fixed-length opaque) */
{% endif %}
return xdr_stream_encode_opaque_fixed(xdr, value, {{ size }}) >= 0;
-};
+}
diff --git a/tools/net/sunrpc/xdrgen/templates/C/typedef/encoder/string.j2 b/tools/net/sunrpc/xdrgen/templates/C/typedef/encoder/string.j2
index 3d490ff180d0..28b81f1d0bd6 100644
--- a/tools/net/sunrpc/xdrgen/templates/C/typedef/encoder/string.j2
+++ b/tools/net/sunrpc/xdrgen/templates/C/typedef/encoder/string.j2
@@ -14,4 +14,4 @@ xdrgen_encode_{{ name }}(struct xdr_stream *xdr, const {{ classifier }}{{ name }
/* (variable-length string) */
{% endif %}
return xdr_stream_encode_opaque(xdr, value.data, value.len) >= 0;
-};
+}
diff --git a/tools/net/sunrpc/xdrgen/templates/C/typedef/encoder/variable_length_array.j2 b/tools/net/sunrpc/xdrgen/templates/C/typedef/encoder/variable_length_array.j2
index 2d2384f64918..ff093c281d51 100644
--- a/tools/net/sunrpc/xdrgen/templates/C/typedef/encoder/variable_length_array.j2
+++ b/tools/net/sunrpc/xdrgen/templates/C/typedef/encoder/variable_length_array.j2
@@ -27,4 +27,4 @@ xdrgen_encode_{{ name }}(struct xdr_stream *xdr, const {{ classifier }}{{ name }
{% endif %}
return false;
return true;
-};
+}
diff --git a/tools/net/sunrpc/xdrgen/templates/C/typedef/encoder/variable_length_opaque.j2 b/tools/net/sunrpc/xdrgen/templates/C/typedef/encoder/variable_length_opaque.j2
index 8508f13c95b9..2e89592fa702 100644
--- a/tools/net/sunrpc/xdrgen/templates/C/typedef/encoder/variable_length_opaque.j2
+++ b/tools/net/sunrpc/xdrgen/templates/C/typedef/encoder/variable_length_opaque.j2
@@ -14,4 +14,4 @@ xdrgen_encode_{{ name }}(struct xdr_stream *xdr, const {{ classifier }}{{ name }
/* (variable-length opaque) */
{% endif %}
return xdr_stream_encode_opaque(xdr, value.data, value.len) >= 0;
-};
+}
diff --git a/tools/net/sunrpc/xdrgen/templates/C/union/declaration/close.j2 b/tools/net/sunrpc/xdrgen/templates/C/union/declaration/close.j2
new file mode 100644
index 000000000000..816291184e8c
--- /dev/null
+++ b/tools/net/sunrpc/xdrgen/templates/C/union/declaration/close.j2
@@ -0,0 +1,4 @@
+{# SPDX-License-Identifier: GPL-2.0 #}
+
+bool xdrgen_decode_{{ name }}(struct xdr_stream *xdr, struct {{ name }} *ptr);
+bool xdrgen_encode_{{ name }}(struct xdr_stream *xdr, const struct {{ name }} *value);
diff --git a/tools/net/sunrpc/xdrgen/templates/C/union/decoder/close.j2 b/tools/net/sunrpc/xdrgen/templates/C/union/decoder/close.j2
index fdc2dfd1843b..39d8d6c5094d 100644
--- a/tools/net/sunrpc/xdrgen/templates/C/union/decoder/close.j2
+++ b/tools/net/sunrpc/xdrgen/templates/C/union/decoder/close.j2
@@ -1,4 +1,4 @@
{# SPDX-License-Identifier: GPL-2.0 #}
}
return true;
-};
+}
diff --git a/tools/net/sunrpc/xdrgen/templates/C/union/encoder/close.j2 b/tools/net/sunrpc/xdrgen/templates/C/union/encoder/close.j2
index fdc2dfd1843b..39d8d6c5094d 100644
--- a/tools/net/sunrpc/xdrgen/templates/C/union/encoder/close.j2
+++ b/tools/net/sunrpc/xdrgen/templates/C/union/encoder/close.j2
@@ -1,4 +1,4 @@
{# SPDX-License-Identifier: GPL-2.0 #}
}
return true;
-};
+}
diff --git a/tools/net/sunrpc/xdrgen/templates/C/union/encoder/string.j2 b/tools/net/sunrpc/xdrgen/templates/C/union/encoder/string.j2
new file mode 100644
index 000000000000..2f035a64f1f4
--- /dev/null
+++ b/tools/net/sunrpc/xdrgen/templates/C/union/encoder/string.j2
@@ -0,0 +1,6 @@
+{# SPDX-License-Identifier: GPL-2.0 #}
+{% if annotate %}
+ /* member {{ name }} (variable-length string) */
+{% endif %}
+ if (!xdrgen_encode_string(xdr, ptr->u.{{ name }}, {{ maxsize }}))
+ return false;
diff --git a/tools/net/sunrpc/xdrgen/xdrgen b/tools/net/sunrpc/xdrgen/xdrgen
index 43762be39252..3afd0547d67c 100755
--- a/tools/net/sunrpc/xdrgen/xdrgen
+++ b/tools/net/sunrpc/xdrgen/xdrgen
@@ -10,8 +10,13 @@ __license__ = "GPL-2.0 only"
__version__ = "0.2"
import sys
+from pathlib import Path
import argparse
+_XDRGEN_DIR = Path(__file__).resolve().parent
+if str(_XDRGEN_DIR) not in sys.path:
+ sys.path.insert(0, str(_XDRGEN_DIR))
+
from subcmds import definitions
from subcmds import declarations
from subcmds import lint
diff --git a/tools/net/ynl/Makefile b/tools/net/ynl/Makefile
index 211df5a93ad9..7736b492f559 100644
--- a/tools/net/ynl/Makefile
+++ b/tools/net/ynl/Makefile
@@ -12,10 +12,13 @@ endif
libdir ?= $(prefix)/$(libdir_relative)
includedir ?= $(prefix)/include
-SUBDIRS = lib generated samples
+SPECDIR=../../../Documentation/netlink/specs
+
+SUBDIRS = lib generated samples ynltool tests
all: $(SUBDIRS) libynl.a
+ynltool: | lib generated libynl.a
samples: | lib generated
libynl.a: | lib generated
@echo -e "\tAR $@"
@@ -48,5 +51,27 @@ install: libynl.a lib/*.h
@echo -e "\tINSTALL pyynl"
@pip install --prefix=$(DESTDIR)$(prefix) .
@make -C generated install
+ @make -C tests install
+
+run_tests:
+ @$(MAKE) -C tests run_tests
+
+lint:
+ yamllint $(SPECDIR)
+
+schema_check:
+ @N=1; \
+ for spec in $(SPECDIR)/*.yaml ; do \
+ NAME=$$(basename $$spec) ; \
+ OUTPUT=$$(./pyynl/cli.py --spec $$spec --validate) ; \
+ if [ $$? -eq 0 ] ; then \
+ echo "ok $$N $$NAME schema validation" ; \
+ else \
+ echo "not ok $$N $$NAME schema validation" ; \
+ echo "$$OUTPUT" ; \
+ echo ; \
+ fi ; \
+ N=$$((N+1)) ; \
+ done
-.PHONY: all clean distclean install $(SUBDIRS)
+.PHONY: all clean distclean install run_tests lint schema_check $(SUBDIRS)
diff --git a/tools/net/ynl/Makefile.deps b/tools/net/ynl/Makefile.deps
index 90686e241157..865fd2e8519e 100644
--- a/tools/net/ynl/Makefile.deps
+++ b/tools/net/ynl/Makefile.deps
@@ -31,6 +31,7 @@ CFLAGS_ovpn:=$(call get_hdr_inc,_LINUX_OVPN_H,ovpn.h)
CFLAGS_ovs_datapath:=$(call get_hdr_inc,__LINUX_OPENVSWITCH_H,openvswitch.h)
CFLAGS_ovs_flow:=$(call get_hdr_inc,__LINUX_OPENVSWITCH_H,openvswitch.h)
CFLAGS_ovs_vport:=$(call get_hdr_inc,__LINUX_OPENVSWITCH_H,openvswitch.h)
+CFLAGS_psp:=$(call get_hdr_inc,_LINUX_PSP_H,psp.h)
CFLAGS_rt-addr:=$(call get_hdr_inc,__LINUX_RTNETLINK_H,rtnetlink.h) \
$(call get_hdr_inc,__LINUX_IF_ADDR_H,if_addr.h)
CFLAGS_rt-link:=$(call get_hdr_inc,__LINUX_RTNETLINK_H,rtnetlink.h) \
diff --git a/tools/net/ynl/lib/ynl-priv.h b/tools/net/ynl/lib/ynl-priv.h
index 824777d7e05e..ced7dce44efb 100644
--- a/tools/net/ynl/lib/ynl-priv.h
+++ b/tools/net/ynl/lib/ynl-priv.h
@@ -106,7 +106,6 @@ ynl_gemsg_start_req(struct ynl_sock *ys, __u32 id, __u8 cmd, __u8 version);
struct nlmsghdr *
ynl_gemsg_start_dump(struct ynl_sock *ys, __u32 id, __u8 cmd, __u8 version);
-int ynl_attr_validate(struct ynl_parse_arg *yarg, const struct nlattr *attr);
int ynl_submsg_failed(struct ynl_parse_arg *yarg, const char *field_name,
const char *sel_name);
@@ -314,7 +313,7 @@ ynl_attr_put_str(struct nlmsghdr *nlh, unsigned int attr_type, const char *str)
struct nlattr *attr;
size_t len;
- len = strlen(str);
+ len = strlen(str) + 1;
if (__ynl_attr_put_overflow(nlh, len))
return;
@@ -322,7 +321,7 @@ ynl_attr_put_str(struct nlmsghdr *nlh, unsigned int attr_type, const char *str)
attr->nla_type = attr_type;
strcpy((char *)ynl_attr_data(attr), str);
- attr->nla_len = NLA_HDRLEN + NLA_ALIGN(len);
+ attr->nla_len = NLA_HDRLEN + len;
nlh->nlmsg_len += NLMSG_ALIGN(attr->nla_len);
}
@@ -467,4 +466,13 @@ ynl_attr_put_sint(struct nlmsghdr *nlh, __u16 type, __s64 data)
else
ynl_attr_put_s64(nlh, type, data);
}
+
+int __ynl_attr_validate(struct ynl_parse_arg *yarg, const struct nlattr *attr,
+ unsigned int type);
+
+static inline int ynl_attr_validate(struct ynl_parse_arg *yarg,
+ const struct nlattr *attr)
+{
+ return __ynl_attr_validate(yarg, attr, ynl_attr_type(attr));
+}
#endif
diff --git a/tools/net/ynl/lib/ynl.c b/tools/net/ynl/lib/ynl.c
index 2a169c3c0797..2bcd781111d7 100644
--- a/tools/net/ynl/lib/ynl.c
+++ b/tools/net/ynl/lib/ynl.c
@@ -360,15 +360,15 @@ static int ynl_cb_done(const struct nlmsghdr *nlh, struct ynl_parse_arg *yarg)
/* Attribute validation */
-int ynl_attr_validate(struct ynl_parse_arg *yarg, const struct nlattr *attr)
+int __ynl_attr_validate(struct ynl_parse_arg *yarg, const struct nlattr *attr,
+ unsigned int type)
{
const struct ynl_policy_attr *policy;
- unsigned int type, len;
unsigned char *data;
+ unsigned int len;
data = ynl_attr_data(attr);
len = ynl_attr_data_len(attr);
- type = ynl_attr_type(attr);
if (type > yarg->rsp_policy->max_attr) {
yerr(yarg->ys, YNL_ERROR_INTERNAL,
"Internal error, validating unknown attribute");
diff --git a/tools/net/ynl/pyynl/cli.py b/tools/net/ynl/pyynl/cli.py
index 8c192e900bd3..af02a5b7e5a2 100755
--- a/tools/net/ynl/pyynl/cli.py
+++ b/tools/net/ynl/pyynl/cli.py
@@ -7,9 +7,10 @@ import os
import pathlib
import pprint
import sys
+import textwrap
sys.path.append(pathlib.Path(__file__).resolve().parent.as_posix())
-from lib import YnlFamily, Netlink, NlError
+from lib import YnlFamily, Netlink, NlError, SpecFamily
sys_schema_dir='/usr/share/ynl'
relative_schema_dir='../../../../Documentation/netlink'
@@ -39,6 +40,60 @@ class YnlEncoder(json.JSONEncoder):
return json.JSONEncoder.default(self, obj)
+def print_attr_list(ynl, attr_names, attr_set, indent=2):
+ """Print a list of attributes with their types and documentation."""
+ prefix = ' ' * indent
+ for attr_name in attr_names:
+ if attr_name in attr_set.attrs:
+ attr = attr_set.attrs[attr_name]
+ attr_info = f'{prefix}- {attr_name}: {attr.type}'
+ if 'enum' in attr.yaml:
+ enum_name = attr.yaml['enum']
+ attr_info += f" (enum: {enum_name})"
+ # Print enum values if available
+ if enum_name in ynl.consts:
+ const = ynl.consts[enum_name]
+ enum_values = list(const.entries.keys())
+ attr_info += f"\n{prefix} {const.type.capitalize()}: {', '.join(enum_values)}"
+
+ # Show nested attributes reference and recursively display them
+ nested_set_name = None
+ if attr.type == 'nest' and 'nested-attributes' in attr.yaml:
+ nested_set_name = attr.yaml['nested-attributes']
+ attr_info += f" -> {nested_set_name}"
+
+ if attr.yaml.get('doc'):
+ doc_text = textwrap.indent(attr.yaml['doc'], prefix + ' ')
+ attr_info += f"\n{doc_text}"
+ print(attr_info)
+
+ # Recursively show nested attributes
+ if nested_set_name in ynl.attr_sets:
+ nested_set = ynl.attr_sets[nested_set_name]
+ # Filter out 'unspec' and other unused attrs
+ nested_names = [n for n in nested_set.attrs.keys()
+ if nested_set.attrs[n].type != 'unused']
+ if nested_names:
+ print_attr_list(ynl, nested_names, nested_set, indent + 4)
+
+
+def print_mode_attrs(ynl, mode, mode_spec, attr_set, print_request=True):
+ """Print a given mode (do/dump/event/notify)."""
+ mode_title = mode.capitalize()
+
+ if print_request and 'request' in mode_spec and 'attributes' in mode_spec['request']:
+ print(f'\n{mode_title} request attributes:')
+ print_attr_list(ynl, mode_spec['request']['attributes'], attr_set)
+
+ if 'reply' in mode_spec and 'attributes' in mode_spec['reply']:
+ print(f'\n{mode_title} reply attributes:')
+ print_attr_list(ynl, mode_spec['reply']['attributes'], attr_set)
+
+ if 'attributes' in mode_spec:
+ print(f'\n{mode_title} attributes:')
+ print_attr_list(ynl, mode_spec['attributes'], attr_set)
+
+
def main():
description = """
YNL CLI utility - a general purpose netlink utility that uses YAML
@@ -70,6 +125,9 @@ def main():
group.add_argument('--dump', dest='dump', metavar='DUMP-OPERATION', type=str)
group.add_argument('--list-ops', action='store_true')
group.add_argument('--list-msgs', action='store_true')
+ group.add_argument('--list-attrs', dest='list_attrs', metavar='OPERATION', type=str,
+ help='List attributes for an operation')
+ group.add_argument('--validate', action='store_true')
parser.add_argument('--duration', dest='duration', type=int,
help='when subscribed, watch for DURATION seconds')
@@ -111,15 +169,25 @@ def main():
if args.family:
spec = f"{spec_dir()}/{args.family}.yaml"
- if args.schema is None and spec.startswith(sys_schema_dir):
- args.schema = '' # disable schema validation when installed
- if args.process_unknown is None:
- args.process_unknown = True
else:
spec = args.spec
if not os.path.isfile(spec):
raise Exception(f"Spec file {spec} does not exist")
+ if args.validate:
+ try:
+ SpecFamily(spec, args.schema)
+ except Exception as error:
+ print(error)
+ exit(1)
+ return
+
+ if args.family: # set behaviour when using installed specs
+ if args.schema is None and spec.startswith(sys_schema_dir):
+ args.schema = '' # disable schema validation when installed
+ if args.process_unknown is None:
+ args.process_unknown = True
+
ynl = YnlFamily(spec, args.schema, args.process_unknown,
recv_size=args.dbg_small_recv)
if args.dbg_small_recv:
@@ -135,6 +203,28 @@ def main():
for op_name, op in ynl.msgs.items():
print(op_name, " [", ", ".join(op.modes), "]")
+ if args.list_attrs:
+ op = ynl.msgs.get(args.list_attrs)
+ if not op:
+ print(f'Operation {args.list_attrs} not found')
+ exit(1)
+
+ print(f'Operation: {op.name}')
+ print(op.yaml['doc'])
+
+ for mode in ['do', 'dump', 'event']:
+ if mode in op.yaml:
+ print_mode_attrs(ynl, mode, op.yaml[mode], op.attr_set, True)
+
+ if 'notify' in op.yaml:
+ mode_spec = op.yaml['notify']
+ ref_spec = ynl.msgs.get(mode_spec).yaml.get('do')
+ if ref_spec:
+ print_mode_attrs(ynl, 'notify', ref_spec, op.attr_set, False)
+
+ if 'mcgrp' in op.yaml:
+ print(f"\nMulticast group: {op.yaml['mcgrp']}")
+
try:
if args.do:
reply = ynl.do(args.do, attrs, args.flags)
diff --git a/tools/net/ynl/pyynl/ethtool.py b/tools/net/ynl/pyynl/ethtool.py
index cab6b576c876..fd0f6b8d54d1 100755
--- a/tools/net/ynl/pyynl/ethtool.py
+++ b/tools/net/ynl/pyynl/ethtool.py
@@ -2,7 +2,6 @@
# SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
import argparse
-import json
import pathlib
import pprint
import sys
@@ -45,13 +44,16 @@ def print_field(reply, *desc):
Pretty-print a set of fields from the reply. desc specifies the
fields and the optional type (bool/yn).
"""
+ if not reply:
+ return
+
if len(desc) == 0:
return print_field(reply, *zip(reply.keys(), reply.keys()))
for spec in desc:
try:
field, name, tp = spec
- except:
+ except ValueError:
field, name = spec
tp = 'int'
@@ -156,7 +158,6 @@ def main():
global args
args = parser.parse_args()
- script_abs_dir = os.path.dirname(os.path.abspath(sys.argv[0]))
spec = os.path.join(spec_dir(), 'ethtool.yaml')
schema = os.path.join(schema_dir(), 'genetlink-legacy.yaml')
@@ -255,14 +256,14 @@ def main():
reply = dumpit(ynl, args, 'channels-get')
print(f'Channel parameters for {args.device}:')
- print(f'Pre-set maximums:')
+ print('Pre-set maximums:')
print_field(reply,
('rx-max', 'RX'),
('tx-max', 'TX'),
('other-max', 'Other'),
('combined-max', 'Combined'))
- print(f'Current hardware settings:')
+ print('Current hardware settings:')
print_field(reply,
('rx-count', 'RX'),
('tx-count', 'TX'),
@@ -276,14 +277,14 @@ def main():
print(f'Ring parameters for {args.device}:')
- print(f'Pre-set maximums:')
+ print('Pre-set maximums:')
print_field(reply,
('rx-max', 'RX'),
('rx-mini-max', 'RX Mini'),
('rx-jumbo-max', 'RX Jumbo'),
('tx-max', 'TX'))
- print(f'Current hardware settings:')
+ print('Current hardware settings:')
print_field(reply,
('rx', 'RX'),
('rx-mini', 'RX Mini'),
@@ -298,7 +299,7 @@ def main():
return
if args.statistics:
- print(f'NIC statistics:')
+ print('NIC statistics:')
# TODO: pass id?
strset = dumpit(ynl, args, 'strset-get')
diff --git a/tools/net/ynl/pyynl/lib/__init__.py b/tools/net/ynl/pyynl/lib/__init__.py
index 71518b9842ee..ec9ea00071be 100644
--- a/tools/net/ynl/pyynl/lib/__init__.py
+++ b/tools/net/ynl/pyynl/lib/__init__.py
@@ -4,6 +4,8 @@ from .nlspec import SpecAttr, SpecAttrSet, SpecEnumEntry, SpecEnumSet, \
SpecFamily, SpecOperation, SpecSubMessage, SpecSubMessageFormat
from .ynl import YnlFamily, Netlink, NlError
+from .doc_generator import YnlDocGenerator
+
__all__ = ["SpecAttr", "SpecAttrSet", "SpecEnumEntry", "SpecEnumSet",
"SpecFamily", "SpecOperation", "SpecSubMessage", "SpecSubMessageFormat",
- "YnlFamily", "Netlink", "NlError"]
+ "YnlFamily", "Netlink", "NlError", "YnlDocGenerator"]
diff --git a/tools/net/ynl/pyynl/lib/doc_generator.py b/tools/net/ynl/pyynl/lib/doc_generator.py
new file mode 100644
index 000000000000..3a16b8eb01ca
--- /dev/null
+++ b/tools/net/ynl/pyynl/lib/doc_generator.py
@@ -0,0 +1,402 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+# -*- coding: utf-8; mode: python -*-
+
+"""
+ Class to auto generate the documentation for Netlink specifications.
+
+ :copyright: Copyright (C) 2023 Breno Leitao <leitao@debian.org>
+ :license: GPL Version 2, June 1991 see linux/COPYING for details.
+
+ This class performs extensive parsing to the Linux kernel's netlink YAML
+ spec files, in an effort to avoid needing to heavily mark up the original
+ YAML file.
+
+ This code is split in two classes:
+ 1) RST formatters: Use to convert a string to a RST output
+ 2) YAML Netlink (YNL) doc generator: Generate docs from YAML data
+"""
+
+from typing import Any, Dict, List
+import yaml
+
+LINE_STR = '__lineno__'
+
+class NumberedSafeLoader(yaml.SafeLoader): # pylint: disable=R0901
+ """Override the SafeLoader class to add line number to parsed data"""
+
+ def construct_mapping(self, node, *args, **kwargs):
+ mapping = super().construct_mapping(node, *args, **kwargs)
+ mapping[LINE_STR] = node.start_mark.line
+
+ return mapping
+
+class RstFormatters:
+ """RST Formatters"""
+
+ SPACE_PER_LEVEL = 4
+
+ @staticmethod
+ def headroom(level: int) -> str:
+ """Return space to format"""
+ return " " * (level * RstFormatters.SPACE_PER_LEVEL)
+
+ @staticmethod
+ def bold(text: str) -> str:
+ """Format bold text"""
+ return f"**{text}**"
+
+ @staticmethod
+ def inline(text: str) -> str:
+ """Format inline text"""
+ return f"``{text}``"
+
+ @staticmethod
+ def sanitize(text: str) -> str:
+ """Remove newlines and multiple spaces"""
+ # This is useful for some fields that are spread across multiple lines
+ return str(text).replace("\n", " ").strip()
+
+ def rst_fields(self, key: str, value: str, level: int = 0) -> str:
+ """Return a RST formatted field"""
+ return self.headroom(level) + f":{key}: {value}"
+
+ def rst_definition(self, key: str, value: Any, level: int = 0) -> str:
+ """Format a single rst definition"""
+ return self.headroom(level) + key + "\n" + self.headroom(level + 1) + str(value)
+
+ def rst_paragraph(self, paragraph: str, level: int = 0) -> str:
+ """Return a formatted paragraph"""
+ return self.headroom(level) + paragraph
+
+ def rst_bullet(self, item: str, level: int = 0) -> str:
+ """Return a formatted a bullet"""
+ return self.headroom(level) + f"- {item}"
+
+ @staticmethod
+ def rst_subsection(title: str) -> str:
+ """Add a sub-section to the document"""
+ return f"{title}\n" + "-" * len(title)
+
+ @staticmethod
+ def rst_subsubsection(title: str) -> str:
+ """Add a sub-sub-section to the document"""
+ return f"{title}\n" + "~" * len(title)
+
+ @staticmethod
+ def rst_section(namespace: str, prefix: str, title: str) -> str:
+ """Add a section to the document"""
+ return f".. _{namespace}-{prefix}-{title}:\n\n{title}\n" + "=" * len(title)
+
+ @staticmethod
+ def rst_subtitle(title: str) -> str:
+ """Add a subtitle to the document"""
+ return "\n" + "-" * len(title) + f"\n{title}\n" + "-" * len(title) + "\n\n"
+
+ @staticmethod
+ def rst_title(title: str) -> str:
+ """Add a title to the document"""
+ return "=" * len(title) + f"\n{title}\n" + "=" * len(title) + "\n\n"
+
+ def rst_list_inline(self, list_: List[str], level: int = 0) -> str:
+ """Format a list using inlines"""
+ return self.headroom(level) + "[" + ", ".join(self.inline(i) for i in list_) + "]"
+
+ @staticmethod
+ def rst_ref(namespace: str, prefix: str, name: str) -> str:
+ """Add a hyperlink to the document"""
+ mappings = {'enum': 'definition',
+ 'fixed-header': 'definition',
+ 'nested-attributes': 'attribute-set',
+ 'struct': 'definition'}
+ if prefix in mappings:
+ prefix = mappings[prefix]
+ return f":ref:`{namespace}-{prefix}-{name}`"
+
+ def rst_header(self) -> str:
+ """The headers for all the auto generated RST files"""
+ lines = []
+
+ lines.append(self.rst_paragraph(".. SPDX-License-Identifier: GPL-2.0"))
+ lines.append(self.rst_paragraph(".. NOTE: This document was auto-generated.\n\n"))
+
+ return "\n".join(lines)
+
+ @staticmethod
+ def rst_toctree(maxdepth: int = 2) -> str:
+ """Generate a toctree RST primitive"""
+ lines = []
+
+ lines.append(".. toctree::")
+ lines.append(f" :maxdepth: {maxdepth}\n\n")
+
+ return "\n".join(lines)
+
+ @staticmethod
+ def rst_label(title: str) -> str:
+ """Return a formatted label"""
+ return f".. _{title}:\n\n"
+
+ @staticmethod
+ def rst_lineno(lineno: int) -> str:
+ """Return a lineno comment"""
+ return f".. LINENO {lineno}\n"
+
+class YnlDocGenerator:
+ """YAML Netlink specs Parser"""
+
+ fmt = RstFormatters()
+
+ def parse_mcast_group(self, mcast_group: List[Dict[str, Any]]) -> str:
+ """Parse 'multicast' group list and return a formatted string"""
+ lines = []
+ for group in mcast_group:
+ lines.append(self.fmt.rst_bullet(group["name"]))
+
+ return "\n".join(lines)
+
+ def parse_do(self, do_dict: Dict[str, Any], level: int = 0) -> str:
+ """Parse 'do' section and return a formatted string"""
+ lines = []
+ if LINE_STR in do_dict:
+ lines.append(self.fmt.rst_lineno(do_dict[LINE_STR]))
+
+ for key in do_dict.keys():
+ if key == LINE_STR:
+ continue
+ lines.append(self.fmt.rst_paragraph(self.fmt.bold(key), level + 1))
+ if key in ['request', 'reply']:
+ lines.append(self.parse_do_attributes(do_dict[key], level + 1) + "\n")
+ else:
+ lines.append(self.fmt.headroom(level + 2) + do_dict[key] + "\n")
+
+ return "\n".join(lines)
+
+ def parse_do_attributes(self, attrs: Dict[str, Any], level: int = 0) -> str:
+ """Parse 'attributes' section"""
+ if "attributes" not in attrs:
+ return ""
+ lines = [self.fmt.rst_fields("attributes",
+ self.fmt.rst_list_inline(attrs["attributes"]),
+ level + 1)]
+
+ return "\n".join(lines)
+
+ def parse_operations(self, operations: List[Dict[str, Any]], namespace: str) -> str:
+ """Parse operations block"""
+ preprocessed = ["name", "doc", "title", "do", "dump", "flags"]
+ linkable = ["fixed-header", "attribute-set"]
+ lines = []
+
+ for operation in operations:
+ if LINE_STR in operation:
+ lines.append(self.fmt.rst_lineno(operation[LINE_STR]))
+
+ lines.append(self.fmt.rst_section(namespace, 'operation',
+ operation["name"]))
+ lines.append(self.fmt.rst_paragraph(operation["doc"]) + "\n")
+
+ for key in operation.keys():
+ if key == LINE_STR:
+ continue
+
+ if key in preprocessed:
+ # Skip the special fields
+ continue
+ value = operation[key]
+ if key in linkable:
+ value = self.fmt.rst_ref(namespace, key, value)
+ lines.append(self.fmt.rst_fields(key, value, 0))
+ if 'flags' in operation:
+ lines.append(self.fmt.rst_fields('flags',
+ self.fmt.rst_list_inline(operation['flags'])))
+
+ if "do" in operation:
+ lines.append(self.fmt.rst_paragraph(":do:", 0))
+ lines.append(self.parse_do(operation["do"], 0))
+ if "dump" in operation:
+ lines.append(self.fmt.rst_paragraph(":dump:", 0))
+ lines.append(self.parse_do(operation["dump"], 0))
+
+ # New line after fields
+ lines.append("\n")
+
+ return "\n".join(lines)
+
+ def parse_entries(self, entries: List[Dict[str, Any]], level: int) -> str:
+ """Parse a list of entries"""
+ ignored = ["pad"]
+ lines = []
+ for entry in entries:
+ if isinstance(entry, dict):
+ # entries could be a list or a dictionary
+ field_name = entry.get("name", "")
+ if field_name in ignored:
+ continue
+ type_ = entry.get("type")
+ if type_:
+ field_name += f" ({self.fmt.inline(type_)})"
+ lines.append(
+ self.fmt.rst_fields(field_name,
+ self.fmt.sanitize(entry.get("doc", "")),
+ level)
+ )
+ elif isinstance(entry, list):
+ lines.append(self.fmt.rst_list_inline(entry, level))
+ else:
+ lines.append(self.fmt.rst_bullet(self.fmt.inline(self.fmt.sanitize(entry)),
+ level))
+
+ lines.append("\n")
+ return "\n".join(lines)
+
+ def parse_definitions(self, defs: Dict[str, Any], namespace: str) -> str:
+ """Parse definitions section"""
+ preprocessed = ["name", "entries", "members"]
+ ignored = ["render-max"] # This is not printed
+ lines = []
+
+ for definition in defs:
+ if LINE_STR in definition:
+ lines.append(self.fmt.rst_lineno(definition[LINE_STR]))
+
+ lines.append(self.fmt.rst_section(namespace, 'definition', definition["name"]))
+ for k in definition.keys():
+ if k == LINE_STR:
+ continue
+ if k in preprocessed + ignored:
+ continue
+ lines.append(self.fmt.rst_fields(k, self.fmt.sanitize(definition[k]), 0))
+
+ # Field list needs to finish with a new line
+ lines.append("\n")
+ if "entries" in definition:
+ lines.append(self.fmt.rst_paragraph(":entries:", 0))
+ lines.append(self.parse_entries(definition["entries"], 1))
+ if "members" in definition:
+ lines.append(self.fmt.rst_paragraph(":members:", 0))
+ lines.append(self.parse_entries(definition["members"], 1))
+
+ return "\n".join(lines)
+
+ def parse_attr_sets(self, entries: List[Dict[str, Any]], namespace: str) -> str:
+ """Parse attribute from attribute-set"""
+ preprocessed = ["name", "type"]
+ linkable = ["enum", "nested-attributes", "struct", "sub-message"]
+ ignored = ["checks"]
+ lines = []
+
+ for entry in entries:
+ lines.append(self.fmt.rst_section(namespace, 'attribute-set',
+ entry["name"]))
+
+ if "doc" in entry:
+ lines.append(self.fmt.rst_paragraph(entry["doc"], 0) + "\n")
+
+ for attr in entry["attributes"]:
+ if LINE_STR in attr:
+ lines.append(self.fmt.rst_lineno(attr[LINE_STR]))
+
+ type_ = attr.get("type")
+ attr_line = attr["name"]
+ if type_:
+ # Add the attribute type in the same line
+ attr_line += f" ({self.fmt.inline(type_)})"
+
+ lines.append(self.fmt.rst_subsubsection(attr_line))
+
+ for k in attr.keys():
+ if k == LINE_STR:
+ continue
+ if k in preprocessed + ignored:
+ continue
+ if k in linkable:
+ value = self.fmt.rst_ref(namespace, k, attr[k])
+ else:
+ value = self.fmt.sanitize(attr[k])
+ lines.append(self.fmt.rst_fields(k, value, 0))
+ lines.append("\n")
+
+ return "\n".join(lines)
+
+ def parse_sub_messages(self, entries: List[Dict[str, Any]], namespace: str) -> str:
+ """Parse sub-message definitions"""
+ lines = []
+
+ for entry in entries:
+ lines.append(self.fmt.rst_section(namespace, 'sub-message',
+ entry["name"]))
+ for fmt in entry["formats"]:
+ value = fmt["value"]
+
+ lines.append(self.fmt.rst_bullet(self.fmt.bold(value)))
+ for attr in ['fixed-header', 'attribute-set']:
+ if attr in fmt:
+ lines.append(self.fmt.rst_fields(attr,
+ self.fmt.rst_ref(namespace,
+ attr,
+ fmt[attr]),
+ 1))
+ lines.append("\n")
+
+ return "\n".join(lines)
+
+ def parse_yaml(self, obj: Dict[str, Any]) -> str:
+ """Format the whole YAML into a RST string"""
+ lines = []
+
+ # Main header
+ lineno = obj.get('__lineno__', 0)
+ lines.append(self.fmt.rst_lineno(lineno))
+
+ family = obj['name']
+
+ lines.append(self.fmt.rst_header())
+ lines.append(self.fmt.rst_label("netlink-" + family))
+
+ title = f"Family ``{family}`` netlink specification"
+ lines.append(self.fmt.rst_title(title))
+ lines.append(self.fmt.rst_paragraph(".. contents:: :depth: 3\n"))
+
+ if "doc" in obj:
+ lines.append(self.fmt.rst_subtitle("Summary"))
+ lines.append(self.fmt.rst_paragraph(obj["doc"], 0))
+
+ # Operations
+ if "operations" in obj:
+ lines.append(self.fmt.rst_subtitle("Operations"))
+ lines.append(self.parse_operations(obj["operations"]["list"],
+ family))
+
+ # Multicast groups
+ if "mcast-groups" in obj:
+ lines.append(self.fmt.rst_subtitle("Multicast groups"))
+ lines.append(self.parse_mcast_group(obj["mcast-groups"]["list"]))
+
+ # Definitions
+ if "definitions" in obj:
+ lines.append(self.fmt.rst_subtitle("Definitions"))
+ lines.append(self.parse_definitions(obj["definitions"], family))
+
+ # Attributes set
+ if "attribute-sets" in obj:
+ lines.append(self.fmt.rst_subtitle("Attribute sets"))
+ lines.append(self.parse_attr_sets(obj["attribute-sets"], family))
+
+ # Sub-messages
+ if "sub-messages" in obj:
+ lines.append(self.fmt.rst_subtitle("Sub-messages"))
+ lines.append(self.parse_sub_messages(obj["sub-messages"], family))
+
+ return "\n".join(lines)
+
+ # Main functions
+ # ==============
+
+ def parse_yaml_file(self, filename: str) -> str:
+ """Transform the YAML specified by filename into an RST-formatted string"""
+ with open(filename, "r", encoding="utf-8") as spec_file:
+ numbered_yaml = yaml.load(spec_file, Loader=NumberedSafeLoader)
+ content = self.parse_yaml(numbered_yaml)
+
+ return content
diff --git a/tools/net/ynl/pyynl/lib/nlspec.py b/tools/net/ynl/pyynl/lib/nlspec.py
index 314ec8007496..85c17fe01e35 100644
--- a/tools/net/ynl/pyynl/lib/nlspec.py
+++ b/tools/net/ynl/pyynl/lib/nlspec.py
@@ -501,7 +501,7 @@ class SpecFamily(SpecElement):
return SpecStruct(self, elem)
def new_sub_message(self, elem):
- return SpecSubMessage(self, elem);
+ return SpecSubMessage(self, elem)
def new_operation(self, elem, req_val, rsp_val):
return SpecOperation(self, elem, req_val, rsp_val)
diff --git a/tools/net/ynl/pyynl/lib/ynl.py b/tools/net/ynl/pyynl/lib/ynl.py
index 8244a5f440b2..36d36eb7e3b8 100644
--- a/tools/net/ynl/pyynl/lib/ynl.py
+++ b/tools/net/ynl/pyynl/lib/ynl.py
@@ -9,7 +9,6 @@ import socket
import struct
from struct import Struct
import sys
-import yaml
import ipaddress
import uuid
import queue
@@ -101,12 +100,21 @@ class Netlink:
'bitfield32', 'sint', 'uint'])
class NlError(Exception):
- def __init__(self, nl_msg):
- self.nl_msg = nl_msg
- self.error = -nl_msg.error
-
- def __str__(self):
- return f"Netlink error: {os.strerror(self.error)}\n{self.nl_msg}"
+ def __init__(self, nl_msg):
+ self.nl_msg = nl_msg
+ self.error = -nl_msg.error
+
+ def __str__(self):
+ msg = "Netlink error: "
+
+ extack = self.nl_msg.extack.copy() if self.nl_msg.extack else {}
+ if 'msg' in extack:
+ msg += extack['msg'] + ': '
+ del extack['msg']
+ msg += os.strerror(self.error)
+ if extack:
+ msg += ' ' + str(extack)
+ return msg
class ConfigError(Exception):
@@ -562,11 +570,13 @@ class YnlFamily(SpecFamily):
if attr["type"] == 'nest':
nl_type |= Netlink.NLA_F_NESTED
- attr_payload = b''
sub_space = attr['nested-attributes']
- sub_attrs = SpaceAttrs(self.attr_sets[sub_space], value, search_attrs)
- for subname, subvalue in value.items():
- attr_payload += self._add_attr(sub_space, subname, subvalue, sub_attrs)
+ attr_payload = self._add_nest_attrs(value, sub_space, search_attrs)
+ elif attr['type'] == 'indexed-array' and attr['sub-type'] == 'nest':
+ nl_type |= Netlink.NLA_F_NESTED
+ sub_space = attr['nested-attributes']
+ attr_payload = self._encode_indexed_array(value, sub_space,
+ search_attrs)
elif attr["type"] == 'flag':
if not value:
# If value is absent or false then skip attribute creation.
@@ -620,9 +630,28 @@ class YnlFamily(SpecFamily):
else:
raise Exception(f'Unknown type at {space} {name} {value} {attr["type"]}')
+ return self._add_attr_raw(nl_type, attr_payload)
+
+ def _add_attr_raw(self, nl_type, attr_payload):
pad = b'\x00' * ((4 - len(attr_payload) % 4) % 4)
return struct.pack('HH', len(attr_payload) + 4, nl_type) + attr_payload + pad
+ def _add_nest_attrs(self, value, sub_space, search_attrs):
+ sub_attrs = SpaceAttrs(self.attr_sets[sub_space], value, search_attrs)
+ attr_payload = b''
+ for subname, subvalue in value.items():
+ attr_payload += self._add_attr(sub_space, subname, subvalue,
+ sub_attrs)
+ return attr_payload
+
+ def _encode_indexed_array(self, vals, sub_space, search_attrs):
+ attr_payload = b''
+ for i, val in enumerate(vals):
+ idx = i | Netlink.NLA_F_NESTED
+ val_payload = self._add_nest_attrs(val, sub_space, search_attrs)
+ attr_payload += self._add_attr_raw(idx, val_payload)
+ return attr_payload
+
def _get_enum_or_unknown(self, enum, raw):
try:
name = enum.entries_by_val[raw].name
@@ -706,7 +735,7 @@ class YnlFamily(SpecFamily):
return attr.as_bin()
def _rsp_add(self, rsp, name, is_multi, decoded):
- if is_multi == None:
+ if is_multi is None:
if name in rsp and type(rsp[name]) is not list:
rsp[name] = [rsp[name]]
is_multi = True
@@ -739,14 +768,14 @@ class YnlFamily(SpecFamily):
decoded = {}
offset = 0
if msg_format.fixed_header:
- decoded.update(self._decode_struct(attr.raw, msg_format.fixed_header));
+ decoded.update(self._decode_struct(attr.raw, msg_format.fixed_header))
offset = self._struct_size(msg_format.fixed_header)
if msg_format.attr_set:
if msg_format.attr_set in self.attr_sets:
subdict = self._decode(NlAttrs(attr.raw, offset), msg_format.attr_set)
decoded.update(subdict)
else:
- raise Exception(f"Unknown attribute-set '{attr_space}' when decoding '{attr_spec.name}'")
+ raise Exception(f"Unknown attribute-set '{msg_format.attr_set}' when decoding '{attr_spec.name}'")
return decoded
def _decode(self, attrs, space, outer_attrs = None):
@@ -936,7 +965,7 @@ class YnlFamily(SpecFamily):
formatted = hex(raw)
else:
formatted = bytes.hex(raw, ' ')
- elif display_hint in [ 'ipv4', 'ipv6' ]:
+ elif display_hint in [ 'ipv4', 'ipv6', 'ipv4-or-v6' ]:
formatted = format(ipaddress.ip_address(raw))
elif display_hint == 'uuid':
formatted = str(uuid.UUID(bytes=raw))
@@ -945,12 +974,26 @@ class YnlFamily(SpecFamily):
return formatted
def _from_string(self, string, attr_spec):
- if attr_spec.display_hint in ['ipv4', 'ipv6']:
+ if attr_spec.display_hint in ['ipv4', 'ipv6', 'ipv4-or-v6']:
ip = ipaddress.ip_address(string)
if attr_spec['type'] == 'binary':
raw = ip.packed
else:
raw = int(ip)
+ elif attr_spec.display_hint == 'hex':
+ if attr_spec['type'] == 'binary':
+ raw = bytes.fromhex(string)
+ else:
+ raw = int(string, 16)
+ elif attr_spec.display_hint == 'mac':
+ # Parse MAC address in format "00:11:22:33:44:55" or "001122334455"
+ if ':' in string:
+ mac_bytes = [int(x, 16) for x in string.split(':')]
+ else:
+ if len(string) % 2 != 0:
+ raise Exception(f"Invalid MAC address format: {string}")
+ mac_bytes = [int(string[i:i+2], 16) for i in range(0, len(string), 2)]
+ raw = bytes(mac_bytes)
else:
raise Exception(f"Display hint '{attr_spec.display_hint}' not implemented"
f" when parsing '{attr_spec['name']}'")
@@ -1014,15 +1057,15 @@ class YnlFamily(SpecFamily):
self.check_ntf()
def operation_do_attributes(self, name):
- """
- For a given operation name, find and return a supported
- set of attributes (as a dict).
- """
- op = self.find_operation(name)
- if not op:
- return None
-
- return op['do']['request']['attributes'].copy()
+ """
+ For a given operation name, find and return a supported
+ set of attributes (as a dict).
+ """
+ op = self.find_operation(name)
+ if not op:
+ return None
+
+ return op['do']['request']['attributes'].copy()
def _encode_message(self, op, vals, flags, req_seq):
nl_flags = Netlink.NLM_F_REQUEST | Netlink.NLM_F_ACK
diff --git a/tools/net/ynl/pyynl/ynl_gen_c.py b/tools/net/ynl/pyynl/ynl_gen_c.py
index ef032e17fec4..b517d0c605ad 100755
--- a/tools/net/ynl/pyynl/ynl_gen_c.py
+++ b/tools/net/ynl/pyynl/ynl_gen_c.py
@@ -2,7 +2,6 @@
# SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause)
import argparse
-import collections
import filecmp
import pathlib
import os
@@ -14,7 +13,7 @@ import yaml
sys.path.append(pathlib.Path(__file__).resolve().parent.as_posix())
from lib import SpecFamily, SpecAttrSet, SpecAttr, SpecOperation, SpecEnumSet, SpecEnumEntry
-from lib import SpecSubMessage, SpecSubMessageFormat
+from lib import SpecSubMessage
def c_upper(name):
@@ -243,7 +242,7 @@ class Type(SpecAttr):
raise Exception(f"Attr get not implemented for class type {self.type}")
def attr_get(self, ri, var, first):
- lines, init_lines, local_vars = self._attr_get(ri, var)
+ lines, init_lines, _ = self._attr_get(ri, var)
if type(lines) is str:
lines = [lines]
if type(init_lines) is str:
@@ -251,10 +250,6 @@ class Type(SpecAttr):
kw = 'if' if first else 'else if'
ri.cw.block_start(line=f"{kw} (type == {self.enum_name})")
- if local_vars:
- for local in local_vars:
- ri.cw.p(local)
- ri.cw.nl()
if not self.is_multi_val():
ri.cw.p("if (ynl_attr_validate(yarg, attr))")
@@ -398,7 +393,7 @@ class TypeScalar(Type):
if 'enum' in self.attr:
enum = self.family.consts[self.attr['enum']]
low, high = enum.value_range()
- if low == None and high == None:
+ if low is None and high is None:
self.checks['sparse'] = True
else:
if 'min' not in self.checks:
@@ -485,7 +480,7 @@ class TypeString(Type):
ri.cw.p(f"char *{self.c_name};")
def _attr_typol(self):
- typol = f'.type = YNL_PT_NUL_STR, '
+ typol = '.type = YNL_PT_NUL_STR, '
if self.is_selector:
typol += '.is_selector = 1, '
return typol
@@ -539,7 +534,7 @@ class TypeBinary(Type):
ri.cw.p(f"void *{self.c_name};")
def _attr_typol(self):
- return f'.type = YNL_PT_BINARY,'
+ return '.type = YNL_PT_BINARY,'
def _attr_policy(self, policy):
if len(self.checks) == 0:
@@ -556,7 +551,7 @@ class TypeBinary(Type):
elif 'exact-len' in self.checks:
mem = 'NLA_POLICY_EXACT_LEN(' + self.get_limit_str('exact-len') + ')'
elif 'min-len' in self.checks:
- mem = '{ .len = ' + self.get_limit_str('min-len') + ', }'
+ mem = 'NLA_POLICY_MIN_LEN(' + self.get_limit_str('min-len') + ')'
elif 'max-len' in self.checks:
mem = 'NLA_POLICY_MAX_LEN(' + self.get_limit_str('max-len') + ')'
@@ -636,10 +631,10 @@ class TypeBitfield32(Type):
return "struct nla_bitfield32"
def _attr_typol(self):
- return f'.type = YNL_PT_BITFIELD32, '
+ return '.type = YNL_PT_BITFIELD32, '
def _attr_policy(self, policy):
- if not 'enum' in self.attr:
+ if 'enum' not in self.attr:
raise Exception('Enum required for bitfield32 attr')
enum = self.family.consts[self.attr['enum']]
mask = enum.get_mask(as_flags=True)
@@ -725,7 +720,11 @@ class TypeMultiAttr(Type):
return 'struct ynl_string *'
elif self.attr['type'] in scalars:
scalar_pfx = '__' if ri.ku_space == 'user' else ''
- return scalar_pfx + self.attr['type']
+ if self.is_auto_scalar:
+ name = self.type[0] + '64'
+ else:
+ name = self.attr['type']
+ return scalar_pfx + name
else:
raise Exception(f"Sub-type {self.attr['type']} not supported yet")
@@ -792,7 +791,7 @@ class TypeMultiAttr(Type):
f"{presence} = n_{self.c_name};"]
-class TypeArrayNest(Type):
+class TypeIndexedArray(Type):
def is_multi_val(self):
return True
@@ -816,21 +815,28 @@ class TypeArrayNest(Type):
f'unsigned int n_{self.c_name}']
return super().arg_member(ri)
+ def _attr_policy(self, policy):
+ if self.attr['sub-type'] == 'nest':
+ return f'NLA_POLICY_NESTED_ARRAY({self.nested_render_name}_nl_policy)'
+ return super()._attr_policy(policy)
+
def _attr_typol(self):
if self.attr['sub-type'] in scalars:
return f'.type = YNL_PT_U{c_upper(self.sub_type[1:])}, '
elif self.attr['sub-type'] == 'binary' and 'exact-len' in self.checks:
return f'.type = YNL_PT_BINARY, .len = {self.checks["exact-len"]}, '
- else:
+ elif self.attr['sub-type'] == 'nest':
return f'.type = YNL_PT_NEST, .nest = &{self.nested_render_name}_nest, '
+ else:
+ raise Exception(f"Typol for IndexedArray sub-type {self.attr['sub-type']} not supported, yet")
def _attr_get(self, ri, var):
local_vars = ['const struct nlattr *attr2;']
get_lines = [f'attr_{self.c_name} = attr;',
'ynl_attr_for_each_nested(attr2, attr) {',
- '\tif (ynl_attr_validate(yarg, attr2))',
+ '\tif (__ynl_attr_validate(yarg, attr2, type))',
'\t\treturn YNL_PARSE_CB_ERROR;',
- f'\t{var}->_count.{self.c_name}++;',
+ f'\tn_{self.c_name}++;',
'}']
return get_lines, None, local_vars
@@ -848,13 +854,25 @@ class TypeArrayNest(Type):
ri.cw.p(f'for (i = 0; i < {var}->_count.{self.c_name}; i++)')
ri.cw.p(f"{self.nested_render_name}_put(nlh, i, &{var}->{self.c_name}[i]);")
else:
- raise Exception(f"Put for ArrayNest sub-type {self.attr['sub-type']} not supported, yet")
+ raise Exception(f"Put for IndexedArray sub-type {self.attr['sub-type']} not supported, yet")
ri.cw.p('ynl_attr_nest_end(nlh, array);')
def _setter_lines(self, ri, member, presence):
return [f"{member} = {self.c_name};",
f"{presence} = n_{self.c_name};"]
+ def free_needs_iter(self):
+ return self.sub_type == 'nest'
+
+ def _free_lines(self, ri, var, ref):
+ lines = []
+ if self.sub_type == 'nest':
+ lines += [
+ f"for (i = 0; i < {var}->{ref}_count.{self.c_name}; i++)",
+ f'{self.nested_render_name}_free(&{var}->{ref}{self.c_name}[i]);',
+ ]
+ lines += f"free({var}->{ref}{self.c_name});",
+ return lines
class TypeNestTypeValue(Type):
def _complex_member_type(self, ri):
@@ -909,7 +927,7 @@ class TypeSubMessage(TypeNest):
else:
sel_var = f"{var}->{sel}"
get_lines = [f'if (!{sel_var})',
- f'return ynl_submsg_failed(yarg, "%s", "%s");' %
+ 'return ynl_submsg_failed(yarg, "%s", "%s");' %
(self.name, self['selector']),
f"if ({self.nested_render_name}_parse(&parg, {sel_var}, attr))",
"return YNL_PARSE_CB_ERROR;"]
@@ -1125,7 +1143,7 @@ class AttrSet(SpecAttrSet):
t = TypeNest(self.family, self, elem, value)
elif elem['type'] == 'indexed-array' and 'sub-type' in elem:
if elem["sub-type"] in ['binary', 'nest', 'u32']:
- t = TypeArrayNest(self.family, self, elem, value)
+ t = TypeIndexedArray(self.family, self, elem, value)
else:
raise Exception(f'new_attr: unsupported sub-type {elem["sub-type"]}')
elif elem['type'] == 'nest-type-value':
@@ -1187,7 +1205,7 @@ class SubMessage(SpecSubMessage):
class Family(SpecFamily):
- def __init__(self, file_name, exclude_ops):
+ def __init__(self, file_name, exclude_ops, fn_prefix):
# Added by resolve:
self.c_name = None
delattr(self, "c_name")
@@ -1219,6 +1237,8 @@ class Family(SpecFamily):
else:
self.uapi_header_name = self.ident_name
+ self.fn_prefix = fn_prefix if fn_prefix else f'{self.ident_name}-nl'
+
def resolve(self):
self.resolve_up(super())
@@ -1563,7 +1583,7 @@ class RenderInfo:
if family.is_classic():
self.fixed_hdr_len = f"sizeof(struct {c_lower(fixed_hdr)})"
else:
- raise Exception(f"Per-op fixed header not supported, yet")
+ raise Exception("Per-op fixed header not supported, yet")
# 'do' and 'dump' response parsing is identical
@@ -2034,6 +2054,20 @@ def put_enum_to_str(family, cw, enum):
_put_enum_to_str_helper(cw, enum.render_name, map_name, 'value', enum=enum)
+def put_local_vars(struct):
+ local_vars = []
+ has_array = False
+ has_count = False
+ for _, arg in struct.member_list():
+ has_array |= arg.type == 'indexed-array'
+ has_count |= arg.presence_type() == 'count'
+ if has_array:
+ local_vars.append('struct nlattr *array;')
+ if has_count:
+ local_vars.append('unsigned int i;')
+ return local_vars
+
+
def put_req_nested_prototype(ri, struct, suffix=';'):
func_args = ['struct nlmsghdr *nlh',
'unsigned int attr_type',
@@ -2056,15 +2090,7 @@ def put_req_nested(ri, struct):
init_lines.append(f"hdr = ynl_nlmsg_put_extra_header(nlh, {struct_sz});")
init_lines.append(f"memcpy(hdr, &obj->_hdr, {struct_sz});")
- has_anest = False
- has_count = False
- for _, arg in struct.member_list():
- has_anest |= arg.type == 'indexed-array'
- has_count |= arg.presence_type() == 'count'
- if has_anest:
- local_vars.append('struct nlattr *array;')
- if has_count:
- local_vars.append('unsigned int i;')
+ local_vars += put_local_vars(struct)
put_req_nested_prototype(ri, struct, suffix='')
ri.cw.block_start()
@@ -2099,35 +2125,43 @@ def _multi_parse(ri, struct, init_lines, local_vars):
if ri.family.is_classic():
iter_line = f"ynl_attr_for_each(attr, nlh, sizeof({struct.fixed_header}))"
else:
- raise Exception(f"Per-op fixed header not supported, yet")
+ raise Exception("Per-op fixed header not supported, yet")
- array_nests = set()
+ indexed_arrays = set()
multi_attrs = set()
needs_parg = False
+ var_set = set()
for arg, aspec in struct.member_list():
if aspec['type'] == 'indexed-array' and 'sub-type' in aspec:
if aspec["sub-type"] in {'binary', 'nest'}:
- local_vars.append(f'const struct nlattr *attr_{aspec.c_name};')
- array_nests.add(arg)
+ local_vars.append(f'const struct nlattr *attr_{aspec.c_name} = NULL;')
+ indexed_arrays.add(arg)
elif aspec['sub-type'] in scalars:
- local_vars.append(f'const struct nlattr *attr_{aspec.c_name};')
- array_nests.add(arg)
+ local_vars.append(f'const struct nlattr *attr_{aspec.c_name} = NULL;')
+ indexed_arrays.add(arg)
else:
raise Exception(f'Not supported sub-type {aspec["sub-type"]}')
if 'multi-attr' in aspec:
multi_attrs.add(arg)
needs_parg |= 'nested-attributes' in aspec
needs_parg |= 'sub-message' in aspec
- if array_nests or multi_attrs:
+
+ try:
+ _, _, l_vars = aspec._attr_get(ri, '')
+ var_set |= set(l_vars) if l_vars else set()
+ except Exception:
+ pass # _attr_get() not implemented by simple types, ignore
+ local_vars += list(var_set)
+ if indexed_arrays or multi_attrs:
local_vars.append('int i;')
if needs_parg:
local_vars.append('struct ynl_parse_arg parg;')
init_lines.append('parg.ys = yarg->ys;')
- all_multi = array_nests | multi_attrs
+ all_multi = indexed_arrays | multi_attrs
- for anest in sorted(all_multi):
- local_vars.append(f"unsigned int n_{struct[anest].c_name} = 0;")
+ for arg in sorted(all_multi):
+ local_vars.append(f"unsigned int n_{struct[arg].c_name} = 0;")
ri.cw.block_start()
ri.cw.write_func_lvar(local_vars)
@@ -2147,8 +2181,8 @@ def _multi_parse(ri, struct, init_lines, local_vars):
else:
ri.cw.p('hdr = ynl_nlmsg_data_offset(nlh, sizeof(struct genlmsghdr));')
ri.cw.p(f"memcpy(&dst->_hdr, hdr, sizeof({struct.fixed_header}));")
- for anest in sorted(all_multi):
- aspec = struct[anest]
+ for arg in sorted(all_multi):
+ aspec = struct[arg]
ri.cw.p(f"if (dst->{aspec.c_name})")
ri.cw.p(f'return ynl_error_parse(yarg, "attribute already present ({struct.attr_set.name}.{aspec.name})");')
@@ -2166,8 +2200,8 @@ def _multi_parse(ri, struct, init_lines, local_vars):
ri.cw.block_end()
ri.cw.nl()
- for anest in sorted(array_nests):
- aspec = struct[anest]
+ for arg in sorted(indexed_arrays):
+ aspec = struct[arg]
ri.cw.block_start(line=f"if (n_{aspec.c_name})")
ri.cw.p(f"dst->{aspec.c_name} = calloc(n_{aspec.c_name}, sizeof(*dst->{aspec.c_name}));")
@@ -2192,8 +2226,8 @@ def _multi_parse(ri, struct, init_lines, local_vars):
ri.cw.block_end()
ri.cw.nl()
- for anest in sorted(multi_attrs):
- aspec = struct[anest]
+ for arg in sorted(multi_attrs):
+ aspec = struct[arg]
ri.cw.block_start(line=f"if (n_{aspec.c_name})")
ri.cw.p(f"dst->{aspec.c_name} = calloc(n_{aspec.c_name}, sizeof(*dst->{aspec.c_name}));")
ri.cw.p(f"dst->_count.{aspec.c_name} = n_{aspec.c_name};")
@@ -2348,10 +2382,7 @@ def print_req(ri):
local_vars += ['size_t hdr_len;',
'void *hdr;']
- for _, attr in ri.struct["request"].member_list():
- if attr.presence_type() == 'count':
- local_vars += ['unsigned int i;']
- break
+ local_vars += put_local_vars(ri.struct['request'])
print_prototype(ri, direction, terminate=False)
ri.cw.block_start()
@@ -2418,6 +2449,9 @@ def print_dump(ri):
local_vars += ['size_t hdr_len;',
'void *hdr;']
+ if 'request' in ri.op[ri.op_mode]:
+ local_vars += put_local_vars(ri.struct['request'])
+
ri.cw.write_func_lvar(local_vars)
ri.cw.p('yds.yarg.ys = ys;')
@@ -2502,7 +2536,7 @@ def print_free_prototype(ri, direction, suffix=';'):
def print_nlflags_set(ri, direction):
name = op_prefix(ri, direction)
- ri.cw.write_func_prot(f'static inline void', f"{name}_set_nlflags",
+ ri.cw.write_func_prot('static inline void', f"{name}_set_nlflags",
[f"struct {name} *req", "__u16 nl_flags"])
ri.cw.block_start()
ri.cw.p('req->_nlmsg_flags = nl_flags;')
@@ -2533,7 +2567,7 @@ def _print_type(ri, direction, struct):
line = attr.presence_member(ri.ku_space, type_filter)
if line:
if not meta_started:
- ri.cw.block_start(line=f"struct")
+ ri.cw.block_start(line="struct")
meta_started = True
ri.cw.p(line)
if meta_started:
@@ -2697,7 +2731,7 @@ def print_dump_type_free(ri):
ri.cw.nl()
_free_type_members(ri, 'rsp', ri.struct['reply'], ref='obj.')
- ri.cw.p(f'free(rsp);')
+ ri.cw.p('free(rsp);')
ri.cw.block_end()
ri.cw.block_end()
ri.cw.nl()
@@ -2708,7 +2742,7 @@ def print_ntf_type_free(ri):
ri.cw.block_start()
_free_type_members_iter(ri, ri.struct['reply'])
_free_type_members(ri, 'rsp', ri.struct['reply'], ref='obj.')
- ri.cw.p(f'free(rsp);')
+ ri.cw.p('free(rsp);')
ri.cw.block_end()
ri.cw.nl()
@@ -2803,8 +2837,6 @@ def print_kernel_policy_sparse_enum_validates(family, cw):
cw.p('/* Sparse enums validation callbacks */')
first = False
- sign = '' if attr.type[0] == 'u' else '_signed'
- suffix = 'ULL' if attr.type[0] == 'u' else 'LL'
cw.write_func_prot('static int', f'{c_lower(attr.enum_name)}_validate',
['const struct nlattr *attr', 'struct netlink_ext_ack *extack'])
cw.block_start()
@@ -2881,12 +2913,12 @@ def print_kernel_op_table_fwd(family, cw, terminate):
continue
if 'do' in op:
- name = c_lower(f"{family.ident_name}-nl-{op_name}-doit")
+ name = c_lower(f"{family.fn_prefix}-{op_name}-doit")
cw.write_func_prot('int', name,
['struct sk_buff *skb', 'struct genl_info *info'], suffix=';')
if 'dump' in op:
- name = c_lower(f"{family.ident_name}-nl-{op_name}-dumpit")
+ name = c_lower(f"{family.fn_prefix}-{op_name}-dumpit")
cw.write_func_prot('int', name,
['struct sk_buff *skb', 'struct netlink_callback *cb'], suffix=';')
cw.nl()
@@ -2912,7 +2944,7 @@ def print_kernel_op_table(family, cw):
for x in op['dont-validate']])), )
for op_mode in ['do', 'dump']:
if op_mode in op:
- name = c_lower(f"{family.ident_name}-nl-{op_name}-{op_mode}it")
+ name = c_lower(f"{family.fn_prefix}-{op_name}-{op_mode}it")
members.append((op_mode + 'it', name))
if family.kernel_policy == 'per-op':
struct = Struct(family, op['attribute-set'],
@@ -2950,7 +2982,7 @@ def print_kernel_op_table(family, cw):
members.append(('validate',
' | '.join([c_upper('genl-dont-validate-' + x)
for x in dont_validate])), )
- name = c_lower(f"{family.ident_name}-nl-{op_name}-{op_mode}it")
+ name = c_lower(f"{family.fn_prefix}-{op_name}-{op_mode}it")
if 'pre' in op[op_mode]:
members.append((cb_names[op_mode]['pre'], c_lower(op[op_mode]['pre'])))
members.append((op_mode + 'it', name))
@@ -3211,8 +3243,9 @@ def render_uapi(family, cw):
cw.block_end(line=';')
cw.nl()
elif const['type'] == 'const':
+ name_pfx = const.get('name-prefix', f"{family.ident_name}-")
defines.append([c_upper(family.get('c-define-name',
- f"{family.ident_name}-{const['name']}")),
+ f"{name_pfx}{const['name']}")),
const['value']])
if defines:
@@ -3324,7 +3357,7 @@ def render_user_family(family, cw, prototype):
cw.block_start(f'{symbol} = ')
cw.p(f'.name\t\t= "{family.c_name}",')
if family.is_classic():
- cw.p(f'.is_classic\t= true,')
+ cw.p('.is_classic\t= true,')
cw.p(f'.classic_id\t= {family.get("protonum")},')
if family.is_classic():
if family.fixed_header:
@@ -3371,6 +3404,7 @@ def main():
help='Do not overwrite the output file if the new output is identical to the old')
parser.add_argument('--exclude-op', action='append', default=[])
parser.add_argument('-o', dest='out_file', type=str, default=None)
+ parser.add_argument('--function-prefix', dest='fn_prefix', type=str)
args = parser.parse_args()
if args.header is None:
@@ -3379,7 +3413,7 @@ def main():
exclude_ops = [re.compile(expr) for expr in args.exclude_op]
try:
- parsed = Family(args.spec, exclude_ops)
+ parsed = Family(args.spec, exclude_ops, args.fn_prefix)
if parsed.license != '((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause)':
print('Spec license:', parsed.license)
print('License must be: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause)')
@@ -3399,11 +3433,16 @@ def main():
cw.p("/* Do not edit directly, auto-generated from: */")
cw.p(f"/*\t{spec_kernel} */")
cw.p(f"/* YNL-GEN {args.mode} {'header' if args.header else 'source'} */")
- if args.exclude_op or args.user_header:
+ if args.exclude_op or args.user_header or args.fn_prefix:
line = ''
- line += ' --user-header '.join([''] + args.user_header)
- line += ' --exclude-op '.join([''] + args.exclude_op)
+ if args.user_header:
+ line += ' --user-header '.join([''] + args.user_header)
+ if args.exclude_op:
+ line += ' --exclude-op '.join([''] + args.exclude_op)
+ if args.fn_prefix:
+ line += f' --function-prefix {args.fn_prefix}'
cw.p(f'/* YNL-ARG{line} */')
+ cw.p('/* To regenerate run: tools/net/ynl/ynl-regen.sh */')
cw.nl()
if args.mode == 'uapi':
diff --git a/tools/net/ynl/pyynl/ynl_gen_rst.py b/tools/net/ynl/pyynl/ynl_gen_rst.py
index 0cb6348e28d3..90ae19aac89d 100755
--- a/tools/net/ynl/pyynl/ynl_gen_rst.py
+++ b/tools/net/ynl/pyynl/ynl_gen_rst.py
@@ -10,353 +10,17 @@
This script performs extensive parsing to the Linux kernel's netlink YAML
spec files, in an effort to avoid needing to heavily mark up the original
- YAML file.
-
- This code is split in three big parts:
- 1) RST formatters: Use to convert a string to a RST output
- 2) Parser helpers: Functions to parse the YAML data structure
- 3) Main function and small helpers
+ YAML file. It uses the library code from scripts/lib.
"""
-from typing import Any, Dict, List
import os.path
+import pathlib
import sys
import argparse
import logging
-import yaml
-
-
-SPACE_PER_LEVEL = 4
-
-
-# RST Formatters
-# ==============
-def headroom(level: int) -> str:
- """Return space to format"""
- return " " * (level * SPACE_PER_LEVEL)
-
-
-def bold(text: str) -> str:
- """Format bold text"""
- return f"**{text}**"
-
-
-def inline(text: str) -> str:
- """Format inline text"""
- return f"``{text}``"
-
-
-def sanitize(text: str) -> str:
- """Remove newlines and multiple spaces"""
- # This is useful for some fields that are spread across multiple lines
- return str(text).replace("\n", " ").strip()
-
-
-def rst_fields(key: str, value: str, level: int = 0) -> str:
- """Return a RST formatted field"""
- return headroom(level) + f":{key}: {value}"
-
-
-def rst_definition(key: str, value: Any, level: int = 0) -> str:
- """Format a single rst definition"""
- return headroom(level) + key + "\n" + headroom(level + 1) + str(value)
-
-
-def rst_paragraph(paragraph: str, level: int = 0) -> str:
- """Return a formatted paragraph"""
- return headroom(level) + paragraph
-
-
-def rst_bullet(item: str, level: int = 0) -> str:
- """Return a formatted a bullet"""
- return headroom(level) + f"- {item}"
-
-
-def rst_subsection(title: str) -> str:
- """Add a sub-section to the document"""
- return f"{title}\n" + "-" * len(title)
-
-
-def rst_subsubsection(title: str) -> str:
- """Add a sub-sub-section to the document"""
- return f"{title}\n" + "~" * len(title)
-
-
-def rst_section(namespace: str, prefix: str, title: str) -> str:
- """Add a section to the document"""
- return f".. _{namespace}-{prefix}-{title}:\n\n{title}\n" + "=" * len(title)
-
-
-def rst_subtitle(title: str) -> str:
- """Add a subtitle to the document"""
- return "\n" + "-" * len(title) + f"\n{title}\n" + "-" * len(title) + "\n\n"
-
-
-def rst_title(title: str) -> str:
- """Add a title to the document"""
- return "=" * len(title) + f"\n{title}\n" + "=" * len(title) + "\n\n"
-
-
-def rst_list_inline(list_: List[str], level: int = 0) -> str:
- """Format a list using inlines"""
- return headroom(level) + "[" + ", ".join(inline(i) for i in list_) + "]"
-
-
-def rst_ref(namespace: str, prefix: str, name: str) -> str:
- """Add a hyperlink to the document"""
- mappings = {'enum': 'definition',
- 'fixed-header': 'definition',
- 'nested-attributes': 'attribute-set',
- 'struct': 'definition'}
- if prefix in mappings:
- prefix = mappings[prefix]
- return f":ref:`{namespace}-{prefix}-{name}`"
-
-
-def rst_header() -> str:
- """The headers for all the auto generated RST files"""
- lines = []
-
- lines.append(rst_paragraph(".. SPDX-License-Identifier: GPL-2.0"))
- lines.append(rst_paragraph(".. NOTE: This document was auto-generated.\n\n"))
-
- return "\n".join(lines)
-
-
-def rst_toctree(maxdepth: int = 2) -> str:
- """Generate a toctree RST primitive"""
- lines = []
-
- lines.append(".. toctree::")
- lines.append(f" :maxdepth: {maxdepth}\n\n")
-
- return "\n".join(lines)
-
-
-def rst_label(title: str) -> str:
- """Return a formatted label"""
- return f".. _{title}:\n\n"
-
-
-# Parsers
-# =======
-
-
-def parse_mcast_group(mcast_group: List[Dict[str, Any]]) -> str:
- """Parse 'multicast' group list and return a formatted string"""
- lines = []
- for group in mcast_group:
- lines.append(rst_bullet(group["name"]))
-
- return "\n".join(lines)
-
-
-def parse_do(do_dict: Dict[str, Any], level: int = 0) -> str:
- """Parse 'do' section and return a formatted string"""
- lines = []
- for key in do_dict.keys():
- lines.append(rst_paragraph(bold(key), level + 1))
- if key in ['request', 'reply']:
- lines.append(parse_do_attributes(do_dict[key], level + 1) + "\n")
- else:
- lines.append(headroom(level + 2) + do_dict[key] + "\n")
-
- return "\n".join(lines)
-
-
-def parse_do_attributes(attrs: Dict[str, Any], level: int = 0) -> str:
- """Parse 'attributes' section"""
- if "attributes" not in attrs:
- return ""
- lines = [rst_fields("attributes", rst_list_inline(attrs["attributes"]), level + 1)]
-
- return "\n".join(lines)
-
-
-def parse_operations(operations: List[Dict[str, Any]], namespace: str) -> str:
- """Parse operations block"""
- preprocessed = ["name", "doc", "title", "do", "dump", "flags"]
- linkable = ["fixed-header", "attribute-set"]
- lines = []
-
- for operation in operations:
- lines.append(rst_section(namespace, 'operation', operation["name"]))
- lines.append(rst_paragraph(operation["doc"]) + "\n")
-
- for key in operation.keys():
- if key in preprocessed:
- # Skip the special fields
- continue
- value = operation[key]
- if key in linkable:
- value = rst_ref(namespace, key, value)
- lines.append(rst_fields(key, value, 0))
- if 'flags' in operation:
- lines.append(rst_fields('flags', rst_list_inline(operation['flags'])))
-
- if "do" in operation:
- lines.append(rst_paragraph(":do:", 0))
- lines.append(parse_do(operation["do"], 0))
- if "dump" in operation:
- lines.append(rst_paragraph(":dump:", 0))
- lines.append(parse_do(operation["dump"], 0))
-
- # New line after fields
- lines.append("\n")
-
- return "\n".join(lines)
-
-
-def parse_entries(entries: List[Dict[str, Any]], level: int) -> str:
- """Parse a list of entries"""
- ignored = ["pad"]
- lines = []
- for entry in entries:
- if isinstance(entry, dict):
- # entries could be a list or a dictionary
- field_name = entry.get("name", "")
- if field_name in ignored:
- continue
- type_ = entry.get("type")
- if type_:
- field_name += f" ({inline(type_)})"
- lines.append(
- rst_fields(field_name, sanitize(entry.get("doc", "")), level)
- )
- elif isinstance(entry, list):
- lines.append(rst_list_inline(entry, level))
- else:
- lines.append(rst_bullet(inline(sanitize(entry)), level))
-
- lines.append("\n")
- return "\n".join(lines)
-
-
-def parse_definitions(defs: Dict[str, Any], namespace: str) -> str:
- """Parse definitions section"""
- preprocessed = ["name", "entries", "members"]
- ignored = ["render-max"] # This is not printed
- lines = []
-
- for definition in defs:
- lines.append(rst_section(namespace, 'definition', definition["name"]))
- for k in definition.keys():
- if k in preprocessed + ignored:
- continue
- lines.append(rst_fields(k, sanitize(definition[k]), 0))
-
- # Field list needs to finish with a new line
- lines.append("\n")
- if "entries" in definition:
- lines.append(rst_paragraph(":entries:", 0))
- lines.append(parse_entries(definition["entries"], 1))
- if "members" in definition:
- lines.append(rst_paragraph(":members:", 0))
- lines.append(parse_entries(definition["members"], 1))
-
- return "\n".join(lines)
-
-
-def parse_attr_sets(entries: List[Dict[str, Any]], namespace: str) -> str:
- """Parse attribute from attribute-set"""
- preprocessed = ["name", "type"]
- linkable = ["enum", "nested-attributes", "struct", "sub-message"]
- ignored = ["checks"]
- lines = []
-
- for entry in entries:
- lines.append(rst_section(namespace, 'attribute-set', entry["name"]))
- for attr in entry["attributes"]:
- type_ = attr.get("type")
- attr_line = attr["name"]
- if type_:
- # Add the attribute type in the same line
- attr_line += f" ({inline(type_)})"
-
- lines.append(rst_subsubsection(attr_line))
-
- for k in attr.keys():
- if k in preprocessed + ignored:
- continue
- if k in linkable:
- value = rst_ref(namespace, k, attr[k])
- else:
- value = sanitize(attr[k])
- lines.append(rst_fields(k, value, 0))
- lines.append("\n")
-
- return "\n".join(lines)
-
-
-def parse_sub_messages(entries: List[Dict[str, Any]], namespace: str) -> str:
- """Parse sub-message definitions"""
- lines = []
-
- for entry in entries:
- lines.append(rst_section(namespace, 'sub-message', entry["name"]))
- for fmt in entry["formats"]:
- value = fmt["value"]
-
- lines.append(rst_bullet(bold(value)))
- for attr in ['fixed-header', 'attribute-set']:
- if attr in fmt:
- lines.append(rst_fields(attr,
- rst_ref(namespace, attr, fmt[attr]),
- 1))
- lines.append("\n")
-
- return "\n".join(lines)
-
-
-def parse_yaml(obj: Dict[str, Any]) -> str:
- """Format the whole YAML into a RST string"""
- lines = []
-
- # Main header
-
- lines.append(rst_header())
-
- family = obj['name']
-
- title = f"Family ``{family}`` netlink specification"
- lines.append(rst_title(title))
- lines.append(rst_paragraph(".. contents:: :depth: 3\n"))
-
- if "doc" in obj:
- lines.append(rst_subtitle("Summary"))
- lines.append(rst_paragraph(obj["doc"], 0))
-
- # Operations
- if "operations" in obj:
- lines.append(rst_subtitle("Operations"))
- lines.append(parse_operations(obj["operations"]["list"], family))
-
- # Multicast groups
- if "mcast-groups" in obj:
- lines.append(rst_subtitle("Multicast groups"))
- lines.append(parse_mcast_group(obj["mcast-groups"]["list"]))
-
- # Definitions
- if "definitions" in obj:
- lines.append(rst_subtitle("Definitions"))
- lines.append(parse_definitions(obj["definitions"], family))
-
- # Attributes set
- if "attribute-sets" in obj:
- lines.append(rst_subtitle("Attribute sets"))
- lines.append(parse_attr_sets(obj["attribute-sets"], family))
-
- # Sub-messages
- if "sub-messages" in obj:
- lines.append(rst_subtitle("Sub-messages"))
- lines.append(parse_sub_messages(obj["sub-messages"], family))
-
- return "\n".join(lines)
-
-
-# Main functions
-# ==============
+sys.path.append(pathlib.Path(__file__).resolve().parent.as_posix())
+from lib import YnlDocGenerator # pylint: disable=C0413
def parse_arguments() -> argparse.Namespace:
"""Parse arguments from user"""
@@ -367,9 +31,6 @@ def parse_arguments() -> argparse.Namespace:
# Index and input are mutually exclusive
group = parser.add_mutually_exclusive_group()
- group.add_argument(
- "-x", "--index", action="store_true", help="Generate the index page"
- )
group.add_argument("-i", "--input", help="YAML file name")
args = parser.parse_args()
@@ -391,15 +52,6 @@ def parse_arguments() -> argparse.Namespace:
return args
-def parse_yaml_file(filename: str) -> str:
- """Transform the YAML specified by filename into an RST-formatted string"""
- with open(filename, "r", encoding="utf-8") as spec_file:
- yaml_data = yaml.safe_load(spec_file)
- content = parse_yaml(yaml_data)
-
- return content
-
-
def write_to_rstfile(content: str, filename: str) -> None:
"""Write the generated content into an RST file"""
logging.debug("Saving RST file to %s", filename)
@@ -408,35 +60,17 @@ def write_to_rstfile(content: str, filename: str) -> None:
rst_file.write(content)
-def generate_main_index_rst(output: str) -> None:
- """Generate the `networking_spec/index` content and write to the file"""
- lines = []
-
- lines.append(rst_header())
- lines.append(rst_label("specs"))
- lines.append(rst_title("Netlink Family Specifications"))
- lines.append(rst_toctree(1))
-
- index_dir = os.path.dirname(output)
- logging.debug("Looking for .rst files in %s", index_dir)
- for filename in sorted(os.listdir(index_dir)):
- if not filename.endswith(".rst") or filename == "index.rst":
- continue
- lines.append(f" {filename.replace('.rst', '')}\n")
-
- logging.debug("Writing an index file at %s", output)
- write_to_rstfile("".join(lines), output)
-
-
def main() -> None:
"""Main function that reads the YAML files and generates the RST files"""
args = parse_arguments()
+ parser = YnlDocGenerator()
+
if args.input:
logging.debug("Parsing %s", args.input)
try:
- content = parse_yaml_file(os.path.join(args.input))
+ content = parser.parse_yaml_file(os.path.join(args.input))
except Exception as exception:
logging.warning("Failed to parse %s.", args.input)
logging.warning(exception)
@@ -444,10 +78,6 @@ def main() -> None:
write_to_rstfile(content, args.output)
- if args.index:
- # Generate the index RST file
- generate_main_index_rst(args.output)
-
if __name__ == "__main__":
main()
diff --git a/tools/net/ynl/samples/.gitignore b/tools/net/ynl/samples/.gitignore
index 7f5fca7682d7..05087ee323ba 100644
--- a/tools/net/ynl/samples/.gitignore
+++ b/tools/net/ynl/samples/.gitignore
@@ -7,3 +7,4 @@ rt-addr
rt-link
rt-route
tc
+tc-filter-add
diff --git a/tools/net/ynl/samples/Makefile b/tools/net/ynl/samples/Makefile
index c9494a564da4..d76cbd41cbb1 100644
--- a/tools/net/ynl/samples/Makefile
+++ b/tools/net/ynl/samples/Makefile
@@ -19,6 +19,7 @@ include $(wildcard *.d)
all: $(BINS)
CFLAGS_page-pool=$(CFLAGS_netdev)
+CFLAGS_tc-filter-add:=$(CFLAGS_tc)
$(BINS): ../lib/ynl.a ../generated/protos.a $(SRCS)
@echo -e '\tCC sample $@'
diff --git a/tools/net/ynl/samples/page-pool.c b/tools/net/ynl/samples/page-pool.c
deleted file mode 100644
index e5d521320fbf..000000000000
--- a/tools/net/ynl/samples/page-pool.c
+++ /dev/null
@@ -1,149 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#define _GNU_SOURCE
-
-#include <stdio.h>
-#include <string.h>
-
-#include <ynl.h>
-
-#include <net/if.h>
-
-#include "netdev-user.h"
-
-struct stat {
- unsigned int ifc;
-
- struct {
- unsigned int cnt;
- size_t refs, bytes;
- } live[2];
-
- size_t alloc_slow, alloc_fast, recycle_ring, recycle_cache;
-};
-
-struct stats_array {
- unsigned int i, max;
- struct stat *s;
-};
-
-static struct stat *find_ifc(struct stats_array *a, unsigned int ifindex)
-{
- unsigned int i;
-
- for (i = 0; i < a->i; i++) {
- if (a->s[i].ifc == ifindex)
- return &a->s[i];
- }
-
- a->i++;
- if (a->i == a->max) {
- a->max *= 2;
- a->s = reallocarray(a->s, a->max, sizeof(*a->s));
- }
- a->s[i].ifc = ifindex;
- return &a->s[i];
-}
-
-static void count(struct stat *s, unsigned int l,
- struct netdev_page_pool_get_rsp *pp)
-{
- s->live[l].cnt++;
- if (pp->_present.inflight)
- s->live[l].refs += pp->inflight;
- if (pp->_present.inflight_mem)
- s->live[l].bytes += pp->inflight_mem;
-}
-
-int main(int argc, char **argv)
-{
- struct netdev_page_pool_stats_get_list *pp_stats;
- struct netdev_page_pool_get_list *pools;
- struct stats_array a = {};
- struct ynl_error yerr;
- struct ynl_sock *ys;
-
- ys = ynl_sock_create(&ynl_netdev_family, &yerr);
- if (!ys) {
- fprintf(stderr, "YNL: %s\n", yerr.msg);
- return 1;
- }
-
- a.max = 128;
- a.s = calloc(a.max, sizeof(*a.s));
- if (!a.s)
- goto err_close;
-
- pools = netdev_page_pool_get_dump(ys);
- if (!pools)
- goto err_free;
-
- ynl_dump_foreach(pools, pp) {
- struct stat *s = find_ifc(&a, pp->ifindex);
-
- count(s, 1, pp);
- if (pp->_present.detach_time)
- count(s, 0, pp);
- }
- netdev_page_pool_get_list_free(pools);
-
- pp_stats = netdev_page_pool_stats_get_dump(ys);
- if (!pp_stats)
- goto err_free;
-
- ynl_dump_foreach(pp_stats, pp) {
- struct stat *s = find_ifc(&a, pp->info.ifindex);
-
- if (pp->_present.alloc_fast)
- s->alloc_fast += pp->alloc_fast;
- if (pp->_present.alloc_refill)
- s->alloc_fast += pp->alloc_refill;
- if (pp->_present.alloc_slow)
- s->alloc_slow += pp->alloc_slow;
- if (pp->_present.recycle_ring)
- s->recycle_ring += pp->recycle_ring;
- if (pp->_present.recycle_cached)
- s->recycle_cache += pp->recycle_cached;
- }
- netdev_page_pool_stats_get_list_free(pp_stats);
-
- for (unsigned int i = 0; i < a.i; i++) {
- char ifname[IF_NAMESIZE];
- struct stat *s = &a.s[i];
- const char *name;
- double recycle;
-
- if (!s->ifc) {
- name = "<orphan>\t";
- } else {
- name = if_indextoname(s->ifc, ifname);
- if (name)
- printf("%8s", name);
- printf("[%u]\t", s->ifc);
- }
-
- printf("page pools: %u (zombies: %u)\n",
- s->live[1].cnt, s->live[0].cnt);
- printf("\t\trefs: %zu bytes: %zu (refs: %zu bytes: %zu)\n",
- s->live[1].refs, s->live[1].bytes,
- s->live[0].refs, s->live[0].bytes);
-
- /* We don't know how many pages are sitting in cache and ring
- * so we will under-count the recycling rate a bit.
- */
- recycle = (double)(s->recycle_ring + s->recycle_cache) /
- (s->alloc_fast + s->alloc_slow) * 100;
- printf("\t\trecycling: %.1lf%% (alloc: %zu:%zu recycle: %zu:%zu)\n",
- recycle, s->alloc_slow, s->alloc_fast,
- s->recycle_ring, s->recycle_cache);
- }
-
- ynl_sock_destroy(ys);
- return 0;
-
-err_free:
- free(a.s);
-err_close:
- fprintf(stderr, "YNL: %s\n", ys->err.msg);
- ynl_sock_destroy(ys);
- return 2;
-}
diff --git a/tools/net/ynl/samples/tc-filter-add.c b/tools/net/ynl/samples/tc-filter-add.c
new file mode 100644
index 000000000000..97871e9e9edc
--- /dev/null
+++ b/tools/net/ynl/samples/tc-filter-add.c
@@ -0,0 +1,335 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <arpa/inet.h>
+#include <linux/pkt_sched.h>
+#include <linux/tc_act/tc_vlan.h>
+#include <linux/tc_act/tc_gact.h>
+#include <linux/if_ether.h>
+#include <net/if.h>
+
+#include <ynl.h>
+
+#include "tc-user.h"
+
+#define TC_HANDLE (0xFFFF << 16)
+
+const char *vlan_act_name(struct tc_vlan *p)
+{
+ switch (p->v_action) {
+ case TCA_VLAN_ACT_POP:
+ return "pop";
+ case TCA_VLAN_ACT_PUSH:
+ return "push";
+ case TCA_VLAN_ACT_MODIFY:
+ return "modify";
+ default:
+ break;
+ }
+
+ return "not supported";
+}
+
+const char *gact_act_name(struct tc_gact *p)
+{
+ switch (p->action) {
+ case TC_ACT_SHOT:
+ return "drop";
+ case TC_ACT_OK:
+ return "ok";
+ case TC_ACT_PIPE:
+ return "pipe";
+ default:
+ break;
+ }
+
+ return "not supported";
+}
+
+static void print_vlan(struct tc_act_vlan_attrs *vlan)
+{
+ printf("%s ", vlan_act_name(vlan->parms));
+ if (vlan->_present.push_vlan_id)
+ printf("id %u ", vlan->push_vlan_id);
+ if (vlan->_present.push_vlan_protocol)
+ printf("protocol %#x ", ntohs(vlan->push_vlan_protocol));
+ if (vlan->_present.push_vlan_priority)
+ printf("priority %u ", vlan->push_vlan_priority);
+}
+
+static void print_gact(struct tc_act_gact_attrs *gact)
+{
+ struct tc_gact *p = gact->parms;
+
+ printf("%s ", gact_act_name(p));
+}
+
+static void flower_print(struct tc_flower_attrs *flower, const char *kind)
+{
+ struct tc_act_attrs *a;
+ unsigned int i;
+
+ printf("%s:\n", kind);
+
+ if (flower->_present.key_vlan_id)
+ printf(" vlan_id: %u\n", flower->key_vlan_id);
+ if (flower->_present.key_vlan_prio)
+ printf(" vlan_prio: %u\n", flower->key_vlan_prio);
+ if (flower->_present.key_num_of_vlans)
+ printf(" num_of_vlans: %u\n", flower->key_num_of_vlans);
+
+ for (i = 0; i < flower->_count.act; i++) {
+ a = &flower->act[i];
+ printf("action order: %i %s ", i + 1, a->kind);
+ if (a->options._present.vlan)
+ print_vlan(&a->options.vlan);
+ else if (a->options._present.gact)
+ print_gact(&a->options.gact);
+ printf("\n");
+ }
+ printf("\n");
+}
+
+static void tc_filter_print(struct tc_gettfilter_rsp *f)
+{
+ struct tc_options_msg *opt = &f->options;
+
+ if (opt->_present.flower)
+ flower_print(&opt->flower, f->kind);
+ else if (f->_len.kind)
+ printf("%s pref %u proto: %#x\n", f->kind,
+ (f->_hdr.tcm_info >> 16),
+ ntohs(TC_H_MIN(f->_hdr.tcm_info)));
+}
+
+static int tc_filter_add(struct ynl_sock *ys, int ifi)
+{
+ struct tc_newtfilter_req *req;
+ struct tc_act_attrs *acts;
+ struct tc_vlan p = {
+ .action = TC_ACT_PIPE,
+ .v_action = TCA_VLAN_ACT_PUSH
+ };
+ __u16 flags = NLM_F_REQUEST | NLM_F_EXCL | NLM_F_CREATE;
+ int ret;
+
+ req = tc_newtfilter_req_alloc();
+ if (!req) {
+ fprintf(stderr, "tc_newtfilter_req_alloc failed\n");
+ return -1;
+ }
+ memset(req, 0, sizeof(*req));
+
+ acts = tc_act_attrs_alloc(3);
+ if (!acts) {
+ fprintf(stderr, "tc_act_attrs_alloc\n");
+ tc_newtfilter_req_free(req);
+ return -1;
+ }
+ memset(acts, 0, sizeof(*acts) * 3);
+
+ req->_hdr.tcm_ifindex = ifi;
+ req->_hdr.tcm_parent = TC_H_MAKE(TC_H_CLSACT, TC_H_MIN_INGRESS);
+ req->_hdr.tcm_info = TC_H_MAKE(1 << 16, htons(ETH_P_8021Q));
+ req->chain = 0;
+
+ tc_newtfilter_req_set_nlflags(req, flags);
+ tc_newtfilter_req_set_kind(req, "flower");
+ tc_newtfilter_req_set_options_flower_key_vlan_id(req, 100);
+ tc_newtfilter_req_set_options_flower_key_vlan_prio(req, 5);
+ tc_newtfilter_req_set_options_flower_key_num_of_vlans(req, 3);
+
+ __tc_newtfilter_req_set_options_flower_act(req, acts, 3);
+
+ /* Skip action at index 0 because in TC, the action array
+ * index starts at 1, with each index defining the action's
+ * order. In contrast, in YNL indexed arrays start at index 0.
+ */
+ tc_act_attrs_set_kind(&acts[1], "vlan");
+ tc_act_attrs_set_options_vlan_parms(&acts[1], &p, sizeof(p));
+ tc_act_attrs_set_options_vlan_push_vlan_id(&acts[1], 200);
+ tc_act_attrs_set_kind(&acts[2], "vlan");
+ tc_act_attrs_set_options_vlan_parms(&acts[2], &p, sizeof(p));
+ tc_act_attrs_set_options_vlan_push_vlan_id(&acts[2], 300);
+
+ tc_newtfilter_req_set_options_flower_flags(req, 0);
+ tc_newtfilter_req_set_options_flower_key_eth_type(req, htons(0x8100));
+
+ ret = tc_newtfilter(ys, req);
+ if (ret)
+ fprintf(stderr, "tc_newtfilter: %s\n", ys->err.msg);
+
+ tc_newtfilter_req_free(req);
+
+ return ret;
+}
+
+static int tc_filter_show(struct ynl_sock *ys, int ifi)
+{
+ struct tc_gettfilter_req_dump *req;
+ struct tc_gettfilter_list *rsp;
+
+ req = tc_gettfilter_req_dump_alloc();
+ if (!req) {
+ fprintf(stderr, "tc_gettfilter_req_dump_alloc failed\n");
+ return -1;
+ }
+ memset(req, 0, sizeof(*req));
+
+ req->_hdr.tcm_ifindex = ifi;
+ req->_hdr.tcm_parent = TC_H_MAKE(TC_H_CLSACT, TC_H_MIN_INGRESS);
+ req->_present.chain = 1;
+ req->chain = 0;
+
+ rsp = tc_gettfilter_dump(ys, req);
+ tc_gettfilter_req_dump_free(req);
+ if (!rsp) {
+ fprintf(stderr, "YNL: %s\n", ys->err.msg);
+ return -1;
+ }
+
+ if (ynl_dump_empty(rsp))
+ fprintf(stderr, "Error: no filters reported\n");
+ else
+ ynl_dump_foreach(rsp, flt) tc_filter_print(flt);
+
+ tc_gettfilter_list_free(rsp);
+
+ return 0;
+}
+
+static int tc_filter_del(struct ynl_sock *ys, int ifi)
+{
+ struct tc_deltfilter_req *req;
+ __u16 flags = NLM_F_REQUEST;
+ int ret;
+
+ req = tc_deltfilter_req_alloc();
+ if (!req) {
+ fprintf(stderr, "tc_deltfilter_req_alloc failed\n");
+ return -1;
+ }
+ memset(req, 0, sizeof(*req));
+
+ req->_hdr.tcm_ifindex = ifi;
+ req->_hdr.tcm_parent = TC_H_MAKE(TC_H_CLSACT, TC_H_MIN_INGRESS);
+ req->_hdr.tcm_info = TC_H_MAKE(1 << 16, htons(ETH_P_8021Q));
+ tc_deltfilter_req_set_nlflags(req, flags);
+
+ ret = tc_deltfilter(ys, req);
+ if (ret)
+ fprintf(stderr, "tc_deltfilter failed: %s\n", ys->err.msg);
+
+ tc_deltfilter_req_free(req);
+
+ return ret;
+}
+
+static int tc_clsact_add(struct ynl_sock *ys, int ifi)
+{
+ struct tc_newqdisc_req *req;
+ __u16 flags = NLM_F_REQUEST | NLM_F_EXCL | NLM_F_CREATE;
+ int ret;
+
+ req = tc_newqdisc_req_alloc();
+ if (!req) {
+ fprintf(stderr, "tc_newqdisc_req_alloc failed\n");
+ return -1;
+ }
+ memset(req, 0, sizeof(*req));
+
+ req->_hdr.tcm_ifindex = ifi;
+ req->_hdr.tcm_parent = TC_H_CLSACT;
+ req->_hdr.tcm_handle = TC_HANDLE;
+ tc_newqdisc_req_set_nlflags(req, flags);
+ tc_newqdisc_req_set_kind(req, "clsact");
+
+ ret = tc_newqdisc(ys, req);
+ if (ret)
+ fprintf(stderr, "tc_newqdisc failed: %s\n", ys->err.msg);
+
+ tc_newqdisc_req_free(req);
+
+ return ret;
+}
+
+static int tc_clsact_del(struct ynl_sock *ys, int ifi)
+{
+ struct tc_delqdisc_req *req;
+ __u16 flags = NLM_F_REQUEST;
+ int ret;
+
+ req = tc_delqdisc_req_alloc();
+ if (!req) {
+ fprintf(stderr, "tc_delqdisc_req_alloc failed\n");
+ return -1;
+ }
+ memset(req, 0, sizeof(*req));
+
+ req->_hdr.tcm_ifindex = ifi;
+ req->_hdr.tcm_parent = TC_H_CLSACT;
+ req->_hdr.tcm_handle = TC_HANDLE;
+ tc_delqdisc_req_set_nlflags(req, flags);
+
+ ret = tc_delqdisc(ys, req);
+ if (ret)
+ fprintf(stderr, "tc_delqdisc failed: %s\n", ys->err.msg);
+
+ tc_delqdisc_req_free(req);
+
+ return ret;
+}
+
+static int tc_filter_config(struct ynl_sock *ys, int ifi)
+{
+ int ret = 0;
+
+ if (tc_filter_add(ys, ifi))
+ return -1;
+
+ ret = tc_filter_show(ys, ifi);
+
+ if (tc_filter_del(ys, ifi))
+ return -1;
+
+ return ret;
+}
+
+int main(int argc, char **argv)
+{
+ struct ynl_error yerr;
+ struct ynl_sock *ys;
+ int ifi, ret = 0;
+
+ if (argc < 2) {
+ fprintf(stderr, "Usage: %s <interface_name>\n", argv[0]);
+ return 1;
+ }
+ ifi = if_nametoindex(argv[1]);
+ if (!ifi) {
+ perror("if_nametoindex");
+ return 1;
+ }
+
+ ys = ynl_sock_create(&ynl_tc_family, &yerr);
+ if (!ys) {
+ fprintf(stderr, "YNL: %s\n", yerr.msg);
+ return 1;
+ }
+
+ if (tc_clsact_add(ys, ifi)) {
+ ret = 2;
+ goto err_destroy;
+ }
+
+ if (tc_filter_config(ys, ifi))
+ ret = 3;
+
+ if (tc_clsact_del(ys, ifi))
+ ret = 4;
+
+err_destroy:
+ ynl_sock_destroy(ys);
+ return ret;
+}
diff --git a/tools/net/ynl/tests/Makefile b/tools/net/ynl/tests/Makefile
new file mode 100644
index 000000000000..c1df2e001255
--- /dev/null
+++ b/tools/net/ynl/tests/Makefile
@@ -0,0 +1,32 @@
+# SPDX-License-Identifier: GPL-2.0
+# Makefile for YNL tests
+
+TESTS := \
+ test_ynl_cli.sh \
+ test_ynl_ethtool.sh \
+# end of TESTS
+
+all: $(TESTS)
+
+run_tests:
+ @for test in $(TESTS); do \
+ ./$$test; \
+ done
+
+install: $(TESTS)
+ @mkdir -p $(DESTDIR)/usr/bin
+ @mkdir -p $(DESTDIR)/usr/share/kselftest
+ @cp ../../../testing/selftests/kselftest/ktap_helpers.sh $(DESTDIR)/usr/share/kselftest/
+ @for test in $(TESTS); do \
+ name=$$(basename $$test .sh); \
+ sed -e 's|^ynl=.*|ynl="ynl"|' \
+ -e 's|^ynl_ethtool=.*|ynl_ethtool="ynl-ethtool"|' \
+ -e 's|KSELFTEST_KTAP_HELPERS=.*|KSELFTEST_KTAP_HELPERS="/usr/share/kselftest/ktap_helpers.sh"|' \
+ $$test > $(DESTDIR)/usr/bin/$$name; \
+ chmod +x $(DESTDIR)/usr/bin/$$name; \
+ done
+
+clean distclean:
+ @# Nothing to clean
+
+.PHONY: all install clean run_tests
diff --git a/tools/net/ynl/tests/config b/tools/net/ynl/tests/config
new file mode 100644
index 000000000000..339f1309c03f
--- /dev/null
+++ b/tools/net/ynl/tests/config
@@ -0,0 +1,6 @@
+CONFIG_DUMMY=m
+CONFIG_INET_DIAG=y
+CONFIG_IPV6=y
+CONFIG_NET_NS=y
+CONFIG_NETDEVSIM=m
+CONFIG_VETH=m
diff --git a/tools/net/ynl/tests/test_ynl_cli.sh b/tools/net/ynl/tests/test_ynl_cli.sh
new file mode 100755
index 000000000000..7c0722a08117
--- /dev/null
+++ b/tools/net/ynl/tests/test_ynl_cli.sh
@@ -0,0 +1,327 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# Test YNL CLI functionality
+
+# Load KTAP test helpers
+KSELFTEST_KTAP_HELPERS="$(dirname "$(realpath "$0")")/../../../testing/selftests/kselftest/ktap_helpers.sh"
+# shellcheck source=../../../testing/selftests/kselftest/ktap_helpers.sh
+source "$KSELFTEST_KTAP_HELPERS"
+
+# Default ynl path for direct execution, can be overridden by make install
+ynl="../pyynl/cli.py"
+
+readonly NSIM_ID="1338"
+readonly NSIM_DEV_NAME="nsim${NSIM_ID}"
+readonly VETH_A="veth_a"
+readonly VETH_B="veth_b"
+
+testns="ynl-$(mktemp -u XXXXXX)"
+TESTS_NO=0
+
+# Test listing available families
+cli_list_families()
+{
+ if $ynl --list-families &>/dev/null; then
+ ktap_test_pass "YNL CLI list families"
+ else
+ ktap_test_fail "YNL CLI list families"
+ fi
+}
+TESTS_NO=$((TESTS_NO + 1))
+
+# Test netdev family operations (dev-get, queue-get)
+cli_netdev_ops()
+{
+ local dev_output
+ local ifindex
+
+ ifindex=$(ip netns exec "$testns" cat /sys/class/net/"$NSIM_DEV_NAME"/ifindex 2>/dev/null)
+
+ dev_output=$(ip netns exec "$testns" $ynl --family netdev \
+ --do dev-get --json "{\"ifindex\": $ifindex}" 2>/dev/null)
+
+ if ! echo "$dev_output" | grep -q "ifindex"; then
+ ktap_test_fail "YNL CLI netdev operations (netdev dev-get output missing ifindex)"
+ return
+ fi
+
+ if ! ip netns exec "$testns" $ynl --family netdev \
+ --dump queue-get --json "{\"ifindex\": $ifindex}" &>/dev/null; then
+ ktap_test_fail "YNL CLI netdev operations (failed to get netdev queue info)"
+ return
+ fi
+
+ ktap_test_pass "YNL CLI netdev operations"
+}
+TESTS_NO=$((TESTS_NO + 1))
+
+# Test ethtool family operations (rings-get, linkinfo-get)
+cli_ethtool_ops()
+{
+ local rings_output
+ local linkinfo_output
+
+ rings_output=$(ip netns exec "$testns" $ynl --family ethtool \
+ --do rings-get --json "{\"header\": {\"dev-name\": \"$NSIM_DEV_NAME\"}}" 2>/dev/null)
+
+ if ! echo "$rings_output" | grep -q "header"; then
+ ktap_test_fail "YNL CLI ethtool operations (ethtool rings-get output missing header)"
+ return
+ fi
+
+ linkinfo_output=$(ip netns exec "$testns" $ynl --family ethtool \
+ --do linkinfo-get --json "{\"header\": {\"dev-name\": \"$VETH_A\"}}" 2>/dev/null)
+
+ if ! echo "$linkinfo_output" | grep -q "header"; then
+ ktap_test_fail "YNL CLI ethtool operations (ethtool linkinfo-get output missing header)"
+ return
+ fi
+
+ ktap_test_pass "YNL CLI ethtool operations"
+}
+TESTS_NO=$((TESTS_NO + 1))
+
+# Test rt-route family operations
+cli_rt_route_ops()
+{
+ local ifindex
+
+ if ! $ynl --list-families 2>/dev/null | grep -q "rt-route"; then
+ ktap_test_skip "YNL CLI rt-route operations (rt-route family not available)"
+ return
+ fi
+
+ ifindex=$(ip netns exec "$testns" cat /sys/class/net/"$NSIM_DEV_NAME"/ifindex 2>/dev/null)
+
+ # Add route: 192.0.2.0/24 dev $dev scope link
+ if ! ip netns exec "$testns" $ynl --family rt-route --do newroute --create \
+ --json "{\"dst\": \"192.0.2.0\", \"oif\": $ifindex, \"rtm-dst-len\": 24, \"rtm-family\": 2, \"rtm-scope\": 253, \"rtm-type\": 1, \"rtm-protocol\": 3, \"rtm-table\": 254}" &>/dev/null; then
+ ktap_test_fail "YNL CLI rt-route operations (failed to add route)"
+ return
+ fi
+
+ local route_output
+ route_output=$(ip netns exec "$testns" $ynl --family rt-route --dump getroute 2>/dev/null)
+ if echo "$route_output" | grep -q "192.0.2.0"; then
+ ktap_test_pass "YNL CLI rt-route operations"
+ else
+ ktap_test_fail "YNL CLI rt-route operations (failed to verify route)"
+ fi
+
+ ip netns exec "$testns" $ynl --family rt-route --do delroute \
+ --json "{\"dst\": \"192.0.2.0\", \"oif\": $ifindex, \"rtm-dst-len\": 24, \"rtm-family\": 2, \"rtm-scope\": 253, \"rtm-type\": 1, \"rtm-protocol\": 3, \"rtm-table\": 254}" &>/dev/null
+}
+TESTS_NO=$((TESTS_NO + 1))
+
+# Test rt-addr family operations
+cli_rt_addr_ops()
+{
+ local ifindex
+
+ if ! $ynl --list-families 2>/dev/null | grep -q "rt-addr"; then
+ ktap_test_skip "YNL CLI rt-addr operations (rt-addr family not available)"
+ return
+ fi
+
+ ifindex=$(ip netns exec "$testns" cat /sys/class/net/"$NSIM_DEV_NAME"/ifindex 2>/dev/null)
+
+ if ! ip netns exec "$testns" $ynl --family rt-addr --do newaddr \
+ --json "{\"ifa-index\": $ifindex, \"local\": \"192.0.2.100\", \"ifa-prefixlen\": 24, \"ifa-family\": 2}" &>/dev/null; then
+ ktap_test_fail "YNL CLI rt-addr operations (failed to add address)"
+ return
+ fi
+
+ local addr_output
+ addr_output=$(ip netns exec "$testns" $ynl --family rt-addr --dump getaddr 2>/dev/null)
+ if echo "$addr_output" | grep -q "192.0.2.100"; then
+ ktap_test_pass "YNL CLI rt-addr operations"
+ else
+ ktap_test_fail "YNL CLI rt-addr operations (failed to verify address)"
+ fi
+
+ ip netns exec "$testns" $ynl --family rt-addr --do deladdr \
+ --json "{\"ifa-index\": $ifindex, \"local\": \"192.0.2.100\", \"ifa-prefixlen\": 24, \"ifa-family\": 2}" &>/dev/null
+}
+TESTS_NO=$((TESTS_NO + 1))
+
+# Test rt-link family operations
+cli_rt_link_ops()
+{
+ if ! $ynl --list-families 2>/dev/null | grep -q "rt-link"; then
+ ktap_test_skip "YNL CLI rt-link operations (rt-link family not available)"
+ return
+ fi
+
+ if ! ip netns exec "$testns" $ynl --family rt-link --do newlink --create \
+ --json "{\"ifname\": \"dummy0\", \"linkinfo\": {\"kind\": \"dummy\"}}" &>/dev/null; then
+ ktap_test_fail "YNL CLI rt-link operations (failed to add link)"
+ return
+ fi
+
+ local link_output
+ link_output=$(ip netns exec "$testns" $ynl --family rt-link --dump getlink 2>/dev/null)
+ if echo "$link_output" | grep -q "$NSIM_DEV_NAME" && echo "$link_output" | grep -q "dummy0"; then
+ ktap_test_pass "YNL CLI rt-link operations"
+ else
+ ktap_test_fail "YNL CLI rt-link operations (failed to verify link)"
+ fi
+
+ ip netns exec "$testns" $ynl --family rt-link --do dellink \
+ --json "{\"ifname\": \"dummy0\"}" &>/dev/null
+}
+TESTS_NO=$((TESTS_NO + 1))
+
+# Test rt-neigh family operations
+cli_rt_neigh_ops()
+{
+ local ifindex
+
+ if ! $ynl --list-families 2>/dev/null | grep -q "rt-neigh"; then
+ ktap_test_skip "YNL CLI rt-neigh operations (rt-neigh family not available)"
+ return
+ fi
+
+ ifindex=$(ip netns exec "$testns" cat /sys/class/net/"$NSIM_DEV_NAME"/ifindex 2>/dev/null)
+
+ # Add neighbor: 192.0.2.1 dev nsim1338 lladdr 11:22:33:44:55:66 PERMANENT
+ if ! ip netns exec "$testns" $ynl --family rt-neigh --do newneigh --create \
+ --json "{\"ndm-ifindex\": $ifindex, \"dst\": \"192.0.2.1\", \"lladdr\": \"11:22:33:44:55:66\", \"ndm-family\": 2, \"ndm-state\": 128}" &>/dev/null; then
+ ktap_test_fail "YNL CLI rt-neigh operations (failed to add neighbor)"
+ fi
+
+ local neigh_output
+ neigh_output=$(ip netns exec "$testns" $ynl --family rt-neigh --dump getneigh 2>/dev/null)
+ if echo "$neigh_output" | grep -q "192.0.2.1"; then
+ ktap_test_pass "YNL CLI rt-neigh operations"
+ else
+ ktap_test_fail "YNL CLI rt-neigh operations (failed to verify neighbor)"
+ fi
+
+ ip netns exec "$testns" $ynl --family rt-neigh --do delneigh \
+ --json "{\"ndm-ifindex\": $ifindex, \"dst\": \"192.0.2.1\", \"lladdr\": \"11:22:33:44:55:66\", \"ndm-family\": 2}" &>/dev/null
+}
+TESTS_NO=$((TESTS_NO + 1))
+
+# Test rt-rule family operations
+cli_rt_rule_ops()
+{
+ if ! $ynl --list-families 2>/dev/null | grep -q "rt-rule"; then
+ ktap_test_skip "YNL CLI rt-rule operations (rt-rule family not available)"
+ return
+ fi
+
+ # Add rule: from 192.0.2.0/24 lookup 100 none
+ if ! ip netns exec "$testns" $ynl --family rt-rule --do newrule \
+ --json "{\"family\": 2, \"src-len\": 24, \"src\": \"192.0.2.0\", \"table\": 100}" &>/dev/null; then
+ ktap_test_fail "YNL CLI rt-rule operations (failed to add rule)"
+ return
+ fi
+
+ local rule_output
+ rule_output=$(ip netns exec "$testns" $ynl --family rt-rule --dump getrule 2>/dev/null)
+ if echo "$rule_output" | grep -q "192.0.2.0"; then
+ ktap_test_pass "YNL CLI rt-rule operations"
+ else
+ ktap_test_fail "YNL CLI rt-rule operations (failed to verify rule)"
+ fi
+
+ ip netns exec "$testns" $ynl --family rt-rule --do delrule \
+ --json "{\"family\": 2, \"src-len\": 24, \"src\": \"192.0.2.0\", \"table\": 100}" &>/dev/null
+}
+TESTS_NO=$((TESTS_NO + 1))
+
+# Test nlctrl family operations
+cli_nlctrl_ops()
+{
+ local family_output
+
+ if ! family_output=$($ynl --family nlctrl \
+ --do getfamily --json "{\"family-name\": \"netdev\"}" 2>/dev/null); then
+ ktap_test_fail "YNL CLI nlctrl getfamily (failed to get nlctrl family info)"
+ return
+ fi
+
+ if ! echo "$family_output" | grep -q "family-name"; then
+ ktap_test_fail "YNL CLI nlctrl getfamily (nlctrl getfamily output missing family-name)"
+ return
+ fi
+
+ if ! echo "$family_output" | grep -q "family-id"; then
+ ktap_test_fail "YNL CLI nlctrl getfamily (nlctrl getfamily output missing family-id)"
+ return
+ fi
+
+ ktap_test_pass "YNL CLI nlctrl getfamily"
+}
+TESTS_NO=$((TESTS_NO + 1))
+
+setup()
+{
+ modprobe netdevsim &> /dev/null
+ if ! [ -f /sys/bus/netdevsim/new_device ]; then
+ ktap_skip_all "netdevsim module not available"
+ exit "$KSFT_SKIP"
+ fi
+
+ if ! ip netns add "$testns" 2>/dev/null; then
+ ktap_skip_all "failed to create test namespace"
+ exit "$KSFT_SKIP"
+ fi
+
+ echo "$NSIM_ID 1" | ip netns exec "$testns" tee /sys/bus/netdevsim/new_device >/dev/null 2>&1 || {
+ ktap_skip_all "failed to create netdevsim device"
+ exit "$KSFT_SKIP"
+ }
+
+ local dev
+ dev=$(ip netns exec "$testns" ls /sys/bus/netdevsim/devices/netdevsim$NSIM_ID/net 2>/dev/null | head -1)
+ if [[ -z "$dev" ]]; then
+ ktap_skip_all "failed to find netdevsim device"
+ exit "$KSFT_SKIP"
+ fi
+
+ ip -netns "$testns" link set dev "$dev" name "$NSIM_DEV_NAME" 2>/dev/null || {
+ ktap_skip_all "failed to rename netdevsim device"
+ exit "$KSFT_SKIP"
+ }
+
+ ip -netns "$testns" link set dev "$NSIM_DEV_NAME" up 2>/dev/null
+
+ if ! ip -n "$testns" link add "$VETH_A" type veth peer name "$VETH_B" 2>/dev/null; then
+ ktap_skip_all "failed to create veth pair"
+ exit "$KSFT_SKIP"
+ fi
+
+ ip -n "$testns" link set "$VETH_A" up 2>/dev/null
+ ip -n "$testns" link set "$VETH_B" up 2>/dev/null
+}
+
+cleanup()
+{
+ ip netns exec "$testns" bash -c "echo $NSIM_ID > /sys/bus/netdevsim/del_device" 2>/dev/null || true
+ ip netns del "$testns" 2>/dev/null || true
+}
+
+# Check if ynl command is available
+if ! command -v $ynl &>/dev/null && [[ ! -x $ynl ]]; then
+ ktap_skip_all "ynl command not found: $ynl"
+ exit "$KSFT_SKIP"
+fi
+
+trap cleanup EXIT
+
+ktap_print_header
+setup
+ktap_set_plan "${TESTS_NO}"
+
+cli_list_families
+cli_netdev_ops
+cli_ethtool_ops
+cli_rt_route_ops
+cli_rt_addr_ops
+cli_rt_link_ops
+cli_rt_neigh_ops
+cli_rt_rule_ops
+cli_nlctrl_ops
+
+ktap_finished
diff --git a/tools/net/ynl/tests/test_ynl_ethtool.sh b/tools/net/ynl/tests/test_ynl_ethtool.sh
new file mode 100755
index 000000000000..b826269017f4
--- /dev/null
+++ b/tools/net/ynl/tests/test_ynl_ethtool.sh
@@ -0,0 +1,222 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# Test YNL ethtool functionality
+
+# Load KTAP test helpers
+KSELFTEST_KTAP_HELPERS="$(dirname "$(realpath "$0")")/../../../testing/selftests/kselftest/ktap_helpers.sh"
+# shellcheck source=../../../testing/selftests/kselftest/ktap_helpers.sh
+source "$KSELFTEST_KTAP_HELPERS"
+
+# Default ynl-ethtool path for direct execution, can be overridden by make install
+ynl_ethtool="../pyynl/ethtool.py"
+
+readonly NSIM_ID="1337"
+readonly NSIM_DEV_NAME="nsim${NSIM_ID}"
+readonly VETH_A="veth_a"
+readonly VETH_B="veth_b"
+
+testns="ynl-ethtool-$(mktemp -u XXXXXX)"
+TESTS_NO=0
+
+# Uses veth device as netdevsim doesn't support basic ethtool device info
+ethtool_device_info()
+{
+ local info_output
+
+ info_output=$(ip netns exec "$testns" $ynl_ethtool "$VETH_A" 2>/dev/null)
+
+ if ! echo "$info_output" | grep -q "Settings for"; then
+ ktap_test_fail "YNL ethtool device info (device info output missing expected content)"
+ return
+ fi
+
+ ktap_test_pass "YNL ethtool device info"
+}
+TESTS_NO=$((TESTS_NO + 1))
+
+ethtool_statistics()
+{
+ local stats_output
+
+ stats_output=$(ip netns exec "$testns" $ynl_ethtool --statistics "$NSIM_DEV_NAME" 2>/dev/null)
+
+ if ! echo "$stats_output" | grep -q -E "(NIC statistics|packets|bytes)"; then
+ ktap_test_fail "YNL ethtool statistics (statistics output missing expected content)"
+ return
+ fi
+
+ ktap_test_pass "YNL ethtool statistics"
+}
+TESTS_NO=$((TESTS_NO + 1))
+
+ethtool_ring_params()
+{
+ local ring_output
+
+ ring_output=$(ip netns exec "$testns" $ynl_ethtool --show-ring "$NSIM_DEV_NAME" 2>/dev/null)
+
+ if ! echo "$ring_output" | grep -q -E "(Ring parameters|RX|TX)"; then
+ ktap_test_fail "YNL ethtool ring parameters (ring parameters output missing expected content)"
+ return
+ fi
+
+ if ! ip netns exec "$testns" $ynl_ethtool --set-ring "$NSIM_DEV_NAME" rx 64 2>/dev/null; then
+ ktap_test_fail "YNL ethtool ring parameters (set-ring command failed unexpectedly)"
+ return
+ fi
+
+ ktap_test_pass "YNL ethtool ring parameters (show/set)"
+}
+TESTS_NO=$((TESTS_NO + 1))
+
+ethtool_coalesce_params()
+{
+ if ! ip netns exec "$testns" $ynl_ethtool --show-coalesce "$NSIM_DEV_NAME" &>/dev/null; then
+ ktap_test_fail "YNL ethtool coalesce parameters (failed to get coalesce parameters)"
+ return
+ fi
+
+ if ! ip netns exec "$testns" $ynl_ethtool --set-coalesce "$NSIM_DEV_NAME" rx-usecs 50 2>/dev/null; then
+ ktap_test_fail "YNL ethtool coalesce parameters (set-coalesce command failed unexpectedly)"
+ return
+ fi
+
+ ktap_test_pass "YNL ethtool coalesce parameters (show/set)"
+}
+TESTS_NO=$((TESTS_NO + 1))
+
+ethtool_pause_params()
+{
+ if ! ip netns exec "$testns" $ynl_ethtool --show-pause "$NSIM_DEV_NAME" &>/dev/null; then
+ ktap_test_fail "YNL ethtool pause parameters (failed to get pause parameters)"
+ return
+ fi
+
+ if ! ip netns exec "$testns" $ynl_ethtool --set-pause "$NSIM_DEV_NAME" tx 1 rx 1 2>/dev/null; then
+ ktap_test_fail "YNL ethtool pause parameters (set-pause command failed unexpectedly)"
+ return
+ fi
+
+ ktap_test_pass "YNL ethtool pause parameters (show/set)"
+}
+TESTS_NO=$((TESTS_NO + 1))
+
+ethtool_features_info()
+{
+ local features_output
+
+ features_output=$(ip netns exec "$testns" $ynl_ethtool --show-features "$NSIM_DEV_NAME" 2>/dev/null)
+
+ if ! echo "$features_output" | grep -q -E "(Features|offload)"; then
+ ktap_test_fail "YNL ethtool features info (features output missing expected content)"
+ return
+ fi
+
+ ktap_test_pass "YNL ethtool features info (show/set)"
+}
+TESTS_NO=$((TESTS_NO + 1))
+
+ethtool_channels_info()
+{
+ local channels_output
+
+ channels_output=$(ip netns exec "$testns" $ynl_ethtool --show-channels "$NSIM_DEV_NAME" 2>/dev/null)
+
+ if ! echo "$channels_output" | grep -q -E "(Channel|Combined|RX|TX)"; then
+ ktap_test_fail "YNL ethtool channels info (channels output missing expected content)"
+ return
+ fi
+
+ if ! ip netns exec "$testns" $ynl_ethtool --set-channels "$NSIM_DEV_NAME" combined-count 1 2>/dev/null; then
+ ktap_test_fail "YNL ethtool channels info (set-channels command failed unexpectedly)"
+ return
+ fi
+
+ ktap_test_pass "YNL ethtool channels info (show/set)"
+}
+TESTS_NO=$((TESTS_NO + 1))
+
+ethtool_time_stamping()
+{
+ local ts_output
+
+ ts_output=$(ip netns exec "$testns" $ynl_ethtool --show-time-stamping "$NSIM_DEV_NAME" 2>/dev/null)
+
+ if ! echo "$ts_output" | grep -q -E "(Time stamping|timestamping|SOF_TIMESTAMPING)"; then
+ ktap_test_fail "YNL ethtool time stamping (time stamping output missing expected content)"
+ return
+ fi
+
+ ktap_test_pass "YNL ethtool time stamping"
+}
+TESTS_NO=$((TESTS_NO + 1))
+
+setup()
+{
+ modprobe netdevsim &> /dev/null
+ if ! [ -f /sys/bus/netdevsim/new_device ]; then
+ ktap_skip_all "netdevsim module not available"
+ exit "$KSFT_SKIP"
+ fi
+
+ if ! ip netns add "$testns" 2>/dev/null; then
+ ktap_skip_all "failed to create test namespace"
+ exit "$KSFT_SKIP"
+ fi
+
+ echo "$NSIM_ID 1" | ip netns exec "$testns" tee /sys/bus/netdevsim/new_device >/dev/null 2>&1 || {
+ ktap_skip_all "failed to create netdevsim device"
+ exit "$KSFT_SKIP"
+ }
+
+ local dev
+ dev=$(ip netns exec "$testns" ls /sys/bus/netdevsim/devices/netdevsim$NSIM_ID/net 2>/dev/null | head -1)
+ if [[ -z "$dev" ]]; then
+ ktap_skip_all "failed to find netdevsim device"
+ exit "$KSFT_SKIP"
+ fi
+
+ ip -netns "$testns" link set dev "$dev" name "$NSIM_DEV_NAME" 2>/dev/null || {
+ ktap_skip_all "failed to rename netdevsim device"
+ exit "$KSFT_SKIP"
+ }
+
+ ip -netns "$testns" link set dev "$NSIM_DEV_NAME" up 2>/dev/null
+
+ if ! ip -n "$testns" link add "$VETH_A" type veth peer name "$VETH_B" 2>/dev/null; then
+ ktap_skip_all "failed to create veth pair"
+ exit "$KSFT_SKIP"
+ fi
+
+ ip -n "$testns" link set "$VETH_A" up 2>/dev/null
+ ip -n "$testns" link set "$VETH_B" up 2>/dev/null
+}
+
+cleanup()
+{
+ ip netns exec "$testns" bash -c "echo $NSIM_ID > /sys/bus/netdevsim/del_device" 2>/dev/null || true
+ ip netns del "$testns" 2>/dev/null || true
+}
+
+# Check if ynl-ethtool command is available
+if ! command -v $ynl_ethtool &>/dev/null && [[ ! -x $ynl_ethtool ]]; then
+ ktap_skip_all "ynl-ethtool command not found: $ynl_ethtool"
+ exit "$KSFT_SKIP"
+fi
+
+trap cleanup EXIT
+
+ktap_print_header
+setup
+ktap_set_plan "${TESTS_NO}"
+
+ethtool_device_info
+ethtool_statistics
+ethtool_ring_params
+ethtool_coalesce_params
+ethtool_pause_params
+ethtool_features_info
+ethtool_channels_info
+ethtool_time_stamping
+
+ktap_finished
diff --git a/tools/net/ynl/ynltool/.gitignore b/tools/net/ynl/ynltool/.gitignore
new file mode 100644
index 000000000000..690d399c921a
--- /dev/null
+++ b/tools/net/ynl/ynltool/.gitignore
@@ -0,0 +1,2 @@
+ynltool
+*.d
diff --git a/tools/net/ynl/ynltool/Makefile b/tools/net/ynl/ynltool/Makefile
new file mode 100644
index 000000000000..f5b1de32daa5
--- /dev/null
+++ b/tools/net/ynl/ynltool/Makefile
@@ -0,0 +1,55 @@
+# SPDX-License-Identifier: GPL-2.0-only
+
+include ../Makefile.deps
+
+INSTALL ?= install
+prefix ?= /usr
+
+CC := gcc
+CFLAGS := -Wall -Wextra -Werror -O2
+ifeq ("$(DEBUG)","1")
+ CFLAGS += -g -fsanitize=address -fsanitize=leak -static-libasan
+endif
+CFLAGS += -I../lib -I../generated -I../../../include/uapi/
+
+SRC_VERSION := \
+ $(shell make --no-print-directory -sC ../../../.. kernelversion || \
+ echo "unknown")
+
+CFLAGS += -DSRC_VERSION='"$(SRC_VERSION)"'
+
+SRCS := $(wildcard *.c)
+OBJS := $(patsubst %.c,$(OUTPUT)%.o,$(SRCS))
+
+YNLTOOL := $(OUTPUT)ynltool
+
+include $(wildcard *.d)
+
+all: $(YNLTOOL)
+
+Q = @
+
+$(YNLTOOL): ../libynl.a $(OBJS)
+ $(Q)echo -e "\tLINK $@"
+ $(Q)$(CC) $(CFLAGS) -o $@ $(OBJS) ../libynl.a -lm
+
+%.o: %.c ../libynl.a
+ $(Q)echo -e "\tCC $@"
+ $(Q)$(COMPILE.c) -MMD -c -o $@ $<
+
+../libynl.a:
+ $(Q)$(MAKE) -C ../
+
+clean:
+ rm -f *.o *.d *~
+
+distclean: clean
+ rm -f $(YNLTOOL)
+
+bindir ?= /usr/bin
+
+install: $(YNLTOOL)
+ $(INSTALL) -m 0755 $(YNLTOOL) $(DESTDIR)$(bindir)/$(YNLTOOL)
+
+.PHONY: all clean distclean
+.DEFAULT_GOAL=all
diff --git a/tools/net/ynl/ynltool/json_writer.c b/tools/net/ynl/ynltool/json_writer.c
new file mode 100644
index 000000000000..c8685e592cd3
--- /dev/null
+++ b/tools/net/ynl/ynltool/json_writer.c
@@ -0,0 +1,288 @@
+// SPDX-License-Identifier: (GPL-2.0-or-later OR BSD-2-Clause)
+/*
+ * Simple streaming JSON writer
+ *
+ * This takes care of the annoying bits of JSON syntax like the commas
+ * after elements
+ *
+ * Authors: Stephen Hemminger <stephen@networkplumber.org>
+ */
+
+#include <stdio.h>
+#include <stdbool.h>
+#include <stdarg.h>
+#include <assert.h>
+#include <malloc.h>
+#include <inttypes.h>
+#include <stdint.h>
+
+#include "json_writer.h"
+
+struct json_writer {
+ FILE *out;
+ unsigned depth;
+ bool pretty;
+ char sep;
+};
+
+static void jsonw_indent(json_writer_t *self)
+{
+ unsigned i;
+ for (i = 0; i < self->depth; ++i)
+ fputs(" ", self->out);
+}
+
+static void jsonw_eol(json_writer_t *self)
+{
+ if (!self->pretty)
+ return;
+
+ putc('\n', self->out);
+ jsonw_indent(self);
+}
+
+static void jsonw_eor(json_writer_t *self)
+{
+ if (self->sep != '\0')
+ putc(self->sep, self->out);
+ self->sep = ',';
+}
+
+static void jsonw_puts(json_writer_t *self, const char *str)
+{
+ putc('"', self->out);
+ for (; *str; ++str)
+ switch (*str) {
+ case '\t':
+ fputs("\\t", self->out);
+ break;
+ case '\n':
+ fputs("\\n", self->out);
+ break;
+ case '\r':
+ fputs("\\r", self->out);
+ break;
+ case '\f':
+ fputs("\\f", self->out);
+ break;
+ case '\b':
+ fputs("\\b", self->out);
+ break;
+ case '\\':
+ fputs("\\\\", self->out);
+ break;
+ case '"':
+ fputs("\\\"", self->out);
+ break;
+ default:
+ putc(*str, self->out);
+ }
+ putc('"', self->out);
+}
+
+json_writer_t *jsonw_new(FILE *f)
+{
+ json_writer_t *self = malloc(sizeof(*self));
+ if (self) {
+ self->out = f;
+ self->depth = 0;
+ self->pretty = false;
+ self->sep = '\0';
+ }
+ return self;
+}
+
+void jsonw_destroy(json_writer_t **self_p)
+{
+ json_writer_t *self = *self_p;
+
+ assert(self->depth == 0);
+ fputs("\n", self->out);
+ fflush(self->out);
+ free(self);
+ *self_p = NULL;
+}
+
+void jsonw_pretty(json_writer_t *self, bool on)
+{
+ self->pretty = on;
+}
+
+void jsonw_reset(json_writer_t *self)
+{
+ assert(self->depth == 0);
+ self->sep = '\0';
+}
+
+static void jsonw_begin(json_writer_t *self, int c)
+{
+ jsonw_eor(self);
+ putc(c, self->out);
+ ++self->depth;
+ self->sep = '\0';
+}
+
+static void jsonw_end(json_writer_t *self, int c)
+{
+ assert(self->depth > 0);
+
+ --self->depth;
+ if (self->sep != '\0')
+ jsonw_eol(self);
+ putc(c, self->out);
+ self->sep = ',';
+}
+
+void jsonw_name(json_writer_t *self, const char *name)
+{
+ jsonw_eor(self);
+ jsonw_eol(self);
+ self->sep = '\0';
+ jsonw_puts(self, name);
+ putc(':', self->out);
+ if (self->pretty)
+ putc(' ', self->out);
+}
+
+void jsonw_vprintf_enquote(json_writer_t *self, const char *fmt, va_list ap)
+{
+ jsonw_eor(self);
+ putc('"', self->out);
+ vfprintf(self->out, fmt, ap);
+ putc('"', self->out);
+}
+
+void jsonw_printf(json_writer_t *self, const char *fmt, ...)
+{
+ va_list ap;
+
+ va_start(ap, fmt);
+ jsonw_eor(self);
+ vfprintf(self->out, fmt, ap);
+ va_end(ap);
+}
+
+void jsonw_start_object(json_writer_t *self)
+{
+ jsonw_begin(self, '{');
+}
+
+void jsonw_end_object(json_writer_t *self)
+{
+ jsonw_end(self, '}');
+}
+
+void jsonw_start_array(json_writer_t *self)
+{
+ jsonw_begin(self, '[');
+}
+
+void jsonw_end_array(json_writer_t *self)
+{
+ jsonw_end(self, ']');
+}
+
+void jsonw_string(json_writer_t *self, const char *value)
+{
+ jsonw_eor(self);
+ jsonw_puts(self, value);
+}
+
+void jsonw_bool(json_writer_t *self, bool val)
+{
+ jsonw_printf(self, "%s", val ? "true" : "false");
+}
+
+void jsonw_null(json_writer_t *self)
+{
+ jsonw_printf(self, "null");
+}
+
+void jsonw_float_fmt(json_writer_t *self, const char *fmt, double num)
+{
+ jsonw_printf(self, fmt, num);
+}
+
+void jsonw_float(json_writer_t *self, double num)
+{
+ jsonw_printf(self, "%g", num);
+}
+
+void jsonw_hu(json_writer_t *self, unsigned short num)
+{
+ jsonw_printf(self, "%hu", num);
+}
+
+void jsonw_uint(json_writer_t *self, uint64_t num)
+{
+ jsonw_printf(self, "%"PRIu64, num);
+}
+
+void jsonw_lluint(json_writer_t *self, unsigned long long int num)
+{
+ jsonw_printf(self, "%llu", num);
+}
+
+void jsonw_int(json_writer_t *self, int64_t num)
+{
+ jsonw_printf(self, "%"PRId64, num);
+}
+
+void jsonw_string_field(json_writer_t *self, const char *prop, const char *val)
+{
+ jsonw_name(self, prop);
+ jsonw_string(self, val);
+}
+
+void jsonw_bool_field(json_writer_t *self, const char *prop, bool val)
+{
+ jsonw_name(self, prop);
+ jsonw_bool(self, val);
+}
+
+void jsonw_float_field(json_writer_t *self, const char *prop, double val)
+{
+ jsonw_name(self, prop);
+ jsonw_float(self, val);
+}
+
+void jsonw_float_field_fmt(json_writer_t *self,
+ const char *prop,
+ const char *fmt,
+ double val)
+{
+ jsonw_name(self, prop);
+ jsonw_float_fmt(self, fmt, val);
+}
+
+void jsonw_uint_field(json_writer_t *self, const char *prop, uint64_t num)
+{
+ jsonw_name(self, prop);
+ jsonw_uint(self, num);
+}
+
+void jsonw_hu_field(json_writer_t *self, const char *prop, unsigned short num)
+{
+ jsonw_name(self, prop);
+ jsonw_hu(self, num);
+}
+
+void jsonw_lluint_field(json_writer_t *self,
+ const char *prop,
+ unsigned long long int num)
+{
+ jsonw_name(self, prop);
+ jsonw_lluint(self, num);
+}
+
+void jsonw_int_field(json_writer_t *self, const char *prop, int64_t num)
+{
+ jsonw_name(self, prop);
+ jsonw_int(self, num);
+}
+
+void jsonw_null_field(json_writer_t *self, const char *prop)
+{
+ jsonw_name(self, prop);
+ jsonw_null(self);
+}
diff --git a/tools/net/ynl/ynltool/json_writer.h b/tools/net/ynl/ynltool/json_writer.h
new file mode 100644
index 000000000000..0f1e63c88f6a
--- /dev/null
+++ b/tools/net/ynl/ynltool/json_writer.h
@@ -0,0 +1,75 @@
+/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
+/*
+ * Simple streaming JSON writer
+ *
+ * This takes care of the annoying bits of JSON syntax like the commas
+ * after elements
+ *
+ * Authors: Stephen Hemminger <stephen@networkplumber.org>
+ */
+
+#ifndef _JSON_WRITER_H_
+#define _JSON_WRITER_H_
+
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdarg.h>
+#include <stdio.h>
+
+/* Opaque class structure */
+typedef struct json_writer json_writer_t;
+
+/* Create a new JSON stream */
+json_writer_t *jsonw_new(FILE *f);
+/* End output to JSON stream */
+void jsonw_destroy(json_writer_t **self_p);
+
+/* Cause output to have pretty whitespace */
+void jsonw_pretty(json_writer_t *self, bool on);
+
+/* Reset separator to create new JSON */
+void jsonw_reset(json_writer_t *self);
+
+/* Add property name */
+void jsonw_name(json_writer_t *self, const char *name);
+
+/* Add value */
+void __attribute__((format(printf, 2, 0))) jsonw_vprintf_enquote(json_writer_t *self,
+ const char *fmt,
+ va_list ap);
+void __attribute__((format(printf, 2, 3))) jsonw_printf(json_writer_t *self,
+ const char *fmt, ...);
+void jsonw_string(json_writer_t *self, const char *value);
+void jsonw_bool(json_writer_t *self, bool value);
+void jsonw_float(json_writer_t *self, double number);
+void jsonw_float_fmt(json_writer_t *self, const char *fmt, double num);
+void jsonw_uint(json_writer_t *self, uint64_t number);
+void jsonw_hu(json_writer_t *self, unsigned short number);
+void jsonw_int(json_writer_t *self, int64_t number);
+void jsonw_null(json_writer_t *self);
+void jsonw_lluint(json_writer_t *self, unsigned long long int num);
+
+/* Useful Combinations of name and value */
+void jsonw_string_field(json_writer_t *self, const char *prop, const char *val);
+void jsonw_bool_field(json_writer_t *self, const char *prop, bool value);
+void jsonw_float_field(json_writer_t *self, const char *prop, double num);
+void jsonw_uint_field(json_writer_t *self, const char *prop, uint64_t num);
+void jsonw_hu_field(json_writer_t *self, const char *prop, unsigned short num);
+void jsonw_int_field(json_writer_t *self, const char *prop, int64_t num);
+void jsonw_null_field(json_writer_t *self, const char *prop);
+void jsonw_lluint_field(json_writer_t *self, const char *prop,
+ unsigned long long int num);
+void jsonw_float_field_fmt(json_writer_t *self, const char *prop,
+ const char *fmt, double val);
+
+/* Collections */
+void jsonw_start_object(json_writer_t *self);
+void jsonw_end_object(json_writer_t *self);
+
+void jsonw_start_array(json_writer_t *self);
+void jsonw_end_array(json_writer_t *self);
+
+/* Override default exception handling */
+typedef void (jsonw_err_handler_fn)(const char *);
+
+#endif /* _JSON_WRITER_H_ */
diff --git a/tools/net/ynl/ynltool/main.c b/tools/net/ynl/ynltool/main.c
new file mode 100644
index 000000000000..5d0f428eed0a
--- /dev/null
+++ b/tools/net/ynl/ynltool/main.c
@@ -0,0 +1,242 @@
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+/* Copyright (C) 2017-2018 Netronome Systems, Inc. */
+/* Copyright Meta Platforms, Inc. and affiliates */
+
+#include <ctype.h>
+#include <errno.h>
+#include <getopt.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdarg.h>
+
+#include "main.h"
+
+const char *bin_name;
+static int last_argc;
+static char **last_argv;
+static int (*last_do_help)(int argc, char **argv);
+json_writer_t *json_wtr;
+bool pretty_output;
+bool json_output;
+
+static void __attribute__((noreturn)) clean_and_exit(int i)
+{
+ if (json_output)
+ jsonw_destroy(&json_wtr);
+
+ exit(i);
+}
+
+void usage(void)
+{
+ last_do_help(last_argc - 1, last_argv + 1);
+
+ clean_and_exit(-1);
+}
+
+static int do_help(int argc __attribute__((unused)),
+ char **argv __attribute__((unused)))
+{
+ if (json_output) {
+ jsonw_null(json_wtr);
+ return 0;
+ }
+
+ fprintf(stderr,
+ "Usage: %s [OPTIONS] OBJECT { COMMAND | help }\n"
+ " %s version\n"
+ "\n"
+ " OBJECT := { page-pool | qstats }\n"
+ " " HELP_SPEC_OPTIONS "\n"
+ "",
+ bin_name, bin_name);
+
+ return 0;
+}
+
+static int do_version(int argc __attribute__((unused)),
+ char **argv __attribute__((unused)))
+{
+ if (json_output) {
+ jsonw_start_object(json_wtr);
+ jsonw_name(json_wtr, "version");
+ jsonw_printf(json_wtr, SRC_VERSION);
+ jsonw_end_object(json_wtr);
+ } else {
+ printf("%s " SRC_VERSION "\n", bin_name);
+ }
+ return 0;
+}
+
+static const struct cmd commands[] = {
+ { "help", do_help },
+ { "page-pool", do_page_pool },
+ { "qstats", do_qstats },
+ { "version", do_version },
+ { 0 }
+};
+
+int cmd_select(const struct cmd *cmds, int argc, char **argv,
+ int (*help)(int argc, char **argv))
+{
+ unsigned int i;
+
+ last_argc = argc;
+ last_argv = argv;
+ last_do_help = help;
+
+ if (argc < 1 && cmds[0].func)
+ return cmds[0].func(argc, argv);
+
+ for (i = 0; cmds[i].cmd; i++) {
+ if (is_prefix(*argv, cmds[i].cmd)) {
+ if (!cmds[i].func) {
+ p_err("command '%s' is not available", cmds[i].cmd);
+ return -1;
+ }
+ return cmds[i].func(argc - 1, argv + 1);
+ }
+ }
+
+ help(argc - 1, argv + 1);
+
+ return -1;
+}
+
+bool is_prefix(const char *pfx, const char *str)
+{
+ if (!pfx)
+ return false;
+ if (strlen(str) < strlen(pfx))
+ return false;
+
+ return !memcmp(str, pfx, strlen(pfx));
+}
+
+/* Last argument MUST be NULL pointer */
+int detect_common_prefix(const char *arg, ...)
+{
+ unsigned int count = 0;
+ const char *ref;
+ char msg[256];
+ va_list ap;
+
+ snprintf(msg, sizeof(msg), "ambiguous prefix: '%s' could be '", arg);
+ va_start(ap, arg);
+ while ((ref = va_arg(ap, const char *))) {
+ if (!is_prefix(arg, ref))
+ continue;
+ count++;
+ if (count > 1)
+ strncat(msg, "' or '", sizeof(msg) - strlen(msg) - 1);
+ strncat(msg, ref, sizeof(msg) - strlen(msg) - 1);
+ }
+ va_end(ap);
+ strncat(msg, "'", sizeof(msg) - strlen(msg) - 1);
+
+ if (count >= 2) {
+ p_err("%s", msg);
+ return -1;
+ }
+
+ return 0;
+}
+
+void p_err(const char *fmt, ...)
+{
+ va_list ap;
+
+ va_start(ap, fmt);
+ if (json_output) {
+ jsonw_start_object(json_wtr);
+ jsonw_name(json_wtr, "error");
+ jsonw_vprintf_enquote(json_wtr, fmt, ap);
+ jsonw_end_object(json_wtr);
+ } else {
+ fprintf(stderr, "Error: ");
+ vfprintf(stderr, fmt, ap);
+ fprintf(stderr, "\n");
+ }
+ va_end(ap);
+}
+
+void p_info(const char *fmt, ...)
+{
+ va_list ap;
+
+ if (json_output)
+ return;
+
+ va_start(ap, fmt);
+ vfprintf(stderr, fmt, ap);
+ fprintf(stderr, "\n");
+ va_end(ap);
+}
+
+int main(int argc, char **argv)
+{
+ static const struct option options[] = {
+ { "json", no_argument, NULL, 'j' },
+ { "help", no_argument, NULL, 'h' },
+ { "pretty", no_argument, NULL, 'p' },
+ { "version", no_argument, NULL, 'V' },
+ { 0 }
+ };
+ bool version_requested = false;
+ int opt, ret;
+
+ setlinebuf(stdout);
+
+ last_do_help = do_help;
+ pretty_output = false;
+ json_output = false;
+ bin_name = "ynltool";
+
+ opterr = 0;
+ while ((opt = getopt_long(argc, argv, "Vhjp",
+ options, NULL)) >= 0) {
+ switch (opt) {
+ case 'V':
+ version_requested = true;
+ break;
+ case 'h':
+ return do_help(argc, argv);
+ case 'p':
+ pretty_output = true;
+ /* fall through */
+ case 'j':
+ if (!json_output) {
+ json_wtr = jsonw_new(stdout);
+ if (!json_wtr) {
+ p_err("failed to create JSON writer");
+ return -1;
+ }
+ json_output = true;
+ }
+ jsonw_pretty(json_wtr, pretty_output);
+ break;
+ default:
+ p_err("unrecognized option '%s'", argv[optind - 1]);
+ if (json_output)
+ clean_and_exit(-1);
+ else
+ usage();
+ }
+ }
+
+ argc -= optind;
+ argv += optind;
+ if (argc < 0)
+ usage();
+
+ if (version_requested)
+ ret = do_version(argc, argv);
+ else
+ ret = cmd_select(commands, argc, argv, do_help);
+
+ if (json_output)
+ jsonw_destroy(&json_wtr);
+
+ return ret;
+}
diff --git a/tools/net/ynl/ynltool/main.h b/tools/net/ynl/ynltool/main.h
new file mode 100644
index 000000000000..c7039f9ac55a
--- /dev/null
+++ b/tools/net/ynl/ynltool/main.h
@@ -0,0 +1,66 @@
+/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
+/* Copyright (C) 2017-2018 Netronome Systems, Inc. */
+/* Copyright Meta Platforms, Inc. and affiliates */
+
+#ifndef __YNLTOOL_H
+#define __YNLTOOL_H
+
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <string.h>
+
+#include "json_writer.h"
+
+#define NEXT_ARG() ({ argc--; argv++; if (argc < 0) usage(); })
+#define NEXT_ARGP() ({ (*argc)--; (*argv)++; if (*argc < 0) usage(); })
+#define BAD_ARG() ({ p_err("what is '%s'?", *argv); -1; })
+#define GET_ARG() ({ argc--; *argv++; })
+#define REQ_ARGS(cnt) \
+ ({ \
+ int _cnt = (cnt); \
+ bool _res; \
+ \
+ if (argc < _cnt) { \
+ p_err("'%s' needs at least %d arguments, %d found", \
+ argv[-1], _cnt, argc); \
+ _res = false; \
+ } else { \
+ _res = true; \
+ } \
+ _res; \
+ })
+
+#define HELP_SPEC_OPTIONS \
+ "OPTIONS := { {-j|--json} [{-p|--pretty}] }"
+
+extern const char *bin_name;
+
+extern json_writer_t *json_wtr;
+extern bool json_output;
+extern bool pretty_output;
+
+void __attribute__((format(printf, 1, 2))) p_err(const char *fmt, ...);
+void __attribute__((format(printf, 1, 2))) p_info(const char *fmt, ...);
+
+bool is_prefix(const char *pfx, const char *str);
+int detect_common_prefix(const char *arg, ...);
+void usage(void) __attribute__((noreturn));
+
+struct cmd {
+ const char *cmd;
+ int (*func)(int argc, char **argv);
+};
+
+int cmd_select(const struct cmd *cmds, int argc, char **argv,
+ int (*help)(int argc, char **argv));
+
+/* subcommands */
+int do_page_pool(int argc, char **argv);
+int do_qstats(int argc, char **argv);
+
+#endif /* __YNLTOOL_H */
diff --git a/tools/net/ynl/ynltool/page-pool.c b/tools/net/ynl/ynltool/page-pool.c
new file mode 100644
index 000000000000..4b24492abab7
--- /dev/null
+++ b/tools/net/ynl/ynltool/page-pool.c
@@ -0,0 +1,461 @@
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <net/if.h>
+
+#include <ynl.h>
+#include "netdev-user.h"
+
+#include "main.h"
+
+struct pp_stat {
+ unsigned int ifc;
+
+ struct {
+ unsigned int cnt;
+ size_t refs, bytes;
+ } live[2];
+
+ size_t alloc_slow, alloc_fast, recycle_ring, recycle_cache;
+};
+
+struct pp_stats_array {
+ unsigned int i, max;
+ struct pp_stat *s;
+};
+
+static struct pp_stat *find_ifc(struct pp_stats_array *a, unsigned int ifindex)
+{
+ unsigned int i;
+
+ for (i = 0; i < a->i; i++) {
+ if (a->s[i].ifc == ifindex)
+ return &a->s[i];
+ }
+
+ a->i++;
+ if (a->i == a->max) {
+ a->max *= 2;
+ a->s = reallocarray(a->s, a->max, sizeof(*a->s));
+ }
+ a->s[i].ifc = ifindex;
+ return &a->s[i];
+}
+
+static void count_pool(struct pp_stat *s, unsigned int l,
+ struct netdev_page_pool_get_rsp *pp)
+{
+ s->live[l].cnt++;
+ if (pp->_present.inflight)
+ s->live[l].refs += pp->inflight;
+ if (pp->_present.inflight_mem)
+ s->live[l].bytes += pp->inflight_mem;
+}
+
+/* We don't know how many pages are sitting in cache and ring
+ * so we will under-count the recycling rate a bit.
+ */
+static void print_json_recycling_stats(struct pp_stat *s)
+{
+ double recycle;
+
+ if (s->alloc_fast + s->alloc_slow) {
+ recycle = (double)(s->recycle_ring + s->recycle_cache) /
+ (s->alloc_fast + s->alloc_slow) * 100;
+ jsonw_float_field(json_wtr, "recycling_pct", recycle);
+ }
+
+ jsonw_name(json_wtr, "alloc");
+ jsonw_start_object(json_wtr);
+ jsonw_uint_field(json_wtr, "slow", s->alloc_slow);
+ jsonw_uint_field(json_wtr, "fast", s->alloc_fast);
+ jsonw_end_object(json_wtr);
+
+ jsonw_name(json_wtr, "recycle");
+ jsonw_start_object(json_wtr);
+ jsonw_uint_field(json_wtr, "ring", s->recycle_ring);
+ jsonw_uint_field(json_wtr, "cache", s->recycle_cache);
+ jsonw_end_object(json_wtr);
+}
+
+static void print_plain_recycling_stats(struct pp_stat *s)
+{
+ double recycle;
+
+ if (s->alloc_fast + s->alloc_slow) {
+ recycle = (double)(s->recycle_ring + s->recycle_cache) /
+ (s->alloc_fast + s->alloc_slow) * 100;
+ printf("recycling: %.1lf%% (alloc: %zu:%zu recycle: %zu:%zu)",
+ recycle, s->alloc_slow, s->alloc_fast,
+ s->recycle_ring, s->recycle_cache);
+ }
+}
+
+static void print_json_stats(struct pp_stats_array *a)
+{
+ jsonw_start_array(json_wtr);
+
+ for (unsigned int i = 0; i < a->i; i++) {
+ char ifname[IF_NAMESIZE];
+ struct pp_stat *s = &a->s[i];
+ const char *name;
+
+ jsonw_start_object(json_wtr);
+
+ if (!s->ifc) {
+ jsonw_string_field(json_wtr, "ifname", "<orphan>");
+ jsonw_uint_field(json_wtr, "ifindex", 0);
+ } else {
+ name = if_indextoname(s->ifc, ifname);
+ if (name)
+ jsonw_string_field(json_wtr, "ifname", name);
+ jsonw_uint_field(json_wtr, "ifindex", s->ifc);
+ }
+
+ jsonw_uint_field(json_wtr, "page_pools", s->live[1].cnt);
+ jsonw_uint_field(json_wtr, "zombies", s->live[0].cnt);
+
+ jsonw_name(json_wtr, "live");
+ jsonw_start_object(json_wtr);
+ jsonw_uint_field(json_wtr, "refs", s->live[1].refs);
+ jsonw_uint_field(json_wtr, "bytes", s->live[1].bytes);
+ jsonw_end_object(json_wtr);
+
+ jsonw_name(json_wtr, "zombie");
+ jsonw_start_object(json_wtr);
+ jsonw_uint_field(json_wtr, "refs", s->live[0].refs);
+ jsonw_uint_field(json_wtr, "bytes", s->live[0].bytes);
+ jsonw_end_object(json_wtr);
+
+ if (s->alloc_fast || s->alloc_slow)
+ print_json_recycling_stats(s);
+
+ jsonw_end_object(json_wtr);
+ }
+
+ jsonw_end_array(json_wtr);
+}
+
+static void print_plain_stats(struct pp_stats_array *a)
+{
+ for (unsigned int i = 0; i < a->i; i++) {
+ char ifname[IF_NAMESIZE];
+ struct pp_stat *s = &a->s[i];
+ const char *name;
+
+ if (!s->ifc) {
+ printf("<orphan>\t");
+ } else {
+ name = if_indextoname(s->ifc, ifname);
+ if (name)
+ printf("%8s", name);
+ printf("[%u]\t", s->ifc);
+ }
+
+ printf("page pools: %u (zombies: %u)\n",
+ s->live[1].cnt, s->live[0].cnt);
+ printf("\t\trefs: %zu bytes: %zu (refs: %zu bytes: %zu)\n",
+ s->live[1].refs, s->live[1].bytes,
+ s->live[0].refs, s->live[0].bytes);
+
+ if (s->alloc_fast || s->alloc_slow) {
+ printf("\t\t");
+ print_plain_recycling_stats(s);
+ printf("\n");
+ }
+ }
+}
+
+static bool
+find_pool_stat_in_list(struct netdev_page_pool_stats_get_list *pp_stats,
+ __u64 pool_id, struct pp_stat *pstat)
+{
+ ynl_dump_foreach(pp_stats, pp) {
+ if (!pp->_present.info || !pp->info._present.id)
+ continue;
+ if (pp->info.id != pool_id)
+ continue;
+
+ memset(pstat, 0, sizeof(*pstat));
+ if (pp->_present.alloc_fast)
+ pstat->alloc_fast = pp->alloc_fast;
+ if (pp->_present.alloc_refill)
+ pstat->alloc_fast += pp->alloc_refill;
+ if (pp->_present.alloc_slow)
+ pstat->alloc_slow = pp->alloc_slow;
+ if (pp->_present.recycle_ring)
+ pstat->recycle_ring = pp->recycle_ring;
+ if (pp->_present.recycle_cached)
+ pstat->recycle_cache = pp->recycle_cached;
+ return true;
+ }
+ return false;
+}
+
+static void
+print_json_pool_list(struct netdev_page_pool_get_list *pools,
+ struct netdev_page_pool_stats_get_list *pp_stats,
+ bool zombies_only)
+{
+ jsonw_start_array(json_wtr);
+
+ ynl_dump_foreach(pools, pp) {
+ char ifname[IF_NAMESIZE];
+ struct pp_stat pstat;
+ const char *name;
+
+ if (zombies_only && !pp->_present.detach_time)
+ continue;
+
+ jsonw_start_object(json_wtr);
+
+ jsonw_uint_field(json_wtr, "id", pp->id);
+
+ if (pp->_present.ifindex) {
+ name = if_indextoname(pp->ifindex, ifname);
+ if (name)
+ jsonw_string_field(json_wtr, "ifname", name);
+ jsonw_uint_field(json_wtr, "ifindex", pp->ifindex);
+ }
+
+ if (pp->_present.napi_id)
+ jsonw_uint_field(json_wtr, "napi_id", pp->napi_id);
+
+ if (pp->_present.inflight)
+ jsonw_uint_field(json_wtr, "refs", pp->inflight);
+
+ if (pp->_present.inflight_mem)
+ jsonw_uint_field(json_wtr, "bytes", pp->inflight_mem);
+
+ if (pp->_present.detach_time)
+ jsonw_uint_field(json_wtr, "detach_time", pp->detach_time);
+
+ if (pp->_present.dmabuf)
+ jsonw_uint_field(json_wtr, "dmabuf", pp->dmabuf);
+
+ if (find_pool_stat_in_list(pp_stats, pp->id, &pstat) &&
+ (pstat.alloc_fast || pstat.alloc_slow))
+ print_json_recycling_stats(&pstat);
+
+ jsonw_end_object(json_wtr);
+ }
+
+ jsonw_end_array(json_wtr);
+}
+
+static void
+print_plain_pool_list(struct netdev_page_pool_get_list *pools,
+ struct netdev_page_pool_stats_get_list *pp_stats,
+ bool zombies_only)
+{
+ ynl_dump_foreach(pools, pp) {
+ char ifname[IF_NAMESIZE];
+ struct pp_stat pstat;
+ const char *name;
+
+ if (zombies_only && !pp->_present.detach_time)
+ continue;
+
+ printf("pool id: %llu", pp->id);
+
+ if (pp->_present.ifindex) {
+ name = if_indextoname(pp->ifindex, ifname);
+ if (name)
+ printf(" dev: %s", name);
+ printf("[%u]", pp->ifindex);
+ }
+
+ if (pp->_present.napi_id)
+ printf(" napi: %llu", pp->napi_id);
+
+ printf("\n");
+
+ if (pp->_present.inflight || pp->_present.inflight_mem) {
+ printf(" inflight:");
+ if (pp->_present.inflight)
+ printf(" %llu pages", pp->inflight);
+ if (pp->_present.inflight_mem)
+ printf(" %llu bytes", pp->inflight_mem);
+ printf("\n");
+ }
+
+ if (pp->_present.detach_time)
+ printf(" detached: %llu\n", pp->detach_time);
+
+ if (pp->_present.dmabuf)
+ printf(" dmabuf: %u\n", pp->dmabuf);
+
+ if (find_pool_stat_in_list(pp_stats, pp->id, &pstat) &&
+ (pstat.alloc_fast || pstat.alloc_slow)) {
+ printf(" ");
+ print_plain_recycling_stats(&pstat);
+ printf("\n");
+ }
+ }
+}
+
+static void aggregate_device_stats(struct pp_stats_array *a,
+ struct netdev_page_pool_get_list *pools,
+ struct netdev_page_pool_stats_get_list *pp_stats)
+{
+ ynl_dump_foreach(pools, pp) {
+ struct pp_stat *s = find_ifc(a, pp->ifindex);
+
+ count_pool(s, 1, pp);
+ if (pp->_present.detach_time)
+ count_pool(s, 0, pp);
+ }
+
+ ynl_dump_foreach(pp_stats, pp) {
+ struct pp_stat *s = find_ifc(a, pp->info.ifindex);
+
+ if (pp->_present.alloc_fast)
+ s->alloc_fast += pp->alloc_fast;
+ if (pp->_present.alloc_refill)
+ s->alloc_fast += pp->alloc_refill;
+ if (pp->_present.alloc_slow)
+ s->alloc_slow += pp->alloc_slow;
+ if (pp->_present.recycle_ring)
+ s->recycle_ring += pp->recycle_ring;
+ if (pp->_present.recycle_cached)
+ s->recycle_cache += pp->recycle_cached;
+ }
+}
+
+static int do_stats(int argc, char **argv)
+{
+ struct netdev_page_pool_stats_get_list *pp_stats;
+ struct netdev_page_pool_get_list *pools;
+ enum {
+ GROUP_BY_DEVICE,
+ GROUP_BY_POOL,
+ } group_by = GROUP_BY_DEVICE;
+ bool zombies_only = false;
+ struct pp_stats_array a = {};
+ struct ynl_error yerr;
+ struct ynl_sock *ys;
+ int ret = 0;
+
+ /* Parse options */
+ while (argc > 0) {
+ if (is_prefix(*argv, "group-by")) {
+ NEXT_ARG();
+
+ if (!REQ_ARGS(1))
+ return -1;
+
+ if (is_prefix(*argv, "device")) {
+ group_by = GROUP_BY_DEVICE;
+ } else if (is_prefix(*argv, "pp") ||
+ is_prefix(*argv, "page-pool") ||
+ is_prefix(*argv, "none")) {
+ group_by = GROUP_BY_POOL;
+ } else {
+ p_err("invalid group-by value '%s'", *argv);
+ return -1;
+ }
+ NEXT_ARG();
+ } else if (is_prefix(*argv, "zombies")) {
+ zombies_only = true;
+ group_by = GROUP_BY_POOL;
+ NEXT_ARG();
+ } else {
+ p_err("unknown option '%s'", *argv);
+ return -1;
+ }
+ }
+
+ ys = ynl_sock_create(&ynl_netdev_family, &yerr);
+ if (!ys) {
+ p_err("YNL: %s", yerr.msg);
+ return -1;
+ }
+
+ pools = netdev_page_pool_get_dump(ys);
+ if (!pools) {
+ p_err("failed to get page pools: %s", ys->err.msg);
+ ret = -1;
+ goto exit_close;
+ }
+
+ pp_stats = netdev_page_pool_stats_get_dump(ys);
+ if (!pp_stats) {
+ p_err("failed to get page pool stats: %s", ys->err.msg);
+ ret = -1;
+ goto exit_free_pp_list;
+ }
+
+ /* If grouping by pool, print individual pools */
+ if (group_by == GROUP_BY_POOL) {
+ if (json_output)
+ print_json_pool_list(pools, pp_stats, zombies_only);
+ else
+ print_plain_pool_list(pools, pp_stats, zombies_only);
+ } else {
+ /* Aggregated stats mode (group-by device) */
+ a.max = 64;
+ a.s = calloc(a.max, sizeof(*a.s));
+ if (!a.s) {
+ p_err("failed to allocate stats array");
+ ret = -1;
+ goto exit_free_stats_list;
+ }
+
+ aggregate_device_stats(&a, pools, pp_stats);
+
+ if (json_output)
+ print_json_stats(&a);
+ else
+ print_plain_stats(&a);
+
+ free(a.s);
+ }
+
+exit_free_stats_list:
+ netdev_page_pool_stats_get_list_free(pp_stats);
+exit_free_pp_list:
+ netdev_page_pool_get_list_free(pools);
+exit_close:
+ ynl_sock_destroy(ys);
+ return ret;
+}
+
+static int do_help(int argc __attribute__((unused)),
+ char **argv __attribute__((unused)))
+{
+ if (json_output) {
+ jsonw_null(json_wtr);
+ return 0;
+ }
+
+ fprintf(stderr,
+ "Usage: %s page-pool { COMMAND | help }\n"
+ " %s page-pool stats [ OPTIONS ]\n"
+ "\n"
+ " OPTIONS := { group-by { device | page-pool | none } | zombies }\n"
+ "\n"
+ " stats - Display page pool statistics\n"
+ " stats group-by device - Group statistics by network device (default)\n"
+ " stats group-by page-pool | pp | none\n"
+ " - Show individual page pool details (no grouping)\n"
+ " stats zombies - Show only zombie page pools (detached but with\n"
+ " pages in flight). Implies group-by page-pool.\n"
+ "",
+ bin_name, bin_name);
+
+ return 0;
+}
+
+static const struct cmd page_pool_cmds[] = {
+ { "help", do_help },
+ { "stats", do_stats },
+ { 0 }
+};
+
+int do_page_pool(int argc, char **argv)
+{
+ return cmd_select(page_pool_cmds, argc, argv, do_help);
+}
diff --git a/tools/net/ynl/ynltool/qstats.c b/tools/net/ynl/ynltool/qstats.c
new file mode 100644
index 000000000000..31fb45709ffa
--- /dev/null
+++ b/tools/net/ynl/ynltool/qstats.c
@@ -0,0 +1,621 @@
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <net/if.h>
+#include <math.h>
+
+#include <ynl.h>
+#include "netdev-user.h"
+
+#include "main.h"
+
+static enum netdev_qstats_scope scope; /* default - device */
+
+struct queue_balance {
+ unsigned int ifindex;
+ enum netdev_queue_type type;
+ unsigned int queue_count;
+ __u64 *rx_packets;
+ __u64 *rx_bytes;
+ __u64 *tx_packets;
+ __u64 *tx_bytes;
+};
+
+static void print_json_qstats(struct netdev_qstats_get_list *qstats)
+{
+ jsonw_start_array(json_wtr);
+
+ ynl_dump_foreach(qstats, qs) {
+ char ifname[IF_NAMESIZE];
+ const char *name;
+
+ jsonw_start_object(json_wtr);
+
+ name = if_indextoname(qs->ifindex, ifname);
+ if (name)
+ jsonw_string_field(json_wtr, "ifname", name);
+ jsonw_uint_field(json_wtr, "ifindex", qs->ifindex);
+
+ if (qs->_present.queue_type)
+ jsonw_string_field(json_wtr, "queue-type",
+ netdev_queue_type_str(qs->queue_type));
+ if (qs->_present.queue_id)
+ jsonw_uint_field(json_wtr, "queue-id", qs->queue_id);
+
+ if (qs->_present.rx_packets || qs->_present.rx_bytes ||
+ qs->_present.rx_alloc_fail || qs->_present.rx_hw_drops ||
+ qs->_present.rx_csum_complete || qs->_present.rx_hw_gro_packets) {
+ jsonw_name(json_wtr, "rx");
+ jsonw_start_object(json_wtr);
+ if (qs->_present.rx_packets)
+ jsonw_uint_field(json_wtr, "packets", qs->rx_packets);
+ if (qs->_present.rx_bytes)
+ jsonw_uint_field(json_wtr, "bytes", qs->rx_bytes);
+ if (qs->_present.rx_alloc_fail)
+ jsonw_uint_field(json_wtr, "alloc-fail", qs->rx_alloc_fail);
+ if (qs->_present.rx_hw_drops)
+ jsonw_uint_field(json_wtr, "hw-drops", qs->rx_hw_drops);
+ if (qs->_present.rx_hw_drop_overruns)
+ jsonw_uint_field(json_wtr, "hw-drop-overruns", qs->rx_hw_drop_overruns);
+ if (qs->_present.rx_hw_drop_ratelimits)
+ jsonw_uint_field(json_wtr, "hw-drop-ratelimits", qs->rx_hw_drop_ratelimits);
+ if (qs->_present.rx_csum_complete)
+ jsonw_uint_field(json_wtr, "csum-complete", qs->rx_csum_complete);
+ if (qs->_present.rx_csum_unnecessary)
+ jsonw_uint_field(json_wtr, "csum-unnecessary", qs->rx_csum_unnecessary);
+ if (qs->_present.rx_csum_none)
+ jsonw_uint_field(json_wtr, "csum-none", qs->rx_csum_none);
+ if (qs->_present.rx_csum_bad)
+ jsonw_uint_field(json_wtr, "csum-bad", qs->rx_csum_bad);
+ if (qs->_present.rx_hw_gro_packets)
+ jsonw_uint_field(json_wtr, "hw-gro-packets", qs->rx_hw_gro_packets);
+ if (qs->_present.rx_hw_gro_bytes)
+ jsonw_uint_field(json_wtr, "hw-gro-bytes", qs->rx_hw_gro_bytes);
+ if (qs->_present.rx_hw_gro_wire_packets)
+ jsonw_uint_field(json_wtr, "hw-gro-wire-packets", qs->rx_hw_gro_wire_packets);
+ if (qs->_present.rx_hw_gro_wire_bytes)
+ jsonw_uint_field(json_wtr, "hw-gro-wire-bytes", qs->rx_hw_gro_wire_bytes);
+ jsonw_end_object(json_wtr);
+ }
+
+ if (qs->_present.tx_packets || qs->_present.tx_bytes ||
+ qs->_present.tx_hw_drops || qs->_present.tx_csum_none ||
+ qs->_present.tx_hw_gso_packets) {
+ jsonw_name(json_wtr, "tx");
+ jsonw_start_object(json_wtr);
+ if (qs->_present.tx_packets)
+ jsonw_uint_field(json_wtr, "packets", qs->tx_packets);
+ if (qs->_present.tx_bytes)
+ jsonw_uint_field(json_wtr, "bytes", qs->tx_bytes);
+ if (qs->_present.tx_hw_drops)
+ jsonw_uint_field(json_wtr, "hw-drops", qs->tx_hw_drops);
+ if (qs->_present.tx_hw_drop_errors)
+ jsonw_uint_field(json_wtr, "hw-drop-errors", qs->tx_hw_drop_errors);
+ if (qs->_present.tx_hw_drop_ratelimits)
+ jsonw_uint_field(json_wtr, "hw-drop-ratelimits", qs->tx_hw_drop_ratelimits);
+ if (qs->_present.tx_csum_none)
+ jsonw_uint_field(json_wtr, "csum-none", qs->tx_csum_none);
+ if (qs->_present.tx_needs_csum)
+ jsonw_uint_field(json_wtr, "needs-csum", qs->tx_needs_csum);
+ if (qs->_present.tx_hw_gso_packets)
+ jsonw_uint_field(json_wtr, "hw-gso-packets", qs->tx_hw_gso_packets);
+ if (qs->_present.tx_hw_gso_bytes)
+ jsonw_uint_field(json_wtr, "hw-gso-bytes", qs->tx_hw_gso_bytes);
+ if (qs->_present.tx_hw_gso_wire_packets)
+ jsonw_uint_field(json_wtr, "hw-gso-wire-packets", qs->tx_hw_gso_wire_packets);
+ if (qs->_present.tx_hw_gso_wire_bytes)
+ jsonw_uint_field(json_wtr, "hw-gso-wire-bytes", qs->tx_hw_gso_wire_bytes);
+ if (qs->_present.tx_stop)
+ jsonw_uint_field(json_wtr, "stop", qs->tx_stop);
+ if (qs->_present.tx_wake)
+ jsonw_uint_field(json_wtr, "wake", qs->tx_wake);
+ jsonw_end_object(json_wtr);
+ }
+
+ jsonw_end_object(json_wtr);
+ }
+
+ jsonw_end_array(json_wtr);
+}
+
+static void print_one(bool present, const char *name, unsigned long long val,
+ int *line)
+{
+ if (!present)
+ return;
+
+ if (!*line) {
+ printf(" ");
+ ++(*line);
+ }
+
+ /* Don't waste space on tx- and rx- prefix, its implied by queue type */
+ if (scope == NETDEV_QSTATS_SCOPE_QUEUE &&
+ (name[0] == 'r' || name[0] == 't') &&
+ name[1] == 'x' && name[2] == '-')
+ name += 3;
+
+ printf(" %15s: %15llu", name, val);
+
+ if (++(*line) == 3) {
+ printf("\n");
+ *line = 0;
+ }
+}
+
+static void print_plain_qstats(struct netdev_qstats_get_list *qstats)
+{
+ ynl_dump_foreach(qstats, qs) {
+ char ifname[IF_NAMESIZE];
+ const char *name;
+ int n;
+
+ name = if_indextoname(qs->ifindex, ifname);
+ if (name)
+ printf("%s", name);
+ else
+ printf("ifindex:%u", qs->ifindex);
+
+ if (qs->_present.queue_type && qs->_present.queue_id)
+ printf("\t%s-%-3u",
+ netdev_queue_type_str(qs->queue_type),
+ qs->queue_id);
+ else
+ printf("\t ");
+
+ n = 1;
+
+ /* Basic counters */
+ print_one(qs->_present.rx_packets, "rx-packets", qs->rx_packets, &n);
+ print_one(qs->_present.rx_bytes, "rx-bytes", qs->rx_bytes, &n);
+ print_one(qs->_present.tx_packets, "tx-packets", qs->tx_packets, &n);
+ print_one(qs->_present.tx_bytes, "tx-bytes", qs->tx_bytes, &n);
+
+ /* RX error/drop counters */
+ print_one(qs->_present.rx_alloc_fail, "rx-alloc-fail",
+ qs->rx_alloc_fail, &n);
+ print_one(qs->_present.rx_hw_drops, "rx-hw-drops",
+ qs->rx_hw_drops, &n);
+ print_one(qs->_present.rx_hw_drop_overruns, "rx-hw-drop-overruns",
+ qs->rx_hw_drop_overruns, &n);
+ print_one(qs->_present.rx_hw_drop_ratelimits, "rx-hw-drop-ratelimits",
+ qs->rx_hw_drop_ratelimits, &n);
+
+ /* RX checksum counters */
+ print_one(qs->_present.rx_csum_complete, "rx-csum-complete",
+ qs->rx_csum_complete, &n);
+ print_one(qs->_present.rx_csum_unnecessary, "rx-csum-unnecessary",
+ qs->rx_csum_unnecessary, &n);
+ print_one(qs->_present.rx_csum_none, "rx-csum-none",
+ qs->rx_csum_none, &n);
+ print_one(qs->_present.rx_csum_bad, "rx-csum-bad",
+ qs->rx_csum_bad, &n);
+
+ /* RX GRO counters */
+ print_one(qs->_present.rx_hw_gro_packets, "rx-hw-gro-packets",
+ qs->rx_hw_gro_packets, &n);
+ print_one(qs->_present.rx_hw_gro_bytes, "rx-hw-gro-bytes",
+ qs->rx_hw_gro_bytes, &n);
+ print_one(qs->_present.rx_hw_gro_wire_packets, "rx-hw-gro-wire-packets",
+ qs->rx_hw_gro_wire_packets, &n);
+ print_one(qs->_present.rx_hw_gro_wire_bytes, "rx-hw-gro-wire-bytes",
+ qs->rx_hw_gro_wire_bytes, &n);
+
+ /* TX error/drop counters */
+ print_one(qs->_present.tx_hw_drops, "tx-hw-drops",
+ qs->tx_hw_drops, &n);
+ print_one(qs->_present.tx_hw_drop_errors, "tx-hw-drop-errors",
+ qs->tx_hw_drop_errors, &n);
+ print_one(qs->_present.tx_hw_drop_ratelimits, "tx-hw-drop-ratelimits",
+ qs->tx_hw_drop_ratelimits, &n);
+
+ /* TX checksum counters */
+ print_one(qs->_present.tx_csum_none, "tx-csum-none",
+ qs->tx_csum_none, &n);
+ print_one(qs->_present.tx_needs_csum, "tx-needs-csum",
+ qs->tx_needs_csum, &n);
+
+ /* TX GSO counters */
+ print_one(qs->_present.tx_hw_gso_packets, "tx-hw-gso-packets",
+ qs->tx_hw_gso_packets, &n);
+ print_one(qs->_present.tx_hw_gso_bytes, "tx-hw-gso-bytes",
+ qs->tx_hw_gso_bytes, &n);
+ print_one(qs->_present.tx_hw_gso_wire_packets, "tx-hw-gso-wire-packets",
+ qs->tx_hw_gso_wire_packets, &n);
+ print_one(qs->_present.tx_hw_gso_wire_bytes, "tx-hw-gso-wire-bytes",
+ qs->tx_hw_gso_wire_bytes, &n);
+
+ /* TX queue control */
+ print_one(qs->_present.tx_stop, "tx-stop", qs->tx_stop, &n);
+ print_one(qs->_present.tx_wake, "tx-wake", qs->tx_wake, &n);
+
+ if (n)
+ printf("\n");
+ }
+}
+
+static int do_show(int argc, char **argv)
+{
+ struct netdev_qstats_get_list *qstats;
+ struct netdev_qstats_get_req *req;
+ struct ynl_error yerr;
+ struct ynl_sock *ys;
+ int ret = 0;
+
+ /* Parse options */
+ while (argc > 0) {
+ if (is_prefix(*argv, "scope") || is_prefix(*argv, "group-by")) {
+ NEXT_ARG();
+
+ if (!REQ_ARGS(1))
+ return -1;
+
+ if (is_prefix(*argv, "queue")) {
+ scope = NETDEV_QSTATS_SCOPE_QUEUE;
+ } else if (is_prefix(*argv, "device")) {
+ scope = 0;
+ } else {
+ p_err("invalid scope value '%s'", *argv);
+ return -1;
+ }
+ NEXT_ARG();
+ } else {
+ p_err("unknown option '%s'", *argv);
+ return -1;
+ }
+ }
+
+ ys = ynl_sock_create(&ynl_netdev_family, &yerr);
+ if (!ys) {
+ p_err("YNL: %s", yerr.msg);
+ return -1;
+ }
+
+ req = netdev_qstats_get_req_alloc();
+ if (!req) {
+ p_err("failed to allocate qstats request");
+ ret = -1;
+ goto exit_close;
+ }
+
+ if (scope)
+ netdev_qstats_get_req_set_scope(req, scope);
+
+ qstats = netdev_qstats_get_dump(ys, req);
+ netdev_qstats_get_req_free(req);
+ if (!qstats) {
+ p_err("failed to get queue stats: %s", ys->err.msg);
+ ret = -1;
+ goto exit_close;
+ }
+
+ /* Print the stats as returned by the kernel */
+ if (json_output)
+ print_json_qstats(qstats);
+ else
+ print_plain_qstats(qstats);
+
+ netdev_qstats_get_list_free(qstats);
+exit_close:
+ ynl_sock_destroy(ys);
+ return ret;
+}
+
+static void compute_stats(__u64 *values, unsigned int count,
+ double *mean, double *stddev, __u64 *min, __u64 *max)
+{
+ double sum = 0.0, variance = 0.0;
+ unsigned int i;
+
+ *min = ~0ULL;
+ *max = 0;
+
+ if (count == 0) {
+ *mean = 0;
+ *stddev = 0;
+ *min = 0;
+ return;
+ }
+
+ for (i = 0; i < count; i++) {
+ sum += values[i];
+ if (values[i] < *min)
+ *min = values[i];
+ if (values[i] > *max)
+ *max = values[i];
+ }
+
+ *mean = sum / count;
+
+ if (count > 1) {
+ for (i = 0; i < count; i++) {
+ double diff = values[i] - *mean;
+
+ variance += diff * diff;
+ }
+ *stddev = sqrt(variance / (count - 1));
+ } else {
+ *stddev = 0;
+ }
+}
+
+static void print_balance_stats(const char *name, enum netdev_queue_type type,
+ __u64 *values, unsigned int count)
+{
+ double mean, stddev, cv, ns;
+ __u64 min, max;
+
+ if ((name[0] == 'r' && type != NETDEV_QUEUE_TYPE_RX) ||
+ (name[0] == 't' && type != NETDEV_QUEUE_TYPE_TX))
+ return;
+
+ compute_stats(values, count, &mean, &stddev, &min, &max);
+
+ cv = mean > 0 ? (stddev / mean) * 100.0 : 0.0;
+ ns = min + max > 0 ? (double)2 * (max - min) / (max + min) * 100 : 0.0;
+
+ printf(" %-12s: cv=%.1f%% ns=%.1f%% stddev=%.0f\n",
+ name, cv, ns, stddev);
+ printf(" %-12s min=%llu max=%llu mean=%.0f\n",
+ "", min, max, mean);
+}
+
+static void
+print_balance_stats_json(const char *name, enum netdev_queue_type type,
+ __u64 *values, unsigned int count)
+{
+ double mean, stddev, cv, ns;
+ __u64 min, max;
+
+ if ((name[0] == 'r' && type != NETDEV_QUEUE_TYPE_RX) ||
+ (name[0] == 't' && type != NETDEV_QUEUE_TYPE_TX))
+ return;
+
+ compute_stats(values, count, &mean, &stddev, &min, &max);
+
+ cv = mean > 0 ? (stddev / mean) * 100.0 : 0.0;
+ ns = min + max > 0 ? (double)2 * (max - min) / (max + min) * 100 : 0.0;
+
+ jsonw_name(json_wtr, name);
+ jsonw_start_object(json_wtr);
+ jsonw_uint_field(json_wtr, "queue-count", count);
+ jsonw_uint_field(json_wtr, "min", min);
+ jsonw_uint_field(json_wtr, "max", max);
+ jsonw_float_field(json_wtr, "mean", mean);
+ jsonw_float_field(json_wtr, "stddev", stddev);
+ jsonw_float_field(json_wtr, "coefficient-of-variation", cv);
+ jsonw_float_field(json_wtr, "normalized-spread", ns);
+ jsonw_end_object(json_wtr);
+}
+
+static int cmp_ifindex_type(const void *a, const void *b)
+{
+ const struct netdev_qstats_get_rsp *qa = a;
+ const struct netdev_qstats_get_rsp *qb = b;
+
+ if (qa->ifindex != qb->ifindex)
+ return qa->ifindex - qb->ifindex;
+ if (qa->queue_type != qb->queue_type)
+ return qa->queue_type - qb->queue_type;
+ return qa->queue_id - qb->queue_id;
+}
+
+static int do_balance(int argc, char **argv __attribute__((unused)))
+{
+ struct netdev_qstats_get_list *qstats;
+ struct netdev_qstats_get_req *req;
+ struct netdev_qstats_get_rsp **sorted;
+ struct ynl_error yerr;
+ struct ynl_sock *ys;
+ unsigned int count = 0;
+ unsigned int i, j;
+ int ret = 0;
+
+ if (argc > 0) {
+ p_err("balance command takes no arguments");
+ return -1;
+ }
+
+ ys = ynl_sock_create(&ynl_netdev_family, &yerr);
+ if (!ys) {
+ p_err("YNL: %s", yerr.msg);
+ return -1;
+ }
+
+ req = netdev_qstats_get_req_alloc();
+ if (!req) {
+ p_err("failed to allocate qstats request");
+ ret = -1;
+ goto exit_close;
+ }
+
+ /* Always use queue scope for balance analysis */
+ netdev_qstats_get_req_set_scope(req, NETDEV_QSTATS_SCOPE_QUEUE);
+
+ qstats = netdev_qstats_get_dump(ys, req);
+ netdev_qstats_get_req_free(req);
+ if (!qstats) {
+ p_err("failed to get queue stats: %s", ys->err.msg);
+ ret = -1;
+ goto exit_close;
+ }
+
+ /* Count and sort queues */
+ ynl_dump_foreach(qstats, qs)
+ count++;
+
+ if (count == 0) {
+ if (json_output)
+ jsonw_start_array(json_wtr);
+ else
+ printf("No queue statistics available\n");
+ goto exit_free_qstats;
+ }
+
+ sorted = calloc(count, sizeof(*sorted));
+ if (!sorted) {
+ p_err("failed to allocate sorted array");
+ ret = -1;
+ goto exit_free_qstats;
+ }
+
+ i = 0;
+ ynl_dump_foreach(qstats, qs)
+ sorted[i++] = qs;
+
+ qsort(sorted, count, sizeof(*sorted), cmp_ifindex_type);
+
+ if (json_output)
+ jsonw_start_array(json_wtr);
+
+ /* Process each device/queue-type combination */
+ i = 0;
+ while (i < count) {
+ __u64 *rx_packets, *rx_bytes, *tx_packets, *tx_bytes;
+ enum netdev_queue_type type = sorted[i]->queue_type;
+ unsigned int ifindex = sorted[i]->ifindex;
+ unsigned int queue_count = 0;
+ char ifname[IF_NAMESIZE];
+ const char *name;
+
+ /* Count queues for this device/type */
+ for (j = i; j < count && sorted[j]->ifindex == ifindex &&
+ sorted[j]->queue_type == type; j++)
+ queue_count++;
+
+ /* Skip if no packets/bytes (inactive queues) */
+ if (!sorted[i]->_present.rx_packets &&
+ !sorted[i]->_present.rx_bytes &&
+ !sorted[i]->_present.tx_packets &&
+ !sorted[i]->_present.tx_bytes)
+ goto next_ifc;
+
+ /* Allocate arrays for statistics */
+ rx_packets = calloc(queue_count, sizeof(*rx_packets));
+ rx_bytes = calloc(queue_count, sizeof(*rx_bytes));
+ tx_packets = calloc(queue_count, sizeof(*tx_packets));
+ tx_bytes = calloc(queue_count, sizeof(*tx_bytes));
+
+ if (!rx_packets || !rx_bytes || !tx_packets || !tx_bytes) {
+ p_err("failed to allocate statistics arrays");
+ free(rx_packets);
+ free(rx_bytes);
+ free(tx_packets);
+ free(tx_bytes);
+ ret = -1;
+ goto exit_free_sorted;
+ }
+
+ /* Collect statistics */
+ for (j = 0; j < queue_count; j++) {
+ rx_packets[j] = sorted[i + j]->_present.rx_packets ?
+ sorted[i + j]->rx_packets : 0;
+ rx_bytes[j] = sorted[i + j]->_present.rx_bytes ?
+ sorted[i + j]->rx_bytes : 0;
+ tx_packets[j] = sorted[i + j]->_present.tx_packets ?
+ sorted[i + j]->tx_packets : 0;
+ tx_bytes[j] = sorted[i + j]->_present.tx_bytes ?
+ sorted[i + j]->tx_bytes : 0;
+ }
+
+ name = if_indextoname(ifindex, ifname);
+
+ if (json_output) {
+ jsonw_start_object(json_wtr);
+ if (name)
+ jsonw_string_field(json_wtr, "ifname", name);
+ jsonw_uint_field(json_wtr, "ifindex", ifindex);
+ jsonw_string_field(json_wtr, "queue-type",
+ netdev_queue_type_str(type));
+
+ print_balance_stats_json("rx-packets", type,
+ rx_packets, queue_count);
+ print_balance_stats_json("rx-bytes", type,
+ rx_bytes, queue_count);
+ print_balance_stats_json("tx-packets", type,
+ tx_packets, queue_count);
+ print_balance_stats_json("tx-bytes", type,
+ tx_bytes, queue_count);
+
+ jsonw_end_object(json_wtr);
+ } else {
+ if (name)
+ printf("%s", name);
+ else
+ printf("ifindex:%u", ifindex);
+ printf(" %s %d queues:\n",
+ netdev_queue_type_str(type), queue_count);
+
+ print_balance_stats("rx-packets", type,
+ rx_packets, queue_count);
+ print_balance_stats("rx-bytes", type,
+ rx_bytes, queue_count);
+ print_balance_stats("tx-packets", type,
+ tx_packets, queue_count);
+ print_balance_stats("tx-bytes", type,
+ tx_bytes, queue_count);
+ printf("\n");
+ }
+
+ free(rx_packets);
+ free(rx_bytes);
+ free(tx_packets);
+ free(tx_bytes);
+
+next_ifc:
+ i += queue_count;
+ }
+
+ if (json_output)
+ jsonw_end_array(json_wtr);
+
+exit_free_sorted:
+ free(sorted);
+exit_free_qstats:
+ netdev_qstats_get_list_free(qstats);
+exit_close:
+ ynl_sock_destroy(ys);
+ return ret;
+}
+
+static int do_help(int argc __attribute__((unused)),
+ char **argv __attribute__((unused)))
+{
+ if (json_output) {
+ jsonw_null(json_wtr);
+ return 0;
+ }
+
+ fprintf(stderr,
+ "Usage: %s qstats { COMMAND | help }\n"
+ " %s qstats [ show ] [ OPTIONS ]\n"
+ " %s qstats balance\n"
+ "\n"
+ " OPTIONS := { scope queue | group-by { device | queue } }\n"
+ "\n"
+ " show - Display queue statistics (default)\n"
+ " Statistics are aggregated for the entire device.\n"
+ " show scope queue - Display per-queue statistics\n"
+ " show group-by device - Display device-aggregated statistics (default)\n"
+ " show group-by queue - Display per-queue statistics\n"
+ " balance - Analyze traffic distribution balance.\n"
+ "",
+ bin_name, bin_name, bin_name);
+
+ return 0;
+}
+
+static const struct cmd qstats_cmds[] = {
+ { "show", do_show },
+ { "balance", do_balance },
+ { "help", do_help },
+ { 0 }
+};
+
+int do_qstats(int argc, char **argv)
+{
+ return cmd_select(qstats_cmds, argc, argv, do_help);
+}
diff --git a/tools/objtool/.gitignore b/tools/objtool/.gitignore
index 4faa4dd72f35..73d883128511 100644
--- a/tools/objtool/.gitignore
+++ b/tools/objtool/.gitignore
@@ -1,5 +1,8 @@
# SPDX-License-Identifier: GPL-2.0-only
+arch/x86/lib/cpu-feature-names.c
arch/x86/lib/inat-tables.c
/objtool
+feature
+FEATURE-DUMP.objtool
fixdep
libsubcmd/
diff --git a/tools/objtool/Build b/tools/objtool/Build
index a3cdf8af6635..600da051af12 100644
--- a/tools/objtool/Build
+++ b/tools/objtool/Build
@@ -8,13 +8,17 @@ objtool-y += builtin-check.o
objtool-y += elf.o
objtool-y += objtool.o
-objtool-$(BUILD_ORC) += orc_gen.o
-objtool-$(BUILD_ORC) += orc_dump.o
+objtool-$(BUILD_DISAS) += disas.o
+objtool-$(BUILD_DISAS) += trace.o
+
+objtool-$(BUILD_ORC) += orc_gen.o orc_dump.o
+objtool-$(BUILD_KLP) += builtin-klp.o klp-diff.o klp-post-link.o
objtool-y += libstring.o
objtool-y += libctype.o
objtool-y += str_error_r.o
objtool-y += librbtree.o
+objtool-y += signal.o
$(OUTPUT)libstring.o: ../lib/string.c FORCE
$(call rule_mkdir)
diff --git a/tools/objtool/Makefile b/tools/objtool/Makefile
index 8c20361dd100..ad6e1ec706ce 100644
--- a/tools/objtool/Makefile
+++ b/tools/objtool/Makefile
@@ -2,6 +2,28 @@
include ../scripts/Makefile.include
include ../scripts/Makefile.arch
+ifeq ($(SRCARCH),x86)
+ BUILD_ORC := y
+ ARCH_HAS_KLP := y
+endif
+
+ifeq ($(SRCARCH),loongarch)
+ BUILD_ORC := y
+endif
+
+ifeq ($(ARCH_HAS_KLP),y)
+ HAVE_XXHASH = $(shell printf "$(pound)include <xxhash.h>\nXXH3_state_t *state;int main() {}" | \
+ $(HOSTCC) -xc - -o /dev/null -lxxhash 2> /dev/null && echo y || echo n)
+ ifeq ($(HAVE_XXHASH),y)
+ BUILD_KLP := y
+ LIBXXHASH_CFLAGS := $(shell $(HOSTPKG_CONFIG) libxxhash --cflags 2>/dev/null) \
+ -DBUILD_KLP
+ LIBXXHASH_LIBS := $(shell $(HOSTPKG_CONFIG) libxxhash --libs 2>/dev/null || echo -lxxhash)
+ endif
+endif
+
+export BUILD_ORC BUILD_KLP
+
ifeq ($(srctree),)
srctree := $(patsubst %/,%,$(dir $(CURDIR)))
srctree := $(patsubst %/,%,$(dir $(srctree)))
@@ -23,6 +45,11 @@ LIBELF_LIBS := $(shell $(HOSTPKG_CONFIG) libelf --libs 2>/dev/null || echo -lel
all: $(OBJTOOL)
+WARNINGS := -Werror -Wall -Wextra -Wmissing-prototypes \
+ -Wmissing-declarations -Wwrite-strings \
+ -Wno-implicit-fallthrough -Wno-sign-compare \
+ -Wno-unused-parameter
+
INCLUDES := -I$(srctree)/tools/include \
-I$(srctree)/tools/include/uapi \
-I$(srctree)/tools/arch/$(HOSTARCH)/include/uapi \
@@ -30,11 +57,11 @@ INCLUDES := -I$(srctree)/tools/include \
-I$(srctree)/tools/objtool/include \
-I$(srctree)/tools/objtool/arch/$(SRCARCH)/include \
-I$(LIBSUBCMD_OUTPUT)/include
-# Note, EXTRA_WARNINGS here was determined for CC and not HOSTCC, it
-# is passed here to match a legacy behavior.
-WARNINGS := $(EXTRA_WARNINGS) -Wno-switch-default -Wno-switch-enum -Wno-packed -Wno-nested-externs
-OBJTOOL_CFLAGS := -Werror $(WARNINGS) $(KBUILD_HOSTCFLAGS) -g $(INCLUDES) $(LIBELF_FLAGS)
-OBJTOOL_LDFLAGS := $(LIBELF_LIBS) $(LIBSUBCMD) $(KBUILD_HOSTLDFLAGS)
+
+OBJTOOL_CFLAGS := -std=gnu11 -fomit-frame-pointer -O2 -g $(WARNINGS) \
+ $(INCLUDES) $(LIBELF_FLAGS) $(LIBXXHASH_CFLAGS) $(HOSTCFLAGS)
+
+OBJTOOL_LDFLAGS := $(LIBSUBCMD) $(LIBELF_LIBS) $(LIBXXHASH_LIBS) $(HOSTLDFLAGS)
# Allow old libelf to be used:
elfshdr := $(shell echo '$(pound)include <libelf.h>' | $(HOSTCC) $(OBJTOOL_CFLAGS) -x c -E - 2>/dev/null | grep elf_getshdr)
@@ -43,20 +70,32 @@ OBJTOOL_CFLAGS += $(if $(elfshdr),,-DLIBELF_USE_DEPRECATED)
# Always want host compilation.
HOST_OVERRIDES := CC="$(HOSTCC)" LD="$(HOSTLD)" AR="$(HOSTAR)"
-AWK = awk
-MKDIR = mkdir
+#
+# To support disassembly, objtool needs libopcodes which is provided
+# with libbdf (binutils-dev or binutils-devel package).
+#
+FEATURE_USER = .objtool
+FEATURE_TESTS = libbfd disassembler-init-styled
+FEATURE_DISPLAY =
+include $(srctree)/tools/build/Makefile.feature
+
+ifeq ($(feature-disassembler-init-styled), 1)
+ OBJTOOL_CFLAGS += -DDISASM_INIT_STYLED
+endif
-BUILD_ORC := n
+BUILD_DISAS := n
-ifeq ($(SRCARCH),x86)
- BUILD_ORC := y
+ifeq ($(feature-libbfd),1)
+ BUILD_DISAS := y
+ OBJTOOL_CFLAGS += -DDISAS -DPACKAGE="objtool"
+ OBJTOOL_LDFLAGS += -lopcodes
endif
-ifeq ($(SRCARCH),loongarch)
- BUILD_ORC := y
-endif
+export BUILD_DISAS
+
+AWK = awk
+MKDIR = mkdir
-export BUILD_ORC
export srctree OUTPUT CFLAGS SRCARCH AWK
include $(srctree)/tools/build/Makefile.include
@@ -86,7 +125,10 @@ $(LIBSUBCMD)-clean:
clean: $(LIBSUBCMD)-clean
$(call QUIET_CLEAN, objtool) $(RM) $(OBJTOOL)
$(Q)find $(OUTPUT) -name '*.o' -delete -o -name '\.*.cmd' -delete -o -name '\.*.d' -delete
+ $(Q)$(RM) $(OUTPUT)arch/x86/lib/cpu-feature-names.c $(OUTPUT)fixdep
$(Q)$(RM) $(OUTPUT)arch/x86/lib/inat-tables.c $(OUTPUT)fixdep
+ $(Q)$(RM) -- $(OUTPUT)FEATURE-DUMP.objtool
+ $(Q)$(RM) -r -- $(OUTPUT)feature
FORCE:
diff --git a/tools/objtool/arch/loongarch/decode.c b/tools/objtool/arch/loongarch/decode.c
index b6fdc68053cc..6cd288150f49 100644
--- a/tools/objtool/arch/loongarch/decode.c
+++ b/tools/objtool/arch/loongarch/decode.c
@@ -1,13 +1,25 @@
// SPDX-License-Identifier: GPL-2.0-or-later
#include <string.h>
#include <objtool/check.h>
+#include <objtool/disas.h>
#include <objtool/warn.h>
#include <asm/inst.h>
#include <asm/orc_types.h>
#include <linux/objtool_types.h>
#include <arch/elf.h>
-int arch_ftrace_match(char *name)
+const char *arch_reg_name[CFI_NUM_REGS] = {
+ "zero", "ra", "tp", "sp",
+ "a0", "a1", "a2", "a3",
+ "a4", "a5", "a6", "a7",
+ "t0", "t1", "t2", "t3",
+ "t4", "t5", "t6", "t7",
+ "t8", "u0", "fp", "s0",
+ "s1", "s2", "s3", "s4",
+ "s5", "s6", "s7", "s8"
+};
+
+int arch_ftrace_match(const char *name)
{
return !strcmp(name, "_mcount");
}
@@ -17,9 +29,9 @@ unsigned long arch_jump_destination(struct instruction *insn)
return insn->offset + (insn->immediate << 2);
}
-unsigned long arch_dest_reloc_offset(int addend)
+s64 arch_insn_adjusted_addend(struct instruction *insn, struct reloc *reloc)
{
- return addend;
+ return reloc_addend(reloc);
}
bool arch_pc_relative_reloc(struct reloc *reloc)
@@ -278,6 +290,25 @@ static bool decode_insn_reg2i16_fomat(union loongarch_instruction inst,
return true;
}
+static bool decode_insn_reg3_fomat(union loongarch_instruction inst,
+ struct instruction *insn)
+{
+ switch (inst.reg3_format.opcode) {
+ case amswapw_op:
+ if (inst.reg3_format.rd == LOONGARCH_GPR_ZERO &&
+ inst.reg3_format.rk == LOONGARCH_GPR_RA &&
+ inst.reg3_format.rj == LOONGARCH_GPR_ZERO) {
+ /* amswap.w $zero, $ra, $zero */
+ insn->type = INSN_BUG;
+ }
+ break;
+ default:
+ return false;
+ }
+
+ return true;
+}
+
int arch_decode_instruction(struct objtool_file *file, const struct section *sec,
unsigned long offset, unsigned int maxlen,
struct instruction *insn)
@@ -309,11 +340,19 @@ int arch_decode_instruction(struct objtool_file *file, const struct section *sec
return 0;
if (decode_insn_reg2i16_fomat(inst, insn))
return 0;
+ if (decode_insn_reg3_fomat(inst, insn))
+ return 0;
- if (inst.word == 0)
+ if (inst.word == 0) {
+ /* andi $zero, $zero, 0x0 */
insn->type = INSN_NOP;
- else if (inst.reg0i15_format.opcode == break_op) {
- /* break */
+ } else if (inst.reg0i15_format.opcode == break_op &&
+ inst.reg0i15_format.immediate == 0x0) {
+ /* break 0x0 */
+ insn->type = INSN_TRAP;
+ } else if (inst.reg0i15_format.opcode == break_op &&
+ inst.reg0i15_format.immediate == 0x1) {
+ /* break 0x1 */
insn->type = INSN_BUG;
} else if (inst.reg2_format.opcode == ertn_op) {
/* ertn */
@@ -387,3 +426,14 @@ unsigned long arch_jump_table_sym_offset(struct reloc *reloc, struct reloc *tabl
return reloc->sym->offset + reloc_addend(reloc);
}
}
+
+#ifdef DISAS
+
+int arch_disas_info_init(struct disassemble_info *dinfo)
+{
+ return disas_info_init(dinfo, bfd_arch_loongarch,
+ bfd_mach_loongarch32, bfd_mach_loongarch64,
+ NULL);
+}
+
+#endif /* DISAS */
diff --git a/tools/objtool/arch/loongarch/orc.c b/tools/objtool/arch/loongarch/orc.c
index b58c5ff443c9..ffd3a3c858ae 100644
--- a/tools/objtool/arch/loongarch/orc.c
+++ b/tools/objtool/arch/loongarch/orc.c
@@ -5,7 +5,6 @@
#include <objtool/check.h>
#include <objtool/orc.h>
#include <objtool/warn.h>
-#include <objtool/endianness.h>
int init_orc_entry(struct orc_entry *orc, struct cfi_state *cfi, struct instruction *insn)
{
diff --git a/tools/objtool/arch/loongarch/special.c b/tools/objtool/arch/loongarch/special.c
index e39f86d97002..aba774109437 100644
--- a/tools/objtool/arch/loongarch/special.c
+++ b/tools/objtool/arch/loongarch/special.c
@@ -27,6 +27,7 @@ static void get_rodata_table_size_by_table_annotate(struct objtool_file *file,
struct table_info *next_table;
unsigned long tmp_insn_offset;
unsigned long tmp_rodata_offset;
+ bool is_valid_list = false;
rsec = find_section_by_name(file->elf, ".rela.discard.tablejump_annotate");
if (!rsec)
@@ -35,6 +36,12 @@ static void get_rodata_table_size_by_table_annotate(struct objtool_file *file,
INIT_LIST_HEAD(&table_list);
for_each_reloc(rsec, reloc) {
+ if (reloc->sym->sec->rodata)
+ continue;
+
+ if (strcmp(insn->sec->name, reloc->sym->sec->name))
+ continue;
+
orig_table = malloc(sizeof(struct table_info));
if (!orig_table) {
WARN("malloc failed");
@@ -49,6 +56,22 @@ static void get_rodata_table_size_by_table_annotate(struct objtool_file *file,
if (reloc_idx(reloc) + 1 == sec_num_entries(rsec))
break;
+
+ if (strcmp(insn->sec->name, (reloc + 1)->sym->sec->name)) {
+ list_for_each_entry(orig_table, &table_list, jump_info) {
+ if (orig_table->insn_offset == insn->offset) {
+ is_valid_list = true;
+ break;
+ }
+ }
+
+ if (!is_valid_list) {
+ list_del_init(&table_list);
+ continue;
+ }
+
+ break;
+ }
}
list_for_each_entry(orig_table, &table_list, jump_info) {
@@ -171,3 +194,8 @@ struct reloc *arch_find_switch_table(struct objtool_file *file,
return rodata_reloc;
}
+
+const char *arch_cpu_feature_name(int feature_number)
+{
+ return NULL;
+}
diff --git a/tools/objtool/arch/powerpc/decode.c b/tools/objtool/arch/powerpc/decode.c
index c851c51d4bd3..e534ac1123b3 100644
--- a/tools/objtool/arch/powerpc/decode.c
+++ b/tools/objtool/arch/powerpc/decode.c
@@ -3,20 +3,32 @@
#include <stdio.h>
#include <stdlib.h>
#include <objtool/check.h>
+#include <objtool/disas.h>
#include <objtool/elf.h>
#include <objtool/arch.h>
#include <objtool/warn.h>
#include <objtool/builtin.h>
-#include <objtool/endianness.h>
-int arch_ftrace_match(char *name)
+const char *arch_reg_name[CFI_NUM_REGS] = {
+ "r0", "sp", "r2", "r3",
+ "r4", "r5", "r6", "r7",
+ "r8", "r9", "r10", "r11",
+ "r12", "r13", "r14", "r15",
+ "r16", "r17", "r18", "r19",
+ "r20", "r21", "r22", "r23",
+ "r24", "r25", "r26", "r27",
+ "r28", "r29", "r30", "r31",
+ "ra"
+};
+
+int arch_ftrace_match(const char *name)
{
return !strcmp(name, "_mcount");
}
-unsigned long arch_dest_reloc_offset(int addend)
+s64 arch_insn_adjusted_addend(struct instruction *insn, struct reloc *reloc)
{
- return addend;
+ return reloc_addend(reloc);
}
bool arch_callee_saved_reg(unsigned char reg)
@@ -128,3 +140,14 @@ unsigned int arch_reloc_size(struct reloc *reloc)
return 8;
}
}
+
+#ifdef DISAS
+
+int arch_disas_info_init(struct disassemble_info *dinfo)
+{
+ return disas_info_init(dinfo, bfd_arch_powerpc,
+ bfd_mach_ppc, bfd_mach_ppc64,
+ NULL);
+}
+
+#endif /* DISAS */
diff --git a/tools/objtool/arch/powerpc/special.c b/tools/objtool/arch/powerpc/special.c
index 51610689abf7..8f9bf61ca089 100644
--- a/tools/objtool/arch/powerpc/special.c
+++ b/tools/objtool/arch/powerpc/special.c
@@ -18,3 +18,8 @@ struct reloc *arch_find_switch_table(struct objtool_file *file,
{
exit(-1);
}
+
+const char *arch_cpu_feature_name(int feature_number)
+{
+ return NULL;
+}
diff --git a/tools/objtool/arch/x86/Build b/tools/objtool/arch/x86/Build
index 3dedb2fd8f3a..febee0b8ee0b 100644
--- a/tools/objtool/arch/x86/Build
+++ b/tools/objtool/arch/x86/Build
@@ -1,5 +1,5 @@
-objtool-y += special.o
objtool-y += decode.o
+objtool-y += special.o
objtool-y += orc.o
inat_tables_script = ../arch/x86/tools/gen-insn-attr-x86.awk
@@ -12,3 +12,14 @@ $(OUTPUT)arch/x86/lib/inat-tables.c: $(inat_tables_script) $(inat_tables_maps)
$(OUTPUT)arch/x86/decode.o: $(OUTPUT)arch/x86/lib/inat-tables.c
CFLAGS_decode.o += -I$(OUTPUT)arch/x86/lib
+
+cpu_features = ../arch/x86/include/asm/cpufeatures.h
+cpu_features_script = ../arch/x86/tools/gen-cpu-feature-names-x86.awk
+
+$(OUTPUT)arch/x86/lib/cpu-feature-names.c: $(cpu_features_script) $(cpu_features)
+ $(call rule_mkdir)
+ $(Q)$(call echo-cmd,gen)$(AWK) -f $(cpu_features_script) $(cpu_features) > $@
+
+$(OUTPUT)arch/x86/special.o: $(OUTPUT)arch/x86/lib/cpu-feature-names.c
+
+CFLAGS_special.o += -I$(OUTPUT)arch/x86/lib
diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c
index 98c4713c1b09..f4af82508228 100644
--- a/tools/objtool/arch/x86/decode.c
+++ b/tools/objtool/arch/x86/decode.c
@@ -16,14 +16,22 @@
#include <asm/orc_types.h>
#include <objtool/check.h>
+#include <objtool/disas.h>
#include <objtool/elf.h>
#include <objtool/arch.h>
#include <objtool/warn.h>
-#include <objtool/endianness.h>
#include <objtool/builtin.h>
#include <arch/elf.h>
-int arch_ftrace_match(char *name)
+const char *arch_reg_name[CFI_NUM_REGS] = {
+ "rax", "rcx", "rdx", "rbx",
+ "rsp", "rbp", "rsi", "rdi",
+ "r8", "r9", "r10", "r11",
+ "r12", "r13", "r14", "r15",
+ "ra"
+};
+
+int arch_ftrace_match(const char *name)
{
return !strcmp(name, "__fentry__");
}
@@ -68,9 +76,65 @@ bool arch_callee_saved_reg(unsigned char reg)
}
}
-unsigned long arch_dest_reloc_offset(int addend)
+/* Undo the effects of __pa_symbol() if necessary */
+static unsigned long phys_to_virt(unsigned long pa)
+{
+ s64 va = pa;
+
+ if (va > 0)
+ va &= ~(0x80000000);
+
+ return va;
+}
+
+s64 arch_insn_adjusted_addend(struct instruction *insn, struct reloc *reloc)
+{
+ s64 addend = reloc_addend(reloc);
+
+ if (arch_pc_relative_reloc(reloc))
+ addend += insn->offset + insn->len - reloc_offset(reloc);
+
+ return phys_to_virt(addend);
+}
+
+static void scan_for_insn(struct section *sec, unsigned long offset,
+ unsigned long *insn_off, unsigned int *insn_len)
+{
+ unsigned long o = 0;
+ struct insn insn;
+
+ while (1) {
+
+ insn_decode(&insn, sec->data->d_buf + o, sec_size(sec) - o,
+ INSN_MODE_64);
+
+ if (o + insn.length > offset) {
+ *insn_off = o;
+ *insn_len = insn.length;
+ return;
+ }
+
+ o += insn.length;
+ }
+}
+
+u64 arch_adjusted_addend(struct reloc *reloc)
{
- return addend + 4;
+ unsigned int type = reloc_type(reloc);
+ s64 addend = reloc_addend(reloc);
+ unsigned long insn_off;
+ unsigned int insn_len;
+
+ if (type == R_X86_64_PLT32)
+ return addend + 4;
+
+ if (type != R_X86_64_PC32 || !is_text_sec(reloc->sec->base))
+ return addend;
+
+ scan_for_insn(reloc->sec->base, reloc_offset(reloc),
+ &insn_off, &insn_len);
+
+ return addend + insn_off + insn_len - reloc_offset(reloc);
}
unsigned long arch_jump_destination(struct instruction *insn)
@@ -189,15 +253,6 @@ int arch_decode_instruction(struct objtool_file *file, const struct section *sec
op2 = ins.opcode.bytes[1];
op3 = ins.opcode.bytes[2];
- /*
- * XXX hack, decoder is buggered and thinks 0xea is 7 bytes long.
- */
- if (op1 == 0xea) {
- insn->len = 1;
- insn->type = INSN_BUG;
- return 0;
- }
-
if (ins.rex_prefix.nbytes) {
rex = ins.rex_prefix.bytes[0];
rex_w = X86_REX_W(rex) >> 3;
@@ -503,6 +558,12 @@ int arch_decode_instruction(struct objtool_file *file, const struct section *sec
break;
case 0x90:
+ if (rex_b) /* XCHG %r8, %rax */
+ break;
+
+ if (prefix == 0xf3) /* REP NOP := PAUSE */
+ break;
+
insn->type = INSN_NOP;
break;
@@ -556,13 +617,14 @@ int arch_decode_instruction(struct objtool_file *file, const struct section *sec
} else if (op2 == 0x0b || op2 == 0xb9) {
- /* ud2 */
+ /* ud2, ud1 */
insn->type = INSN_BUG;
- } else if (op2 == 0x0d || op2 == 0x1f) {
+ } else if (op2 == 0x1f) {
- /* nopl/nopw */
- insn->type = INSN_NOP;
+ /* 0f 1f /0 := NOPL */
+ if (modrm_reg == 0)
+ insn->type = INSN_NOP;
} else if (op2 == 0x1e) {
@@ -692,6 +754,10 @@ int arch_decode_instruction(struct objtool_file *file, const struct section *sec
insn->type = INSN_SYSRET;
break;
+ case 0xd6: /* udb */
+ insn->type = INSN_BUG;
+ break;
+
case 0xe0: /* loopne */
case 0xe1: /* loope */
case 0xe2: /* loop */
@@ -880,3 +946,26 @@ unsigned int arch_reloc_size(struct reloc *reloc)
return 8;
}
}
+
+bool arch_absolute_reloc(struct elf *elf, struct reloc *reloc)
+{
+ switch (reloc_type(reloc)) {
+ case R_X86_64_32:
+ case R_X86_64_32S:
+ case R_X86_64_64:
+ return true;
+ default:
+ return false;
+ }
+}
+
+#ifdef DISAS
+
+int arch_disas_info_init(struct disassemble_info *dinfo)
+{
+ return disas_info_init(dinfo, bfd_arch_i386,
+ bfd_mach_i386_i386, bfd_mach_x86_64,
+ "att");
+}
+
+#endif /* DISAS */
diff --git a/tools/objtool/arch/x86/orc.c b/tools/objtool/arch/x86/orc.c
index 7176b9ec5b05..735e150ca6b7 100644
--- a/tools/objtool/arch/x86/orc.c
+++ b/tools/objtool/arch/x86/orc.c
@@ -5,7 +5,6 @@
#include <objtool/check.h>
#include <objtool/orc.h>
#include <objtool/warn.h>
-#include <objtool/endianness.h>
int init_orc_entry(struct orc_entry *orc, struct cfi_state *cfi, struct instruction *insn)
{
diff --git a/tools/objtool/arch/x86/special.c b/tools/objtool/arch/x86/special.c
index 06ca4a2659a4..e817a3fff449 100644
--- a/tools/objtool/arch/x86/special.c
+++ b/tools/objtool/arch/x86/special.c
@@ -4,6 +4,10 @@
#include <objtool/special.h>
#include <objtool/builtin.h>
#include <objtool/warn.h>
+#include <asm/cpufeatures.h>
+
+/* cpu feature name array generated from cpufeatures.h */
+#include "cpu-feature-names.c"
void arch_handle_alternative(struct special_alt *alt)
{
@@ -89,7 +93,7 @@ struct reloc *arch_find_switch_table(struct objtool_file *file,
/* look for a relocation which references .rodata */
text_reloc = find_reloc_by_dest_range(file->elf, insn->sec,
insn->offset, insn->len);
- if (!text_reloc || text_reloc->sym->type != STT_SECTION ||
+ if (!text_reloc || !is_sec_sym(text_reloc->sym) ||
!text_reloc->sym->sec->rodata)
return NULL;
@@ -134,3 +138,9 @@ struct reloc *arch_find_switch_table(struct objtool_file *file,
*table_size = 0;
return rodata_reloc;
}
+
+const char *arch_cpu_feature_name(int feature_number)
+{
+ return (feature_number < ARRAY_SIZE(cpu_feature_names)) ?
+ cpu_feature_names[feature_number] : NULL;
+}
diff --git a/tools/objtool/builtin-check.c b/tools/objtool/builtin-check.c
index 80239843e9f0..b780df513715 100644
--- a/tools/objtool/builtin-check.c
+++ b/tools/objtool/builtin-check.c
@@ -73,34 +73,41 @@ static int parse_hacks(const struct option *opt, const char *str, int unset)
static const struct option check_options[] = {
OPT_GROUP("Actions:"),
+ OPT_BOOLEAN(0, "checksum", &opts.checksum, "generate per-function checksums"),
+ OPT_BOOLEAN(0, "cfi", &opts.cfi, "annotate kernel control flow integrity (kCFI) function preambles"),
+ OPT_STRING_OPTARG('d', "disas", &opts.disas, "function-pattern", "disassemble functions", "*"),
OPT_CALLBACK_OPTARG('h', "hacks", NULL, NULL, "jump_label,noinstr,skylake", "patch toolchain bugs/limitations", parse_hacks),
- OPT_BOOLEAN('i', "ibt", &opts.ibt, "validate and annotate IBT"),
- OPT_BOOLEAN('m', "mcount", &opts.mcount, "annotate mcount/fentry calls for ftrace"),
- OPT_BOOLEAN('n', "noinstr", &opts.noinstr, "validate noinstr rules"),
- OPT_BOOLEAN(0, "orc", &opts.orc, "generate ORC metadata"),
- OPT_BOOLEAN('r', "retpoline", &opts.retpoline, "validate and annotate retpoline usage"),
- OPT_BOOLEAN(0, "rethunk", &opts.rethunk, "validate and annotate rethunk usage"),
- OPT_BOOLEAN(0, "unret", &opts.unret, "validate entry unret placement"),
- OPT_INTEGER(0, "prefix", &opts.prefix, "generate prefix symbols"),
- OPT_BOOLEAN('l', "sls", &opts.sls, "validate straight-line-speculation mitigations"),
- OPT_BOOLEAN('s', "stackval", &opts.stackval, "validate frame pointer rules"),
- OPT_BOOLEAN('t', "static-call", &opts.static_call, "annotate static calls"),
- OPT_BOOLEAN('u', "uaccess", &opts.uaccess, "validate uaccess rules for SMAP"),
- OPT_BOOLEAN(0 , "cfi", &opts.cfi, "annotate kernel control flow integrity (kCFI) function preambles"),
- OPT_CALLBACK_OPTARG(0, "dump", NULL, NULL, "orc", "dump metadata", parse_dump),
+ OPT_BOOLEAN('i', "ibt", &opts.ibt, "validate and annotate IBT"),
+ OPT_BOOLEAN('m', "mcount", &opts.mcount, "annotate mcount/fentry calls for ftrace"),
+ OPT_BOOLEAN(0, "noabs", &opts.noabs, "reject absolute references in allocatable sections"),
+ OPT_BOOLEAN('n', "noinstr", &opts.noinstr, "validate noinstr rules"),
+ OPT_BOOLEAN(0, "orc", &opts.orc, "generate ORC metadata"),
+ OPT_BOOLEAN('r', "retpoline", &opts.retpoline, "validate and annotate retpoline usage"),
+ OPT_BOOLEAN(0, "rethunk", &opts.rethunk, "validate and annotate rethunk usage"),
+ OPT_BOOLEAN(0, "unret", &opts.unret, "validate entry unret placement"),
+ OPT_INTEGER(0, "prefix", &opts.prefix, "generate prefix symbols"),
+ OPT_BOOLEAN('l', "sls", &opts.sls, "validate straight-line-speculation mitigations"),
+ OPT_BOOLEAN('s', "stackval", &opts.stackval, "validate frame pointer rules"),
+ OPT_BOOLEAN('t', "static-call", &opts.static_call, "annotate static calls"),
+ OPT_BOOLEAN('u', "uaccess", &opts.uaccess, "validate uaccess rules for SMAP"),
+ OPT_CALLBACK_OPTARG(0, "dump", NULL, NULL, "orc", "dump metadata", parse_dump),
OPT_GROUP("Options:"),
- OPT_BOOLEAN(0, "backtrace", &opts.backtrace, "unwind on error"),
- OPT_BOOLEAN(0, "dry-run", &opts.dryrun, "don't write modifications"),
- OPT_BOOLEAN(0, "link", &opts.link, "object is a linked object"),
- OPT_BOOLEAN(0, "module", &opts.module, "object is part of a kernel module"),
- OPT_BOOLEAN(0, "mnop", &opts.mnop, "nop out mcount call sites"),
- OPT_BOOLEAN(0, "no-unreachable", &opts.no_unreachable, "skip 'unreachable instruction' warnings"),
- OPT_STRING('o', "output", &opts.output, "file", "output file name"),
- OPT_BOOLEAN(0, "sec-address", &opts.sec_address, "print section addresses in warnings"),
- OPT_BOOLEAN(0, "stats", &opts.stats, "print statistics"),
- OPT_BOOLEAN('v', "verbose", &opts.verbose, "verbose warnings"),
- OPT_BOOLEAN(0, "Werror", &opts.werror, "return error on warnings"),
+ OPT_BOOLEAN(0, "backtrace", &opts.backtrace, "unwind on error"),
+ OPT_BOOLEAN(0, "backup", &opts.backup, "create backup (.orig) file on warning/error"),
+ OPT_STRING(0, "debug-checksum", &opts.debug_checksum, "funcs", "enable checksum debug output"),
+ OPT_BOOLEAN(0, "dry-run", &opts.dryrun, "don't write modifications"),
+ OPT_BOOLEAN(0, "link", &opts.link, "object is a linked object"),
+ OPT_BOOLEAN(0, "module", &opts.module, "object is part of a kernel module"),
+ OPT_BOOLEAN(0, "mnop", &opts.mnop, "nop out mcount call sites"),
+ OPT_BOOLEAN(0, "no-unreachable", &opts.no_unreachable, "skip 'unreachable instruction' warnings"),
+ OPT_STRING('o', "output", &opts.output, "file", "output file name"),
+ OPT_BOOLEAN(0, "sec-address", &opts.sec_address, "print section addresses in warnings"),
+ OPT_BOOLEAN(0, "stats", &opts.stats, "print statistics"),
+ OPT_STRING(0, "trace", &opts.trace, "func", "trace function validation"),
+ OPT_BOOLEAN('v', "verbose", &opts.verbose, "verbose warnings"),
+ OPT_BOOLEAN(0, "werror", &opts.werror, "return error on warnings"),
+ OPT_BOOLEAN(0, "wide", &opts.wide, "wide output"),
OPT_END(),
};
@@ -158,10 +165,25 @@ static bool opts_valid(void)
return false;
}
- if (opts.hack_jump_label ||
+#ifndef BUILD_KLP
+ if (opts.checksum) {
+ ERROR("--checksum not supported; install xxhash-devel/libxxhash-dev (version >= 0.8) and recompile");
+ return false;
+ }
+#endif
+
+ if (opts.debug_checksum && !opts.checksum) {
+ ERROR("--debug-checksum requires --checksum");
+ return false;
+ }
+
+ if (opts.checksum ||
+ opts.disas ||
+ opts.hack_jump_label ||
opts.hack_noinstr ||
opts.ibt ||
opts.mcount ||
+ opts.noabs ||
opts.noinstr ||
opts.orc ||
opts.retpoline ||
@@ -241,15 +263,12 @@ static void save_argv(int argc, const char **argv)
ERROR_GLIBC("strdup(%s)", argv[i]);
exit(1);
}
- };
+ }
}
-void print_args(void)
+int make_backup(void)
{
- char *backup = NULL;
-
- if (opts.output || opts.dryrun)
- goto print;
+ char *backup;
/*
* Make a backup before kbuild deletes the file so the error
@@ -258,33 +277,32 @@ void print_args(void)
backup = malloc(strlen(objname) + strlen(ORIG_SUFFIX) + 1);
if (!backup) {
ERROR_GLIBC("malloc");
- goto print;
+ return 1;
}
strcpy(backup, objname);
strcat(backup, ORIG_SUFFIX);
- if (copy_file(objname, backup)) {
- backup = NULL;
- goto print;
- }
+ if (copy_file(objname, backup))
+ return 1;
-print:
/*
- * Print the cmdline args to make it easier to recreate. If '--output'
- * wasn't used, add it to the printed args with the backup as input.
+ * Print the cmdline args to make it easier to recreate.
*/
+
fprintf(stderr, "%s", orig_argv[0]);
for (int i = 1; i < orig_argc; i++) {
char *arg = orig_argv[i];
- if (backup && !strcmp(arg, objname))
+ /* Modify the printed args to use the backup */
+ if (!opts.output && !strcmp(arg, objname))
fprintf(stderr, " %s -o %s", backup, objname);
else
fprintf(stderr, " %s", arg);
}
fprintf(stderr, "\n");
+ return 0;
}
int objtool_run(int argc, const char **argv)
@@ -330,5 +348,5 @@ int objtool_run(int argc, const char **argv)
if (!opts.dryrun && file->elf->changed && elf_write(file->elf))
return 1;
- return 0;
+ return elf_close(file->elf);
}
diff --git a/tools/objtool/builtin-klp.c b/tools/objtool/builtin-klp.c
new file mode 100644
index 000000000000..56d5a5b92f72
--- /dev/null
+++ b/tools/objtool/builtin-klp.c
@@ -0,0 +1,53 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+#include <subcmd/parse-options.h>
+#include <string.h>
+#include <stdlib.h>
+#include <objtool/builtin.h>
+#include <objtool/objtool.h>
+#include <objtool/klp.h>
+
+struct subcmd {
+ const char *name;
+ const char *description;
+ int (*fn)(int, const char **);
+};
+
+static struct subcmd subcmds[] = {
+ { "diff", "Generate binary diff of two object files", cmd_klp_diff, },
+ { "post-link", "Finalize klp symbols/relocs after module linking", cmd_klp_post_link, },
+};
+
+static void cmd_klp_usage(void)
+{
+ fprintf(stderr, "usage: objtool klp <subcommand> [<options>]\n\n");
+ fprintf(stderr, "Subcommands:\n");
+
+ for (int i = 0; i < ARRAY_SIZE(subcmds); i++) {
+ struct subcmd *cmd = &subcmds[i];
+
+ fprintf(stderr, " %s\t%s\n", cmd->name, cmd->description);
+ }
+
+ exit(1);
+}
+
+int cmd_klp(int argc, const char **argv)
+{
+ argc--;
+ argv++;
+
+ if (!argc)
+ cmd_klp_usage();
+
+ if (argc) {
+ for (int i = 0; i < ARRAY_SIZE(subcmds); i++) {
+ struct subcmd *cmd = &subcmds[i];
+
+ if (!strcmp(cmd->name, argv[0]))
+ return cmd->fn(argc, argv);
+ }
+ }
+
+ cmd_klp_usage();
+ return 0;
+}
diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index d14f20ef1db1..3f7999317f4d 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -3,6 +3,8 @@
* Copyright (C) 2015-2017 Josh Poimboeuf <jpoimboe@redhat.com>
*/
+#define _GNU_SOURCE /* memmem() */
+#include <fnmatch.h>
#include <string.h>
#include <stdlib.h>
#include <inttypes.h>
@@ -11,10 +13,13 @@
#include <objtool/builtin.h>
#include <objtool/cfi.h>
#include <objtool/arch.h>
+#include <objtool/disas.h>
#include <objtool/check.h>
#include <objtool/special.h>
+#include <objtool/trace.h>
#include <objtool/warn.h>
-#include <objtool/endianness.h>
+#include <objtool/checksum.h>
+#include <objtool/util.h>
#include <linux/objtool_types.h>
#include <linux/hashtable.h>
@@ -22,11 +27,6 @@
#include <linux/static_call_types.h>
#include <linux/string.h>
-struct alternative {
- struct alternative *next;
- struct instruction *insn;
-};
-
static unsigned long nr_cfi, nr_cfi_reused, nr_cfi_cache;
static struct cfi_init_state initial_func_cfi;
@@ -34,6 +34,10 @@ static struct cfi_state init_cfi;
static struct cfi_state func_cfi;
static struct cfi_state force_undefined_cfi;
+struct disas_context *objtool_disas_ctx;
+
+size_t sym_name_max_len;
+
struct instruction *find_insn(struct objtool_file *file,
struct section *sec, unsigned long offset)
{
@@ -106,7 +110,7 @@ static struct instruction *prev_insn_same_sym(struct objtool_file *file,
#define for_each_insn(file, insn) \
for (struct section *__sec, *__fake = (struct section *)1; \
__fake; __fake = NULL) \
- for_each_sec(file, __sec) \
+ for_each_sec(file->elf, __sec) \
sec_for_each_insn(file, __sec, insn)
#define func_for_each_insn(file, func, insn) \
@@ -131,15 +135,6 @@ static struct instruction *prev_insn_same_sym(struct objtool_file *file,
for (insn = next_insn_same_sec(file, insn); insn; \
insn = next_insn_same_sec(file, insn))
-static inline struct symbol *insn_call_dest(struct instruction *insn)
-{
- if (insn->type == INSN_JUMP_DYNAMIC ||
- insn->type == INSN_CALL_DYNAMIC)
- return NULL;
-
- return insn->_call_dest;
-}
-
static inline struct reloc *insn_jump_table(struct instruction *insn)
{
if (insn->type == INSN_JUMP_DYNAMIC ||
@@ -186,20 +181,6 @@ static bool is_sibling_call(struct instruction *insn)
}
/*
- * Checks if a string ends with another.
- */
-static bool str_ends_with(const char *s, const char *sub)
-{
- const int slen = strlen(s);
- const int sublen = strlen(sub);
-
- if (sublen > slen)
- return 0;
-
- return !memcmp(s + slen - sublen, sub, sublen);
-}
-
-/*
* Checks if a function is a Rust "noreturn" one.
*/
static bool is_rust_noreturn(const struct symbol *func)
@@ -217,6 +198,7 @@ static bool is_rust_noreturn(const struct symbol *func)
* these come from the Rust standard library).
*/
return str_ends_with(func->name, "_4core5sliceSp15copy_from_slice17len_mismatch_fail") ||
+ str_ends_with(func->name, "_4core6option13expect_failed") ||
str_ends_with(func->name, "_4core6option13unwrap_failed") ||
str_ends_with(func->name, "_4core6result13unwrap_failed") ||
str_ends_with(func->name, "_4core9panicking5panic") ||
@@ -261,7 +243,7 @@ static bool __dead_end_function(struct objtool_file *file, struct symbol *func,
if (!func)
return false;
- if (func->bind == STB_GLOBAL || func->bind == STB_WEAK) {
+ if (!is_local_sym(func)) {
if (is_rust_noreturn(func))
return true;
@@ -270,7 +252,7 @@ static bool __dead_end_function(struct objtool_file *file, struct symbol *func,
return true;
}
- if (func->bind == STB_WEAK)
+ if (is_weak_sym(func))
return false;
if (!func->len)
@@ -430,14 +412,13 @@ static int decode_instructions(struct objtool_file *file)
struct symbol *func;
unsigned long offset;
struct instruction *insn;
- int ret;
- for_each_sec(file, sec) {
+ for_each_sec(file->elf, sec) {
struct instruction *insns = NULL;
u8 prev_len = 0;
u8 idx = 0;
- if (!(sec->sh.sh_flags & SHF_EXECINSTR))
+ if (!is_text_sec(sec))
continue;
if (strcmp(sec->name, ".altinstr_replacement") &&
@@ -460,9 +441,9 @@ static int decode_instructions(struct objtool_file *file)
if (!strcmp(sec->name, ".init.text") && !opts.module)
sec->init = true;
- for (offset = 0; offset < sec->sh.sh_size; offset += insn->len) {
+ for (offset = 0; offset < sec_size(sec); offset += insn->len) {
if (!insns || idx == INSN_CHUNK_MAX) {
- insns = calloc(sizeof(*insn), INSN_CHUNK_SIZE);
+ insns = calloc(INSN_CHUNK_SIZE, sizeof(*insn));
if (!insns) {
ERROR_GLIBC("calloc");
return -1;
@@ -479,11 +460,8 @@ static int decode_instructions(struct objtool_file *file)
insn->offset = offset;
insn->prev_len = prev_len;
- ret = arch_decode_instruction(file, sec, offset,
- sec->sh.sh_size - offset,
- insn);
- if (ret)
- return ret;
+ if (arch_decode_instruction(file, sec, offset, sec_size(sec) - offset, insn))
+ return -1;
prev_len = insn->len;
@@ -500,12 +478,12 @@ static int decode_instructions(struct objtool_file *file)
}
sec_for_each_sym(sec, func) {
- if (func->type != STT_NOTYPE && func->type != STT_FUNC)
+ if (!is_notype_sym(func) && !is_func_sym(func))
continue;
- if (func->offset == sec->sh.sh_size) {
+ if (func->offset == sec_size(sec)) {
/* Heuristic: likely an "end" symbol */
- if (func->type == STT_NOTYPE)
+ if (is_notype_sym(func))
continue;
ERROR("%s(): STT_FUNC at end of section", func->name);
return -1;
@@ -521,7 +499,7 @@ static int decode_instructions(struct objtool_file *file)
sym_for_each_insn(file, func, insn) {
insn->sym = func;
- if (func->type == STT_FUNC &&
+ if (is_func_sym(func) &&
insn->type == INSN_ENDBR &&
list_empty(&insn->call_node)) {
if (insn->offset == func->offset) {
@@ -565,7 +543,7 @@ static int add_pv_ops(struct objtool_file *file, const char *symname)
idx = (reloc_offset(reloc) - sym->offset) / sizeof(unsigned long);
func = reloc->sym;
- if (func->type == STT_SECTION)
+ if (is_sec_sym(func))
func = find_symbol_by_offset(reloc->sym->sec,
reloc_addend(reloc));
if (!func) {
@@ -599,7 +577,7 @@ static int init_pv_ops(struct objtool_file *file)
};
const char *pv_ops;
struct symbol *sym;
- int idx, nr, ret;
+ int idx, nr;
if (!opts.noinstr)
return 0;
@@ -611,7 +589,7 @@ static int init_pv_ops(struct objtool_file *file)
return 0;
nr = sym->len / sizeof(unsigned long);
- file->pv_ops = calloc(sizeof(struct pv_state), nr);
+ file->pv_ops = calloc(nr, sizeof(struct pv_state));
if (!file->pv_ops) {
ERROR_GLIBC("calloc");
return -1;
@@ -621,14 +599,27 @@ static int init_pv_ops(struct objtool_file *file)
INIT_LIST_HEAD(&file->pv_ops[idx].targets);
for (idx = 0; (pv_ops = pv_ops_tables[idx]); idx++) {
- ret = add_pv_ops(file, pv_ops);
- if (ret)
- return ret;
+ if (add_pv_ops(file, pv_ops))
+ return -1;
}
return 0;
}
+static bool is_livepatch_module(struct objtool_file *file)
+{
+ struct section *sec;
+
+ if (!opts.module)
+ return false;
+
+ sec = find_section_by_name(file->elf, ".modinfo");
+ if (!sec)
+ return false;
+
+ return memmem(sec->data->d_buf, sec_size(sec), "\0livepatch=Y", 12);
+}
+
static int create_static_call_sections(struct objtool_file *file)
{
struct static_call_site *site;
@@ -640,8 +631,14 @@ static int create_static_call_sections(struct objtool_file *file)
sec = find_section_by_name(file->elf, ".static_call_sites");
if (sec) {
- INIT_LIST_HEAD(&file->static_call_list);
- WARN("file already has .static_call_sites section, skipping");
+ /*
+ * Livepatch modules may have already extracted the static call
+ * site entries to take advantage of vmlinux static call
+ * privileges.
+ */
+ if (!file->klp)
+ WARN("file already has .static_call_sites section, skipping");
+
return 0;
}
@@ -685,7 +682,7 @@ static int create_static_call_sections(struct objtool_file *file)
key_sym = find_symbol_by_name(file->elf, tmp);
if (!key_sym) {
- if (!opts.module) {
+ if (!opts.module || file->klp) {
ERROR("static_call: can't find static_call_key symbol: %s", tmp);
return -1;
}
@@ -828,7 +825,7 @@ static int create_ibt_endbr_seal_sections(struct objtool_file *file)
struct symbol *sym = insn->sym;
*site = 0;
- if (opts.module && sym && sym->type == STT_FUNC &&
+ if (opts.module && sym && is_func_sym(sym) &&
insn->offset == sym->offset &&
(!strcmp(sym->name, "init_module") ||
!strcmp(sym->name, "cleanup_module"))) {
@@ -856,14 +853,13 @@ static int create_cfi_sections(struct objtool_file *file)
sec = find_section_by_name(file->elf, ".cfi_sites");
if (sec) {
- INIT_LIST_HEAD(&file->call_list);
WARN("file already has .cfi_sites section, skipping");
return 0;
}
idx = 0;
- for_each_sym(file, sym) {
- if (sym->type != STT_FUNC)
+ for_each_sym(file->elf, sym) {
+ if (!is_func_sym(sym))
continue;
if (strncmp(sym->name, "__cfi_", 6))
@@ -878,8 +874,8 @@ static int create_cfi_sections(struct objtool_file *file)
return -1;
idx = 0;
- for_each_sym(file, sym) {
- if (sym->type != STT_FUNC)
+ for_each_sym(file->elf, sym) {
+ if (!is_func_sym(sym))
continue;
if (strncmp(sym->name, "__cfi_", 6))
@@ -905,8 +901,13 @@ static int create_mcount_loc_sections(struct objtool_file *file)
sec = find_section_by_name(file->elf, "__mcount_loc");
if (sec) {
- INIT_LIST_HEAD(&file->mcount_loc_list);
- WARN("file already has __mcount_loc section, skipping");
+ /*
+ * Livepatch modules have already extracted their __mcount_loc
+ * entries to cover the !CONFIG_FTRACE_MCOUNT_USE_OBJTOOL case.
+ */
+ if (!file->klp)
+ WARN("file already has __mcount_loc section, skipping");
+
return 0;
}
@@ -950,7 +951,6 @@ static int create_direct_call_sections(struct objtool_file *file)
sec = find_section_by_name(file->elf, ".call_sites");
if (sec) {
- INIT_LIST_HEAD(&file->call_list);
WARN("file already has .call_sites section, skipping");
return 0;
}
@@ -981,6 +981,59 @@ static int create_direct_call_sections(struct objtool_file *file)
return 0;
}
+#ifdef BUILD_KLP
+static int create_sym_checksum_section(struct objtool_file *file)
+{
+ struct section *sec;
+ struct symbol *sym;
+ unsigned int idx = 0;
+ struct sym_checksum *checksum;
+ size_t entsize = sizeof(struct sym_checksum);
+
+ sec = find_section_by_name(file->elf, ".discard.sym_checksum");
+ if (sec) {
+ if (!opts.dryrun)
+ WARN("file already has .discard.sym_checksum section, skipping");
+
+ return 0;
+ }
+
+ for_each_sym(file->elf, sym)
+ if (sym->csum.checksum)
+ idx++;
+
+ if (!idx)
+ return 0;
+
+ sec = elf_create_section_pair(file->elf, ".discard.sym_checksum", entsize,
+ idx, idx);
+ if (!sec)
+ return -1;
+
+ idx = 0;
+ for_each_sym(file->elf, sym) {
+ if (!sym->csum.checksum)
+ continue;
+
+ if (!elf_init_reloc(file->elf, sec->rsec, idx, idx * entsize,
+ sym, 0, R_TEXT64))
+ return -1;
+
+ checksum = (struct sym_checksum *)sec->data->d_buf + idx;
+ checksum->addr = 0; /* reloc */
+ checksum->checksum = sym->csum.checksum;
+
+ mark_sec_changed(file->elf, sec, true);
+
+ idx++;
+ }
+
+ return 0;
+}
+#else
+static int create_sym_checksum_section(struct objtool_file *file) { return -EINVAL; }
+#endif
+
/*
* Warnings shouldn't be reported for ignored functions.
*/
@@ -1432,9 +1485,14 @@ static void add_return_call(struct objtool_file *file, struct instruction *insn,
}
static bool is_first_func_insn(struct objtool_file *file,
- struct instruction *insn, struct symbol *sym)
+ struct instruction *insn)
{
- if (insn->offset == sym->offset)
+ struct symbol *func = insn_func(insn);
+
+ if (!func)
+ return false;
+
+ if (insn->offset == func->offset)
return true;
/* Allow direct CALL/JMP past ENDBR */
@@ -1442,7 +1500,7 @@ static bool is_first_func_insn(struct objtool_file *file,
struct instruction *prev = prev_insn_same_sym(file, insn);
if (prev && prev->type == INSN_ENDBR &&
- insn->offset == sym->offset + prev->len)
+ insn->offset == func->offset + prev->len)
return true;
}
@@ -1450,44 +1508,22 @@ static bool is_first_func_insn(struct objtool_file *file,
}
/*
- * A sibling call is a tail-call to another symbol -- to differentiate from a
- * recursive tail-call which is to the same symbol.
- */
-static bool jump_is_sibling_call(struct objtool_file *file,
- struct instruction *from, struct instruction *to)
-{
- struct symbol *fs = from->sym;
- struct symbol *ts = to->sym;
-
- /* Not a sibling call if from/to a symbol hole */
- if (!fs || !ts)
- return false;
-
- /* Not a sibling call if not targeting the start of a symbol. */
- if (!is_first_func_insn(file, to, ts))
- return false;
-
- /* Disallow sibling calls into STT_NOTYPE */
- if (ts->type == STT_NOTYPE)
- return false;
-
- /* Must not be self to be a sibling */
- return fs->pfunc != ts->pfunc;
-}
-
-/*
* Find the destination instructions for all jumps.
*/
static int add_jump_destinations(struct objtool_file *file)
{
- struct instruction *insn, *jump_dest;
+ struct instruction *insn;
struct reloc *reloc;
- struct section *dest_sec;
- unsigned long dest_off;
- int ret;
for_each_insn(file, insn) {
struct symbol *func = insn_func(insn);
+ struct instruction *dest_insn;
+ struct section *dest_sec;
+ struct symbol *dest_sym;
+ unsigned long dest_off;
+
+ if (!is_static_jump(insn))
+ continue;
if (insn->jump_dest) {
/*
@@ -1496,53 +1532,53 @@ static int add_jump_destinations(struct objtool_file *file)
*/
continue;
}
- if (!is_static_jump(insn))
- continue;
reloc = insn_reloc(file, insn);
if (!reloc) {
dest_sec = insn->sec;
dest_off = arch_jump_destination(insn);
- } else if (reloc->sym->type == STT_SECTION) {
- dest_sec = reloc->sym->sec;
- dest_off = arch_dest_reloc_offset(reloc_addend(reloc));
- } else if (reloc->sym->retpoline_thunk) {
- ret = add_retpoline_call(file, insn);
- if (ret)
- return ret;
- continue;
- } else if (reloc->sym->return_thunk) {
- add_return_call(file, insn, true);
- continue;
- } else if (func) {
- /*
- * External sibling call or internal sibling call with
- * STT_FUNC reloc.
- */
- ret = add_call_dest(file, insn, reloc->sym, true);
- if (ret)
- return ret;
- continue;
- } else if (reloc->sym->sec->idx) {
- dest_sec = reloc->sym->sec;
- dest_off = reloc->sym->sym.st_value +
- arch_dest_reloc_offset(reloc_addend(reloc));
+ dest_sym = dest_sec->sym;
} else {
- /* non-func asm code jumping to another file */
- continue;
+ dest_sym = reloc->sym;
+ if (is_undef_sym(dest_sym)) {
+ if (dest_sym->retpoline_thunk) {
+ if (add_retpoline_call(file, insn))
+ return -1;
+ continue;
+ }
+
+ if (dest_sym->return_thunk) {
+ add_return_call(file, insn, true);
+ continue;
+ }
+
+ /* External symbol */
+ if (func) {
+ /* External sibling call */
+ if (add_call_dest(file, insn, dest_sym, true))
+ return -1;
+ continue;
+ }
+
+ /* Non-func asm code jumping to external symbol */
+ continue;
+ }
+
+ dest_sec = dest_sym->sec;
+ dest_off = dest_sym->offset + arch_insn_adjusted_addend(insn, reloc);
}
- jump_dest = find_insn(file, dest_sec, dest_off);
- if (!jump_dest) {
+ dest_insn = find_insn(file, dest_sec, dest_off);
+ if (!dest_insn) {
struct symbol *sym = find_symbol_by_offset(dest_sec, dest_off);
/*
- * This is a special case for retbleed_untrain_ret().
- * It jumps to __x86_return_thunk(), but objtool
- * can't find the thunk's starting RET
- * instruction, because the RET is also in the
- * middle of another instruction. Objtool only
- * knows about the outer instruction.
+ * retbleed_untrain_ret() jumps to
+ * __x86_return_thunk(), but objtool can't find
+ * the thunk's starting RET instruction,
+ * because the RET is also in the middle of
+ * another instruction. Objtool only knows
+ * about the outer instruction.
*/
if (sym && sym->embedded_insn) {
add_return_call(file, insn, false);
@@ -1550,76 +1586,52 @@ static int add_jump_destinations(struct objtool_file *file)
}
/*
- * GCOV/KCOV dead code can jump to the end of the
- * function/section.
+ * GCOV/KCOV dead code can jump to the end of
+ * the function/section.
*/
if (file->ignore_unreachables && func &&
dest_sec == insn->sec &&
dest_off == func->offset + func->len)
continue;
- ERROR_INSN(insn, "can't find jump dest instruction at %s+0x%lx",
- dest_sec->name, dest_off);
+ ERROR_INSN(insn, "can't find jump dest instruction at %s",
+ offstr(dest_sec, dest_off));
return -1;
}
- /*
- * An intra-TU jump in retpoline.o might not have a relocation
- * for its jump dest, in which case the above
- * add_{retpoline,return}_call() didn't happen.
- */
- if (jump_dest->sym && jump_dest->offset == jump_dest->sym->offset) {
- if (jump_dest->sym->retpoline_thunk) {
- ret = add_retpoline_call(file, insn);
- if (ret)
- return ret;
- continue;
- }
- if (jump_dest->sym->return_thunk) {
- add_return_call(file, insn, true);
- continue;
- }
+ if (!dest_sym || is_sec_sym(dest_sym)) {
+ dest_sym = dest_insn->sym;
+ if (!dest_sym)
+ goto set_jump_dest;
}
- /*
- * Cross-function jump.
- */
- if (func && insn_func(jump_dest) && func != insn_func(jump_dest)) {
+ if (dest_sym->retpoline_thunk && dest_insn->offset == dest_sym->offset) {
+ if (add_retpoline_call(file, insn))
+ return -1;
+ continue;
+ }
- /*
- * For GCC 8+, create parent/child links for any cold
- * subfunctions. This is _mostly_ redundant with a
- * similar initialization in read_symbols().
- *
- * If a function has aliases, we want the *first* such
- * function in the symbol table to be the subfunction's
- * parent. In that case we overwrite the
- * initialization done in read_symbols().
- *
- * However this code can't completely replace the
- * read_symbols() code because this doesn't detect the
- * case where the parent function's only reference to a
- * subfunction is through a jump table.
- */
- if (!strstr(func->name, ".cold") &&
- strstr(insn_func(jump_dest)->name, ".cold")) {
- func->cfunc = insn_func(jump_dest);
- insn_func(jump_dest)->pfunc = func;
- }
+ if (dest_sym->return_thunk && dest_insn->offset == dest_sym->offset) {
+ add_return_call(file, insn, true);
+ continue;
}
- if (jump_is_sibling_call(file, insn, jump_dest)) {
- /*
- * Internal sibling call without reloc or with
- * STT_SECTION reloc.
- */
- ret = add_call_dest(file, insn, insn_func(jump_dest), true);
- if (ret)
- return ret;
+ if (!insn->sym || insn->sym->pfunc == dest_sym->pfunc)
+ goto set_jump_dest;
+
+ /*
+ * Internal cross-function jump.
+ */
+
+ if (is_first_func_insn(file, dest_insn)) {
+ /* Internal sibling call */
+ if (add_call_dest(file, insn, dest_sym, true))
+ return -1;
continue;
}
- insn->jump_dest = jump_dest;
+set_jump_dest:
+ insn->jump_dest = dest_insn;
}
return 0;
@@ -1645,7 +1657,6 @@ static int add_call_destinations(struct objtool_file *file)
unsigned long dest_off;
struct symbol *dest;
struct reloc *reloc;
- int ret;
for_each_insn(file, insn) {
struct symbol *func = insn_func(insn);
@@ -1657,9 +1668,8 @@ static int add_call_destinations(struct objtool_file *file)
dest_off = arch_jump_destination(insn);
dest = find_call_destination(insn->sec, dest_off);
- ret = add_call_dest(file, insn, dest, false);
- if (ret)
- return ret;
+ if (add_call_dest(file, insn, dest, false))
+ return -1;
if (func && func->ignore)
continue;
@@ -1669,13 +1679,13 @@ static int add_call_destinations(struct objtool_file *file)
return -1;
}
- if (func && insn_call_dest(insn)->type != STT_FUNC) {
+ if (func && !is_func_sym(insn_call_dest(insn))) {
ERROR_INSN(insn, "unsupported call to non-function");
return -1;
}
- } else if (reloc->sym->type == STT_SECTION) {
- dest_off = arch_dest_reloc_offset(reloc_addend(reloc));
+ } else if (is_sec_sym(reloc->sym)) {
+ dest_off = arch_insn_adjusted_addend(insn, reloc);
dest = find_call_destination(reloc->sym->sec, dest_off);
if (!dest) {
ERROR_INSN(insn, "can't find call dest symbol at %s+0x%lx",
@@ -1683,19 +1693,16 @@ static int add_call_destinations(struct objtool_file *file)
return -1;
}
- ret = add_call_dest(file, insn, dest, false);
- if (ret)
- return ret;
+ if (add_call_dest(file, insn, dest, false))
+ return -1;
} else if (reloc->sym->retpoline_thunk) {
- ret = add_retpoline_call(file, insn);
- if (ret)
- return ret;
+ if (add_retpoline_call(file, insn))
+ return -1;
} else {
- ret = add_call_dest(file, insn, reloc->sym, false);
- if (ret)
- return ret;
+ if (add_call_dest(file, insn, reloc->sym, false))
+ return -1;
}
}
@@ -1744,6 +1751,7 @@ static int handle_group_alt(struct objtool_file *file,
orig_alt_group->last_insn = last_orig_insn;
orig_alt_group->nop = NULL;
orig_alt_group->ignore = orig_insn->ignore_alts;
+ orig_alt_group->feature = 0;
} else {
if (orig_alt_group->last_insn->offset + orig_alt_group->last_insn->len -
orig_alt_group->first_insn->offset != special_alt->orig_len) {
@@ -1783,6 +1791,7 @@ static int handle_group_alt(struct objtool_file *file,
nop->type = INSN_NOP;
nop->sym = orig_insn->sym;
nop->alt_group = new_alt_group;
+ nop->fake = 1;
}
if (!special_alt->new_len) {
@@ -1847,6 +1856,7 @@ end:
new_alt_group->nop = nop;
new_alt_group->ignore = (*new_insn)->ignore_alts;
new_alt_group->cfi = orig_alt_group->cfi;
+ new_alt_group->feature = special_alt->feature;
return 0;
}
@@ -1911,8 +1921,9 @@ static int add_special_section_alts(struct objtool_file *file)
struct list_head special_alts;
struct instruction *orig_insn, *new_insn;
struct special_alt *special_alt, *tmp;
+ enum alternative_type alt_type;
struct alternative *alt;
- int ret;
+ struct alternative *a;
if (special_get_alts(file->elf, &special_alts))
return -1;
@@ -1944,16 +1955,18 @@ static int add_special_section_alts(struct objtool_file *file)
continue;
}
- ret = handle_group_alt(file, special_alt, orig_insn,
- &new_insn);
- if (ret)
- return ret;
+ if (handle_group_alt(file, special_alt, orig_insn, &new_insn))
+ return -1;
+
+ alt_type = ALT_TYPE_INSTRUCTIONS;
} else if (special_alt->jump_or_nop) {
- ret = handle_jump_alt(file, special_alt, orig_insn,
- &new_insn);
- if (ret)
- return ret;
+ if (handle_jump_alt(file, special_alt, orig_insn, &new_insn))
+ return -1;
+
+ alt_type = ALT_TYPE_JUMP_TABLE;
+ } else {
+ alt_type = ALT_TYPE_EX_TABLE;
}
alt = calloc(1, sizeof(*alt));
@@ -1963,8 +1976,20 @@ static int add_special_section_alts(struct objtool_file *file)
}
alt->insn = new_insn;
- alt->next = orig_insn->alts;
- orig_insn->alts = alt;
+ alt->type = alt_type;
+ alt->next = NULL;
+
+ /*
+ * Store alternatives in the same order they have been
+ * defined.
+ */
+ if (!orig_insn->alts) {
+ orig_insn->alts = alt;
+ } else {
+ for (a = orig_insn->alts; a->next; a = a->next)
+ ;
+ a->next = alt;
+ }
list_del(&special_alt->list);
free(special_alt);
@@ -2141,15 +2166,13 @@ static int add_func_jump_tables(struct objtool_file *file,
struct symbol *func)
{
struct instruction *insn;
- int ret;
func_for_each_insn(file, func, insn) {
if (!insn_jump_table(insn))
continue;
- ret = add_jump_table(file, insn);
- if (ret)
- return ret;
+ if (add_jump_table(file, insn))
+ return -1;
}
return 0;
@@ -2163,19 +2186,17 @@ static int add_func_jump_tables(struct objtool_file *file,
static int add_jump_table_alts(struct objtool_file *file)
{
struct symbol *func;
- int ret;
if (!file->rodata)
return 0;
- for_each_sym(file, func) {
- if (func->type != STT_FUNC)
+ for_each_sym(file->elf, func) {
+ if (!is_func_sym(func) || func->alias != func)
continue;
mark_func_jump_tables(file, func);
- ret = add_func_jump_tables(file, func);
- if (ret)
- return ret;
+ if (add_func_jump_tables(file, func))
+ return -1;
}
return 0;
@@ -2209,14 +2230,14 @@ static int read_unwind_hints(struct objtool_file *file)
return -1;
}
- if (sec->sh.sh_size % sizeof(struct unwind_hint)) {
+ if (sec_size(sec) % sizeof(struct unwind_hint)) {
ERROR("struct unwind_hint size mismatch");
return -1;
}
file->hints = true;
- for (i = 0; i < sec->sh.sh_size / sizeof(struct unwind_hint); i++) {
+ for (i = 0; i < sec_size(sec) / sizeof(struct unwind_hint); i++) {
hint = (struct unwind_hint *)sec->data->d_buf + i;
reloc = find_reloc_by_dest(file->elf, sec, i * sizeof(*hint));
@@ -2225,14 +2246,7 @@ static int read_unwind_hints(struct objtool_file *file)
return -1;
}
- if (reloc->sym->type == STT_SECTION) {
- offset = reloc_addend(reloc);
- } else if (reloc->sym->local_label) {
- offset = reloc->sym->offset;
- } else {
- ERROR("unexpected relocation symbol type in %s", sec->rsec->name);
- return -1;
- }
+ offset = reloc->sym->offset + reloc_addend(reloc);
insn = find_insn(file, reloc->sym->sec, offset);
if (!insn) {
@@ -2261,7 +2275,7 @@ static int read_unwind_hints(struct objtool_file *file)
if (hint->type == UNWIND_HINT_TYPE_REGS_PARTIAL) {
struct symbol *sym = find_symbol_by_offset(insn->sec, insn->offset);
- if (sym && sym->bind == STB_GLOBAL) {
+ if (sym && is_global_sym(sym)) {
if (opts.ibt && insn->type != INSN_ENDBR && !insn->noendbr) {
ERROR_INSN(insn, "UNWIND_HINT_IRET_REGS without ENDBR");
return -1;
@@ -2299,7 +2313,7 @@ static int read_annotate(struct objtool_file *file,
struct instruction *insn;
struct reloc *reloc;
uint64_t offset;
- int type, ret;
+ int type;
sec = find_section_by_name(file->elf, ".discard.annotate_insn");
if (!sec)
@@ -2317,10 +2331,13 @@ static int read_annotate(struct objtool_file *file,
sec->sh.sh_entsize = 8;
}
- for_each_reloc(sec->rsec, reloc) {
- type = *(u32 *)(sec->data->d_buf + (reloc_idx(reloc) * sec->sh.sh_entsize) + 4);
- type = bswap_if_needed(file->elf, type);
+ if (sec_num_entries(sec) != sec_num_entries(sec->rsec)) {
+ ERROR("bad .discard.annotate_insn section: missing relocs");
+ return -1;
+ }
+ for_each_reloc(sec->rsec, reloc) {
+ type = annotype(file->elf, sec, reloc);
offset = reloc->sym->offset + reloc_addend(reloc);
insn = find_insn(file, reloc->sym->sec, offset);
@@ -2329,9 +2346,8 @@ static int read_annotate(struct objtool_file *file,
return -1;
}
- ret = func(file, type, insn);
- if (ret < 0)
- return ret;
+ if (func(file, type, insn))
+ return -1;
}
return 0;
@@ -2392,6 +2408,8 @@ static int __annotate_ifc(struct objtool_file *file, int type, struct instructio
static int __annotate_late(struct objtool_file *file, int type, struct instruction *insn)
{
+ struct symbol *sym;
+
switch (type) {
case ANNOTYPE_NOENDBR:
/* early */
@@ -2433,6 +2451,15 @@ static int __annotate_late(struct objtool_file *file, int type, struct instructi
insn->dead_end = false;
break;
+ case ANNOTYPE_NOCFI:
+ sym = insn->sym;
+ if (!sym) {
+ ERROR_INSN(insn, "dodgy NOCFI annotation");
+ return -1;
+ }
+ insn->sym->nocfi = 1;
+ break;
+
default:
ERROR_INSN(insn, "Unknown annotation type: %d", type);
return -1;
@@ -2453,28 +2480,19 @@ static bool is_profiling_func(const char *name)
if (!strncmp(name, "__sanitizer_cov_", 16))
return true;
- /*
- * Some compilers currently do not remove __tsan_func_entry/exit nor
- * __tsan_atomic_signal_fence (used for barrier instrumentation) with
- * the __no_sanitize_thread attribute, remove them. Once the kernel's
- * minimum Clang version is 14.0, this can be removed.
- */
- if (!strncmp(name, "__tsan_func_", 12) ||
- !strcmp(name, "__tsan_atomic_signal_fence"))
- return true;
-
return false;
}
static int classify_symbols(struct objtool_file *file)
{
struct symbol *func;
+ size_t len;
- for_each_sym(file, func) {
- if (func->type == STT_NOTYPE && strstarts(func->name, ".L"))
+ for_each_sym(file->elf, func) {
+ if (is_notype_sym(func) && strstarts(func->name, ".L"))
func->local_label = true;
- if (func->bind != STB_GLOBAL)
+ if (!is_global_sym(func))
continue;
if (!strncmp(func->name, STATIC_CALL_TRAMP_PREFIX_STR,
@@ -2495,6 +2513,10 @@ static int classify_symbols(struct objtool_file *file)
if (is_profiling_func(func->name))
func->profiling_func = true;
+
+ len = strlen(func->name);
+ if (len > sym_name_max_len)
+ sym_name_max_len = len;
}
return 0;
@@ -2515,7 +2537,7 @@ static void mark_rodata(struct objtool_file *file)
*
* .rodata.str1.* sections are ignored; they don't contain jump tables.
*/
- for_each_sec(file, sec) {
+ for_each_sec(file->elf, sec) {
if ((!strncmp(sec->name, ".rodata", 7) &&
!strstr(sec->name, ".str1.")) ||
!strncmp(sec->name, ".data.rel.ro", 12)) {
@@ -2527,78 +2549,115 @@ static void mark_rodata(struct objtool_file *file)
file->rodata = found;
}
+static void mark_holes(struct objtool_file *file)
+{
+ struct instruction *insn;
+ bool in_hole = false;
+
+ if (!opts.link)
+ return;
+
+ /*
+ * Whole archive runs might encounter dead code from weak symbols.
+ * This is where the linker will have dropped the weak symbol in
+ * favour of a regular symbol, but leaves the code in place.
+ */
+ for_each_insn(file, insn) {
+ if (insn->sym || !find_symbol_hole_containing(insn->sec, insn->offset)) {
+ in_hole = false;
+ continue;
+ }
+
+ /* Skip function padding and pfx code */
+ if (!in_hole && insn->type == INSN_NOP)
+ continue;
+
+ in_hole = true;
+ insn->hole = 1;
+
+ /*
+ * If this hole jumps to a .cold function, mark it ignore.
+ */
+ if (insn->jump_dest) {
+ struct symbol *dest_func = insn_func(insn->jump_dest);
+
+ if (dest_func && dest_func->cold)
+ dest_func->ignore = true;
+ }
+ }
+}
+
+static bool validate_branch_enabled(void)
+{
+ return opts.stackval ||
+ opts.orc ||
+ opts.uaccess ||
+ opts.checksum;
+}
+
static int decode_sections(struct objtool_file *file)
{
- int ret;
+ file->klp = is_livepatch_module(file);
mark_rodata(file);
- ret = init_pv_ops(file);
- if (ret)
- return ret;
+ if (init_pv_ops(file))
+ return -1;
/*
* Must be before add_{jump_call}_destination.
*/
- ret = classify_symbols(file);
- if (ret)
- return ret;
+ if (classify_symbols(file))
+ return -1;
- ret = decode_instructions(file);
- if (ret)
- return ret;
+ if (decode_instructions(file))
+ return -1;
- ret = add_ignores(file);
- if (ret)
- return ret;
+ if (add_ignores(file))
+ return -1;
add_uaccess_safe(file);
- ret = read_annotate(file, __annotate_early);
- if (ret)
- return ret;
+ if (read_annotate(file, __annotate_early))
+ return -1;
/*
* Must be before add_jump_destinations(), which depends on 'func'
* being set for alternatives, to enable proper sibling call detection.
*/
- if (opts.stackval || opts.orc || opts.uaccess || opts.noinstr) {
- ret = add_special_section_alts(file);
- if (ret)
- return ret;
+ if (validate_branch_enabled() || opts.noinstr || opts.hack_jump_label || opts.disas) {
+ if (add_special_section_alts(file))
+ return -1;
}
- ret = add_jump_destinations(file);
- if (ret)
- return ret;
+ if (add_jump_destinations(file))
+ return -1;
/*
* Must be before add_call_destination(); it changes INSN_CALL to
* INSN_JUMP.
*/
- ret = read_annotate(file, __annotate_ifc);
- if (ret)
- return ret;
+ if (read_annotate(file, __annotate_ifc))
+ return -1;
- ret = add_call_destinations(file);
- if (ret)
- return ret;
+ if (add_call_destinations(file))
+ return -1;
- ret = add_jump_table_alts(file);
- if (ret)
- return ret;
+ if (add_jump_table_alts(file))
+ return -1;
- ret = read_unwind_hints(file);
- if (ret)
- return ret;
+ if (read_unwind_hints(file))
+ return -1;
+
+ /* Must be after add_jump_destinations() */
+ mark_holes(file);
/*
* Must be after add_call_destinations() such that it can override
* dead_end_function() marks.
*/
- ret = read_annotate(file, __annotate_late);
- if (ret)
- return ret;
+ if (read_annotate(file, __annotate_late))
+ return -1;
return 0;
}
@@ -3223,18 +3282,19 @@ static int propagate_alt_cfi(struct objtool_file *file, struct instruction *insn
return 0;
}
-static int handle_insn_ops(struct instruction *insn,
- struct instruction *next_insn,
- struct insn_state *state)
+static int noinline handle_insn_ops(struct instruction *insn,
+ struct instruction *next_insn,
+ struct insn_state *state)
{
+ struct insn_state prev_state __maybe_unused = *state;
struct stack_op *op;
- int ret;
+ int ret = 0;
for (op = insn->stack_ops; op; op = op->next) {
ret = update_cfi_state(insn, next_insn, &state->cfi, op);
if (ret)
- return ret;
+ goto done;
if (!opts.uaccess || !insn->alt_group)
continue;
@@ -3244,7 +3304,8 @@ static int handle_insn_ops(struct instruction *insn,
state->uaccess_stack = 1;
} else if (state->uaccess_stack >> 31) {
WARN_INSN(insn, "PUSHF stack exhausted");
- return 1;
+ ret = 1;
+ goto done;
}
state->uaccess_stack <<= 1;
state->uaccess_stack |= state->uaccess;
@@ -3260,7 +3321,10 @@ static int handle_insn_ops(struct instruction *insn,
}
}
- return 0;
+done:
+ TRACE_INSN_STATE(insn, &prev_state, state);
+
+ return ret;
}
static bool insn_cfi_match(struct instruction *insn, struct cfi_state *cfi2)
@@ -3352,7 +3416,7 @@ static bool pv_call_dest(struct objtool_file *file, struct instruction *insn)
if (!reloc || strcmp(reloc->sym->name, "pv_ops"))
return false;
- idx = (arch_dest_reloc_offset(reloc_addend(reloc)) / sizeof(void *));
+ idx = arch_insn_adjusted_addend(insn, reloc) / sizeof(void *);
if (file->pv_ops[idx].clean)
return true;
@@ -3514,9 +3578,14 @@ static bool skip_alt_group(struct instruction *insn)
{
struct instruction *alt_insn = insn->alts ? insn->alts->insn : NULL;
+ if (!insn->alt_group)
+ return false;
+
/* ANNOTATE_IGNORE_ALTERNATIVE */
- if (insn->alt_group && insn->alt_group->ignore)
+ if (insn->alt_group->ignore) {
+ TRACE_ALT(insn, "alt group ignored");
return true;
+ }
/*
* For NOP patched with CLAC/STAC, only follow the latter to avoid
@@ -3538,256 +3607,398 @@ static bool skip_alt_group(struct instruction *insn)
return alt_insn->type == INSN_CLAC || alt_insn->type == INSN_STAC;
}
-/*
- * Follow the branch starting at the given instruction, and recursively follow
- * any other branches (jumps). Meanwhile, track the frame pointer state at
- * each instruction and validate all the rules described in
- * tools/objtool/Documentation/objtool.txt.
- */
-static int validate_branch(struct objtool_file *file, struct symbol *func,
- struct instruction *insn, struct insn_state state)
+static int checksum_debug_init(struct objtool_file *file)
{
- struct alternative *alt;
- struct instruction *next_insn, *prev_insn = NULL;
- struct section *sec;
- u8 visited;
- int ret;
+ char *dup, *s;
- if (func && func->ignore)
+ if (!opts.debug_checksum)
return 0;
- sec = insn->sec;
+ dup = strdup(opts.debug_checksum);
+ if (!dup) {
+ ERROR_GLIBC("strdup");
+ return -1;
+ }
- while (1) {
- next_insn = next_insn_to_validate(file, insn);
+ s = dup;
+ while (*s) {
+ struct symbol *func;
+ char *comma;
- if (func && insn_func(insn) && func != insn_func(insn)->pfunc) {
- /* Ignore KCFI type preambles, which always fall through */
- if (!strncmp(func->name, "__cfi_", 6) ||
- !strncmp(func->name, "__pfx_", 6))
- return 0;
+ comma = strchr(s, ',');
+ if (comma)
+ *comma = '\0';
- if (file->ignore_unreachables)
- return 0;
+ func = find_symbol_by_name(file->elf, s);
+ if (!func || !is_func_sym(func))
+ WARN("--debug-checksum: can't find '%s'", s);
+ else
+ func->debug_checksum = 1;
- WARN("%s() falls through to next function %s()",
- func->name, insn_func(insn)->name);
- func->warned = 1;
+ if (!comma)
+ break;
- return 1;
- }
+ s = comma + 1;
+ }
- visited = VISITED_BRANCH << state.uaccess;
- if (insn->visited & VISITED_BRANCH_MASK) {
- if (!insn->hint && !insn_cfi_match(insn, &state.cfi))
- return 1;
+ free(dup);
+ return 0;
+}
- if (insn->visited & visited)
- return 0;
- } else {
- nr_insns_visited++;
- }
+static void checksum_update_insn(struct objtool_file *file, struct symbol *func,
+ struct instruction *insn)
+{
+ struct reloc *reloc = insn_reloc(file, insn);
+ unsigned long offset;
+ struct symbol *sym;
- if (state.noinstr)
- state.instr += insn->instr;
+ if (insn->fake)
+ return;
- if (insn->hint) {
- if (insn->restore) {
- struct instruction *save_insn, *i;
+ checksum_update(func, insn, insn->sec->data->d_buf + insn->offset, insn->len);
- i = insn;
- save_insn = NULL;
+ if (!reloc) {
+ struct symbol *call_dest = insn_call_dest(insn);
- sym_for_each_insn_continue_reverse(file, func, i) {
- if (i->save) {
- save_insn = i;
- break;
- }
- }
+ if (call_dest)
+ checksum_update(func, insn, call_dest->demangled_name,
+ strlen(call_dest->demangled_name));
+ return;
+ }
- if (!save_insn) {
- WARN_INSN(insn, "no corresponding CFI save for CFI restore");
- return 1;
+ sym = reloc->sym;
+ offset = arch_insn_adjusted_addend(insn, reloc);
+
+ if (is_string_sec(sym->sec)) {
+ char *str;
+
+ str = sym->sec->data->d_buf + sym->offset + offset;
+ checksum_update(func, insn, str, strlen(str));
+ return;
+ }
+
+ if (is_sec_sym(sym)) {
+ sym = find_symbol_containing(reloc->sym->sec, offset);
+ if (!sym)
+ return;
+
+ offset -= sym->offset;
+ }
+
+ checksum_update(func, insn, sym->demangled_name, strlen(sym->demangled_name));
+ checksum_update(func, insn, &offset, sizeof(offset));
+}
+
+static int validate_branch(struct objtool_file *file, struct symbol *func,
+ struct instruction *insn, struct insn_state state);
+static int do_validate_branch(struct objtool_file *file, struct symbol *func,
+ struct instruction *insn, struct insn_state state);
+
+static int validate_insn(struct objtool_file *file, struct symbol *func,
+ struct instruction *insn, struct insn_state *statep,
+ struct instruction *prev_insn, struct instruction *next_insn,
+ bool *dead_end)
+{
+ char *alt_name __maybe_unused = NULL;
+ struct alternative *alt;
+ u8 visited;
+ int ret;
+
+ /*
+ * Any returns before the end of this function are effectively dead
+ * ends, i.e. validate_branch() has reached the end of the branch.
+ */
+ *dead_end = true;
+
+ visited = VISITED_BRANCH << statep->uaccess;
+ if (insn->visited & VISITED_BRANCH_MASK) {
+ if (!insn->hint && !insn_cfi_match(insn, &statep->cfi))
+ return 1;
+
+ if (insn->visited & visited) {
+ TRACE_INSN(insn, "already visited");
+ return 0;
+ }
+ } else {
+ nr_insns_visited++;
+ }
+
+ if (statep->noinstr)
+ statep->instr += insn->instr;
+
+ if (insn->hint) {
+ if (insn->restore) {
+ struct instruction *save_insn, *i;
+
+ i = insn;
+ save_insn = NULL;
+
+ sym_for_each_insn_continue_reverse(file, func, i) {
+ if (i->save) {
+ save_insn = i;
+ break;
}
+ }
- if (!save_insn->visited) {
- /*
- * If the restore hint insn is at the
- * beginning of a basic block and was
- * branched to from elsewhere, and the
- * save insn hasn't been visited yet,
- * defer following this branch for now.
- * It will be seen later via the
- * straight-line path.
- */
- if (!prev_insn)
- return 0;
+ if (!save_insn) {
+ WARN_INSN(insn, "no corresponding CFI save for CFI restore");
+ return 1;
+ }
- WARN_INSN(insn, "objtool isn't smart enough to handle this CFI save/restore combo");
- return 1;
+ if (!save_insn->visited) {
+ /*
+ * If the restore hint insn is at the
+ * beginning of a basic block and was
+ * branched to from elsewhere, and the
+ * save insn hasn't been visited yet,
+ * defer following this branch for now.
+ * It will be seen later via the
+ * straight-line path.
+ */
+ if (!prev_insn) {
+ TRACE_INSN(insn, "defer restore");
+ return 0;
}
- insn->cfi = save_insn->cfi;
- nr_cfi_reused++;
+ WARN_INSN(insn, "objtool isn't smart enough to handle this CFI save/restore combo");
+ return 1;
}
- state.cfi = *insn->cfi;
+ insn->cfi = save_insn->cfi;
+ nr_cfi_reused++;
+ }
+
+ statep->cfi = *insn->cfi;
+ } else {
+ /* XXX track if we actually changed statep->cfi */
+
+ if (prev_insn && !cficmp(prev_insn->cfi, &statep->cfi)) {
+ insn->cfi = prev_insn->cfi;
+ nr_cfi_reused++;
} else {
- /* XXX track if we actually changed state.cfi */
+ insn->cfi = cfi_hash_find_or_add(&statep->cfi);
+ }
+ }
- if (prev_insn && !cficmp(prev_insn->cfi, &state.cfi)) {
- insn->cfi = prev_insn->cfi;
- nr_cfi_reused++;
- } else {
- insn->cfi = cfi_hash_find_or_add(&state.cfi);
+ insn->visited |= visited;
+
+ if (propagate_alt_cfi(file, insn))
+ return 1;
+
+ if (insn->alts) {
+ for (alt = insn->alts; alt; alt = alt->next) {
+ TRACE_ALT_BEGIN(insn, alt, alt_name);
+ ret = validate_branch(file, func, alt->insn, *statep);
+ TRACE_ALT_END(insn, alt, alt_name);
+ if (ret) {
+ BT_INSN(insn, "(alt)");
+ return ret;
}
}
+ TRACE_ALT_INFO_NOADDR(insn, "/ ", "DEFAULT");
+ }
- insn->visited |= visited;
+ if (skip_alt_group(insn))
+ return 0;
- if (propagate_alt_cfi(file, insn))
+ if (handle_insn_ops(insn, next_insn, statep))
+ return 1;
+
+ switch (insn->type) {
+
+ case INSN_RETURN:
+ TRACE_INSN(insn, "return");
+ return validate_return(func, insn, statep);
+
+ case INSN_CALL:
+ case INSN_CALL_DYNAMIC:
+ if (insn->type == INSN_CALL)
+ TRACE_INSN(insn, "call");
+ else
+ TRACE_INSN(insn, "indirect call");
+
+ ret = validate_call(file, insn, statep);
+ if (ret)
+ return ret;
+
+ if (opts.stackval && func && !is_special_call(insn) &&
+ !has_valid_stack_frame(statep)) {
+ WARN_INSN(insn, "call without frame pointer save/setup");
return 1;
+ }
- if (insn->alts) {
- for (alt = insn->alts; alt; alt = alt->next) {
- ret = validate_branch(file, func, alt->insn, state);
- if (ret) {
- BT_INSN(insn, "(alt)");
- return ret;
- }
+ break;
+
+ case INSN_JUMP_CONDITIONAL:
+ case INSN_JUMP_UNCONDITIONAL:
+ if (is_sibling_call(insn)) {
+ TRACE_INSN(insn, "sibling call");
+ ret = validate_sibling_call(file, insn, statep);
+ if (ret)
+ return ret;
+
+ } else if (insn->jump_dest) {
+ if (insn->type == INSN_JUMP_UNCONDITIONAL)
+ TRACE_INSN(insn, "unconditional jump");
+ else
+ TRACE_INSN(insn, "jump taken");
+
+ ret = validate_branch(file, func, insn->jump_dest, *statep);
+ if (ret) {
+ BT_INSN(insn, "(branch)");
+ return ret;
}
}
- if (skip_alt_group(insn))
+ if (insn->type == INSN_JUMP_UNCONDITIONAL)
return 0;
- if (handle_insn_ops(insn, next_insn, &state))
- return 1;
-
- switch (insn->type) {
-
- case INSN_RETURN:
- return validate_return(func, insn, &state);
+ TRACE_INSN(insn, "jump not taken");
+ break;
- case INSN_CALL:
- case INSN_CALL_DYNAMIC:
- ret = validate_call(file, insn, &state);
+ case INSN_JUMP_DYNAMIC:
+ case INSN_JUMP_DYNAMIC_CONDITIONAL:
+ TRACE_INSN(insn, "indirect jump");
+ if (is_sibling_call(insn)) {
+ ret = validate_sibling_call(file, insn, statep);
if (ret)
return ret;
+ }
- if (opts.stackval && func && !is_special_call(insn) &&
- !has_valid_stack_frame(&state)) {
- WARN_INSN(insn, "call without frame pointer save/setup");
- return 1;
- }
+ if (insn->type == INSN_JUMP_DYNAMIC)
+ return 0;
- break;
+ break;
- case INSN_JUMP_CONDITIONAL:
- case INSN_JUMP_UNCONDITIONAL:
- if (is_sibling_call(insn)) {
- ret = validate_sibling_call(file, insn, &state);
- if (ret)
- return ret;
+ case INSN_SYSCALL:
+ TRACE_INSN(insn, "syscall");
+ if (func && (!next_insn || !next_insn->hint)) {
+ WARN_INSN(insn, "unsupported instruction in callable function");
+ return 1;
+ }
- } else if (insn->jump_dest) {
- ret = validate_branch(file, func,
- insn->jump_dest, state);
- if (ret) {
- BT_INSN(insn, "(branch)");
- return ret;
- }
- }
+ break;
- if (insn->type == INSN_JUMP_UNCONDITIONAL)
- return 0;
+ case INSN_SYSRET:
+ TRACE_INSN(insn, "sysret");
+ if (func && (!next_insn || !next_insn->hint)) {
+ WARN_INSN(insn, "unsupported instruction in callable function");
+ return 1;
+ }
+ return 0;
+
+ case INSN_STAC:
+ TRACE_INSN(insn, "stac");
+ if (!opts.uaccess)
break;
- case INSN_JUMP_DYNAMIC:
- case INSN_JUMP_DYNAMIC_CONDITIONAL:
- if (is_sibling_call(insn)) {
- ret = validate_sibling_call(file, insn, &state);
- if (ret)
- return ret;
- }
+ if (statep->uaccess) {
+ WARN_INSN(insn, "recursive UACCESS enable");
+ return 1;
+ }
- if (insn->type == INSN_JUMP_DYNAMIC)
- return 0;
+ statep->uaccess = true;
+ break;
+ case INSN_CLAC:
+ TRACE_INSN(insn, "clac");
+ if (!opts.uaccess)
break;
- case INSN_SYSCALL:
- if (func && (!next_insn || !next_insn->hint)) {
- WARN_INSN(insn, "unsupported instruction in callable function");
- return 1;
- }
+ if (!statep->uaccess && func) {
+ WARN_INSN(insn, "redundant UACCESS disable");
+ return 1;
+ }
- break;
+ if (func_uaccess_safe(func) && !statep->uaccess_stack) {
+ WARN_INSN(insn, "UACCESS-safe disables UACCESS");
+ return 1;
+ }
- case INSN_SYSRET:
- if (func && (!next_insn || !next_insn->hint)) {
- WARN_INSN(insn, "unsupported instruction in callable function");
- return 1;
- }
+ statep->uaccess = false;
+ break;
- return 0;
+ case INSN_STD:
+ TRACE_INSN(insn, "std");
+ if (statep->df) {
+ WARN_INSN(insn, "recursive STD");
+ return 1;
+ }
- case INSN_STAC:
- if (!opts.uaccess)
- break;
+ statep->df = true;
+ break;
- if (state.uaccess) {
- WARN_INSN(insn, "recursive UACCESS enable");
- return 1;
- }
+ case INSN_CLD:
+ TRACE_INSN(insn, "cld");
+ if (!statep->df && func) {
+ WARN_INSN(insn, "redundant CLD");
+ return 1;
+ }
- state.uaccess = true;
- break;
+ statep->df = false;
+ break;
- case INSN_CLAC:
- if (!opts.uaccess)
- break;
+ default:
+ break;
+ }
- if (!state.uaccess && func) {
- WARN_INSN(insn, "redundant UACCESS disable");
- return 1;
- }
+ if (insn->dead_end)
+ TRACE_INSN(insn, "dead end");
- if (func_uaccess_safe(func) && !state.uaccess_stack) {
- WARN_INSN(insn, "UACCESS-safe disables UACCESS");
- return 1;
- }
+ *dead_end = insn->dead_end;
+ return 0;
+}
- state.uaccess = false;
- break;
+/*
+ * Follow the branch starting at the given instruction, and recursively follow
+ * any other branches (jumps). Meanwhile, track the frame pointer state at
+ * each instruction and validate all the rules described in
+ * tools/objtool/Documentation/objtool.txt.
+ */
+static int do_validate_branch(struct objtool_file *file, struct symbol *func,
+ struct instruction *insn, struct insn_state state)
+{
+ struct instruction *next_insn, *prev_insn = NULL;
+ bool dead_end;
+ int ret;
- case INSN_STD:
- if (state.df) {
- WARN_INSN(insn, "recursive STD");
- return 1;
- }
+ if (func && func->ignore)
+ return 0;
- state.df = true;
- break;
+ do {
+ insn->trace = 0;
+ next_insn = next_insn_to_validate(file, insn);
- case INSN_CLD:
- if (!state.df && func) {
- WARN_INSN(insn, "redundant CLD");
- return 1;
- }
+ if (opts.checksum && func && insn->sec)
+ checksum_update_insn(file, func, insn);
- state.df = false;
- break;
+ if (func && insn_func(insn) && func != insn_func(insn)->pfunc) {
+ /* Ignore KCFI type preambles, which always fall through */
+ if (is_prefix_func(func))
+ return 0;
- default:
- break;
+ if (file->ignore_unreachables)
+ return 0;
+
+ WARN("%s() falls through to next function %s()",
+ func->name, insn_func(insn)->name);
+ func->warned = 1;
+
+ return 1;
}
- if (insn->dead_end)
- return 0;
+ ret = validate_insn(file, func, insn, &state, prev_insn, next_insn,
+ &dead_end);
+
+ if (!insn->trace) {
+ if (ret)
+ TRACE_INSN(insn, "warning (%d)", ret);
+ else
+ TRACE_INSN(insn, NULL);
+ }
- if (!next_insn) {
+ if (!dead_end && !next_insn) {
if (state.cfi.cfa.base == CFI_UNDEFINED)
return 0;
if (file->ignore_unreachables)
@@ -3795,15 +4006,28 @@ static int validate_branch(struct objtool_file *file, struct symbol *func,
WARN("%s%sunexpected end of section %s",
func ? func->name : "", func ? "(): " : "",
- sec->name);
+ insn->sec->name);
return 1;
}
prev_insn = insn;
insn = next_insn;
- }
- return 0;
+ } while (!dead_end);
+
+ return ret;
+}
+
+static int validate_branch(struct objtool_file *file, struct symbol *func,
+ struct instruction *insn, struct insn_state state)
+{
+ int ret;
+
+ trace_depth_inc();
+ ret = do_validate_branch(file, func, insn, state);
+ trace_depth_dec();
+
+ return ret;
}
static int validate_unwind_hint(struct objtool_file *file,
@@ -3811,7 +4035,13 @@ static int validate_unwind_hint(struct objtool_file *file,
struct insn_state *state)
{
if (insn->hint && !insn->visited) {
- int ret = validate_branch(file, insn_func(insn), insn, *state);
+ struct symbol *func = insn_func(insn);
+ int ret;
+
+ if (opts.checksum)
+ checksum_init(func);
+
+ ret = validate_branch(file, func, insn, *state);
if (ret)
BT_INSN(insn, "<=== (hint)");
return ret;
@@ -4002,6 +4232,37 @@ static int validate_retpoline(struct objtool_file *file)
warnings++;
}
+ if (!opts.cfi)
+ return warnings;
+
+ /*
+ * kCFI call sites look like:
+ *
+ * movl $(-0x12345678), %r10d
+ * addl -4(%r11), %r10d
+ * jz 1f
+ * ud2
+ * 1: cs call __x86_indirect_thunk_r11
+ *
+ * Verify all indirect calls are kCFI adorned by checking for the
+ * UD2. Notably, doing __nocfi calls to regular (cfi) functions is
+ * broken.
+ */
+ list_for_each_entry(insn, &file->retpoline_call_list, call_node) {
+ struct symbol *sym = insn->sym;
+
+ if (sym && (sym->type == STT_NOTYPE ||
+ sym->type == STT_FUNC) && !sym->nocfi) {
+ struct instruction *prev =
+ prev_insn_same_sym(file, insn);
+
+ if (!prev || prev->type != INSN_BUG) {
+ WARN_INSN(insn, "no-cfi indirect call!");
+ warnings++;
+ }
+ }
+ }
+
return warnings;
}
@@ -4024,7 +4285,8 @@ static bool ignore_unreachable_insn(struct objtool_file *file, struct instructio
struct instruction *prev_insn;
int i;
- if (insn->type == INSN_NOP || insn->type == INSN_TRAP || (func && func->ignore))
+ if (insn->type == INSN_NOP || insn->type == INSN_TRAP ||
+ insn->hole || (func && func->ignore))
return true;
/*
@@ -4035,47 +4297,6 @@ static bool ignore_unreachable_insn(struct objtool_file *file, struct instructio
!strcmp(insn->sec->name, ".altinstr_aux"))
return true;
- /*
- * Whole archive runs might encounter dead code from weak symbols.
- * This is where the linker will have dropped the weak symbol in
- * favour of a regular symbol, but leaves the code in place.
- *
- * In this case we'll find a piece of code (whole function) that is not
- * covered by a !section symbol. Ignore them.
- */
- if (opts.link && !func) {
- int size = find_symbol_hole_containing(insn->sec, insn->offset);
- unsigned long end = insn->offset + size;
-
- if (!size) /* not a hole */
- return false;
-
- if (size < 0) /* hole until the end */
- return true;
-
- sec_for_each_insn_continue(file, insn) {
- /*
- * If we reach a visited instruction at or before the
- * end of the hole, ignore the unreachable.
- */
- if (insn->visited)
- return true;
-
- if (insn->offset >= end)
- break;
-
- /*
- * If this hole jumps to a .cold function, mark it ignore too.
- */
- if (insn->jump_dest && insn_func(insn->jump_dest) &&
- strstr(insn_func(insn->jump_dest)->name, ".cold")) {
- insn_func(insn->jump_dest)->ignore = true;
- }
- }
-
- return false;
- }
-
if (!func)
return false;
@@ -4127,14 +4348,54 @@ static bool ignore_unreachable_insn(struct objtool_file *file, struct instructio
return false;
}
-static int add_prefix_symbol(struct objtool_file *file, struct symbol *func)
+/*
+ * For FineIBT or kCFI, a certain number of bytes preceding the function may be
+ * NOPs. Those NOPs may be rewritten at runtime and executed, so give them a
+ * proper function name: __pfx_<func>.
+ *
+ * The NOPs may not exist for the following cases:
+ *
+ * - compiler cloned functions (*.cold, *.part0, etc)
+ * - asm functions created with inline asm or without SYM_FUNC_START()
+ *
+ * Also, the function may already have a prefix from a previous objtool run
+ * (livepatch extracted functions, or manually running objtool multiple times).
+ *
+ * So return 0 if the NOPs are missing or the function already has a prefix
+ * symbol.
+ */
+static int create_prefix_symbol(struct objtool_file *file, struct symbol *func)
{
struct instruction *insn, *prev;
+ char name[SYM_NAME_LEN];
struct cfi_state *cfi;
+ if (!is_func_sym(func) || is_prefix_func(func) ||
+ func->cold || func->static_call_tramp)
+ return 0;
+
+ if ((strlen(func->name) + sizeof("__pfx_") > SYM_NAME_LEN)) {
+ WARN("%s: symbol name too long, can't create __pfx_ symbol",
+ func->name);
+ return 0;
+ }
+
+ if (snprintf_check(name, SYM_NAME_LEN, "__pfx_%s", func->name))
+ return -1;
+
+ if (file->klp) {
+ struct symbol *pfx;
+
+ pfx = find_symbol_by_offset(func->sec, func->offset - opts.prefix);
+ if (pfx && is_prefix_func(pfx) && !strcmp(pfx->name, name))
+ return 0;
+ }
+
insn = find_insn(file, func->sec, func->offset);
- if (!insn)
+ if (!insn) {
+ WARN("%s: can't find starting instruction", func->name);
return -1;
+ }
for (prev = prev_insn_same_sec(file, insn);
prev;
@@ -4142,22 +4403,27 @@ static int add_prefix_symbol(struct objtool_file *file, struct symbol *func)
u64 offset;
if (prev->type != INSN_NOP)
- return -1;
+ return 0;
offset = func->offset - prev->offset;
if (offset > opts.prefix)
- return -1;
+ return 0;
if (offset < opts.prefix)
continue;
- elf_create_prefix_symbol(file->elf, func, opts.prefix);
+ if (!elf_create_symbol(file->elf, name, func->sec,
+ GELF_ST_BIND(func->sym.st_info),
+ GELF_ST_TYPE(func->sym.st_info),
+ prev->offset, opts.prefix))
+ return -1;
+
break;
}
if (!prev)
- return -1;
+ return 0;
if (!insn->cfi) {
/*
@@ -4175,20 +4441,18 @@ static int add_prefix_symbol(struct objtool_file *file, struct symbol *func)
return 0;
}
-static int add_prefix_symbols(struct objtool_file *file)
+static int create_prefix_symbols(struct objtool_file *file)
{
struct section *sec;
struct symbol *func;
- for_each_sec(file, sec) {
- if (!(sec->sh.sh_flags & SHF_EXECINSTR))
+ for_each_sec(file->elf, sec) {
+ if (!is_text_sec(sec))
continue;
sec_for_each_sym(sec, func) {
- if (func->type != STT_FUNC)
- continue;
-
- add_prefix_symbol(file, func);
+ if (create_prefix_symbol(file, func))
+ return -1;
}
}
@@ -4199,6 +4463,7 @@ static int validate_symbol(struct objtool_file *file, struct section *sec,
struct symbol *sym, struct insn_state *state)
{
struct instruction *insn;
+ struct symbol *func;
int ret;
if (!sym->len) {
@@ -4216,9 +4481,26 @@ static int validate_symbol(struct objtool_file *file, struct section *sec,
if (opts.uaccess)
state->uaccess = sym->uaccess_safe;
- ret = validate_branch(file, insn_func(insn), insn, *state);
+ func = insn_func(insn);
+
+ if (opts.checksum)
+ checksum_init(func);
+
+ if (opts.trace && !fnmatch(opts.trace, sym->name, 0)) {
+ trace_enable();
+ TRACE("%s: validation begin\n", sym->name);
+ }
+
+ ret = validate_branch(file, func, insn, *state);
if (ret)
BT_INSN(insn, "<=== (sym)");
+
+ TRACE("%s: validation %s\n\n", sym->name, ret ? "failed" : "end");
+ trace_disable();
+
+ if (opts.checksum)
+ checksum_finish(func);
+
return ret;
}
@@ -4229,7 +4511,7 @@ static int validate_section(struct objtool_file *file, struct section *sec)
int warnings = 0;
sec_for_each_sym(sec, func) {
- if (func->type != STT_FUNC)
+ if (!is_func_sym(func))
continue;
init_insn_state(file, &state, sec);
@@ -4272,8 +4554,8 @@ static int validate_functions(struct objtool_file *file)
struct section *sec;
int warnings = 0;
- for_each_sec(file, sec) {
- if (!(sec->sh.sh_flags & SHF_EXECINSTR))
+ for_each_sec(file->elf, sec) {
+ if (!is_text_sec(sec))
continue;
warnings += validate_section(file, sec);
@@ -4400,12 +4682,7 @@ static int validate_ibt_insn(struct objtool_file *file, struct instruction *insn
reloc_offset(reloc) + 1,
(insn->offset + insn->len) - (reloc_offset(reloc) + 1))) {
- off = reloc->sym->offset;
- if (reloc_type(reloc) == R_X86_64_PC32 ||
- reloc_type(reloc) == R_X86_64_PLT32)
- off += arch_dest_reloc_offset(reloc_addend(reloc));
- else
- off += reloc_addend(reloc);
+ off = reloc->sym->offset + arch_insn_adjusted_addend(insn, reloc);
dest = find_insn(file, reloc->sym->sec, off);
if (!dest)
@@ -4456,10 +4733,10 @@ static int validate_ibt(struct objtool_file *file)
for_each_insn(file, insn)
warnings += validate_ibt_insn(file, insn);
- for_each_sec(file, sec) {
+ for_each_sec(file->elf, sec) {
/* Already done by validate_ibt_insn() */
- if (sec->sh.sh_flags & SHF_EXECINSTR)
+ if (is_text_sec(sec))
continue;
if (!sec->rsec)
@@ -4474,8 +4751,8 @@ static int validate_ibt(struct objtool_file *file)
!strncmp(sec->name, ".debug", 6) ||
!strcmp(sec->name, ".altinstructions") ||
!strcmp(sec->name, ".ibt_endbr_seal") ||
+ !strcmp(sec->name, ".kcfi_traps") ||
!strcmp(sec->name, ".orc_unwind_ip") ||
- !strcmp(sec->name, ".parainstructions") ||
!strcmp(sec->name, ".retpoline_sites") ||
!strcmp(sec->name, ".smp_locks") ||
!strcmp(sec->name, ".static_call_sites") ||
@@ -4484,12 +4761,14 @@ static int validate_ibt(struct objtool_file *file)
!strcmp(sec->name, "__bug_table") ||
!strcmp(sec->name, "__ex_table") ||
!strcmp(sec->name, "__jump_table") ||
+ !strcmp(sec->name, "__klp_funcs") ||
!strcmp(sec->name, "__mcount_loc") ||
- !strcmp(sec->name, ".kcfi_traps") ||
!strcmp(sec->name, ".llvm.call-graph-profile") ||
!strcmp(sec->name, ".llvm_bb_addr_map") ||
!strcmp(sec->name, "__tracepoints") ||
- strstr(sec->name, "__patchable_function_entries"))
+ !strcmp(sec->name, ".return_sites") ||
+ !strcmp(sec->name, ".call_sites") ||
+ !strcmp(sec->name, "__patchable_function_entries"))
continue;
for_each_reloc(sec->rsec, reloc)
@@ -4563,85 +4842,45 @@ static int validate_reachable_instructions(struct objtool_file *file)
return warnings;
}
-/* 'funcs' is a space-separated list of function names */
-static void disas_funcs(const char *funcs)
+__weak bool arch_absolute_reloc(struct elf *elf, struct reloc *reloc)
{
- const char *objdump_str, *cross_compile;
- int size, ret;
- char *cmd;
-
- cross_compile = getenv("CROSS_COMPILE");
- if (!cross_compile)
- cross_compile = "";
-
- objdump_str = "%sobjdump -wdr %s | gawk -M -v _funcs='%s' '"
- "BEGIN { split(_funcs, funcs); }"
- "/^$/ { func_match = 0; }"
- "/<.*>:/ { "
- "f = gensub(/.*<(.*)>:/, \"\\\\1\", 1);"
- "for (i in funcs) {"
- "if (funcs[i] == f) {"
- "func_match = 1;"
- "base = strtonum(\"0x\" $1);"
- "break;"
- "}"
- "}"
- "}"
- "{"
- "if (func_match) {"
- "addr = strtonum(\"0x\" $1);"
- "printf(\"%%04x \", addr - base);"
- "print;"
- "}"
- "}' 1>&2";
-
- /* fake snprintf() to calculate the size */
- size = snprintf(NULL, 0, objdump_str, cross_compile, objname, funcs) + 1;
- if (size <= 0) {
- WARN("objdump string size calculation failed");
- return;
- }
-
- cmd = malloc(size);
+ unsigned int type = reloc_type(reloc);
+ size_t sz = elf_addr_size(elf);
- /* real snprintf() */
- snprintf(cmd, size, objdump_str, cross_compile, objname, funcs);
- ret = system(cmd);
- if (ret) {
- WARN("disassembly failed: %d", ret);
- return;
- }
+ return (sz == 8) ? (type == R_ABS64) : (type == R_ABS32);
}
-static void disas_warned_funcs(struct objtool_file *file)
+static int check_abs_references(struct objtool_file *file)
{
- struct symbol *sym;
- char *funcs = NULL, *tmp;
-
- for_each_sym(file, sym) {
- if (sym->warned) {
- if (!funcs) {
- funcs = malloc(strlen(sym->name) + 1);
- if (!funcs) {
- ERROR_GLIBC("malloc");
- return;
- }
- strcpy(funcs, sym->name);
- } else {
- tmp = malloc(strlen(funcs) + strlen(sym->name) + 2);
- if (!tmp) {
- ERROR_GLIBC("malloc");
- return;
- }
- sprintf(tmp, "%s %s", funcs, sym->name);
- free(funcs);
- funcs = tmp;
+ struct section *sec;
+ struct reloc *reloc;
+ int ret = 0;
+
+ for_each_sec(file->elf, sec) {
+ /* absolute references in non-loadable sections are fine */
+ if (!(sec->sh.sh_flags & SHF_ALLOC))
+ continue;
+
+ /* section must have an associated .rela section */
+ if (!sec->rsec)
+ continue;
+
+ /*
+ * Special case for compiler generated metadata that is not
+ * consumed until after boot.
+ */
+ if (!strcmp(sec->name, "__patchable_function_entries"))
+ continue;
+
+ for_each_reloc(sec->rsec, reloc) {
+ if (arch_absolute_reloc(file->elf, reloc)) {
+ WARN("section %s has absolute relocation at offset 0x%llx",
+ sec->name, (unsigned long long)reloc_offset(reloc));
+ ret++;
}
}
}
-
- if (funcs)
- disas_funcs(funcs);
+ return ret;
}
struct insn_chunk {
@@ -4672,10 +4911,35 @@ static void free_insns(struct objtool_file *file)
free(chunk->addr);
}
+const char *objtool_disas_insn(struct instruction *insn)
+{
+ struct disas_context *dctx = objtool_disas_ctx;
+
+ if (!dctx)
+ return "";
+
+ disas_insn(dctx, insn);
+ return disas_result(dctx);
+}
+
int check(struct objtool_file *file)
{
+ struct disas_context *disas_ctx = NULL;
int ret = 0, warnings = 0;
+ /*
+ * Create a disassembly context if we might disassemble any
+ * instruction or function.
+ */
+ if (opts.verbose || opts.backtrace || opts.trace || opts.disas) {
+ disas_ctx = disas_context_create(file);
+ if (!disas_ctx) {
+ opts.disas = false;
+ opts.trace = false;
+ }
+ objtool_disas_ctx = disas_ctx;
+ }
+
arch_initial_func_cfi_state(&initial_func_cfi);
init_cfi_state(&init_cfi);
init_cfi_state(&func_cfi);
@@ -4691,6 +4955,10 @@ int check(struct objtool_file *file)
cfi_hash_add(&init_cfi);
cfi_hash_add(&func_cfi);
+ ret = checksum_debug_init(file);
+ if (ret)
+ goto out;
+
ret = decode_sections(file);
if (ret)
goto out;
@@ -4701,7 +4969,7 @@ int check(struct objtool_file *file)
if (opts.retpoline)
warnings += validate_retpoline(file);
- if (opts.stackval || opts.orc || opts.uaccess) {
+ if (validate_branch_enabled()) {
int w = 0;
w += validate_functions(file);
@@ -4766,7 +5034,7 @@ int check(struct objtool_file *file)
}
if (opts.prefix) {
- ret = add_prefix_symbols(file);
+ ret = create_prefix_symbols(file);
if (ret)
goto out;
}
@@ -4777,14 +5045,21 @@ int check(struct objtool_file *file)
goto out;
}
+ if (opts.noabs)
+ warnings += check_abs_references(file);
+
+ if (opts.checksum) {
+ ret = create_sym_checksum_section(file);
+ if (ret)
+ goto out;
+ }
+
if (opts.orc && nr_insns) {
ret = orc_create(file);
if (ret)
goto out;
}
- free_insns(file);
-
if (opts.stats) {
printf("nr_insns_visited: %ld\n", nr_insns_visited);
printf("nr_cfi: %ld\n", nr_cfi);
@@ -4793,18 +5068,32 @@ int check(struct objtool_file *file)
}
out:
- if (!ret && !warnings)
- return 0;
+ if (ret || warnings) {
+ if (opts.werror && warnings)
+ ret = 1;
- if (opts.werror && warnings)
- ret = 1;
+ if (opts.verbose) {
+ if (opts.werror && warnings)
+ WARN("%d warning(s) upgraded to errors", warnings);
+ disas_warned_funcs(disas_ctx);
+ }
+ }
- if (opts.verbose) {
- if (opts.werror && warnings)
- WARN("%d warning(s) upgraded to errors", warnings);
- print_args();
- disas_warned_funcs(file);
+ if (opts.disas)
+ disas_funcs(disas_ctx);
+
+ if (disas_ctx) {
+ disas_context_destroy(disas_ctx);
+ objtool_disas_ctx = NULL;
}
+ free_insns(file);
+
+ if (!ret && !warnings)
+ return 0;
+
+ if (opts.backup && make_backup())
+ return 1;
+
return ret;
}
diff --git a/tools/objtool/disas.c b/tools/objtool/disas.c
new file mode 100644
index 000000000000..2b5059f55e40
--- /dev/null
+++ b/tools/objtool/disas.c
@@ -0,0 +1,1248 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2015-2017 Josh Poimboeuf <jpoimboe@redhat.com>
+ */
+
+#define _GNU_SOURCE
+#include <fnmatch.h>
+
+#include <objtool/arch.h>
+#include <objtool/check.h>
+#include <objtool/disas.h>
+#include <objtool/special.h>
+#include <objtool/warn.h>
+
+#include <bfd.h>
+#include <linux/string.h>
+#include <tools/dis-asm-compat.h>
+
+/*
+ * Size of the buffer for storing the result of disassembling
+ * a single instruction.
+ */
+#define DISAS_RESULT_SIZE 1024
+
+struct disas_context {
+ struct objtool_file *file;
+ struct instruction *insn;
+ bool alt_applied;
+ char result[DISAS_RESULT_SIZE];
+ disassembler_ftype disassembler;
+ struct disassemble_info info;
+};
+
+/*
+ * Maximum number of alternatives
+ */
+#define DISAS_ALT_MAX 5
+
+/*
+ * Maximum number of instructions per alternative
+ */
+#define DISAS_ALT_INSN_MAX 50
+
+/*
+ * Information to disassemble an alternative
+ */
+struct disas_alt {
+ struct instruction *orig_insn; /* original instruction */
+ struct alternative *alt; /* alternative or NULL if default code */
+ char *name; /* name for this alternative */
+ int width; /* formatting width */
+ struct {
+ char *str; /* instruction string */
+ int offset; /* instruction offset */
+ int nops; /* number of nops */
+ } insn[DISAS_ALT_INSN_MAX]; /* alternative instructions */
+ int insn_idx; /* index of the next instruction to print */
+};
+
+#define DALT_DEFAULT(dalt) (!(dalt)->alt)
+#define DALT_INSN(dalt) (DALT_DEFAULT(dalt) ? (dalt)->orig_insn : (dalt)->alt->insn)
+#define DALT_GROUP(dalt) (DALT_INSN(dalt)->alt_group)
+#define DALT_ALTID(dalt) ((dalt)->orig_insn->offset)
+
+#define ALT_FLAGS_SHIFT 16
+#define ALT_FLAG_NOT (1 << 0)
+#define ALT_FLAG_DIRECT_CALL (1 << 1)
+#define ALT_FEATURE_MASK ((1 << ALT_FLAGS_SHIFT) - 1)
+
+static int alt_feature(unsigned int ft_flags)
+{
+ return (ft_flags & ALT_FEATURE_MASK);
+}
+
+static int alt_flags(unsigned int ft_flags)
+{
+ return (ft_flags >> ALT_FLAGS_SHIFT);
+}
+
+/*
+ * Wrapper around asprintf() to allocate and format a string.
+ * Return the allocated string or NULL on error.
+ */
+static char *strfmt(const char *fmt, ...)
+{
+ va_list ap;
+ char *str;
+ int rv;
+
+ va_start(ap, fmt);
+ rv = vasprintf(&str, fmt, ap);
+ va_end(ap);
+
+ return rv == -1 ? NULL : str;
+}
+
+static int sprint_name(char *str, const char *name, unsigned long offset)
+{
+ int len;
+
+ if (offset)
+ len = sprintf(str, "%s+0x%lx", name, offset);
+ else
+ len = sprintf(str, "%s", name);
+
+ return len;
+}
+
+#define DINFO_FPRINTF(dinfo, ...) \
+ ((*(dinfo)->fprintf_func)((dinfo)->stream, __VA_ARGS__))
+
+static int disas_result_fprintf(struct disas_context *dctx,
+ const char *fmt, va_list ap)
+{
+ char *buf = dctx->result;
+ int avail, len;
+
+ len = strlen(buf);
+ if (len >= DISAS_RESULT_SIZE - 1) {
+ WARN_FUNC(dctx->insn->sec, dctx->insn->offset,
+ "disassembly buffer is full");
+ return -1;
+ }
+ avail = DISAS_RESULT_SIZE - len;
+
+ len = vsnprintf(buf + len, avail, fmt, ap);
+ if (len < 0 || len >= avail) {
+ WARN_FUNC(dctx->insn->sec, dctx->insn->offset,
+ "disassembly buffer is truncated");
+ return -1;
+ }
+
+ return 0;
+}
+
+static int disas_fprintf(void *stream, const char *fmt, ...)
+{
+ va_list arg;
+ int rv;
+
+ va_start(arg, fmt);
+ rv = disas_result_fprintf(stream, fmt, arg);
+ va_end(arg);
+
+ return rv;
+}
+
+/*
+ * For init_disassemble_info_compat().
+ */
+static int disas_fprintf_styled(void *stream,
+ enum disassembler_style style,
+ const char *fmt, ...)
+{
+ va_list arg;
+ int rv;
+
+ va_start(arg, fmt);
+ rv = disas_result_fprintf(stream, fmt, arg);
+ va_end(arg);
+
+ return rv;
+}
+
+static void disas_print_addr_sym(struct section *sec, struct symbol *sym,
+ bfd_vma addr, struct disassemble_info *dinfo)
+{
+ char symstr[1024];
+ char *str;
+
+ if (sym) {
+ sprint_name(symstr, sym->name, addr - sym->offset);
+ DINFO_FPRINTF(dinfo, "0x%lx <%s>", addr, symstr);
+ } else {
+ str = offstr(sec, addr);
+ DINFO_FPRINTF(dinfo, "0x%lx <%s>", addr, str);
+ free(str);
+ }
+}
+
+static bool disas_print_addr_alt(bfd_vma addr, struct disassemble_info *dinfo)
+{
+ struct disas_context *dctx = dinfo->application_data;
+ struct instruction *orig_first_insn;
+ struct alt_group *alt_group;
+ unsigned long offset;
+ struct symbol *sym;
+
+ /*
+ * Check if we are processing an alternative at the original
+ * instruction address (i.e. if alt_applied is true) and if
+ * we are referencing an address inside the alternative.
+ *
+ * For example, this happens if there is a branch inside an
+ * alternative. In that case, the address should be updated
+ * to a reference inside the original instruction flow.
+ */
+ if (!dctx->alt_applied)
+ return false;
+
+ alt_group = dctx->insn->alt_group;
+ if (!alt_group || !alt_group->orig_group ||
+ addr < alt_group->first_insn->offset ||
+ addr > alt_group->last_insn->offset)
+ return false;
+
+ orig_first_insn = alt_group->orig_group->first_insn;
+ offset = addr - alt_group->first_insn->offset;
+
+ addr = orig_first_insn->offset + offset;
+ sym = orig_first_insn->sym;
+
+ disas_print_addr_sym(orig_first_insn->sec, sym, addr, dinfo);
+
+ return true;
+}
+
+static void disas_print_addr_noreloc(bfd_vma addr,
+ struct disassemble_info *dinfo)
+{
+ struct disas_context *dctx = dinfo->application_data;
+ struct instruction *insn = dctx->insn;
+ struct symbol *sym = NULL;
+
+ if (disas_print_addr_alt(addr, dinfo))
+ return;
+
+ if (insn->sym && addr >= insn->sym->offset &&
+ addr < insn->sym->offset + insn->sym->len) {
+ sym = insn->sym;
+ }
+
+ disas_print_addr_sym(insn->sec, sym, addr, dinfo);
+}
+
+static void disas_print_addr_reloc(bfd_vma addr, struct disassemble_info *dinfo)
+{
+ struct disas_context *dctx = dinfo->application_data;
+ struct instruction *insn = dctx->insn;
+ unsigned long offset;
+ struct reloc *reloc;
+ char symstr[1024];
+ char *str;
+
+ reloc = find_reloc_by_dest_range(dctx->file->elf, insn->sec,
+ insn->offset, insn->len);
+ if (!reloc) {
+ /*
+ * There is no relocation for this instruction although
+ * the address to resolve points to the next instruction.
+ * So this is an effective reference to the next IP, for
+ * example: "lea 0x0(%rip),%rdi". The kernel can reference
+ * the next IP with _THIS_IP_ macro.
+ */
+ DINFO_FPRINTF(dinfo, "0x%lx <_THIS_IP_>", addr);
+ return;
+ }
+
+ offset = arch_insn_adjusted_addend(insn, reloc);
+
+ /*
+ * If the relocation symbol is a section name (for example ".bss")
+ * then we try to further resolve the name.
+ */
+ if (reloc->sym->type == STT_SECTION) {
+ str = offstr(reloc->sym->sec, reloc->sym->offset + offset);
+ DINFO_FPRINTF(dinfo, "0x%lx <%s>", addr, str);
+ free(str);
+ } else {
+ sprint_name(symstr, reloc->sym->name, offset);
+ DINFO_FPRINTF(dinfo, "0x%lx <%s>", addr, symstr);
+ }
+}
+
+/*
+ * Resolve an address into a "<symbol>+<offset>" string.
+ */
+static void disas_print_address(bfd_vma addr, struct disassemble_info *dinfo)
+{
+ struct disas_context *dctx = dinfo->application_data;
+ struct instruction *insn = dctx->insn;
+ struct instruction *jump_dest;
+ struct symbol *sym;
+ bool is_reloc;
+
+ /*
+ * If the instruction is a call/jump and it references a
+ * destination then this is likely the address we are looking
+ * up. So check it first.
+ */
+ jump_dest = insn->jump_dest;
+ if (jump_dest && jump_dest->sym && jump_dest->offset == addr) {
+ if (!disas_print_addr_alt(addr, dinfo))
+ disas_print_addr_sym(jump_dest->sec, jump_dest->sym,
+ addr, dinfo);
+ return;
+ }
+
+ /*
+ * If the address points to the next instruction then there is
+ * probably a relocation. It can be a false positive when the
+ * current instruction is referencing the address of the next
+ * instruction. This particular case will be handled in
+ * disas_print_addr_reloc().
+ */
+ is_reloc = (addr == insn->offset + insn->len);
+
+ /*
+ * The call destination offset can be the address we are looking
+ * up, or 0 if there is a relocation.
+ */
+ sym = insn_call_dest(insn);
+ if (sym && (sym->offset == addr || (sym->offset == 0 && is_reloc))) {
+ DINFO_FPRINTF(dinfo, "0x%lx <%s>", addr, sym->name);
+ return;
+ }
+
+ if (!is_reloc)
+ disas_print_addr_noreloc(addr, dinfo);
+ else
+ disas_print_addr_reloc(addr, dinfo);
+}
+
+/*
+ * Initialize disassemble info arch, mach (32 or 64-bit) and options.
+ */
+int disas_info_init(struct disassemble_info *dinfo,
+ int arch, int mach32, int mach64,
+ const char *options)
+{
+ struct disas_context *dctx = dinfo->application_data;
+ struct objtool_file *file = dctx->file;
+
+ dinfo->arch = arch;
+
+ switch (file->elf->ehdr.e_ident[EI_CLASS]) {
+ case ELFCLASS32:
+ dinfo->mach = mach32;
+ break;
+ case ELFCLASS64:
+ dinfo->mach = mach64;
+ break;
+ default:
+ return -1;
+ }
+
+ dinfo->disassembler_options = options;
+
+ return 0;
+}
+
+struct disas_context *disas_context_create(struct objtool_file *file)
+{
+ struct disas_context *dctx;
+ struct disassemble_info *dinfo;
+ int err;
+
+ dctx = malloc(sizeof(*dctx));
+ if (!dctx) {
+ WARN("failed to allocate disassembly context");
+ return NULL;
+ }
+
+ dctx->file = file;
+ dinfo = &dctx->info;
+
+ init_disassemble_info_compat(dinfo, dctx,
+ disas_fprintf, disas_fprintf_styled);
+
+ dinfo->read_memory_func = buffer_read_memory;
+ dinfo->print_address_func = disas_print_address;
+ dinfo->application_data = dctx;
+
+ /*
+ * bfd_openr() is not used to avoid doing ELF data processing
+ * and caching that has already being done. Here, we just need
+ * to identify the target file so we call an arch specific
+ * function to fill some disassemble info (arch, mach).
+ */
+
+ dinfo->arch = bfd_arch_unknown;
+ dinfo->mach = 0;
+
+ err = arch_disas_info_init(dinfo);
+ if (err || dinfo->arch == bfd_arch_unknown || dinfo->mach == 0) {
+ WARN("failed to init disassembly arch");
+ goto error;
+ }
+
+ dinfo->endian = (file->elf->ehdr.e_ident[EI_DATA] == ELFDATA2MSB) ?
+ BFD_ENDIAN_BIG : BFD_ENDIAN_LITTLE;
+
+ disassemble_init_for_target(dinfo);
+
+ dctx->disassembler = disassembler(dinfo->arch,
+ dinfo->endian == BFD_ENDIAN_BIG,
+ dinfo->mach, NULL);
+ if (!dctx->disassembler) {
+ WARN("failed to create disassembler function");
+ goto error;
+ }
+
+ return dctx;
+
+error:
+ free(dctx);
+ return NULL;
+}
+
+void disas_context_destroy(struct disas_context *dctx)
+{
+ free(dctx);
+}
+
+char *disas_result(struct disas_context *dctx)
+{
+ return dctx->result;
+}
+
+#define DISAS_INSN_OFFSET_SPACE 10
+#define DISAS_INSN_SPACE 60
+
+#define DISAS_PRINSN(dctx, insn, depth) \
+ disas_print_insn(stdout, dctx, insn, depth, "\n")
+
+/*
+ * Print a message in the instruction flow. If sec is not NULL then the
+ * address at the section offset is printed in addition of the message,
+ * otherwise only the message is printed.
+ */
+static int disas_vprint(FILE *stream, struct section *sec, unsigned long offset,
+ int depth, const char *format, va_list ap)
+{
+ const char *addr_str;
+ int i, n;
+ int len;
+
+ len = sym_name_max_len + DISAS_INSN_OFFSET_SPACE;
+ if (depth < 0) {
+ len += depth;
+ depth = 0;
+ }
+
+ n = 0;
+
+ if (sec) {
+ addr_str = offstr(sec, offset);
+ n += fprintf(stream, "%6lx: %-*s ", offset, len, addr_str);
+ free((char *)addr_str);
+ } else {
+ len += DISAS_INSN_OFFSET_SPACE + 1;
+ n += fprintf(stream, "%-*s", len, "");
+ }
+
+ /* print vertical bars to show the code flow */
+ for (i = 0; i < depth; i++)
+ n += fprintf(stream, "| ");
+
+ if (format)
+ n += vfprintf(stream, format, ap);
+
+ return n;
+}
+
+static int disas_print(FILE *stream, struct section *sec, unsigned long offset,
+ int depth, const char *format, ...)
+{
+ va_list args;
+ int len;
+
+ va_start(args, format);
+ len = disas_vprint(stream, sec, offset, depth, format, args);
+ va_end(args);
+
+ return len;
+}
+
+/*
+ * Print a message in the instruction flow. If insn is not NULL then
+ * the instruction address is printed in addition of the message,
+ * otherwise only the message is printed. In all cases, the instruction
+ * itself is not printed.
+ */
+void disas_print_info(FILE *stream, struct instruction *insn, int depth,
+ const char *format, ...)
+{
+ struct section *sec;
+ unsigned long off;
+ va_list args;
+
+ if (insn) {
+ sec = insn->sec;
+ off = insn->offset;
+ } else {
+ sec = NULL;
+ off = 0;
+ }
+
+ va_start(args, format);
+ disas_vprint(stream, sec, off, depth, format, args);
+ va_end(args);
+}
+
+/*
+ * Print an instruction address (offset and function), the instruction itself
+ * and an optional message.
+ */
+void disas_print_insn(FILE *stream, struct disas_context *dctx,
+ struct instruction *insn, int depth,
+ const char *format, ...)
+{
+ char fake_nop_insn[32];
+ const char *insn_str;
+ bool fake_nop;
+ va_list args;
+ int len;
+
+ /*
+ * Alternative can insert a fake nop, sometimes with no
+ * associated section so nothing to disassemble.
+ */
+ fake_nop = (!insn->sec && insn->type == INSN_NOP);
+ if (fake_nop) {
+ snprintf(fake_nop_insn, 32, "<fake nop> (%d bytes)", insn->len);
+ insn_str = fake_nop_insn;
+ } else {
+ disas_insn(dctx, insn);
+ insn_str = disas_result(dctx);
+ }
+
+ /* print the instruction */
+ len = (depth + 1) * 2 < DISAS_INSN_SPACE ? DISAS_INSN_SPACE - (depth+1) * 2 : 1;
+ disas_print_info(stream, insn, depth, "%-*s", len, insn_str);
+
+ /* print message if any */
+ if (!format)
+ return;
+
+ if (strcmp(format, "\n") == 0) {
+ fprintf(stream, "\n");
+ return;
+ }
+
+ fprintf(stream, " - ");
+ va_start(args, format);
+ vfprintf(stream, format, args);
+ va_end(args);
+}
+
+/*
+ * Disassemble a single instruction. Return the size of the instruction.
+ *
+ * If alt_applied is true then insn should be an instruction from of an
+ * alternative (i.e. insn->alt_group != NULL), and it is disassembled
+ * at the location of the original code it is replacing. When the
+ * instruction references any address inside the alternative then
+ * these references will be re-adjusted to replace the original code.
+ */
+static size_t disas_insn_common(struct disas_context *dctx,
+ struct instruction *insn,
+ bool alt_applied)
+{
+ disassembler_ftype disasm = dctx->disassembler;
+ struct disassemble_info *dinfo = &dctx->info;
+
+ dctx->insn = insn;
+ dctx->alt_applied = alt_applied;
+ dctx->result[0] = '\0';
+
+ if (insn->type == INSN_NOP) {
+ DINFO_FPRINTF(dinfo, "nop%d", insn->len);
+ return insn->len;
+ }
+
+ /*
+ * Set the disassembler buffer to read data from the section
+ * containing the instruction to disassemble.
+ */
+ dinfo->buffer = insn->sec->data->d_buf;
+ dinfo->buffer_vma = 0;
+ dinfo->buffer_length = insn->sec->sh.sh_size;
+
+ return disasm(insn->offset, &dctx->info);
+}
+
+size_t disas_insn(struct disas_context *dctx, struct instruction *insn)
+{
+ return disas_insn_common(dctx, insn, false);
+}
+
+static size_t disas_insn_alt(struct disas_context *dctx,
+ struct instruction *insn)
+{
+ return disas_insn_common(dctx, insn, true);
+}
+
+static struct instruction *next_insn_same_alt(struct objtool_file *file,
+ struct alt_group *alt_grp,
+ struct instruction *insn)
+{
+ if (alt_grp->last_insn == insn || alt_grp->nop == insn)
+ return NULL;
+
+ return next_insn_same_sec(file, insn);
+}
+
+#define alt_for_each_insn(file, alt_grp, insn) \
+ for (insn = alt_grp->first_insn; \
+ insn; \
+ insn = next_insn_same_alt(file, alt_grp, insn))
+
+/*
+ * Provide a name for the type of alternatives present at the
+ * specified instruction.
+ *
+ * An instruction can have alternatives with different types, for
+ * example alternative instructions and an exception table. In that
+ * case the name for the alternative instructions type is used.
+ *
+ * Return NULL if the instruction as no alternative.
+ */
+const char *disas_alt_type_name(struct instruction *insn)
+{
+ struct alternative *alt;
+ const char *name;
+
+ name = NULL;
+ for (alt = insn->alts; alt; alt = alt->next) {
+ if (alt->type == ALT_TYPE_INSTRUCTIONS) {
+ name = "alternative";
+ break;
+ }
+
+ switch (alt->type) {
+ case ALT_TYPE_EX_TABLE:
+ name = "ex_table";
+ break;
+ case ALT_TYPE_JUMP_TABLE:
+ name = "jump_table";
+ break;
+ default:
+ name = "unknown";
+ break;
+ }
+ }
+
+ return name;
+}
+
+/*
+ * Provide a name for an alternative.
+ */
+char *disas_alt_name(struct alternative *alt)
+{
+ char pfx[4] = { 0 };
+ char *str = NULL;
+ const char *name;
+ int feature;
+ int flags;
+ int num;
+
+ switch (alt->type) {
+
+ case ALT_TYPE_EX_TABLE:
+ str = strdup("EXCEPTION");
+ break;
+
+ case ALT_TYPE_JUMP_TABLE:
+ str = strdup("JUMP");
+ break;
+
+ case ALT_TYPE_INSTRUCTIONS:
+ /*
+ * This is a non-default group alternative. Create a name
+ * based on the feature and flags associated with this
+ * alternative. Use either the feature name (it is available)
+ * or the feature number. And add a prefix to show the flags
+ * used.
+ *
+ * Prefix flags characters:
+ *
+ * '!' alternative used when feature not enabled
+ * '+' direct call alternative
+ * '?' unknown flag
+ */
+
+ if (!alt->insn->alt_group)
+ return NULL;
+
+ feature = alt->insn->alt_group->feature;
+ num = alt_feature(feature);
+ flags = alt_flags(feature);
+ str = pfx;
+
+ if (flags & ~(ALT_FLAG_NOT | ALT_FLAG_DIRECT_CALL))
+ *str++ = '?';
+ if (flags & ALT_FLAG_DIRECT_CALL)
+ *str++ = '+';
+ if (flags & ALT_FLAG_NOT)
+ *str++ = '!';
+
+ name = arch_cpu_feature_name(num);
+ if (!name)
+ str = strfmt("%sFEATURE 0x%X", pfx, num);
+ else
+ str = strfmt("%s%s", pfx, name);
+
+ break;
+ }
+
+ return str;
+}
+
+/*
+ * Initialize an alternative. The default alternative should be initialized
+ * with alt=NULL.
+ */
+static int disas_alt_init(struct disas_alt *dalt,
+ struct instruction *orig_insn,
+ struct alternative *alt)
+{
+ dalt->orig_insn = orig_insn;
+ dalt->alt = alt;
+ dalt->insn_idx = 0;
+ dalt->name = alt ? disas_alt_name(alt) : strdup("DEFAULT");
+ if (!dalt->name)
+ return -1;
+ dalt->width = strlen(dalt->name);
+
+ return 0;
+}
+
+static int disas_alt_add_insn(struct disas_alt *dalt, int index, char *insn_str,
+ int offset, int nops)
+{
+ int len;
+
+ if (index >= DISAS_ALT_INSN_MAX) {
+ WARN("Alternative %lx.%s has more instructions than supported",
+ DALT_ALTID(dalt), dalt->name);
+ return -1;
+ }
+
+ len = strlen(insn_str);
+ dalt->insn[index].str = insn_str;
+ dalt->insn[index].offset = offset;
+ dalt->insn[index].nops = nops;
+ if (len > dalt->width)
+ dalt->width = len;
+
+ return 0;
+}
+
+static int disas_alt_jump(struct disas_alt *dalt)
+{
+ struct instruction *orig_insn;
+ struct instruction *dest_insn;
+ char suffix[2] = { 0 };
+ char *str;
+ int nops;
+
+ orig_insn = dalt->orig_insn;
+ dest_insn = dalt->alt->insn;
+
+ if (orig_insn->type == INSN_NOP) {
+ if (orig_insn->len == 5)
+ suffix[0] = 'q';
+ str = strfmt("jmp%-3s %lx <%s+0x%lx>", suffix,
+ dest_insn->offset, dest_insn->sym->name,
+ dest_insn->offset - dest_insn->sym->offset);
+ nops = 0;
+ } else {
+ str = strfmt("nop%d", orig_insn->len);
+ nops = orig_insn->len;
+ }
+
+ if (!str)
+ return -1;
+
+ disas_alt_add_insn(dalt, 0, str, 0, nops);
+
+ return 1;
+}
+
+/*
+ * Disassemble an exception table alternative.
+ */
+static int disas_alt_extable(struct disas_alt *dalt)
+{
+ struct instruction *alt_insn;
+ char *str;
+
+ alt_insn = dalt->alt->insn;
+ str = strfmt("resume at 0x%lx <%s+0x%lx>",
+ alt_insn->offset, alt_insn->sym->name,
+ alt_insn->offset - alt_insn->sym->offset);
+ if (!str)
+ return -1;
+
+ disas_alt_add_insn(dalt, 0, str, 0, 0);
+
+ return 1;
+}
+
+/*
+ * Disassemble an alternative and store instructions in the disas_alt
+ * structure. Return the number of instructions in the alternative.
+ */
+static int disas_alt_group(struct disas_context *dctx, struct disas_alt *dalt)
+{
+ struct objtool_file *file;
+ struct instruction *insn;
+ int offset;
+ char *str;
+ int count;
+ int nops;
+ int err;
+
+ file = dctx->file;
+ count = 0;
+ offset = 0;
+ nops = 0;
+
+ alt_for_each_insn(file, DALT_GROUP(dalt), insn) {
+
+ disas_insn_alt(dctx, insn);
+ str = strdup(disas_result(dctx));
+ if (!str)
+ return -1;
+
+ nops = insn->type == INSN_NOP ? insn->len : 0;
+ err = disas_alt_add_insn(dalt, count, str, offset, nops);
+ if (err)
+ break;
+ offset += insn->len;
+ count++;
+ }
+
+ return count;
+}
+
+/*
+ * Disassemble the default alternative.
+ */
+static int disas_alt_default(struct disas_context *dctx, struct disas_alt *dalt)
+{
+ char *str;
+ int nops;
+ int err;
+
+ if (DALT_GROUP(dalt))
+ return disas_alt_group(dctx, dalt);
+
+ /*
+ * Default alternative with no alt_group: this is the default
+ * code associated with either a jump table or an exception
+ * table and no other instruction alternatives. In that case
+ * the default alternative is made of a single instruction.
+ */
+ disas_insn(dctx, dalt->orig_insn);
+ str = strdup(disas_result(dctx));
+ if (!str)
+ return -1;
+ nops = dalt->orig_insn->type == INSN_NOP ? dalt->orig_insn->len : 0;
+ err = disas_alt_add_insn(dalt, 0, str, 0, nops);
+ if (err)
+ return -1;
+
+ return 1;
+}
+
+/*
+ * For each alternative, if there is an instruction at the specified
+ * offset then print this instruction, otherwise print a blank entry.
+ * The offset is an offset from the start of the alternative.
+ *
+ * Return the offset for the next instructions to print, or -1 if all
+ * instructions have been printed.
+ */
+static int disas_alt_print_insn(struct disas_alt *dalts, int alt_count,
+ int insn_count, int offset)
+{
+ struct disas_alt *dalt;
+ int offset_next;
+ char *str;
+ int i, j;
+
+ offset_next = -1;
+
+ for (i = 0; i < alt_count; i++) {
+ dalt = &dalts[i];
+ j = dalt->insn_idx;
+ if (j == -1) {
+ printf("| %-*s ", dalt->width, "");
+ continue;
+ }
+
+ if (dalt->insn[j].offset == offset) {
+ str = dalt->insn[j].str;
+ printf("| %-*s ", dalt->width, str ?: "");
+ if (++j < insn_count) {
+ dalt->insn_idx = j;
+ } else {
+ dalt->insn_idx = -1;
+ continue;
+ }
+ } else {
+ printf("| %-*s ", dalt->width, "");
+ }
+
+ if (dalt->insn[j].offset > 0 &&
+ (offset_next == -1 ||
+ (dalt->insn[j].offset < offset_next)))
+ offset_next = dalt->insn[j].offset;
+ }
+ printf("\n");
+
+ return offset_next;
+}
+
+/*
+ * Print all alternatives side-by-side.
+ */
+static void disas_alt_print_wide(char *alt_name, struct disas_alt *dalts, int alt_count,
+ int insn_count)
+{
+ struct instruction *orig_insn;
+ int offset_next;
+ int offset;
+ int i;
+
+ orig_insn = dalts[0].orig_insn;
+
+ /*
+ * Print an header with the name of each alternative.
+ */
+ disas_print_info(stdout, orig_insn, -2, NULL);
+
+ if (strlen(alt_name) > dalts[0].width)
+ dalts[0].width = strlen(alt_name);
+ printf("| %-*s ", dalts[0].width, alt_name);
+
+ for (i = 1; i < alt_count; i++)
+ printf("| %-*s ", dalts[i].width, dalts[i].name);
+
+ printf("\n");
+
+ /*
+ * Print instructions for each alternative.
+ */
+ offset_next = 0;
+ do {
+ offset = offset_next;
+ disas_print(stdout, orig_insn->sec, orig_insn->offset + offset,
+ -2, NULL);
+ offset_next = disas_alt_print_insn(dalts, alt_count, insn_count,
+ offset);
+ } while (offset_next > offset);
+}
+
+/*
+ * Print all alternatives one above the other.
+ */
+static void disas_alt_print_compact(char *alt_name, struct disas_alt *dalts,
+ int alt_count, int insn_count)
+{
+ struct instruction *orig_insn;
+ int width;
+ int i, j;
+ int len;
+
+ orig_insn = dalts[0].orig_insn;
+
+ len = disas_print(stdout, orig_insn->sec, orig_insn->offset, 0, NULL);
+ printf("%s\n", alt_name);
+
+ /*
+ * If all alternatives have a single instruction then print each
+ * alternative on a single line. Otherwise, print alternatives
+ * one above the other with a clear separation.
+ */
+
+ if (insn_count == 1) {
+ width = 0;
+ for (i = 0; i < alt_count; i++) {
+ if (dalts[i].width > width)
+ width = dalts[i].width;
+ }
+
+ for (i = 0; i < alt_count; i++) {
+ printf("%*s= %-*s (if %s)\n", len, "", width,
+ dalts[i].insn[0].str, dalts[i].name);
+ }
+
+ return;
+ }
+
+ for (i = 0; i < alt_count; i++) {
+ printf("%*s= %s\n", len, "", dalts[i].name);
+ for (j = 0; j < insn_count; j++) {
+ if (!dalts[i].insn[j].str)
+ break;
+ disas_print(stdout, orig_insn->sec,
+ orig_insn->offset + dalts[i].insn[j].offset, 0,
+ "| %s\n", dalts[i].insn[j].str);
+ }
+ printf("%*s|\n", len, "");
+ }
+}
+
+/*
+ * Trim NOPs in alternatives. This replaces trailing NOPs in alternatives
+ * with a single indication of the number of bytes covered with NOPs.
+ *
+ * Return the maximum numbers of instructions in all alternatives after
+ * trailing NOPs have been trimmed.
+ */
+static int disas_alt_trim_nops(struct disas_alt *dalts, int alt_count,
+ int insn_count)
+{
+ struct disas_alt *dalt;
+ int nops_count;
+ const char *s;
+ int offset;
+ int count;
+ int nops;
+ int i, j;
+
+ count = 0;
+ for (i = 0; i < alt_count; i++) {
+ offset = 0;
+ nops = 0;
+ nops_count = 0;
+ dalt = &dalts[i];
+ for (j = insn_count - 1; j >= 0; j--) {
+ if (!dalt->insn[j].str || !dalt->insn[j].nops)
+ break;
+ offset = dalt->insn[j].offset;
+ free(dalt->insn[j].str);
+ dalt->insn[j].offset = 0;
+ dalt->insn[j].str = NULL;
+ nops += dalt->insn[j].nops;
+ nops_count++;
+ }
+
+ /*
+ * All trailing NOPs have been removed. If there was a single
+ * NOP instruction then re-add it. If there was a block of
+ * NOPs then indicate the number of bytes than the block
+ * covers (nop*<number-of-bytes>).
+ */
+ if (nops_count) {
+ s = nops_count == 1 ? "" : "*";
+ dalt->insn[j + 1].str = strfmt("nop%s%d", s, nops);
+ dalt->insn[j + 1].offset = offset;
+ dalt->insn[j + 1].nops = nops;
+ j++;
+ }
+
+ if (j > count)
+ count = j;
+ }
+
+ return count + 1;
+}
+
+/*
+ * Disassemble an alternative.
+ *
+ * Return the last instruction in the default alternative so that
+ * disassembly can continue with the next instruction. Return NULL
+ * on error.
+ */
+static void *disas_alt(struct disas_context *dctx,
+ struct instruction *orig_insn)
+{
+ struct disas_alt dalts[DISAS_ALT_MAX] = { 0 };
+ struct instruction *last_insn = NULL;
+ struct alternative *alt;
+ struct disas_alt *dalt;
+ int insn_count = 0;
+ int alt_count = 0;
+ char *alt_name;
+ int count;
+ int i, j;
+ int err;
+
+ alt_name = strfmt("<%s.%lx>", disas_alt_type_name(orig_insn),
+ orig_insn->offset);
+ if (!alt_name) {
+ WARN("Failed to define name for alternative at instruction 0x%lx",
+ orig_insn->offset);
+ goto done;
+ }
+
+ /*
+ * Initialize and disassemble the default alternative.
+ */
+ err = disas_alt_init(&dalts[0], orig_insn, NULL);
+ if (err) {
+ WARN("%s: failed to initialize default alternative", alt_name);
+ goto done;
+ }
+
+ insn_count = disas_alt_default(dctx, &dalts[0]);
+ if (insn_count < 0) {
+ WARN("%s: failed to disassemble default alternative", alt_name);
+ goto done;
+ }
+
+ /*
+ * Initialize and disassemble all other alternatives.
+ */
+ i = 1;
+ for (alt = orig_insn->alts; alt; alt = alt->next) {
+ if (i >= DISAS_ALT_MAX) {
+ WARN("%s has more alternatives than supported", alt_name);
+ break;
+ }
+
+ dalt = &dalts[i];
+ err = disas_alt_init(dalt, orig_insn, alt);
+ if (err) {
+ WARN("%s: failed to disassemble alternative", alt_name);
+ goto done;
+ }
+
+ count = -1;
+ switch (dalt->alt->type) {
+ case ALT_TYPE_INSTRUCTIONS:
+ count = disas_alt_group(dctx, dalt);
+ break;
+ case ALT_TYPE_EX_TABLE:
+ count = disas_alt_extable(dalt);
+ break;
+ case ALT_TYPE_JUMP_TABLE:
+ count = disas_alt_jump(dalt);
+ break;
+ }
+ if (count < 0) {
+ WARN("%s: failed to disassemble alternative %s",
+ alt_name, dalt->name);
+ goto done;
+ }
+
+ insn_count = count > insn_count ? count : insn_count;
+ i++;
+ }
+ alt_count = i;
+
+ /*
+ * Print default and non-default alternatives.
+ */
+
+ insn_count = disas_alt_trim_nops(dalts, alt_count, insn_count);
+
+ if (opts.wide)
+ disas_alt_print_wide(alt_name, dalts, alt_count, insn_count);
+ else
+ disas_alt_print_compact(alt_name, dalts, alt_count, insn_count);
+
+ last_insn = orig_insn->alt_group ? orig_insn->alt_group->last_insn :
+ orig_insn;
+
+done:
+ for (i = 0; i < alt_count; i++) {
+ free(dalts[i].name);
+ for (j = 0; j < insn_count; j++)
+ free(dalts[i].insn[j].str);
+ }
+
+ free(alt_name);
+
+ return last_insn;
+}
+
+/*
+ * Disassemble a function.
+ */
+static void disas_func(struct disas_context *dctx, struct symbol *func)
+{
+ struct instruction *insn_start;
+ struct instruction *insn;
+
+ printf("%s:\n", func->name);
+ sym_for_each_insn(dctx->file, func, insn) {
+ if (insn->alts) {
+ insn_start = insn;
+ insn = disas_alt(dctx, insn);
+ if (insn)
+ continue;
+ /*
+ * There was an error with disassembling
+ * the alternative. Resume disassembling
+ * at the current instruction, this will
+ * disassemble the default alternative
+ * only and continue with the code after
+ * the alternative.
+ */
+ insn = insn_start;
+ }
+
+ DISAS_PRINSN(dctx, insn, 0);
+ }
+ printf("\n");
+}
+
+/*
+ * Disassemble all warned functions.
+ */
+void disas_warned_funcs(struct disas_context *dctx)
+{
+ struct symbol *sym;
+
+ if (!dctx)
+ return;
+
+ for_each_sym(dctx->file->elf, sym) {
+ if (sym->warned)
+ disas_func(dctx, sym);
+ }
+}
+
+void disas_funcs(struct disas_context *dctx)
+{
+ bool disas_all = !strcmp(opts.disas, "*");
+ struct section *sec;
+ struct symbol *sym;
+
+ for_each_sec(dctx->file->elf, sec) {
+
+ if (!(sec->sh.sh_flags & SHF_EXECINSTR))
+ continue;
+
+ sec_for_each_sym(sec, sym) {
+ /*
+ * If the function had a warning and the verbose
+ * option is used then the function was already
+ * disassemble.
+ */
+ if (opts.verbose && sym->warned)
+ continue;
+
+ if (disas_all || fnmatch(opts.disas, sym->name, 0) == 0)
+ disas_func(dctx, sym);
+ }
+ }
+}
diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c
index ca5d77db692a..6a8ed9c62323 100644
--- a/tools/objtool/elf.c
+++ b/tools/objtool/elf.c
@@ -16,12 +16,17 @@
#include <string.h>
#include <unistd.h>
#include <errno.h>
+#include <libgen.h>
+#include <ctype.h>
#include <linux/interval_tree_generic.h>
#include <objtool/builtin.h>
-
#include <objtool/elf.h>
#include <objtool/warn.h>
+#define ALIGN_UP(x, align_to) (((x) + ((align_to)-1)) & ~((align_to)-1))
+#define ALIGN_UP_POW2(x) (1U << ((8 * sizeof(x)) - __builtin_clz((x) - 1U)))
+#define MAX(a, b) ((a) > (b) ? (a) : (b))
+
static inline u32 str_hash(const char *str)
{
return jhash(str, strlen(str), 0);
@@ -92,11 +97,12 @@ static inline unsigned long __sym_start(struct symbol *s)
static inline unsigned long __sym_last(struct symbol *s)
{
- return s->offset + s->len - 1;
+ return s->offset + (s->len ? s->len - 1 : 0);
}
INTERVAL_TREE_DEFINE(struct symbol, node, unsigned long, __subtree_last,
- __sym_start, __sym_last, static, __sym)
+ __sym_start, __sym_last, static inline __maybe_unused,
+ __sym)
#define __sym_for_each(_iter, _tree, _start, _end) \
for (_iter = __sym_iter_first((_tree), (_start), (_end)); \
@@ -108,7 +114,7 @@ struct symbol_hole {
};
/*
- * Find !section symbol where @offset is after it.
+ * Find the last symbol before @offset.
*/
static int symbol_hole_by_offset(const void *key, const struct rb_node *node)
{
@@ -119,8 +125,7 @@ static int symbol_hole_by_offset(const void *key, const struct rb_node *node)
return -1;
if (sh->key >= s->offset + s->len) {
- if (s->type != STT_SECTION)
- sh->sym = s;
+ sh->sym = s;
return 1;
}
@@ -167,11 +172,11 @@ static struct symbol *find_symbol_by_index(struct elf *elf, unsigned int idx)
struct symbol *find_symbol_by_offset(struct section *sec, unsigned long offset)
{
struct rb_root_cached *tree = (struct rb_root_cached *)&sec->symbol_tree;
- struct symbol *iter;
+ struct symbol *sym;
- __sym_for_each(iter, tree, offset, offset) {
- if (iter->offset == offset && iter->type != STT_SECTION)
- return iter;
+ __sym_for_each(sym, tree, offset, offset) {
+ if (sym->offset == offset && !is_sec_sym(sym))
+ return sym->alias;
}
return NULL;
@@ -180,11 +185,11 @@ struct symbol *find_symbol_by_offset(struct section *sec, unsigned long offset)
struct symbol *find_func_by_offset(struct section *sec, unsigned long offset)
{
struct rb_root_cached *tree = (struct rb_root_cached *)&sec->symbol_tree;
- struct symbol *iter;
+ struct symbol *func;
- __sym_for_each(iter, tree, offset, offset) {
- if (iter->offset == offset && iter->type == STT_FUNC)
- return iter;
+ __sym_for_each(func, tree, offset, offset) {
+ if (func->offset == offset && is_func_sym(func))
+ return func->alias;
}
return NULL;
@@ -193,14 +198,29 @@ struct symbol *find_func_by_offset(struct section *sec, unsigned long offset)
struct symbol *find_symbol_containing(const struct section *sec, unsigned long offset)
{
struct rb_root_cached *tree = (struct rb_root_cached *)&sec->symbol_tree;
- struct symbol *iter;
+ struct symbol *sym = NULL, *tmp;
- __sym_for_each(iter, tree, offset, offset) {
- if (iter->type != STT_SECTION)
- return iter;
+ __sym_for_each(tmp, tree, offset, offset) {
+ if (tmp->len) {
+ if (!sym) {
+ sym = tmp;
+ continue;
+ }
+
+ if (sym->offset != tmp->offset || sym->len != tmp->len) {
+ /*
+ * In the rare case of overlapping symbols,
+ * pick the smaller one.
+ *
+ * TODO: outlaw overlapping symbols
+ */
+ if (tmp->len < sym->len)
+ sym = tmp;
+ }
+ }
}
- return NULL;
+ return sym ? sym->alias : NULL;
}
/*
@@ -246,11 +266,11 @@ int find_symbol_hole_containing(const struct section *sec, unsigned long offset)
struct symbol *find_func_containing(struct section *sec, unsigned long offset)
{
struct rb_root_cached *tree = (struct rb_root_cached *)&sec->symbol_tree;
- struct symbol *iter;
+ struct symbol *func;
- __sym_for_each(iter, tree, offset, offset) {
- if (iter->type == STT_FUNC)
- return iter;
+ __sym_for_each(func, tree, offset, offset) {
+ if (is_func_sym(func))
+ return func->alias;
}
return NULL;
@@ -268,6 +288,35 @@ struct symbol *find_symbol_by_name(const struct elf *elf, const char *name)
return NULL;
}
+/* Find local symbol with matching STT_FILE */
+static struct symbol *find_local_symbol_by_file_and_name(const struct elf *elf,
+ struct symbol *file,
+ const char *name)
+{
+ struct symbol *sym;
+
+ elf_hash_for_each_possible(symbol_name, sym, name_hash, str_hash(name)) {
+ if (sym->bind == STB_LOCAL && sym->file == file &&
+ !strcmp(sym->name, name)) {
+ return sym;
+ }
+ }
+
+ return NULL;
+}
+
+struct symbol *find_global_symbol_by_name(const struct elf *elf, const char *name)
+{
+ struct symbol *sym;
+
+ elf_hash_for_each_possible(symbol_name, sym, name_hash, str_hash(name)) {
+ if (!strcmp(sym->name, name) && !is_local_sym(sym))
+ return sym;
+ }
+
+ return NULL;
+}
+
struct reloc *find_reloc_by_dest_range(const struct elf *elf, struct section *sec,
unsigned long offset, unsigned int len)
{
@@ -358,14 +407,14 @@ static int read_sections(struct elf *elf)
return -1;
}
- if (sec->sh.sh_size != 0 && !is_dwarf_section(sec)) {
+ if (sec_size(sec) != 0 && !is_dwarf_section(sec)) {
sec->data = elf_getdata(s, NULL);
if (!sec->data) {
ERROR_ELF("elf_getdata");
return -1;
}
if (sec->data->d_off != 0 ||
- sec->data->d_size != sec->sh.sh_size) {
+ sec->data->d_size != sec_size(sec)) {
ERROR("unexpected data attributes for %s", sec->name);
return -1;
}
@@ -393,7 +442,38 @@ static int read_sections(struct elf *elf)
return 0;
}
-static void elf_add_symbol(struct elf *elf, struct symbol *sym)
+static const char *demangle_name(struct symbol *sym)
+{
+ char *str;
+
+ if (!is_local_sym(sym))
+ return sym->name;
+
+ if (!is_func_sym(sym) && !is_object_sym(sym))
+ return sym->name;
+
+ if (!strstarts(sym->name, "__UNIQUE_ID_") && !strchr(sym->name, '.'))
+ return sym->name;
+
+ str = strdup(sym->name);
+ if (!str) {
+ ERROR_GLIBC("strdup");
+ return NULL;
+ }
+
+ for (int i = strlen(str) - 1; i >= 0; i--) {
+ char c = str[i];
+
+ if (!isdigit(c) && c != '.') {
+ str[i + 1] = '\0';
+ break;
+ }
+ }
+
+ return str;
+}
+
+static int elf_add_symbol(struct elf *elf, struct symbol *sym)
{
struct list_head *entry;
struct rb_node *pnode;
@@ -405,14 +485,15 @@ static void elf_add_symbol(struct elf *elf, struct symbol *sym)
sym->type = GELF_ST_TYPE(sym->sym.st_info);
sym->bind = GELF_ST_BIND(sym->sym.st_info);
- if (sym->type == STT_FILE)
+ if (is_file_sym(sym))
elf->num_files++;
sym->offset = sym->sym.st_value;
sym->len = sym->sym.st_size;
__sym_for_each(iter, &sym->sec->symbol_tree, sym->offset, sym->offset) {
- if (iter->offset == sym->offset && iter->type == sym->type)
+ if (!is_undef_sym(iter) && iter->offset == sym->offset &&
+ iter->type == sym->type && iter->len == sym->len)
iter->alias = sym;
}
@@ -423,21 +504,44 @@ static void elf_add_symbol(struct elf *elf, struct symbol *sym)
else
entry = &sym->sec->symbol_list;
list_add(&sym->list, entry);
+
+ list_add_tail(&sym->global_list, &elf->symbols);
elf_hash_add(symbol, &sym->hash, sym->idx);
elf_hash_add(symbol_name, &sym->name_hash, str_hash(sym->name));
- /*
- * Don't store empty STT_NOTYPE symbols in the rbtree. They
- * can exist within a function, confusing the sorting.
- */
- if (!sym->len)
- __sym_remove(sym, &sym->sec->symbol_tree);
+ if (is_func_sym(sym) &&
+ (strstarts(sym->name, "__pfx_") ||
+ strstarts(sym->name, "__cfi_") ||
+ strstarts(sym->name, "__pi___pfx_") ||
+ strstarts(sym->name, "__pi___cfi_")))
+ sym->prefix = 1;
+
+ if (strstarts(sym->name, ".klp.sym"))
+ sym->klp = 1;
+
+ if (!sym->klp && !is_sec_sym(sym) && strstr(sym->name, ".cold")) {
+ sym->cold = 1;
+
+ /*
+ * Clang doesn't mark cold subfunctions as STT_FUNC, which
+ * breaks several objtool assumptions. Fake it.
+ */
+ sym->type = STT_FUNC;
+ }
+
+ sym->pfunc = sym->cfunc = sym;
+
+ sym->demangled_name = demangle_name(sym);
+ if (!sym->demangled_name)
+ return -1;
+
+ return 0;
}
static int read_symbols(struct elf *elf)
{
struct section *symtab, *symtab_shndx, *sec;
- struct symbol *sym, *pfunc;
+ struct symbol *sym, *pfunc, *file = NULL;
int symbols_nr, i;
char *coldstr;
Elf_Data *shndx_data = NULL;
@@ -469,6 +573,9 @@ static int read_symbols(struct elf *elf)
ERROR_GLIBC("calloc");
return -1;
}
+
+ INIT_LIST_HEAD(&elf->symbols);
+
for (i = 0; i < symbols_nr; i++) {
sym = &elf->symbol_data[i];
@@ -477,14 +584,14 @@ static int read_symbols(struct elf *elf)
if (!gelf_getsymshndx(symtab->data, shndx_data, i, &sym->sym,
&shndx)) {
ERROR_ELF("gelf_getsymshndx");
- goto err;
+ return -1;
}
sym->name = elf_strptr(elf->elf, symtab->sh.sh_link,
sym->sym.st_name);
if (!sym->name) {
ERROR_ELF("elf_strptr");
- goto err;
+ return -1;
}
if ((sym->sym.st_shndx > SHN_UNDEF &&
@@ -496,7 +603,7 @@ static int read_symbols(struct elf *elf)
sym->sec = find_section_by_index(elf, shndx);
if (!sym->sec) {
ERROR("couldn't find section for symbol %s", sym->name);
- goto err;
+ return -1;
}
if (GELF_ST_TYPE(sym->sym.st_info) == STT_SECTION) {
sym->name = sym->sec->name;
@@ -505,7 +612,13 @@ static int read_symbols(struct elf *elf)
} else
sym->sec = find_section_by_index(elf, 0);
- elf_add_symbol(elf, sym);
+ if (elf_add_symbol(elf, sym))
+ return -1;
+
+ if (sym->type == STT_FILE)
+ file = sym;
+ else if (sym->bind == STB_LOCAL)
+ sym->file = file;
}
if (opts.stats) {
@@ -518,18 +631,15 @@ static int read_symbols(struct elf *elf)
sec_for_each_sym(sec, sym) {
char *pname;
size_t pnamelen;
- if (sym->type != STT_FUNC)
- continue;
-
- if (sym->pfunc == NULL)
- sym->pfunc = sym;
- if (sym->cfunc == NULL)
- sym->cfunc = sym;
+ if (!sym->cold)
+ continue;
coldstr = strstr(sym->name, ".cold");
- if (!coldstr)
- continue;
+ if (!coldstr) {
+ ERROR("%s(): cold subfunction without \".cold\"?", sym->name);
+ return -1;
+ }
pnamelen = coldstr - sym->name;
pname = strndup(sym->name, pnamelen);
@@ -538,7 +648,9 @@ static int read_symbols(struct elf *elf)
return -1;
}
- pfunc = find_symbol_by_name(elf, pname);
+ pfunc = find_local_symbol_by_file_and_name(elf, sym->file, pname);
+ if (!pfunc)
+ pfunc = find_global_symbol_by_name(elf, pname);
free(pname);
if (!pfunc) {
@@ -546,8 +658,9 @@ static int read_symbols(struct elf *elf)
return -1;
}
- sym->pfunc = pfunc;
+ sym->pfunc = pfunc->alias;
pfunc->cfunc = sym;
+ pfunc->alias->cfunc = sym;
/*
* Unfortunately, -fnoreorder-functions puts the child
@@ -566,10 +679,6 @@ static int read_symbols(struct elf *elf)
}
return 0;
-
-err:
- free(sym);
- return -1;
}
static int mark_group_syms(struct elf *elf)
@@ -583,7 +692,7 @@ static int mark_group_syms(struct elf *elf)
return -1;
}
- list_for_each_entry(sec, &elf->sections, list) {
+ for_each_sec(elf, sec) {
if (sec->sh.sh_type == SHT_GROUP &&
sec->sh.sh_link == symtab->idx) {
sym = find_symbol_by_index(elf, sec->sh.sh_info);
@@ -624,7 +733,7 @@ static int elf_update_sym_relocs(struct elf *elf, struct symbol *sym)
static int elf_update_symbol(struct elf *elf, struct section *symtab,
struct section *symtab_shndx, struct symbol *sym)
{
- Elf32_Word shndx = sym->sec ? sym->sec->idx : SHN_UNDEF;
+ Elf32_Word shndx;
Elf_Data *symtab_data = NULL, *shndx_data = NULL;
Elf64_Xword entsize = symtab->sh.sh_entsize;
int max_idx, idx = sym->idx;
@@ -632,8 +741,7 @@ static int elf_update_symbol(struct elf *elf, struct section *symtab,
bool is_special_shndx = sym->sym.st_shndx >= SHN_LORESERVE &&
sym->sym.st_shndx != SHN_XINDEX;
- if (is_special_shndx)
- shndx = sym->sym.st_shndx;
+ shndx = is_special_shndx ? sym->sym.st_shndx : sym->sec->idx;
s = elf_getscn(elf->elf, symtab->idx);
if (!s) {
@@ -731,7 +839,7 @@ static int elf_update_symbol(struct elf *elf, struct section *symtab,
}
/* setup extended section index magic and write the symbol */
- if ((shndx >= SHN_UNDEF && shndx < SHN_LORESERVE) || is_special_shndx) {
+ if (shndx < SHN_LORESERVE || is_special_shndx) {
sym->sym.st_shndx = shndx;
if (!shndx_data)
shndx = 0;
@@ -751,24 +859,58 @@ static int elf_update_symbol(struct elf *elf, struct section *symtab,
return 0;
}
-static struct symbol *
-__elf_create_symbol(struct elf *elf, struct symbol *sym)
+struct symbol *elf_create_symbol(struct elf *elf, const char *name,
+ struct section *sec, unsigned int bind,
+ unsigned int type, unsigned long offset,
+ size_t size)
{
struct section *symtab, *symtab_shndx;
Elf32_Word first_non_local, new_idx;
- struct symbol *old;
+ struct symbol *old, *sym;
- symtab = find_section_by_name(elf, ".symtab");
- if (symtab) {
- symtab_shndx = find_section_by_name(elf, ".symtab_shndx");
+ sym = calloc(1, sizeof(*sym));
+ if (!sym) {
+ ERROR_GLIBC("calloc");
+ return NULL;
+ }
+
+ sym->name = strdup(name);
+ if (!sym->name) {
+ ERROR_GLIBC("strdup");
+ return NULL;
+ }
+
+ if (type != STT_SECTION) {
+ sym->sym.st_name = elf_add_string(elf, NULL, sym->name);
+ if (sym->sym.st_name == -1)
+ return NULL;
+ }
+
+ if (sec) {
+ sym->sec = sec;
} else {
+ sym->sec = find_section_by_index(elf, 0);
+ if (!sym->sec) {
+ ERROR("no NULL section");
+ return NULL;
+ }
+ }
+
+ sym->sym.st_info = GELF_ST_INFO(bind, type);
+ sym->sym.st_value = offset;
+ sym->sym.st_size = size;
+
+ symtab = find_section_by_name(elf, ".symtab");
+ if (!symtab) {
ERROR("no .symtab");
return NULL;
}
+ symtab_shndx = find_section_by_name(elf, ".symtab_shndx");
+
new_idx = sec_num_entries(symtab);
- if (GELF_ST_BIND(sym->sym.st_info) != STB_LOCAL)
+ if (bind != STB_LOCAL)
goto non_local;
/*
@@ -806,10 +948,8 @@ __elf_create_symbol(struct elf *elf, struct symbol *sym)
non_local:
sym->idx = new_idx;
- if (elf_update_symbol(elf, symtab, symtab_shndx, sym)) {
- ERROR("elf_update_symbol");
+ if (sym->idx && elf_update_symbol(elf, symtab, symtab_shndx, sym))
return NULL;
- }
symtab->sh.sh_size += symtab->sh.sh_entsize;
mark_sec_changed(elf, symtab, true);
@@ -819,70 +959,28 @@ non_local:
mark_sec_changed(elf, symtab_shndx, true);
}
- return sym;
-}
-
-static struct symbol *
-elf_create_section_symbol(struct elf *elf, struct section *sec)
-{
- struct symbol *sym = calloc(1, sizeof(*sym));
-
- if (!sym) {
- ERROR_GLIBC("malloc");
+ if (elf_add_symbol(elf, sym))
return NULL;
- }
-
- sym->name = sec->name;
- sym->sec = sec;
-
- // st_name 0
- sym->sym.st_info = GELF_ST_INFO(STB_LOCAL, STT_SECTION);
- // st_other 0
- // st_value 0
- // st_size 0
-
- sym = __elf_create_symbol(elf, sym);
- if (sym)
- elf_add_symbol(elf, sym);
return sym;
}
-static int elf_add_string(struct elf *elf, struct section *strtab, char *str);
-
-struct symbol *
-elf_create_prefix_symbol(struct elf *elf, struct symbol *orig, long size)
+struct symbol *elf_create_section_symbol(struct elf *elf, struct section *sec)
{
struct symbol *sym = calloc(1, sizeof(*sym));
- size_t namelen = strlen(orig->name) + sizeof("__pfx_");
- char *name = malloc(namelen);
- if (!sym || !name) {
- ERROR_GLIBC("malloc");
+ sym = elf_create_symbol(elf, sec->name, sec, STB_LOCAL, STT_SECTION, 0, 0);
+ if (!sym)
return NULL;
- }
- snprintf(name, namelen, "__pfx_%s", orig->name);
-
- sym->name = name;
- sym->sec = orig->sec;
-
- sym->sym.st_name = elf_add_string(elf, NULL, name);
- sym->sym.st_info = orig->sym.st_info;
- sym->sym.st_value = orig->sym.st_value - size;
- sym->sym.st_size = size;
-
- sym = __elf_create_symbol(elf, sym);
- if (sym)
- elf_add_symbol(elf, sym);
+ sec->sym = sym;
return sym;
}
-static struct reloc *elf_init_reloc(struct elf *elf, struct section *rsec,
- unsigned int reloc_idx,
- unsigned long offset, struct symbol *sym,
- s64 addend, unsigned int type)
+struct reloc *elf_init_reloc(struct elf *elf, struct section *rsec,
+ unsigned int reloc_idx, unsigned long offset,
+ struct symbol *sym, s64 addend, unsigned int type)
{
struct reloc *reloc, empty = { 0 };
@@ -922,9 +1020,9 @@ struct reloc *elf_init_reloc_text_sym(struct elf *elf, struct section *sec,
unsigned long insn_off)
{
struct symbol *sym = insn_sec->sym;
- int addend = insn_off;
+ s64 addend = insn_off;
- if (!(insn_sec->sh.sh_flags & SHF_EXECINSTR)) {
+ if (!is_text_sec(insn_sec)) {
ERROR("bad call to %s() for data symbol %s", __func__, sym->name);
return NULL;
}
@@ -939,8 +1037,6 @@ struct reloc *elf_init_reloc_text_sym(struct elf *elf, struct section *sec,
sym = elf_create_section_symbol(elf, insn_sec);
if (!sym)
return NULL;
-
- insn_sec->sym = sym;
}
return elf_init_reloc(elf, sec->rsec, reloc_idx, offset, sym, addend,
@@ -953,7 +1049,7 @@ struct reloc *elf_init_reloc_data_sym(struct elf *elf, struct section *sec,
struct symbol *sym,
s64 addend)
{
- if (sym->sec && (sec->sh.sh_flags & SHF_EXECINSTR)) {
+ if (is_text_sec(sec)) {
ERROR("bad call to %s() for text symbol %s", __func__, sym->name);
return NULL;
}
@@ -986,12 +1082,16 @@ static int read_relocs(struct elf *elf)
rsec->base->rsec = rsec;
- nr_reloc = 0;
+ /* nr_alloc_relocs=0: libelf owns d_buf */
+ rsec->nr_alloc_relocs = 0;
+
rsec->relocs = calloc(sec_num_entries(rsec), sizeof(*reloc));
if (!rsec->relocs) {
ERROR_GLIBC("calloc");
return -1;
}
+
+ nr_reloc = 0;
for (i = 0; i < sec_num_entries(rsec); i++) {
reloc = &rsec->relocs[i];
@@ -1044,6 +1144,12 @@ struct elf *elf_open_read(const char *name, int flags)
goto err;
}
+ elf->name = strdup(name);
+ if (!elf->name) {
+ ERROR_GLIBC("strdup");
+ return NULL;
+ }
+
if ((flags & O_ACCMODE) == O_RDONLY)
cmd = ELF_C_READ_MMAP;
else if ((flags & O_ACCMODE) == O_RDWR)
@@ -1081,11 +1187,142 @@ err:
return NULL;
}
-static int elf_add_string(struct elf *elf, struct section *strtab, char *str)
+struct elf *elf_create_file(GElf_Ehdr *ehdr, const char *name)
{
- Elf_Data *data;
- Elf_Scn *s;
- int len;
+ struct section *null, *symtab, *strtab, *shstrtab;
+ char *dir, *base, *tmp_name;
+ struct symbol *sym;
+ struct elf *elf;
+
+ elf_version(EV_CURRENT);
+
+ elf = calloc(1, sizeof(*elf));
+ if (!elf) {
+ ERROR_GLIBC("calloc");
+ return NULL;
+ }
+
+ INIT_LIST_HEAD(&elf->sections);
+
+ dir = strdup(name);
+ if (!dir) {
+ ERROR_GLIBC("strdup");
+ return NULL;
+ }
+
+ dir = dirname(dir);
+
+ base = strdup(name);
+ if (!base) {
+ ERROR_GLIBC("strdup");
+ return NULL;
+ }
+
+ base = basename(base);
+
+ tmp_name = malloc(256);
+ if (!tmp_name) {
+ ERROR_GLIBC("malloc");
+ return NULL;
+ }
+
+ snprintf(tmp_name, 256, "%s/%s.XXXXXX", dir, base);
+
+ elf->fd = mkstemp(tmp_name);
+ if (elf->fd == -1) {
+ ERROR_GLIBC("can't create tmp file");
+ exit(1);
+ }
+
+ elf->tmp_name = tmp_name;
+
+ elf->name = strdup(name);
+ if (!elf->name) {
+ ERROR_GLIBC("strdup");
+ return NULL;
+ }
+
+ elf->elf = elf_begin(elf->fd, ELF_C_WRITE, NULL);
+ if (!elf->elf) {
+ ERROR_ELF("elf_begin");
+ return NULL;
+ }
+
+ if (!gelf_newehdr(elf->elf, ELFCLASS64)) {
+ ERROR_ELF("gelf_newehdr");
+ return NULL;
+ }
+
+ memcpy(&elf->ehdr, ehdr, sizeof(elf->ehdr));
+
+ if (!gelf_update_ehdr(elf->elf, &elf->ehdr)) {
+ ERROR_ELF("gelf_update_ehdr");
+ return NULL;
+ }
+
+ INIT_LIST_HEAD(&elf->symbols);
+
+ if (!elf_alloc_hash(section, 1000) ||
+ !elf_alloc_hash(section_name, 1000) ||
+ !elf_alloc_hash(symbol, 10000) ||
+ !elf_alloc_hash(symbol_name, 10000) ||
+ !elf_alloc_hash(reloc, 100000))
+ return NULL;
+
+ null = elf_create_section(elf, NULL, 0, 0, SHT_NULL, 0, 0);
+ shstrtab = elf_create_section(elf, NULL, 0, 0, SHT_STRTAB, 1, 0);
+ strtab = elf_create_section(elf, NULL, 0, 0, SHT_STRTAB, 1, 0);
+
+ if (!null || !shstrtab || !strtab)
+ return NULL;
+
+ null->name = "";
+ shstrtab->name = ".shstrtab";
+ strtab->name = ".strtab";
+
+ null->sh.sh_name = elf_add_string(elf, shstrtab, null->name);
+ shstrtab->sh.sh_name = elf_add_string(elf, shstrtab, shstrtab->name);
+ strtab->sh.sh_name = elf_add_string(elf, shstrtab, strtab->name);
+
+ if (null->sh.sh_name == -1 || shstrtab->sh.sh_name == -1 || strtab->sh.sh_name == -1)
+ return NULL;
+
+ elf_hash_add(section_name, &null->name_hash, str_hash(null->name));
+ elf_hash_add(section_name, &strtab->name_hash, str_hash(strtab->name));
+ elf_hash_add(section_name, &shstrtab->name_hash, str_hash(shstrtab->name));
+
+ if (elf_add_string(elf, strtab, "") == -1)
+ return NULL;
+
+ symtab = elf_create_section(elf, ".symtab", 0x18, 0x18, SHT_SYMTAB, 0x8, 0);
+ if (!symtab)
+ return NULL;
+
+ symtab->sh.sh_link = strtab->idx;
+ symtab->sh.sh_info = 1;
+
+ elf->ehdr.e_shstrndx = shstrtab->idx;
+ if (!gelf_update_ehdr(elf->elf, &elf->ehdr)) {
+ ERROR_ELF("gelf_update_ehdr");
+ return NULL;
+ }
+
+ sym = calloc(1, sizeof(*sym));
+ if (!sym) {
+ ERROR_GLIBC("calloc");
+ return NULL;
+ }
+
+ sym->name = "";
+ sym->sec = null;
+ elf_add_symbol(elf, sym);
+
+ return elf;
+}
+
+unsigned int elf_add_string(struct elf *elf, struct section *strtab, const char *str)
+{
+ unsigned int offset;
if (!strtab)
strtab = find_section_by_name(elf, ".strtab");
@@ -1094,76 +1331,109 @@ static int elf_add_string(struct elf *elf, struct section *strtab, char *str)
return -1;
}
- s = elf_getscn(elf->elf, strtab->idx);
+ if (!strtab->sh.sh_addralign) {
+ ERROR("'%s': invalid sh_addralign", strtab->name);
+ return -1;
+ }
+
+ offset = ALIGN_UP(strtab->sh.sh_size, strtab->sh.sh_addralign);
+
+ if (!elf_add_data(elf, strtab, str, strlen(str) + 1))
+ return -1;
+
+ return offset;
+}
+
+void *elf_add_data(struct elf *elf, struct section *sec, const void *data, size_t size)
+{
+ unsigned long offset;
+ Elf_Scn *s;
+
+ if (!sec->sh.sh_addralign) {
+ ERROR("'%s': invalid sh_addralign", sec->name);
+ return NULL;
+ }
+
+ s = elf_getscn(elf->elf, sec->idx);
if (!s) {
ERROR_ELF("elf_getscn");
- return -1;
+ return NULL;
}
- data = elf_newdata(s);
- if (!data) {
+ sec->data = elf_newdata(s);
+ if (!sec->data) {
ERROR_ELF("elf_newdata");
- return -1;
+ return NULL;
}
- data->d_buf = str;
- data->d_size = strlen(str) + 1;
- data->d_align = 1;
+ sec->data->d_buf = calloc(1, size);
+ if (!sec->data->d_buf) {
+ ERROR_GLIBC("calloc");
+ return NULL;
+ }
- len = strtab->sh.sh_size;
- strtab->sh.sh_size += data->d_size;
+ if (data)
+ memcpy(sec->data->d_buf, data, size);
- mark_sec_changed(elf, strtab, true);
+ sec->data->d_size = size;
+ sec->data->d_align = 1;
- return len;
+ offset = ALIGN_UP(sec->sh.sh_size, sec->sh.sh_addralign);
+ sec->sh.sh_size = offset + size;
+
+ mark_sec_changed(elf, sec, true);
+
+ return sec->data->d_buf;
}
struct section *elf_create_section(struct elf *elf, const char *name,
- size_t entsize, unsigned int nr)
+ size_t size, size_t entsize,
+ unsigned int type, unsigned int align,
+ unsigned int flags)
{
struct section *sec, *shstrtab;
- size_t size = entsize * nr;
Elf_Scn *s;
- sec = malloc(sizeof(*sec));
+ if (name && find_section_by_name(elf, name)) {
+ ERROR("section '%s' already exists", name);
+ return NULL;
+ }
+
+ sec = calloc(1, sizeof(*sec));
if (!sec) {
- ERROR_GLIBC("malloc");
+ ERROR_GLIBC("calloc");
return NULL;
}
- memset(sec, 0, sizeof(*sec));
INIT_LIST_HEAD(&sec->symbol_list);
+ /* don't actually create the section, just the data structures */
+ if (type == SHT_NULL)
+ goto add;
+
s = elf_newscn(elf->elf);
if (!s) {
ERROR_ELF("elf_newscn");
return NULL;
}
- sec->name = strdup(name);
- if (!sec->name) {
- ERROR_GLIBC("strdup");
- return NULL;
- }
-
sec->idx = elf_ndxscn(s);
- sec->data = elf_newdata(s);
- if (!sec->data) {
- ERROR_ELF("elf_newdata");
- return NULL;
- }
+ if (size) {
+ sec->data = elf_newdata(s);
+ if (!sec->data) {
+ ERROR_ELF("elf_newdata");
+ return NULL;
+ }
- sec->data->d_size = size;
- sec->data->d_align = 1;
+ sec->data->d_size = size;
+ sec->data->d_align = 1;
- if (size) {
- sec->data->d_buf = malloc(size);
+ sec->data->d_buf = calloc(1, size);
if (!sec->data->d_buf) {
- ERROR_GLIBC("malloc");
+ ERROR_GLIBC("calloc");
return NULL;
}
- memset(sec->data->d_buf, 0, size);
}
if (!gelf_getshdr(s, &sec->sh)) {
@@ -1173,34 +1443,152 @@ struct section *elf_create_section(struct elf *elf, const char *name,
sec->sh.sh_size = size;
sec->sh.sh_entsize = entsize;
- sec->sh.sh_type = SHT_PROGBITS;
- sec->sh.sh_addralign = 1;
- sec->sh.sh_flags = SHF_ALLOC;
-
- /* Add section name to .shstrtab (or .strtab for Clang) */
- shstrtab = find_section_by_name(elf, ".shstrtab");
- if (!shstrtab)
- shstrtab = find_section_by_name(elf, ".strtab");
- if (!shstrtab) {
- ERROR("can't find .shstrtab or .strtab section");
- return NULL;
+ sec->sh.sh_type = type;
+ sec->sh.sh_addralign = align;
+ sec->sh.sh_flags = flags;
+
+ if (name) {
+ sec->name = strdup(name);
+ if (!sec->name) {
+ ERROR("strdup");
+ return NULL;
+ }
+
+ /* Add section name to .shstrtab (or .strtab for Clang) */
+ shstrtab = find_section_by_name(elf, ".shstrtab");
+ if (!shstrtab) {
+ shstrtab = find_section_by_name(elf, ".strtab");
+ if (!shstrtab) {
+ ERROR("can't find .shstrtab or .strtab");
+ return NULL;
+ }
+ }
+ sec->sh.sh_name = elf_add_string(elf, shstrtab, sec->name);
+ if (sec->sh.sh_name == -1)
+ return NULL;
+
+ elf_hash_add(section_name, &sec->name_hash, str_hash(sec->name));
}
- sec->sh.sh_name = elf_add_string(elf, shstrtab, sec->name);
- if (sec->sh.sh_name == -1)
- return NULL;
+add:
list_add_tail(&sec->list, &elf->sections);
elf_hash_add(section, &sec->hash, sec->idx);
- elf_hash_add(section_name, &sec->name_hash, str_hash(sec->name));
mark_sec_changed(elf, sec, true);
return sec;
}
-static struct section *elf_create_rela_section(struct elf *elf,
- struct section *sec,
- unsigned int reloc_nr)
+static int elf_alloc_reloc(struct elf *elf, struct section *rsec)
+{
+ struct reloc *old_relocs, *old_relocs_end, *new_relocs;
+ unsigned int nr_relocs_old = sec_num_entries(rsec);
+ unsigned int nr_relocs_new = nr_relocs_old + 1;
+ unsigned long nr_alloc;
+ struct symbol *sym;
+
+ if (!rsec->data) {
+ rsec->data = elf_newdata(elf_getscn(elf->elf, rsec->idx));
+ if (!rsec->data) {
+ ERROR_ELF("elf_newdata");
+ return -1;
+ }
+
+ rsec->data->d_align = 1;
+ rsec->data->d_type = ELF_T_RELA;
+ rsec->data->d_buf = NULL;
+ }
+
+ rsec->data->d_size = nr_relocs_new * elf_rela_size(elf);
+ rsec->sh.sh_size = rsec->data->d_size;
+
+ nr_alloc = MAX(64, ALIGN_UP_POW2(nr_relocs_new));
+ if (nr_alloc <= rsec->nr_alloc_relocs)
+ return 0;
+
+ if (rsec->data->d_buf && !rsec->nr_alloc_relocs) {
+ void *orig_buf = rsec->data->d_buf;
+
+ /*
+ * The original d_buf is owned by libelf so it can't be
+ * realloced.
+ */
+ rsec->data->d_buf = malloc(nr_alloc * elf_rela_size(elf));
+ if (!rsec->data->d_buf) {
+ ERROR_GLIBC("malloc");
+ return -1;
+ }
+ memcpy(rsec->data->d_buf, orig_buf,
+ nr_relocs_old * elf_rela_size(elf));
+ } else {
+ rsec->data->d_buf = realloc(rsec->data->d_buf,
+ nr_alloc * elf_rela_size(elf));
+ if (!rsec->data->d_buf) {
+ ERROR_GLIBC("realloc");
+ return -1;
+ }
+ }
+
+ rsec->nr_alloc_relocs = nr_alloc;
+
+ old_relocs = rsec->relocs;
+ new_relocs = calloc(nr_alloc, sizeof(struct reloc));
+ if (!new_relocs) {
+ ERROR_GLIBC("calloc");
+ return -1;
+ }
+
+ if (!old_relocs)
+ goto done;
+
+ /*
+ * The struct reloc's address has changed. Update all the symbols and
+ * relocs which reference it.
+ */
+
+ old_relocs_end = &old_relocs[nr_relocs_old];
+ for_each_sym(elf, sym) {
+ struct reloc *reloc;
+
+ reloc = sym->relocs;
+ if (!reloc)
+ continue;
+
+ if (reloc >= old_relocs && reloc < old_relocs_end)
+ sym->relocs = &new_relocs[reloc - old_relocs];
+
+ while (1) {
+ struct reloc *next_reloc = sym_next_reloc(reloc);
+
+ if (!next_reloc)
+ break;
+
+ if (next_reloc >= old_relocs && next_reloc < old_relocs_end)
+ set_sym_next_reloc(reloc, &new_relocs[next_reloc - old_relocs]);
+
+ reloc = next_reloc;
+ }
+ }
+
+ memcpy(new_relocs, old_relocs, nr_relocs_old * sizeof(struct reloc));
+
+ for (int i = 0; i < nr_relocs_old; i++) {
+ struct reloc *old = &old_relocs[i];
+ struct reloc *new = &new_relocs[i];
+ u32 key = reloc_hash(old);
+
+ elf_hash_del(reloc, &old->hash, key);
+ elf_hash_add(reloc, &new->hash, key);
+ }
+
+ free(old_relocs);
+done:
+ rsec->relocs = new_relocs;
+ return 0;
+}
+
+struct section *elf_create_rela_section(struct elf *elf, struct section *sec,
+ unsigned int nr_relocs)
{
struct section *rsec;
char *rsec_name;
@@ -1213,41 +1601,72 @@ static struct section *elf_create_rela_section(struct elf *elf,
strcpy(rsec_name, ".rela");
strcat(rsec_name, sec->name);
- rsec = elf_create_section(elf, rsec_name, elf_rela_size(elf), reloc_nr);
+ rsec = elf_create_section(elf, rsec_name, nr_relocs * elf_rela_size(elf),
+ elf_rela_size(elf), SHT_RELA, elf_addr_size(elf),
+ SHF_INFO_LINK);
free(rsec_name);
if (!rsec)
return NULL;
- rsec->data->d_type = ELF_T_RELA;
- rsec->sh.sh_type = SHT_RELA;
- rsec->sh.sh_addralign = elf_addr_size(elf);
- rsec->sh.sh_link = find_section_by_name(elf, ".symtab")->idx;
- rsec->sh.sh_info = sec->idx;
- rsec->sh.sh_flags = SHF_INFO_LINK;
+ if (nr_relocs) {
+ rsec->data->d_type = ELF_T_RELA;
- rsec->relocs = calloc(sec_num_entries(rsec), sizeof(struct reloc));
- if (!rsec->relocs) {
- ERROR_GLIBC("calloc");
- return NULL;
+ rsec->nr_alloc_relocs = nr_relocs;
+ rsec->relocs = calloc(nr_relocs, sizeof(struct reloc));
+ if (!rsec->relocs) {
+ ERROR_GLIBC("calloc");
+ return NULL;
+ }
}
+ rsec->sh.sh_link = find_section_by_name(elf, ".symtab")->idx;
+ rsec->sh.sh_info = sec->idx;
+
sec->rsec = rsec;
rsec->base = sec;
return rsec;
}
+struct reloc *elf_create_reloc(struct elf *elf, struct section *sec,
+ unsigned long offset,
+ struct symbol *sym, s64 addend,
+ unsigned int type)
+{
+ struct section *rsec = sec->rsec;
+
+ if (!rsec) {
+ rsec = elf_create_rela_section(elf, sec, 0);
+ if (!rsec)
+ return NULL;
+ }
+
+ if (find_reloc_by_dest(elf, sec, offset)) {
+ ERROR_FUNC(sec, offset, "duplicate reloc");
+ return NULL;
+ }
+
+ if (elf_alloc_reloc(elf, rsec))
+ return NULL;
+
+ mark_sec_changed(elf, rsec, true);
+
+ return elf_init_reloc(elf, rsec, sec_num_entries(rsec) - 1, offset, sym,
+ addend, type);
+}
+
struct section *elf_create_section_pair(struct elf *elf, const char *name,
size_t entsize, unsigned int nr,
- unsigned int reloc_nr)
+ unsigned int nr_relocs)
{
struct section *sec;
- sec = elf_create_section(elf, name, entsize, nr);
+ sec = elf_create_section(elf, name, nr * entsize, entsize,
+ SHT_PROGBITS, 1, SHF_ALLOC);
if (!sec)
return NULL;
- if (!elf_create_rela_section(elf, sec, reloc_nr))
+ if (!elf_create_rela_section(elf, sec, nr_relocs))
return NULL;
return sec;
@@ -1282,7 +1701,7 @@ int elf_write_insn(struct elf *elf, struct section *sec,
*/
static int elf_truncate_section(struct elf *elf, struct section *sec)
{
- u64 size = sec->sh.sh_size;
+ u64 size = sec_size(sec);
bool truncated = false;
Elf_Data *data = NULL;
Elf_Scn *s;
@@ -1296,7 +1715,6 @@ static int elf_truncate_section(struct elf *elf, struct section *sec)
for (;;) {
/* get next data descriptor for the relevant section */
data = elf_getdata(s, data);
-
if (!data) {
if (size) {
ERROR("end of section data but non-zero size left\n");
@@ -1332,8 +1750,8 @@ int elf_write(struct elf *elf)
/* Update changed relocation sections and section headers: */
list_for_each_entry(sec, &elf->sections, list) {
- if (sec->truncate)
- elf_truncate_section(elf, sec);
+ if (sec->truncate && elf_truncate_section(elf, sec))
+ return -1;
if (sec_changed(sec)) {
s = elf_getscn(elf->elf, sec->idx);
@@ -1366,7 +1784,7 @@ int elf_write(struct elf *elf)
return 0;
}
-void elf_close(struct elf *elf)
+int elf_close(struct elf *elf)
{
if (elf->elf)
elf_end(elf->elf);
@@ -1374,8 +1792,12 @@ void elf_close(struct elf *elf)
if (elf->fd > 0)
close(elf->fd);
+ if (elf->tmp_name && rename(elf->tmp_name, elf->name))
+ return -1;
+
/*
* NOTE: All remaining allocations are leaked on purpose. Objtool is
* about to exit anyway.
*/
+ return 0;
}
diff --git a/tools/objtool/include/objtool/arch.h b/tools/objtool/include/objtool/arch.h
index 01ef6f415adf..8866158975fc 100644
--- a/tools/objtool/include/objtool/arch.h
+++ b/tools/objtool/include/objtool/arch.h
@@ -71,7 +71,7 @@ struct stack_op {
struct instruction;
-int arch_ftrace_match(char *name);
+int arch_ftrace_match(const char *name);
void arch_initial_func_cfi_state(struct cfi_init_state *state);
@@ -83,7 +83,8 @@ bool arch_callee_saved_reg(unsigned char reg);
unsigned long arch_jump_destination(struct instruction *insn);
-unsigned long arch_dest_reloc_offset(int addend);
+s64 arch_insn_adjusted_addend(struct instruction *insn, struct reloc *reloc);
+u64 arch_adjusted_addend(struct reloc *reloc);
const char *arch_nop_insn(int len);
const char *arch_ret_insn(int len);
@@ -97,8 +98,20 @@ bool arch_is_embedded_insn(struct symbol *sym);
int arch_rewrite_retpolines(struct objtool_file *file);
bool arch_pc_relative_reloc(struct reloc *reloc);
+bool arch_absolute_reloc(struct elf *elf, struct reloc *reloc);
unsigned int arch_reloc_size(struct reloc *reloc);
unsigned long arch_jump_table_sym_offset(struct reloc *reloc, struct reloc *table);
+extern const char *arch_reg_name[CFI_NUM_REGS];
+
+#ifdef DISAS
+
+#include <bfd.h>
+#include <dis-asm.h>
+
+int arch_disas_info_init(struct disassemble_info *dinfo);
+
+#endif /* DISAS */
+
#endif /* _ARCH_H */
diff --git a/tools/objtool/include/objtool/builtin.h b/tools/objtool/include/objtool/builtin.h
index 6b08666fa69d..b9e229ed4dc0 100644
--- a/tools/objtool/include/objtool/builtin.h
+++ b/tools/objtool/include/objtool/builtin.h
@@ -9,12 +9,15 @@
struct opts {
/* actions: */
+ bool cfi;
+ bool checksum;
bool dump_orc;
bool hack_jump_label;
bool hack_noinstr;
bool hack_skylake;
bool ibt;
bool mcount;
+ bool noabs;
bool noinstr;
bool orc;
bool retpoline;
@@ -25,10 +28,12 @@ struct opts {
bool static_call;
bool uaccess;
int prefix;
- bool cfi;
+ const char *disas;
/* options: */
bool backtrace;
+ bool backup;
+ const char *debug_checksum;
bool dryrun;
bool link;
bool mnop;
@@ -37,8 +42,10 @@ struct opts {
const char *output;
bool sec_address;
bool stats;
+ const char *trace;
bool verbose;
bool werror;
+ bool wide;
};
extern struct opts opts;
@@ -47,6 +54,8 @@ int cmd_parse_options(int argc, const char **argv, const char * const usage[]);
int objtool_run(int argc, const char **argv);
-void print_args(void);
+int make_backup(void);
+
+int cmd_klp(int argc, const char **argv);
#endif /* _BUILTIN_H */
diff --git a/tools/objtool/include/objtool/check.h b/tools/objtool/include/objtool/check.h
index 00fb745e7233..2e1346ad5e92 100644
--- a/tools/objtool/include/objtool/check.h
+++ b/tools/objtool/include/objtool/check.h
@@ -36,6 +36,19 @@ struct alt_group {
struct cfi_state **cfi;
bool ignore;
+ unsigned int feature;
+};
+
+enum alternative_type {
+ ALT_TYPE_INSTRUCTIONS,
+ ALT_TYPE_JUMP_TABLE,
+ ALT_TYPE_EX_TABLE,
+};
+
+struct alternative {
+ struct alternative *next;
+ struct instruction *insn;
+ enum alternative_type type;
};
#define INSN_CHUNK_BITS 8
@@ -64,8 +77,11 @@ struct instruction {
noendbr : 1,
unret : 1,
visited : 4,
- no_reloc : 1;
- /* 10 bit hole */
+ no_reloc : 1,
+ hole : 1,
+ fake : 1,
+ trace : 1;
+ /* 9 bit hole */
struct alt_group *alt_group;
struct instruction *jump_dest;
@@ -115,6 +131,15 @@ static inline bool is_jump(struct instruction *insn)
return is_static_jump(insn) || is_dynamic_jump(insn);
}
+static inline struct symbol *insn_call_dest(struct instruction *insn)
+{
+ if (insn->type == INSN_JUMP_DYNAMIC ||
+ insn->type == INSN_CALL_DYNAMIC)
+ return NULL;
+
+ return insn->_call_dest;
+}
+
struct instruction *find_insn(struct objtool_file *file,
struct section *sec, unsigned long offset);
@@ -125,4 +150,14 @@ struct instruction *next_insn_same_sec(struct objtool_file *file, struct instruc
insn && insn->sec == _sec; \
insn = next_insn_same_sec(file, insn))
+#define sym_for_each_insn(file, sym, insn) \
+ for (insn = find_insn(file, sym->sec, sym->offset); \
+ insn && insn->offset < sym->offset + sym->len; \
+ insn = next_insn_same_sec(file, insn))
+
+const char *objtool_disas_insn(struct instruction *insn);
+
+extern size_t sym_name_max_len;
+extern struct disas_context *objtool_disas_ctx;
+
#endif /* _CHECK_H */
diff --git a/tools/objtool/include/objtool/checksum.h b/tools/objtool/include/objtool/checksum.h
new file mode 100644
index 000000000000..7fe21608722a
--- /dev/null
+++ b/tools/objtool/include/objtool/checksum.h
@@ -0,0 +1,43 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef _OBJTOOL_CHECKSUM_H
+#define _OBJTOOL_CHECKSUM_H
+
+#include <objtool/elf.h>
+
+#ifdef BUILD_KLP
+
+static inline void checksum_init(struct symbol *func)
+{
+ if (func && !func->csum.state) {
+ func->csum.state = XXH3_createState();
+ XXH3_64bits_reset(func->csum.state);
+ }
+}
+
+static inline void checksum_update(struct symbol *func,
+ struct instruction *insn,
+ const void *data, size_t size)
+{
+ XXH3_64bits_update(func->csum.state, data, size);
+ dbg_checksum(func, insn, XXH3_64bits_digest(func->csum.state));
+}
+
+static inline void checksum_finish(struct symbol *func)
+{
+ if (func && func->csum.state) {
+ func->csum.checksum = XXH3_64bits_digest(func->csum.state);
+ func->csum.state = NULL;
+ }
+}
+
+#else /* !BUILD_KLP */
+
+static inline void checksum_init(struct symbol *func) {}
+static inline void checksum_update(struct symbol *func,
+ struct instruction *insn,
+ const void *data, size_t size) {}
+static inline void checksum_finish(struct symbol *func) {}
+
+#endif /* !BUILD_KLP */
+
+#endif /* _OBJTOOL_CHECKSUM_H */
diff --git a/tools/objtool/include/objtool/checksum_types.h b/tools/objtool/include/objtool/checksum_types.h
new file mode 100644
index 000000000000..507efdd8ab5b
--- /dev/null
+++ b/tools/objtool/include/objtool/checksum_types.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _OBJTOOL_CHECKSUM_TYPES_H
+#define _OBJTOOL_CHECKSUM_TYPES_H
+
+struct sym_checksum {
+ u64 addr;
+ u64 checksum;
+};
+
+#ifdef BUILD_KLP
+
+#include <xxhash.h>
+
+struct checksum {
+ XXH3_state_t *state;
+ XXH64_hash_t checksum;
+};
+
+#else /* !BUILD_KLP */
+
+struct checksum {};
+
+#endif /* !BUILD_KLP */
+
+#endif /* _OBJTOOL_CHECKSUM_TYPES_H */
diff --git a/tools/objtool/include/objtool/disas.h b/tools/objtool/include/objtool/disas.h
new file mode 100644
index 000000000000..e8f395eff159
--- /dev/null
+++ b/tools/objtool/include/objtool/disas.h
@@ -0,0 +1,81 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (c) 2025, Oracle and/or its affiliates.
+ */
+
+#ifndef _DISAS_H
+#define _DISAS_H
+
+struct alternative;
+struct disas_context;
+struct disassemble_info;
+
+#ifdef DISAS
+
+struct disas_context *disas_context_create(struct objtool_file *file);
+void disas_context_destroy(struct disas_context *dctx);
+void disas_warned_funcs(struct disas_context *dctx);
+void disas_funcs(struct disas_context *dctx);
+int disas_info_init(struct disassemble_info *dinfo,
+ int arch, int mach32, int mach64,
+ const char *options);
+size_t disas_insn(struct disas_context *dctx, struct instruction *insn);
+char *disas_result(struct disas_context *dctx);
+void disas_print_info(FILE *stream, struct instruction *insn, int depth,
+ const char *format, ...);
+void disas_print_insn(FILE *stream, struct disas_context *dctx,
+ struct instruction *insn, int depth,
+ const char *format, ...);
+char *disas_alt_name(struct alternative *alt);
+const char *disas_alt_type_name(struct instruction *insn);
+
+#else /* DISAS */
+
+#include <objtool/warn.h>
+
+static inline struct disas_context *disas_context_create(struct objtool_file *file)
+{
+ WARN("Rebuild with libopcodes for disassembly support");
+ return NULL;
+}
+
+static inline void disas_context_destroy(struct disas_context *dctx) {}
+static inline void disas_warned_funcs(struct disas_context *dctx) {}
+static inline void disas_funcs(struct disas_context *dctx) {}
+
+static inline int disas_info_init(struct disassemble_info *dinfo,
+ int arch, int mach32, int mach64,
+ const char *options)
+{
+ return -1;
+}
+
+static inline size_t disas_insn(struct disas_context *dctx,
+ struct instruction *insn)
+{
+ return -1;
+}
+
+static inline char *disas_result(struct disas_context *dctx)
+{
+ return NULL;
+}
+
+static inline void disas_print_info(FILE *stream, struct instruction *insn,
+ int depth, const char *format, ...) {}
+static inline void disas_print_insn(FILE *stream, struct disas_context *dctx,
+ struct instruction *insn, int depth,
+ const char *format, ...) {}
+static inline char *disas_alt_name(struct alternative *alt)
+{
+ return NULL;
+}
+
+static inline const char *disas_alt_type_name(struct instruction *insn)
+{
+ return NULL;
+}
+
+#endif /* DISAS */
+
+#endif /* _DISAS_H */
diff --git a/tools/objtool/include/objtool/elf.h b/tools/objtool/include/objtool/elf.h
index 0a2fa3ac0079..e12c516bd320 100644
--- a/tools/objtool/include/objtool/elf.h
+++ b/tools/objtool/include/objtool/elf.h
@@ -8,12 +8,21 @@
#include <stdio.h>
#include <gelf.h>
+#include <linux/string.h>
#include <linux/list.h>
#include <linux/hashtable.h>
#include <linux/rbtree.h>
#include <linux/jhash.h>
+
+#include <objtool/endianness.h>
+#include <objtool/checksum_types.h>
#include <arch/elf.h>
+#define SEC_NAME_LEN 1024
+#define SYM_NAME_LEN 512
+
+#define bswap_if_needed(elf, val) __bswap_if_needed(&elf->ehdr, val)
+
#ifdef LIBELF_USE_DEPRECATED
# define elf_getshdrnum elf_getshnum
# define elf_getshdrstrndx elf_getshstrndx
@@ -40,24 +49,27 @@ struct section {
struct section *base, *rsec;
struct symbol *sym;
Elf_Data *data;
- char *name;
+ const char *name;
int idx;
bool _changed, text, rodata, noinstr, init, truncate;
struct reloc *relocs;
+ unsigned long nr_alloc_relocs;
+ struct section *twin;
};
struct symbol {
struct list_head list;
+ struct list_head global_list;
struct rb_node node;
struct elf_hash_node hash;
struct elf_hash_node name_hash;
GElf_Sym sym;
struct section *sec;
- char *name;
+ const char *name, *demangled_name;
unsigned int idx, len;
unsigned long offset;
unsigned long __subtree_last;
- struct symbol *pfunc, *cfunc, *alias;
+ struct symbol *pfunc, *cfunc, *alias, *file;
unsigned char bind, type;
u8 uaccess_safe : 1;
u8 static_call_tramp : 1;
@@ -70,9 +82,18 @@ struct symbol {
u8 local_label : 1;
u8 frame_pointer : 1;
u8 ignore : 1;
+ u8 nocfi : 1;
+ u8 cold : 1;
+ u8 prefix : 1;
+ u8 debug_checksum : 1;
+ u8 changed : 1;
+ u8 included : 1;
+ u8 klp : 1;
struct list_head pv_target;
struct reloc *relocs;
struct section *group_sec;
+ struct checksum csum;
+ struct symbol *twin, *clone;
};
struct reloc {
@@ -87,9 +108,10 @@ struct elf {
GElf_Ehdr ehdr;
int fd;
bool changed;
- char *name;
+ const char *name, *tmp_name;
unsigned int num_files;
struct list_head sections;
+ struct list_head symbols;
unsigned long num_relocs;
int symbol_bits;
@@ -109,14 +131,37 @@ struct elf {
};
struct elf *elf_open_read(const char *name, int flags);
+struct elf *elf_create_file(GElf_Ehdr *ehdr, const char *name);
struct section *elf_create_section(struct elf *elf, const char *name,
- size_t entsize, unsigned int nr);
+ size_t size, size_t entsize,
+ unsigned int type, unsigned int align,
+ unsigned int flags);
struct section *elf_create_section_pair(struct elf *elf, const char *name,
size_t entsize, unsigned int nr,
unsigned int reloc_nr);
-struct symbol *elf_create_prefix_symbol(struct elf *elf, struct symbol *orig, long size);
+struct section *elf_create_rela_section(struct elf *elf, struct section *sec,
+ unsigned int reloc_nr);
+
+struct symbol *elf_create_symbol(struct elf *elf, const char *name,
+ struct section *sec, unsigned int bind,
+ unsigned int type, unsigned long offset,
+ size_t size);
+struct symbol *elf_create_section_symbol(struct elf *elf, struct section *sec);
+
+void *elf_add_data(struct elf *elf, struct section *sec, const void *data,
+ size_t size);
+
+unsigned int elf_add_string(struct elf *elf, struct section *strtab, const char *str);
+
+struct reloc *elf_create_reloc(struct elf *elf, struct section *sec,
+ unsigned long offset, struct symbol *sym,
+ s64 addend, unsigned int type);
+
+struct reloc *elf_init_reloc(struct elf *elf, struct section *rsec,
+ unsigned int reloc_idx, unsigned long offset,
+ struct symbol *sym, s64 addend, unsigned int type);
struct reloc *elf_init_reloc_text_sym(struct elf *elf, struct section *sec,
unsigned long offset,
@@ -130,16 +175,17 @@ struct reloc *elf_init_reloc_data_sym(struct elf *elf, struct section *sec,
struct symbol *sym,
s64 addend);
-int elf_write_insn(struct elf *elf, struct section *sec,
- unsigned long offset, unsigned int len,
- const char *insn);
+int elf_write_insn(struct elf *elf, struct section *sec, unsigned long offset,
+ unsigned int len, const char *insn);
+
int elf_write(struct elf *elf);
-void elf_close(struct elf *elf);
+int elf_close(struct elf *elf);
struct section *find_section_by_name(const struct elf *elf, const char *name);
struct symbol *find_func_by_offset(struct section *sec, unsigned long offset);
struct symbol *find_symbol_by_offset(struct section *sec, unsigned long offset);
struct symbol *find_symbol_by_name(const struct elf *elf, const char *name);
+struct symbol *find_global_symbol_by_name(const struct elf *elf, const char *name);
struct symbol *find_symbol_containing(const struct section *sec, unsigned long offset);
int find_symbol_hole_containing(const struct section *sec, unsigned long offset);
struct reloc *find_reloc_by_dest(const struct elf *elf, struct section *sec, unsigned long offset);
@@ -177,11 +223,76 @@ static inline unsigned int elf_text_rela_type(struct elf *elf)
return elf_addr_size(elf) == 4 ? R_TEXT32 : R_TEXT64;
}
+static inline bool is_undef_sym(struct symbol *sym)
+{
+ return !sym->sec->idx;
+}
+
+static inline bool is_null_sym(struct symbol *sym)
+{
+ return !sym->idx;
+}
+
+static inline bool is_sec_sym(struct symbol *sym)
+{
+ return sym->type == STT_SECTION;
+}
+
+static inline bool is_object_sym(struct symbol *sym)
+{
+ return sym->type == STT_OBJECT;
+}
+
+static inline bool is_func_sym(struct symbol *sym)
+{
+ return sym->type == STT_FUNC;
+}
+
+static inline bool is_file_sym(struct symbol *sym)
+{
+ return sym->type == STT_FILE;
+}
+
+static inline bool is_notype_sym(struct symbol *sym)
+{
+ return sym->type == STT_NOTYPE;
+}
+
+static inline bool is_global_sym(struct symbol *sym)
+{
+ return sym->bind == STB_GLOBAL;
+}
+
+static inline bool is_weak_sym(struct symbol *sym)
+{
+ return sym->bind == STB_WEAK;
+}
+
+static inline bool is_local_sym(struct symbol *sym)
+{
+ return sym->bind == STB_LOCAL;
+}
+
+static inline bool is_prefix_func(struct symbol *sym)
+{
+ return sym->prefix;
+}
+
static inline bool is_reloc_sec(struct section *sec)
{
return sec->sh.sh_type == SHT_RELA || sec->sh.sh_type == SHT_REL;
}
+static inline bool is_string_sec(struct section *sec)
+{
+ return sec->sh.sh_flags & SHF_STRINGS;
+}
+
+static inline bool is_text_sec(struct section *sec)
+{
+ return sec->sh.sh_flags & SHF_EXECINSTR;
+}
+
static inline bool sec_changed(struct section *sec)
{
return sec->_changed;
@@ -222,6 +333,11 @@ static inline bool is_32bit_reloc(struct reloc *reloc)
return reloc->sec->sh.sh_entsize < 16;
}
+static inline unsigned long sec_size(struct section *sec)
+{
+ return sec->sh.sh_size;
+}
+
#define __get_reloc_field(reloc, field) \
({ \
is_32bit_reloc(reloc) ? \
@@ -299,6 +415,15 @@ static inline void set_reloc_type(struct elf *elf, struct reloc *reloc, unsigned
mark_sec_changed(elf, reloc->sec, true);
}
+static inline unsigned int annotype(struct elf *elf, struct section *sec,
+ struct reloc *reloc)
+{
+ unsigned int type;
+
+ type = *(u32 *)(sec->data->d_buf + (reloc_idx(reloc) * 8) + 4);
+ return bswap_if_needed(elf, type);
+}
+
#define RELOC_JUMP_TABLE_BIT 1UL
/* Does reloc mark the beginning of a jump table? */
@@ -324,28 +449,54 @@ static inline void set_sym_next_reloc(struct reloc *reloc, struct reloc *next)
reloc->_sym_next_reloc = (unsigned long)next | bit;
}
-#define for_each_sec(file, sec) \
- list_for_each_entry(sec, &file->elf->sections, list)
+#define for_each_sec(elf, sec) \
+ list_for_each_entry(sec, &elf->sections, list)
#define sec_for_each_sym(sec, sym) \
list_for_each_entry(sym, &sec->symbol_list, list)
-#define for_each_sym(file, sym) \
- for (struct section *__sec, *__fake = (struct section *)1; \
- __fake; __fake = NULL) \
- for_each_sec(file, __sec) \
- sec_for_each_sym(__sec, sym)
+#define sec_prev_sym(sym) \
+ sym->sec && sym->list.prev != &sym->sec->symbol_list ? \
+ list_prev_entry(sym, list) : NULL
+
+#define for_each_sym(elf, sym) \
+ list_for_each_entry(sym, &elf->symbols, global_list)
+
+#define for_each_sym_continue(elf, sym) \
+ list_for_each_entry_continue(sym, &elf->symbols, global_list)
+
+#define rsec_next_reloc(rsec, reloc) \
+ reloc_idx(reloc) < sec_num_entries(rsec) - 1 ? reloc + 1 : NULL
#define for_each_reloc(rsec, reloc) \
- for (int __i = 0, __fake = 1; __fake; __fake = 0) \
- for (reloc = rsec->relocs; \
- __i < sec_num_entries(rsec); \
- __i++, reloc++)
+ for (reloc = rsec->relocs; reloc; reloc = rsec_next_reloc(rsec, reloc))
#define for_each_reloc_from(rsec, reloc) \
- for (int __i = reloc_idx(reloc); \
- __i < sec_num_entries(rsec); \
- __i++, reloc++)
+ for (; reloc; reloc = rsec_next_reloc(rsec, reloc))
+
+#define for_each_reloc_continue(rsec, reloc) \
+ for (reloc = rsec_next_reloc(rsec, reloc); reloc; \
+ reloc = rsec_next_reloc(rsec, reloc))
+
+#define sym_for_each_reloc(elf, sym, reloc) \
+ for (reloc = find_reloc_by_dest_range(elf, sym->sec, \
+ sym->offset, sym->len); \
+ reloc && reloc_offset(reloc) < sym->offset + sym->len; \
+ reloc = rsec_next_reloc(sym->sec->rsec, reloc))
+
+static inline struct symbol *get_func_prefix(struct symbol *func)
+{
+ struct symbol *prev;
+
+ if (!is_func_sym(func))
+ return NULL;
+
+ prev = sec_prev_sym(func);
+ if (prev && is_prefix_func(prev))
+ return prev;
+
+ return NULL;
+}
#define OFFSET_STRIDE_BITS 4
#define OFFSET_STRIDE (1UL << OFFSET_STRIDE_BITS)
diff --git a/tools/objtool/include/objtool/endianness.h b/tools/objtool/include/objtool/endianness.h
index 4d2aa9b0fe2f..aebcd2338668 100644
--- a/tools/objtool/include/objtool/endianness.h
+++ b/tools/objtool/include/objtool/endianness.h
@@ -4,7 +4,6 @@
#include <linux/kernel.h>
#include <endian.h>
-#include <objtool/elf.h>
/*
* Does a byte swap if target file endianness doesn't match the host, i.e. cross
@@ -12,16 +11,16 @@
* To be used for multi-byte values conversion, which are read from / about
* to be written to a target native endianness ELF file.
*/
-static inline bool need_bswap(struct elf *elf)
+static inline bool need_bswap(GElf_Ehdr *ehdr)
{
return (__BYTE_ORDER == __LITTLE_ENDIAN) ^
- (elf->ehdr.e_ident[EI_DATA] == ELFDATA2LSB);
+ (ehdr->e_ident[EI_DATA] == ELFDATA2LSB);
}
-#define bswap_if_needed(elf, val) \
+#define __bswap_if_needed(ehdr, val) \
({ \
__typeof__(val) __ret; \
- bool __need_bswap = need_bswap(elf); \
+ bool __need_bswap = need_bswap(ehdr); \
switch (sizeof(val)) { \
case 8: \
__ret = __need_bswap ? bswap_64(val) : (val); break; \
diff --git a/tools/objtool/include/objtool/klp.h b/tools/objtool/include/objtool/klp.h
new file mode 100644
index 000000000000..ad830a7ce55b
--- /dev/null
+++ b/tools/objtool/include/objtool/klp.h
@@ -0,0 +1,35 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef _OBJTOOL_KLP_H
+#define _OBJTOOL_KLP_H
+
+#define SHF_RELA_LIVEPATCH 0x00100000
+#define SHN_LIVEPATCH 0xff20
+
+/*
+ * __klp_objects and __klp_funcs are created by klp diff and used by the patch
+ * module init code to build the klp_patch, klp_object and klp_func structs
+ * needed by the livepatch API.
+ */
+#define KLP_OBJECTS_SEC "__klp_objects"
+#define KLP_FUNCS_SEC "__klp_funcs"
+
+/*
+ * __klp_relocs is an intermediate section which are created by klp diff and
+ * converted into KLP symbols/relas by "objtool klp post-link". This is needed
+ * to work around the linker, which doesn't preserve SHN_LIVEPATCH or
+ * SHF_RELA_LIVEPATCH, nor does it support having two RELA sections for a
+ * single PROGBITS section.
+ */
+#define KLP_RELOCS_SEC "__klp_relocs"
+#define KLP_STRINGS_SEC ".rodata.klp.str1.1"
+
+struct klp_reloc {
+ void *offset;
+ void *sym;
+ u32 type;
+};
+
+int cmd_klp_diff(int argc, const char **argv);
+int cmd_klp_post_link(int argc, const char **argv);
+
+#endif /* _OBJTOOL_KLP_H */
diff --git a/tools/objtool/include/objtool/objtool.h b/tools/objtool/include/objtool/objtool.h
index c0dc86a78ff6..6dc12a59ad00 100644
--- a/tools/objtool/include/objtool/objtool.h
+++ b/tools/objtool/include/objtool/objtool.h
@@ -28,7 +28,7 @@ struct objtool_file {
struct list_head mcount_loc_list;
struct list_head endbr_list;
struct list_head call_list;
- bool ignore_unreachables, hints, rodata;
+ bool ignore_unreachables, hints, rodata, klp;
unsigned int nr_endbr;
unsigned int nr_endbr_int;
@@ -39,6 +39,10 @@ struct objtool_file {
struct pv_state *pv_ops;
};
+char *top_level_dir(const char *file);
+
+int init_signal_handler(void);
+
struct objtool_file *objtool_open_read(const char *_objname);
int objtool_pv_add(struct objtool_file *file, int idx, struct symbol *func);
diff --git a/tools/objtool/include/objtool/special.h b/tools/objtool/include/objtool/special.h
index 72d09c0adf1a..121c3761899c 100644
--- a/tools/objtool/include/objtool/special.h
+++ b/tools/objtool/include/objtool/special.h
@@ -25,7 +25,7 @@ struct special_alt {
struct section *new_sec;
unsigned long new_off;
- unsigned int orig_len, new_len; /* group only */
+ unsigned int orig_len, new_len, feature; /* group only */
};
int special_get_alts(struct elf *elf, struct list_head *alts);
@@ -38,4 +38,6 @@ bool arch_support_alt_relocation(struct special_alt *special_alt,
struct reloc *arch_find_switch_table(struct objtool_file *file,
struct instruction *insn,
unsigned long *table_size);
+const char *arch_cpu_feature_name(int feature_number);
+
#endif /* _SPECIAL_H */
diff --git a/tools/objtool/include/objtool/trace.h b/tools/objtool/include/objtool/trace.h
new file mode 100644
index 000000000000..70b574366797
--- /dev/null
+++ b/tools/objtool/include/objtool/trace.h
@@ -0,0 +1,141 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (c) 2025, Oracle and/or its affiliates.
+ */
+
+#ifndef _TRACE_H
+#define _TRACE_H
+
+#include <objtool/check.h>
+#include <objtool/disas.h>
+
+#ifdef DISAS
+
+extern bool trace;
+extern int trace_depth;
+
+#define TRACE(fmt, ...) \
+({ if (trace) \
+ fprintf(stderr, fmt, ##__VA_ARGS__); \
+})
+
+/*
+ * Print the instruction address and a message. The instruction
+ * itself is not printed.
+ */
+#define TRACE_ADDR(insn, fmt, ...) \
+({ \
+ if (trace) { \
+ disas_print_info(stderr, insn, trace_depth - 1, \
+ fmt "\n", ##__VA_ARGS__); \
+ } \
+})
+
+/*
+ * Print the instruction address, the instruction and a message.
+ */
+#define TRACE_INSN(insn, fmt, ...) \
+({ \
+ if (trace) { \
+ disas_print_insn(stderr, objtool_disas_ctx, \
+ insn, trace_depth - 1, \
+ fmt, ##__VA_ARGS__); \
+ fprintf(stderr, "\n"); \
+ insn->trace = 1; \
+ } \
+})
+
+#define TRACE_INSN_STATE(insn, sprev, snext) \
+({ \
+ if (trace) \
+ trace_insn_state(insn, sprev, snext); \
+})
+
+#define TRACE_ALT_FMT(pfx, fmt) pfx "<%s.%lx> " fmt
+#define TRACE_ALT_ARG(insn) disas_alt_type_name(insn), (insn)->offset
+
+#define TRACE_ALT(insn, fmt, ...) \
+ TRACE_INSN(insn, TRACE_ALT_FMT("", fmt), \
+ TRACE_ALT_ARG(insn), ##__VA_ARGS__)
+
+#define TRACE_ALT_INFO(insn, pfx, fmt, ...) \
+ TRACE_ADDR(insn, TRACE_ALT_FMT(pfx, fmt), \
+ TRACE_ALT_ARG(insn), ##__VA_ARGS__)
+
+#define TRACE_ALT_INFO_NOADDR(insn, pfx, fmt, ...) \
+ TRACE_ADDR(NULL, TRACE_ALT_FMT(pfx, fmt), \
+ TRACE_ALT_ARG(insn), ##__VA_ARGS__)
+
+#define TRACE_ALT_BEGIN(insn, alt, alt_name) \
+({ \
+ if (trace) { \
+ alt_name = disas_alt_name(alt); \
+ trace_alt_begin(insn, alt, alt_name); \
+ } \
+})
+
+#define TRACE_ALT_END(insn, alt, alt_name) \
+({ \
+ if (trace) { \
+ trace_alt_end(insn, alt, alt_name); \
+ free(alt_name); \
+ } \
+})
+
+static inline void trace_enable(void)
+{
+ trace = true;
+ trace_depth = 0;
+}
+
+static inline void trace_disable(void)
+{
+ trace = false;
+}
+
+static inline void trace_depth_inc(void)
+{
+ if (trace)
+ trace_depth++;
+}
+
+static inline void trace_depth_dec(void)
+{
+ if (trace)
+ trace_depth--;
+}
+
+void trace_insn_state(struct instruction *insn, struct insn_state *sprev,
+ struct insn_state *snext);
+void trace_alt_begin(struct instruction *orig_insn, struct alternative *alt,
+ char *alt_name);
+void trace_alt_end(struct instruction *orig_insn, struct alternative *alt,
+ char *alt_name);
+
+#else /* DISAS */
+
+#define TRACE(fmt, ...) ({})
+#define TRACE_ADDR(insn, fmt, ...) ({})
+#define TRACE_INSN(insn, fmt, ...) ({})
+#define TRACE_INSN_STATE(insn, sprev, snext) ({})
+#define TRACE_ALT(insn, fmt, ...) ({})
+#define TRACE_ALT_INFO(insn, fmt, ...) ({})
+#define TRACE_ALT_INFO_NOADDR(insn, fmt, ...) ({})
+#define TRACE_ALT_BEGIN(insn, alt, alt_name) ({})
+#define TRACE_ALT_END(insn, alt, alt_name) ({})
+
+
+static inline void trace_enable(void) {}
+static inline void trace_disable(void) {}
+static inline void trace_depth_inc(void) {}
+static inline void trace_depth_dec(void) {}
+static inline void trace_alt_begin(struct instruction *orig_insn,
+ struct alternative *alt,
+ char *alt_name) {};
+static inline void trace_alt_end(struct instruction *orig_insn,
+ struct alternative *alt,
+ char *alt_name) {};
+
+#endif
+
+#endif /* _TRACE_H */
diff --git a/tools/objtool/include/objtool/util.h b/tools/objtool/include/objtool/util.h
new file mode 100644
index 000000000000..a0180b312f73
--- /dev/null
+++ b/tools/objtool/include/objtool/util.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef _UTIL_H
+#define _UTIL_H
+
+#include <objtool/warn.h>
+
+#define snprintf_check(str, size, format, args...) \
+({ \
+ int __ret = snprintf(str, size, format, args); \
+ if (__ret < 0) \
+ ERROR_GLIBC("snprintf"); \
+ else if (__ret >= size) \
+ ERROR("snprintf() failed for '" format "'", args); \
+ else \
+ __ret = 0; \
+ __ret; \
+})
+
+#endif /* _UTIL_H */
diff --git a/tools/objtool/include/objtool/warn.h b/tools/objtool/include/objtool/warn.h
index cb8fe846d9dd..25ff7942b4d5 100644
--- a/tools/objtool/include/objtool/warn.h
+++ b/tools/objtool/include/objtool/warn.h
@@ -77,9 +77,11 @@ static inline char *offstr(struct section *sec, unsigned long offset)
#define WARN_INSN(insn, format, ...) \
({ \
struct instruction *_insn = (insn); \
- if (!_insn->sym || !_insn->sym->warned) \
+ if (!_insn->sym || !_insn->sym->warned) { \
WARN_FUNC(_insn->sec, _insn->offset, format, \
##__VA_ARGS__); \
+ BT_INSN(_insn, ""); \
+ } \
if (_insn->sym) \
_insn->sym->warned = 1; \
})
@@ -87,10 +89,15 @@ static inline char *offstr(struct section *sec, unsigned long offset)
#define BT_INSN(insn, format, ...) \
({ \
if (opts.verbose || opts.backtrace) { \
- struct instruction *_insn = (insn); \
- char *_str = offstr(_insn->sec, _insn->offset); \
- WARN(" %s: " format, _str, ##__VA_ARGS__); \
- free(_str); \
+ struct instruction *__insn = (insn); \
+ char *_str = offstr(__insn->sec, __insn->offset); \
+ const char *_istr = objtool_disas_insn(__insn); \
+ int _len; \
+ _len = snprintf(NULL, 0, " %s: " format, _str, ##__VA_ARGS__); \
+ _len = (_len < 50) ? 50 - _len : 0; \
+ WARN(" %s: " format " %*s%s", _str, ##__VA_ARGS__, _len, "", _istr); \
+ free(_str); \
+ __insn->trace = 1; \
} \
})
@@ -102,4 +109,53 @@ static inline char *offstr(struct section *sec, unsigned long offset)
#define ERROR_FUNC(sec, offset, format, ...) __WARN_FUNC(ERROR_STR, sec, offset, format, ##__VA_ARGS__)
#define ERROR_INSN(insn, format, ...) WARN_FUNC(insn->sec, insn->offset, format, ##__VA_ARGS__)
+extern bool debug;
+extern int indent;
+
+static inline void unindent(int *unused) { indent--; }
+
+/*
+ * Clang prior to 17 is being silly and considers many __cleanup() variables
+ * as unused (because they are, their sole purpose is to go out of scope).
+ *
+ * https://github.com/llvm/llvm-project/commit/877210faa447f4cc7db87812f8ed80e398fedd61
+ */
+#undef __cleanup
+#define __cleanup(func) __maybe_unused __attribute__((__cleanup__(func)))
+
+#define __dbg(format, ...) \
+ fprintf(stderr, \
+ "DEBUG: %s%s" format "\n", \
+ objname ?: "", \
+ objname ? ": " : "", \
+ ##__VA_ARGS__)
+
+#define dbg(args...) \
+({ \
+ if (unlikely(debug)) \
+ __dbg(args); \
+})
+
+#define __dbg_indent(format, ...) \
+({ \
+ if (unlikely(debug)) \
+ __dbg("%*s" format, indent * 8, "", ##__VA_ARGS__); \
+})
+
+#define dbg_indent(args...) \
+ int __cleanup(unindent) __dummy_##__COUNTER__; \
+ __dbg_indent(args); \
+ indent++
+
+#define dbg_checksum(func, insn, checksum) \
+({ \
+ if (unlikely(insn->sym && insn->sym->pfunc && \
+ insn->sym->pfunc->debug_checksum)) { \
+ char *insn_off = offstr(insn->sec, insn->offset); \
+ __dbg("checksum: %s %s %016lx", \
+ func->name, insn_off, checksum); \
+ free(insn_off); \
+ } \
+})
+
#endif /* _WARN_H */
diff --git a/tools/objtool/klp-diff.c b/tools/objtool/klp-diff.c
new file mode 100644
index 000000000000..4d1f9e9977eb
--- /dev/null
+++ b/tools/objtool/klp-diff.c
@@ -0,0 +1,1723 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+#define _GNU_SOURCE /* memmem() */
+#include <subcmd/parse-options.h>
+#include <stdlib.h>
+#include <string.h>
+#include <libgen.h>
+#include <stdio.h>
+#include <ctype.h>
+
+#include <objtool/objtool.h>
+#include <objtool/warn.h>
+#include <objtool/arch.h>
+#include <objtool/klp.h>
+#include <objtool/util.h>
+#include <arch/special.h>
+
+#include <linux/objtool_types.h>
+#include <linux/livepatch_external.h>
+#include <linux/stringify.h>
+#include <linux/string.h>
+#include <linux/jhash.h>
+
+#define sizeof_field(TYPE, MEMBER) sizeof((((TYPE *)0)->MEMBER))
+
+struct elfs {
+ struct elf *orig, *patched, *out;
+ const char *modname;
+};
+
+struct export {
+ struct hlist_node hash;
+ char *mod, *sym;
+};
+
+static const char * const klp_diff_usage[] = {
+ "objtool klp diff [<options>] <in1.o> <in2.o> <out.o>",
+ NULL,
+};
+
+static const struct option klp_diff_options[] = {
+ OPT_GROUP("Options:"),
+ OPT_BOOLEAN('d', "debug", &debug, "enable debug output"),
+ OPT_END(),
+};
+
+static DEFINE_HASHTABLE(exports, 15);
+
+static inline u32 str_hash(const char *str)
+{
+ return jhash(str, strlen(str), 0);
+}
+
+static char *escape_str(const char *orig)
+{
+ size_t len = 0;
+ const char *a;
+ char *b, *new;
+
+ for (a = orig; *a; a++) {
+ switch (*a) {
+ case '\001': len += 5; break;
+ case '\n':
+ case '\t': len += 2; break;
+ default: len++;
+ }
+ }
+
+ new = malloc(len + 1);
+ if (!new)
+ return NULL;
+
+ for (a = orig, b = new; *a; a++) {
+ switch (*a) {
+ case '\001': memcpy(b, "<SOH>", 5); b += 5; break;
+ case '\n': *b++ = '\\'; *b++ = 'n'; break;
+ case '\t': *b++ = '\\'; *b++ = 't'; break;
+ default: *b++ = *a;
+ }
+ }
+
+ *b = '\0';
+ return new;
+}
+
+static int read_exports(void)
+{
+ const char *symvers = "Module.symvers";
+ char line[1024], *path = NULL;
+ unsigned int line_num = 1;
+ FILE *file;
+
+ file = fopen(symvers, "r");
+ if (!file) {
+ path = top_level_dir(symvers);
+ if (!path) {
+ ERROR("can't open '%s', \"objtool diff\" should be run from the kernel tree", symvers);
+ return -1;
+ }
+
+ file = fopen(path, "r");
+ if (!file) {
+ ERROR_GLIBC("fopen");
+ return -1;
+ }
+ }
+
+ while (fgets(line, 1024, file)) {
+ char *sym, *mod, *type;
+ struct export *export;
+
+ sym = strchr(line, '\t');
+ if (!sym) {
+ ERROR("malformed Module.symvers (sym) at line %d", line_num);
+ return -1;
+ }
+
+ *sym++ = '\0';
+
+ mod = strchr(sym, '\t');
+ if (!mod) {
+ ERROR("malformed Module.symvers (mod) at line %d", line_num);
+ return -1;
+ }
+
+ *mod++ = '\0';
+
+ type = strchr(mod, '\t');
+ if (!type) {
+ ERROR("malformed Module.symvers (type) at line %d", line_num);
+ return -1;
+ }
+
+ *type++ = '\0';
+
+ if (*sym == '\0' || *mod == '\0') {
+ ERROR("malformed Module.symvers at line %d", line_num);
+ return -1;
+ }
+
+ export = calloc(1, sizeof(*export));
+ if (!export) {
+ ERROR_GLIBC("calloc");
+ return -1;
+ }
+
+ export->mod = strdup(mod);
+ if (!export->mod) {
+ ERROR_GLIBC("strdup");
+ return -1;
+ }
+
+ export->sym = strdup(sym);
+ if (!export->sym) {
+ ERROR_GLIBC("strdup");
+ return -1;
+ }
+
+ hash_add(exports, &export->hash, str_hash(sym));
+ }
+
+ free(path);
+ fclose(file);
+
+ return 0;
+}
+
+static int read_sym_checksums(struct elf *elf)
+{
+ struct section *sec;
+
+ sec = find_section_by_name(elf, ".discard.sym_checksum");
+ if (!sec) {
+ ERROR("'%s' missing .discard.sym_checksum section, file not processed by 'objtool --checksum'?",
+ elf->name);
+ return -1;
+ }
+
+ if (!sec->rsec) {
+ ERROR("missing reloc section for .discard.sym_checksum");
+ return -1;
+ }
+
+ if (sec_size(sec) % sizeof(struct sym_checksum)) {
+ ERROR("struct sym_checksum size mismatch");
+ return -1;
+ }
+
+ for (int i = 0; i < sec_size(sec) / sizeof(struct sym_checksum); i++) {
+ struct sym_checksum *sym_checksum;
+ struct reloc *reloc;
+ struct symbol *sym;
+
+ sym_checksum = (struct sym_checksum *)sec->data->d_buf + i;
+
+ reloc = find_reloc_by_dest(elf, sec, i * sizeof(*sym_checksum));
+ if (!reloc) {
+ ERROR("can't find reloc for sym_checksum[%d]", i);
+ return -1;
+ }
+
+ sym = reloc->sym;
+
+ if (is_sec_sym(sym)) {
+ ERROR("not sure how to handle section %s", sym->name);
+ return -1;
+ }
+
+ if (is_func_sym(sym))
+ sym->csum.checksum = sym_checksum->checksum;
+ }
+
+ return 0;
+}
+
+static struct symbol *first_file_symbol(struct elf *elf)
+{
+ struct symbol *sym;
+
+ for_each_sym(elf, sym) {
+ if (is_file_sym(sym))
+ return sym;
+ }
+
+ return NULL;
+}
+
+static struct symbol *next_file_symbol(struct elf *elf, struct symbol *sym)
+{
+ for_each_sym_continue(elf, sym) {
+ if (is_file_sym(sym))
+ return sym;
+ }
+
+ return NULL;
+}
+
+/*
+ * Certain static local variables should never be correlated. They will be
+ * used in place rather than referencing the originals.
+ */
+static bool is_uncorrelated_static_local(struct symbol *sym)
+{
+ static const char * const vars[] = {
+ "__already_done.",
+ "__func__.",
+ "__key.",
+ "__warned.",
+ "_entry.",
+ "_entry_ptr.",
+ "_rs.",
+ "descriptor.",
+ "CSWTCH.",
+ };
+
+ if (!is_object_sym(sym) || !is_local_sym(sym))
+ return false;
+
+ if (!strcmp(sym->sec->name, ".data.once"))
+ return true;
+
+ for (int i = 0; i < ARRAY_SIZE(vars); i++) {
+ if (strstarts(sym->name, vars[i]))
+ return true;
+ }
+
+ return false;
+}
+
+/*
+ * Clang emits several useless .Ltmp_* code labels.
+ */
+static bool is_clang_tmp_label(struct symbol *sym)
+{
+ return sym->type == STT_NOTYPE &&
+ is_text_sec(sym->sec) &&
+ strstarts(sym->name, ".Ltmp") &&
+ isdigit(sym->name[5]);
+}
+
+static bool is_special_section(struct section *sec)
+{
+ static const char * const specials[] = {
+ ".altinstructions",
+ ".smp_locks",
+ "__bug_table",
+ "__ex_table",
+ "__jump_table",
+ "__mcount_loc",
+
+ /*
+ * Extract .static_call_sites here to inherit non-module
+ * preferential treatment. The later static call processing
+ * during klp module build will be skipped when it sees this
+ * section already exists.
+ */
+ ".static_call_sites",
+ };
+
+ static const char * const non_special_discards[] = {
+ ".discard.addressable",
+ ".discard.sym_checksum",
+ };
+
+ if (is_text_sec(sec))
+ return false;
+
+ for (int i = 0; i < ARRAY_SIZE(specials); i++) {
+ if (!strcmp(sec->name, specials[i]))
+ return true;
+ }
+
+ /* Most .discard data sections are special */
+ for (int i = 0; i < ARRAY_SIZE(non_special_discards); i++) {
+ if (!strcmp(sec->name, non_special_discards[i]))
+ return false;
+ }
+
+ return strstarts(sec->name, ".discard.");
+}
+
+/*
+ * These sections are referenced by special sections but aren't considered
+ * special sections themselves.
+ */
+static bool is_special_section_aux(struct section *sec)
+{
+ static const char * const specials_aux[] = {
+ ".altinstr_replacement",
+ ".altinstr_aux",
+ };
+
+ for (int i = 0; i < ARRAY_SIZE(specials_aux); i++) {
+ if (!strcmp(sec->name, specials_aux[i]))
+ return true;
+ }
+
+ return false;
+}
+
+/*
+ * These symbols should never be correlated, so their local patched versions
+ * are used instead of linking to the originals.
+ */
+static bool dont_correlate(struct symbol *sym)
+{
+ return is_file_sym(sym) ||
+ is_null_sym(sym) ||
+ is_sec_sym(sym) ||
+ is_prefix_func(sym) ||
+ is_uncorrelated_static_local(sym) ||
+ is_clang_tmp_label(sym) ||
+ is_string_sec(sym->sec) ||
+ is_special_section(sym->sec) ||
+ is_special_section_aux(sym->sec) ||
+ strstarts(sym->name, "__initcall__");
+}
+
+/*
+ * For each symbol in the original kernel, find its corresponding "twin" in the
+ * patched kernel.
+ */
+static int correlate_symbols(struct elfs *e)
+{
+ struct symbol *file1_sym, *file2_sym;
+ struct symbol *sym1, *sym2;
+
+ /* Correlate locals */
+ for (file1_sym = first_file_symbol(e->orig),
+ file2_sym = first_file_symbol(e->patched); ;
+ file1_sym = next_file_symbol(e->orig, file1_sym),
+ file2_sym = next_file_symbol(e->patched, file2_sym)) {
+
+ if (!file1_sym && file2_sym) {
+ ERROR("FILE symbol mismatch: NULL != %s", file2_sym->name);
+ return -1;
+ }
+
+ if (file1_sym && !file2_sym) {
+ ERROR("FILE symbol mismatch: %s != NULL", file1_sym->name);
+ return -1;
+ }
+
+ if (!file1_sym)
+ break;
+
+ if (strcmp(file1_sym->name, file2_sym->name)) {
+ ERROR("FILE symbol mismatch: %s != %s", file1_sym->name, file2_sym->name);
+ return -1;
+ }
+
+ file1_sym->twin = file2_sym;
+ file2_sym->twin = file1_sym;
+
+ sym1 = file1_sym;
+
+ for_each_sym_continue(e->orig, sym1) {
+ if (is_file_sym(sym1) || !is_local_sym(sym1))
+ break;
+
+ if (dont_correlate(sym1))
+ continue;
+
+ sym2 = file2_sym;
+ for_each_sym_continue(e->patched, sym2) {
+ if (is_file_sym(sym2) || !is_local_sym(sym2))
+ break;
+
+ if (sym2->twin || dont_correlate(sym2))
+ continue;
+
+ if (strcmp(sym1->demangled_name, sym2->demangled_name))
+ continue;
+
+ sym1->twin = sym2;
+ sym2->twin = sym1;
+ break;
+ }
+ }
+ }
+
+ /* Correlate globals */
+ for_each_sym(e->orig, sym1) {
+ if (sym1->bind == STB_LOCAL)
+ continue;
+
+ sym2 = find_global_symbol_by_name(e->patched, sym1->name);
+
+ if (sym2 && !sym2->twin && !strcmp(sym1->name, sym2->name)) {
+ sym1->twin = sym2;
+ sym2->twin = sym1;
+ }
+ }
+
+ for_each_sym(e->orig, sym1) {
+ if (sym1->twin || dont_correlate(sym1))
+ continue;
+ WARN("no correlation: %s", sym1->name);
+ }
+
+ return 0;
+}
+
+/* "sympos" is used by livepatch to disambiguate duplicate symbol names */
+static unsigned long find_sympos(struct elf *elf, struct symbol *sym)
+{
+ bool vmlinux = str_ends_with(objname, "vmlinux.o");
+ unsigned long sympos = 0, nr_matches = 0;
+ bool has_dup = false;
+ struct symbol *s;
+
+ if (sym->bind != STB_LOCAL)
+ return 0;
+
+ if (vmlinux && sym->type == STT_FUNC) {
+ /*
+ * HACK: Unfortunately, symbol ordering can differ between
+ * vmlinux.o and vmlinux due to the linker script emitting
+ * .text.unlikely* before .text*. Count .text.unlikely* first.
+ *
+ * TODO: Disambiguate symbols more reliably (checksums?)
+ */
+ for_each_sym(elf, s) {
+ if (strstarts(s->sec->name, ".text.unlikely") &&
+ !strcmp(s->name, sym->name)) {
+ nr_matches++;
+ if (s == sym)
+ sympos = nr_matches;
+ else
+ has_dup = true;
+ }
+ }
+ for_each_sym(elf, s) {
+ if (!strstarts(s->sec->name, ".text.unlikely") &&
+ !strcmp(s->name, sym->name)) {
+ nr_matches++;
+ if (s == sym)
+ sympos = nr_matches;
+ else
+ has_dup = true;
+ }
+ }
+ } else {
+ for_each_sym(elf, s) {
+ if (!strcmp(s->name, sym->name)) {
+ nr_matches++;
+ if (s == sym)
+ sympos = nr_matches;
+ else
+ has_dup = true;
+ }
+ }
+ }
+
+ if (!sympos) {
+ ERROR("can't find sympos for %s", sym->name);
+ return ULONG_MAX;
+ }
+
+ return has_dup ? sympos : 0;
+}
+
+static int clone_sym_relocs(struct elfs *e, struct symbol *patched_sym);
+
+static struct symbol *__clone_symbol(struct elf *elf, struct symbol *patched_sym,
+ bool data_too)
+{
+ struct section *out_sec = NULL;
+ unsigned long offset = 0;
+ struct symbol *out_sym;
+
+ if (data_too && !is_undef_sym(patched_sym)) {
+ struct section *patched_sec = patched_sym->sec;
+
+ out_sec = find_section_by_name(elf, patched_sec->name);
+ if (!out_sec) {
+ out_sec = elf_create_section(elf, patched_sec->name, 0,
+ patched_sec->sh.sh_entsize,
+ patched_sec->sh.sh_type,
+ patched_sec->sh.sh_addralign,
+ patched_sec->sh.sh_flags);
+ if (!out_sec)
+ return NULL;
+ }
+
+ if (is_string_sec(patched_sym->sec)) {
+ out_sym = elf_create_section_symbol(elf, out_sec);
+ if (!out_sym)
+ return NULL;
+
+ goto sym_created;
+ }
+
+ if (!is_sec_sym(patched_sym))
+ offset = sec_size(out_sec);
+
+ if (patched_sym->len || is_sec_sym(patched_sym)) {
+ void *data = NULL;
+ size_t size;
+
+ /* bss doesn't have data */
+ if (patched_sym->sec->data->d_buf)
+ data = patched_sym->sec->data->d_buf + patched_sym->offset;
+
+ if (is_sec_sym(patched_sym))
+ size = sec_size(patched_sym->sec);
+ else
+ size = patched_sym->len;
+
+ if (!elf_add_data(elf, out_sec, data, size))
+ return NULL;
+ }
+ }
+
+ out_sym = elf_create_symbol(elf, patched_sym->name, out_sec,
+ patched_sym->bind, patched_sym->type,
+ offset, patched_sym->len);
+ if (!out_sym)
+ return NULL;
+
+sym_created:
+ patched_sym->clone = out_sym;
+ out_sym->clone = patched_sym;
+
+ return out_sym;
+}
+
+static const char *sym_type(struct symbol *sym)
+{
+ switch (sym->type) {
+ case STT_NOTYPE: return "NOTYPE";
+ case STT_OBJECT: return "OBJECT";
+ case STT_FUNC: return "FUNC";
+ case STT_SECTION: return "SECTION";
+ case STT_FILE: return "FILE";
+ default: return "UNKNOWN";
+ }
+}
+
+static const char *sym_bind(struct symbol *sym)
+{
+ switch (sym->bind) {
+ case STB_LOCAL: return "LOCAL";
+ case STB_GLOBAL: return "GLOBAL";
+ case STB_WEAK: return "WEAK";
+ default: return "UNKNOWN";
+ }
+}
+
+/*
+ * Copy a symbol to the output object, optionally including its data and
+ * relocations.
+ */
+static struct symbol *clone_symbol(struct elfs *e, struct symbol *patched_sym,
+ bool data_too)
+{
+ struct symbol *pfx;
+
+ if (patched_sym->clone)
+ return patched_sym->clone;
+
+ dbg_indent("%s%s", patched_sym->name, data_too ? " [+DATA]" : "");
+
+ /* Make sure the prefix gets cloned first */
+ if (is_func_sym(patched_sym) && data_too) {
+ pfx = get_func_prefix(patched_sym);
+ if (pfx)
+ clone_symbol(e, pfx, true);
+ }
+
+ if (!__clone_symbol(e->out, patched_sym, data_too))
+ return NULL;
+
+ if (data_too && clone_sym_relocs(e, patched_sym))
+ return NULL;
+
+ return patched_sym->clone;
+}
+
+static void mark_included_function(struct symbol *func)
+{
+ struct symbol *pfx;
+
+ func->included = 1;
+
+ /* Include prefix function */
+ pfx = get_func_prefix(func);
+ if (pfx)
+ pfx->included = 1;
+
+ /* Make sure .cold parent+child always stay together */
+ if (func->cfunc && func->cfunc != func)
+ func->cfunc->included = 1;
+ if (func->pfunc && func->pfunc != func)
+ func->pfunc->included = 1;
+}
+
+/*
+ * Copy all changed functions (and their dependencies) from the patched object
+ * to the output object.
+ */
+static int mark_changed_functions(struct elfs *e)
+{
+ struct symbol *sym_orig, *patched_sym;
+ bool changed = false;
+
+ /* Find changed functions */
+ for_each_sym(e->orig, sym_orig) {
+ if (!is_func_sym(sym_orig) || is_prefix_func(sym_orig))
+ continue;
+
+ patched_sym = sym_orig->twin;
+ if (!patched_sym)
+ continue;
+
+ if (sym_orig->csum.checksum != patched_sym->csum.checksum) {
+ patched_sym->changed = 1;
+ mark_included_function(patched_sym);
+ changed = true;
+ }
+ }
+
+ /* Find added functions and print them */
+ for_each_sym(e->patched, patched_sym) {
+ if (!is_func_sym(patched_sym) || is_prefix_func(patched_sym))
+ continue;
+
+ if (!patched_sym->twin) {
+ printf("%s: new function: %s\n", objname, patched_sym->name);
+ mark_included_function(patched_sym);
+ changed = true;
+ }
+ }
+
+ /* Print changed functions */
+ for_each_sym(e->patched, patched_sym) {
+ if (patched_sym->changed)
+ printf("%s: changed function: %s\n", objname, patched_sym->name);
+ }
+
+ return !changed ? -1 : 0;
+}
+
+static int clone_included_functions(struct elfs *e)
+{
+ struct symbol *patched_sym;
+
+ for_each_sym(e->patched, patched_sym) {
+ if (patched_sym->included) {
+ if (!clone_symbol(e, patched_sym, true))
+ return -1;
+ }
+ }
+
+ return 0;
+}
+
+/*
+ * Determine whether a relocation should reference the section rather than the
+ * underlying symbol.
+ */
+static bool section_reference_needed(struct section *sec)
+{
+ /*
+ * String symbols are zero-length and uncorrelated. It's easier to
+ * deal with them as section symbols.
+ */
+ if (is_string_sec(sec))
+ return true;
+
+ /*
+ * .rodata has mostly anonymous data so there's no way to determine the
+ * length of a needed reference. just copy the whole section if needed.
+ */
+ if (strstarts(sec->name, ".rodata"))
+ return true;
+
+ /* UBSAN anonymous data */
+ if (strstarts(sec->name, ".data..Lubsan") || /* GCC */
+ strstarts(sec->name, ".data..L__unnamed_")) /* Clang */
+ return true;
+
+ return false;
+}
+
+static bool is_reloc_allowed(struct reloc *reloc)
+{
+ return section_reference_needed(reloc->sym->sec) == is_sec_sym(reloc->sym);
+}
+
+static struct export *find_export(struct symbol *sym)
+{
+ struct export *export;
+
+ hash_for_each_possible(exports, export, hash, str_hash(sym->name)) {
+ if (!strcmp(export->sym, sym->name))
+ return export;
+ }
+
+ return NULL;
+}
+
+static const char *__find_modname(struct elfs *e)
+{
+ struct section *sec;
+ char *name;
+
+ sec = find_section_by_name(e->orig, ".modinfo");
+ if (!sec) {
+ ERROR("missing .modinfo section");
+ return NULL;
+ }
+
+ name = memmem(sec->data->d_buf, sec_size(sec), "\0name=", 6);
+ if (name)
+ return name + 6;
+
+ name = strdup(e->orig->name);
+ if (!name) {
+ ERROR_GLIBC("strdup");
+ return NULL;
+ }
+
+ for (char *c = name; *c; c++) {
+ if (*c == '/')
+ name = c + 1;
+ else if (*c == '-')
+ *c = '_';
+ else if (*c == '.') {
+ *c = '\0';
+ break;
+ }
+ }
+
+ return name;
+}
+
+/* Get the object's module name as defined by the kernel (and klp_object) */
+static const char *find_modname(struct elfs *e)
+{
+ const char *modname;
+
+ if (e->modname)
+ return e->modname;
+
+ modname = __find_modname(e);
+ e->modname = modname;
+ return modname;
+}
+
+/*
+ * Copying a function from its native compiled environment to a kernel module
+ * removes its natural access to local functions/variables and unexported
+ * globals. References to such symbols need to be converted to KLP relocs so
+ * the kernel arch relocation code knows to apply them and where to find the
+ * symbols. Particularly, duplicate static symbols need to be disambiguated.
+ */
+static bool klp_reloc_needed(struct reloc *patched_reloc)
+{
+ struct symbol *patched_sym = patched_reloc->sym;
+ struct export *export;
+
+ /* no external symbol to reference */
+ if (dont_correlate(patched_sym))
+ return false;
+
+ /* For included functions, a regular reloc will do. */
+ if (patched_sym->included)
+ return false;
+
+ /*
+ * If exported by a module, it has to be a klp reloc. Thanks to the
+ * clusterfunk that is late module patching, the patch module is
+ * allowed to be loaded before any modules it depends on.
+ *
+ * If exported by vmlinux, a normal reloc will do.
+ */
+ export = find_export(patched_sym);
+ if (export)
+ return strcmp(export->mod, "vmlinux");
+
+ if (!patched_sym->twin) {
+ /*
+ * Presumably the symbol and its reference were added by the
+ * patch. The symbol could be defined in this .o or in another
+ * .o in the patch module.
+ *
+ * This check needs to be *after* the export check due to the
+ * possibility of the patch adding a new UNDEF reference to an
+ * exported symbol.
+ */
+ return false;
+ }
+
+ /* Unexported symbol which lives in the original vmlinux or module. */
+ return true;
+}
+
+static int convert_reloc_sym_to_secsym(struct elf *elf, struct reloc *reloc)
+{
+ struct symbol *sym = reloc->sym;
+ struct section *sec = sym->sec;
+
+ if (!sec->sym && !elf_create_section_symbol(elf, sec))
+ return -1;
+
+ reloc->sym = sec->sym;
+ set_reloc_sym(elf, reloc, sym->idx);
+ set_reloc_addend(elf, reloc, sym->offset + reloc_addend(reloc));
+ return 0;
+}
+
+static int convert_reloc_secsym_to_sym(struct elf *elf, struct reloc *reloc)
+{
+ struct symbol *sym = reloc->sym;
+ struct section *sec = sym->sec;
+
+ /* If the symbol has a dedicated section, it's easy to find */
+ sym = find_symbol_by_offset(sec, 0);
+ if (sym && sym->len == sec_size(sec))
+ goto found_sym;
+
+ /* No dedicated section; find the symbol manually */
+ sym = find_symbol_containing(sec, arch_adjusted_addend(reloc));
+ if (!sym) {
+ /*
+ * This can happen for special section references to weak code
+ * whose symbol has been stripped by the linker.
+ */
+ return -1;
+ }
+
+found_sym:
+ reloc->sym = sym;
+ set_reloc_sym(elf, reloc, sym->idx);
+ set_reloc_addend(elf, reloc, reloc_addend(reloc) - sym->offset);
+ return 0;
+}
+
+/*
+ * Convert a relocation symbol reference to the needed format: either a section
+ * symbol or the underlying symbol itself.
+ */
+static int convert_reloc_sym(struct elf *elf, struct reloc *reloc)
+{
+ if (is_reloc_allowed(reloc))
+ return 0;
+
+ if (section_reference_needed(reloc->sym->sec))
+ return convert_reloc_sym_to_secsym(elf, reloc);
+ else
+ return convert_reloc_secsym_to_sym(elf, reloc);
+}
+
+/*
+ * Convert a regular relocation to a klp relocation (sort of).
+ */
+static int clone_reloc_klp(struct elfs *e, struct reloc *patched_reloc,
+ struct section *sec, unsigned long offset,
+ struct export *export)
+{
+ struct symbol *patched_sym = patched_reloc->sym;
+ s64 addend = reloc_addend(patched_reloc);
+ const char *sym_modname, *sym_orig_name;
+ static struct section *klp_relocs;
+ struct symbol *sym, *klp_sym;
+ unsigned long klp_reloc_off;
+ char sym_name[SYM_NAME_LEN];
+ struct klp_reloc klp_reloc;
+ unsigned long sympos;
+
+ if (!patched_sym->twin) {
+ ERROR("unexpected klp reloc for new symbol %s", patched_sym->name);
+ return -1;
+ }
+
+ /*
+ * Keep the original reloc intact for now to avoid breaking objtool run
+ * which relies on proper relocations for many of its features. This
+ * will be disabled later by "objtool klp post-link".
+ *
+ * Convert it to UNDEF (and WEAK to avoid modpost warnings).
+ */
+
+ sym = patched_sym->clone;
+ if (!sym) {
+ /* STB_WEAK: avoid modpost undefined symbol warnings */
+ sym = elf_create_symbol(e->out, patched_sym->name, NULL,
+ STB_WEAK, patched_sym->type, 0, 0);
+ if (!sym)
+ return -1;
+
+ patched_sym->clone = sym;
+ sym->clone = patched_sym;
+ }
+
+ if (!elf_create_reloc(e->out, sec, offset, sym, addend, reloc_type(patched_reloc)))
+ return -1;
+
+ /*
+ * Create the KLP symbol.
+ */
+
+ if (export) {
+ sym_modname = export->mod;
+ sym_orig_name = export->sym;
+ sympos = 0;
+ } else {
+ sym_modname = find_modname(e);
+ if (!sym_modname)
+ return -1;
+
+ sym_orig_name = patched_sym->twin->name;
+ sympos = find_sympos(e->orig, patched_sym->twin);
+ if (sympos == ULONG_MAX)
+ return -1;
+ }
+
+ /* symbol format: .klp.sym.modname.sym_name,sympos */
+ if (snprintf_check(sym_name, SYM_NAME_LEN, KLP_SYM_PREFIX "%s.%s,%ld",
+ sym_modname, sym_orig_name, sympos))
+ return -1;
+
+ klp_sym = find_symbol_by_name(e->out, sym_name);
+ if (!klp_sym) {
+ __dbg_indent("%s", sym_name);
+
+ /* STB_WEAK: avoid modpost undefined symbol warnings */
+ klp_sym = elf_create_symbol(e->out, sym_name, NULL,
+ STB_WEAK, patched_sym->type, 0, 0);
+ if (!klp_sym)
+ return -1;
+ }
+
+ /*
+ * Create the __klp_relocs entry. This will be converted to an actual
+ * KLP rela by "objtool klp post-link".
+ *
+ * This intermediate step is necessary to prevent corruption by the
+ * linker, which doesn't know how to properly handle two rela sections
+ * applying to the same base section.
+ */
+
+ if (!klp_relocs) {
+ klp_relocs = elf_create_section(e->out, KLP_RELOCS_SEC, 0,
+ 0, SHT_PROGBITS, 8, SHF_ALLOC);
+ if (!klp_relocs)
+ return -1;
+ }
+
+ klp_reloc_off = sec_size(klp_relocs);
+ memset(&klp_reloc, 0, sizeof(klp_reloc));
+
+ klp_reloc.type = reloc_type(patched_reloc);
+ if (!elf_add_data(e->out, klp_relocs, &klp_reloc, sizeof(klp_reloc)))
+ return -1;
+
+ /* klp_reloc.offset */
+ if (!sec->sym && !elf_create_section_symbol(e->out, sec))
+ return -1;
+
+ if (!elf_create_reloc(e->out, klp_relocs,
+ klp_reloc_off + offsetof(struct klp_reloc, offset),
+ sec->sym, offset, R_ABS64))
+ return -1;
+
+ /* klp_reloc.sym */
+ if (!elf_create_reloc(e->out, klp_relocs,
+ klp_reloc_off + offsetof(struct klp_reloc, sym),
+ klp_sym, addend, R_ABS64))
+ return -1;
+
+ return 0;
+}
+
+#define dbg_clone_reloc(sec, offset, patched_sym, addend, export, klp) \
+ dbg_indent("%s+0x%lx: %s%s0x%lx [%s%s%s%s%s%s]", \
+ sec->name, offset, patched_sym->name, \
+ addend >= 0 ? "+" : "-", labs(addend), \
+ sym_type(patched_sym), \
+ patched_sym->type == STT_SECTION ? "" : " ", \
+ patched_sym->type == STT_SECTION ? "" : sym_bind(patched_sym), \
+ is_undef_sym(patched_sym) ? " UNDEF" : "", \
+ export ? " EXPORTED" : "", \
+ klp ? " KLP" : "")
+
+/* Copy a reloc and its symbol to the output object */
+static int clone_reloc(struct elfs *e, struct reloc *patched_reloc,
+ struct section *sec, unsigned long offset)
+{
+ struct symbol *patched_sym = patched_reloc->sym;
+ struct export *export = find_export(patched_sym);
+ long addend = reloc_addend(patched_reloc);
+ struct symbol *out_sym;
+ bool klp;
+
+ if (!is_reloc_allowed(patched_reloc)) {
+ ERROR_FUNC(patched_reloc->sec->base, reloc_offset(patched_reloc),
+ "missing symbol for reference to %s+%ld",
+ patched_sym->name, addend);
+ return -1;
+ }
+
+ klp = klp_reloc_needed(patched_reloc);
+
+ dbg_clone_reloc(sec, offset, patched_sym, addend, export, klp);
+
+ if (klp) {
+ if (clone_reloc_klp(e, patched_reloc, sec, offset, export))
+ return -1;
+
+ return 0;
+ }
+
+ /*
+ * Why !export sets 'data_too':
+ *
+ * Unexported non-klp symbols need to live in the patch module,
+ * otherwise there will be unresolved symbols. Notably, this includes:
+ *
+ * - New functions/data
+ * - String sections
+ * - Special section entries
+ * - Uncorrelated static local variables
+ * - UBSAN sections
+ */
+ out_sym = clone_symbol(e, patched_sym, patched_sym->included || !export);
+ if (!out_sym)
+ return -1;
+
+ /*
+ * For strings, all references use section symbols, thanks to
+ * section_reference_needed(). clone_symbol() has cloned an empty
+ * version of the string section. Now copy the string itself.
+ */
+ if (is_string_sec(patched_sym->sec)) {
+ const char *str = patched_sym->sec->data->d_buf + addend;
+
+ __dbg_indent("\"%s\"", escape_str(str));
+
+ addend = elf_add_string(e->out, out_sym->sec, str);
+ if (addend == -1)
+ return -1;
+ }
+
+ if (!elf_create_reloc(e->out, sec, offset, out_sym, addend,
+ reloc_type(patched_reloc)))
+ return -1;
+
+ return 0;
+}
+
+/* Copy all relocs needed for a symbol's contents */
+static int clone_sym_relocs(struct elfs *e, struct symbol *patched_sym)
+{
+ struct section *patched_rsec = patched_sym->sec->rsec;
+ struct reloc *patched_reloc;
+ unsigned long start, end;
+ struct symbol *out_sym;
+
+ out_sym = patched_sym->clone;
+ if (!out_sym) {
+ ERROR("no clone for %s", patched_sym->name);
+ return -1;
+ }
+
+ if (!patched_rsec)
+ return 0;
+
+ if (!is_sec_sym(patched_sym) && !patched_sym->len)
+ return 0;
+
+ if (is_string_sec(patched_sym->sec))
+ return 0;
+
+ if (is_sec_sym(patched_sym)) {
+ start = 0;
+ end = sec_size(patched_sym->sec);
+ } else {
+ start = patched_sym->offset;
+ end = start + patched_sym->len;
+ }
+
+ for_each_reloc(patched_rsec, patched_reloc) {
+ unsigned long offset;
+
+ if (reloc_offset(patched_reloc) < start ||
+ reloc_offset(patched_reloc) >= end)
+ continue;
+
+ /*
+ * Skip any reloc referencing .altinstr_aux. Its code is
+ * always patched by alternatives. See ALTERNATIVE_TERNARY().
+ */
+ if (patched_reloc->sym->sec &&
+ !strcmp(patched_reloc->sym->sec->name, ".altinstr_aux"))
+ continue;
+
+ if (convert_reloc_sym(e->patched, patched_reloc)) {
+ ERROR_FUNC(patched_rsec->base, reloc_offset(patched_reloc),
+ "failed to convert reloc sym '%s' to its proper format",
+ patched_reloc->sym->name);
+ return -1;
+ }
+
+ offset = out_sym->offset + (reloc_offset(patched_reloc) - patched_sym->offset);
+
+ if (clone_reloc(e, patched_reloc, out_sym->sec, offset))
+ return -1;
+ }
+ return 0;
+
+}
+
+static int create_fake_symbol(struct elf *elf, struct section *sec,
+ unsigned long offset, size_t size)
+{
+ char name[SYM_NAME_LEN];
+ unsigned int type;
+ static int ctr;
+ char *c;
+
+ if (snprintf_check(name, SYM_NAME_LEN, "%s_%d", sec->name, ctr++))
+ return -1;
+
+ for (c = name; *c; c++)
+ if (*c == '.')
+ *c = '_';
+
+ /*
+ * STT_NOTYPE: Prevent objtool from validating .altinstr_replacement
+ * while still allowing objdump to disassemble it.
+ */
+ type = is_text_sec(sec) ? STT_NOTYPE : STT_OBJECT;
+ return elf_create_symbol(elf, name, sec, STB_LOCAL, type, offset, size) ? 0 : -1;
+}
+
+/*
+ * Special sections (alternatives, etc) are basically arrays of structs.
+ * For all the special sections, create a symbol for each struct entry. This
+ * is a bit cumbersome, but it makes the extracting of the individual entries
+ * much more straightforward.
+ *
+ * There are three ways to identify the entry sizes for a special section:
+ *
+ * 1) ELF section header sh_entsize: Ideally this would be used almost
+ * everywhere. But unfortunately the toolchains make it difficult. The
+ * assembler .[push]section directive syntax only takes entsize when
+ * combined with SHF_MERGE. But Clang disallows combining SHF_MERGE with
+ * SHF_WRITE. And some special sections do need to be writable.
+ *
+ * Another place this wouldn't work is .altinstr_replacement, whose entries
+ * don't have a fixed size.
+ *
+ * 2) ANNOTATE_DATA_SPECIAL: This is a lightweight objtool annotation which
+ * points to the beginning of each entry. The size of the entry is then
+ * inferred by the location of the subsequent annotation (or end of
+ * section).
+ *
+ * 3) Simple array of pointers: If the special section is just a basic array of
+ * pointers, the entry size can be inferred by the number of relocations.
+ * No annotations needed.
+ *
+ * Note I also tried to create per-entry symbols at the time of creation, in
+ * the original [inline] asm. Unfortunately, creating uniquely named symbols
+ * is trickier than one might think, especially with Clang inline asm. I
+ * eventually just gave up trying to make that work, in favor of using
+ * ANNOTATE_DATA_SPECIAL and creating the symbols here after the fact.
+ */
+static int create_fake_symbols(struct elf *elf)
+{
+ struct section *sec;
+ struct reloc *reloc;
+
+ /*
+ * 1) Make symbols for all the ANNOTATE_DATA_SPECIAL entries:
+ */
+
+ sec = find_section_by_name(elf, ".discard.annotate_data");
+ if (!sec || !sec->rsec)
+ return 0;
+
+ for_each_reloc(sec->rsec, reloc) {
+ unsigned long offset, size;
+ struct reloc *next_reloc;
+
+ if (annotype(elf, sec, reloc) != ANNOTYPE_DATA_SPECIAL)
+ continue;
+
+ offset = reloc_addend(reloc);
+
+ size = 0;
+ next_reloc = reloc;
+ for_each_reloc_continue(sec->rsec, next_reloc) {
+ if (annotype(elf, sec, next_reloc) != ANNOTYPE_DATA_SPECIAL ||
+ next_reloc->sym->sec != reloc->sym->sec)
+ continue;
+
+ size = reloc_addend(next_reloc) - offset;
+ break;
+ }
+
+ if (!size)
+ size = sec_size(reloc->sym->sec) - offset;
+
+ if (create_fake_symbol(elf, reloc->sym->sec, offset, size))
+ return -1;
+ }
+
+ /*
+ * 2) Make symbols for sh_entsize, and simple arrays of pointers:
+ */
+
+ for_each_sec(elf, sec) {
+ unsigned int entry_size;
+ unsigned long offset;
+
+ if (!is_special_section(sec) || find_symbol_by_offset(sec, 0))
+ continue;
+
+ if (!sec->rsec) {
+ ERROR("%s: missing special section relocations", sec->name);
+ return -1;
+ }
+
+ entry_size = sec->sh.sh_entsize;
+ if (!entry_size) {
+ entry_size = arch_reloc_size(sec->rsec->relocs);
+ if (sec_size(sec) != entry_size * sec_num_entries(sec->rsec)) {
+ ERROR("%s: missing special section entsize or annotations", sec->name);
+ return -1;
+ }
+ }
+
+ for (offset = 0; offset < sec_size(sec); offset += entry_size) {
+ if (create_fake_symbol(elf, sec, offset, entry_size))
+ return -1;
+ }
+ }
+
+ return 0;
+}
+
+/* Keep a special section entry if it references an included function */
+static bool should_keep_special_sym(struct elf *elf, struct symbol *sym)
+{
+ struct reloc *reloc;
+
+ if (is_sec_sym(sym) || !sym->sec->rsec)
+ return false;
+
+ sym_for_each_reloc(elf, sym, reloc) {
+ if (convert_reloc_sym(elf, reloc))
+ continue;
+
+ if (is_func_sym(reloc->sym) && reloc->sym->included)
+ return true;
+ }
+
+ return false;
+}
+
+/*
+ * Klp relocations aren't allowed for __jump_table and .static_call_sites if
+ * the referenced symbol lives in a kernel module, because such klp relocs may
+ * be applied after static branch/call init, resulting in code corruption.
+ *
+ * Validate a special section entry to avoid that. Note that an inert
+ * tracepoint is harmless enough, in that case just skip the entry and print a
+ * warning. Otherwise, return an error.
+ *
+ * This is only a temporary limitation which will be fixed when livepatch adds
+ * support for submodules: fully self-contained modules which are embedded in
+ * the top-level livepatch module's data and which can be loaded on demand when
+ * their corresponding to-be-patched module gets loaded. Then klp relocs can
+ * be retired.
+ *
+ * Return:
+ * -1: error: validation failed
+ * 1: warning: tracepoint skipped
+ * 0: success
+ */
+static int validate_special_section_klp_reloc(struct elfs *e, struct symbol *sym)
+{
+ bool static_branch = !strcmp(sym->sec->name, "__jump_table");
+ bool static_call = !strcmp(sym->sec->name, ".static_call_sites");
+ struct symbol *code_sym = NULL;
+ unsigned long code_offset = 0;
+ struct reloc *reloc;
+ int ret = 0;
+
+ if (!static_branch && !static_call)
+ return 0;
+
+ sym_for_each_reloc(e->patched, sym, reloc) {
+ const char *sym_modname;
+ struct export *export;
+
+ /* Static branch/call keys are always STT_OBJECT */
+ if (reloc->sym->type != STT_OBJECT) {
+
+ /* Save code location which can be printed below */
+ if (reloc->sym->type == STT_FUNC && !code_sym) {
+ code_sym = reloc->sym;
+ code_offset = reloc_addend(reloc);
+ }
+
+ continue;
+ }
+
+ if (!klp_reloc_needed(reloc))
+ continue;
+
+ export = find_export(reloc->sym);
+ if (export) {
+ sym_modname = export->mod;
+ } else {
+ sym_modname = find_modname(e);
+ if (!sym_modname)
+ return -1;
+ }
+
+ /* vmlinux keys are ok */
+ if (!strcmp(sym_modname, "vmlinux"))
+ continue;
+
+ if (static_branch) {
+ if (strstarts(reloc->sym->name, "__tracepoint_")) {
+ WARN("%s: disabling unsupported tracepoint %s",
+ code_sym->name, reloc->sym->name + 13);
+ ret = 1;
+ continue;
+ }
+
+ ERROR("%s+0x%lx: unsupported static branch key %s. Use static_key_enabled() instead",
+ code_sym->name, code_offset, reloc->sym->name);
+ return -1;
+ }
+
+ /* static call */
+ if (strstarts(reloc->sym->name, "__SCK__tp_func_")) {
+ ret = 1;
+ continue;
+ }
+
+ ERROR("%s()+0x%lx: unsupported static call key %s. Use KLP_STATIC_CALL() instead",
+ code_sym->name, code_offset, reloc->sym->name);
+ return -1;
+ }
+
+ return ret;
+}
+
+static int clone_special_section(struct elfs *e, struct section *patched_sec)
+{
+ struct symbol *patched_sym;
+
+ /*
+ * Extract all special section symbols (and their dependencies) which
+ * reference included functions.
+ */
+ sec_for_each_sym(patched_sec, patched_sym) {
+ int ret;
+
+ if (!is_object_sym(patched_sym))
+ continue;
+
+ if (!should_keep_special_sym(e->patched, patched_sym))
+ continue;
+
+ ret = validate_special_section_klp_reloc(e, patched_sym);
+ if (ret < 0)
+ return -1;
+ if (ret > 0)
+ continue;
+
+ if (!clone_symbol(e, patched_sym, true))
+ return -1;
+ }
+
+ return 0;
+}
+
+/* Extract only the needed bits from special sections */
+static int clone_special_sections(struct elfs *e)
+{
+ struct section *patched_sec;
+
+ if (create_fake_symbols(e->patched))
+ return -1;
+
+ for_each_sec(e->patched, patched_sec) {
+ if (is_special_section(patched_sec)) {
+ if (clone_special_section(e, patched_sec))
+ return -1;
+ }
+ }
+
+ return 0;
+}
+
+/*
+ * Create __klp_objects and __klp_funcs sections which are intermediate
+ * sections provided as input to the patch module's init code for building the
+ * klp_patch, klp_object and klp_func structs for the livepatch API.
+ */
+static int create_klp_sections(struct elfs *e)
+{
+ size_t obj_size = sizeof(struct klp_object_ext);
+ size_t func_size = sizeof(struct klp_func_ext);
+ struct section *obj_sec, *funcs_sec, *str_sec;
+ struct symbol *funcs_sym, *str_sym, *sym;
+ char sym_name[SYM_NAME_LEN];
+ unsigned int nr_funcs = 0;
+ const char *modname;
+ void *obj_data;
+ s64 addend;
+
+ obj_sec = elf_create_section_pair(e->out, KLP_OBJECTS_SEC, obj_size, 0, 0);
+ if (!obj_sec)
+ return -1;
+
+ funcs_sec = elf_create_section_pair(e->out, KLP_FUNCS_SEC, func_size, 0, 0);
+ if (!funcs_sec)
+ return -1;
+
+ funcs_sym = elf_create_section_symbol(e->out, funcs_sec);
+ if (!funcs_sym)
+ return -1;
+
+ str_sec = elf_create_section(e->out, KLP_STRINGS_SEC, 0, 0,
+ SHT_PROGBITS, 1,
+ SHF_ALLOC | SHF_STRINGS | SHF_MERGE);
+ if (!str_sec)
+ return -1;
+
+ if (elf_add_string(e->out, str_sec, "") == -1)
+ return -1;
+
+ str_sym = elf_create_section_symbol(e->out, str_sec);
+ if (!str_sym)
+ return -1;
+
+ /* allocate klp_object_ext */
+ obj_data = elf_add_data(e->out, obj_sec, NULL, obj_size);
+ if (!obj_data)
+ return -1;
+
+ modname = find_modname(e);
+ if (!modname)
+ return -1;
+
+ /* klp_object_ext.name */
+ if (strcmp(modname, "vmlinux")) {
+ addend = elf_add_string(e->out, str_sec, modname);
+ if (addend == -1)
+ return -1;
+
+ if (!elf_create_reloc(e->out, obj_sec,
+ offsetof(struct klp_object_ext, name),
+ str_sym, addend, R_ABS64))
+ return -1;
+ }
+
+ /* klp_object_ext.funcs */
+ if (!elf_create_reloc(e->out, obj_sec, offsetof(struct klp_object_ext, funcs),
+ funcs_sym, 0, R_ABS64))
+ return -1;
+
+ for_each_sym(e->out, sym) {
+ unsigned long offset = nr_funcs * func_size;
+ unsigned long sympos;
+ void *func_data;
+
+ if (!is_func_sym(sym) || sym->cold || !sym->clone || !sym->clone->changed)
+ continue;
+
+ /* allocate klp_func_ext */
+ func_data = elf_add_data(e->out, funcs_sec, NULL, func_size);
+ if (!func_data)
+ return -1;
+
+ /* klp_func_ext.old_name */
+ addend = elf_add_string(e->out, str_sec, sym->clone->twin->name);
+ if (addend == -1)
+ return -1;
+
+ if (!elf_create_reloc(e->out, funcs_sec,
+ offset + offsetof(struct klp_func_ext, old_name),
+ str_sym, addend, R_ABS64))
+ return -1;
+
+ /* klp_func_ext.new_func */
+ if (!elf_create_reloc(e->out, funcs_sec,
+ offset + offsetof(struct klp_func_ext, new_func),
+ sym, 0, R_ABS64))
+ return -1;
+
+ /* klp_func_ext.sympos */
+ BUILD_BUG_ON(sizeof(sympos) != sizeof_field(struct klp_func_ext, sympos));
+ sympos = find_sympos(e->orig, sym->clone->twin);
+ if (sympos == ULONG_MAX)
+ return -1;
+ memcpy(func_data + offsetof(struct klp_func_ext, sympos), &sympos,
+ sizeof_field(struct klp_func_ext, sympos));
+
+ nr_funcs++;
+ }
+
+ /* klp_object_ext.nr_funcs */
+ BUILD_BUG_ON(sizeof(nr_funcs) != sizeof_field(struct klp_object_ext, nr_funcs));
+ memcpy(obj_data + offsetof(struct klp_object_ext, nr_funcs), &nr_funcs,
+ sizeof_field(struct klp_object_ext, nr_funcs));
+
+ /*
+ * Find callback pointers created by KLP_PRE_PATCH_CALLBACK() and
+ * friends, and add them to the klp object.
+ */
+
+ if (snprintf_check(sym_name, SYM_NAME_LEN, KLP_PRE_PATCH_PREFIX "%s", modname))
+ return -1;
+
+ sym = find_symbol_by_name(e->out, sym_name);
+ if (sym) {
+ struct reloc *reloc;
+
+ reloc = find_reloc_by_dest(e->out, sym->sec, sym->offset);
+
+ if (!elf_create_reloc(e->out, obj_sec,
+ offsetof(struct klp_object_ext, callbacks) +
+ offsetof(struct klp_callbacks, pre_patch),
+ reloc->sym, reloc_addend(reloc), R_ABS64))
+ return -1;
+ }
+
+ if (snprintf_check(sym_name, SYM_NAME_LEN, KLP_POST_PATCH_PREFIX "%s", modname))
+ return -1;
+
+ sym = find_symbol_by_name(e->out, sym_name);
+ if (sym) {
+ struct reloc *reloc;
+
+ reloc = find_reloc_by_dest(e->out, sym->sec, sym->offset);
+
+ if (!elf_create_reloc(e->out, obj_sec,
+ offsetof(struct klp_object_ext, callbacks) +
+ offsetof(struct klp_callbacks, post_patch),
+ reloc->sym, reloc_addend(reloc), R_ABS64))
+ return -1;
+ }
+
+ if (snprintf_check(sym_name, SYM_NAME_LEN, KLP_PRE_UNPATCH_PREFIX "%s", modname))
+ return -1;
+
+ sym = find_symbol_by_name(e->out, sym_name);
+ if (sym) {
+ struct reloc *reloc;
+
+ reloc = find_reloc_by_dest(e->out, sym->sec, sym->offset);
+
+ if (!elf_create_reloc(e->out, obj_sec,
+ offsetof(struct klp_object_ext, callbacks) +
+ offsetof(struct klp_callbacks, pre_unpatch),
+ reloc->sym, reloc_addend(reloc), R_ABS64))
+ return -1;
+ }
+
+ if (snprintf_check(sym_name, SYM_NAME_LEN, KLP_POST_UNPATCH_PREFIX "%s", modname))
+ return -1;
+
+ sym = find_symbol_by_name(e->out, sym_name);
+ if (sym) {
+ struct reloc *reloc;
+
+ reloc = find_reloc_by_dest(e->out, sym->sec, sym->offset);
+
+ if (!elf_create_reloc(e->out, obj_sec,
+ offsetof(struct klp_object_ext, callbacks) +
+ offsetof(struct klp_callbacks, post_unpatch),
+ reloc->sym, reloc_addend(reloc), R_ABS64))
+ return -1;
+ }
+
+ return 0;
+}
+
+/*
+ * Copy all .modinfo import_ns= tags to ensure all namespaced exported symbols
+ * can be accessed via normal relocs.
+ */
+static int copy_import_ns(struct elfs *e)
+{
+ struct section *patched_sec, *out_sec = NULL;
+ char *import_ns, *data_end;
+
+ patched_sec = find_section_by_name(e->patched, ".modinfo");
+ if (!patched_sec)
+ return 0;
+
+ import_ns = patched_sec->data->d_buf;
+ if (!import_ns)
+ return 0;
+
+ for (data_end = import_ns + sec_size(patched_sec);
+ import_ns < data_end;
+ import_ns += strlen(import_ns) + 1) {
+
+ import_ns = memmem(import_ns, data_end - import_ns, "import_ns=", 10);
+ if (!import_ns)
+ return 0;
+
+ if (!out_sec) {
+ out_sec = find_section_by_name(e->out, ".modinfo");
+ if (!out_sec) {
+ out_sec = elf_create_section(e->out, ".modinfo", 0,
+ patched_sec->sh.sh_entsize,
+ patched_sec->sh.sh_type,
+ patched_sec->sh.sh_addralign,
+ patched_sec->sh.sh_flags);
+ if (!out_sec)
+ return -1;
+ }
+ }
+
+ if (!elf_add_data(e->out, out_sec, import_ns, strlen(import_ns) + 1))
+ return -1;
+ }
+
+ return 0;
+}
+
+int cmd_klp_diff(int argc, const char **argv)
+{
+ struct elfs e = {0};
+
+ argc = parse_options(argc, argv, klp_diff_options, klp_diff_usage, 0);
+ if (argc != 3)
+ usage_with_options(klp_diff_usage, klp_diff_options);
+
+ objname = argv[0];
+
+ e.orig = elf_open_read(argv[0], O_RDONLY);
+ e.patched = elf_open_read(argv[1], O_RDONLY);
+ e.out = NULL;
+
+ if (!e.orig || !e.patched)
+ return -1;
+
+ if (read_exports())
+ return -1;
+
+ if (read_sym_checksums(e.orig))
+ return -1;
+
+ if (read_sym_checksums(e.patched))
+ return -1;
+
+ if (correlate_symbols(&e))
+ return -1;
+
+ if (mark_changed_functions(&e))
+ return 0;
+
+ e.out = elf_create_file(&e.orig->ehdr, argv[2]);
+ if (!e.out)
+ return -1;
+
+ if (clone_included_functions(&e))
+ return -1;
+
+ if (clone_special_sections(&e))
+ return -1;
+
+ if (create_klp_sections(&e))
+ return -1;
+
+ if (copy_import_ns(&e))
+ return -1;
+
+ if (elf_write(e.out))
+ return -1;
+
+ return elf_close(e.out);
+}
diff --git a/tools/objtool/klp-post-link.c b/tools/objtool/klp-post-link.c
new file mode 100644
index 000000000000..c013e39957b1
--- /dev/null
+++ b/tools/objtool/klp-post-link.c
@@ -0,0 +1,168 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Read the intermediate KLP reloc/symbol representations created by klp diff
+ * and convert them to the proper format required by livepatch. This needs to
+ * run last to avoid linker wreckage. Linkers don't tend to handle the "two
+ * rela sections for a single base section" case very well, nor do they like
+ * SHN_LIVEPATCH.
+ *
+ * This is the final tool in the livepatch module generation pipeline:
+ *
+ * kernel builds -> objtool klp diff -> module link -> objtool klp post-link
+ */
+
+#include <fcntl.h>
+#include <gelf.h>
+#include <objtool/objtool.h>
+#include <objtool/warn.h>
+#include <objtool/klp.h>
+#include <objtool/util.h>
+#include <linux/livepatch_external.h>
+
+static int fix_klp_relocs(struct elf *elf)
+{
+ struct section *symtab, *klp_relocs;
+
+ klp_relocs = find_section_by_name(elf, KLP_RELOCS_SEC);
+ if (!klp_relocs)
+ return 0;
+
+ symtab = find_section_by_name(elf, ".symtab");
+ if (!symtab) {
+ ERROR("missing .symtab");
+ return -1;
+ }
+
+ for (int i = 0; i < sec_size(klp_relocs) / sizeof(struct klp_reloc); i++) {
+ struct klp_reloc *klp_reloc;
+ unsigned long klp_reloc_off;
+ struct section *sec, *tmp, *klp_rsec;
+ unsigned long offset;
+ struct reloc *reloc;
+ char sym_modname[64];
+ char rsec_name[SEC_NAME_LEN];
+ u64 addend;
+ struct symbol *sym, *klp_sym;
+
+ klp_reloc_off = i * sizeof(*klp_reloc);
+ klp_reloc = klp_relocs->data->d_buf + klp_reloc_off;
+
+ /*
+ * Read __klp_relocs[i]:
+ */
+
+ /* klp_reloc.sec_offset */
+ reloc = find_reloc_by_dest(elf, klp_relocs,
+ klp_reloc_off + offsetof(struct klp_reloc, offset));
+ if (!reloc) {
+ ERROR("malformed " KLP_RELOCS_SEC " section");
+ return -1;
+ }
+
+ sec = reloc->sym->sec;
+ offset = reloc_addend(reloc);
+
+ /* klp_reloc.sym */
+ reloc = find_reloc_by_dest(elf, klp_relocs,
+ klp_reloc_off + offsetof(struct klp_reloc, sym));
+ if (!reloc) {
+ ERROR("malformed " KLP_RELOCS_SEC " section");
+ return -1;
+ }
+
+ klp_sym = reloc->sym;
+ addend = reloc_addend(reloc);
+
+ /* symbol format: .klp.sym.modname.sym_name,sympos */
+ if (sscanf(klp_sym->name + strlen(KLP_SYM_PREFIX), "%55[^.]", sym_modname) != 1)
+ ERROR("can't find modname in klp symbol '%s'", klp_sym->name);
+
+ /*
+ * Create the KLP rela:
+ */
+
+ /* section format: .klp.rela.sec_objname.section_name */
+ if (snprintf_check(rsec_name, SEC_NAME_LEN,
+ KLP_RELOC_SEC_PREFIX "%s.%s",
+ sym_modname, sec->name))
+ return -1;
+
+ klp_rsec = find_section_by_name(elf, rsec_name);
+ if (!klp_rsec) {
+ klp_rsec = elf_create_section(elf, rsec_name, 0,
+ elf_rela_size(elf),
+ SHT_RELA, elf_addr_size(elf),
+ SHF_ALLOC | SHF_INFO_LINK | SHF_RELA_LIVEPATCH);
+ if (!klp_rsec)
+ return -1;
+
+ klp_rsec->sh.sh_link = symtab->idx;
+ klp_rsec->sh.sh_info = sec->idx;
+ klp_rsec->base = sec;
+ }
+
+ tmp = sec->rsec;
+ sec->rsec = klp_rsec;
+ if (!elf_create_reloc(elf, sec, offset, klp_sym, addend, klp_reloc->type))
+ return -1;
+ sec->rsec = tmp;
+
+ /*
+ * Fix up the corresponding KLP symbol:
+ */
+
+ klp_sym->sym.st_shndx = SHN_LIVEPATCH;
+ if (!gelf_update_sym(symtab->data, klp_sym->idx, &klp_sym->sym)) {
+ ERROR_ELF("gelf_update_sym");
+ return -1;
+ }
+
+ /*
+ * Disable the original non-KLP reloc by converting it to R_*_NONE:
+ */
+
+ reloc = find_reloc_by_dest(elf, sec, offset);
+ sym = reloc->sym;
+ sym->sym.st_shndx = SHN_LIVEPATCH;
+ set_reloc_type(elf, reloc, 0);
+ if (!gelf_update_sym(symtab->data, sym->idx, &sym->sym)) {
+ ERROR_ELF("gelf_update_sym");
+ return -1;
+ }
+ }
+
+ return 0;
+}
+
+/*
+ * This runs on the livepatch module after all other linking has been done. It
+ * converts the intermediate __klp_relocs section into proper KLP relocs to be
+ * processed by livepatch. This needs to run last to avoid linker wreckage.
+ * Linkers don't tend to handle the "two rela sections for a single base
+ * section" case very well, nor do they appreciate SHN_LIVEPATCH.
+ */
+int cmd_klp_post_link(int argc, const char **argv)
+{
+ struct elf *elf;
+
+ argc--;
+ argv++;
+
+ if (argc != 1) {
+ fprintf(stderr, "%d\n", argc);
+ fprintf(stderr, "usage: objtool link <file.ko>\n");
+ return -1;
+ }
+
+ elf = elf_open_read(argv[0], O_RDWR);
+ if (!elf)
+ return -1;
+
+ if (fix_klp_relocs(elf))
+ return -1;
+
+ if (elf_write(elf))
+ return -1;
+
+ return elf_close(elf);
+}
diff --git a/tools/objtool/noreturns.h b/tools/objtool/noreturns.h
index 6a922d046b8e..14f8ab653449 100644
--- a/tools/objtool/noreturns.h
+++ b/tools/objtool/noreturns.h
@@ -36,6 +36,7 @@ NORETURN(machine_real_restart)
NORETURN(make_task_dead)
NORETURN(mpt_halt_firmware)
NORETURN(mwait_play_dead)
+NORETURN(native_play_dead)
NORETURN(nmi_panic_self_stop)
NORETURN(panic)
NORETURN(vpanic)
@@ -45,7 +46,6 @@ NORETURN(rewind_stack_and_make_dead)
NORETURN(rust_begin_unwind)
NORETURN(rust_helper_BUG)
NORETURN(sev_es_terminate)
-NORETURN(snp_abort)
NORETURN(start_kernel)
NORETURN(stop_this_cpu)
NORETURN(usercopy_abort)
diff --git a/tools/objtool/objtool.c b/tools/objtool/objtool.c
index 5c8b974ad0f9..1c3622117c33 100644
--- a/tools/objtool/objtool.c
+++ b/tools/objtool/objtool.c
@@ -16,7 +16,8 @@
#include <objtool/objtool.h>
#include <objtool/warn.h>
-bool help;
+bool debug;
+int indent;
static struct objtool_file file;
@@ -71,13 +72,54 @@ int objtool_pv_add(struct objtool_file *f, int idx, struct symbol *func)
return 0;
}
+char *top_level_dir(const char *file)
+{
+ ssize_t len, self_len, file_len;
+ char self[PATH_MAX], *str;
+ int i;
+
+ len = readlink("/proc/self/exe", self, sizeof(self) - 1);
+ if (len <= 0)
+ return NULL;
+ self[len] = '\0';
+
+ for (i = 0; i < 3; i++) {
+ char *s = strrchr(self, '/');
+ if (!s)
+ return NULL;
+ *s = '\0';
+ }
+
+ self_len = strlen(self);
+ file_len = strlen(file);
+
+ str = malloc(self_len + file_len + 2);
+ if (!str)
+ return NULL;
+
+ memcpy(str, self, self_len);
+ str[self_len] = '/';
+ strcpy(str + self_len + 1, file);
+
+ return str;
+}
+
int main(int argc, const char **argv)
{
static const char *UNUSED = "OBJTOOL_NOT_IMPLEMENTED";
+ if (init_signal_handler())
+ return -1;
+
/* libsubcmd init */
exec_cmd_init("objtool", UNUSED, UNUSED, UNUSED);
pager_init(UNUSED);
+ if (argc > 1 && !strcmp(argv[1], "klp")) {
+ argc--;
+ argv++;
+ return cmd_klp(argc, argv);
+ }
+
return objtool_run(argc, argv);
}
diff --git a/tools/objtool/orc_dump.c b/tools/objtool/orc_dump.c
index 1dd9fc18fe62..5a979f52425a 100644
--- a/tools/objtool/orc_dump.c
+++ b/tools/objtool/orc_dump.c
@@ -8,7 +8,6 @@
#include <objtool/objtool.h>
#include <objtool/orc.h>
#include <objtool/warn.h>
-#include <objtool/endianness.h>
int orc_dump(const char *filename)
{
diff --git a/tools/objtool/orc_gen.c b/tools/objtool/orc_gen.c
index 922e6aac7cea..1045e1380ffd 100644
--- a/tools/objtool/orc_gen.c
+++ b/tools/objtool/orc_gen.c
@@ -12,7 +12,6 @@
#include <objtool/check.h>
#include <objtool/orc.h>
#include <objtool/warn.h>
-#include <objtool/endianness.h>
struct orc_list_entry {
struct list_head list;
@@ -57,7 +56,7 @@ int orc_create(struct objtool_file *file)
/* Build a deduplicated list of ORC entries: */
INIT_LIST_HEAD(&orc_list);
- for_each_sec(file, sec) {
+ for_each_sec(file->elf, sec) {
struct orc_entry orc, prev_orc = {0};
struct instruction *insn;
bool empty = true;
@@ -127,7 +126,11 @@ int orc_create(struct objtool_file *file)
return -1;
}
orc_sec = elf_create_section(file->elf, ".orc_unwind",
- sizeof(struct orc_entry), nr);
+ nr * sizeof(struct orc_entry),
+ sizeof(struct orc_entry),
+ SHT_PROGBITS,
+ 1,
+ SHF_ALLOC);
if (!orc_sec)
return -1;
diff --git a/tools/objtool/signal.c b/tools/objtool/signal.c
new file mode 100644
index 000000000000..af5c65c0fb2d
--- /dev/null
+++ b/tools/objtool/signal.c
@@ -0,0 +1,135 @@
+/*
+ * signal.c: Register a sigaltstack for objtool, to be able to
+ * run a signal handler on a separate stack even if
+ * the main process stack has overflown. Print out
+ * stack overflow errors when this happens.
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <signal.h>
+#include <unistd.h>
+#include <sys/resource.h>
+#include <string.h>
+
+#include <objtool/objtool.h>
+#include <objtool/warn.h>
+
+static unsigned long stack_limit;
+
+static bool is_stack_overflow(void *fault_addr)
+{
+ unsigned long fault = (unsigned long)fault_addr;
+
+ /* Check if fault is in the guard page just below the limit. */
+ return fault < stack_limit && fault >= stack_limit - 4096;
+}
+
+static void signal_handler(int sig_num, siginfo_t *info, void *context)
+{
+ struct sigaction sa_dfl = {0};
+ const char *sig_name;
+ char msg[256];
+ int msg_len;
+
+ switch (sig_num) {
+ case SIGSEGV: sig_name = "SIGSEGV"; break;
+ case SIGBUS: sig_name = "SIGBUS"; break;
+ case SIGILL: sig_name = "SIGILL"; break;
+ case SIGABRT: sig_name = "SIGABRT"; break;
+ default: sig_name = "Unknown signal"; break;
+ }
+
+ if (is_stack_overflow(info->si_addr)) {
+ msg_len = snprintf(msg, sizeof(msg),
+ "%s: error: %s: objtool stack overflow!\n",
+ objname, sig_name);
+ } else {
+ msg_len = snprintf(msg, sizeof(msg),
+ "%s: error: %s: objtool crash!\n",
+ objname, sig_name);
+ }
+
+ msg_len = write(STDERR_FILENO, msg, msg_len);
+
+ /* Re-raise the signal to trigger the core dump */
+ sa_dfl.sa_handler = SIG_DFL;
+ sigaction(sig_num, &sa_dfl, NULL);
+ raise(sig_num);
+}
+
+static int read_stack_limit(void)
+{
+ unsigned long stack_start, stack_end;
+ struct rlimit rlim;
+ char line[256];
+ int ret = 0;
+ FILE *fp;
+
+ if (getrlimit(RLIMIT_STACK, &rlim)) {
+ ERROR_GLIBC("getrlimit");
+ return -1;
+ }
+
+ fp = fopen("/proc/self/maps", "r");
+ if (!fp) {
+ ERROR_GLIBC("fopen");
+ return -1;
+ }
+
+ while (fgets(line, sizeof(line), fp)) {
+ if (strstr(line, "[stack]")) {
+ if (sscanf(line, "%lx-%lx", &stack_start, &stack_end) != 2) {
+ ERROR_GLIBC("sscanf");
+ ret = -1;
+ goto done;
+ }
+ stack_limit = stack_end - rlim.rlim_cur;
+ goto done;
+ }
+ }
+
+ ret = -1;
+ ERROR("/proc/self/maps: can't find [stack]");
+
+done:
+ fclose(fp);
+
+ return ret;
+}
+
+int init_signal_handler(void)
+{
+ int signals[] = {SIGSEGV, SIGBUS, SIGILL, SIGABRT};
+ struct sigaction sa;
+ stack_t ss;
+
+ if (read_stack_limit())
+ return -1;
+
+ ss.ss_sp = malloc(SIGSTKSZ);
+ if (!ss.ss_sp) {
+ ERROR_GLIBC("malloc");
+ return -1;
+ }
+ ss.ss_size = SIGSTKSZ;
+ ss.ss_flags = 0;
+
+ if (sigaltstack(&ss, NULL) == -1) {
+ ERROR_GLIBC("sigaltstack");
+ return -1;
+ }
+
+ sa.sa_sigaction = signal_handler;
+ sigemptyset(&sa.sa_mask);
+
+ sa.sa_flags = SA_ONSTACK | SA_SIGINFO;
+
+ for (int i = 0; i < ARRAY_SIZE(signals); i++) {
+ if (sigaction(signals[i], &sa, NULL) == -1) {
+ ERROR_GLIBC("sigaction");
+ return -1;
+ }
+ }
+
+ return 0;
+}
diff --git a/tools/objtool/special.c b/tools/objtool/special.c
index c80fed8a840e..2a533afbc69a 100644
--- a/tools/objtool/special.c
+++ b/tools/objtool/special.c
@@ -15,7 +15,6 @@
#include <objtool/builtin.h>
#include <objtool/special.h>
#include <objtool/warn.h>
-#include <objtool/endianness.h>
struct special_entry {
const char *sec;
@@ -82,6 +81,8 @@ static int get_alt_entry(struct elf *elf, const struct special_entry *entry,
entry->orig_len);
alt->new_len = *(unsigned char *)(sec->data->d_buf + offset +
entry->new_len);
+ alt->feature = *(unsigned int *)(sec->data->d_buf + offset +
+ entry->feature);
}
orig_reloc = find_reloc_by_dest(elf, sec, offset + entry->orig);
@@ -133,7 +134,7 @@ int special_get_alts(struct elf *elf, struct list_head *alts)
struct section *sec;
unsigned int nr_entries;
struct special_alt *alt;
- int idx, ret;
+ int idx;
INIT_LIST_HEAD(alts);
@@ -142,12 +143,12 @@ int special_get_alts(struct elf *elf, struct list_head *alts)
if (!sec)
continue;
- if (sec->sh.sh_size % entry->size != 0) {
+ if (sec_size(sec) % entry->size != 0) {
ERROR("%s size not a multiple of %d", sec->name, entry->size);
return -1;
}
- nr_entries = sec->sh.sh_size / entry->size;
+ nr_entries = sec_size(sec) / entry->size;
for (idx = 0; idx < nr_entries; idx++) {
alt = malloc(sizeof(*alt));
@@ -157,11 +158,8 @@ int special_get_alts(struct elf *elf, struct list_head *alts)
}
memset(alt, 0, sizeof(*alt));
- ret = get_alt_entry(elf, entry, sec, idx, alt);
- if (ret > 0)
- continue;
- if (ret < 0)
- return ret;
+ if (get_alt_entry(elf, entry, sec, idx, alt))
+ return -1;
list_add_tail(&alt->list, alts);
}
diff --git a/tools/objtool/sync-check.sh b/tools/objtool/sync-check.sh
index 81d120d05442..e38167ca56a9 100755
--- a/tools/objtool/sync-check.sh
+++ b/tools/objtool/sync-check.sh
@@ -16,6 +16,8 @@ arch/x86/include/asm/orc_types.h
arch/x86/include/asm/emulate_prefix.h
arch/x86/lib/x86-opcode-map.txt
arch/x86/tools/gen-insn-attr-x86.awk
+include/linux/interval_tree_generic.h
+include/linux/livepatch_external.h
include/linux/static_call_types.h
"
diff --git a/tools/objtool/trace.c b/tools/objtool/trace.c
new file mode 100644
index 000000000000..5dec44dab781
--- /dev/null
+++ b/tools/objtool/trace.c
@@ -0,0 +1,203 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (c) 2025, Oracle and/or its affiliates.
+ */
+
+#include <objtool/trace.h>
+
+bool trace;
+int trace_depth;
+
+/*
+ * Macros to trace CFI state attributes changes.
+ */
+
+#define TRACE_CFI_ATTR(attr, prev, next, fmt, ...) \
+({ \
+ if ((prev)->attr != (next)->attr) \
+ TRACE("%s=" fmt " ", #attr, __VA_ARGS__); \
+})
+
+#define TRACE_CFI_ATTR_BOOL(attr, prev, next) \
+ TRACE_CFI_ATTR(attr, prev, next, \
+ "%s", (next)->attr ? "true" : "false")
+
+#define TRACE_CFI_ATTR_NUM(attr, prev, next, fmt) \
+ TRACE_CFI_ATTR(attr, prev, next, fmt, (next)->attr)
+
+#define CFI_REG_NAME_MAXLEN 16
+
+/*
+ * Return the name of a register. Note that the same static buffer
+ * is returned if the name is dynamically generated.
+ */
+static const char *cfi_reg_name(unsigned int reg)
+{
+ static char rname_buffer[CFI_REG_NAME_MAXLEN];
+ const char *rname;
+
+ switch (reg) {
+ case CFI_UNDEFINED:
+ return "<undefined>";
+ case CFI_CFA:
+ return "cfa";
+ case CFI_SP_INDIRECT:
+ return "(sp)";
+ case CFI_BP_INDIRECT:
+ return "(bp)";
+ }
+
+ if (reg < CFI_NUM_REGS) {
+ rname = arch_reg_name[reg];
+ if (rname)
+ return rname;
+ }
+
+ if (snprintf(rname_buffer, CFI_REG_NAME_MAXLEN, "r%d", reg) == -1)
+ return "<error>";
+
+ return (const char *)rname_buffer;
+}
+
+/*
+ * Functions and macros to trace CFI registers changes.
+ */
+
+static void trace_cfi_reg(const char *prefix, int reg, const char *fmt,
+ int base_prev, int offset_prev,
+ int base_next, int offset_next)
+{
+ char *rname;
+
+ if (base_prev == base_next && offset_prev == offset_next)
+ return;
+
+ if (prefix)
+ TRACE("%s:", prefix);
+
+ if (base_next == CFI_UNDEFINED) {
+ TRACE("%1$s=<undef> ", cfi_reg_name(reg));
+ } else {
+ rname = strdup(cfi_reg_name(reg));
+ TRACE(fmt, rname, cfi_reg_name(base_next), offset_next);
+ free(rname);
+ }
+}
+
+static void trace_cfi_reg_val(const char *prefix, int reg,
+ int base_prev, int offset_prev,
+ int base_next, int offset_next)
+{
+ trace_cfi_reg(prefix, reg, "%1$s=%2$s%3$+d ",
+ base_prev, offset_prev, base_next, offset_next);
+}
+
+static void trace_cfi_reg_ref(const char *prefix, int reg,
+ int base_prev, int offset_prev,
+ int base_next, int offset_next)
+{
+ trace_cfi_reg(prefix, reg, "%1$s=(%2$s%3$+d) ",
+ base_prev, offset_prev, base_next, offset_next);
+}
+
+#define TRACE_CFI_REG_VAL(reg, prev, next) \
+ trace_cfi_reg_val(NULL, reg, prev.base, prev.offset, \
+ next.base, next.offset)
+
+#define TRACE_CFI_REG_REF(reg, prev, next) \
+ trace_cfi_reg_ref(NULL, reg, prev.base, prev.offset, \
+ next.base, next.offset)
+
+void trace_insn_state(struct instruction *insn, struct insn_state *sprev,
+ struct insn_state *snext)
+{
+ struct cfi_state *cprev, *cnext;
+ int i;
+
+ if (!memcmp(sprev, snext, sizeof(struct insn_state)))
+ return;
+
+ cprev = &sprev->cfi;
+ cnext = &snext->cfi;
+
+ disas_print_insn(stderr, objtool_disas_ctx, insn,
+ trace_depth - 1, "state: ");
+
+ /* print registers changes */
+ TRACE_CFI_REG_VAL(CFI_CFA, cprev->cfa, cnext->cfa);
+ for (i = 0; i < CFI_NUM_REGS; i++) {
+ TRACE_CFI_REG_VAL(i, cprev->vals[i], cnext->vals[i]);
+ TRACE_CFI_REG_REF(i, cprev->regs[i], cnext->regs[i]);
+ }
+
+ /* print attributes changes */
+ TRACE_CFI_ATTR_NUM(stack_size, cprev, cnext, "%d");
+ TRACE_CFI_ATTR_BOOL(drap, cprev, cnext);
+ if (cnext->drap) {
+ trace_cfi_reg_val("drap", cnext->drap_reg,
+ cprev->drap_reg, cprev->drap_offset,
+ cnext->drap_reg, cnext->drap_offset);
+ }
+ TRACE_CFI_ATTR_BOOL(bp_scratch, cprev, cnext);
+ TRACE_CFI_ATTR_NUM(instr, sprev, snext, "%d");
+ TRACE_CFI_ATTR_NUM(uaccess_stack, sprev, snext, "%u");
+
+ TRACE("\n");
+
+ insn->trace = 1;
+}
+
+void trace_alt_begin(struct instruction *orig_insn, struct alternative *alt,
+ char *alt_name)
+{
+ struct instruction *alt_insn;
+ char suffix[2];
+
+ alt_insn = alt->insn;
+
+ if (alt->type == ALT_TYPE_EX_TABLE) {
+ /*
+ * When there is an exception table then the instruction
+ * at the original location is executed but it can cause
+ * an exception. In that case, the execution will be
+ * redirected to the alternative instruction.
+ *
+ * The instruction at the original location can have
+ * instruction alternatives, so we just print the location
+ * of the instruction that can cause the exception and
+ * not the instruction itself.
+ */
+ TRACE_ALT_INFO_NOADDR(orig_insn, "/ ", "%s for instruction at 0x%lx <%s+0x%lx>",
+ alt_name,
+ orig_insn->offset, orig_insn->sym->name,
+ orig_insn->offset - orig_insn->sym->offset);
+ } else {
+ TRACE_ALT_INFO_NOADDR(orig_insn, "/ ", "%s", alt_name);
+ }
+
+ if (alt->type == ALT_TYPE_JUMP_TABLE) {
+ /*
+ * For a jump alternative, if the default instruction is
+ * a NOP then it is replaced with the jmp instruction,
+ * otherwise it is replaced with a NOP instruction.
+ */
+ trace_depth++;
+ if (orig_insn->type == INSN_NOP) {
+ suffix[0] = (orig_insn->len == 5) ? 'q' : '\0';
+ TRACE_ADDR(orig_insn, "jmp%-3s %lx <%s+0x%lx>", suffix,
+ alt_insn->offset, alt_insn->sym->name,
+ alt_insn->offset - alt_insn->sym->offset);
+ } else {
+ TRACE_ADDR(orig_insn, "nop%d", orig_insn->len);
+ trace_depth--;
+ }
+ }
+}
+
+void trace_alt_end(struct instruction *orig_insn, struct alternative *alt,
+ char *alt_name)
+{
+ if (alt->type == ALT_TYPE_JUMP_TABLE && orig_insn->type == INSN_NOP)
+ trace_depth--;
+ TRACE_ALT_INFO_NOADDR(orig_insn, "\\ ", "%s", alt_name);
+}
diff --git a/tools/objtool/weak.c b/tools/objtool/weak.c
index d83f607733b0..d6562f292259 100644
--- a/tools/objtool/weak.c
+++ b/tools/objtool/weak.c
@@ -8,6 +8,8 @@
#include <stdbool.h>
#include <errno.h>
#include <objtool/objtool.h>
+#include <objtool/arch.h>
+#include <objtool/builtin.h>
#define UNSUPPORTED(name) \
({ \
@@ -24,3 +26,8 @@ int __weak orc_create(struct objtool_file *file)
{
UNSUPPORTED("ORC");
}
+
+int __weak cmd_klp(int argc, const char **argv)
+{
+ UNSUPPORTED("klp");
+}
diff --git a/tools/perf/Documentation/Build.txt b/tools/perf/Documentation/Build.txt
index 83dc87c662b6..57b226e7fc2f 100644
--- a/tools/perf/Documentation/Build.txt
+++ b/tools/perf/Documentation/Build.txt
@@ -99,3 +99,18 @@ configuration paths for cross building:
In this case, the variable PKG_CONFIG_SYSROOT_DIR can be used alongside the
variable PKG_CONFIG_LIBDIR or PKG_CONFIG_PATH to prepend the sysroot path to
the library paths for cross compilation.
+
+5) Build with Clang
+===================
+By default, the makefile uses GCC as compiler. With specifying environment
+variables HOSTCC, CC and CXX, it allows to build perf with Clang.
+
+Using Clang for a native build:
+
+ $ HOSTCC=clang CC=clang CXX=clang++ make -C tools/perf
+
+Specifying ARCH and CROSS_COMPILE for cross compilation:
+
+ $ HOSTCC=clang CC=clang CXX=clang++ \
+ ARCH=arm64 CROSS_COMPILE=aarch64-linux-gnu- \
+ make -C tools/perf
diff --git a/tools/perf/Documentation/android.txt b/tools/perf/Documentation/android.txt
index 24a59998fc91..3f3cc7ac3d13 100644
--- a/tools/perf/Documentation/android.txt
+++ b/tools/perf/Documentation/android.txt
@@ -1,78 +1,10 @@
How to compile perf for Android
-=========================================
+===============================
-I. Set the Android NDK environment
-------------------------------------------------
+There are two ways to build perf and run it on Android:
-(a). Use the Android NDK
-------------------------------------------------
-1. You need to download and install the Android Native Development Kit (NDK).
-Set the NDK variable to point to the path where you installed the NDK:
- export NDK=/path/to/android-ndk
+- Method 1: Build perf with static linking. See Build.txt, section
+ "4) Cross compilation" for how to build a static perf binary.
-2. Set cross-compiling environment variables for NDK toolchain and sysroot.
-For arm:
- export NDK_TOOLCHAIN=${NDK}/toolchains/arm-linux-androideabi-4.9/prebuilt/linux-x86_64/bin/arm-linux-androideabi-
- export NDK_SYSROOT=${NDK}/platforms/android-24/arch-arm
-For x86:
- export NDK_TOOLCHAIN=${NDK}/toolchains/x86-4.9/prebuilt/linux-x86_64/bin/i686-linux-android-
- export NDK_SYSROOT=${NDK}/platforms/android-24/arch-x86
-
-This method is only tested for Android NDK versions Revision 11b and later.
-perf uses some bionic enhancements that are not included in prior NDK versions.
-You can use method (b) described below instead.
-
-(b). Use the Android source tree
------------------------------------------------
-1. Download the master branch of the Android source tree.
-Set the environment for the target you want using:
- source build/envsetup.sh
- lunch
-
-2. Build your own NDK sysroot to contain latest bionic changes and set the
-NDK sysroot environment variable.
- cd ${ANDROID_BUILD_TOP}/ndk
-For arm:
- ./build/tools/build-ndk-sysroot.sh --abi=arm
- export NDK_SYSROOT=${ANDROID_BUILD_TOP}/ndk/build/platforms/android-3/arch-arm
-For x86:
- ./build/tools/build-ndk-sysroot.sh --abi=x86
- export NDK_SYSROOT=${ANDROID_BUILD_TOP}/ndk/build/platforms/android-3/arch-x86
-
-3. Set the NDK toolchain environment variable.
-For arm:
- export NDK_TOOLCHAIN=${ANDROID_TOOLCHAIN}/arm-linux-androideabi-
-For x86:
- export NDK_TOOLCHAIN=${ANDROID_TOOLCHAIN}/i686-linux-android-
-
-II. Compile perf for Android
-------------------------------------------------
-You need to run make with the NDK toolchain and sysroot defined above:
-For arm:
- make WERROR=0 ARCH=arm CROSS_COMPILE=${NDK_TOOLCHAIN} EXTRA_CFLAGS="-pie --sysroot=${NDK_SYSROOT}"
-For x86:
- make WERROR=0 ARCH=x86 CROSS_COMPILE=${NDK_TOOLCHAIN} EXTRA_CFLAGS="-pie --sysroot=${NDK_SYSROOT}"
-
-III. Install perf
------------------------------------------------
-You need to connect to your Android device/emulator using adb.
-Install perf using:
- adb push perf /data/perf
-
-If you also want to use perf-archive you need busybox tools for Android.
-For installing perf-archive, you first need to replace #!/bin/bash with #!/system/bin/sh:
- sed 's/#!\/bin\/bash/#!\/system\/bin\/sh/g' perf-archive >> /tmp/perf-archive
- chmod +x /tmp/perf-archive
- adb push /tmp/perf-archive /data/perf-archive
-
-IV. Environment settings for running perf
-------------------------------------------------
-Some perf features need environment variables to run properly.
-You need to set these before running perf on the target:
- adb shell
- # PERF_PAGER=cat
-
-IV. Run perf
-------------------------------------------------
-Run perf on your device/emulator to which you previously connected using adb:
- # ./data/perf
+- Method 2: Download the Android NDK and use the bundled Clang to
+ build perf. See Build.txt, section "5) Build with clang" for details.
diff --git a/tools/perf/Documentation/intel-acr.txt b/tools/perf/Documentation/intel-acr.txt
new file mode 100644
index 000000000000..72654fdd9a52
--- /dev/null
+++ b/tools/perf/Documentation/intel-acr.txt
@@ -0,0 +1,53 @@
+Intel Auto Counter Reload Support
+---------------------------------
+Support for Intel Auto Counter Reload in perf tools
+
+Auto counter reload provides a means for software to specify to hardware
+that certain counters, if supported, should be automatically reloaded
+upon overflow of chosen counters. By taking a sample only if the rate of
+one event exceeds some threshold relative to the rate of another event,
+this feature enables software to sample based on the relative rate of
+two or more events. To enable this, the user must provide a sample period
+term and a bitmask ("acr_mask") for each relevant event specifying the
+counters in an event group to reload if the event's specified sample
+period is exceeded.
+
+For example, if the user desires to measure a scenario when IPC > 2,
+the event group might look like the one below:
+
+ perf record -e {cpu_atom/instructions,period=200000,acr_mask=0x2/, \
+ cpu_atom/cycles,period=100000,acr_mask=0x3/} -- true
+
+In this case, if the "instructions" counter exceeds the sample period of
+200000, the second counter, "cycles", will be reset and a sample will be
+taken. If "cycles" is exceeded first, both counters in the group will be
+reset. In this way, samples will only be taken for cases where IPC > 2.
+
+The acr_mask term is a hexadecimal value representing a bitmask of the
+events in the group to be reset when the period is exceeded. In the
+example above, "instructions" is assigned an acr_mask of 0x2, meaning
+only the second event in the group is reloaded and a sample is taken
+for the first event. "cycles" is assigned an acr_mask of 0x3, meaning
+that both event counters will be reset if the sample period is exceeded
+first.
+
+ratio-to-prev Event Term
+------------------------
+To simplify this, an event term "ratio-to-prev" is provided which is used
+alongside the sample period term n or the -c/--count option. This would
+allow users to specify the desired relative rate between events as a
+ratio. Note: Both events compared must belong to the same PMU.
+
+The command above would then become
+
+ perf record -e {cpu_atom/instructions/, \
+ cpu_atom/cycles,period=100000,ratio-to-prev=0.5/} -- true
+
+ratio-to-prev is the ratio of the event using the term relative
+to the previous event in the group, which will always be 1,
+for a 1:0.5 or 2:1 ratio.
+
+To sample for IPC < 2 for example, the events need to be reordered:
+
+ perf record -e {cpu_atom/cycles/, \
+ cpu_atom/instructions,period=200000,ratio-to-prev=2.0/} -- true
diff --git a/tools/perf/Documentation/perf-annotate.txt b/tools/perf/Documentation/perf-annotate.txt
index 46090c5b42b4..547f1a268018 100644
--- a/tools/perf/Documentation/perf-annotate.txt
+++ b/tools/perf/Documentation/perf-annotate.txt
@@ -170,7 +170,6 @@ include::itrace.txt[]
--code-with-type::
Show data type info in code annotation (for memory instructions only).
- Currently it only works with --stdio option.
SEE ALSO
diff --git a/tools/perf/Documentation/perf-arm-spe.txt b/tools/perf/Documentation/perf-arm-spe.txt
index 37afade4f1b2..8b02e5b983fa 100644
--- a/tools/perf/Documentation/perf-arm-spe.txt
+++ b/tools/perf/Documentation/perf-arm-spe.txt
@@ -141,27 +141,65 @@ Config parameters
These are placed between the // in the event and comma separated. For example '-e
arm_spe/load_filter=1,min_latency=10/'
- branch_filter=1 - collect branches only (PMSFCR.B)
- event_filter=<mask> - filter on specific events (PMSEVFR) - see bitfield description below
+ event_filter=<mask> - logical AND filter on specific events (PMSEVFR) - see bitfield description below
+ inv_event_filter=<mask> - logical OR to filter out specific events (PMSNEVFR, FEAT_SPEv1p2) - see bitfield description below
jitter=1 - use jitter to avoid resonance when sampling (PMSIRR.RND)
- load_filter=1 - collect loads only (PMSFCR.LD)
min_latency=<n> - collect only samples with this latency or higher* (PMSLATFR)
pa_enable=1 - collect physical address (as well as VA) of loads/stores (PMSCR.PA) - requires privilege
pct_enable=1 - collect physical timestamp instead of virtual timestamp (PMSCR.PCT) - requires privilege
- store_filter=1 - collect stores only (PMSFCR.ST)
ts_enable=1 - enable timestamping with value of generic timer (PMSCR.TS)
discard=1 - enable SPE PMU events but don't collect sample data - see 'Discard mode' (PMBLIMITR.FM = DISCARD)
+ inv_data_src_filter=<mask> - mask to filter from 0-63 possible data sources (PMSDSFR, FEAT_SPE_FDS) - See 'Data source filtering'
+++*+++ Latency is the total latency from the point at which sampling started on that instruction, rather
than only the execution latency.
-Only some events can be filtered on; these include:
-
- bit 1 - instruction retired (i.e. omit speculative instructions)
+Only some events can be filtered on using 'event_filter' bits. The overall
+filter is the logical AND of these bits, for example if bits 3 and 5 are set
+only samples that have both 'L1D cache refill' AND 'TLB walk' are recorded. When
+FEAT_SPEv1p2 is implemented 'inv_event_filter' can also be used to exclude
+events that have any (OR) of the filter's bits set. For example setting bits 3
+and 5 in 'inv_event_filter' will exclude any events that are either L1D cache
+refill OR TLB walk. If the same bit is set in both filters it's UNPREDICTABLE
+whether the sample is included or excluded. Filter bits for both event_filter
+and inv_event_filter are:
+
+ bit 1 - Instruction retired (i.e. omit speculative instructions)
+ bit 2 - L1D access (FEAT_SPEv1p4)
bit 3 - L1D refill
+ bit 4 - TLB access (FEAT_SPEv1p4)
bit 5 - TLB refill
- bit 7 - mispredict
- bit 11 - misaligned access
+ bit 6 - Not taken event (FEAT_SPEv1p2)
+ bit 7 - Mispredict
+ bit 8 - Last level cache access (FEAT_SPEv1p4)
+ bit 9 - Last level cache miss (FEAT_SPEv1p4)
+ bit 10 - Remote access (FEAT_SPEv1p4)
+ bit 11 - Misaligned access (FEAT_SPEv1p1)
+ bit 12-15 - IMPLEMENTATION DEFINED events (when implemented)
+ bit 16 - Transaction (FEAT_TME)
+ bit 17 - Partial or empty SME or SVE predicate (FEAT_SPEv1p1)
+ bit 18 - Empty SME or SVE predicate (FEAT_SPEv1p1)
+ bit 19 - L2D access (FEAT_SPEv1p4)
+ bit 20 - L2D miss (FEAT_SPEv1p4)
+ bit 21 - Cache data modified (FEAT_SPEv1p4)
+ bit 22 - Recently fetched (FEAT_SPEv1p4)
+ bit 23 - Data snooped (FEAT_SPEv1p4)
+ bit 24 - Streaming SVE mode event (when FEAT_SPE_SME is implemented), or
+ IMPLEMENTATION DEFINED event 24 (when implemented, only versions
+ less than FEAT_SPEv1p4)
+ bit 25 - SMCU or external coprocessor operation event when FEAT_SPE_SME is
+ implemented, or IMPLEMENTATION DEFINED event 25 (when implemented,
+ only versions less than FEAT_SPEv1p4)
+ bit 26-31 - IMPLEMENTATION DEFINED events (only versions less than FEAT_SPEv1p4)
+ bit 48-63 - IMPLEMENTATION DEFINED events (when implemented)
+
+For IMPLEMENTATION DEFINED bits, refer to the CPU TRM if these bits are
+implemented.
+
+The driver will reject events if requested filter bits require unimplemented SPE
+versions, but will not reject filter bits for unimplemented IMPDEF bits or when
+their related feature is not present (e.g. SME). For example, if FEAT_SPEv1p2 is
+not implemented, filtering on "Not taken event" (bit 6) will be rejected.
So to sample just retired instructions:
@@ -171,6 +209,31 @@ or just mispredicted branches:
perf record -e arm_spe/event_filter=0x80/ -- ./mybench
+When set, the following filters can be used to select samples that match any of
+the operation types (OR filtering). If only one is set then only samples of that
+type are collected:
+
+ branch_filter=1 - Collect branches (PMSFCR.B)
+ load_filter=1 - Collect loads (PMSFCR.LD)
+ store_filter=1 - Collect stores (PMSFCR.ST)
+
+When extended filtering is supported (FEAT_SPE_EFT), SIMD and float
+pointer operations can also be selected:
+
+ simd_filter=1 - Collect SIMD loads, stores and operations (PMSFCR.SIMD)
+ float_filter=1 - Collect floating point loads, stores and operations (PMSFCR.FP)
+
+When extended filtering is supported (FEAT_SPE_EFT), operation type filters can
+be changed to AND using _mask fields. For example samples could be selected if
+they are store AND SIMD by setting 'store_filter=1,simd_filter=1,
+store_filter_mask=1,simd_filter_mask=1'. The new masks are as follows:
+
+ branch_filter_mask=1 - Change branch filter behavior from OR to AND (PMSFCR.Bm)
+ load_filter_mask=1 - Change load filter behavior from OR to AND (PMSFCR.LDm)
+ store_filter_mask=1 - Change store filter behavior from OR to AND (PMSFCR.STm)
+ simd_filter_mask=1 - Change SIMD filter behavior from OR to AND (PMSFCR.SIMDm)
+ float_filter_mask=1 - Change floating point filter behavior from OR to AND (PMSFCR.FPm)
+
Viewing the data
~~~~~~~~~~~~~~~~~
@@ -191,19 +254,29 @@ groups:
36 branch
0 remote-access
900 memory
+ 1800 instructions
The arm_spe// and dummy:u events are implementation details and are expected to be empty.
-To get a full list of unique samples that are not sorted into groups, set the itrace option to
-generate 'instruction' samples. The period option is also taken into account, so set it to 1
-instruction unless you want to further downsample the already sampled SPE data:
+The instructions group contains the full list of unique samples that are not
+sorted into other groups. To generate only this group use --itrace=i1i.
- perf report --itrace=i1i
+1i (1 instruction interval) signifies no further downsampling. Rather than an
+instruction interval, this generates a sample every n SPE samples. For example
+to generate the default set of events for every 100 SPE samples:
+
+ perf report --itrace==bxofmtMai100i
+
+Other period types, for example nanoseconds (ns) are not currently supported.
Memory access details are also stored on the samples and this can be viewed with:
perf report --mem-mode
+The latency value from the SPE sample is stored in the 'weight' field of the
+Perf samples and can be displayed in Perf script and report outputs by enabling
+its display from the command line.
+
Common errors
~~~~~~~~~~~~~
@@ -247,6 +320,25 @@ to minimize output. Then run perf stat:
perf record -e arm_spe/discard/ -a -N -B --no-bpf-event -o - > /dev/null &
perf stat -e SAMPLE_FEED_LD
+Data source filtering
+~~~~~~~~~~~~~~~~~~~~~
+
+When FEAT_SPE_FDS is present, 'inv_data_src_filter' can be used as a mask to
+filter on a subset (0 - 63) of possible data source IDs. The full range of data
+sources is 0 - 65535 although these are unlikely to be used in practice. Data
+sources are IMPDEF so refer to the TRM for the mappings. Each bit N of the
+filter maps to data source N. The filter is an OR of all the bits, and the value
+provided inv_data_src_filter is inverted before writing to PMSDSFR_EL1 so that
+set bits exclude that data source and cleared bits include that data source.
+Therefore the default value of 0 is equivalent to no filtering (all data sources
+included).
+
+For example, to include only data sources 0 and 3, clear bits 0 and 3
+(0xFFFFFFFFFFFFFFF6)
+
+When 'inv_data_src_filter' is set to 0xFFFFFFFFFFFFFFFF, any samples with any
+data source set are excluded.
+
SEE ALSO
--------
diff --git a/tools/perf/Documentation/perf-bench.txt b/tools/perf/Documentation/perf-bench.txt
index 8331bd28b10e..1160224cb718 100644
--- a/tools/perf/Documentation/perf-bench.txt
+++ b/tools/perf/Documentation/perf-bench.txt
@@ -177,11 +177,21 @@ Suite for evaluating performance of simple memory copy in various ways.
Options of *memcpy*
^^^^^^^^^^^^^^^^^^^
--l::
+-s::
--size::
Specify size of memory to copy (default: 1MB).
Available units are B, KB, MB, GB and TB (case insensitive).
+-p::
+--page::
+Specify page-size for mapping memory buffers (default: 4KB).
+Available values are 4KB, 2MB, 1GB (case insensitive).
+
+-k::
+--chunk::
+Specify the chunk-size for each invocation. (default: 0, or full-extent)
+Available units are B, KB, MB, GB and TB (case insensitive).
+
-f::
--function::
Specify function to copy (default: default).
@@ -201,11 +211,21 @@ Suite for evaluating performance of simple memory set in various ways.
Options of *memset*
^^^^^^^^^^^^^^^^^^^
--l::
+-s::
--size::
Specify size of memory to set (default: 1MB).
Available units are B, KB, MB, GB and TB (case insensitive).
+-p::
+--page::
+Specify page-size for mapping memory buffers (default: 4KB).
+Available values are 4KB, 2MB, 1GB (case insensitive).
+
+-k::
+--chunk::
+Specify the chunk-size for each invocation. (default: 0, or full-extent)
+Available units are B, KB, MB, GB and TB (case insensitive).
+
-f::
--function::
Specify function to set (default: default).
@@ -220,6 +240,40 @@ Repeat memset invocation this number of times.
--cycles::
Use perf's cpu-cycles event instead of gettimeofday syscall.
+*mmap*::
+Suite for evaluating memory subsystem performance for mmap()'d memory.
+
+Options of *mmap*
+^^^^^^^^^^^^^^^^^
+-s::
+--size::
+Specify size of memory to set (default: 1MB).
+Available units are B, KB, MB, GB and TB (case insensitive).
+
+-p::
+--page::
+Specify page-size for mapping memory buffers (default: 4KB).
+Available values are 4KB, 2MB, 1GB (case insensitive).
+
+-r::
+--randomize::
+Specify seed to randomize page access offset (default: 0, or not randomized).
+
+-f::
+--function::
+Specify function to set (default: all).
+Available functions are 'demand' and 'populate', with the first
+demand faulting pages in the region and the second using an eager
+mapping.
+
+-l::
+--nr_loops::
+Repeat mmap() invocation this number of times.
+
+-c::
+--cycles::
+Use perf's cpu-cycles event instead of gettimeofday syscall.
+
SUITES FOR 'numa'
~~~~~~~~~~~~~~~~~
*mem*::
diff --git a/tools/perf/Documentation/perf-c2c.txt b/tools/perf/Documentation/perf-c2c.txt
index f4af2dd6ab31..40b0f71a2c44 100644
--- a/tools/perf/Documentation/perf-c2c.txt
+++ b/tools/perf/Documentation/perf-c2c.txt
@@ -143,6 +143,13 @@ REPORT OPTIONS
feature, which causes cacheline sharing to behave like the cacheline
size is doubled.
+-M::
+--disassembler-style=::
+ Set disassembler style for objdump.
+
+--objdump=<path>::
+ Path to objdump binary.
+
C2C RECORD
----------
The perf c2c record command setup options related to HITM cacheline analysis
diff --git a/tools/perf/Documentation/perf-check.txt b/tools/perf/Documentation/perf-check.txt
index ee92042082f7..09e1d35677f5 100644
--- a/tools/perf/Documentation/perf-check.txt
+++ b/tools/perf/Documentation/perf-check.txt
@@ -50,12 +50,12 @@ feature::
dwarf / HAVE_LIBDW_SUPPORT
dwarf_getlocations / HAVE_LIBDW_SUPPORT
dwarf-unwind / HAVE_DWARF_UNWIND_SUPPORT
- auxtrace / HAVE_AUXTRACE_SUPPORT
libbfd / HAVE_LIBBFD_SUPPORT
libbpf-strings / HAVE_LIBBPF_STRINGS_SUPPORT
libcapstone / HAVE_LIBCAPSTONE_SUPPORT
libdw-dwarf-unwind / HAVE_LIBDW_SUPPORT
libelf / HAVE_LIBELF_SUPPORT
+ libLLVM / HAVE_LIBLLVM_SUPPORT
libnuma / HAVE_LIBNUMA_SUPPORT
libopencsd / HAVE_CSTRACE_SUPPORT
libperl / HAVE_LIBPERL_SUPPORT
diff --git a/tools/perf/Documentation/perf-config.txt b/tools/perf/Documentation/perf-config.txt
index c6f335659667..642d1c490d9e 100644
--- a/tools/perf/Documentation/perf-config.txt
+++ b/tools/perf/Documentation/perf-config.txt
@@ -452,6 +452,9 @@ call-graph.*::
kernel space is controlled not by this option but by the
kernel config (CONFIG_UNWINDER_*).
+ The 'defer' mode can be used with 'fp' mode to enable deferred
+ user callchains (like 'fp,defer').
+
call-graph.dump-size::
The size of stack to dump in order to do post-unwinding. Default is 8192 (byte).
When using dwarf into record-mode, the default size will be used if omitted.
diff --git a/tools/perf/Documentation/perf-diff.txt b/tools/perf/Documentation/perf-diff.txt
index f3067a4af294..58efab72d2e5 100644
--- a/tools/perf/Documentation/perf-diff.txt
+++ b/tools/perf/Documentation/perf-diff.txt
@@ -285,7 +285,7 @@ If specified the 'Weighted diff' column is displayed with value 'd' computed as:
- period being the hist entry period value
- - WEIGHT-A/WEIGHT-B being user supplied weights in the the '-c' option
+ - WEIGHT-A/WEIGHT-B being user supplied weights in the '-c' option
behind ':' separator like '-c wdiff:1,2'.
- WEIGHT-A being the weight of the data file
- WEIGHT-B being the weight of the baseline data file
diff --git a/tools/perf/Documentation/perf-list.txt b/tools/perf/Documentation/perf-list.txt
index 28215306a78a..a4378a0cd914 100644
--- a/tools/perf/Documentation/perf-list.txt
+++ b/tools/perf/Documentation/perf-list.txt
@@ -73,6 +73,7 @@ counted. The following modifiers exist:
e - group or event are exclusive and do not share the PMU
b - use BPF aggregration (see perf stat --bpf-counters)
R - retire latency value of the event
+ X - don't regroup the event to match PMUs
The 'p' modifier can be used for specifying how precise the instruction
address should be. The 'p' modifier can be specified multiple times:
@@ -392,6 +393,8 @@ Support raw format:
. '--raw-dump [hw|sw|cache|tracepoint|pmu|event_glob]', shows the raw-dump of
a certain kind of events.
+include::intel-acr.txt[]
+
SEE ALSO
--------
linkperf:perf-stat[1], linkperf:perf-top[1],
diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
index 067891bd7da6..e8b9aadbbfa5 100644
--- a/tools/perf/Documentation/perf-record.txt
+++ b/tools/perf/Documentation/perf-record.txt
@@ -325,6 +325,10 @@ OPTIONS
by default. User can change the number by passing it after comma
like "--call-graph fp,32".
+ Also "defer" can be used with "fp" (like "--call-graph fp,defer") to
+ enable deferred user callchain which will collect user-space callchains
+ when the thread returns to the user space.
+
-q::
--quiet::
Don't print any warnings or messages, useful for scripting.
diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt
index 28bec7e78bc8..03d112960632 100644
--- a/tools/perf/Documentation/perf-script.txt
+++ b/tools/perf/Documentation/perf-script.txt
@@ -527,6 +527,11 @@ include::itrace.txt[]
The known limitations include exception handing such as
setjmp/longjmp will have calls/returns not match.
+--merge-callchains::
+ Enable merging deferred user callchains if available. This is the
+ default behavior. If you want to see separate CALLCHAIN_DEFERRED
+ records for some reason, use --no-merge-callchains explicitly.
+
:GMEXAMPLECMD: script
:GMEXAMPLESUBCMD:
include::guest-files.txt[]
diff --git a/tools/perf/Documentation/perf-timechart.txt b/tools/perf/Documentation/perf-timechart.txt
index ef0c7565bd5c..ef2281c56743 100644
--- a/tools/perf/Documentation/perf-timechart.txt
+++ b/tools/perf/Documentation/perf-timechart.txt
@@ -94,6 +94,9 @@ RECORD OPTIONS
-g::
--callchain::
Do call-graph (stack chain/backtrace) recording
+-o::
+--output=::
+ Select the output file (default: perf.data)
EXAMPLES
--------
diff --git a/tools/perf/Documentation/perf-trace.txt b/tools/perf/Documentation/perf-trace.txt
index 973fede403a0..892c82a9bf40 100644
--- a/tools/perf/Documentation/perf-trace.txt
+++ b/tools/perf/Documentation/perf-trace.txt
@@ -249,6 +249,10 @@ the thread executes on the designated CPUs. Default is to monitor all CPUs.
works well with -s/--summary option where no argument information is
required.
+--max-summary=N::
+ Maximum number of lines in the summary mode. Note that this applies to
+ each entry (thread or cgroup).
+
PAGEFAULTS
----------
diff --git a/tools/perf/Documentation/perf.data-file-format.txt b/tools/perf/Documentation/perf.data-file-format.txt
index cd95ba09f727..c9d4dec65344 100644
--- a/tools/perf/Documentation/perf.data-file-format.txt
+++ b/tools/perf/Documentation/perf.data-file-format.txt
@@ -348,6 +348,16 @@ to special needs.
struct perf_bpil, which contains detailed information about
a BPF program, including type, id, tag, jited/xlated instructions, etc.
+The format of data in HEADER_BPF_PROG_INFO is as follows:
+ u32 count
+
+ struct perf_bpil {
+ u32 info_len; /* size of struct bpf_prog_info, when the tool is compiled */
+ u32 data_len; /* total bytes allocated for data, round up to 8 bytes */
+ u64 arrays; /* which arrays are included in data */
+ struct bpf_prog_info info;
+ u8 data[];
+ }[count];
HEADER_BPF_BTF = 26,
diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config
index 5a5832ee7b53..bd9f4804d56b 100644
--- a/tools/perf/Makefile.config
+++ b/tools/perf/Makefile.config
@@ -23,8 +23,39 @@ HOSTCFLAGS := $(filter-out -Wnested-externs,$(EXTRA_WARNINGS))
# borrowed from kernel headers depends on it, e.g. put_unaligned_*().
CFLAGS += -fno-strict-aliasing
-# Enabled Wthread-safety analysis for clang builds.
+# Set target flag and options when using clang as compiler.
ifeq ($(CC_NO_CLANG), 0)
+ CLANG_TARGET_FLAGS_arm := arm-linux-gnueabi
+ CLANG_TARGET_FLAGS_arm64 := aarch64-linux-gnu
+ CLANG_TARGET_FLAGS_m68k := m68k-linux-gnu
+ CLANG_TARGET_FLAGS_mips := mipsel-linux-gnu
+ CLANG_TARGET_FLAGS_powerpc := powerpc64le-linux-gnu
+ CLANG_TARGET_FLAGS_riscv := riscv64-linux-gnu
+ CLANG_TARGET_FLAGS_s390 := s390x-linux-gnu
+ CLANG_TARGET_FLAGS_x86 := x86_64-linux-gnu
+ CLANG_TARGET_FLAGS_x86_64 := x86_64-linux-gnu
+
+ # Default to host architecture if ARCH is not explicitly given.
+ ifeq ($(ARCH), $(HOSTARCH))
+ CLANG_TARGET_FLAGS := $(shell $(CLANG) -print-target-triple)
+ else
+ CLANG_TARGET_FLAGS := $(CLANG_TARGET_FLAGS_$(ARCH))
+ endif
+
+ ifeq ($(CROSS_COMPILE),)
+ ifeq ($(CLANG_TARGET_FLAGS),)
+ $(error Specify CROSS_COMPILE or add CLANG_TARGET_FLAGS for $(ARCH))
+ else
+ CLANG_FLAGS += --target=$(CLANG_TARGET_FLAGS)
+ endif # CLANG_TARGET_FLAGS
+ else
+ CLANG_FLAGS += --target=$(notdir $(CROSS_COMPILE:%-=%))
+ endif # CROSS_COMPILE
+
+ CC := $(CLANG) $(CLANG_FLAGS) -fintegrated-as
+ CXX := $(CXX) $(CLANG_FLAGS) -fintegrated-as
+
+ # Enabled Wthread-safety analysis for clang builds.
CFLAGS += -Wthread-safety
endif
@@ -323,9 +354,6 @@ FEATURE_CHECK_LDFLAGS-libpython := $(PYTHON_EMBED_LDOPTS)
FEATURE_CHECK_LDFLAGS-libaio = -lrt
-FEATURE_CHECK_LDFLAGS-disassembler-four-args = -lbfd -lopcodes -ldl
-FEATURE_CHECK_LDFLAGS-disassembler-init-styled = -lbfd -lopcodes -ldl
-
CORE_CFLAGS += -fno-omit-frame-pointer
CORE_CFLAGS += -Wall
CORE_CFLAGS += -Wextra
@@ -417,10 +445,6 @@ ifeq ($(feature-eventfd), 1)
CFLAGS += -DHAVE_EVENTFD_SUPPORT
endif
-ifeq ($(feature-get_current_dir_name), 1)
- CFLAGS += -DHAVE_GET_CURRENT_DIR_NAME
-endif
-
ifeq ($(feature-gettid), 1)
CFLAGS += -DHAVE_GETTID
endif
@@ -600,13 +624,6 @@ ifndef NO_LIBELF
LIBBPF_INCLUDE = $(LIBBPF_DIR)/..
endif
endif
-
- FEATURE_CHECK_CFLAGS-libbpf-strings="-I$(LIBBPF_INCLUDE)"
- $(call feature_check,libbpf-strings)
- ifeq ($(feature-libbpf-strings), 1)
- $(call detected,CONFIG_LIBBPF_STRINGS)
- CFLAGS += -DHAVE_LIBBPF_STRINGS_SUPPORT
- endif
endif
endif # NO_LIBBPF
endif # NO_LIBELF
@@ -784,15 +801,10 @@ endif
ifndef NO_SLANG
ifneq ($(feature-libslang), 1)
- ifneq ($(feature-libslang-include-subdir), 1)
- $(warning slang not found, disables TUI support. Please install slang-devel, libslang-dev or libslang2-dev)
- NO_SLANG := 1
- else
- CFLAGS += -DHAVE_SLANG_INCLUDE_SUBDIR
- endif
+ $(warning slang not found, disables TUI support. Please install slang-devel, libslang-dev or libslang2-dev)
+ NO_SLANG := 1
endif
ifndef NO_SLANG
- # Fedora has /usr/include/slang/slang.h, but ubuntu /usr/include/slang.h
CFLAGS += -DHAVE_SLANG_SUPPORT
EXTLIBS += -lslang
$(call detected,CONFIG_SLANG)
@@ -817,9 +829,7 @@ ifdef GTK2
endif
endif
-ifdef NO_LIBPERL
- CFLAGS += -DNO_LIBPERL
-else
+ifdef LIBPERL
PERL_EMBED_LDOPTS = $(shell perl -MExtUtils::Embed -e ldopts 2>/dev/null)
PERL_EMBED_LDFLAGS = $(call strip-libs,$(PERL_EMBED_LDOPTS))
PERL_EMBED_LIBADD = $(call grep-libs,$(PERL_EMBED_LDOPTS))
@@ -829,17 +839,13 @@ else
PERL_EMBED_LDOPTS := $(filter-out -specs=%,$(PERL_EMBED_LDOPTS))
FLAGS_PERL_EMBED=$(PERL_EMBED_CCOPTS) $(PERL_EMBED_LDOPTS)
+ $(call feature_check,libperl)
ifneq ($(feature-libperl), 1)
- CFLAGS += -DNO_LIBPERL
- NO_LIBPERL := 1
- $(warning Missing perl devel files. Disabling perl scripting support, please install perl-ExtUtils-Embed/libperl-dev)
+ $(error Missing perl devel files. Please install perl-ExtUtils-Embed/libperl-dev)
else
LDFLAGS += $(PERL_EMBED_LDFLAGS)
EXTLIBS += $(PERL_EMBED_LIBADD)
CFLAGS += -DHAVE_LIBPERL_SUPPORT
- ifeq ($(CC_NO_CLANG), 0)
- CFLAGS += -Wno-compound-token-split-by-macro
- endif
$(call detected,CONFIG_LIBPERL)
endif
endif
@@ -921,6 +927,8 @@ ifdef BUILD_NONDISTRO
ifeq ($(feature-libbfd), 1)
EXTLIBS += -lbfd -lopcodes
+ FEATURE_CHECK_LDFLAGS-disassembler-four-args = -lbfd -lopcodes -ldl
+ FEATURE_CHECK_LDFLAGS-disassembler-init-styled = -lbfd -lopcodes -ldl
else
# we are on a system that requires -liberty and (maybe) -lz
# to link against -lbfd; test each case individually here
@@ -947,6 +955,7 @@ ifdef BUILD_NONDISTRO
CFLAGS += -DHAVE_LIBBFD_SUPPORT
CXXFLAGS += -DHAVE_LIBBFD_SUPPORT
+ $(call detected,CONFIG_LIBBFD)
$(call feature_check,libbfd-buildid)
@@ -955,6 +964,14 @@ ifdef BUILD_NONDISTRO
else
$(warning Old version of libbfd/binutils things like PE executable profiling will not be available)
endif
+
+ ifeq ($(feature-disassembler-four-args), 1)
+ CFLAGS += -DDISASM_FOUR_ARGS_SIGNATURE
+ endif
+
+ ifeq ($(feature-disassembler-init-styled), 1)
+ CFLAGS += -DDISASM_INIT_STYLED
+ endif
endif
ifndef NO_LIBLLVM
@@ -1046,14 +1063,6 @@ ifdef HAVE_KVM_STAT_SUPPORT
CFLAGS += -DHAVE_KVM_STAT_SUPPORT
endif
-ifeq ($(feature-disassembler-four-args), 1)
- CFLAGS += -DDISASM_FOUR_ARGS_SIGNATURE
-endif
-
-ifeq ($(feature-disassembler-init-styled), 1)
- CFLAGS += -DDISASM_INIT_STYLED
-endif
-
ifeq (${IS_64_BIT}, 1)
ifndef NO_PERF_READ_VDSO32
$(call feature_check,compile-32)
@@ -1103,19 +1112,6 @@ ifndef NO_CAPSTONE
endif
endif
-ifndef NO_AUXTRACE
- ifeq ($(SRCARCH),x86)
- ifeq ($(feature-get_cpuid), 0)
- $(warning Your gcc lacks the __get_cpuid() builtin, disables support for auxtrace/Intel PT, please install a newer gcc)
- NO_AUXTRACE := 1
- endif
- endif
- ifndef NO_AUXTRACE
- $(call detected,CONFIG_AUXTRACE)
- CFLAGS += -DHAVE_AUXTRACE_SUPPORT
- endif
-endif
-
ifdef EXTRA_TESTS
$(call detected,CONFIG_EXTRA_TESTS)
CFLAGS += -DHAVE_EXTRA_TESTS
@@ -1181,20 +1177,6 @@ ifneq ($(NO_LIBTRACEEVENT),1)
else
$(error ERROR: libtraceevent is missing. Please install libtraceevent-dev/libtraceevent-devel and/or set LIBTRACEEVENT_DIR or build with NO_LIBTRACEEVENT=1)
endif
-
- ifeq ($(feature-libtracefs), 1)
- CFLAGS += $(shell $(PKG_CONFIG) --cflags libtracefs)
- LDFLAGS += $(shell $(PKG_CONFIG) --libs-only-L libtracefs)
- EXTLIBS += $(shell $(PKG_CONFIG) --libs-only-l libtracefs)
- LIBTRACEFS_VERSION := $(shell $(PKG_CONFIG) --modversion libtracefs).0.0
- LIBTRACEFS_VERSION_1 := $(word 1, $(subst ., ,$(LIBTRACEFS_VERSION)))
- LIBTRACEFS_VERSION_2 := $(word 2, $(subst ., ,$(LIBTRACEFS_VERSION)))
- LIBTRACEFS_VERSION_3 := $(word 3, $(subst ., ,$(LIBTRACEFS_VERSION)))
- LIBTRACEFS_VERSION_CPP := $(shell expr $(LIBTRACEFS_VERSION_1) \* 255 \* 255 + $(LIBTRACEFS_VERSION_2) \* 255 + $(LIBTRACEFS_VERSION_3))
- CFLAGS += -DLIBTRACEFS_VERSION=$(LIBTRACEFS_VERSION_CPP)
- else
- $(warning libtracefs is missing. Please install libtracefs-dev/libtracefs-devel)
- endif
endif
# Among the variables below, these:
diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf
index e2150acc2c13..b3f481a626af 100644
--- a/tools/perf/Makefile.perf
+++ b/tools/perf/Makefile.perf
@@ -17,7 +17,7 @@ include ../scripts/utilities.mak
#
# Define CROSS_COMPILE as prefix name of compiler if you want cross-builds.
#
-# Define NO_LIBPERL to disable perl script extension.
+# Define LIBPERL to enable perl script extension.
#
# Define NO_LIBPYTHON to disable python script extension.
#
@@ -84,8 +84,6 @@ include ../scripts/utilities.mak
#
# Define NO_LZMA if you do not want to support compressed (xz) kernel modules
#
-# Define NO_AUXTRACE if you do not want AUX area tracing support
-#
# Define NO_LIBBPF if you do not want BPF support
#
# Define NO_LIBCAP if you do not want process capabilities considered by perf
@@ -194,7 +192,7 @@ else
# paths are used instead.
ifdef CROSS_COMPILE
ifeq ($(PKG_CONFIG_LIBDIR)$(PKG_CONFIG_PATH)$(PKG_CONFIG_SYSROOT_DIR),)
- CROSS_ARCH = $(shell $(CC) -dumpmachine)
+ CROSS_ARCH = $(notdir $(CROSS_COMPILE:%-=%))
PKG_CONFIG_LIBDIR := /usr/local/$(CROSS_ARCH)/lib/pkgconfig/
PKG_CONFIG_LIBDIR := $(PKG_CONFIG_LIBDIR):/usr/local/lib/$(CROSS_ARCH)/pkgconfig/
PKG_CONFIG_LIBDIR := $(PKG_CONFIG_LIBDIR):/usr/lib/$(CROSS_ARCH)/pkgconfig/
@@ -234,12 +232,12 @@ endif
# The fixdep build - we force fixdep tool to be built as
# the first target in the separate make session not to be
# disturbed by any parallel make jobs. Once fixdep is done
-# we issue the requested build with FIXDEP=1 variable.
+# we issue the requested build with FIXDEP_BUILT=1 variable.
#
# The fixdep build is disabled for $(NON_CONFIG_TARGETS)
# targets, because it's not necessary.
-ifdef FIXDEP
+ifdef FIXDEP_BUILT
force_fixdep := 0
else
force_fixdep := $(config)
@@ -286,7 +284,7 @@ $(goals) all: sub-make
sub-make: fixdep
@./check-headers.sh
- $(Q)$(MAKE) FIXDEP=1 -f Makefile.perf $(goals)
+ $(Q)$(MAKE) FIXDEP_BUILT=1 -f Makefile.perf $(goals)
else # force_fixdep
@@ -941,7 +939,7 @@ $(OUTPUT)dlfilters/%.so: $(OUTPUT)dlfilters/%.o
ifndef NO_JVMTI
LIBJVMTI_IN := $(OUTPUT)jvmti/jvmti-in.o
-$(LIBJVMTI_IN): FORCE
+$(LIBJVMTI_IN): prepare FORCE
$(Q)$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=jvmti obj=jvmti
$(OUTPUT)$(LIBJVMTI): $(LIBJVMTI_IN)
@@ -1103,7 +1101,7 @@ endif
$(INSTALL) $(OUTPUT)perf-archive -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)'
$(call QUIET_INSTALL, perf-iostat) \
$(INSTALL) $(OUTPUT)perf-iostat -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)'
-ifndef NO_LIBPERL
+ifdef LIBPERL
$(call QUIET_INSTALL, perl-scripts) \
$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl/Perf-Trace-Util/lib/Perf/Trace'; \
$(INSTALL) scripts/perl/Perf-Trace-Util/lib/Perf/Trace/* -m 644 -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl/Perf-Trace-Util/lib/Perf/Trace'; \
@@ -1272,9 +1270,24 @@ endif # CONFIG_PERF_BPF_SKEL
bpf-skel-clean:
$(call QUIET_CLEAN, bpf-skel) $(RM) -r $(SKEL_TMP_OUT) $(SKELETONS) $(SKEL_OUT)/vmlinux.h
+pmu-events-clean:
+ifeq ($(OUTPUT),)
+ $(call QUIET_CLEAN, pmu-events) $(RM) \
+ pmu-events/pmu-events.c \
+ pmu-events/metric_test.log \
+ pmu-events/test-empty-pmu-events.c \
+ pmu-events/empty-pmu-events.log
+else # When an OUTPUT directory is present, clean up the copied pmu-events/arch directory.
+ $(call QUIET_CLEAN, pmu-events) $(RM) -r $(OUTPUT)pmu-events/arch \
+ $(OUTPUT)pmu-events/pmu-events.c \
+ $(OUTPUT)pmu-events/metric_test.log \
+ $(OUTPUT)pmu-events/test-empty-pmu-events.c \
+ $(OUTPUT)pmu-events/empty-pmu-events.log
+endif
+
clean:: $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clean $(LIBSYMBOL)-clean $(LIBPERF)-clean \
arm64-sysreg-defs-clean fixdep-clean python-clean bpf-skel-clean \
- tests-coresight-targets-clean
+ tests-coresight-targets-clean pmu-events-clean
$(call QUIET_CLEAN, core-objs) $(RM) $(LIBPERF_A) $(OUTPUT)perf-archive \
$(OUTPUT)perf-iostat $(LANG_BINDINGS)
$(Q)find $(or $(OUTPUT),.) -name '*.o' -delete -o -name '*.a' -delete -o \
@@ -1287,10 +1300,6 @@ clean:: $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clean $(LIBSYMBOL)-clean $(
$(OUTPUT)FEATURE-DUMP $(OUTPUT)util/*-bison* $(OUTPUT)util/*-flex* \
$(OUTPUT)util/intel-pt-decoder/inat-tables.c \
$(OUTPUT)tests/llvm-src-{base,kbuild,prologue,relocation}.c \
- $(OUTPUT)pmu-events/pmu-events.c \
- $(OUTPUT)pmu-events/test-empty-pmu-events.c \
- $(OUTPUT)pmu-events/empty-pmu-events.log \
- $(OUTPUT)pmu-events/metric_test.log \
$(OUTPUT)$(fadvise_advice_array) \
$(OUTPUT)$(fsconfig_arrays) \
$(OUTPUT)$(fsmount_arrays) \
diff --git a/tools/perf/arch/arm/annotate/instructions.c b/tools/perf/arch/arm/annotate/instructions.c
index cf91a43362b0..5e667b0f5512 100644
--- a/tools/perf/arch/arm/annotate/instructions.c
+++ b/tools/perf/arch/arm/annotate/instructions.c
@@ -1,6 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
#include <linux/compiler.h>
#include <linux/zalloc.h>
+#include <errno.h>
#include <sys/types.h>
#include <regex.h>
#include <stdlib.h>
diff --git a/tools/perf/arch/arm/entry/syscalls/syscall.tbl b/tools/perf/arch/arm/entry/syscalls/syscall.tbl
index 27c1d5ebcd91..b07e699aaa3c 100644
--- a/tools/perf/arch/arm/entry/syscalls/syscall.tbl
+++ b/tools/perf/arch/arm/entry/syscalls/syscall.tbl
@@ -482,3 +482,5 @@
465 common listxattrat sys_listxattrat
466 common removexattrat sys_removexattrat
467 common open_tree_attr sys_open_tree_attr
+468 common file_getattr sys_file_getattr
+469 common file_setattr sys_file_setattr
diff --git a/tools/perf/arch/arm/util/Build b/tools/perf/arch/arm/util/Build
index f7a8b37d1c68..fd695e1fdaee 100644
--- a/tools/perf/arch/arm/util/Build
+++ b/tools/perf/arch/arm/util/Build
@@ -3,4 +3,4 @@ perf-util-y += perf_regs.o
perf-util-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind.o
perf-util-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o
-perf-util-$(CONFIG_AUXTRACE) += pmu.o auxtrace.o cs-etm.o
+perf-util-y += pmu.o auxtrace.o cs-etm.o
diff --git a/tools/perf/arch/arm/util/auxtrace.c b/tools/perf/arch/arm/util/auxtrace.c
index 3b8eca0ffb17..eb6404267f17 100644
--- a/tools/perf/arch/arm/util/auxtrace.c
+++ b/tools/perf/arch/arm/util/auxtrace.c
@@ -5,6 +5,7 @@
*/
#include <dirent.h>
+#include <errno.h>
#include <stdbool.h>
#include <linux/coresight-pmu.h>
#include <linux/zalloc.h>
diff --git a/tools/perf/arch/arm/util/pmu.c b/tools/perf/arch/arm/util/pmu.c
index f70075c89aa0..9be8da5207f5 100644
--- a/tools/perf/arch/arm/util/pmu.c
+++ b/tools/perf/arch/arm/util/pmu.c
@@ -20,7 +20,6 @@ void perf_pmu__arch_init(struct perf_pmu *pmu)
{
struct perf_cpu_map *intersect, *online = cpu_map__online();
-#ifdef HAVE_AUXTRACE_SUPPORT
if (!strcmp(pmu->name, CORESIGHT_ETM_PMU_NAME)) {
/* add ETM default config here */
pmu->auxtrace = true;
@@ -39,7 +38,6 @@ void perf_pmu__arch_init(struct perf_pmu *pmu)
pmu->selectable = true;
#endif
}
-#endif
/* Workaround some ARM PMU's failing to correctly set CPU maps for online processors. */
intersect = perf_cpu_map__intersect(online, pmu->cpus);
perf_cpu_map__put(online);
diff --git a/tools/perf/arch/arm64/annotate/instructions.c b/tools/perf/arch/arm64/annotate/instructions.c
index d465d093e7eb..16cb62d40bd9 100644
--- a/tools/perf/arch/arm64/annotate/instructions.c
+++ b/tools/perf/arch/arm64/annotate/instructions.c
@@ -1,5 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
#include <linux/compiler.h>
+#include <errno.h>
#include <sys/types.h>
#include <regex.h>
#include <stdlib.h>
diff --git a/tools/perf/arch/arm64/util/Build b/tools/perf/arch/arm64/util/Build
index a74521b79eaa..d63881081d2e 100644
--- a/tools/perf/arch/arm64/util/Build
+++ b/tools/perf/arch/arm64/util/Build
@@ -1,13 +1,14 @@
+perf-util-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o
+perf-util-$(CONFIG_LIBTRACEEVENT) += kvm-stat.o
+perf-util-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind.o
+perf-util-y += ../../arm/util/auxtrace.o
+perf-util-y += ../../arm/util/cs-etm.o
+perf-util-y += ../../arm/util/pmu.o
+perf-util-y += arm-spe.o
perf-util-y += header.o
+perf-util-y += hisi-ptt.o
perf-util-y += machine.o
+perf-util-y += mem-events.o
perf-util-y += perf_regs.o
-perf-util-y += tsc.o
perf-util-y += pmu.o
-perf-util-$(CONFIG_LIBTRACEEVENT) += kvm-stat.o
-perf-util-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind.o
-perf-util-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o
-
-perf-util-$(CONFIG_AUXTRACE) += ../../arm/util/pmu.o \
- ../../arm/util/auxtrace.o \
- ../../arm/util/cs-etm.o \
- arm-spe.o mem-events.o hisi-ptt.o
+perf-util-y += tsc.o
diff --git a/tools/perf/arch/arm64/util/arm-spe.c b/tools/perf/arch/arm64/util/arm-spe.c
index 4f2833b62ff5..d5ec1408d0ae 100644
--- a/tools/perf/arch/arm64/util/arm-spe.c
+++ b/tools/perf/arch/arm64/util/arm-spe.c
@@ -10,6 +10,7 @@
#include <linux/log2.h>
#include <linux/string.h>
#include <linux/zalloc.h>
+#include <errno.h>
#include <time.h>
#include "../../../util/cpumap.h"
@@ -121,12 +122,17 @@ static int arm_spe_save_cpu_header(struct auxtrace_record *itr,
/* No Arm SPE PMU is found */
data[ARM_SPE_CPU_PMU_TYPE] = ULLONG_MAX;
data[ARM_SPE_CAP_MIN_IVAL] = 0;
+ data[ARM_SPE_CAP_EVENT_FILTER] = 0;
} else {
data[ARM_SPE_CPU_PMU_TYPE] = pmu->type;
if (perf_pmu__scan_file(pmu, "caps/min_interval", "%lu", &val) != 1)
val = 0;
data[ARM_SPE_CAP_MIN_IVAL] = val;
+
+ if (perf_pmu__scan_file(pmu, "caps/event_filter", "%lx", &val) != 1)
+ val = 0;
+ data[ARM_SPE_CAP_EVENT_FILTER] = val;
}
free(cpuid);
diff --git a/tools/perf/arch/arm64/util/arm64_exception_types.h b/tools/perf/arch/arm64/util/arm64_exception_types.h
index 27c981ebe401..bf827f19ace0 100644
--- a/tools/perf/arch/arm64/util/arm64_exception_types.h
+++ b/tools/perf/arch/arm64/util/arm64_exception_types.h
@@ -31,9 +31,10 @@
#define ESR_ELx_EC_FP_ASIMD (0x07)
#define ESR_ELx_EC_CP10_ID (0x08) /* EL2 only */
#define ESR_ELx_EC_PAC (0x09) /* EL2 and above */
-/* Unallocated EC: 0x0A - 0x0B */
+#define ESR_ELx_EC_OTHER (0x0A)
+/* Unallocated EC: 0x0B */
#define ESR_ELx_EC_CP14_64 (0x0C)
-/* Unallocated EC: 0x0d */
+#define ESR_ELx_EC_BTI (0x0D)
#define ESR_ELx_EC_ILL (0x0E)
/* Unallocated EC: 0x0F - 0x10 */
#define ESR_ELx_EC_SVC32 (0x11)
@@ -46,7 +47,10 @@
#define ESR_ELx_EC_SYS64 (0x18)
#define ESR_ELx_EC_SVE (0x19)
#define ESR_ELx_EC_ERET (0x1a) /* EL2 only */
-/* Unallocated EC: 0x1b - 0x1E */
+/* Unallocated EC: 0x1B */
+#define ESR_ELx_EC_FPAC (0x1C) /* EL1 and above */
+#define ESR_ELx_EC_SME (0x1D)
+/* Unallocated EC: 0x1E */
#define ESR_ELx_EC_IMP_DEF (0x1f) /* EL3 only */
#define ESR_ELx_EC_IABT_LOW (0x20)
#define ESR_ELx_EC_IABT_CUR (0x21)
@@ -55,11 +59,12 @@
#define ESR_ELx_EC_DABT_LOW (0x24)
#define ESR_ELx_EC_DABT_CUR (0x25)
#define ESR_ELx_EC_SP_ALIGN (0x26)
-/* Unallocated EC: 0x27 */
+#define ESR_ELx_EC_MOPS (0x27)
#define ESR_ELx_EC_FP_EXC32 (0x28)
/* Unallocated EC: 0x29 - 0x2B */
#define ESR_ELx_EC_FP_EXC64 (0x2C)
-/* Unallocated EC: 0x2D - 0x2E */
+#define ESR_ELx_EC_GCS (0x2D)
+/* Unallocated EC: 0x2E */
#define ESR_ELx_EC_SERROR (0x2F)
#define ESR_ELx_EC_BREAKPT_LOW (0x30)
#define ESR_ELx_EC_BREAKPT_CUR (0x31)
diff --git a/tools/perf/arch/arm64/util/hisi-ptt.c b/tools/perf/arch/arm64/util/hisi-ptt.c
index eac9739c87e6..fe457fd58c9e 100644
--- a/tools/perf/arch/arm64/util/hisi-ptt.c
+++ b/tools/perf/arch/arm64/util/hisi-ptt.c
@@ -9,6 +9,7 @@
#include <linux/bitops.h>
#include <linux/log2.h>
#include <linux/zalloc.h>
+#include <errno.h>
#include <time.h>
#include <internal/lib.h> // page_size
diff --git a/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl b/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl
index 1e8c44c7b614..7a7049c2c307 100644
--- a/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl
+++ b/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl
@@ -382,3 +382,5 @@
465 n64 listxattrat sys_listxattrat
466 n64 removexattrat sys_removexattrat
467 n64 open_tree_attr sys_open_tree_attr
+468 n64 file_getattr sys_file_getattr
+469 n64 file_setattr sys_file_setattr
diff --git a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl
index 9a084bdb8926..b453e80dfc00 100644
--- a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl
+++ b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl
@@ -558,3 +558,5 @@
465 common listxattrat sys_listxattrat
466 common removexattrat sys_removexattrat
467 common open_tree_attr sys_open_tree_attr
+468 common file_getattr sys_file_getattr
+469 common file_setattr sys_file_setattr
diff --git a/tools/perf/arch/powerpc/util/Build b/tools/perf/arch/powerpc/util/Build
index fdd6a77a3432..3d0d5427aef7 100644
--- a/tools/perf/arch/powerpc/util/Build
+++ b/tools/perf/arch/powerpc/util/Build
@@ -10,3 +10,4 @@ perf-util-$(CONFIG_LIBDW) += skip-callchain-idx.o
perf-util-$(CONFIG_LIBUNWIND) += unwind-libunwind.o
perf-util-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o
+perf-util-y += auxtrace.o
diff --git a/tools/perf/arch/powerpc/util/auxtrace.c b/tools/perf/arch/powerpc/util/auxtrace.c
new file mode 100644
index 000000000000..292ea335e4ff
--- /dev/null
+++ b/tools/perf/arch/powerpc/util/auxtrace.c
@@ -0,0 +1,103 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * VPA support
+ */
+#include <errno.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/string.h>
+
+#include "../../util/evlist.h"
+#include "../../util/debug.h"
+#include "../../util/auxtrace.h"
+#include "../../util/powerpc-vpadtl.h"
+#include "../../util/record.h"
+#include <internal/lib.h> // page_size
+
+#define KiB(x) ((x) * 1024)
+
+static int
+powerpc_vpadtl_recording_options(struct auxtrace_record *ar __maybe_unused,
+ struct evlist *evlist __maybe_unused,
+ struct record_opts *opts)
+{
+ opts->full_auxtrace = true;
+
+ /*
+ * Set auxtrace_mmap_pages to minimum
+ * two pages
+ */
+ if (!opts->auxtrace_mmap_pages) {
+ opts->auxtrace_mmap_pages = KiB(128) / page_size;
+ if (opts->mmap_pages == UINT_MAX)
+ opts->mmap_pages = KiB(256) / page_size;
+ }
+
+ return 0;
+}
+
+static size_t powerpc_vpadtl_info_priv_size(struct auxtrace_record *itr __maybe_unused,
+ struct evlist *evlist __maybe_unused)
+{
+ return VPADTL_AUXTRACE_PRIV_SIZE;
+}
+
+static int
+powerpc_vpadtl_info_fill(struct auxtrace_record *itr __maybe_unused,
+ struct perf_session *session __maybe_unused,
+ struct perf_record_auxtrace_info *auxtrace_info,
+ size_t priv_size __maybe_unused)
+{
+ auxtrace_info->type = PERF_AUXTRACE_VPA_DTL;
+
+ return 0;
+}
+
+static void powerpc_vpadtl_free(struct auxtrace_record *itr)
+{
+ free(itr);
+}
+
+static u64 powerpc_vpadtl_reference(struct auxtrace_record *itr __maybe_unused)
+{
+ return 0;
+}
+
+struct auxtrace_record *auxtrace_record__init(struct evlist *evlist,
+ int *err)
+{
+ struct auxtrace_record *aux;
+ struct evsel *pos;
+ int found = 0;
+
+ evlist__for_each_entry(evlist, pos) {
+ if (strstarts(pos->name, "vpa_dtl")) {
+ found = 1;
+ pos->needs_auxtrace_mmap = true;
+ break;
+ }
+ }
+
+ if (!found)
+ return NULL;
+
+ /*
+ * To obtain the auxtrace buffer file descriptor, the auxtrace event
+ * must come first.
+ */
+ evlist__to_front(pos->evlist, pos);
+
+ aux = zalloc(sizeof(*aux));
+ if (aux == NULL) {
+ pr_debug("aux record is NULL\n");
+ *err = -ENOMEM;
+ return NULL;
+ }
+
+ aux->recording_options = powerpc_vpadtl_recording_options;
+ aux->info_priv_size = powerpc_vpadtl_info_priv_size;
+ aux->info_fill = powerpc_vpadtl_info_fill;
+ aux->free = powerpc_vpadtl_free;
+ aux->reference = powerpc_vpadtl_reference;
+ return aux;
+}
diff --git a/tools/perf/arch/s390/entry/syscalls/syscall.tbl b/tools/perf/arch/s390/entry/syscalls/syscall.tbl
index a4569b96ef06..8a6744d658db 100644
--- a/tools/perf/arch/s390/entry/syscalls/syscall.tbl
+++ b/tools/perf/arch/s390/entry/syscalls/syscall.tbl
@@ -470,3 +470,5 @@
465 common listxattrat sys_listxattrat sys_listxattrat
466 common removexattrat sys_removexattrat sys_removexattrat
467 common open_tree_attr sys_open_tree_attr sys_open_tree_attr
+468 common file_getattr sys_file_getattr sys_file_getattr
+469 common file_setattr sys_file_setattr sys_file_setattr
diff --git a/tools/perf/arch/s390/util/Build b/tools/perf/arch/s390/util/Build
index 736c0ad09194..c64eb18dbdae 100644
--- a/tools/perf/arch/s390/util/Build
+++ b/tools/perf/arch/s390/util/Build
@@ -7,4 +7,4 @@ perf-util-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o
perf-util-y += machine.o
perf-util-y += pmu.o
-perf-util-$(CONFIG_AUXTRACE) += auxtrace.o
+perf-util-y += auxtrace.o
diff --git a/tools/perf/arch/s390/util/auxtrace.c b/tools/perf/arch/s390/util/auxtrace.c
index 5068baa3e092..1a3676145066 100644
--- a/tools/perf/arch/s390/util/auxtrace.c
+++ b/tools/perf/arch/s390/util/auxtrace.c
@@ -1,3 +1,4 @@
+#include <errno.h>
#include <stdbool.h>
#include <stdlib.h>
#include <linux/kernel.h>
diff --git a/tools/perf/arch/sh/entry/syscalls/syscall.tbl b/tools/perf/arch/sh/entry/syscalls/syscall.tbl
index 52a7652fcff6..5e9c9eff5539 100644
--- a/tools/perf/arch/sh/entry/syscalls/syscall.tbl
+++ b/tools/perf/arch/sh/entry/syscalls/syscall.tbl
@@ -471,3 +471,5 @@
465 common listxattrat sys_listxattrat
466 common removexattrat sys_removexattrat
467 common open_tree_attr sys_open_tree_attr
+468 common file_getattr sys_file_getattr
+469 common file_setattr sys_file_setattr
diff --git a/tools/perf/arch/sparc/entry/syscalls/syscall.tbl b/tools/perf/arch/sparc/entry/syscalls/syscall.tbl
index 83e45eb6c095..ebb7d06d1044 100644
--- a/tools/perf/arch/sparc/entry/syscalls/syscall.tbl
+++ b/tools/perf/arch/sparc/entry/syscalls/syscall.tbl
@@ -513,3 +513,5 @@
465 common listxattrat sys_listxattrat
466 common removexattrat sys_removexattrat
467 common open_tree_attr sys_open_tree_attr
+468 common file_getattr sys_file_getattr
+469 common file_setattr sys_file_setattr
diff --git a/tools/perf/arch/x86/annotate/instructions.c b/tools/perf/arch/x86/annotate/instructions.c
index c6d403eae744..803f9351a3fb 100644
--- a/tools/perf/arch/x86/annotate/instructions.c
+++ b/tools/perf/arch/x86/annotate/instructions.c
@@ -248,6 +248,7 @@ static void update_insn_state_x86(struct type_state *state,
tsr = &state->regs[state->ret_reg];
tsr->type = type_die;
tsr->kind = TSR_KIND_TYPE;
+ tsr->offset = 0;
tsr->ok = true;
pr_debug_dtp("call [%x] return -> reg%d",
@@ -284,6 +285,7 @@ static void update_insn_state_x86(struct type_state *state,
!strcmp(var_name, "this_cpu_off") &&
tsr->kind == TSR_KIND_CONST) {
tsr->kind = TSR_KIND_PERCPU_BASE;
+ tsr->offset = 0;
tsr->ok = true;
imm_value = tsr->imm_value;
}
@@ -291,6 +293,19 @@ static void update_insn_state_x86(struct type_state *state,
else
return;
+ /* Ignore add to non-pointer or non-const types */
+ if (tsr->kind == TSR_KIND_POINTER ||
+ (dwarf_tag(&tsr->type) == DW_TAG_pointer_type &&
+ src->reg1 != DWARF_REG_PC && tsr->kind == TSR_KIND_TYPE && !dst->mem_ref)) {
+ tsr->offset += imm_value;
+ pr_debug_dtp("add [%x] offset %#"PRIx64" to reg%d",
+ insn_offset, imm_value, dst->reg1);
+ pr_debug_type_name(&tsr->type, tsr->kind);
+ }
+
+ if (tsr->kind == TSR_KIND_CONST)
+ tsr->imm_value += imm_value;
+
if (tsr->kind != TSR_KIND_PERCPU_BASE)
return;
@@ -301,7 +316,8 @@ static void update_insn_state_x86(struct type_state *state,
* as a pointer.
*/
tsr->type = type_die;
- tsr->kind = TSR_KIND_POINTER;
+ tsr->kind = TSR_KIND_PERCPU_POINTER;
+ tsr->offset = 0;
tsr->ok = true;
pr_debug_dtp("add [%x] percpu %#"PRIx64" -> reg%d",
@@ -311,6 +327,135 @@ static void update_insn_state_x86(struct type_state *state,
return;
}
+ if (!strncmp(dl->ins.name, "sub", 3)) {
+ u64 imm_value = -1ULL;
+
+ if (!has_reg_type(state, dst->reg1))
+ return;
+
+ tsr = &state->regs[dst->reg1];
+ tsr->copied_from = -1;
+
+ if (src->imm)
+ imm_value = src->offset;
+ else if (has_reg_type(state, src->reg1) &&
+ state->regs[src->reg1].kind == TSR_KIND_CONST)
+ imm_value = state->regs[src->reg1].imm_value;
+
+ if (tsr->kind == TSR_KIND_POINTER ||
+ (dwarf_tag(&tsr->type) == DW_TAG_pointer_type &&
+ src->reg1 != DWARF_REG_PC && tsr->kind == TSR_KIND_TYPE && !dst->mem_ref)) {
+ tsr->offset -= imm_value;
+ pr_debug_dtp("sub [%x] offset %#"PRIx64" to reg%d",
+ insn_offset, imm_value, dst->reg1);
+ pr_debug_type_name(&tsr->type, tsr->kind);
+ }
+
+ if (tsr->kind == TSR_KIND_CONST)
+ tsr->imm_value -= imm_value;
+
+ return;
+ }
+
+ if (!strncmp(dl->ins.name, "lea", 3)) {
+ int sreg = src->reg1;
+ struct type_state_reg src_tsr;
+
+ if (!has_reg_type(state, sreg) ||
+ !has_reg_type(state, dst->reg1) ||
+ !src->mem_ref)
+ return;
+
+ src_tsr = state->regs[sreg];
+ tsr = &state->regs[dst->reg1];
+
+ tsr->copied_from = -1;
+ tsr->ok = false;
+
+ /* Case 1: Based on stack pointer or frame pointer */
+ if (sreg == fbreg || sreg == state->stack_reg) {
+ struct type_state_stack *stack;
+ int offset = src->offset - fboff;
+
+ stack = find_stack_state(state, offset);
+ if (!stack)
+ return;
+
+ tsr->type = stack->type;
+ tsr->kind = TSR_KIND_POINTER;
+ tsr->offset = offset - stack->offset;
+ tsr->ok = true;
+
+ if (sreg == fbreg) {
+ pr_debug_dtp("lea [%x] address of -%#x(stack) -> reg%d",
+ insn_offset, -src->offset, dst->reg1);
+ } else {
+ pr_debug_dtp("lea [%x] address of %#x(reg%d) -> reg%d",
+ insn_offset, src->offset, sreg, dst->reg1);
+ }
+
+ pr_debug_type_name(&tsr->type, tsr->kind);
+ }
+ /* Case 2: Based on a register holding a typed pointer */
+ else if (src_tsr.ok && (src_tsr.kind == TSR_KIND_POINTER ||
+ (dwarf_tag(&src_tsr.type) == DW_TAG_pointer_type &&
+ src_tsr.kind == TSR_KIND_TYPE))) {
+
+ if (src_tsr.kind == TSR_KIND_TYPE &&
+ __die_get_real_type(&state->regs[sreg].type, &type_die) == NULL)
+ return;
+
+ if (src_tsr.kind == TSR_KIND_POINTER)
+ type_die = state->regs[sreg].type;
+
+ /* Check if the target type has a member at the new offset */
+ if (die_get_member_type(&type_die,
+ src->offset + src_tsr.offset, &type_die) == NULL)
+ return;
+
+ tsr->type = src_tsr.type;
+ tsr->kind = src_tsr.kind;
+ tsr->offset = src->offset + src_tsr.offset;
+ tsr->ok = true;
+
+ pr_debug_dtp("lea [%x] address of %s%#x(reg%d) -> reg%d",
+ insn_offset, src->offset < 0 ? "-" : "",
+ abs(src->offset), sreg, dst->reg1);
+
+ pr_debug_type_name(&tsr->type, tsr->kind);
+ }
+ return;
+ }
+
+ /* Invalidate register states for other ops which may change pointers */
+ if (has_reg_type(state, dst->reg1) && !dst->mem_ref &&
+ dwarf_tag(&state->regs[dst->reg1].type) == DW_TAG_pointer_type) {
+ if (!strncmp(dl->ins.name, "imul", 4) || !strncmp(dl->ins.name, "mul", 3) ||
+ !strncmp(dl->ins.name, "idiv", 4) || !strncmp(dl->ins.name, "div", 3) ||
+ !strncmp(dl->ins.name, "shl", 3) || !strncmp(dl->ins.name, "shr", 3) ||
+ !strncmp(dl->ins.name, "sar", 3) || !strncmp(dl->ins.name, "and", 3) ||
+ !strncmp(dl->ins.name, "or", 2) || !strncmp(dl->ins.name, "neg", 3) ||
+ !strncmp(dl->ins.name, "inc", 3) || !strncmp(dl->ins.name, "dec", 3)) {
+ pr_debug_dtp("%s [%x] invalidate reg%d\n",
+ dl->ins.name, insn_offset, dst->reg1);
+ state->regs[dst->reg1].ok = false;
+ state->regs[dst->reg1].copied_from = -1;
+ return;
+ }
+
+ if (!strncmp(dl->ins.name, "xor", 3) && dst->reg1 == src->reg1) {
+ /* xor reg, reg clears the register */
+ pr_debug_dtp("xor [%x] clear reg%d\n",
+ insn_offset, dst->reg1);
+
+ state->regs[dst->reg1].kind = TSR_KIND_CONST;
+ state->regs[dst->reg1].imm_value = 0;
+ state->regs[dst->reg1].ok = true;
+ state->regs[dst->reg1].copied_from = -1;
+ return;
+ }
+ }
+
if (strncmp(dl->ins.name, "mov", 3))
return;
@@ -345,6 +490,7 @@ static void update_insn_state_x86(struct type_state *state,
if (var_addr == 40) {
tsr->kind = TSR_KIND_CANARY;
+ tsr->offset = 0;
tsr->ok = true;
pr_debug_dtp("mov [%x] stack canary -> reg%d\n",
@@ -361,6 +507,7 @@ static void update_insn_state_x86(struct type_state *state,
tsr->type = type_die;
tsr->kind = TSR_KIND_TYPE;
+ tsr->offset = 0;
tsr->ok = true;
pr_debug_dtp("mov [%x] this-cpu addr=%#"PRIx64" -> reg%d",
@@ -372,6 +519,7 @@ static void update_insn_state_x86(struct type_state *state,
if (src->imm) {
tsr->kind = TSR_KIND_CONST;
tsr->imm_value = src->offset;
+ tsr->offset = 0;
tsr->ok = true;
pr_debug_dtp("mov [%x] imm=%#x -> reg%d\n",
@@ -388,10 +536,11 @@ static void update_insn_state_x86(struct type_state *state,
tsr->type = state->regs[src->reg1].type;
tsr->kind = state->regs[src->reg1].kind;
tsr->imm_value = state->regs[src->reg1].imm_value;
+ tsr->offset = state->regs[src->reg1].offset;
tsr->ok = true;
/* To copy back the variable type later (hopefully) */
- if (tsr->kind == TSR_KIND_TYPE)
+ if (tsr->kind == TSR_KIND_TYPE || tsr->kind == TSR_KIND_POINTER)
tsr->copied_from = src->reg1;
pr_debug_dtp("mov [%x] reg%d -> reg%d",
@@ -421,12 +570,14 @@ retry:
} else if (!stack->compound) {
tsr->type = stack->type;
tsr->kind = stack->kind;
+ tsr->offset = stack->ptr_offset;
tsr->ok = true;
} else if (die_get_member_type(&stack->type,
offset - stack->offset,
&type_die)) {
tsr->type = type_die;
tsr->kind = TSR_KIND_TYPE;
+ tsr->offset = 0;
tsr->ok = true;
} else {
tsr->ok = false;
@@ -446,15 +597,30 @@ retry:
else if (has_reg_type(state, sreg) && state->regs[sreg].ok &&
state->regs[sreg].kind == TSR_KIND_TYPE &&
die_deref_ptr_type(&state->regs[sreg].type,
- src->offset, &type_die)) {
+ src->offset + state->regs[sreg].offset, &type_die)) {
tsr->type = type_die;
tsr->kind = TSR_KIND_TYPE;
+ tsr->offset = 0;
tsr->ok = true;
pr_debug_dtp("mov [%x] %#x(reg%d) -> reg%d",
insn_offset, src->offset, sreg, dst->reg1);
pr_debug_type_name(&tsr->type, tsr->kind);
}
+ /* Handle dereference of TSR_KIND_POINTER registers */
+ else if (has_reg_type(state, sreg) && state->regs[sreg].ok &&
+ state->regs[sreg].kind == TSR_KIND_POINTER &&
+ die_get_member_type(&state->regs[sreg].type,
+ src->offset + state->regs[sreg].offset, &type_die)) {
+ tsr->type = state->regs[sreg].type;
+ tsr->kind = TSR_KIND_TYPE;
+ tsr->offset = src->offset + state->regs[sreg].offset;
+ tsr->ok = true;
+
+ pr_debug_dtp("mov [%x] addr %#x(reg%d) -> reg%d",
+ insn_offset, src->offset, sreg, dst->reg1);
+ pr_debug_type_name(&tsr->type, tsr->kind);
+ }
/* Or check if it's a global variable */
else if (sreg == DWARF_REG_PC) {
struct map_symbol *ms = dloc->ms;
@@ -473,6 +639,7 @@ retry:
tsr->type = type_die;
tsr->kind = TSR_KIND_TYPE;
+ tsr->offset = 0;
tsr->ok = true;
pr_debug_dtp("mov [%x] global addr=%"PRIx64" -> reg%d",
@@ -504,6 +671,7 @@ retry:
die_get_member_type(&type_die, offset, &type_die)) {
tsr->type = type_die;
tsr->kind = TSR_KIND_TYPE;
+ tsr->offset = 0;
tsr->ok = true;
if (src->multi_regs) {
@@ -521,11 +689,12 @@ retry:
}
/* And then dereference the calculated pointer if it has one */
else if (has_reg_type(state, sreg) && state->regs[sreg].ok &&
- state->regs[sreg].kind == TSR_KIND_POINTER &&
+ state->regs[sreg].kind == TSR_KIND_PERCPU_POINTER &&
die_get_member_type(&state->regs[sreg].type,
src->offset, &type_die)) {
tsr->type = type_die;
tsr->kind = TSR_KIND_TYPE;
+ tsr->offset = 0;
tsr->ok = true;
pr_debug_dtp("mov [%x] pointer %#x(reg%d) -> reg%d",
@@ -548,6 +717,7 @@ retry:
&var_name, &offset) &&
!strcmp(var_name, "__per_cpu_offset")) {
tsr->kind = TSR_KIND_PERCPU_BASE;
+ tsr->offset = 0;
tsr->ok = true;
pr_debug_dtp("mov [%x] percpu base reg%d\n",
@@ -583,10 +753,10 @@ retry:
*/
if (!stack->compound)
set_stack_state(stack, offset, tsr->kind,
- &tsr->type);
+ &tsr->type, tsr->offset);
} else {
findnew_stack_state(state, offset, tsr->kind,
- &tsr->type);
+ &tsr->type, tsr->offset);
}
if (dst->reg1 == fbreg) {
@@ -596,6 +766,11 @@ retry:
pr_debug_dtp("mov [%x] reg%d -> %#x(reg%d)",
insn_offset, src->reg1, offset, dst->reg1);
}
+ if (tsr->offset != 0) {
+ pr_debug_dtp(" reg%d offset %#x ->",
+ src->reg1, tsr->offset);
+ }
+
pr_debug_type_name(&tsr->type, tsr->kind);
}
/*
diff --git a/tools/perf/arch/x86/entry/syscalls/syscall_32.tbl b/tools/perf/arch/x86/entry/syscalls/syscall_32.tbl
index ac007ea00979..4877e16da69a 100644
--- a/tools/perf/arch/x86/entry/syscalls/syscall_32.tbl
+++ b/tools/perf/arch/x86/entry/syscalls/syscall_32.tbl
@@ -473,3 +473,5 @@
465 i386 listxattrat sys_listxattrat
466 i386 removexattrat sys_removexattrat
467 i386 open_tree_attr sys_open_tree_attr
+468 i386 file_getattr sys_file_getattr
+469 i386 file_setattr sys_file_setattr
diff --git a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
index cfb5ca41e30d..ced2a1deecd7 100644
--- a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
@@ -345,6 +345,7 @@
333 common io_pgetevents sys_io_pgetevents
334 common rseq sys_rseq
335 common uretprobe sys_uretprobe
+336 common uprobe sys_uprobe
# don't use numbers 387 through 423, add new calls after the last
# 'common' entry
424 common pidfd_send_signal sys_pidfd_send_signal
@@ -391,6 +392,8 @@
465 common listxattrat sys_listxattrat
466 common removexattrat sys_removexattrat
467 common open_tree_attr sys_open_tree_attr
+468 common file_getattr sys_file_getattr
+469 common file_setattr sys_file_setattr
#
# Due to a historical design error, certain syscalls are numbered differently
diff --git a/tools/perf/arch/x86/tests/Build b/tools/perf/arch/x86/tests/Build
index 7790b3e20f4e..b017d1ca6e3c 100644
--- a/tools/perf/arch/x86/tests/Build
+++ b/tools/perf/arch/x86/tests/Build
@@ -3,9 +3,9 @@ perf-test-$(CONFIG_DWARF_UNWIND) += dwarf-unwind.o
perf-test-y += arch-tests.o
perf-test-y += hybrid.o
-perf-test-$(CONFIG_AUXTRACE) += intel-pt-test.o
+perf-test-y += intel-pt-test.o
ifeq ($(CONFIG_EXTRA_TESTS),y)
-perf-test-$(CONFIG_AUXTRACE) += insn-x86.o
+perf-test-y += insn-x86.o
endif
perf-test-$(CONFIG_X86_64) += bp-modify.o
perf-test-y += amd-ibs-via-core-pmu.o
diff --git a/tools/perf/arch/x86/tests/arch-tests.c b/tools/perf/arch/x86/tests/arch-tests.c
index 8f9cfeaa170f..c3e1619c5e79 100644
--- a/tools/perf/arch/x86/tests/arch-tests.c
+++ b/tools/perf/arch/x86/tests/arch-tests.c
@@ -3,7 +3,6 @@
#include "tests/tests.h"
#include "arch-tests.h"
-#ifdef HAVE_AUXTRACE_SUPPORT
#ifdef HAVE_EXTRA_TESTS
DEFINE_SUITE("x86 instruction decoder - new instructions", insn_x86);
#endif
@@ -19,7 +18,6 @@ struct test_suite suite__intel_pt = {
.test_cases = intel_pt_tests,
};
-#endif
#if defined(__x86_64__)
DEFINE_SUITE("x86 bp modify", bp_modify);
#endif
@@ -39,12 +37,10 @@ struct test_suite *arch_tests[] = {
#ifdef HAVE_DWARF_UNWIND_SUPPORT
&suite__dwarf_unwind,
#endif
-#ifdef HAVE_AUXTRACE_SUPPORT
#ifdef HAVE_EXTRA_TESTS
&suite__insn_x86,
#endif
&suite__intel_pt,
-#endif
#if defined(__x86_64__)
&suite__bp_modify,
#endif
diff --git a/tools/perf/arch/x86/tests/intel-pt-test.c b/tools/perf/arch/x86/tests/intel-pt-test.c
index b217ed67cd4e..970997759ec2 100644
--- a/tools/perf/arch/x86/tests/intel-pt-test.c
+++ b/tools/perf/arch/x86/tests/intel-pt-test.c
@@ -3,7 +3,6 @@
#include <linux/compiler.h>
#include <linux/bits.h>
#include <string.h>
-#include <cpuid.h>
#include <sched.h>
#include "intel-pt-decoder/intel-pt-pkt-decoder.h"
@@ -11,6 +10,7 @@
#include "debug.h"
#include "tests/tests.h"
#include "arch-tests.h"
+#include "../util/cpuid.h"
#include "cpumap.h"
/**
@@ -363,7 +363,7 @@ static int get_pt_caps(int cpu, struct pt_caps *caps)
memset(caps, 0, sizeof(*caps));
for (i = 0; i < INTEL_PT_SUBLEAF_CNT; i++) {
- __get_cpuid_count(20, i, &r.eax, &r.ebx, &r.ecx, &r.edx);
+ cpuid(20, i, &r.eax, &r.ebx, &r.ecx, &r.edx);
pr_debug("CPU %d CPUID leaf 20 subleaf %d\n", cpu, i);
pr_debug("eax = 0x%08x\n", r.eax);
pr_debug("ebx = 0x%08x\n", r.ebx);
@@ -380,7 +380,7 @@ static bool is_hybrid(void)
unsigned int eax, ebx, ecx, edx = 0;
bool result;
- __get_cpuid_count(7, 0, &eax, &ebx, &ecx, &edx);
+ cpuid(7, 0, &eax, &ebx, &ecx, &edx);
result = edx & BIT(15);
pr_debug("Is %shybrid : CPUID leaf 7 subleaf 0 edx %#x (bit-15 indicates hybrid)\n",
result ? "" : "not ", edx);
diff --git a/tools/perf/arch/x86/tests/topdown.c b/tools/perf/arch/x86/tests/topdown.c
index 8d0ea7a4bbc1..3ee4e5e71be3 100644
--- a/tools/perf/arch/x86/tests/topdown.c
+++ b/tools/perf/arch/x86/tests/topdown.c
@@ -1,6 +1,8 @@
// SPDX-License-Identifier: GPL-2.0
+#include <errno.h>
#include "arch-tests.h"
#include "../util/topdown.h"
+#include "debug.h"
#include "evlist.h"
#include "parse-events.h"
#include "pmu.h"
diff --git a/tools/perf/arch/x86/util/Build b/tools/perf/arch/x86/util/Build
index 06d7c0205b3d..c0dc5965f362 100644
--- a/tools/perf/arch/x86/util/Build
+++ b/tools/perf/arch/x86/util/Build
@@ -14,7 +14,7 @@ perf-util-y += iostat.o
perf-util-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind.o
perf-util-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o
-perf-util-$(CONFIG_AUXTRACE) += auxtrace.o
+perf-util-y += auxtrace.o
perf-util-y += archinsn.o
-perf-util-$(CONFIG_AUXTRACE) += intel-pt.o
-perf-util-$(CONFIG_AUXTRACE) += intel-bts.o
+perf-util-y += intel-pt.o
+perf-util-y += intel-bts.o
diff --git a/tools/perf/arch/x86/util/evsel.c b/tools/perf/arch/x86/util/evsel.c
index 9bc80fff3aa0..23a8e662a912 100644
--- a/tools/perf/arch/x86/util/evsel.c
+++ b/tools/perf/arch/x86/util/evsel.c
@@ -1,10 +1,15 @@
// SPDX-License-Identifier: GPL-2.0
+#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
+#include "util/evlist.h"
#include "util/evsel.h"
+#include "util/evsel_config.h"
#include "util/env.h"
#include "util/pmu.h"
#include "util/pmus.h"
+#include "util/stat.h"
+#include "util/strbuf.h"
#include "linux/string.h"
#include "topdown.h"
#include "evsel.h"
@@ -67,6 +72,57 @@ int arch_evsel__hw_name(struct evsel *evsel, char *bf, size_t size)
event_name);
}
+void arch_evsel__apply_ratio_to_prev(struct evsel *evsel,
+ struct perf_event_attr *attr)
+{
+ struct perf_event_attr *prev_attr = NULL;
+ struct evsel *evsel_prev = NULL;
+ const char *name = "acr_mask";
+ int evsel_idx = 0;
+ __u64 ev_mask, pr_ev_mask;
+
+ if (!perf_pmu__has_format(evsel->pmu, name)) {
+ pr_err("'%s' does not have acr_mask format support\n", evsel->pmu->name);
+ return;
+ }
+ if (perf_pmu__format_type(evsel->pmu, name) !=
+ PERF_PMU_FORMAT_VALUE_CONFIG2) {
+ pr_err("'%s' does not have config2 format support\n", evsel->pmu->name);
+ return;
+ }
+
+ evsel_prev = evsel__prev(evsel);
+ if (!evsel_prev) {
+ pr_err("Previous event does not exist.\n");
+ return;
+ }
+
+ prev_attr = &evsel_prev->core.attr;
+
+ if (prev_attr->config2) {
+ pr_err("'%s' has set config2 (acr_mask?) already, configuration not supported\n", evsel_prev->name);
+ return;
+ }
+
+ /*
+ * acr_mask (config2) is calculated using the event's index in
+ * the event group. The first event will use the index of the
+ * second event as its mask (e.g., 0x2), indicating that the
+ * second event counter will be reset and a sample taken for
+ * the first event if its counter overflows. The second event
+ * will use the mask consisting of the first and second bits
+ * (e.g., 0x3), meaning both counters will be reset if the
+ * second event counter overflows.
+ */
+
+ evsel_idx = evsel__group_idx(evsel);
+ ev_mask = 1ull << evsel_idx;
+ pr_ev_mask = 1ull << (evsel_idx - 1);
+
+ prev_attr->config2 = ev_mask;
+ attr->config2 = ev_mask | pr_ev_mask;
+}
+
static void ibs_l3miss_warn(void)
{
pr_warning(
@@ -102,13 +158,15 @@ void arch__post_evsel_config(struct evsel *evsel, struct perf_event_attr *attr)
}
}
-int arch_evsel__open_strerror(struct evsel *evsel, char *msg, size_t size)
+static int amd_evsel__open_strerror(struct evsel *evsel, char *msg, size_t size)
{
- if (!x86__is_amd_cpu())
+ struct perf_pmu *pmu;
+
+ if (evsel->core.attr.precise_ip == 0)
return 0;
- if (!evsel->core.attr.precise_ip &&
- !(evsel->pmu && !strncmp(evsel->pmu->name, "ibs", 3)))
+ pmu = evsel__find_pmu(evsel);
+ if (!pmu || strncmp(pmu->name, "ibs", 3))
return 0;
/* More verbose IBS errors. */
@@ -118,6 +176,54 @@ int arch_evsel__open_strerror(struct evsel *evsel, char *msg, size_t size)
return scnprintf(msg, size, "AMD IBS doesn't support privilege filtering. Try "
"again without the privilege modifiers (like 'k') at the end.");
}
+ return 0;
+}
+
+static int intel_evsel__open_strerror(struct evsel *evsel, int err, char *msg, size_t size)
+{
+ struct strbuf sb = STRBUF_INIT;
+ int ret;
+
+ if (err != EINVAL)
+ return 0;
+ if (!topdown_sys_has_perf_metrics())
+ return 0;
+
+ if (arch_is_topdown_slots(evsel)) {
+ if (!evsel__is_group_leader(evsel)) {
+ evlist__uniquify_evsel_names(evsel->evlist, &stat_config);
+ evlist__format_evsels(evsel->evlist, &sb, 2048);
+ ret = scnprintf(msg, size, "Topdown slots event can only be group leader "
+ "in '%s'.", sb.buf);
+ strbuf_release(&sb);
+ return ret;
+ }
+ } else if (arch_is_topdown_metrics(evsel)) {
+ struct evsel *pos;
+
+ evlist__for_each_entry(evsel->evlist, pos) {
+ if (pos == evsel || !arch_is_topdown_metrics(pos))
+ continue;
+
+ if (pos->core.attr.config != evsel->core.attr.config)
+ continue;
+
+ evlist__uniquify_evsel_names(evsel->evlist, &stat_config);
+ evlist__format_evsels(evsel->evlist, &sb, 2048);
+ ret = scnprintf(msg, size, "Perf metric event '%s' is duplicated "
+ "in the same group (only one event is allowed) in '%s'.",
+ evsel__name(evsel), sb.buf);
+ strbuf_release(&sb);
+ return ret;
+ }
+ }
return 0;
}
+
+int arch_evsel__open_strerror(struct evsel *evsel, int err, char *msg, size_t size)
+{
+ return x86__is_amd_cpu()
+ ? amd_evsel__open_strerror(evsel, msg, size)
+ : intel_evsel__open_strerror(evsel, err, msg, size);
+}
diff --git a/tools/perf/arch/x86/util/intel-pt.c b/tools/perf/arch/x86/util/intel-pt.c
index add33cb5d1da..b394ad9cc635 100644
--- a/tools/perf/arch/x86/util/intel-pt.c
+++ b/tools/perf/arch/x86/util/intel-pt.c
@@ -12,7 +12,6 @@
#include <linux/log2.h>
#include <linux/zalloc.h>
#include <linux/err.h>
-#include <cpuid.h>
#include "../../../util/session.h"
#include "../../../util/event.h"
@@ -34,6 +33,7 @@
#include <internal/lib.h> // page_size
#include "../../../util/intel-pt.h"
#include <api/fs/fs.h>
+#include "cpuid.h"
#define KiB(x) ((x) * 1024)
#define MiB(x) ((x) * 1024 * 1024)
@@ -72,7 +72,7 @@ static int intel_pt_parse_terms_with_default(const struct perf_pmu *pmu,
int err;
parse_events_terms__init(&terms);
- err = parse_events_terms(&terms, str, /*input=*/ NULL);
+ err = parse_events_terms(&terms, str);
if (err)
goto out_free;
@@ -311,7 +311,7 @@ static void intel_pt_tsc_ctc_ratio(u32 *n, u32 *d)
{
unsigned int eax = 0, ebx = 0, ecx = 0, edx = 0;
- __get_cpuid(0x15, &eax, &ebx, &ecx, &edx);
+ cpuid(0x15, 0, &eax, &ebx, &ecx, &edx);
*n = ebx;
*d = eax;
}
diff --git a/tools/perf/arch/x86/util/kvm-stat.c b/tools/perf/arch/x86/util/kvm-stat.c
index 424716518b75..bff36f9345ea 100644
--- a/tools/perf/arch/x86/util/kvm-stat.c
+++ b/tools/perf/arch/x86/util/kvm-stat.c
@@ -3,9 +3,11 @@
#include <string.h>
#include "../../../util/kvm-stat.h"
#include "../../../util/evsel.h"
+#include "../../../util/env.h"
#include <asm/svm.h>
#include <asm/vmx.h>
#include <asm/kvm.h>
+#include <subcmd/parse-options.h>
define_exit_reasons_table(vmx_exit_reasons, VMX_EXIT_REASONS);
define_exit_reasons_table(svm_exit_reasons, SVM_EXIT_REASONS);
@@ -211,3 +213,52 @@ int cpu_isa_init(struct perf_kvm_stat *kvm, const char *cpuid)
return 0;
}
+
+/*
+ * After KVM supports PEBS for guest on Intel platforms
+ * (https://lore.kernel.org/all/20220411101946.20262-1-likexu@tencent.com/),
+ * host loses the capability to sample guest with PEBS since all PEBS related
+ * MSRs are switched to guest value after vm-entry, like IA32_DS_AREA MSR is
+ * switched to guest GVA at vm-entry. This would lead to "perf kvm record"
+ * fails to sample guest on Intel platforms since "cycles:P" event is used to
+ * sample guest by default.
+ *
+ * So, to avoid this issue explicitly use "cycles" instead of "cycles:P" event
+ * by default to sample guest on Intel platforms.
+ */
+int kvm_add_default_arch_event(int *argc, const char **argv)
+{
+ const char **tmp;
+ bool event = false;
+ int ret = 0, i, j = *argc;
+
+ const struct option event_options[] = {
+ OPT_BOOLEAN('e', "event", &event, NULL),
+ OPT_BOOLEAN(0, "pfm-events", &event, NULL),
+ OPT_END()
+ };
+
+ if (!x86__is_intel_cpu())
+ return 0;
+
+ tmp = calloc(j + 1, sizeof(char *));
+ if (!tmp)
+ return -ENOMEM;
+
+ for (i = 0; i < j; i++)
+ tmp[i] = argv[i];
+
+ parse_options(j, tmp, event_options, NULL, PARSE_OPT_KEEP_UNKNOWN);
+ if (!event) {
+ argv[j++] = STRDUP_FAIL_EXIT("-e");
+ argv[j++] = STRDUP_FAIL_EXIT("cycles");
+ *argc += 2;
+ }
+
+ free(tmp);
+ return 0;
+
+EXIT:
+ free(tmp);
+ return ret;
+}
diff --git a/tools/perf/arch/x86/util/pmu.c b/tools/perf/arch/x86/util/pmu.c
index 58113482654b..a3f96221758d 100644
--- a/tools/perf/arch/x86/util/pmu.c
+++ b/tools/perf/arch/x86/util/pmu.c
@@ -273,7 +273,6 @@ void perf_pmu__arch_init(struct perf_pmu *pmu)
{
struct perf_pmu_caps *ldlat_cap;
-#ifdef HAVE_AUXTRACE_SUPPORT
if (!strcmp(pmu->name, INTEL_PT_PMU_NAME)) {
pmu->auxtrace = true;
pmu->selectable = true;
@@ -283,7 +282,6 @@ void perf_pmu__arch_init(struct perf_pmu *pmu)
pmu->auxtrace = true;
pmu->selectable = true;
}
-#endif
if (x86__is_amd_cpu()) {
if (strcmp(pmu->name, "ibs_op"))
diff --git a/tools/perf/arch/x86/util/topdown.c b/tools/perf/arch/x86/util/topdown.c
index 0d01b662627a..bafd285119d7 100644
--- a/tools/perf/arch/x86/util/topdown.c
+++ b/tools/perf/arch/x86/util/topdown.c
@@ -1,4 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
+#include <errno.h>
#include "util/evlist.h"
#include "util/pmu.h"
#include "util/pmus.h"
diff --git a/tools/perf/arch/xtensa/entry/syscalls/syscall.tbl b/tools/perf/arch/xtensa/entry/syscalls/syscall.tbl
index f657a77314f8..374e4cb788d8 100644
--- a/tools/perf/arch/xtensa/entry/syscalls/syscall.tbl
+++ b/tools/perf/arch/xtensa/entry/syscalls/syscall.tbl
@@ -438,3 +438,5 @@
465 common listxattrat sys_listxattrat
466 common removexattrat sys_removexattrat
467 common open_tree_attr sys_open_tree_attr
+468 common file_getattr sys_file_getattr
+469 common file_setattr sys_file_setattr
diff --git a/tools/perf/bench/bench.h b/tools/perf/bench/bench.h
index 9f736423af53..8519eb5a42fa 100644
--- a/tools/perf/bench/bench.h
+++ b/tools/perf/bench/bench.h
@@ -28,6 +28,7 @@ int bench_syscall_fork(int argc, const char **argv);
int bench_syscall_execve(int argc, const char **argv);
int bench_mem_memcpy(int argc, const char **argv);
int bench_mem_memset(int argc, const char **argv);
+int bench_mem_mmap(int argc, const char **argv);
int bench_mem_find_bit(int argc, const char **argv);
int bench_futex_hash(int argc, const char **argv);
int bench_futex_wake(int argc, const char **argv);
diff --git a/tools/perf/bench/evlist-open-close.c b/tools/perf/bench/evlist-open-close.c
index bfaf50e4e519..faf9c34b4a5d 100644
--- a/tools/perf/bench/evlist-open-close.c
+++ b/tools/perf/bench/evlist-open-close.c
@@ -1,4 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
+#include <errno.h>
#include <inttypes.h>
#include <stdio.h>
#include <stdlib.h>
diff --git a/tools/perf/bench/find-bit-bench.c b/tools/perf/bench/find-bit-bench.c
index 7e25b0e413f6..e697c20951bc 100644
--- a/tools/perf/bench/find-bit-bench.c
+++ b/tools/perf/bench/find-bit-bench.c
@@ -37,7 +37,7 @@ static noinline void workload(int val)
accumulator++;
}
-#if (defined(__i386__) || defined(__x86_64__)) && defined(__GCC_ASM_FLAG_OUTPUTS__)
+#if defined(__i386__) || defined(__x86_64__)
static bool asm_test_bit(long nr, const unsigned long *addr)
{
bool oldbit;
diff --git a/tools/perf/bench/futex.c b/tools/perf/bench/futex.c
index 1481196a22f0..1968c9d00b5b 100644
--- a/tools/perf/bench/futex.c
+++ b/tools/perf/bench/futex.c
@@ -1,5 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
#include <err.h>
+#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/prctl.h>
diff --git a/tools/perf/bench/futex.h b/tools/perf/bench/futex.h
index dd295d27044a..fcb72d682cf8 100644
--- a/tools/perf/bench/futex.h
+++ b/tools/perf/bench/futex.h
@@ -8,6 +8,7 @@
#ifndef _FUTEX_H
#define _FUTEX_H
+#include <stdbool.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <sys/types.h>
diff --git a/tools/perf/bench/mem-functions.c b/tools/perf/bench/mem-functions.c
index 19d45c377ac1..2908a3a796c9 100644
--- a/tools/perf/bench/mem-functions.c
+++ b/tools/perf/bench/mem-functions.c
@@ -22,27 +22,39 @@
#include <string.h>
#include <unistd.h>
#include <sys/time.h>
+#include <sys/mman.h>
#include <errno.h>
#include <linux/time64.h>
-#include <linux/zalloc.h>
+#include <linux/log2.h>
#define K 1024
+#define PAGE_SHIFT_4KB 12
+#define PAGE_SHIFT_2MB 21
+#define PAGE_SHIFT_1GB 30
+
static const char *size_str = "1MB";
static const char *function_str = "all";
-static int nr_loops = 1;
+static const char *page_size_str = "4KB";
+static const char *chunk_size_str = "0";
+static unsigned int nr_loops = 1;
static bool use_cycles;
static int cycles_fd;
+static unsigned int seed;
-static const struct option options[] = {
+static const struct option bench_common_options[] = {
OPT_STRING('s', "size", &size_str, "1MB",
"Specify the size of the memory buffers. "
"Available units: B, KB, MB, GB and TB (case insensitive)"),
+ OPT_STRING('p', "page", &page_size_str, "4KB",
+ "Specify page-size for mapping memory buffers. "
+ "Available sizes: 4KB, 2MB, 1GB (case insensitive)"),
+
OPT_STRING('f', "function", &function_str, "all",
"Specify the function to run, \"all\" runs all available functions, \"help\" lists them"),
- OPT_INTEGER('l', "nr_loops", &nr_loops,
+ OPT_UINTEGER('l', "nr_loops", &nr_loops,
"Specify the number of loops to run. (default: 1)"),
OPT_BOOLEAN('c', "cycles", &use_cycles,
@@ -51,15 +63,56 @@ static const struct option options[] = {
OPT_END()
};
+static const struct option bench_mem_options[] = {
+ OPT_STRING('k', "chunk", &chunk_size_str, "0",
+ "Specify the chunk-size for each invocation. "
+ "Available units: B, KB, MB, GB and TB (case insensitive)"),
+ OPT_PARENT(bench_common_options),
+ OPT_END()
+};
+
+union bench_clock {
+ u64 cycles;
+ struct timeval tv;
+};
+
+struct bench_params {
+ size_t size;
+ size_t size_total;
+ size_t chunk_size;
+ unsigned int nr_loops;
+ unsigned int page_shift;
+ unsigned int seed;
+};
+
+struct bench_mem_info {
+ const struct function *functions;
+ int (*do_op)(const struct function *r, struct bench_params *p,
+ void *src, void *dst, union bench_clock *rt);
+ const char *const *usage;
+ const struct option *options;
+ bool alloc_src;
+};
+
+typedef bool (*mem_init_t)(struct bench_mem_info *, struct bench_params *,
+ void **, void **);
+typedef void (*mem_fini_t)(struct bench_mem_info *, struct bench_params *,
+ void **, void **);
typedef void *(*memcpy_t)(void *, const void *, size_t);
typedef void *(*memset_t)(void *, int, size_t);
+typedef void (*mmap_op_t)(void *, size_t, unsigned int, bool);
struct function {
const char *name;
const char *desc;
- union {
- memcpy_t memcpy;
- memset_t memset;
+ struct {
+ mem_init_t init;
+ mem_fini_t fini;
+ union {
+ memcpy_t memcpy;
+ memset_t memset;
+ mmap_op_t mmap_op;
+ };
} fn;
};
@@ -91,6 +144,34 @@ static u64 get_cycles(void)
return clk;
}
+static void clock_get(union bench_clock *t)
+{
+ if (use_cycles)
+ t->cycles = get_cycles();
+ else
+ BUG_ON(gettimeofday(&t->tv, NULL));
+}
+
+static union bench_clock clock_diff(union bench_clock *s, union bench_clock *e)
+{
+ union bench_clock t;
+
+ if (use_cycles)
+ t.cycles = e->cycles - s->cycles;
+ else
+ timersub(&e->tv, &s->tv, &t.tv);
+
+ return t;
+}
+
+static void clock_accum(union bench_clock *a, union bench_clock *b)
+{
+ if (use_cycles)
+ a->cycles += b->cycles;
+ else
+ timeradd(&a->tv, &b->tv, &a->tv);
+}
+
static double timeval2double(struct timeval *ts)
{
return (double)ts->tv_sec + (double)ts->tv_usec / (double)USEC_PER_SEC;
@@ -107,54 +188,40 @@ static double timeval2double(struct timeval *ts)
printf(" %14lf GB/sec\n", x / K / K / K); \
} while (0)
-struct bench_mem_info {
- const struct function *functions;
- u64 (*do_cycles)(const struct function *r, size_t size, void *src, void *dst);
- double (*do_gettimeofday)(const struct function *r, size_t size, void *src, void *dst);
- const char *const *usage;
- bool alloc_src;
-};
-
-static void __bench_mem_function(struct bench_mem_info *info, int r_idx, size_t size, double size_total)
+static void __bench_mem_function(struct bench_mem_info *info, struct bench_params *p,
+ int r_idx)
{
const struct function *r = &info->functions[r_idx];
double result_bps = 0.0;
- u64 result_cycles = 0;
- void *src = NULL, *dst = zalloc(size);
+ union bench_clock rt = { 0 };
+ void *src = NULL, *dst = NULL;
printf("# function '%s' (%s)\n", r->name, r->desc);
- if (dst == NULL)
- goto out_alloc_failed;
-
- if (info->alloc_src) {
- src = zalloc(size);
- if (src == NULL)
- goto out_alloc_failed;
- }
+ if (r->fn.init && r->fn.init(info, p, &src, &dst))
+ goto out_init_failed;
if (bench_format == BENCH_FORMAT_DEFAULT)
printf("# Copying %s bytes ...\n\n", size_str);
- if (use_cycles) {
- result_cycles = info->do_cycles(r, size, src, dst);
- } else {
- result_bps = info->do_gettimeofday(r, size, src, dst);
- }
+ if (info->do_op(r, p, src, dst, &rt))
+ goto out_test_failed;
switch (bench_format) {
case BENCH_FORMAT_DEFAULT:
if (use_cycles) {
- printf(" %14lf cycles/byte\n", (double)result_cycles/size_total);
+ printf(" %14lf cycles/byte\n", (double)rt.cycles/(double)p->size_total);
} else {
+ result_bps = (double)p->size_total/timeval2double(&rt.tv);
print_bps(result_bps);
}
break;
case BENCH_FORMAT_SIMPLE:
if (use_cycles) {
- printf("%lf\n", (double)result_cycles/size_total);
+ printf("%lf\n", (double)rt.cycles/(double)p->size_total);
} else {
+ result_bps = (double)p->size_total/timeval2double(&rt.tv);
printf("%lf\n", result_bps);
}
break;
@@ -164,22 +231,23 @@ static void __bench_mem_function(struct bench_mem_info *info, int r_idx, size_t
break;
}
+out_test_failed:
out_free:
- free(src);
- free(dst);
+ if (r->fn.fini) r->fn.fini(info, p, &src, &dst);
return;
-out_alloc_failed:
- printf("# Memory allocation failed - maybe size (%s) is too large?\n", size_str);
+out_init_failed:
+ printf("# Memory allocation failed - maybe size (%s) %s?\n", size_str,
+ p->page_shift != PAGE_SHIFT_4KB ? "has insufficient hugepages" : "is too large");
goto out_free;
}
static int bench_mem_common(int argc, const char **argv, struct bench_mem_info *info)
{
int i;
- size_t size;
- double size_total;
+ struct bench_params p = { 0 };
+ unsigned int page_size;
- argc = parse_options(argc, argv, options, info->usage, 0);
+ argc = parse_options(argc, argv, info->options, info->usage, 0);
if (use_cycles) {
i = init_cycles();
@@ -189,17 +257,37 @@ static int bench_mem_common(int argc, const char **argv, struct bench_mem_info *
}
}
- size = (size_t)perf_atoll((char *)size_str);
- size_total = (double)size * nr_loops;
+ p.nr_loops = nr_loops;
+ p.size = (size_t)perf_atoll((char *)size_str);
- if ((s64)size <= 0) {
+ if ((s64)p.size <= 0) {
fprintf(stderr, "Invalid size:%s\n", size_str);
return 1;
}
+ p.size_total = p.size * p.nr_loops;
+
+ p.chunk_size = (size_t)perf_atoll((char *)chunk_size_str);
+ if ((s64)p.chunk_size < 0 || (s64)p.chunk_size > (s64)p.size) {
+ fprintf(stderr, "Invalid chunk_size:%s\n", chunk_size_str);
+ return 1;
+ }
+ if (!p.chunk_size)
+ p.chunk_size = p.size;
+
+ page_size = (unsigned int)perf_atoll((char *)page_size_str);
+ if (page_size != (1 << PAGE_SHIFT_4KB) &&
+ page_size != (1 << PAGE_SHIFT_2MB) &&
+ page_size != (1 << PAGE_SHIFT_1GB)) {
+ fprintf(stderr, "Invalid page-size:%s\n", page_size_str);
+ return 1;
+ }
+ p.page_shift = ilog2(page_size);
+
+ p.seed = seed;
if (!strncmp(function_str, "all", 3)) {
for (i = 0; info->functions[i].name; i++)
- __bench_mem_function(info, i, size, size_total);
+ __bench_mem_function(info, &p, i);
return 0;
}
@@ -218,7 +306,7 @@ static int bench_mem_common(int argc, const char **argv, struct bench_mem_info *
return 1;
}
- __bench_mem_function(info, i, size, size_total);
+ __bench_mem_function(info, &p, i);
return 0;
}
@@ -235,47 +323,81 @@ static void memcpy_prefault(memcpy_t fn, size_t size, void *src, void *dst)
fn(dst, src, size);
}
-static u64 do_memcpy_cycles(const struct function *r, size_t size, void *src, void *dst)
+static int do_memcpy(const struct function *r, struct bench_params *p,
+ void *src, void *dst, union bench_clock *rt)
{
- u64 cycle_start = 0ULL, cycle_end = 0ULL;
+ union bench_clock start, end;
memcpy_t fn = r->fn.memcpy;
- int i;
- memcpy_prefault(fn, size, src, dst);
+ memcpy_prefault(fn, p->size, src, dst);
+
+ clock_get(&start);
+ for (unsigned int i = 0; i < p->nr_loops; ++i)
+ for (size_t off = 0; off < p->size; off += p->chunk_size)
+ fn(dst + off, src + off, min(p->chunk_size, p->size - off));
+ clock_get(&end);
- cycle_start = get_cycles();
- for (i = 0; i < nr_loops; ++i)
- fn(dst, src, size);
- cycle_end = get_cycles();
+ *rt = clock_diff(&start, &end);
- return cycle_end - cycle_start;
+ return 0;
}
-static double do_memcpy_gettimeofday(const struct function *r, size_t size, void *src, void *dst)
+static void *bench_mmap(size_t size, bool populate, unsigned int page_shift)
{
- struct timeval tv_start, tv_end, tv_diff;
- memcpy_t fn = r->fn.memcpy;
- int i;
+ void *p;
+ int extra = populate ? MAP_POPULATE : 0;
+
+ if (page_shift != PAGE_SHIFT_4KB)
+ extra |= MAP_HUGETLB | (page_shift << MAP_HUGE_SHIFT);
+
+ p = mmap(NULL, size, PROT_READ|PROT_WRITE,
+ extra | MAP_PRIVATE | MAP_ANONYMOUS, 0, 0);
+
+ return p == MAP_FAILED ? NULL : p;
+}
+
+static void bench_munmap(void *p, size_t size)
+{
+ if (p)
+ munmap(p, size);
+}
+
+static bool mem_alloc(struct bench_mem_info *info, struct bench_params *p,
+ void **src, void **dst)
+{
+ bool failed;
- memcpy_prefault(fn, size, src, dst);
+ *dst = bench_mmap(p->size, true, p->page_shift);
+ failed = *dst == NULL;
- BUG_ON(gettimeofday(&tv_start, NULL));
- for (i = 0; i < nr_loops; ++i)
- fn(dst, src, size);
- BUG_ON(gettimeofday(&tv_end, NULL));
+ if (info->alloc_src) {
+ *src = bench_mmap(p->size, true, p->page_shift);
+ failed = failed || *src == NULL;
+ }
+
+ return failed;
+}
- timersub(&tv_end, &tv_start, &tv_diff);
+static void mem_free(struct bench_mem_info *info __maybe_unused,
+ struct bench_params *p __maybe_unused,
+ void **src, void **dst)
+{
+ bench_munmap(*dst, p->size);
+ bench_munmap(*src, p->size);
- return (double)(((double)size * nr_loops) / timeval2double(&tv_diff));
+ *dst = *src = NULL;
}
struct function memcpy_functions[] = {
{ .name = "default",
.desc = "Default memcpy() provided by glibc",
+ .fn.init = mem_alloc,
+ .fn.fini = mem_free,
.fn.memcpy = memcpy },
#ifdef HAVE_ARCH_X86_64_SUPPORT
-# define MEMCPY_FN(_fn, _name, _desc) {.name = _name, .desc = _desc, .fn.memcpy = _fn},
+# define MEMCPY_FN(_fn, _init, _fini, _name, _desc) \
+ {.name = _name, .desc = _desc, .fn.memcpy = _fn, .fn.init = _init, .fn.fini = _fini },
# include "mem-memcpy-x86-64-asm-def.h"
# undef MEMCPY_FN
#endif
@@ -292,55 +414,36 @@ int bench_mem_memcpy(int argc, const char **argv)
{
struct bench_mem_info info = {
.functions = memcpy_functions,
- .do_cycles = do_memcpy_cycles,
- .do_gettimeofday = do_memcpy_gettimeofday,
+ .do_op = do_memcpy,
.usage = bench_mem_memcpy_usage,
+ .options = bench_mem_options,
.alloc_src = true,
};
return bench_mem_common(argc, argv, &info);
}
-static u64 do_memset_cycles(const struct function *r, size_t size, void *src __maybe_unused, void *dst)
-{
- u64 cycle_start = 0ULL, cycle_end = 0ULL;
- memset_t fn = r->fn.memset;
- int i;
-
- /*
- * We prefault the freshly allocated memory range here,
- * to not measure page fault overhead:
- */
- fn(dst, -1, size);
-
- cycle_start = get_cycles();
- for (i = 0; i < nr_loops; ++i)
- fn(dst, i, size);
- cycle_end = get_cycles();
-
- return cycle_end - cycle_start;
-}
-
-static double do_memset_gettimeofday(const struct function *r, size_t size, void *src __maybe_unused, void *dst)
+static int do_memset(const struct function *r, struct bench_params *p,
+ void *src __maybe_unused, void *dst, union bench_clock *rt)
{
- struct timeval tv_start, tv_end, tv_diff;
+ union bench_clock start, end;
memset_t fn = r->fn.memset;
- int i;
/*
* We prefault the freshly allocated memory range here,
* to not measure page fault overhead:
*/
- fn(dst, -1, size);
+ fn(dst, -1, p->size);
- BUG_ON(gettimeofday(&tv_start, NULL));
- for (i = 0; i < nr_loops; ++i)
- fn(dst, i, size);
- BUG_ON(gettimeofday(&tv_end, NULL));
+ clock_get(&start);
+ for (unsigned int i = 0; i < p->nr_loops; ++i)
+ for (size_t off = 0; off < p->size; off += p->chunk_size)
+ fn(dst + off, i, min(p->chunk_size, p->size - off));
+ clock_get(&end);
- timersub(&tv_end, &tv_start, &tv_diff);
+ *rt = clock_diff(&start, &end);
- return (double)(((double)size * nr_loops) / timeval2double(&tv_diff));
+ return 0;
}
static const char * const bench_mem_memset_usage[] = {
@@ -351,10 +454,13 @@ static const char * const bench_mem_memset_usage[] = {
static const struct function memset_functions[] = {
{ .name = "default",
.desc = "Default memset() provided by glibc",
+ .fn.init = mem_alloc,
+ .fn.fini = mem_free,
.fn.memset = memset },
#ifdef HAVE_ARCH_X86_64_SUPPORT
-# define MEMSET_FN(_fn, _name, _desc) { .name = _name, .desc = _desc, .fn.memset = _fn },
+# define MEMSET_FN(_fn, _init, _fini, _name, _desc) \
+ {.name = _name, .desc = _desc, .fn.memset = _fn, .fn.init = _init, .fn.fini = _fini },
# include "mem-memset-x86-64-asm-def.h"
# undef MEMSET_FN
#endif
@@ -366,9 +472,91 @@ int bench_mem_memset(int argc, const char **argv)
{
struct bench_mem_info info = {
.functions = memset_functions,
- .do_cycles = do_memset_cycles,
- .do_gettimeofday = do_memset_gettimeofday,
+ .do_op = do_memset,
.usage = bench_mem_memset_usage,
+ .options = bench_mem_options,
+ };
+
+ return bench_mem_common(argc, argv, &info);
+}
+
+static void mmap_page_touch(void *dst, size_t size, unsigned int page_shift, bool random)
+{
+ unsigned long npages = size / (1 << page_shift);
+ unsigned long offset = 0, r = 0;
+
+ for (unsigned long i = 0; i < npages; i++) {
+ if (random)
+ r = rand() % (1 << page_shift);
+
+ *((char *)dst + offset + r) = *(char *)(dst + offset + r) + i;
+ offset += 1 << page_shift;
+ }
+}
+
+static int do_mmap(const struct function *r, struct bench_params *p,
+ void *src __maybe_unused, void *dst __maybe_unused,
+ union bench_clock *accum)
+{
+ union bench_clock start, end, diff;
+ mmap_op_t fn = r->fn.mmap_op;
+ bool populate = strcmp(r->name, "populate") == 0;
+
+ if (p->seed)
+ srand(p->seed);
+
+ for (unsigned int i = 0; i < p->nr_loops; i++) {
+ clock_get(&start);
+ dst = bench_mmap(p->size, populate, p->page_shift);
+ if (!dst)
+ goto out;
+
+ fn(dst, p->size, p->page_shift, p->seed);
+ clock_get(&end);
+ diff = clock_diff(&start, &end);
+ clock_accum(accum, &diff);
+
+ bench_munmap(dst, p->size);
+ }
+
+ return 0;
+out:
+ printf("# Memory allocation failed - maybe size (%s) %s?\n", size_str,
+ p->page_shift != PAGE_SHIFT_4KB ? "has insufficient hugepages" : "is too large");
+ return -1;
+}
+
+static const char * const bench_mem_mmap_usage[] = {
+ "perf bench mem mmap <options>",
+ NULL
+};
+
+static const struct function mmap_functions[] = {
+ { .name = "demand",
+ .desc = "Demand loaded mmap()",
+ .fn.mmap_op = mmap_page_touch },
+
+ { .name = "populate",
+ .desc = "Eagerly populated mmap()",
+ .fn.mmap_op = mmap_page_touch },
+
+ { .name = NULL, }
+};
+
+int bench_mem_mmap(int argc, const char **argv)
+{
+ static const struct option bench_mmap_options[] = {
+ OPT_UINTEGER('r', "randomize", &seed,
+ "Seed to randomize page access offset."),
+ OPT_PARENT(bench_common_options),
+ OPT_END()
+ };
+
+ struct bench_mem_info info = {
+ .functions = mmap_functions,
+ .do_op = do_mmap,
+ .usage = bench_mem_mmap_usage,
+ .options = bench_mmap_options,
};
return bench_mem_common(argc, argv, &info);
diff --git a/tools/perf/bench/mem-memcpy-arch.h b/tools/perf/bench/mem-memcpy-arch.h
index 5bcaec5601a8..852e48cfd8fe 100644
--- a/tools/perf/bench/mem-memcpy-arch.h
+++ b/tools/perf/bench/mem-memcpy-arch.h
@@ -2,7 +2,7 @@
#ifdef HAVE_ARCH_X86_64_SUPPORT
-#define MEMCPY_FN(fn, name, desc) \
+#define MEMCPY_FN(fn, init, fini, name, desc) \
void *fn(void *, const void *, size_t);
#include "mem-memcpy-x86-64-asm-def.h"
diff --git a/tools/perf/bench/mem-memcpy-x86-64-asm-def.h b/tools/perf/bench/mem-memcpy-x86-64-asm-def.h
index 6188e19d3129..f43038f4448b 100644
--- a/tools/perf/bench/mem-memcpy-x86-64-asm-def.h
+++ b/tools/perf/bench/mem-memcpy-x86-64-asm-def.h
@@ -1,9 +1,13 @@
/* SPDX-License-Identifier: GPL-2.0 */
MEMCPY_FN(memcpy_orig,
+ mem_alloc,
+ mem_free,
"x86-64-unrolled",
"unrolled memcpy() in arch/x86/lib/memcpy_64.S")
MEMCPY_FN(__memcpy,
+ mem_alloc,
+ mem_free,
"x86-64-movsq",
"movsq-based memcpy() in arch/x86/lib/memcpy_64.S")
diff --git a/tools/perf/bench/mem-memset-arch.h b/tools/perf/bench/mem-memset-arch.h
index 53f45482663f..278c5da12d63 100644
--- a/tools/perf/bench/mem-memset-arch.h
+++ b/tools/perf/bench/mem-memset-arch.h
@@ -2,7 +2,7 @@
#ifdef HAVE_ARCH_X86_64_SUPPORT
-#define MEMSET_FN(fn, name, desc) \
+#define MEMSET_FN(fn, init, fini, name, desc) \
void *fn(void *, int, size_t);
#include "mem-memset-x86-64-asm-def.h"
diff --git a/tools/perf/bench/mem-memset-x86-64-asm-def.h b/tools/perf/bench/mem-memset-x86-64-asm-def.h
index 247c72fdfb9d..80ad1b7ea770 100644
--- a/tools/perf/bench/mem-memset-x86-64-asm-def.h
+++ b/tools/perf/bench/mem-memset-x86-64-asm-def.h
@@ -1,9 +1,13 @@
/* SPDX-License-Identifier: GPL-2.0 */
MEMSET_FN(memset_orig,
+ mem_alloc,
+ mem_free,
"x86-64-unrolled",
"unrolled memset() in arch/x86/lib/memset_64.S")
MEMSET_FN(__memset,
+ mem_alloc,
+ mem_free,
"x86-64-stosq",
"movsq-based memset() in arch/x86/lib/memset_64.S")
diff --git a/tools/perf/bench/pmu-scan.c b/tools/perf/bench/pmu-scan.c
index 9e4d36486f62..14a464ad8cea 100644
--- a/tools/perf/bench/pmu-scan.c
+++ b/tools/perf/bench/pmu-scan.c
@@ -4,6 +4,7 @@
*
* Copyright 2023 Google LLC.
*/
+#include <errno.h>
#include <stdio.h>
#include "bench.h"
#include "util/debug.h"
diff --git a/tools/perf/bench/synthesize.c b/tools/perf/bench/synthesize.c
index b3d493697675..265d49a913d9 100644
--- a/tools/perf/bench/synthesize.c
+++ b/tools/perf/bench/synthesize.c
@@ -6,6 +6,7 @@
*
* Copyright 2019 Google LLC.
*/
+#include <errno.h>
#include <stdio.h>
#include "bench.h"
#include "../util/debug.h"
diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c
index 5d57d2913f3d..9c27bb30b708 100644
--- a/tools/perf/builtin-annotate.c
+++ b/tools/perf/builtin-annotate.c
@@ -313,7 +313,8 @@ out_put:
return ret;
}
-static int process_feature_event(struct perf_session *session,
+static int process_feature_event(const struct perf_tool *tool __maybe_unused,
+ struct perf_session *session,
union perf_event *event)
{
if (event->feat.feat_id < HEADER_LAST_FEATURE)
@@ -519,7 +520,7 @@ find_next:
/* skip missing symbols */
nd = rb_next(nd);
} else if (use_browser == 1) {
- key = hist_entry__tui_annotate(he, evsel, NULL);
+ key = hist_entry__tui_annotate(he, evsel, NULL, NO_ADDR);
switch (key) {
case -1:
@@ -917,11 +918,6 @@ int cmd_annotate(int argc, const char **argv)
symbol_conf.annotate_data_sample = true;
} else if (annotate_opts.code_with_type) {
symbol_conf.annotate_data_member = true;
-
- if (!annotate.use_stdio) {
- pr_err("--code-with-type only works with --stdio.\n");
- goto out_delete;
- }
}
setup_browser(true);
diff --git a/tools/perf/builtin-bench.c b/tools/perf/builtin-bench.c
index 2c1a9f3d847a..02dea1b88228 100644
--- a/tools/perf/builtin-bench.c
+++ b/tools/perf/builtin-bench.c
@@ -65,6 +65,7 @@ static struct bench mem_benchmarks[] = {
{ "memcpy", "Benchmark for memcpy() functions", bench_mem_memcpy },
{ "memset", "Benchmark for memset() functions", bench_mem_memset },
{ "find_bit", "Benchmark for find_bit() functions", bench_mem_find_bit },
+ { "mmap", "Benchmark for mmap() mappings", bench_mem_mmap },
{ "all", "Run all memory access benchmarks", NULL },
{ NULL, NULL, NULL }
};
diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c
index 9e9ff471ddd1..d390ae4e3ec8 100644
--- a/tools/perf/builtin-c2c.c
+++ b/tools/perf/builtin-c2c.c
@@ -45,6 +45,8 @@
#include "pmus.h"
#include "string2.h"
#include "util/util.h"
+#include "util/symbol.h"
+#include "util/annotate.h"
struct c2c_hists {
struct hists hists;
@@ -62,6 +64,7 @@ struct compute_stats {
struct c2c_hist_entry {
struct c2c_hists *hists;
+ struct evsel *evsel;
struct c2c_stats stats;
unsigned long *cpuset;
unsigned long *nodeset;
@@ -225,6 +228,12 @@ he__get_c2c_hists(struct hist_entry *he,
return hists;
}
+static void c2c_he__set_evsel(struct c2c_hist_entry *c2c_he,
+ struct evsel *evsel)
+{
+ c2c_he->evsel = evsel;
+}
+
static void c2c_he__set_cpu(struct c2c_hist_entry *c2c_he,
struct perf_sample *sample)
{
@@ -275,6 +284,33 @@ static void compute_stats(struct c2c_hist_entry *c2c_he,
update_stats(&cstats->load, weight);
}
+/*
+ * Return true if annotation is possible. When list is NULL,
+ * it means that we are called at the c2c_browser level,
+ * in that case we allow annotation to be initialized. When list
+ * is non-NULL, it means that we are called at the cacheline_browser
+ * level, in that case we allow annotation only if use_browser
+ * is set and symbol information is available.
+ */
+static bool perf_c2c__has_annotation(struct perf_hpp_list *list)
+{
+ if (use_browser != 1)
+ return false;
+ return !list || list->sym;
+}
+
+static void perf_c2c__evsel_hists_inc_stats(struct evsel *evsel,
+ struct hist_entry *he,
+ struct perf_sample *sample)
+{
+ struct hists *evsel_hists = evsel__hists(evsel);
+
+ hists__inc_nr_samples(evsel_hists, he->filtered);
+ evsel_hists->stats.total_period += sample->period;
+ if (!he->filtered)
+ evsel_hists->stats.total_non_filtered_period += sample->period;
+}
+
static int process_sample_event(const struct perf_tool *tool __maybe_unused,
union perf_event *event,
struct perf_sample *sample,
@@ -286,7 +322,7 @@ static int process_sample_event(const struct perf_tool *tool __maybe_unused,
struct c2c_stats stats = { .nr_entries = 0, };
struct hist_entry *he;
struct addr_location al;
- struct mem_info *mi, *mi_dup;
+ struct mem_info *mi = NULL;
struct callchain_cursor *cursor;
int ret;
@@ -313,20 +349,15 @@ static int process_sample_event(const struct perf_tool *tool __maybe_unused,
goto out;
}
- /*
- * The mi object is released in hists__add_entry_ops,
- * if it gets sorted out into existing data, so we need
- * to take the copy now.
- */
- mi_dup = mem_info__get(mi);
-
c2c_decode_stats(&stats, mi);
he = hists__add_entry_ops(&c2c_hists->hists, &c2c_entry_ops,
&al, NULL, NULL, mi, NULL,
sample, true);
- if (he == NULL)
- goto free_mi;
+ if (he == NULL) {
+ ret = -ENOMEM;
+ goto out;
+ }
c2c_he = container_of(he, struct c2c_hist_entry, he);
c2c_add_stats(&c2c_he->stats, &stats);
@@ -334,8 +365,15 @@ static int process_sample_event(const struct perf_tool *tool __maybe_unused,
c2c_he__set_cpu(c2c_he, sample);
c2c_he__set_node(c2c_he, sample);
+ c2c_he__set_evsel(c2c_he, evsel);
hists__inc_nr_samples(&c2c_hists->hists, he->filtered);
+
+ if (perf_c2c__has_annotation(NULL)) {
+ perf_c2c__evsel_hists_inc_stats(evsel, he, sample);
+ addr_map_symbol__inc_samples(mem_info__iaddr(mi), sample, evsel);
+ }
+
ret = hist_entry__append_callchain(he, sample);
if (!ret) {
@@ -350,17 +388,19 @@ static int process_sample_event(const struct perf_tool *tool __maybe_unused,
int cpu = sample->cpu == (unsigned int) -1 ? 0 : sample->cpu;
int node = c2c.cpu2node[cpu];
- mi = mi_dup;
-
c2c_hists = he__get_c2c_hists(he, c2c.cl_sort, 2, machine->env);
- if (!c2c_hists)
- goto free_mi;
+ if (!c2c_hists) {
+ ret = -ENOMEM;
+ goto out;
+ }
he = hists__add_entry_ops(&c2c_hists->hists, &c2c_entry_ops,
&al, NULL, NULL, mi, NULL,
sample, true);
- if (he == NULL)
- goto free_mi;
+ if (he == NULL) {
+ ret = -ENOMEM;
+ goto out;
+ }
c2c_he = container_of(he, struct c2c_hist_entry, he);
c2c_add_stats(&c2c_he->stats, &stats);
@@ -371,20 +411,16 @@ static int process_sample_event(const struct perf_tool *tool __maybe_unused,
c2c_he__set_cpu(c2c_he, sample);
c2c_he__set_node(c2c_he, sample);
+ c2c_he__set_evsel(c2c_he, evsel);
hists__inc_nr_samples(&c2c_hists->hists, he->filtered);
ret = hist_entry__append_callchain(he, sample);
}
out:
+ mem_info__put(mi);
addr_location__exit(&al);
return ret;
-
-free_mi:
- mem_info__put(mi_dup);
- mem_info__put(mi);
- ret = -ENOMEM;
- goto out;
}
static const char * const c2c_usage[] = {
@@ -1997,6 +2033,9 @@ static int c2c_hists__init_sort(struct perf_hpp_list *hpp_list, char *name, stru
if (dim == &dim_dso)
hpp_list->dso = 1;
+ if (dim == &dim_symbol || dim == &dim_iaddr)
+ hpp_list->sym = 1;
+
perf_hpp_list__register_sort_field(hpp_list, &c2c_fmt->fmt);
return 0;
}
@@ -2550,6 +2589,44 @@ static void perf_c2c__hists_fprintf(FILE *out, struct perf_session *session)
}
#ifdef HAVE_SLANG_SUPPORT
+
+static int perf_c2c__toggle_annotation(struct hist_browser *browser)
+{
+ struct hist_entry *he = browser->he_selection;
+ struct symbol *sym = NULL;
+ struct annotated_source *src = NULL;
+ struct c2c_hist_entry *c2c_he = NULL;
+ u64 al_addr = NO_ADDR;
+
+ if (!perf_c2c__has_annotation(he->hists->hpp_list)) {
+ ui_browser__help_window(&browser->b, "No annotation support");
+ return 0;
+ }
+
+ if (he == NULL) {
+ ui_browser__help_window(&browser->b, "No entry selected for annotation");
+ return 0;
+ }
+
+ sym = he->ms.sym;
+ if (sym == NULL) {
+ ui_browser__help_window(&browser->b, "Can not annotate, no symbol found");
+ return 0;
+ }
+
+ src = symbol__hists(sym, 0);
+ if (src == NULL) {
+ ui_browser__help_window(&browser->b, "Failed to initialize annotation source");
+ return 0;
+ }
+
+ if (he->mem_info)
+ al_addr = mem_info__iaddr(he->mem_info)->al_addr;
+
+ c2c_he = container_of(he, struct c2c_hist_entry, he);
+ return hist_entry__tui_annotate(he, c2c_he->evsel, NULL, al_addr);
+}
+
static void c2c_browser__update_nr_entries(struct hist_browser *hb)
{
u64 nr_entries = 0;
@@ -2617,6 +2694,7 @@ static int perf_c2c__browse_cacheline(struct hist_entry *he)
" ENTER Toggle callchains (if present) \n"
" n Toggle Node details info \n"
" s Toggle full length of symbol and source line columns \n"
+ " a Toggle annotation view \n"
" q Return back to cacheline list \n";
if (!he)
@@ -2651,6 +2729,9 @@ static int perf_c2c__browse_cacheline(struct hist_entry *he)
c2c.node_info = (c2c.node_info + 1) % 3;
setup_nodes_header();
break;
+ case 'a':
+ perf_c2c__toggle_annotation(browser);
+ break;
case 'q':
goto out;
case '?':
@@ -3006,6 +3087,7 @@ static int perf_c2c__report(int argc, const char **argv)
const char *display = NULL;
const char *coalesce = NULL;
bool no_source = false;
+ const char *disassembler_style = NULL, *objdump_path = NULL;
const struct option options[] = {
OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name,
"file", "vmlinux pathname"),
@@ -3033,6 +3115,10 @@ static int perf_c2c__report(int argc, const char **argv)
OPT_BOOLEAN(0, "stitch-lbr", &c2c.stitch_lbr,
"Enable LBR callgraph stitching approach"),
OPT_BOOLEAN(0, "double-cl", &chk_double_cl, "Detect adjacent cacheline false sharing"),
+ OPT_STRING('M', "disassembler-style", &disassembler_style, "disassembler style",
+ "Specify disassembler style (e.g. -M intel for intel syntax)"),
+ OPT_STRING(0, "objdump", &objdump_path, "path",
+ "objdump binary to use for disassembly and annotations"),
OPT_PARENT(c2c_options),
OPT_END()
};
@@ -3040,6 +3126,12 @@ static int perf_c2c__report(int argc, const char **argv)
const char *output_str, *sort_str = NULL;
struct perf_env *env;
+ annotation_options__init();
+
+ err = hists__init();
+ if (err < 0)
+ goto out;
+
argc = parse_options(argc, argv, options, report_c2c_usage,
PARSE_OPT_STOP_AT_NON_OPTION);
if (argc)
@@ -3052,6 +3144,27 @@ static int perf_c2c__report(int argc, const char **argv)
if (c2c.stats_only)
c2c.use_stdio = true;
+ /**
+ * Annotation related options disassembler_style, objdump_path are set
+ * in the c2c_options, so we can use them here.
+ */
+ if (disassembler_style) {
+ annotate_opts.disassembler_style = strdup(disassembler_style);
+ if (!annotate_opts.disassembler_style) {
+ err = -ENOMEM;
+ pr_err("Failed to allocate memory for annotation options\n");
+ goto out;
+ }
+ }
+ if (objdump_path) {
+ annotate_opts.objdump_path = strdup(objdump_path);
+ if (!annotate_opts.objdump_path) {
+ err = -ENOMEM;
+ pr_err("Failed to allocate memory for annotation options\n");
+ goto out;
+ }
+ }
+
err = symbol__validate_sym_arguments();
if (err)
goto out;
@@ -3126,6 +3239,38 @@ static int perf_c2c__report(int argc, const char **argv)
if (err)
goto out_mem2node;
+ if (c2c.use_stdio)
+ use_browser = 0;
+ else
+ use_browser = 1;
+
+ /*
+ * Only in the TUI browser we are doing integrated annotation,
+ * so don't allocate extra space that won't be used in the stdio
+ * implementation.
+ */
+ if (perf_c2c__has_annotation(NULL)) {
+ int ret = symbol__annotation_init();
+
+ if (ret < 0)
+ goto out_mem2node;
+ /*
+ * For searching by name on the "Browse map details".
+ * providing it only in verbose mode not to bloat too
+ * much struct symbol.
+ */
+ if (verbose > 0) {
+ /*
+ * XXX: Need to provide a less kludgy way to ask for
+ * more space per symbol, the u32 is for the index on
+ * the ui browser.
+ * See symbol__browser_index.
+ */
+ symbol_conf.priv_size += sizeof(u32);
+ }
+ annotation_config__init();
+ }
+
if (symbol__init(env) < 0)
goto out_mem2node;
@@ -3135,11 +3280,6 @@ static int perf_c2c__report(int argc, const char **argv)
goto out_mem2node;
}
- if (c2c.use_stdio)
- use_browser = 0;
- else
- use_browser = 1;
-
setup_browser(false);
err = perf_session__process_events(session);
@@ -3210,6 +3350,7 @@ out_mem2node:
out_session:
perf_session__delete(session);
out:
+ annotation_options__exit();
return err;
}
diff --git a/tools/perf/builtin-check.c b/tools/perf/builtin-check.c
index b1e205871ab1..d19769a8f689 100644
--- a/tools/perf/builtin-check.c
+++ b/tools/perf/builtin-check.c
@@ -1,6 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
#include "builtin.h"
#include "color.h"
+#include "util/bpf-utils.h"
#include "util/debug.h"
#include "util/header.h"
#include <tools/config.h>
@@ -41,15 +42,15 @@ struct feature_status supported_features[] = {
FEATURE_STATUS("dwarf", HAVE_LIBDW_SUPPORT),
FEATURE_STATUS("dwarf_getlocations", HAVE_LIBDW_SUPPORT),
FEATURE_STATUS("dwarf-unwind", HAVE_DWARF_UNWIND_SUPPORT),
- FEATURE_STATUS("auxtrace", HAVE_AUXTRACE_SUPPORT),
FEATURE_STATUS_TIP("libbfd", HAVE_LIBBFD_SUPPORT, "Deprecated, license incompatibility, use BUILD_NONDISTRO=1 and install binutils-dev[el]"),
FEATURE_STATUS("libbpf-strings", HAVE_LIBBPF_STRINGS_SUPPORT),
FEATURE_STATUS("libcapstone", HAVE_LIBCAPSTONE_SUPPORT),
FEATURE_STATUS("libdw-dwarf-unwind", HAVE_LIBDW_SUPPORT),
FEATURE_STATUS("libelf", HAVE_LIBELF_SUPPORT),
+ FEATURE_STATUS("libLLVM", HAVE_LIBLLVM_SUPPORT),
FEATURE_STATUS("libnuma", HAVE_LIBNUMA_SUPPORT),
FEATURE_STATUS("libopencsd", HAVE_CSTRACE_SUPPORT),
- FEATURE_STATUS("libperl", HAVE_LIBPERL_SUPPORT),
+ FEATURE_STATUS_TIP("libperl", HAVE_LIBPERL_SUPPORT, "Deprecated, use LIBPERL=1 and install perl-ExtUtils-Embed/libperl-dev to build with it"),
FEATURE_STATUS("libpfm4", HAVE_LIBPFM),
FEATURE_STATUS("libpython", HAVE_LIBPYTHON_SUPPORT),
FEATURE_STATUS("libslang", HAVE_SLANG_SUPPORT),
diff --git a/tools/perf/builtin-evlist.c b/tools/perf/builtin-evlist.c
index a9bd7bbef5a9..fb6e2c3c24c8 100644
--- a/tools/perf/builtin-evlist.c
+++ b/tools/perf/builtin-evlist.c
@@ -19,7 +19,8 @@
#include "util/tool.h"
#include "util/util.h"
-static int process_header_feature(struct perf_session *session __maybe_unused,
+static int process_header_feature(const struct perf_tool *tool __maybe_unused,
+ struct perf_session *session __maybe_unused,
union perf_event *event __maybe_unused)
{
session_done = 1;
diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c
index 40ba6a94f719..aa7be4fb5838 100644
--- a/tools/perf/builtin-inject.c
+++ b/tools/perf/builtin-inject.c
@@ -197,18 +197,20 @@ static int perf_event__drop_oe(const struct perf_tool *tool __maybe_unused,
}
#endif
-static int perf_event__repipe_op2_synth(struct perf_session *session,
+static int perf_event__repipe_op2_synth(const struct perf_tool *tool,
+ struct perf_session *session __maybe_unused,
union perf_event *event)
{
- return perf_event__repipe_synth(session->tool, event);
+ return perf_event__repipe_synth(tool, event);
}
-static int perf_event__repipe_op4_synth(struct perf_session *session,
+static int perf_event__repipe_op4_synth(const struct perf_tool *tool,
+ struct perf_session *session __maybe_unused,
union perf_event *event,
u64 data __maybe_unused,
const char *str __maybe_unused)
{
- return perf_event__repipe_synth(session->tool, event);
+ return perf_event__repipe_synth(tool, event);
}
static int perf_event__repipe_attr(const struct perf_tool *tool,
@@ -237,8 +239,6 @@ static int perf_event__repipe_event_update(const struct perf_tool *tool,
return perf_event__repipe_synth(tool, event);
}
-#ifdef HAVE_AUXTRACE_SUPPORT
-
static int copy_bytes(struct perf_inject *inject, struct perf_data *data, off_t size)
{
char buf[4096];
@@ -258,12 +258,11 @@ static int copy_bytes(struct perf_inject *inject, struct perf_data *data, off_t
return 0;
}
-static s64 perf_event__repipe_auxtrace(struct perf_session *session,
+static s64 perf_event__repipe_auxtrace(const struct perf_tool *tool,
+ struct perf_session *session,
union perf_event *event)
{
- const struct perf_tool *tool = session->tool;
- struct perf_inject *inject = container_of(tool, struct perf_inject,
- tool);
+ struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
int ret;
inject->have_auxtrace = true;
@@ -296,18 +295,6 @@ static s64 perf_event__repipe_auxtrace(struct perf_session *session,
return event->auxtrace.size;
}
-#else
-
-static s64
-perf_event__repipe_auxtrace(struct perf_session *session __maybe_unused,
- union perf_event *event __maybe_unused)
-{
- pr_err("AUX area tracing not supported\n");
- return -EINVAL;
-}
-
-#endif
-
static int perf_event__repipe(const struct perf_tool *tool,
union perf_event *event,
struct perf_sample *sample __maybe_unused,
@@ -661,12 +648,13 @@ static int perf_event__repipe_exit(const struct perf_tool *tool,
}
#ifdef HAVE_LIBTRACEEVENT
-static int perf_event__repipe_tracing_data(struct perf_session *session,
+static int perf_event__repipe_tracing_data(const struct perf_tool *tool,
+ struct perf_session *session,
union perf_event *event)
{
- perf_event__repipe_synth(session->tool, event);
+ perf_event__repipe_synth(tool, event);
- return perf_event__process_tracing_data(session, event);
+ return perf_event__process_tracing_data(tool, session, event);
}
#endif
@@ -1348,7 +1336,7 @@ static int process_build_id(const struct perf_tool *tool,
{
struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
- return perf_event__process_build_id(inject->session, event);
+ return perf_event__process_build_id(tool, inject->session, event);
}
static int synthesize_build_id(struct perf_inject *inject, struct dso *dso, pid_t machine_pid)
@@ -1780,9 +1768,10 @@ static int host__repipe(const struct perf_tool *tool,
return perf_event__repipe(tool, event, sample, machine);
}
-static int host__finished_init(struct perf_session *session, union perf_event *event)
+static int host__finished_init(const struct perf_tool *tool, struct perf_session *session,
+ union perf_event *event)
{
- struct perf_inject *inject = container_of(session->tool, struct perf_inject, tool);
+ struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
struct guest_session *gs = &inject->guest_session;
int ret;
@@ -1829,7 +1818,7 @@ static int host__finished_init(struct perf_session *session, union perf_event *e
if (ret)
return ret;
- return perf_event__repipe_op2_synth(session, event);
+ return perf_event__repipe_op2_synth(tool, session, event);
}
/*
@@ -2538,6 +2527,7 @@ int cmd_inject(int argc, const char **argv)
inject.tool.auxtrace = perf_event__repipe_auxtrace;
inject.tool.bpf_metadata = perf_event__repipe_op2_synth;
inject.tool.dont_split_sample_group = true;
+ inject.tool.merge_deferred_callchains = false;
inject.session = __perf_session__new(&data, &inject.tool,
/*trace_event_repipe=*/inject.output.is_pipe,
/*host_env=*/NULL);
diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c
index 7b15b4a705e4..c61369d54dd9 100644
--- a/tools/perf/builtin-kvm.c
+++ b/tools/perf/builtin-kvm.c
@@ -1636,14 +1636,6 @@ exit:
return ret;
}
-#define STRDUP_FAIL_EXIT(s) \
- ({ char *_p; \
- _p = strdup(s); \
- if (!_p) \
- return -ENOMEM; \
- _p; \
- })
-
int __weak setup_kvm_events_tp(struct perf_kvm_stat *kvm __maybe_unused)
{
return 0;
@@ -1688,7 +1680,7 @@ kvm_events_record(struct perf_kvm_stat *kvm, int argc, const char **argv)
rec_argv[i] = STRDUP_FAIL_EXIT(record_args[i]);
for (j = 0; j < events_tp_size; j++) {
- rec_argv[i++] = "-e";
+ rec_argv[i++] = STRDUP_FAIL_EXIT("-e");
rec_argv[i++] = STRDUP_FAIL_EXIT(kvm_events_tp[j]);
}
@@ -1696,7 +1688,7 @@ kvm_events_record(struct perf_kvm_stat *kvm, int argc, const char **argv)
rec_argv[i++] = STRDUP_FAIL_EXIT(kvm->file_name);
for (j = 1; j < (unsigned int)argc; j++, i++)
- rec_argv[i] = argv[j];
+ rec_argv[i] = STRDUP_FAIL_EXIT(argv[j]);
set_option_flag(record_options, 'e', "event", PARSE_OPT_HIDDEN);
set_option_flag(record_options, 0, "filter", PARSE_OPT_HIDDEN);
@@ -1719,7 +1711,13 @@ kvm_events_record(struct perf_kvm_stat *kvm, int argc, const char **argv)
set_option_flag(record_options, 0, "transaction", PARSE_OPT_DISABLED);
record_usage = kvm_stat_record_usage;
- return cmd_record(i, rec_argv);
+ ret = cmd_record(i, rec_argv);
+
+EXIT:
+ for (i = 0; i < rec_argc; i++)
+ free((void *)rec_argv[i]);
+ free(rec_argv);
+ return ret;
}
static int
@@ -2000,58 +1998,122 @@ static int __cmd_record(const char *file_name, int argc, const char **argv)
int rec_argc, i = 0, j, ret;
const char **rec_argv;
- ret = kvm_add_default_arch_event(&argc, argv);
- if (ret)
- return -EINVAL;
-
- rec_argc = argc + 2;
+ /*
+ * Besides the 2 more options "-o" and "filename",
+ * kvm_add_default_arch_event() may add 2 extra options,
+ * so allocate 4 more items.
+ */
+ rec_argc = argc + 2 + 2;
rec_argv = calloc(rec_argc + 1, sizeof(char *));
- rec_argv[i++] = strdup("record");
- rec_argv[i++] = strdup("-o");
- rec_argv[i++] = strdup(file_name);
+ if (!rec_argv)
+ return -ENOMEM;
+
+ rec_argv[i++] = STRDUP_FAIL_EXIT("record");
+ rec_argv[i++] = STRDUP_FAIL_EXIT("-o");
+ rec_argv[i++] = STRDUP_FAIL_EXIT(file_name);
for (j = 1; j < argc; j++, i++)
- rec_argv[i] = argv[j];
+ rec_argv[i] = STRDUP_FAIL_EXIT(argv[j]);
- BUG_ON(i != rec_argc);
+ BUG_ON(i + 2 != rec_argc);
+
+ ret = kvm_add_default_arch_event(&i, rec_argv);
+ if (ret)
+ goto EXIT;
+
+ ret = cmd_record(i, rec_argv);
- return cmd_record(i, rec_argv);
+EXIT:
+ for (i = 0; i < rec_argc; i++)
+ free((void *)rec_argv[i]);
+ free(rec_argv);
+ return ret;
}
static int __cmd_report(const char *file_name, int argc, const char **argv)
{
- int rec_argc, i = 0, j;
+ int rec_argc, i = 0, j, ret;
const char **rec_argv;
rec_argc = argc + 2;
rec_argv = calloc(rec_argc + 1, sizeof(char *));
- rec_argv[i++] = strdup("report");
- rec_argv[i++] = strdup("-i");
- rec_argv[i++] = strdup(file_name);
+ if (!rec_argv)
+ return -ENOMEM;
+
+ rec_argv[i++] = STRDUP_FAIL_EXIT("report");
+ rec_argv[i++] = STRDUP_FAIL_EXIT("-i");
+ rec_argv[i++] = STRDUP_FAIL_EXIT(file_name);
for (j = 1; j < argc; j++, i++)
- rec_argv[i] = argv[j];
+ rec_argv[i] = STRDUP_FAIL_EXIT(argv[j]);
BUG_ON(i != rec_argc);
- return cmd_report(i, rec_argv);
+ ret = cmd_report(i, rec_argv);
+
+EXIT:
+ for (i = 0; i < rec_argc; i++)
+ free((void *)rec_argv[i]);
+ free(rec_argv);
+ return ret;
}
static int
__cmd_buildid_list(const char *file_name, int argc, const char **argv)
{
- int rec_argc, i = 0, j;
+ int rec_argc, i = 0, j, ret;
const char **rec_argv;
rec_argc = argc + 2;
rec_argv = calloc(rec_argc + 1, sizeof(char *));
- rec_argv[i++] = strdup("buildid-list");
- rec_argv[i++] = strdup("-i");
- rec_argv[i++] = strdup(file_name);
+ if (!rec_argv)
+ return -ENOMEM;
+
+ rec_argv[i++] = STRDUP_FAIL_EXIT("buildid-list");
+ rec_argv[i++] = STRDUP_FAIL_EXIT("-i");
+ rec_argv[i++] = STRDUP_FAIL_EXIT(file_name);
for (j = 1; j < argc; j++, i++)
- rec_argv[i] = argv[j];
+ rec_argv[i] = STRDUP_FAIL_EXIT(argv[j]);
BUG_ON(i != rec_argc);
- return cmd_buildid_list(i, rec_argv);
+ ret = cmd_buildid_list(i, rec_argv);
+
+EXIT:
+ for (i = 0; i < rec_argc; i++)
+ free((void *)rec_argv[i]);
+ free(rec_argv);
+ return ret;
+}
+
+static int __cmd_top(int argc, const char **argv)
+{
+ int rec_argc, i = 0, ret;
+ const char **rec_argv;
+
+ /*
+ * kvm_add_default_arch_event() may add 2 extra options, so
+ * allocate 2 more pointers in adavance.
+ */
+ rec_argc = argc + 2;
+ rec_argv = calloc(rec_argc + 1, sizeof(char *));
+ if (!rec_argv)
+ return -ENOMEM;
+
+ for (i = 0; i < argc; i++)
+ rec_argv[i] = STRDUP_FAIL_EXIT(argv[i]);
+
+ BUG_ON(i != argc);
+
+ ret = kvm_add_default_arch_event(&i, rec_argv);
+ if (ret)
+ goto EXIT;
+
+ ret = cmd_top(i, rec_argv);
+
+EXIT:
+ for (i = 0; i < rec_argc; i++)
+ free((void *)rec_argv[i]);
+ free(rec_argv);
+ return ret;
}
int cmd_kvm(int argc, const char **argv)
@@ -2114,7 +2176,7 @@ int cmd_kvm(int argc, const char **argv)
else if (strlen(argv[0]) > 2 && strstarts("diff", argv[0]))
return cmd_diff(argc, argv);
else if (!strcmp(argv[0], "top"))
- return cmd_top(argc, argv);
+ return __cmd_top(argc, argv);
else if (strlen(argv[0]) > 2 && strstarts("buildid-list", argv[0]))
return __cmd_buildid_list(file_name, argc, argv);
#if defined(HAVE_KVM_STAT_SUPPORT) && defined(HAVE_LIBTRACEEVENT)
diff --git a/tools/perf/builtin-kwork.c b/tools/perf/builtin-kwork.c
index d2e08de5976d..7f3068264568 100644
--- a/tools/perf/builtin-kwork.c
+++ b/tools/perf/builtin-kwork.c
@@ -2273,12 +2273,23 @@ static void setup_event_list(struct perf_kwork *kwork,
pr_debug("\n");
}
+#define STRDUP_FAIL_EXIT(s) \
+ ({ char *_p; \
+ _p = strdup(s); \
+ if (!_p) { \
+ ret = -ENOMEM; \
+ goto EXIT; \
+ } \
+ _p; \
+ })
+
static int perf_kwork__record(struct perf_kwork *kwork,
int argc, const char **argv)
{
const char **rec_argv;
unsigned int rec_argc, i, j;
struct kwork_class *class;
+ int ret;
const char *const record_args[] = {
"record",
@@ -2298,17 +2309,17 @@ static int perf_kwork__record(struct perf_kwork *kwork,
return -ENOMEM;
for (i = 0; i < ARRAY_SIZE(record_args); i++)
- rec_argv[i] = strdup(record_args[i]);
+ rec_argv[i] = STRDUP_FAIL_EXIT(record_args[i]);
list_for_each_entry(class, &kwork->class_list, list) {
for (j = 0; j < class->nr_tracepoints; j++) {
- rec_argv[i++] = strdup("-e");
- rec_argv[i++] = strdup(class->tp_handlers[j].name);
+ rec_argv[i++] = STRDUP_FAIL_EXIT("-e");
+ rec_argv[i++] = STRDUP_FAIL_EXIT(class->tp_handlers[j].name);
}
}
for (j = 1; j < (unsigned int)argc; j++, i++)
- rec_argv[i] = argv[j];
+ rec_argv[i] = STRDUP_FAIL_EXIT(argv[j]);
BUG_ON(i != rec_argc);
@@ -2317,7 +2328,13 @@ static int perf_kwork__record(struct perf_kwork *kwork,
pr_debug("%s ", rec_argv[j]);
pr_debug("\n");
- return cmd_record(i, rec_argv);
+ ret = cmd_record(i, rec_argv);
+
+EXIT:
+ for (i = 0; i < rec_argc; i++)
+ free((void *)rec_argv[i]);
+ free(rec_argv);
+ return ret;
}
int cmd_kwork(int argc, const char **argv)
diff --git a/tools/perf/builtin-list.c b/tools/perf/builtin-list.c
index caf42276bd0f..5cbca0bacd35 100644
--- a/tools/perf/builtin-list.c
+++ b/tools/perf/builtin-list.c
@@ -130,7 +130,7 @@ static void default_print_event(void *ps, const char *topic,
if (deprecated && !print_state->deprecated)
return;
- if (print_state->pmu_glob && pmu_name && !strglobmatch(pmu_name, print_state->pmu_glob))
+ if (print_state->pmu_glob && (!pmu_name || !strglobmatch(pmu_name, print_state->pmu_glob)))
return;
if (print_state->exclude_abi && pmu_type < PERF_TYPE_MAX && pmu_type != PERF_TYPE_RAW)
@@ -283,8 +283,8 @@ static void default_print_metric(void *ps,
}
struct json_print_state {
- /** @fp: File to write output to. */
- FILE *fp;
+ /** The shared print_state */
+ struct print_state common;
/** Should a separator be printed prior to the next item? */
bool need_sep;
};
@@ -292,7 +292,7 @@ struct json_print_state {
static void json_print_start(void *ps)
{
struct json_print_state *print_state = ps;
- FILE *fp = print_state->fp;
+ FILE *fp = print_state->common.fp;
fprintf(fp, "[\n");
}
@@ -300,7 +300,7 @@ static void json_print_start(void *ps)
static void json_print_end(void *ps)
{
struct json_print_state *print_state = ps;
- FILE *fp = print_state->fp;
+ FILE *fp = print_state->common.fp;
fprintf(fp, "%s]\n", print_state->need_sep ? "\n" : "");
}
@@ -370,9 +370,26 @@ static void json_print_event(void *ps, const char *topic,
{
struct json_print_state *print_state = ps;
bool need_sep = false;
- FILE *fp = print_state->fp;
+ FILE *fp = print_state->common.fp;
struct strbuf buf;
+ if (deprecated && !print_state->common.deprecated)
+ return;
+
+ if (print_state->common.pmu_glob &&
+ (!pmu_name || !strglobmatch(pmu_name, print_state->common.pmu_glob)))
+ return;
+
+ if (print_state->common.exclude_abi && pmu_type < PERF_TYPE_MAX &&
+ pmu_type != PERF_TYPE_RAW)
+ return;
+
+ if (print_state->common.event_glob &&
+ (!event_name || !strglobmatch(event_name, print_state->common.event_glob)) &&
+ (!event_alias || !strglobmatch(event_alias, print_state->common.event_glob)) &&
+ (!topic || !strglobmatch_nocase(topic, print_state->common.event_glob)))
+ return;
+
strbuf_init(&buf, 0);
fprintf(fp, "%s{\n", print_state->need_sep ? ",\n" : "");
print_state->need_sep = true;
@@ -446,9 +463,16 @@ static void json_print_metric(void *ps __maybe_unused, const char *group,
{
struct json_print_state *print_state = ps;
bool need_sep = false;
- FILE *fp = print_state->fp;
+ FILE *fp = print_state->common.fp;
struct strbuf buf;
+ if (print_state->common.event_glob &&
+ (!print_state->common.metrics || !name ||
+ !strglobmatch(name, print_state->common.event_glob)) &&
+ (!print_state->common.metricgroups || !group ||
+ !strglobmatch(group, print_state->common.event_glob)))
+ return;
+
strbuf_init(&buf, 0);
fprintf(fp, "%s{\n", print_state->need_sep ? ",\n" : "");
print_state->need_sep = true;
@@ -521,10 +545,12 @@ int cmd_list(int argc, const char **argv)
.fp = stdout,
.desc = true,
};
- struct print_state json_ps = {
- .fp = stdout,
+ struct json_print_state json_ps = {
+ .common = {
+ .fp = stdout,
+ },
};
- void *ps = &default_ps;
+ struct print_state *ps = &default_ps;
struct print_callbacks print_cb = {
.print_start = default_print_start,
.print_end = default_print_end,
@@ -572,9 +598,11 @@ int cmd_list(int argc, const char **argv)
argc = parse_options(argc, argv, list_options, list_usage,
PARSE_OPT_STOP_AT_NON_OPTION);
+ if (json)
+ ps = &json_ps.common;
+
if (output_path) {
- default_ps.fp = fopen(output_path, "w");
- json_ps.fp = default_ps.fp;
+ ps->fp = fopen(output_path, "w");
}
setup_pager();
@@ -590,14 +618,13 @@ int cmd_list(int argc, const char **argv)
.print_metric = json_print_metric,
.skip_duplicate_pmus = json_skip_duplicate_pmus,
};
- ps = &json_ps;
} else {
- default_ps.last_topic = strdup("");
- assert(default_ps.last_topic);
- default_ps.visited_metrics = strlist__new(NULL, NULL);
- assert(default_ps.visited_metrics);
+ ps->last_topic = strdup("");
+ assert(ps->last_topic);
+ ps->visited_metrics = strlist__new(NULL, NULL);
+ assert(ps->visited_metrics);
if (unit_name)
- default_ps.pmu_glob = strdup(unit_name);
+ ps->pmu_glob = strdup(unit_name);
else if (cputype) {
const struct perf_pmu *pmu = perf_pmus__pmu_for_pmu_filter(cputype);
@@ -606,14 +633,16 @@ int cmd_list(int argc, const char **argv)
ret = -1;
goto out;
}
- default_ps.pmu_glob = strdup(pmu->name);
+ ps->pmu_glob = strdup(pmu->name);
}
}
print_cb.print_start(ps);
if (argc == 0) {
- default_ps.metrics = true;
- default_ps.metricgroups = true;
+ if (!unit_name) {
+ ps->metrics = true;
+ ps->metricgroups = true;
+ }
print_events(&print_cb, ps);
goto out;
}
@@ -633,41 +662,58 @@ int cmd_list(int argc, const char **argv)
zfree(&default_ps.pmu_glob);
default_ps.pmu_glob = old_pmu_glob;
} else if (strcmp(argv[i], "hw") == 0 ||
- strcmp(argv[i], "hardware") == 0)
- print_symbol_events(&print_cb, ps, PERF_TYPE_HARDWARE,
- event_symbols_hw, PERF_COUNT_HW_MAX);
- else if (strcmp(argv[i], "sw") == 0 ||
+ strcmp(argv[i], "hardware") == 0) {
+ char *old_event_glob = ps->event_glob;
+
+ ps->event_glob = strdup("legacy hardware");
+ if (!ps->event_glob) {
+ ret = -1;
+ goto out;
+ }
+ perf_pmus__print_pmu_events(&print_cb, ps);
+ zfree(&ps->event_glob);
+ ps->event_glob = old_event_glob;
+ } else if (strcmp(argv[i], "sw") == 0 ||
strcmp(argv[i], "software") == 0) {
- char *old_pmu_glob = default_ps.pmu_glob;
+ char *old_pmu_glob = ps->pmu_glob;
static const char * const sw_globs[] = { "software", "tool" };
for (size_t j = 0; j < ARRAY_SIZE(sw_globs); j++) {
- default_ps.pmu_glob = strdup(sw_globs[j]);
- if (!default_ps.pmu_glob) {
+ ps->pmu_glob = strdup(sw_globs[j]);
+ if (!ps->pmu_glob) {
ret = -1;
goto out;
}
perf_pmus__print_pmu_events(&print_cb, ps);
- zfree(&default_ps.pmu_glob);
+ zfree(&ps->pmu_glob);
}
- default_ps.pmu_glob = old_pmu_glob;
+ ps->pmu_glob = old_pmu_glob;
} else if (strcmp(argv[i], "cache") == 0 ||
- strcmp(argv[i], "hwcache") == 0)
- print_hwcache_events(&print_cb, ps);
- else if (strcmp(argv[i], "pmu") == 0) {
- default_ps.exclude_abi = true;
+ strcmp(argv[i], "hwcache") == 0) {
+ char *old_event_glob = ps->event_glob;
+
+ ps->event_glob = strdup("legacy cache");
+ if (!ps->event_glob) {
+ ret = -1;
+ goto out;
+ }
perf_pmus__print_pmu_events(&print_cb, ps);
- default_ps.exclude_abi = false;
+ zfree(&ps->event_glob);
+ ps->event_glob = old_event_glob;
+ } else if (strcmp(argv[i], "pmu") == 0) {
+ ps->exclude_abi = true;
+ perf_pmus__print_pmu_events(&print_cb, ps);
+ ps->exclude_abi = false;
} else if (strcmp(argv[i], "sdt") == 0)
print_sdt_events(&print_cb, ps);
else if (strcmp(argv[i], "metric") == 0 || strcmp(argv[i], "metrics") == 0) {
- default_ps.metricgroups = false;
- default_ps.metrics = true;
+ ps->metricgroups = false;
+ ps->metrics = true;
metricgroup__print(&print_cb, ps);
} else if (strcmp(argv[i], "metricgroup") == 0 ||
strcmp(argv[i], "metricgroups") == 0) {
- default_ps.metricgroups = true;
- default_ps.metrics = false;
+ ps->metricgroups = true;
+ ps->metrics = false;
metricgroup__print(&print_cb, ps);
}
#ifdef HAVE_LIBPFM
@@ -675,43 +721,40 @@ int cmd_list(int argc, const char **argv)
print_libpfm_events(&print_cb, ps);
#endif
else if ((sep = strchr(argv[i], ':')) != NULL) {
- char *old_pmu_glob = default_ps.pmu_glob;
- char *old_event_glob = default_ps.event_glob;
+ char *old_pmu_glob = ps->pmu_glob;
+ char *old_event_glob = ps->event_glob;
- default_ps.event_glob = strdup(argv[i]);
- if (!default_ps.event_glob) {
+ ps->event_glob = strdup(argv[i]);
+ if (!ps->event_glob) {
ret = -1;
goto out;
}
- default_ps.pmu_glob = strdup("tracepoint");
- if (!default_ps.pmu_glob) {
- zfree(&default_ps.event_glob);
+ ps->pmu_glob = strdup("tracepoint");
+ if (!ps->pmu_glob) {
+ zfree(&ps->event_glob);
ret = -1;
goto out;
}
perf_pmus__print_pmu_events(&print_cb, ps);
- zfree(&default_ps.pmu_glob);
- default_ps.pmu_glob = old_pmu_glob;
+ zfree(&ps->pmu_glob);
+ ps->pmu_glob = old_pmu_glob;
print_sdt_events(&print_cb, ps);
- default_ps.metrics = true;
- default_ps.metricgroups = true;
+ ps->metrics = true;
+ ps->metricgroups = true;
metricgroup__print(&print_cb, ps);
- zfree(&default_ps.event_glob);
- default_ps.event_glob = old_event_glob;
+ zfree(&ps->event_glob);
+ ps->event_glob = old_event_glob;
} else {
if (asprintf(&s, "*%s*", argv[i]) < 0) {
printf("Critical: Not enough memory! Trying to continue...\n");
continue;
}
- default_ps.event_glob = s;
- print_symbol_events(&print_cb, ps, PERF_TYPE_HARDWARE,
- event_symbols_hw, PERF_COUNT_HW_MAX);
- print_hwcache_events(&print_cb, ps);
+ ps->event_glob = s;
perf_pmus__print_pmu_events(&print_cb, ps);
print_sdt_events(&print_cb, ps);
- default_ps.metrics = true;
- default_ps.metricgroups = true;
+ ps->metrics = true;
+ ps->metricgroups = true;
metricgroup__print(&print_cb, ps);
free(s);
}
@@ -719,12 +762,12 @@ int cmd_list(int argc, const char **argv)
out:
print_cb.print_end(ps);
- free(default_ps.pmu_glob);
- free(default_ps.last_topic);
- free(default_ps.last_metricgroups);
- strlist__delete(default_ps.visited_metrics);
+ free(ps->pmu_glob);
+ free(ps->last_topic);
+ free(ps->last_metricgroups);
+ strlist__delete(ps->visited_metrics);
if (output_path)
- fclose(default_ps.fp);
+ fclose(ps->fp);
return ret;
}
diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c
index fd49703021fd..e8962c985d34 100644
--- a/tools/perf/builtin-lock.c
+++ b/tools/perf/builtin-lock.c
@@ -1867,6 +1867,7 @@ static int __cmd_report(bool display_info)
eops.sample = process_sample_event;
eops.comm = perf_event__process_comm;
eops.mmap = perf_event__process_mmap;
+ eops.mmap2 = perf_event__process_mmap2;
eops.namespaces = perf_event__process_namespaces;
eops.tracing_data = perf_event__process_tracing_data;
session = perf_session__new(&data, &eops);
@@ -2009,6 +2010,7 @@ static int __cmd_contention(int argc, const char **argv)
.owner = show_lock_owner,
.cgroups = RB_ROOT,
};
+ struct perf_env host_env;
lockhash_table = calloc(LOCKHASH_SIZE, sizeof(*lockhash_table));
if (!lockhash_table)
@@ -2022,9 +2024,13 @@ static int __cmd_contention(int argc, const char **argv)
eops.sample = process_sample_event;
eops.comm = perf_event__process_comm;
eops.mmap = perf_event__process_mmap;
+ eops.mmap2 = perf_event__process_mmap2;
eops.tracing_data = perf_event__process_tracing_data;
- session = perf_session__new(use_bpf ? NULL : &data, &eops);
+ perf_env__init(&host_env);
+ session = __perf_session__new(use_bpf ? NULL : &data, &eops,
+ /*trace_event_repipe=*/false, &host_env);
+
if (IS_ERR(session)) {
pr_err("Initializing perf session failed\n");
err = PTR_ERR(session);
@@ -2142,6 +2148,7 @@ out_delete:
evlist__delete(con.evlist);
lock_contention_finish(&con);
perf_session__delete(session);
+ perf_env__exit(&host_env);
zfree(&lockhash_table);
return err;
}
diff --git a/tools/perf/builtin-mem.c b/tools/perf/builtin-mem.c
index c6496adff3fe..d43500b92a7b 100644
--- a/tools/perf/builtin-mem.c
+++ b/tools/perf/builtin-mem.c
@@ -1,4 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
+#include <errno.h>
#include <inttypes.h>
#include <sys/types.h>
#include <sys/stat.h>
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 7ea3a11aca70..2584d0d8bc82 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -730,8 +730,6 @@ static void record__sig_exit(void)
raise(signr);
}
-#ifdef HAVE_AUXTRACE_SUPPORT
-
static int record__process_auxtrace(const struct perf_tool *tool,
struct mmap *map,
union perf_event *event, void *data1,
@@ -889,40 +887,6 @@ static int record__auxtrace_init(struct record *rec)
return auxtrace_parse_filters(rec->evlist);
}
-#else
-
-static inline
-int record__auxtrace_mmap_read(struct record *rec __maybe_unused,
- struct mmap *map __maybe_unused)
-{
- return 0;
-}
-
-static inline
-void record__read_auxtrace_snapshot(struct record *rec __maybe_unused,
- bool on_exit __maybe_unused)
-{
-}
-
-static inline
-int auxtrace_record__snapshot_start(struct auxtrace_record *itr __maybe_unused)
-{
- return 0;
-}
-
-static inline
-int record__auxtrace_snapshot_exit(struct record *rec __maybe_unused)
-{
- return 0;
-}
-
-static int record__auxtrace_init(struct record *rec __maybe_unused)
-{
- return 0;
-}
-
-#endif
-
static int record__config_text_poke(struct evlist *evlist)
{
struct evsel *evsel;
@@ -983,7 +947,6 @@ static int record__config_tracking_events(struct record *rec)
*/
if (opts->target.initial_delay || target__has_cpu(&opts->target) ||
perf_pmus__num_core_pmus() > 1) {
-
/*
* User space tasks can migrate between CPUs, so when tracing
* selected CPUs, sideband for all CPUs is still needed.
@@ -1388,10 +1351,27 @@ static int record__open(struct record *rec)
struct perf_session *session = rec->session;
struct record_opts *opts = &rec->opts;
int rc = 0;
+ bool skipped = false;
+ bool removed_tracking = false;
evlist__for_each_entry(evlist, pos) {
+ if (removed_tracking) {
+ /*
+ * Normally the head of the list has tracking enabled
+ * for sideband data like mmaps. If this event is
+ * removed, make sure to add tracking to the next
+ * processed event.
+ */
+ if (!pos->tracking) {
+ pos->tracking = true;
+ evsel__config(pos, opts, &callchain_param);
+ }
+ removed_tracking = false;
+ }
try_again:
if (evsel__open(pos, pos->core.cpus, pos->core.threads) < 0) {
+ bool report_error = true;
+
if (evsel__fallback(pos, &opts->target, errno, msg, sizeof(msg))) {
if (verbose > 0)
ui__warning("%s\n", msg);
@@ -1403,15 +1383,72 @@ try_again:
pos = evlist__reset_weak_group(evlist, pos, true);
goto try_again;
}
- rc = -errno;
- evsel__open_strerror(pos, &opts->target, errno, msg, sizeof(msg));
- ui__error("%s\n", msg);
- goto out;
+#if defined(__aarch64__) || defined(__arm__)
+ if (strstr(evsel__name(pos), "cycles")) {
+ struct evsel *pos2;
+ /*
+ * Unfortunately ARM has many events named
+ * "cycles" on PMUs like the system-level (L3)
+ * cache which don't support sampling. Only
+ * display such failures to open when there is
+ * only 1 cycles event or verbose is enabled.
+ */
+ evlist__for_each_entry(evlist, pos2) {
+ if (pos2 == pos)
+ continue;
+ if (strstr(evsel__name(pos2), "cycles")) {
+ report_error = false;
+ break;
+ }
+ }
+ }
+#endif
+ if (report_error || verbose > 0) {
+ ui__error("Failure to open event '%s' on PMU '%s' which will be "
+ "removed.\n%s\n",
+ evsel__name(pos), evsel__pmu_name(pos), msg);
+ }
+ if (pos->tracking)
+ removed_tracking = true;
+ pos->skippable = true;
+ skipped = true;
}
-
- pos->supported = true;
}
+ if (skipped) {
+ struct evsel *tmp;
+ int idx = 0;
+ bool evlist_empty = true;
+
+ /* Remove evsels that failed to open and update indices. */
+ evlist__for_each_entry_safe(evlist, tmp, pos) {
+ if (pos->skippable) {
+ evlist__remove(evlist, pos);
+ continue;
+ }
+
+ /*
+ * Note, dummy events may be command line parsed or
+ * added by the tool. We care about supporting `perf
+ * record -e dummy` which may be used as a permission
+ * check. Dummy events that are added to the command
+ * line and opened along with other events that fail,
+ * will still fail as if the dummy events were tool
+ * added events for the sake of code simplicity.
+ */
+ if (!evsel__is_dummy_event(pos))
+ evlist_empty = false;
+ }
+ evlist__for_each_entry(evlist, pos) {
+ pos->core.idx = idx++;
+ }
+ /* If list is empty then fail. */
+ if (evlist_empty) {
+ ui__error("Failure to open any events for recording.\n");
+ rc = -1;
+ goto out;
+ }
+ }
if (symbol_conf.kptr_restrict && !evlist__exclude_kernel(evlist)) {
pr_warning(
"WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
@@ -1817,15 +1854,14 @@ record__finish_output(struct record *rec)
}
/* Buildid scanning disabled or build ID in kernel and synthesized map events. */
- if (!rec->no_buildid) {
+ if (!rec->no_buildid || !rec->no_buildid_cache) {
process_buildids(rec);
if (rec->buildid_all)
perf_session__dsos_hit_all(rec->session);
}
perf_session__write_header(rec->session, rec->evlist, fd, true);
-
- return;
+ perf_session__cache_build_ids(rec->session);
}
static int record__synthesize_workload(struct record *rec, bool tail)
@@ -2885,11 +2921,11 @@ out_free_threads:
rec->bytes_written += off_cpu_write(rec->session);
record__read_lost_samples(rec);
- record__synthesize(rec, true);
/* this will be recalculated during process_buildids() */
rec->samples = 0;
if (!err) {
+ record__synthesize(rec, true);
if (!rec->timestamp_filename) {
record__finish_output(rec);
} else {
@@ -3010,7 +3046,7 @@ static int perf_record_config(const char *var, const char *value, void *cb)
else if (!strcmp(value, "no-cache"))
rec->no_buildid_cache = true;
else if (!strcmp(value, "skip"))
- rec->no_buildid = true;
+ rec->no_buildid = rec->no_buildid_cache = true;
else if (!strcmp(value, "mmap"))
rec->buildid_mmap = true;
else if (!strcmp(value, "no-mmap"))
@@ -4119,24 +4155,25 @@ int cmd_record(int argc, const char **argv)
record.opts.record_switch_events = true;
}
- if (!rec->buildid_mmap) {
- pr_debug("Disabling build id in synthesized mmap2 events.\n");
- symbol_conf.no_buildid_mmap2 = true;
- } else if (rec->buildid_mmap_set) {
- /*
- * Explicitly passing --buildid-mmap disables buildid processing
- * and cache generation.
- */
- rec->no_buildid = true;
- }
if (rec->buildid_mmap && !perf_can_record_build_id()) {
pr_warning("Missing support for build id in kernel mmap events.\n"
"Disable this warning with --no-buildid-mmap\n");
rec->buildid_mmap = false;
}
+
if (rec->buildid_mmap) {
/* Enable perf_event_attr::build_id bit. */
rec->opts.build_id = true;
+ /* Disable build-ID table in the header. */
+ rec->no_buildid = true;
+ } else {
+ pr_debug("Disabling build id in synthesized mmap2 events.\n");
+ symbol_conf.no_buildid_mmap2 = true;
+ }
+
+ if (rec->no_buildid_set && rec->no_buildid) {
+ /* -B implies -N for historic reasons. */
+ rec->no_buildid_cache = true;
}
if (rec->opts.record_cgroup && !perf_can_record_cgroup()) {
@@ -4233,7 +4270,7 @@ int cmd_record(int argc, const char **argv)
err = -ENOMEM;
- if (rec->no_buildid_cache || rec->no_buildid) {
+ if (rec->no_buildid_cache) {
disable_buildid_cache();
} else if (rec->switch_output.enabled) {
/*
@@ -4268,9 +4305,13 @@ int cmd_record(int argc, const char **argv)
record.opts.tail_synthesize = true;
if (rec->evlist->core.nr_entries == 0) {
- err = parse_event(rec->evlist, "cycles:P");
- if (err)
+ struct evlist *def_evlist = evlist__new_default();
+
+ if (!def_evlist)
goto out;
+
+ evlist__splice_list_tail(rec->evlist, &def_evlist->core.entries);
+ evlist__delete(def_evlist);
}
if (rec->opts.target.tid && !rec->opts.no_inherit_set)
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 35df04dad2fd..add6b1c2aaf0 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -240,10 +240,11 @@ static void setup_forced_leader(struct report *report,
evlist__force_leader(evlist);
}
-static int process_feature_event(struct perf_session *session,
+static int process_feature_event(const struct perf_tool *tool,
+ struct perf_session *session,
union perf_event *event)
{
- struct report *rep = container_of(session->tool, struct report, tool);
+ struct report *rep = container_of(tool, struct report, tool);
if (event->feat.feat_id < HEADER_LAST_FEATURE)
return perf_event__process_feature(session, event);
@@ -1613,6 +1614,7 @@ repeat:
report.tool.event_update = perf_event__process_event_update;
report.tool.feature = process_feature_event;
report.tool.ordering_requires_timestamps = true;
+ report.tool.merge_deferred_callchains = !dump_trace;
session = perf_session__new(&data, &report.tool);
if (IS_ERR(session)) {
diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c
index f166d6cbc083..eca3b1c58c4b 100644
--- a/tools/perf/builtin-sched.c
+++ b/tools/perf/builtin-sched.c
@@ -1532,35 +1532,24 @@ static int process_sched_wakeup_ignore(const struct perf_tool *tool __maybe_unus
return 0;
}
-union map_priv {
- void *ptr;
- bool color;
-};
-
static bool thread__has_color(struct thread *thread)
{
- union map_priv priv = {
- .ptr = thread__priv(thread),
- };
-
- return priv.color;
+ return thread__priv(thread) != NULL;
}
static struct thread*
map__findnew_thread(struct perf_sched *sched, struct machine *machine, pid_t pid, pid_t tid)
{
struct thread *thread = machine__findnew_thread(machine, pid, tid);
- union map_priv priv = {
- .color = false,
- };
+ bool color = false;
if (!sched->map.color_pids || !thread || thread__priv(thread))
return thread;
if (thread_map__has(sched->map.color_pids, tid))
- priv.color = true;
+ color = true;
- thread__set_priv(thread, priv.ptr);
+ thread__set_priv(thread, color ? ((void*)1) : NULL);
return thread;
}
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index d9fbdcf72f25..62e43d3c5ad7 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -33,6 +33,7 @@
#include "util/path.h"
#include "util/event.h"
#include "util/mem-info.h"
+#include "util/metricgroup.h"
#include "ui/ui.h"
#include "print_binary.h"
#include "print_insn.h"
@@ -43,6 +44,7 @@
#include <linux/stringify.h>
#include <linux/time64.h>
#include <linux/zalloc.h>
+#include <linux/unaligned.h>
#include <sys/utsname.h>
#include "asm/bug.h"
#include "util/mem-events.h"
@@ -223,7 +225,7 @@ enum {
OUTPUT_TYPE_MAX
};
-// We need to refactor the evsel->priv use in in 'perf script' to allow for
+// We need to refactor the evsel->priv use in 'perf script' to allow for
// using that area, that is being used only in some cases.
#define OUTPUT_TYPE_UNSET -1
@@ -340,16 +342,8 @@ struct evsel_script {
char *filename;
FILE *fp;
u64 samples;
- /* For metric output */
- u64 val;
- int gnum;
};
-static inline struct evsel_script *evsel_script(struct evsel *evsel)
-{
- return (struct evsel_script *)evsel->priv;
-}
-
static struct evsel_script *evsel_script__new(struct evsel *evsel, struct perf_data *data)
{
struct evsel_script *es = zalloc(sizeof(*es));
@@ -1224,7 +1218,6 @@ static int any_dump_insn(struct evsel *evsel __maybe_unused,
u8 *inbuf, int inlen, int *lenp,
FILE *fp)
{
-#ifdef HAVE_LIBCAPSTONE_SUPPORT
if (PRINT_FIELD(BRSTACKDISASM)) {
int printed = fprintf_insn_asm(x->machine, x->thread, x->cpumode, x->is64bit,
(uint8_t *)inbuf, inlen, ip, lenp,
@@ -1233,7 +1226,6 @@ static int any_dump_insn(struct evsel *evsel __maybe_unused,
if (printed > 0)
return printed;
}
-#endif
return fprintf(fp, "%s", dump_insn(x, ip, inbuf, inlen, lenp));
}
@@ -2003,6 +1995,25 @@ static int perf_sample__fprintf_synth_iflag_chg(struct perf_sample *sample, FILE
return len + perf_sample__fprintf_pt_spacing(len, fp);
}
+static int perf_sample__fprintf_synth_vpadtl(struct perf_sample *data, FILE *fp)
+{
+ struct powerpc_vpadtl_entry *dtl = (struct powerpc_vpadtl_entry *)data->raw_data;
+ int len;
+
+ len = fprintf(fp, "timebase: %" PRIu64 " dispatch_reason:%s, preempt_reason:%s,\n"
+ "enqueue_to_dispatch_time:%d, ready_to_enqueue_time:%d,"
+ "waiting_to_ready_time:%d, processor_id: %d",
+ get_unaligned_be64(&dtl->timebase),
+ dispatch_reasons[dtl->dispatch_reason],
+ preempt_reasons[dtl->preempt_reason],
+ be32_to_cpu(dtl->enqueue_to_dispatch_time),
+ be32_to_cpu(dtl->ready_to_enqueue_time),
+ be32_to_cpu(dtl->waiting_to_ready_time),
+ be16_to_cpu(dtl->processor_id));
+
+ return len;
+}
+
static int perf_sample__fprintf_synth(struct perf_sample *sample,
struct evsel *evsel, FILE *fp)
{
@@ -2025,6 +2036,8 @@ static int perf_sample__fprintf_synth(struct perf_sample *sample,
return perf_sample__fprintf_synth_evt(sample, fp);
case PERF_SYNTH_INTEL_IFLAG_CHG:
return perf_sample__fprintf_synth_iflag_chg(sample, fp);
+ case PERF_SYNTH_POWERPC_VPA_DTL:
+ return perf_sample__fprintf_synth_vpadtl(sample, fp);
default:
break;
}
@@ -2104,13 +2117,161 @@ static void script_new_line(struct perf_stat_config *config __maybe_unused,
fputs("\tmetric: ", mctx->fp);
}
-static void perf_sample__fprint_metric(struct perf_script *script,
- struct thread *thread,
+struct script_find_metrics_args {
+ struct evlist *evlist;
+ bool system_wide;
+};
+
+static struct evsel *map_metric_evsel_to_script_evsel(struct evlist *script_evlist,
+ struct evsel *metric_evsel)
+{
+ struct evsel *script_evsel;
+
+ evlist__for_each_entry(script_evlist, script_evsel) {
+ /* Skip if perf_event_attr differ. */
+ if (metric_evsel->core.attr.type != script_evsel->core.attr.type)
+ continue;
+ if (metric_evsel->core.attr.config != script_evsel->core.attr.config)
+ continue;
+ /* Skip if the script event has a metric_id that doesn't match. */
+ if (script_evsel->metric_id &&
+ strcmp(evsel__metric_id(metric_evsel), evsel__metric_id(script_evsel))) {
+ pr_debug("Skipping matching evsel due to differing metric ids '%s' vs '%s'\n",
+ evsel__metric_id(metric_evsel), evsel__metric_id(script_evsel));
+ continue;
+ }
+ return script_evsel;
+ }
+ return NULL;
+}
+
+static int script_find_metrics(const struct pmu_metric *pm,
+ const struct pmu_metrics_table *table __maybe_unused,
+ void *data)
+{
+ struct script_find_metrics_args *args = data;
+ struct evlist *script_evlist = args->evlist;
+ struct evlist *metric_evlist = evlist__new();
+ struct evsel *metric_evsel;
+ int ret = metricgroup__parse_groups(metric_evlist,
+ /*pmu=*/"all",
+ pm->metric_name,
+ /*metric_no_group=*/false,
+ /*metric_no_merge=*/false,
+ /*metric_no_threshold=*/true,
+ /*user_requested_cpu_list=*/NULL,
+ args->system_wide,
+ /*hardware_aware_grouping=*/false);
+
+ if (ret) {
+ /* Metric parsing failed but continue the search. */
+ goto out;
+ }
+
+ /*
+ * Check the script_evlist has an entry for each metric_evlist entry. If
+ * the script evsel was already set up avoid changing data that may
+ * break it.
+ */
+ evlist__for_each_entry(metric_evlist, metric_evsel) {
+ struct evsel *script_evsel =
+ map_metric_evsel_to_script_evsel(script_evlist, metric_evsel);
+ struct evsel *new_metric_leader;
+
+ if (!script_evsel) {
+ pr_debug("Skipping metric '%s' as evsel '%s' / '%s' is missing\n",
+ pm->metric_name, evsel__name(metric_evsel),
+ evsel__metric_id(metric_evsel));
+ goto out;
+ }
+
+ if (script_evsel->metric_leader == NULL)
+ continue;
+
+ if (metric_evsel->metric_leader == metric_evsel) {
+ new_metric_leader = script_evsel;
+ } else {
+ new_metric_leader =
+ map_metric_evsel_to_script_evsel(script_evlist,
+ metric_evsel->metric_leader);
+ }
+ /* Mismatching evsel leaders. */
+ if (script_evsel->metric_leader != new_metric_leader) {
+ pr_debug("Skipping metric '%s' due to mismatching evsel metric leaders '%s' vs '%s'\n",
+ pm->metric_name, evsel__metric_id(metric_evsel),
+ evsel__metric_id(script_evsel));
+ goto out;
+ }
+ }
+ /*
+ * Metric events match those in the script evlist, copy metric evsel
+ * data into the script evlist.
+ */
+ evlist__for_each_entry(metric_evlist, metric_evsel) {
+ struct evsel *script_evsel =
+ map_metric_evsel_to_script_evsel(script_evlist, metric_evsel);
+ struct metric_event *metric_me = metricgroup__lookup(&metric_evlist->metric_events,
+ metric_evsel,
+ /*create=*/false);
+
+ if (script_evsel->metric_id == NULL) {
+ script_evsel->metric_id = metric_evsel->metric_id;
+ metric_evsel->metric_id = NULL;
+ }
+
+ if (script_evsel->metric_leader == NULL) {
+ if (metric_evsel->metric_leader == metric_evsel) {
+ script_evsel->metric_leader = script_evsel;
+ } else {
+ script_evsel->metric_leader =
+ map_metric_evsel_to_script_evsel(script_evlist,
+ metric_evsel->metric_leader);
+ }
+ }
+
+ if (metric_me) {
+ struct metric_expr *expr;
+ struct metric_event *script_me =
+ metricgroup__lookup(&script_evlist->metric_events,
+ script_evsel,
+ /*create=*/true);
+
+ if (!script_me) {
+ /*
+ * As the metric_expr is created, the only
+ * failure is a lack of memory.
+ */
+ goto out;
+ }
+ list_splice_init(&metric_me->head, &script_me->head);
+ list_for_each_entry(expr, &script_me->head, nd) {
+ for (int i = 0; expr->metric_events[i]; i++) {
+ expr->metric_events[i] =
+ map_metric_evsel_to_script_evsel(script_evlist,
+ expr->metric_events[i]);
+ }
+ }
+ }
+ }
+ pr_debug("Found metric '%s' whose evsels match those of in the perf data\n",
+ pm->metric_name);
+ evlist__delete(metric_evlist);
+out:
+ return 0;
+}
+
+static struct aggr_cpu_id script_aggr_cpu_id_get(struct perf_stat_config *config __maybe_unused,
+ struct perf_cpu cpu)
+{
+ return aggr_cpu_id__global(cpu, /*data=*/NULL);
+}
+
+static void perf_sample__fprint_metric(struct thread *thread,
struct evsel *evsel,
struct perf_sample *sample,
FILE *fp)
{
- struct evsel *leader = evsel__leader(evsel);
+ static bool init_metrics;
struct perf_stat_output_ctx ctx = {
.print_metric = script_print_metric,
.new_line = script_new_line,
@@ -2122,23 +2283,84 @@ static void perf_sample__fprint_metric(struct perf_script *script,
},
.force_header = false,
};
- struct evsel *ev2;
- u64 val;
+ struct perf_counts_values *count, *old_count;
+ int cpu_map_idx, thread_map_idx, aggr_idx;
+ struct evsel *pos;
+
+ if (!init_metrics) {
+ /* One time initialization of stat_config and metric data. */
+ struct script_find_metrics_args args = {
+ .evlist = evsel->evlist,
+ .system_wide = perf_thread_map__pid(evsel->core.threads, /*idx=*/0) == -1,
+
+ };
+ if (!stat_config.output)
+ stat_config.output = stdout;
+
+ if (!stat_config.aggr_map) {
+ /* TODO: currently only global aggregation is supported. */
+ assert(stat_config.aggr_mode == AGGR_GLOBAL);
+ stat_config.aggr_get_id = script_aggr_cpu_id_get;
+ stat_config.aggr_map =
+ cpu_aggr_map__new(evsel->evlist->core.user_requested_cpus,
+ aggr_cpu_id__global, /*data=*/NULL,
+ /*needs_sort=*/false);
+ }
+
+ metricgroup__for_each_metric(pmu_metrics_table__find(), script_find_metrics, &args);
+ init_metrics = true;
+ }
- if (!evsel->stats)
- evlist__alloc_stats(&stat_config, script->session->evlist, /*alloc_raw=*/false);
- if (evsel_script(leader)->gnum++ == 0)
- perf_stat__reset_shadow_stats();
- val = sample->period * evsel->scale;
- evsel_script(evsel)->val = val;
- if (evsel_script(leader)->gnum == leader->core.nr_members) {
- for_each_group_member (ev2, leader) {
- perf_stat__print_shadow_stats(&stat_config, ev2,
- evsel_script(ev2)->val,
- sample->cpu,
- &ctx);
+ if (!evsel->stats) {
+ if (evlist__alloc_stats(&stat_config, evsel->evlist, /*alloc_raw=*/true) < 0)
+ return;
+ }
+ if (!evsel->stats->aggr) {
+ if (evlist__alloc_aggr_stats(evsel->evlist, stat_config.aggr_map->nr) < 0)
+ return;
+ }
+
+ /* Update the evsel's count using the sample's data. */
+ cpu_map_idx = perf_cpu_map__idx(evsel->core.cpus, (struct perf_cpu){sample->cpu});
+ if (cpu_map_idx < 0) {
+ /* Missing CPU, check for any CPU. */
+ if (perf_cpu_map__cpu(evsel->core.cpus, /*idx=*/0).cpu == -1 ||
+ sample->cpu == (u32)-1) {
+ /* Place the counts in the which ever CPU is first in the map. */
+ cpu_map_idx = 0;
+ } else {
+ pr_info("Missing CPU map entry for CPU %d\n", sample->cpu);
+ return;
+ }
+ }
+ thread_map_idx = perf_thread_map__idx(evsel->core.threads, sample->tid);
+ if (thread_map_idx < 0) {
+ /* Missing thread, check for any thread. */
+ if (perf_thread_map__pid(evsel->core.threads, /*idx=*/0) == -1 ||
+ sample->tid == (u32)-1) {
+ /* Place the counts in the which ever thread is first in the map. */
+ thread_map_idx = 0;
+ } else {
+ pr_info("Missing thread map entry for thread %d\n", sample->tid);
+ return;
+ }
+ }
+ count = perf_counts(evsel->counts, cpu_map_idx, thread_map_idx);
+ old_count = perf_counts(evsel->prev_raw_counts, cpu_map_idx, thread_map_idx);
+ count->val = old_count->val + sample->period;
+ count->run = old_count->run + 1;
+ count->ena = old_count->ena + 1;
+
+ /* Update the aggregated stats. */
+ perf_stat_process_counter(&stat_config, evsel);
+
+ /* Display all metrics. */
+ evlist__for_each_entry(evsel->evlist, pos) {
+ cpu_aggr_map__for_each_idx(aggr_idx, stat_config.aggr_map) {
+ perf_stat__print_shadow_stats(&stat_config, pos,
+ aggr_idx,
+ &ctx);
}
- evsel_script(leader)->gnum = 0;
}
}
@@ -2320,7 +2542,7 @@ static void process_event(struct perf_script *script,
}
if (PRINT_FIELD(METRIC))
- perf_sample__fprint_metric(script, thread, evsel, sample, fp);
+ perf_sample__fprint_metric(thread, evsel, sample, fp);
if (verbose > 0)
fflush(fp);
@@ -2484,6 +2706,94 @@ out_put:
return ret;
}
+static int process_deferred_sample_event(const struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct evsel *evsel,
+ struct machine *machine)
+{
+ struct perf_script *scr = container_of(tool, struct perf_script, tool);
+ struct perf_event_attr *attr = &evsel->core.attr;
+ struct evsel_script *es = evsel->priv;
+ unsigned int type = output_type(attr->type);
+ struct addr_location al;
+ FILE *fp = es->fp;
+ int ret = 0;
+
+ if (output[type].fields == 0)
+ return 0;
+
+ /* Set thread to NULL to indicate addr_al and al are not initialized */
+ addr_location__init(&al);
+
+ if (perf_time__ranges_skip_sample(scr->ptime_range, scr->range_num,
+ sample->time)) {
+ goto out_put;
+ }
+
+ if (debug_mode) {
+ if (sample->time < last_timestamp) {
+ pr_err("Samples misordered, previous: %" PRIu64
+ " this: %" PRIu64 "\n", last_timestamp,
+ sample->time);
+ nr_unordered++;
+ }
+ last_timestamp = sample->time;
+ goto out_put;
+ }
+
+ if (filter_cpu(sample))
+ goto out_put;
+
+ if (machine__resolve(machine, &al, sample) < 0) {
+ pr_err("problem processing %d event, skipping it.\n",
+ event->header.type);
+ ret = -1;
+ goto out_put;
+ }
+
+ if (al.filtered)
+ goto out_put;
+
+ if (!show_event(sample, evsel, al.thread, &al, NULL))
+ goto out_put;
+
+ if (evswitch__discard(&scr->evswitch, evsel))
+ goto out_put;
+
+ perf_sample__fprintf_start(scr, sample, al.thread, evsel,
+ PERF_RECORD_CALLCHAIN_DEFERRED, fp);
+ fprintf(fp, "DEFERRED CALLCHAIN [cookie: %llx]",
+ (unsigned long long)event->callchain_deferred.cookie);
+
+ if (PRINT_FIELD(IP)) {
+ struct callchain_cursor *cursor = NULL;
+
+ if (symbol_conf.use_callchain && sample->callchain) {
+ cursor = get_tls_callchain_cursor();
+ if (thread__resolve_callchain(al.thread, cursor, evsel,
+ sample, NULL, NULL,
+ scripting_max_stack)) {
+ pr_info("cannot resolve deferred callchains\n");
+ cursor = NULL;
+ }
+ }
+
+ fputc(cursor ? '\n' : ' ', fp);
+ sample__fprintf_sym(sample, &al, 0, output[type].print_ip_opts,
+ cursor, symbol_conf.bt_stop_list, fp);
+ }
+
+ fprintf(fp, "\n");
+
+ if (verbose > 0)
+ fflush(fp);
+
+out_put:
+ addr_location__exit(&al);
+ return ret;
+}
+
// Used when scr->per_event_dump is not set
static struct evsel_script es_stdout;
@@ -2701,7 +3011,8 @@ static int process_switch_event(const struct perf_tool *tool,
sample->tid);
}
-static int process_auxtrace_error(struct perf_session *session,
+static int process_auxtrace_error(const struct perf_tool *tool,
+ struct perf_session *session,
union perf_event *event)
{
if (scripting_ops && scripting_ops->process_auxtrace_error) {
@@ -2709,7 +3020,7 @@ static int process_auxtrace_error(struct perf_session *session,
return 0;
}
- return perf_event__process_auxtrace_error(session, event);
+ return perf_event__process_auxtrace_error(tool, session, event);
}
static int
@@ -2757,7 +3068,8 @@ process_bpf_events(const struct perf_tool *tool __maybe_unused,
}
static int
-process_bpf_metadata_event(struct perf_session *session __maybe_unused,
+process_bpf_metadata_event(const struct perf_tool *tool __maybe_unused,
+ struct perf_session *session __maybe_unused,
union perf_event *event)
{
perf_event__fprintf(event, NULL, stdout);
@@ -3516,7 +3828,8 @@ static void script__setup_sample_type(struct perf_script *script)
}
}
-static int process_stat_round_event(struct perf_session *session,
+static int process_stat_round_event(const struct perf_tool *tool __maybe_unused,
+ struct perf_session *session,
union perf_event *event)
{
struct perf_record_stat_round *round = &event->stat_round;
@@ -3531,7 +3844,8 @@ static int process_stat_round_event(struct perf_session *session,
return 0;
}
-static int process_stat_config_event(struct perf_session *session __maybe_unused,
+static int process_stat_config_event(const struct perf_tool *tool __maybe_unused,
+ struct perf_session *session __maybe_unused,
union perf_event *event)
{
perf_event__read_stat_config(&stat_config, &event->stat_config);
@@ -3565,10 +3879,10 @@ static int set_maps(struct perf_script *script)
}
static
-int process_thread_map_event(struct perf_session *session,
+int process_thread_map_event(const struct perf_tool *tool,
+ struct perf_session *session __maybe_unused,
union perf_event *event)
{
- const struct perf_tool *tool = session->tool;
struct perf_script *script = container_of(tool, struct perf_script, tool);
if (dump_trace)
@@ -3587,10 +3901,10 @@ int process_thread_map_event(struct perf_session *session,
}
static
-int process_cpu_map_event(struct perf_session *session,
+int process_cpu_map_event(const struct perf_tool *tool,
+ struct perf_session *session __maybe_unused,
union perf_event *event)
{
- const struct perf_tool *tool = session->tool;
struct perf_script *script = container_of(tool, struct perf_script, tool);
if (dump_trace)
@@ -3608,7 +3922,8 @@ int process_cpu_map_event(struct perf_session *session,
return set_maps(script);
}
-static int process_feature_event(struct perf_session *session,
+static int process_feature_event(const struct perf_tool *tool __maybe_unused,
+ struct perf_session *session,
union perf_event *event)
{
if (event->feat.feat_id < HEADER_LAST_FEATURE)
@@ -3616,14 +3931,13 @@ static int process_feature_event(struct perf_session *session,
return 0;
}
-#ifdef HAVE_AUXTRACE_SUPPORT
-static int perf_script__process_auxtrace_info(struct perf_session *session,
+static int perf_script__process_auxtrace_info(const struct perf_tool *tool,
+ struct perf_session *session,
union perf_event *event)
{
- int ret = perf_event__process_auxtrace_info(session, event);
+ int ret = perf_event__process_auxtrace_info(tool, session, event);
if (ret == 0) {
- const struct perf_tool *tool = session->tool;
struct perf_script *script = container_of(tool, struct perf_script, tool);
ret = perf_script__setup_per_event_dump(script);
@@ -3631,9 +3945,6 @@ static int perf_script__process_auxtrace_info(struct perf_session *session,
return ret;
}
-#else
-#define perf_script__process_auxtrace_info 0
-#endif
static int parse_insn_trace(const struct option *opt __maybe_unused,
const char *str, int unset __maybe_unused)
@@ -3698,6 +4009,7 @@ int cmd_script(int argc, const char **argv)
bool header_only = false;
bool script_started = false;
bool unsorted_dump = false;
+ bool merge_deferred_callchains = true;
char *rec_script_path = NULL;
char *rep_script_path = NULL;
struct perf_session *session;
@@ -3851,6 +4163,8 @@ int cmd_script(int argc, const char **argv)
"Guest code can be found in hypervisor process"),
OPT_BOOLEAN('\0', "stitch-lbr", &script.stitch_lbr,
"Enable LBR callgraph stitching approach"),
+ OPT_BOOLEAN('\0', "merge-callchains", &merge_deferred_callchains,
+ "Enable merge deferred user callchains"),
OPTS_EVSWITCH(&script.evswitch),
OPT_END()
};
@@ -4080,6 +4394,7 @@ script_found:
perf_tool__init(&script.tool, !unsorted_dump);
script.tool.sample = process_sample_event;
+ script.tool.callchain_deferred = process_deferred_sample_event;
script.tool.mmap = perf_event__process_mmap;
script.tool.mmap2 = perf_event__process_mmap2;
script.tool.comm = perf_event__process_comm;
@@ -4106,6 +4421,7 @@ script_found:
script.tool.throttle = process_throttle_event;
script.tool.unthrottle = process_throttle_event;
script.tool.ordering_requires_timestamps = true;
+ script.tool.merge_deferred_callchains = merge_deferred_callchains;
session = perf_session__new(&data, &script.tool);
if (IS_ERR(session))
return PTR_ERR(session);
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 2c38dd98f6ca..ab40d85fb125 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -74,6 +74,7 @@
#include "util/intel-tpebs.h"
#include "asm/bug.h"
+#include <linux/list_sort.h>
#include <linux/time64.h>
#include <linux/zalloc.h>
#include <api/fs/fs.h>
@@ -96,9 +97,18 @@
#include <perf/evlist.h>
#include <internal/threadmap.h>
+#ifdef HAVE_BPF_SKEL
+#include "util/bpf_skel/bperf_cgroup.h"
+#endif
+
#define DEFAULT_SEPARATOR " "
#define FREEZE_ON_SMI_PATH "bus/event_source/devices/cpu/freeze_on_smi"
+struct rusage_stats {
+ struct stats ru_utime_usec_stat;
+ struct stats ru_stime_usec_stat;
+};
+
static void print_counters(struct timespec *ts, int argc, const char **argv);
static struct evlist *evsel_list;
@@ -128,6 +138,7 @@ static bool interval_count;
static const char *output_name;
static int output_fd;
static char *metrics;
+static struct rusage_stats ru_stats;
struct perf_stat {
bool record;
@@ -228,7 +239,7 @@ static inline void diff_timespec(struct timespec *r, struct timespec *a,
static void perf_stat__reset_stats(void)
{
evlist__reset_stats(evsel_list);
- perf_stat__reset_shadow_stats();
+ memset(stat_config.walltime_nsecs_stats, 0, sizeof(*stat_config.walltime_nsecs_stats));
}
static int process_synthesized_event(const struct perf_tool *tool __maybe_unused,
@@ -278,17 +289,27 @@ static int read_single_counter(struct evsel *counter, int cpu_map_idx, int threa
if (err && cpu_map_idx == 0 &&
(evsel__tool_event(counter) == TOOL_PMU__EVENT_USER_TIME ||
evsel__tool_event(counter) == TOOL_PMU__EVENT_SYSTEM_TIME)) {
- u64 val, *start_time;
struct perf_counts_values *count =
perf_counts(counter->counts, cpu_map_idx, thread);
+ struct perf_counts_values *old_count = NULL;
+ u64 val;
+
+ if (counter->prev_raw_counts)
+ old_count = perf_counts(counter->prev_raw_counts, cpu_map_idx, thread);
- start_time = xyarray__entry(counter->start_times, cpu_map_idx, thread);
if (evsel__tool_event(counter) == TOOL_PMU__EVENT_USER_TIME)
val = ru_stats.ru_utime_usec_stat.mean;
else
val = ru_stats.ru_stime_usec_stat.mean;
- count->ena = count->run = *start_time + val;
+
count->val = val;
+ if (old_count) {
+ count->run = old_count->run + 1;
+ count->ena = old_count->ena + 1;
+ } else {
+ count->run++;
+ count->ena++;
+ }
return 0;
}
return err;
@@ -345,7 +366,7 @@ static int read_counter_cpu(struct evsel *counter, int cpu_map_idx)
return 0;
}
-static int read_affinity_counters(void)
+static int read_counters_with_affinity(void)
{
struct evlist_cpu_iterator evlist_cpu_itr;
struct affinity saved_affinity, *affinity;
@@ -366,6 +387,9 @@ static int read_affinity_counters(void)
if (evsel__is_bpf(counter))
continue;
+ if (evsel__is_tool(counter))
+ continue;
+
if (!counter->err)
counter->err = read_counter_cpu(counter, evlist_cpu_itr.cpu_map_idx);
}
@@ -391,16 +415,46 @@ static int read_bpf_map_counters(void)
return 0;
}
-static int read_counters(void)
+static int read_tool_counters(void)
{
- if (!stat_config.stop_read_counter) {
- if (read_bpf_map_counters() ||
- read_affinity_counters())
- return -1;
+ struct evsel *counter;
+
+ evlist__for_each_entry(evsel_list, counter) {
+ int idx;
+
+ if (!evsel__is_tool(counter))
+ continue;
+
+ perf_cpu_map__for_each_idx(idx, counter->core.cpus) {
+ if (!counter->err)
+ counter->err = read_counter_cpu(counter, idx);
+ }
}
return 0;
}
+static int read_counters(void)
+{
+ int ret;
+
+ if (stat_config.stop_read_counter)
+ return 0;
+
+ // Read all BPF counters first.
+ ret = read_bpf_map_counters();
+ if (ret)
+ return ret;
+
+ // Read non-BPF and non-tool counters next.
+ ret = read_counters_with_affinity();
+ if (ret)
+ return ret;
+
+ // Read the tool counters last. This way the duration_time counter
+ // should always be greater than any other counter's enabled time.
+ return read_tool_counters();
+}
+
static void process_counters(void)
{
struct evsel *counter;
@@ -434,8 +488,8 @@ static void process_interval(void)
pr_err("failed to write stat round event\n");
}
- init_stats(&walltime_nsecs_stats);
- update_stats(&walltime_nsecs_stats, stat_config.interval * 1000000ULL);
+ init_stats(stat_config.walltime_nsecs_stats);
+ update_stats(stat_config.walltime_nsecs_stats, stat_config.interval * 1000000ULL);
print_counters(&rs, 0, NULL);
}
@@ -610,38 +664,34 @@ static int dispatch_events(bool forks, int timeout, int interval, int *times)
enum counter_recovery {
COUNTER_SKIP,
COUNTER_RETRY,
- COUNTER_FATAL,
};
-static enum counter_recovery stat_handle_error(struct evsel *counter)
+static enum counter_recovery stat_handle_error(struct evsel *counter, int err)
{
char msg[BUFSIZ];
+
+ assert(!counter->supported);
+
/*
* PPC returns ENXIO for HW counters until 2.6.37
* (behavior changed with commit b0a873e).
*/
- if (errno == EINVAL || errno == ENOSYS ||
- errno == ENOENT || errno == ENXIO) {
- if (verbose > 0)
- ui__warning("%s event is not supported by the kernel.\n",
- evsel__name(counter));
- counter->supported = false;
- /*
- * errored is a sticky flag that means one of the counter's
- * cpu event had a problem and needs to be reexamined.
- */
- counter->errored = true;
-
- if ((evsel__leader(counter) != counter) ||
- !(counter->core.leader->nr_members > 1))
- return COUNTER_SKIP;
- } else if (evsel__fallback(counter, &target, errno, msg, sizeof(msg))) {
+ if (err == EINVAL || err == ENOSYS || err == ENOENT || err == ENXIO) {
+ if (verbose > 0) {
+ evsel__open_strerror(counter, &target, err, msg, sizeof(msg));
+ ui__warning("%s event is not supported by the kernel.\n%s\n",
+ evsel__name(counter), msg);
+ }
+ return COUNTER_SKIP;
+ }
+ if (evsel__fallback(counter, &target, err, msg, sizeof(msg))) {
if (verbose > 0)
ui__warning("%s\n", msg);
+ counter->supported = true;
return COUNTER_RETRY;
- } else if (target__has_per_thread(&target) && errno != EOPNOTSUPP &&
- evsel_list->core.threads &&
- evsel_list->core.threads->err_thread != -1) {
+ }
+ if (target__has_per_thread(&target) && err != EOPNOTSUPP &&
+ evsel_list->core.threads && evsel_list->core.threads->err_thread != -1) {
/*
* For global --per-thread case, skip current
* error thread.
@@ -649,37 +699,85 @@ static enum counter_recovery stat_handle_error(struct evsel *counter)
if (!thread_map__remove(evsel_list->core.threads,
evsel_list->core.threads->err_thread)) {
evsel_list->core.threads->err_thread = -1;
+ counter->supported = true;
return COUNTER_RETRY;
}
- } else if (counter->skippable) {
- if (verbose > 0)
- ui__warning("skipping event %s that kernel failed to open .\n",
- evsel__name(counter));
- counter->supported = false;
- counter->errored = true;
- return COUNTER_SKIP;
}
+ if (verbose > 0) {
+ evsel__open_strerror(counter, &target, err, msg, sizeof(msg));
+ ui__warning(err == EOPNOTSUPP
+ ? "%s event is not supported by the kernel.\n%s\n"
+ : "skipping event %s that kernel failed to open.\n%s\n",
+ evsel__name(counter), msg);
+ }
+ return COUNTER_SKIP;
+}
- if (errno == EOPNOTSUPP) {
- if (verbose > 0) {
- ui__warning("%s event is not supported by the kernel.\n",
- evsel__name(counter));
- }
- counter->supported = false;
- counter->errored = true;
+static int create_perf_stat_counter(struct evsel *evsel,
+ struct perf_stat_config *config,
+ int cpu_map_idx)
+{
+ struct perf_event_attr *attr = &evsel->core.attr;
+ struct evsel *leader = evsel__leader(evsel);
+
+ /* Reset supported flag as creating a stat counter is retried. */
+ attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED |
+ PERF_FORMAT_TOTAL_TIME_RUNNING;
- if ((evsel__leader(counter) != counter) ||
- !(counter->core.leader->nr_members > 1))
- return COUNTER_SKIP;
+ /*
+ * The event is part of non trivial group, let's enable
+ * the group read (for leader) and ID retrieval for all
+ * members.
+ */
+ if (leader->core.nr_members > 1)
+ attr->read_format |= PERF_FORMAT_ID|PERF_FORMAT_GROUP;
+
+ attr->inherit = !config->no_inherit && list_empty(&evsel->bpf_counter_list);
+
+ /*
+ * Some events get initialized with sample_(period/type) set,
+ * like tracepoints. Clear it up for counting.
+ */
+ attr->sample_period = 0;
+
+ if (config->identifier)
+ attr->sample_type = PERF_SAMPLE_IDENTIFIER;
+
+ if (config->all_user) {
+ attr->exclude_kernel = 1;
+ attr->exclude_user = 0;
}
- evsel__open_strerror(counter, &target, errno, msg, sizeof(msg));
- ui__error("%s\n", msg);
+ if (config->all_kernel) {
+ attr->exclude_kernel = 0;
+ attr->exclude_user = 1;
+ }
- if (child_pid != -1)
- kill(child_pid, SIGTERM);
+ /*
+ * Disabling all counters initially, they will be enabled
+ * either manually by us or by kernel via enable_on_exec
+ * set later.
+ */
+ if (evsel__is_group_leader(evsel)) {
+ attr->disabled = 1;
+
+ if (target__enable_on_exec(&target))
+ attr->enable_on_exec = 1;
+ }
+
+ return evsel__open_per_cpu_and_thread(evsel, evsel__cpus(evsel), cpu_map_idx,
+ evsel->core.threads);
+}
+
+static void update_rusage_stats(const struct rusage *rusage)
+{
+ const u64 us_to_ns = 1000;
+ const u64 s_to_ns = 1000000000;
- return COUNTER_FATAL;
+ update_stats(&ru_stats.ru_utime_usec_stat,
+ (rusage->ru_utime.tv_usec * us_to_ns + rusage->ru_utime.tv_sec * s_to_ns));
+ update_stats(&ru_stats.ru_stime_usec_stat,
+ (rusage->ru_stime.tv_usec * us_to_ns + rusage->ru_stime.tv_sec * s_to_ns));
}
static int __run_perf_stat(int argc, const char **argv, int run_idx)
@@ -696,8 +794,8 @@ static int __run_perf_stat(int argc, const char **argv, int run_idx)
bool is_pipe = STAT_RECORD ? perf_stat.data.is_pipe : false;
struct evlist_cpu_iterator evlist_cpu_itr;
struct affinity saved_affinity, *affinity = NULL;
- int err;
- bool second_pass = false;
+ int err, open_err = 0;
+ bool second_pass = false, has_supported_counters;
if (forks) {
if (evlist__prepare_workload(evsel_list, &target, argv, is_pipe, workload_exec_failed_signal) < 0) {
@@ -737,14 +835,17 @@ static int __run_perf_stat(int argc, const char **argv, int run_idx)
if (target.use_bpf)
break;
- if (counter->reset_group || counter->errored)
+ if (counter->reset_group || !counter->supported)
continue;
if (evsel__is_bperf(counter))
continue;
-try_again:
- if (create_perf_stat_counter(counter, &stat_config, &target,
- evlist_cpu_itr.cpu_map_idx) < 0) {
+ while (true) {
+ if (create_perf_stat_counter(counter, &stat_config,
+ evlist_cpu_itr.cpu_map_idx) == 0)
+ break;
+
+ open_err = errno;
/*
* Weak group failed. We cannot just undo this here
* because earlier CPUs might be in group mode, and the kernel
@@ -752,29 +853,19 @@ try_again:
* it to later.
* Don't close here because we're in the wrong affinity.
*/
- if ((errno == EINVAL || errno == EBADF) &&
+ if ((open_err == EINVAL || open_err == EBADF) &&
evsel__leader(counter) != counter &&
counter->weak_group) {
evlist__reset_weak_group(evsel_list, counter, false);
assert(counter->reset_group);
+ counter->supported = true;
second_pass = true;
- continue;
- }
-
- switch (stat_handle_error(counter)) {
- case COUNTER_FATAL:
- err = -1;
- goto err_out;
- case COUNTER_RETRY:
- goto try_again;
- case COUNTER_SKIP:
- continue;
- default:
break;
}
+ if (stat_handle_error(counter, open_err) != COUNTER_RETRY)
+ break;
}
- counter->supported = true;
}
if (second_pass) {
@@ -787,7 +878,7 @@ try_again:
evlist__for_each_cpu(evlist_cpu_itr, evsel_list, affinity) {
counter = evlist_cpu_itr.evsel;
- if (!counter->reset_group && !counter->errored)
+ if (!counter->reset_group && counter->supported)
continue;
perf_evsel__close_cpu(&counter->core, evlist_cpu_itr.cpu_map_idx);
@@ -798,34 +889,29 @@ try_again:
if (!counter->reset_group)
continue;
-try_again_reset:
- pr_debug2("reopening weak %s\n", evsel__name(counter));
- if (create_perf_stat_counter(counter, &stat_config, &target,
- evlist_cpu_itr.cpu_map_idx) < 0) {
-
- switch (stat_handle_error(counter)) {
- case COUNTER_FATAL:
- err = -1;
- goto err_out;
- case COUNTER_RETRY:
- goto try_again_reset;
- case COUNTER_SKIP:
- continue;
- default:
+
+ while (true) {
+ pr_debug2("reopening weak %s\n", evsel__name(counter));
+ if (create_perf_stat_counter(counter, &stat_config,
+ evlist_cpu_itr.cpu_map_idx) == 0)
+ break;
+
+ open_err = errno;
+ if (stat_handle_error(counter, open_err) != COUNTER_RETRY)
break;
- }
}
- counter->supported = true;
}
}
affinity__cleanup(affinity);
affinity = NULL;
+ has_supported_counters = false;
evlist__for_each_entry(evsel_list, counter) {
if (!counter->supported) {
perf_evsel__free_fd(&counter->core);
continue;
}
+ has_supported_counters = true;
l = strlen(counter->unit);
if (l > stat_config.unit_width)
@@ -837,6 +923,18 @@ try_again_reset:
goto err_out;
}
}
+ if (!has_supported_counters && !stat_config.null_run) {
+ if (open_err) {
+ evsel__open_strerror(evlist__first(evsel_list), &target, open_err,
+ msg, sizeof(msg));
+ }
+ ui__error("No supported events found.\n%s\n", msg);
+
+ if (child_pid != -1)
+ kill(child_pid, SIGTERM);
+ err = -1;
+ goto err_out;
+ }
if (evlist__apply_filters(evsel_list, &counter, &target)) {
pr_err("failed to set filter \"%s\" on event %s with %d (%s)\n",
@@ -909,10 +1007,20 @@ try_again_reset:
goto err_out;
}
- if (WIFSIGNALED(status))
+ if (WIFSIGNALED(status)) {
+ /*
+ * We want to indicate failure to stop a repeat run,
+ * hence negative. We want the value to be the exit code
+ * of perf, which for termination by a signal is 128
+ * plus the signal number.
+ */
+ err = 0 - (128 + WTERMSIG(status));
psignal(WTERMSIG(status), argv[0]);
+ } else {
+ err = WEXITSTATUS(status);
+ }
} else {
- status = dispatch_events(forks, timeout, interval, &times);
+ err = dispatch_events(forks, timeout, interval, &times);
}
disable_counters();
@@ -925,15 +1033,15 @@ try_again_reset:
if (interval && stat_config.summary) {
stat_config.interval = 0;
stat_config.stop_read_counter = true;
- init_stats(&walltime_nsecs_stats);
- update_stats(&walltime_nsecs_stats, t1 - t0);
+ init_stats(stat_config.walltime_nsecs_stats);
+ update_stats(stat_config.walltime_nsecs_stats, t1 - t0);
evlist__copy_prev_raw_counts(evsel_list);
evlist__reset_prev_raw_counts(evsel_list);
evlist__reset_aggr_stats(evsel_list);
} else {
- update_stats(&walltime_nsecs_stats, t1 - t0);
- update_rusage_stats(&ru_stats, &stat_config.ru_data);
+ update_stats(stat_config.walltime_nsecs_stats, t1 - t0);
+ update_rusage_stats(&stat_config.ru_data);
}
/*
@@ -952,7 +1060,7 @@ try_again_reset:
if (!STAT_RECORD)
evlist__close(evsel_list);
- return WEXITSTATUS(status);
+ return err;
err_out:
if (forks)
@@ -1822,6 +1930,35 @@ static int perf_stat_init_aggr_mode_file(struct perf_stat *st)
return 0;
}
+static int default_evlist_evsel_cmp(void *priv __maybe_unused,
+ const struct list_head *l,
+ const struct list_head *r)
+{
+ const struct perf_evsel *lhs_core = container_of(l, struct perf_evsel, node);
+ const struct evsel *lhs = container_of(lhs_core, struct evsel, core);
+ const struct perf_evsel *rhs_core = container_of(r, struct perf_evsel, node);
+ const struct evsel *rhs = container_of(rhs_core, struct evsel, core);
+
+ if (evsel__leader(lhs) == evsel__leader(rhs)) {
+ /* Within the same group, respect the original order. */
+ return lhs_core->idx - rhs_core->idx;
+ }
+
+ /* Sort default metrics evsels first, and default show events before those. */
+ if (lhs->default_metricgroup != rhs->default_metricgroup)
+ return lhs->default_metricgroup ? -1 : 1;
+
+ if (lhs->default_show_events != rhs->default_show_events)
+ return lhs->default_show_events ? -1 : 1;
+
+ /* Sort by PMU type (prefers legacy types first). */
+ if (lhs->pmu != rhs->pmu)
+ return lhs->pmu->type - rhs->pmu->type;
+
+ /* Sort by name. */
+ return strcmp(evsel__name((struct evsel *)lhs), evsel__name((struct evsel *)rhs));
+}
+
/*
* Add default events, if there were no attributes specified or
* if -d/--detailed, -d -d or -d -d -d is used:
@@ -1944,48 +2081,39 @@ static int add_default_events(void)
stat_config.topdown_level = 1;
if (!evlist->core.nr_entries && !evsel_list->core.nr_entries) {
- /* No events so add defaults. */
- if (target__has_cpu(&target))
- ret = parse_events(evlist, "cpu-clock", &err);
- else
- ret = parse_events(evlist, "task-clock", &err);
- if (ret)
- goto out;
-
- ret = parse_events(evlist,
- "context-switches,"
- "cpu-migrations,"
- "page-faults,"
- "instructions,"
- "cycles,"
- "stalled-cycles-frontend,"
- "stalled-cycles-backend,"
- "branches,"
- "branch-misses",
- &err);
- if (ret)
- goto out;
-
/*
- * Add TopdownL1 metrics if they exist. To minimize
- * multiplexing, don't request threshold computation.
+ * Add Default metrics. To minimize multiplexing, don't request
+ * threshold computation, but it will be computed if the events
+ * are present.
*/
- if (metricgroup__has_metric_or_groups(pmu, "Default")) {
- struct evlist *metric_evlist = evlist__new();
+ const char *default_metricgroup_names[] = {
+ "Default", "Default2", "Default3", "Default4",
+ };
+ for (size_t i = 0; i < ARRAY_SIZE(default_metricgroup_names); i++) {
+ struct evlist *metric_evlist;
+
+ if (!metricgroup__has_metric_or_groups(pmu, default_metricgroup_names[i]))
+ continue;
+
+ if ((int)i > detailed_run)
+ break;
+
+ metric_evlist = evlist__new();
if (!metric_evlist) {
ret = -ENOMEM;
- goto out;
+ break;
}
- if (metricgroup__parse_groups(metric_evlist, pmu, "Default",
+ if (metricgroup__parse_groups(metric_evlist, pmu, default_metricgroup_names[i],
/*metric_no_group=*/false,
/*metric_no_merge=*/false,
/*metric_no_threshold=*/true,
stat_config.user_requested_cpu_list,
stat_config.system_wide,
stat_config.hardware_aware_grouping) < 0) {
+ evlist__delete(metric_evlist);
ret = -1;
- goto out;
+ break;
}
evlist__for_each_entry(metric_evlist, evsel)
@@ -1997,44 +2125,8 @@ static int add_default_events(void)
&metric_evlist->metric_events);
evlist__delete(metric_evlist);
}
- }
+ list_sort(/*priv=*/NULL, &evlist->core.entries, default_evlist_evsel_cmp);
- /* Detailed events get appended to the event list: */
-
- if (!ret && detailed_run >= 1) {
- /*
- * Detailed stats (-d), covering the L1 and last level data
- * caches:
- */
- ret = parse_events(evlist,
- "L1-dcache-loads,"
- "L1-dcache-load-misses,"
- "LLC-loads,"
- "LLC-load-misses",
- &err);
- }
- if (!ret && detailed_run >= 2) {
- /*
- * Very detailed stats (-d -d), covering the instruction cache
- * and the TLB caches:
- */
- ret = parse_events(evlist,
- "L1-icache-loads,"
- "L1-icache-load-misses,"
- "dTLB-loads,"
- "dTLB-load-misses,"
- "iTLB-loads,"
- "iTLB-load-misses",
- &err);
- }
- if (!ret && detailed_run >= 3) {
- /*
- * Very, very detailed stats (-d -d -d), adding prefetch events:
- */
- ret = parse_events(evlist,
- "L1-dcache-prefetches,"
- "L1-dcache-prefetch-misses",
- &err);
}
out:
if (!ret) {
@@ -2043,7 +2135,7 @@ out:
* Make at least one event non-skippable so fatal errors are visible.
* 'cycles' always used to be default and non-skippable, so use that.
*/
- if (strcmp("cycles", evsel__name(evsel)))
+ if (!evsel__match(evsel, HARDWARE, HW_CPU_CYCLES))
evsel->skippable = true;
}
}
@@ -2107,7 +2199,8 @@ static int __cmd_record(const struct option stat_options[], struct opt_aggr_mode
return argc;
}
-static int process_stat_round_event(struct perf_session *session,
+static int process_stat_round_event(const struct perf_tool *tool __maybe_unused,
+ struct perf_session *session,
union perf_event *event)
{
struct perf_record_stat_round *stat_round = &event->stat_round;
@@ -2119,7 +2212,7 @@ static int process_stat_round_event(struct perf_session *session,
process_counters();
if (stat_round->type == PERF_STAT_ROUND_TYPE__FINAL)
- update_stats(&walltime_nsecs_stats, stat_round->time);
+ update_stats(stat_config.walltime_nsecs_stats, stat_round->time);
if (stat_config.interval && stat_round->time) {
tsh.tv_sec = stat_round->time / NSEC_PER_SEC;
@@ -2132,10 +2225,10 @@ static int process_stat_round_event(struct perf_session *session,
}
static
-int process_stat_config_event(struct perf_session *session,
+int process_stat_config_event(const struct perf_tool *tool,
+ struct perf_session *session,
union perf_event *event)
{
- const struct perf_tool *tool = session->tool;
struct perf_stat *st = container_of(tool, struct perf_stat, tool);
perf_event__read_stat_config(&stat_config, &event->stat_config);
@@ -2181,10 +2274,10 @@ static int set_maps(struct perf_stat *st)
}
static
-int process_thread_map_event(struct perf_session *session,
+int process_thread_map_event(const struct perf_tool *tool,
+ struct perf_session *session __maybe_unused,
union perf_event *event)
{
- const struct perf_tool *tool = session->tool;
struct perf_stat *st = container_of(tool, struct perf_stat, tool);
if (st->threads) {
@@ -2200,10 +2293,10 @@ int process_thread_map_event(struct perf_session *session,
}
static
-int process_cpu_map_event(struct perf_session *session,
+int process_cpu_map_event(const struct perf_tool *tool,
+ struct perf_session *session __maybe_unused,
union perf_event *event)
{
- const struct perf_tool *tool = session->tool;
struct perf_stat *st = container_of(tool, struct perf_stat, tool);
struct perf_cpu_map *cpus;
@@ -2511,6 +2604,7 @@ int cmd_stat(int argc, const char **argv)
unsigned int interval, timeout;
const char * const stat_subcommands[] = { "record", "report" };
char errbuf[BUFSIZ];
+ struct evsel *counter;
setlocale(LC_ALL, "");
@@ -2765,9 +2859,28 @@ int cmd_stat(int argc, const char **argv)
goto out;
}
}
-
+#ifdef HAVE_BPF_SKEL
+ if (target.use_bpf && nr_cgroups &&
+ (evsel_list->core.nr_entries / nr_cgroups) > BPERF_CGROUP__MAX_EVENTS) {
+ pr_warning("Disabling BPF counters due to more events (%d) than the max (%d)\n",
+ evsel_list->core.nr_entries / nr_cgroups, BPERF_CGROUP__MAX_EVENTS);
+ target.use_bpf = false;
+ }
+#endif // HAVE_BPF_SKEL
evlist__warn_user_requested_cpus(evsel_list, target.cpu_list);
+ evlist__for_each_entry(evsel_list, counter) {
+ /*
+ * Setup BPF counters to require CPUs as any(-1) isn't
+ * supported. evlist__create_maps below will propagate this
+ * information to the evsels. Note, evsel__is_bperf isn't yet
+ * set up, and this change must happen early, so directly use
+ * the bpf_counter variable and target information.
+ */
+ if ((counter->bpf_counter || target.use_bpf) && !target__has_cpu(&target))
+ counter->core.requires_cpu = true;
+ }
+
if (evlist__create_maps(evsel_list, &target) < 0) {
if (target__has_task(&target)) {
pr_err("Problems finding threads of monitor\n");
@@ -2866,7 +2979,7 @@ int cmd_stat(int argc, const char **argv)
evlist__reset_prev_raw_counts(evsel_list);
status = run_perf_stat(argc, argv, run_idx);
- if (status == -1)
+ if (status < 0)
break;
if (forever && !interval) {
@@ -2907,7 +3020,7 @@ int cmd_stat(int argc, const char **argv)
}
if (!interval) {
- if (WRITE_STAT_ROUND_EVENT(walltime_nsecs_stats.max, FINAL))
+ if (WRITE_STAT_ROUND_EVENT(stat_config.walltime_nsecs_stats->max, FINAL))
pr_err("failed to write stat round event\n");
}
@@ -2936,5 +3049,6 @@ out:
evlist__close_control(stat_config.ctl_fd, stat_config.ctl_fd_ack, &stat_config.ctl_fd_close);
- return status;
+ /* Only the low byte of status becomes the exit code. */
+ return abs(status);
}
diff --git a/tools/perf/builtin-timechart.c b/tools/perf/builtin-timechart.c
index 22050c640dfa..f8b49d69e9a5 100644
--- a/tools/perf/builtin-timechart.c
+++ b/tools/perf/builtin-timechart.c
@@ -1651,7 +1651,7 @@ out_delete:
return ret;
}
-static int timechart__io_record(int argc, const char **argv)
+static int timechart__io_record(int argc, const char **argv, const char *output_data)
{
unsigned int rec_argc, i;
const char **rec_argv;
@@ -1659,7 +1659,7 @@ static int timechart__io_record(int argc, const char **argv)
char *filter = NULL;
const char * const common_args[] = {
- "record", "-a", "-R", "-c", "1",
+ "record", "-a", "-R", "-c", "1", "-o", output_data,
};
unsigned int common_args_nr = ARRAY_SIZE(common_args);
@@ -1786,7 +1786,8 @@ static int timechart__io_record(int argc, const char **argv)
}
-static int timechart__record(struct timechart *tchart, int argc, const char **argv)
+static int timechart__record(struct timechart *tchart, int argc, const char **argv,
+ const char *output_data)
{
unsigned int rec_argc, i, j;
const char **rec_argv;
@@ -1794,7 +1795,7 @@ static int timechart__record(struct timechart *tchart, int argc, const char **ar
unsigned int record_elems;
const char * const common_args[] = {
- "record", "-a", "-R", "-c", "1",
+ "record", "-a", "-R", "-c", "1", "-o", output_data,
};
unsigned int common_args_nr = ARRAY_SIZE(common_args);
@@ -1934,6 +1935,7 @@ int cmd_timechart(int argc, const char **argv)
.merge_dist = 1000,
};
const char *output_name = "output.svg";
+ const char *output_record_data = "perf.data";
const struct option timechart_common_options[] = {
OPT_BOOLEAN('P', "power-only", &tchart.power_only, "output power data only"),
OPT_BOOLEAN('T', "tasks-only", &tchart.tasks_only, "output processes data only"),
@@ -1976,6 +1978,7 @@ int cmd_timechart(int argc, const char **argv)
OPT_BOOLEAN('I', "io-only", &tchart.io_only,
"record only IO data"),
OPT_BOOLEAN('g', "callchain", &tchart.with_backtrace, "record callchain"),
+ OPT_STRING('o', "output", &output_record_data, "file", "output data file name"),
OPT_PARENT(timechart_common_options),
};
const char * const timechart_record_usage[] = {
@@ -2024,9 +2027,9 @@ int cmd_timechart(int argc, const char **argv)
}
if (tchart.io_only)
- ret = timechart__io_record(argc, argv);
+ ret = timechart__io_record(argc, argv, output_record_data);
else
- ret = timechart__record(&tchart, argc, argv);
+ ret = timechart__record(&tchart, argc, argv, output_record_data);
goto out;
} else if (argc)
usage_with_options(timechart_usage, timechart_options);
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index a11f629c7d76..710604c4f6f6 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -1695,11 +1695,13 @@ int cmd_top(int argc, const char **argv)
goto out_delete_evlist;
if (!top.evlist->core.nr_entries) {
- bool can_profile_kernel = perf_event_paranoid_check(1);
- int err = parse_event(top.evlist, can_profile_kernel ? "cycles:P" : "cycles:Pu");
+ struct evlist *def_evlist = evlist__new_default();
- if (err)
+ if (!def_evlist)
goto out_delete_evlist;
+
+ evlist__splice_list_tail(top.evlist, &def_evlist->core.entries);
+ evlist__delete(def_evlist);
}
status = evswitch__init(&top.evswitch, top.evlist, stderr);
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index fe737b3ac6e6..baee1f695600 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -196,6 +196,7 @@ struct trace {
unsigned int max_stack;
unsigned int min_stack;
enum trace_summary_mode summary_mode;
+ int max_summary;
int raw_augmented_syscalls_args_size;
bool raw_augmented_syscalls;
bool fd_path_disabled;
@@ -2004,7 +2005,9 @@ static int trace__symbols_init(struct trace *trace, int argc, const char **argv,
err = __machine__synthesize_threads(trace->host, &trace->tool, &trace->opts.target,
evlist->core.threads, trace__tool_process,
- true, false, 1);
+ /*needs_mmap=*/callchain_param.enabled,
+ /*mmap_data=*/false,
+ /*nr_threads_synthesize=*/1);
out:
if (err) {
perf_env__exit(&trace->host_env);
@@ -2066,6 +2069,15 @@ static const struct syscall_arg_fmt *syscall_arg_fmt__find_by_name(const char *n
return __syscall_arg_fmt__find_by_name(syscall_arg_fmts__by_name, nmemb, name);
}
+/*
+ * v6.19 kernel added new fields to read userspace memory for event tracing.
+ * But it's not used by perf and confuses the syscall parameters.
+ */
+static bool is_internal_field(struct tep_format_field *field)
+{
+ return !strcmp(field->type, "__data_loc char[]");
+}
+
static struct tep_format_field *
syscall_arg_fmt__init_array(struct syscall_arg_fmt *arg, struct tep_format_field *field,
bool *use_btf)
@@ -2074,6 +2086,10 @@ syscall_arg_fmt__init_array(struct syscall_arg_fmt *arg, struct tep_format_field
int len;
for (; field; field = field->next, ++arg) {
+ /* assume it's the last argument */
+ if (is_internal_field(field))
+ continue;
+
last_field = field;
if (arg->scnprintf)
@@ -2142,6 +2158,7 @@ static int syscall__read_info(struct syscall *sc, struct trace *trace)
{
char tp_name[128];
const char *name;
+ struct tep_format_field *field;
int err;
if (sc->nonexistent)
@@ -2198,6 +2215,13 @@ static int syscall__read_info(struct syscall *sc, struct trace *trace)
--sc->nr_args;
}
+ field = sc->args;
+ while (field) {
+ if (is_internal_field(field))
+ --sc->nr_args;
+ field = field->next;
+ }
+
sc->is_exit = !strcmp(name, "exit_group") || !strcmp(name, "exit");
sc->is_open = !strcmp(name, "open") || !strcmp(name, "openat");
@@ -4440,7 +4464,7 @@ create_maps:
if (trace->summary_mode == SUMMARY__BY_TOTAL && !trace->summary_bpf) {
trace->syscall_stats = alloc_syscall_stats();
- if (trace->syscall_stats == NULL)
+ if (IS_ERR(trace->syscall_stats))
goto out_delete_evlist;
}
@@ -4599,7 +4623,7 @@ out_disable:
if (!err) {
if (trace->summary) {
if (trace->summary_bpf)
- trace_print_bpf_summary(trace->output);
+ trace_print_bpf_summary(trace->output, trace->max_summary);
else if (trace->summary_mode == SUMMARY__BY_TOTAL)
trace__fprintf_total_summary(trace, trace->output);
else
@@ -4748,7 +4772,7 @@ static int trace__replay(struct trace *trace)
if (trace->summary_mode == SUMMARY__BY_TOTAL) {
trace->syscall_stats = alloc_syscall_stats();
- if (trace->syscall_stats == NULL)
+ if (IS_ERR(trace->syscall_stats))
goto out;
}
@@ -4822,6 +4846,7 @@ static size_t syscall__dump_stats(struct trace *trace, int e_machine, FILE *fp,
struct hashmap *syscall_stats)
{
size_t printed = 0;
+ int lines = 0;
struct syscall *sc;
struct syscall_entry *entries;
@@ -4866,7 +4891,11 @@ static size_t syscall__dump_stats(struct trace *trace, int e_machine, FILE *fp,
fprintf(fp, "\t\t\t\t%s: %d\n", perf_env__arch_strerrno(trace->host->env, e + 1), stats->errnos[e]);
}
}
+ lines++;
}
+
+ if (trace->max_summary && trace->max_summary <= lines)
+ break;
}
free(entries);
@@ -5443,6 +5472,8 @@ int cmd_trace(int argc, const char **argv)
OPT_BOOLEAN(0, "force-btf", &trace.force_btf, "Prefer btf_dump general pretty printer"
"to customized ones"),
OPT_BOOLEAN(0, "bpf-summary", &trace.summary_bpf, "Summary syscall stats in BPF"),
+ OPT_INTEGER(0, "max-summary", &trace.max_summary,
+ "Max number of entries in the summary."),
OPTS_EVSWITCH(&trace.evswitch),
OPT_END()
};
diff --git a/tools/perf/check-headers.sh b/tools/perf/check-headers.sh
index be519c433ce4..e0537f275da2 100755
--- a/tools/perf/check-headers.sh
+++ b/tools/perf/check-headers.sh
@@ -11,10 +11,16 @@ declare -a FILES=(
"include/uapi/linux/bits.h"
"include/uapi/linux/fadvise.h"
"include/uapi/linux/fscrypt.h"
+ "include/uapi/linux/genetlink.h"
+ "include/uapi/linux/if_addr.h"
+ "include/uapi/linux/in.h"
"include/uapi/linux/kcmp.h"
"include/uapi/linux/kvm.h"
- "include/uapi/linux/in.h"
+ "include/uapi/linux/neighbour.h"
+ "include/uapi/linux/netfilter.h"
+ "include/uapi/linux/netfilter_arp.h"
"include/uapi/linux/perf_event.h"
+ "include/uapi/linux/rtnetlink.h"
"include/uapi/linux/seccomp.h"
"include/uapi/linux/stat.h"
"include/linux/bits.h"
@@ -23,6 +29,7 @@ declare -a FILES=(
"include/linux/const.h"
"include/vdso/const.h"
"include/vdso/unaligned.h"
+ "include/linux/gfp_types.h"
"include/linux/hash.h"
"include/linux/list-sort.h"
"include/uapi/linux/hw_breakpoint.h"
@@ -40,15 +47,12 @@ declare -a FILES=(
"arch/s390/include/uapi/asm/perf_regs.h"
"arch/x86/include/uapi/asm/perf_regs.h"
"arch/x86/include/uapi/asm/kvm.h"
- "arch/x86/include/uapi/asm/kvm_perf.h"
"arch/x86/include/uapi/asm/svm.h"
"arch/x86/include/uapi/asm/unistd.h"
"arch/x86/include/uapi/asm/vmx.h"
"arch/powerpc/include/uapi/asm/kvm.h"
"arch/s390/include/uapi/asm/kvm.h"
- "arch/s390/include/uapi/asm/kvm_perf.h"
"arch/s390/include/uapi/asm/sie.h"
- "arch/arm/include/uapi/asm/kvm.h"
"arch/arm64/include/uapi/asm/kvm.h"
"arch/arm64/include/uapi/asm/unistd.h"
"arch/alpha/include/uapi/asm/errno.h"
diff --git a/tools/perf/perf.h b/tools/perf/perf.h
index 3cb40965549f..e004178472d9 100644
--- a/tools/perf/perf.h
+++ b/tools/perf/perf.h
@@ -2,9 +2,7 @@
#ifndef _PERF_PERF_H
#define _PERF_PERF_H
-#ifndef MAX_NR_CPUS
#define MAX_NR_CPUS 4096
-#endif
enum perf_affinity {
PERF_AFFINITY_SYS = 0,
diff --git a/tools/perf/pmu-events/Build b/tools/perf/pmu-events/Build
index 32f387d48908..a46ab7b612df 100644
--- a/tools/perf/pmu-events/Build
+++ b/tools/perf/pmu-events/Build
@@ -1,7 +1,5 @@
pmu-events-y += pmu-events.o
-JDIR = pmu-events/arch/$(SRCARCH)
-JSON = $(shell [ -d $(JDIR) ] && \
- find $(JDIR) -name '*.json' -o -name 'mapfile.csv')
+JSON = $(shell find pmu-events/arch -name '*.json' -o -name '*.csv')
JDIR_TEST = pmu-events/arch/test
JSON_TEST = $(shell [ -d $(JDIR_TEST) ] && \
find $(JDIR_TEST) -name '*.json')
@@ -13,6 +11,8 @@ PMU_EVENTS_C = $(OUTPUT)pmu-events/pmu-events.c
METRIC_TEST_LOG = $(OUTPUT)pmu-events/metric_test.log
TEST_EMPTY_PMU_EVENTS_C = $(OUTPUT)pmu-events/test-empty-pmu-events.c
EMPTY_PMU_EVENTS_TEST_LOG = $(OUTPUT)pmu-events/empty-pmu-events.log
+LEGACY_CACHE_PY = pmu-events/make_legacy_cache.py
+LEGACY_CACHE_JSON = $(OUTPUT)pmu-events/arch/common/common/legacy-cache.json
ifeq ($(JEVENTS_ARCH),)
JEVENTS_ARCH=$(SRCARCH)
@@ -29,13 +29,26 @@ $(PMU_EVENTS_C): $(EMPTY_PMU_EVENTS_C)
$(call rule_mkdir)
$(Q)$(call echo-cmd,gen)cp $< $@
else
+# Copy checked-in json to OUTPUT for generation if it's an out of source build
+ifneq ($(OUTPUT),)
+$(OUTPUT)pmu-events/arch/%: pmu-events/arch/%
+ $(call rule_mkdir)
+ $(Q)$(call echo-cmd,gen)cp $< $@
+endif
+
+$(LEGACY_CACHE_JSON): $(LEGACY_CACHE_PY)
+ $(call rule_mkdir)
+ $(Q)$(call echo-cmd,gen)$(PYTHON) $(LEGACY_CACHE_PY) > $@
+
+GEN_JSON = $(patsubst %,$(OUTPUT)%,$(JSON)) $(LEGACY_CACHE_JSON)
+
$(METRIC_TEST_LOG): $(METRIC_TEST_PY) $(METRIC_PY)
$(call rule_mkdir)
$(Q)$(call echo-cmd,test)$(PYTHON) $< 2> $@ || (cat $@ && false)
-$(TEST_EMPTY_PMU_EVENTS_C): $(JSON) $(JSON_TEST) $(JEVENTS_PY) $(METRIC_PY) $(METRIC_TEST_LOG)
+$(TEST_EMPTY_PMU_EVENTS_C): $(GEN_JSON) $(JSON_TEST) $(JEVENTS_PY) $(METRIC_PY) $(METRIC_TEST_LOG)
$(call rule_mkdir)
- $(Q)$(call echo-cmd,gen)$(PYTHON) $(JEVENTS_PY) none none pmu-events/arch $@
+ $(Q)$(call echo-cmd,gen)$(PYTHON) $(JEVENTS_PY) none none $(OUTPUT)pmu-events/arch $@
$(EMPTY_PMU_EVENTS_TEST_LOG): $(EMPTY_PMU_EVENTS_C) $(TEST_EMPTY_PMU_EVENTS_C)
$(call rule_mkdir)
@@ -63,10 +76,10 @@ $(OUTPUT)%.pylint_log: %
$(call rule_mkdir)
$(Q)$(call echo-cmd,test)pylint "$<" > $@ || (cat $@ && rm $@ && false)
-$(PMU_EVENTS_C): $(JSON) $(JSON_TEST) $(JEVENTS_PY) $(METRIC_PY) $(METRIC_TEST_LOG) \
+$(PMU_EVENTS_C): $(GEN_JSON) $(JSON_TEST) $(JEVENTS_PY) $(METRIC_PY) $(METRIC_TEST_LOG) \
$(EMPTY_PMU_EVENTS_TEST_LOG) $(PMU_EVENTS_MYPY_TEST_LOGS) $(PMU_EVENTS_PYLINT_TEST_LOGS)
$(call rule_mkdir)
- $(Q)$(call echo-cmd,gen)$(PYTHON) $(JEVENTS_PY) $(JEVENTS_ARCH) $(JEVENTS_MODEL) pmu-events/arch $@
+ $(Q)$(call echo-cmd,gen)$(PYTHON) $(JEVENTS_PY) $(JEVENTS_ARCH) $(JEVENTS_MODEL) $(OUTPUT)pmu-events/arch $@
endif
# pmu-events.c file is generated in the OUTPUT directory so it needs a
diff --git a/tools/perf/pmu-events/arch/arm64/ampere/ampereone/metrics.json b/tools/perf/pmu-events/arch/arm64/ampere/ampereone/metrics.json
index afcdad58ef89..324104438e78 100644
--- a/tools/perf/pmu-events/arch/arm64/ampere/ampereone/metrics.json
+++ b/tools/perf/pmu-events/arch/arm64/ampere/ampereone/metrics.json
@@ -113,7 +113,7 @@
{
"MetricName": "load_store_spec_rate",
"MetricExpr": "((LDST_SPEC / INST_SPEC) * 100)",
- "BriefDescription": "The rate of load or store instructions speculatively executed to overall instructions speclatively executed",
+ "BriefDescription": "The rate of load or store instructions speculatively executed to overall instructions speculatively executed",
"MetricGroup": "Operation_Mix",
"ScaleUnit": "1percent of operations"
},
@@ -132,7 +132,7 @@
{
"MetricName": "pc_write_spec_rate",
"MetricExpr": "((PC_WRITE_SPEC / INST_SPEC) * 100)",
- "BriefDescription": "The rate of software change of the PC speculatively executed to overall instructions speclatively executed",
+ "BriefDescription": "The rate of software change of the PC speculatively executed to overall instructions speculatively executed",
"MetricGroup": "Operation_Mix",
"ScaleUnit": "1percent of operations"
},
@@ -195,14 +195,14 @@
{
"MetricName": "stall_frontend_cache_rate",
"MetricExpr": "((STALL_FRONTEND_CACHE / CPU_CYCLES) * 100)",
- "BriefDescription": "Proportion of cycles stalled and no ops delivered from frontend and cache miss",
+ "BriefDescription": "Proportion of cycles stalled and no operations delivered from frontend and cache miss",
"MetricGroup": "Stall",
"ScaleUnit": "1percent of cycles"
},
{
"MetricName": "stall_frontend_tlb_rate",
"MetricExpr": "((STALL_FRONTEND_TLB / CPU_CYCLES) * 100)",
- "BriefDescription": "Proportion of cycles stalled and no ops delivered from frontend and TLB miss",
+ "BriefDescription": "Proportion of cycles stalled and no operations delivered from frontend and TLB miss",
"MetricGroup": "Stall",
"ScaleUnit": "1percent of cycles"
},
diff --git a/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/metrics.json b/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/metrics.json
index 5228f94a793f..a29aadc9b2e3 100644
--- a/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/metrics.json
+++ b/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/metrics.json
@@ -113,7 +113,7 @@
{
"MetricName": "load_store_spec_rate",
"MetricExpr": "LDST_SPEC / INST_SPEC",
- "BriefDescription": "The rate of load or store instructions speculatively executed to overall instructions speclatively executed",
+ "BriefDescription": "The rate of load or store instructions speculatively executed to overall instructions speculatively executed",
"MetricGroup": "Operation_Mix",
"ScaleUnit": "100percent of operations"
},
@@ -132,7 +132,7 @@
{
"MetricName": "pc_write_spec_rate",
"MetricExpr": "PC_WRITE_SPEC / INST_SPEC",
- "BriefDescription": "The rate of software change of the PC speculatively executed to overall instructions speclatively executed",
+ "BriefDescription": "The rate of software change of the PC speculatively executed to overall instructions speculatively executed",
"MetricGroup": "Operation_Mix",
"ScaleUnit": "100percent of operations"
},
@@ -195,14 +195,14 @@
{
"MetricName": "stall_frontend_cache_rate",
"MetricExpr": "STALL_FRONTEND_CACHE / CPU_CYCLES",
- "BriefDescription": "Proportion of cycles stalled and no ops delivered from frontend and cache miss",
+ "BriefDescription": "Proportion of cycles stalled and no operations delivered from frontend and cache miss",
"MetricGroup": "Stall",
"ScaleUnit": "100percent of cycles"
},
{
"MetricName": "stall_frontend_tlb_rate",
"MetricExpr": "STALL_FRONTEND_TLB / CPU_CYCLES",
- "BriefDescription": "Proportion of cycles stalled and no ops delivered from frontend and TLB miss",
+ "BriefDescription": "Proportion of cycles stalled and no operations delivered from frontend and TLB miss",
"MetricGroup": "Stall",
"ScaleUnit": "100percent of cycles"
},
@@ -388,55 +388,55 @@
"MetricExpr": "L1D_CACHE_RW / L1D_CACHE",
"BriefDescription": "L1D cache access - demand",
"MetricGroup": "Cache",
- "ScaleUnit": "100percent of cache acceses"
+ "ScaleUnit": "100percent of cache accesses"
},
{
- "MetricName": "l1d_cache_access_prefetces",
+ "MetricName": "l1d_cache_access_prefetches",
"MetricExpr": "L1D_CACHE_PRFM / L1D_CACHE",
"BriefDescription": "L1D cache access - prefetch",
"MetricGroup": "Cache",
- "ScaleUnit": "100percent of cache acceses"
+ "ScaleUnit": "100percent of cache accesses"
},
{
"MetricName": "l1d_cache_demand_misses",
"MetricExpr": "L1D_CACHE_REFILL_RW / L1D_CACHE",
"BriefDescription": "L1D cache demand misses",
"MetricGroup": "Cache",
- "ScaleUnit": "100percent of cache acceses"
+ "ScaleUnit": "100percent of cache accesses"
},
{
"MetricName": "l1d_cache_demand_misses_read",
"MetricExpr": "L1D_CACHE_REFILL_RD / L1D_CACHE",
"BriefDescription": "L1D cache demand misses - read",
"MetricGroup": "Cache",
- "ScaleUnit": "100percent of cache acceses"
+ "ScaleUnit": "100percent of cache accesses"
},
{
"MetricName": "l1d_cache_demand_misses_write",
"MetricExpr": "L1D_CACHE_REFILL_WR / L1D_CACHE",
"BriefDescription": "L1D cache demand misses - write",
"MetricGroup": "Cache",
- "ScaleUnit": "100percent of cache acceses"
+ "ScaleUnit": "100percent of cache accesses"
},
{
"MetricName": "l1d_cache_prefetch_misses",
"MetricExpr": "L1D_CACHE_REFILL_PRFM / L1D_CACHE",
"BriefDescription": "L1D cache prefetch misses",
"MetricGroup": "Cache",
- "ScaleUnit": "100percent of cache acceses"
+ "ScaleUnit": "100percent of cache accesses"
},
{
"MetricName": "ase_scalar_mix",
"MetricExpr": "ASE_SCALAR_SPEC / OP_SPEC",
"BriefDescription": "Proportion of advanced SIMD data processing operations (excluding DP_SPEC/LD_SPEC) scalar operations",
"MetricGroup": "Instructions",
- "ScaleUnit": "100percent of cache acceses"
+ "ScaleUnit": "100percent of cache accesses"
},
{
"MetricName": "ase_vector_mix",
"MetricExpr": "ASE_VECTOR_SPEC / OP_SPEC",
"BriefDescription": "Proportion of advanced SIMD data processing operations (excluding DP_SPEC/LD_SPEC) vector operations",
"MetricGroup": "Instructions",
- "ScaleUnit": "100percent of cache acceses"
+ "ScaleUnit": "100percent of cache accesses"
}
]
diff --git a/tools/perf/pmu-events/arch/arm64/ampere/emag/cache.json b/tools/perf/pmu-events/arch/arm64/ampere/emag/cache.json
index 4cc50b7da526..4001cc5753a7 100644
--- a/tools/perf/pmu-events/arch/arm64/ampere/emag/cache.json
+++ b/tools/perf/pmu-events/arch/arm64/ampere/emag/cache.json
@@ -81,7 +81,7 @@
"BriefDescription": "L2D TLB access"
},
{
- "PublicDescription": "Level 2 access to instruciton TLB that caused a page table walk. This event counts on any instruciton access which causes L2I_TLB_REFILL to count",
+ "PublicDescription": "Level 2 access to instruction TLB that caused a page table walk. This event counts on any instruction access which causes L2I_TLB_REFILL to count",
"EventCode": "0x35",
"EventName": "L2I_TLB_ACCESS",
"BriefDescription": "L2I TLB access"
diff --git a/tools/perf/pmu-events/arch/arm64/freescale/imx94/sys/ddrc.json b/tools/perf/pmu-events/arch/arm64/freescale/imx94/sys/ddrc.json
new file mode 100644
index 000000000000..aa7b58721dc7
--- /dev/null
+++ b/tools/perf/pmu-events/arch/arm64/freescale/imx94/sys/ddrc.json
@@ -0,0 +1,9 @@
+[
+ {
+ "BriefDescription": "ddr cycles event",
+ "EventCode": "0x00",
+ "EventName": "imx94_ddr.cycles",
+ "Unit": "imx9_ddr",
+ "Compat": "imx94"
+ }
+]
diff --git a/tools/perf/pmu-events/arch/arm64/freescale/imx94/sys/metrics.json b/tools/perf/pmu-events/arch/arm64/freescale/imx94/sys/metrics.json
new file mode 100644
index 000000000000..629f1f52761e
--- /dev/null
+++ b/tools/perf/pmu-events/arch/arm64/freescale/imx94/sys/metrics.json
@@ -0,0 +1,450 @@
+[
+ {
+ "BriefDescription": "bandwidth usage for lpddr5 evk board",
+ "MetricName": "imx94_bandwidth_usage.lpddr5",
+ "MetricExpr": "(( imx9_ddr0@eddrtq_pm_rd_beat_filt0\\,axi_mask\\=0x000\\,axi_id\\=0x000@ + imx9_ddr0@eddrtq_pm_wr_beat_filt\\,axi_mask\\=0x000\\,axi_id\\=0x000@ ) * 32 / duration_time) / (4266 * 1000000 * 4)",
+ "ScaleUnit": "1e2%",
+ "Unit": "imx9_ddr",
+ "Compat": "imx94"
+ },
+ {
+ "BriefDescription": "bandwidth usage for lpddr4 evk board",
+ "MetricName": "imx94_bandwidth_usage.lpddr4",
+ "MetricExpr": "(( imx9_ddr0@eddrtq_pm_rd_beat_filt0\\,axi_mask\\=0x000\\,axi_id\\=0x000@ + imx9_ddr0@eddrtq_pm_wr_beat_filt\\,axi_mask\\=0x000\\,axi_id\\=0x000@ ) * 32 / duration_time) / (4266 * 1000000 * 4)",
+ "ScaleUnit": "1e2%",
+ "Unit": "imx9_ddr",
+ "Compat": "imx94"
+ },
+ {
+ "BriefDescription": "bytes of all masters read from ddr",
+ "MetricName": "imx94_ddr_read.all",
+ "MetricExpr": "( imx9_ddr0@eddrtq_pm_rd_beat_filt0\\,axi_mask\\=0x000\\,axi_id\\=0x000@ ) * 32",
+ "ScaleUnit": "9.765625e-4KB",
+ "Unit": "imx9_ddr",
+ "Compat": "imx94"
+ },
+ {
+ "BriefDescription": "bytes of all masters write to ddr",
+ "MetricName": "imx94_ddr_write.all",
+ "MetricExpr": "( imx9_ddr0@eddrtq_pm_wr_beat_filt\\,axi_mask\\=0x000\\,axi_id\\=0x000@ ) * 32",
+ "ScaleUnit": "9.765625e-4KB",
+ "Unit": "imx9_ddr",
+ "Compat": "imx94"
+ },
+ {
+ "BriefDescription": "bytes of all a55 read from ddr",
+ "MetricName": "imx94_ddr_read.a55_all",
+ "MetricExpr": "( imx9_ddr0@eddrtq_pm_rd_beat_filt1\\,axi_mask\\=0x3fc\\,axi_id\\=0x000@ ) * 32",
+ "ScaleUnit": "9.765625e-4KB",
+ "Unit": "imx9_ddr",
+ "Compat": "imx94"
+ },
+ {
+ "BriefDescription": "bytes of all a55 write from ddr",
+ "MetricName": "imx94_ddr_write.a55_all",
+ "MetricExpr": "( imx9_ddr0@eddrtq_pm_wr_beat_filt\\,axi_mask\\=0x3fc\\,axi_id\\=0x000@ ) * 32",
+ "ScaleUnit": "9.765625e-4KB",
+ "Unit": "imx9_ddr",
+ "Compat": "imx94"
+ },
+ {
+ "BriefDescription": "bytes of a55 core 0 read from ddr",
+ "MetricName": "imx94_ddr_read.a55_0",
+ "MetricExpr": "( imx9_ddr0@eddrtq_pm_rd_beat_filt2\\,axi_mask\\=0x3ff\\,axi_id\\=0x000@ ) * 32",
+ "ScaleUnit": "9.765625e-4KB",
+ "Unit": "imx9_ddr",
+ "Compat": "imx94"
+ },
+ {
+ "BriefDescription": "bytes of a55 core 0 write to ddr",
+ "MetricName": "imx94_ddr_write.a55_0",
+ "MetricExpr": "( imx9_ddr0@eddrtq_pm_wr_beat_filt\\,axi_mask\\=0x3ff\\,axi_id\\=0x000@ ) * 32",
+ "ScaleUnit": "9.765625e-4KB",
+ "Unit": "imx9_ddr",
+ "Compat": "imx94"
+ },
+ {
+ "BriefDescription": "bytes of a55 core 1 read from ddr",
+ "MetricName": "imx94_ddr_read.a55_1",
+ "MetricExpr": "( imx9_ddr0@eddrtq_pm_rd_beat_filt0\\,axi_mask\\=0x00f\\,axi_id\\=0x001@ ) * 32",
+ "ScaleUnit": "9.765625e-4KB",
+ "Unit": "imx9_ddr",
+ "Compat": "imx94"
+ },
+ {
+ "BriefDescription": "bytes of a55 core 1 write to ddr",
+ "MetricName": "imx94_ddr_write.a55_1",
+ "MetricExpr": "( imx9_ddr0@eddrtq_pm_wr_beat_filt\\,axi_mask\\=0x00f\\,axi_id\\=0x001@ ) * 32",
+ "ScaleUnit": "9.765625e-4KB",
+ "Unit": "imx9_ddr",
+ "Compat": "imx94"
+ },
+ {
+ "BriefDescription": "bytes of a55 core 2 read from ddr",
+ "MetricName": "imx94_ddr_read.a55_2",
+ "MetricExpr": "( imx9_ddr0@eddrtq_pm_rd_beat_filt1\\,axi_mask\\=0x00f\\,axi_id\\=0x002@ ) * 32",
+ "ScaleUnit": "9.765625e-4KB",
+ "Unit": "imx9_ddr",
+ "Compat": "imx94"
+ },
+ {
+ "BriefDescription": "bytes of a55 core 2 write to ddr",
+ "MetricName": "imx94_ddr_write.a55_2",
+ "MetricExpr": "( imx9_ddr0@eddrtq_pm_wr_beat_filt\\,axi_mask\\=0x00f\\,axi_id\\=0x002@ ) * 32",
+ "ScaleUnit": "9.765625e-4KB",
+ "Unit": "imx9_ddr",
+ "Compat": "imx94"
+ },
+ {
+ "BriefDescription": "bytes of a55 core 3 read from ddr",
+ "MetricName": "imx94_ddr_read.a55_3",
+ "MetricExpr": "( imx9_ddr0@eddrtq_pm_rd_beat_filt2\\,axi_mask\\=0x00f\\,axi_id\\=0x003@ ) * 32",
+ "ScaleUnit": "9.765625e-4KB",
+ "Unit": "imx9_ddr",
+ "Compat": "imx94"
+ },
+ {
+ "BriefDescription": "bytes of a55 core 3 write to ddr",
+ "MetricName": "imx94_ddr_write.a55_3",
+ "MetricExpr": "( imx9_ddr0@eddrtq_pm_wr_beat_filt\\,axi_mask\\=0x00f\\,axi_id\\=0x003@ ) * 32",
+ "ScaleUnit": "9.765625e-4KB",
+ "Unit": "imx9_ddr",
+ "Compat": "imx94"
+ },
+ {
+ "BriefDescription": "bytes of m7 core1 read from ddr",
+ "MetricName": "imx94_ddr_read.m7_1",
+ "MetricExpr": "( imx9_ddr0@eddrtq_pm_rd_beat_filt0\\,axi_mask\\=0x00f\\,axi_id\\=0x004@ ) * 32",
+ "ScaleUnit": "9.765625e-4KB",
+ "Unit": "imx9_ddr",
+ "Compat": "imx94"
+ },
+ {
+ "BriefDescription": "bytes of m7 core1 write to ddr",
+ "MetricName": "imx94_ddr_write.m7_1",
+ "MetricExpr": "( imx9_ddr0@eddrtq_pm_wr_beat_filt\\,axi_mask\\=0x00f\\,axi_id\\=0x004@ ) * 32",
+ "ScaleUnit": "9.765625e-4KB",
+ "Unit": "imx9_ddr",
+ "Compat": "imx94"
+ },
+ {
+ "BriefDescription": "bytes of m33 core1 (in netc) read from ddr",
+ "MetricName": "imx94_ddr_read.m33_1",
+ "MetricExpr": "( imx9_ddr0@eddrtq_pm_rd_beat_filt1\\,axi_mask\\=0x00f\\,axi_id\\=0x005@ ) * 32",
+ "ScaleUnit": "9.765625e-4KB",
+ "Unit": "imx9_ddr",
+ "Compat": "imx94"
+ },
+ {
+ "BriefDescription": "bytes of m33 core1 (in netc) write to ddr",
+ "MetricName": "imx94_ddr_write.m33_1",
+ "MetricExpr": "( imx9_ddr0@eddrtq_pm_wr_beat_filt\\,axi_mask\\=0x00f\\,axi_id\\=0x005@ ) * 32",
+ "ScaleUnit": "9.765625e-4KB",
+ "Unit": "imx9_ddr",
+ "Compat": "imx94"
+ },
+ {
+ "BriefDescription": "bytes of pcie2 read from ddr",
+ "MetricName": "imx94_ddr_read.pcie2",
+ "MetricExpr": "( imx9_ddr0@eddrtq_pm_rd_beat_filt2\\,axi_mask\\=0x00f\\,axi_id\\=0x006@ ) * 32",
+ "ScaleUnit": "9.765625e-4KB",
+ "Unit": "imx9_ddr",
+ "Compat": "imx94"
+ },
+ {
+ "BriefDescription": "bytes of pcie2 write to ddr",
+ "MetricName": "imx94_ddr_write.pcie2",
+ "MetricExpr": "( imx9_ddr0@eddrtq_pm_wr_beat_filt\\,axi_mask\\=0x00f\\,axi_id\\=0x006@ ) * 32",
+ "ScaleUnit": "9.765625e-4KB",
+ "Unit": "imx9_ddr",
+ "Compat": "imx94"
+ },
+ {
+ "BriefDescription": "bytes of Cortex-A DSU L3 evicted/ACP transactions read from ddr",
+ "MetricName": "imx94_ddr_read.cortex_a_dsu",
+ "MetricExpr": "( imx9_ddr0@eddrtq_pm_rd_beat_filt0\\,axi_mask\\=0x00f\\,axi_id\\=0x007@ ) * 32",
+ "ScaleUnit": "9.765625e-4KB",
+ "Unit": "imx9_ddr",
+ "Compat": "imx94"
+ },
+ {
+ "BriefDescription": "bytes of Cortex-A DSU L3 evicted/ACP transactions write to ddr",
+ "MetricName": "imx94_ddr_write.cortex_a_dsu",
+ "MetricExpr": "( imx9_ddr0@eddrtq_pm_wr_beat_filt\\,axi_mask\\=0x00f\\,axi_id\\=0x007@ ) * 32",
+ "ScaleUnit": "9.765625e-4KB",
+ "Unit": "imx9_ddr",
+ "Compat": "imx94"
+ },
+ {
+ "BriefDescription": "bytes of m33 core0 read from ddr",
+ "MetricName": "imx94_ddr_read.m33_0",
+ "MetricExpr": "( imx9_ddr0@eddrtq_pm_rd_beat_filt1\\,axi_mask\\=0x00f\\,axi_id\\=0x008@ ) * 32",
+ "ScaleUnit": "9.765625e-4KB",
+ "Unit": "imx9_ddr",
+ "Compat": "imx94"
+ },
+ {
+ "BriefDescription": "bytes of m33 core0 write to ddr",
+ "MetricName": "imx94_ddr_write.m33_0",
+ "MetricExpr": "( imx9_ddr0@eddrtq_pm_wr_beat_filt\\,axi_mask\\=0x00f\\,axi_id\\=0x008@ ) * 32",
+ "ScaleUnit": "9.765625e-4KB",
+ "Unit": "imx9_ddr",
+ "Compat": "imx94"
+ },
+ {
+ "BriefDescription": "bytes of m7 core0 read from ddr",
+ "MetricName": "imx94_ddr_read.m7_0",
+ "MetricExpr": "( imx9_ddr0@eddrtq_pm_rd_beat_filt2\\,axi_mask\\=0x00f\\,axi_id\\=0x009@ ) * 32",
+ "ScaleUnit": "9.765625e-4KB",
+ "Unit": "imx9_ddr",
+ "Compat": "imx94"
+ },
+ {
+ "BriefDescription": "bytes of m7 core0 write to ddr",
+ "MetricName": "imx94_ddr_write.m7_0",
+ "MetricExpr": "( imx9_ddr0@eddrtq_pm_wr_beat_filt\\,axi_mask\\=0x00f\\,axi_id\\=0x009@ ) * 32",
+ "ScaleUnit": "9.765625e-4KB",
+ "Unit": "imx9_ddr",
+ "Compat": "imx94"
+ },
+ {
+ "BriefDescription": "bytes of sentinel read from ddr",
+ "MetricName": "imx94_ddr_read.sentinel",
+ "MetricExpr": "( imx9_ddr0@eddrtq_pm_rd_beat_filt0\\,axi_mask\\=0x00f\\,axi_id\\=0x00a@ ) * 32",
+ "ScaleUnit": "9.765625e-4KB",
+ "Unit": "imx9_ddr",
+ "Compat": "imx94"
+ },
+ {
+ "BriefDescription": "bytes of sentinel write to ddr",
+ "MetricName": "imx94_ddr_write.sentinel",
+ "MetricExpr": "( imx9_ddr0@eddrtq_pm_wr_beat_filt\\,axi_mask\\=0x00f\\,axi_id\\=0x00a@ ) * 32",
+ "ScaleUnit": "9.765625e-4KB",
+ "Unit": "imx9_ddr",
+ "Compat": "imx94"
+ },
+ {
+ "BriefDescription": "bytes of edma1 read from ddr",
+ "MetricName": "imx94_ddr_read.edma1",
+ "MetricExpr": "( imx9_ddr0@eddrtq_pm_rd_beat_filt1\\,axi_mask\\=0x00f\\,axi_id\\=0x00b@ ) * 32",
+ "ScaleUnit": "9.765625e-4KB",
+ "Unit": "imx9_ddr",
+ "Compat": "imx94"
+ },
+ {
+ "BriefDescription": "bytes of edma1 write to ddr",
+ "MetricName": "imx94_ddr_write.edma1",
+ "MetricExpr": "( imx9_ddr0@eddrtq_pm_wr_beat_filt\\,axi_mask\\=0x00f\\,axi_id\\=0x00b@ ) * 32",
+ "ScaleUnit": "9.765625e-4KB",
+ "Unit": "imx9_ddr",
+ "Compat": "imx94"
+ },
+ {
+ "BriefDescription": "bytes of edma2 read from ddr",
+ "MetricName": "imx94_ddr_read.edma2",
+ "MetricExpr": "( imx9_ddr0@eddrtq_pm_rd_beat_filt2\\,axi_mask\\=0x00f\\,axi_id\\=0x00c@ ) * 32",
+ "ScaleUnit": "9.765625e-4KB",
+ "Unit": "imx9_ddr",
+ "Compat": "imx94"
+ },
+ {
+ "BriefDescription": "bytes of edma2 write to ddr",
+ "MetricName": "imx94_ddr_write.edma2",
+ "MetricExpr": "( imx9_ddr0@eddrtq_pm_wr_beat_filt\\,axi_mask\\=0x00f\\,axi_id\\=0x00c@ ) * 32",
+ "ScaleUnit": "9.765625e-4KB",
+ "Unit": "imx9_ddr",
+ "Compat": "imx94"
+ },
+ {
+ "BriefDescription": "bytes of netc read from ddr",
+ "MetricName": "imx94_ddr_read.netc",
+ "MetricExpr": "( imx9_ddr0@eddrtq_pm_rd_beat_filt0\\,axi_mask\\=0x00f\\,axi_id\\=0x00d@ ) * 32",
+ "ScaleUnit": "9.765625e-4KB",
+ "Unit": "imx9_ddr",
+ "Compat": "imx94"
+ },
+ {
+ "BriefDescription": "bytes of netc write to ddr",
+ "MetricName": "imx94_ddr_write.netc",
+ "MetricExpr": "( imx9_ddr0@eddrtq_pm_wr_beat_filt\\,axi_mask\\=0x00f\\,axi_id\\=0x00d@ ) * 32",
+ "ScaleUnit": "9.765625e-4KB",
+ "Unit": "imx9_ddr",
+ "Compat": "imx94"
+ },
+ {
+ "BriefDescription": "bytes of aonmix read from ddr",
+ "MetricName": "imx94_ddr_read.aonmix",
+ "MetricExpr": "( imx9_ddr0@eddrtq_pm_rd_beat_filt2\\,axi_mask\\=0x00f\\,axi_id\\=0x00f@ ) * 32",
+ "ScaleUnit": "9.765625e-4KB",
+ "Unit": "imx9_ddr",
+ "Compat": "imx94"
+ },
+ {
+ "BriefDescription": "bytes of aonmix write to ddr",
+ "MetricName": "imx94_ddr_write.aonmix",
+ "MetricExpr": "( imx9_ddr0@eddrtq_pm_wr_beat_filt\\,axi_mask\\=0x00f\\,axi_id\\=0x00f@ ) * 32",
+ "ScaleUnit": "9.765625e-4KB",
+ "Unit": "imx9_ddr",
+ "Compat": "imx94"
+ },
+ {
+ "BriefDescription": "bytes of npumix read from ddr",
+ "MetricName": "imx94_ddr_read.npumix",
+ "MetricExpr": "( imx9_ddr0@eddrtq_pm_rd_beat_filt0\\,axi_mask\\=0x3f0\\,axi_id\\=0x010@ ) * 32",
+ "ScaleUnit": "9.765625e-4KB",
+ "Unit": "imx9_ddr",
+ "Compat": "imx94"
+ },
+ {
+ "BriefDescription": "bytes of npumix write to ddr",
+ "MetricName": "imx94_ddr_write.npumix",
+ "MetricExpr": "( imx9_ddr0@eddrtq_pm_wr_beat_filt\\,axi_mask\\=0x3f0\\,axi_id\\=0x010@ ) * 32",
+ "ScaleUnit": "9.765625e-4KB",
+ "Unit": "imx9_ddr",
+ "Compat": "imx94"
+ },
+ {
+ "BriefDescription": "bytes of usdhc1 read from ddr",
+ "MetricName": "imx94_ddr_read.usdhc1",
+ "MetricExpr": "( imx9_ddr0@eddrtq_pm_rd_beat_filt1\\,axi_mask\\=0x3f0\\,axi_id\\=0x0b0@ ) * 32",
+ "ScaleUnit": "9.765625e-4KB",
+ "Unit": "imx9_ddr",
+ "Compat": "imx94"
+ },
+ {
+ "BriefDescription": "bytes of usdhc1 write to ddr",
+ "MetricName": "imx94_ddr_write.usdhc1",
+ "MetricExpr": "( imx9_ddr0@eddrtq_pm_wr_beat_filt\\,axi_mask\\=0x3f0\\,axi_id\\=0x0b0@ ) * 32",
+ "ScaleUnit": "9.765625e-4KB",
+ "Unit": "imx9_ddr",
+ "Compat": "imx94"
+ },
+ {
+ "BriefDescription": "bytes of usdhc2 read from ddr",
+ "MetricName": "imx94_ddr_read.usdhc2",
+ "MetricExpr": "( imx9_ddr0@eddrtq_pm_rd_beat_filt2\\,axi_mask\\=0x3f0\\,axi_id\\=0x0c0@ ) * 32",
+ "ScaleUnit": "9.765625e-4KB",
+ "Unit": "imx9_ddr",
+ "Compat": "imx94"
+ },
+ {
+ "BriefDescription": "bytes of usdhc2 write to ddr",
+ "MetricName": "imx94_ddr_write.usdhc2",
+ "MetricExpr": "( imx9_ddr0@eddrtq_pm_wr_beat_filt\\,axi_mask\\=0x3f0\\,axi_id\\=0x0c0@ ) * 32",
+ "ScaleUnit": "9.765625e-4KB",
+ "Unit": "imx9_ddr",
+ "Compat": "imx94"
+ },
+ {
+ "BriefDescription": "bytes of usdhc3 read from ddr",
+ "MetricName": "imx94_ddr_read.usdhc3",
+ "MetricExpr": "( imx9_ddr0@eddrtq_pm_rd_beat_filt0\\,axi_mask\\=0x3f0\\,axi_id\\=0x0d0@ ) * 32",
+ "ScaleUnit": "9.765625e-4KB",
+ "Unit": "imx9_ddr",
+ "Compat": "imx94"
+ },
+ {
+ "BriefDescription": "bytes of usdhc3 write to ddr",
+ "MetricName": "imx94_ddr_write.usdhc3",
+ "MetricExpr": "( imx9_ddr0@eddrtq_pm_wr_beat_filt\\,axi_mask\\=0x3f0\\,axi_id\\=0x0d0@ ) * 32",
+ "ScaleUnit": "9.765625e-4KB",
+ "Unit": "imx9_ddr",
+ "Compat": "imx94"
+ },
+ {
+ "BriefDescription": "bytes of xspi read from ddr",
+ "MetricName": "imx94_ddr_read.xspi",
+ "MetricExpr": "( imx9_ddr0@eddrtq_pm_rd_beat_filt2\\,axi_mask\\=0x3f0\\,axi_id\\=0x0f0@ ) * 32",
+ "ScaleUnit": "9.765625e-4KB",
+ "Unit": "imx9_ddr",
+ "Compat": "imx94"
+ },
+ {
+ "BriefDescription": "bytes of xspi write to ddr",
+ "MetricName": "imx94_ddr_write.xspi",
+ "MetricExpr": "( imx9_ddr0@eddrtq_pm_wr_beat_filt\\,axi_mask\\=0x3f0\\,axi_id\\=0x0f0@ ) * 32",
+ "ScaleUnit": "9.765625e-4KB",
+ "Unit": "imx9_ddr",
+ "Compat": "imx94"
+ },
+ {
+ "BriefDescription": "bytes of pcie1 read from ddr",
+ "MetricName": "imx94_ddr_read.pcie1",
+ "MetricExpr": "( imx9_ddr0@eddrtq_pm_rd_beat_filt0\\,axi_mask\\=0x3f0\\,axi_id\\=0x100@ ) * 32",
+ "ScaleUnit": "9.765625e-4KB",
+ "Unit": "imx9_ddr",
+ "Compat": "imx94"
+ },
+ {
+ "BriefDescription": "bytes of pcie1 write to ddr",
+ "MetricName": "imx94_ddr_write.pcie1",
+ "MetricExpr": "( imx9_ddr0@eddrtq_pm_wr_beat_filt\\,axi_mask\\=0x3f0\\,axi_id\\=0x100@ ) * 32",
+ "ScaleUnit": "9.765625e-4KB",
+ "Unit": "imx9_ddr",
+ "Compat": "imx94"
+ },
+ {
+ "BriefDescription": "bytes of usb1 read from ddr",
+ "MetricName": "imx94_ddr_read.usb1",
+ "MetricExpr": "( imx9_ddr0@eddrtq_pm_rd_beat_filt1\\,axi_mask\\=0x3f0\\,axi_id\\=0x140@ ) * 32",
+ "ScaleUnit": "9.765625e-4KB",
+ "Unit": "imx9_ddr",
+ "Compat": "imx94"
+ },
+ {
+ "BriefDescription": "bytes of usb1 write to ddr",
+ "MetricName": "imx94_ddr_write.usb1",
+ "MetricExpr": "( imx9_ddr0@eddrtq_pm_wr_beat_filt\\,axi_mask\\=0x3f0\\,axi_id\\=0x140@ ) * 32",
+ "ScaleUnit": "9.765625e-4KB",
+ "Unit": "imx9_ddr",
+ "Compat": "imx94"
+ },
+ {
+ "BriefDescription": "bytes of usb2 read from ddr",
+ "MetricName": "imx94_ddr_read.usb2",
+ "MetricExpr": "( imx9_ddr0@eddrtq_pm_rd_beat_filt2\\,axi_mask\\=0x3f0\\,axi_id\\=0x150@ ) * 32",
+ "ScaleUnit": "9.765625e-4KB",
+ "Unit": "imx9_ddr",
+ "Compat": "imx94"
+ },
+ {
+ "BriefDescription": "bytes of usb2 write to ddr",
+ "MetricName": "imx94_ddr_write.usb2",
+ "MetricExpr": "( imx9_ddr0@eddrtq_pm_wr_beat_filt\\,axi_mask\\=0x3f0\\,axi_id\\=0x150@ ) * 32",
+ "ScaleUnit": "9.765625e-4KB",
+ "Unit": "imx9_ddr",
+ "Compat": "imx94"
+ },
+ {
+ "BriefDescription": "bytes of pxp read from ddr",
+ "MetricName": "imx94_ddr_read.pxp",
+ "MetricExpr": "( imx9_ddr0@eddrtq_pm_rd_beat_filt0\\,axi_mask\\=0x3f0\\,axi_id\\=0x2a0@ ) * 32",
+ "ScaleUnit": "9.765625e-4KB",
+ "Unit": "imx9_ddr",
+ "Compat": "imx94"
+ },
+ {
+ "BriefDescription": "bytes of pxp write to ddr",
+ "MetricName": "imx94_ddr_write.pxp",
+ "MetricExpr": "( imx9_ddr0@eddrtq_pm_wr_beat_filt\\,axi_mask\\=0x3f0\\,axi_id\\=0x2a0@ ) * 32",
+ "ScaleUnit": "9.765625e-4KB",
+ "Unit": "imx9_ddr",
+ "Compat": "imx94"
+ },
+ {
+ "BriefDescription": "bytes of dcif read from ddr",
+ "MetricName": "imx94_ddr_read.dcif",
+ "MetricExpr": "( imx9_ddr0@eddrtq_pm_rd_beat_filt1\\,axi_mask\\=0x3f0\\,axi_id\\=0x2b0@ ) * 32",
+ "ScaleUnit": "9.765625e-4KB",
+ "Unit": "imx9_ddr",
+ "Compat": "imx94"
+ },
+ {
+ "BriefDescription": "bytes of dcif write to ddr",
+ "MetricName": "imx94_ddr_write.dcif",
+ "MetricExpr": "( imx9_ddr0@eddrtq_pm_wr_beat_filt\\,axi_mask\\=0x3f0\\,axi_id\\=0x2b0@ ) * 32",
+ "ScaleUnit": "9.765625e-4KB",
+ "Unit": "imx9_ddr",
+ "Compat": "imx94"
+ }
+]
diff --git a/tools/perf/pmu-events/arch/common/common/legacy-hardware.json b/tools/perf/pmu-events/arch/common/common/legacy-hardware.json
new file mode 100644
index 000000000000..71700647f19b
--- /dev/null
+++ b/tools/perf/pmu-events/arch/common/common/legacy-hardware.json
@@ -0,0 +1,72 @@
+[
+ {
+ "EventName": "cpu-cycles",
+ "BriefDescription": "Total cycles. Be wary of what happens during CPU frequency scaling [This event is an alias of cycles].",
+ "LegacyConfigCode": "0"
+ },
+ {
+ "EventName": "cycles",
+ "BriefDescription": "Total cycles. Be wary of what happens during CPU frequency scaling [This event is an alias of cpu-cycles].",
+ "LegacyConfigCode": "0"
+ },
+ {
+ "EventName": "instructions",
+ "BriefDescription": "Retired instructions. Be careful, these can be affected by various issues, most notably hardware interrupt counts.",
+ "LegacyConfigCode": "1"
+ },
+ {
+ "EventName": "cache-references",
+ "BriefDescription": "Cache accesses. Usually this indicates Last Level Cache accesses but this may vary depending on your CPU. This may include prefetches and coherency messages; again this depends on the design of your CPU.",
+ "LegacyConfigCode": "2"
+ },
+ {
+ "EventName": "cache-misses",
+ "BriefDescription": "Cache misses. Usually this indicates Last Level Cache misses; this is intended to be used in conjunction with the PERF_COUNT_HW_CACHE_REFERENCES event to calculate cache miss rates.",
+ "LegacyConfigCode": "3"
+ },
+ {
+ "EventName": "branches",
+ "BriefDescription": "Retired branch instructions [This event is an alias of branch-instructions].",
+ "LegacyConfigCode": "4"
+ },
+ {
+ "EventName": "branch-instructions",
+ "BriefDescription": "Retired branch instructions [This event is an alias of branches].",
+ "LegacyConfigCode": "4"
+ },
+ {
+ "EventName": "branch-misses",
+ "BriefDescription": "Mispredicted branch instructions.",
+ "LegacyConfigCode": "5"
+ },
+ {
+ "EventName": "bus-cycles",
+ "BriefDescription": "Bus cycles, which can be different from total cycles.",
+ "LegacyConfigCode": "6"
+ },
+ {
+ "EventName": "stalled-cycles-frontend",
+ "BriefDescription": "Stalled cycles during issue [This event is an alias of idle-cycles-frontend].",
+ "LegacyConfigCode": "7"
+ },
+ {
+ "EventName": "idle-cycles-frontend",
+ "BriefDescription": "Stalled cycles during issue [This event is an alias of stalled-cycles-fronted].",
+ "LegacyConfigCode": "7"
+ },
+ {
+ "EventName": "stalled-cycles-backend",
+ "BriefDescription": "Stalled cycles during retirement [This event is an alias of idle-cycles-backend].",
+ "LegacyConfigCode": "8"
+ },
+ {
+ "EventName": "idle-cycles-backend",
+ "BriefDescription": "Stalled cycles during retirement [This event is an alias of stalled-cycles-backend].",
+ "LegacyConfigCode": "8"
+ },
+ {
+ "EventName": "ref-cycles",
+ "BriefDescription": "Total cycles; not affected by CPU frequency scaling.",
+ "LegacyConfigCode": "9"
+ }
+]
diff --git a/tools/perf/pmu-events/arch/common/common/metrics.json b/tools/perf/pmu-events/arch/common/common/metrics.json
new file mode 100644
index 000000000000..0d010b3ebc6d
--- /dev/null
+++ b/tools/perf/pmu-events/arch/common/common/metrics.json
@@ -0,0 +1,151 @@
+[
+ {
+ "BriefDescription": "Average CPU utilization",
+ "MetricExpr": "(software@cpu\\-clock\\,name\\=cpu\\-clock@ if #target_cpu else software@task\\-clock\\,name\\=task\\-clock@) / (duration_time * 1e9)",
+ "MetricGroup": "Default",
+ "MetricName": "CPUs_utilized",
+ "ScaleUnit": "1CPUs",
+ "MetricConstraint": "NO_GROUP_EVENTS",
+ "DefaultShowEvents": "1"
+ },
+ {
+ "BriefDescription": "Context switches per CPU second",
+ "MetricExpr": "(software@context\\-switches\\,name\\=context\\-switches@ * 1e9) / (software@cpu\\-clock\\,name\\=cpu\\-clock@ if #target_cpu else software@task\\-clock\\,name\\=task\\-clock@)",
+ "MetricGroup": "Default",
+ "MetricName": "cs_per_second",
+ "ScaleUnit": "1cs/sec",
+ "MetricConstraint": "NO_GROUP_EVENTS",
+ "DefaultShowEvents": "1"
+ },
+ {
+ "BriefDescription": "Process migrations to a new CPU per CPU second",
+ "MetricExpr": "(software@cpu\\-migrations\\,name\\=cpu\\-migrations@ * 1e9) / (software@cpu\\-clock\\,name\\=cpu\\-clock@ if #target_cpu else software@task\\-clock\\,name\\=task\\-clock@)",
+ "MetricGroup": "Default",
+ "MetricName": "migrations_per_second",
+ "ScaleUnit": "1migrations/sec",
+ "MetricConstraint": "NO_GROUP_EVENTS",
+ "DefaultShowEvents": "1"
+ },
+ {
+ "BriefDescription": "Page faults per CPU second",
+ "MetricExpr": "(software@page\\-faults\\,name\\=page\\-faults@ * 1e9) / (software@cpu\\-clock\\,name\\=cpu\\-clock@ if #target_cpu else software@task\\-clock\\,name\\=task\\-clock@)",
+ "MetricGroup": "Default",
+ "MetricName": "page_faults_per_second",
+ "ScaleUnit": "1faults/sec",
+ "MetricConstraint": "NO_GROUP_EVENTS",
+ "DefaultShowEvents": "1"
+ },
+ {
+ "BriefDescription": "Instructions Per Cycle",
+ "MetricExpr": "instructions / cpu\\-cycles",
+ "MetricGroup": "Default",
+ "MetricName": "insn_per_cycle",
+ "MetricThreshold": "insn_per_cycle < 1",
+ "ScaleUnit": "1instructions",
+ "DefaultShowEvents": "1"
+ },
+ {
+ "BriefDescription": "Max front or backend stalls per instruction",
+ "MetricExpr": "max(stalled\\-cycles\\-frontend, stalled\\-cycles\\-backend) / instructions",
+ "MetricGroup": "Default",
+ "MetricName": "stalled_cycles_per_instruction",
+ "DefaultShowEvents": "1"
+ },
+ {
+ "BriefDescription": "Frontend stalls per cycle",
+ "MetricExpr": "stalled\\-cycles\\-frontend / cpu\\-cycles",
+ "MetricGroup": "Default",
+ "MetricName": "frontend_cycles_idle",
+ "MetricThreshold": "frontend_cycles_idle > 0.1",
+ "DefaultShowEvents": "1"
+ },
+ {
+ "BriefDescription": "Backend stalls per cycle",
+ "MetricExpr": "stalled\\-cycles\\-backend / cpu\\-cycles",
+ "MetricGroup": "Default",
+ "MetricName": "backend_cycles_idle",
+ "MetricThreshold": "backend_cycles_idle > 0.2",
+ "DefaultShowEvents": "1"
+ },
+ {
+ "BriefDescription": "Cycles per CPU second",
+ "MetricExpr": "cpu\\-cycles / (software@cpu\\-clock\\,name\\=cpu\\-clock@ if #target_cpu else software@task\\-clock\\,name\\=task\\-clock@)",
+ "MetricGroup": "Default",
+ "MetricName": "cycles_frequency",
+ "ScaleUnit": "1GHz",
+ "MetricConstraint": "NO_GROUP_EVENTS",
+ "DefaultShowEvents": "1"
+ },
+ {
+ "BriefDescription": "Branches per CPU second",
+ "MetricExpr": "branches / (software@cpu\\-clock\\,name\\=cpu\\-clock@ if #target_cpu else software@task\\-clock\\,name\\=task\\-clock@)",
+ "MetricGroup": "Default",
+ "MetricName": "branch_frequency",
+ "ScaleUnit": "1000M/sec",
+ "MetricConstraint": "NO_GROUP_EVENTS",
+ "DefaultShowEvents": "1"
+ },
+ {
+ "BriefDescription": "Branch miss rate",
+ "MetricExpr": "branch\\-misses / branches",
+ "MetricGroup": "Default",
+ "MetricName": "branch_miss_rate",
+ "MetricThreshold": "branch_miss_rate > 0.05",
+ "ScaleUnit": "100%",
+ "DefaultShowEvents": "1"
+ },
+ {
+ "BriefDescription": "L1D miss rate",
+ "MetricExpr": "L1\\-dcache\\-load\\-misses / L1\\-dcache\\-loads",
+ "MetricGroup": "Default2",
+ "MetricName": "l1d_miss_rate",
+ "MetricThreshold": "l1d_miss_rate > 0.05",
+ "ScaleUnit": "100%",
+ "DefaultShowEvents": "1"
+ },
+ {
+ "BriefDescription": "LLC miss rate",
+ "MetricExpr": "LLC\\-load\\-misses / LLC\\-loads",
+ "MetricGroup": "Default2",
+ "MetricName": "llc_miss_rate",
+ "MetricThreshold": "llc_miss_rate > 0.05",
+ "ScaleUnit": "100%",
+ "DefaultShowEvents": "1"
+ },
+ {
+ "BriefDescription": "L1I miss rate",
+ "MetricExpr": "L1\\-icache\\-load\\-misses / L1\\-icache\\-loads",
+ "MetricGroup": "Default3",
+ "MetricName": "l1i_miss_rate",
+ "MetricThreshold": "l1i_miss_rate > 0.05",
+ "ScaleUnit": "100%",
+ "DefaultShowEvents": "1"
+ },
+ {
+ "BriefDescription": "dTLB miss rate",
+ "MetricExpr": "dTLB\\-load\\-misses / dTLB\\-loads",
+ "MetricGroup": "Default3",
+ "MetricName": "dtlb_miss_rate",
+ "MetricThreshold": "dtlb_miss_rate > 0.05",
+ "ScaleUnit": "100%",
+ "DefaultShowEvents": "1"
+ },
+ {
+ "BriefDescription": "iTLB miss rate",
+ "MetricExpr": "iTLB\\-load\\-misses / iTLB\\-loads",
+ "MetricGroup": "Default3",
+ "MetricName": "itlb_miss_rate",
+ "MetricThreshold": "itlb_miss_rate > 0.05",
+ "ScaleUnit": "100%",
+ "DefaultShowEvents": "1"
+ },
+ {
+ "BriefDescription": "L1 prefetch miss rate",
+ "MetricExpr": "L1\\-dcache\\-prefetch\\-misses / L1\\-dcache\\-prefetches",
+ "MetricGroup": "Default4",
+ "MetricName": "l1_prefetch_miss_rate",
+ "MetricThreshold": "l1_prefetch_miss_rate > 0.05",
+ "ScaleUnit": "100%",
+ "DefaultShowEvents": "1"
+ }
+]
diff --git a/tools/perf/pmu-events/arch/common/common/software.json b/tools/perf/pmu-events/arch/common/common/software.json
index f2551f1107fd..e6819ae219bb 100644
--- a/tools/perf/pmu-events/arch/common/common/software.json
+++ b/tools/perf/pmu-events/arch/common/common/software.json
@@ -3,13 +3,15 @@
"Unit": "software",
"EventName": "cpu-clock",
"BriefDescription": "Per-CPU high-resolution timer based event",
- "ConfigCode": "0"
+ "ConfigCode": "0",
+ "ScaleUnit": "1e-6msec"
},
{
"Unit": "software",
"EventName": "task-clock",
"BriefDescription": "Per-task high-resolution timer based event",
- "ConfigCode": "1"
+ "ConfigCode": "1",
+ "ScaleUnit": "1e-6msec"
},
{
"Unit": "software",
diff --git a/tools/perf/pmu-events/arch/common/common/tool.json b/tools/perf/pmu-events/arch/common/common/tool.json
index 12f2ef1813a6..14d0d60a1976 100644
--- a/tools/perf/pmu-events/arch/common/common/tool.json
+++ b/tools/perf/pmu-events/arch/common/common/tool.json
@@ -70,5 +70,17 @@
"EventName": "system_tsc_freq",
"BriefDescription": "The amount a Time Stamp Counter (TSC) increases per second",
"ConfigCode": "12"
+ },
+ {
+ "Unit": "tool",
+ "EventName": "core_wide",
+ "BriefDescription": "1 if not SMT, if SMT are events being gathered on all SMT threads 1 otherwise 0",
+ "ConfigCode": "13"
+ },
+ {
+ "Unit": "tool",
+ "EventName": "target_cpu",
+ "BriefDescription": "1 if CPUs being analyzed, 0 if threads/processes",
+ "ConfigCode": "14"
}
]
diff --git a/tools/perf/pmu-events/arch/riscv/mapfile.csv b/tools/perf/pmu-events/arch/riscv/mapfile.csv
index 0a7e7dcc81be..d5eea7f9aa9a 100644
--- a/tools/perf/pmu-events/arch/riscv/mapfile.csv
+++ b/tools/perf/pmu-events/arch/riscv/mapfile.csv
@@ -20,5 +20,6 @@
0x489-0x8000000000000008-0x[[:xdigit:]]+,v1,sifive/p550,core
0x489-0x8000000000000[1-6]08-0x[9b][[:xdigit:]]+,v1,sifive/p650,core
0x5b7-0x0-0x0,v1,thead/c900-legacy,core
+0x5b7-0x80000000090c0d00-0x2047000,v1,thead/c900-legacy,core
0x67e-0x80000000db0000[89]0-0x[[:xdigit:]]+,v1,starfive/dubhe-80,core
0x31e-0x8000000000008a45-0x[[:xdigit:]]+,v1,andes/ax45,core
diff --git a/tools/perf/pmu-events/arch/s390/cf_z16/transaction.json b/tools/perf/pmu-events/arch/s390/cf_z16/transaction.json
index 3ab1d3a6638c..57b785307a85 100644
--- a/tools/perf/pmu-events/arch/s390/cf_z16/transaction.json
+++ b/tools/perf/pmu-events/arch/s390/cf_z16/transaction.json
@@ -7,17 +7,17 @@
{
"BriefDescription": "Cycles per Instruction",
"MetricName": "cpi",
- "MetricExpr": "CPU_CYCLES / INSTRUCTIONS if has_event(INSTRUCTIONS) else 0"
+ "MetricExpr": "CPU_CYCLES / INSTRUCTIONS if has_event(CPU_CYCLES) else 0"
},
{
"BriefDescription": "Problem State Instruction Ratio",
"MetricName": "prbstate",
- "MetricExpr": "(PROBLEM_STATE_INSTRUCTIONS / INSTRUCTIONS) * 100 if has_event(INSTRUCTIONS) else 0"
+ "MetricExpr": "(PROBLEM_STATE_INSTRUCTIONS / INSTRUCTIONS) * 100 if has_event(PROBLEM_STATE_INSTRUCTIONS) else 0"
},
{
"BriefDescription": "Level One Miss per 100 Instructions",
"MetricName": "l1mp",
- "MetricExpr": "((L1I_DIR_WRITES + L1D_DIR_WRITES) / INSTRUCTIONS) * 100 if has_event(INSTRUCTIONS) else 0"
+ "MetricExpr": "((L1I_DIR_WRITES + L1D_DIR_WRITES) / INSTRUCTIONS) * 100 if has_event(L1I_DIR_WRITES) else 0"
},
{
"BriefDescription": "Percentage sourced from Level 2 cache",
@@ -52,7 +52,7 @@
{
"BriefDescription": "Estimated Instruction Complexity CPI infinite Level 1",
"MetricName": "est_cpi",
- "MetricExpr": "(CPU_CYCLES / INSTRUCTIONS) - (L1C_TLB2_MISSES / INSTRUCTIONS) if has_event(INSTRUCTIONS) else 0"
+ "MetricExpr": "(CPU_CYCLES / INSTRUCTIONS) - (L1C_TLB2_MISSES / INSTRUCTIONS) if has_event(CPU_CYCLES) else 0"
},
{
"BriefDescription": "Estimated Sourcing Cycles per Level 1 Miss",
diff --git a/tools/perf/pmu-events/arch/s390/cf_z17/transaction.json b/tools/perf/pmu-events/arch/s390/cf_z17/transaction.json
index 74df533c8b6f..7ded6a5a76c0 100644
--- a/tools/perf/pmu-events/arch/s390/cf_z17/transaction.json
+++ b/tools/perf/pmu-events/arch/s390/cf_z17/transaction.json
@@ -7,17 +7,17 @@
{
"BriefDescription": "Cycles per Instruction",
"MetricName": "cpi",
- "MetricExpr": "CPU_CYCLES / INSTRUCTIONS if has_event(INSTRUCTIONS) else 0"
+ "MetricExpr": "CPU_CYCLES / INSTRUCTIONS if has_event(CPU_CYCLES) else 0"
},
{
"BriefDescription": "Problem State Instruction Ratio",
"MetricName": "prbstate",
- "MetricExpr": "(PROBLEM_STATE_INSTRUCTIONS / INSTRUCTIONS) * 100 if has_event(INSTRUCTIONS) else 0"
+ "MetricExpr": "(PROBLEM_STATE_INSTRUCTIONS / INSTRUCTIONS) * 100 if has_event(PROBLEM_STATE_INSTRUCTIONS) else 0"
},
{
"BriefDescription": "Level One Miss per 100 Instructions",
"MetricName": "l1mp",
- "MetricExpr": "((L1I_DIR_WRITES + L1D_DIR_WRITES) / INSTRUCTIONS) * 100 if has_event(INSTRUCTIONS) else 0"
+ "MetricExpr": "((L1I_DIR_WRITES + L1D_DIR_WRITES) / INSTRUCTIONS) * 100 if has_event(L1I_DIR_WRITES) else 0"
},
{
"BriefDescription": "Percentage sourced from Level 2 cache",
@@ -52,7 +52,7 @@
{
"BriefDescription": "Estimated Instruction Complexity CPI infinite Level 1",
"MetricName": "est_cpi",
- "MetricExpr": "(CPU_CYCLES / INSTRUCTIONS) - (L1C_TLB2_MISSES / INSTRUCTIONS) if has_event(INSTRUCTIONS) else 0"
+ "MetricExpr": "(CPU_CYCLES / INSTRUCTIONS) - (L1C_TLB2_MISSES / INSTRUCTIONS) if has_event(L1C_TLB2_MISSES) else 0"
},
{
"BriefDescription": "Estimated Sourcing Cycles per Level 1 Miss",
diff --git a/tools/perf/pmu-events/arch/x86/alderlake/adl-metrics.json b/tools/perf/pmu-events/arch/x86/alderlake/adl-metrics.json
index 377dfecd96bd..cae7c0cf02f2 100644
--- a/tools/perf/pmu-events/arch/x86/alderlake/adl-metrics.json
+++ b/tools/perf/pmu-events/arch/x86/alderlake/adl-metrics.json
@@ -1,56 +1,56 @@
[
{
"BriefDescription": "C10 residency percent per package",
- "MetricExpr": "cstate_pkg@c10\\-residency@ / TSC",
+ "MetricExpr": "cstate_pkg@c10\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C10_Pkg_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C1 residency percent per core",
- "MetricExpr": "cstate_core@c1\\-residency@ / TSC",
+ "MetricExpr": "cstate_core@c1\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C1_Core_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C2 residency percent per package",
- "MetricExpr": "cstate_pkg@c2\\-residency@ / TSC",
+ "MetricExpr": "cstate_pkg@c2\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C2_Pkg_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C3 residency percent per package",
- "MetricExpr": "cstate_pkg@c3\\-residency@ / TSC",
+ "MetricExpr": "cstate_pkg@c3\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C3_Pkg_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C6 residency percent per core",
- "MetricExpr": "cstate_core@c6\\-residency@ / TSC",
+ "MetricExpr": "cstate_core@c6\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C6_Core_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C6 residency percent per package",
- "MetricExpr": "cstate_pkg@c6\\-residency@ / TSC",
+ "MetricExpr": "cstate_pkg@c6\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C6_Pkg_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C7 residency percent per core",
- "MetricExpr": "cstate_core@c7\\-residency@ / TSC",
+ "MetricExpr": "cstate_core@c7\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C7_Core_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C8 residency percent per package",
- "MetricExpr": "cstate_pkg@c8\\-residency@ / TSC",
+ "MetricExpr": "cstate_pkg@c8\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C8_Pkg_Residency",
"ScaleUnit": "100%"
@@ -552,7 +552,7 @@
},
{
"BriefDescription": "Average CPU Utilization",
- "MetricExpr": "cpu_atom@CPU_CLK_UNHALTED.REF_TSC@ / TSC",
+ "MetricExpr": "cpu_atom@CPU_CLK_UNHALTED.REF_TSC@ / msr@tsc\\,cpu=cpu_atom@",
"MetricName": "tma_info_system_cpu_utilization",
"Unit": "cpu_atom"
},
@@ -751,7 +751,7 @@
{
"BriefDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend",
"DefaultMetricgroupName": "TopdownL1",
- "MetricExpr": "cpu_core@topdown\\-be\\-bound@ / (cpu_core@topdown\\-fe\\-bound@ + cpu_core@topdown\\-bad\\-spec@ + cpu_core@topdown\\-retiring@ + cpu_core@topdown\\-be\\-bound@) + 0 * tma_info_thread_slots",
+ "MetricExpr": "cpu_core@topdown\\-be\\-bound@ / (cpu_core@topdown\\-fe\\-bound@ + cpu_core@topdown\\-bad\\-spec@ + cpu_core@topdown\\-retiring@ + cpu_core@topdown\\-be\\-bound@)",
"MetricGroup": "BvOB;Default;TmaL1;TopdownL1;tma_L1_group",
"MetricName": "tma_backend_bound",
"MetricThreshold": "tma_backend_bound > 0.2",
@@ -790,11 +790,20 @@
"Unit": "cpu_core"
},
{
+ "BriefDescription": "Total pipeline cost when the execution is compute-bound - an estimation",
+ "MetricExpr": "100 * (tma_core_bound * tma_divider / (tma_divider + tma_ports_utilization + tma_serializing_operation) + tma_core_bound * (tma_ports_utilization / (tma_divider + tma_ports_utilization + tma_serializing_operation)) * (tma_ports_utilized_3m / (tma_ports_utilized_0 + tma_ports_utilized_1 + tma_ports_utilized_2 + tma_ports_utilized_3m)))",
+ "MetricGroup": "BvCB;Cor;tma_issueComp",
+ "MetricName": "tma_bottleneck_compute_bound_est",
+ "MetricThreshold": "tma_bottleneck_compute_bound_est > 20",
+ "PublicDescription": "Total pipeline cost when the execution is compute-bound - an estimation. Covers Core Bound when High ILP as well as when long-latency execution units are busy. Related metrics: ",
+ "Unit": "cpu_core"
+ },
+ {
"BriefDescription": "Total pipeline cost of external Memory- or Cache-Bandwidth related bottlenecks",
"MetricExpr": "100 * (tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_mem_bandwidth / (tma_mem_bandwidth + tma_mem_latency)) + tma_memory_bound * (tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_sq_full / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full)) + tma_memory_bound * (tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_fb_full / (tma_dtlb_load + tma_fb_full + tma_l1_latency_dependency + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)))",
"MetricGroup": "BvMB;Mem;MemoryBW;Offcore;tma_issueBW",
- "MetricName": "tma_bottleneck_cache_memory_bandwidth",
- "MetricThreshold": "tma_bottleneck_cache_memory_bandwidth > 20",
+ "MetricName": "tma_bottleneck_data_cache_memory_bandwidth",
+ "MetricThreshold": "tma_bottleneck_data_cache_memory_bandwidth > 20",
"PublicDescription": "Total pipeline cost of external Memory- or Cache-Bandwidth related bottlenecks. Related metrics: tma_fb_full, tma_info_system_dram_bw_use, tma_mem_bandwidth, tma_sq_full",
"Unit": "cpu_core"
},
@@ -802,23 +811,14 @@
"BriefDescription": "Total pipeline cost of external Memory- or Cache-Latency related bottlenecks",
"MetricExpr": "100 * (tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_mem_latency / (tma_mem_bandwidth + tma_mem_latency)) + tma_memory_bound * (tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_l3_hit_latency / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full)) + tma_memory_bound * tma_l2_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) + tma_memory_bound * (tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_l1_latency_dependency / (tma_dtlb_load + tma_fb_full + tma_l1_latency_dependency + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)) + tma_memory_bound * (tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_lock_latency / (tma_dtlb_load + tma_fb_full + tma_l1_latency_dependency + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)) + tma_memory_bound * (tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_split_loads / (tma_dtlb_load + tma_fb_full + tma_l1_latency_dependency + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)) + tma_memory_bound * (tma_store_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_split_stores / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency + tma_streaming_stores)) + tma_memory_bound * (tma_store_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_store_latency / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency + tma_streaming_stores)))",
"MetricGroup": "BvML;Mem;MemoryLat;Offcore;tma_issueLat",
- "MetricName": "tma_bottleneck_cache_memory_latency",
- "MetricThreshold": "tma_bottleneck_cache_memory_latency > 20",
+ "MetricName": "tma_bottleneck_data_cache_memory_latency",
+ "MetricThreshold": "tma_bottleneck_data_cache_memory_latency > 20",
"PublicDescription": "Total pipeline cost of external Memory- or Cache-Latency related bottlenecks. Related metrics: tma_l3_hit_latency, tma_mem_latency",
"Unit": "cpu_core"
},
{
- "BriefDescription": "Total pipeline cost when the execution is compute-bound - an estimation",
- "MetricExpr": "100 * (tma_core_bound * tma_divider / (tma_divider + tma_ports_utilization + tma_serializing_operation) + tma_core_bound * (tma_ports_utilization / (tma_divider + tma_ports_utilization + tma_serializing_operation)) * (tma_ports_utilized_3m / (tma_ports_utilized_0 + tma_ports_utilized_1 + tma_ports_utilized_2 + tma_ports_utilized_3m)))",
- "MetricGroup": "BvCB;Cor;tma_issueComp",
- "MetricName": "tma_bottleneck_compute_bound_est",
- "MetricThreshold": "tma_bottleneck_compute_bound_est > 20",
- "PublicDescription": "Total pipeline cost when the execution is compute-bound - an estimation. Covers Core Bound when High ILP as well as when long-latency execution units are busy. Related metrics: ",
- "Unit": "cpu_core"
- },
- {
"BriefDescription": "Total pipeline cost of instruction fetch bandwidth related bottlenecks (when the front-end could not sustain operations delivery to the back-end)",
- "MetricExpr": "100 * (tma_frontend_bound - (1 - 10 * tma_microcode_sequencer * tma_other_mispredicts / tma_branch_mispredicts) * tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches) - (1 - cpu_core@INST_RETIRED.REP_ITERATION@ / cpu_core@UOPS_RETIRED.MS\\,cmask\\=1@) * (tma_fetch_latency * (tma_ms_switches + tma_branch_resteers * (tma_clears_resteers + tma_mispredicts_resteers * tma_other_mispredicts / tma_branch_mispredicts) / (tma_clears_resteers + tma_mispredicts_resteers + tma_unknown_branches)) / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches) + tma_fetch_bandwidth * tma_ms / (tma_dsb + tma_lsd + tma_mite + tma_ms))) - tma_bottleneck_big_code",
+ "MetricExpr": "100 * (tma_frontend_bound - (1 - 10 * tma_microcode_sequencer * tma_other_mispredicts / tma_branch_mispredicts) * tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches) - (1 - cpu_core@INST_RETIRED.REP_ITERATION@ / cpu_core@UOPS_RETIRED.MS\\,cmask\\=1@) * (tma_fetch_latency * (tma_ms_switches + tma_branch_resteers * (tma_clears_resteers + tma_mispredicts_resteers * tma_other_mispredicts / tma_branch_mispredicts) / (tma_clears_resteers + tma_mispredicts_resteers + tma_unknown_branches)) / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches) + tma_ms)) - tma_bottleneck_big_code",
"MetricGroup": "BvFB;Fed;FetchBW;Frontend",
"MetricName": "tma_bottleneck_instruction_fetch_bw",
"MetricThreshold": "tma_bottleneck_instruction_fetch_bw > 20",
@@ -826,7 +826,7 @@
},
{
"BriefDescription": "Total pipeline cost of irregular execution (e.g",
- "MetricExpr": "100 * ((1 - cpu_core@INST_RETIRED.REP_ITERATION@ / cpu_core@UOPS_RETIRED.MS\\,cmask\\=1@) * (tma_fetch_latency * (tma_ms_switches + tma_branch_resteers * (tma_clears_resteers + tma_mispredicts_resteers * tma_other_mispredicts / tma_branch_mispredicts) / (tma_clears_resteers + tma_mispredicts_resteers + tma_unknown_branches)) / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches) + tma_fetch_bandwidth * tma_ms / (tma_dsb + tma_lsd + tma_mite + tma_ms)) + 10 * tma_microcode_sequencer * tma_other_mispredicts / tma_branch_mispredicts * tma_branch_mispredicts + tma_machine_clears * tma_other_nukes / tma_other_nukes + tma_core_bound * (tma_serializing_operation + cpu_core@RS.EMPTY_RESOURCE@ / tma_info_thread_clks * tma_ports_utilized_0) / (tma_divider + tma_ports_utilization + tma_serializing_operation) + tma_microcode_sequencer / (tma_few_uops_instructions + tma_microcode_sequencer) * (tma_assists / tma_microcode_sequencer) * tma_heavy_operations)",
+ "MetricExpr": "100 * ((1 - cpu_core@INST_RETIRED.REP_ITERATION@ / cpu_core@UOPS_RETIRED.MS\\,cmask\\=1@) * (tma_fetch_latency * (tma_ms_switches + tma_branch_resteers * (tma_clears_resteers + tma_mispredicts_resteers * tma_other_mispredicts / tma_branch_mispredicts) / (tma_clears_resteers + tma_mispredicts_resteers + tma_unknown_branches)) / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches) + tma_ms) + 10 * tma_microcode_sequencer * tma_other_mispredicts / tma_branch_mispredicts * tma_branch_mispredicts + tma_machine_clears * tma_other_nukes / tma_other_nukes + tma_core_bound * (tma_serializing_operation + cpu_core@RS.EMPTY_RESOURCE@ / tma_info_thread_clks * tma_ports_utilized_0) / (tma_divider + tma_ports_utilization + tma_serializing_operation) + tma_microcode_sequencer / (tma_few_uops_instructions + tma_microcode_sequencer) * (tma_assists / tma_microcode_sequencer) * tma_heavy_operations)",
"MetricGroup": "Bad;BvIO;Cor;Ret;tma_issueMS",
"MetricName": "tma_bottleneck_irregular_overhead",
"MetricThreshold": "tma_bottleneck_irregular_overhead > 10",
@@ -862,7 +862,7 @@
},
{
"BriefDescription": "Total pipeline cost of remaining bottlenecks in the back-end",
- "MetricExpr": "100 - (tma_bottleneck_big_code + tma_bottleneck_instruction_fetch_bw + tma_bottleneck_mispredictions + tma_bottleneck_cache_memory_bandwidth + tma_bottleneck_cache_memory_latency + tma_bottleneck_memory_data_tlbs + tma_bottleneck_memory_synchronization + tma_bottleneck_compute_bound_est + tma_bottleneck_irregular_overhead + tma_bottleneck_branching_overhead + tma_bottleneck_useful_work)",
+ "MetricExpr": "100 - (tma_bottleneck_big_code + tma_bottleneck_instruction_fetch_bw + tma_bottleneck_mispredictions + tma_bottleneck_data_cache_memory_bandwidth + tma_bottleneck_data_cache_memory_latency + tma_bottleneck_memory_data_tlbs + tma_bottleneck_memory_synchronization + tma_bottleneck_compute_bound_est + tma_bottleneck_irregular_overhead + tma_bottleneck_branching_overhead + tma_bottleneck_useful_work)",
"MetricGroup": "BvOB;Cor;Offcore",
"MetricName": "tma_bottleneck_other_bottlenecks",
"MetricThreshold": "tma_bottleneck_other_bottlenecks > 20",
@@ -879,7 +879,7 @@
},
{
"BriefDescription": "This metric represents fraction of slots the CPU has wasted due to Branch Misprediction",
- "MetricExpr": "cpu_core@topdown\\-br\\-mispredict@ / (cpu_core@topdown\\-fe\\-bound@ + cpu_core@topdown\\-bad\\-spec@ + cpu_core@topdown\\-retiring@ + cpu_core@topdown\\-be\\-bound@) + 0 * tma_info_thread_slots",
+ "MetricExpr": "cpu_core@topdown\\-br\\-mispredict@ / (cpu_core@topdown\\-fe\\-bound@ + cpu_core@topdown\\-bad\\-spec@ + cpu_core@topdown\\-retiring@ + cpu_core@topdown\\-be\\-bound@)",
"MetricGroup": "BadSpec;BrMispredicts;BvMP;TmaL2;TopdownL2;tma_L2_group;tma_bad_speculation_group;tma_issueBM",
"MetricName": "tma_branch_mispredicts",
"MetricThreshold": "tma_branch_mispredicts > 0.1 & tma_bad_speculation > 0.15",
@@ -992,7 +992,6 @@
},
{
"BriefDescription": "This metric estimates fraction of cycles while the memory subsystem was handling synchronizations due to contested accesses",
- "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "(25 * tma_info_system_core_frequency * (cpu_core@MEM_LOAD_L3_HIT_RETIRED.XSNP_FWD@ * (cpu_core@OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM@ / (cpu_core@OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM@ + cpu_core@OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD@))) + 24 * tma_info_system_core_frequency * cpu_core@MEM_LOAD_L3_HIT_RETIRED.XSNP_MISS@) * (1 + cpu_core@MEM_LOAD_RETIRED.FB_HIT@ / cpu_core@MEM_LOAD_RETIRED.L1_MISS@ / 2) / tma_info_thread_clks",
"MetricGroup": "BvMS;DataSharing;LockCont;Offcore;Snoop;TopdownL4;tma_L4_group;tma_issueSyncxn;tma_l3_bound_group",
"MetricName": "tma_contested_accesses",
@@ -1109,7 +1108,7 @@
"MetricGroup": "BvMB;MemoryBW;TopdownL4;tma_L4_group;tma_issueBW;tma_issueSL;tma_issueSmSt;tma_l1_bound_group",
"MetricName": "tma_fb_full",
"MetricThreshold": "tma_fb_full > 0.3",
- "PublicDescription": "This metric does a *rough estimation* of how often L1D Fill Buffer unavailability limited additional L1D miss memory access requests to proceed. The higher the metric value; the deeper the memory hierarchy level the misses are satisfied from (metric values >1 are valid). Often it hints on approaching bandwidth limits (to L2 cache; L3 cache or external memory). Related metrics: tma_bottleneck_cache_memory_bandwidth, tma_info_system_dram_bw_use, tma_mem_bandwidth, tma_sq_full, tma_store_latency, tma_streaming_stores",
+ "PublicDescription": "This metric does a *rough estimation* of how often L1D Fill Buffer unavailability limited additional L1D miss memory access requests to proceed. The higher the metric value; the deeper the memory hierarchy level the misses are satisfied from (metric values >1 are valid). Often it hints on approaching bandwidth limits (to L2 cache; L3 cache or external memory). Related metrics: tma_bottleneck_data_cache_memory_bandwidth, tma_info_system_dram_bw_use, tma_mem_bandwidth, tma_sq_full, tma_store_latency, tma_streaming_stores",
"ScaleUnit": "100%",
"Unit": "cpu_core"
},
@@ -1238,7 +1237,7 @@
},
{
"BriefDescription": "This metric represents fraction of slots where the CPU was retiring heavy-weight operations -- instructions that require two or more uops or micro-coded sequences",
- "MetricExpr": "cpu_core@topdown\\-heavy\\-ops@ / (cpu_core@topdown\\-fe\\-bound@ + cpu_core@topdown\\-bad\\-spec@ + cpu_core@topdown\\-retiring@ + cpu_core@topdown\\-be\\-bound@) + 0 * tma_info_thread_slots",
+ "MetricExpr": "cpu_core@topdown\\-heavy\\-ops@ / (cpu_core@topdown\\-fe\\-bound@ + cpu_core@topdown\\-bad\\-spec@ + cpu_core@topdown\\-retiring@ + cpu_core@topdown\\-be\\-bound@)",
"MetricGroup": "Retire;TmaL2;TopdownL2;tma_L2_group;tma_retiring_group",
"MetricName": "tma_heavy_operations",
"MetricThreshold": "tma_heavy_operations > 0.1",
@@ -1851,7 +1850,7 @@
"Unit": "cpu_core"
},
{
- "BriefDescription": "",
+ "BriefDescription": "Mem;Backend;CacheHits",
"MetricExpr": "cpu_core@UOPS_EXECUTED.THREAD@ / (cpu_core@UOPS_EXECUTED.CORE_CYCLES_GE_1@ / 2 if #SMT_on else cpu_core@UOPS_EXECUTED.THREAD\\,cmask\\=1@)",
"MetricGroup": "Cor;Pipeline;PortsUtil;SMT",
"MetricName": "tma_info_pipeline_execute",
@@ -1912,7 +1911,7 @@
},
{
"BriefDescription": "Measured Average Core Frequency for unhalted processors [GHz]",
- "MetricExpr": "tma_info_system_turbo_utilization * TSC / 1e9 / tma_info_system_time",
+ "MetricExpr": "tma_info_system_turbo_utilization * msr@tsc\\,cpu=cpu_core@ / 1e9 / tma_info_system_time",
"MetricGroup": "Power;Summary",
"MetricName": "tma_info_system_core_frequency",
"Unit": "cpu_core"
@@ -1926,7 +1925,7 @@
},
{
"BriefDescription": "Average number of utilized CPUs",
- "MetricExpr": "cpu_core@CPU_CLK_UNHALTED.REF_TSC@ / TSC",
+ "MetricExpr": "cpu_core@CPU_CLK_UNHALTED.REF_TSC@ / msr@tsc\\,cpu=cpu_core@",
"MetricGroup": "Summary",
"MetricName": "tma_info_system_cpus_utilized",
"Unit": "cpu_core"
@@ -1936,7 +1935,7 @@
"MetricExpr": "64 * (UNC_ARB_TRK_REQUESTS.ALL + UNC_ARB_COH_TRK_REQUESTS.ALL) / 1e6 / tma_info_system_time / 1e3",
"MetricGroup": "HPC;MemOffcore;MemoryBW;SoC;tma_issueBW",
"MetricName": "tma_info_system_dram_bw_use",
- "PublicDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]. Related metrics: tma_bottleneck_cache_memory_bandwidth, tma_fb_full, tma_mem_bandwidth, tma_sq_full",
+ "PublicDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]. Related metrics: tma_bottleneck_data_cache_memory_bandwidth, tma_fb_full, tma_mem_bandwidth, tma_sq_full",
"Unit": "cpu_core"
},
{
@@ -1980,7 +1979,6 @@
},
{
"BriefDescription": "Average latency of data read request to external memory (in nanoseconds)",
- "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "(UNC_ARB_TRK_OCCUPANCY.RD + UNC_ARB_DAT_OCCUPANCY.RD) / UNC_ARB_TRK_REQUESTS.RD",
"MetricGroup": "Mem;MemoryLat;SoC",
"MetricName": "tma_info_system_mem_read_latency",
@@ -2032,6 +2030,13 @@
"Unit": "cpu_core"
},
{
+ "BriefDescription": "Measured Average Uncore Frequency for the SoC [GHz]",
+ "MetricExpr": "tma_info_system_socket_clks / 1e9 / tma_info_system_time",
+ "MetricGroup": "SoC",
+ "MetricName": "tma_info_system_uncore_frequency",
+ "Unit": "cpu_core"
+ },
+ {
"BriefDescription": "Per-Logical Processor actual clocks when the Logical Processor is active.",
"MetricExpr": "cpu_core@CPU_CLK_UNHALTED.THREAD@",
"MetricGroup": "Pipeline",
@@ -2150,12 +2155,12 @@
"Unit": "cpu_core"
},
{
- "BriefDescription": "This metric([SKL+] roughly; [LNL]) estimates fraction of cycles with demand load accesses that hit the L1D cache",
+ "BriefDescription": "This metric ([SKL+] roughly; [LNL]) estimates fraction of cycles with demand load accesses that hit the L1D cache",
"MetricExpr": "min(2 * (cpu_core@MEM_INST_RETIRED.ALL_LOADS@ - cpu_core@MEM_LOAD_RETIRED.FB_HIT@ - cpu_core@MEM_LOAD_RETIRED.L1_MISS@) * 20 / 100, max(cpu_core@CYCLE_ACTIVITY.CYCLES_MEM_ANY@ - cpu_core@MEMORY_ACTIVITY.CYCLES_L1D_MISS@, 0)) / tma_info_thread_clks",
"MetricGroup": "BvML;MemoryLat;TopdownL4;tma_L4_group;tma_l1_bound_group",
"MetricName": "tma_l1_latency_dependency",
"MetricThreshold": "tma_l1_latency_dependency > 0.1 & (tma_l1_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
- "PublicDescription": "This metric([SKL+] roughly; [LNL]) estimates fraction of cycles with demand load accesses that hit the L1D cache. The short latency of the L1D cache may be exposed in pointer-chasing memory access patterns as an example. Sample with: MEM_LOAD_RETIRED.L1_HIT",
+ "PublicDescription": "This metric ([SKL+] roughly; [LNL]) estimates fraction of cycles with demand load accesses that hit the L1D cache. The short latency of the L1D cache may be exposed in pointer-chasing memory access patterns as an example. Sample with: MEM_LOAD_RETIRED.L1_HIT",
"ScaleUnit": "100%",
"Unit": "cpu_core"
},
@@ -2171,7 +2176,6 @@
},
{
"BriefDescription": "This metric represents fraction of cycles with demand load accesses that hit the L2 cache under unloaded scenarios (possibly L2 latency limited)",
- "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "3 * tma_info_system_core_frequency * cpu_core@MEM_LOAD_RETIRED.L2_HIT@ * (1 + cpu_core@MEM_LOAD_RETIRED.FB_HIT@ / cpu_core@MEM_LOAD_RETIRED.L1_MISS@ / 2) / tma_info_thread_clks",
"MetricGroup": "MemoryLat;TopdownL4;tma_L4_group;tma_l2_bound_group",
"MetricName": "tma_l2_hit_latency",
@@ -2192,12 +2196,11 @@
},
{
"BriefDescription": "This metric estimates fraction of cycles with demand load accesses that hit the L3 cache under unloaded scenarios (possibly L3 latency limited)",
- "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "9 * tma_info_system_core_frequency * (cpu_core@MEM_LOAD_RETIRED.L3_HIT@ * (1 + cpu_core@MEM_LOAD_RETIRED.FB_HIT@ / cpu_core@MEM_LOAD_RETIRED.L1_MISS@ / 2)) / tma_info_thread_clks",
"MetricGroup": "BvML;MemoryLat;TopdownL4;tma_L4_group;tma_issueLat;tma_l3_bound_group",
"MetricName": "tma_l3_hit_latency",
"MetricThreshold": "tma_l3_hit_latency > 0.1 & (tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
- "PublicDescription": "This metric estimates fraction of cycles with demand load accesses that hit the L3 cache under unloaded scenarios (possibly L3 latency limited). Avoiding private cache misses (i.e. L2 misses/L3 hits) will improve the latency; reduce contention with sibling physical cores and increase performance. Note the value of this node may overlap with its siblings. Sample with: MEM_LOAD_RETIRED.L3_HIT_PS. Related metrics: tma_bottleneck_cache_memory_latency, tma_mem_latency",
+ "PublicDescription": "This metric estimates fraction of cycles with demand load accesses that hit the L3 cache under unloaded scenarios (possibly L3 latency limited). Avoiding private cache misses (i.e. L2 misses/L3 hits) will improve the latency; reduce contention with sibling physical cores and increase performance. Note the value of this node may overlap with its siblings. Sample with: MEM_LOAD_RETIRED.L3_HIT_PS. Related metrics: tma_bottleneck_data_cache_memory_latency, tma_mem_latency",
"ScaleUnit": "100%",
"Unit": "cpu_core"
},
@@ -2279,6 +2282,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles the CPU spent handling cache misses due to lock operations",
+ "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "(16 * max(0, cpu_core@MEM_INST_RETIRED.LOCK_LOADS@ - cpu_core@L2_RQSTS.ALL_RFO@) + cpu_core@MEM_INST_RETIRED.LOCK_LOADS@ / cpu_core@MEM_INST_RETIRED.ALL_STORES@ * (10 * cpu_core@L2_RQSTS.RFO_HIT@ + min(cpu_core@CPU_CLK_UNHALTED.THREAD@, cpu_core@OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO@))) / tma_info_thread_clks",
"MetricGroup": "LockCont;Offcore;TopdownL4;tma_L4_group;tma_issueRFO;tma_l1_bound_group",
"MetricName": "tma_lock_latency",
@@ -2314,7 +2318,7 @@
"MetricGroup": "BvMB;MemoryBW;Offcore;TopdownL4;tma_L4_group;tma_dram_bound_group;tma_issueBW",
"MetricName": "tma_mem_bandwidth",
"MetricThreshold": "tma_mem_bandwidth > 0.2 & (tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
- "PublicDescription": "This metric estimates fraction of cycles where the core's performance was likely hurt due to approaching bandwidth limits of external memory - DRAM ([SPR-HBM] and/or HBM). The underlying heuristic assumes that a similar off-core traffic is generated by all IA cores. This metric does not aggregate non-data-read requests by this logical processor; requests from other IA Logical Processors/Physical Cores/sockets; or other non-IA devices like GPU; hence the maximum external memory bandwidth limits may or may not be approached when this metric is flagged (see Uncore counters for that). Related metrics: tma_bottleneck_cache_memory_bandwidth, tma_fb_full, tma_info_system_dram_bw_use, tma_sq_full",
+ "PublicDescription": "This metric estimates fraction of cycles where the core's performance was likely hurt due to approaching bandwidth limits of external memory - DRAM ([SPR-HBM] and/or HBM). The underlying heuristic assumes that a similar off-core traffic is generated by all IA cores. This metric does not aggregate non-data-read requests by this logical processor; requests from other IA Logical Processors/Physical Cores/sockets; or other non-IA devices like GPU; hence the maximum external memory bandwidth limits may or may not be approached when this metric is flagged (see Uncore counters for that). Related metrics: tma_bottleneck_data_cache_memory_bandwidth, tma_fb_full, tma_info_system_dram_bw_use, tma_sq_full",
"ScaleUnit": "100%",
"Unit": "cpu_core"
},
@@ -2324,13 +2328,13 @@
"MetricGroup": "BvML;MemoryLat;Offcore;TopdownL4;tma_L4_group;tma_dram_bound_group;tma_issueLat",
"MetricName": "tma_mem_latency",
"MetricThreshold": "tma_mem_latency > 0.1 & (tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
- "PublicDescription": "This metric estimates fraction of cycles where the performance was likely hurt due to latency from external memory - DRAM ([SPR-HBM] and/or HBM). This metric does not aggregate requests from other Logical Processors/Physical Cores/sockets (see Uncore counters for that). Related metrics: tma_bottleneck_cache_memory_latency, tma_l3_hit_latency",
+ "PublicDescription": "This metric estimates fraction of cycles where the performance was likely hurt due to latency from external memory - DRAM ([SPR-HBM] and/or HBM). This metric does not aggregate requests from other Logical Processors/Physical Cores/sockets (see Uncore counters for that). Related metrics: tma_bottleneck_data_cache_memory_latency, tma_l3_hit_latency",
"ScaleUnit": "100%",
"Unit": "cpu_core"
},
{
"BriefDescription": "This metric represents fraction of slots the Memory subsystem within the Backend was a bottleneck",
- "MetricExpr": "cpu_core@topdown\\-mem\\-bound@ / (cpu_core@topdown\\-fe\\-bound@ + cpu_core@topdown\\-bad\\-spec@ + cpu_core@topdown\\-retiring@ + cpu_core@topdown\\-be\\-bound@) + 0 * tma_info_thread_slots",
+ "MetricExpr": "cpu_core@topdown\\-mem\\-bound@ / (cpu_core@topdown\\-fe\\-bound@ + cpu_core@topdown\\-bad\\-spec@ + cpu_core@topdown\\-retiring@ + cpu_core@topdown\\-be\\-bound@)",
"MetricGroup": "Backend;TmaL2;TopdownL2;tma_L2_group;tma_backend_bound_group",
"MetricName": "tma_memory_bound",
"MetricThreshold": "tma_memory_bound > 0.2 & tma_backend_bound > 0.2",
@@ -2341,7 +2345,6 @@
},
{
"BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to LFENCE Instructions.",
- "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "13 * cpu_core@MISC2_RETIRED.LFENCE@ / tma_info_thread_clks",
"MetricGroup": "TopdownL4;tma_L4_group;tma_serializing_operation_group",
"MetricName": "tma_memory_fence",
@@ -2400,7 +2403,7 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles in which CPU was likely limited due to the Microcode Sequencer (MS) unit - see Microcode_Sequencer node for details.",
- "MetricExpr": "max(cpu_core@IDQ.MS_CYCLES_ANY@, cpu_core@UOPS_RETIRED.MS\\,cmask\\=1@ / (cpu_core@UOPS_RETIRED.SLOTS@ / cpu_core@UOPS_ISSUED.ANY@)) / tma_info_core_core_clks / 2",
+ "MetricExpr": "max(cpu_core@IDQ.MS_CYCLES_ANY@, cpu_core@UOPS_RETIRED.MS\\,cmask\\=1@ / (cpu_core@UOPS_RETIRED.SLOTS@ / cpu_core@UOPS_ISSUED.ANY@)) / tma_info_core_core_clks / 2.4",
"MetricGroup": "MicroSeq;TopdownL3;tma_L3_group;tma_fetch_bandwidth_group",
"MetricName": "tma_ms",
"MetricThreshold": "tma_ms > 0.05 & tma_fetch_bandwidth > 0.2",
@@ -2439,6 +2442,7 @@
},
{
"BriefDescription": "This metric represents the remaining light uops fraction the CPU has executed - remaining means not covered by other sibling nodes",
+ "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "max(0, tma_light_operations - (tma_fp_arith + tma_int_operations + tma_memory_operations + tma_fused_instructions + tma_non_fused_branches))",
"MetricGroup": "Pipeline;TopdownL3;tma_L3_group;tma_light_operations_group",
"MetricName": "tma_other_light_ops",
@@ -2507,6 +2511,7 @@
},
{
"BriefDescription": "This metric estimates fraction of cycles the CPU performance was potentially limited due to Core computation issues (non divider-related)",
+ "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "((tma_ports_utilized_0 * tma_info_thread_clks + (cpu_core@EXE_ACTIVITY.1_PORTS_UTIL@ + tma_retiring * cpu_core@EXE_ACTIVITY.2_3_PORTS_UTIL@)) / tma_info_thread_clks if cpu_core@ARITH.DIV_ACTIVE@ < cpu_core@CYCLE_ACTIVITY.STALLS_TOTAL@ - cpu_core@EXE_ACTIVITY.BOUND_ON_LOADS@ else (cpu_core@EXE_ACTIVITY.1_PORTS_UTIL@ + tma_retiring * cpu_core@EXE_ACTIVITY.2_3_PORTS_UTIL@) / tma_info_thread_clks)",
"MetricGroup": "PortsUtil;TopdownL3;tma_L3_group;tma_core_bound_group",
"MetricName": "tma_ports_utilization",
@@ -2517,6 +2522,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles CPU executed no uops on any execution port (Logical Processor cycles since ICL, Physical Core cycles otherwise)",
+ "MetricConstraint": "NO_THRESHOLD_AND_NMI",
"MetricExpr": "(cpu_core@EXE_ACTIVITY.EXE_BOUND_0_PORTS@ + max(cpu_core@RS.EMPTY_RESOURCE@ - cpu_core@RESOURCE_STALLS.SCOREBOARD@, 0)) / tma_info_thread_clks * (cpu_core@CYCLE_ACTIVITY.STALLS_TOTAL@ - cpu_core@EXE_ACTIVITY.BOUND_ON_LOADS@) / tma_info_thread_clks",
"MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_ports_utilization_group",
"MetricName": "tma_ports_utilized_0",
@@ -2527,6 +2533,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles where the CPU executed total of 1 uop per cycle on all execution ports (Logical Processor cycles since ICL, Physical Core cycles otherwise)",
+ "MetricConstraint": "NO_THRESHOLD_AND_NMI",
"MetricExpr": "cpu_core@EXE_ACTIVITY.1_PORTS_UTIL@ / tma_info_thread_clks",
"MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_issueL1;tma_ports_utilization_group",
"MetricName": "tma_ports_utilized_1",
@@ -2537,7 +2544,6 @@
},
{
"BriefDescription": "This metric represents fraction of cycles CPU executed total of 2 uops per cycle on all execution ports (Logical Processor cycles since ICL, Physical Core cycles otherwise)",
- "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "cpu_core@EXE_ACTIVITY.2_PORTS_UTIL@ / tma_info_thread_clks",
"MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_issue2P;tma_ports_utilization_group",
"MetricName": "tma_ports_utilized_2",
@@ -2548,7 +2554,6 @@
},
{
"BriefDescription": "This metric represents fraction of cycles CPU executed total of 3 or more uops per cycle on all execution ports (Logical Processor cycles since ICL, Physical Core cycles otherwise)",
- "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "cpu_core@UOPS_EXECUTED.CYCLES_GE_3@ / tma_info_thread_clks",
"MetricGroup": "BvCB;PortsUtil;TopdownL4;tma_L4_group;tma_ports_utilization_group",
"MetricName": "tma_ports_utilized_3m",
@@ -2560,7 +2565,7 @@
{
"BriefDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired",
"DefaultMetricgroupName": "TopdownL1",
- "MetricExpr": "cpu_core@topdown\\-retiring@ / (cpu_core@topdown\\-fe\\-bound@ + cpu_core@topdown\\-bad\\-spec@ + cpu_core@topdown\\-retiring@ + cpu_core@topdown\\-be\\-bound@) + 0 * tma_info_thread_slots",
+ "MetricExpr": "cpu_core@topdown\\-retiring@ / (cpu_core@topdown\\-fe\\-bound@ + cpu_core@topdown\\-bad\\-spec@ + cpu_core@topdown\\-retiring@ + cpu_core@topdown\\-be\\-bound@)",
"MetricGroup": "BvUW;Default;TmaL1;TopdownL1;tma_L1_group",
"MetricName": "tma_retiring",
"MetricThreshold": "tma_retiring > 0.7 | tma_heavy_operations > 0.1",
@@ -2591,7 +2596,6 @@
},
{
"BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to PAUSE Instructions",
- "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "cpu_core@CPU_CLK_UNHALTED.PAUSE@ / tma_info_thread_clks",
"MetricGroup": "TopdownL4;tma_L4_group;tma_serializing_operation_group",
"MetricName": "tma_slow_pause",
@@ -2626,7 +2630,7 @@
"MetricGroup": "BvMB;MemoryBW;Offcore;TopdownL4;tma_L4_group;tma_issueBW;tma_l3_bound_group",
"MetricName": "tma_sq_full",
"MetricThreshold": "tma_sq_full > 0.3 & (tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
- "PublicDescription": "This metric measures fraction of cycles where the Super Queue (SQ) was full taking into account all request-types and both hardware SMT threads (Logical Processors). Related metrics: tma_bottleneck_cache_memory_bandwidth, tma_fb_full, tma_info_system_dram_bw_use, tma_mem_bandwidth",
+ "PublicDescription": "This metric measures fraction of cycles where the Super Queue (SQ) was full taking into account all request-types and both hardware SMT threads (Logical Processors). Related metrics: tma_bottleneck_data_cache_memory_bandwidth, tma_fb_full, tma_info_system_dram_bw_use, tma_mem_bandwidth",
"ScaleUnit": "100%",
"Unit": "cpu_core"
},
diff --git a/tools/perf/pmu-events/arch/x86/alderlake/cache.json b/tools/perf/pmu-events/arch/x86/alderlake/cache.json
index 5461576dafc7..be15a7f83717 100644
--- a/tools/perf/pmu-events/arch/x86/alderlake/cache.json
+++ b/tools/perf/pmu-events/arch/x86/alderlake/cache.json
@@ -4,7 +4,6 @@
"Counter": "0,1,2,3",
"EventCode": "0x51",
"EventName": "L1D.HWPF_MISS",
- "PublicDescription": "L1D.HWPF_MISS Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x20",
"Unit": "cpu_core"
@@ -14,7 +13,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x51",
"EventName": "L1D.REPLACEMENT",
- "PublicDescription": "Counts L1D data line replacements including opportunistic replacements, and replacements that require stall-for-replace or block-for-replace. Available PDIST counters: 0",
+ "PublicDescription": "Counts L1D data line replacements including opportunistic replacements, and replacements that require stall-for-replace or block-for-replace.",
"SampleAfterValue": "100003",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -24,7 +23,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x48",
"EventName": "L1D_PEND_MISS.FB_FULL",
- "PublicDescription": "Counts number of cycles a demand request has waited due to L1D Fill Buffer (FB) unavailability. Demand requests include cacheable/uncacheable demand load, store, lock or SW prefetch accesses. Available PDIST counters: 0",
+ "PublicDescription": "Counts number of cycles a demand request has waited due to L1D Fill Buffer (FB) unavailability. Demand requests include cacheable/uncacheable demand load, store, lock or SW prefetch accesses.",
"SampleAfterValue": "1000003",
"UMask": "0x2",
"Unit": "cpu_core"
@@ -36,7 +35,7 @@
"EdgeDetect": "1",
"EventCode": "0x48",
"EventName": "L1D_PEND_MISS.FB_FULL_PERIODS",
- "PublicDescription": "Counts number of phases a demand request has waited due to L1D Fill Buffer (FB) unavailability. Demand requests include cacheable/uncacheable demand load, store, lock or SW prefetch accesses. Available PDIST counters: 0",
+ "PublicDescription": "Counts number of phases a demand request has waited due to L1D Fill Buffer (FB) unavailability. Demand requests include cacheable/uncacheable demand load, store, lock or SW prefetch accesses.",
"SampleAfterValue": "1000003",
"UMask": "0x2",
"Unit": "cpu_core"
@@ -47,7 +46,6 @@
"Deprecated": "1",
"EventCode": "0x48",
"EventName": "L1D_PEND_MISS.L2_STALL",
- "PublicDescription": "This event is deprecated. Refer to new event L1D_PEND_MISS.L2_STALLS Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x4",
"Unit": "cpu_core"
@@ -57,7 +55,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x48",
"EventName": "L1D_PEND_MISS.L2_STALLS",
- "PublicDescription": "Counts number of cycles a demand request has waited due to L1D due to lack of L2 resources. Demand requests include cacheable/uncacheable demand load, store, lock or SW prefetch accesses. Available PDIST counters: 0",
+ "PublicDescription": "Counts number of cycles a demand request has waited due to L1D due to lack of L2 resources. Demand requests include cacheable/uncacheable demand load, store, lock or SW prefetch accesses.",
"SampleAfterValue": "1000003",
"UMask": "0x4",
"Unit": "cpu_core"
@@ -67,7 +65,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x48",
"EventName": "L1D_PEND_MISS.PENDING",
- "PublicDescription": "Counts number of L1D misses that are outstanding in each cycle, that is each cycle the number of Fill Buffers (FB) outstanding required by Demand Reads. FB either is held by demand loads, or it is held by non-demand loads and gets hit at least once by demand. The valid outstanding interval is defined until the FB deallocation by one of the following ways: from FB allocation, if FB is allocated by demand from the demand Hit FB, if it is allocated by hardware or software prefetch. Note: In the L1D, a Demand Read contains cacheable or noncacheable demand loads, including ones causing cache-line splits and reads due to page walks resulted from any request type. Available PDIST counters: 0",
+ "PublicDescription": "Counts number of L1D misses that are outstanding in each cycle, that is each cycle the number of Fill Buffers (FB) outstanding required by Demand Reads. FB either is held by demand loads, or it is held by non-demand loads and gets hit at least once by demand. The valid outstanding interval is defined until the FB deallocation by one of the following ways: from FB allocation, if FB is allocated by demand from the demand Hit FB, if it is allocated by hardware or software prefetch. Note: In the L1D, a Demand Read contains cacheable or noncacheable demand loads, including ones causing cache-line splits and reads due to page walks resulted from any request type.",
"SampleAfterValue": "1000003",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -78,7 +76,7 @@
"CounterMask": "1",
"EventCode": "0x48",
"EventName": "L1D_PEND_MISS.PENDING_CYCLES",
- "PublicDescription": "Counts duration of L1D miss outstanding in cycles. Available PDIST counters: 0",
+ "PublicDescription": "Counts duration of L1D miss outstanding in cycles.",
"SampleAfterValue": "1000003",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -88,7 +86,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x25",
"EventName": "L2_LINES_IN.ALL",
- "PublicDescription": "Counts the number of L2 cache lines filling the L2. Counting does not cover rejects. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of L2 cache lines filling the L2. Counting does not cover rejects.",
"SampleAfterValue": "100003",
"UMask": "0x1f",
"Unit": "cpu_core"
@@ -98,7 +96,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x26",
"EventName": "L2_LINES_OUT.NON_SILENT",
- "PublicDescription": "Counts the number of lines that are evicted by L2 cache when triggered by an L2 cache fill. Those lines are in Modified state. Modified lines are written back to L3 Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of lines that are evicted by L2 cache when triggered by an L2 cache fill. Those lines are in Modified state. Modified lines are written back to L3",
"SampleAfterValue": "200003",
"UMask": "0x2",
"Unit": "cpu_core"
@@ -108,7 +106,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x26",
"EventName": "L2_LINES_OUT.SILENT",
- "PublicDescription": "Counts the number of lines that are silently dropped by L2 cache. These lines are typically in Shared or Exclusive state. A non-threaded event. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of lines that are silently dropped by L2 cache. These lines are typically in Shared or Exclusive state. A non-threaded event.",
"SampleAfterValue": "200003",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -118,7 +116,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x26",
"EventName": "L2_LINES_OUT.USELESS_HWPF",
- "PublicDescription": "Counts the number of cache lines that have been prefetched by the L2 hardware prefetcher but not used by demand access when evicted from the L2 cache Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of cache lines that have been prefetched by the L2 hardware prefetcher but not used by demand access when evicted from the L2 cache",
"SampleAfterValue": "200003",
"UMask": "0x4",
"Unit": "cpu_core"
@@ -137,7 +135,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_REQUEST.ALL",
- "PublicDescription": "Counts all requests that were hit or true misses in L2 cache. True-miss excludes misses that were merged with ongoing L2 misses. [This event is alias to L2_RQSTS.REFERENCES] Available PDIST counters: 0",
+ "PublicDescription": "Counts all requests that were hit or true misses in L2 cache. True-miss excludes misses that were merged with ongoing L2 misses. [This event is alias to L2_RQSTS.REFERENCES]",
"SampleAfterValue": "200003",
"UMask": "0xff",
"Unit": "cpu_core"
@@ -167,7 +165,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_REQUEST.MISS",
- "PublicDescription": "Counts read requests of any type with true-miss in the L2 cache. True-miss excludes L2 misses that were merged with ongoing L2 misses. [This event is alias to L2_RQSTS.MISS] Available PDIST counters: 0",
+ "PublicDescription": "Counts read requests of any type with true-miss in the L2 cache. True-miss excludes L2 misses that were merged with ongoing L2 misses. [This event is alias to L2_RQSTS.MISS]",
"SampleAfterValue": "200003",
"UMask": "0x3f",
"Unit": "cpu_core"
@@ -177,7 +175,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_RQSTS.ALL_CODE_RD",
- "PublicDescription": "Counts the total number of L2 code requests. Available PDIST counters: 0",
+ "PublicDescription": "Counts the total number of L2 code requests.",
"SampleAfterValue": "200003",
"UMask": "0xe4",
"Unit": "cpu_core"
@@ -187,7 +185,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_RQSTS.ALL_DEMAND_DATA_RD",
- "PublicDescription": "Counts Demand Data Read requests accessing the L2 cache. These requests may hit or miss L2 cache. True-miss exclude misses that were merged with ongoing L2 misses. An access is counted once. Available PDIST counters: 0",
+ "PublicDescription": "Counts Demand Data Read requests accessing the L2 cache. These requests may hit or miss L2 cache. True-miss exclude misses that were merged with ongoing L2 misses. An access is counted once.",
"SampleAfterValue": "200003",
"UMask": "0xe1",
"Unit": "cpu_core"
@@ -197,7 +195,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_RQSTS.ALL_DEMAND_MISS",
- "PublicDescription": "Counts demand requests that miss L2 cache. Available PDIST counters: 0",
+ "PublicDescription": "Counts demand requests that miss L2 cache.",
"SampleAfterValue": "200003",
"UMask": "0x27",
"Unit": "cpu_core"
@@ -207,7 +205,6 @@
"Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_RQSTS.ALL_HWPF",
- "PublicDescription": "L2_RQSTS.ALL_HWPF Available PDIST counters: 0",
"SampleAfterValue": "200003",
"UMask": "0xf0",
"Unit": "cpu_core"
@@ -217,7 +214,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_RQSTS.ALL_RFO",
- "PublicDescription": "Counts the total number of RFO (read for ownership) requests to L2 cache. L2 RFO requests include both L1D demand RFO misses as well as L1D RFO prefetches. Available PDIST counters: 0",
+ "PublicDescription": "Counts the total number of RFO (read for ownership) requests to L2 cache. L2 RFO requests include both L1D demand RFO misses as well as L1D RFO prefetches.",
"SampleAfterValue": "200003",
"UMask": "0xe2",
"Unit": "cpu_core"
@@ -227,7 +224,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_RQSTS.CODE_RD_HIT",
- "PublicDescription": "Counts L2 cache hits when fetching instructions, code reads. Available PDIST counters: 0",
+ "PublicDescription": "Counts L2 cache hits when fetching instructions, code reads.",
"SampleAfterValue": "200003",
"UMask": "0xc4",
"Unit": "cpu_core"
@@ -237,7 +234,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_RQSTS.CODE_RD_MISS",
- "PublicDescription": "Counts L2 cache misses when fetching instructions. Available PDIST counters: 0",
+ "PublicDescription": "Counts L2 cache misses when fetching instructions.",
"SampleAfterValue": "200003",
"UMask": "0x24",
"Unit": "cpu_core"
@@ -247,7 +244,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_RQSTS.DEMAND_DATA_RD_HIT",
- "PublicDescription": "Counts the number of demand Data Read requests initiated by load instructions that hit L2 cache. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of demand Data Read requests initiated by load instructions that hit L2 cache.",
"SampleAfterValue": "200003",
"UMask": "0xc1",
"Unit": "cpu_core"
@@ -257,7 +254,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_RQSTS.DEMAND_DATA_RD_MISS",
- "PublicDescription": "Counts demand Data Read requests with true-miss in the L2 cache. True-miss excludes misses that were merged with ongoing L2 misses. An access is counted once. Available PDIST counters: 0",
+ "PublicDescription": "Counts demand Data Read requests with true-miss in the L2 cache. True-miss excludes misses that were merged with ongoing L2 misses. An access is counted once.",
"SampleAfterValue": "200003",
"UMask": "0x21",
"Unit": "cpu_core"
@@ -267,7 +264,6 @@
"Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_RQSTS.HWPF_MISS",
- "PublicDescription": "L2_RQSTS.HWPF_MISS Available PDIST counters: 0",
"SampleAfterValue": "200003",
"UMask": "0x30",
"Unit": "cpu_core"
@@ -277,7 +273,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_RQSTS.MISS",
- "PublicDescription": "Counts read requests of any type with true-miss in the L2 cache. True-miss excludes L2 misses that were merged with ongoing L2 misses. [This event is alias to L2_REQUEST.MISS] Available PDIST counters: 0",
+ "PublicDescription": "Counts read requests of any type with true-miss in the L2 cache. True-miss excludes L2 misses that were merged with ongoing L2 misses. [This event is alias to L2_REQUEST.MISS]",
"SampleAfterValue": "200003",
"UMask": "0x3f",
"Unit": "cpu_core"
@@ -287,7 +283,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_RQSTS.REFERENCES",
- "PublicDescription": "Counts all requests that were hit or true misses in L2 cache. True-miss excludes misses that were merged with ongoing L2 misses. [This event is alias to L2_REQUEST.ALL] Available PDIST counters: 0",
+ "PublicDescription": "Counts all requests that were hit or true misses in L2 cache. True-miss excludes misses that were merged with ongoing L2 misses. [This event is alias to L2_REQUEST.ALL]",
"SampleAfterValue": "200003",
"UMask": "0xff",
"Unit": "cpu_core"
@@ -297,7 +293,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_RQSTS.RFO_HIT",
- "PublicDescription": "Counts the RFO (Read-for-Ownership) requests that hit L2 cache. Available PDIST counters: 0",
+ "PublicDescription": "Counts the RFO (Read-for-Ownership) requests that hit L2 cache.",
"SampleAfterValue": "200003",
"UMask": "0xc2",
"Unit": "cpu_core"
@@ -307,7 +303,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_RQSTS.RFO_MISS",
- "PublicDescription": "Counts the RFO (Read-for-Ownership) requests that miss L2 cache. Available PDIST counters: 0",
+ "PublicDescription": "Counts the RFO (Read-for-Ownership) requests that miss L2 cache.",
"SampleAfterValue": "200003",
"UMask": "0x22",
"Unit": "cpu_core"
@@ -317,7 +313,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_RQSTS.SWPF_HIT",
- "PublicDescription": "Counts Software prefetch requests that hit the L2 cache. Accounts for PREFETCHNTA and PREFETCHT0/1/2 instructions when FB is not full. Available PDIST counters: 0",
+ "PublicDescription": "Counts Software prefetch requests that hit the L2 cache. Accounts for PREFETCHNTA and PREFETCHT0/1/2 instructions when FB is not full.",
"SampleAfterValue": "200003",
"UMask": "0xc8",
"Unit": "cpu_core"
@@ -327,7 +323,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_RQSTS.SWPF_MISS",
- "PublicDescription": "Counts Software prefetch requests that miss the L2 cache. Accounts for PREFETCHNTA and PREFETCHT0/1/2 instructions when FB is not full. Available PDIST counters: 0",
+ "PublicDescription": "Counts Software prefetch requests that miss the L2 cache. Accounts for PREFETCHNTA and PREFETCHT0/1/2 instructions when FB is not full.",
"SampleAfterValue": "200003",
"UMask": "0x28",
"Unit": "cpu_core"
@@ -337,7 +333,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x23",
"EventName": "L2_TRANS.L2_WB",
- "PublicDescription": "Counts L2 writebacks that access L2 cache. Available PDIST counters: 0",
+ "PublicDescription": "Counts L2 writebacks that access L2 cache.",
"SampleAfterValue": "200003",
"UMask": "0x40",
"Unit": "cpu_core"
@@ -357,7 +353,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0x2e",
"EventName": "LONGEST_LAT_CACHE.MISS",
- "PublicDescription": "Counts core-originated cacheable requests that miss the L3 cache (Longest Latency cache). Requests include data and code reads, Reads-for-Ownership (RFOs), speculative accesses and hardware prefetches to the L1 and L2. It does not include hardware prefetches to the L3, and may not count other types of requests to the L3. Available PDIST counters: 0",
+ "PublicDescription": "Counts core-originated cacheable requests that miss the L3 cache (Longest Latency cache). Requests include data and code reads, Reads-for-Ownership (RFOs), speculative accesses and hardware prefetches to the L1 and L2. It does not include hardware prefetches to the L3, and may not count other types of requests to the L3.",
"SampleAfterValue": "100003",
"UMask": "0x41",
"Unit": "cpu_core"
@@ -377,7 +373,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0x2e",
"EventName": "LONGEST_LAT_CACHE.REFERENCE",
- "PublicDescription": "Counts core-originated cacheable requests to the L3 cache (Longest Latency cache). Requests include data and code reads, Reads-for-Ownership (RFOs), speculative accesses and hardware prefetches to the L1 and L2. It does not include hardware prefetches to the L3, and may not count other types of requests to the L3. Available PDIST counters: 0",
+ "PublicDescription": "Counts core-originated cacheable requests to the L3 cache (Longest Latency cache). Requests include data and code reads, Reads-for-Ownership (RFOs), speculative accesses and hardware prefetches to the L1 and L2. It does not include hardware prefetches to the L3, and may not count other types of requests to the L3.",
"SampleAfterValue": "100003",
"UMask": "0x4f",
"Unit": "cpu_core"
@@ -552,7 +548,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x43",
"EventName": "MEM_LOAD_COMPLETED.L1_MISS_ANY",
- "PublicDescription": "Number of completed demand load requests that missed the L1 data cache including shadow misses (FB hits, merge to an ongoing L1D miss) Available PDIST counters: 0",
+ "PublicDescription": "Number of completed demand load requests that missed the L1 data cache including shadow misses (FB hits, merge to an ongoing L1D miss)",
"SampleAfterValue": "1000003",
"UMask": "0xfd",
"Unit": "cpu_core"
@@ -853,7 +849,6 @@
"Counter": "0,1,2,3",
"EventCode": "0x44",
"EventName": "MEM_STORE_RETIRED.L2_HIT",
- "PublicDescription": "MEM_STORE_RETIRED.L2_HIT Available PDIST counters: 0",
"SampleAfterValue": "200003",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -882,7 +877,7 @@
},
{
"BriefDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 128 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled.",
- "Counter": "0,1,2,3,4,5",
+ "Counter": "0,1",
"Data_LA": "1",
"EventCode": "0xd0",
"EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_128",
@@ -895,7 +890,7 @@
},
{
"BriefDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 16 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled.",
- "Counter": "0,1,2,3,4,5",
+ "Counter": "0,1",
"Data_LA": "1",
"EventCode": "0xd0",
"EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_16",
@@ -908,7 +903,7 @@
},
{
"BriefDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 256 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled.",
- "Counter": "0,1,2,3,4,5",
+ "Counter": "0,1",
"Data_LA": "1",
"EventCode": "0xd0",
"EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_256",
@@ -921,7 +916,7 @@
},
{
"BriefDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 32 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled.",
- "Counter": "0,1,2,3,4,5",
+ "Counter": "0,1",
"Data_LA": "1",
"EventCode": "0xd0",
"EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_32",
@@ -934,7 +929,7 @@
},
{
"BriefDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 4 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled.",
- "Counter": "0,1,2,3,4,5",
+ "Counter": "0,1",
"Data_LA": "1",
"EventCode": "0xd0",
"EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_4",
@@ -947,7 +942,7 @@
},
{
"BriefDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 512 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled.",
- "Counter": "0,1,2,3,4,5",
+ "Counter": "0,1",
"Data_LA": "1",
"EventCode": "0xd0",
"EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_512",
@@ -960,7 +955,7 @@
},
{
"BriefDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 64 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled.",
- "Counter": "0,1,2,3,4,5",
+ "Counter": "0,1",
"Data_LA": "1",
"EventCode": "0xd0",
"EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_64",
@@ -973,7 +968,7 @@
},
{
"BriefDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 8 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled.",
- "Counter": "0,1,2,3,4,5",
+ "Counter": "0,1",
"Data_LA": "1",
"EventCode": "0xd0",
"EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_8",
@@ -1050,7 +1045,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xe5",
"EventName": "MEM_UOP_RETIRED.ANY",
- "PublicDescription": "Number of retired micro-operations (uops) for load or store memory accesses Available PDIST counters: 0",
+ "PublicDescription": "Number of retired micro-operations (uops) for load or store memory accesses",
"SampleAfterValue": "1000003",
"UMask": "0x3",
"Unit": "cpu_core"
@@ -1068,6 +1063,30 @@
"Unit": "cpu_atom"
},
{
+ "BriefDescription": "Counts writebacks of modified cachelines that hit in the L3 or were snooped from another core's caches.",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x2A,0x2B",
+ "EventName": "OCR.COREWB_M.L3_HIT",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x1F803C0008",
+ "PublicDescription": "Counts writebacks of modified cachelines that hit in the L3 or were snooped from another core's caches. Available PDIST counters: 0",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Counts writebacks of non-modified cachelines that hit in the L3 or were snooped from another core's caches.",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x2A,0x2B",
+ "EventName": "OCR.COREWB_NONM.L3_HIT",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x1F803C1000",
+ "PublicDescription": "Counts writebacks of non-modified cachelines that hit in the L3 or were snooped from another core's caches. Available PDIST counters: 0",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1",
+ "Unit": "cpu_core"
+ },
+ {
"BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that have any type of response.",
"Counter": "0,1,2,3,4,5",
"EventCode": "0xB7",
@@ -1308,6 +1327,18 @@
"Unit": "cpu_atom"
},
{
+ "BriefDescription": "Counts all data read, code read, RFO and ITOM requests including demands and prefetches to the core caches (L1 or L2) that hit in the L3 or were snooped from another core's caches.",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x2A,0x2B",
+ "EventName": "OCR.READS_TO_CORE.L3_HIT",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x1F803C4477",
+ "PublicDescription": "Counts all data read, code read, RFO and ITOM requests including demands and prefetches to the core caches (L1 or L2) that hit in the L3 or were snooped from another core's caches. Available PDIST counters: 0",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1",
+ "Unit": "cpu_core"
+ },
+ {
"BriefDescription": "Counts L1 data cache software prefetches which include T0/T1/T2 and NTA (except PREFETCHW) that have any type of response.",
"Counter": "0,1,2,3,4,5",
"EventCode": "0xB7",
@@ -1372,7 +1403,6 @@
"Counter": "0,1,2,3",
"EventCode": "0x21",
"EventName": "OFFCORE_REQUESTS.ALL_REQUESTS",
- "PublicDescription": "OFFCORE_REQUESTS.ALL_REQUESTS Available PDIST counters: 0",
"SampleAfterValue": "100003",
"UMask": "0x80",
"Unit": "cpu_core"
@@ -1382,7 +1412,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x21",
"EventName": "OFFCORE_REQUESTS.DATA_RD",
- "PublicDescription": "Counts the demand and prefetch data reads. All Core Data Reads include cacheable 'Demands' and L2 prefetchers (not L3 prefetchers). Counting also covers reads due to page walks resulted from any request type. Available PDIST counters: 0",
+ "PublicDescription": "Counts the demand and prefetch data reads. All Core Data Reads include cacheable 'Demands' and L2 prefetchers (not L3 prefetchers). Counting also covers reads due to page walks resulted from any request type.",
"SampleAfterValue": "100003",
"UMask": "0x8",
"Unit": "cpu_core"
@@ -1392,7 +1422,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x21",
"EventName": "OFFCORE_REQUESTS.DEMAND_CODE_RD",
- "PublicDescription": "Counts both cacheable and non-cacheable code read requests. Available PDIST counters: 0",
+ "PublicDescription": "Counts both cacheable and non-cacheable code read requests.",
"SampleAfterValue": "100003",
"UMask": "0x2",
"Unit": "cpu_core"
@@ -1402,7 +1432,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x21",
"EventName": "OFFCORE_REQUESTS.DEMAND_DATA_RD",
- "PublicDescription": "Counts the Demand Data Read requests sent to uncore. Use it in conjunction with OFFCORE_REQUESTS_OUTSTANDING to determine average latency in the uncore. Available PDIST counters: 0",
+ "PublicDescription": "Counts the Demand Data Read requests sent to uncore. Use it in conjunction with OFFCORE_REQUESTS_OUTSTANDING to determine average latency in the uncore.",
"SampleAfterValue": "100003",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -1412,7 +1442,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x21",
"EventName": "OFFCORE_REQUESTS.DEMAND_RFO",
- "PublicDescription": "Counts the demand RFO (read for ownership) requests including regular RFOs, locks, ItoM. Available PDIST counters: 0",
+ "PublicDescription": "Counts the demand RFO (read for ownership) requests including regular RFOs, locks, ItoM.",
"SampleAfterValue": "100003",
"UMask": "0x4",
"Unit": "cpu_core"
@@ -1424,7 +1454,6 @@
"Errata": "ADL038",
"EventCode": "0x20",
"EventName": "OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD",
- "PublicDescription": "This event is deprecated. Refer to new event OFFCORE_REQUESTS_OUTSTANDING.DATA_RD Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x8",
"Unit": "cpu_core"
@@ -1436,7 +1465,6 @@
"Errata": "ADL038",
"EventCode": "0x20",
"EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD",
- "PublicDescription": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x8",
"Unit": "cpu_core"
@@ -1447,7 +1475,7 @@
"CounterMask": "1",
"EventCode": "0x20",
"EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_CODE_RD",
- "PublicDescription": "Counts the number of offcore outstanding Code Reads transactions in the super queue every cycle. The 'Offcore outstanding' state of the transaction lasts from the L2 miss until the sending transaction completion to requestor (SQ deallocation). See the corresponding Umask under OFFCORE_REQUESTS. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of offcore outstanding Code Reads transactions in the super queue every cycle. The 'Offcore outstanding' state of the transaction lasts from the L2 miss until the sending transaction completion to requestor (SQ deallocation). See the corresponding Umask under OFFCORE_REQUESTS.",
"SampleAfterValue": "1000003",
"UMask": "0x2",
"Unit": "cpu_core"
@@ -1458,7 +1486,6 @@
"CounterMask": "1",
"EventCode": "0x20",
"EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_DATA_RD",
- "PublicDescription": "Cycles where at least 1 outstanding demand data read request is pending. Available PDIST counters: 0",
"SampleAfterValue": "2000003",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -1469,7 +1496,7 @@
"CounterMask": "1",
"EventCode": "0x20",
"EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO",
- "PublicDescription": "Counts the number of offcore outstanding demand rfo Reads transactions in the super queue every cycle. The 'Offcore outstanding' state of the transaction lasts from the L2 miss until the sending transaction completion to requestor (SQ deallocation). See the corresponding Umask under OFFCORE_REQUESTS. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of offcore outstanding demand rfo Reads transactions in the super queue every cycle. The 'Offcore outstanding' state of the transaction lasts from the L2 miss until the sending transaction completion to requestor (SQ deallocation). See the corresponding Umask under OFFCORE_REQUESTS.",
"SampleAfterValue": "1000003",
"UMask": "0x4",
"Unit": "cpu_core"
@@ -1480,7 +1507,6 @@
"Errata": "ADL038",
"EventCode": "0x20",
"EventName": "OFFCORE_REQUESTS_OUTSTANDING.DATA_RD",
- "PublicDescription": "OFFCORE_REQUESTS_OUTSTANDING.DATA_RD Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x8",
"Unit": "cpu_core"
@@ -1490,7 +1516,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x20",
"EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_CODE_RD",
- "PublicDescription": "Counts the number of offcore outstanding Code Reads transactions in the super queue every cycle. The 'Offcore outstanding' state of the transaction lasts from the L2 miss until the sending transaction completion to requestor (SQ deallocation). See the corresponding Umask under OFFCORE_REQUESTS. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of offcore outstanding Code Reads transactions in the super queue every cycle. The 'Offcore outstanding' state of the transaction lasts from the L2 miss until the sending transaction completion to requestor (SQ deallocation). See the corresponding Umask under OFFCORE_REQUESTS.",
"SampleAfterValue": "1000003",
"UMask": "0x2",
"Unit": "cpu_core"
@@ -1500,7 +1526,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x20",
"EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD",
- "PublicDescription": "For every cycle, increments by the number of outstanding demand data read requests pending. Requests are considered outstanding from the time they miss the core's L2 cache until the transaction completion message is sent to the requestor. Available PDIST counters: 0",
+ "PublicDescription": "For every cycle, increments by the number of outstanding demand data read requests pending. Requests are considered outstanding from the time they miss the core's L2 cache until the transaction completion message is sent to the requestor.",
"SampleAfterValue": "1000003",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -1510,7 +1536,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x2c",
"EventName": "SQ_MISC.BUS_LOCK",
- "PublicDescription": "Counts the more expensive bus lock needed to enforce cache coherency for certain memory accesses that need to be done atomically. Can be created by issuing an atomic instruction (via the LOCK prefix) which causes a cache line split or accesses uncacheable memory. Available PDIST counters: 0",
+ "PublicDescription": "Counts the more expensive bus lock needed to enforce cache coherency for certain memory accesses that need to be done atomically. Can be created by issuing an atomic instruction (via the LOCK prefix) which causes a cache line split or accesses uncacheable memory.",
"SampleAfterValue": "100003",
"UMask": "0x10",
"Unit": "cpu_core"
@@ -1520,7 +1546,6 @@
"Counter": "0,1,2,3",
"EventCode": "0x40",
"EventName": "SW_PREFETCH_ACCESS.ANY",
- "PublicDescription": "Counts the number of PREFETCHNTA, PREFETCHW, PREFETCHT0, PREFETCHT1 or PREFETCHT2 instructions executed. Available PDIST counters: 0",
"SampleAfterValue": "100003",
"UMask": "0xf",
"Unit": "cpu_core"
@@ -1530,7 +1555,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x40",
"EventName": "SW_PREFETCH_ACCESS.NTA",
- "PublicDescription": "Counts the number of PREFETCHNTA instructions executed. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of PREFETCHNTA instructions executed.",
"SampleAfterValue": "100003",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -1540,7 +1565,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x40",
"EventName": "SW_PREFETCH_ACCESS.PREFETCHW",
- "PublicDescription": "Counts the number of PREFETCHW instructions executed. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of PREFETCHW instructions executed.",
"SampleAfterValue": "100003",
"UMask": "0x8",
"Unit": "cpu_core"
@@ -1550,7 +1575,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x40",
"EventName": "SW_PREFETCH_ACCESS.T0",
- "PublicDescription": "Counts the number of PREFETCHT0 instructions executed. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of PREFETCHT0 instructions executed.",
"SampleAfterValue": "100003",
"UMask": "0x2",
"Unit": "cpu_core"
@@ -1560,7 +1585,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x40",
"EventName": "SW_PREFETCH_ACCESS.T1_T2",
- "PublicDescription": "Counts the number of PREFETCHT1 or PREFETCHT2 instructions executed. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of PREFETCHT1 or PREFETCHT2 instructions executed.",
"SampleAfterValue": "100003",
"UMask": "0x4",
"Unit": "cpu_core"
diff --git a/tools/perf/pmu-events/arch/x86/alderlake/floating-point.json b/tools/perf/pmu-events/arch/x86/alderlake/floating-point.json
index d01f1b163ed8..62fd70f220e5 100644
--- a/tools/perf/pmu-events/arch/x86/alderlake/floating-point.json
+++ b/tools/perf/pmu-events/arch/x86/alderlake/floating-point.json
@@ -14,7 +14,6 @@
"CounterMask": "1",
"EventCode": "0xb0",
"EventName": "ARITH.FPDIV_ACTIVE",
- "PublicDescription": "ARITH.FPDIV_ACTIVE Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -33,7 +32,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc1",
"EventName": "ASSISTS.FP",
- "PublicDescription": "Counts all microcode Floating Point assists. Available PDIST counters: 0",
+ "PublicDescription": "Counts all microcode Floating Point assists.",
"SampleAfterValue": "100003",
"UMask": "0x2",
"Unit": "cpu_core"
@@ -43,7 +42,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc1",
"EventName": "ASSISTS.SSE_AVX_MIX",
- "PublicDescription": "ASSISTS.SSE_AVX_MIX Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x10",
"Unit": "cpu_core"
@@ -53,7 +51,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xb3",
"EventName": "FP_ARITH_DISPATCHED.PORT_0",
- "PublicDescription": "FP_ARITH_DISPATCHED.PORT_0 [This event is alias to FP_ARITH_DISPATCHED.V0] Available PDIST counters: 0",
"SampleAfterValue": "2000003",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -63,7 +60,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xb3",
"EventName": "FP_ARITH_DISPATCHED.PORT_1",
- "PublicDescription": "FP_ARITH_DISPATCHED.PORT_1 [This event is alias to FP_ARITH_DISPATCHED.V1] Available PDIST counters: 0",
"SampleAfterValue": "2000003",
"UMask": "0x2",
"Unit": "cpu_core"
@@ -73,7 +69,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xb3",
"EventName": "FP_ARITH_DISPATCHED.PORT_5",
- "PublicDescription": "FP_ARITH_DISPATCHED.PORT_5 [This event is alias to FP_ARITH_DISPATCHED.V2] Available PDIST counters: 0",
"SampleAfterValue": "2000003",
"UMask": "0x4",
"Unit": "cpu_core"
@@ -83,7 +78,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xb3",
"EventName": "FP_ARITH_DISPATCHED.V0",
- "PublicDescription": "FP_ARITH_DISPATCHED.V0 [This event is alias to FP_ARITH_DISPATCHED.PORT_0] Available PDIST counters: 0",
"SampleAfterValue": "2000003",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -93,7 +87,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xb3",
"EventName": "FP_ARITH_DISPATCHED.V1",
- "PublicDescription": "FP_ARITH_DISPATCHED.V1 [This event is alias to FP_ARITH_DISPATCHED.PORT_1] Available PDIST counters: 0",
"SampleAfterValue": "2000003",
"UMask": "0x2",
"Unit": "cpu_core"
@@ -103,7 +96,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xb3",
"EventName": "FP_ARITH_DISPATCHED.V2",
- "PublicDescription": "FP_ARITH_DISPATCHED.V2 [This event is alias to FP_ARITH_DISPATCHED.PORT_5] Available PDIST counters: 0",
"SampleAfterValue": "2000003",
"UMask": "0x4",
"Unit": "cpu_core"
@@ -113,7 +105,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc7",
"EventName": "FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE",
- "PublicDescription": "Number of SSE/AVX computational 128-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 2 computation operations, one for each element. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events. Available PDIST counters: 0",
+ "PublicDescription": "Number of SSE/AVX computational 128-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 2 computation operations, one for each element. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.",
"SampleAfterValue": "100003",
"UMask": "0x4",
"Unit": "cpu_core"
@@ -123,7 +115,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc7",
"EventName": "FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE",
- "PublicDescription": "Number of SSE/AVX computational 128-bit packed single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 4 computation operations, one for each element. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT RSQRT RCP DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events. Available PDIST counters: 0",
+ "PublicDescription": "Number of SSE/AVX computational 128-bit packed single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 4 computation operations, one for each element. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT RSQRT RCP DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.",
"SampleAfterValue": "100003",
"UMask": "0x8",
"Unit": "cpu_core"
@@ -133,7 +125,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc7",
"EventName": "FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE",
- "PublicDescription": "Number of SSE/AVX computational 256-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 4 computation operations, one for each element. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events. Available PDIST counters: 0",
+ "PublicDescription": "Number of SSE/AVX computational 256-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 4 computation operations, one for each element. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.",
"SampleAfterValue": "100003",
"UMask": "0x10",
"Unit": "cpu_core"
@@ -143,7 +135,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc7",
"EventName": "FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE",
- "PublicDescription": "Number of SSE/AVX computational 256-bit packed single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 8 computation operations, one for each element. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT RSQRT RCP DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events. Available PDIST counters: 0",
+ "PublicDescription": "Number of SSE/AVX computational 256-bit packed single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 8 computation operations, one for each element. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT RSQRT RCP DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.",
"SampleAfterValue": "100003",
"UMask": "0x20",
"Unit": "cpu_core"
@@ -153,7 +145,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc7",
"EventName": "FP_ARITH_INST_RETIRED.4_FLOPS",
- "PublicDescription": "Number of SSE/AVX computational 128-bit packed single precision and 256-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 2 or/and 4 computation operations, one for each element. Applies to SSE* and AVX* packed single precision floating-point and packed double precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX RCP14 RSQRT14 SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events. Available PDIST counters: 0",
+ "PublicDescription": "Number of SSE/AVX computational 128-bit packed single precision and 256-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 2 or/and 4 computation operations, one for each element. Applies to SSE* and AVX* packed single precision floating-point and packed double precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX RCP14 RSQRT14 SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.",
"SampleAfterValue": "100003",
"UMask": "0x18",
"Unit": "cpu_core"
@@ -163,7 +155,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc7",
"EventName": "FP_ARITH_INST_RETIRED.SCALAR",
- "PublicDescription": "Number of SSE/AVX computational scalar single precision and double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 1 computational operation. Applies to SSE* and AVX* scalar single precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT RSQRT RCP FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events. Available PDIST counters: 0",
+ "PublicDescription": "Number of SSE/AVX computational scalar single precision and double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 1 computational operation. Applies to SSE* and AVX* scalar single precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT RSQRT RCP FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.",
"SampleAfterValue": "1000003",
"UMask": "0x3",
"Unit": "cpu_core"
@@ -173,7 +165,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc7",
"EventName": "FP_ARITH_INST_RETIRED.SCALAR_DOUBLE",
- "PublicDescription": "Number of SSE/AVX computational scalar double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 1 computational operation. Applies to SSE* and AVX* scalar double precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events. Available PDIST counters: 0",
+ "PublicDescription": "Number of SSE/AVX computational scalar double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 1 computational operation. Applies to SSE* and AVX* scalar double precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.",
"SampleAfterValue": "100003",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -183,7 +175,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc7",
"EventName": "FP_ARITH_INST_RETIRED.SCALAR_SINGLE",
- "PublicDescription": "Number of SSE/AVX computational scalar single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 1 computational operation. Applies to SSE* and AVX* scalar single precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT RSQRT RCP FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events. Available PDIST counters: 0",
+ "PublicDescription": "Number of SSE/AVX computational scalar single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 1 computational operation. Applies to SSE* and AVX* scalar single precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT RSQRT RCP FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.",
"SampleAfterValue": "100003",
"UMask": "0x2",
"Unit": "cpu_core"
@@ -193,7 +185,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc7",
"EventName": "FP_ARITH_INST_RETIRED.VECTOR",
- "PublicDescription": "Number of any Vector retired FP arithmetic instructions. The DAZ and FTZ flags in the MXCSR register need to be set when using these events. Available PDIST counters: 0",
+ "PublicDescription": "Number of any Vector retired FP arithmetic instructions. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.",
"SampleAfterValue": "1000003",
"UMask": "0xfc",
"Unit": "cpu_core"
diff --git a/tools/perf/pmu-events/arch/x86/alderlake/frontend.json b/tools/perf/pmu-events/arch/x86/alderlake/frontend.json
index dae3174a74fb..ff3b30c2619a 100644
--- a/tools/perf/pmu-events/arch/x86/alderlake/frontend.json
+++ b/tools/perf/pmu-events/arch/x86/alderlake/frontend.json
@@ -14,7 +14,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x60",
"EventName": "BACLEARS.ANY",
- "PublicDescription": "Number of times the front-end is resteered when it finds a branch instruction in a fetch line. This is called Unknown Branch which occurs for the first time a branch instruction is fetched or when the branch is not tracked by the BPU (Branch Prediction Unit) anymore. Available PDIST counters: 0",
+ "PublicDescription": "Number of times the front-end is resteered when it finds a branch instruction in a fetch line. This is called Unknown Branch which occurs for the first time a branch instruction is fetched or when the branch is not tracked by the BPU (Branch Prediction Unit) anymore.",
"SampleAfterValue": "100003",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -24,7 +24,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x87",
"EventName": "DECODE.LCP",
- "PublicDescription": "Counts cycles that the Instruction Length decoder (ILD) stalls occurred due to dynamically changing prefix length of the decoded instruction (by operand size prefix instruction 0x66, address size prefix instruction 0x67 or REX.W for Intel64). Count is proportional to the number of prefixes in a 16B-line. This may result in a three-cycle penalty for each LCP (Length changing prefix) in a 16-byte chunk. Available PDIST counters: 0",
+ "PublicDescription": "Counts cycles that the Instruction Length decoder (ILD) stalls occurred due to dynamically changing prefix length of the decoded instruction (by operand size prefix instruction 0x66, address size prefix instruction 0x67 or REX.W for Intel64). Count is proportional to the number of prefixes in a 16B-line. This may result in a three-cycle penalty for each LCP (Length changing prefix) in a 16-byte chunk.",
"SampleAfterValue": "500009",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -34,7 +34,6 @@
"Counter": "0,1,2,3",
"EventCode": "0x87",
"EventName": "DECODE.MS_BUSY",
- "PublicDescription": "Cycles the Microcode Sequencer is busy. Available PDIST counters: 0",
"SampleAfterValue": "500009",
"UMask": "0x2",
"Unit": "cpu_core"
@@ -44,7 +43,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x61",
"EventName": "DSB2MITE_SWITCHES.PENALTY_CYCLES",
- "PublicDescription": "Decode Stream Buffer (DSB) is a Uop-cache that holds translations of previously fetched instructions that were decoded by the legacy x86 decode pipeline (MITE). This event counts fetch penalty cycles when a transition occurs from DSB to MITE. Available PDIST counters: 0",
+ "PublicDescription": "Decode Stream Buffer (DSB) is a Uop-cache that holds translations of previously fetched instructions that were decoded by the legacy x86 decode pipeline (MITE). This event counts fetch penalty cycles when a transition occurs from DSB to MITE.",
"SampleAfterValue": "100003",
"UMask": "0x2",
"Unit": "cpu_core"
@@ -302,7 +301,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x80",
"EventName": "ICACHE_DATA.STALLS",
- "PublicDescription": "Counts cycles where a code line fetch is stalled due to an L1 instruction cache miss. The decode pipeline works at a 32 Byte granularity. Available PDIST counters: 0",
+ "PublicDescription": "Counts cycles where a code line fetch is stalled due to an L1 instruction cache miss. The decode pipeline works at a 32 Byte granularity.",
"SampleAfterValue": "500009",
"UMask": "0x4",
"Unit": "cpu_core"
@@ -314,7 +313,6 @@
"EdgeDetect": "1",
"EventCode": "0x80",
"EventName": "ICACHE_DATA.STALL_PERIODS",
- "PublicDescription": "ICACHE_DATA.STALL_PERIODS Available PDIST counters: 0",
"SampleAfterValue": "500009",
"UMask": "0x4",
"Unit": "cpu_core"
@@ -324,7 +322,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x83",
"EventName": "ICACHE_TAG.STALLS",
- "PublicDescription": "Counts cycles where a code fetch is stalled due to L1 instruction cache tag miss. Available PDIST counters: 0",
+ "PublicDescription": "Counts cycles where a code fetch is stalled due to L1 instruction cache tag miss.",
"SampleAfterValue": "200003",
"UMask": "0x4",
"Unit": "cpu_core"
@@ -335,7 +333,7 @@
"CounterMask": "1",
"EventCode": "0x79",
"EventName": "IDQ.DSB_CYCLES_ANY",
- "PublicDescription": "Counts the number of cycles uops were delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of cycles uops were delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path.",
"SampleAfterValue": "2000003",
"UMask": "0x8",
"Unit": "cpu_core"
@@ -346,7 +344,7 @@
"CounterMask": "6",
"EventCode": "0x79",
"EventName": "IDQ.DSB_CYCLES_OK",
- "PublicDescription": "Counts the number of cycles where optimal number of uops was delivered to the Instruction Decode Queue (IDQ) from the DSB (Decode Stream Buffer) path. Count includes uops that may 'bypass' the IDQ. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of cycles where optimal number of uops was delivered to the Instruction Decode Queue (IDQ) from the DSB (Decode Stream Buffer) path. Count includes uops that may 'bypass' the IDQ.",
"SampleAfterValue": "2000003",
"UMask": "0x8",
"Unit": "cpu_core"
@@ -356,7 +354,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x79",
"EventName": "IDQ.DSB_UOPS",
- "PublicDescription": "Counts the number of uops delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of uops delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path.",
"SampleAfterValue": "2000003",
"UMask": "0x8",
"Unit": "cpu_core"
@@ -367,7 +365,7 @@
"CounterMask": "1",
"EventCode": "0x79",
"EventName": "IDQ.MITE_CYCLES_ANY",
- "PublicDescription": "Counts the number of cycles uops were delivered to the Instruction Decode Queue (IDQ) from the MITE (legacy decode pipeline) path. During these cycles uops are not being delivered from the Decode Stream Buffer (DSB). Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of cycles uops were delivered to the Instruction Decode Queue (IDQ) from the MITE (legacy decode pipeline) path. During these cycles uops are not being delivered from the Decode Stream Buffer (DSB).",
"SampleAfterValue": "2000003",
"UMask": "0x4",
"Unit": "cpu_core"
@@ -378,7 +376,7 @@
"CounterMask": "6",
"EventCode": "0x79",
"EventName": "IDQ.MITE_CYCLES_OK",
- "PublicDescription": "Counts the number of cycles where optimal number of uops was delivered to the Instruction Decode Queue (IDQ) from the MITE (legacy decode pipeline) path. During these cycles uops are not being delivered from the Decode Stream Buffer (DSB). Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of cycles where optimal number of uops was delivered to the Instruction Decode Queue (IDQ) from the MITE (legacy decode pipeline) path. During these cycles uops are not being delivered from the Decode Stream Buffer (DSB).",
"SampleAfterValue": "2000003",
"UMask": "0x4",
"Unit": "cpu_core"
@@ -388,7 +386,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x79",
"EventName": "IDQ.MITE_UOPS",
- "PublicDescription": "Counts the number of uops delivered to Instruction Decode Queue (IDQ) from the MITE path. This also means that uops are not being delivered from the Decode Stream Buffer (DSB). Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of uops delivered to Instruction Decode Queue (IDQ) from the MITE path. This also means that uops are not being delivered from the Decode Stream Buffer (DSB).",
"SampleAfterValue": "2000003",
"UMask": "0x4",
"Unit": "cpu_core"
@@ -399,7 +397,7 @@
"CounterMask": "1",
"EventCode": "0x79",
"EventName": "IDQ.MS_CYCLES_ANY",
- "PublicDescription": "Counts cycles during which uops are being delivered to Instruction Decode Queue (IDQ) while the Microcode Sequencer (MS) is busy. Uops maybe initiated by Decode Stream Buffer (DSB) or MITE. Available PDIST counters: 0",
+ "PublicDescription": "Counts cycles during which uops are being delivered to Instruction Decode Queue (IDQ) while the Microcode Sequencer (MS) is busy. Uops maybe initiated by Decode Stream Buffer (DSB) or MITE.",
"SampleAfterValue": "2000003",
"UMask": "0x20",
"Unit": "cpu_core"
@@ -411,7 +409,7 @@
"EdgeDetect": "1",
"EventCode": "0x79",
"EventName": "IDQ.MS_SWITCHES",
- "PublicDescription": "Number of switches from DSB (Decode Stream Buffer) or MITE (legacy decode pipeline) to the Microcode Sequencer. Available PDIST counters: 0",
+ "PublicDescription": "Number of switches from DSB (Decode Stream Buffer) or MITE (legacy decode pipeline) to the Microcode Sequencer.",
"SampleAfterValue": "100003",
"UMask": "0x20",
"Unit": "cpu_core"
@@ -421,7 +419,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x79",
"EventName": "IDQ.MS_UOPS",
- "PublicDescription": "Counts the total number of uops delivered by the Microcode Sequencer (MS). Available PDIST counters: 0",
+ "PublicDescription": "Counts the total number of uops delivered by the Microcode Sequencer (MS).",
"SampleAfterValue": "1000003",
"UMask": "0x20",
"Unit": "cpu_core"
@@ -431,7 +429,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0x9c",
"EventName": "IDQ_BUBBLES.CORE",
- "PublicDescription": "Counts the number of uops not delivered to by the Instruction Decode Queue (IDQ) to the back-end of the pipeline when there was no back-end stalls. This event counts for one SMT thread in a given cycle. [This event is alias to IDQ_UOPS_NOT_DELIVERED.CORE] Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of uops not delivered to by the Instruction Decode Queue (IDQ) to the back-end of the pipeline when there was no back-end stalls. This event counts for one SMT thread in a given cycle. [This event is alias to IDQ_UOPS_NOT_DELIVERED.CORE]",
"SampleAfterValue": "1000003",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -442,7 +440,7 @@
"CounterMask": "6",
"EventCode": "0x9c",
"EventName": "IDQ_BUBBLES.CYCLES_0_UOPS_DELIV.CORE",
- "PublicDescription": "Counts the number of cycles when no uops were delivered by the Instruction Decode Queue (IDQ) to the back-end of the pipeline when there was no back-end stalls. This event counts for one SMT thread in a given cycle. [This event is alias to IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE] Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of cycles when no uops were delivered by the Instruction Decode Queue (IDQ) to the back-end of the pipeline when there was no back-end stalls. This event counts for one SMT thread in a given cycle. [This event is alias to IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE]",
"SampleAfterValue": "1000003",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -454,7 +452,7 @@
"EventCode": "0x9c",
"EventName": "IDQ_BUBBLES.CYCLES_FE_WAS_OK",
"Invert": "1",
- "PublicDescription": "Counts the number of cycles when the optimal number of uops were delivered by the Instruction Decode Queue (IDQ) to the back-end of the pipeline when there was no back-end stalls. This event counts for one SMT thread in a given cycle. [This event is alias to IDQ_UOPS_NOT_DELIVERED.CYCLES_FE_WAS_OK] Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of cycles when the optimal number of uops were delivered by the Instruction Decode Queue (IDQ) to the back-end of the pipeline when there was no back-end stalls. This event counts for one SMT thread in a given cycle. [This event is alias to IDQ_UOPS_NOT_DELIVERED.CYCLES_FE_WAS_OK]",
"SampleAfterValue": "1000003",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -464,7 +462,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0x9c",
"EventName": "IDQ_UOPS_NOT_DELIVERED.CORE",
- "PublicDescription": "Counts the number of uops not delivered to by the Instruction Decode Queue (IDQ) to the back-end of the pipeline when there was no back-end stalls. This event counts for one SMT thread in a given cycle. [This event is alias to IDQ_BUBBLES.CORE] Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of uops not delivered to by the Instruction Decode Queue (IDQ) to the back-end of the pipeline when there was no back-end stalls. This event counts for one SMT thread in a given cycle. [This event is alias to IDQ_BUBBLES.CORE]",
"SampleAfterValue": "1000003",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -475,7 +473,7 @@
"CounterMask": "6",
"EventCode": "0x9c",
"EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE",
- "PublicDescription": "Counts the number of cycles when no uops were delivered by the Instruction Decode Queue (IDQ) to the back-end of the pipeline when there was no back-end stalls. This event counts for one SMT thread in a given cycle. [This event is alias to IDQ_BUBBLES.CYCLES_0_UOPS_DELIV.CORE] Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of cycles when no uops were delivered by the Instruction Decode Queue (IDQ) to the back-end of the pipeline when there was no back-end stalls. This event counts for one SMT thread in a given cycle. [This event is alias to IDQ_BUBBLES.CYCLES_0_UOPS_DELIV.CORE]",
"SampleAfterValue": "1000003",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -487,7 +485,7 @@
"EventCode": "0x9c",
"EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_FE_WAS_OK",
"Invert": "1",
- "PublicDescription": "Counts the number of cycles when the optimal number of uops were delivered by the Instruction Decode Queue (IDQ) to the back-end of the pipeline when there was no back-end stalls. This event counts for one SMT thread in a given cycle. [This event is alias to IDQ_BUBBLES.CYCLES_FE_WAS_OK] Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of cycles when the optimal number of uops were delivered by the Instruction Decode Queue (IDQ) to the back-end of the pipeline when there was no back-end stalls. This event counts for one SMT thread in a given cycle. [This event is alias to IDQ_BUBBLES.CYCLES_FE_WAS_OK]",
"SampleAfterValue": "1000003",
"UMask": "0x1",
"Unit": "cpu_core"
diff --git a/tools/perf/pmu-events/arch/x86/alderlake/memory.json b/tools/perf/pmu-events/arch/x86/alderlake/memory.json
index 07f5786bdbc0..a0260d5b8619 100644
--- a/tools/perf/pmu-events/arch/x86/alderlake/memory.json
+++ b/tools/perf/pmu-events/arch/x86/alderlake/memory.json
@@ -5,7 +5,6 @@
"CounterMask": "6",
"EventCode": "0xa3",
"EventName": "CYCLE_ACTIVITY.STALLS_L3_MISS",
- "PublicDescription": "Execution stalls while L3 cache miss demand load is outstanding. Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x6",
"Unit": "cpu_core"
@@ -79,7 +78,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc3",
"EventName": "MACHINE_CLEARS.MEMORY_ORDERING",
- "PublicDescription": "Counts the number of Machine Clears detected dye to memory ordering. Memory Ordering Machine Clears may apply when a memory read may not conform to the memory ordering rules of the x86 architecture Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of Machine Clears detected dye to memory ordering. Memory Ordering Machine Clears may apply when a memory read may not conform to the memory ordering rules of the x86 architecture",
"SampleAfterValue": "100003",
"UMask": "0x2",
"Unit": "cpu_core"
@@ -90,7 +89,6 @@
"CounterMask": "2",
"EventCode": "0x47",
"EventName": "MEMORY_ACTIVITY.CYCLES_L1D_MISS",
- "PublicDescription": "Cycles while L1 cache miss demand load is outstanding. Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x2",
"Unit": "cpu_core"
@@ -101,7 +99,6 @@
"CounterMask": "3",
"EventCode": "0x47",
"EventName": "MEMORY_ACTIVITY.STALLS_L1D_MISS",
- "PublicDescription": "Execution stalls while L1 cache miss demand load is outstanding. Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x3",
"Unit": "cpu_core"
@@ -112,7 +109,7 @@
"CounterMask": "5",
"EventCode": "0x47",
"EventName": "MEMORY_ACTIVITY.STALLS_L2_MISS",
- "PublicDescription": "Execution stalls while L2 cache miss demand cacheable load request is outstanding (will not count for uncacheable demand requests e.g. bus lock). Available PDIST counters: 0",
+ "PublicDescription": "Execution stalls while L2 cache miss demand cacheable load request is outstanding (will not count for uncacheable demand requests e.g. bus lock).",
"SampleAfterValue": "1000003",
"UMask": "0x5",
"Unit": "cpu_core"
@@ -123,7 +120,7 @@
"CounterMask": "9",
"EventCode": "0x47",
"EventName": "MEMORY_ACTIVITY.STALLS_L3_MISS",
- "PublicDescription": "Execution stalls while L3 cache miss demand cacheable load request is outstanding (will not count for uncacheable demand requests e.g. bus lock). Available PDIST counters: 0",
+ "PublicDescription": "Execution stalls while L3 cache miss demand cacheable load request is outstanding (will not count for uncacheable demand requests e.g. bus lock).",
"SampleAfterValue": "1000003",
"UMask": "0x9",
"Unit": "cpu_core"
@@ -417,7 +414,6 @@
"Counter": "0,1,2,3",
"EventCode": "0x21",
"EventName": "OFFCORE_REQUESTS.L3_MISS_DEMAND_DATA_RD",
- "PublicDescription": "Counts demand data read requests that miss the L3 cache. Available PDIST counters: 0",
"SampleAfterValue": "100003",
"UMask": "0x10",
"Unit": "cpu_core"
@@ -427,7 +423,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x20",
"EventName": "OFFCORE_REQUESTS_OUTSTANDING.L3_MISS_DEMAND_DATA_RD",
- "PublicDescription": "For every cycle, increments by the number of demand data read requests pending that are known to have missed the L3 cache. Note that this does not capture all elapsed cycles while requests are outstanding - only cycles from when the requests were known by the requesting core to have missed the L3 cache. Available PDIST counters: 0",
+ "PublicDescription": "For every cycle, increments by the number of demand data read requests pending that are known to have missed the L3 cache. Note that this does not capture all elapsed cycles while requests are outstanding - only cycles from when the requests were known by the requesting core to have missed the L3 cache.",
"SampleAfterValue": "2000003",
"UMask": "0x10",
"Unit": "cpu_core"
diff --git a/tools/perf/pmu-events/arch/x86/alderlake/other.json b/tools/perf/pmu-events/arch/x86/alderlake/other.json
index 5f64138edfe4..af46cde26b54 100644
--- a/tools/perf/pmu-events/arch/x86/alderlake/other.json
+++ b/tools/perf/pmu-events/arch/x86/alderlake/other.json
@@ -4,7 +4,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc1",
"EventName": "ASSISTS.HARDWARE",
- "PublicDescription": "Count all other hardware assists or traps that are not necessarily architecturally exposed (through a software handler) beyond FP; SSE-AVX mix and A/D assists who are counted by dedicated sub-events. This includes, but not limited to, assists at EXE or MEM uop writeback like AVX* load/store/gather/scatter (non-FP GSSE-assist ) , assists generated by ROB like PEBS and RTIT, Uncore trap, RAR (Remote Action Request) and CET (Control flow Enforcement Technology) assists. the event also counts for Machine Ordering count. Available PDIST counters: 0",
+ "PublicDescription": "Count all other hardware assists or traps that are not necessarily architecturally exposed (through a software handler) beyond FP; SSE-AVX mix and A/D assists who are counted by dedicated sub-events. This includes, but not limited to, assists at EXE or MEM uop writeback like AVX* load/store/gather/scatter (non-FP GSSE-assist ) , assists generated by ROB like PEBS and RTIT, Uncore trap, RAR (Remote Action Request) and CET (Control flow Enforcement Technology) assists. the event also counts for Machine Ordering count.",
"SampleAfterValue": "100003",
"UMask": "0x4",
"Unit": "cpu_core"
@@ -14,7 +14,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc1",
"EventName": "ASSISTS.PAGE_FAULT",
- "PublicDescription": "ASSISTS.PAGE_FAULT Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x8",
"Unit": "cpu_core"
@@ -24,7 +23,6 @@
"Counter": "0,1,2,3",
"EventCode": "0x28",
"EventName": "CORE_POWER.LICENSE_1",
- "PublicDescription": "CORE_POWER.LICENSE_1 Available PDIST counters: 0",
"SampleAfterValue": "200003",
"UMask": "0x2",
"Unit": "cpu_core"
@@ -34,7 +32,6 @@
"Counter": "0,1,2,3",
"EventCode": "0x28",
"EventName": "CORE_POWER.LICENSE_2",
- "PublicDescription": "CORE_POWER.LICENSE_2 Available PDIST counters: 0",
"SampleAfterValue": "200003",
"UMask": "0x4",
"Unit": "cpu_core"
@@ -44,7 +41,6 @@
"Counter": "0,1,2,3",
"EventCode": "0x28",
"EventName": "CORE_POWER.LICENSE_3",
- "PublicDescription": "CORE_POWER.LICENSE_3 Available PDIST counters: 0",
"SampleAfterValue": "200003",
"UMask": "0x8",
"Unit": "cpu_core"
@@ -113,7 +109,7 @@
"CounterMask": "1",
"EventCode": "0x2d",
"EventName": "XQ.FULL_CYCLES",
- "PublicDescription": "number of cycles when the thread is active and the uncore cannot take any further requests (for example prefetches, loads or stores initiated by the Core that miss the L2 cache). Available PDIST counters: 0",
+ "PublicDescription": "number of cycles when the thread is active and the uncore cannot take any further requests (for example prefetches, loads or stores initiated by the Core that miss the L2 cache).",
"SampleAfterValue": "1000003",
"UMask": "0x1",
"Unit": "cpu_core"
diff --git a/tools/perf/pmu-events/arch/x86/alderlake/pipeline.json b/tools/perf/pmu-events/arch/x86/alderlake/pipeline.json
index 48ef2a8cc49a..57a8c78cdc49 100644
--- a/tools/perf/pmu-events/arch/x86/alderlake/pipeline.json
+++ b/tools/perf/pmu-events/arch/x86/alderlake/pipeline.json
@@ -6,7 +6,6 @@
"Deprecated": "1",
"EventCode": "0xb0",
"EventName": "ARITH.DIVIDER_ACTIVE",
- "PublicDescription": "This event is deprecated. Refer to new event ARITH.DIV_ACTIVE Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x9",
"Unit": "cpu_core"
@@ -27,14 +26,15 @@
"CounterMask": "1",
"EventCode": "0xb0",
"EventName": "ARITH.DIV_ACTIVE",
- "PublicDescription": "Counts cycles when divide unit is busy executing divide or square root operations. Accounts for integer and floating-point operations. Available PDIST counters: 0",
+ "PublicDescription": "Counts cycles when divide unit is busy executing divide or square root operations. Accounts for integer and floating-point operations.",
"SampleAfterValue": "1000003",
"UMask": "0x9",
"Unit": "cpu_core"
},
{
- "BriefDescription": "Counts the number of active floating point and integer dividers per cycle.",
+ "BriefDescription": "This event is deprecated.",
"Counter": "0,1,2,3,4,5",
+ "Deprecated": "1",
"EventCode": "0xcd",
"EventName": "ARITH.DIV_OCCUPANCY",
"SampleAfterValue": "1000003",
@@ -42,8 +42,9 @@
"Unit": "cpu_atom"
},
{
- "BriefDescription": "Counts the number of floating point and integer divider uops executed per cycle.",
+ "BriefDescription": "This event is deprecated.",
"Counter": "0,1,2,3,4,5",
+ "Deprecated": "1",
"EventCode": "0xcd",
"EventName": "ARITH.DIV_UOPS",
"SampleAfterValue": "1000003",
@@ -57,7 +58,6 @@
"Deprecated": "1",
"EventCode": "0xb0",
"EventName": "ARITH.FP_DIVIDER_ACTIVE",
- "PublicDescription": "This event is deprecated. Refer to new event ARITH.FPDIV_ACTIVE Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -78,7 +78,6 @@
"CounterMask": "1",
"EventCode": "0xb0",
"EventName": "ARITH.IDIV_ACTIVE",
- "PublicDescription": "This event counts the cycles the integer divider is busy. Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x8",
"Unit": "cpu_core"
@@ -108,7 +107,6 @@
"Deprecated": "1",
"EventCode": "0xb0",
"EventName": "ARITH.INT_DIVIDER_ACTIVE",
- "PublicDescription": "This event is deprecated. Refer to new event ARITH.IDIV_ACTIVE Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x8",
"Unit": "cpu_core"
@@ -118,7 +116,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc1",
"EventName": "ASSISTS.ANY",
- "PublicDescription": "Counts the number of occurrences where a microcode assist is invoked by hardware. Examples include AD (page Access Dirty), FP and AVX related assists. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of occurrences where a microcode assist is invoked by hardware. Examples include AD (page Access Dirty), FP and AVX related assists.",
"SampleAfterValue": "100003",
"UMask": "0x1b",
"Unit": "cpu_core"
@@ -549,7 +547,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xec",
"EventName": "CPU_CLK_UNHALTED.C01",
- "PublicDescription": "Counts core clocks when the thread is in the C0.1 light-weight slower wakeup time but more power saving optimized state. This state can be entered via the TPAUSE or UMWAIT instructions. Available PDIST counters: 0",
+ "PublicDescription": "Counts core clocks when the thread is in the C0.1 light-weight slower wakeup time but more power saving optimized state. This state can be entered via the TPAUSE or UMWAIT instructions.",
"SampleAfterValue": "2000003",
"UMask": "0x10",
"Unit": "cpu_core"
@@ -559,7 +557,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xec",
"EventName": "CPU_CLK_UNHALTED.C02",
- "PublicDescription": "Counts core clocks when the thread is in the C0.2 light-weight faster wakeup time but less power saving optimized state. This state can be entered via the TPAUSE or UMWAIT instructions. Available PDIST counters: 0",
+ "PublicDescription": "Counts core clocks when the thread is in the C0.2 light-weight faster wakeup time but less power saving optimized state. This state can be entered via the TPAUSE or UMWAIT instructions.",
"SampleAfterValue": "2000003",
"UMask": "0x20",
"Unit": "cpu_core"
@@ -569,7 +567,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xec",
"EventName": "CPU_CLK_UNHALTED.C0_WAIT",
- "PublicDescription": "Counts core clocks when the thread is in the C0.1 or C0.2 power saving optimized states (TPAUSE or UMWAIT instructions) or running the PAUSE instruction. Available PDIST counters: 0",
+ "PublicDescription": "Counts core clocks when the thread is in the C0.1 or C0.2 power saving optimized states (TPAUSE or UMWAIT instructions) or running the PAUSE instruction.",
"SampleAfterValue": "2000003",
"UMask": "0x70",
"Unit": "cpu_core"
@@ -597,7 +595,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xec",
"EventName": "CPU_CLK_UNHALTED.DISTRIBUTED",
- "PublicDescription": "This event distributes cycle counts between active hyperthreads, i.e., those in C0. A hyperthread becomes inactive when it executes the HLT or MWAIT instructions. If all other hyperthreads are inactive (or disabled or do not exist), all counts are attributed to this hyperthread. To obtain the full count when the Core is active, sum the counts from each hyperthread. Available PDIST counters: 0",
+ "PublicDescription": "This event distributes cycle counts between active hyperthreads, i.e., those in C0. A hyperthread becomes inactive when it executes the HLT or MWAIT instructions. If all other hyperthreads are inactive (or disabled or do not exist), all counts are attributed to this hyperthread. To obtain the full count when the Core is active, sum the counts from each hyperthread.",
"SampleAfterValue": "2000003",
"UMask": "0x2",
"Unit": "cpu_core"
@@ -607,7 +605,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0x3c",
"EventName": "CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE",
- "PublicDescription": "Counts Core crystal clock cycles when current thread is unhalted and the other thread is halted. Available PDIST counters: 0",
+ "PublicDescription": "Counts Core crystal clock cycles when current thread is unhalted and the other thread is halted.",
"SampleAfterValue": "25003",
"UMask": "0x2",
"Unit": "cpu_core"
@@ -617,7 +615,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xec",
"EventName": "CPU_CLK_UNHALTED.PAUSE",
- "PublicDescription": "CPU_CLK_UNHALTED.PAUSE Available PDIST counters: 0",
"SampleAfterValue": "2000003",
"UMask": "0x40",
"Unit": "cpu_core"
@@ -629,7 +626,6 @@
"EdgeDetect": "1",
"EventCode": "0xec",
"EventName": "CPU_CLK_UNHALTED.PAUSE_INST",
- "PublicDescription": "CPU_CLK_UNHALTED.PAUSE_INST Available PDIST counters: 0",
"SampleAfterValue": "2000003",
"UMask": "0x40",
"Unit": "cpu_core"
@@ -649,7 +645,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0x3c",
"EventName": "CPU_CLK_UNHALTED.REF_DISTRIBUTED",
- "PublicDescription": "This event distributes Core crystal clock cycle counts between active hyperthreads, i.e., those in C0 sleep-state. A hyperthread becomes inactive when it executes the HLT or MWAIT instructions. If one thread is active in a core, all counts are attributed to this hyperthread. To obtain the full count when the Core is active, sum the counts from each hyperthread. Available PDIST counters: 0",
+ "PublicDescription": "This event distributes Core crystal clock cycle counts between active hyperthreads, i.e., those in C0 sleep-state. A hyperthread becomes inactive when it executes the HLT or MWAIT instructions. If one thread is active in a core, all counts are attributed to this hyperthread. To obtain the full count when the Core is active, sum the counts from each hyperthread.",
"SampleAfterValue": "2000003",
"UMask": "0x8",
"Unit": "cpu_core"
@@ -687,7 +683,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0x3c",
"EventName": "CPU_CLK_UNHALTED.REF_TSC_P",
- "PublicDescription": "Counts the number of reference cycles when the core is not in a halt state. The core enters the halt state when it is running the HLT instruction or the MWAIT instruction. This event is not affected by core frequency changes (for example, P states, TM2 transitions) but has the same incrementing frequency as the time stamp counter. This event can approximate elapsed time while the core was not in a halt state. It is counted on a dedicated fixed counter, leaving the four (eight when Hyperthreading is disabled) programmable counters available for other events. Note: On all current platforms this event stops counting during 'throttling (TM)' states duty off periods the processor is 'halted'. The counter update is done at a lower clock rate then the core clock the overflow status bit for this counter may appear 'sticky'. After the counter has overflowed and software clears the overflow status bit and resets the counter to less than MAX. The reset value to the counter is not clocked immediately so the overflow status bit will flip 'high (1)' and generate another PMI (if enabled) after which the reset value gets clocked into the counter. Therefore, software will get the interrupt, read the overflow status bit '1 for bit 34 while the counter value is less than MAX. Software should ignore this case. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of reference cycles when the core is not in a halt state. The core enters the halt state when it is running the HLT instruction or the MWAIT instruction. This event is not affected by core frequency changes (for example, P states, TM2 transitions) but has the same incrementing frequency as the time stamp counter. This event can approximate elapsed time while the core was not in a halt state. It is counted on a dedicated fixed counter, leaving the four (eight when Hyperthreading is disabled) programmable counters available for other events. Note: On all current platforms this event stops counting during 'throttling (TM)' states duty off periods the processor is 'halted'. The counter update is done at a lower clock rate then the core clock the overflow status bit for this counter may appear 'sticky'. After the counter has overflowed and software clears the overflow status bit and resets the counter to less than MAX. The reset value to the counter is not clocked immediately so the overflow status bit will flip 'high (1)' and generate another PMI (if enabled) after which the reset value gets clocked into the counter. Therefore, software will get the interrupt, read the overflow status bit '1 for bit 34 while the counter value is less than MAX. Software should ignore this case.",
"SampleAfterValue": "2000003",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -724,7 +720,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0x3c",
"EventName": "CPU_CLK_UNHALTED.THREAD_P",
- "PublicDescription": "This is an architectural event that counts the number of thread cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. The core frequency may change from time to time due to power or thermal throttling. For this reason, this event may have a changing ratio with regards to wall clock time. Available PDIST counters: 0",
+ "PublicDescription": "This is an architectural event that counts the number of thread cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. The core frequency may change from time to time due to power or thermal throttling. For this reason, this event may have a changing ratio with regards to wall clock time.",
"SampleAfterValue": "2000003",
"Unit": "cpu_core"
},
@@ -734,7 +730,6 @@
"CounterMask": "8",
"EventCode": "0xa3",
"EventName": "CYCLE_ACTIVITY.CYCLES_L1D_MISS",
- "PublicDescription": "Cycles while L1 cache miss demand load is outstanding. Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x8",
"Unit": "cpu_core"
@@ -745,7 +740,6 @@
"CounterMask": "1",
"EventCode": "0xa3",
"EventName": "CYCLE_ACTIVITY.CYCLES_L2_MISS",
- "PublicDescription": "Cycles while L2 cache miss demand load is outstanding. Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -756,7 +750,6 @@
"CounterMask": "16",
"EventCode": "0xa3",
"EventName": "CYCLE_ACTIVITY.CYCLES_MEM_ANY",
- "PublicDescription": "Cycles while memory subsystem has an outstanding load. Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x10",
"Unit": "cpu_core"
@@ -767,7 +760,6 @@
"CounterMask": "12",
"EventCode": "0xa3",
"EventName": "CYCLE_ACTIVITY.STALLS_L1D_MISS",
- "PublicDescription": "Execution stalls while L1 cache miss demand load is outstanding. Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0xc",
"Unit": "cpu_core"
@@ -778,7 +770,6 @@
"CounterMask": "5",
"EventCode": "0xa3",
"EventName": "CYCLE_ACTIVITY.STALLS_L2_MISS",
- "PublicDescription": "Execution stalls while L2 cache miss demand load is outstanding. Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x5",
"Unit": "cpu_core"
@@ -789,7 +780,6 @@
"CounterMask": "4",
"EventCode": "0xa3",
"EventName": "CYCLE_ACTIVITY.STALLS_TOTAL",
- "PublicDescription": "Total execution stalls. Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x4",
"Unit": "cpu_core"
@@ -799,7 +789,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xa6",
"EventName": "EXE_ACTIVITY.1_PORTS_UTIL",
- "PublicDescription": "Counts cycles during which a total of 1 uop was executed on all ports and Reservation Station (RS) was not empty. Available PDIST counters: 0",
+ "PublicDescription": "Counts cycles during which a total of 1 uop was executed on all ports and Reservation Station (RS) was not empty.",
"SampleAfterValue": "2000003",
"UMask": "0x2",
"Unit": "cpu_core"
@@ -809,7 +799,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xa6",
"EventName": "EXE_ACTIVITY.2_3_PORTS_UTIL",
- "PublicDescription": "Cycles total of 2 or 3 uops are executed on all ports and Reservation Station (RS) was not empty. Available PDIST counters: 0",
"SampleAfterValue": "2000003",
"UMask": "0xc",
"Unit": "cpu_core"
@@ -819,7 +808,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xa6",
"EventName": "EXE_ACTIVITY.2_PORTS_UTIL",
- "PublicDescription": "Counts cycles during which a total of 2 uops were executed on all ports and Reservation Station (RS) was not empty. Available PDIST counters: 0",
+ "PublicDescription": "Counts cycles during which a total of 2 uops were executed on all ports and Reservation Station (RS) was not empty.",
"SampleAfterValue": "2000003",
"UMask": "0x4",
"Unit": "cpu_core"
@@ -829,7 +818,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xa6",
"EventName": "EXE_ACTIVITY.3_PORTS_UTIL",
- "PublicDescription": "Cycles total of 3 uops are executed on all ports and Reservation Station (RS) was not empty. Available PDIST counters: 0",
+ "PublicDescription": "Cycles total of 3 uops are executed on all ports and Reservation Station (RS) was not empty.",
"SampleAfterValue": "2000003",
"UMask": "0x8",
"Unit": "cpu_core"
@@ -839,7 +828,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xa6",
"EventName": "EXE_ACTIVITY.4_PORTS_UTIL",
- "PublicDescription": "Cycles total of 4 uops are executed on all ports and Reservation Station (RS) was not empty. Available PDIST counters: 0",
+ "PublicDescription": "Cycles total of 4 uops are executed on all ports and Reservation Station (RS) was not empty.",
"SampleAfterValue": "2000003",
"UMask": "0x10",
"Unit": "cpu_core"
@@ -850,7 +839,6 @@
"CounterMask": "5",
"EventCode": "0xa6",
"EventName": "EXE_ACTIVITY.BOUND_ON_LOADS",
- "PublicDescription": "Execution stalls while memory subsystem has an outstanding load. Available PDIST counters: 0",
"SampleAfterValue": "2000003",
"UMask": "0x21",
"Unit": "cpu_core"
@@ -861,7 +849,7 @@
"CounterMask": "2",
"EventCode": "0xa6",
"EventName": "EXE_ACTIVITY.BOUND_ON_STORES",
- "PublicDescription": "Counts cycles where the Store Buffer was full and no loads caused an execution stall. Available PDIST counters: 0",
+ "PublicDescription": "Counts cycles where the Store Buffer was full and no loads caused an execution stall.",
"SampleAfterValue": "1000003",
"UMask": "0x40",
"Unit": "cpu_core"
@@ -871,7 +859,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xa6",
"EventName": "EXE_ACTIVITY.EXE_BOUND_0_PORTS",
- "PublicDescription": "Number of cycles total of 0 uops executed on all ports, Reservation Station (RS) was not empty, the Store Buffer (SB) was not full and there was no outstanding load. Available PDIST counters: 0",
+ "PublicDescription": "Number of cycles total of 0 uops executed on all ports, Reservation Station (RS) was not empty, the Store Buffer (SB) was not full and there was no outstanding load.",
"SampleAfterValue": "1000003",
"UMask": "0x80",
"Unit": "cpu_core"
@@ -881,7 +869,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x75",
"EventName": "INST_DECODED.DECODERS",
- "PublicDescription": "Number of decoders utilized in a cycle when the MITE (legacy decode pipeline) fetches instructions. Available PDIST counters: 0",
+ "PublicDescription": "Number of decoders utilized in a cycle when the MITE (legacy decode pipeline) fetches instructions.",
"SampleAfterValue": "2000003",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -927,7 +915,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc0",
"EventName": "INST_RETIRED.MACRO_FUSED",
- "PublicDescription": "INST_RETIRED.MACRO_FUSED Available PDIST counters: 0",
"SampleAfterValue": "2000003",
"UMask": "0x10",
"Unit": "cpu_core"
@@ -937,7 +924,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc0",
"EventName": "INST_RETIRED.NOP",
- "PublicDescription": "Counts all retired NOP or ENDBR32/64 instructions Available PDIST counters: 0",
+ "PublicDescription": "Counts all retired NOP or ENDBR32/64 instructions",
"SampleAfterValue": "2000003",
"UMask": "0x2",
"Unit": "cpu_core"
@@ -956,7 +943,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc0",
"EventName": "INST_RETIRED.REP_ITERATION",
- "PublicDescription": "Number of iterations of Repeat (REP) string retired instructions such as MOVS, CMPS, and SCAS. Each has a byte, word, and doubleword version and string instructions can be repeated using a repetition prefix, REP, that allows their architectural execution to be repeated a number of times as specified by the RCX register. Note the number of iterations is implementation-dependent. Available PDIST counters: 0",
+ "PublicDescription": "Number of iterations of Repeat (REP) string retired instructions such as MOVS, CMPS, and SCAS. Each has a byte, word, and doubleword version and string instructions can be repeated using a repetition prefix, REP, that allows their architectural execution to be repeated a number of times as specified by the RCX register. Note the number of iterations is implementation-dependent.",
"SampleAfterValue": "2000003",
"UMask": "0x8",
"Unit": "cpu_core"
@@ -968,7 +955,7 @@
"EdgeDetect": "1",
"EventCode": "0xad",
"EventName": "INT_MISC.CLEARS_COUNT",
- "PublicDescription": "Counts the number of speculative clears due to any type of branch misprediction or machine clears Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of speculative clears due to any type of branch misprediction or machine clears",
"SampleAfterValue": "500009",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -978,7 +965,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xad",
"EventName": "INT_MISC.CLEAR_RESTEER_CYCLES",
- "PublicDescription": "Cycles after recovery from a branch misprediction or machine clear till the first uop is issued from the resteered path. Available PDIST counters: 0",
+ "PublicDescription": "Cycles after recovery from a branch misprediction or machine clear till the first uop is issued from the resteered path.",
"SampleAfterValue": "500009",
"UMask": "0x80",
"Unit": "cpu_core"
@@ -988,7 +975,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xad",
"EventName": "INT_MISC.RECOVERY_CYCLES",
- "PublicDescription": "Counts core cycles when the Resource allocator was stalled due to recovery from an earlier branch misprediction or machine clear event. Available PDIST counters: 0",
+ "PublicDescription": "Counts core cycles when the Resource allocator was stalled due to recovery from an earlier branch misprediction or machine clear event.",
"SampleAfterValue": "500009",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -1000,7 +987,6 @@
"EventName": "INT_MISC.UNKNOWN_BRANCH_CYCLES",
"MSRIndex": "0x3F7",
"MSRValue": "0x7",
- "PublicDescription": "Bubble cycles of BAClear (Unknown Branch). Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x40",
"Unit": "cpu_core"
@@ -1010,7 +996,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xad",
"EventName": "INT_MISC.UOP_DROPPING",
- "PublicDescription": "Estimated number of Top-down Microarchitecture Analysis slots that got dropped due to non front-end reasons Available PDIST counters: 0",
+ "PublicDescription": "Estimated number of Top-down Microarchitecture Analysis slots that got dropped due to non front-end reasons",
"SampleAfterValue": "1000003",
"UMask": "0x10",
"Unit": "cpu_core"
@@ -1020,7 +1006,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xe7",
"EventName": "INT_VEC_RETIRED.128BIT",
- "PublicDescription": "INT_VEC_RETIRED.128BIT Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x13",
"Unit": "cpu_core"
@@ -1030,7 +1015,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xe7",
"EventName": "INT_VEC_RETIRED.256BIT",
- "PublicDescription": "INT_VEC_RETIRED.256BIT Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0xac",
"Unit": "cpu_core"
@@ -1040,7 +1024,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xe7",
"EventName": "INT_VEC_RETIRED.ADD_128",
- "PublicDescription": "Number of retired integer ADD/SUB (regular or horizontal), SAD 128-bit vector instructions. Available PDIST counters: 0",
+ "PublicDescription": "Number of retired integer ADD/SUB (regular or horizontal), SAD 128-bit vector instructions.",
"SampleAfterValue": "1000003",
"UMask": "0x3",
"Unit": "cpu_core"
@@ -1050,7 +1034,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xe7",
"EventName": "INT_VEC_RETIRED.ADD_256",
- "PublicDescription": "Number of retired integer ADD/SUB (regular or horizontal), SAD 256-bit vector instructions. Available PDIST counters: 0",
+ "PublicDescription": "Number of retired integer ADD/SUB (regular or horizontal), SAD 256-bit vector instructions.",
"SampleAfterValue": "1000003",
"UMask": "0xc",
"Unit": "cpu_core"
@@ -1060,7 +1044,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xe7",
"EventName": "INT_VEC_RETIRED.MUL_256",
- "PublicDescription": "INT_VEC_RETIRED.MUL_256 Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x80",
"Unit": "cpu_core"
@@ -1070,7 +1053,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xe7",
"EventName": "INT_VEC_RETIRED.SHUFFLES",
- "PublicDescription": "INT_VEC_RETIRED.SHUFFLES Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x40",
"Unit": "cpu_core"
@@ -1080,7 +1062,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xe7",
"EventName": "INT_VEC_RETIRED.VNNI_128",
- "PublicDescription": "INT_VEC_RETIRED.VNNI_128 Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x10",
"Unit": "cpu_core"
@@ -1090,7 +1071,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xe7",
"EventName": "INT_VEC_RETIRED.VNNI_256",
- "PublicDescription": "INT_VEC_RETIRED.VNNI_256 Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x20",
"Unit": "cpu_core"
@@ -1119,7 +1099,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x03",
"EventName": "LD_BLOCKS.ADDRESS_ALIAS",
- "PublicDescription": "Counts the number of times a load got blocked due to false dependencies in MOB due to partial compare on address. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of times a load got blocked due to false dependencies in MOB due to partial compare on address.",
"SampleAfterValue": "100003",
"UMask": "0x4",
"Unit": "cpu_core"
@@ -1138,7 +1118,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x03",
"EventName": "LD_BLOCKS.NO_SR",
- "PublicDescription": "Counts the number of times that split load operations are temporarily blocked because all resources for handling the split accesses are in use. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of times that split load operations are temporarily blocked because all resources for handling the split accesses are in use.",
"SampleAfterValue": "100003",
"UMask": "0x88",
"Unit": "cpu_core"
@@ -1148,7 +1128,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x03",
"EventName": "LD_BLOCKS.STORE_FORWARD",
- "PublicDescription": "Counts the number of times where store forwarding was prevented for a load operation. The most common case is a load blocked due to the address of memory access (partially) overlapping with a preceding uncompleted store. Note: See the table of not supported store forwards in the Optimization Guide. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of times where store forwarding was prevented for a load operation. The most common case is a load blocked due to the address of memory access (partially) overlapping with a preceding uncompleted store. Note: See the table of not supported store forwards in the Optimization Guide.",
"SampleAfterValue": "100003",
"UMask": "0x82",
"Unit": "cpu_core"
@@ -1158,7 +1138,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x4c",
"EventName": "LOAD_HIT_PREFETCH.SWPF",
- "PublicDescription": "Counts all software-prefetch load dispatches that hit the fill buffer (FB) allocated for the software prefetch. It can also be incremented by some lock instructions. So it should only be used with profiling so that the locks can be excluded by ASM (Assembly File) inspection of the nearby instructions. Available PDIST counters: 0",
+ "PublicDescription": "Counts all software-prefetch load dispatches that hit the fill buffer (FB) allocated for the software prefetch. It can also be incremented by some lock instructions. So it should only be used with profiling so that the locks can be excluded by ASM (Assembly File) inspection of the nearby instructions.",
"SampleAfterValue": "100003",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -1169,7 +1149,7 @@
"CounterMask": "1",
"EventCode": "0xa8",
"EventName": "LSD.CYCLES_ACTIVE",
- "PublicDescription": "Counts the cycles when at least one uop is delivered by the LSD (Loop-stream detector). Available PDIST counters: 0",
+ "PublicDescription": "Counts the cycles when at least one uop is delivered by the LSD (Loop-stream detector).",
"SampleAfterValue": "2000003",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -1180,7 +1160,7 @@
"CounterMask": "6",
"EventCode": "0xa8",
"EventName": "LSD.CYCLES_OK",
- "PublicDescription": "Counts the cycles when optimal number of uops is delivered by the LSD (Loop-stream detector). Available PDIST counters: 0",
+ "PublicDescription": "Counts the cycles when optimal number of uops is delivered by the LSD (Loop-stream detector).",
"SampleAfterValue": "2000003",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -1190,7 +1170,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xa8",
"EventName": "LSD.UOPS",
- "PublicDescription": "Counts the number of uops delivered to the back-end by the LSD(Loop Stream Detector). Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of uops delivered to the back-end by the LSD(Loop Stream Detector).",
"SampleAfterValue": "2000003",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -1202,7 +1182,7 @@
"EdgeDetect": "1",
"EventCode": "0xc3",
"EventName": "MACHINE_CLEARS.COUNT",
- "PublicDescription": "Counts the number of machine clears (nukes) of any type. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of machine clears (nukes) of any type.",
"SampleAfterValue": "100003",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -1258,7 +1238,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc3",
"EventName": "MACHINE_CLEARS.SMC",
- "PublicDescription": "Counts self-modifying code (SMC) detected, which causes a machine clear. Available PDIST counters: 0",
+ "PublicDescription": "Counts self-modifying code (SMC) detected, which causes a machine clear.",
"SampleAfterValue": "100003",
"UMask": "0x4",
"Unit": "cpu_core"
@@ -1268,7 +1248,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xe0",
"EventName": "MISC2_RETIRED.LFENCE",
- "PublicDescription": "number of LFENCE retired instructions Available PDIST counters: 0",
+ "PublicDescription": "number of LFENCE retired instructions",
"SampleAfterValue": "400009",
"UMask": "0x20",
"Unit": "cpu_core"
@@ -1288,7 +1268,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xcc",
"EventName": "MISC_RETIRED.LBR_INSERTS",
- "PublicDescription": "Increments when an entry is added to the Last Branch Record (LBR) array (or removed from the array in case of RETURNs in call stack mode). The event requires LBR enable via IA32_DEBUGCTL MSR and branch type selection via MSR_LBR_SELECT. Available PDIST counters: 0",
+ "PublicDescription": "Increments when an entry is added to the Last Branch Record (LBR) array (or removed from the array in case of RETURNs in call stack mode). The event requires LBR enable via IA32_DEBUGCTL MSR and branch type selection via MSR_LBR_SELECT.",
"SampleAfterValue": "100003",
"UMask": "0x20",
"Unit": "cpu_core"
@@ -1298,7 +1278,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xa2",
"EventName": "RESOURCE_STALLS.SB",
- "PublicDescription": "Counts allocation stall cycles caused by the store buffer (SB) being full. This counts cycles that the pipeline back-end blocked uop delivery from the front-end. Available PDIST counters: 0",
+ "PublicDescription": "Counts allocation stall cycles caused by the store buffer (SB) being full. This counts cycles that the pipeline back-end blocked uop delivery from the front-end.",
"SampleAfterValue": "100003",
"UMask": "0x8",
"Unit": "cpu_core"
@@ -1308,7 +1288,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xa2",
"EventName": "RESOURCE_STALLS.SCOREBOARD",
- "PublicDescription": "Counts cycles where the pipeline is stalled due to serializing operations. Available PDIST counters: 0",
"SampleAfterValue": "100003",
"UMask": "0x2",
"Unit": "cpu_core"
@@ -1318,7 +1297,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xa5",
"EventName": "RS.EMPTY",
- "PublicDescription": "Counts cycles during which the reservation station (RS) is empty for this logical processor. This is usually caused when the front-end pipeline runs into starvation periods (e.g. branch mispredictions or i-cache misses) Available PDIST counters: 0",
+ "PublicDescription": "Counts cycles during which the reservation station (RS) is empty for this logical processor. This is usually caused when the front-end pipeline runs into starvation periods (e.g. branch mispredictions or i-cache misses)",
"SampleAfterValue": "1000003",
"UMask": "0x7",
"Unit": "cpu_core"
@@ -1331,7 +1310,7 @@
"EventCode": "0xa5",
"EventName": "RS.EMPTY_COUNT",
"Invert": "1",
- "PublicDescription": "Counts end of periods where the Reservation Station (RS) was empty. Could be useful to closely sample on front-end latency issues (see the FRONTEND_RETIRED event of designated precise events) Available PDIST counters: 0",
+ "PublicDescription": "Counts end of periods where the Reservation Station (RS) was empty. Could be useful to closely sample on front-end latency issues (see the FRONTEND_RETIRED event of designated precise events)",
"SampleAfterValue": "100003",
"UMask": "0x7",
"Unit": "cpu_core"
@@ -1341,7 +1320,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xa5",
"EventName": "RS.EMPTY_RESOURCE",
- "PublicDescription": "Cycles when Reservation Station (RS) is empty due to a resource in the back-end Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -1355,7 +1333,6 @@
"EventCode": "0xa5",
"EventName": "RS_EMPTY.COUNT",
"Invert": "1",
- "PublicDescription": "This event is deprecated. Refer to new event RS.EMPTY_COUNT Available PDIST counters: 0",
"SampleAfterValue": "100003",
"UMask": "0x7",
"Unit": "cpu_core"
@@ -1366,7 +1343,6 @@
"Deprecated": "1",
"EventCode": "0xa5",
"EventName": "RS_EMPTY.CYCLES",
- "PublicDescription": "This event is deprecated. Refer to new event RS.EMPTY Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x7",
"Unit": "cpu_core"
@@ -1395,7 +1371,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xa4",
"EventName": "TOPDOWN.BACKEND_BOUND_SLOTS",
- "PublicDescription": "Number of slots in TMA method where no micro-operations were being issued from front-end to back-end of the machine due to lack of back-end resources. Available PDIST counters: 0",
+ "PublicDescription": "Number of slots in TMA method where no micro-operations were being issued from front-end to back-end of the machine due to lack of back-end resources.",
"SampleAfterValue": "10000003",
"UMask": "0x2",
"Unit": "cpu_core"
@@ -1405,7 +1381,7 @@
"Counter": "0",
"EventCode": "0xa4",
"EventName": "TOPDOWN.BAD_SPEC_SLOTS",
- "PublicDescription": "Number of slots of TMA method that were wasted due to incorrect speculation. It covers all types of control-flow or data-related mis-speculations. Available PDIST counters: 0",
+ "PublicDescription": "Number of slots of TMA method that were wasted due to incorrect speculation. It covers all types of control-flow or data-related mis-speculations.",
"SampleAfterValue": "10000003",
"UMask": "0x4",
"Unit": "cpu_core"
@@ -1415,7 +1391,7 @@
"Counter": "0",
"EventCode": "0xa4",
"EventName": "TOPDOWN.BR_MISPREDICT_SLOTS",
- "PublicDescription": "Number of TMA slots that were wasted due to incorrect speculation by (any type of) branch mispredictions. This event estimates number of speculative operations that were issued but not retired as well as the out-of-order engine recovery past a branch misprediction. Available PDIST counters: 0",
+ "PublicDescription": "Number of TMA slots that were wasted due to incorrect speculation by (any type of) branch mispredictions. This event estimates number of speculative operations that were issued but not retired as well as the out-of-order engine recovery past a branch misprediction.",
"SampleAfterValue": "10000003",
"UMask": "0x8",
"Unit": "cpu_core"
@@ -1425,7 +1401,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xa4",
"EventName": "TOPDOWN.MEMORY_BOUND_SLOTS",
- "PublicDescription": "TOPDOWN.MEMORY_BOUND_SLOTS Available PDIST counters: 0",
"SampleAfterValue": "10000003",
"UMask": "0x10",
"Unit": "cpu_core"
@@ -1444,7 +1419,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xa4",
"EventName": "TOPDOWN.SLOTS_P",
- "PublicDescription": "Counts the number of available slots for an unhalted logical processor. The event increments by machine-width of the narrowest pipeline as employed by the Top-down Microarchitecture Analysis method. The count is distributed among unhalted logical processors (hyper-threads) who share the same physical core. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of available slots for an unhalted logical processor. The event increments by machine-width of the narrowest pipeline as employed by the Top-down Microarchitecture Analysis method. The count is distributed among unhalted logical processors (hyper-threads) who share the same physical core.",
"SampleAfterValue": "10000003",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -1661,7 +1636,6 @@
"Counter": "0,1,2,3",
"EventCode": "0x76",
"EventName": "UOPS_DECODED.DEC0_UOPS",
- "PublicDescription": "UOPS_DECODED.DEC0_UOPS Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -1671,7 +1645,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xb2",
"EventName": "UOPS_DISPATCHED.PORT_0",
- "PublicDescription": "Number of uops dispatch to execution port 0. Available PDIST counters: 0",
+ "PublicDescription": "Number of uops dispatch to execution port 0.",
"SampleAfterValue": "2000003",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -1681,7 +1655,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xb2",
"EventName": "UOPS_DISPATCHED.PORT_1",
- "PublicDescription": "Number of uops dispatch to execution port 1. Available PDIST counters: 0",
+ "PublicDescription": "Number of uops dispatch to execution port 1.",
"SampleAfterValue": "2000003",
"UMask": "0x2",
"Unit": "cpu_core"
@@ -1691,7 +1665,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xb2",
"EventName": "UOPS_DISPATCHED.PORT_2_3_10",
- "PublicDescription": "Number of uops dispatch to execution ports 2, 3 and 10 Available PDIST counters: 0",
+ "PublicDescription": "Number of uops dispatch to execution ports 2, 3 and 10",
"SampleAfterValue": "2000003",
"UMask": "0x4",
"Unit": "cpu_core"
@@ -1701,7 +1675,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xb2",
"EventName": "UOPS_DISPATCHED.PORT_4_9",
- "PublicDescription": "Number of uops dispatch to execution ports 4 and 9 Available PDIST counters: 0",
+ "PublicDescription": "Number of uops dispatch to execution ports 4 and 9",
"SampleAfterValue": "2000003",
"UMask": "0x10",
"Unit": "cpu_core"
@@ -1711,7 +1685,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xb2",
"EventName": "UOPS_DISPATCHED.PORT_5_11",
- "PublicDescription": "Number of uops dispatch to execution ports 5 and 11 Available PDIST counters: 0",
+ "PublicDescription": "Number of uops dispatch to execution ports 5 and 11",
"SampleAfterValue": "2000003",
"UMask": "0x20",
"Unit": "cpu_core"
@@ -1721,7 +1695,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xb2",
"EventName": "UOPS_DISPATCHED.PORT_6",
- "PublicDescription": "Number of uops dispatch to execution port 6. Available PDIST counters: 0",
+ "PublicDescription": "Number of uops dispatch to execution port 6.",
"SampleAfterValue": "2000003",
"UMask": "0x40",
"Unit": "cpu_core"
@@ -1731,7 +1705,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xb2",
"EventName": "UOPS_DISPATCHED.PORT_7_8",
- "PublicDescription": "Number of uops dispatch to execution ports 7 and 8. Available PDIST counters: 0",
+ "PublicDescription": "Number of uops dispatch to execution ports 7 and 8.",
"SampleAfterValue": "2000003",
"UMask": "0x80",
"Unit": "cpu_core"
@@ -1742,7 +1716,7 @@
"CounterMask": "1",
"EventCode": "0xb1",
"EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_1",
- "PublicDescription": "Counts cycles when at least 1 micro-op is executed from any thread on physical core. Available PDIST counters: 0",
+ "PublicDescription": "Counts cycles when at least 1 micro-op is executed from any thread on physical core.",
"SampleAfterValue": "2000003",
"UMask": "0x2",
"Unit": "cpu_core"
@@ -1753,7 +1727,7 @@
"CounterMask": "2",
"EventCode": "0xb1",
"EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_2",
- "PublicDescription": "Counts cycles when at least 2 micro-ops are executed from any thread on physical core. Available PDIST counters: 0",
+ "PublicDescription": "Counts cycles when at least 2 micro-ops are executed from any thread on physical core.",
"SampleAfterValue": "2000003",
"UMask": "0x2",
"Unit": "cpu_core"
@@ -1764,7 +1738,7 @@
"CounterMask": "3",
"EventCode": "0xb1",
"EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_3",
- "PublicDescription": "Counts cycles when at least 3 micro-ops are executed from any thread on physical core. Available PDIST counters: 0",
+ "PublicDescription": "Counts cycles when at least 3 micro-ops are executed from any thread on physical core.",
"SampleAfterValue": "2000003",
"UMask": "0x2",
"Unit": "cpu_core"
@@ -1775,7 +1749,7 @@
"CounterMask": "4",
"EventCode": "0xb1",
"EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_4",
- "PublicDescription": "Counts cycles when at least 4 micro-ops are executed from any thread on physical core. Available PDIST counters: 0",
+ "PublicDescription": "Counts cycles when at least 4 micro-ops are executed from any thread on physical core.",
"SampleAfterValue": "2000003",
"UMask": "0x2",
"Unit": "cpu_core"
@@ -1786,7 +1760,7 @@
"CounterMask": "1",
"EventCode": "0xb1",
"EventName": "UOPS_EXECUTED.CYCLES_GE_1",
- "PublicDescription": "Cycles where at least 1 uop was executed per-thread. Available PDIST counters: 0",
+ "PublicDescription": "Cycles where at least 1 uop was executed per-thread.",
"SampleAfterValue": "2000003",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -1797,7 +1771,7 @@
"CounterMask": "2",
"EventCode": "0xb1",
"EventName": "UOPS_EXECUTED.CYCLES_GE_2",
- "PublicDescription": "Cycles where at least 2 uops were executed per-thread. Available PDIST counters: 0",
+ "PublicDescription": "Cycles where at least 2 uops were executed per-thread.",
"SampleAfterValue": "2000003",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -1808,7 +1782,7 @@
"CounterMask": "3",
"EventCode": "0xb1",
"EventName": "UOPS_EXECUTED.CYCLES_GE_3",
- "PublicDescription": "Cycles where at least 3 uops were executed per-thread. Available PDIST counters: 0",
+ "PublicDescription": "Cycles where at least 3 uops were executed per-thread.",
"SampleAfterValue": "2000003",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -1819,7 +1793,7 @@
"CounterMask": "4",
"EventCode": "0xb1",
"EventName": "UOPS_EXECUTED.CYCLES_GE_4",
- "PublicDescription": "Cycles where at least 4 uops were executed per-thread. Available PDIST counters: 0",
+ "PublicDescription": "Cycles where at least 4 uops were executed per-thread.",
"SampleAfterValue": "2000003",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -1831,7 +1805,7 @@
"EventCode": "0xb1",
"EventName": "UOPS_EXECUTED.STALLS",
"Invert": "1",
- "PublicDescription": "Counts cycles during which no uops were dispatched from the Reservation Station (RS) per thread. Available PDIST counters: 0",
+ "PublicDescription": "Counts cycles during which no uops were dispatched from the Reservation Station (RS) per thread.",
"SampleAfterValue": "2000003",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -1844,7 +1818,6 @@
"EventCode": "0xb1",
"EventName": "UOPS_EXECUTED.STALL_CYCLES",
"Invert": "1",
- "PublicDescription": "This event is deprecated. Refer to new event UOPS_EXECUTED.STALLS Available PDIST counters: 0",
"SampleAfterValue": "2000003",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -1854,7 +1827,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xb1",
"EventName": "UOPS_EXECUTED.THREAD",
- "PublicDescription": "Counts the number of uops to be executed per-thread each cycle. Available PDIST counters: 0",
"SampleAfterValue": "2000003",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -1864,7 +1836,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xb1",
"EventName": "UOPS_EXECUTED.X87",
- "PublicDescription": "Counts the number of x87 uops executed. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of x87 uops executed.",
"SampleAfterValue": "2000003",
"UMask": "0x10",
"Unit": "cpu_core"
@@ -1883,7 +1855,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xae",
"EventName": "UOPS_ISSUED.ANY",
- "PublicDescription": "Counts the number of uops that the Resource Allocation Table (RAT) issues to the Reservation Station (RS). Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of uops that the Resource Allocation Table (RAT) issues to the Reservation Station (RS).",
"SampleAfterValue": "2000003",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -1894,7 +1866,6 @@
"CounterMask": "1",
"EventCode": "0xae",
"EventName": "UOPS_ISSUED.CYCLES",
- "PublicDescription": "UOPS_ISSUED.CYCLES Available PDIST counters: 0",
"SampleAfterValue": "2000003",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -1913,7 +1884,7 @@
"CounterMask": "1",
"EventCode": "0xc2",
"EventName": "UOPS_RETIRED.CYCLES",
- "PublicDescription": "Counts cycles where at least one uop has retired. Available PDIST counters: 0",
+ "PublicDescription": "Counts cycles where at least one uop has retired.",
"SampleAfterValue": "1000003",
"UMask": "0x2",
"Unit": "cpu_core"
@@ -1923,7 +1894,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc2",
"EventName": "UOPS_RETIRED.HEAVY",
- "PublicDescription": "Counts the number of retired micro-operations (uops) except the last uop of each instruction. An instruction that is decoded into less than two uops does not contribute to the count. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of retired micro-operations (uops) except the last uop of each instruction. An instruction that is decoded into less than two uops does not contribute to the count.",
"SampleAfterValue": "2000003",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -1954,7 +1925,6 @@
"EventName": "UOPS_RETIRED.MS",
"MSRIndex": "0x3F7",
"MSRValue": "0x8",
- "PublicDescription": "UOPS_RETIRED.MS Available PDIST counters: 0",
"SampleAfterValue": "2000003",
"UMask": "0x4",
"Unit": "cpu_core"
@@ -1964,7 +1934,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc2",
"EventName": "UOPS_RETIRED.SLOTS",
- "PublicDescription": "Counts the retirement slots used each cycle. Available PDIST counters: 0",
+ "PublicDescription": "Counts the retirement slots used each cycle.",
"SampleAfterValue": "2000003",
"UMask": "0x2",
"Unit": "cpu_core"
@@ -1976,7 +1946,7 @@
"EventCode": "0xc2",
"EventName": "UOPS_RETIRED.STALLS",
"Invert": "1",
- "PublicDescription": "This event counts cycles without actually retired uops. Available PDIST counters: 0",
+ "PublicDescription": "This event counts cycles without actually retired uops.",
"SampleAfterValue": "1000003",
"UMask": "0x2",
"Unit": "cpu_core"
@@ -1989,7 +1959,6 @@
"EventCode": "0xc2",
"EventName": "UOPS_RETIRED.STALL_CYCLES",
"Invert": "1",
- "PublicDescription": "This event is deprecated. Refer to new event UOPS_RETIRED.STALLS Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x2",
"Unit": "cpu_core"
diff --git a/tools/perf/pmu-events/arch/x86/alderlake/uncore-interconnect.json b/tools/perf/pmu-events/arch/x86/alderlake/uncore-interconnect.json
index 7c0779c74154..b5604c7534e1 100644
--- a/tools/perf/pmu-events/arch/x86/alderlake/uncore-interconnect.json
+++ b/tools/perf/pmu-events/arch/x86/alderlake/uncore-interconnect.json
@@ -65,7 +65,6 @@
"Counter": "0,1",
"EventCode": "0x81",
"EventName": "UNC_ARB_REQ_TRK_REQUEST.DRD",
- "Experimental": "1",
"PerPkg": "1",
"UMask": "0x2",
"Unit": "ARB"
@@ -103,7 +102,6 @@
"Counter": "0,1",
"EventCode": "0x81",
"EventName": "UNC_ARB_TRK_REQUESTS.RD",
- "Experimental": "1",
"PerPkg": "1",
"UMask": "0x2",
"Unit": "ARB"
diff --git a/tools/perf/pmu-events/arch/x86/alderlake/virtual-memory.json b/tools/perf/pmu-events/arch/x86/alderlake/virtual-memory.json
index ffbbd08acc68..132ce48af6d9 100644
--- a/tools/perf/pmu-events/arch/x86/alderlake/virtual-memory.json
+++ b/tools/perf/pmu-events/arch/x86/alderlake/virtual-memory.json
@@ -4,7 +4,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x12",
"EventName": "DTLB_LOAD_MISSES.STLB_HIT",
- "PublicDescription": "Counts loads that miss the DTLB (Data TLB) and hit the STLB (Second level TLB). Available PDIST counters: 0",
+ "PublicDescription": "Counts loads that miss the DTLB (Data TLB) and hit the STLB (Second level TLB).",
"SampleAfterValue": "100003",
"UMask": "0x20",
"Unit": "cpu_core"
@@ -15,7 +15,7 @@
"CounterMask": "1",
"EventCode": "0x12",
"EventName": "DTLB_LOAD_MISSES.WALK_ACTIVE",
- "PublicDescription": "Counts cycles when at least one PMH (Page Miss Handler) is busy with a page walk for a demand load. Available PDIST counters: 0",
+ "PublicDescription": "Counts cycles when at least one PMH (Page Miss Handler) is busy with a page walk for a demand load.",
"SampleAfterValue": "100003",
"UMask": "0x10",
"Unit": "cpu_core"
@@ -35,7 +35,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x12",
"EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED",
- "PublicDescription": "Counts completed page walks (all page sizes) caused by demand data loads. This implies it missed in the DTLB and further levels of TLB. The page walk can end with or without a fault. Available PDIST counters: 0",
+ "PublicDescription": "Counts completed page walks (all page sizes) caused by demand data loads. This implies it missed in the DTLB and further levels of TLB. The page walk can end with or without a fault.",
"SampleAfterValue": "100003",
"UMask": "0xe",
"Unit": "cpu_core"
@@ -45,7 +45,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x12",
"EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_1G",
- "PublicDescription": "Counts completed page walks (1G sizes) caused by demand data loads. This implies address translations missed in the DTLB and further levels of TLB. The page walk can end with or without a fault. Available PDIST counters: 0",
+ "PublicDescription": "Counts completed page walks (1G sizes) caused by demand data loads. This implies address translations missed in the DTLB and further levels of TLB. The page walk can end with or without a fault.",
"SampleAfterValue": "100003",
"UMask": "0x8",
"Unit": "cpu_core"
@@ -55,7 +55,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x12",
"EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_2M_4M",
- "PublicDescription": "Counts completed page walks (2M/4M sizes) caused by demand data loads. This implies address translations missed in the DTLB and further levels of TLB. The page walk can end with or without a fault. Available PDIST counters: 0",
+ "PublicDescription": "Counts completed page walks (2M/4M sizes) caused by demand data loads. This implies address translations missed in the DTLB and further levels of TLB. The page walk can end with or without a fault.",
"SampleAfterValue": "100003",
"UMask": "0x4",
"Unit": "cpu_core"
@@ -65,7 +65,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x12",
"EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_4K",
- "PublicDescription": "Counts completed page walks (4K sizes) caused by demand data loads. This implies address translations missed in the DTLB and further levels of TLB. The page walk can end with or without a fault. Available PDIST counters: 0",
+ "PublicDescription": "Counts completed page walks (4K sizes) caused by demand data loads. This implies address translations missed in the DTLB and further levels of TLB. The page walk can end with or without a fault.",
"SampleAfterValue": "100003",
"UMask": "0x2",
"Unit": "cpu_core"
@@ -75,7 +75,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x12",
"EventName": "DTLB_LOAD_MISSES.WALK_PENDING",
- "PublicDescription": "Counts the number of page walks outstanding for a demand load in the PMH (Page Miss Handler) each cycle. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of page walks outstanding for a demand load in the PMH (Page Miss Handler) each cycle.",
"SampleAfterValue": "100003",
"UMask": "0x10",
"Unit": "cpu_core"
@@ -85,7 +85,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x13",
"EventName": "DTLB_STORE_MISSES.STLB_HIT",
- "PublicDescription": "Counts stores that miss the DTLB (Data TLB) and hit the STLB (2nd Level TLB). Available PDIST counters: 0",
+ "PublicDescription": "Counts stores that miss the DTLB (Data TLB) and hit the STLB (2nd Level TLB).",
"SampleAfterValue": "100003",
"UMask": "0x20",
"Unit": "cpu_core"
@@ -96,7 +96,7 @@
"CounterMask": "1",
"EventCode": "0x13",
"EventName": "DTLB_STORE_MISSES.WALK_ACTIVE",
- "PublicDescription": "Counts cycles when at least one PMH (Page Miss Handler) is busy with a page walk for a store. Available PDIST counters: 0",
+ "PublicDescription": "Counts cycles when at least one PMH (Page Miss Handler) is busy with a page walk for a store.",
"SampleAfterValue": "100003",
"UMask": "0x10",
"Unit": "cpu_core"
@@ -116,7 +116,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x13",
"EventName": "DTLB_STORE_MISSES.WALK_COMPLETED",
- "PublicDescription": "Counts completed page walks (all page sizes) caused by demand data stores. This implies it missed in the DTLB and further levels of TLB. The page walk can end with or without a fault. Available PDIST counters: 0",
+ "PublicDescription": "Counts completed page walks (all page sizes) caused by demand data stores. This implies it missed in the DTLB and further levels of TLB. The page walk can end with or without a fault.",
"SampleAfterValue": "100003",
"UMask": "0xe",
"Unit": "cpu_core"
@@ -126,7 +126,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x13",
"EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_1G",
- "PublicDescription": "Counts completed page walks (1G sizes) caused by demand data stores. This implies address translations missed in the DTLB and further levels of TLB. The page walk can end with or without a fault. Available PDIST counters: 0",
+ "PublicDescription": "Counts completed page walks (1G sizes) caused by demand data stores. This implies address translations missed in the DTLB and further levels of TLB. The page walk can end with or without a fault.",
"SampleAfterValue": "100003",
"UMask": "0x8",
"Unit": "cpu_core"
@@ -136,7 +136,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x13",
"EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_2M_4M",
- "PublicDescription": "Counts completed page walks (2M/4M sizes) caused by demand data stores. This implies address translations missed in the DTLB and further levels of TLB. The page walk can end with or without a fault. Available PDIST counters: 0",
+ "PublicDescription": "Counts completed page walks (2M/4M sizes) caused by demand data stores. This implies address translations missed in the DTLB and further levels of TLB. The page walk can end with or without a fault.",
"SampleAfterValue": "100003",
"UMask": "0x4",
"Unit": "cpu_core"
@@ -146,7 +146,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x13",
"EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_4K",
- "PublicDescription": "Counts completed page walks (4K sizes) caused by demand data stores. This implies address translations missed in the DTLB and further levels of TLB. The page walk can end with or without a fault. Available PDIST counters: 0",
+ "PublicDescription": "Counts completed page walks (4K sizes) caused by demand data stores. This implies address translations missed in the DTLB and further levels of TLB. The page walk can end with or without a fault.",
"SampleAfterValue": "100003",
"UMask": "0x2",
"Unit": "cpu_core"
@@ -156,7 +156,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x13",
"EventName": "DTLB_STORE_MISSES.WALK_PENDING",
- "PublicDescription": "Counts the number of page walks outstanding for a store in the PMH (Page Miss Handler) each cycle. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of page walks outstanding for a store in the PMH (Page Miss Handler) each cycle.",
"SampleAfterValue": "100003",
"UMask": "0x10",
"Unit": "cpu_core"
@@ -184,7 +184,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x11",
"EventName": "ITLB_MISSES.STLB_HIT",
- "PublicDescription": "Counts instruction fetch requests that miss the ITLB (Instruction TLB) and hit the STLB (Second-level TLB). Available PDIST counters: 0",
+ "PublicDescription": "Counts instruction fetch requests that miss the ITLB (Instruction TLB) and hit the STLB (Second-level TLB).",
"SampleAfterValue": "100003",
"UMask": "0x20",
"Unit": "cpu_core"
@@ -195,7 +195,7 @@
"CounterMask": "1",
"EventCode": "0x11",
"EventName": "ITLB_MISSES.WALK_ACTIVE",
- "PublicDescription": "Counts cycles when at least one PMH (Page Miss Handler) is busy with a page walk for a code (instruction fetch) request. Available PDIST counters: 0",
+ "PublicDescription": "Counts cycles when at least one PMH (Page Miss Handler) is busy with a page walk for a code (instruction fetch) request.",
"SampleAfterValue": "100003",
"UMask": "0x10",
"Unit": "cpu_core"
@@ -215,7 +215,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x11",
"EventName": "ITLB_MISSES.WALK_COMPLETED",
- "PublicDescription": "Counts completed page walks (all page sizes) caused by a code fetch. This implies it missed in the ITLB (Instruction TLB) and further levels of TLB. The page walk can end with or without a fault. Available PDIST counters: 0",
+ "PublicDescription": "Counts completed page walks (all page sizes) caused by a code fetch. This implies it missed in the ITLB (Instruction TLB) and further levels of TLB. The page walk can end with or without a fault.",
"SampleAfterValue": "100003",
"UMask": "0xe",
"Unit": "cpu_core"
@@ -225,7 +225,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x11",
"EventName": "ITLB_MISSES.WALK_COMPLETED_2M_4M",
- "PublicDescription": "Counts completed page walks (2M/4M page sizes) caused by a code fetch. This implies it missed in the ITLB (Instruction TLB) and further levels of TLB. The page walk can end with or without a fault. Available PDIST counters: 0",
+ "PublicDescription": "Counts completed page walks (2M/4M page sizes) caused by a code fetch. This implies it missed in the ITLB (Instruction TLB) and further levels of TLB. The page walk can end with or without a fault.",
"SampleAfterValue": "100003",
"UMask": "0x4",
"Unit": "cpu_core"
@@ -235,7 +235,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x11",
"EventName": "ITLB_MISSES.WALK_COMPLETED_4K",
- "PublicDescription": "Counts completed page walks (4K page sizes) caused by a code fetch. This implies it missed in the ITLB (Instruction TLB) and further levels of TLB. The page walk can end with or without a fault. Available PDIST counters: 0",
+ "PublicDescription": "Counts completed page walks (4K page sizes) caused by a code fetch. This implies it missed in the ITLB (Instruction TLB) and further levels of TLB. The page walk can end with or without a fault.",
"SampleAfterValue": "100003",
"UMask": "0x2",
"Unit": "cpu_core"
@@ -245,7 +245,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x11",
"EventName": "ITLB_MISSES.WALK_PENDING",
- "PublicDescription": "Counts the number of page walks outstanding for an outstanding code (instruction fetch) request in the PMH (Page Miss Handler) each cycle. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of page walks outstanding for an outstanding code (instruction fetch) request in the PMH (Page Miss Handler) each cycle.",
"SampleAfterValue": "100003",
"UMask": "0x10",
"Unit": "cpu_core"
diff --git a/tools/perf/pmu-events/arch/x86/alderlaken/adln-metrics.json b/tools/perf/pmu-events/arch/x86/alderlaken/adln-metrics.json
index ce93648043ef..0f72c9192df6 100644
--- a/tools/perf/pmu-events/arch/x86/alderlaken/adln-metrics.json
+++ b/tools/perf/pmu-events/arch/x86/alderlaken/adln-metrics.json
@@ -1,56 +1,56 @@
[
{
"BriefDescription": "C10 residency percent per package",
- "MetricExpr": "cstate_pkg@c10\\-residency@ / TSC",
+ "MetricExpr": "cstate_pkg@c10\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C10_Pkg_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C1 residency percent per core",
- "MetricExpr": "cstate_core@c1\\-residency@ / TSC",
+ "MetricExpr": "cstate_core@c1\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C1_Core_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C2 residency percent per package",
- "MetricExpr": "cstate_pkg@c2\\-residency@ / TSC",
+ "MetricExpr": "cstate_pkg@c2\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C2_Pkg_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C3 residency percent per package",
- "MetricExpr": "cstate_pkg@c3\\-residency@ / TSC",
+ "MetricExpr": "cstate_pkg@c3\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C3_Pkg_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C6 residency percent per core",
- "MetricExpr": "cstate_core@c6\\-residency@ / TSC",
+ "MetricExpr": "cstate_core@c6\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C6_Core_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C6 residency percent per package",
- "MetricExpr": "cstate_pkg@c6\\-residency@ / TSC",
+ "MetricExpr": "cstate_pkg@c6\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C6_Pkg_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C7 residency percent per core",
- "MetricExpr": "cstate_core@c7\\-residency@ / TSC",
+ "MetricExpr": "cstate_core@c7\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C7_Core_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C8 residency percent per package",
- "MetricExpr": "cstate_pkg@c8\\-residency@ / TSC",
+ "MetricExpr": "cstate_pkg@c8\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C8_Pkg_Residency",
"ScaleUnit": "100%"
@@ -460,12 +460,12 @@
},
{
"BriefDescription": "Average CPU Utilization",
- "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / TSC",
+ "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / msr@tsc@",
"MetricName": "tma_info_system_cpu_utilization"
},
{
"BriefDescription": "Fraction of cycles spent in Kernel mode",
- "MetricExpr": "cpu@CPU_CLK_UNHALTED.CORE_P@k / CPU_CLK_UNHALTED.CORE",
+ "MetricExpr": "CPU_CLK_UNHALTED.CORE_P:k / CPU_CLK_UNHALTED.CORE",
"MetricGroup": "Summary",
"MetricName": "tma_info_system_kernel_utilization"
},
diff --git a/tools/perf/pmu-events/arch/x86/alderlaken/cache.json b/tools/perf/pmu-events/arch/x86/alderlaken/cache.json
index 669f4979b651..76a841675337 100644
--- a/tools/perf/pmu-events/arch/x86/alderlaken/cache.json
+++ b/tools/perf/pmu-events/arch/x86/alderlaken/cache.json
@@ -247,7 +247,7 @@
},
{
"BriefDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 128 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled.",
- "Counter": "0,1,2,3,4,5",
+ "Counter": "0,1",
"Data_LA": "1",
"EventCode": "0xd0",
"EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_128",
@@ -259,7 +259,7 @@
},
{
"BriefDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 16 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled.",
- "Counter": "0,1,2,3,4,5",
+ "Counter": "0,1",
"Data_LA": "1",
"EventCode": "0xd0",
"EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_16",
@@ -271,7 +271,7 @@
},
{
"BriefDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 256 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled.",
- "Counter": "0,1,2,3,4,5",
+ "Counter": "0,1",
"Data_LA": "1",
"EventCode": "0xd0",
"EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_256",
@@ -283,7 +283,7 @@
},
{
"BriefDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 32 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled.",
- "Counter": "0,1,2,3,4,5",
+ "Counter": "0,1",
"Data_LA": "1",
"EventCode": "0xd0",
"EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_32",
@@ -295,7 +295,7 @@
},
{
"BriefDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 4 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled.",
- "Counter": "0,1,2,3,4,5",
+ "Counter": "0,1",
"Data_LA": "1",
"EventCode": "0xd0",
"EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_4",
@@ -307,7 +307,7 @@
},
{
"BriefDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 512 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled.",
- "Counter": "0,1,2,3,4,5",
+ "Counter": "0,1",
"Data_LA": "1",
"EventCode": "0xd0",
"EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_512",
@@ -319,7 +319,7 @@
},
{
"BriefDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 64 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled.",
- "Counter": "0,1,2,3,4,5",
+ "Counter": "0,1",
"Data_LA": "1",
"EventCode": "0xd0",
"EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_64",
@@ -331,7 +331,7 @@
},
{
"BriefDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 8 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled.",
- "Counter": "0,1,2,3,4,5",
+ "Counter": "0,1",
"Data_LA": "1",
"EventCode": "0xd0",
"EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_8",
diff --git a/tools/perf/pmu-events/arch/x86/alderlaken/pipeline.json b/tools/perf/pmu-events/arch/x86/alderlaken/pipeline.json
index 1dd61baec1a9..d650cbd48c1f 100644
--- a/tools/perf/pmu-events/arch/x86/alderlaken/pipeline.json
+++ b/tools/perf/pmu-events/arch/x86/alderlaken/pipeline.json
@@ -9,16 +9,18 @@
"UMask": "0x3"
},
{
- "BriefDescription": "Counts the number of active floating point and integer dividers per cycle.",
+ "BriefDescription": "This event is deprecated.",
"Counter": "0,1,2,3,4,5",
+ "Deprecated": "1",
"EventCode": "0xcd",
"EventName": "ARITH.DIV_OCCUPANCY",
"SampleAfterValue": "1000003",
"UMask": "0x3"
},
{
- "BriefDescription": "Counts the number of floating point and integer divider uops executed per cycle.",
+ "BriefDescription": "This event is deprecated.",
"Counter": "0,1,2,3,4,5",
+ "Deprecated": "1",
"EventCode": "0xcd",
"EventName": "ARITH.DIV_UOPS",
"SampleAfterValue": "1000003",
diff --git a/tools/perf/pmu-events/arch/x86/alderlaken/uncore-interconnect.json b/tools/perf/pmu-events/arch/x86/alderlaken/uncore-interconnect.json
index 7c0779c74154..b5604c7534e1 100644
--- a/tools/perf/pmu-events/arch/x86/alderlaken/uncore-interconnect.json
+++ b/tools/perf/pmu-events/arch/x86/alderlaken/uncore-interconnect.json
@@ -65,7 +65,6 @@
"Counter": "0,1",
"EventCode": "0x81",
"EventName": "UNC_ARB_REQ_TRK_REQUEST.DRD",
- "Experimental": "1",
"PerPkg": "1",
"UMask": "0x2",
"Unit": "ARB"
@@ -103,7 +102,6 @@
"Counter": "0,1",
"EventCode": "0x81",
"EventName": "UNC_ARB_TRK_REQUESTS.RD",
- "Experimental": "1",
"PerPkg": "1",
"UMask": "0x2",
"Unit": "ARB"
diff --git a/tools/perf/pmu-events/arch/x86/arrowlake/arl-metrics.json b/tools/perf/pmu-events/arch/x86/arrowlake/arl-metrics.json
index b22a02450e6c..4f1f77404943 100644
--- a/tools/perf/pmu-events/arch/x86/arrowlake/arl-metrics.json
+++ b/tools/perf/pmu-events/arch/x86/arrowlake/arl-metrics.json
@@ -1,56 +1,56 @@
[
{
"BriefDescription": "C10 residency percent per package",
- "MetricExpr": "cstate_pkg@c10\\-residency@ / TSC",
+ "MetricExpr": "cstate_pkg@c10\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C10_Pkg_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C1 residency percent per core",
- "MetricExpr": "cstate_core@c1\\-residency@ / TSC",
+ "MetricExpr": "cstate_core@c1\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C1_Core_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C2 residency percent per package",
- "MetricExpr": "cstate_pkg@c2\\-residency@ / TSC",
+ "MetricExpr": "cstate_pkg@c2\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C2_Pkg_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C3 residency percent per package",
- "MetricExpr": "cstate_pkg@c3\\-residency@ / TSC",
+ "MetricExpr": "cstate_pkg@c3\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C3_Pkg_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C6 residency percent per core",
- "MetricExpr": "cstate_core@c6\\-residency@ / TSC",
+ "MetricExpr": "cstate_core@c6\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C6_Core_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C6 residency percent per package",
- "MetricExpr": "cstate_pkg@c6\\-residency@ / TSC",
+ "MetricExpr": "cstate_pkg@c6\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C6_Pkg_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C7 residency percent per core",
- "MetricExpr": "cstate_core@c7\\-residency@ / TSC",
+ "MetricExpr": "cstate_core@c7\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C7_Core_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C8 residency percent per package",
- "MetricExpr": "cstate_pkg@c8\\-residency@ / TSC",
+ "MetricExpr": "cstate_pkg@c8\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C8_Pkg_Residency",
"ScaleUnit": "100%"
@@ -567,7 +567,7 @@
},
{
"BriefDescription": "Average CPU Utilization",
- "MetricExpr": "cpu_atom@CPU_CLK_UNHALTED.REF_TSC@ / TSC",
+ "MetricExpr": "cpu_atom@CPU_CLK_UNHALTED.REF_TSC@ / msr@tsc\\,cpu=cpu_atom@",
"MetricName": "tma_info_system_cpu_utilization",
"Unit": "cpu_atom"
},
@@ -774,7 +774,7 @@
{
"BriefDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend",
"DefaultMetricgroupName": "TopdownL1",
- "MetricExpr": "cpu_core@topdown\\-be\\-bound@ / (cpu_core@topdown\\-fe\\-bound@ + cpu_core@topdown\\-bad\\-spec@ + cpu_core@topdown\\-retiring@ + cpu_core@topdown\\-be\\-bound@) + 0 * tma_info_thread_slots",
+ "MetricExpr": "cpu_core@topdown\\-be\\-bound@ / (cpu_core@topdown\\-fe\\-bound@ + cpu_core@topdown\\-bad\\-spec@ + cpu_core@topdown\\-retiring@ + cpu_core@topdown\\-be\\-bound@)",
"MetricGroup": "BvOB;Default;TmaL1;TopdownL1;tma_L1_group",
"MetricName": "tma_backend_bound",
"MetricThreshold": "tma_backend_bound > 0.2",
@@ -786,7 +786,7 @@
{
"BriefDescription": "This category represents fraction of slots wasted due to incorrect speculations",
"DefaultMetricgroupName": "TopdownL1",
- "MetricExpr": "cpu_core@topdown\\-bad\\-spec@ / (cpu_core@topdown\\-fe\\-bound@ + cpu_core@topdown\\-bad\\-spec@ + cpu_core@topdown\\-retiring@ + cpu_core@topdown\\-be\\-bound@) + 0 * tma_info_thread_slots",
+ "MetricExpr": "cpu_core@topdown\\-bad\\-spec@ / (cpu_core@topdown\\-fe\\-bound@ + cpu_core@topdown\\-bad\\-spec@ + cpu_core@topdown\\-retiring@ + cpu_core@topdown\\-be\\-bound@)",
"MetricGroup": "Default;TmaL1;TopdownL1;tma_L1_group",
"MetricName": "tma_bad_speculation",
"MetricThreshold": "tma_bad_speculation > 0.15",
@@ -813,35 +813,35 @@
"Unit": "cpu_core"
},
{
+ "BriefDescription": "Total pipeline cost when the execution is compute-bound - an estimation",
+ "MetricExpr": "100 * (tma_core_bound * tma_divider / (tma_divider + tma_ports_utilization + tma_serializing_operation) + tma_core_bound * (tma_ports_utilization / (tma_divider + tma_ports_utilization + tma_serializing_operation)) * (tma_ports_utilized_3m / (tma_ports_utilized_0 + tma_ports_utilized_1 + tma_ports_utilized_2 + tma_ports_utilized_3m)))",
+ "MetricGroup": "BvCB;Cor;tma_issueComp",
+ "MetricName": "tma_bottleneck_compute_bound_est",
+ "MetricThreshold": "tma_bottleneck_compute_bound_est > 20",
+ "PublicDescription": "Total pipeline cost when the execution is compute-bound - an estimation. Covers Core Bound when High ILP as well as when long-latency execution units are busy. Related metrics: ",
+ "Unit": "cpu_core"
+ },
+ {
"BriefDescription": "Total pipeline cost of external Memory- or Cache-Bandwidth related bottlenecks",
- "MetricExpr": "100 * (tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_mem_bandwidth / (tma_mem_bandwidth + tma_mem_latency)) + tma_memory_bound * (tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_sq_full / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full)) + tma_memory_bound * (tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_fb_full / (tma_dtlb_load + tma_fb_full + tma_l1_latency_capacity + tma_l1_latency_dependency + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)))",
+ "MetricExpr": "100 * (tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_mem_bandwidth / (tma_mem_bandwidth + tma_mem_latency)) + tma_memory_bound * (tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_sq_full / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full)) + tma_memory_bound * (tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_fb_full / (tma_dtlb_load + tma_fb_full + tma_l1_latency_capacity + tma_l1_latency_dependency + tma_lock_latency + tma_split_loads + tma_store_early_blk + tma_store_fwd_blk)))",
"MetricGroup": "BvMB;Mem;MemoryBW;Offcore;tma_issueBW",
- "MetricName": "tma_bottleneck_cache_memory_bandwidth",
- "MetricThreshold": "tma_bottleneck_cache_memory_bandwidth > 20",
+ "MetricName": "tma_bottleneck_data_cache_memory_bandwidth",
+ "MetricThreshold": "tma_bottleneck_data_cache_memory_bandwidth > 20",
"PublicDescription": "Total pipeline cost of external Memory- or Cache-Bandwidth related bottlenecks. Related metrics: tma_fb_full, tma_info_system_dram_bw_use, tma_mem_bandwidth, tma_sq_full",
"Unit": "cpu_core"
},
{
"BriefDescription": "Total pipeline cost of external Memory- or Cache-Latency related bottlenecks",
- "MetricExpr": "100 * (tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_mem_latency / (tma_mem_bandwidth + tma_mem_latency)) + tma_memory_bound * (tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_l3_hit_latency / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full)) + tma_memory_bound * tma_l2_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) + tma_memory_bound * (tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_l1_latency_dependency / (tma_dtlb_load + tma_fb_full + tma_l1_latency_capacity + tma_l1_latency_dependency + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)) + tma_memory_bound * (tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_l1_latency_capacity / (tma_dtlb_load + tma_fb_full + tma_l1_latency_capacity + tma_l1_latency_dependency + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)) + tma_memory_bound * (tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_lock_latency / (tma_dtlb_load + tma_fb_full + tma_l1_latency_capacity + tma_l1_latency_dependency + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)) + tma_memory_bound * (tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_split_loads / (tma_dtlb_load + tma_fb_full + tma_l1_latency_capacity + tma_l1_latency_dependency + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)) + tma_memory_bound * (tma_store_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_split_stores / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency + tma_streaming_stores)) + tma_memory_bound * (tma_store_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_store_latency / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency + tma_streaming_stores)))",
+ "MetricExpr": "100 * (tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_mem_latency / (tma_mem_bandwidth + tma_mem_latency)) + tma_memory_bound * (tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_l3_hit_latency / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full)) + tma_memory_bound * tma_l2_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) + tma_memory_bound * (tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_l1_latency_dependency / (tma_dtlb_load + tma_fb_full + tma_l1_latency_capacity + tma_l1_latency_dependency + tma_lock_latency + tma_split_loads + tma_store_early_blk + tma_store_fwd_blk)) + tma_memory_bound * (tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_l1_latency_capacity / (tma_dtlb_load + tma_fb_full + tma_l1_latency_capacity + tma_l1_latency_dependency + tma_lock_latency + tma_split_loads + tma_store_early_blk + tma_store_fwd_blk)) + tma_memory_bound * (tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_lock_latency / (tma_dtlb_load + tma_fb_full + tma_l1_latency_capacity + tma_l1_latency_dependency + tma_lock_latency + tma_split_loads + tma_store_early_blk + tma_store_fwd_blk)) + tma_memory_bound * (tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_split_loads / (tma_dtlb_load + tma_fb_full + tma_l1_latency_capacity + tma_l1_latency_dependency + tma_lock_latency + tma_split_loads + tma_store_early_blk + tma_store_fwd_blk)) + tma_memory_bound * (tma_store_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_split_stores / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency + tma_streaming_stores)) + tma_memory_bound * (tma_store_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_store_latency / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency + tma_streaming_stores)))",
"MetricGroup": "BvML;Mem;MemoryLat;Offcore;tma_issueLat",
- "MetricName": "tma_bottleneck_cache_memory_latency",
- "MetricThreshold": "tma_bottleneck_cache_memory_latency > 20",
+ "MetricName": "tma_bottleneck_data_cache_memory_latency",
+ "MetricThreshold": "tma_bottleneck_data_cache_memory_latency > 20",
"PublicDescription": "Total pipeline cost of external Memory- or Cache-Latency related bottlenecks. Related metrics: tma_l3_hit_latency, tma_mem_latency",
"Unit": "cpu_core"
},
{
- "BriefDescription": "Total pipeline cost when the execution is compute-bound - an estimation",
- "MetricExpr": "100 * (tma_core_bound * tma_divider / (tma_divider + tma_ports_utilization + tma_serializing_operation) + tma_core_bound * (tma_ports_utilization / (tma_divider + tma_ports_utilization + tma_serializing_operation)) * (tma_ports_utilized_3m / (tma_ports_utilized_0 + tma_ports_utilized_1 + tma_ports_utilized_2 + tma_ports_utilized_3m)))",
- "MetricGroup": "BvCB;Cor;tma_issueComp",
- "MetricName": "tma_bottleneck_compute_bound_est",
- "MetricThreshold": "tma_bottleneck_compute_bound_est > 20",
- "PublicDescription": "Total pipeline cost when the execution is compute-bound - an estimation. Covers Core Bound when High ILP as well as when long-latency execution units are busy. Related metrics: ",
- "Unit": "cpu_core"
- },
- {
"BriefDescription": "Total pipeline cost of instruction fetch bandwidth related bottlenecks (when the front-end could not sustain operations delivery to the back-end)",
- "MetricExpr": "100 * (tma_frontend_bound - (1 - 10 * tma_microcode_sequencer * tma_other_mispredicts / tma_branch_mispredicts) * tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches) - (1 - cpu_core@INST_RETIRED.REP_ITERATION@ / cpu_core@UOPS_RETIRED.MS\\,cmask\\=1@) * (tma_fetch_latency * (tma_ms_switches + tma_branch_resteers * (tma_clears_resteers + tma_mispredicts_resteers * tma_other_mispredicts / tma_branch_mispredicts) / (tma_clears_resteers + tma_mispredicts_resteers + tma_unknown_branches)) / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches) + tma_fetch_bandwidth * tma_ms / (tma_dsb + tma_lsd + tma_mite + tma_ms))) - tma_bottleneck_big_code",
+ "MetricExpr": "100 * (tma_frontend_bound - (1 - 10 * tma_microcode_sequencer * tma_other_mispredicts / tma_branch_mispredicts) * tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches) - (1 - cpu_core@INST_RETIRED.REP_ITERATION@ / cpu_core@UOPS_RETIRED.MS\\,cmask\\=1@) * (tma_fetch_latency * (tma_ms_switches + tma_branch_resteers * (tma_clears_resteers + tma_mispredicts_resteers * tma_other_mispredicts / tma_branch_mispredicts) / (tma_clears_resteers + tma_mispredicts_resteers + tma_unknown_branches)) / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches) + tma_ms)) - tma_bottleneck_big_code",
"MetricGroup": "BvFB;Fed;FetchBW;Frontend",
"MetricName": "tma_bottleneck_instruction_fetch_bw",
"MetricThreshold": "tma_bottleneck_instruction_fetch_bw > 20",
@@ -849,7 +849,7 @@
},
{
"BriefDescription": "Total pipeline cost of irregular execution (e.g",
- "MetricExpr": "100 * ((1 - cpu_core@INST_RETIRED.REP_ITERATION@ / cpu_core@UOPS_RETIRED.MS\\,cmask\\=1@) * (tma_fetch_latency * (tma_ms_switches + tma_branch_resteers * (tma_clears_resteers + tma_mispredicts_resteers * tma_other_mispredicts / tma_branch_mispredicts) / (tma_clears_resteers + tma_mispredicts_resteers + tma_unknown_branches)) / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches) + tma_fetch_bandwidth * tma_ms / (tma_dsb + tma_lsd + tma_mite + tma_ms)) + 10 * tma_microcode_sequencer * tma_other_mispredicts / tma_branch_mispredicts * tma_branch_mispredicts + tma_machine_clears * tma_other_nukes / tma_other_nukes + tma_core_bound * (tma_serializing_operation + cpu_core@RS.EMPTY_RESOURCE@ / tma_info_thread_clks * tma_ports_utilized_0) / (tma_divider + tma_ports_utilization + tma_serializing_operation) + tma_microcode_sequencer / (tma_microcode_sequencer + tma_few_uops_instructions) * (tma_assists / tma_microcode_sequencer) * tma_heavy_operations)",
+ "MetricExpr": "100 * ((1 - cpu_core@INST_RETIRED.REP_ITERATION@ / cpu_core@UOPS_RETIRED.MS\\,cmask\\=1@) * (tma_fetch_latency * (tma_ms_switches + tma_branch_resteers * (tma_clears_resteers + tma_mispredicts_resteers * tma_other_mispredicts / tma_branch_mispredicts) / (tma_clears_resteers + tma_mispredicts_resteers + tma_unknown_branches)) / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches) + tma_ms) + 10 * tma_microcode_sequencer * tma_other_mispredicts / tma_branch_mispredicts * tma_branch_mispredicts + tma_machine_clears * tma_other_nukes / tma_other_nukes + tma_core_bound * (tma_serializing_operation + cpu_core@RS.EMPTY_RESOURCE@ / tma_info_thread_clks * tma_ports_utilized_0) / (tma_divider + tma_ports_utilization + tma_serializing_operation) + tma_microcode_sequencer / (tma_microcode_sequencer + tma_few_uops_instructions) * (tma_assists / tma_microcode_sequencer) * tma_heavy_operations)",
"MetricGroup": "Bad;BvIO;Cor;Ret;tma_issueMS",
"MetricName": "tma_bottleneck_irregular_overhead",
"MetricThreshold": "tma_bottleneck_irregular_overhead > 10",
@@ -858,7 +858,7 @@
},
{
"BriefDescription": "Total pipeline cost of Memory Address Translation related bottlenecks (data-side TLBs)",
- "MetricExpr": "100 * (tma_memory_bound * (tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_dtlb_load / (tma_dtlb_load + tma_fb_full + tma_l1_latency_capacity + tma_l1_latency_dependency + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)) + tma_memory_bound * (tma_store_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_dtlb_store / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency + tma_streaming_stores)))",
+ "MetricExpr": "100 * (tma_memory_bound * (tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_dtlb_load / (tma_dtlb_load + tma_fb_full + tma_l1_latency_capacity + tma_l1_latency_dependency + tma_lock_latency + tma_split_loads + tma_store_early_blk + tma_store_fwd_blk)) + tma_memory_bound * (tma_store_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_dtlb_store / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency + tma_streaming_stores)))",
"MetricGroup": "BvMT;Mem;MemoryTLB;Offcore;tma_issueTLB",
"MetricName": "tma_bottleneck_memory_data_tlbs",
"MetricThreshold": "tma_bottleneck_memory_data_tlbs > 20",
@@ -885,7 +885,7 @@
},
{
"BriefDescription": "Total pipeline cost of remaining bottlenecks in the back-end",
- "MetricExpr": "100 - (tma_bottleneck_big_code + tma_bottleneck_instruction_fetch_bw + tma_bottleneck_mispredictions + tma_bottleneck_cache_memory_bandwidth + tma_bottleneck_cache_memory_latency + tma_bottleneck_memory_data_tlbs + tma_bottleneck_memory_synchronization + tma_bottleneck_compute_bound_est + tma_bottleneck_irregular_overhead + tma_bottleneck_branching_overhead + tma_bottleneck_useful_work)",
+ "MetricExpr": "100 - (tma_bottleneck_big_code + tma_bottleneck_instruction_fetch_bw + tma_bottleneck_mispredictions + tma_bottleneck_data_cache_memory_bandwidth + tma_bottleneck_data_cache_memory_latency + tma_bottleneck_memory_data_tlbs + tma_bottleneck_memory_synchronization + tma_bottleneck_compute_bound_est + tma_bottleneck_irregular_overhead + tma_bottleneck_branching_overhead + tma_bottleneck_useful_work)",
"MetricGroup": "BvOB;Cor;Offcore",
"MetricName": "tma_bottleneck_other_bottlenecks",
"MetricThreshold": "tma_bottleneck_other_bottlenecks > 20",
@@ -902,7 +902,7 @@
},
{
"BriefDescription": "This metric represents fraction of slots the CPU has wasted due to Branch Misprediction",
- "MetricExpr": "cpu_core@topdown\\-br\\-mispredict@ / (cpu_core@topdown\\-fe\\-bound@ + cpu_core@topdown\\-bad\\-spec@ + cpu_core@topdown\\-retiring@ + cpu_core@topdown\\-be\\-bound@) + 0 * tma_info_thread_slots",
+ "MetricExpr": "cpu_core@topdown\\-br\\-mispredict@ / (cpu_core@topdown\\-fe\\-bound@ + cpu_core@topdown\\-bad\\-spec@ + cpu_core@topdown\\-retiring@ + cpu_core@topdown\\-be\\-bound@)",
"MetricGroup": "BadSpec;BrMispredicts;BvMP;TmaL2;TopdownL2;tma_L2_group;tma_bad_speculation_group;tma_issueBM",
"MetricName": "tma_branch_mispredicts",
"MetricThreshold": "tma_branch_mispredicts > 0.1 & tma_bad_speculation > 0.15",
@@ -1042,7 +1042,6 @@
},
{
"BriefDescription": "This metric estimates fraction of cycles while the memory subsystem was handling synchronizations due to contested accesses",
- "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "(cpu_core@MEM_LOAD_L3_HIT_RETIRED.XSNP_MISS@ * min(cpu_core@MEM_LOAD_L3_HIT_RETIRED.XSNP_MISS@R, 24 * tma_info_system_core_frequency) + cpu_core@MEM_LOAD_L3_HIT_RETIRED.XSNP_HITM@ * min(cpu_core@MEM_LOAD_L3_HIT_RETIRED.XSNP_HITM@R, 25 * tma_info_system_core_frequency)) * (1 + cpu_core@MEM_LOAD_RETIRED.FB_HIT@ / cpu_core@MEM_LOAD_RETIRED.L1_MISS@ / 2) / tma_info_thread_clks",
"MetricGroup": "BvMS;DataSharing;LockCont;Offcore;Snoop;TopdownL4;tma_L4_group;tma_issueSyncxn;tma_l3_bound_group",
"MetricName": "tma_contested_accesses",
@@ -1095,7 +1094,7 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles in which CPU was likely limited due to DSB (decoded uop cache) fetch pipeline",
- "MetricExpr": "(cpu_core@IDQ.DSB_UOPS\\,cmask\\=0x8\\,inv\\=0x1@ + cpu_core@IDQ.DSB_UOPS@ / (cpu_core@IDQ.DSB_UOPS@ + cpu_core@IDQ.MITE_UOPS@) * (cpu_core@IDQ_BUBBLES.CYCLES_0_UOPS_DELIV.CORE@ - cpu_core@IDQ_BUBBLES.FETCH_LATENCY@)) / tma_info_thread_clks",
+ "MetricExpr": "(cpu_core@IDQ.DSB_UOPS\\,cmask\\=0x8\\,inv\\=0x1@ / 2 + cpu_core@IDQ.DSB_UOPS@ / (cpu_core@IDQ.DSB_UOPS@ + cpu_core@IDQ.MITE_UOPS@) * (cpu_core@IDQ_BUBBLES.STARVATION_CYCLES@ - cpu_core@IDQ_BUBBLES.FETCH_LATENCY@)) / tma_info_thread_clks",
"MetricGroup": "DSB;FetchBW;TopdownL3;tma_L3_group;tma_fetch_bandwidth_group",
"MetricName": "tma_dsb",
"MetricThreshold": "tma_dsb > 0.15 & tma_fetch_bandwidth > 0.2",
@@ -1149,7 +1148,7 @@
"MetricGroup": "BvMB;MemoryBW;TopdownL4;tma_L4_group;tma_issueBW;tma_issueSL;tma_issueSmSt;tma_l1_bound_group",
"MetricName": "tma_fb_full",
"MetricThreshold": "tma_fb_full > 0.3",
- "PublicDescription": "This metric does a *rough estimation* of how often L1D Fill Buffer unavailability limited additional L1D miss memory access requests to proceed. The higher the metric value; the deeper the memory hierarchy level the misses are satisfied from (metric values >1 are valid). Often it hints on approaching bandwidth limits (to L2 cache; L3 cache or external memory). Related metrics: tma_bottleneck_cache_memory_bandwidth, tma_info_system_dram_bw_use, tma_mem_bandwidth, tma_sq_full, tma_store_latency, tma_streaming_stores",
+ "PublicDescription": "This metric does a *rough estimation* of how often L1D Fill Buffer unavailability limited additional L1D miss memory access requests to proceed. The higher the metric value; the deeper the memory hierarchy level the misses are satisfied from (metric values >1 are valid). Often it hints on approaching bandwidth limits (to L2 cache; L3 cache or external memory). Related metrics: tma_bottleneck_data_cache_memory_bandwidth, tma_info_system_dram_bw_use, tma_mem_bandwidth, tma_sq_full, tma_store_latency, tma_streaming_stores",
"ScaleUnit": "100%",
"Unit": "cpu_core"
},
@@ -1166,7 +1165,7 @@
},
{
"BriefDescription": "This metric represents fraction of slots the CPU was stalled due to Frontend latency issues",
- "MetricExpr": "cpu_core@topdown\\-fetch\\-lat@ / (cpu_core@topdown\\-fe\\-bound@ + cpu_core@topdown\\-bad\\-spec@ + cpu_core@topdown\\-retiring@ + cpu_core@topdown\\-be\\-bound@) + 0 * tma_info_thread_slots",
+ "MetricExpr": "cpu_core@topdown\\-fetch\\-lat@ / (cpu_core@topdown\\-fe\\-bound@ + cpu_core@topdown\\-bad\\-spec@ + cpu_core@topdown\\-retiring@ + cpu_core@topdown\\-be\\-bound@)",
"MetricGroup": "Frontend;TmaL2;TopdownL2;tma_L2_group;tma_frontend_bound_group",
"MetricName": "tma_fetch_latency",
"MetricThreshold": "tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15",
@@ -1216,7 +1215,7 @@
},
{
"BriefDescription": "This metric approximates arithmetic floating-point (FP) scalar uops fraction the CPU has retired",
- "MetricExpr": "cpu_core@FP_ARITH_INST_RETIRED.SCALAR@ / (tma_retiring * tma_info_thread_slots)",
+ "MetricExpr": "cpu_core@FP_ARITH_OPS_RETIRED.SCALAR@ / (tma_retiring * tma_info_thread_slots)",
"MetricGroup": "Compute;Flops;TopdownL4;tma_L4_group;tma_fp_arith_group;tma_issue2P",
"MetricName": "tma_fp_scalar",
"MetricThreshold": "tma_fp_scalar > 0.1 & (tma_fp_arith > 0.2 & tma_light_operations > 0.6)",
@@ -1226,7 +1225,7 @@
},
{
"BriefDescription": "This metric approximates arithmetic floating-point (FP) vector uops fraction the CPU has retired aggregated across all vector widths",
- "MetricExpr": "cpu_core@FP_ARITH_INST_RETIRED.VECTOR@ / (tma_retiring * tma_info_thread_slots)",
+ "MetricExpr": "cpu_core@FP_ARITH_OPS_RETIRED.VECTOR@ / (tma_retiring * tma_info_thread_slots)",
"MetricGroup": "Compute;Flops;TopdownL4;tma_L4_group;tma_fp_arith_group;tma_issue2P",
"MetricName": "tma_fp_vector",
"MetricThreshold": "tma_fp_vector > 0.1 & (tma_fp_arith > 0.2 & tma_light_operations > 0.6)",
@@ -1236,7 +1235,7 @@
},
{
"BriefDescription": "This metric approximates arithmetic FP vector uops fraction the CPU has retired for 128-bit wide vectors",
- "MetricExpr": "(cpu_core@FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE@ + cpu_core@FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE@) / (tma_retiring * tma_info_thread_slots)",
+ "MetricExpr": "(cpu_core@FP_ARITH_OPS_RETIRED.128B_PACKED_DOUBLE@ + cpu_core@FP_ARITH_OPS_RETIRED.128B_PACKED_SINGLE@) / (tma_retiring * tma_info_thread_slots)",
"MetricGroup": "Compute;Flops;TopdownL5;tma_L5_group;tma_fp_vector_group;tma_issue2P",
"MetricName": "tma_fp_vector_128b",
"MetricThreshold": "tma_fp_vector_128b > 0.1 & (tma_fp_vector > 0.1 & (tma_fp_arith > 0.2 & tma_light_operations > 0.6))",
@@ -1246,7 +1245,7 @@
},
{
"BriefDescription": "This metric approximates arithmetic FP vector uops fraction the CPU has retired for 256-bit wide vectors",
- "MetricExpr": "cpu_core@FP_ARITH_INST_RETIRED.VECTOR\\,umask\\=0x30@ / (tma_retiring * tma_info_thread_slots)",
+ "MetricExpr": "cpu_core@FP_ARITH_OPS_RETIRED.VECTOR\\,umask\\=0x30@ / (tma_retiring * tma_info_thread_slots)",
"MetricGroup": "Compute;Flops;TopdownL5;tma_L5_group;tma_fp_vector_group;tma_issue2P",
"MetricName": "tma_fp_vector_256b",
"MetricThreshold": "tma_fp_vector_256b > 0.1 & (tma_fp_vector > 0.1 & (tma_fp_arith > 0.2 & tma_light_operations > 0.6))",
@@ -1257,7 +1256,7 @@
{
"BriefDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend",
"DefaultMetricgroupName": "TopdownL1",
- "MetricExpr": "cpu_core@topdown\\-fe\\-bound@ / (cpu_core@topdown\\-fe\\-bound@ + cpu_core@topdown\\-bad\\-spec@ + cpu_core@topdown\\-retiring@ + cpu_core@topdown\\-be\\-bound@) + 0 * tma_info_thread_slots",
+ "MetricExpr": "cpu_core@topdown\\-fe\\-bound@ / (cpu_core@topdown\\-fe\\-bound@ + cpu_core@topdown\\-bad\\-spec@ + cpu_core@topdown\\-retiring@ + cpu_core@topdown\\-be\\-bound@)",
"MetricGroup": "BvFB;BvIO;Default;PGO;TmaL1;TopdownL1;tma_L1_group",
"MetricName": "tma_frontend_bound",
"MetricThreshold": "tma_frontend_bound > 0.15",
@@ -1278,7 +1277,7 @@
},
{
"BriefDescription": "This metric represents fraction of slots where the CPU was retiring heavy-weight operations -- instructions that require two or more uops or micro-coded sequences",
- "MetricExpr": "cpu_core@topdown\\-heavy\\-ops@ / (cpu_core@topdown\\-fe\\-bound@ + cpu_core@topdown\\-bad\\-spec@ + cpu_core@topdown\\-retiring@ + cpu_core@topdown\\-be\\-bound@) + 0 * tma_info_thread_slots",
+ "MetricExpr": "cpu_core@topdown\\-heavy\\-ops@ / (cpu_core@topdown\\-fe\\-bound@ + cpu_core@topdown\\-bad\\-spec@ + cpu_core@topdown\\-retiring@ + cpu_core@topdown\\-be\\-bound@)",
"MetricGroup": "Retire;TmaL2;TopdownL2;tma_L2_group;tma_retiring_group",
"MetricName": "tma_heavy_operations",
"MetricThreshold": "tma_heavy_operations > 0.1",
@@ -1456,7 +1455,7 @@
},
{
"BriefDescription": "Floating Point Operations Per Cycle",
- "MetricExpr": "(cpu_core@FP_ARITH_INST_RETIRED.SCALAR@ + 2 * cpu_core@FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE@ + 4 * cpu_core@FP_ARITH_INST_RETIRED.4_FLOPS@ + 8 * cpu_core@FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE@) / tma_info_thread_clks",
+ "MetricExpr": "(cpu_core@FP_ARITH_OPS_RETIRED.SCALAR@ + 2 * cpu_core@FP_ARITH_OPS_RETIRED.128B_PACKED_DOUBLE@ + 4 * cpu_core@FP_ARITH_OPS_RETIRED.4_FLOPS@ + 8 * cpu_core@FP_ARITH_OPS_RETIRED.256B_PACKED_SINGLE@) / tma_info_thread_clks",
"MetricGroup": "Flops;Ret",
"MetricName": "tma_info_core_flopc",
"Unit": "cpu_core"
@@ -1597,7 +1596,7 @@
},
{
"BriefDescription": "Instructions per FP Arithmetic instruction (lower number means higher occurrence rate)",
- "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / (cpu_core@FP_ARITH_INST_RETIRED.SCALAR@ + cpu_core@FP_ARITH_INST_RETIRED.VECTOR@)",
+ "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / (cpu_core@FP_ARITH_OPS_RETIRED.SCALAR@ + cpu_core@FP_ARITH_OPS_RETIRED.VECTOR@)",
"MetricGroup": "Flops;InsType",
"MetricName": "tma_info_inst_mix_iparith",
"MetricThreshold": "tma_info_inst_mix_iparith < 10",
@@ -1606,7 +1605,7 @@
},
{
"BriefDescription": "Instructions per FP Arithmetic AVX/SSE 128-bit instruction (lower number means higher occurrence rate)",
- "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / (cpu_core@FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE@ + cpu_core@FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE@)",
+ "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / (cpu_core@FP_ARITH_OPS_RETIRED.128B_PACKED_DOUBLE@ + cpu_core@FP_ARITH_OPS_RETIRED.128B_PACKED_SINGLE@)",
"MetricGroup": "Flops;FpVector;InsType",
"MetricName": "tma_info_inst_mix_iparith_avx128",
"MetricThreshold": "tma_info_inst_mix_iparith_avx128 < 10",
@@ -1615,7 +1614,7 @@
},
{
"BriefDescription": "Instructions per FP Arithmetic AVX* 256-bit instruction (lower number means higher occurrence rate)",
- "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / (cpu_core@FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE@ + cpu_core@FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE@)",
+ "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / (cpu_core@FP_ARITH_OPS_RETIRED.256B_PACKED_DOUBLE@ + cpu_core@FP_ARITH_OPS_RETIRED.256B_PACKED_SINGLE@)",
"MetricGroup": "Flops;FpVector;InsType",
"MetricName": "tma_info_inst_mix_iparith_avx256",
"MetricThreshold": "tma_info_inst_mix_iparith_avx256 < 10",
@@ -1624,7 +1623,7 @@
},
{
"BriefDescription": "Instructions per FP Arithmetic Scalar Double-Precision instruction (lower number means higher occurrence rate)",
- "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / cpu_core@FP_ARITH_INST_RETIRED.SCALAR_DOUBLE@",
+ "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / cpu_core@FP_ARITH_OPS_RETIRED.SCALAR_DOUBLE@",
"MetricGroup": "Flops;FpScalar;InsType",
"MetricName": "tma_info_inst_mix_iparith_scalar_dp",
"MetricThreshold": "tma_info_inst_mix_iparith_scalar_dp < 10",
@@ -1633,7 +1632,7 @@
},
{
"BriefDescription": "Instructions per FP Arithmetic Scalar Single-Precision instruction (lower number means higher occurrence rate)",
- "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / cpu_core@FP_ARITH_INST_RETIRED.SCALAR_SINGLE@",
+ "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / cpu_core@FP_ARITH_OPS_RETIRED.SCALAR_SINGLE@",
"MetricGroup": "Flops;FpScalar;InsType",
"MetricName": "tma_info_inst_mix_iparith_scalar_sp",
"MetricThreshold": "tma_info_inst_mix_iparith_scalar_sp < 10",
@@ -1658,7 +1657,7 @@
},
{
"BriefDescription": "Instructions per Floating Point (FP) Operation (lower number means higher occurrence rate)",
- "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / (cpu_core@FP_ARITH_INST_RETIRED.SCALAR@ + 2 * cpu_core@FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE@ + 4 * cpu_core@FP_ARITH_INST_RETIRED.4_FLOPS@ + 8 * cpu_core@FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE@)",
+ "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / (cpu_core@FP_ARITH_OPS_RETIRED.SCALAR@ + 2 * cpu_core@FP_ARITH_OPS_RETIRED.128B_PACKED_DOUBLE@ + 4 * cpu_core@FP_ARITH_OPS_RETIRED.4_FLOPS@ + 8 * cpu_core@FP_ARITH_OPS_RETIRED.256B_PACKED_SINGLE@)",
"MetricGroup": "Flops;InsType",
"MetricName": "tma_info_inst_mix_ipflop",
"MetricThreshold": "tma_info_inst_mix_ipflop < 10",
@@ -1713,7 +1712,7 @@
},
{
"BriefDescription": "Average per-thread data fill bandwidth to the L1 data cache [GB / sec]",
- "MetricExpr": "64 * cpu_core@L1D.REPLACEMENT@ / 1e9 / tma_info_system_time",
+ "MetricExpr": "64 * cpu_core@L1D.L1_REPLACEMENT@ / 1e9 / tma_info_system_time",
"MetricGroup": "Mem;MemoryBW",
"MetricName": "tma_info_memory_l1d_cache_fill_bw",
"Unit": "cpu_core"
@@ -1726,6 +1725,13 @@
"Unit": "cpu_core"
},
{
+ "BriefDescription": "L0 cache true misses per kilo instruction for retired demand loads",
+ "MetricExpr": "1e3 * (cpu_core@MEM_LOAD_RETIRED.L1_MISS@ + cpu_core@MEM_LOAD_RETIRED.L1_HIT_L1@) / cpu_core@INST_RETIRED.ANY@",
+ "MetricGroup": "CacheHits;Mem",
+ "MetricName": "tma_info_memory_l1dl0_mpki",
+ "Unit": "cpu_core"
+ },
+ {
"BriefDescription": "L1 cache true misses per kilo instruction for retired demand loads",
"MetricExpr": "1e3 * cpu_core@MEM_LOAD_RETIRED.L1_MISS@ / cpu_core@INST_RETIRED.ANY@",
"MetricGroup": "CacheHits;Mem",
@@ -1941,6 +1947,13 @@
"Unit": "cpu_core"
},
{
+ "BriefDescription": "Average number of uops fetched from MS per cycle",
+ "MetricExpr": "cpu_core@IDQ.MS_UOPS@ / cpu_core@IDQ.MS_UOPS\\,cmask\\=1@",
+ "MetricGroup": "Fed;FetchLat;MicroSeq",
+ "MetricName": "tma_info_pipeline_fetch_ms",
+ "Unit": "cpu_core"
+ },
+ {
"BriefDescription": "Instructions per a microcode Assist invocation",
"MetricExpr": "cpu_core@INST_RETIRED.ANY@ / cpu_core@ASSISTS.ANY@",
"MetricGroup": "MicroSeq;Pipeline;Ret;Retire",
@@ -1974,7 +1987,7 @@
},
{
"BriefDescription": "Measured Average Core Frequency for unhalted processors [GHz]",
- "MetricExpr": "tma_info_system_turbo_utilization * TSC / 1e9 / tma_info_system_time",
+ "MetricExpr": "tma_info_system_turbo_utilization * msr@tsc\\,cpu=cpu_core@ / 1e9 / tma_info_system_time",
"MetricGroup": "Power;Summary",
"MetricName": "tma_info_system_core_frequency",
"Unit": "cpu_core"
@@ -1988,14 +2001,22 @@
},
{
"BriefDescription": "Average number of utilized CPUs",
- "MetricExpr": "cpu_core@CPU_CLK_UNHALTED.REF_TSC@ / TSC",
+ "MetricExpr": "cpu_core@CPU_CLK_UNHALTED.REF_TSC@ / msr@tsc\\,cpu=cpu_core@",
"MetricGroup": "Summary",
"MetricName": "tma_info_system_cpus_utilized",
"Unit": "cpu_core"
},
{
+ "BriefDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]",
+ "MetricExpr": "32 * UNC_M_TOTAL_DATA / 1e9 / tma_info_system_time",
+ "MetricGroup": "HPC;MemOffcore;MemoryBW;SoC;tma_issueBW",
+ "MetricName": "tma_info_system_dram_bw_use",
+ "PublicDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]. Related metrics: tma_bottleneck_data_cache_memory_bandwidth, tma_fb_full, tma_mem_bandwidth, tma_sq_full",
+ "Unit": "cpu_core"
+ },
+ {
"BriefDescription": "Giga Floating Point Operations Per Second",
- "MetricExpr": "(cpu_core@FP_ARITH_INST_RETIRED.SCALAR@ + 2 * cpu_core@FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE@ + 4 * cpu_core@FP_ARITH_INST_RETIRED.4_FLOPS@ + 8 * cpu_core@FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE@) / 1e9 / tma_info_system_time",
+ "MetricExpr": "(cpu_core@FP_ARITH_OPS_RETIRED.SCALAR@ + 2 * cpu_core@FP_ARITH_OPS_RETIRED.128B_PACKED_DOUBLE@ + 4 * cpu_core@FP_ARITH_OPS_RETIRED.4_FLOPS@ + 8 * cpu_core@FP_ARITH_OPS_RETIRED.256B_PACKED_SINGLE@) / 1e9 / tma_info_system_time",
"MetricGroup": "Cor;Flops;HPC",
"MetricName": "tma_info_system_gflops",
"PublicDescription": "Giga Floating Point Operations Per Second. Aggregate across all supported options of: FP precisions, scalar and vector instructions, vector-width",
@@ -2062,6 +2083,13 @@
"Unit": "cpu_core"
},
{
+ "BriefDescription": "Measured Average Uncore Frequency for the SoC [GHz]",
+ "MetricExpr": "tma_info_system_socket_clks / 1e9 / tma_info_system_time",
+ "MetricGroup": "SoC",
+ "MetricName": "tma_info_system_uncore_frequency",
+ "Unit": "cpu_core"
+ },
+ {
"BriefDescription": "Per-Logical Processor actual clocks when the Logical Processor is active.",
"MetricExpr": "cpu_core@CPU_CLK_UNHALTED.THREAD@",
"MetricGroup": "Pipeline",
@@ -2182,12 +2210,12 @@
"Unit": "cpu_core"
},
{
- "BriefDescription": "This metric([SKL+] roughly; [LNL]) estimates fraction of cycles with demand load accesses that hit the L1D cache",
- "MetricExpr": "4 * cpu_core@DEPENDENT_LOADS.ANY@ / tma_info_thread_clks",
+ "BriefDescription": "This metric ([SKL+] roughly; [LNL]) estimates fraction of cycles with demand load accesses that hit the L1D cache",
+ "MetricExpr": "4 * cpu_core@DEPENDENT_LOADS.ANY\\,cmask\\=1@ / tma_info_thread_clks",
"MetricGroup": "BvML;MemoryLat;TopdownL4;tma_L4_group;tma_l1_bound_group",
"MetricName": "tma_l1_latency_dependency",
"MetricThreshold": "tma_l1_latency_dependency > 0.1 & (tma_l1_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
- "PublicDescription": "This metric([SKL+] roughly; [LNL]) estimates fraction of cycles with demand load accesses that hit the L1D cache. The short latency of the L1D cache may be exposed in pointer-chasing memory access patterns as an example. Sample with: MEM_LOAD_RETIRED.L1_HIT",
+ "PublicDescription": "This metric ([SKL+] roughly; [LNL]) estimates fraction of cycles with demand load accesses that hit the L1D cache. The short latency of the L1D cache may be exposed in pointer-chasing memory access patterns as an example. Sample with: MEM_LOAD_RETIRED.L1_HIT",
"ScaleUnit": "100%",
"Unit": "cpu_core"
},
@@ -2203,7 +2231,6 @@
},
{
"BriefDescription": "This metric represents fraction of cycles with demand load accesses that hit the L2 cache under unloaded scenarios (possibly L2 latency limited)",
- "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "cpu_core@MEM_LOAD_RETIRED.L2_HIT@ * min(cpu_core@MEM_LOAD_RETIRED.L2_HIT@R, 3 * tma_info_system_core_frequency) * (1 + cpu_core@MEM_LOAD_RETIRED.FB_HIT@ / cpu_core@MEM_LOAD_RETIRED.L1_MISS@ / 2) / tma_info_thread_clks",
"MetricGroup": "MemoryLat;TopdownL4;tma_L4_group;tma_l2_bound_group",
"MetricName": "tma_l2_hit_latency",
@@ -2224,12 +2251,11 @@
},
{
"BriefDescription": "This metric estimates fraction of cycles with demand load accesses that hit the L3 cache under unloaded scenarios (possibly L3 latency limited)",
- "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "cpu_core@MEM_LOAD_RETIRED.L3_HIT@ * min(cpu_core@MEM_LOAD_RETIRED.L3_HIT@R, 9 * tma_info_system_core_frequency) * (1 + cpu_core@MEM_LOAD_RETIRED.FB_HIT@ / cpu_core@MEM_LOAD_RETIRED.L1_MISS@ / 2) / tma_info_thread_clks",
"MetricGroup": "BvML;MemoryLat;TopdownL4;tma_L4_group;tma_issueLat;tma_l3_bound_group",
"MetricName": "tma_l3_hit_latency",
"MetricThreshold": "tma_l3_hit_latency > 0.1 & (tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
- "PublicDescription": "This metric estimates fraction of cycles with demand load accesses that hit the L3 cache under unloaded scenarios (possibly L3 latency limited). Avoiding private cache misses (i.e. L2 misses/L3 hits) will improve the latency; reduce contention with sibling physical cores and increase performance. Note the value of this node may overlap with its siblings. Sample with: MEM_LOAD_RETIRED.L3_HIT_PS. Related metrics: tma_bottleneck_cache_memory_latency, tma_mem_latency",
+ "PublicDescription": "This metric estimates fraction of cycles with demand load accesses that hit the L3 cache under unloaded scenarios (possibly L3 latency limited). Avoiding private cache misses (i.e. L2 misses/L3 hits) will improve the latency; reduce contention with sibling physical cores and increase performance. Note the value of this node may overlap with its siblings. Sample with: MEM_LOAD_RETIRED.L3_HIT_PS. Related metrics: tma_bottleneck_data_cache_memory_latency, tma_mem_latency",
"ScaleUnit": "100%",
"Unit": "cpu_core"
},
@@ -2311,6 +2337,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles the CPU spent handling cache misses due to lock operations",
+ "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "cpu_core@MEM_INST_RETIRED.LOCK_LOADS@ * cpu_core@MEM_INST_RETIRED.LOCK_LOADS@R / tma_info_thread_clks",
"MetricGroup": "LockCont;Offcore;TopdownL4;tma_L4_group;tma_issueRFO;tma_l1_bound_group",
"MetricName": "tma_lock_latency",
@@ -2321,7 +2348,7 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles in which CPU was likely limited due to LSD (Loop Stream Detector) unit",
- "MetricExpr": "cpu_core@LSD.UOPS\\,cmask\\=0x8\\,inv\\=0x1@ / tma_info_thread_clks",
+ "MetricExpr": "cpu_core@LSD.UOPS\\,cmask\\=0x8\\,inv\\=0x1@ / tma_info_thread_clks / 2",
"MetricGroup": "FetchBW;LSD;TopdownL3;tma_L3_group;tma_fetch_bandwidth_group",
"MetricName": "tma_lsd",
"MetricThreshold": "tma_lsd > 0.15 & tma_fetch_bandwidth > 0.2",
@@ -2346,7 +2373,7 @@
"MetricGroup": "BvMB;MemoryBW;Offcore;TopdownL4;tma_L4_group;tma_dram_bound_group;tma_issueBW",
"MetricName": "tma_mem_bandwidth",
"MetricThreshold": "tma_mem_bandwidth > 0.2 & (tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
- "PublicDescription": "This metric estimates fraction of cycles where the core's performance was likely hurt due to approaching bandwidth limits of external memory - DRAM ([SPR-HBM] and/or HBM). The underlying heuristic assumes that a similar off-core traffic is generated by all IA cores. This metric does not aggregate non-data-read requests by this logical processor; requests from other IA Logical Processors/Physical Cores/sockets; or other non-IA devices like GPU; hence the maximum external memory bandwidth limits may or may not be approached when this metric is flagged (see Uncore counters for that). Related metrics: tma_bottleneck_cache_memory_bandwidth, tma_fb_full, tma_info_system_dram_bw_use, tma_sq_full",
+ "PublicDescription": "This metric estimates fraction of cycles where the core's performance was likely hurt due to approaching bandwidth limits of external memory - DRAM ([SPR-HBM] and/or HBM). The underlying heuristic assumes that a similar off-core traffic is generated by all IA cores. This metric does not aggregate non-data-read requests by this logical processor; requests from other IA Logical Processors/Physical Cores/sockets; or other non-IA devices like GPU; hence the maximum external memory bandwidth limits may or may not be approached when this metric is flagged (see Uncore counters for that). Related metrics: tma_bottleneck_data_cache_memory_bandwidth, tma_fb_full, tma_info_system_dram_bw_use, tma_sq_full",
"ScaleUnit": "100%",
"Unit": "cpu_core"
},
@@ -2356,13 +2383,13 @@
"MetricGroup": "BvML;MemoryLat;Offcore;TopdownL4;tma_L4_group;tma_dram_bound_group;tma_issueLat",
"MetricName": "tma_mem_latency",
"MetricThreshold": "tma_mem_latency > 0.1 & (tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
- "PublicDescription": "This metric estimates fraction of cycles where the performance was likely hurt due to latency from external memory - DRAM ([SPR-HBM] and/or HBM). This metric does not aggregate requests from other Logical Processors/Physical Cores/sockets (see Uncore counters for that). Related metrics: tma_bottleneck_cache_memory_latency, tma_l3_hit_latency",
+ "PublicDescription": "This metric estimates fraction of cycles where the performance was likely hurt due to latency from external memory - DRAM ([SPR-HBM] and/or HBM). This metric does not aggregate requests from other Logical Processors/Physical Cores/sockets (see Uncore counters for that). Related metrics: tma_bottleneck_data_cache_memory_latency, tma_l3_hit_latency",
"ScaleUnit": "100%",
"Unit": "cpu_core"
},
{
"BriefDescription": "This metric represents fraction of slots the Memory subsystem within the Backend was a bottleneck",
- "MetricExpr": "cpu_core@topdown\\-mem\\-bound@ / (cpu_core@topdown\\-fe\\-bound@ + cpu_core@topdown\\-bad\\-spec@ + cpu_core@topdown\\-retiring@ + cpu_core@topdown\\-be\\-bound@) + 0 * tma_info_thread_slots",
+ "MetricExpr": "cpu_core@topdown\\-mem\\-bound@ / (cpu_core@topdown\\-fe\\-bound@ + cpu_core@topdown\\-bad\\-spec@ + cpu_core@topdown\\-retiring@ + cpu_core@topdown\\-be\\-bound@)",
"MetricGroup": "Backend;TmaL2;TopdownL2;tma_L2_group;tma_backend_bound_group",
"MetricName": "tma_memory_bound",
"MetricThreshold": "tma_memory_bound > 0.2 & tma_backend_bound > 0.2",
@@ -2373,7 +2400,6 @@
},
{
"BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to LFENCE Instructions.",
- "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "13 * cpu_core@MISC2_RETIRED.LFENCE@ / tma_info_thread_clks",
"MetricGroup": "TopdownL4;tma_L4_group;tma_serializing_operation_group",
"MetricName": "tma_memory_fence",
@@ -2412,7 +2438,7 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles in which CPU was likely limited due to the MITE pipeline (the legacy decode pipeline)",
- "MetricExpr": "(cpu_core@IDQ.MITE_UOPS\\,cmask\\=0x8\\,inv\\=0x1@ / 2 + cpu_core@IDQ.MITE_UOPS@ / (cpu_core@IDQ.DSB_UOPS@ + cpu_core@IDQ.MITE_UOPS@) * (cpu_core@IDQ_BUBBLES.CYCLES_0_UOPS_DELIV.CORE@ - cpu_core@IDQ_BUBBLES.FETCH_LATENCY@)) / tma_info_thread_clks",
+ "MetricExpr": "(cpu_core@IDQ.MITE_UOPS\\,cmask\\=0x8\\,inv\\=0x1@ / 2 + cpu_core@IDQ.MITE_UOPS@ / (cpu_core@IDQ.DSB_UOPS@ + cpu_core@IDQ.MITE_UOPS@) * (cpu_core@IDQ_BUBBLES.STARVATION_CYCLES@ - cpu_core@IDQ_BUBBLES.FETCH_LATENCY@)) / tma_info_thread_clks",
"MetricGroup": "DSBmiss;FetchBW;TopdownL3;tma_L3_group;tma_fetch_bandwidth_group",
"MetricName": "tma_mite",
"MetricThreshold": "tma_mite > 0.1 & tma_fetch_bandwidth > 0.2",
@@ -2432,7 +2458,7 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles in which CPU was likely limited due to the Microcode Sequencer (MS) unit - see Microcode_Sequencer node for details.",
- "MetricExpr": "cpu_core@IDQ.MS_CYCLES_ANY@ / tma_info_thread_clks",
+ "MetricExpr": "cpu_core@IDQ.MS_CYCLES_ANY@ / tma_info_thread_clks / 1.8",
"MetricGroup": "MicroSeq;TopdownL3;tma_L3_group;tma_fetch_bandwidth_group",
"MetricName": "tma_ms",
"MetricThreshold": "tma_ms > 0.05 & tma_fetch_bandwidth > 0.2",
@@ -2471,7 +2497,8 @@
},
{
"BriefDescription": "This metric represents the remaining light uops fraction the CPU has executed - remaining means not covered by other sibling nodes",
- "MetricExpr": "max(0, tma_light_operations - (tma_x87_use + (cpu_core@FP_ARITH_INST_RETIRED.SCALAR@ + cpu_core@FP_ARITH_INST_RETIRED.VECTOR@) / (tma_retiring * tma_info_thread_slots) + (cpu_core@INT_VEC_RETIRED.ADD_128@ + cpu_core@INT_VEC_RETIRED.VNNI_128@ + cpu_core@INT_VEC_RETIRED.ADD_256@ + cpu_core@INT_VEC_RETIRED.MUL_256@ + cpu_core@INT_VEC_RETIRED.VNNI_256@) / (tma_retiring * tma_info_thread_slots) + tma_memory_operations + tma_fused_instructions + tma_non_fused_branches))",
+ "MetricConstraint": "NO_GROUP_EVENTS",
+ "MetricExpr": "max(0, tma_light_operations - (tma_x87_use + (cpu_core@FP_ARITH_OPS_RETIRED.SCALAR@ + cpu_core@FP_ARITH_OPS_RETIRED.VECTOR@) / (tma_retiring * tma_info_thread_slots) + (cpu_core@INT_VEC_RETIRED.ADD_128@ + cpu_core@INT_VEC_RETIRED.VNNI_128@ + cpu_core@INT_VEC_RETIRED.ADD_256@ + cpu_core@INT_VEC_RETIRED.MUL_256@ + cpu_core@INT_VEC_RETIRED.VNNI_256@) / (tma_retiring * tma_info_thread_slots) + tma_memory_operations + tma_fused_instructions + tma_non_fused_branches))",
"MetricGroup": "Pipeline;TopdownL3;tma_L3_group;tma_light_operations_group",
"MetricName": "tma_other_light_ops",
"MetricThreshold": "tma_other_light_ops > 0.3 & tma_light_operations > 0.6",
@@ -2509,6 +2536,7 @@
},
{
"BriefDescription": "This metric estimates fraction of cycles the CPU performance was potentially limited due to Core computation issues (non divider-related)",
+ "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "((cpu_core@EXE_ACTIVITY.EXE_BOUND_0_PORTS@ + (cpu_core@EXE_ACTIVITY.1_PORTS_UTIL@ + tma_retiring * cpu_core@EXE_ACTIVITY.2_3_PORTS_UTIL@)) / tma_info_thread_clks if cpu_core@ARITH.DIV_ACTIVE@ < cpu_core@CYCLE_ACTIVITY.STALLS_TOTAL@ - cpu_core@EXE_ACTIVITY.BOUND_ON_LOADS@ else (cpu_core@EXE_ACTIVITY.1_PORTS_UTIL@ + tma_retiring * cpu_core@EXE_ACTIVITY.2_3_PORTS_UTIL@) / tma_info_thread_clks)",
"MetricGroup": "PortsUtil;TopdownL3;tma_L3_group;tma_core_bound_group",
"MetricName": "tma_ports_utilization",
@@ -2519,6 +2547,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles CPU executed no uops on any execution port (Logical Processor cycles since ICL, Physical Core cycles otherwise)",
+ "MetricConstraint": "NO_THRESHOLD_AND_NMI",
"MetricExpr": "cpu_core@EXE_ACTIVITY.EXE_BOUND_0_PORTS@ / tma_info_thread_clks",
"MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_ports_utilization_group",
"MetricName": "tma_ports_utilized_0",
@@ -2529,6 +2558,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles where the CPU executed total of 1 uop per cycle on all execution ports (Logical Processor cycles since ICL, Physical Core cycles otherwise)",
+ "MetricConstraint": "NO_THRESHOLD_AND_NMI",
"MetricExpr": "cpu_core@EXE_ACTIVITY.1_PORTS_UTIL@ / tma_info_thread_clks",
"MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_issueL1;tma_ports_utilization_group",
"MetricName": "tma_ports_utilized_1",
@@ -2539,7 +2569,6 @@
},
{
"BriefDescription": "This metric represents fraction of cycles CPU executed total of 2 uops per cycle on all execution ports (Logical Processor cycles since ICL, Physical Core cycles otherwise)",
- "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "cpu_core@EXE_ACTIVITY.2_PORTS_UTIL@ / tma_info_thread_clks",
"MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_issue2P;tma_ports_utilization_group",
"MetricName": "tma_ports_utilized_2",
@@ -2550,7 +2579,6 @@
},
{
"BriefDescription": "This metric represents fraction of cycles CPU executed total of 3 or more uops per cycle on all execution ports (Logical Processor cycles since ICL, Physical Core cycles otherwise)",
- "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "cpu_core@UOPS_EXECUTED.CYCLES_GE_3@ / tma_info_thread_clks",
"MetricGroup": "BvCB;PortsUtil;TopdownL4;tma_L4_group;tma_ports_utilization_group",
"MetricName": "tma_ports_utilized_3m",
@@ -2571,7 +2599,7 @@
{
"BriefDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired",
"DefaultMetricgroupName": "TopdownL1",
- "MetricExpr": "cpu_core@topdown\\-retiring@ / (cpu_core@topdown\\-fe\\-bound@ + cpu_core@topdown\\-bad\\-spec@ + cpu_core@topdown\\-retiring@ + cpu_core@topdown\\-be\\-bound@) + 0 * tma_info_thread_slots",
+ "MetricExpr": "cpu_core@topdown\\-retiring@ / (cpu_core@topdown\\-fe\\-bound@ + cpu_core@topdown\\-bad\\-spec@ + cpu_core@topdown\\-retiring@ + cpu_core@topdown\\-be\\-bound@)",
"MetricGroup": "BvUW;Default;TmaL1;TopdownL1;tma_L1_group",
"MetricName": "tma_retiring",
"MetricThreshold": "tma_retiring > 0.7 | tma_heavy_operations > 0.1",
@@ -2586,7 +2614,7 @@
"MetricGroup": "BvIO;PortsUtil;TopdownL3;tma_L3_group;tma_core_bound_group;tma_issueSO",
"MetricName": "tma_serializing_operation",
"MetricThreshold": "tma_serializing_operation > 0.1 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2)",
- "PublicDescription": "This metric represents fraction of cycles the CPU issue-pipeline was stalled due to serializing operations. Instructions like CPUID; WRMSR or LFENCE serialize the out-of-order execution which may limit performance. Sample with: RESOURCE_STALLS.SCOREBOARD. Related metrics: tma_ms_switches",
+ "PublicDescription": "This metric represents fraction of cycles the CPU issue-pipeline was stalled due to serializing operations. Instructions like CPUID; WRMSR or LFENCE serialize the out-of-order execution which may limit performance. Sample with: PARTIAL_RAT_STALLS.SCOREBOARD. Related metrics: tma_ms_switches",
"ScaleUnit": "100%",
"Unit": "cpu_core"
},
@@ -2602,7 +2630,6 @@
},
{
"BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to PAUSE Instructions",
- "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "cpu_core@CPU_CLK_UNHALTED.PAUSE@ / tma_info_thread_clks",
"MetricGroup": "TopdownL4;tma_L4_group;tma_serializing_operation_group",
"MetricName": "tma_slow_pause",
@@ -2637,7 +2664,7 @@
"MetricGroup": "BvMB;MemoryBW;Offcore;TopdownL4;tma_L4_group;tma_issueBW;tma_l3_bound_group",
"MetricName": "tma_sq_full",
"MetricThreshold": "tma_sq_full > 0.3 & (tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
- "PublicDescription": "This metric measures fraction of cycles where the Super Queue (SQ) was full taking into account all request-types and both hardware SMT threads (Logical Processors). Related metrics: tma_bottleneck_cache_memory_bandwidth, tma_fb_full, tma_info_system_dram_bw_use, tma_mem_bandwidth",
+ "PublicDescription": "This metric measures fraction of cycles where the Super Queue (SQ) was full taking into account all request-types and both hardware SMT threads (Logical Processors). Related metrics: tma_bottleneck_data_cache_memory_bandwidth, tma_fb_full, tma_info_system_dram_bw_use, tma_mem_bandwidth",
"ScaleUnit": "100%",
"Unit": "cpu_core"
},
@@ -2652,6 +2679,15 @@
"Unit": "cpu_core"
},
{
+ "BriefDescription": "This metric estimates clocks wasted due to loads blocked due to unknown store address (did not do memory disambiguation) or due to unknown store data",
+ "MetricExpr": "7 * cpu_core@LD_BLOCKS.STORE_EARLY\\,cmask\\=1@ / tma_info_thread_clks",
+ "MetricGroup": "TopdownL4;tma_L4_group;tma_l1_bound_group",
+ "MetricName": "tma_store_early_blk",
+ "MetricThreshold": "tma_store_early_blk > 0.2",
+ "ScaleUnit": "100%",
+ "Unit": "cpu_core"
+ },
+ {
"BriefDescription": "This metric roughly estimates fraction of cycles when the memory subsystem had loads blocked since they could not forward data from earlier (in program order) overlapping stores",
"MetricExpr": "13 * cpu_core@LD_BLOCKS.STORE_FORWARD@ / tma_info_thread_clks",
"MetricGroup": "TopdownL4;tma_L4_group;tma_l1_bound_group",
diff --git a/tools/perf/pmu-events/arch/x86/arrowlake/cache.json b/tools/perf/pmu-events/arch/x86/arrowlake/cache.json
index 91929d8bcf47..fba4a0672f6c 100644
--- a/tools/perf/pmu-events/arch/x86/arrowlake/cache.json
+++ b/tools/perf/pmu-events/arch/x86/arrowlake/cache.json
@@ -16,6 +16,16 @@
"PublicDescription": "Counts the number of L1D cacheline (dirty) evictions caused by load misses, stores, and prefetches. Does not count evictions or dirty writebacks caused by snoops. Does not count a replacement unless a (dirty) line was written back.",
"SampleAfterValue": "200003",
"UMask": "0x1",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of L1D cacheline (dirty) evictions caused by load misses, stores, and prefetches.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x51",
+ "EventName": "DL1.DIRTY_EVICTION",
+ "PublicDescription": "Counts the number of L1D cacheline (dirty) evictions caused by load misses, stores, and prefetches. Does not count evictions or dirty writebacks caused by snoops. Does not count a replacement unless a (dirty) line was written back.",
+ "SampleAfterValue": "200003",
+ "UMask": "0x1",
"Unit": "cpu_lowpower"
},
{
@@ -29,6 +39,16 @@
"Unit": "cpu_core"
},
{
+ "BriefDescription": "Cachelines replaced into the L1 d-cache. Successful replacements only (not blocked) and exclude WB-miss case",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0x51",
+ "EventName": "L1D.L1_REPLACEMENT",
+ "PublicDescription": "Counts cachelines replaced into the L1 d-cache.",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x4",
+ "Unit": "cpu_core"
+ },
+ {
"BriefDescription": "Cachelines replaced into the L0 and L1 d-cache. Successful replacements only (not blocked) and exclude WB-miss case",
"Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0x51",
@@ -104,6 +124,15 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0x25",
"EventName": "L2_LINES_IN.E",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x4",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of cache lines filled into the L2 cache that are in Exclusive state",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x25",
+ "EventName": "L2_LINES_IN.E",
"PublicDescription": "Counts the number of cache lines filled into the L2 cache that are in Exclusive state. Counts on a per core basis.",
"SampleAfterValue": "1000003",
"UMask": "0x4",
@@ -114,12 +143,40 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0x25",
"EventName": "L2_LINES_IN.F",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x10",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of cache lines filled into the L2 cache that are in Forward state",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x25",
+ "EventName": "L2_LINES_IN.F",
"PublicDescription": "Counts the number of cache lines filled into the L2 cache that are in Forward state. Counts on a per core basis.",
"SampleAfterValue": "1000003",
"UMask": "0x10",
"Unit": "cpu_lowpower"
},
{
+ "BriefDescription": "Counts the number of cache lines filled into the L2 cache that are in Invalid state",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x25",
+ "EventName": "L2_LINES_IN.I",
+ "PublicDescription": "Counts the number of cache lines filled into the L2 cache that are in Invalid state, does not count lines that go Invalid due to an eviction",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x1",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of cache lines filled into the L2 cache that are in Modified state",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x25",
+ "EventName": "L2_LINES_IN.M",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x8",
+ "Unit": "cpu_atom"
+ },
+ {
"BriefDescription": "Counts the number of cache lines filled into the L2 cache that are in Modified state",
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0x25",
@@ -134,6 +191,15 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0x25",
"EventName": "L2_LINES_IN.S",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x2",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of cache lines filled into the L2 cache that are in Shared state",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x25",
+ "EventName": "L2_LINES_IN.S",
"PublicDescription": "Counts the number of cache lines filled into the L2 cache that are in Shared state. Counts on a per core basis.",
"SampleAfterValue": "1000003",
"UMask": "0x2",
@@ -180,6 +246,16 @@
"Unit": "cpu_lowpower"
},
{
+ "BriefDescription": "Counts the number of L2 cache lines that have been L2 hardware prefetched but not used by demand accesses",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x26",
+ "EventName": "L2_LINES_OUT.USELESS_HWPF",
+ "PublicDescription": "Counts the number of L2 cache lines that have been L2 hardware prefetched but not used by demand accesses. Increments on the core that brought the line in originally.",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x4",
+ "Unit": "cpu_atom"
+ },
+ {
"BriefDescription": "Cache lines that have been L2 hardware prefetched but not used by demand accesses",
"Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0x26",
@@ -190,6 +266,42 @@
"Unit": "cpu_core"
},
{
+ "BriefDescription": "Counts the number of L2 prefetches initiated by either the L2 Stream or AMP that were throttled due to Dynamic Prefetch Throttling. The throttle requestor/source could be from the uncore/SOC or the Dead Block Predictor. Counts on a per core basis.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x28",
+ "EventName": "L2_PREFETCHES_THROTTLED.DPT",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x1",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of L2 prefetches initiated by the L2 Stream that were throttled due to Demand Throttle Prefetcher. DTP Global Triggered with no Local Override. Counts on a per core basis.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x28",
+ "EventName": "L2_PREFETCHES_THROTTLED.DTP",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x2",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of L2 prefetches initiated by the L2 Stream and not throttled by DTP due to local override. These prefetches may still be throttled due to another throttler mechanism besides DTP. Counts on a per core basis.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x28",
+ "EventName": "L2_PREFETCHES_THROTTLED.DTP_OVERRIDE",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x4",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of L2 prefetches initiated by either the L2 Stream or AMP that were throttled due to exceeding the XQ threshold set by either XQ_THRESHOLD_DTP or XQ_THRESHOLD. Counts on a per core basis.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x28",
+ "EventName": "L2_PREFETCHES_THROTTLED.XQ_THRESH",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x8",
+ "Unit": "cpu_atom"
+ },
+ {
"BriefDescription": "Counts the number of demand and prefetch transactions that the External Queue (XQ) rejects due to a full or near full condition.",
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0x30",
@@ -199,6 +311,16 @@
"Unit": "cpu_atom"
},
{
+ "BriefDescription": "Counts the number of L2 Cache Accesses Counts the total number of L2 Cache Accesses - sum of hits, misses, rejects front door requests for CRd/DRd/RFO/ItoM/L2 Prefetches only, per core event",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x24",
+ "EventName": "L2_REQUEST.ALL",
+ "PublicDescription": "Counts the number of L2 Cache Accesses Counts the total number of L2 Cache Accesses - sum of hits, misses, rejects front door requests for CRd/DRd/RFO/ItoM/L2 Prefetches only.",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x7",
+ "Unit": "cpu_atom"
+ },
+ {
"BriefDescription": "All accesses to L2 cache [This event is alias to L2_RQSTS.REFERENCES, L2_RQSTS.ANY]",
"Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0x24",
@@ -213,11 +335,29 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0x24",
"EventName": "L2_REQUEST.HIT",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x2",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of L2 Cache Accesses that resulted in a Hit from a front door request only (does not include rejects or recycles), per core event",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x24",
+ "EventName": "L2_REQUEST.HIT",
"SampleAfterValue": "200003",
"UMask": "0x2",
"Unit": "cpu_lowpower"
},
{
+ "BriefDescription": "Counts the number of total L2 Cache Accesses that resulted in a Miss from a front door request only (does not include rejects or recycles), per core event",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x24",
+ "EventName": "L2_REQUEST.MISS",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x1",
+ "Unit": "cpu_atom"
+ },
+ {
"BriefDescription": "Read requests with true-miss in L2 cache [This event is alias to L2_RQSTS.MISS]",
"Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0x24",
@@ -237,6 +377,15 @@
"Unit": "cpu_lowpower"
},
{
+ "BriefDescription": "Counts the number of L2 Cache Accesses that miss the L2 and get BBL reject short and long rejects (includes those counted in L2_reject_XQ.any), per core event",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x24",
+ "EventName": "L2_REQUEST.REJECTS",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x4",
+ "Unit": "cpu_atom"
+ },
+ {
"BriefDescription": "Counts the number of L2 Cache Accesses that miss the L2 and get BBL reject short and long rejects, per core event",
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0x24",
@@ -356,6 +505,51 @@
"Unit": "cpu_core"
},
{
+ "BriefDescription": "Counts the number of LLC prefetches that were throttled due to Dynamic Prefetch Throttling. The throttle requestor/source could be from the uncore/SOC or the Dead Block Predictor. Counts on a per core basis.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x29",
+ "EventName": "LLC_PREFETCHES_THROTTLED.DPT",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x1",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of LLC prefetches throttled due to Demand Throttle Prefetcher. DTP Global Triggered with no Local Override. Counts on a per core basis.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x29",
+ "EventName": "LLC_PREFETCHES_THROTTLED.DTP",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x2",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of LLC prefetches not throttled by DTP due to local override. These prefetches may still be throttled due to another throttler mechanism. Counts on a per core basis.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x29",
+ "EventName": "LLC_PREFETCHES_THROTTLED.DTP_OVERRIDE",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x4",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of LLC prefetches throttled due to LLC hit rate in <insert knob name here>. Counts on a per core basis.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x29",
+ "EventName": "LLC_PREFETCHES_THROTTLED.HIT_RATE",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x10",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of LLC prefetches throttled due to exceeding the XQ threshold set by either XQ_THRESHOLD_DTP or LLC_XQ_THRESHOLD. Counts on a per core basis.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x29",
+ "EventName": "LLC_PREFETCHES_THROTTLED.XQ_THRESH",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x8",
+ "Unit": "cpu_atom"
+ },
+ {
"BriefDescription": "Cycles when L1D is locked",
"Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0x42",
@@ -366,6 +560,16 @@
"Unit": "cpu_core"
},
{
+ "BriefDescription": "Counts the number of cacheable memory requests that miss in the LLC. Counts on a per core basis.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x2e",
+ "EventName": "LONGEST_LAT_CACHE.MISS",
+ "PublicDescription": "Counts the number of cacheable memory requests that miss in the Last Level Cache (LLC). Requests include demand loads, reads for ownership (RFO), instruction fetches and L1 HW prefetches. If the core has access to an L3 cache, the LLC is the L3 cache, otherwise it is the L2 cache. Counts on a per core basis.",
+ "SampleAfterValue": "200003",
+ "UMask": "0x41",
+ "Unit": "cpu_atom"
+ },
+ {
"BriefDescription": "Core-originated cacheable requests that missed L3 (Except hardware prefetches to the L3)",
"Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0x2e",
@@ -376,6 +580,26 @@
"Unit": "cpu_core"
},
{
+ "BriefDescription": "Counts the number of cacheable memory requests that miss in the LLC. Counts on a per core basis.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x2e",
+ "EventName": "LONGEST_LAT_CACHE.MISS",
+ "PublicDescription": "Counts the number of cacheable memory requests that miss in the Last Level Cache (LLC). Requests include demand loads, reads for ownership (RFO), instruction fetches and L1 HW prefetches. If the core has access to an L3 cache, the LLC is the L3 cache, otherwise it is the L2 cache. Counts on a per core basis.",
+ "SampleAfterValue": "200003",
+ "UMask": "0x41",
+ "Unit": "cpu_lowpower"
+ },
+ {
+ "BriefDescription": "Counts the number of cacheable memory requests that access the LLC. Counts on a per core basis.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x2e",
+ "EventName": "LONGEST_LAT_CACHE.REFERENCE",
+ "PublicDescription": "Counts the number of cacheable memory requests that access the Last Level Cache (LLC). Requests include demand loads, reads for ownership (RFO), instruction fetches and L1 HW prefetches. If the core has access to an L3 cache, the LLC is the L3 cache, otherwise it is the L2 cache. Counts on a per core basis.",
+ "SampleAfterValue": "200003",
+ "UMask": "0x4f",
+ "Unit": "cpu_atom"
+ },
+ {
"BriefDescription": "Core-originated cacheable requests that refer to L3 (Except hardware prefetches to the L3)",
"Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0x2e",
@@ -532,6 +756,15 @@
"EventName": "MEM_BOUND_STALLS_LOAD.SBFULL",
"SampleAfterValue": "1000003",
"UMask": "0x80",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of unhalted cycles when the core is stalled to a store buffer full condition",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x34",
+ "EventName": "MEM_BOUND_STALLS_LOAD.SBFULL",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x80",
"Unit": "cpu_lowpower"
},
{
@@ -540,7 +773,7 @@
"Data_LA": "1",
"EventCode": "0xd0",
"EventName": "MEM_INST_RETIRED.ALL_LOADS",
- "PublicDescription": "Counts Instructions with at least one architecturally visible load retired. Available PDIST counters: 0",
+ "PublicDescription": "Counts Instructions with at least one architecturally visible load retired. Available PDIST counters: 0,1",
"SampleAfterValue": "1000003",
"UMask": "0x81",
"Unit": "cpu_core"
@@ -551,7 +784,7 @@
"Data_LA": "1",
"EventCode": "0xd0",
"EventName": "MEM_INST_RETIRED.ALL_STORES",
- "PublicDescription": "Counts all retired store instructions. Available PDIST counters: 0",
+ "PublicDescription": "Counts all retired store instructions. Available PDIST counters: 0,1",
"SampleAfterValue": "1000003",
"UMask": "0x82",
"Unit": "cpu_core"
@@ -561,7 +794,7 @@
"Counter": "0,1,2,3",
"EventCode": "0xd0",
"EventName": "MEM_INST_RETIRED.ALL_SWPF",
- "PublicDescription": "Counts all retired software prefetch instructions. Available PDIST counters: 0",
+ "PublicDescription": "Counts all retired software prefetch instructions. Available PDIST counters: 0,1",
"SampleAfterValue": "1000003",
"UMask": "0x84",
"Unit": "cpu_core"
@@ -572,7 +805,7 @@
"Data_LA": "1",
"EventCode": "0xd0",
"EventName": "MEM_INST_RETIRED.ANY",
- "PublicDescription": "Counts all retired memory instructions - loads and stores. Available PDIST counters: 0",
+ "PublicDescription": "Counts all retired memory instructions - loads and stores. Available PDIST counters: 0,1",
"SampleAfterValue": "1000003",
"UMask": "0x87",
"Unit": "cpu_core"
@@ -583,7 +816,7 @@
"Data_LA": "1",
"EventCode": "0xd0",
"EventName": "MEM_INST_RETIRED.LOCK_LOADS",
- "PublicDescription": "Counts retired load instructions with locked access. Available PDIST counters: 0",
+ "PublicDescription": "Counts retired load instructions with locked access. Available PDIST counters: 0,1",
"SampleAfterValue": "100007",
"UMask": "0x21",
"Unit": "cpu_core"
@@ -594,7 +827,7 @@
"Data_LA": "1",
"EventCode": "0xd0",
"EventName": "MEM_INST_RETIRED.SPLIT_LOADS",
- "PublicDescription": "Counts retired load instructions that split across a cacheline boundary. Available PDIST counters: 0",
+ "PublicDescription": "Counts retired load instructions that split across a cacheline boundary. Available PDIST counters: 0,1",
"SampleAfterValue": "100003",
"UMask": "0x41",
"Unit": "cpu_core"
@@ -605,18 +838,29 @@
"Data_LA": "1",
"EventCode": "0xd0",
"EventName": "MEM_INST_RETIRED.SPLIT_STORES",
- "PublicDescription": "Counts retired store instructions that split across a cacheline boundary. Available PDIST counters: 0",
+ "PublicDescription": "Counts retired store instructions that split across a cacheline boundary. Available PDIST counters: 0,1",
"SampleAfterValue": "100003",
"UMask": "0x42",
"Unit": "cpu_core"
},
{
+ "BriefDescription": "Retired instructions that hit the STLB.",
+ "Counter": "0,1,2,3",
+ "Data_LA": "1",
+ "EventCode": "0xd0",
+ "EventName": "MEM_INST_RETIRED.STLB_HIT_ANY",
+ "PublicDescription": "Number of retired instructions with a clean hit in the 2nd-level TLB (STLB). Available PDIST counters: 0,1",
+ "SampleAfterValue": "100003",
+ "UMask": "0xf",
+ "Unit": "cpu_core"
+ },
+ {
"BriefDescription": "Retired load instructions that hit the STLB.",
"Counter": "0,1,2,3",
"Data_LA": "1",
"EventCode": "0xd0",
"EventName": "MEM_INST_RETIRED.STLB_HIT_LOADS",
- "PublicDescription": "Number of retired load instructions with a clean hit in the 2nd-level TLB (STLB). Available PDIST counters: 0",
+ "PublicDescription": "Number of retired load instructions with a clean hit in the 2nd-level TLB (STLB). Available PDIST counters: 0,1",
"SampleAfterValue": "100003",
"UMask": "0x9",
"Unit": "cpu_core"
@@ -627,18 +871,39 @@
"Data_LA": "1",
"EventCode": "0xd0",
"EventName": "MEM_INST_RETIRED.STLB_HIT_STORES",
- "PublicDescription": "Number of retired store instructions that hit in the 2nd-level TLB (STLB). Available PDIST counters: 0",
+ "PublicDescription": "Number of retired store instructions that hit in the 2nd-level TLB (STLB). Available PDIST counters: 0,1",
"SampleAfterValue": "100003",
"UMask": "0xa",
"Unit": "cpu_core"
},
{
+ "BriefDescription": "Retired SWPF instructions that hit the STLB.",
+ "Counter": "0,1,2,3",
+ "EventCode": "0xd0",
+ "EventName": "MEM_INST_RETIRED.STLB_HIT_SWPF",
+ "PublicDescription": "Number of retired SWPF instructions that hit in the 2nd-level TLB (STLB). Available PDIST counters: 0,1",
+ "SampleAfterValue": "1000003",
+ "UMask": "0xc",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Retired instructions that miss the STLB.",
+ "Counter": "0,1,2,3",
+ "Data_LA": "1",
+ "EventCode": "0xd0",
+ "EventName": "MEM_INST_RETIRED.STLB_MISS_ANY",
+ "PublicDescription": "Retired instructions that miss the STLB. Available PDIST counters: 0,1",
+ "SampleAfterValue": "100003",
+ "UMask": "0x17",
+ "Unit": "cpu_core"
+ },
+ {
"BriefDescription": "Retired load instructions that miss the STLB.",
"Counter": "0,1,2,3",
"Data_LA": "1",
"EventCode": "0xd0",
"EventName": "MEM_INST_RETIRED.STLB_MISS_LOADS",
- "PublicDescription": "Number of retired load instructions that (start a) miss in the 2nd-level TLB (STLB). Available PDIST counters: 0",
+ "PublicDescription": "Number of retired load instructions that (start a) miss in the 2nd-level TLB (STLB). Available PDIST counters: 0,1",
"SampleAfterValue": "100003",
"UMask": "0x11",
"Unit": "cpu_core"
@@ -649,18 +914,28 @@
"Data_LA": "1",
"EventCode": "0xd0",
"EventName": "MEM_INST_RETIRED.STLB_MISS_STORES",
- "PublicDescription": "Number of retired store instructions that (start a) miss in the 2nd-level TLB (STLB). Available PDIST counters: 0",
+ "PublicDescription": "Number of retired store instructions that (start a) miss in the 2nd-level TLB (STLB). Available PDIST counters: 0,1",
"SampleAfterValue": "100003",
"UMask": "0x12",
"Unit": "cpu_core"
},
{
+ "BriefDescription": "Retired SWPF instructions that miss the STLB.",
+ "Counter": "0,1,2,3",
+ "EventCode": "0xd0",
+ "EventName": "MEM_INST_RETIRED.STLB_MISS_SWPF",
+ "PublicDescription": "Number of retired SWPF instructions that (start a) miss in the 2nd-level TLB (STLB). Available PDIST counters: 0,1",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x14",
+ "Unit": "cpu_core"
+ },
+ {
"BriefDescription": "Retired load instructions whose data sources were a cross-core Snoop hits and forwards data from an in on-package core cache (induced by NI$)",
"Counter": "0,1,2,3",
"Data_LA": "1",
"EventCode": "0xd2",
"EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_FWD",
- "PublicDescription": "Counts retired load instructions whose data sources were a cross-core Snoop hits and forwards data from an in on-package core cache (induced by NI$) Available PDIST counters: 0",
+ "PublicDescription": "Counts retired load instructions whose data sources were a cross-core Snoop hits and forwards data from an in on-package core cache (induced by NI$) Available PDIST counters: 0,1",
"SampleAfterValue": "20011",
"UMask": "0x10",
"Unit": "cpu_core"
@@ -671,7 +946,7 @@
"Data_LA": "1",
"EventCode": "0xd2",
"EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_HITM",
- "PublicDescription": "Counts retired load instructions whose data sources were HitM responses from shared L3, Hit-with-FWD is normally excluded. Available PDIST counters: 0",
+ "PublicDescription": "Counts retired load instructions whose data sources were HitM responses from shared L3, Hit-with-FWD is normally excluded. Available PDIST counters: 0,1",
"SampleAfterValue": "20011",
"UMask": "0x4",
"Unit": "cpu_core"
@@ -682,7 +957,7 @@
"Data_LA": "1",
"EventCode": "0xd2",
"EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_MISS",
- "PublicDescription": "Counts the retired load instructions whose data sources were L3 hit and cross-core snoop missed in on-pkg core cache. Available PDIST counters: 0",
+ "PublicDescription": "Counts the retired load instructions whose data sources were L3 hit and cross-core snoop missed in on-pkg core cache. Available PDIST counters: 0,1",
"SampleAfterValue": "20011",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -693,7 +968,7 @@
"Data_LA": "1",
"EventCode": "0xd2",
"EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_NO_FWD",
- "PublicDescription": "Counts retired load instructions whose data sources were L3 and cross-core snoop hits in on-pkg core cache. Available PDIST counters: 0",
+ "PublicDescription": "Counts retired load instructions whose data sources were L3 and cross-core snoop hits in on-pkg core cache. Available PDIST counters: 0,1",
"SampleAfterValue": "20011",
"UMask": "0x2",
"Unit": "cpu_core"
@@ -704,7 +979,7 @@
"Data_LA": "1",
"EventCode": "0xd4",
"EventName": "MEM_LOAD_MISC_RETIRED.UC",
- "PublicDescription": "Retired instructions with at least one load to uncacheable memory-type, or at least one cache-line split locked access (Bus Lock). Available PDIST counters: 0",
+ "PublicDescription": "Retired instructions with at least one load to uncacheable memory-type, or at least one cache-line split locked access (Bus Lock). Available PDIST counters: 0,1",
"SampleAfterValue": "100007",
"UMask": "0x4",
"Unit": "cpu_core"
@@ -715,7 +990,7 @@
"Data_LA": "1",
"EventCode": "0xd1",
"EventName": "MEM_LOAD_RETIRED.FB_HIT",
- "PublicDescription": "Counts retired load instructions with at least one uop was load missed in L1 but hit FB (Fill Buffers) due to preceding miss to the same cache line with data not ready. Available PDIST counters: 0",
+ "PublicDescription": "Counts retired load instructions with at least one uop was load missed in L1 but hit FB (Fill Buffers) due to preceding miss to the same cache line with data not ready. Available PDIST counters: 0,1",
"SampleAfterValue": "100007",
"UMask": "0x40",
"Unit": "cpu_core"
@@ -726,7 +1001,7 @@
"Data_LA": "1",
"EventCode": "0xd1",
"EventName": "MEM_LOAD_RETIRED.L1_HIT",
- "PublicDescription": "Counts retired load instructions with at least one uop that hit in the L1 data cache. This event includes all SW prefetches and lock instructions regardless of the data source. Available PDIST counters: 0",
+ "PublicDescription": "Counts retired load instructions with at least one uop that hit in the L1 data cache. This event includes all SW prefetches and lock instructions regardless of the data source. Available PDIST counters: 0,1",
"SampleAfterValue": "1000003",
"UMask": "0x101",
"Unit": "cpu_core"
@@ -737,7 +1012,7 @@
"Data_LA": "1",
"EventCode": "0xd1",
"EventName": "MEM_LOAD_RETIRED.L1_HIT_L0",
- "PublicDescription": "Counts retired load instructions with at least one uop that hit in the Level 0 of the L1 data cache. This event includes all SW prefetches and lock instructions regardless of the data source. Available PDIST counters: 0",
+ "PublicDescription": "Counts retired load instructions with at least one uop that hit in the Level 0 of the L1 data cache. This event includes all SW prefetches and lock instructions regardless of the data source. Available PDIST counters: 0,1",
"SampleAfterValue": "1000003",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -747,7 +1022,7 @@
"Counter": "0,1,2,3",
"EventCode": "0xd1",
"EventName": "MEM_LOAD_RETIRED.L1_HIT_L1",
- "PublicDescription": "Counts retired load instructions with at least one uop that hit in the Level 1 of the L1 data cache. Available PDIST counters: 0",
+ "PublicDescription": "Counts retired load instructions with at least one uop that hit in the Level 1 of the L1 data cache. Available PDIST counters: 0,1",
"SampleAfterValue": "1000003",
"Unit": "cpu_core"
},
@@ -757,7 +1032,7 @@
"Data_LA": "1",
"EventCode": "0xd1",
"EventName": "MEM_LOAD_RETIRED.L1_MISS",
- "PublicDescription": "Counts retired load instructions with at least one uop that missed in the L1 cache. Available PDIST counters: 0",
+ "PublicDescription": "Counts retired load instructions with at least one uop that missed in the L1 cache. Available PDIST counters: 0,1",
"SampleAfterValue": "200003",
"UMask": "0x8",
"Unit": "cpu_core"
@@ -768,7 +1043,7 @@
"Data_LA": "1",
"EventCode": "0xd1",
"EventName": "MEM_LOAD_RETIRED.L2_HIT",
- "PublicDescription": "Counts retired load instructions with L2 cache hits as data sources. Available PDIST counters: 0",
+ "PublicDescription": "Counts retired load instructions with L2 cache hits as data sources. Available PDIST counters: 0,1",
"SampleAfterValue": "200003",
"UMask": "0x2",
"Unit": "cpu_core"
@@ -779,7 +1054,7 @@
"Data_LA": "1",
"EventCode": "0xd1",
"EventName": "MEM_LOAD_RETIRED.L2_MISS",
- "PublicDescription": "Counts retired load instructions missed L2 cache as data sources. Available PDIST counters: 0",
+ "PublicDescription": "Counts retired load instructions missed L2 cache as data sources. Available PDIST counters: 0,1",
"SampleAfterValue": "100021",
"UMask": "0x10",
"Unit": "cpu_core"
@@ -790,7 +1065,7 @@
"Data_LA": "1",
"EventCode": "0xd1",
"EventName": "MEM_LOAD_RETIRED.L3_HIT",
- "PublicDescription": "Counts retired load instructions with at least one uop that hit in the L3 cache. Available PDIST counters: 0",
+ "PublicDescription": "Counts retired load instructions with at least one uop that hit in the L3 cache. Available PDIST counters: 0,1",
"SampleAfterValue": "100021",
"UMask": "0x4",
"Unit": "cpu_core"
@@ -801,12 +1076,21 @@
"Data_LA": "1",
"EventCode": "0xd1",
"EventName": "MEM_LOAD_RETIRED.L3_MISS",
- "PublicDescription": "Counts retired load instructions with at least one uop that missed in the L3 cache. Available PDIST counters: 0",
+ "PublicDescription": "Counts retired load instructions with at least one uop that missed in the L3 cache. Available PDIST counters: 0,1",
"SampleAfterValue": "50021",
"UMask": "0x20",
"Unit": "cpu_core"
},
{
+ "BriefDescription": "Counts the number of load ops retired that miss the L3 cache and hit in DRAM",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xd4",
+ "EventName": "MEM_LOAD_UOPS_MISC_RETIRED.LOCAL_DRAM",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x2",
+ "Unit": "cpu_lowpower"
+ },
+ {
"BriefDescription": "Counts the number of load ops retired that hit the L1 data cache",
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xd1",
@@ -889,6 +1173,15 @@
"Unit": "cpu_atom"
},
{
+ "BriefDescription": "Counts the number of load ops retired that hit in the L3 cache.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xd1",
+ "EventName": "MEM_LOAD_UOPS_RETIRED.L3_HIT",
+ "SampleAfterValue": "200003",
+ "UMask": "0x1c",
+ "Unit": "cpu_lowpower"
+ },
+ {
"BriefDescription": "Counts the number of loads that hit in a write combining buffer (WCB), excluding the first load that caused the WCB to allocate.",
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xd1",
@@ -988,6 +1281,16 @@
"Unit": "cpu_core"
},
{
+ "BriefDescription": "Counts the number of memory uops retired. A single uop that performs both a load AND a store will be counted as 1, not 2 (e.g. ADD [mem], CONST)",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "Data_LA": "1",
+ "EventCode": "0xd0",
+ "EventName": "MEM_UOPS_RETIRED.ALL",
+ "SampleAfterValue": "200003",
+ "UMask": "0x83",
+ "Unit": "cpu_atom"
+ },
+ {
"BriefDescription": "Counts the number of load uops retired.",
"Counter": "0,1,2,3,4,5,6,7",
"Data_LA": "1",
@@ -1041,7 +1344,7 @@
},
{
"BriefDescription": "Counts the number of tagged load uops retired that exceed the latency threshold defined in MEC_CR_PEBS_LD_LAT_THRESHOLD - Only counts with PEBS enabled",
- "Counter": "0,1,2,3,4,5,6,7",
+ "Counter": "0,1",
"Data_LA": "1",
"EventCode": "0xd0",
"EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_128",
@@ -1065,7 +1368,7 @@
},
{
"BriefDescription": "Counts the number of tagged load uops retired that exceed the latency threshold defined in MEC_CR_PEBS_LD_LAT_THRESHOLD - Only counts with PEBS enabled",
- "Counter": "0,1,2,3,4,5,6,7",
+ "Counter": "0,1",
"Data_LA": "1",
"EventCode": "0xd0",
"EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_16",
@@ -1101,7 +1404,7 @@
},
{
"BriefDescription": "Counts the number of tagged load uops retired that exceed the latency threshold defined in MEC_CR_PEBS_LD_LAT_THRESHOLD - Only counts with PEBS enabled",
- "Counter": "0,1,2,3,4,5,6,7",
+ "Counter": "0,1",
"Data_LA": "1",
"EventCode": "0xd0",
"EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_256",
@@ -1125,7 +1428,7 @@
},
{
"BriefDescription": "Counts the number of tagged load uops retired that exceed the latency threshold defined in MEC_CR_PEBS_LD_LAT_THRESHOLD - Only counts with PEBS enabled",
- "Counter": "0,1,2,3,4,5,6,7",
+ "Counter": "0,1",
"Data_LA": "1",
"EventCode": "0xd0",
"EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_32",
@@ -1149,7 +1452,7 @@
},
{
"BriefDescription": "Counts the number of tagged load uops retired that exceed the latency threshold defined in MEC_CR_PEBS_LD_LAT_THRESHOLD - Only counts with PEBS enabled",
- "Counter": "0,1,2,3,4,5,6,7",
+ "Counter": "0,1",
"Data_LA": "1",
"EventCode": "0xd0",
"EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_4",
@@ -1173,7 +1476,7 @@
},
{
"BriefDescription": "Counts the number of tagged load uops retired that exceed the latency threshold defined in MEC_CR_PEBS_LD_LAT_THRESHOLD - Only counts with PEBS enabled",
- "Counter": "0,1,2,3,4,5,6,7",
+ "Counter": "0,1",
"Data_LA": "1",
"EventCode": "0xd0",
"EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_512",
@@ -1197,7 +1500,7 @@
},
{
"BriefDescription": "Counts the number of tagged load uops retired that exceed the latency threshold defined in MEC_CR_PEBS_LD_LAT_THRESHOLD - Only counts with PEBS enabled",
- "Counter": "0,1,2,3,4,5,6,7",
+ "Counter": "0,1",
"Data_LA": "1",
"EventCode": "0xd0",
"EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_64",
@@ -1221,7 +1524,7 @@
},
{
"BriefDescription": "Counts the number of tagged load uops retired that exceed the latency threshold defined in MEC_CR_PEBS_LD_LAT_THRESHOLD - Only counts with PEBS enabled",
- "Counter": "0,1,2,3,4,5,6,7",
+ "Counter": "0,1",
"Data_LA": "1",
"EventCode": "0xd0",
"EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_8",
@@ -1264,6 +1567,26 @@
"Unit": "cpu_lowpower"
},
{
+ "BriefDescription": "Counts the number of memory renamed load uops retired.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "Data_LA": "1",
+ "EventCode": "0xd0",
+ "EventName": "MEM_UOPS_RETIRED.MRN_LOADS",
+ "SampleAfterValue": "200003",
+ "UMask": "0x9",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of memory renamed store uops retired.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "Data_LA": "1",
+ "EventCode": "0xd0",
+ "EventName": "MEM_UOPS_RETIRED.MRN_STORES",
+ "SampleAfterValue": "200003",
+ "UMask": "0xa",
+ "Unit": "cpu_atom"
+ },
+ {
"BriefDescription": "Counts the number of memory uops retired that were splits.",
"Counter": "0,1,2,3,4,5,6,7",
"Data_LA": "1",
@@ -1331,9 +1654,29 @@
"EventName": "MEM_UOPS_RETIRED.STLB_MISS",
"SampleAfterValue": "200003",
"UMask": "0x13",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of memory uops retired that missed in the second level TLB.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "Data_LA": "1",
+ "EventCode": "0xd0",
+ "EventName": "MEM_UOPS_RETIRED.STLB_MISS",
+ "SampleAfterValue": "200003",
+ "UMask": "0x13",
"Unit": "cpu_lowpower"
},
{
+ "BriefDescription": "Counts the number of load ops retired that filled the STLB - includes those in DTLB_LOAD_MISSES submasks",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "Data_LA": "1",
+ "EventCode": "0xd0",
+ "EventName": "MEM_UOPS_RETIRED.STLB_MISS_LOADS",
+ "SampleAfterValue": "200003",
+ "UMask": "0x11",
+ "Unit": "cpu_atom"
+ },
+ {
"BriefDescription": "Counts the number of load uops retired that miss in the second Level TLB.",
"Counter": "0,1,2,3,4,5,6,7",
"Data_LA": "1",
@@ -1344,6 +1687,16 @@
"Unit": "cpu_lowpower"
},
{
+ "BriefDescription": "Counts the number of store ops retired (store STLB miss)",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "Data_LA": "1",
+ "EventCode": "0xd0",
+ "EventName": "MEM_UOPS_RETIRED.STLB_MISS_STORES",
+ "SampleAfterValue": "200003",
+ "UMask": "0x12",
+ "Unit": "cpu_atom"
+ },
+ {
"BriefDescription": "Counts the number of store uops retired that miss in the second level TLB.",
"Counter": "0,1,2,3,4,5,6,7",
"Data_LA": "1",
@@ -1384,8 +1737,32 @@
"Unit": "cpu_core"
},
{
+ "BriefDescription": "Counts writebacks of modified cachelines that hit in the L3 or were snooped from another core's caches.",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0x2A,0x2B",
+ "EventName": "OCR.COREWB_M.L3_HIT",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x7E001E00008",
+ "PublicDescription": "Counts writebacks of modified cachelines that hit in the L3 or were snooped from another core's caches. Available PDIST counters: 0",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Counts writebacks of non-modified cachelines that hit in the L3 or were snooped from another core's caches.",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0x2A,0x2B",
+ "EventName": "OCR.COREWB_NONM.L3_HIT",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x7E001E01000",
+ "PublicDescription": "Counts writebacks of non-modified cachelines that hit in the L3 or were snooped from another core's caches. Available PDIST counters: 0",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1",
+ "Unit": "cpu_core"
+ },
+ {
"BriefDescription": "Counts demand data reads that have any type of response.",
- "Counter": "0,1,2,3",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0x2A,0x2B",
"EventName": "OCR.DEMAND_DATA_RD.ANY_RESPONSE",
"MSRIndex": "0x1a6,0x1a7",
@@ -1397,7 +1774,7 @@
},
{
"BriefDescription": "Counts demand data reads that were supplied by the L3 cache where a snoop hit in another cores caches, data forwarding is required as the data is modified.",
- "Counter": "0,1,2,3",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0x2A,0x2B",
"EventName": "OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM",
"MSRIndex": "0x1a6,0x1a7",
@@ -1409,7 +1786,7 @@
},
{
"BriefDescription": "Counts demand data reads that were supplied by the L3 cache where a snoop hit in another cores caches which forwarded the unmodified data to the requesting core.",
- "Counter": "0,1,2,3",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0x2A,0x2B",
"EventName": "OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD",
"MSRIndex": "0x1a6,0x1a7",
@@ -1421,7 +1798,7 @@
},
{
"BriefDescription": "Counts demand read for ownership (RFO) requests and software prefetches for exclusive ownership (PREFETCHW) that have any type of response.",
- "Counter": "0,1,2,3",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0x2A,0x2B",
"EventName": "OCR.DEMAND_RFO.ANY_RESPONSE",
"MSRIndex": "0x1a6,0x1a7",
@@ -1433,7 +1810,7 @@
},
{
"BriefDescription": "Counts demand read for ownership (RFO) requests and software prefetches for exclusive ownership (PREFETCHW) that were supplied by the L3 cache where a snoop hit in another cores caches, data forwarding is required as the data is modified.",
- "Counter": "0,1,2,3",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0x2A,0x2B",
"EventName": "OCR.DEMAND_RFO.L3_HIT.SNOOP_HITM",
"MSRIndex": "0x1a6,0x1a7",
@@ -1444,6 +1821,18 @@
"Unit": "cpu_core"
},
{
+ "BriefDescription": "Counts all data read, code read, RFO and ITOM requests including demands and prefetches to the core caches (L1 or L2) that hit in the L3 or were snooped from another core's caches.",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0x2A,0x2B",
+ "EventName": "OCR.READS_TO_CORE.L3_HIT",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x7E001E04477",
+ "PublicDescription": "Counts all data read, code read, RFO and ITOM requests including demands and prefetches to the core caches (L1 or L2) that hit in the L3 or were snooped from another core's caches. Available PDIST counters: 0",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1",
+ "Unit": "cpu_core"
+ },
+ {
"BriefDescription": "Any memory transaction that reached the SQ.",
"Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0x21",
diff --git a/tools/perf/pmu-events/arch/x86/arrowlake/floating-point.json b/tools/perf/pmu-events/arch/x86/arrowlake/floating-point.json
index 23a80c526aa1..3e68c2468f11 100644
--- a/tools/perf/pmu-events/arch/x86/arrowlake/floating-point.json
+++ b/tools/perf/pmu-events/arch/x86/arrowlake/floating-point.json
@@ -1,5 +1,15 @@
[
{
+ "BriefDescription": "Counts the number of cycles when any of the floating point dividers are active.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "CounterMask": "1",
+ "EventCode": "0xcd",
+ "EventName": "ARITH.FPDIV_ACTIVE",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x2",
+ "Unit": "cpu_atom"
+ },
+ {
"BriefDescription": "Cycles when floating-point divide unit is busy executing divide or square root operations.",
"Counter": "0,1,2,3,4,5,6,7,8,9",
"CounterMask": "1",
@@ -21,6 +31,24 @@
"Unit": "cpu_lowpower"
},
{
+ "BriefDescription": "Counts the number of active floating point dividers per cycle in the loop stage.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xcd",
+ "EventName": "ARITH.FPDIV_OCCUPANCY",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x2",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of floating point divider uops executed per cycle.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xcd",
+ "EventName": "ARITH.FPDIV_UOPS",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x8",
+ "Unit": "cpu_atom"
+ },
+ {
"BriefDescription": "Counts all microcode FP assists.",
"Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0xc1",
@@ -474,6 +502,51 @@
"Unit": "cpu_atom"
},
{
+ "BriefDescription": "Counts the number of uops executed on all floating point ports.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xb2",
+ "EventName": "FP_VINT_UOPS_EXECUTED.ALL",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x1f",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of uops executed on floating point and vector integer port 0.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xb2",
+ "EventName": "FP_VINT_UOPS_EXECUTED.P0",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x2",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of uops executed on floating point and vector integer port 1.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xb2",
+ "EventName": "FP_VINT_UOPS_EXECUTED.P1",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x4",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of uops executed on floating point and vector integer port 2.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xb2",
+ "EventName": "FP_VINT_UOPS_EXECUTED.P2",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x8",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of uops executed on floating point and vector integer port 3.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xb2",
+ "EventName": "FP_VINT_UOPS_EXECUTED.P3",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x10",
+ "Unit": "cpu_atom"
+ },
+ {
"BriefDescription": "Counts the number of uops executed on floating point and vector integer port 0, 1, 2, 3.",
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xb2",
diff --git a/tools/perf/pmu-events/arch/x86/arrowlake/frontend.json b/tools/perf/pmu-events/arch/x86/arrowlake/frontend.json
index 56cf1ec63200..a15de050a76c 100644
--- a/tools/perf/pmu-events/arch/x86/arrowlake/frontend.json
+++ b/tools/perf/pmu-events/arch/x86/arrowlake/frontend.json
@@ -30,6 +30,42 @@
"Unit": "cpu_lowpower"
},
{
+ "BriefDescription": "Counts the number of BACLEARS due to a conditional jump.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xe6",
+ "EventName": "BACLEARS.COND",
+ "SampleAfterValue": "200003",
+ "UMask": "0x10",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of BACLEARS due to an indirect branch.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xe6",
+ "EventName": "BACLEARS.INDIRECT",
+ "SampleAfterValue": "200003",
+ "UMask": "0x2",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of BACLEARS due to a return branch.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xe6",
+ "EventName": "BACLEARS.RETURN",
+ "SampleAfterValue": "200003",
+ "UMask": "0x8",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of BACLEARS due to a direct, unconditional jump.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xe6",
+ "EventName": "BACLEARS.UNCOND",
+ "SampleAfterValue": "200003",
+ "UMask": "0x4",
+ "Unit": "cpu_atom"
+ },
+ {
"BriefDescription": "Stalls caused by changing prefix length of the instruction.",
"Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0x87",
@@ -49,6 +85,15 @@
"Unit": "cpu_core"
},
{
+ "BriefDescription": "Counts the number of times a decode restriction reduces the decode throughput due to wrong instruction length prediction.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xe9",
+ "EventName": "DECODE_RESTRICTION.PREDECODE_WRONG",
+ "SampleAfterValue": "200003",
+ "UMask": "0x1",
+ "Unit": "cpu_atom"
+ },
+ {
"BriefDescription": "DSB-to-MITE switch true penalty cycles.",
"Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0x61",
@@ -81,7 +126,7 @@
"EventName": "FRONTEND_RETIRED.ANY_ANT",
"MSRIndex": "0x3F7",
"MSRValue": "0x9",
- "PublicDescription": "Always Not Taken (ANT) conditional retired branches (no BTB entry and not mispredicted) Available PDIST counters: 0",
+ "PublicDescription": "Always Not Taken (ANT) conditional retired branches (no BTB entry and not mispredicted) Available PDIST counters: 0,1",
"SampleAfterValue": "100007",
"UMask": "0x3",
"Unit": "cpu_core"
@@ -93,7 +138,7 @@
"EventName": "FRONTEND_RETIRED.ANY_DSB_MISS",
"MSRIndex": "0x3F7",
"MSRValue": "0x1",
- "PublicDescription": "Counts retired Instructions that experienced DSB (Decode stream buffer i.e. the decoded instruction-cache) miss. Available PDIST counters: 0",
+ "PublicDescription": "Counts retired Instructions that experienced DSB (Decode stream buffer i.e. the decoded instruction-cache) miss. Available PDIST counters: 0,1",
"SampleAfterValue": "100007",
"UMask": "0x3",
"Unit": "cpu_core"
@@ -179,7 +224,7 @@
"EventName": "FRONTEND_RETIRED.DSB_MISS",
"MSRIndex": "0x3F7",
"MSRValue": "0x11",
- "PublicDescription": "Number of retired Instructions that experienced a critical DSB (Decode stream buffer i.e. the decoded instruction-cache) miss. Critical means stalls were exposed to the back-end as a result of the DSB miss. Available PDIST counters: 0",
+ "PublicDescription": "Number of retired Instructions that experienced a critical DSB (Decode stream buffer i.e. the decoded instruction-cache) miss. Critical means stalls were exposed to the back-end as a result of the DSB miss. Available PDIST counters: 0,1",
"SampleAfterValue": "100007",
"UMask": "0x3",
"Unit": "cpu_core"
@@ -218,7 +263,7 @@
"EventName": "FRONTEND_RETIRED.ITLB_MISS",
"MSRIndex": "0x3F7",
"MSRValue": "0x14",
- "PublicDescription": "Counts retired Instructions that experienced iTLB (Instruction TLB) true miss. Available PDIST counters: 0",
+ "PublicDescription": "Counts retired Instructions that experienced iTLB (Instruction TLB) true miss. Available PDIST counters: 0,1",
"SampleAfterValue": "100007",
"UMask": "0x3",
"Unit": "cpu_core"
@@ -239,7 +284,7 @@
"EventName": "FRONTEND_RETIRED.L1I_MISS",
"MSRIndex": "0x3F7",
"MSRValue": "0x12",
- "PublicDescription": "Counts retired Instructions who experienced Instruction L1 Cache true miss. Available PDIST counters: 0",
+ "PublicDescription": "Counts retired Instructions who experienced Instruction L1 Cache true miss. Available PDIST counters: 0,1",
"SampleAfterValue": "100007",
"UMask": "0x3",
"Unit": "cpu_core"
@@ -251,7 +296,7 @@
"EventName": "FRONTEND_RETIRED.L2_MISS",
"MSRIndex": "0x3F7",
"MSRValue": "0x13",
- "PublicDescription": "Counts retired Instructions who experienced Instruction L2 Cache true miss. Available PDIST counters: 0",
+ "PublicDescription": "Counts retired Instructions who experienced Instruction L2 Cache true miss. Available PDIST counters: 0,1",
"SampleAfterValue": "100007",
"UMask": "0x3",
"Unit": "cpu_core"
@@ -263,7 +308,7 @@
"EventName": "FRONTEND_RETIRED.LATENCY_GE_128",
"MSRIndex": "0x3F7",
"MSRValue": "0x608006",
- "PublicDescription": "Counts retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 128 cycles which was not interrupted by a back-end stall. Available PDIST counters: 0",
+ "PublicDescription": "Counts retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 128 cycles which was not interrupted by a back-end stall. Available PDIST counters: 0,1",
"SampleAfterValue": "100007",
"UMask": "0x3",
"Unit": "cpu_core"
@@ -275,7 +320,7 @@
"EventName": "FRONTEND_RETIRED.LATENCY_GE_16",
"MSRIndex": "0x3F7",
"MSRValue": "0x601006",
- "PublicDescription": "Counts retired instructions that are delivered to the back-end after a front-end stall of at least 16 cycles. During this period the front-end delivered no uops. Available PDIST counters: 0",
+ "PublicDescription": "Counts retired instructions that are delivered to the back-end after a front-end stall of at least 16 cycles. During this period the front-end delivered no uops. Available PDIST counters: 0,1",
"SampleAfterValue": "100007",
"UMask": "0x3",
"Unit": "cpu_core"
@@ -287,7 +332,7 @@
"EventName": "FRONTEND_RETIRED.LATENCY_GE_2",
"MSRIndex": "0x3F7",
"MSRValue": "0x600206",
- "PublicDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of at least 2 cycles which was not interrupted by a back-end stall. Available PDIST counters: 0",
+ "PublicDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of at least 2 cycles which was not interrupted by a back-end stall. Available PDIST counters: 0,1",
"SampleAfterValue": "100007",
"UMask": "0x3",
"Unit": "cpu_core"
@@ -299,7 +344,7 @@
"EventName": "FRONTEND_RETIRED.LATENCY_GE_256",
"MSRIndex": "0x3F7",
"MSRValue": "0x610006",
- "PublicDescription": "Counts retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 256 cycles which was not interrupted by a back-end stall. Available PDIST counters: 0",
+ "PublicDescription": "Counts retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 256 cycles which was not interrupted by a back-end stall. Available PDIST counters: 0,1",
"SampleAfterValue": "100007",
"UMask": "0x3",
"Unit": "cpu_core"
@@ -311,7 +356,7 @@
"EventName": "FRONTEND_RETIRED.LATENCY_GE_2_BUBBLES_GE_1",
"MSRIndex": "0x3F7",
"MSRValue": "0x100206",
- "PublicDescription": "Counts retired instructions that are delivered to the back-end after the front-end had at least 1 bubble-slot for a period of 2 cycles. A bubble-slot is an empty issue-pipeline slot while there was no RAT stall. Available PDIST counters: 0",
+ "PublicDescription": "Counts retired instructions that are delivered to the back-end after the front-end had at least 1 bubble-slot for a period of 2 cycles. A bubble-slot is an empty issue-pipeline slot while there was no RAT stall. Available PDIST counters: 0,1",
"SampleAfterValue": "100007",
"UMask": "0x3",
"Unit": "cpu_core"
@@ -323,7 +368,7 @@
"EventName": "FRONTEND_RETIRED.LATENCY_GE_32",
"MSRIndex": "0x3F7",
"MSRValue": "0x602006",
- "PublicDescription": "Counts retired instructions that are delivered to the back-end after a front-end stall of at least 32 cycles. During this period the front-end delivered no uops. Available PDIST counters: 0",
+ "PublicDescription": "Counts retired instructions that are delivered to the back-end after a front-end stall of at least 32 cycles. During this period the front-end delivered no uops. Available PDIST counters: 0,1",
"SampleAfterValue": "100007",
"UMask": "0x3",
"Unit": "cpu_core"
@@ -335,7 +380,7 @@
"EventName": "FRONTEND_RETIRED.LATENCY_GE_4",
"MSRIndex": "0x3F7",
"MSRValue": "0x600406",
- "PublicDescription": "Counts retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 4 cycles which was not interrupted by a back-end stall. Available PDIST counters: 0",
+ "PublicDescription": "Counts retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 4 cycles which was not interrupted by a back-end stall. Available PDIST counters: 0,1",
"SampleAfterValue": "100007",
"UMask": "0x3",
"Unit": "cpu_core"
@@ -347,7 +392,7 @@
"EventName": "FRONTEND_RETIRED.LATENCY_GE_512",
"MSRIndex": "0x3F7",
"MSRValue": "0x620006",
- "PublicDescription": "Counts retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 512 cycles which was not interrupted by a back-end stall. Available PDIST counters: 0",
+ "PublicDescription": "Counts retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 512 cycles which was not interrupted by a back-end stall. Available PDIST counters: 0,1",
"SampleAfterValue": "100007",
"UMask": "0x3",
"Unit": "cpu_core"
@@ -359,7 +404,7 @@
"EventName": "FRONTEND_RETIRED.LATENCY_GE_64",
"MSRIndex": "0x3F7",
"MSRValue": "0x604006",
- "PublicDescription": "Counts retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 64 cycles which was not interrupted by a back-end stall. Available PDIST counters: 0",
+ "PublicDescription": "Counts retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 64 cycles which was not interrupted by a back-end stall. Available PDIST counters: 0,1",
"SampleAfterValue": "100007",
"UMask": "0x3",
"Unit": "cpu_core"
@@ -371,7 +416,7 @@
"EventName": "FRONTEND_RETIRED.LATENCY_GE_8",
"MSRIndex": "0x3F7",
"MSRValue": "0x600806",
- "PublicDescription": "Counts retired instructions that are delivered to the back-end after a front-end stall of at least 8 cycles. During this period the front-end delivered no uops. Available PDIST counters: 0",
+ "PublicDescription": "Counts retired instructions that are delivered to the back-end after a front-end stall of at least 8 cycles. During this period the front-end delivered no uops. Available PDIST counters: 0,1",
"SampleAfterValue": "100007",
"UMask": "0x3",
"Unit": "cpu_core"
@@ -383,7 +428,7 @@
"EventName": "FRONTEND_RETIRED.MISP_ANT",
"MSRIndex": "0x3F7",
"MSRValue": "0x9",
- "PublicDescription": "ANT retired branches that got just mispredicted Available PDIST counters: 0",
+ "PublicDescription": "ANT retired branches that got just mispredicted Available PDIST counters: 0,1",
"SampleAfterValue": "100007",
"UMask": "0x2",
"Unit": "cpu_core"
@@ -395,7 +440,7 @@
"EventName": "FRONTEND_RETIRED.MS_FLOWS",
"MSRIndex": "0x3F7",
"MSRValue": "0x8",
- "PublicDescription": "Counts flows delivered by the Microcode Sequencer Available PDIST counters: 0",
+ "PublicDescription": "Counts flows delivered by the Microcode Sequencer Available PDIST counters: 0,1",
"SampleAfterValue": "100007",
"UMask": "0x3",
"Unit": "cpu_core"
@@ -443,7 +488,7 @@
"EventName": "FRONTEND_RETIRED.STLB_MISS",
"MSRIndex": "0x3F7",
"MSRValue": "0x15",
- "PublicDescription": "Counts retired Instructions that experienced STLB (2nd level TLB) true miss. Available PDIST counters: 0",
+ "PublicDescription": "Counts retired Instructions that experienced STLB (2nd level TLB) true miss. Available PDIST counters: 0,1",
"SampleAfterValue": "100007",
"UMask": "0x3",
"Unit": "cpu_core"
@@ -455,7 +500,7 @@
"EventName": "FRONTEND_RETIRED.UNKNOWN_BRANCH",
"MSRIndex": "0x3F7",
"MSRValue": "0x17",
- "PublicDescription": "Number retired branch instructions that caused the front-end to be resteered when it finds the instruction in a fetch line. This is called Unknown Branch which occurs for the first time a branch instruction is fetched or when the branch is not tracked by the BPU (Branch Prediction Unit) anymore. Available PDIST counters: 0",
+ "PublicDescription": "Number retired branch instructions that caused the front-end to be resteered when it finds the instruction in a fetch line. This is called Unknown Branch which occurs for the first time a branch instruction is fetched or when the branch is not tracked by the BPU (Branch Prediction Unit) anymore. Available PDIST counters: 0,1",
"SampleAfterValue": "100007",
"UMask": "0x3",
"Unit": "cpu_core"
@@ -738,8 +783,35 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xe7",
"EventName": "MS_DECODED.MS_BUSY",
+ "SampleAfterValue": "200003",
+ "UMask": "0x4",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of cycles that the micro-sequencer is busy.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xe7",
+ "EventName": "MS_DECODED.MS_BUSY",
"SampleAfterValue": "1000003",
"UMask": "0x4",
"Unit": "cpu_lowpower"
+ },
+ {
+ "BriefDescription": "Counts the number of times entered into a ucode flow in the FEC. Includes inserted flows due to front-end detected faults or assists.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xe7",
+ "EventName": "MS_DECODED.MS_ENTRY",
+ "SampleAfterValue": "200003",
+ "UMask": "0x1",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of times nanocode flow is executed.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xe7",
+ "EventName": "MS_DECODED.NANO_CODE",
+ "SampleAfterValue": "200003",
+ "UMask": "0x2",
+ "Unit": "cpu_atom"
}
]
diff --git a/tools/perf/pmu-events/arch/x86/arrowlake/memory.json b/tools/perf/pmu-events/arch/x86/arrowlake/memory.json
index fb8d4ac69bda..05cc46518232 100644
--- a/tools/perf/pmu-events/arch/x86/arrowlake/memory.json
+++ b/tools/perf/pmu-events/arch/x86/arrowlake/memory.json
@@ -1,5 +1,14 @@
[
{
+ "BriefDescription": "Counts the number of cycles that the head (oldest load) of the load buffer is stalled due to any number of reasons, including an L1 miss, WCB full, pagewalk, store address block or store data block.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x05",
+ "EventName": "LD_HEAD.ANY",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x7f",
+ "Unit": "cpu_atom"
+ },
+ {
"BriefDescription": "Counts the number of cycles that the head (oldest load) of the load buffer is stalled due to any number of reasons, including an L1 miss, WCB full, pagewalk, store address block or store data block, on a load that retires.",
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0x05",
@@ -63,6 +72,16 @@
"Unit": "cpu_lowpower"
},
{
+ "BriefDescription": "Counts the number of cycles that the head (oldest load) of the load buffer is stalled due to other block cases.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x05",
+ "EventName": "LD_HEAD.OTHER",
+ "PublicDescription": "Counts the number of cycles that the head (oldest load) of the load buffer is stalled due to other block cases such as pipeline conflicts, fences, etc.",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x40",
+ "Unit": "cpu_atom"
+ },
+ {
"BriefDescription": "Counts the number of cycles that the head (oldest load) of the load buffer and retirement are both stalled due to other block cases.",
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0x05",
@@ -83,6 +102,15 @@
"Unit": "cpu_lowpower"
},
{
+ "BriefDescription": "Counts the number of cycles that the head (oldest load) of the load buffer is stalled due to a pagewalk.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x05",
+ "EventName": "LD_HEAD.PGWALK",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x20",
+ "Unit": "cpu_atom"
+ },
+ {
"BriefDescription": "Counts the number of cycles that the head (oldest load) of the load buffer and retirement are both stalled due to a pagewalk.",
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0x05",
@@ -101,6 +129,15 @@
"Unit": "cpu_lowpower"
},
{
+ "BriefDescription": "Counts the number of cycles that the head (oldest load) of the load buffer is stalled due to a store address match.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x05",
+ "EventName": "LD_HEAD.ST_ADDR",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x4",
+ "Unit": "cpu_atom"
+ },
+ {
"BriefDescription": "Counts the number of cycles that the head (oldest load) of the load buffer and retirement are both stalled due to a store address match.",
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0x05",
@@ -119,6 +156,24 @@
"Unit": "cpu_lowpower"
},
{
+ "BriefDescription": "Counts the number of cycles that the head (oldest load) of the load buffer is stalled due to store data forward block.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x05",
+ "EventName": "LD_HEAD.ST_DATA",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x8",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of cycles that the head (oldest load) of the load buffer is stalled due to request buffers full or lock in progress.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x05",
+ "EventName": "LD_HEAD.WCB_FULL",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x2",
+ "Unit": "cpu_atom"
+ },
+ {
"BriefDescription": "Counts the number of cycles that the head (oldest load) of the load buffer and retirement are both stalled due to request buffers full or lock in progress.",
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0x05",
@@ -156,6 +211,15 @@
"Unit": "cpu_lowpower"
},
{
+ "BriefDescription": "Counts the number of machine clears that flush the pipeline and restart the machine without the use of microcode.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xc3",
+ "EventName": "MACHINE_CLEARS.MEMORY_ORDERING_FAST",
+ "SampleAfterValue": "20003",
+ "UMask": "0x82",
+ "Unit": "cpu_atom"
+ },
+ {
"BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 1024 cycles.",
"Counter": "2,3,4,5,6,7,8,9",
"Data_LA": "1",
@@ -163,7 +227,7 @@
"EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_1024",
"MSRIndex": "0x3F6",
"MSRValue": "0x400",
- "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 1024 cycles. Reported latency may be longer than just the memory latency. Available PDIST counters: 0",
+ "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 1024 cycles. Reported latency may be longer than just the memory latency.",
"SampleAfterValue": "53",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -176,7 +240,7 @@
"EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_128",
"MSRIndex": "0x3F6",
"MSRValue": "0x80",
- "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 128 cycles. Reported latency may be longer than just the memory latency. Available PDIST counters: 0",
+ "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 128 cycles. Reported latency may be longer than just the memory latency.",
"SampleAfterValue": "1009",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -189,7 +253,7 @@
"EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_16",
"MSRIndex": "0x3F6",
"MSRValue": "0x10",
- "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 16 cycles. Reported latency may be longer than just the memory latency. Available PDIST counters: 0",
+ "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 16 cycles. Reported latency may be longer than just the memory latency.",
"SampleAfterValue": "20011",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -202,7 +266,7 @@
"EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_2048",
"MSRIndex": "0x3F6",
"MSRValue": "0x800",
- "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 2048 cycles. Reported latency may be longer than just the memory latency. Available PDIST counters: 0",
+ "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 2048 cycles. Reported latency may be longer than just the memory latency.",
"SampleAfterValue": "23",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -215,7 +279,7 @@
"EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_256",
"MSRIndex": "0x3F6",
"MSRValue": "0x100",
- "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 256 cycles. Reported latency may be longer than just the memory latency. Available PDIST counters: 0",
+ "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 256 cycles. Reported latency may be longer than just the memory latency.",
"SampleAfterValue": "503",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -228,7 +292,7 @@
"EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_32",
"MSRIndex": "0x3F6",
"MSRValue": "0x20",
- "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 32 cycles. Reported latency may be longer than just the memory latency. Available PDIST counters: 0",
+ "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 32 cycles. Reported latency may be longer than just the memory latency.",
"SampleAfterValue": "100007",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -241,7 +305,7 @@
"EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_4",
"MSRIndex": "0x3F6",
"MSRValue": "0x4",
- "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 4 cycles. Reported latency may be longer than just the memory latency. Available PDIST counters: 0",
+ "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 4 cycles. Reported latency may be longer than just the memory latency.",
"SampleAfterValue": "100003",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -254,7 +318,7 @@
"EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_512",
"MSRIndex": "0x3F6",
"MSRValue": "0x200",
- "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 512 cycles. Reported latency may be longer than just the memory latency. Available PDIST counters: 0",
+ "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 512 cycles. Reported latency may be longer than just the memory latency.",
"SampleAfterValue": "101",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -267,7 +331,7 @@
"EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_64",
"MSRIndex": "0x3F6",
"MSRValue": "0x40",
- "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 64 cycles. Reported latency may be longer than just the memory latency. Available PDIST counters: 0",
+ "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 64 cycles. Reported latency may be longer than just the memory latency.",
"SampleAfterValue": "2003",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -280,7 +344,7 @@
"EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_8",
"MSRIndex": "0x3F6",
"MSRValue": "0x8",
- "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 8 cycles. Reported latency may be longer than just the memory latency. Available PDIST counters: 0",
+ "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 8 cycles. Reported latency may be longer than just the memory latency.",
"SampleAfterValue": "50021",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -291,7 +355,7 @@
"Data_LA": "1",
"EventCode": "0xcd",
"EventName": "MEM_TRANS_RETIRED.STORE_SAMPLE",
- "PublicDescription": "Counts Retired memory accesses with at least 1 store operation. This PEBS event is the precisely-distributed (PDist) trigger covering all stores uops for sampling by the PEBS Store Latency Facility. The facility is described in Intel SDM Volume 3 section 19.9.8 Available PDIST counters: 0",
+ "PublicDescription": "Counts Retired memory accesses with at least 1 store operation. This PEBS event is the precisely-distributed (PDist) trigger covering all stores uops for sampling by the PEBS Store Latency Facility. The facility is described in Intel SDM Volume 3 section 19.9.8 Available PDIST counters: 0,1",
"SampleAfterValue": "1000003",
"UMask": "0x2",
"Unit": "cpu_core"
@@ -334,7 +398,7 @@
},
{
"BriefDescription": "Counts demand data reads that were supplied by DRAM.",
- "Counter": "0,1,2,3",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0x2A,0x2B",
"EventName": "OCR.DEMAND_DATA_RD.DRAM",
"MSRIndex": "0x1a6,0x1a7",
@@ -346,7 +410,7 @@
},
{
"BriefDescription": "Counts demand data reads that were not supplied by the L3 cache.",
- "Counter": "0,1,2,3",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0x2A,0x2B",
"EventName": "OCR.DEMAND_DATA_RD.L3_MISS",
"MSRIndex": "0x1a6,0x1a7",
@@ -358,7 +422,7 @@
},
{
"BriefDescription": "Counts demand read for ownership (RFO) requests and software prefetches for exclusive ownership (PREFETCHW) that were not supplied by the L3 cache.",
- "Counter": "0,1,2,3",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0x2A,0x2B",
"EventName": "OCR.DEMAND_RFO.L3_MISS",
"MSRIndex": "0x1a6,0x1a7",
diff --git a/tools/perf/pmu-events/arch/x86/arrowlake/other.json b/tools/perf/pmu-events/arch/x86/arrowlake/other.json
index 51bc763a5887..c8feed3a99a6 100644
--- a/tools/perf/pmu-events/arch/x86/arrowlake/other.json
+++ b/tools/perf/pmu-events/arch/x86/arrowlake/other.json
@@ -19,6 +19,89 @@
"Unit": "cpu_core"
},
{
+ "BriefDescription": "Counts the number of unhalted cycles a Core is blocked due to a lock In Progress issued by another core",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x63",
+ "EventName": "BUS_LOCK.BLOCKED_CYCLES",
+ "PublicDescription": "Counts the number of unhalted cycles a Core is blocked due to a lock In Progress issued by another core. Counts on a per core basis.",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x1",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of unhalted cycles a Core is blocked due to an Accepted lock it issued, includes both split and non-split lock cycles.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x63",
+ "EventName": "BUS_LOCK.LOCK_CYCLES",
+ "PublicDescription": "Counts the number of unhalted cycles a Core is blocked due to an Accepted lock it issued, includes both split and non-split lock cycles. Counts on a per core basis.",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x2",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of non-split locks such as UC locks issued by a Core (does not include cache locks)",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x63",
+ "EventName": "BUS_LOCK.NON_SPLIT_LOCKS",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x4",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of split locks issued by a Core",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x63",
+ "EventName": "BUS_LOCK.SPLIT_LOCKS",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x8",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of cycles the L2 Prefetchers are at throttle level 0",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x32",
+ "EventName": "DYNAMIC_PREFETCH_THROTTLER.LEVEL0_SOC",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x1",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of cycles the L2 Prefetcher throttle level is at 1",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x32",
+ "EventName": "DYNAMIC_PREFETCH_THROTTLER.LEVEL1_SOC",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x2",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of cycles the L2 Prefetcher throttle level is at 2",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x32",
+ "EventName": "DYNAMIC_PREFETCH_THROTTLER.LEVEL2_SOC",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x4",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of cycles the L2 Prefetcher throttle level is at 3",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x32",
+ "EventName": "DYNAMIC_PREFETCH_THROTTLER.LEVEL3_SOC",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x8",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of cycles the L2 Prefetcher throttle level is at 4",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x32",
+ "EventName": "DYNAMIC_PREFETCH_THROTTLER.LEVEL4_SOC",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x10",
+ "Unit": "cpu_atom"
+ },
+ {
"BriefDescription": "This event is deprecated. [This event is alias to MISC_RETIRED.LBR_INSERTS]",
"Counter": "0,1,2,3,4,5,6,7",
"Deprecated": "1",
@@ -66,7 +149,7 @@
},
{
"BriefDescription": "Counts streaming stores that have any type of response.",
- "Counter": "0,1,2,3",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0x2A,0x2B",
"EventName": "OCR.STREAMING_WR.ANY_RESPONSE",
"MSRIndex": "0x1a6,0x1a7",
@@ -86,5 +169,41 @@
"SampleAfterValue": "1000003",
"UMask": "0x1",
"Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Counts the number of prefetch requests that were promoted in the XQ to a demand request.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xf4",
+ "EventName": "XQ_PROMOTION.ALL",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x7",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of prefetch requests that were promoted in the XQ to a demand code read.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xf4",
+ "EventName": "XQ_PROMOTION.CRDS",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x4",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of prefetch requests that were promoted in the XQ to a demand read.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xf4",
+ "EventName": "XQ_PROMOTION.DRDS",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x1",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of prefetch requests that were promoted in the XQ to a demand RFO.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xf4",
+ "EventName": "XQ_PROMOTION.RFOS",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x2",
+ "Unit": "cpu_atom"
}
]
diff --git a/tools/perf/pmu-events/arch/x86/arrowlake/pipeline.json b/tools/perf/pmu-events/arch/x86/arrowlake/pipeline.json
index 18a22368b99b..805616052925 100644
--- a/tools/perf/pmu-events/arch/x86/arrowlake/pipeline.json
+++ b/tools/perf/pmu-events/arch/x86/arrowlake/pipeline.json
@@ -31,6 +31,16 @@
"Unit": "cpu_lowpower"
},
{
+ "BriefDescription": "Counts the number of cycles when any of the integer dividers are active.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "CounterMask": "1",
+ "EventCode": "0xcd",
+ "EventName": "ARITH.IDIV_ACTIVE",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x1",
+ "Unit": "cpu_atom"
+ },
+ {
"BriefDescription": "Cycles when integer divide unit is busy executing divide or square root operations.",
"Counter": "0,1,2,3,4,5,6,7,8,9",
"CounterMask": "1",
@@ -42,6 +52,24 @@
"Unit": "cpu_core"
},
{
+ "BriefDescription": "Counts the number of active integer dividers per cycle.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xcd",
+ "EventName": "ARITH.IDIV_OCCUPANCY",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x1",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of integer divider uops executed per cycle.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xcd",
+ "EventName": "ARITH.IDIV_UOPS",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x4",
+ "Unit": "cpu_atom"
+ },
+ {
"BriefDescription": "Number of occurrences where a microcode assist is invoked by hardware.",
"Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0xc1",
@@ -74,13 +102,14 @@
"Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0xc4",
"EventName": "BR_INST_RETIRED.ALL_BRANCHES",
- "PublicDescription": "Counts all branch instructions retired. Available PDIST counters: 0",
+ "PublicDescription": "Counts all branch instructions retired. Available PDIST counters: 0,1",
"SampleAfterValue": "400009",
"Unit": "cpu_core"
},
{
"BriefDescription": "Counts the total number of branch instructions retired for all branch types.",
"Counter": "0,1,2,3,4,5,6,7",
+ "Errata": "ARL010, ARL011",
"EventCode": "0xc4",
"EventName": "BR_INST_RETIRED.ALL_BRANCHES",
"PublicDescription": "Counts the total number of instructions in which the instruction pointer (IP) of the processor is resteered due to a branch instruction and the branch instruction successfully retires. All branch type instructions are accounted for.",
@@ -101,7 +130,7 @@
"Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0xc4",
"EventName": "BR_INST_RETIRED.COND",
- "PublicDescription": "Counts conditional branch instructions retired. Available PDIST counters: 0",
+ "PublicDescription": "Counts conditional branch instructions retired. Available PDIST counters: 0,1",
"SampleAfterValue": "400009",
"UMask": "0x111",
"Unit": "cpu_core"
@@ -109,6 +138,7 @@
{
"BriefDescription": "Counts the number of retired JCC (Jump on Conditional Code) branch instructions retired, includes both taken and not taken branches.",
"Counter": "0,1,2,3,4,5,6,7",
+ "Errata": "ARL011",
"EventCode": "0xc4",
"EventName": "BR_INST_RETIRED.COND",
"SampleAfterValue": "200003",
@@ -116,11 +146,20 @@
"Unit": "cpu_lowpower"
},
{
+ "BriefDescription": "Counts the number of not taken JCC branch instructions retired",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xc4",
+ "EventName": "BR_INST_RETIRED.COND_NTAKEN",
+ "SampleAfterValue": "200003",
+ "UMask": "0x7f",
+ "Unit": "cpu_atom"
+ },
+ {
"BriefDescription": "Not taken branch instructions retired.",
"Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0xc4",
"EventName": "BR_INST_RETIRED.COND_NTAKEN",
- "PublicDescription": "Counts not taken branch instructions retired. Available PDIST counters: 0",
+ "PublicDescription": "Counts not taken branch instructions retired. Available PDIST counters: 0,1",
"SampleAfterValue": "400009",
"UMask": "0x10",
"Unit": "cpu_core"
@@ -139,7 +178,7 @@
"Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0xc4",
"EventName": "BR_INST_RETIRED.COND_TAKEN",
- "PublicDescription": "Counts taken conditional branch instructions retired. Available PDIST counters: 0",
+ "PublicDescription": "Counts taken conditional branch instructions retired. Available PDIST counters: 0,1",
"SampleAfterValue": "400009",
"UMask": "0x101",
"Unit": "cpu_core"
@@ -158,7 +197,7 @@
"Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0xc4",
"EventName": "BR_INST_RETIRED.COND_TAKEN_BWD",
- "PublicDescription": "Counts taken backward conditional branch instructions retired. Available PDIST counters: 0",
+ "PublicDescription": "Counts taken backward conditional branch instructions retired. Available PDIST counters: 0,1",
"SampleAfterValue": "400009",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -168,7 +207,7 @@
"Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0xc4",
"EventName": "BR_INST_RETIRED.COND_TAKEN_FWD",
- "PublicDescription": "Counts taken forward conditional branch instructions retired. Available PDIST counters: 0",
+ "PublicDescription": "Counts taken forward conditional branch instructions retired. Available PDIST counters: 0,1",
"SampleAfterValue": "400009",
"UMask": "0x102",
"Unit": "cpu_core"
@@ -187,7 +226,7 @@
"Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0xc4",
"EventName": "BR_INST_RETIRED.FAR_BRANCH",
- "PublicDescription": "Counts far branch instructions retired. Available PDIST counters: 0",
+ "PublicDescription": "Counts far branch instructions retired. Available PDIST counters: 0,1",
"SampleAfterValue": "100007",
"UMask": "0x40",
"Unit": "cpu_core"
@@ -195,6 +234,7 @@
{
"BriefDescription": "Counts the number of far branch instructions retired, includes far jump, far call and return, and interrupt call and return.",
"Counter": "0,1,2,3,4,5,6,7",
+ "Errata": "ARL011",
"EventCode": "0xc4",
"EventName": "BR_INST_RETIRED.FAR_BRANCH",
"SampleAfterValue": "200003",
@@ -215,7 +255,7 @@
"Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0xc4",
"EventName": "BR_INST_RETIRED.INDIRECT",
- "PublicDescription": "Counts near indirect branch instructions retired excluding returns. TSX abort is an indirect branch. Available PDIST counters: 0",
+ "PublicDescription": "Counts near indirect branch instructions retired excluding returns. TSX abort is an indirect branch. Available PDIST counters: 0,1",
"SampleAfterValue": "100003",
"UMask": "0x80",
"Unit": "cpu_core"
@@ -223,6 +263,7 @@
{
"BriefDescription": "Counts the number of near indirect JMP and near indirect CALL branch instructions retired.",
"Counter": "0,1,2,3,4,5,6,7",
+ "Errata": "ARL011",
"EventCode": "0xc4",
"EventName": "BR_INST_RETIRED.INDIRECT",
"SampleAfterValue": "200003",
@@ -241,6 +282,7 @@
{
"BriefDescription": "Counts the number of near indirect CALL branch instructions retired.",
"Counter": "0,1,2,3,4,5,6,7",
+ "Errata": "ARL011",
"EventCode": "0xc4",
"EventName": "BR_INST_RETIRED.INDIRECT_CALL",
"SampleAfterValue": "200003",
@@ -248,6 +290,15 @@
"Unit": "cpu_lowpower"
},
{
+ "BriefDescription": "Counts the number of near indirect JMP branch instructions retired",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xc4",
+ "EventName": "BR_INST_RETIRED.INDIRECT_JMP",
+ "SampleAfterValue": "200003",
+ "UMask": "0xef",
+ "Unit": "cpu_atom"
+ },
+ {
"BriefDescription": "Counts the number of near indirect JMP branch instructions retired.",
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc4",
@@ -257,6 +308,17 @@
"Unit": "cpu_lowpower"
},
{
+ "BriefDescription": "This event is deprecated. Refer to new event BR_INST_RETIRED.INDIRECT_CALL",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "Deprecated": "1",
+ "Errata": "ARL011",
+ "EventCode": "0xc4",
+ "EventName": "BR_INST_RETIRED.IND_CALL",
+ "SampleAfterValue": "200003",
+ "UMask": "0xfb",
+ "Unit": "cpu_lowpower"
+ },
+ {
"BriefDescription": "Counts the number of near CALL branch instructions retired",
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc4",
@@ -270,7 +332,7 @@
"Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0xc4",
"EventName": "BR_INST_RETIRED.NEAR_CALL",
- "PublicDescription": "Counts both direct and indirect near call instructions retired. Available PDIST counters: 0",
+ "PublicDescription": "Counts both direct and indirect near call instructions retired. Available PDIST counters: 0,1",
"SampleAfterValue": "100007",
"UMask": "0x2",
"Unit": "cpu_core"
@@ -278,6 +340,7 @@
{
"BriefDescription": "Counts the number of near CALL branch instructions retired.",
"Counter": "0,1,2,3,4,5,6,7",
+ "Errata": "ARL010, ARL011",
"EventCode": "0xc4",
"EventName": "BR_INST_RETIRED.NEAR_CALL",
"SampleAfterValue": "200003",
@@ -298,7 +361,7 @@
"Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0xc4",
"EventName": "BR_INST_RETIRED.NEAR_RETURN",
- "PublicDescription": "Counts return instructions retired. Available PDIST counters: 0",
+ "PublicDescription": "Counts return instructions retired. Available PDIST counters: 0,1",
"SampleAfterValue": "100007",
"UMask": "0x8",
"Unit": "cpu_core"
@@ -313,11 +376,20 @@
"Unit": "cpu_lowpower"
},
{
+ "BriefDescription": "Counts the number of taken branch instructions retired",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xc4",
+ "EventName": "BR_INST_RETIRED.NEAR_TAKEN",
+ "SampleAfterValue": "200003",
+ "UMask": "0xc0",
+ "Unit": "cpu_atom"
+ },
+ {
"BriefDescription": "Taken branch instructions retired.",
"Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0xc4",
"EventName": "BR_INST_RETIRED.NEAR_TAKEN",
- "PublicDescription": "Counts taken branch instructions retired. Available PDIST counters: 0",
+ "PublicDescription": "Counts taken branch instructions retired. Available PDIST counters: 0,1",
"SampleAfterValue": "400009",
"UMask": "0x20",
"Unit": "cpu_core"
@@ -325,6 +397,7 @@
{
"BriefDescription": "Counts the number of near taken branch instructions retired.",
"Counter": "0,1,2,3,4,5,6,7",
+ "Errata": "ARL011",
"EventCode": "0xc4",
"EventName": "BR_INST_RETIRED.NEAR_TAKEN",
"SampleAfterValue": "200003",
@@ -372,7 +445,7 @@
"Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0xc5",
"EventName": "BR_MISP_RETIRED.ALL_BRANCHES",
- "PublicDescription": "Counts all the retired branch instructions that were mispredicted by the processor. A branch misprediction occurs when the processor incorrectly predicts the destination of the branch. When the misprediction is discovered at execution, all the instructions executed in the wrong (speculative) path must be discarded, and the processor must start fetching from the correct path. Available PDIST counters: 0",
+ "PublicDescription": "Counts all the retired branch instructions that were mispredicted by the processor. A branch misprediction occurs when the processor incorrectly predicts the destination of the branch. When the misprediction is discovered at execution, all the instructions executed in the wrong (speculative) path must be discarded, and the processor must start fetching from the correct path. Available PDIST counters: 0,1",
"SampleAfterValue": "400009",
"Unit": "cpu_core"
},
@@ -390,7 +463,7 @@
"Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0xc5",
"EventName": "BR_MISP_RETIRED.ALL_BRANCHES_COST",
- "PublicDescription": "All mispredicted branch instructions retired. This precise event may be used to get the misprediction cost via the Retire_Latency field of PEBS. It fires on the instruction that immediately follows the mispredicted branch. Available PDIST counters: 0",
+ "PublicDescription": "All mispredicted branch instructions retired. This precise event may be used to get the misprediction cost via the Retire_Latency field of PEBS. It fires on the instruction that immediately follows the mispredicted branch. Available PDIST counters: 0,1",
"SampleAfterValue": "400009",
"UMask": "0x44",
"Unit": "cpu_core"
@@ -409,7 +482,7 @@
"Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0xc5",
"EventName": "BR_MISP_RETIRED.COND",
- "PublicDescription": "Counts mispredicted conditional branch instructions retired. Available PDIST counters: 0",
+ "PublicDescription": "Counts mispredicted conditional branch instructions retired. Available PDIST counters: 0,1",
"SampleAfterValue": "400009",
"UMask": "0x111",
"Unit": "cpu_core"
@@ -428,17 +501,26 @@
"Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0xc5",
"EventName": "BR_MISP_RETIRED.COND_COST",
- "PublicDescription": "Mispredicted conditional branch instructions retired. This precise event may be used to get the misprediction cost via the Retire_Latency field of PEBS. It fires on the instruction that immediately follows the mispredicted branch. Available PDIST counters: 0",
+ "PublicDescription": "Mispredicted conditional branch instructions retired. This precise event may be used to get the misprediction cost via the Retire_Latency field of PEBS. It fires on the instruction that immediately follows the mispredicted branch. Available PDIST counters: 0,1",
"SampleAfterValue": "400009",
"UMask": "0x151",
"Unit": "cpu_core"
},
{
+ "BriefDescription": "Counts the number of mispredicted not taken JCC branch instructions retired",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xc5",
+ "EventName": "BR_MISP_RETIRED.COND_NTAKEN",
+ "SampleAfterValue": "200003",
+ "UMask": "0x7f",
+ "Unit": "cpu_atom"
+ },
+ {
"BriefDescription": "Mispredicted non-taken conditional branch instructions retired.",
"Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0xc5",
"EventName": "BR_MISP_RETIRED.COND_NTAKEN",
- "PublicDescription": "Counts the number of conditional branch instructions retired that were mispredicted and the branch direction was not taken. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of conditional branch instructions retired that were mispredicted and the branch direction was not taken. Available PDIST counters: 0,1",
"SampleAfterValue": "400009",
"UMask": "0x10",
"Unit": "cpu_core"
@@ -448,7 +530,7 @@
"Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0xc5",
"EventName": "BR_MISP_RETIRED.COND_NTAKEN_COST",
- "PublicDescription": "Mispredicted non-taken conditional branch instructions retired. This precise event may be used to get the misprediction cost via the Retire_Latency field of PEBS. It fires on the instruction that immediately follows the mispredicted branch. Available PDIST counters: 0",
+ "PublicDescription": "Mispredicted non-taken conditional branch instructions retired. This precise event may be used to get the misprediction cost via the Retire_Latency field of PEBS. It fires on the instruction that immediately follows the mispredicted branch. Available PDIST counters: 0,1",
"SampleAfterValue": "400009",
"UMask": "0x50",
"Unit": "cpu_core"
@@ -467,7 +549,7 @@
"Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0xc5",
"EventName": "BR_MISP_RETIRED.COND_TAKEN",
- "PublicDescription": "Counts taken conditional mispredicted branch instructions retired. Available PDIST counters: 0",
+ "PublicDescription": "Counts taken conditional mispredicted branch instructions retired. Available PDIST counters: 0,1",
"SampleAfterValue": "400009",
"UMask": "0x101",
"Unit": "cpu_core"
@@ -486,7 +568,7 @@
"Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0xc5",
"EventName": "BR_MISP_RETIRED.COND_TAKEN_BWD",
- "PublicDescription": "Counts taken backward conditional mispredicted branch instructions retired. Available PDIST counters: 0",
+ "PublicDescription": "Counts taken backward conditional mispredicted branch instructions retired. Available PDIST counters: 0,1",
"SampleAfterValue": "400009",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -496,7 +578,7 @@
"Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0xc5",
"EventName": "BR_MISP_RETIRED.COND_TAKEN_BWD_COST",
- "PublicDescription": "number of branch instructions retired that were mispredicted and taken backward. This precise event may be used to get the misprediction cost via the Retire_Latency field of PEBS. It fires on the instruction that immediately follows the mispredicted branch. Available PDIST counters: 0",
+ "PublicDescription": "number of branch instructions retired that were mispredicted and taken backward. This precise event may be used to get the misprediction cost via the Retire_Latency field of PEBS. It fires on the instruction that immediately follows the mispredicted branch. Available PDIST counters: 0,1",
"SampleAfterValue": "400009",
"UMask": "0x8001",
"Unit": "cpu_core"
@@ -506,7 +588,7 @@
"Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0xc5",
"EventName": "BR_MISP_RETIRED.COND_TAKEN_COST",
- "PublicDescription": "Mispredicted taken conditional branch instructions retired. This precise event may be used to get the misprediction cost via the Retire_Latency field of PEBS. It fires on the instruction that immediately follows the mispredicted branch. Available PDIST counters: 0",
+ "PublicDescription": "Mispredicted taken conditional branch instructions retired. This precise event may be used to get the misprediction cost via the Retire_Latency field of PEBS. It fires on the instruction that immediately follows the mispredicted branch. Available PDIST counters: 0,1",
"SampleAfterValue": "400009",
"UMask": "0x141",
"Unit": "cpu_core"
@@ -516,7 +598,7 @@
"Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0xc5",
"EventName": "BR_MISP_RETIRED.COND_TAKEN_FWD",
- "PublicDescription": "Counts taken forward conditional mispredicted branch instructions retired. Available PDIST counters: 0",
+ "PublicDescription": "Counts taken forward conditional mispredicted branch instructions retired. Available PDIST counters: 0,1",
"SampleAfterValue": "400009",
"Unit": "cpu_core"
},
@@ -525,7 +607,7 @@
"Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0xc5",
"EventName": "BR_MISP_RETIRED.COND_TAKEN_FWD_COST",
- "PublicDescription": "number of branch instructions retired that were mispredicted and taken forward. This precise event may be used to get the misprediction cost via the Retire_Latency field of PEBS. It fires on the instruction that immediately follows the mispredicted branch. Available PDIST counters: 0",
+ "PublicDescription": "number of branch instructions retired that were mispredicted and taken forward. This precise event may be used to get the misprediction cost via the Retire_Latency field of PEBS. It fires on the instruction that immediately follows the mispredicted branch. Available PDIST counters: 0,1",
"SampleAfterValue": "400009",
"UMask": "0x8002",
"Unit": "cpu_core"
@@ -544,7 +626,7 @@
"Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0xc5",
"EventName": "BR_MISP_RETIRED.INDIRECT",
- "PublicDescription": "Counts miss-predicted near indirect branch instructions retired excluding returns. TSX abort is an indirect branch. Available PDIST counters: 0",
+ "PublicDescription": "Counts miss-predicted near indirect branch instructions retired excluding returns. TSX abort is an indirect branch. Available PDIST counters: 0,1",
"SampleAfterValue": "100003",
"UMask": "0x80",
"Unit": "cpu_core"
@@ -572,7 +654,7 @@
"Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0xc5",
"EventName": "BR_MISP_RETIRED.INDIRECT_CALL",
- "PublicDescription": "Counts retired mispredicted indirect (near taken) CALL instructions, including both register and memory indirect. Available PDIST counters: 0",
+ "PublicDescription": "Counts retired mispredicted indirect (near taken) CALL instructions, including both register and memory indirect. Available PDIST counters: 0,1",
"SampleAfterValue": "400009",
"UMask": "0x2",
"Unit": "cpu_core"
@@ -591,7 +673,7 @@
"Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0xc5",
"EventName": "BR_MISP_RETIRED.INDIRECT_CALL_COST",
- "PublicDescription": "Mispredicted indirect CALL retired. This precise event may be used to get the misprediction cost via the Retire_Latency field of PEBS. It fires on the instruction that immediately follows the mispredicted branch. Available PDIST counters: 0",
+ "PublicDescription": "Mispredicted indirect CALL retired. This precise event may be used to get the misprediction cost via the Retire_Latency field of PEBS. It fires on the instruction that immediately follows the mispredicted branch. Available PDIST counters: 0,1",
"SampleAfterValue": "400009",
"UMask": "0x42",
"Unit": "cpu_core"
@@ -601,12 +683,21 @@
"Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0xc5",
"EventName": "BR_MISP_RETIRED.INDIRECT_COST",
- "PublicDescription": "Mispredicted near indirect branch instructions retired (excluding returns). This precise event may be used to get the misprediction cost via the Retire_Latency field of PEBS. It fires on the instruction that immediately follows the mispredicted branch. Available PDIST counters: 0",
+ "PublicDescription": "Mispredicted near indirect branch instructions retired (excluding returns). This precise event may be used to get the misprediction cost via the Retire_Latency field of PEBS. It fires on the instruction that immediately follows the mispredicted branch. Available PDIST counters: 0,1",
"SampleAfterValue": "100003",
"UMask": "0xc0",
"Unit": "cpu_core"
},
{
+ "BriefDescription": "Counts the number of mispredicted near indirect JMP branch instructions retired",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xc5",
+ "EventName": "BR_MISP_RETIRED.INDIRECT_JMP",
+ "SampleAfterValue": "200003",
+ "UMask": "0xef",
+ "Unit": "cpu_atom"
+ },
+ {
"BriefDescription": "Counts the number of mispredicted near indirect JMP branch instructions retired.",
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc5",
@@ -616,11 +707,20 @@
"Unit": "cpu_lowpower"
},
{
+ "BriefDescription": "Counts the number of mispredicted near taken branch instructions retired",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xc5",
+ "EventName": "BR_MISP_RETIRED.NEAR_TAKEN",
+ "SampleAfterValue": "200003",
+ "UMask": "0x80",
+ "Unit": "cpu_atom"
+ },
+ {
"BriefDescription": "Number of near branch instructions retired that were mispredicted and taken.",
"Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0xc5",
"EventName": "BR_MISP_RETIRED.NEAR_TAKEN",
- "PublicDescription": "Counts number of near branch instructions retired that were mispredicted and taken. Available PDIST counters: 0",
+ "PublicDescription": "Counts number of near branch instructions retired that were mispredicted and taken. Available PDIST counters: 0,1",
"SampleAfterValue": "400009",
"UMask": "0x20",
"Unit": "cpu_core"
@@ -639,7 +739,7 @@
"Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0xc5",
"EventName": "BR_MISP_RETIRED.NEAR_TAKEN_COST",
- "PublicDescription": "Mispredicted taken near branch instructions retired. This precise event may be used to get the misprediction cost via the Retire_Latency field of PEBS. It fires on the instruction that immediately follows the mispredicted branch. Available PDIST counters: 0",
+ "PublicDescription": "Mispredicted taken near branch instructions retired. This precise event may be used to get the misprediction cost via the Retire_Latency field of PEBS. It fires on the instruction that immediately follows the mispredicted branch. Available PDIST counters: 0,1",
"SampleAfterValue": "400009",
"UMask": "0x60",
"Unit": "cpu_core"
@@ -649,7 +749,7 @@
"Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0xc5",
"EventName": "BR_MISP_RETIRED.RET",
- "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts mispredicted return instructions retired. Available PDIST counters: 0",
+ "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts mispredicted return instructions retired. Available PDIST counters: 0,1",
"SampleAfterValue": "100007",
"UMask": "0x8",
"Unit": "cpu_core"
@@ -677,12 +777,21 @@
"Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0xc5",
"EventName": "BR_MISP_RETIRED.RET_COST",
- "PublicDescription": "Mispredicted ret instructions retired. This precise event may be used to get the misprediction cost via the Retire_Latency field of PEBS. It fires on the instruction that immediately follows the mispredicted branch. Available PDIST counters: 0",
+ "PublicDescription": "Mispredicted ret instructions retired. This precise event may be used to get the misprediction cost via the Retire_Latency field of PEBS. It fires on the instruction that immediately follows the mispredicted branch. Available PDIST counters: 0,1",
"SampleAfterValue": "100007",
"UMask": "0x48",
"Unit": "cpu_core"
},
{
+ "BriefDescription": "Counts the total number of BTCLEARS.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xe8",
+ "EventName": "BTCLEAR.ANY",
+ "PublicDescription": "Counts the total number of BTCLEARS which occurs when the Branch Target Buffer (BTB) predicts a taken branch.",
+ "SampleAfterValue": "1000003",
+ "Unit": "cpu_atom"
+ },
+ {
"BriefDescription": "Core clocks when the thread is in the C0.1 light-weight slower wakeup time but more power saving optimized state.",
"Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0xec",
@@ -1046,7 +1155,7 @@
"Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0xc0",
"EventName": "INST_RETIRED.ANY_P",
- "PublicDescription": "Counts the number of X86 instructions retired - an Architectural PerfMon event. Counting continues during hardware interrupts, traps, and inside interrupt handlers. Notes: INST_RETIRED.ANY is counted by a designated fixed counter freeing up programmable counters to count other events. INST_RETIRED.ANY_P is counted by a programmable counter. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of X86 instructions retired - an Architectural PerfMon event. Counting continues during hardware interrupts, traps, and inside interrupt handlers. Notes: INST_RETIRED.ANY is counted by a designated fixed counter freeing up programmable counters to count other events. INST_RETIRED.ANY_P is counted by a programmable counter. Available PDIST counters: 0,1",
"SampleAfterValue": "2000003",
"Unit": "cpu_core"
},
@@ -1063,7 +1172,7 @@
"Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0xc0",
"EventName": "INST_RETIRED.BR_FUSED",
- "PublicDescription": "retired macro-fused uops when there is a branch in the macro-fused pair (the two instructions that got macro-fused count once in this pmon) Available PDIST counters: 0",
+ "PublicDescription": "retired macro-fused uops when there is a branch in the macro-fused pair (the two instructions that got macro-fused count once in this pmon) Available PDIST counters: 0,1",
"SampleAfterValue": "1000003",
"UMask": "0x10",
"Unit": "cpu_core"
@@ -1073,7 +1182,7 @@
"Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0xc0",
"EventName": "INST_RETIRED.MACRO_FUSED",
- "PublicDescription": "INST_RETIRED.MACRO_FUSED Available PDIST counters: 0",
+ "PublicDescription": "INST_RETIRED.MACRO_FUSED Available PDIST counters: 0,1",
"SampleAfterValue": "2000003",
"UMask": "0x30",
"Unit": "cpu_core"
@@ -1083,7 +1192,7 @@
"Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0xc0",
"EventName": "INST_RETIRED.NOP",
- "PublicDescription": "Counts all retired NOP or ENDBR32/64 or PREFETCHIT0/1 instructions Available PDIST counters: 0",
+ "PublicDescription": "Counts all retired NOP or ENDBR32/64 or PREFETCHIT0/1 instructions Available PDIST counters: 0,1",
"SampleAfterValue": "2000003",
"UMask": "0x2",
"Unit": "cpu_core"
@@ -1102,7 +1211,7 @@
"Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0xc0",
"EventName": "INST_RETIRED.REP_ITERATION",
- "PublicDescription": "Number of iterations of Repeat (REP) string retired instructions such as MOVS, CMPS, and SCAS. Each has a byte, word, and doubleword version and string instructions can be repeated using a repetition prefix, REP, that allows their architectural execution to be repeated a number of times as specified by the RCX register. Note the number of iterations is implementation-dependent. Available PDIST counters: 0",
+ "PublicDescription": "Number of iterations of Repeat (REP) string retired instructions such as MOVS, CMPS, and SCAS. Each has a byte, word, and doubleword version and string instructions can be repeated using a repetition prefix, REP, that allows their architectural execution to be repeated a number of times as specified by the RCX register. Note the number of iterations is implementation-dependent. Available PDIST counters: 0,1",
"SampleAfterValue": "2000003",
"UMask": "0x8",
"Unit": "cpu_core"
@@ -1181,6 +1290,15 @@
"Unit": "cpu_atom"
},
{
+ "BriefDescription": "Counts the number of uops executed on all Integer ports.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xb3",
+ "EventName": "INT_UOPS_EXECUTED.ALL",
+ "SampleAfterValue": "1000003",
+ "UMask": "0xff",
+ "Unit": "cpu_atom"
+ },
+ {
"BriefDescription": "Counts the number of uops executed on a load port.",
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xb3",
@@ -1191,6 +1309,42 @@
"Unit": "cpu_atom"
},
{
+ "BriefDescription": "Counts the number of uops executed on integer port 0.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xb3",
+ "EventName": "INT_UOPS_EXECUTED.P0",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x8",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of uops executed on integer port 1.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xb3",
+ "EventName": "INT_UOPS_EXECUTED.P1",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x10",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of uops executed on integer port 2.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xb3",
+ "EventName": "INT_UOPS_EXECUTED.P2",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x20",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of uops executed on integer port 3.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xb3",
+ "EventName": "INT_UOPS_EXECUTED.P3",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x40",
+ "Unit": "cpu_atom"
+ },
+ {
"BriefDescription": "Counts the number of uops executed on integer port 0,1, 2, 3.",
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xb3",
@@ -1321,6 +1475,15 @@
"Unit": "cpu_lowpower"
},
{
+ "BriefDescription": "Counts the number of occurrences a retired load was blocked for any of the following reasons: utlb_miss, 4k_alias, unknown_sta/bad_fwd, unready_fwd (includes md blocks and esp consuming load blocks)",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x03",
+ "EventName": "LD_BLOCKS.ALL",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x1f",
+ "Unit": "cpu_atom"
+ },
+ {
"BriefDescription": "Counts the number of occurrences a retired load gets blocked because its address exactly matches an older store whose data is not ready (a.k.a. unknown). unready_fwd",
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0x03",
@@ -1349,6 +1512,15 @@
"Unit": "cpu_core"
},
{
+ "BriefDescription": "Counts the number of times a load got early blocked due to preceding store operation with unknown address or unknown data. Excluding in-line (immediate) wakeups",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0x03",
+ "EventName": "LD_BLOCKS.STORE_EARLY",
+ "SampleAfterValue": "100003",
+ "UMask": "0xa1",
+ "Unit": "cpu_core"
+ },
+ {
"BriefDescription": "Counts the number of occurrences a retired load gets blocked because its address partially overlaps with an older store (size mismatch) - unknown_sta/bad_forward",
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0x03",
@@ -1377,6 +1549,25 @@
"Unit": "cpu_lowpower"
},
{
+ "BriefDescription": "Counts the number of demand loads that match on a wcb (request buffer) allocated by an L1 hardware prefetch [This event is alias to LOAD_HIT_PREFETCH.HW_PF]",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x4c",
+ "EventName": "LOAD_HIT_PREFETCH.HWPF",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x2",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "This event is deprecated. [This event is alias to LOAD_HIT_PREFETCH.HWPF]",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "Deprecated": "1",
+ "EventCode": "0x4c",
+ "EventName": "LOAD_HIT_PREFETCH.HW_PF",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x2",
+ "Unit": "cpu_atom"
+ },
+ {
"BriefDescription": "Cycles Uops delivered by the LSD, but didn't come from the decoder.",
"Counter": "0,1,2,3,4,5,6,7,8,9",
"CounterMask": "1",
@@ -1417,6 +1608,15 @@
"Unit": "cpu_atom"
},
{
+ "BriefDescription": "Counts the number of machine clears that flush the pipeline and restart the machine without the use of microcode.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xc3",
+ "EventName": "MACHINE_CLEARS.ANY_FAST",
+ "SampleAfterValue": "20003",
+ "UMask": "0xff",
+ "Unit": "cpu_atom"
+ },
+ {
"BriefDescription": "Number of machine clears (nukes) of any type.",
"Counter": "0,1,2,3,4,5,6,7,8,9",
"CounterMask": "1",
@@ -1447,6 +1647,15 @@
"Unit": "cpu_lowpower"
},
{
+ "BriefDescription": "Counts the number of machine clears that flush the pipeline and restart the machine without the use of microcode.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xc3",
+ "EventName": "MACHINE_CLEARS.DISAMBIGUATION_FAST",
+ "SampleAfterValue": "20003",
+ "UMask": "0x88",
+ "Unit": "cpu_atom"
+ },
+ {
"BriefDescription": "Counts the number of nukes due to memory renaming",
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc3",
@@ -1456,6 +1665,15 @@
"Unit": "cpu_atom"
},
{
+ "BriefDescription": "Counts the number of machine clears that flush the pipeline and restart the machine without the use of microcode.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xc3",
+ "EventName": "MACHINE_CLEARS.MRN_NUKE_FAST",
+ "SampleAfterValue": "20003",
+ "UMask": "0x90",
+ "Unit": "cpu_atom"
+ },
+ {
"BriefDescription": "Counts the number of times that the machine clears due to a page fault. Covers both I-Side and D-Side (Loads/Stores) page faults. A page fault occurs when either the page is not present, or an access violation.",
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc3",
@@ -1559,11 +1777,20 @@
"Unit": "cpu_core"
},
{
+ "BriefDescription": "Counts the number of LBR entries recorded. Requires LBRs to be enabled in IA32_LBR_CTL.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xe4",
+ "EventName": "MISC_RETIRED.LBR_INSERTS",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x1",
+ "Unit": "cpu_atom"
+ },
+ {
"BriefDescription": "LBR record is inserted",
"Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0xe4",
"EventName": "MISC_RETIRED.LBR_INSERTS",
- "PublicDescription": "LBR record is inserted Available PDIST counters: 0",
+ "PublicDescription": "LBR record is inserted Available PDIST counters: 0,1",
"SampleAfterValue": "1000003",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -1578,6 +1805,86 @@
"Unit": "cpu_lowpower"
},
{
+ "BriefDescription": "Counts the number of CLFLUSH, CLWB, and CLDEMOTE instructions retired.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xe0",
+ "EventName": "MISC_RETIRED1.CL_INST",
+ "SampleAfterValue": "1000003",
+ "UMask": "0xff",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of LFENCE instructions retired.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xe0",
+ "EventName": "MISC_RETIRED1.LFENCE",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x2",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of RDPMC, RDTSC, and RDTSCP instructions retired.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xe0",
+ "EventName": "MISC_RETIRED1.RDPMC_RDTSC_P",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x1",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Count the number of WRMSR instructions retired.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xe0",
+ "EventName": "MISC_RETIRED1.WRMSR",
+ "SampleAfterValue": "1000003",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of faults and software interrupts with vector < 32.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xe1",
+ "EventName": "MISC_RETIRED2.FAULT_ALL",
+ "PublicDescription": "Counts the number of faults and software interrupts with vector < 32, including VOE cases.",
+ "SampleAfterValue": "1000003",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of PSB+ nuke events and ToPA trap events.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xe1",
+ "EventName": "MISC_RETIRED2.INTEL_PT_CLEARS",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x2",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of user interrupts delivered.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xe1",
+ "EventName": "MISC_RETIRED2.ULI_DELIVERY",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x8",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of SENDUIPI instructions retired.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xe1",
+ "EventName": "MISC_RETIRED2.ULI_SENDUIPI",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x9",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of VM exits.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xe1",
+ "EventName": "MISC_RETIRED2.VM_EXIT",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x1",
+ "Unit": "cpu_atom"
+ },
+ {
"BriefDescription": "Cycles when Reservation Station (RS) is empty for the thread.",
"Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0xa5",
@@ -1628,6 +1935,15 @@
"Unit": "cpu_lowpower"
},
{
+ "BriefDescription": "Counts the number issue slots not consumed due to a color request for an FCW or MXCSR control register when all 4 colors (copies) are already in use",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x75",
+ "EventName": "SERIALIZATION.COLOR_STALLS",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x8",
+ "Unit": "cpu_atom"
+ },
+ {
"BriefDescription": "Counts the number of issue slots where no uop could issue due to an IQ scoreboard that stalls allocation until a specified older uop retires or (in the case of jump scoreboard) executes. Commonly executed instructions with IQ scoreboards include LFENCE and MFENCE.",
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0x75",
@@ -1705,6 +2021,15 @@
"Unit": "cpu_core"
},
{
+ "BriefDescription": "Fixed Counter: Counts the number of issue slots not consumed by the backend because allocation is stalled due to a mispredicted jump or a machine clear.",
+ "Counter": "Fixed counter 4",
+ "EventName": "TOPDOWN_BAD_SPECULATION.ALL",
+ "PublicDescription": "Fixed Counter: Counts the number of issue slots that were not consumed by the backend because allocation is stalled due to a mispredicted jump or a machine clear. Counts all issue slots blocked during this recovery window including relevant microcode flows and while uops are not yet available in the IQ. Also, includes the issue slots that were consumed by the backend but were thrown away because they were younger than the mispredict or machine clear.",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x5",
+ "Unit": "cpu_atom"
+ },
+ {
"BriefDescription": "Counts the number of issue slots that were not consumed by the backend because allocation is stalled due to a mispredicted jump or a machine clear.",
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0x73",
@@ -1821,6 +2146,14 @@
"Unit": "cpu_lowpower"
},
{
+ "BriefDescription": "Counts the number of retirement slots not consumed due to backend stalls",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x74",
+ "EventName": "TOPDOWN_BE_BOUND.ALL_NON_ARCH",
+ "SampleAfterValue": "1000003",
+ "Unit": "cpu_atom"
+ },
+ {
"BriefDescription": "Counts the number of retirement slots not consumed due to backend stalls [This event is alias to TOPDOWN_BE_BOUND.ALL]",
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xa4",
@@ -1929,7 +2262,7 @@
},
{
"BriefDescription": "Fixed Counter: Counts the number of retirement slots not consumed due to front end stalls.",
- "Counter": "37",
+ "Counter": "Fixed counter 5",
"EventName": "TOPDOWN_FE_BOUND.ALL",
"SampleAfterValue": "1000003",
"UMask": "0x6",
@@ -1938,6 +2271,14 @@
{
"BriefDescription": "Counts the number of retirement slots not consumed due to front end stalls",
"Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x71",
+ "EventName": "TOPDOWN_FE_BOUND.ALL_NON_ARCH",
+ "SampleAfterValue": "1000003",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of retirement slots not consumed due to front end stalls",
+ "Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0x9c",
"EventName": "TOPDOWN_FE_BOUND.ALL_P",
"SampleAfterValue": "1000003",
@@ -2126,7 +2467,7 @@
},
{
"BriefDescription": "Fixed Counter: Counts the number of consumed retirement slots.",
- "Counter": "38",
+ "Counter": "Fixed counter 6",
"EventName": "TOPDOWN_RETIRING.ALL",
"SampleAfterValue": "1000003",
"UMask": "0x7",
@@ -2135,6 +2476,14 @@
{
"BriefDescription": "Counts the number of consumed retirement slots.",
"Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x72",
+ "EventName": "TOPDOWN_RETIRING.ALL_NON_ARCH",
+ "SampleAfterValue": "1000003",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of consumed retirement slots.",
+ "Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc2",
"EventName": "TOPDOWN_RETIRING.ALL_P",
"SampleAfterValue": "1000003",
@@ -2352,6 +2701,14 @@
"Unit": "cpu_core"
},
{
+ "BriefDescription": "Counts the number of uops retired",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xc2",
+ "EventName": "UOPS_RETIRED.ALL",
+ "SampleAfterValue": "2000003",
+ "Unit": "cpu_atom"
+ },
+ {
"BriefDescription": "Counts the total number of uops retired.",
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc2",
@@ -2399,6 +2756,15 @@
"Unit": "cpu_lowpower"
},
{
+ "BriefDescription": "Counts the number of uops retired that were delivered by the loop stream detector (LSD).",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xc2",
+ "EventName": "UOPS_RETIRED.LSD",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x4",
+ "Unit": "cpu_atom"
+ },
+ {
"BriefDescription": "Counts the number of uops that are from the complex flows issued by the micro-sequencer (MS). This includes uops from flows due to complex instructions, faults, assists, and inserted flows.",
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc2",
diff --git a/tools/perf/pmu-events/arch/x86/arrowlake/virtual-memory.json b/tools/perf/pmu-events/arch/x86/arrowlake/virtual-memory.json
index a3e4a4f3ab45..602e2ad5de6e 100644
--- a/tools/perf/pmu-events/arch/x86/arrowlake/virtual-memory.json
+++ b/tools/perf/pmu-events/arch/x86/arrowlake/virtual-memory.json
@@ -9,6 +9,15 @@
"Unit": "cpu_atom"
},
{
+ "BriefDescription": "Counts walks that miss the PDE_CACHE",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x08",
+ "EventName": "DTLB_LOAD_MISSES.PDE_CACHE_MISS",
+ "SampleAfterValue": "200003",
+ "UMask": "0x80",
+ "Unit": "cpu_atom"
+ },
+ {
"BriefDescription": "Counts the number of first level TLB misses but second level hits due to a demand load that did not start a page walk. Accounts for all page sizes. Will result in a DTLB write from STLB.",
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0x08",
@@ -48,6 +57,16 @@
"Unit": "cpu_core"
},
{
+ "BriefDescription": "Counts the number of page walks completed due to load DTLB misses to any page size.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x08",
+ "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED",
+ "PublicDescription": "Counts the number of page walks completed due to loads (including SW prefetches) whose address translations missed in all Translation Lookaside Buffer (TLB) levels and were mapped to any page size. Includes page walks that page fault.",
+ "SampleAfterValue": "200003",
+ "UMask": "0xe",
+ "Unit": "cpu_atom"
+ },
+ {
"BriefDescription": "Load miss in all TLB levels causes a page walk that completes. (All page sizes)",
"Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0x12",
@@ -176,6 +195,15 @@
"Unit": "cpu_atom"
},
{
+ "BriefDescription": "Counts walks that miss the PDE_CACHE",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x49",
+ "EventName": "DTLB_STORE_MISSES.PDE_CACHE_MISS",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x80",
+ "Unit": "cpu_atom"
+ },
+ {
"BriefDescription": "Counts the number of first level TLB misses but second level hits due to stores that did not start a page walk. Accounts for all page sizes. Will result in a DTLB write from STLB.",
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0x49",
@@ -216,6 +244,16 @@
"Unit": "cpu_core"
},
{
+ "BriefDescription": "Counts the number of page walks completed due to store DTLB misses to any page size.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x49",
+ "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED",
+ "PublicDescription": "Counts the number of page walks completed due to stores whose address translations missed in all Translation Lookaside Buffer (TLB) levels and were mapped to any page size. Includes page walks that page fault.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0xe",
+ "Unit": "cpu_atom"
+ },
+ {
"BriefDescription": "Store misses in all TLB levels causes a page walk that completes. (All page sizes)",
"Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0x13",
@@ -245,6 +283,16 @@
"Unit": "cpu_core"
},
{
+ "BriefDescription": "Counts the number of page walks completed due to store DTLB misses to a 2M or 4M page.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x49",
+ "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_2M_4M",
+ "PublicDescription": "Counts the number of page walks completed due to stores whose address translations missed in all Translation Lookaside Buffer (TLB) levels and were mapped to 2M or 4M pages. Includes page walks that page fault.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x4",
+ "Unit": "cpu_atom"
+ },
+ {
"BriefDescription": "Page walks completed due to a demand data store to a 2M/4M page.",
"Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0x13",
@@ -325,6 +373,16 @@
"Unit": "cpu_lowpower"
},
{
+ "BriefDescription": "Counts the number of times there was an ITLB miss and a new translation was filled into the ITLB.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x81",
+ "EventName": "ITLB.FILLS",
+ "PublicDescription": "Counts the number of times the machine was unable to find a translation in the Instruction Translation Lookaside Buffer (ITLB) and a new translation was filled into the ITLB. The event is speculative in nature, but will not count translations (page walks) that are begun and not finished, or translations that are finished but not filled into the ITLB.",
+ "SampleAfterValue": "200003",
+ "UMask": "0x4",
+ "Unit": "cpu_atom"
+ },
+ {
"BriefDescription": "Counts the number of page walks initiated by a instruction fetch that missed the first and second level TLBs.",
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0x85",
@@ -343,6 +401,15 @@
"Unit": "cpu_lowpower"
},
{
+ "BriefDescription": "Counts walks that miss the PDE_CACHE",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x85",
+ "EventName": "ITLB_MISSES.PDE_CACHE_MISS",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x80",
+ "Unit": "cpu_atom"
+ },
+ {
"BriefDescription": "Counts the number of first level TLB misses but second level hits due to an instruction fetch that did not start a page walk. Account for all pages sizes. Will result in an ITLB write from STLB.",
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0x85",
@@ -502,6 +569,24 @@
"Unit": "cpu_lowpower"
},
{
+ "BriefDescription": "Counts the number of occurrences a load gets blocked because of a micro TLB miss",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x03",
+ "EventName": "LD_BLOCKS.DTLB_MISS",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x8",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of cycles that the head (oldest load) of the load buffer is stalled due to a DTLB miss",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x05",
+ "EventName": "LD_HEAD.DTLB_MISS",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x10",
+ "Unit": "cpu_atom"
+ },
+ {
"BriefDescription": "Counts the number of cycles that the head (oldest load) of the load buffer and retirement are both stalled due to a DTLB miss.",
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0x05",
@@ -518,5 +603,33 @@
"SampleAfterValue": "1000003",
"UMask": "0x90",
"Unit": "cpu_lowpower"
+ },
+ {
+ "BriefDescription": "Counts the number of PMH walks that hit in the L1 or WCBs",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xbc",
+ "EventName": "PAGE_WALKER_LOADS.DTLB_L1_HIT",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x1",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of PMH walks that hit in the L2",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xbc",
+ "EventName": "PAGE_WALKER_LOADS.DTLB_L2_HIT",
+ "PublicDescription": "Counts the number of PMH walks that hit in the L2. Includes L2 Hit resulting from and L1D eviction of another core in the same module which is longer latency than a typical L2 hit.",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x2",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Count number of any STLB flush attempts (Entire, PCID, InvPage, CR3 write, etc)",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xbd",
+ "EventName": "TLB_FLUSHES.STLB_ANY",
+ "SampleAfterValue": "20003",
+ "UMask": "0x20",
+ "Unit": "cpu_atom"
}
]
diff --git a/tools/perf/pmu-events/arch/x86/broadwell/bdw-metrics.json b/tools/perf/pmu-events/arch/x86/broadwell/bdw-metrics.json
index 89750117a7f6..1d8e910f5961 100644
--- a/tools/perf/pmu-events/arch/x86/broadwell/bdw-metrics.json
+++ b/tools/perf/pmu-events/arch/x86/broadwell/bdw-metrics.json
@@ -1,49 +1,49 @@
[
{
"BriefDescription": "C2 residency percent per package",
- "MetricExpr": "cstate_pkg@c2\\-residency@ / TSC",
+ "MetricExpr": "cstate_pkg@c2\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C2_Pkg_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C3 residency percent per core",
- "MetricExpr": "cstate_core@c3\\-residency@ / TSC",
+ "MetricExpr": "cstate_core@c3\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C3_Core_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C3 residency percent per package",
- "MetricExpr": "cstate_pkg@c3\\-residency@ / TSC",
+ "MetricExpr": "cstate_pkg@c3\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C3_Pkg_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C6 residency percent per core",
- "MetricExpr": "cstate_core@c6\\-residency@ / TSC",
+ "MetricExpr": "cstate_core@c6\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C6_Core_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C6 residency percent per package",
- "MetricExpr": "cstate_pkg@c6\\-residency@ / TSC",
+ "MetricExpr": "cstate_pkg@c6\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C6_Pkg_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C7 residency percent per core",
- "MetricExpr": "cstate_core@c7\\-residency@ / TSC",
+ "MetricExpr": "cstate_core@c7\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C7_Core_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C7 residency percent per package",
- "MetricExpr": "cstate_pkg@c7\\-residency@ / TSC",
+ "MetricExpr": "cstate_pkg@c7\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C7_Pkg_Residency",
"ScaleUnit": "100%"
@@ -80,7 +80,6 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution ports for ALU operations.",
- "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "(UOPS_DISPATCHED_PORT.PORT_0 + UOPS_DISPATCHED_PORT.PORT_1 + UOPS_DISPATCHED_PORT.PORT_5 + UOPS_DISPATCHED_PORT.PORT_6) / tma_info_thread_slots",
"MetricGroup": "TopdownL5;tma_L5_group;tma_ports_utilized_3m_group",
"MetricName": "tma_alu_op_utilization",
@@ -98,7 +97,6 @@
},
{
"BriefDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend",
- "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "1 - (tma_frontend_bound + tma_bad_speculation + tma_retiring)",
"MetricGroup": "BvOB;TmaL1;TopdownL1;tma_L1_group",
"MetricName": "tma_backend_bound",
@@ -139,7 +137,6 @@
},
{
"BriefDescription": "This metric estimates fraction of cycles the CPU retired uops originated from CISC (complex instruction set computer) instruction",
- "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "max(0, tma_microcode_sequencer - tma_assists)",
"MetricGroup": "TopdownL4;tma_L4_group;tma_microcode_sequencer_group",
"MetricName": "tma_cisc",
@@ -640,7 +637,7 @@
"MetricThreshold": "tma_info_memory_tlb_page_walks_utilization > 0.5"
},
{
- "BriefDescription": "",
+ "BriefDescription": "Mem;Backend;CacheHits",
"MetricExpr": "UOPS_EXECUTED.THREAD / (cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ / 2 if #SMT_on else UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC)",
"MetricGroup": "Cor;Pipeline;PortsUtil;SMT",
"MetricName": "tma_info_pipeline_execute"
@@ -653,7 +650,7 @@
},
{
"BriefDescription": "Measured Average Core Frequency for unhalted processors [GHz]",
- "MetricExpr": "tma_info_system_turbo_utilization * TSC / 1e9 / tma_info_system_time",
+ "MetricExpr": "tma_info_system_turbo_utilization * msr@tsc@ / 1e9 / tma_info_system_time",
"MetricGroup": "Power;Summary",
"MetricName": "tma_info_system_core_frequency"
},
@@ -665,7 +662,7 @@
},
{
"BriefDescription": "Average number of utilized CPUs",
- "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / TSC",
+ "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / msr@tsc@",
"MetricGroup": "Summary",
"MetricName": "tma_info_system_cpus_utilized"
},
@@ -854,7 +851,6 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port for Load operations",
- "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "(UOPS_DISPATCHED_PORT.PORT_2 + UOPS_DISPATCHED_PORT.PORT_3 + UOPS_DISPATCHED_PORT.PORT_7 - UOPS_DISPATCHED_PORT.PORT_4) / (2 * tma_info_core_core_clks)",
"MetricGroup": "TopdownL5;tma_L5_group;tma_ports_utilized_3m_group",
"MetricName": "tma_load_op_utilization",
@@ -1032,7 +1028,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles CPU executed no uops on any execution port (Logical Processor cycles since ICL, Physical Core cycles otherwise)",
- "MetricExpr": "(cpu@UOPS_EXECUTED.CORE\\,inv\\,cmask\\=1@ / 2 if #SMT_on else (CYCLE_ACTIVITY.STALLS_TOTAL - (RS_EVENTS.EMPTY_CYCLES if tma_fetch_latency > 0.1 else 0)) / tma_info_core_core_clks)",
+ "MetricExpr": "(cpu@UOPS_EXECUTED.CORE\\,inv\\,cmask\\=1@ / 2 if #SMT_on else CYCLE_ACTIVITY.STALLS_TOTAL - (RS_EVENTS.EMPTY_CYCLES if tma_fetch_latency > 0.1 else 0)) / tma_info_core_core_clks",
"MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_ports_utilization_group",
"MetricName": "tma_ports_utilized_0",
"MetricThreshold": "tma_ports_utilized_0 > 0.2 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))",
@@ -1041,7 +1037,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles where the CPU executed total of 1 uop per cycle on all execution ports (Logical Processor cycles since ICL, Physical Core cycles otherwise)",
- "MetricExpr": "((cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ - cpu@UOPS_EXECUTED.CORE\\,cmask\\=2@) / 2 if #SMT_on else (UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC - UOPS_EXECUTED.CYCLES_GE_2_UOPS_EXEC) / tma_info_core_core_clks)",
+ "MetricExpr": "((cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ - cpu@UOPS_EXECUTED.CORE\\,cmask\\=2@) / 2 if #SMT_on else UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC - UOPS_EXECUTED.CYCLES_GE_2_UOPS_EXEC) / tma_info_core_core_clks",
"MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_issueL1;tma_ports_utilization_group",
"MetricName": "tma_ports_utilized_1",
"MetricThreshold": "tma_ports_utilized_1 > 0.2 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))",
@@ -1050,7 +1046,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles CPU executed total of 2 uops per cycle on all execution ports (Logical Processor cycles since ICL, Physical Core cycles otherwise)",
- "MetricExpr": "((cpu@UOPS_EXECUTED.CORE\\,cmask\\=2@ - cpu@UOPS_EXECUTED.CORE\\,cmask\\=3@) / 2 if #SMT_on else (UOPS_EXECUTED.CYCLES_GE_2_UOPS_EXEC - UOPS_EXECUTED.CYCLES_GE_3_UOPS_EXEC) / tma_info_core_core_clks)",
+ "MetricExpr": "((cpu@UOPS_EXECUTED.CORE\\,cmask\\=2@ - cpu@UOPS_EXECUTED.CORE\\,cmask\\=3@) / 2 if #SMT_on else UOPS_EXECUTED.CYCLES_GE_2_UOPS_EXEC - UOPS_EXECUTED.CYCLES_GE_3_UOPS_EXEC) / tma_info_core_core_clks",
"MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_issue2P;tma_ports_utilization_group",
"MetricName": "tma_ports_utilized_2",
"MetricThreshold": "tma_ports_utilized_2 > 0.15 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))",
diff --git a/tools/perf/pmu-events/arch/x86/broadwellde/bdwde-metrics.json b/tools/perf/pmu-events/arch/x86/broadwellde/bdwde-metrics.json
index 81175f0f2603..a5e408ca46a7 100644
--- a/tools/perf/pmu-events/arch/x86/broadwellde/bdwde-metrics.json
+++ b/tools/perf/pmu-events/arch/x86/broadwellde/bdwde-metrics.json
@@ -1,49 +1,49 @@
[
{
"BriefDescription": "C2 residency percent per package",
- "MetricExpr": "cstate_pkg@c2\\-residency@ / TSC",
+ "MetricExpr": "cstate_pkg@c2\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C2_Pkg_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C3 residency percent per core",
- "MetricExpr": "cstate_core@c3\\-residency@ / TSC",
+ "MetricExpr": "cstate_core@c3\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C3_Core_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C3 residency percent per package",
- "MetricExpr": "cstate_pkg@c3\\-residency@ / TSC",
+ "MetricExpr": "cstate_pkg@c3\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C3_Pkg_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C6 residency percent per core",
- "MetricExpr": "cstate_core@c6\\-residency@ / TSC",
+ "MetricExpr": "cstate_core@c6\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C6_Core_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C6 residency percent per package",
- "MetricExpr": "cstate_pkg@c6\\-residency@ / TSC",
+ "MetricExpr": "cstate_pkg@c6\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C6_Pkg_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C7 residency percent per core",
- "MetricExpr": "cstate_core@c7\\-residency@ / TSC",
+ "MetricExpr": "cstate_core@c7\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C7_Core_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C7 residency percent per package",
- "MetricExpr": "cstate_pkg@c7\\-residency@ / TSC",
+ "MetricExpr": "cstate_pkg@c7\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C7_Pkg_Residency",
"ScaleUnit": "100%"
@@ -80,7 +80,6 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution ports for ALU operations.",
- "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "(UOPS_DISPATCHED_PORT.PORT_0 + UOPS_DISPATCHED_PORT.PORT_1 + UOPS_DISPATCHED_PORT.PORT_5 + UOPS_DISPATCHED_PORT.PORT_6) / tma_info_thread_slots",
"MetricGroup": "TopdownL5;tma_L5_group;tma_ports_utilized_3m_group",
"MetricName": "tma_alu_op_utilization",
@@ -98,7 +97,6 @@
},
{
"BriefDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend",
- "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "1 - (tma_frontend_bound + tma_bad_speculation + tma_retiring)",
"MetricGroup": "BvOB;TmaL1;TopdownL1;tma_L1_group",
"MetricName": "tma_backend_bound",
@@ -139,7 +137,6 @@
},
{
"BriefDescription": "This metric estimates fraction of cycles the CPU retired uops originated from CISC (complex instruction set computer) instruction",
- "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "max(0, tma_microcode_sequencer - tma_assists)",
"MetricGroup": "TopdownL4;tma_L4_group;tma_microcode_sequencer_group",
"MetricName": "tma_cisc",
@@ -632,7 +629,7 @@
"MetricThreshold": "tma_info_memory_tlb_page_walks_utilization > 0.5"
},
{
- "BriefDescription": "",
+ "BriefDescription": "Mem;Backend;CacheHits",
"MetricExpr": "UOPS_EXECUTED.THREAD / (cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ / 2 if #SMT_on else UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC)",
"MetricGroup": "Cor;Pipeline;PortsUtil;SMT",
"MetricName": "tma_info_pipeline_execute"
@@ -645,7 +642,7 @@
},
{
"BriefDescription": "Measured Average Core Frequency for unhalted processors [GHz]",
- "MetricExpr": "tma_info_system_turbo_utilization * TSC / 1e9 / tma_info_system_time",
+ "MetricExpr": "tma_info_system_turbo_utilization * msr@tsc@ / 1e9 / tma_info_system_time",
"MetricGroup": "Power;Summary",
"MetricName": "tma_info_system_core_frequency"
},
@@ -657,7 +654,7 @@
},
{
"BriefDescription": "Average number of utilized CPUs",
- "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / TSC",
+ "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / msr@tsc@",
"MetricGroup": "Summary",
"MetricName": "tma_info_system_cpus_utilized"
},
@@ -846,7 +843,6 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port for Load operations",
- "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "(UOPS_DISPATCHED_PORT.PORT_2 + UOPS_DISPATCHED_PORT.PORT_3 + UOPS_DISPATCHED_PORT.PORT_7 - UOPS_DISPATCHED_PORT.PORT_4) / (2 * tma_info_core_core_clks)",
"MetricGroup": "TopdownL5;tma_L5_group;tma_ports_utilized_3m_group",
"MetricName": "tma_load_op_utilization",
@@ -1021,7 +1017,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles CPU executed no uops on any execution port (Logical Processor cycles since ICL, Physical Core cycles otherwise)",
- "MetricExpr": "(cpu@UOPS_EXECUTED.CORE\\,inv\\,cmask\\=1@ / 2 if #SMT_on else (CYCLE_ACTIVITY.STALLS_TOTAL - (RS_EVENTS.EMPTY_CYCLES if tma_fetch_latency > 0.1 else 0)) / tma_info_core_core_clks)",
+ "MetricExpr": "(cpu@UOPS_EXECUTED.CORE\\,inv\\,cmask\\=1@ / 2 if #SMT_on else CYCLE_ACTIVITY.STALLS_TOTAL - (RS_EVENTS.EMPTY_CYCLES if tma_fetch_latency > 0.1 else 0)) / tma_info_core_core_clks",
"MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_ports_utilization_group",
"MetricName": "tma_ports_utilized_0",
"MetricThreshold": "tma_ports_utilized_0 > 0.2 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))",
@@ -1030,7 +1026,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles where the CPU executed total of 1 uop per cycle on all execution ports (Logical Processor cycles since ICL, Physical Core cycles otherwise)",
- "MetricExpr": "((cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ - cpu@UOPS_EXECUTED.CORE\\,cmask\\=2@) / 2 if #SMT_on else (UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC - UOPS_EXECUTED.CYCLES_GE_2_UOPS_EXEC) / tma_info_core_core_clks)",
+ "MetricExpr": "((cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ - cpu@UOPS_EXECUTED.CORE\\,cmask\\=2@) / 2 if #SMT_on else UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC - UOPS_EXECUTED.CYCLES_GE_2_UOPS_EXEC) / tma_info_core_core_clks",
"MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_issueL1;tma_ports_utilization_group",
"MetricName": "tma_ports_utilized_1",
"MetricThreshold": "tma_ports_utilized_1 > 0.2 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))",
@@ -1039,7 +1035,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles CPU executed total of 2 uops per cycle on all execution ports (Logical Processor cycles since ICL, Physical Core cycles otherwise)",
- "MetricExpr": "((cpu@UOPS_EXECUTED.CORE\\,cmask\\=2@ - cpu@UOPS_EXECUTED.CORE\\,cmask\\=3@) / 2 if #SMT_on else (UOPS_EXECUTED.CYCLES_GE_2_UOPS_EXEC - UOPS_EXECUTED.CYCLES_GE_3_UOPS_EXEC) / tma_info_core_core_clks)",
+ "MetricExpr": "((cpu@UOPS_EXECUTED.CORE\\,cmask\\=2@ - cpu@UOPS_EXECUTED.CORE\\,cmask\\=3@) / 2 if #SMT_on else UOPS_EXECUTED.CYCLES_GE_2_UOPS_EXEC - UOPS_EXECUTED.CYCLES_GE_3_UOPS_EXEC) / tma_info_core_core_clks",
"MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_issue2P;tma_ports_utilization_group",
"MetricName": "tma_ports_utilized_2",
"MetricThreshold": "tma_ports_utilized_2 > 0.15 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))",
diff --git a/tools/perf/pmu-events/arch/x86/broadwellx/bdx-metrics.json b/tools/perf/pmu-events/arch/x86/broadwellx/bdx-metrics.json
index 5d06a3f72be2..5b83b040060c 100644
--- a/tools/perf/pmu-events/arch/x86/broadwellx/bdx-metrics.json
+++ b/tools/perf/pmu-events/arch/x86/broadwellx/bdx-metrics.json
@@ -1,49 +1,49 @@
[
{
"BriefDescription": "C2 residency percent per package",
- "MetricExpr": "cstate_pkg@c2\\-residency@ / TSC",
+ "MetricExpr": "cstate_pkg@c2\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C2_Pkg_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C3 residency percent per core",
- "MetricExpr": "cstate_core@c3\\-residency@ / TSC",
+ "MetricExpr": "cstate_core@c3\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C3_Core_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C3 residency percent per package",
- "MetricExpr": "cstate_pkg@c3\\-residency@ / TSC",
+ "MetricExpr": "cstate_pkg@c3\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C3_Pkg_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C6 residency percent per core",
- "MetricExpr": "cstate_core@c6\\-residency@ / TSC",
+ "MetricExpr": "cstate_core@c6\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C6_Core_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C6 residency percent per package",
- "MetricExpr": "cstate_pkg@c6\\-residency@ / TSC",
+ "MetricExpr": "cstate_pkg@c6\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C6_Pkg_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C7 residency percent per core",
- "MetricExpr": "cstate_core@c7\\-residency@ / TSC",
+ "MetricExpr": "cstate_core@c7\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C7_Core_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C7 residency percent per package",
- "MetricExpr": "cstate_pkg@c7\\-residency@ / TSC",
+ "MetricExpr": "cstate_pkg@c7\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C7_Pkg_Residency",
"ScaleUnit": "100%"
@@ -282,7 +282,6 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution ports for ALU operations.",
- "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "(UOPS_DISPATCHED_PORT.PORT_0 + UOPS_DISPATCHED_PORT.PORT_1 + UOPS_DISPATCHED_PORT.PORT_5 + UOPS_DISPATCHED_PORT.PORT_6) / tma_info_thread_slots",
"MetricGroup": "TopdownL5;tma_L5_group;tma_ports_utilized_3m_group",
"MetricName": "tma_alu_op_utilization",
@@ -300,7 +299,6 @@
},
{
"BriefDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend",
- "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "1 - (tma_frontend_bound + tma_bad_speculation + tma_retiring)",
"MetricGroup": "BvOB;TmaL1;TopdownL1;tma_L1_group",
"MetricName": "tma_backend_bound",
@@ -341,7 +339,6 @@
},
{
"BriefDescription": "This metric estimates fraction of cycles the CPU retired uops originated from CISC (complex instruction set computer) instruction",
- "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "max(0, tma_microcode_sequencer - tma_assists)",
"MetricGroup": "TopdownL4;tma_L4_group;tma_microcode_sequencer_group",
"MetricName": "tma_cisc",
@@ -842,7 +839,7 @@
"MetricThreshold": "tma_info_memory_tlb_page_walks_utilization > 0.5"
},
{
- "BriefDescription": "",
+ "BriefDescription": "Mem;Backend;CacheHits",
"MetricExpr": "UOPS_EXECUTED.THREAD / (cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ / 2 if #SMT_on else UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC)",
"MetricGroup": "Cor;Pipeline;PortsUtil;SMT",
"MetricName": "tma_info_pipeline_execute"
@@ -855,7 +852,7 @@
},
{
"BriefDescription": "Measured Average Core Frequency for unhalted processors [GHz]",
- "MetricExpr": "tma_info_system_turbo_utilization * TSC / 1e9 / tma_info_system_time",
+ "MetricExpr": "tma_info_system_turbo_utilization * msr@tsc@ / 1e9 / tma_info_system_time",
"MetricGroup": "Power;Summary",
"MetricName": "tma_info_system_core_frequency"
},
@@ -867,7 +864,7 @@
},
{
"BriefDescription": "Average number of utilized CPUs",
- "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / TSC",
+ "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / msr@tsc@",
"MetricGroup": "Summary",
"MetricName": "tma_info_system_cpus_utilized"
},
@@ -907,6 +904,7 @@
},
{
"BriefDescription": "Average number of parallel data read requests to external memory",
+ "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "UNC_C_TOR_OCCUPANCY.MISS_OPCODE@filter_opc\\=0x182@ / UNC_C_TOR_OCCUPANCY.MISS_OPCODE@filter_opc\\=0x182\\,thresh\\=1@",
"MetricGroup": "Mem;MemoryBW;SoC",
"MetricName": "tma_info_system_mem_parallel_reads",
@@ -1076,7 +1074,6 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port for Load operations",
- "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "(UOPS_DISPATCHED_PORT.PORT_2 + UOPS_DISPATCHED_PORT.PORT_3 + UOPS_DISPATCHED_PORT.PORT_7 - UOPS_DISPATCHED_PORT.PORT_4) / (2 * tma_info_core_core_clks)",
"MetricGroup": "TopdownL5;tma_L5_group;tma_ports_utilized_3m_group",
"MetricName": "tma_load_op_utilization",
@@ -1086,6 +1083,7 @@
},
{
"BriefDescription": "This metric estimates fraction of cycles while the memory subsystem was handling loads from local memory",
+ "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "200 * (MEM_LOAD_UOPS_L3_MISS_RETIRED.LOCAL_DRAM * (1 + MEM_LOAD_UOPS_RETIRED.HIT_LFB / (MEM_LOAD_UOPS_RETIRED.L2_HIT + MEM_LOAD_UOPS_RETIRED.L3_HIT + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HITM + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_MISS + MEM_LOAD_UOPS_L3_MISS_RETIRED.LOCAL_DRAM + MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_DRAM + MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_HITM + MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_FWD))) / tma_info_thread_clks",
"MetricGroup": "Server;TopdownL5;tma_L5_group;tma_mem_latency_group",
"MetricName": "tma_local_mem",
@@ -1263,7 +1261,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles CPU executed no uops on any execution port (Logical Processor cycles since ICL, Physical Core cycles otherwise)",
- "MetricExpr": "(cpu@UOPS_EXECUTED.CORE\\,inv\\,cmask\\=1@ / 2 if #SMT_on else (CYCLE_ACTIVITY.STALLS_TOTAL - (RS_EVENTS.EMPTY_CYCLES if tma_fetch_latency > 0.1 else 0)) / tma_info_core_core_clks)",
+ "MetricExpr": "(cpu@UOPS_EXECUTED.CORE\\,inv\\,cmask\\=1@ / 2 if #SMT_on else CYCLE_ACTIVITY.STALLS_TOTAL - (RS_EVENTS.EMPTY_CYCLES if tma_fetch_latency > 0.1 else 0)) / tma_info_core_core_clks",
"MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_ports_utilization_group",
"MetricName": "tma_ports_utilized_0",
"MetricThreshold": "tma_ports_utilized_0 > 0.2 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))",
@@ -1272,7 +1270,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles where the CPU executed total of 1 uop per cycle on all execution ports (Logical Processor cycles since ICL, Physical Core cycles otherwise)",
- "MetricExpr": "((cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ - cpu@UOPS_EXECUTED.CORE\\,cmask\\=2@) / 2 if #SMT_on else (UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC - UOPS_EXECUTED.CYCLES_GE_2_UOPS_EXEC) / tma_info_core_core_clks)",
+ "MetricExpr": "((cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ - cpu@UOPS_EXECUTED.CORE\\,cmask\\=2@) / 2 if #SMT_on else UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC - UOPS_EXECUTED.CYCLES_GE_2_UOPS_EXEC) / tma_info_core_core_clks",
"MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_issueL1;tma_ports_utilization_group",
"MetricName": "tma_ports_utilized_1",
"MetricThreshold": "tma_ports_utilized_1 > 0.2 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))",
@@ -1281,7 +1279,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles CPU executed total of 2 uops per cycle on all execution ports (Logical Processor cycles since ICL, Physical Core cycles otherwise)",
- "MetricExpr": "((cpu@UOPS_EXECUTED.CORE\\,cmask\\=2@ - cpu@UOPS_EXECUTED.CORE\\,cmask\\=3@) / 2 if #SMT_on else (UOPS_EXECUTED.CYCLES_GE_2_UOPS_EXEC - UOPS_EXECUTED.CYCLES_GE_3_UOPS_EXEC) / tma_info_core_core_clks)",
+ "MetricExpr": "((cpu@UOPS_EXECUTED.CORE\\,cmask\\=2@ - cpu@UOPS_EXECUTED.CORE\\,cmask\\=3@) / 2 if #SMT_on else UOPS_EXECUTED.CYCLES_GE_2_UOPS_EXEC - UOPS_EXECUTED.CYCLES_GE_3_UOPS_EXEC) / tma_info_core_core_clks",
"MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_issue2P;tma_ports_utilization_group",
"MetricName": "tma_ports_utilized_2",
"MetricThreshold": "tma_ports_utilized_2 > 0.15 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))",
@@ -1308,6 +1306,7 @@
},
{
"BriefDescription": "This metric estimates fraction of cycles while the memory subsystem was handling loads from remote memory",
+ "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "310 * (MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_DRAM * (1 + MEM_LOAD_UOPS_RETIRED.HIT_LFB / (MEM_LOAD_UOPS_RETIRED.L2_HIT + MEM_LOAD_UOPS_RETIRED.L3_HIT + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HITM + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_MISS + MEM_LOAD_UOPS_L3_MISS_RETIRED.LOCAL_DRAM + MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_DRAM + MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_HITM + MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_FWD))) / tma_info_thread_clks",
"MetricGroup": "Server;Snoop;TopdownL5;tma_L5_group;tma_mem_latency_group",
"MetricName": "tma_remote_mem",
diff --git a/tools/perf/pmu-events/arch/x86/cascadelakex/clx-metrics.json b/tools/perf/pmu-events/arch/x86/cascadelakex/clx-metrics.json
index 6485b565acbc..2e50a91b6728 100644
--- a/tools/perf/pmu-events/arch/x86/cascadelakex/clx-metrics.json
+++ b/tools/perf/pmu-events/arch/x86/cascadelakex/clx-metrics.json
@@ -1,49 +1,49 @@
[
{
"BriefDescription": "C2 residency percent per package",
- "MetricExpr": "cstate_pkg@c2\\-residency@ / TSC",
+ "MetricExpr": "cstate_pkg@c2\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C2_Pkg_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C3 residency percent per core",
- "MetricExpr": "cstate_core@c3\\-residency@ / TSC",
+ "MetricExpr": "cstate_core@c3\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C3_Core_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C3 residency percent per package",
- "MetricExpr": "cstate_pkg@c3\\-residency@ / TSC",
+ "MetricExpr": "cstate_pkg@c3\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C3_Pkg_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C6 residency percent per core",
- "MetricExpr": "cstate_core@c6\\-residency@ / TSC",
+ "MetricExpr": "cstate_core@c6\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C6_Core_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C6 residency percent per package",
- "MetricExpr": "cstate_pkg@c6\\-residency@ / TSC",
+ "MetricExpr": "cstate_pkg@c6\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C6_Pkg_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C7 residency percent per core",
- "MetricExpr": "cstate_core@c7\\-residency@ / TSC",
+ "MetricExpr": "cstate_core@c7\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C7_Core_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C7 residency percent per package",
- "MetricExpr": "cstate_pkg@c7\\-residency@ / TSC",
+ "MetricExpr": "cstate_pkg@c7\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C7_Pkg_Residency",
"ScaleUnit": "100%"
@@ -319,6 +319,7 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution ports for ALU operations.",
+ "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "(UOPS_DISPATCHED_PORT.PORT_0 + UOPS_DISPATCHED_PORT.PORT_1 + UOPS_DISPATCHED_PORT.PORT_5 + UOPS_DISPATCHED_PORT.PORT_6) / tma_info_thread_slots",
"MetricGroup": "TopdownL5;tma_L5_group;tma_ports_utilized_3m_group",
"MetricName": "tma_alu_op_utilization",
@@ -356,6 +357,7 @@
},
{
"BriefDescription": "Total pipeline cost of instruction fetch related bottlenecks by large code footprint programs (i-side cache; TLB and BTB misses)",
+ "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "100 * tma_fetch_latency * (tma_itlb_misses + tma_icache_misses + tma_unknown_branches) / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches)",
"MetricGroup": "BigFootprint;BvBC;Fed;Frontend;IcMiss;MemoryTLB",
"MetricName": "tma_bottleneck_big_code",
@@ -370,31 +372,35 @@
"PublicDescription": "Total pipeline cost of instructions used for program control-flow - a subset of the Retiring category in TMA. Examples include function calls; loops and alignments. (A lower bound)"
},
{
+ "BriefDescription": "Total pipeline cost when the execution is compute-bound - an estimation",
+ "MetricConstraint": "NO_GROUP_EVENTS",
+ "MetricExpr": "100 * (tma_core_bound * tma_divider / (tma_divider + tma_ports_utilization + tma_serializing_operation) + tma_core_bound * (tma_ports_utilization / (tma_divider + tma_ports_utilization + tma_serializing_operation)) * (tma_ports_utilized_3m / (tma_ports_utilized_0 + tma_ports_utilized_1 + tma_ports_utilized_2 + tma_ports_utilized_3m)))",
+ "MetricGroup": "BvCB;Cor;tma_issueComp",
+ "MetricName": "tma_bottleneck_compute_bound_est",
+ "MetricThreshold": "tma_bottleneck_compute_bound_est > 20",
+ "PublicDescription": "Total pipeline cost when the execution is compute-bound - an estimation. Covers Core Bound when High ILP as well as when long-latency execution units are busy. Related metrics: "
+ },
+ {
"BriefDescription": "Total pipeline cost of external Memory- or Cache-Bandwidth related bottlenecks",
- "MetricExpr": "100 * (tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_mem_bandwidth / (tma_mem_bandwidth + tma_mem_latency)) + tma_memory_bound * (tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_sq_full / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full)) + tma_memory_bound * (tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_fb_full / (tma_4k_aliasing + tma_dtlb_load + tma_fb_full + tma_l1_latency_dependency + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)))",
+ "MetricConstraint": "NO_GROUP_EVENTS",
+ "MetricExpr": "100 * (tma_memory_bound * (tma_dram_bound / (tma_cxl_mem_bound + tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_mem_bandwidth / (tma_mem_bandwidth + tma_mem_latency)) + tma_memory_bound * (tma_l3_bound / (tma_cxl_mem_bound + tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_sq_full / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full)) + tma_memory_bound * (tma_l1_bound / (tma_cxl_mem_bound + tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_fb_full / (tma_4k_aliasing + tma_dtlb_load + tma_fb_full + tma_l1_latency_dependency + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)))",
"MetricGroup": "BvMB;Mem;MemoryBW;Offcore;tma_issueBW",
- "MetricName": "tma_bottleneck_cache_memory_bandwidth",
- "MetricThreshold": "tma_bottleneck_cache_memory_bandwidth > 20",
+ "MetricName": "tma_bottleneck_data_cache_memory_bandwidth",
+ "MetricThreshold": "tma_bottleneck_data_cache_memory_bandwidth > 20",
"PublicDescription": "Total pipeline cost of external Memory- or Cache-Bandwidth related bottlenecks. Related metrics: tma_fb_full, tma_info_system_dram_bw_use, tma_mem_bandwidth, tma_sq_full"
},
{
"BriefDescription": "Total pipeline cost of external Memory- or Cache-Latency related bottlenecks",
- "MetricExpr": "100 * (tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_mem_latency / (tma_mem_bandwidth + tma_mem_latency)) + tma_memory_bound * (tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_l3_hit_latency / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full)) + tma_memory_bound * tma_l2_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) + tma_memory_bound * (tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_l1_latency_dependency / (tma_4k_aliasing + tma_dtlb_load + tma_fb_full + tma_l1_latency_dependency + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)) + tma_memory_bound * (tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_lock_latency / (tma_4k_aliasing + tma_dtlb_load + tma_fb_full + tma_l1_latency_dependency + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)) + tma_memory_bound * (tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_split_loads / (tma_4k_aliasing + tma_dtlb_load + tma_fb_full + tma_l1_latency_dependency + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)) + tma_memory_bound * (tma_store_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_split_stores / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency)) + tma_memory_bound * (tma_store_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_store_latency / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency)))",
+ "MetricConstraint": "NO_GROUP_EVENTS",
+ "MetricExpr": "100 * (tma_memory_bound * (tma_dram_bound / (tma_cxl_mem_bound + tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_mem_latency / (tma_mem_bandwidth + tma_mem_latency)) + tma_memory_bound * (tma_l3_bound / (tma_cxl_mem_bound + tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_l3_hit_latency / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full)) + tma_memory_bound * tma_l2_bound / (tma_cxl_mem_bound + tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) + tma_memory_bound * (tma_l1_bound / (tma_cxl_mem_bound + tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_l1_latency_dependency / (tma_4k_aliasing + tma_dtlb_load + tma_fb_full + tma_l1_latency_dependency + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)) + tma_memory_bound * (tma_l1_bound / (tma_cxl_mem_bound + tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_lock_latency / (tma_4k_aliasing + tma_dtlb_load + tma_fb_full + tma_l1_latency_dependency + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)) + tma_memory_bound * (tma_l1_bound / (tma_cxl_mem_bound + tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_split_loads / (tma_4k_aliasing + tma_dtlb_load + tma_fb_full + tma_l1_latency_dependency + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)) + tma_memory_bound * (tma_store_bound / (tma_cxl_mem_bound + tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_split_stores / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency)) + tma_memory_bound * (tma_store_bound / (tma_cxl_mem_bound + tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_store_latency / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency)))",
"MetricGroup": "BvML;Mem;MemoryLat;Offcore;tma_issueLat",
- "MetricName": "tma_bottleneck_cache_memory_latency",
- "MetricThreshold": "tma_bottleneck_cache_memory_latency > 20",
+ "MetricName": "tma_bottleneck_data_cache_memory_latency",
+ "MetricThreshold": "tma_bottleneck_data_cache_memory_latency > 20",
"PublicDescription": "Total pipeline cost of external Memory- or Cache-Latency related bottlenecks. Related metrics: tma_l3_hit_latency, tma_mem_latency"
},
{
- "BriefDescription": "Total pipeline cost when the execution is compute-bound - an estimation",
- "MetricExpr": "100 * (tma_core_bound * tma_divider / (tma_divider + tma_ports_utilization + tma_serializing_operation) + tma_core_bound * (tma_ports_utilization / (tma_divider + tma_ports_utilization + tma_serializing_operation)) * (tma_ports_utilized_3m / (tma_ports_utilized_0 + tma_ports_utilized_1 + tma_ports_utilized_2 + tma_ports_utilized_3m)))",
- "MetricGroup": "BvCB;Cor;tma_issueComp",
- "MetricName": "tma_bottleneck_compute_bound_est",
- "MetricThreshold": "tma_bottleneck_compute_bound_est > 20",
- "PublicDescription": "Total pipeline cost when the execution is compute-bound - an estimation. Covers Core Bound when High ILP as well as when long-latency execution units are busy. Related metrics: "
- },
- {
"BriefDescription": "Total pipeline cost of instruction fetch bandwidth related bottlenecks (when the front-end could not sustain operations delivery to the back-end)",
+ "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "100 * (tma_frontend_bound - (1 - 10 * tma_microcode_sequencer * tma_other_mispredicts / tma_branch_mispredicts) * tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches) - tma_microcode_sequencer / (tma_few_uops_instructions + tma_microcode_sequencer) * (tma_assists / tma_microcode_sequencer) * tma_fetch_latency * (tma_ms_switches + tma_branch_resteers * (tma_clears_resteers + tma_mispredicts_resteers * (10 * tma_microcode_sequencer * tma_other_mispredicts / tma_branch_mispredicts)) / (tma_clears_resteers + tma_mispredicts_resteers + tma_unknown_branches)) / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches)) - tma_bottleneck_big_code",
"MetricGroup": "BvFB;Fed;FetchBW;Frontend",
"MetricName": "tma_bottleneck_instruction_fetch_bw",
@@ -402,6 +408,7 @@
},
{
"BriefDescription": "Total pipeline cost of irregular execution (e.g",
+ "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "100 * (tma_microcode_sequencer / (tma_few_uops_instructions + tma_microcode_sequencer) * (tma_assists / tma_microcode_sequencer) * tma_fetch_latency * (tma_ms_switches + tma_branch_resteers * (tma_clears_resteers + tma_mispredicts_resteers * (10 * tma_microcode_sequencer * tma_other_mispredicts / tma_branch_mispredicts)) / (tma_clears_resteers + tma_mispredicts_resteers + tma_unknown_branches)) / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches) + 10 * tma_microcode_sequencer * tma_other_mispredicts / tma_branch_mispredicts * tma_branch_mispredicts + tma_machine_clears * tma_other_nukes / tma_other_nukes + tma_core_bound * (tma_serializing_operation + tma_core_bound * RS_EVENTS.EMPTY_CYCLES / tma_info_thread_clks * tma_ports_utilized_0) / (tma_divider + tma_ports_utilization + tma_serializing_operation) + tma_microcode_sequencer / (tma_few_uops_instructions + tma_microcode_sequencer) * (tma_assists / tma_microcode_sequencer) * tma_heavy_operations)",
"MetricGroup": "Bad;BvIO;Cor;Ret;tma_issueMS",
"MetricName": "tma_bottleneck_irregular_overhead",
@@ -410,7 +417,8 @@
},
{
"BriefDescription": "Total pipeline cost of Memory Address Translation related bottlenecks (data-side TLBs)",
- "MetricExpr": "100 * (tma_memory_bound * (tma_l1_bound / max(tma_memory_bound, tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_dtlb_load / max(tma_l1_bound, tma_4k_aliasing + tma_dtlb_load + tma_fb_full + tma_l1_latency_dependency + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)) + tma_memory_bound * (tma_store_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_dtlb_store / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency)))",
+ "MetricConstraint": "NO_GROUP_EVENTS",
+ "MetricExpr": "100 * (tma_memory_bound * (tma_l1_bound / max(tma_memory_bound, tma_cxl_mem_bound + tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_dtlb_load / max(tma_l1_bound, tma_4k_aliasing + tma_dtlb_load + tma_fb_full + tma_l1_latency_dependency + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)) + tma_memory_bound * (tma_store_bound / (tma_cxl_mem_bound + tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_dtlb_store / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency)))",
"MetricGroup": "BvMT;Mem;MemoryTLB;Offcore;tma_issueTLB",
"MetricName": "tma_bottleneck_memory_data_tlbs",
"MetricThreshold": "tma_bottleneck_memory_data_tlbs > 20",
@@ -418,7 +426,8 @@
},
{
"BriefDescription": "Total pipeline cost of Memory Synchronization related bottlenecks (data transfers and coherency updates across processors)",
- "MetricExpr": "100 * (tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_mem_latency / (tma_mem_bandwidth + tma_mem_latency)) * tma_remote_cache / (tma_local_mem + tma_remote_cache + tma_remote_mem) + tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_contested_accesses + tma_data_sharing) / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full) + tma_store_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * tma_false_sharing / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency - tma_store_latency)) + tma_machine_clears * (1 - tma_other_nukes / tma_other_nukes))",
+ "MetricConstraint": "NO_GROUP_EVENTS",
+ "MetricExpr": "100 * (tma_memory_bound * (tma_dram_bound / (tma_cxl_mem_bound + tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_mem_latency / (tma_mem_bandwidth + tma_mem_latency)) * tma_remote_cache / (tma_local_mem + tma_remote_cache + tma_remote_mem) + tma_l3_bound / (tma_cxl_mem_bound + tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_contested_accesses + tma_data_sharing) / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full) + tma_store_bound / (tma_cxl_mem_bound + tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * tma_false_sharing / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency - tma_store_latency)) + tma_machine_clears * (1 - tma_other_nukes / tma_other_nukes))",
"MetricGroup": "BvMS;LockCont;Mem;Offcore;tma_issueSyncxn",
"MetricName": "tma_bottleneck_memory_synchronization",
"MetricThreshold": "tma_bottleneck_memory_synchronization > 10",
@@ -426,6 +435,7 @@
},
{
"BriefDescription": "Total pipeline cost of Branch Misprediction related bottlenecks",
+ "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "100 * (1 - 10 * tma_microcode_sequencer * tma_other_mispredicts / tma_branch_mispredicts) * (tma_branch_mispredicts + tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches))",
"MetricGroup": "Bad;BadSpec;BrMispredicts;BvMP;tma_issueBM",
"MetricName": "tma_bottleneck_mispredictions",
@@ -434,7 +444,8 @@
},
{
"BriefDescription": "Total pipeline cost of remaining bottlenecks in the back-end",
- "MetricExpr": "100 - (tma_bottleneck_big_code + tma_bottleneck_instruction_fetch_bw + tma_bottleneck_mispredictions + tma_bottleneck_cache_memory_bandwidth + tma_bottleneck_cache_memory_latency + tma_bottleneck_memory_data_tlbs + tma_bottleneck_memory_synchronization + tma_bottleneck_compute_bound_est + tma_bottleneck_irregular_overhead + tma_bottleneck_branching_overhead + tma_bottleneck_useful_work)",
+ "MetricConstraint": "NO_GROUP_EVENTS",
+ "MetricExpr": "100 - (tma_bottleneck_big_code + tma_bottleneck_instruction_fetch_bw + tma_bottleneck_mispredictions + tma_bottleneck_data_cache_memory_bandwidth + tma_bottleneck_data_cache_memory_latency + tma_bottleneck_memory_data_tlbs + tma_bottleneck_memory_synchronization + tma_bottleneck_compute_bound_est + tma_bottleneck_irregular_overhead + tma_bottleneck_branching_overhead + tma_bottleneck_useful_work)",
"MetricGroup": "BvOB;Cor;Offcore",
"MetricName": "tma_bottleneck_other_bottlenecks",
"MetricThreshold": "tma_bottleneck_other_bottlenecks > 20",
@@ -442,6 +453,7 @@
},
{
"BriefDescription": "Total pipeline cost of \"useful operations\" - the portion of Retiring category not covered by Branching_Overhead nor Irregular_Overhead.",
+ "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "100 * (tma_retiring - (BR_INST_RETIRED.ALL_BRANCHES + 2 * BR_INST_RETIRED.NEAR_CALL + INST_RETIRED.NOP) / tma_info_thread_slots - tma_microcode_sequencer / (tma_few_uops_instructions + tma_microcode_sequencer) * (tma_assists / tma_microcode_sequencer) * tma_heavy_operations)",
"MetricGroup": "BvUW;Ret",
"MetricName": "tma_bottleneck_useful_work",
@@ -469,6 +481,7 @@
},
{
"BriefDescription": "This metric estimates fraction of cycles the CPU retired uops originated from CISC (complex instruction set computer) instruction",
+ "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "max(0, tma_microcode_sequencer - tma_assists)",
"MetricGroup": "TopdownL4;tma_L4_group;tma_microcode_sequencer_group",
"MetricName": "tma_cisc",
@@ -539,6 +552,15 @@
"ScaleUnit": "100%"
},
{
+ "BriefDescription": "This metric roughly estimates (based on idle latencies) how often the CPU was stalled on accesses to external CXL Memory by loads (e.g",
+ "MetricExpr": "(((1 - ((19 * (MEM_LOAD_L3_MISS_RETIRED.REMOTE_DRAM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS)) + 10 * (MEM_LOAD_L3_MISS_RETIRED.LOCAL_DRAM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) + MEM_LOAD_L3_MISS_RETIRED.REMOTE_FWD * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) + MEM_LOAD_L3_MISS_RETIRED.REMOTE_HITM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS))) / (19 * (MEM_LOAD_L3_MISS_RETIRED.REMOTE_DRAM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS)) + 10 * (MEM_LOAD_L3_MISS_RETIRED.LOCAL_DRAM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) + MEM_LOAD_L3_MISS_RETIRED.REMOTE_FWD * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) + MEM_LOAD_L3_MISS_RETIRED.REMOTE_HITM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS)) + (25 * (MEM_LOAD_RETIRED.LOCAL_PMM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) if #has_pmem > 0 else 0) + 33 * (MEM_LOAD_L3_MISS_RETIRED.REMOTE_PMM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) if #has_pmem > 0 else 0))) if #has_pmem > 0 else 1)) * (CYCLE_ACTIVITY.STALLS_L3_MISS / tma_info_thread_clks + (CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS) / tma_info_thread_clks - tma_l2_bound) if 1e6 * (MEM_LOAD_L3_MISS_RETIRED.REMOTE_PMM + MEM_LOAD_RETIRED.LOCAL_PMM) > MEM_LOAD_RETIRED.L1_MISS else 0) if #has_pmem > 0 else 0)",
+ "MetricGroup": "MemoryBound;Server;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
+ "MetricName": "tma_cxl_mem_bound",
+ "MetricThreshold": "tma_cxl_mem_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)",
+ "PublicDescription": "This metric roughly estimates (based on idle latencies) how often the CPU was stalled on accesses to external CXL Memory by loads (e.g. 3D-Xpoint (Crystal Ridge, a.k.a. IXP) memory, PMM - Persistent Memory Module [from CLX to SPR] or any other CXL Type3 Memory [EMR onwards]).",
+ "ScaleUnit": "100%"
+ },
+ {
"BriefDescription": "This metric estimates fraction of cycles while the memory subsystem was handling synchronizations due to data-sharing accesses",
"MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "44 * tma_info_system_core_frequency * (MEM_LOAD_L3_HIT_RETIRED.XSNP_HIT + MEM_LOAD_L3_HIT_RETIRED.XSNP_HITM * (1 - OCR.DEMAND_DATA_RD.L3_HIT.HITM_OTHER_CORE / (OCR.DEMAND_DATA_RD.L3_HIT.HITM_OTHER_CORE + OCR.DEMAND_DATA_RD.L3_HIT.HIT_OTHER_CORE_FWD))) * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS / 2) / tma_info_thread_clks",
@@ -569,7 +591,7 @@
{
"BriefDescription": "This metric estimates how often the CPU was stalled on accesses to external memory (DRAM) by loads",
"MetricConstraint": "NO_GROUP_EVENTS",
- "MetricExpr": "CYCLE_ACTIVITY.STALLS_L3_MISS / tma_info_thread_clks + (CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS) / tma_info_thread_clks - tma_l2_bound",
+ "MetricExpr": "(CYCLE_ACTIVITY.STALLS_L3_MISS / tma_info_thread_clks + (CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS) / tma_info_thread_clks - tma_l2_bound - tma_cxl_mem_bound if #has_pmem > 0 else CYCLE_ACTIVITY.STALLS_L3_MISS / tma_info_thread_clks + (CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS) / tma_info_thread_clks - tma_l2_bound)",
"MetricGroup": "MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
"MetricName": "tma_dram_bound",
"MetricThreshold": "tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)",
@@ -630,7 +652,7 @@
"MetricGroup": "BvMB;MemoryBW;TopdownL4;tma_L4_group;tma_issueBW;tma_issueSL;tma_issueSmSt;tma_l1_bound_group",
"MetricName": "tma_fb_full",
"MetricThreshold": "tma_fb_full > 0.3",
- "PublicDescription": "This metric does a *rough estimation* of how often L1D Fill Buffer unavailability limited additional L1D miss memory access requests to proceed. The higher the metric value; the deeper the memory hierarchy level the misses are satisfied from (metric values >1 are valid). Often it hints on approaching bandwidth limits (to L2 cache; L3 cache or external memory). Related metrics: tma_bottleneck_cache_memory_bandwidth, tma_info_system_dram_bw_use, tma_mem_bandwidth, tma_sq_full, tma_store_latency, tma_streaming_stores",
+ "PublicDescription": "This metric does a *rough estimation* of how often L1D Fill Buffer unavailability limited additional L1D miss memory access requests to proceed. The higher the metric value; the deeper the memory hierarchy level the misses are satisfied from (metric values >1 are valid). Often it hints on approaching bandwidth limits (to L2 cache; L3 cache or external memory). Related metrics: tma_bottleneck_data_cache_memory_bandwidth, tma_info_system_dram_bw_use, tma_mem_bandwidth, tma_sq_full, tma_store_latency, tma_streaming_stores",
"ScaleUnit": "100%"
},
{
@@ -693,7 +715,6 @@
},
{
"BriefDescription": "This metric approximates arithmetic floating-point (FP) vector uops fraction the CPU has retired aggregated across all vector widths",
- "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "cpu@FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE\\,umask\\=0xfc@ / UOPS_RETIRED.RETIRE_SLOTS",
"MetricGroup": "Compute;Flops;TopdownL4;tma_L4_group;tma_fp_arith_group;tma_issue2P",
"MetricName": "tma_fp_vector",
@@ -768,6 +789,7 @@
},
{
"BriefDescription": "Branch Misprediction Cost: Cycles representing fraction of TMA slots wasted per non-speculative branch misprediction (retired JEClear)",
+ "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "tma_bottleneck_mispredictions * tma_info_thread_slots / 4 / BR_MISP_RETIRED.ALL_BRANCHES / 100",
"MetricGroup": "Bad;BrMispredicts;tma_issueBM",
"MetricName": "tma_info_bad_spec_branch_misprediction_cost",
@@ -803,6 +825,7 @@
},
{
"BriefDescription": "Total pipeline cost of DSB (uop cache) hits - subset of the Instruction_Fetch_BW Bottleneck",
+ "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "100 * (tma_frontend_bound * (tma_fetch_bandwidth / (tma_fetch_bandwidth + tma_fetch_latency)) * (tma_dsb / (tma_dsb + tma_mite)))",
"MetricGroup": "DSB;Fed;FetchBW;tma_issueFB",
"MetricName": "tma_info_botlnk_l2_dsb_bandwidth",
@@ -820,6 +843,7 @@
},
{
"BriefDescription": "Total pipeline cost of Instruction Cache misses - subset of the Big_Code Bottleneck",
+ "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "100 * (tma_fetch_latency * tma_icache_misses / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches))",
"MetricGroup": "Fed;FetchLat;IcMiss;tma_issueFL",
"MetricName": "tma_info_botlnk_l2_ic_misses",
@@ -961,7 +985,6 @@
},
{
"BriefDescription": "Instructions per FP Arithmetic instruction (lower number means higher occurrence rate)",
- "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "INST_RETIRED.ANY / (FP_ARITH_INST_RETIRED.SCALAR + cpu@FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE\\,umask\\=0xfc@)",
"MetricGroup": "Flops;InsType",
"MetricName": "tma_info_inst_mix_iparith",
@@ -1249,7 +1272,7 @@
"MetricName": "tma_info_memory_tlb_store_stlb_mpki"
},
{
- "BriefDescription": "",
+ "BriefDescription": "Mem;Backend;CacheHits",
"MetricExpr": "UOPS_EXECUTED.THREAD / (UOPS_EXECUTED.CORE_CYCLES_GE_1 / 2 if #SMT_on else cpu@UOPS_EXECUTED.THREAD\\,cmask\\=1@)",
"MetricGroup": "Cor;Pipeline;PortsUtil;SMT",
"MetricName": "tma_info_pipeline_execute"
@@ -1267,6 +1290,12 @@
"MetricName": "tma_info_pipeline_fetch_mite"
},
{
+ "BriefDescription": "Average number of uops fetched from MS per cycle",
+ "MetricExpr": "IDQ.MS_UOPS / cpu@IDQ.MS_UOPS\\,cmask\\=1@",
+ "MetricGroup": "Fed;FetchLat;MicroSeq",
+ "MetricName": "tma_info_pipeline_fetch_ms"
+ },
+ {
"BriefDescription": "Instructions per a microcode Assist invocation",
"MetricExpr": "INST_RETIRED.ANY / (FP_ASSIST.ANY + OTHER_ASSISTS.ANY)",
"MetricGroup": "MicroSeq;Pipeline;Ret;Retire",
@@ -1282,7 +1311,7 @@
},
{
"BriefDescription": "Measured Average Core Frequency for unhalted processors [GHz]",
- "MetricExpr": "tma_info_system_turbo_utilization * TSC / 1e9 / tma_info_system_time",
+ "MetricExpr": "tma_info_system_turbo_utilization * msr@tsc@ / 1e9 / tma_info_system_time",
"MetricGroup": "Power;Summary",
"MetricName": "tma_info_system_core_frequency"
},
@@ -1294,16 +1323,28 @@
},
{
"BriefDescription": "Average number of utilized CPUs",
- "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / TSC",
+ "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / msr@tsc@",
"MetricGroup": "Summary",
"MetricName": "tma_info_system_cpus_utilized"
},
{
+ "BriefDescription": "Average 3DXP Memory Bandwidth Use for reads [GB / sec]",
+ "MetricExpr": "(64 * UNC_M_PMM_RPQ_INSERTS / 1e9 / tma_info_system_time if #has_pmem > 0 else 0)",
+ "MetricGroup": "MemOffcore;MemoryBW;Server;SoC",
+ "MetricName": "tma_info_system_cxl_mem_read_bw"
+ },
+ {
+ "BriefDescription": "Average 3DXP Memory Bandwidth Use for Writes [GB / sec]",
+ "MetricExpr": "(64 * UNC_M_PMM_WPQ_INSERTS / 1e9 / tma_info_system_time if #has_pmem > 0 else 0)",
+ "MetricGroup": "MemOffcore;MemoryBW;Server;SoC",
+ "MetricName": "tma_info_system_cxl_mem_write_bw"
+ },
+ {
"BriefDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]",
"MetricExpr": "64 * (UNC_M_CAS_COUNT.RD + UNC_M_CAS_COUNT.WR) / 1e9 / tma_info_system_time",
"MetricGroup": "HPC;MemOffcore;MemoryBW;SoC;tma_issueBW",
"MetricName": "tma_info_system_dram_bw_use",
- "PublicDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]. Related metrics: tma_bottleneck_cache_memory_bandwidth, tma_fb_full, tma_mem_bandwidth, tma_sq_full"
+ "PublicDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]. Related metrics: tma_bottleneck_data_cache_memory_bandwidth, tma_fb_full, tma_mem_bandwidth, tma_sq_full"
},
{
"BriefDescription": "Giga Floating Point Operations Per Second",
@@ -1362,6 +1403,13 @@
"PublicDescription": "Average number of parallel data read requests to external memory. Accounts for demand loads and L1/L2 prefetches"
},
{
+ "BriefDescription": "Average latency of data read request to external 3D X-Point memory [in nanoseconds]",
+ "MetricExpr": "(1e9 * (UNC_M_PMM_RPQ_OCCUPANCY.ALL / UNC_M_PMM_RPQ_INSERTS) / imc_0@event\\=0x0@ if #has_pmem > 0 else 0)",
+ "MetricGroup": "MemOffcore;MemoryLat;Server;SoC",
+ "MetricName": "tma_info_system_mem_pmm_read_latency",
+ "PublicDescription": "Average latency of data read request to external 3D X-Point memory [in nanoseconds]. Accounts for demand loads and L1/L2 data-read prefetches"
+ },
+ {
"BriefDescription": "Average latency of data read request to external memory (in nanoseconds)",
"MetricExpr": "1e9 * (UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD / UNC_CHA_TOR_INSERTS.IA_MISS_DRD) / (tma_info_system_socket_clks / tma_info_system_time)",
"MetricGroup": "Mem;MemoryLat;SoC",
@@ -1499,12 +1547,13 @@
"ScaleUnit": "100%"
},
{
- "BriefDescription": "This metric([SKL+] roughly; [LNL]) estimates fraction of cycles with demand load accesses that hit the L1D cache",
+ "BriefDescription": "This metric ([SKL+] roughly; [LNL]) estimates fraction of cycles with demand load accesses that hit the L1D cache",
+ "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "min(2 * (MEM_INST_RETIRED.ALL_LOADS - MEM_LOAD_RETIRED.FB_HIT - MEM_LOAD_RETIRED.L1_MISS) * 20 / 100, max(CYCLE_ACTIVITY.CYCLES_MEM_ANY - CYCLE_ACTIVITY.CYCLES_L1D_MISS, 0)) / tma_info_thread_clks",
"MetricGroup": "BvML;MemoryLat;TopdownL4;tma_L4_group;tma_l1_bound_group",
"MetricName": "tma_l1_latency_dependency",
"MetricThreshold": "tma_l1_latency_dependency > 0.1 & (tma_l1_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
- "PublicDescription": "This metric([SKL+] roughly; [LNL]) estimates fraction of cycles with demand load accesses that hit the L1D cache. The short latency of the L1D cache may be exposed in pointer-chasing memory access patterns as an example. Sample with: MEM_LOAD_RETIRED.L1_HIT",
+ "PublicDescription": "This metric ([SKL+] roughly; [LNL]) estimates fraction of cycles with demand load accesses that hit the L1D cache. The short latency of the L1D cache may be exposed in pointer-chasing memory access patterns as an example. Sample with: MEM_LOAD_RETIRED.L1_HIT",
"ScaleUnit": "100%"
},
{
@@ -1541,7 +1590,7 @@
"MetricGroup": "BvML;MemoryLat;TopdownL4;tma_L4_group;tma_issueLat;tma_l3_bound_group",
"MetricName": "tma_l3_hit_latency",
"MetricThreshold": "tma_l3_hit_latency > 0.1 & (tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
- "PublicDescription": "This metric estimates fraction of cycles with demand load accesses that hit the L3 cache under unloaded scenarios (possibly L3 latency limited). Avoiding private cache misses (i.e. L2 misses/L3 hits) will improve the latency; reduce contention with sibling physical cores and increase performance. Note the value of this node may overlap with its siblings. Sample with: MEM_LOAD_RETIRED.L3_HIT_PS. Related metrics: tma_bottleneck_cache_memory_latency, tma_mem_latency",
+ "PublicDescription": "This metric estimates fraction of cycles with demand load accesses that hit the L3 cache under unloaded scenarios (possibly L3 latency limited). Avoiding private cache misses (i.e. L2 misses/L3 hits) will improve the latency; reduce contention with sibling physical cores and increase performance. Note the value of this node may overlap with its siblings. Sample with: MEM_LOAD_RETIRED.L3_HIT_PS. Related metrics: tma_bottleneck_data_cache_memory_latency, tma_mem_latency",
"ScaleUnit": "100%"
},
{
@@ -1565,6 +1614,7 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port for Load operations",
+ "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "(UOPS_DISPATCHED_PORT.PORT_2 + UOPS_DISPATCHED_PORT.PORT_3 + UOPS_DISPATCHED_PORT.PORT_7 - UOPS_DISPATCHED_PORT.PORT_4) / (2 * tma_info_core_core_clks)",
"MetricGroup": "TopdownL5;tma_L5_group;tma_ports_utilized_3m_group",
"MetricName": "tma_load_op_utilization",
@@ -1591,6 +1641,7 @@
},
{
"BriefDescription": "This metric estimates the fraction of cycles to walk the memory paging structures to cache translation of 1 GB pages for data load accesses.",
+ "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "tma_load_stlb_miss * DTLB_LOAD_MISSES.WALK_COMPLETED_1G / (DTLB_LOAD_MISSES.WALK_COMPLETED_4K + DTLB_LOAD_MISSES.WALK_COMPLETED_2M_4M + DTLB_LOAD_MISSES.WALK_COMPLETED_1G)",
"MetricGroup": "MemoryTLB;TopdownL6;tma_L6_group;tma_load_stlb_miss_group",
"MetricName": "tma_load_stlb_miss_1g",
@@ -1599,6 +1650,7 @@
},
{
"BriefDescription": "This metric estimates the fraction of cycles to walk the memory paging structures to cache translation of 2 or 4 MB pages for data load accesses.",
+ "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "tma_load_stlb_miss * DTLB_LOAD_MISSES.WALK_COMPLETED_2M_4M / (DTLB_LOAD_MISSES.WALK_COMPLETED_4K + DTLB_LOAD_MISSES.WALK_COMPLETED_2M_4M + DTLB_LOAD_MISSES.WALK_COMPLETED_1G)",
"MetricGroup": "MemoryTLB;TopdownL6;tma_L6_group;tma_load_stlb_miss_group",
"MetricName": "tma_load_stlb_miss_2m",
@@ -1607,6 +1659,7 @@
},
{
"BriefDescription": "This metric estimates the fraction of cycles to walk the memory paging structures to cache translation of 4 KB pages for data load accesses.",
+ "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "tma_load_stlb_miss * DTLB_LOAD_MISSES.WALK_COMPLETED_4K / (DTLB_LOAD_MISSES.WALK_COMPLETED_4K + DTLB_LOAD_MISSES.WALK_COMPLETED_2M_4M + DTLB_LOAD_MISSES.WALK_COMPLETED_1G)",
"MetricGroup": "MemoryTLB;TopdownL6;tma_L6_group;tma_load_stlb_miss_group",
"MetricName": "tma_load_stlb_miss_4k",
@@ -1624,6 +1677,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles the CPU spent handling cache misses due to lock operations",
+ "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "(12 * max(0, MEM_INST_RETIRED.LOCK_LOADS - L2_RQSTS.ALL_RFO) + MEM_INST_RETIRED.LOCK_LOADS / MEM_INST_RETIRED.ALL_STORES * (11 * L2_RQSTS.RFO_HIT + min(CPU_CLK_UNHALTED.THREAD, OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO))) / tma_info_thread_clks",
"MetricGroup": "LockCont;Offcore;TopdownL4;tma_L4_group;tma_issueRFO;tma_l1_bound_group",
"MetricName": "tma_lock_latency",
@@ -1648,7 +1702,7 @@
"MetricGroup": "BvMB;MemoryBW;Offcore;TopdownL4;tma_L4_group;tma_dram_bound_group;tma_issueBW",
"MetricName": "tma_mem_bandwidth",
"MetricThreshold": "tma_mem_bandwidth > 0.2 & (tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
- "PublicDescription": "This metric estimates fraction of cycles where the core's performance was likely hurt due to approaching bandwidth limits of external memory - DRAM ([SPR-HBM] and/or HBM). The underlying heuristic assumes that a similar off-core traffic is generated by all IA cores. This metric does not aggregate non-data-read requests by this logical processor; requests from other IA Logical Processors/Physical Cores/sockets; or other non-IA devices like GPU; hence the maximum external memory bandwidth limits may or may not be approached when this metric is flagged (see Uncore counters for that). Related metrics: tma_bottleneck_cache_memory_bandwidth, tma_fb_full, tma_info_system_dram_bw_use, tma_sq_full",
+ "PublicDescription": "This metric estimates fraction of cycles where the core's performance was likely hurt due to approaching bandwidth limits of external memory - DRAM ([SPR-HBM] and/or HBM). The underlying heuristic assumes that a similar off-core traffic is generated by all IA cores. This metric does not aggregate non-data-read requests by this logical processor; requests from other IA Logical Processors/Physical Cores/sockets; or other non-IA devices like GPU; hence the maximum external memory bandwidth limits may or may not be approached when this metric is flagged (see Uncore counters for that). Related metrics: tma_bottleneck_data_cache_memory_bandwidth, tma_fb_full, tma_info_system_dram_bw_use, tma_sq_full",
"ScaleUnit": "100%"
},
{
@@ -1657,7 +1711,7 @@
"MetricGroup": "BvML;MemoryLat;Offcore;TopdownL4;tma_L4_group;tma_dram_bound_group;tma_issueLat",
"MetricName": "tma_mem_latency",
"MetricThreshold": "tma_mem_latency > 0.1 & (tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
- "PublicDescription": "This metric estimates fraction of cycles where the performance was likely hurt due to latency from external memory - DRAM ([SPR-HBM] and/or HBM). This metric does not aggregate requests from other Logical Processors/Physical Cores/sockets (see Uncore counters for that). Related metrics: tma_bottleneck_cache_memory_latency, tma_l3_hit_latency",
+ "PublicDescription": "This metric estimates fraction of cycles where the performance was likely hurt due to latency from external memory - DRAM ([SPR-HBM] and/or HBM). This metric does not aggregate requests from other Logical Processors/Physical Cores/sockets (see Uncore counters for that). Related metrics: tma_bottleneck_data_cache_memory_latency, tma_l3_hit_latency",
"ScaleUnit": "100%"
},
{
@@ -1681,7 +1735,6 @@
},
{
"BriefDescription": "This metric represents fraction of slots the CPU was retiring uops fetched by the Microcode Sequencer (MS) unit",
- "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / UOPS_ISSUED.ANY * IDQ.MS_UOPS / tma_info_thread_slots",
"MetricGroup": "MicroSeq;TopdownL3;tma_L3_group;tma_heavy_operations_group;tma_issueMC;tma_issueMS",
"MetricName": "tma_microcode_sequencer",
@@ -1691,6 +1744,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to Branch Resteers as a result of Branch Misprediction at execution stage",
+ "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "BR_MISP_RETIRED.ALL_BRANCHES / (BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT) * INT_MISC.CLEAR_RESTEER_CYCLES / tma_info_thread_clks",
"MetricGroup": "BadSpec;BrMispredicts;BvMP;TopdownL4;tma_L4_group;tma_branch_resteers_group;tma_issueBM",
"MetricName": "tma_mispredicts_resteers",
@@ -1745,6 +1799,7 @@
},
{
"BriefDescription": "This metric represents the remaining light uops fraction the CPU has executed - remaining means not covered by other sibling nodes",
+ "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "max(0, tma_light_operations - (tma_fp_arith + tma_memory_operations + tma_fused_instructions + tma_non_fused_branches))",
"MetricGroup": "Pipeline;TopdownL3;tma_L3_group;tma_light_operations_group",
"MetricName": "tma_other_light_ops",
@@ -1754,6 +1809,7 @@
},
{
"BriefDescription": "This metric estimates fraction of slots the CPU was stalled due to other cases of misprediction (non-retired x86 branches or other types).",
+ "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "max(tma_branch_mispredicts * (1 - BR_MISP_RETIRED.ALL_BRANCHES / (INT_MISC.CLEARS_COUNT - MACHINE_CLEARS.COUNT)), 0.0001)",
"MetricGroup": "BrMispredicts;BvIO;TopdownL3;tma_L3_group;tma_branch_mispredicts_group",
"MetricName": "tma_other_mispredicts",
@@ -1762,6 +1818,7 @@
},
{
"BriefDescription": "This metric represents fraction of slots the CPU has wasted due to Nukes (Machine Clears) not related to memory ordering.",
+ "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "max(tma_machine_clears * (1 - MACHINE_CLEARS.MEMORY_ORDERING / MACHINE_CLEARS.COUNT), 0.0001)",
"MetricGroup": "BvIO;Machine_Clears;TopdownL3;tma_L3_group;tma_machine_clears_group",
"MetricName": "tma_other_nukes",
@@ -1842,6 +1899,7 @@
},
{
"BriefDescription": "This metric estimates fraction of cycles the CPU performance was potentially limited due to Core computation issues (non divider-related)",
+ "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "((tma_ports_utilized_0 * tma_info_thread_clks + (EXE_ACTIVITY.1_PORTS_UTIL + tma_retiring * EXE_ACTIVITY.2_PORTS_UTIL)) / tma_info_thread_clks if ARITH.DIVIDER_ACTIVE < CYCLE_ACTIVITY.STALLS_TOTAL - CYCLE_ACTIVITY.STALLS_MEM_ANY else (EXE_ACTIVITY.1_PORTS_UTIL + tma_retiring * EXE_ACTIVITY.2_PORTS_UTIL) / tma_info_thread_clks)",
"MetricGroup": "PortsUtil;TopdownL3;tma_L3_group;tma_core_bound_group",
"MetricName": "tma_ports_utilization",
@@ -1956,7 +2014,7 @@
"MetricGroup": "BvMB;MemoryBW;Offcore;TopdownL4;tma_L4_group;tma_issueBW;tma_l3_bound_group",
"MetricName": "tma_sq_full",
"MetricThreshold": "tma_sq_full > 0.3 & (tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
- "PublicDescription": "This metric measures fraction of cycles where the Super Queue (SQ) was full taking into account all request-types and both hardware SMT threads (Logical Processors). Related metrics: tma_bottleneck_cache_memory_bandwidth, tma_fb_full, tma_info_system_dram_bw_use, tma_mem_bandwidth",
+ "PublicDescription": "This metric measures fraction of cycles where the Super Queue (SQ) was full taking into account all request-types and both hardware SMT threads (Logical Processors). Related metrics: tma_bottleneck_data_cache_memory_bandwidth, tma_fb_full, tma_info_system_dram_bw_use, tma_mem_bandwidth",
"ScaleUnit": "100%"
},
{
@@ -2013,6 +2071,7 @@
},
{
"BriefDescription": "This metric estimates the fraction of cycles to walk the memory paging structures to cache translation of 1 GB pages for data store accesses.",
+ "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "tma_store_stlb_miss * DTLB_STORE_MISSES.WALK_COMPLETED_1G / (DTLB_STORE_MISSES.WALK_COMPLETED_4K + DTLB_STORE_MISSES.WALK_COMPLETED_2M_4M + DTLB_STORE_MISSES.WALK_COMPLETED_1G)",
"MetricGroup": "MemoryTLB;TopdownL6;tma_L6_group;tma_store_stlb_miss_group",
"MetricName": "tma_store_stlb_miss_1g",
@@ -2021,6 +2080,7 @@
},
{
"BriefDescription": "This metric estimates the fraction of cycles to walk the memory paging structures to cache translation of 2 or 4 MB pages for data store accesses.",
+ "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "tma_store_stlb_miss * DTLB_STORE_MISSES.WALK_COMPLETED_2M_4M / (DTLB_STORE_MISSES.WALK_COMPLETED_4K + DTLB_STORE_MISSES.WALK_COMPLETED_2M_4M + DTLB_STORE_MISSES.WALK_COMPLETED_1G)",
"MetricGroup": "MemoryTLB;TopdownL6;tma_L6_group;tma_store_stlb_miss_group",
"MetricName": "tma_store_stlb_miss_2m",
@@ -2029,6 +2089,7 @@
},
{
"BriefDescription": "This metric estimates the fraction of cycles to walk the memory paging structures to cache translation of 4 KB pages for data store accesses.",
+ "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "tma_store_stlb_miss * DTLB_STORE_MISSES.WALK_COMPLETED_4K / (DTLB_STORE_MISSES.WALK_COMPLETED_4K + DTLB_STORE_MISSES.WALK_COMPLETED_2M_4M + DTLB_STORE_MISSES.WALK_COMPLETED_1G)",
"MetricGroup": "MemoryTLB;TopdownL6;tma_L6_group;tma_store_stlb_miss_group",
"MetricName": "tma_store_stlb_miss_4k",
diff --git a/tools/perf/pmu-events/arch/x86/cascadelakex/uncore-cache.json b/tools/perf/pmu-events/arch/x86/cascadelakex/uncore-cache.json
index c9596e18ec09..30390d734051 100644
--- a/tools/perf/pmu-events/arch/x86/cascadelakex/uncore-cache.json
+++ b/tools/perf/pmu-events/arch/x86/cascadelakex/uncore-cache.json
@@ -22,7 +22,7 @@
"Unit": "CHA"
},
{
- "BriefDescription": "LLC misses - Uncacheable reads (from cpu) . Derived from unc_cha_tor_inserts.ia_miss",
+ "BriefDescription": "LLC misses - Uncacheable reads (from cpu). Derived from unc_cha_tor_inserts.ia_miss",
"Counter": "0,1,2,3",
"EventCode": "0x35",
"EventName": "LLC_MISSES.UNCACHEABLE",
diff --git a/tools/perf/pmu-events/arch/x86/cascadelakex/uncore-memory.json b/tools/perf/pmu-events/arch/x86/cascadelakex/uncore-memory.json
index 265cdf334f6a..aafa7af46e69 100644
--- a/tools/perf/pmu-events/arch/x86/cascadelakex/uncore-memory.json
+++ b/tools/perf/pmu-events/arch/x86/cascadelakex/uncore-memory.json
@@ -316,32 +316,32 @@
"Unit": "iMC"
},
{
- "BriefDescription": "Intel Optane DC persistent memory bandwidth read (MB/sec). Derived from unc_m_pmm_rpq_inserts",
+ "BriefDescription": "Intel Optane DC persistent memory bandwidth read (MiB/sec). Derived from unc_m_pmm_rpq_inserts",
"Counter": "0,1,2,3",
"EventCode": "0xE3",
"EventName": "UNC_M_PMM_BANDWIDTH.READ",
"PerPkg": "1",
- "ScaleUnit": "6.103515625E-5MB/sec",
+ "ScaleUnit": "6.103515625E-5MiB/sec",
"Unit": "iMC"
},
{
- "BriefDescription": "Intel Optane DC persistent memory bandwidth total (MB/sec). Derived from unc_m_pmm_rpq_inserts",
+ "BriefDescription": "Intel Optane DC persistent memory bandwidth total (MiB/sec). Derived from unc_m_pmm_rpq_inserts",
"Counter": "0,1,2,3",
"EventCode": "0xE3",
"EventName": "UNC_M_PMM_BANDWIDTH.TOTAL",
"MetricExpr": "UNC_M_PMM_RPQ_INSERTS + UNC_M_PMM_WPQ_INSERTS",
"MetricName": "UNC_M_PMM_BANDWIDTH.TOTAL",
"PerPkg": "1",
- "ScaleUnit": "6.103515625E-5MB/sec",
+ "ScaleUnit": "6.103515625E-5MiB/sec",
"Unit": "iMC"
},
{
- "BriefDescription": "Intel Optane DC persistent memory bandwidth write (MB/sec). Derived from unc_m_pmm_wpq_inserts",
+ "BriefDescription": "Intel Optane DC persistent memory bandwidth write (MiB/sec). Derived from unc_m_pmm_wpq_inserts",
"Counter": "0,1,2,3",
"EventCode": "0xE7",
"EventName": "UNC_M_PMM_BANDWIDTH.WRITE",
"PerPkg": "1",
- "ScaleUnit": "6.103515625E-5MB/sec",
+ "ScaleUnit": "6.103515625E-5MiB/sec",
"Unit": "iMC"
},
{
diff --git a/tools/perf/pmu-events/arch/x86/emeraldrapids/cache.json b/tools/perf/pmu-events/arch/x86/emeraldrapids/cache.json
index 10bdb193c16f..26568e4b77f7 100644
--- a/tools/perf/pmu-events/arch/x86/emeraldrapids/cache.json
+++ b/tools/perf/pmu-events/arch/x86/emeraldrapids/cache.json
@@ -1,10 +1,72 @@
[
{
+ "BriefDescription": "Hit snoop reply with data, line invalidated.",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x27",
+ "EventName": "CORE_SNOOP_RESPONSE.I_FWD_FE",
+ "PublicDescription": "Counts responses to snoops indicating the line will now be (I)nvalidated: removed from this core's cache, after the data is forwarded back to the requestor and indicating the data was found unmodified in the (FE) Forward or Exclusive State in this cores caches cache. A single snoop response from the core counts on all hyperthreads of the core.",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x20"
+ },
+ {
+ "BriefDescription": "HitM snoop reply with data, line invalidated.",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x27",
+ "EventName": "CORE_SNOOP_RESPONSE.I_FWD_M",
+ "PublicDescription": "Counts responses to snoops indicating the line will now be (I)nvalidated: removed from this core's caches, after the data is forwarded back to the requestor, and indicating the data was found modified(M) in this cores caches cache (aka HitM response). A single snoop response from the core counts on all hyperthreads of the core.",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x10"
+ },
+ {
+ "BriefDescription": "Hit snoop reply without sending the data, line invalidated.",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x27",
+ "EventName": "CORE_SNOOP_RESPONSE.I_HIT_FSE",
+ "PublicDescription": "Counts responses to snoops indicating the line will now be (I)nvalidated in this core's caches without forwarded back to the requestor. The line was in Forward, Shared or Exclusive (FSE) state in this cores caches. A single snoop response from the core counts on all hyperthreads of the core.",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x2"
+ },
+ {
+ "BriefDescription": "Line not found snoop reply",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x27",
+ "EventName": "CORE_SNOOP_RESPONSE.MISS",
+ "PublicDescription": "Counts responses to snoops indicating that the data was not found (IHitI) in this core's caches. A single snoop response from the core counts on all hyperthreads of the Core.",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Hit snoop reply with data, line kept in Shared state.",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x27",
+ "EventName": "CORE_SNOOP_RESPONSE.S_FWD_FE",
+ "PublicDescription": "Counts responses to snoops indicating the line may be kept on this core in the (S)hared state, after the data is forwarded back to the requestor, initially the data was found in the cache in the (FS) Forward or Shared state. A single snoop response from the core counts on all hyperthreads of the core.",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x40"
+ },
+ {
+ "BriefDescription": "HitM snoop reply with data, line kept in Shared state",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x27",
+ "EventName": "CORE_SNOOP_RESPONSE.S_FWD_M",
+ "PublicDescription": "Counts responses to snoops indicating the line may be kept on this core in the (S)hared state, after the data is forwarded back to the requestor, initially the data was found in the cache in the (M)odified state. A single snoop response from the core counts on all hyperthreads of the core.",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x8"
+ },
+ {
+ "BriefDescription": "Hit snoop reply without sending the data, line kept in Shared state.",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x27",
+ "EventName": "CORE_SNOOP_RESPONSE.S_HIT_FSE",
+ "PublicDescription": "Counts responses to snoops indicating the line was kept on this core in the (S)hared state, and that the data was found unmodified but not forwarded back to the requestor, initially the data was found in the cache in the (FSE) Forward, Shared state or Exclusive state. A single snoop response from the core counts on all hyperthreads of the core.",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x4"
+ },
+ {
"BriefDescription": "L1D.HWPF_MISS",
"Counter": "0,1,2,3",
"EventCode": "0x51",
"EventName": "L1D.HWPF_MISS",
- "PublicDescription": "L1D.HWPF_MISS Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x20"
},
@@ -13,7 +75,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x51",
"EventName": "L1D.REPLACEMENT",
- "PublicDescription": "Counts L1D data line replacements including opportunistic replacements, and replacements that require stall-for-replace or block-for-replace. Available PDIST counters: 0",
+ "PublicDescription": "Counts L1D data line replacements including opportunistic replacements, and replacements that require stall-for-replace or block-for-replace.",
"SampleAfterValue": "100003",
"UMask": "0x1"
},
@@ -22,7 +84,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x48",
"EventName": "L1D_PEND_MISS.FB_FULL",
- "PublicDescription": "Counts number of cycles a demand request has waited due to L1D Fill Buffer (FB) unavailability. Demand requests include cacheable/uncacheable demand load, store, lock or SW prefetch accesses. Available PDIST counters: 0",
+ "PublicDescription": "Counts number of cycles a demand request has waited due to L1D Fill Buffer (FB) unavailability. Demand requests include cacheable/uncacheable demand load, store, lock or SW prefetch accesses.",
"SampleAfterValue": "1000003",
"UMask": "0x2"
},
@@ -33,7 +95,7 @@
"EdgeDetect": "1",
"EventCode": "0x48",
"EventName": "L1D_PEND_MISS.FB_FULL_PERIODS",
- "PublicDescription": "Counts number of phases a demand request has waited due to L1D Fill Buffer (FB) unavailability. Demand requests include cacheable/uncacheable demand load, store, lock or SW prefetch accesses. Available PDIST counters: 0",
+ "PublicDescription": "Counts number of phases a demand request has waited due to L1D Fill Buffer (FB) unavailability. Demand requests include cacheable/uncacheable demand load, store, lock or SW prefetch accesses.",
"SampleAfterValue": "1000003",
"UMask": "0x2"
},
@@ -43,7 +105,6 @@
"Deprecated": "1",
"EventCode": "0x48",
"EventName": "L1D_PEND_MISS.L2_STALL",
- "PublicDescription": "This event is deprecated. Refer to new event L1D_PEND_MISS.L2_STALLS Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x4"
},
@@ -52,7 +113,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x48",
"EventName": "L1D_PEND_MISS.L2_STALLS",
- "PublicDescription": "Counts number of cycles a demand request has waited due to L1D due to lack of L2 resources. Demand requests include cacheable/uncacheable demand load, store, lock or SW prefetch accesses. Available PDIST counters: 0",
+ "PublicDescription": "Counts number of cycles a demand request has waited due to L1D due to lack of L2 resources. Demand requests include cacheable/uncacheable demand load, store, lock or SW prefetch accesses.",
"SampleAfterValue": "1000003",
"UMask": "0x4"
},
@@ -61,7 +122,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x48",
"EventName": "L1D_PEND_MISS.PENDING",
- "PublicDescription": "Counts number of L1D misses that are outstanding in each cycle, that is each cycle the number of Fill Buffers (FB) outstanding required by Demand Reads. FB either is held by demand loads, or it is held by non-demand loads and gets hit at least once by demand. The valid outstanding interval is defined until the FB deallocation by one of the following ways: from FB allocation, if FB is allocated by demand from the demand Hit FB, if it is allocated by hardware or software prefetch. Note: In the L1D, a Demand Read contains cacheable or noncacheable demand loads, including ones causing cache-line splits and reads due to page walks resulted from any request type. Available PDIST counters: 0",
+ "PublicDescription": "Counts number of L1D misses that are outstanding in each cycle, that is each cycle the number of Fill Buffers (FB) outstanding required by Demand Reads. FB either is held by demand loads, or it is held by non-demand loads and gets hit at least once by demand. The valid outstanding interval is defined until the FB deallocation by one of the following ways: from FB allocation, if FB is allocated by demand from the demand Hit FB, if it is allocated by hardware or software prefetch. Note: In the L1D, a Demand Read contains cacheable or noncacheable demand loads, including ones causing cache-line splits and reads due to page walks resulted from any request type.",
"SampleAfterValue": "1000003",
"UMask": "0x1"
},
@@ -71,7 +132,7 @@
"CounterMask": "1",
"EventCode": "0x48",
"EventName": "L1D_PEND_MISS.PENDING_CYCLES",
- "PublicDescription": "Counts duration of L1D miss outstanding in cycles. Available PDIST counters: 0",
+ "PublicDescription": "Counts duration of L1D miss outstanding in cycles.",
"SampleAfterValue": "1000003",
"UMask": "0x1"
},
@@ -80,7 +141,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x25",
"EventName": "L2_LINES_IN.ALL",
- "PublicDescription": "Counts the number of L2 cache lines filling the L2. Counting does not cover rejects. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of L2 cache lines filling the L2. Counting does not cover rejects.",
"SampleAfterValue": "100003",
"UMask": "0x1f"
},
@@ -89,7 +150,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x26",
"EventName": "L2_LINES_OUT.NON_SILENT",
- "PublicDescription": "Counts the number of lines that are evicted by L2 cache when triggered by an L2 cache fill. Those lines are in Modified state. Modified lines are written back to L3 Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of lines that are evicted by L2 cache when triggered by an L2 cache fill. Those lines are in Modified state. Modified lines are written back to L3",
"SampleAfterValue": "200003",
"UMask": "0x2"
},
@@ -98,7 +159,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x26",
"EventName": "L2_LINES_OUT.SILENT",
- "PublicDescription": "Counts the number of lines that are silently dropped by L2 cache. These lines are typically in Shared or Exclusive state. A non-threaded event. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of lines that are silently dropped by L2 cache. These lines are typically in Shared or Exclusive state. A non-threaded event.",
"SampleAfterValue": "200003",
"UMask": "0x1"
},
@@ -107,7 +168,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x26",
"EventName": "L2_LINES_OUT.USELESS_HWPF",
- "PublicDescription": "Counts the number of cache lines that have been prefetched by the L2 hardware prefetcher but not used by demand access when evicted from the L2 cache Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of cache lines that have been prefetched by the L2 hardware prefetcher but not used by demand access when evicted from the L2 cache",
"SampleAfterValue": "200003",
"UMask": "0x4"
},
@@ -116,7 +177,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_REQUEST.ALL",
- "PublicDescription": "Counts all requests that were hit or true misses in L2 cache. True-miss excludes misses that were merged with ongoing L2 misses. [This event is alias to L2_RQSTS.REFERENCES] Available PDIST counters: 0",
+ "PublicDescription": "Counts all requests that were hit or true misses in L2 cache. True-miss excludes misses that were merged with ongoing L2 misses. [This event is alias to L2_RQSTS.REFERENCES]",
"SampleAfterValue": "200003",
"UMask": "0xff"
},
@@ -125,7 +186,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_REQUEST.MISS",
- "PublicDescription": "Counts read requests of any type with true-miss in the L2 cache. True-miss excludes L2 misses that were merged with ongoing L2 misses. [This event is alias to L2_RQSTS.MISS] Available PDIST counters: 0",
+ "PublicDescription": "Counts read requests of any type with true-miss in the L2 cache. True-miss excludes L2 misses that were merged with ongoing L2 misses. [This event is alias to L2_RQSTS.MISS]",
"SampleAfterValue": "200003",
"UMask": "0x3f"
},
@@ -134,7 +195,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_RQSTS.ALL_CODE_RD",
- "PublicDescription": "Counts the total number of L2 code requests. Available PDIST counters: 0",
+ "PublicDescription": "Counts the total number of L2 code requests.",
"SampleAfterValue": "200003",
"UMask": "0xe4"
},
@@ -143,7 +204,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_RQSTS.ALL_DEMAND_DATA_RD",
- "PublicDescription": "Counts Demand Data Read requests accessing the L2 cache. These requests may hit or miss L2 cache. True-miss exclude misses that were merged with ongoing L2 misses. An access is counted once. Available PDIST counters: 0",
+ "PublicDescription": "Counts Demand Data Read requests accessing the L2 cache. These requests may hit or miss L2 cache. True-miss exclude misses that were merged with ongoing L2 misses. An access is counted once.",
"SampleAfterValue": "200003",
"UMask": "0xe1"
},
@@ -152,7 +213,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_RQSTS.ALL_DEMAND_MISS",
- "PublicDescription": "Counts demand requests that miss L2 cache. Available PDIST counters: 0",
+ "PublicDescription": "Counts demand requests that miss L2 cache.",
"SampleAfterValue": "200003",
"UMask": "0x27"
},
@@ -161,7 +222,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_RQSTS.ALL_DEMAND_REFERENCES",
- "PublicDescription": "Counts demand requests to L2 cache. Available PDIST counters: 0",
+ "PublicDescription": "Counts demand requests to L2 cache.",
"SampleAfterValue": "200003",
"UMask": "0xe7"
},
@@ -170,7 +231,6 @@
"Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_RQSTS.ALL_HWPF",
- "PublicDescription": "L2_RQSTS.ALL_HWPF Available PDIST counters: 0",
"SampleAfterValue": "200003",
"UMask": "0xf0"
},
@@ -179,7 +239,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_RQSTS.ALL_RFO",
- "PublicDescription": "Counts the total number of RFO (read for ownership) requests to L2 cache. L2 RFO requests include both L1D demand RFO misses as well as L1D RFO prefetches. Available PDIST counters: 0",
+ "PublicDescription": "Counts the total number of RFO (read for ownership) requests to L2 cache. L2 RFO requests include both L1D demand RFO misses as well as L1D RFO prefetches.",
"SampleAfterValue": "200003",
"UMask": "0xe2"
},
@@ -188,7 +248,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_RQSTS.CODE_RD_HIT",
- "PublicDescription": "Counts L2 cache hits when fetching instructions, code reads. Available PDIST counters: 0",
+ "PublicDescription": "Counts L2 cache hits when fetching instructions, code reads.",
"SampleAfterValue": "200003",
"UMask": "0xc4"
},
@@ -197,7 +257,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_RQSTS.CODE_RD_MISS",
- "PublicDescription": "Counts L2 cache misses when fetching instructions. Available PDIST counters: 0",
+ "PublicDescription": "Counts L2 cache misses when fetching instructions.",
"SampleAfterValue": "200003",
"UMask": "0x24"
},
@@ -206,7 +266,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_RQSTS.DEMAND_DATA_RD_HIT",
- "PublicDescription": "Counts the number of demand Data Read requests initiated by load instructions that hit L2 cache. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of demand Data Read requests initiated by load instructions that hit L2 cache.",
"SampleAfterValue": "200003",
"UMask": "0xc1"
},
@@ -215,7 +275,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_RQSTS.DEMAND_DATA_RD_MISS",
- "PublicDescription": "Counts demand Data Read requests with true-miss in the L2 cache. True-miss excludes misses that were merged with ongoing L2 misses. An access is counted once. Available PDIST counters: 0",
+ "PublicDescription": "Counts demand Data Read requests with true-miss in the L2 cache. True-miss excludes misses that were merged with ongoing L2 misses. An access is counted once.",
"SampleAfterValue": "200003",
"UMask": "0x21"
},
@@ -224,7 +284,6 @@
"Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_RQSTS.HWPF_MISS",
- "PublicDescription": "L2_RQSTS.HWPF_MISS Available PDIST counters: 0",
"SampleAfterValue": "200003",
"UMask": "0x30"
},
@@ -233,7 +292,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_RQSTS.MISS",
- "PublicDescription": "Counts read requests of any type with true-miss in the L2 cache. True-miss excludes L2 misses that were merged with ongoing L2 misses. [This event is alias to L2_REQUEST.MISS] Available PDIST counters: 0",
+ "PublicDescription": "Counts read requests of any type with true-miss in the L2 cache. True-miss excludes L2 misses that were merged with ongoing L2 misses. [This event is alias to L2_REQUEST.MISS]",
"SampleAfterValue": "200003",
"UMask": "0x3f"
},
@@ -242,7 +301,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_RQSTS.REFERENCES",
- "PublicDescription": "Counts all requests that were hit or true misses in L2 cache. True-miss excludes misses that were merged with ongoing L2 misses. [This event is alias to L2_REQUEST.ALL] Available PDIST counters: 0",
+ "PublicDescription": "Counts all requests that were hit or true misses in L2 cache. True-miss excludes misses that were merged with ongoing L2 misses. [This event is alias to L2_REQUEST.ALL]",
"SampleAfterValue": "200003",
"UMask": "0xff"
},
@@ -251,7 +310,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_RQSTS.RFO_HIT",
- "PublicDescription": "Counts the RFO (Read-for-Ownership) requests that hit L2 cache. Available PDIST counters: 0",
+ "PublicDescription": "Counts the RFO (Read-for-Ownership) requests that hit L2 cache.",
"SampleAfterValue": "200003",
"UMask": "0xc2"
},
@@ -260,7 +319,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_RQSTS.RFO_MISS",
- "PublicDescription": "Counts the RFO (Read-for-Ownership) requests that miss L2 cache. Available PDIST counters: 0",
+ "PublicDescription": "Counts the RFO (Read-for-Ownership) requests that miss L2 cache.",
"SampleAfterValue": "200003",
"UMask": "0x22"
},
@@ -269,7 +328,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_RQSTS.SWPF_HIT",
- "PublicDescription": "Counts Software prefetch requests that hit the L2 cache. Accounts for PREFETCHNTA and PREFETCHT0/1/2 instructions when FB is not full. Available PDIST counters: 0",
+ "PublicDescription": "Counts Software prefetch requests that hit the L2 cache. Accounts for PREFETCHNTA and PREFETCHT0/1/2 instructions when FB is not full.",
"SampleAfterValue": "200003",
"UMask": "0xc8"
},
@@ -278,7 +337,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_RQSTS.SWPF_MISS",
- "PublicDescription": "Counts Software prefetch requests that miss the L2 cache. Accounts for PREFETCHNTA and PREFETCHT0/1/2 instructions when FB is not full. Available PDIST counters: 0",
+ "PublicDescription": "Counts Software prefetch requests that miss the L2 cache. Accounts for PREFETCHNTA and PREFETCHT0/1/2 instructions when FB is not full.",
"SampleAfterValue": "200003",
"UMask": "0x28"
},
@@ -287,7 +346,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x23",
"EventName": "L2_TRANS.L2_WB",
- "PublicDescription": "Counts L2 writebacks that access L2 cache. Available PDIST counters: 0",
+ "PublicDescription": "Counts L2 writebacks that access L2 cache.",
"SampleAfterValue": "200003",
"UMask": "0x40"
},
@@ -296,7 +355,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0x2e",
"EventName": "LONGEST_LAT_CACHE.MISS",
- "PublicDescription": "Counts core-originated cacheable requests that miss the L3 cache (Longest Latency cache). Requests include data and code reads, Reads-for-Ownership (RFOs), speculative accesses and hardware prefetches to the L1 and L2. It does not include hardware prefetches to the L3, and may not count other types of requests to the L3. Available PDIST counters: 0",
+ "PublicDescription": "Counts core-originated cacheable requests that miss the L3 cache (Longest Latency cache). Requests include data and code reads, Reads-for-Ownership (RFOs), speculative accesses and hardware prefetches to the L1 and L2. It does not include hardware prefetches to the L3, and may not count other types of requests to the L3.",
"SampleAfterValue": "100003",
"UMask": "0x41"
},
@@ -305,7 +364,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0x2e",
"EventName": "LONGEST_LAT_CACHE.REFERENCE",
- "PublicDescription": "Counts core-originated cacheable requests to the L3 cache (Longest Latency cache). Requests include data and code reads, Reads-for-Ownership (RFOs), speculative accesses and hardware prefetches to the L1 and L2. It does not include hardware prefetches to the L3, and may not count other types of requests to the L3. Available PDIST counters: 0",
+ "PublicDescription": "Counts core-originated cacheable requests to the L3 cache (Longest Latency cache). Requests include data and code reads, Reads-for-Ownership (RFOs), speculative accesses and hardware prefetches to the L1 and L2. It does not include hardware prefetches to the L3, and may not count other types of requests to the L3.",
"SampleAfterValue": "100003",
"UMask": "0x4f"
},
@@ -394,7 +453,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x43",
"EventName": "MEM_LOAD_COMPLETED.L1_MISS_ANY",
- "PublicDescription": "Number of completed demand load requests that missed the L1 data cache including shadow misses (FB hits, merge to an ongoing L1D miss) Available PDIST counters: 0",
+ "PublicDescription": "Number of completed demand load requests that missed the L1 data cache including shadow misses (FB hits, merge to an ongoing L1D miss)",
"SampleAfterValue": "1000003",
"UMask": "0xfd"
},
@@ -563,7 +622,6 @@
"Counter": "0,1,2,3",
"EventCode": "0x44",
"EventName": "MEM_STORE_RETIRED.L2_HIT",
- "PublicDescription": "MEM_STORE_RETIRED.L2_HIT Available PDIST counters: 0",
"SampleAfterValue": "200003",
"UMask": "0x1"
},
@@ -572,7 +630,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xe5",
"EventName": "MEM_UOP_RETIRED.ANY",
- "PublicDescription": "Number of retired micro-operations (uops) for load or store memory accesses Available PDIST counters: 0",
+ "PublicDescription": "Number of retired micro-operations (uops) for load or store memory accesses",
"SampleAfterValue": "1000003",
"UMask": "0x3"
},
@@ -999,7 +1057,6 @@
"Counter": "0,1,2,3",
"EventCode": "0x21",
"EventName": "OFFCORE_REQUESTS.ALL_REQUESTS",
- "PublicDescription": "OFFCORE_REQUESTS.ALL_REQUESTS Available PDIST counters: 0",
"SampleAfterValue": "100003",
"UMask": "0x80"
},
@@ -1008,7 +1065,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x21",
"EventName": "OFFCORE_REQUESTS.DATA_RD",
- "PublicDescription": "Counts the demand and prefetch data reads. All Core Data Reads include cacheable 'Demands' and L2 prefetchers (not L3 prefetchers). Counting also covers reads due to page walks resulted from any request type. Available PDIST counters: 0",
+ "PublicDescription": "Counts the demand and prefetch data reads. All Core Data Reads include cacheable 'Demands' and L2 prefetchers (not L3 prefetchers). Counting also covers reads due to page walks resulted from any request type.",
"SampleAfterValue": "100003",
"UMask": "0x8"
},
@@ -1017,7 +1074,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x21",
"EventName": "OFFCORE_REQUESTS.DEMAND_CODE_RD",
- "PublicDescription": "Counts both cacheable and non-cacheable code read requests. Available PDIST counters: 0",
+ "PublicDescription": "Counts both cacheable and non-cacheable code read requests.",
"SampleAfterValue": "100003",
"UMask": "0x2"
},
@@ -1026,7 +1083,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x21",
"EventName": "OFFCORE_REQUESTS.DEMAND_DATA_RD",
- "PublicDescription": "Counts the Demand Data Read requests sent to uncore. Use it in conjunction with OFFCORE_REQUESTS_OUTSTANDING to determine average latency in the uncore. Available PDIST counters: 0",
+ "PublicDescription": "Counts the Demand Data Read requests sent to uncore. Use it in conjunction with OFFCORE_REQUESTS_OUTSTANDING to determine average latency in the uncore.",
"SampleAfterValue": "100003",
"UMask": "0x1"
},
@@ -1035,7 +1092,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x21",
"EventName": "OFFCORE_REQUESTS.DEMAND_RFO",
- "PublicDescription": "Counts the demand RFO (read for ownership) requests including regular RFOs, locks, ItoM. Available PDIST counters: 0",
+ "PublicDescription": "Counts the demand RFO (read for ownership) requests including regular RFOs, locks, ItoM.",
"SampleAfterValue": "100003",
"UMask": "0x4"
},
@@ -1045,7 +1102,6 @@
"Deprecated": "1",
"EventCode": "0x20",
"EventName": "OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD",
- "PublicDescription": "This event is deprecated. Refer to new event OFFCORE_REQUESTS_OUTSTANDING.DATA_RD Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x8"
},
@@ -1055,7 +1111,6 @@
"CounterMask": "1",
"EventCode": "0x20",
"EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD",
- "PublicDescription": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x8"
},
@@ -1065,7 +1120,7 @@
"CounterMask": "1",
"EventCode": "0x20",
"EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_CODE_RD",
- "PublicDescription": "Counts the number of offcore outstanding Code Reads transactions in the super queue every cycle. The 'Offcore outstanding' state of the transaction lasts from the L2 miss until the sending transaction completion to requestor (SQ deallocation). See the corresponding Umask under OFFCORE_REQUESTS. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of offcore outstanding Code Reads transactions in the super queue every cycle. The 'Offcore outstanding' state of the transaction lasts from the L2 miss until the sending transaction completion to requestor (SQ deallocation). See the corresponding Umask under OFFCORE_REQUESTS.",
"SampleAfterValue": "1000003",
"UMask": "0x2"
},
@@ -1075,7 +1130,6 @@
"CounterMask": "1",
"EventCode": "0x20",
"EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_DATA_RD",
- "PublicDescription": "Cycles where at least 1 outstanding demand data read request is pending. Available PDIST counters: 0",
"SampleAfterValue": "2000003",
"UMask": "0x1"
},
@@ -1085,7 +1139,6 @@
"CounterMask": "1",
"EventCode": "0x20",
"EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO",
- "PublicDescription": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x4"
},
@@ -1094,7 +1147,6 @@
"Counter": "0,1,2,3",
"EventCode": "0x20",
"EventName": "OFFCORE_REQUESTS_OUTSTANDING.DATA_RD",
- "PublicDescription": "OFFCORE_REQUESTS_OUTSTANDING.DATA_RD Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x8"
},
@@ -1103,7 +1155,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x20",
"EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_CODE_RD",
- "PublicDescription": "Counts the number of offcore outstanding Code Reads transactions in the super queue every cycle. The 'Offcore outstanding' state of the transaction lasts from the L2 miss until the sending transaction completion to requestor (SQ deallocation). See the corresponding Umask under OFFCORE_REQUESTS. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of offcore outstanding Code Reads transactions in the super queue every cycle. The 'Offcore outstanding' state of the transaction lasts from the L2 miss until the sending transaction completion to requestor (SQ deallocation). See the corresponding Umask under OFFCORE_REQUESTS.",
"SampleAfterValue": "1000003",
"UMask": "0x2"
},
@@ -1112,7 +1164,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x20",
"EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD",
- "PublicDescription": "For every cycle, increments by the number of outstanding demand data read requests pending. Requests are considered outstanding from the time they miss the core's L2 cache until the transaction completion message is sent to the requestor. Available PDIST counters: 0",
+ "PublicDescription": "For every cycle, increments by the number of outstanding demand data read requests pending. Requests are considered outstanding from the time they miss the core's L2 cache until the transaction completion message is sent to the requestor.",
"SampleAfterValue": "1000003",
"UMask": "0x1"
},
@@ -1121,7 +1173,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x2c",
"EventName": "SQ_MISC.BUS_LOCK",
- "PublicDescription": "Counts the more expensive bus lock needed to enforce cache coherency for certain memory accesses that need to be done atomically. Can be created by issuing an atomic instruction (via the LOCK prefix) which causes a cache line split or accesses uncacheable memory. Available PDIST counters: 0",
+ "PublicDescription": "Counts the more expensive bus lock needed to enforce cache coherency for certain memory accesses that need to be done atomically. Can be created by issuing an atomic instruction (via the LOCK prefix) which causes a cache line split or accesses uncacheable memory.",
"SampleAfterValue": "100003",
"UMask": "0x10"
},
@@ -1130,7 +1182,6 @@
"Counter": "0,1,2,3",
"EventCode": "0x40",
"EventName": "SW_PREFETCH_ACCESS.ANY",
- "PublicDescription": "Counts the number of PREFETCHNTA, PREFETCHW, PREFETCHT0, PREFETCHT1 or PREFETCHT2 instructions executed. Available PDIST counters: 0",
"SampleAfterValue": "100003",
"UMask": "0xf"
},
@@ -1139,7 +1190,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x40",
"EventName": "SW_PREFETCH_ACCESS.NTA",
- "PublicDescription": "Counts the number of PREFETCHNTA instructions executed. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of PREFETCHNTA instructions executed.",
"SampleAfterValue": "100003",
"UMask": "0x1"
},
@@ -1148,7 +1199,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x40",
"EventName": "SW_PREFETCH_ACCESS.PREFETCHW",
- "PublicDescription": "Counts the number of PREFETCHW instructions executed. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of PREFETCHW instructions executed.",
"SampleAfterValue": "100003",
"UMask": "0x8"
},
@@ -1157,7 +1208,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x40",
"EventName": "SW_PREFETCH_ACCESS.T0",
- "PublicDescription": "Counts the number of PREFETCHT0 instructions executed. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of PREFETCHT0 instructions executed.",
"SampleAfterValue": "100003",
"UMask": "0x2"
},
@@ -1166,7 +1217,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x40",
"EventName": "SW_PREFETCH_ACCESS.T1_T2",
- "PublicDescription": "Counts the number of PREFETCHT1 or PREFETCHT2 instructions executed. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of PREFETCHT1 or PREFETCHT2 instructions executed.",
"SampleAfterValue": "100003",
"UMask": "0x4"
}
diff --git a/tools/perf/pmu-events/arch/x86/emeraldrapids/emr-metrics.json b/tools/perf/pmu-events/arch/x86/emeraldrapids/emr-metrics.json
index 34e1cbcd722c..433ae5f50704 100644
--- a/tools/perf/pmu-events/arch/x86/emeraldrapids/emr-metrics.json
+++ b/tools/perf/pmu-events/arch/x86/emeraldrapids/emr-metrics.json
@@ -1,28 +1,28 @@
[
{
"BriefDescription": "C1 residency percent per core",
- "MetricExpr": "cstate_core@c1\\-residency@ / TSC",
+ "MetricExpr": "cstate_core@c1\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C1_Core_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C2 residency percent per package",
- "MetricExpr": "cstate_pkg@c2\\-residency@ / TSC",
+ "MetricExpr": "cstate_pkg@c2\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C2_Pkg_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C6 residency percent per core",
- "MetricExpr": "cstate_core@c6\\-residency@ / TSC",
+ "MetricExpr": "cstate_core@c6\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C6_Core_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C6 residency percent per package",
- "MetricExpr": "cstate_pkg@c6\\-residency@ / TSC",
+ "MetricExpr": "cstate_pkg@c6\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C6_Pkg_Residency",
"ScaleUnit": "100%"
@@ -40,6 +40,18 @@
"ScaleUnit": "1per_instr"
},
{
+ "BriefDescription": "The average number of cores that are in cstate C0 as observed by the power control unit (PCU)",
+ "MetricExpr": "UNC_P_POWER_STATE_OCCUPANCY_CORES_C0 / UNC_P_CLOCKTICKS * #num_packages",
+ "MetricGroup": "cpu_cstate",
+ "MetricName": "cpu_cstate_c0"
+ },
+ {
+ "BriefDescription": "The average number of cores are in cstate C6 as observed by the power control unit (PCU)",
+ "MetricExpr": "UNC_P_POWER_STATE_OCCUPANCY_CORES_C6 / UNC_P_CLOCKTICKS * #num_packages",
+ "MetricGroup": "cpu_cstate",
+ "MetricName": "cpu_cstate_c6"
+ },
+ {
"BriefDescription": "CPU operating frequency (in GHz)",
"MetricExpr": "CPU_CLK_UNHALTED.THREAD / CPU_CLK_UNHALTED.REF_TSC * #SYSTEM_TSC_FREQ / 1e9",
"MetricName": "cpu_operating_frequency",
@@ -79,6 +91,12 @@
"ScaleUnit": "1MB/s"
},
{
+ "BriefDescription": "Bandwidth of inbound IO reads that are initiated by end device controllers that are requesting memory from the CPU and miss the L3 cache",
+ "MetricExpr": "UNC_CHA_TOR_INSERTS.IO_MISS_PCIRDCUR * 64 / 1e6 / duration_time",
+ "MetricName": "io_bandwidth_read_l3_miss",
+ "ScaleUnit": "1MB/s"
+ },
+ {
"BriefDescription": "Bandwidth of IO reads that are initiated by end device controllers that are requesting memory from the local CPU socket",
"MetricExpr": "UNC_CHA_TOR_INSERTS.IO_PCIRDCUR_LOCAL * 64 / 1e6 / duration_time",
"MetricName": "io_bandwidth_read_local",
@@ -97,6 +115,12 @@
"ScaleUnit": "1MB/s"
},
{
+ "BriefDescription": "Bandwidth of inbound IO writes that are initiated by end device controllers that are writing memory to the CPU",
+ "MetricExpr": "(UNC_CHA_TOR_INSERTS.IO_MISS_ITOM + UNC_CHA_TOR_INSERTS.IO_MISS_ITOMCACHENEAR) * 64 / 1e6 / duration_time",
+ "MetricName": "io_bandwidth_write_l3_miss",
+ "ScaleUnit": "1MB/s"
+ },
+ {
"BriefDescription": "Bandwidth of IO writes that are initiated by end device controllers that are writing memory to the local CPU socket",
"MetricExpr": "(UNC_CHA_TOR_INSERTS.IO_ITOM_LOCAL + UNC_CHA_TOR_INSERTS.IO_ITOMCACHENEAR_LOCAL) * 64 / 1e6 / duration_time",
"MetricName": "io_bandwidth_write_local",
@@ -111,19 +135,19 @@
{
"BriefDescription": "Percentage of inbound full cacheline writes initiated by end device controllers that miss the L3 cache",
"MetricExpr": "UNC_CHA_TOR_INSERTS.IO_MISS_ITOM / UNC_CHA_TOR_INSERTS.IO_ITOM",
- "MetricName": "io_percent_of_inbound_full_writes_that_miss_l3",
+ "MetricName": "io_full_write_l3_miss",
"ScaleUnit": "100%"
},
{
"BriefDescription": "Percentage of inbound partial cacheline writes initiated by end device controllers that miss the L3 cache",
"MetricExpr": "(UNC_CHA_TOR_INSERTS.IO_MISS_ITOMCACHENEAR + UNC_CHA_TOR_INSERTS.IO_MISS_RFO) / (UNC_CHA_TOR_INSERTS.IO_ITOMCACHENEAR + UNC_CHA_TOR_INSERTS.IO_RFO)",
- "MetricName": "io_percent_of_inbound_partial_writes_that_miss_l3",
+ "MetricName": "io_partial_write_l3_miss",
"ScaleUnit": "100%"
},
{
"BriefDescription": "Percentage of inbound reads initiated by end device controllers that miss the L3 cache",
"MetricExpr": "UNC_CHA_TOR_INSERTS.IO_MISS_PCIRDCUR / UNC_CHA_TOR_INSERTS.IO_PCIRDCUR",
- "MetricName": "io_percent_of_inbound_reads_that_miss_l3",
+ "MetricName": "io_read_l3_miss",
"ScaleUnit": "100%"
},
{
@@ -335,7 +359,7 @@
{
"BriefDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend",
"DefaultMetricgroupName": "TopdownL1",
- "MetricExpr": "topdown\\-be\\-bound / (topdown\\-fe\\-bound + topdown\\-bad\\-spec + topdown\\-retiring + topdown\\-be\\-bound) + 0 * tma_info_thread_slots",
+ "MetricExpr": "topdown\\-be\\-bound / (topdown\\-fe\\-bound + topdown\\-bad\\-spec + topdown\\-retiring + topdown\\-be\\-bound)",
"MetricGroup": "BvOB;Default;TmaL1;TopdownL1;tma_L1_group",
"MetricName": "tma_backend_bound",
"MetricThreshold": "tma_backend_bound > 0.2",
@@ -370,39 +394,39 @@
"PublicDescription": "Total pipeline cost of instructions used for program control-flow - a subset of the Retiring category in TMA. Examples include function calls; loops and alignments. (A lower bound)"
},
{
+ "BriefDescription": "Total pipeline cost when the execution is compute-bound - an estimation",
+ "MetricExpr": "100 * (tma_core_bound * tma_divider / (tma_amx_busy + tma_divider + tma_ports_utilization + tma_serializing_operation) + tma_core_bound * tma_amx_busy / (tma_amx_busy + tma_divider + tma_ports_utilization + tma_serializing_operation) + tma_core_bound * (tma_ports_utilization / (tma_amx_busy + tma_divider + tma_ports_utilization + tma_serializing_operation)) * (tma_ports_utilized_3m / (tma_ports_utilized_0 + tma_ports_utilized_1 + tma_ports_utilized_2 + tma_ports_utilized_3m)))",
+ "MetricGroup": "BvCB;Cor;tma_issueComp",
+ "MetricName": "tma_bottleneck_compute_bound_est",
+ "MetricThreshold": "tma_bottleneck_compute_bound_est > 20",
+ "PublicDescription": "Total pipeline cost when the execution is compute-bound - an estimation. Covers Core Bound when High ILP as well as when long-latency execution units are busy. Related metrics: "
+ },
+ {
"BriefDescription": "Total pipeline cost of external Memory- or Cache-Bandwidth related bottlenecks",
- "MetricExpr": "100 * (tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_mem_bandwidth / (tma_mem_bandwidth + tma_mem_latency)) + tma_memory_bound * (tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_sq_full / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full)) + tma_memory_bound * (tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_fb_full / (tma_dtlb_load + tma_fb_full + tma_l1_latency_dependency + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)))",
+ "MetricExpr": "100 * (tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_mem_bandwidth / (tma_mem_bandwidth + tma_mem_latency)) + 0 / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * tma_mem_bandwidth / (tma_mem_bandwidth + tma_mem_latency) + tma_memory_bound * (tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_sq_full / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full)) + tma_memory_bound * (tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_fb_full / (tma_dtlb_load + tma_fb_full + tma_l1_latency_dependency + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)))",
"MetricGroup": "BvMB;Mem;MemoryBW;Offcore;tma_issueBW",
- "MetricName": "tma_bottleneck_cache_memory_bandwidth",
- "MetricThreshold": "tma_bottleneck_cache_memory_bandwidth > 20",
+ "MetricName": "tma_bottleneck_data_cache_memory_bandwidth",
+ "MetricThreshold": "tma_bottleneck_data_cache_memory_bandwidth > 20",
"PublicDescription": "Total pipeline cost of external Memory- or Cache-Bandwidth related bottlenecks. Related metrics: tma_fb_full, tma_info_system_dram_bw_use, tma_mem_bandwidth, tma_sq_full"
},
{
"BriefDescription": "Total pipeline cost of external Memory- or Cache-Latency related bottlenecks",
- "MetricExpr": "100 * (tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_mem_latency / (tma_mem_bandwidth + tma_mem_latency)) + tma_memory_bound * (tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_l3_hit_latency / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full)) + tma_memory_bound * tma_l2_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) + tma_memory_bound * (tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_l1_latency_dependency / (tma_dtlb_load + tma_fb_full + tma_l1_latency_dependency + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)) + tma_memory_bound * (tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_lock_latency / (tma_dtlb_load + tma_fb_full + tma_l1_latency_dependency + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)) + tma_memory_bound * (tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_split_loads / (tma_dtlb_load + tma_fb_full + tma_l1_latency_dependency + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)) + tma_memory_bound * (tma_store_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_split_stores / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency + tma_streaming_stores)) + tma_memory_bound * (tma_store_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_store_latency / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency + tma_streaming_stores)))",
+ "MetricExpr": "100 * (tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_mem_latency / (tma_mem_bandwidth + tma_mem_latency)) + 0 / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * tma_mem_latency / (tma_mem_bandwidth + tma_mem_latency) + tma_memory_bound * (tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_l3_hit_latency / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full)) + tma_memory_bound * tma_l2_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) + tma_memory_bound * (tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_l1_latency_dependency / (tma_dtlb_load + tma_fb_full + tma_l1_latency_dependency + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)) + tma_memory_bound * (tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_lock_latency / (tma_dtlb_load + tma_fb_full + tma_l1_latency_dependency + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)) + tma_memory_bound * (tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_split_loads / (tma_dtlb_load + tma_fb_full + tma_l1_latency_dependency + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)) + tma_memory_bound * (tma_store_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_split_stores / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency + tma_streaming_stores)) + tma_memory_bound * (tma_store_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_store_latency / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency + tma_streaming_stores)))",
"MetricGroup": "BvML;Mem;MemoryLat;Offcore;tma_issueLat",
- "MetricName": "tma_bottleneck_cache_memory_latency",
- "MetricThreshold": "tma_bottleneck_cache_memory_latency > 20",
+ "MetricName": "tma_bottleneck_data_cache_memory_latency",
+ "MetricThreshold": "tma_bottleneck_data_cache_memory_latency > 20",
"PublicDescription": "Total pipeline cost of external Memory- or Cache-Latency related bottlenecks. Related metrics: tma_l3_hit_latency, tma_mem_latency"
},
{
- "BriefDescription": "Total pipeline cost when the execution is compute-bound - an estimation",
- "MetricExpr": "100 * (tma_core_bound * tma_divider / (tma_amx_busy + tma_divider + tma_ports_utilization + tma_serializing_operation) + tma_core_bound * tma_amx_busy / (tma_amx_busy + tma_divider + tma_ports_utilization + tma_serializing_operation) + tma_core_bound * (tma_ports_utilization / (tma_amx_busy + tma_divider + tma_ports_utilization + tma_serializing_operation)) * (tma_ports_utilized_3m / (tma_ports_utilized_0 + tma_ports_utilized_1 + tma_ports_utilized_2 + tma_ports_utilized_3m)))",
- "MetricGroup": "BvCB;Cor;tma_issueComp",
- "MetricName": "tma_bottleneck_compute_bound_est",
- "MetricThreshold": "tma_bottleneck_compute_bound_est > 20",
- "PublicDescription": "Total pipeline cost when the execution is compute-bound - an estimation. Covers Core Bound when High ILP as well as when long-latency execution units are busy. Related metrics: "
- },
- {
"BriefDescription": "Total pipeline cost of instruction fetch bandwidth related bottlenecks (when the front-end could not sustain operations delivery to the back-end)",
- "MetricExpr": "100 * (tma_frontend_bound - (1 - 10 * tma_microcode_sequencer * tma_other_mispredicts / tma_branch_mispredicts) * tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches) - (1 - INST_RETIRED.REP_ITERATION / cpu@UOPS_RETIRED.MS\\,cmask\\=1@) * (tma_fetch_latency * (tma_ms_switches + tma_branch_resteers * (tma_clears_resteers + tma_mispredicts_resteers * tma_other_mispredicts / tma_branch_mispredicts) / (tma_clears_resteers + tma_mispredicts_resteers + tma_unknown_branches)) / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches) + tma_fetch_bandwidth * tma_ms / (tma_dsb + tma_mite + tma_ms))) - tma_bottleneck_big_code",
+ "MetricExpr": "100 * (tma_frontend_bound - (1 - 10 * tma_microcode_sequencer * tma_other_mispredicts / tma_branch_mispredicts) * tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches) - (1 - INST_RETIRED.REP_ITERATION / cpu@UOPS_RETIRED.MS\\,cmask\\=1@) * (tma_fetch_latency * (tma_ms_switches + tma_branch_resteers * (tma_clears_resteers + tma_mispredicts_resteers * tma_other_mispredicts / tma_branch_mispredicts) / (tma_clears_resteers + tma_mispredicts_resteers + tma_unknown_branches)) / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches) + tma_ms)) - tma_bottleneck_big_code",
"MetricGroup": "BvFB;Fed;FetchBW;Frontend",
"MetricName": "tma_bottleneck_instruction_fetch_bw",
"MetricThreshold": "tma_bottleneck_instruction_fetch_bw > 20"
},
{
"BriefDescription": "Total pipeline cost of irregular execution (e.g",
- "MetricExpr": "100 * ((1 - INST_RETIRED.REP_ITERATION / cpu@UOPS_RETIRED.MS\\,cmask\\=1@) * (tma_fetch_latency * (tma_ms_switches + tma_branch_resteers * (tma_clears_resteers + tma_mispredicts_resteers * tma_other_mispredicts / tma_branch_mispredicts) / (tma_clears_resteers + tma_mispredicts_resteers + tma_unknown_branches)) / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches) + tma_fetch_bandwidth * tma_ms / (tma_dsb + tma_mite + tma_ms)) + 10 * tma_microcode_sequencer * tma_other_mispredicts / tma_branch_mispredicts * tma_branch_mispredicts + tma_machine_clears * tma_other_nukes / tma_other_nukes + tma_core_bound * (tma_serializing_operation + RS.EMPTY_RESOURCE / tma_info_thread_clks * tma_ports_utilized_0) / (tma_amx_busy + tma_divider + tma_ports_utilization + tma_serializing_operation) + tma_microcode_sequencer / (tma_few_uops_instructions + tma_microcode_sequencer) * (tma_assists / tma_microcode_sequencer) * tma_heavy_operations)",
+ "MetricExpr": "100 * ((1 - INST_RETIRED.REP_ITERATION / cpu@UOPS_RETIRED.MS\\,cmask\\=1@) * (tma_fetch_latency * (tma_ms_switches + tma_branch_resteers * (tma_clears_resteers + tma_mispredicts_resteers * tma_other_mispredicts / tma_branch_mispredicts) / (tma_clears_resteers + tma_mispredicts_resteers + tma_unknown_branches)) / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches) + tma_ms) + 10 * tma_microcode_sequencer * tma_other_mispredicts / tma_branch_mispredicts * tma_branch_mispredicts + tma_machine_clears * tma_other_nukes / tma_other_nukes + tma_core_bound * (tma_serializing_operation + RS.EMPTY_RESOURCE / tma_info_thread_clks * tma_ports_utilized_0) / (tma_amx_busy + tma_divider + tma_ports_utilization + tma_serializing_operation) + tma_microcode_sequencer / (tma_few_uops_instructions + tma_microcode_sequencer) * (tma_assists / tma_microcode_sequencer) * tma_heavy_operations)",
"MetricGroup": "Bad;BvIO;Cor;Ret;tma_issueMS",
"MetricName": "tma_bottleneck_irregular_overhead",
"MetricThreshold": "tma_bottleneck_irregular_overhead > 10",
@@ -434,7 +458,7 @@
},
{
"BriefDescription": "Total pipeline cost of remaining bottlenecks in the back-end",
- "MetricExpr": "100 - (tma_bottleneck_big_code + tma_bottleneck_instruction_fetch_bw + tma_bottleneck_mispredictions + tma_bottleneck_cache_memory_bandwidth + tma_bottleneck_cache_memory_latency + tma_bottleneck_memory_data_tlbs + tma_bottleneck_memory_synchronization + tma_bottleneck_compute_bound_est + tma_bottleneck_irregular_overhead + tma_bottleneck_branching_overhead + tma_bottleneck_useful_work)",
+ "MetricExpr": "100 - (tma_bottleneck_big_code + tma_bottleneck_instruction_fetch_bw + tma_bottleneck_mispredictions + tma_bottleneck_data_cache_memory_bandwidth + tma_bottleneck_data_cache_memory_latency + tma_bottleneck_memory_data_tlbs + tma_bottleneck_memory_synchronization + tma_bottleneck_compute_bound_est + tma_bottleneck_irregular_overhead + tma_bottleneck_branching_overhead + tma_bottleneck_useful_work)",
"MetricGroup": "BvOB;Cor;Offcore",
"MetricName": "tma_bottleneck_other_bottlenecks",
"MetricThreshold": "tma_bottleneck_other_bottlenecks > 20",
@@ -450,7 +474,7 @@
{
"BriefDescription": "This metric represents fraction of slots the CPU has wasted due to Branch Misprediction",
"DefaultMetricgroupName": "TopdownL2",
- "MetricExpr": "topdown\\-br\\-mispredict / (topdown\\-fe\\-bound + topdown\\-bad\\-spec + topdown\\-retiring + topdown\\-be\\-bound) + 0 * tma_info_thread_slots",
+ "MetricExpr": "topdown\\-br\\-mispredict / (topdown\\-fe\\-bound + topdown\\-bad\\-spec + topdown\\-retiring + topdown\\-be\\-bound)",
"MetricGroup": "BadSpec;BrMispredicts;BvMP;Default;TmaL2;TopdownL2;tma_L2_group;tma_bad_speculation_group;tma_issueBM",
"MetricName": "tma_branch_mispredicts",
"MetricThreshold": "tma_branch_mispredicts > 0.1 & tma_bad_speculation > 0.15",
@@ -551,7 +575,6 @@
},
{
"BriefDescription": "This metric estimates fraction of cycles while the memory subsystem was handling synchronizations due to contested accesses",
- "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "(76.6 * tma_info_system_core_frequency * (MEM_LOAD_L3_HIT_RETIRED.XSNP_FWD * (OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM / (OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM + OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD))) + 74.6 * tma_info_system_core_frequency * MEM_LOAD_L3_HIT_RETIRED.XSNP_MISS) * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS / 2) / tma_info_thread_clks",
"MetricGroup": "BvMS;DataSharing;LockCont;Offcore;Snoop;TopdownL4;tma_L4_group;tma_issueSyncxn;tma_l3_bound_group",
"MetricName": "tma_contested_accesses",
@@ -658,7 +681,7 @@
"MetricGroup": "BvMB;MemoryBW;TopdownL4;tma_L4_group;tma_issueBW;tma_issueSL;tma_issueSmSt;tma_l1_bound_group",
"MetricName": "tma_fb_full",
"MetricThreshold": "tma_fb_full > 0.3",
- "PublicDescription": "This metric does a *rough estimation* of how often L1D Fill Buffer unavailability limited additional L1D miss memory access requests to proceed. The higher the metric value; the deeper the memory hierarchy level the misses are satisfied from (metric values >1 are valid). Often it hints on approaching bandwidth limits (to L2 cache; L3 cache or external memory). Related metrics: tma_bottleneck_cache_memory_bandwidth, tma_info_system_dram_bw_use, tma_mem_bandwidth, tma_sq_full, tma_store_latency, tma_streaming_stores",
+ "PublicDescription": "This metric does a *rough estimation* of how often L1D Fill Buffer unavailability limited additional L1D miss memory access requests to proceed. The higher the metric value; the deeper the memory hierarchy level the misses are satisfied from (metric values >1 are valid). Often it hints on approaching bandwidth limits (to L2 cache; L3 cache or external memory). Related metrics: tma_bottleneck_data_cache_memory_bandwidth, tma_info_system_dram_bw_use, tma_mem_bandwidth, tma_sq_full, tma_store_latency, tma_streaming_stores",
"ScaleUnit": "100%"
},
{
@@ -786,7 +809,7 @@
{
"BriefDescription": "This metric represents fraction of slots where the CPU was retiring heavy-weight operations -- instructions that require two or more uops or micro-coded sequences",
"DefaultMetricgroupName": "TopdownL2",
- "MetricExpr": "topdown\\-heavy\\-ops / (topdown\\-fe\\-bound + topdown\\-bad\\-spec + topdown\\-retiring + topdown\\-be\\-bound) + 0 * tma_info_thread_slots",
+ "MetricExpr": "topdown\\-heavy\\-ops / (topdown\\-fe\\-bound + topdown\\-bad\\-spec + topdown\\-retiring + topdown\\-be\\-bound)",
"MetricGroup": "Default;Retire;TmaL2;TopdownL2;tma_L2_group;tma_retiring_group",
"MetricName": "tma_heavy_operations",
"MetricThreshold": "tma_heavy_operations > 0.1",
@@ -1297,19 +1320,19 @@
{
"BriefDescription": "Off-core accesses per kilo instruction for modified write requests",
"MetricExpr": "1e3 * OCR.MODIFIED_WRITE.ANY_RESPONSE / tma_info_inst_mix_instructions",
- "MetricGroup": "Offcore",
+ "MetricGroup": "Offcore;Server",
"MetricName": "tma_info_memory_mix_offcore_mwrite_any_pki"
},
{
"BriefDescription": "Off-core accesses per kilo instruction for reads-to-core requests (speculative; including in-core HW prefetches)",
"MetricExpr": "1e3 * OCR.READS_TO_CORE.ANY_RESPONSE / tma_info_inst_mix_instructions",
- "MetricGroup": "CacheHits;Offcore",
+ "MetricGroup": "CacheHits;Offcore;Server",
"MetricName": "tma_info_memory_mix_offcore_read_any_pki"
},
{
"BriefDescription": "L3 cache misses per kilo instruction for reads-to-core requests (speculative; including in-core HW prefetches)",
"MetricExpr": "1e3 * OCR.READS_TO_CORE.L3_MISS / tma_info_inst_mix_instructions",
- "MetricGroup": "Offcore",
+ "MetricGroup": "Offcore;Server",
"MetricName": "tma_info_memory_mix_offcore_read_l3m_pki"
},
{
@@ -1335,21 +1358,21 @@
{
"BriefDescription": "Average DRAM BW for Reads-to-Core (R2C) covering for memory attached to local- and remote-socket",
"MetricExpr": "64 * OCR.READS_TO_CORE.DRAM / 1e9 / tma_info_system_time",
- "MetricGroup": "HPC;Mem;MemoryBW;SoC",
+ "MetricGroup": "HPC;Mem;MemoryBW;Offcore;Server",
"MetricName": "tma_info_memory_soc_r2c_dram_bw",
"PublicDescription": "Average DRAM BW for Reads-to-Core (R2C) covering for memory attached to local- and remote-socket. See R2C_Offcore_BW."
},
{
"BriefDescription": "Average L3-cache miss BW for Reads-to-Core (R2C)",
"MetricExpr": "64 * OCR.READS_TO_CORE.L3_MISS / 1e9 / tma_info_system_time",
- "MetricGroup": "HPC;Mem;MemoryBW;SoC",
+ "MetricGroup": "HPC;Mem;MemoryBW;Offcore;Server",
"MetricName": "tma_info_memory_soc_r2c_l3m_bw",
"PublicDescription": "Average L3-cache miss BW for Reads-to-Core (R2C). This covering going to DRAM or other memory off-chip memory tears. See R2C_Offcore_BW."
},
{
"BriefDescription": "Average Off-core access BW for Reads-to-Core (R2C)",
"MetricExpr": "64 * OCR.READS_TO_CORE.ANY_RESPONSE / 1e9 / tma_info_system_time",
- "MetricGroup": "HPC;Mem;MemoryBW;SoC",
+ "MetricGroup": "HPC;Mem;MemoryBW;Offcore;Server",
"MetricName": "tma_info_memory_soc_r2c_offcore_bw",
"PublicDescription": "Average Off-core access BW for Reads-to-Core (R2C). R2C account for demand or prefetch load/RFO/code access that fill data into the Core caches."
},
@@ -1379,7 +1402,7 @@
"MetricName": "tma_info_memory_tlb_store_stlb_mpki"
},
{
- "BriefDescription": "",
+ "BriefDescription": "Mem;Backend;CacheHits",
"MetricExpr": "UOPS_EXECUTED.THREAD / (UOPS_EXECUTED.CORE_CYCLES_GE_1 / 2 if #SMT_on else cpu@UOPS_EXECUTED.THREAD\\,cmask\\=1@)",
"MetricGroup": "Cor;Pipeline;PortsUtil;SMT",
"MetricName": "tma_info_pipeline_execute"
@@ -1426,7 +1449,7 @@
},
{
"BriefDescription": "Measured Average Core Frequency for unhalted processors [GHz]",
- "MetricExpr": "tma_info_system_turbo_utilization * TSC / 1e9 / tma_info_system_time",
+ "MetricExpr": "tma_info_system_turbo_utilization * msr@tsc@ / 1e9 / tma_info_system_time",
"MetricGroup": "Power;Summary",
"MetricName": "tma_info_system_core_frequency"
},
@@ -1438,16 +1461,28 @@
},
{
"BriefDescription": "Average number of utilized CPUs",
- "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / TSC",
+ "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / msr@tsc@",
"MetricGroup": "Summary",
"MetricName": "tma_info_system_cpus_utilized"
},
{
+ "BriefDescription": "Average 3DXP Memory Bandwidth Use for reads [GB / sec]",
+ "MetricExpr": "(64 * UNC_M_PMM_RPQ_INSERTS / 1e9 / tma_info_system_time if #has_pmem > 0 else 0)",
+ "MetricGroup": "MemOffcore;MemoryBW;Server;SoC",
+ "MetricName": "tma_info_system_cxl_mem_read_bw"
+ },
+ {
+ "BriefDescription": "Average 3DXP Memory Bandwidth Use for Writes [GB / sec]",
+ "MetricExpr": "(64 * UNC_M_PMM_WPQ_INSERTS / 1e9 / tma_info_system_time if #has_pmem > 0 else 0)",
+ "MetricGroup": "MemOffcore;MemoryBW;Server;SoC",
+ "MetricName": "tma_info_system_cxl_mem_write_bw"
+ },
+ {
"BriefDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]",
"MetricExpr": "64 * (UNC_M_CAS_COUNT.RD + UNC_M_CAS_COUNT.WR) / 1e9 / tma_info_system_time",
"MetricGroup": "HPC;MemOffcore;MemoryBW;SoC;tma_issueBW",
"MetricName": "tma_info_system_dram_bw_use",
- "PublicDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]. Related metrics: tma_bottleneck_cache_memory_bandwidth, tma_fb_full, tma_mem_bandwidth, tma_sq_full"
+ "PublicDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]. Related metrics: tma_bottleneck_data_cache_memory_bandwidth, tma_fb_full, tma_mem_bandwidth, tma_sq_full"
},
{
"BriefDescription": "Giga Floating Point Operations Per Second",
@@ -1513,7 +1548,6 @@
},
{
"BriefDescription": "Average latency of data read request to external memory (in nanoseconds)",
- "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "1e9 * (UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD / UNC_CHA_TOR_INSERTS.IA_MISS_DRD) / (tma_info_system_socket_clks / tma_info_system_time)",
"MetricGroup": "Mem;MemoryLat;SoC",
"MetricName": "tma_info_system_mem_read_latency",
@@ -1674,12 +1708,12 @@
"ScaleUnit": "100%"
},
{
- "BriefDescription": "This metric([SKL+] roughly; [LNL]) estimates fraction of cycles with demand load accesses that hit the L1D cache",
+ "BriefDescription": "This metric ([SKL+] roughly; [LNL]) estimates fraction of cycles with demand load accesses that hit the L1D cache",
"MetricExpr": "min(2 * (MEM_INST_RETIRED.ALL_LOADS - MEM_LOAD_RETIRED.FB_HIT - MEM_LOAD_RETIRED.L1_MISS) * 20 / 100, max(CYCLE_ACTIVITY.CYCLES_MEM_ANY - MEMORY_ACTIVITY.CYCLES_L1D_MISS, 0)) / tma_info_thread_clks",
"MetricGroup": "BvML;MemoryLat;TopdownL4;tma_L4_group;tma_l1_bound_group",
"MetricName": "tma_l1_latency_dependency",
"MetricThreshold": "tma_l1_latency_dependency > 0.1 & (tma_l1_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
- "PublicDescription": "This metric([SKL+] roughly; [LNL]) estimates fraction of cycles with demand load accesses that hit the L1D cache. The short latency of the L1D cache may be exposed in pointer-chasing memory access patterns as an example. Sample with: MEM_LOAD_RETIRED.L1_HIT",
+ "PublicDescription": "This metric ([SKL+] roughly; [LNL]) estimates fraction of cycles with demand load accesses that hit the L1D cache. The short latency of the L1D cache may be exposed in pointer-chasing memory access patterns as an example. Sample with: MEM_LOAD_RETIRED.L1_HIT",
"ScaleUnit": "100%"
},
{
@@ -1693,7 +1727,6 @@
},
{
"BriefDescription": "This metric represents fraction of cycles with demand load accesses that hit the L2 cache under unloaded scenarios (possibly L2 latency limited)",
- "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "4.4 * tma_info_system_core_frequency * MEM_LOAD_RETIRED.L2_HIT * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS / 2) / tma_info_thread_clks",
"MetricGroup": "MemoryLat;TopdownL4;tma_L4_group;tma_l2_bound_group",
"MetricName": "tma_l2_hit_latency",
@@ -1712,12 +1745,11 @@
},
{
"BriefDescription": "This metric estimates fraction of cycles with demand load accesses that hit the L3 cache under unloaded scenarios (possibly L3 latency limited)",
- "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "32.6 * tma_info_system_core_frequency * (MEM_LOAD_RETIRED.L3_HIT * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS / 2)) / tma_info_thread_clks",
"MetricGroup": "BvML;MemoryLat;TopdownL4;tma_L4_group;tma_issueLat;tma_l3_bound_group",
"MetricName": "tma_l3_hit_latency",
"MetricThreshold": "tma_l3_hit_latency > 0.1 & (tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
- "PublicDescription": "This metric estimates fraction of cycles with demand load accesses that hit the L3 cache under unloaded scenarios (possibly L3 latency limited). Avoiding private cache misses (i.e. L2 misses/L3 hits) will improve the latency; reduce contention with sibling physical cores and increase performance. Note the value of this node may overlap with its siblings. Sample with: MEM_LOAD_RETIRED.L3_HIT_PS. Related metrics: tma_bottleneck_cache_memory_latency, tma_mem_latency",
+ "PublicDescription": "This metric estimates fraction of cycles with demand load accesses that hit the L3 cache under unloaded scenarios (possibly L3 latency limited). Avoiding private cache misses (i.e. L2 misses/L3 hits) will improve the latency; reduce contention with sibling physical cores and increase performance. Note the value of this node may overlap with its siblings. Sample with: MEM_LOAD_RETIRED.L3_HIT_PS. Related metrics: tma_bottleneck_data_cache_memory_latency, tma_mem_latency",
"ScaleUnit": "100%"
},
{
@@ -1800,6 +1832,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles the CPU spent handling cache misses due to lock operations",
+ "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "(16 * max(0, MEM_INST_RETIRED.LOCK_LOADS - L2_RQSTS.ALL_RFO) + MEM_INST_RETIRED.LOCK_LOADS / MEM_INST_RETIRED.ALL_STORES * (10 * L2_RQSTS.RFO_HIT + min(CPU_CLK_UNHALTED.THREAD, OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO))) / tma_info_thread_clks",
"MetricGroup": "LockCont;Offcore;TopdownL4;tma_L4_group;tma_issueRFO;tma_l1_bound_group",
"MetricName": "tma_lock_latency",
@@ -1832,7 +1865,7 @@
"MetricGroup": "BvMB;MemoryBW;Offcore;TopdownL4;tma_L4_group;tma_dram_bound_group;tma_issueBW",
"MetricName": "tma_mem_bandwidth",
"MetricThreshold": "tma_mem_bandwidth > 0.2 & (tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
- "PublicDescription": "This metric estimates fraction of cycles where the core's performance was likely hurt due to approaching bandwidth limits of external memory - DRAM ([SPR-HBM] and/or HBM). The underlying heuristic assumes that a similar off-core traffic is generated by all IA cores. This metric does not aggregate non-data-read requests by this logical processor; requests from other IA Logical Processors/Physical Cores/sockets; or other non-IA devices like GPU; hence the maximum external memory bandwidth limits may or may not be approached when this metric is flagged (see Uncore counters for that). Related metrics: tma_bottleneck_cache_memory_bandwidth, tma_fb_full, tma_info_system_dram_bw_use, tma_sq_full",
+ "PublicDescription": "This metric estimates fraction of cycles where the core's performance was likely hurt due to approaching bandwidth limits of external memory - DRAM ([SPR-HBM] and/or HBM). The underlying heuristic assumes that a similar off-core traffic is generated by all IA cores. This metric does not aggregate non-data-read requests by this logical processor; requests from other IA Logical Processors/Physical Cores/sockets; or other non-IA devices like GPU; hence the maximum external memory bandwidth limits may or may not be approached when this metric is flagged (see Uncore counters for that). Related metrics: tma_bottleneck_data_cache_memory_bandwidth, tma_fb_full, tma_info_system_dram_bw_use, tma_sq_full",
"ScaleUnit": "100%"
},
{
@@ -1841,13 +1874,13 @@
"MetricGroup": "BvML;MemoryLat;Offcore;TopdownL4;tma_L4_group;tma_dram_bound_group;tma_issueLat",
"MetricName": "tma_mem_latency",
"MetricThreshold": "tma_mem_latency > 0.1 & (tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
- "PublicDescription": "This metric estimates fraction of cycles where the performance was likely hurt due to latency from external memory - DRAM ([SPR-HBM] and/or HBM). This metric does not aggregate requests from other Logical Processors/Physical Cores/sockets (see Uncore counters for that). Related metrics: tma_bottleneck_cache_memory_latency, tma_l3_hit_latency",
+ "PublicDescription": "This metric estimates fraction of cycles where the performance was likely hurt due to latency from external memory - DRAM ([SPR-HBM] and/or HBM). This metric does not aggregate requests from other Logical Processors/Physical Cores/sockets (see Uncore counters for that). Related metrics: tma_bottleneck_data_cache_memory_latency, tma_l3_hit_latency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "This metric represents fraction of slots the Memory subsystem within the Backend was a bottleneck",
"DefaultMetricgroupName": "TopdownL2",
- "MetricExpr": "topdown\\-mem\\-bound / (topdown\\-fe\\-bound + topdown\\-bad\\-spec + topdown\\-retiring + topdown\\-be\\-bound) + 0 * tma_info_thread_slots",
+ "MetricExpr": "topdown\\-mem\\-bound / (topdown\\-fe\\-bound + topdown\\-bad\\-spec + topdown\\-retiring + topdown\\-be\\-bound)",
"MetricGroup": "Backend;Default;TmaL2;TopdownL2;tma_L2_group;tma_backend_bound_group",
"MetricName": "tma_memory_bound",
"MetricThreshold": "tma_memory_bound > 0.2 & tma_backend_bound > 0.2",
@@ -1857,7 +1890,6 @@
},
{
"BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to LFENCE Instructions.",
- "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "13 * MISC2_RETIRED.LFENCE / tma_info_thread_clks",
"MetricGroup": "TopdownL4;tma_L4_group;tma_serializing_operation_group",
"MetricName": "tma_memory_fence",
@@ -1910,7 +1942,7 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles in which CPU was likely limited due to the Microcode Sequencer (MS) unit - see Microcode_Sequencer node for details.",
- "MetricExpr": "max(IDQ.MS_CYCLES_ANY, cpu@UOPS_RETIRED.MS\\,cmask\\=1@ / (UOPS_RETIRED.SLOTS / UOPS_ISSUED.ANY)) / tma_info_core_core_clks / 2",
+ "MetricExpr": "max(IDQ.MS_CYCLES_ANY, cpu@UOPS_RETIRED.MS\\,cmask\\=1@ / (UOPS_RETIRED.SLOTS / UOPS_ISSUED.ANY)) / tma_info_core_core_clks / 2.4",
"MetricGroup": "MicroSeq;TopdownL3;tma_L3_group;tma_fetch_bandwidth_group",
"MetricName": "tma_ms",
"MetricThreshold": "tma_ms > 0.05 & tma_fetch_bandwidth > 0.2",
@@ -1945,6 +1977,7 @@
},
{
"BriefDescription": "This metric represents the remaining light uops fraction the CPU has executed - remaining means not covered by other sibling nodes",
+ "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "max(0, tma_light_operations - (tma_fp_arith + tma_int_operations + tma_memory_operations + tma_fused_instructions + tma_non_fused_branches))",
"MetricGroup": "Pipeline;TopdownL3;tma_L3_group;tma_light_operations_group",
"MetricName": "tma_other_light_ops",
@@ -2006,6 +2039,7 @@
},
{
"BriefDescription": "This metric estimates fraction of cycles the CPU performance was potentially limited due to Core computation issues (non divider-related)",
+ "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "((tma_ports_utilized_0 * tma_info_thread_clks + (EXE_ACTIVITY.1_PORTS_UTIL + tma_retiring * EXE_ACTIVITY.2_3_PORTS_UTIL)) / tma_info_thread_clks if ARITH.DIV_ACTIVE < CYCLE_ACTIVITY.STALLS_TOTAL - EXE_ACTIVITY.BOUND_ON_LOADS else (EXE_ACTIVITY.1_PORTS_UTIL + tma_retiring * EXE_ACTIVITY.2_3_PORTS_UTIL) / tma_info_thread_clks)",
"MetricGroup": "PortsUtil;TopdownL3;tma_L3_group;tma_core_bound_group",
"MetricName": "tma_ports_utilization",
@@ -2015,6 +2049,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles CPU executed no uops on any execution port (Logical Processor cycles since ICL, Physical Core cycles otherwise)",
+ "MetricConstraint": "NO_THRESHOLD_AND_NMI",
"MetricExpr": "(EXE_ACTIVITY.EXE_BOUND_0_PORTS + max(RS.EMPTY_RESOURCE - RESOURCE_STALLS.SCOREBOARD, 0)) / tma_info_thread_clks * (CYCLE_ACTIVITY.STALLS_TOTAL - EXE_ACTIVITY.BOUND_ON_LOADS) / tma_info_thread_clks",
"MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_ports_utilization_group",
"MetricName": "tma_ports_utilized_0",
@@ -2024,6 +2059,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles where the CPU executed total of 1 uop per cycle on all execution ports (Logical Processor cycles since ICL, Physical Core cycles otherwise)",
+ "MetricConstraint": "NO_THRESHOLD_AND_NMI",
"MetricExpr": "EXE_ACTIVITY.1_PORTS_UTIL / tma_info_thread_clks",
"MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_issueL1;tma_ports_utilization_group",
"MetricName": "tma_ports_utilized_1",
@@ -2033,7 +2069,6 @@
},
{
"BriefDescription": "This metric represents fraction of cycles CPU executed total of 2 uops per cycle on all execution ports (Logical Processor cycles since ICL, Physical Core cycles otherwise)",
- "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "EXE_ACTIVITY.2_PORTS_UTIL / tma_info_thread_clks",
"MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_issue2P;tma_ports_utilization_group",
"MetricName": "tma_ports_utilized_2",
@@ -2043,7 +2078,6 @@
},
{
"BriefDescription": "This metric represents fraction of cycles CPU executed total of 3 or more uops per cycle on all execution ports (Logical Processor cycles since ICL, Physical Core cycles otherwise)",
- "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "UOPS_EXECUTED.CYCLES_GE_3 / tma_info_thread_clks",
"MetricGroup": "BvCB;PortsUtil;TopdownL4;tma_L4_group;tma_ports_utilization_group",
"MetricName": "tma_ports_utilized_3m",
@@ -2072,7 +2106,7 @@
{
"BriefDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired",
"DefaultMetricgroupName": "TopdownL1",
- "MetricExpr": "topdown\\-retiring / (topdown\\-fe\\-bound + topdown\\-bad\\-spec + topdown\\-retiring + topdown\\-be\\-bound) + 0 * tma_info_thread_slots",
+ "MetricExpr": "topdown\\-retiring / (topdown\\-fe\\-bound + topdown\\-bad\\-spec + topdown\\-retiring + topdown\\-be\\-bound)",
"MetricGroup": "BvUW;Default;TmaL1;TopdownL1;tma_L1_group",
"MetricName": "tma_retiring",
"MetricThreshold": "tma_retiring > 0.7 | tma_heavy_operations > 0.1",
@@ -2100,7 +2134,6 @@
},
{
"BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to PAUSE Instructions",
- "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "CPU_CLK_UNHALTED.PAUSE / tma_info_thread_clks",
"MetricGroup": "TopdownL4;tma_L4_group;tma_serializing_operation_group",
"MetricName": "tma_slow_pause",
@@ -2132,7 +2165,7 @@
"MetricGroup": "BvMB;MemoryBW;Offcore;TopdownL4;tma_L4_group;tma_issueBW;tma_l3_bound_group",
"MetricName": "tma_sq_full",
"MetricThreshold": "tma_sq_full > 0.3 & (tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
- "PublicDescription": "This metric measures fraction of cycles where the Super Queue (SQ) was full taking into account all request-types and both hardware SMT threads (Logical Processors). Related metrics: tma_bottleneck_cache_memory_bandwidth, tma_fb_full, tma_info_system_dram_bw_use, tma_mem_bandwidth",
+ "PublicDescription": "This metric measures fraction of cycles where the Super Queue (SQ) was full taking into account all request-types and both hardware SMT threads (Logical Processors). Related metrics: tma_bottleneck_data_cache_memory_bandwidth, tma_fb_full, tma_info_system_dram_bw_use, tma_mem_bandwidth",
"ScaleUnit": "100%"
},
{
diff --git a/tools/perf/pmu-events/arch/x86/emeraldrapids/floating-point.json b/tools/perf/pmu-events/arch/x86/emeraldrapids/floating-point.json
index 8c9207750c82..bc475e163227 100644
--- a/tools/perf/pmu-events/arch/x86/emeraldrapids/floating-point.json
+++ b/tools/perf/pmu-events/arch/x86/emeraldrapids/floating-point.json
@@ -5,7 +5,6 @@
"CounterMask": "1",
"EventCode": "0xb0",
"EventName": "ARITH.FPDIV_ACTIVE",
- "PublicDescription": "ARITH.FPDIV_ACTIVE Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x1"
},
@@ -14,7 +13,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc1",
"EventName": "ASSISTS.FP",
- "PublicDescription": "Counts all microcode Floating Point assists. Available PDIST counters: 0",
+ "PublicDescription": "Counts all microcode Floating Point assists.",
"SampleAfterValue": "100003",
"UMask": "0x2"
},
@@ -23,7 +22,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc1",
"EventName": "ASSISTS.SSE_AVX_MIX",
- "PublicDescription": "ASSISTS.SSE_AVX_MIX Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x10"
},
@@ -32,7 +30,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xb3",
"EventName": "FP_ARITH_DISPATCHED.PORT_0",
- "PublicDescription": "FP_ARITH_DISPATCHED.PORT_0 [This event is alias to FP_ARITH_DISPATCHED.V0] Available PDIST counters: 0",
"SampleAfterValue": "2000003",
"UMask": "0x1"
},
@@ -41,7 +38,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xb3",
"EventName": "FP_ARITH_DISPATCHED.PORT_1",
- "PublicDescription": "FP_ARITH_DISPATCHED.PORT_1 [This event is alias to FP_ARITH_DISPATCHED.V1] Available PDIST counters: 0",
"SampleAfterValue": "2000003",
"UMask": "0x2"
},
@@ -50,7 +46,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xb3",
"EventName": "FP_ARITH_DISPATCHED.PORT_5",
- "PublicDescription": "FP_ARITH_DISPATCHED.PORT_5 [This event is alias to FP_ARITH_DISPATCHED.V2] Available PDIST counters: 0",
"SampleAfterValue": "2000003",
"UMask": "0x4"
},
@@ -59,7 +54,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xb3",
"EventName": "FP_ARITH_DISPATCHED.V0",
- "PublicDescription": "FP_ARITH_DISPATCHED.V0 [This event is alias to FP_ARITH_DISPATCHED.PORT_0] Available PDIST counters: 0",
"SampleAfterValue": "2000003",
"UMask": "0x1"
},
@@ -68,7 +62,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xb3",
"EventName": "FP_ARITH_DISPATCHED.V1",
- "PublicDescription": "FP_ARITH_DISPATCHED.V1 [This event is alias to FP_ARITH_DISPATCHED.PORT_1] Available PDIST counters: 0",
"SampleAfterValue": "2000003",
"UMask": "0x2"
},
@@ -77,7 +70,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xb3",
"EventName": "FP_ARITH_DISPATCHED.V2",
- "PublicDescription": "FP_ARITH_DISPATCHED.V2 [This event is alias to FP_ARITH_DISPATCHED.PORT_5] Available PDIST counters: 0",
"SampleAfterValue": "2000003",
"UMask": "0x4"
},
@@ -86,7 +78,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc7",
"EventName": "FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE",
- "PublicDescription": "Number of SSE/AVX computational 128-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 2 computation operations, one for each element. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events. Available PDIST counters: 0",
+ "PublicDescription": "Number of SSE/AVX computational 128-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 2 computation operations, one for each element. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.",
"SampleAfterValue": "100003",
"UMask": "0x4"
},
@@ -95,7 +87,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc7",
"EventName": "FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE",
- "PublicDescription": "Number of SSE/AVX computational 128-bit packed single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 4 computation operations, one for each element. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT RSQRT RCP DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events. Available PDIST counters: 0",
+ "PublicDescription": "Number of SSE/AVX computational 128-bit packed single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 4 computation operations, one for each element. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT RSQRT RCP DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.",
"SampleAfterValue": "100003",
"UMask": "0x8"
},
@@ -104,7 +96,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc7",
"EventName": "FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE",
- "PublicDescription": "Number of SSE/AVX computational 256-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 4 computation operations, one for each element. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events. Available PDIST counters: 0",
+ "PublicDescription": "Number of SSE/AVX computational 256-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 4 computation operations, one for each element. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.",
"SampleAfterValue": "100003",
"UMask": "0x10"
},
@@ -113,7 +105,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc7",
"EventName": "FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE",
- "PublicDescription": "Number of SSE/AVX computational 256-bit packed single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 8 computation operations, one for each element. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT RSQRT RCP DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events. Available PDIST counters: 0",
+ "PublicDescription": "Number of SSE/AVX computational 256-bit packed single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 8 computation operations, one for each element. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT RSQRT RCP DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.",
"SampleAfterValue": "100003",
"UMask": "0x20"
},
@@ -122,7 +114,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc7",
"EventName": "FP_ARITH_INST_RETIRED.4_FLOPS",
- "PublicDescription": "Number of SSE/AVX computational 128-bit packed single precision and 256-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 2 or/and 4 computation operations, one for each element. Applies to SSE* and AVX* packed single precision floating-point and packed double precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX RCP14 RSQRT14 SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events. Available PDIST counters: 0",
+ "PublicDescription": "Number of SSE/AVX computational 128-bit packed single precision and 256-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 2 or/and 4 computation operations, one for each element. Applies to SSE* and AVX* packed single precision floating-point and packed double precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX RCP14 RSQRT14 SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.",
"SampleAfterValue": "100003",
"UMask": "0x18"
},
@@ -131,7 +123,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc7",
"EventName": "FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE",
- "PublicDescription": "Number of SSE/AVX computational 512-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 8 computation operations, one for each element. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT RSQRT14 RCP14 FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events. Available PDIST counters: 0",
+ "PublicDescription": "Number of SSE/AVX computational 512-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 8 computation operations, one for each element. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT RSQRT14 RCP14 FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.",
"SampleAfterValue": "100003",
"UMask": "0x40"
},
@@ -140,7 +132,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc7",
"EventName": "FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE",
- "PublicDescription": "Number of SSE/AVX computational 512-bit packed single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 16 computation operations, one for each element. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT RSQRT14 RCP14 FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events. Available PDIST counters: 0",
+ "PublicDescription": "Number of SSE/AVX computational 512-bit packed single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 16 computation operations, one for each element. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT RSQRT14 RCP14 FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.",
"SampleAfterValue": "100003",
"UMask": "0x80"
},
@@ -149,7 +141,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc7",
"EventName": "FP_ARITH_INST_RETIRED.8_FLOPS",
- "PublicDescription": "Number of SSE/AVX computational 256-bit packed single precision and 512-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 8 computation operations, one for each element. Applies to SSE* and AVX* packed single precision and double precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT RSQRT RSQRT14 RCP RCP14 DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events. Available PDIST counters: 0",
+ "PublicDescription": "Number of SSE/AVX computational 256-bit packed single precision and 512-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 8 computation operations, one for each element. Applies to SSE* and AVX* packed single precision and double precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT RSQRT RSQRT14 RCP RCP14 DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.",
"SampleAfterValue": "100003",
"UMask": "0x60"
},
@@ -158,7 +150,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc7",
"EventName": "FP_ARITH_INST_RETIRED.SCALAR",
- "PublicDescription": "Number of SSE/AVX computational scalar single precision and double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 1 computational operation. Applies to SSE* and AVX* scalar single precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT RSQRT RCP FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events. Available PDIST counters: 0",
+ "PublicDescription": "Number of SSE/AVX computational scalar single precision and double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 1 computational operation. Applies to SSE* and AVX* scalar single precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT RSQRT RCP FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.",
"SampleAfterValue": "1000003",
"UMask": "0x3"
},
@@ -167,7 +159,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc7",
"EventName": "FP_ARITH_INST_RETIRED.SCALAR_DOUBLE",
- "PublicDescription": "Number of SSE/AVX computational scalar double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 1 computational operation. Applies to SSE* and AVX* scalar double precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events. Available PDIST counters: 0",
+ "PublicDescription": "Number of SSE/AVX computational scalar double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 1 computational operation. Applies to SSE* and AVX* scalar double precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.",
"SampleAfterValue": "100003",
"UMask": "0x1"
},
@@ -176,7 +168,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc7",
"EventName": "FP_ARITH_INST_RETIRED.SCALAR_SINGLE",
- "PublicDescription": "Number of SSE/AVX computational scalar single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 1 computational operation. Applies to SSE* and AVX* scalar single precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT RSQRT RCP FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events. Available PDIST counters: 0",
+ "PublicDescription": "Number of SSE/AVX computational scalar single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 1 computational operation. Applies to SSE* and AVX* scalar single precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT RSQRT RCP FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.",
"SampleAfterValue": "100003",
"UMask": "0x2"
},
@@ -185,7 +177,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc7",
"EventName": "FP_ARITH_INST_RETIRED.VECTOR",
- "PublicDescription": "Number of any Vector retired FP arithmetic instructions. The DAZ and FTZ flags in the MXCSR register need to be set when using these events. Available PDIST counters: 0",
+ "PublicDescription": "Number of any Vector retired FP arithmetic instructions. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.",
"SampleAfterValue": "1000003",
"UMask": "0xfc"
},
@@ -194,7 +186,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xcf",
"EventName": "FP_ARITH_INST_RETIRED2.128B_PACKED_HALF",
- "PublicDescription": "FP_ARITH_INST_RETIRED2.128B_PACKED_HALF Available PDIST counters: 0",
"SampleAfterValue": "100003",
"UMask": "0x4"
},
@@ -203,7 +194,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xcf",
"EventName": "FP_ARITH_INST_RETIRED2.256B_PACKED_HALF",
- "PublicDescription": "FP_ARITH_INST_RETIRED2.256B_PACKED_HALF Available PDIST counters: 0",
"SampleAfterValue": "100003",
"UMask": "0x8"
},
@@ -212,7 +202,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xcf",
"EventName": "FP_ARITH_INST_RETIRED2.512B_PACKED_HALF",
- "PublicDescription": "FP_ARITH_INST_RETIRED2.512B_PACKED_HALF Available PDIST counters: 0",
"SampleAfterValue": "100003",
"UMask": "0x10"
},
@@ -221,7 +210,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xcf",
"EventName": "FP_ARITH_INST_RETIRED2.COMPLEX_SCALAR_HALF",
- "PublicDescription": "FP_ARITH_INST_RETIRED2.COMPLEX_SCALAR_HALF Available PDIST counters: 0",
"SampleAfterValue": "100003",
"UMask": "0x2"
},
@@ -230,7 +218,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xcf",
"EventName": "FP_ARITH_INST_RETIRED2.SCALAR",
- "PublicDescription": "FP_ARITH_INST_RETIRED2.SCALAR Available PDIST counters: 0",
+ "PublicDescription": "FP_ARITH_INST_RETIRED2.SCALAR",
"SampleAfterValue": "100003",
"UMask": "0x3"
},
@@ -239,7 +227,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xcf",
"EventName": "FP_ARITH_INST_RETIRED2.SCALAR_HALF",
- "PublicDescription": "FP_ARITH_INST_RETIRED2.SCALAR_HALF Available PDIST counters: 0",
"SampleAfterValue": "100003",
"UMask": "0x1"
},
@@ -248,7 +235,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xcf",
"EventName": "FP_ARITH_INST_RETIRED2.VECTOR",
- "PublicDescription": "FP_ARITH_INST_RETIRED2.VECTOR Available PDIST counters: 0",
+ "PublicDescription": "FP_ARITH_INST_RETIRED2.VECTOR",
"SampleAfterValue": "100003",
"UMask": "0x1c"
}
diff --git a/tools/perf/pmu-events/arch/x86/emeraldrapids/frontend.json b/tools/perf/pmu-events/arch/x86/emeraldrapids/frontend.json
index 9fe9d62b867a..793c486ffabe 100644
--- a/tools/perf/pmu-events/arch/x86/emeraldrapids/frontend.json
+++ b/tools/perf/pmu-events/arch/x86/emeraldrapids/frontend.json
@@ -4,7 +4,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x60",
"EventName": "BACLEARS.ANY",
- "PublicDescription": "Number of times the front-end is resteered when it finds a branch instruction in a fetch line. This is called Unknown Branch which occurs for the first time a branch instruction is fetched or when the branch is not tracked by the BPU (Branch Prediction Unit) anymore. Available PDIST counters: 0",
+ "PublicDescription": "Number of times the front-end is resteered when it finds a branch instruction in a fetch line. This is called Unknown Branch which occurs for the first time a branch instruction is fetched or when the branch is not tracked by the BPU (Branch Prediction Unit) anymore.",
"SampleAfterValue": "100003",
"UMask": "0x1"
},
@@ -13,7 +13,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x87",
"EventName": "DECODE.LCP",
- "PublicDescription": "Counts cycles that the Instruction Length decoder (ILD) stalls occurred due to dynamically changing prefix length of the decoded instruction (by operand size prefix instruction 0x66, address size prefix instruction 0x67 or REX.W for Intel64). Count is proportional to the number of prefixes in a 16B-line. This may result in a three-cycle penalty for each LCP (Length changing prefix) in a 16-byte chunk. Available PDIST counters: 0",
+ "PublicDescription": "Counts cycles that the Instruction Length decoder (ILD) stalls occurred due to dynamically changing prefix length of the decoded instruction (by operand size prefix instruction 0x66, address size prefix instruction 0x67 or REX.W for Intel64). Count is proportional to the number of prefixes in a 16B-line. This may result in a three-cycle penalty for each LCP (Length changing prefix) in a 16-byte chunk.",
"SampleAfterValue": "500009",
"UMask": "0x1"
},
@@ -22,7 +22,6 @@
"Counter": "0,1,2,3",
"EventCode": "0x87",
"EventName": "DECODE.MS_BUSY",
- "PublicDescription": "Cycles the Microcode Sequencer is busy. Available PDIST counters: 0",
"SampleAfterValue": "500009",
"UMask": "0x2"
},
@@ -31,7 +30,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x61",
"EventName": "DSB2MITE_SWITCHES.PENALTY_CYCLES",
- "PublicDescription": "Decode Stream Buffer (DSB) is a Uop-cache that holds translations of previously fetched instructions that were decoded by the legacy x86 decode pipeline (MITE). This event counts fetch penalty cycles when a transition occurs from DSB to MITE. Available PDIST counters: 0",
+ "PublicDescription": "Decode Stream Buffer (DSB) is a Uop-cache that holds translations of previously fetched instructions that were decoded by the legacy x86 decode pipeline (MITE). This event counts fetch penalty cycles when a transition occurs from DSB to MITE.",
"SampleAfterValue": "100003",
"UMask": "0x2"
},
@@ -249,7 +248,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x80",
"EventName": "ICACHE_DATA.STALLS",
- "PublicDescription": "Counts cycles where a code line fetch is stalled due to an L1 instruction cache miss. The decode pipeline works at a 32 Byte granularity. Available PDIST counters: 0",
+ "PublicDescription": "Counts cycles where a code line fetch is stalled due to an L1 instruction cache miss. The decode pipeline works at a 32 Byte granularity.",
"SampleAfterValue": "500009",
"UMask": "0x4"
},
@@ -260,7 +259,6 @@
"EdgeDetect": "1",
"EventCode": "0x80",
"EventName": "ICACHE_DATA.STALL_PERIODS",
- "PublicDescription": "ICACHE_DATA.STALL_PERIODS Available PDIST counters: 0",
"SampleAfterValue": "500009",
"UMask": "0x4"
},
@@ -269,7 +267,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x83",
"EventName": "ICACHE_TAG.STALLS",
- "PublicDescription": "Counts cycles where a code fetch is stalled due to L1 instruction cache tag miss. Available PDIST counters: 0",
+ "PublicDescription": "Counts cycles where a code fetch is stalled due to L1 instruction cache tag miss.",
"SampleAfterValue": "200003",
"UMask": "0x4"
},
@@ -279,7 +277,7 @@
"CounterMask": "1",
"EventCode": "0x79",
"EventName": "IDQ.DSB_CYCLES_ANY",
- "PublicDescription": "Counts the number of cycles uops were delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of cycles uops were delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path.",
"SampleAfterValue": "2000003",
"UMask": "0x8"
},
@@ -289,7 +287,7 @@
"CounterMask": "6",
"EventCode": "0x79",
"EventName": "IDQ.DSB_CYCLES_OK",
- "PublicDescription": "Counts the number of cycles where optimal number of uops was delivered to the Instruction Decode Queue (IDQ) from the DSB (Decode Stream Buffer) path. Count includes uops that may 'bypass' the IDQ. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of cycles where optimal number of uops was delivered to the Instruction Decode Queue (IDQ) from the DSB (Decode Stream Buffer) path. Count includes uops that may 'bypass' the IDQ.",
"SampleAfterValue": "2000003",
"UMask": "0x8"
},
@@ -298,7 +296,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x79",
"EventName": "IDQ.DSB_UOPS",
- "PublicDescription": "Counts the number of uops delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of uops delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path.",
"SampleAfterValue": "2000003",
"UMask": "0x8"
},
@@ -308,7 +306,7 @@
"CounterMask": "1",
"EventCode": "0x79",
"EventName": "IDQ.MITE_CYCLES_ANY",
- "PublicDescription": "Counts the number of cycles uops were delivered to the Instruction Decode Queue (IDQ) from the MITE (legacy decode pipeline) path. During these cycles uops are not being delivered from the Decode Stream Buffer (DSB). Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of cycles uops were delivered to the Instruction Decode Queue (IDQ) from the MITE (legacy decode pipeline) path. During these cycles uops are not being delivered from the Decode Stream Buffer (DSB).",
"SampleAfterValue": "2000003",
"UMask": "0x4"
},
@@ -318,7 +316,7 @@
"CounterMask": "6",
"EventCode": "0x79",
"EventName": "IDQ.MITE_CYCLES_OK",
- "PublicDescription": "Counts the number of cycles where optimal number of uops was delivered to the Instruction Decode Queue (IDQ) from the MITE (legacy decode pipeline) path. During these cycles uops are not being delivered from the Decode Stream Buffer (DSB). Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of cycles where optimal number of uops was delivered to the Instruction Decode Queue (IDQ) from the MITE (legacy decode pipeline) path. During these cycles uops are not being delivered from the Decode Stream Buffer (DSB).",
"SampleAfterValue": "2000003",
"UMask": "0x4"
},
@@ -327,7 +325,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x79",
"EventName": "IDQ.MITE_UOPS",
- "PublicDescription": "Counts the number of uops delivered to Instruction Decode Queue (IDQ) from the MITE path. This also means that uops are not being delivered from the Decode Stream Buffer (DSB). Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of uops delivered to Instruction Decode Queue (IDQ) from the MITE path. This also means that uops are not being delivered from the Decode Stream Buffer (DSB).",
"SampleAfterValue": "2000003",
"UMask": "0x4"
},
@@ -337,7 +335,7 @@
"CounterMask": "1",
"EventCode": "0x79",
"EventName": "IDQ.MS_CYCLES_ANY",
- "PublicDescription": "Counts cycles during which uops are being delivered to Instruction Decode Queue (IDQ) while the Microcode Sequencer (MS) is busy. Uops maybe initiated by Decode Stream Buffer (DSB) or MITE. Available PDIST counters: 0",
+ "PublicDescription": "Counts cycles during which uops are being delivered to Instruction Decode Queue (IDQ) while the Microcode Sequencer (MS) is busy. Uops maybe initiated by Decode Stream Buffer (DSB) or MITE.",
"SampleAfterValue": "2000003",
"UMask": "0x20"
},
@@ -348,7 +346,7 @@
"EdgeDetect": "1",
"EventCode": "0x79",
"EventName": "IDQ.MS_SWITCHES",
- "PublicDescription": "Number of switches from DSB (Decode Stream Buffer) or MITE (legacy decode pipeline) to the Microcode Sequencer. Available PDIST counters: 0",
+ "PublicDescription": "Number of switches from DSB (Decode Stream Buffer) or MITE (legacy decode pipeline) to the Microcode Sequencer.",
"SampleAfterValue": "100003",
"UMask": "0x20"
},
@@ -357,7 +355,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x79",
"EventName": "IDQ.MS_UOPS",
- "PublicDescription": "Counts the total number of uops delivered by the Microcode Sequencer (MS). Available PDIST counters: 0",
+ "PublicDescription": "Counts the total number of uops delivered by the Microcode Sequencer (MS).",
"SampleAfterValue": "1000003",
"UMask": "0x20"
},
@@ -366,7 +364,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0x9c",
"EventName": "IDQ_BUBBLES.CORE",
- "PublicDescription": "Counts the number of uops not delivered to by the Instruction Decode Queue (IDQ) to the back-end of the pipeline when there was no back-end stalls. This event counts for one SMT thread in a given cycle. [This event is alias to IDQ_UOPS_NOT_DELIVERED.CORE] Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of uops not delivered to by the Instruction Decode Queue (IDQ) to the back-end of the pipeline when there was no back-end stalls. This event counts for one SMT thread in a given cycle. [This event is alias to IDQ_UOPS_NOT_DELIVERED.CORE]",
"SampleAfterValue": "1000003",
"UMask": "0x1"
},
@@ -376,7 +374,7 @@
"CounterMask": "6",
"EventCode": "0x9c",
"EventName": "IDQ_BUBBLES.CYCLES_0_UOPS_DELIV.CORE",
- "PublicDescription": "Counts the number of cycles when no uops were delivered by the Instruction Decode Queue (IDQ) to the back-end of the pipeline when there was no back-end stalls. This event counts for one SMT thread in a given cycle. [This event is alias to IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE] Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of cycles when no uops were delivered by the Instruction Decode Queue (IDQ) to the back-end of the pipeline when there was no back-end stalls. This event counts for one SMT thread in a given cycle. [This event is alias to IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE]",
"SampleAfterValue": "1000003",
"UMask": "0x1"
},
@@ -387,7 +385,7 @@
"EventCode": "0x9c",
"EventName": "IDQ_BUBBLES.CYCLES_FE_WAS_OK",
"Invert": "1",
- "PublicDescription": "Counts the number of cycles when the optimal number of uops were delivered by the Instruction Decode Queue (IDQ) to the back-end of the pipeline when there was no back-end stalls. This event counts for one SMT thread in a given cycle. [This event is alias to IDQ_UOPS_NOT_DELIVERED.CYCLES_FE_WAS_OK] Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of cycles when the optimal number of uops were delivered by the Instruction Decode Queue (IDQ) to the back-end of the pipeline when there was no back-end stalls. This event counts for one SMT thread in a given cycle. [This event is alias to IDQ_UOPS_NOT_DELIVERED.CYCLES_FE_WAS_OK]",
"SampleAfterValue": "1000003",
"UMask": "0x1"
},
@@ -396,7 +394,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0x9c",
"EventName": "IDQ_UOPS_NOT_DELIVERED.CORE",
- "PublicDescription": "Counts the number of uops not delivered to by the Instruction Decode Queue (IDQ) to the back-end of the pipeline when there was no back-end stalls. This event counts for one SMT thread in a given cycle. [This event is alias to IDQ_BUBBLES.CORE] Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of uops not delivered to by the Instruction Decode Queue (IDQ) to the back-end of the pipeline when there was no back-end stalls. This event counts for one SMT thread in a given cycle. [This event is alias to IDQ_BUBBLES.CORE]",
"SampleAfterValue": "1000003",
"UMask": "0x1"
},
@@ -406,7 +404,7 @@
"CounterMask": "6",
"EventCode": "0x9c",
"EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE",
- "PublicDescription": "Counts the number of cycles when no uops were delivered by the Instruction Decode Queue (IDQ) to the back-end of the pipeline when there was no back-end stalls. This event counts for one SMT thread in a given cycle. [This event is alias to IDQ_BUBBLES.CYCLES_0_UOPS_DELIV.CORE] Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of cycles when no uops were delivered by the Instruction Decode Queue (IDQ) to the back-end of the pipeline when there was no back-end stalls. This event counts for one SMT thread in a given cycle. [This event is alias to IDQ_BUBBLES.CYCLES_0_UOPS_DELIV.CORE]",
"SampleAfterValue": "1000003",
"UMask": "0x1"
},
@@ -417,7 +415,7 @@
"EventCode": "0x9c",
"EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_FE_WAS_OK",
"Invert": "1",
- "PublicDescription": "Counts the number of cycles when the optimal number of uops were delivered by the Instruction Decode Queue (IDQ) to the back-end of the pipeline when there was no back-end stalls. This event counts for one SMT thread in a given cycle. [This event is alias to IDQ_BUBBLES.CYCLES_FE_WAS_OK] Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of cycles when the optimal number of uops were delivered by the Instruction Decode Queue (IDQ) to the back-end of the pipeline when there was no back-end stalls. This event counts for one SMT thread in a given cycle. [This event is alias to IDQ_BUBBLES.CYCLES_FE_WAS_OK]",
"SampleAfterValue": "1000003",
"UMask": "0x1"
}
diff --git a/tools/perf/pmu-events/arch/x86/emeraldrapids/memory.json b/tools/perf/pmu-events/arch/x86/emeraldrapids/memory.json
index 7c3f9b76d367..5e6c1f05c981 100644
--- a/tools/perf/pmu-events/arch/x86/emeraldrapids/memory.json
+++ b/tools/perf/pmu-events/arch/x86/emeraldrapids/memory.json
@@ -5,7 +5,6 @@
"CounterMask": "6",
"EventCode": "0xa3",
"EventName": "CYCLE_ACTIVITY.STALLS_L3_MISS",
- "PublicDescription": "Execution stalls while L3 cache miss demand load is outstanding. Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x6"
},
@@ -14,7 +13,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc3",
"EventName": "MACHINE_CLEARS.MEMORY_ORDERING",
- "PublicDescription": "Counts the number of Machine Clears detected dye to memory ordering. Memory Ordering Machine Clears may apply when a memory read may not conform to the memory ordering rules of the x86 architecture Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of Machine Clears detected dye to memory ordering. Memory Ordering Machine Clears may apply when a memory read may not conform to the memory ordering rules of the x86 architecture",
"SampleAfterValue": "100003",
"UMask": "0x2"
},
@@ -24,7 +23,6 @@
"CounterMask": "2",
"EventCode": "0x47",
"EventName": "MEMORY_ACTIVITY.CYCLES_L1D_MISS",
- "PublicDescription": "Cycles while L1 cache miss demand load is outstanding. Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x2"
},
@@ -34,7 +32,6 @@
"CounterMask": "3",
"EventCode": "0x47",
"EventName": "MEMORY_ACTIVITY.STALLS_L1D_MISS",
- "PublicDescription": "Execution stalls while L1 cache miss demand load is outstanding. Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x3"
},
@@ -44,7 +41,7 @@
"CounterMask": "5",
"EventCode": "0x47",
"EventName": "MEMORY_ACTIVITY.STALLS_L2_MISS",
- "PublicDescription": "Execution stalls while L2 cache miss demand cacheable load request is outstanding (will not count for uncacheable demand requests e.g. bus lock). Available PDIST counters: 0",
+ "PublicDescription": "Execution stalls while L2 cache miss demand cacheable load request is outstanding (will not count for uncacheable demand requests e.g. bus lock).",
"SampleAfterValue": "1000003",
"UMask": "0x5"
},
@@ -54,7 +51,7 @@
"CounterMask": "9",
"EventCode": "0x47",
"EventName": "MEMORY_ACTIVITY.STALLS_L3_MISS",
- "PublicDescription": "Execution stalls while L3 cache miss demand cacheable load request is outstanding (will not count for uncacheable demand requests e.g. bus lock). Available PDIST counters: 0",
+ "PublicDescription": "Execution stalls while L3 cache miss demand cacheable load request is outstanding (will not count for uncacheable demand requests e.g. bus lock).",
"SampleAfterValue": "1000003",
"UMask": "0x9"
},
@@ -478,7 +475,6 @@
"Counter": "0,1,2,3",
"EventCode": "0x21",
"EventName": "OFFCORE_REQUESTS.L3_MISS_DEMAND_DATA_RD",
- "PublicDescription": "Counts demand data read requests that miss the L3 cache. Available PDIST counters: 0",
"SampleAfterValue": "100003",
"UMask": "0x10"
},
@@ -487,7 +483,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x20",
"EventName": "OFFCORE_REQUESTS_OUTSTANDING.L3_MISS_DEMAND_DATA_RD",
- "PublicDescription": "For every cycle, increments by the number of demand data read requests pending that are known to have missed the L3 cache. Note that this does not capture all elapsed cycles while requests are outstanding - only cycles from when the requests were known by the requesting core to have missed the L3 cache. Available PDIST counters: 0",
+ "PublicDescription": "For every cycle, increments by the number of demand data read requests pending that are known to have missed the L3 cache. Note that this does not capture all elapsed cycles while requests are outstanding - only cycles from when the requests were known by the requesting core to have missed the L3 cache.",
"SampleAfterValue": "2000003",
"UMask": "0x10"
},
@@ -505,7 +501,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc9",
"EventName": "RTM_RETIRED.ABORTED_EVENTS",
- "PublicDescription": "Counts the number of times an RTM execution aborted due to none of the previous 3 categories (e.g. interrupt). Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of times an RTM execution aborted due to none of the previous 3 categories (e.g. interrupt).",
"SampleAfterValue": "100003",
"UMask": "0x80"
},
@@ -514,7 +510,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc9",
"EventName": "RTM_RETIRED.ABORTED_MEM",
- "PublicDescription": "Counts the number of times an RTM execution aborted due to various memory events (e.g. read/write capacity and conflicts). Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of times an RTM execution aborted due to various memory events (e.g. read/write capacity and conflicts).",
"SampleAfterValue": "100003",
"UMask": "0x8"
},
@@ -523,7 +519,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc9",
"EventName": "RTM_RETIRED.ABORTED_MEMTYPE",
- "PublicDescription": "Counts the number of times an RTM execution aborted due to incompatible memory type. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of times an RTM execution aborted due to incompatible memory type.",
"SampleAfterValue": "100003",
"UMask": "0x40"
},
@@ -532,7 +528,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc9",
"EventName": "RTM_RETIRED.ABORTED_UNFRIENDLY",
- "PublicDescription": "Counts the number of times an RTM execution aborted due to HLE-unfriendly instructions. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of times an RTM execution aborted due to HLE-unfriendly instructions.",
"SampleAfterValue": "100003",
"UMask": "0x20"
},
@@ -541,7 +537,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc9",
"EventName": "RTM_RETIRED.COMMIT",
- "PublicDescription": "Counts the number of times RTM commit succeeded. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of times RTM commit succeeded.",
"SampleAfterValue": "100003",
"UMask": "0x2"
},
@@ -550,7 +546,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc9",
"EventName": "RTM_RETIRED.START",
- "PublicDescription": "Counts the number of times we entered an RTM region. Does not count nested transactions. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of times we entered an RTM region. Does not count nested transactions.",
"SampleAfterValue": "100003",
"UMask": "0x1"
},
@@ -559,7 +555,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x54",
"EventName": "TX_MEM.ABORT_CAPACITY_READ",
- "PublicDescription": "Speculatively counts the number of Transactional Synchronization Extensions (TSX) aborts due to a data capacity limitation for transactional reads Available PDIST counters: 0",
+ "PublicDescription": "Speculatively counts the number of Transactional Synchronization Extensions (TSX) aborts due to a data capacity limitation for transactional reads",
"SampleAfterValue": "100003",
"UMask": "0x80"
},
@@ -568,7 +564,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x54",
"EventName": "TX_MEM.ABORT_CAPACITY_WRITE",
- "PublicDescription": "Speculatively counts the number of Transactional Synchronization Extensions (TSX) aborts due to a data capacity limitation for transactional writes. Available PDIST counters: 0",
+ "PublicDescription": "Speculatively counts the number of Transactional Synchronization Extensions (TSX) aborts due to a data capacity limitation for transactional writes.",
"SampleAfterValue": "100003",
"UMask": "0x2"
},
@@ -577,7 +573,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x54",
"EventName": "TX_MEM.ABORT_CONFLICT",
- "PublicDescription": "Counts the number of times a TSX line had a cache conflict. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of times a TSX line had a cache conflict.",
"SampleAfterValue": "100003",
"UMask": "0x1"
}
diff --git a/tools/perf/pmu-events/arch/x86/emeraldrapids/other.json b/tools/perf/pmu-events/arch/x86/emeraldrapids/other.json
index a58d65556609..21f49f609ed4 100644
--- a/tools/perf/pmu-events/arch/x86/emeraldrapids/other.json
+++ b/tools/perf/pmu-events/arch/x86/emeraldrapids/other.json
@@ -4,11 +4,35 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc1",
"EventName": "ASSISTS.PAGE_FAULT",
- "PublicDescription": "ASSISTS.PAGE_FAULT Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x8"
},
{
+ "BriefDescription": "HW_INTERRUPTS.MASKED",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xcb",
+ "EventName": "HW_INTERRUPTS.MASKED",
+ "SampleAfterValue": "100003",
+ "UMask": "0x2"
+ },
+ {
+ "BriefDescription": "HW_INTERRUPTS.PENDING_AND_MASKED",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xcb",
+ "EventName": "HW_INTERRUPTS.PENDING_AND_MASKED",
+ "SampleAfterValue": "100003",
+ "UMask": "0x4"
+ },
+ {
+ "BriefDescription": "Number of hardware interrupts received by the processor.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xcb",
+ "EventName": "HW_INTERRUPTS.RECEIVED",
+ "PublicDescription": "Counts the number of hardware interruptions received by the processor.",
+ "SampleAfterValue": "203",
+ "UMask": "0x1"
+ },
+ {
"BriefDescription": "Counts streaming stores that have any type of response.",
"Counter": "0,1,2,3",
"EventCode": "0x2A,0x2B",
@@ -25,7 +49,7 @@
"CounterMask": "1",
"EventCode": "0x2d",
"EventName": "XQ.FULL_CYCLES",
- "PublicDescription": "number of cycles when the thread is active and the uncore cannot take any further requests (for example prefetches, loads or stores initiated by the Core that miss the L2 cache). Available PDIST counters: 0",
+ "PublicDescription": "number of cycles when the thread is active and the uncore cannot take any further requests (for example prefetches, loads or stores initiated by the Core that miss the L2 cache).",
"SampleAfterValue": "1000003",
"UMask": "0x1"
}
diff --git a/tools/perf/pmu-events/arch/x86/emeraldrapids/pipeline.json b/tools/perf/pmu-events/arch/x86/emeraldrapids/pipeline.json
index 48bec483b49a..1fa7957956df 100644
--- a/tools/perf/pmu-events/arch/x86/emeraldrapids/pipeline.json
+++ b/tools/perf/pmu-events/arch/x86/emeraldrapids/pipeline.json
@@ -6,7 +6,6 @@
"Deprecated": "1",
"EventCode": "0xb0",
"EventName": "ARITH.DIVIDER_ACTIVE",
- "PublicDescription": "This event is deprecated. Refer to new event ARITH.DIV_ACTIVE Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x9"
},
@@ -16,7 +15,7 @@
"CounterMask": "1",
"EventCode": "0xb0",
"EventName": "ARITH.DIV_ACTIVE",
- "PublicDescription": "Counts cycles when divide unit is busy executing divide or square root operations. Accounts for integer and floating-point operations. Available PDIST counters: 0",
+ "PublicDescription": "Counts cycles when divide unit is busy executing divide or square root operations. Accounts for integer and floating-point operations.",
"SampleAfterValue": "1000003",
"UMask": "0x9"
},
@@ -27,7 +26,6 @@
"Deprecated": "1",
"EventCode": "0xb0",
"EventName": "ARITH.FP_DIVIDER_ACTIVE",
- "PublicDescription": "This event is deprecated. Refer to new event ARITH.FPDIV_ACTIVE Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x1"
},
@@ -37,7 +35,6 @@
"CounterMask": "1",
"EventCode": "0xb0",
"EventName": "ARITH.IDIV_ACTIVE",
- "PublicDescription": "This event counts the cycles the integer divider is busy. Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x8"
},
@@ -48,7 +45,6 @@
"Deprecated": "1",
"EventCode": "0xb0",
"EventName": "ARITH.INT_DIVIDER_ACTIVE",
- "PublicDescription": "This event is deprecated. Refer to new event ARITH.IDIV_ACTIVE Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x8"
},
@@ -57,7 +53,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc1",
"EventName": "ASSISTS.ANY",
- "PublicDescription": "Counts the number of occurrences where a microcode assist is invoked by hardware. Examples include AD (page Access Dirty), FP and AVX related assists. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of occurrences where a microcode assist is invoked by hardware. Examples include AD (page Access Dirty), FP and AVX related assists.",
"SampleAfterValue": "100003",
"UMask": "0x1b"
},
@@ -217,7 +213,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xec",
"EventName": "CPU_CLK_UNHALTED.C01",
- "PublicDescription": "Counts core clocks when the thread is in the C0.1 light-weight slower wakeup time but more power saving optimized state. This state can be entered via the TPAUSE or UMWAIT instructions. Available PDIST counters: 0",
+ "PublicDescription": "Counts core clocks when the thread is in the C0.1 light-weight slower wakeup time but more power saving optimized state. This state can be entered via the TPAUSE or UMWAIT instructions.",
"SampleAfterValue": "2000003",
"UMask": "0x10"
},
@@ -226,7 +222,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xec",
"EventName": "CPU_CLK_UNHALTED.C02",
- "PublicDescription": "Counts core clocks when the thread is in the C0.2 light-weight faster wakeup time but less power saving optimized state. This state can be entered via the TPAUSE or UMWAIT instructions. Available PDIST counters: 0",
+ "PublicDescription": "Counts core clocks when the thread is in the C0.2 light-weight faster wakeup time but less power saving optimized state. This state can be entered via the TPAUSE or UMWAIT instructions.",
"SampleAfterValue": "2000003",
"UMask": "0x20"
},
@@ -235,7 +231,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xec",
"EventName": "CPU_CLK_UNHALTED.C0_WAIT",
- "PublicDescription": "Counts core clocks when the thread is in the C0.1 or C0.2 power saving optimized states (TPAUSE or UMWAIT instructions) or running the PAUSE instruction. Available PDIST counters: 0",
+ "PublicDescription": "Counts core clocks when the thread is in the C0.1 or C0.2 power saving optimized states (TPAUSE or UMWAIT instructions) or running the PAUSE instruction.",
"SampleAfterValue": "2000003",
"UMask": "0x70"
},
@@ -244,7 +240,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xec",
"EventName": "CPU_CLK_UNHALTED.DISTRIBUTED",
- "PublicDescription": "This event distributes cycle counts between active hyperthreads, i.e., those in C0. A hyperthread becomes inactive when it executes the HLT or MWAIT instructions. If all other hyperthreads are inactive (or disabled or do not exist), all counts are attributed to this hyperthread. To obtain the full count when the Core is active, sum the counts from each hyperthread. Available PDIST counters: 0",
+ "PublicDescription": "This event distributes cycle counts between active hyperthreads, i.e., those in C0. A hyperthread becomes inactive when it executes the HLT or MWAIT instructions. If all other hyperthreads are inactive (or disabled or do not exist), all counts are attributed to this hyperthread. To obtain the full count when the Core is active, sum the counts from each hyperthread.",
"SampleAfterValue": "2000003",
"UMask": "0x2"
},
@@ -253,7 +249,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0x3c",
"EventName": "CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE",
- "PublicDescription": "Counts Core crystal clock cycles when current thread is unhalted and the other thread is halted. Available PDIST counters: 0",
+ "PublicDescription": "Counts Core crystal clock cycles when current thread is unhalted and the other thread is halted.",
"SampleAfterValue": "25003",
"UMask": "0x2"
},
@@ -262,7 +258,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xec",
"EventName": "CPU_CLK_UNHALTED.PAUSE",
- "PublicDescription": "CPU_CLK_UNHALTED.PAUSE Available PDIST counters: 0",
"SampleAfterValue": "2000003",
"UMask": "0x40"
},
@@ -273,7 +268,6 @@
"EdgeDetect": "1",
"EventCode": "0xec",
"EventName": "CPU_CLK_UNHALTED.PAUSE_INST",
- "PublicDescription": "CPU_CLK_UNHALTED.PAUSE_INST Available PDIST counters: 0",
"SampleAfterValue": "2000003",
"UMask": "0x40"
},
@@ -282,7 +276,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0x3c",
"EventName": "CPU_CLK_UNHALTED.REF_DISTRIBUTED",
- "PublicDescription": "This event distributes Core crystal clock cycle counts between active hyperthreads, i.e., those in C0 sleep-state. A hyperthread becomes inactive when it executes the HLT or MWAIT instructions. If one thread is active in a core, all counts are attributed to this hyperthread. To obtain the full count when the Core is active, sum the counts from each hyperthread. Available PDIST counters: 0",
+ "PublicDescription": "This event distributes Core crystal clock cycle counts between active hyperthreads, i.e., those in C0 sleep-state. A hyperthread becomes inactive when it executes the HLT or MWAIT instructions. If one thread is active in a core, all counts are attributed to this hyperthread. To obtain the full count when the Core is active, sum the counts from each hyperthread.",
"SampleAfterValue": "2000003",
"UMask": "0x8"
},
@@ -299,7 +293,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0x3c",
"EventName": "CPU_CLK_UNHALTED.REF_TSC_P",
- "PublicDescription": "Counts the number of reference cycles when the core is not in a halt state. The core enters the halt state when it is running the HLT instruction or the MWAIT instruction. This event is not affected by core frequency changes (for example, P states, TM2 transitions) but has the same incrementing frequency as the time stamp counter. This event can approximate elapsed time while the core was not in a halt state. It is counted on a dedicated fixed counter, leaving the four (eight when Hyperthreading is disabled) programmable counters available for other events. Note: On all current platforms this event stops counting during 'throttling (TM)' states duty off periods the processor is 'halted'. The counter update is done at a lower clock rate then the core clock the overflow status bit for this counter may appear 'sticky'. After the counter has overflowed and software clears the overflow status bit and resets the counter to less than MAX. The reset value to the counter is not clocked immediately so the overflow status bit will flip 'high (1)' and generate another PMI (if enabled) after which the reset value gets clocked into the counter. Therefore, software will get the interrupt, read the overflow status bit '1 for bit 34 while the counter value is less than MAX. Software should ignore this case. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of reference cycles when the core is not in a halt state. The core enters the halt state when it is running the HLT instruction or the MWAIT instruction. This event is not affected by core frequency changes (for example, P states, TM2 transitions) but has the same incrementing frequency as the time stamp counter. This event can approximate elapsed time while the core was not in a halt state. It is counted on a dedicated fixed counter, leaving the four (eight when Hyperthreading is disabled) programmable counters available for other events. Note: On all current platforms this event stops counting during 'throttling (TM)' states duty off periods the processor is 'halted'. The counter update is done at a lower clock rate then the core clock the overflow status bit for this counter may appear 'sticky'. After the counter has overflowed and software clears the overflow status bit and resets the counter to less than MAX. The reset value to the counter is not clocked immediately so the overflow status bit will flip 'high (1)' and generate another PMI (if enabled) after which the reset value gets clocked into the counter. Therefore, software will get the interrupt, read the overflow status bit '1 for bit 34 while the counter value is less than MAX. Software should ignore this case.",
"SampleAfterValue": "2000003",
"UMask": "0x1"
},
@@ -316,7 +310,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0x3c",
"EventName": "CPU_CLK_UNHALTED.THREAD_P",
- "PublicDescription": "This is an architectural event that counts the number of thread cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. The core frequency may change from time to time due to power or thermal throttling. For this reason, this event may have a changing ratio with regards to wall clock time. Available PDIST counters: 0",
+ "PublicDescription": "This is an architectural event that counts the number of thread cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. The core frequency may change from time to time due to power or thermal throttling. For this reason, this event may have a changing ratio with regards to wall clock time.",
"SampleAfterValue": "2000003"
},
{
@@ -325,7 +319,6 @@
"CounterMask": "8",
"EventCode": "0xa3",
"EventName": "CYCLE_ACTIVITY.CYCLES_L1D_MISS",
- "PublicDescription": "Cycles while L1 cache miss demand load is outstanding. Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x8"
},
@@ -335,7 +328,6 @@
"CounterMask": "1",
"EventCode": "0xa3",
"EventName": "CYCLE_ACTIVITY.CYCLES_L2_MISS",
- "PublicDescription": "Cycles while L2 cache miss demand load is outstanding. Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x1"
},
@@ -345,7 +337,6 @@
"CounterMask": "16",
"EventCode": "0xa3",
"EventName": "CYCLE_ACTIVITY.CYCLES_MEM_ANY",
- "PublicDescription": "Cycles while memory subsystem has an outstanding load. Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x10"
},
@@ -355,7 +346,6 @@
"CounterMask": "12",
"EventCode": "0xa3",
"EventName": "CYCLE_ACTIVITY.STALLS_L1D_MISS",
- "PublicDescription": "Execution stalls while L1 cache miss demand load is outstanding. Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0xc"
},
@@ -365,7 +355,6 @@
"CounterMask": "5",
"EventCode": "0xa3",
"EventName": "CYCLE_ACTIVITY.STALLS_L2_MISS",
- "PublicDescription": "Execution stalls while L2 cache miss demand load is outstanding. Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x5"
},
@@ -375,7 +364,6 @@
"CounterMask": "4",
"EventCode": "0xa3",
"EventName": "CYCLE_ACTIVITY.STALLS_TOTAL",
- "PublicDescription": "Total execution stalls. Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x4"
},
@@ -384,7 +372,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xb7",
"EventName": "EXE.AMX_BUSY",
- "PublicDescription": "Counts the cycles where the AMX (Advance Matrix Extension) unit is busy performing an operation. Available PDIST counters: 0",
"SampleAfterValue": "2000003",
"UMask": "0x2"
},
@@ -393,7 +380,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xa6",
"EventName": "EXE_ACTIVITY.1_PORTS_UTIL",
- "PublicDescription": "Counts cycles during which a total of 1 uop was executed on all ports and Reservation Station (RS) was not empty. Available PDIST counters: 0",
+ "PublicDescription": "Counts cycles during which a total of 1 uop was executed on all ports and Reservation Station (RS) was not empty.",
"SampleAfterValue": "2000003",
"UMask": "0x2"
},
@@ -402,7 +389,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xa6",
"EventName": "EXE_ACTIVITY.2_3_PORTS_UTIL",
- "PublicDescription": "Cycles total of 2 or 3 uops are executed on all ports and Reservation Station (RS) was not empty. Available PDIST counters: 0",
"SampleAfterValue": "2000003",
"UMask": "0xc"
},
@@ -411,7 +397,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xa6",
"EventName": "EXE_ACTIVITY.2_PORTS_UTIL",
- "PublicDescription": "Counts cycles during which a total of 2 uops were executed on all ports and Reservation Station (RS) was not empty. Available PDIST counters: 0",
+ "PublicDescription": "Counts cycles during which a total of 2 uops were executed on all ports and Reservation Station (RS) was not empty.",
"SampleAfterValue": "2000003",
"UMask": "0x4"
},
@@ -420,7 +406,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xa6",
"EventName": "EXE_ACTIVITY.3_PORTS_UTIL",
- "PublicDescription": "Cycles total of 3 uops are executed on all ports and Reservation Station (RS) was not empty. Available PDIST counters: 0",
+ "PublicDescription": "Cycles total of 3 uops are executed on all ports and Reservation Station (RS) was not empty.",
"SampleAfterValue": "2000003",
"UMask": "0x8"
},
@@ -429,7 +415,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xa6",
"EventName": "EXE_ACTIVITY.4_PORTS_UTIL",
- "PublicDescription": "Cycles total of 4 uops are executed on all ports and Reservation Station (RS) was not empty. Available PDIST counters: 0",
+ "PublicDescription": "Cycles total of 4 uops are executed on all ports and Reservation Station (RS) was not empty.",
"SampleAfterValue": "2000003",
"UMask": "0x10"
},
@@ -439,7 +425,6 @@
"CounterMask": "5",
"EventCode": "0xa6",
"EventName": "EXE_ACTIVITY.BOUND_ON_LOADS",
- "PublicDescription": "Execution stalls while memory subsystem has an outstanding load. Available PDIST counters: 0",
"SampleAfterValue": "2000003",
"UMask": "0x21"
},
@@ -449,7 +434,7 @@
"CounterMask": "2",
"EventCode": "0xa6",
"EventName": "EXE_ACTIVITY.BOUND_ON_STORES",
- "PublicDescription": "Counts cycles where the Store Buffer was full and no loads caused an execution stall. Available PDIST counters: 0",
+ "PublicDescription": "Counts cycles where the Store Buffer was full and no loads caused an execution stall.",
"SampleAfterValue": "1000003",
"UMask": "0x40"
},
@@ -458,7 +443,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xa6",
"EventName": "EXE_ACTIVITY.EXE_BOUND_0_PORTS",
- "PublicDescription": "Number of cycles total of 0 uops executed on all ports, Reservation Station (RS) was not empty, the Store Buffer (SB) was not full and there was no outstanding load. Available PDIST counters: 0",
+ "PublicDescription": "Number of cycles total of 0 uops executed on all ports, Reservation Station (RS) was not empty, the Store Buffer (SB) was not full and there was no outstanding load.",
"SampleAfterValue": "1000003",
"UMask": "0x80"
},
@@ -467,7 +452,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x75",
"EventName": "INST_DECODED.DECODERS",
- "PublicDescription": "Number of decoders utilized in a cycle when the MITE (legacy decode pipeline) fetches instructions. Available PDIST counters: 0",
+ "PublicDescription": "Number of decoders utilized in a cycle when the MITE (legacy decode pipeline) fetches instructions.",
"SampleAfterValue": "2000003",
"UMask": "0x1"
},
@@ -492,7 +477,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc0",
"EventName": "INST_RETIRED.MACRO_FUSED",
- "PublicDescription": "INST_RETIRED.MACRO_FUSED Available PDIST counters: 0",
"SampleAfterValue": "2000003",
"UMask": "0x10"
},
@@ -501,7 +485,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc0",
"EventName": "INST_RETIRED.NOP",
- "PublicDescription": "Counts all retired NOP or ENDBR32/64 instructions Available PDIST counters: 0",
+ "PublicDescription": "Counts all retired NOP or ENDBR32/64 instructions",
"SampleAfterValue": "2000003",
"UMask": "0x2"
},
@@ -518,7 +502,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc0",
"EventName": "INST_RETIRED.REP_ITERATION",
- "PublicDescription": "Number of iterations of Repeat (REP) string retired instructions such as MOVS, CMPS, and SCAS. Each has a byte, word, and doubleword version and string instructions can be repeated using a repetition prefix, REP, that allows their architectural execution to be repeated a number of times as specified by the RCX register. Note the number of iterations is implementation-dependent. Available PDIST counters: 0",
+ "PublicDescription": "Number of iterations of Repeat (REP) string retired instructions such as MOVS, CMPS, and SCAS. Each has a byte, word, and doubleword version and string instructions can be repeated using a repetition prefix, REP, that allows their architectural execution to be repeated a number of times as specified by the RCX register. Note the number of iterations is implementation-dependent.",
"SampleAfterValue": "2000003",
"UMask": "0x8"
},
@@ -529,7 +513,7 @@
"EdgeDetect": "1",
"EventCode": "0xad",
"EventName": "INT_MISC.CLEARS_COUNT",
- "PublicDescription": "Counts the number of speculative clears due to any type of branch misprediction or machine clears Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of speculative clears due to any type of branch misprediction or machine clears",
"SampleAfterValue": "500009",
"UMask": "0x1"
},
@@ -538,7 +522,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xad",
"EventName": "INT_MISC.CLEAR_RESTEER_CYCLES",
- "PublicDescription": "Cycles after recovery from a branch misprediction or machine clear till the first uop is issued from the resteered path. Available PDIST counters: 0",
+ "PublicDescription": "Cycles after recovery from a branch misprediction or machine clear till the first uop is issued from the resteered path.",
"SampleAfterValue": "500009",
"UMask": "0x80"
},
@@ -547,7 +531,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xad",
"EventName": "INT_MISC.MBA_STALLS",
- "PublicDescription": "INT_MISC.MBA_STALLS Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x20"
},
@@ -556,7 +539,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xad",
"EventName": "INT_MISC.RECOVERY_CYCLES",
- "PublicDescription": "Counts core cycles when the Resource allocator was stalled due to recovery from an earlier branch misprediction or machine clear event. Available PDIST counters: 0",
+ "PublicDescription": "Counts core cycles when the Resource allocator was stalled due to recovery from an earlier branch misprediction or machine clear event.",
"SampleAfterValue": "500009",
"UMask": "0x1"
},
@@ -567,7 +550,6 @@
"EventName": "INT_MISC.UNKNOWN_BRANCH_CYCLES",
"MSRIndex": "0x3F7",
"MSRValue": "0x7",
- "PublicDescription": "Bubble cycles of BAClear (Unknown Branch). Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x40"
},
@@ -576,7 +558,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xad",
"EventName": "INT_MISC.UOP_DROPPING",
- "PublicDescription": "Estimated number of Top-down Microarchitecture Analysis slots that got dropped due to non front-end reasons Available PDIST counters: 0",
+ "PublicDescription": "Estimated number of Top-down Microarchitecture Analysis slots that got dropped due to non front-end reasons",
"SampleAfterValue": "1000003",
"UMask": "0x10"
},
@@ -585,7 +567,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xe7",
"EventName": "INT_VEC_RETIRED.128BIT",
- "PublicDescription": "INT_VEC_RETIRED.128BIT Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x13"
},
@@ -594,7 +575,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xe7",
"EventName": "INT_VEC_RETIRED.256BIT",
- "PublicDescription": "INT_VEC_RETIRED.256BIT Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0xac"
},
@@ -603,7 +583,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xe7",
"EventName": "INT_VEC_RETIRED.ADD_128",
- "PublicDescription": "Number of retired integer ADD/SUB (regular or horizontal), SAD 128-bit vector instructions. Available PDIST counters: 0",
+ "PublicDescription": "Number of retired integer ADD/SUB (regular or horizontal), SAD 128-bit vector instructions.",
"SampleAfterValue": "1000003",
"UMask": "0x3"
},
@@ -612,7 +592,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xe7",
"EventName": "INT_VEC_RETIRED.ADD_256",
- "PublicDescription": "Number of retired integer ADD/SUB (regular or horizontal), SAD 256-bit vector instructions. Available PDIST counters: 0",
+ "PublicDescription": "Number of retired integer ADD/SUB (regular or horizontal), SAD 256-bit vector instructions.",
"SampleAfterValue": "1000003",
"UMask": "0xc"
},
@@ -621,7 +601,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xe7",
"EventName": "INT_VEC_RETIRED.MUL_256",
- "PublicDescription": "INT_VEC_RETIRED.MUL_256 Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x80"
},
@@ -630,7 +609,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xe7",
"EventName": "INT_VEC_RETIRED.SHUFFLES",
- "PublicDescription": "INT_VEC_RETIRED.SHUFFLES Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x40"
},
@@ -639,7 +617,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xe7",
"EventName": "INT_VEC_RETIRED.VNNI_128",
- "PublicDescription": "INT_VEC_RETIRED.VNNI_128 Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x10"
},
@@ -648,7 +625,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xe7",
"EventName": "INT_VEC_RETIRED.VNNI_256",
- "PublicDescription": "INT_VEC_RETIRED.VNNI_256 Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x20"
},
@@ -657,7 +633,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x03",
"EventName": "LD_BLOCKS.ADDRESS_ALIAS",
- "PublicDescription": "Counts the number of times a load got blocked due to false dependencies in MOB due to partial compare on address. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of times a load got blocked due to false dependencies in MOB due to partial compare on address.",
"SampleAfterValue": "100003",
"UMask": "0x4"
},
@@ -666,7 +642,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x03",
"EventName": "LD_BLOCKS.NO_SR",
- "PublicDescription": "Counts the number of times that split load operations are temporarily blocked because all resources for handling the split accesses are in use. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of times that split load operations are temporarily blocked because all resources for handling the split accesses are in use.",
"SampleAfterValue": "100003",
"UMask": "0x88"
},
@@ -675,7 +651,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x03",
"EventName": "LD_BLOCKS.STORE_FORWARD",
- "PublicDescription": "Counts the number of times where store forwarding was prevented for a load operation. The most common case is a load blocked due to the address of memory access (partially) overlapping with a preceding uncompleted store. Note: See the table of not supported store forwards in the Optimization Guide. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of times where store forwarding was prevented for a load operation. The most common case is a load blocked due to the address of memory access (partially) overlapping with a preceding uncompleted store. Note: See the table of not supported store forwards in the Optimization Guide.",
"SampleAfterValue": "100003",
"UMask": "0x82"
},
@@ -684,7 +660,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x4c",
"EventName": "LOAD_HIT_PREFETCH.SWPF",
- "PublicDescription": "Counts all software-prefetch load dispatches that hit the fill buffer (FB) allocated for the software prefetch. It can also be incremented by some lock instructions. So it should only be used with profiling so that the locks can be excluded by ASM (Assembly File) inspection of the nearby instructions. Available PDIST counters: 0",
+ "PublicDescription": "Counts all software-prefetch load dispatches that hit the fill buffer (FB) allocated for the software prefetch. It can also be incremented by some lock instructions. So it should only be used with profiling so that the locks can be excluded by ASM (Assembly File) inspection of the nearby instructions.",
"SampleAfterValue": "100003",
"UMask": "0x1"
},
@@ -694,7 +670,7 @@
"CounterMask": "1",
"EventCode": "0xa8",
"EventName": "LSD.CYCLES_ACTIVE",
- "PublicDescription": "Counts the cycles when at least one uop is delivered by the LSD (Loop-stream detector). Available PDIST counters: 0",
+ "PublicDescription": "Counts the cycles when at least one uop is delivered by the LSD (Loop-stream detector).",
"SampleAfterValue": "2000003",
"UMask": "0x1"
},
@@ -704,7 +680,7 @@
"CounterMask": "6",
"EventCode": "0xa8",
"EventName": "LSD.CYCLES_OK",
- "PublicDescription": "Counts the cycles when optimal number of uops is delivered by the LSD (Loop-stream detector). Available PDIST counters: 0",
+ "PublicDescription": "Counts the cycles when optimal number of uops is delivered by the LSD (Loop-stream detector).",
"SampleAfterValue": "2000003",
"UMask": "0x1"
},
@@ -713,7 +689,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xa8",
"EventName": "LSD.UOPS",
- "PublicDescription": "Counts the number of uops delivered to the back-end by the LSD(Loop Stream Detector). Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of uops delivered to the back-end by the LSD(Loop Stream Detector).",
"SampleAfterValue": "2000003",
"UMask": "0x1"
},
@@ -724,7 +700,7 @@
"EdgeDetect": "1",
"EventCode": "0xc3",
"EventName": "MACHINE_CLEARS.COUNT",
- "PublicDescription": "Counts the number of machine clears (nukes) of any type. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of machine clears (nukes) of any type.",
"SampleAfterValue": "100003",
"UMask": "0x1"
},
@@ -733,7 +709,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc3",
"EventName": "MACHINE_CLEARS.SMC",
- "PublicDescription": "Counts self-modifying code (SMC) detected, which causes a machine clear. Available PDIST counters: 0",
+ "PublicDescription": "Counts self-modifying code (SMC) detected, which causes a machine clear.",
"SampleAfterValue": "100003",
"UMask": "0x4"
},
@@ -742,7 +718,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xe0",
"EventName": "MISC2_RETIRED.LFENCE",
- "PublicDescription": "number of LFENCE retired instructions Available PDIST counters: 0",
+ "PublicDescription": "number of LFENCE retired instructions",
"SampleAfterValue": "400009",
"UMask": "0x20"
},
@@ -751,7 +727,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xcc",
"EventName": "MISC_RETIRED.LBR_INSERTS",
- "PublicDescription": "Increments when an entry is added to the Last Branch Record (LBR) array (or removed from the array in case of RETURNs in call stack mode). The event requires LBR enable via IA32_DEBUGCTL MSR and branch type selection via MSR_LBR_SELECT. Available PDIST counters: 0",
+ "PublicDescription": "Increments when an entry is added to the Last Branch Record (LBR) array (or removed from the array in case of RETURNs in call stack mode). The event requires LBR enable via IA32_DEBUGCTL MSR and branch type selection via MSR_LBR_SELECT.",
"SampleAfterValue": "100003",
"UMask": "0x20"
},
@@ -760,7 +736,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xa2",
"EventName": "RESOURCE_STALLS.SB",
- "PublicDescription": "Counts allocation stall cycles caused by the store buffer (SB) being full. This counts cycles that the pipeline back-end blocked uop delivery from the front-end. Available PDIST counters: 0",
+ "PublicDescription": "Counts allocation stall cycles caused by the store buffer (SB) being full. This counts cycles that the pipeline back-end blocked uop delivery from the front-end.",
"SampleAfterValue": "100003",
"UMask": "0x8"
},
@@ -769,7 +745,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xa2",
"EventName": "RESOURCE_STALLS.SCOREBOARD",
- "PublicDescription": "Counts cycles where the pipeline is stalled due to serializing operations. Available PDIST counters: 0",
"SampleAfterValue": "100003",
"UMask": "0x2"
},
@@ -778,7 +753,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xa5",
"EventName": "RS.EMPTY",
- "PublicDescription": "Counts cycles during which the reservation station (RS) is empty for this logical processor. This is usually caused when the front-end pipeline runs into starvation periods (e.g. branch mispredictions or i-cache misses) Available PDIST counters: 0",
+ "PublicDescription": "Counts cycles during which the reservation station (RS) is empty for this logical processor. This is usually caused when the front-end pipeline runs into starvation periods (e.g. branch mispredictions or i-cache misses)",
"SampleAfterValue": "1000003",
"UMask": "0x7"
},
@@ -790,7 +765,7 @@
"EventCode": "0xa5",
"EventName": "RS.EMPTY_COUNT",
"Invert": "1",
- "PublicDescription": "Counts end of periods where the Reservation Station (RS) was empty. Could be useful to closely sample on front-end latency issues (see the FRONTEND_RETIRED event of designated precise events) Available PDIST counters: 0",
+ "PublicDescription": "Counts end of periods where the Reservation Station (RS) was empty. Could be useful to closely sample on front-end latency issues (see the FRONTEND_RETIRED event of designated precise events)",
"SampleAfterValue": "100003",
"UMask": "0x7"
},
@@ -799,7 +774,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xa5",
"EventName": "RS.EMPTY_RESOURCE",
- "PublicDescription": "Cycles when Reservation Station (RS) is empty due to a resource in the back-end Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x1"
},
@@ -812,7 +786,6 @@
"EventCode": "0xa5",
"EventName": "RS_EMPTY.COUNT",
"Invert": "1",
- "PublicDescription": "This event is deprecated. Refer to new event RS.EMPTY_COUNT Available PDIST counters: 0",
"SampleAfterValue": "100003",
"UMask": "0x7"
},
@@ -822,7 +795,6 @@
"Deprecated": "1",
"EventCode": "0xa5",
"EventName": "RS_EMPTY.CYCLES",
- "PublicDescription": "This event is deprecated. Refer to new event RS.EMPTY Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x7"
},
@@ -831,7 +803,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xa4",
"EventName": "TOPDOWN.BACKEND_BOUND_SLOTS",
- "PublicDescription": "Number of slots in TMA method where no micro-operations were being issued from front-end to back-end of the machine due to lack of back-end resources. Available PDIST counters: 0",
+ "PublicDescription": "Number of slots in TMA method where no micro-operations were being issued from front-end to back-end of the machine due to lack of back-end resources.",
"SampleAfterValue": "10000003",
"UMask": "0x2"
},
@@ -840,7 +812,7 @@
"Counter": "0",
"EventCode": "0xa4",
"EventName": "TOPDOWN.BAD_SPEC_SLOTS",
- "PublicDescription": "Number of slots of TMA method that were wasted due to incorrect speculation. It covers all types of control-flow or data-related mis-speculations. Available PDIST counters: 0",
+ "PublicDescription": "Number of slots of TMA method that were wasted due to incorrect speculation. It covers all types of control-flow or data-related mis-speculations.",
"SampleAfterValue": "10000003",
"UMask": "0x4"
},
@@ -849,7 +821,7 @@
"Counter": "0",
"EventCode": "0xa4",
"EventName": "TOPDOWN.BR_MISPREDICT_SLOTS",
- "PublicDescription": "Number of TMA slots that were wasted due to incorrect speculation by (any type of) branch mispredictions. This event estimates number of speculative operations that were issued but not retired as well as the out-of-order engine recovery past a branch misprediction. Available PDIST counters: 0",
+ "PublicDescription": "Number of TMA slots that were wasted due to incorrect speculation by (any type of) branch mispredictions. This event estimates number of speculative operations that were issued but not retired as well as the out-of-order engine recovery past a branch misprediction.",
"SampleAfterValue": "10000003",
"UMask": "0x8"
},
@@ -858,7 +830,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xa4",
"EventName": "TOPDOWN.MEMORY_BOUND_SLOTS",
- "PublicDescription": "TOPDOWN.MEMORY_BOUND_SLOTS Available PDIST counters: 0",
"SampleAfterValue": "10000003",
"UMask": "0x10"
},
@@ -875,7 +846,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xa4",
"EventName": "TOPDOWN.SLOTS_P",
- "PublicDescription": "Counts the number of available slots for an unhalted logical processor. The event increments by machine-width of the narrowest pipeline as employed by the Top-down Microarchitecture Analysis method. The count is distributed among unhalted logical processors (hyper-threads) who share the same physical core. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of available slots for an unhalted logical processor. The event increments by machine-width of the narrowest pipeline as employed by the Top-down Microarchitecture Analysis method. The count is distributed among unhalted logical processors (hyper-threads) who share the same physical core.",
"SampleAfterValue": "10000003",
"UMask": "0x1"
},
@@ -884,7 +855,6 @@
"Counter": "0,1,2,3",
"EventCode": "0x76",
"EventName": "UOPS_DECODED.DEC0_UOPS",
- "PublicDescription": "UOPS_DECODED.DEC0_UOPS Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x1"
},
@@ -893,7 +863,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xb2",
"EventName": "UOPS_DISPATCHED.PORT_0",
- "PublicDescription": "Number of uops dispatch to execution port 0. Available PDIST counters: 0",
+ "PublicDescription": "Number of uops dispatch to execution port 0.",
"SampleAfterValue": "2000003",
"UMask": "0x1"
},
@@ -902,7 +872,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xb2",
"EventName": "UOPS_DISPATCHED.PORT_1",
- "PublicDescription": "Number of uops dispatch to execution port 1. Available PDIST counters: 0",
+ "PublicDescription": "Number of uops dispatch to execution port 1.",
"SampleAfterValue": "2000003",
"UMask": "0x2"
},
@@ -911,7 +881,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xb2",
"EventName": "UOPS_DISPATCHED.PORT_2_3_10",
- "PublicDescription": "Number of uops dispatch to execution ports 2, 3 and 10 Available PDIST counters: 0",
+ "PublicDescription": "Number of uops dispatch to execution ports 2, 3 and 10",
"SampleAfterValue": "2000003",
"UMask": "0x4"
},
@@ -920,7 +890,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xb2",
"EventName": "UOPS_DISPATCHED.PORT_4_9",
- "PublicDescription": "Number of uops dispatch to execution ports 4 and 9 Available PDIST counters: 0",
+ "PublicDescription": "Number of uops dispatch to execution ports 4 and 9",
"SampleAfterValue": "2000003",
"UMask": "0x10"
},
@@ -929,7 +899,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xb2",
"EventName": "UOPS_DISPATCHED.PORT_5_11",
- "PublicDescription": "Number of uops dispatch to execution ports 5 and 11 Available PDIST counters: 0",
+ "PublicDescription": "Number of uops dispatch to execution ports 5 and 11",
"SampleAfterValue": "2000003",
"UMask": "0x20"
},
@@ -938,7 +908,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xb2",
"EventName": "UOPS_DISPATCHED.PORT_6",
- "PublicDescription": "Number of uops dispatch to execution port 6. Available PDIST counters: 0",
+ "PublicDescription": "Number of uops dispatch to execution port 6.",
"SampleAfterValue": "2000003",
"UMask": "0x40"
},
@@ -947,7 +917,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xb2",
"EventName": "UOPS_DISPATCHED.PORT_7_8",
- "PublicDescription": "Number of uops dispatch to execution ports 7 and 8. Available PDIST counters: 0",
+ "PublicDescription": "Number of uops dispatch to execution ports 7 and 8.",
"SampleAfterValue": "2000003",
"UMask": "0x80"
},
@@ -956,7 +926,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xb1",
"EventName": "UOPS_EXECUTED.CORE",
- "PublicDescription": "Counts the number of uops executed from any thread. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of uops executed from any thread.",
"SampleAfterValue": "2000003",
"UMask": "0x2"
},
@@ -966,7 +936,7 @@
"CounterMask": "1",
"EventCode": "0xb1",
"EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_1",
- "PublicDescription": "Counts cycles when at least 1 micro-op is executed from any thread on physical core. Available PDIST counters: 0",
+ "PublicDescription": "Counts cycles when at least 1 micro-op is executed from any thread on physical core.",
"SampleAfterValue": "2000003",
"UMask": "0x2"
},
@@ -976,7 +946,7 @@
"CounterMask": "2",
"EventCode": "0xb1",
"EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_2",
- "PublicDescription": "Counts cycles when at least 2 micro-ops are executed from any thread on physical core. Available PDIST counters: 0",
+ "PublicDescription": "Counts cycles when at least 2 micro-ops are executed from any thread on physical core.",
"SampleAfterValue": "2000003",
"UMask": "0x2"
},
@@ -986,7 +956,7 @@
"CounterMask": "3",
"EventCode": "0xb1",
"EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_3",
- "PublicDescription": "Counts cycles when at least 3 micro-ops are executed from any thread on physical core. Available PDIST counters: 0",
+ "PublicDescription": "Counts cycles when at least 3 micro-ops are executed from any thread on physical core.",
"SampleAfterValue": "2000003",
"UMask": "0x2"
},
@@ -996,7 +966,7 @@
"CounterMask": "4",
"EventCode": "0xb1",
"EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_4",
- "PublicDescription": "Counts cycles when at least 4 micro-ops are executed from any thread on physical core. Available PDIST counters: 0",
+ "PublicDescription": "Counts cycles when at least 4 micro-ops are executed from any thread on physical core.",
"SampleAfterValue": "2000003",
"UMask": "0x2"
},
@@ -1006,7 +976,7 @@
"CounterMask": "1",
"EventCode": "0xb1",
"EventName": "UOPS_EXECUTED.CYCLES_GE_1",
- "PublicDescription": "Cycles where at least 1 uop was executed per-thread. Available PDIST counters: 0",
+ "PublicDescription": "Cycles where at least 1 uop was executed per-thread.",
"SampleAfterValue": "2000003",
"UMask": "0x1"
},
@@ -1016,7 +986,7 @@
"CounterMask": "2",
"EventCode": "0xb1",
"EventName": "UOPS_EXECUTED.CYCLES_GE_2",
- "PublicDescription": "Cycles where at least 2 uops were executed per-thread. Available PDIST counters: 0",
+ "PublicDescription": "Cycles where at least 2 uops were executed per-thread.",
"SampleAfterValue": "2000003",
"UMask": "0x1"
},
@@ -1026,7 +996,7 @@
"CounterMask": "3",
"EventCode": "0xb1",
"EventName": "UOPS_EXECUTED.CYCLES_GE_3",
- "PublicDescription": "Cycles where at least 3 uops were executed per-thread. Available PDIST counters: 0",
+ "PublicDescription": "Cycles where at least 3 uops were executed per-thread.",
"SampleAfterValue": "2000003",
"UMask": "0x1"
},
@@ -1036,7 +1006,7 @@
"CounterMask": "4",
"EventCode": "0xb1",
"EventName": "UOPS_EXECUTED.CYCLES_GE_4",
- "PublicDescription": "Cycles where at least 4 uops were executed per-thread. Available PDIST counters: 0",
+ "PublicDescription": "Cycles where at least 4 uops were executed per-thread.",
"SampleAfterValue": "2000003",
"UMask": "0x1"
},
@@ -1047,7 +1017,7 @@
"EventCode": "0xb1",
"EventName": "UOPS_EXECUTED.STALLS",
"Invert": "1",
- "PublicDescription": "Counts cycles during which no uops were dispatched from the Reservation Station (RS) per thread. Available PDIST counters: 0",
+ "PublicDescription": "Counts cycles during which no uops were dispatched from the Reservation Station (RS) per thread.",
"SampleAfterValue": "2000003",
"UMask": "0x1"
},
@@ -1059,7 +1029,6 @@
"EventCode": "0xb1",
"EventName": "UOPS_EXECUTED.STALL_CYCLES",
"Invert": "1",
- "PublicDescription": "This event is deprecated. Refer to new event UOPS_EXECUTED.STALLS Available PDIST counters: 0",
"SampleAfterValue": "2000003",
"UMask": "0x1"
},
@@ -1068,7 +1037,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xb1",
"EventName": "UOPS_EXECUTED.THREAD",
- "PublicDescription": "Counts the number of uops to be executed per-thread each cycle. Available PDIST counters: 0",
"SampleAfterValue": "2000003",
"UMask": "0x1"
},
@@ -1077,7 +1045,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xb1",
"EventName": "UOPS_EXECUTED.X87",
- "PublicDescription": "Counts the number of x87 uops executed. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of x87 uops executed.",
"SampleAfterValue": "2000003",
"UMask": "0x10"
},
@@ -1086,7 +1054,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xae",
"EventName": "UOPS_ISSUED.ANY",
- "PublicDescription": "Counts the number of uops that the Resource Allocation Table (RAT) issues to the Reservation Station (RS). Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of uops that the Resource Allocation Table (RAT) issues to the Reservation Station (RS).",
"SampleAfterValue": "2000003",
"UMask": "0x1"
},
@@ -1096,7 +1064,6 @@
"CounterMask": "1",
"EventCode": "0xae",
"EventName": "UOPS_ISSUED.CYCLES",
- "PublicDescription": "UOPS_ISSUED.CYCLES Available PDIST counters: 0",
"SampleAfterValue": "2000003",
"UMask": "0x1"
},
@@ -1106,7 +1073,7 @@
"CounterMask": "1",
"EventCode": "0xc2",
"EventName": "UOPS_RETIRED.CYCLES",
- "PublicDescription": "Counts cycles where at least one uop has retired. Available PDIST counters: 0",
+ "PublicDescription": "Counts cycles where at least one uop has retired.",
"SampleAfterValue": "1000003",
"UMask": "0x2"
},
@@ -1115,7 +1082,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc2",
"EventName": "UOPS_RETIRED.HEAVY",
- "PublicDescription": "Counts the number of retired micro-operations (uops) except the last uop of each instruction. An instruction that is decoded into less than two uops does not contribute to the count. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of retired micro-operations (uops) except the last uop of each instruction. An instruction that is decoded into less than two uops does not contribute to the count.",
"SampleAfterValue": "2000003",
"UMask": "0x1"
},
@@ -1126,7 +1093,6 @@
"EventName": "UOPS_RETIRED.MS",
"MSRIndex": "0x3F7",
"MSRValue": "0x8",
- "PublicDescription": "UOPS_RETIRED.MS Available PDIST counters: 0",
"SampleAfterValue": "2000003",
"UMask": "0x4"
},
@@ -1135,7 +1101,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc2",
"EventName": "UOPS_RETIRED.SLOTS",
- "PublicDescription": "Counts the retirement slots used each cycle. Available PDIST counters: 0",
+ "PublicDescription": "Counts the retirement slots used each cycle.",
"SampleAfterValue": "2000003",
"UMask": "0x2"
},
@@ -1146,7 +1112,7 @@
"EventCode": "0xc2",
"EventName": "UOPS_RETIRED.STALLS",
"Invert": "1",
- "PublicDescription": "This event counts cycles without actually retired uops. Available PDIST counters: 0",
+ "PublicDescription": "This event counts cycles without actually retired uops.",
"SampleAfterValue": "1000003",
"UMask": "0x2"
},
@@ -1158,7 +1124,6 @@
"EventCode": "0xc2",
"EventName": "UOPS_RETIRED.STALL_CYCLES",
"Invert": "1",
- "PublicDescription": "This event is deprecated. Refer to new event UOPS_RETIRED.STALLS Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x2"
}
diff --git a/tools/perf/pmu-events/arch/x86/emeraldrapids/uncore-cache.json b/tools/perf/pmu-events/arch/x86/emeraldrapids/uncore-cache.json
index f453202d80c2..92cf47967f0b 100644
--- a/tools/perf/pmu-events/arch/x86/emeraldrapids/uncore-cache.json
+++ b/tools/perf/pmu-events/arch/x86/emeraldrapids/uncore-cache.json
@@ -312,6 +312,17 @@
"Unit": "CHA"
},
{
+ "BriefDescription": "Distress signal asserted : DPT Remote",
+ "Counter": "0,1,2,3",
+ "EventCode": "0xaf",
+ "EventName": "UNC_CHA_DISTRESS_ASSERTED.DPT_NONLOCAL",
+ "Experimental": "1",
+ "PerPkg": "1",
+ "PublicDescription": "Distress signal asserted : DPT Remote : Counts the number of cycles either the local or incoming distress signals are asserted. : Dynamic Prefetch Throttle received by this tile",
+ "UMask": "0x8",
+ "Unit": "CHA"
+ },
+ {
"BriefDescription": "Egress Blocking due to Ordering requirements : Down",
"Counter": "0,1,2,3",
"EventCode": "0xba",
diff --git a/tools/perf/pmu-events/arch/x86/emeraldrapids/uncore-memory.json b/tools/perf/pmu-events/arch/x86/emeraldrapids/uncore-memory.json
index 68be01dad7c9..30044177ccf8 100644
--- a/tools/perf/pmu-events/arch/x86/emeraldrapids/uncore-memory.json
+++ b/tools/perf/pmu-events/arch/x86/emeraldrapids/uncore-memory.json
@@ -2770,6 +2770,88 @@
"Unit": "iMC"
},
{
+ "BriefDescription": "Number of DRAM Refreshes Issued",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x45",
+ "EventName": "UNC_M_DRAM_REFRESH.HIGH",
+ "Experimental": "1",
+ "PerPkg": "1",
+ "PublicDescription": "Number of DRAM Refreshes Issued : Counts the number of refreshes issued.",
+ "UMask": "0x24",
+ "Unit": "iMC"
+ },
+ {
+ "BriefDescription": "Number of DRAM Refreshes Issued",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x45",
+ "EventName": "UNC_M_DRAM_REFRESH.HIGH_ALL",
+ "Experimental": "1",
+ "PerPkg": "1",
+ "UMask": "0x24",
+ "Unit": "iMC"
+ },
+ {
+ "BriefDescription": "Number of DRAM Refreshes Issued",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x45",
+ "EventName": "UNC_M_DRAM_REFRESH.HIGH_PCH0",
+ "Experimental": "1",
+ "PerPkg": "1",
+ "UMask": "0x4",
+ "Unit": "iMC"
+ },
+ {
+ "BriefDescription": "Number of DRAM Refreshes Issued",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x45",
+ "EventName": "UNC_M_DRAM_REFRESH.HIGH_PCH1",
+ "Experimental": "1",
+ "PerPkg": "1",
+ "UMask": "0x20",
+ "Unit": "iMC"
+ },
+ {
+ "BriefDescription": "Number of DRAM Refreshes Issued",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x45",
+ "EventName": "UNC_M_DRAM_REFRESH.PANIC",
+ "Experimental": "1",
+ "PerPkg": "1",
+ "PublicDescription": "Number of DRAM Refreshes Issued : Counts the number of refreshes issued.",
+ "UMask": "0x12",
+ "Unit": "iMC"
+ },
+ {
+ "BriefDescription": "Number of DRAM Refreshes Issued",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x45",
+ "EventName": "UNC_M_DRAM_REFRESH.PANIC_ALL",
+ "Experimental": "1",
+ "PerPkg": "1",
+ "UMask": "0x12",
+ "Unit": "iMC"
+ },
+ {
+ "BriefDescription": "Number of DRAM Refreshes Issued",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x45",
+ "EventName": "UNC_M_DRAM_REFRESH.PANIC_PCH0",
+ "Experimental": "1",
+ "PerPkg": "1",
+ "UMask": "0x2",
+ "Unit": "iMC"
+ },
+ {
+ "BriefDescription": "Number of DRAM Refreshes Issued",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x45",
+ "EventName": "UNC_M_DRAM_REFRESH.PANIC_PCH1",
+ "Experimental": "1",
+ "PerPkg": "1",
+ "UMask": "0x10",
+ "Unit": "iMC"
+ },
+ {
"BriefDescription": "ECC Correctable Errors",
"Counter": "0,1,2,3",
"EventCode": "0x09",
@@ -3048,6 +3130,28 @@
"Unit": "iMC"
},
{
+ "BriefDescription": "Throttle Cycles for Rank 0",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x46",
+ "EventName": "UNC_M_POWER_THROTTLE_CYCLES.SLOT0",
+ "Experimental": "1",
+ "PerPkg": "1",
+ "PublicDescription": "Throttle Cycles for Rank 0 : Counts the number of cycles while the iMC is being throttled by either thermal constraints or by the PCU throttling. It is not possible to distinguish between the two. This can be filtered by rank. If multiple ranks are selected and are being throttled at the same time, the counter will only increment by 1. : Thermal throttling is performed per DIMM. We support 3 DIMMs per channel. This ID allows us to filter by ID.",
+ "UMask": "0x1",
+ "Unit": "iMC"
+ },
+ {
+ "BriefDescription": "Throttle Cycles for Rank 0",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x46",
+ "EventName": "UNC_M_POWER_THROTTLE_CYCLES.SLOT1",
+ "Experimental": "1",
+ "PerPkg": "1",
+ "PublicDescription": "Throttle Cycles for Rank 0 : Counts the number of cycles while the iMC is being throttled by either thermal constraints or by the PCU throttling. It is not possible to distinguish between the two. This can be filtered by rank. If multiple ranks are selected and are being throttled at the same time, the counter will only increment by 1.",
+ "UMask": "0x2",
+ "Unit": "iMC"
+ },
+ {
"BriefDescription": "Precharge due to read, write, underfill, or PGT.",
"Counter": "0,1,2,3",
"EventCode": "0x03",
diff --git a/tools/perf/pmu-events/arch/x86/emeraldrapids/uncore-power.json b/tools/perf/pmu-events/arch/x86/emeraldrapids/uncore-power.json
index 9482ddaea4d1..71c35b165a3e 100644
--- a/tools/perf/pmu-events/arch/x86/emeraldrapids/uncore-power.json
+++ b/tools/perf/pmu-events/arch/x86/emeraldrapids/uncore-power.json
@@ -178,7 +178,6 @@
"Counter": "0,1,2,3",
"EventCode": "0x35",
"EventName": "UNC_P_POWER_STATE_OCCUPANCY_CORES_C0",
- "Experimental": "1",
"PerPkg": "1",
"PublicDescription": "Number of cores in C0 : This is an occupancy event that tracks the number of cores that are in the chosen C-State. It can be used by itself to get the average number of cores in that C-state with thresholding to generate histograms, or with other PCU events and occupancy triggering to capture other details.",
"Unit": "PCU"
@@ -198,7 +197,6 @@
"Counter": "0,1,2,3",
"EventCode": "0x37",
"EventName": "UNC_P_POWER_STATE_OCCUPANCY_CORES_C6",
- "Experimental": "1",
"PerPkg": "1",
"PublicDescription": "Number of cores in C6 : This is an occupancy event that tracks the number of cores that are in the chosen C-State. It can be used by itself to get the average number of cores in that C-state with thresholding to generate histograms, or with other PCU events and occupancy triggering to capture other details.",
"Unit": "PCU"
diff --git a/tools/perf/pmu-events/arch/x86/emeraldrapids/virtual-memory.json b/tools/perf/pmu-events/arch/x86/emeraldrapids/virtual-memory.json
index 3d3f88600e26..609a9549cbf3 100644
--- a/tools/perf/pmu-events/arch/x86/emeraldrapids/virtual-memory.json
+++ b/tools/perf/pmu-events/arch/x86/emeraldrapids/virtual-memory.json
@@ -4,7 +4,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x12",
"EventName": "DTLB_LOAD_MISSES.STLB_HIT",
- "PublicDescription": "Counts loads that miss the DTLB (Data TLB) and hit the STLB (Second level TLB). Available PDIST counters: 0",
+ "PublicDescription": "Counts loads that miss the DTLB (Data TLB) and hit the STLB (Second level TLB).",
"SampleAfterValue": "100003",
"UMask": "0x20"
},
@@ -14,7 +14,7 @@
"CounterMask": "1",
"EventCode": "0x12",
"EventName": "DTLB_LOAD_MISSES.WALK_ACTIVE",
- "PublicDescription": "Counts cycles when at least one PMH (Page Miss Handler) is busy with a page walk for a demand load. Available PDIST counters: 0",
+ "PublicDescription": "Counts cycles when at least one PMH (Page Miss Handler) is busy with a page walk for a demand load.",
"SampleAfterValue": "100003",
"UMask": "0x10"
},
@@ -23,7 +23,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x12",
"EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED",
- "PublicDescription": "Counts completed page walks (all page sizes) caused by demand data loads. This implies it missed in the DTLB and further levels of TLB. The page walk can end with or without a fault. Available PDIST counters: 0",
+ "PublicDescription": "Counts completed page walks (all page sizes) caused by demand data loads. This implies it missed in the DTLB and further levels of TLB. The page walk can end with or without a fault.",
"SampleAfterValue": "100003",
"UMask": "0xe"
},
@@ -32,7 +32,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x12",
"EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_1G",
- "PublicDescription": "Counts completed page walks (1G sizes) caused by demand data loads. This implies address translations missed in the DTLB and further levels of TLB. The page walk can end with or without a fault. Available PDIST counters: 0",
+ "PublicDescription": "Counts completed page walks (1G sizes) caused by demand data loads. This implies address translations missed in the DTLB and further levels of TLB. The page walk can end with or without a fault.",
"SampleAfterValue": "100003",
"UMask": "0x8"
},
@@ -41,7 +41,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x12",
"EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_2M_4M",
- "PublicDescription": "Counts completed page walks (2M/4M sizes) caused by demand data loads. This implies address translations missed in the DTLB and further levels of TLB. The page walk can end with or without a fault. Available PDIST counters: 0",
+ "PublicDescription": "Counts completed page walks (2M/4M sizes) caused by demand data loads. This implies address translations missed in the DTLB and further levels of TLB. The page walk can end with or without a fault.",
"SampleAfterValue": "100003",
"UMask": "0x4"
},
@@ -50,7 +50,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x12",
"EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_4K",
- "PublicDescription": "Counts completed page walks (4K sizes) caused by demand data loads. This implies address translations missed in the DTLB and further levels of TLB. The page walk can end with or without a fault. Available PDIST counters: 0",
+ "PublicDescription": "Counts completed page walks (4K sizes) caused by demand data loads. This implies address translations missed in the DTLB and further levels of TLB. The page walk can end with or without a fault.",
"SampleAfterValue": "100003",
"UMask": "0x2"
},
@@ -59,7 +59,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x12",
"EventName": "DTLB_LOAD_MISSES.WALK_PENDING",
- "PublicDescription": "Counts the number of page walks outstanding for a demand load in the PMH (Page Miss Handler) each cycle. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of page walks outstanding for a demand load in the PMH (Page Miss Handler) each cycle.",
"SampleAfterValue": "100003",
"UMask": "0x10"
},
@@ -68,7 +68,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x13",
"EventName": "DTLB_STORE_MISSES.STLB_HIT",
- "PublicDescription": "Counts stores that miss the DTLB (Data TLB) and hit the STLB (2nd Level TLB). Available PDIST counters: 0",
+ "PublicDescription": "Counts stores that miss the DTLB (Data TLB) and hit the STLB (2nd Level TLB).",
"SampleAfterValue": "100003",
"UMask": "0x20"
},
@@ -78,7 +78,7 @@
"CounterMask": "1",
"EventCode": "0x13",
"EventName": "DTLB_STORE_MISSES.WALK_ACTIVE",
- "PublicDescription": "Counts cycles when at least one PMH (Page Miss Handler) is busy with a page walk for a store. Available PDIST counters: 0",
+ "PublicDescription": "Counts cycles when at least one PMH (Page Miss Handler) is busy with a page walk for a store.",
"SampleAfterValue": "100003",
"UMask": "0x10"
},
@@ -87,7 +87,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x13",
"EventName": "DTLB_STORE_MISSES.WALK_COMPLETED",
- "PublicDescription": "Counts completed page walks (all page sizes) caused by demand data stores. This implies it missed in the DTLB and further levels of TLB. The page walk can end with or without a fault. Available PDIST counters: 0",
+ "PublicDescription": "Counts completed page walks (all page sizes) caused by demand data stores. This implies it missed in the DTLB and further levels of TLB. The page walk can end with or without a fault.",
"SampleAfterValue": "100003",
"UMask": "0xe"
},
@@ -96,7 +96,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x13",
"EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_1G",
- "PublicDescription": "Counts completed page walks (1G sizes) caused by demand data stores. This implies address translations missed in the DTLB and further levels of TLB. The page walk can end with or without a fault. Available PDIST counters: 0",
+ "PublicDescription": "Counts completed page walks (1G sizes) caused by demand data stores. This implies address translations missed in the DTLB and further levels of TLB. The page walk can end with or without a fault.",
"SampleAfterValue": "100003",
"UMask": "0x8"
},
@@ -105,7 +105,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x13",
"EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_2M_4M",
- "PublicDescription": "Counts completed page walks (2M/4M sizes) caused by demand data stores. This implies address translations missed in the DTLB and further levels of TLB. The page walk can end with or without a fault. Available PDIST counters: 0",
+ "PublicDescription": "Counts completed page walks (2M/4M sizes) caused by demand data stores. This implies address translations missed in the DTLB and further levels of TLB. The page walk can end with or without a fault.",
"SampleAfterValue": "100003",
"UMask": "0x4"
},
@@ -114,7 +114,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x13",
"EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_4K",
- "PublicDescription": "Counts completed page walks (4K sizes) caused by demand data stores. This implies address translations missed in the DTLB and further levels of TLB. The page walk can end with or without a fault. Available PDIST counters: 0",
+ "PublicDescription": "Counts completed page walks (4K sizes) caused by demand data stores. This implies address translations missed in the DTLB and further levels of TLB. The page walk can end with or without a fault.",
"SampleAfterValue": "100003",
"UMask": "0x2"
},
@@ -123,7 +123,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x13",
"EventName": "DTLB_STORE_MISSES.WALK_PENDING",
- "PublicDescription": "Counts the number of page walks outstanding for a store in the PMH (Page Miss Handler) each cycle. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of page walks outstanding for a store in the PMH (Page Miss Handler) each cycle.",
"SampleAfterValue": "100003",
"UMask": "0x10"
},
@@ -132,7 +132,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x11",
"EventName": "ITLB_MISSES.STLB_HIT",
- "PublicDescription": "Counts instruction fetch requests that miss the ITLB (Instruction TLB) and hit the STLB (Second-level TLB). Available PDIST counters: 0",
+ "PublicDescription": "Counts instruction fetch requests that miss the ITLB (Instruction TLB) and hit the STLB (Second-level TLB).",
"SampleAfterValue": "100003",
"UMask": "0x20"
},
@@ -142,7 +142,7 @@
"CounterMask": "1",
"EventCode": "0x11",
"EventName": "ITLB_MISSES.WALK_ACTIVE",
- "PublicDescription": "Counts cycles when at least one PMH (Page Miss Handler) is busy with a page walk for a code (instruction fetch) request. Available PDIST counters: 0",
+ "PublicDescription": "Counts cycles when at least one PMH (Page Miss Handler) is busy with a page walk for a code (instruction fetch) request.",
"SampleAfterValue": "100003",
"UMask": "0x10"
},
@@ -151,7 +151,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x11",
"EventName": "ITLB_MISSES.WALK_COMPLETED",
- "PublicDescription": "Counts completed page walks (all page sizes) caused by a code fetch. This implies it missed in the ITLB (Instruction TLB) and further levels of TLB. The page walk can end with or without a fault. Available PDIST counters: 0",
+ "PublicDescription": "Counts completed page walks (all page sizes) caused by a code fetch. This implies it missed in the ITLB (Instruction TLB) and further levels of TLB. The page walk can end with or without a fault.",
"SampleAfterValue": "100003",
"UMask": "0xe"
},
@@ -160,7 +160,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x11",
"EventName": "ITLB_MISSES.WALK_COMPLETED_2M_4M",
- "PublicDescription": "Counts completed page walks (2M/4M page sizes) caused by a code fetch. This implies it missed in the ITLB (Instruction TLB) and further levels of TLB. The page walk can end with or without a fault. Available PDIST counters: 0",
+ "PublicDescription": "Counts completed page walks (2M/4M page sizes) caused by a code fetch. This implies it missed in the ITLB (Instruction TLB) and further levels of TLB. The page walk can end with or without a fault.",
"SampleAfterValue": "100003",
"UMask": "0x4"
},
@@ -169,7 +169,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x11",
"EventName": "ITLB_MISSES.WALK_COMPLETED_4K",
- "PublicDescription": "Counts completed page walks (4K page sizes) caused by a code fetch. This implies it missed in the ITLB (Instruction TLB) and further levels of TLB. The page walk can end with or without a fault. Available PDIST counters: 0",
+ "PublicDescription": "Counts completed page walks (4K page sizes) caused by a code fetch. This implies it missed in the ITLB (Instruction TLB) and further levels of TLB. The page walk can end with or without a fault.",
"SampleAfterValue": "100003",
"UMask": "0x2"
},
@@ -178,7 +178,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x11",
"EventName": "ITLB_MISSES.WALK_PENDING",
- "PublicDescription": "Counts the number of page walks outstanding for an outstanding code (instruction fetch) request in the PMH (Page Miss Handler) each cycle. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of page walks outstanding for an outstanding code (instruction fetch) request in the PMH (Page Miss Handler) each cycle.",
"SampleAfterValue": "100003",
"UMask": "0x10"
}
diff --git a/tools/perf/pmu-events/arch/x86/grandridge/cache.json b/tools/perf/pmu-events/arch/x86/grandridge/cache.json
index 877052db1490..9abddb06a837 100644
--- a/tools/perf/pmu-events/arch/x86/grandridge/cache.json
+++ b/tools/perf/pmu-events/arch/x86/grandridge/cache.json
@@ -286,7 +286,7 @@
},
{
"BriefDescription": "Counts the number of tagged load uops retired that exceed the latency threshold defined in MEC_CR_PEBS_LD_LAT_THRESHOLD - Only counts with PEBS enabled.",
- "Counter": "0,1",
+ "Counter": "0,1,2,3,4,5,6,7",
"Data_LA": "1",
"EventCode": "0xd0",
"EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_1024",
@@ -297,7 +297,7 @@
},
{
"BriefDescription": "Counts the number of tagged load uops retired that exceed the latency threshold defined in MEC_CR_PEBS_LD_LAT_THRESHOLD - Only counts with PEBS enabled.",
- "Counter": "0,1",
+ "Counter": "0,1,2,3,4,5,6,7",
"Data_LA": "1",
"EventCode": "0xd0",
"EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_128",
@@ -308,7 +308,7 @@
},
{
"BriefDescription": "Counts the number of tagged load uops retired that exceed the latency threshold defined in MEC_CR_PEBS_LD_LAT_THRESHOLD - Only counts with PEBS enabled.",
- "Counter": "0,1",
+ "Counter": "0,1,2,3,4,5,6,7",
"Data_LA": "1",
"EventCode": "0xd0",
"EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_16",
@@ -319,7 +319,7 @@
},
{
"BriefDescription": "Counts the number of tagged load uops retired that exceed the latency threshold defined in MEC_CR_PEBS_LD_LAT_THRESHOLD - Only counts with PEBS enabled.",
- "Counter": "0,1",
+ "Counter": "0,1,2,3,4,5,6,7",
"Data_LA": "1",
"EventCode": "0xd0",
"EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_2048",
@@ -330,7 +330,7 @@
},
{
"BriefDescription": "Counts the number of tagged load uops retired that exceed the latency threshold defined in MEC_CR_PEBS_LD_LAT_THRESHOLD - Only counts with PEBS enabled.",
- "Counter": "0,1",
+ "Counter": "0,1,2,3,4,5,6,7",
"Data_LA": "1",
"EventCode": "0xd0",
"EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_256",
@@ -341,7 +341,7 @@
},
{
"BriefDescription": "Counts the number of tagged load uops retired that exceed the latency threshold defined in MEC_CR_PEBS_LD_LAT_THRESHOLD - Only counts with PEBS enabled.",
- "Counter": "0,1",
+ "Counter": "0,1,2,3,4,5,6,7",
"Data_LA": "1",
"EventCode": "0xd0",
"EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_32",
@@ -352,7 +352,7 @@
},
{
"BriefDescription": "Counts the number of tagged load uops retired that exceed the latency threshold defined in MEC_CR_PEBS_LD_LAT_THRESHOLD - Only counts with PEBS enabled.",
- "Counter": "0,1",
+ "Counter": "0,1,2,3,4,5,6,7",
"Data_LA": "1",
"EventCode": "0xd0",
"EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_4",
@@ -363,7 +363,7 @@
},
{
"BriefDescription": "Counts the number of tagged load uops retired that exceed the latency threshold defined in MEC_CR_PEBS_LD_LAT_THRESHOLD - Only counts with PEBS enabled.",
- "Counter": "0,1",
+ "Counter": "0,1,2,3,4,5,6,7",
"Data_LA": "1",
"EventCode": "0xd0",
"EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_512",
@@ -374,7 +374,7 @@
},
{
"BriefDescription": "Counts the number of tagged load uops retired that exceed the latency threshold defined in MEC_CR_PEBS_LD_LAT_THRESHOLD - Only counts with PEBS enabled.",
- "Counter": "0,1",
+ "Counter": "0,1,2,3,4,5,6,7",
"Data_LA": "1",
"EventCode": "0xd0",
"EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_64",
@@ -385,7 +385,7 @@
},
{
"BriefDescription": "Counts the number of tagged load uops retired that exceed the latency threshold defined in MEC_CR_PEBS_LD_LAT_THRESHOLD - Only counts with PEBS enabled.",
- "Counter": "0,1",
+ "Counter": "0,1,2,3,4,5,6,7",
"Data_LA": "1",
"EventCode": "0xd0",
"EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_8",
diff --git a/tools/perf/pmu-events/arch/x86/grandridge/grr-metrics.json b/tools/perf/pmu-events/arch/x86/grandridge/grr-metrics.json
index 878b1caf12de..a0d637a24c1b 100644
--- a/tools/perf/pmu-events/arch/x86/grandridge/grr-metrics.json
+++ b/tools/perf/pmu-events/arch/x86/grandridge/grr-metrics.json
@@ -1,56 +1,56 @@
[
{
"BriefDescription": "C10 residency percent per package",
- "MetricExpr": "cstate_pkg@c10\\-residency@ / TSC",
+ "MetricExpr": "cstate_pkg@c10\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C10_Pkg_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C1 residency percent per core",
- "MetricExpr": "cstate_core@c1\\-residency@ / TSC",
+ "MetricExpr": "cstate_core@c1\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C1_Core_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C2 residency percent per package",
- "MetricExpr": "cstate_pkg@c2\\-residency@ / TSC",
+ "MetricExpr": "cstate_pkg@c2\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C2_Pkg_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C3 residency percent per package",
- "MetricExpr": "cstate_pkg@c3\\-residency@ / TSC",
+ "MetricExpr": "cstate_pkg@c3\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C3_Pkg_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C6 residency percent per core",
- "MetricExpr": "cstate_core@c6\\-residency@ / TSC",
+ "MetricExpr": "cstate_core@c6\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C6_Core_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C6 residency percent per package",
- "MetricExpr": "cstate_pkg@c6\\-residency@ / TSC",
+ "MetricExpr": "cstate_pkg@c6\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C6_Pkg_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C7 residency percent per core",
- "MetricExpr": "cstate_core@c7\\-residency@ / TSC",
+ "MetricExpr": "cstate_core@c7\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C7_Core_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C8 residency percent per package",
- "MetricExpr": "cstate_pkg@c8\\-residency@ / TSC",
+ "MetricExpr": "cstate_pkg@c8\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C8_Pkg_Residency",
"ScaleUnit": "100%"
@@ -633,7 +633,7 @@
},
{
"BriefDescription": "Average CPU Utilization",
- "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / TSC",
+ "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / msr@tsc@",
"MetricName": "tma_info_system_cpu_utilization"
},
{
@@ -645,7 +645,7 @@
},
{
"BriefDescription": "Fraction of cycles spent in Kernel mode",
- "MetricExpr": "cpu@CPU_CLK_UNHALTED.CORE_P@k / CPU_CLK_UNHALTED.CORE",
+ "MetricExpr": "CPU_CLK_UNHALTED.CORE_P:k / CPU_CLK_UNHALTED.CORE",
"MetricGroup": "Summary",
"MetricName": "tma_info_system_kernel_utilization"
},
diff --git a/tools/perf/pmu-events/arch/x86/graniterapids/cache.json b/tools/perf/pmu-events/arch/x86/graniterapids/cache.json
index dbdeade6fe6f..db28866444b6 100644
--- a/tools/perf/pmu-events/arch/x86/graniterapids/cache.json
+++ b/tools/perf/pmu-events/arch/x86/graniterapids/cache.json
@@ -4,7 +4,6 @@
"Counter": "0,1,2,3",
"EventCode": "0x51",
"EventName": "L1D.HWPF_MISS",
- "PublicDescription": "L1D.HWPF_MISS Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x20"
},
@@ -13,7 +12,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x51",
"EventName": "L1D.REPLACEMENT",
- "PublicDescription": "Counts L1D data line replacements including opportunistic replacements, and replacements that require stall-for-replace or block-for-replace. Available PDIST counters: 0",
+ "PublicDescription": "Counts L1D data line replacements including opportunistic replacements, and replacements that require stall-for-replace or block-for-replace.",
"SampleAfterValue": "100003",
"UMask": "0x1"
},
@@ -22,7 +21,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x48",
"EventName": "L1D_PEND_MISS.FB_FULL",
- "PublicDescription": "Counts number of cycles a demand request has waited due to L1D Fill Buffer (FB) unavailability. Demand requests include cacheable/uncacheable demand load, store, lock or SW prefetch accesses. Available PDIST counters: 0",
+ "PublicDescription": "Counts number of cycles a demand request has waited due to L1D Fill Buffer (FB) unavailability. Demand requests include cacheable/uncacheable demand load, store, lock or SW prefetch accesses.",
"SampleAfterValue": "1000003",
"UMask": "0x2"
},
@@ -33,7 +32,7 @@
"EdgeDetect": "1",
"EventCode": "0x48",
"EventName": "L1D_PEND_MISS.FB_FULL_PERIODS",
- "PublicDescription": "Counts number of phases a demand request has waited due to L1D Fill Buffer (FB) unavailability. Demand requests include cacheable/uncacheable demand load, store, lock or SW prefetch accesses. Available PDIST counters: 0",
+ "PublicDescription": "Counts number of phases a demand request has waited due to L1D Fill Buffer (FB) unavailability. Demand requests include cacheable/uncacheable demand load, store, lock or SW prefetch accesses.",
"SampleAfterValue": "1000003",
"UMask": "0x2"
},
@@ -42,7 +41,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x48",
"EventName": "L1D_PEND_MISS.L2_STALLS",
- "PublicDescription": "Counts number of cycles a demand request has waited due to L1D due to lack of L2 resources. Demand requests include cacheable/uncacheable demand load, store, lock or SW prefetch accesses. Available PDIST counters: 0",
+ "PublicDescription": "Counts number of cycles a demand request has waited due to L1D due to lack of L2 resources. Demand requests include cacheable/uncacheable demand load, store, lock or SW prefetch accesses.",
"SampleAfterValue": "1000003",
"UMask": "0x4"
},
@@ -51,7 +50,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x48",
"EventName": "L1D_PEND_MISS.PENDING",
- "PublicDescription": "Counts number of L1D misses that are outstanding in each cycle, that is each cycle the number of Fill Buffers (FB) outstanding required by Demand Reads. FB either is held by demand loads, or it is held by non-demand loads and gets hit at least once by demand. The valid outstanding interval is defined until the FB deallocation by one of the following ways: from FB allocation, if FB is allocated by demand from the demand Hit FB, if it is allocated by hardware or software prefetch. Note: In the L1D, a Demand Read contains cacheable or noncacheable demand loads, including ones causing cache-line splits and reads due to page walks resulted from any request type. Available PDIST counters: 0",
+ "PublicDescription": "Counts number of L1D misses that are outstanding in each cycle, that is each cycle the number of Fill Buffers (FB) outstanding required by Demand Reads. FB either is held by demand loads, or it is held by non-demand loads and gets hit at least once by demand. The valid outstanding interval is defined until the FB deallocation by one of the following ways: from FB allocation, if FB is allocated by demand from the demand Hit FB, if it is allocated by hardware or software prefetch. Note: In the L1D, a Demand Read contains cacheable or noncacheable demand loads, including ones causing cache-line splits and reads due to page walks resulted from any request type.",
"SampleAfterValue": "1000003",
"UMask": "0x1"
},
@@ -61,7 +60,7 @@
"CounterMask": "1",
"EventCode": "0x48",
"EventName": "L1D_PEND_MISS.PENDING_CYCLES",
- "PublicDescription": "Counts duration of L1D miss outstanding in cycles. Available PDIST counters: 0",
+ "PublicDescription": "Counts duration of L1D miss outstanding in cycles.",
"SampleAfterValue": "1000003",
"UMask": "0x1"
},
@@ -70,7 +69,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x25",
"EventName": "L2_LINES_IN.ALL",
- "PublicDescription": "Counts the number of L2 cache lines filling the L2. Counting does not cover rejects. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of L2 cache lines filling the L2. Counting does not cover rejects.",
"SampleAfterValue": "100003",
"UMask": "0x1f"
},
@@ -79,7 +78,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x26",
"EventName": "L2_LINES_OUT.NON_SILENT",
- "PublicDescription": "Counts the number of lines that are evicted by L2 cache when triggered by an L2 cache fill. Those lines are in Modified state. Modified lines are written back to L3 Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of lines that are evicted by L2 cache when triggered by an L2 cache fill. Those lines are in Modified state. Modified lines are written back to L3",
"SampleAfterValue": "200003",
"UMask": "0x2"
},
@@ -88,7 +87,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x26",
"EventName": "L2_LINES_OUT.SILENT",
- "PublicDescription": "Counts the number of lines that are silently dropped by L2 cache. These lines are typically in Shared or Exclusive state. A non-threaded event. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of lines that are silently dropped by L2 cache. These lines are typically in Shared or Exclusive state. A non-threaded event.",
"SampleAfterValue": "200003",
"UMask": "0x1"
},
@@ -97,7 +96,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x26",
"EventName": "L2_LINES_OUT.USELESS_HWPF",
- "PublicDescription": "Counts the number of cache lines that have been prefetched by the L2 hardware prefetcher but not used by demand access when evicted from the L2 cache Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of cache lines that have been prefetched by the L2 hardware prefetcher but not used by demand access when evicted from the L2 cache",
"SampleAfterValue": "200003",
"UMask": "0x4"
},
@@ -106,7 +105,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_REQUEST.ALL",
- "PublicDescription": "Counts all requests that were hit or true misses in L2 cache. True-miss excludes misses that were merged with ongoing L2 misses. [This event is alias to L2_RQSTS.REFERENCES] Available PDIST counters: 0",
+ "PublicDescription": "Counts all requests that were hit or true misses in L2 cache. True-miss excludes misses that were merged with ongoing L2 misses. [This event is alias to L2_RQSTS.REFERENCES]",
"SampleAfterValue": "200003",
"UMask": "0xff"
},
@@ -115,7 +114,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_REQUEST.HIT",
- "PublicDescription": "Counts all requests that hit L2 cache. [This event is alias to L2_RQSTS.HIT] Available PDIST counters: 0",
+ "PublicDescription": "Counts all requests that hit L2 cache. [This event is alias to L2_RQSTS.HIT]",
"SampleAfterValue": "200003",
"UMask": "0xdf"
},
@@ -124,7 +123,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_REQUEST.MISS",
- "PublicDescription": "Counts read requests of any type with true-miss in the L2 cache. True-miss excludes L2 misses that were merged with ongoing L2 misses. [This event is alias to L2_RQSTS.MISS] Available PDIST counters: 0",
+ "PublicDescription": "Counts read requests of any type with true-miss in the L2 cache. True-miss excludes L2 misses that were merged with ongoing L2 misses. [This event is alias to L2_RQSTS.MISS]",
"SampleAfterValue": "200003",
"UMask": "0x3f"
},
@@ -133,7 +132,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_RQSTS.ALL_CODE_RD",
- "PublicDescription": "Counts the total number of L2 code requests. Available PDIST counters: 0",
+ "PublicDescription": "Counts the total number of L2 code requests.",
"SampleAfterValue": "200003",
"UMask": "0xe4"
},
@@ -142,7 +141,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_RQSTS.ALL_DEMAND_DATA_RD",
- "PublicDescription": "Counts Demand Data Read requests accessing the L2 cache. These requests may hit or miss L2 cache. True-miss exclude misses that were merged with ongoing L2 misses. An access is counted once. Available PDIST counters: 0",
+ "PublicDescription": "Counts Demand Data Read requests accessing the L2 cache. These requests may hit or miss L2 cache. True-miss exclude misses that were merged with ongoing L2 misses. An access is counted once.",
"SampleAfterValue": "200003",
"UMask": "0xe1"
},
@@ -151,7 +150,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_RQSTS.ALL_DEMAND_MISS",
- "PublicDescription": "Counts demand requests that miss L2 cache. Available PDIST counters: 0",
+ "PublicDescription": "Counts demand requests that miss L2 cache.",
"SampleAfterValue": "200003",
"UMask": "0x27"
},
@@ -160,7 +159,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_RQSTS.ALL_DEMAND_REFERENCES",
- "PublicDescription": "Counts demand requests to L2 cache. Available PDIST counters: 0",
+ "PublicDescription": "Counts demand requests to L2 cache.",
"SampleAfterValue": "200003",
"UMask": "0xe7"
},
@@ -169,7 +168,6 @@
"Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_RQSTS.ALL_HWPF",
- "PublicDescription": "L2_RQSTS.ALL_HWPF Available PDIST counters: 0",
"SampleAfterValue": "200003",
"UMask": "0xf0"
},
@@ -178,7 +176,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_RQSTS.ALL_RFO",
- "PublicDescription": "Counts the total number of RFO (read for ownership) requests to L2 cache. L2 RFO requests include both L1D demand RFO misses as well as L1D RFO prefetches. Available PDIST counters: 0",
+ "PublicDescription": "Counts the total number of RFO (read for ownership) requests to L2 cache. L2 RFO requests include both L1D demand RFO misses as well as L1D RFO prefetches.",
"SampleAfterValue": "200003",
"UMask": "0xe2"
},
@@ -187,7 +185,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_RQSTS.CODE_RD_HIT",
- "PublicDescription": "Counts L2 cache hits when fetching instructions, code reads. Available PDIST counters: 0",
+ "PublicDescription": "Counts L2 cache hits when fetching instructions, code reads.",
"SampleAfterValue": "200003",
"UMask": "0xc4"
},
@@ -196,7 +194,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_RQSTS.CODE_RD_MISS",
- "PublicDescription": "Counts L2 cache misses when fetching instructions. Available PDIST counters: 0",
+ "PublicDescription": "Counts L2 cache misses when fetching instructions.",
"SampleAfterValue": "200003",
"UMask": "0x24"
},
@@ -205,7 +203,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_RQSTS.DEMAND_DATA_RD_HIT",
- "PublicDescription": "Counts the number of demand Data Read requests initiated by load instructions that hit L2 cache. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of demand Data Read requests initiated by load instructions that hit L2 cache.",
"SampleAfterValue": "200003",
"UMask": "0xc1"
},
@@ -214,7 +212,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_RQSTS.DEMAND_DATA_RD_MISS",
- "PublicDescription": "Counts demand Data Read requests with true-miss in the L2 cache. True-miss excludes misses that were merged with ongoing L2 misses. An access is counted once. Available PDIST counters: 0",
+ "PublicDescription": "Counts demand Data Read requests with true-miss in the L2 cache. True-miss excludes misses that were merged with ongoing L2 misses. An access is counted once.",
"SampleAfterValue": "200003",
"UMask": "0x21"
},
@@ -223,7 +221,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_RQSTS.HIT",
- "PublicDescription": "Counts all requests that hit L2 cache. [This event is alias to L2_REQUEST.HIT] Available PDIST counters: 0",
+ "PublicDescription": "Counts all requests that hit L2 cache. [This event is alias to L2_REQUEST.HIT]",
"SampleAfterValue": "200003",
"UMask": "0xdf"
},
@@ -232,7 +230,6 @@
"Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_RQSTS.HWPF_MISS",
- "PublicDescription": "L2_RQSTS.HWPF_MISS Available PDIST counters: 0",
"SampleAfterValue": "200003",
"UMask": "0x30"
},
@@ -241,7 +238,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_RQSTS.MISS",
- "PublicDescription": "Counts read requests of any type with true-miss in the L2 cache. True-miss excludes L2 misses that were merged with ongoing L2 misses. [This event is alias to L2_REQUEST.MISS] Available PDIST counters: 0",
+ "PublicDescription": "Counts read requests of any type with true-miss in the L2 cache. True-miss excludes L2 misses that were merged with ongoing L2 misses. [This event is alias to L2_REQUEST.MISS]",
"SampleAfterValue": "200003",
"UMask": "0x3f"
},
@@ -250,7 +247,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_RQSTS.REFERENCES",
- "PublicDescription": "Counts all requests that were hit or true misses in L2 cache. True-miss excludes misses that were merged with ongoing L2 misses. [This event is alias to L2_REQUEST.ALL] Available PDIST counters: 0",
+ "PublicDescription": "Counts all requests that were hit or true misses in L2 cache. True-miss excludes misses that were merged with ongoing L2 misses. [This event is alias to L2_REQUEST.ALL]",
"SampleAfterValue": "200003",
"UMask": "0xff"
},
@@ -259,7 +256,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_RQSTS.RFO_HIT",
- "PublicDescription": "Counts the RFO (Read-for-Ownership) requests that hit L2 cache. Available PDIST counters: 0",
+ "PublicDescription": "Counts the RFO (Read-for-Ownership) requests that hit L2 cache.",
"SampleAfterValue": "200003",
"UMask": "0xc2"
},
@@ -268,7 +265,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_RQSTS.RFO_MISS",
- "PublicDescription": "Counts the RFO (Read-for-Ownership) requests that miss L2 cache. Available PDIST counters: 0",
+ "PublicDescription": "Counts the RFO (Read-for-Ownership) requests that miss L2 cache.",
"SampleAfterValue": "200003",
"UMask": "0x22"
},
@@ -277,7 +274,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_RQSTS.SWPF_HIT",
- "PublicDescription": "Counts Software prefetch requests that hit the L2 cache. Accounts for PREFETCHNTA and PREFETCHT0/1/2 instructions when FB is not full. Available PDIST counters: 0",
+ "PublicDescription": "Counts Software prefetch requests that hit the L2 cache. Accounts for PREFETCHNTA and PREFETCHT0/1/2 instructions when FB is not full.",
"SampleAfterValue": "200003",
"UMask": "0xc8"
},
@@ -286,7 +283,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_RQSTS.SWPF_MISS",
- "PublicDescription": "Counts Software prefetch requests that miss the L2 cache. Accounts for PREFETCHNTA and PREFETCHT0/1/2 instructions when FB is not full. Available PDIST counters: 0",
+ "PublicDescription": "Counts Software prefetch requests that miss the L2 cache. Accounts for PREFETCHNTA and PREFETCHT0/1/2 instructions when FB is not full.",
"SampleAfterValue": "200003",
"UMask": "0x28"
},
@@ -295,7 +292,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x23",
"EventName": "L2_TRANS.L2_WB",
- "PublicDescription": "Counts L2 writebacks that access L2 cache. Available PDIST counters: 0",
+ "PublicDescription": "Counts L2 writebacks that access L2 cache.",
"SampleAfterValue": "200003",
"UMask": "0x40"
},
@@ -304,7 +301,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0x2e",
"EventName": "LONGEST_LAT_CACHE.MISS",
- "PublicDescription": "Counts core-originated cacheable requests that miss the L3 cache (Longest Latency cache). Requests include data and code reads, Reads-for-Ownership (RFOs), speculative accesses and hardware prefetches to the L1 and L2. It does not include hardware prefetches to the L3, and may not count other types of requests to the L3. Available PDIST counters: 0",
+ "PublicDescription": "Counts core-originated cacheable requests that miss the L3 cache (Longest Latency cache). Requests include data and code reads, Reads-for-Ownership (RFOs), speculative accesses and hardware prefetches to the L1 and L2. It does not include hardware prefetches to the L3, and may not count other types of requests to the L3.",
"SampleAfterValue": "100003",
"UMask": "0x41"
},
@@ -313,7 +310,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0x2e",
"EventName": "LONGEST_LAT_CACHE.REFERENCE",
- "PublicDescription": "Counts core-originated cacheable requests to the L3 cache (Longest Latency cache). Requests include data and code reads, Reads-for-Ownership (RFOs), speculative accesses and hardware prefetches to the L1 and L2. It does not include hardware prefetches to the L3, and may not count other types of requests to the L3. Available PDIST counters: 0",
+ "PublicDescription": "Counts core-originated cacheable requests to the L3 cache (Longest Latency cache). Requests include data and code reads, Reads-for-Ownership (RFOs), speculative accesses and hardware prefetches to the L1 and L2. It does not include hardware prefetches to the L3, and may not count other types of requests to the L3.",
"SampleAfterValue": "100003",
"UMask": "0x4f"
},
@@ -437,7 +434,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x43",
"EventName": "MEM_LOAD_COMPLETED.L1_MISS_ANY",
- "PublicDescription": "Number of completed demand load requests that missed the L1 data cache including shadow misses (FB hits, merge to an ongoing L1D miss) Available PDIST counters: 0",
+ "PublicDescription": "Number of completed demand load requests that missed the L1 data cache including shadow misses (FB hits, merge to an ongoing L1D miss)",
"SampleAfterValue": "1000003",
"UMask": "0xfd"
},
@@ -491,12 +488,12 @@
"UMask": "0x2"
},
{
- "BriefDescription": "Retired load instructions which data sources missed L3 but serviced from local dram",
+ "BriefDescription": "Retired load instructions which data sources missed L3 but serviced from dram homed in the local socket",
"Counter": "0,1,2,3",
"Data_LA": "1",
"EventCode": "0xd3",
"EventName": "MEM_LOAD_L3_MISS_RETIRED.LOCAL_DRAM",
- "PublicDescription": "Retired load instructions which data sources missed L3 but serviced from local DRAM. Available PDIST counters: 0",
+ "PublicDescription": "Retired load instructions which data sources missed L3 but serviced from DRAM homed in the local socket. Available PDIST counters: 0",
"RetirementLatencyMax": 4146,
"RetirementLatencyMean": 115.83,
"RetirementLatencyMin": 0,
@@ -504,6 +501,15 @@
"UMask": "0x1"
},
{
+ "BriefDescription": "Retired load instructions with remote cxl mem as the data source where the data request missed all caches.",
+ "Counter": "0,1,2,3",
+ "EventCode": "0xd3",
+ "EventName": "MEM_LOAD_L3_MISS_RETIRED.REMOTE_CXL_MEM",
+ "PublicDescription": "Counts retired load instructions with remote cxl mem as the data source and the data request missed L3. Available PDIST counters: 0",
+ "SampleAfterValue": "100007",
+ "UMask": "0x10"
+ },
+ {
"BriefDescription": "MEM_LOAD_L3_MISS_RETIRED.REMOTE_DRAM",
"Counter": "0,1,2,3",
"Data_LA": "1",
@@ -629,11 +635,20 @@
"UMask": "0x20"
},
{
+ "BriefDescription": "Retired load instructions with local cxl mem as the data source where the data request missed all caches.",
+ "Counter": "0,1,2,3",
+ "Data_LA": "1",
+ "EventCode": "0xd1",
+ "EventName": "MEM_LOAD_RETIRED.LOCAL_CXL_MEM",
+ "PublicDescription": "Counts retired load instructions with local cxl mem as the data source and the data request missed L3. Available PDIST counters: 0",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x80"
+ },
+ {
"BriefDescription": "MEM_STORE_RETIRED.L2_HIT",
"Counter": "0,1,2,3",
"EventCode": "0x44",
"EventName": "MEM_STORE_RETIRED.L2_HIT",
- "PublicDescription": "MEM_STORE_RETIRED.L2_HIT Available PDIST counters: 0",
"SampleAfterValue": "200003",
"UMask": "0x1"
},
@@ -642,7 +657,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xe5",
"EventName": "MEM_UOP_RETIRED.ANY",
- "PublicDescription": "Number of retired micro-operations (uops) for load or store memory accesses Available PDIST counters: 0",
+ "PublicDescription": "Number of retired micro-operations (uops) for load or store memory accesses",
"SampleAfterValue": "1000003",
"UMask": "0x3"
},
@@ -691,6 +706,17 @@
"UMask": "0x1"
},
{
+ "BriefDescription": "Counts demand data reads that were supplied by CXL MEM (Type 2 or Type 3).",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x2A,0x2B",
+ "EventName": "OCR.DEMAND_DATA_RD.CXL_MEM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x703C00001",
+ "PublicDescription": "Counts demand data reads that were supplied by CXL MEM (Type 2 or Type 3). Available PDIST counters: 0",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
+ },
+ {
"BriefDescription": "Counts demand data reads that hit in the L3 or were snooped from another core's caches on the same socket.",
"Counter": "0,1,2,3",
"EventCode": "0x2A,0x2B",
@@ -735,6 +761,17 @@
"UMask": "0x1"
},
{
+ "BriefDescription": "Counts demand data reads that were supplied by CXL MEM (Type 2 and Type 3) attached to local socket.",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x2A,0x2B",
+ "EventName": "OCR.DEMAND_DATA_RD.LOCAL_CXL_MEM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x700C00001",
+ "PublicDescription": "Counts demand data reads that were supplied by CXL MEM (Type 2 and Type 3) attached to local socket. Available PDIST counters: 0",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
+ },
+ {
"BriefDescription": "Counts demand data reads that were supplied by a cache on a remote socket where a snoop hit a modified line in another core's caches which forwarded the data.",
"Counter": "0,1,2,3",
"EventCode": "0x2A,0x2B",
@@ -757,6 +794,17 @@
"UMask": "0x1"
},
{
+ "BriefDescription": "Counts demand data reads that were supplied by CXL MEM (Type 2 or Type 3) attached to another socket.",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x2A,0x2B",
+ "EventName": "OCR.DEMAND_DATA_RD.REMOTE_CXL_MEM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x703000001",
+ "PublicDescription": "Counts demand data reads that were supplied by CXL MEM (Type 2 or Type 3) attached to another socket. Available PDIST counters: 0",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
+ },
+ {
"BriefDescription": "Counts demand data reads that hit a modified line in a distant L3 Cache or were snooped from a distant core's L1/L2 caches on this socket when the system is in SNC (sub-NUMA cluster) mode.",
"Counter": "0,1,2,3",
"EventCode": "0x2A,0x2B",
@@ -790,6 +838,17 @@
"UMask": "0x1"
},
{
+ "BriefDescription": "Counts demand reads for ownership (RFO) requests and software prefetches for exclusive ownership (PREFETCHW) that were supplied by CXL MEM (Type 2 or Type 3).",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x2A,0x2B",
+ "EventName": "OCR.DEMAND_RFO.CXL_MEM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x703C00002",
+ "PublicDescription": "Counts demand reads for ownership (RFO) requests and software prefetches for exclusive ownership (PREFETCHW) that were supplied by CXL MEM (Type 2 or Type 3). Available PDIST counters: 0",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
+ },
+ {
"BriefDescription": "Counts demand reads for ownership (RFO) requests and software prefetches for exclusive ownership (PREFETCHW) that hit in the L3 or were snooped from another core's caches on the same socket.",
"Counter": "0,1,2,3",
"EventCode": "0x2A,0x2B",
@@ -812,6 +871,28 @@
"UMask": "0x1"
},
{
+ "BriefDescription": "Counts demand reads for ownership (RFO) requests and software prefetches for exclusive ownership (PREFETCHW) that were supplied by CXL MEM (Type 2 and Type 3) attached to local socket.",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x2A,0x2B",
+ "EventName": "OCR.DEMAND_RFO.LOCAL_CXL_MEM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x700C00002",
+ "PublicDescription": "Counts demand reads for ownership (RFO) requests and software prefetches for exclusive ownership (PREFETCHW) that were supplied by CXL MEM (Type 2 and Type 3) attached to local socket. Available PDIST counters: 0",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Counts demand reads for ownership (RFO) requests and software prefetches for exclusive ownership (PREFETCHW) that were supplied by CXL MEM (Type 2 or Type 3) attached to another socket.",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x2A,0x2B",
+ "EventName": "OCR.DEMAND_RFO.REMOTE_CXL_MEM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x703000002",
+ "PublicDescription": "Counts demand reads for ownership (RFO) requests and software prefetches for exclusive ownership (PREFETCHW) that were supplied by CXL MEM (Type 2 or Type 3) attached to another socket. Available PDIST counters: 0",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
+ },
+ {
"BriefDescription": "Counts writebacks of modified cachelines and streaming stores that have any type of response.",
"Counter": "0,1,2,3",
"EventCode": "0x2A,0x2B",
@@ -834,6 +915,17 @@
"UMask": "0x1"
},
{
+ "BriefDescription": "Counts all (cacheable) data read, code read and RFO requests including demands and prefetches to the core caches (L1 or L2) that were supplied by CXL MEM (Type 2 or Type 3).",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x2A,0x2B",
+ "EventName": "OCR.READS_TO_CORE.CXL_MEM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x703C04477",
+ "PublicDescription": "Counts all (cacheable) data read, code read and RFO requests including demands and prefetches to the core caches (L1 or L2) that were supplied by CXL MEM (Type 2 or Type 3). Available PDIST counters: 0",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
+ },
+ {
"BriefDescription": "Counts all (cacheable) data read, code read and RFO requests including demands and prefetches to the core caches (L1 or L2) that hit in the L3 or were snooped from another core's caches on the same socket.",
"Counter": "0,1,2,3",
"EventCode": "0x2A,0x2B",
@@ -856,6 +948,17 @@
"UMask": "0x1"
},
{
+ "BriefDescription": "Counts all (cacheable) data read, code read and RFO requests including demands and prefetches to the core caches (L1 or L2) that were supplied by CXL MEM (Type 2 and Type 3) attached to local socket.",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x2A,0x2B",
+ "EventName": "OCR.READS_TO_CORE.LOCAL_CXL_MEM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x700C04477",
+ "PublicDescription": "Counts all (cacheable) data read, code read and RFO requests including demands and prefetches to the core caches (L1 or L2) that were supplied by CXL MEM (Type 2 and Type 3) attached to local socket. Available PDIST counters: 0",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
+ },
+ {
"BriefDescription": "Counts all (cacheable) data read, code read and RFO requests including demands and prefetches to the core caches (L1 or L2) that were not supplied by the local socket's L1, L2, or L3 caches and were supplied by a remote socket.",
"Counter": "0,1,2,3",
"EventCode": "0x2A,0x2B",
@@ -900,6 +1003,17 @@
"UMask": "0x1"
},
{
+ "BriefDescription": "Counts all (cacheable) data read, code read and RFO requests including demands and prefetches to the core caches (L1 or L2) that were supplied by CXL MEM (Type 2 or Type 3) attached to another socket.",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x2A,0x2B",
+ "EventName": "OCR.READS_TO_CORE.REMOTE_CXL_MEM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x703004477",
+ "PublicDescription": "Counts all (cacheable) data read, code read and RFO requests including demands and prefetches to the core caches (L1 or L2) that were supplied by CXL MEM (Type 2 or Type 3) attached to another socket. Available PDIST counters: 0",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
+ },
+ {
"BriefDescription": "Counts all (cacheable) data read, code read and RFO requests including demands and prefetches to the core caches (L1 or L2) that hit a modified line in a distant L3 Cache or were snooped from a distant core's L1/L2 caches on this socket when the system is in SNC (sub-NUMA cluster) mode.",
"Counter": "0,1,2,3",
"EventCode": "0x2A,0x2B",
@@ -937,7 +1051,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x21",
"EventName": "OFFCORE_REQUESTS.ALL_REQUESTS",
- "PublicDescription": "Counts memory transactions reached the super queue including requests initiated by the core, all L3 prefetches, page walks, etc.. Available PDIST counters: 0",
+ "PublicDescription": "Counts memory transactions reached the super queue including requests initiated by the core, all L3 prefetches, page walks, etc..",
"SampleAfterValue": "100003",
"UMask": "0x80"
},
@@ -946,7 +1060,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x21",
"EventName": "OFFCORE_REQUESTS.DATA_RD",
- "PublicDescription": "Counts the demand and prefetch data reads. All Core Data Reads include cacheable 'Demands' and L2 prefetchers (not L3 prefetchers). Counting also covers reads due to page walks resulted from any request type. Available PDIST counters: 0",
+ "PublicDescription": "Counts the demand and prefetch data reads. All Core Data Reads include cacheable 'Demands' and L2 prefetchers (not L3 prefetchers). Counting also covers reads due to page walks resulted from any request type.",
"SampleAfterValue": "100003",
"UMask": "0x8"
},
@@ -955,7 +1069,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x21",
"EventName": "OFFCORE_REQUESTS.DEMAND_CODE_RD",
- "PublicDescription": "Counts both cacheable and Non-Cacheable code read requests. Available PDIST counters: 0",
+ "PublicDescription": "Counts both cacheable and Non-Cacheable code read requests.",
"SampleAfterValue": "100003",
"UMask": "0x2"
},
@@ -964,7 +1078,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x21",
"EventName": "OFFCORE_REQUESTS.DEMAND_DATA_RD",
- "PublicDescription": "Counts the Demand Data Read requests sent to uncore. Use it in conjunction with OFFCORE_REQUESTS_OUTSTANDING to determine average latency in the uncore. Available PDIST counters: 0",
+ "PublicDescription": "Counts the Demand Data Read requests sent to uncore. Use it in conjunction with OFFCORE_REQUESTS_OUTSTANDING to determine average latency in the uncore.",
"SampleAfterValue": "100003",
"UMask": "0x1"
},
@@ -973,7 +1087,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x21",
"EventName": "OFFCORE_REQUESTS.DEMAND_RFO",
- "PublicDescription": "Counts the demand RFO (read for ownership) requests including regular RFOs, locks, ItoM. Available PDIST counters: 0",
+ "PublicDescription": "Counts the demand RFO (read for ownership) requests including regular RFOs, locks, ItoM.",
"SampleAfterValue": "100003",
"UMask": "0x4"
},
@@ -982,7 +1096,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x21",
"EventName": "OFFCORE_REQUESTS.MEM_UC",
- "PublicDescription": "This event counts noncacheable memory data read transactions. Available PDIST counters: 0",
+ "PublicDescription": "This event counts noncacheable memory data read transactions.",
"SampleAfterValue": "100003",
"UMask": "0x20"
},
@@ -992,7 +1106,7 @@
"CounterMask": "1",
"EventCode": "0x20",
"EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD",
- "PublicDescription": "Counts cycles when offcore outstanding cacheable Core Data Read transactions are present in the super queue. A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation). See corresponding Umask under OFFCORE_REQUESTS. Available PDIST counters: 0",
+ "PublicDescription": "Counts cycles when offcore outstanding cacheable Core Data Read transactions are present in the super queue. A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation). See corresponding Umask under OFFCORE_REQUESTS.",
"SampleAfterValue": "1000003",
"UMask": "0x8"
},
@@ -1002,7 +1116,7 @@
"CounterMask": "1",
"EventCode": "0x20",
"EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_CODE_RD",
- "PublicDescription": "Counts the number of offcore outstanding Code Reads transactions in the super queue every cycle. The 'Offcore outstanding' state of the transaction lasts from the L2 miss until the sending transaction completion to requestor (SQ deallocation). See the corresponding Umask under OFFCORE_REQUESTS. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of offcore outstanding Code Reads transactions in the super queue every cycle. The 'Offcore outstanding' state of the transaction lasts from the L2 miss until the sending transaction completion to requestor (SQ deallocation). See the corresponding Umask under OFFCORE_REQUESTS.",
"SampleAfterValue": "1000003",
"UMask": "0x2"
},
@@ -1012,7 +1126,6 @@
"CounterMask": "1",
"EventCode": "0x20",
"EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_DATA_RD",
- "PublicDescription": "Cycles where at least 1 outstanding demand data read request is pending. Available PDIST counters: 0",
"SampleAfterValue": "2000003",
"UMask": "0x1"
},
@@ -1022,7 +1135,7 @@
"CounterMask": "1",
"EventCode": "0x20",
"EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO",
- "PublicDescription": "Counts the number of offcore outstanding demand rfo Reads transactions in the super queue every cycle. The 'Offcore outstanding' state of the transaction lasts from the L2 miss until the sending transaction completion to requestor (SQ deallocation). See the corresponding Umask under OFFCORE_REQUESTS. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of offcore outstanding demand rfo Reads transactions in the super queue every cycle. The 'Offcore outstanding' state of the transaction lasts from the L2 miss until the sending transaction completion to requestor (SQ deallocation). See the corresponding Umask under OFFCORE_REQUESTS.",
"SampleAfterValue": "1000003",
"UMask": "0x4"
},
@@ -1031,7 +1144,6 @@
"Counter": "0,1,2,3",
"EventCode": "0x20",
"EventName": "OFFCORE_REQUESTS_OUTSTANDING.DATA_RD",
- "PublicDescription": "OFFCORE_REQUESTS_OUTSTANDING.DATA_RD Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x8"
},
@@ -1040,7 +1152,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x20",
"EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_CODE_RD",
- "PublicDescription": "Counts the number of offcore outstanding Code Reads transactions in the super queue every cycle. The 'Offcore outstanding' state of the transaction lasts from the L2 miss until the sending transaction completion to requestor (SQ deallocation). See the corresponding Umask under OFFCORE_REQUESTS. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of offcore outstanding Code Reads transactions in the super queue every cycle. The 'Offcore outstanding' state of the transaction lasts from the L2 miss until the sending transaction completion to requestor (SQ deallocation). See the corresponding Umask under OFFCORE_REQUESTS.",
"SampleAfterValue": "1000003",
"UMask": "0x2"
},
@@ -1049,7 +1161,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x20",
"EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD",
- "PublicDescription": "For every cycle, increments by the number of outstanding demand data read requests pending. Requests are considered outstanding from the time they miss the core's L2 cache until the transaction completion message is sent to the requestor. Available PDIST counters: 0",
+ "PublicDescription": "For every cycle, increments by the number of outstanding demand data read requests pending. Requests are considered outstanding from the time they miss the core's L2 cache until the transaction completion message is sent to the requestor.",
"SampleAfterValue": "1000003",
"UMask": "0x1"
},
@@ -1058,7 +1170,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x20",
"EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_RFO",
- "PublicDescription": "Counts the number of off-core outstanding read-for-ownership (RFO) store transactions every cycle. An RFO transaction is considered to be in the Off-core outstanding state between L2 cache miss and transaction completion. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of off-core outstanding read-for-ownership (RFO) store transactions every cycle. An RFO transaction is considered to be in the Off-core outstanding state between L2 cache miss and transaction completion.",
"SampleAfterValue": "1000003",
"UMask": "0x4"
},
@@ -1067,7 +1179,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x2c",
"EventName": "SQ_MISC.BUS_LOCK",
- "PublicDescription": "Counts the more expensive bus lock needed to enforce cache coherency for certain memory accesses that need to be done atomically. Can be created by issuing an atomic instruction (via the LOCK prefix) which causes a cache line split or accesses uncacheable memory. Available PDIST counters: 0",
+ "PublicDescription": "Counts the more expensive bus lock needed to enforce cache coherency for certain memory accesses that need to be done atomically. Can be created by issuing an atomic instruction (via the LOCK prefix) which causes a cache line split or accesses uncacheable memory.",
"SampleAfterValue": "100003",
"UMask": "0x10"
},
@@ -1076,7 +1188,6 @@
"Counter": "0,1,2,3",
"EventCode": "0x40",
"EventName": "SW_PREFETCH_ACCESS.ANY",
- "PublicDescription": "Counts the number of PREFETCHNTA, PREFETCHW, PREFETCHT0, PREFETCHT1 or PREFETCHT2 instructions executed. Available PDIST counters: 0",
"SampleAfterValue": "100003",
"UMask": "0xf"
},
@@ -1085,7 +1196,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x40",
"EventName": "SW_PREFETCH_ACCESS.NTA",
- "PublicDescription": "Counts the number of PREFETCHNTA instructions executed. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of PREFETCHNTA instructions executed.",
"SampleAfterValue": "100003",
"UMask": "0x1"
},
@@ -1094,7 +1205,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x40",
"EventName": "SW_PREFETCH_ACCESS.PREFETCHW",
- "PublicDescription": "Counts the number of PREFETCHW instructions executed. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of PREFETCHW instructions executed.",
"SampleAfterValue": "100003",
"UMask": "0x8"
},
@@ -1103,7 +1214,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x40",
"EventName": "SW_PREFETCH_ACCESS.T0",
- "PublicDescription": "Counts the number of PREFETCHT0 instructions executed. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of PREFETCHT0 instructions executed.",
"SampleAfterValue": "100003",
"UMask": "0x2"
},
@@ -1112,7 +1223,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x40",
"EventName": "SW_PREFETCH_ACCESS.T1_T2",
- "PublicDescription": "Counts the number of PREFETCHT1 or PREFETCHT2 instructions executed. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of PREFETCHT1 or PREFETCHT2 instructions executed.",
"SampleAfterValue": "100003",
"UMask": "0x4"
}
diff --git a/tools/perf/pmu-events/arch/x86/graniterapids/floating-point.json b/tools/perf/pmu-events/arch/x86/graniterapids/floating-point.json
index 1832dd952f66..59789eee060c 100644
--- a/tools/perf/pmu-events/arch/x86/graniterapids/floating-point.json
+++ b/tools/perf/pmu-events/arch/x86/graniterapids/floating-point.json
@@ -5,7 +5,6 @@
"CounterMask": "1",
"EventCode": "0xb0",
"EventName": "ARITH.FPDIV_ACTIVE",
- "PublicDescription": "This event counts the cycles the floating point divider is busy. Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x1"
},
@@ -14,7 +13,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc1",
"EventName": "ASSISTS.FP",
- "PublicDescription": "Counts all microcode Floating Point assists. Available PDIST counters: 0",
+ "PublicDescription": "Counts all microcode Floating Point assists.",
"SampleAfterValue": "100003",
"UMask": "0x2"
},
@@ -23,7 +22,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc1",
"EventName": "ASSISTS.SSE_AVX_MIX",
- "PublicDescription": "ASSISTS.SSE_AVX_MIX Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x10"
},
@@ -32,7 +30,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xb3",
"EventName": "FP_ARITH_DISPATCHED.PORT_0",
- "PublicDescription": "FP_ARITH_DISPATCHED.PORT_0 [This event is alias to FP_ARITH_DISPATCHED.V0] Available PDIST counters: 0",
"SampleAfterValue": "2000003",
"UMask": "0x1"
},
@@ -41,7 +38,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xb3",
"EventName": "FP_ARITH_DISPATCHED.PORT_1",
- "PublicDescription": "FP_ARITH_DISPATCHED.PORT_1 [This event is alias to FP_ARITH_DISPATCHED.V1] Available PDIST counters: 0",
"SampleAfterValue": "2000003",
"UMask": "0x2"
},
@@ -50,7 +46,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xb3",
"EventName": "FP_ARITH_DISPATCHED.PORT_5",
- "PublicDescription": "FP_ARITH_DISPATCHED.PORT_5 [This event is alias to FP_ARITH_DISPATCHED.V2] Available PDIST counters: 0",
"SampleAfterValue": "2000003",
"UMask": "0x4"
},
@@ -59,7 +54,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xb3",
"EventName": "FP_ARITH_DISPATCHED.V0",
- "PublicDescription": "FP_ARITH_DISPATCHED.V0 [This event is alias to FP_ARITH_DISPATCHED.PORT_0] Available PDIST counters: 0",
"SampleAfterValue": "2000003",
"UMask": "0x1"
},
@@ -68,7 +62,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xb3",
"EventName": "FP_ARITH_DISPATCHED.V1",
- "PublicDescription": "FP_ARITH_DISPATCHED.V1 [This event is alias to FP_ARITH_DISPATCHED.PORT_1] Available PDIST counters: 0",
"SampleAfterValue": "2000003",
"UMask": "0x2"
},
@@ -77,7 +70,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xb3",
"EventName": "FP_ARITH_DISPATCHED.V2",
- "PublicDescription": "FP_ARITH_DISPATCHED.V2 [This event is alias to FP_ARITH_DISPATCHED.PORT_5] Available PDIST counters: 0",
"SampleAfterValue": "2000003",
"UMask": "0x4"
},
@@ -86,7 +78,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc7",
"EventName": "FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE",
- "PublicDescription": "Number of SSE/AVX computational 128-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 2 computation operations, one for each element. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events. Available PDIST counters: 0",
+ "PublicDescription": "Number of SSE/AVX computational 128-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 2 computation operations, one for each element. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.",
"SampleAfterValue": "100003",
"UMask": "0x4"
},
@@ -95,7 +87,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc7",
"EventName": "FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE",
- "PublicDescription": "Number of SSE/AVX computational 128-bit packed single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 4 computation operations, one for each element. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT RSQRT RCP DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events. Available PDIST counters: 0",
+ "PublicDescription": "Number of SSE/AVX computational 128-bit packed single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 4 computation operations, one for each element. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT RSQRT RCP DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.",
"SampleAfterValue": "100003",
"UMask": "0x8"
},
@@ -104,7 +96,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc7",
"EventName": "FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE",
- "PublicDescription": "Number of SSE/AVX computational 256-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 4 computation operations, one for each element. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events. Available PDIST counters: 0",
+ "PublicDescription": "Number of SSE/AVX computational 256-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 4 computation operations, one for each element. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.",
"SampleAfterValue": "100003",
"UMask": "0x10"
},
@@ -113,7 +105,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc7",
"EventName": "FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE",
- "PublicDescription": "Number of SSE/AVX computational 256-bit packed single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 8 computation operations, one for each element. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT RSQRT RCP DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events. Available PDIST counters: 0",
+ "PublicDescription": "Number of SSE/AVX computational 256-bit packed single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 8 computation operations, one for each element. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT RSQRT RCP DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.",
"SampleAfterValue": "100003",
"UMask": "0x20"
},
@@ -122,7 +114,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc7",
"EventName": "FP_ARITH_INST_RETIRED.4_FLOPS",
- "PublicDescription": "Number of SSE/AVX computational 128-bit packed single precision and 256-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 2 or/and 4 computation operations, one for each element. Applies to SSE* and AVX* packed single precision floating-point and packed double precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX RCP14 RSQRT14 SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events. Available PDIST counters: 0",
+ "PublicDescription": "Number of SSE/AVX computational 128-bit packed single precision and 256-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 2 or/and 4 computation operations, one for each element. Applies to SSE* and AVX* packed single precision floating-point and packed double precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX RCP14 RSQRT14 SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.",
"SampleAfterValue": "100003",
"UMask": "0x18"
},
@@ -131,7 +123,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc7",
"EventName": "FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE",
- "PublicDescription": "Number of SSE/AVX computational 512-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 8 computation operations, one for each element. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT RSQRT14 RCP14 FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events. Available PDIST counters: 0",
+ "PublicDescription": "Number of SSE/AVX computational 512-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 8 computation operations, one for each element. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT RSQRT14 RCP14 FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.",
"SampleAfterValue": "100003",
"UMask": "0x40"
},
@@ -140,7 +132,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc7",
"EventName": "FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE",
- "PublicDescription": "Number of SSE/AVX computational 512-bit packed single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 16 computation operations, one for each element. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT RSQRT14 RCP14 FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events. Available PDIST counters: 0",
+ "PublicDescription": "Number of SSE/AVX computational 512-bit packed single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 16 computation operations, one for each element. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT RSQRT14 RCP14 FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.",
"SampleAfterValue": "100003",
"UMask": "0x80"
},
@@ -149,7 +141,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc7",
"EventName": "FP_ARITH_INST_RETIRED.8_FLOPS",
- "PublicDescription": "Number of SSE/AVX computational 256-bit packed single precision and 512-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 8 computation operations, one for each element. Applies to SSE* and AVX* packed single precision and double precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT RSQRT RSQRT14 RCP RCP14 DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events. Available PDIST counters: 0",
+ "PublicDescription": "Number of SSE/AVX computational 256-bit packed single precision and 512-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 8 computation operations, one for each element. Applies to SSE* and AVX* packed single precision and double precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT RSQRT RSQRT14 RCP RCP14 DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.",
"SampleAfterValue": "100003",
"UMask": "0x60"
},
@@ -158,7 +150,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc7",
"EventName": "FP_ARITH_INST_RETIRED.SCALAR",
- "PublicDescription": "Number of SSE/AVX computational scalar single precision and double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 1 computational operation. Applies to SSE* and AVX* scalar single precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT RSQRT RCP FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events. Available PDIST counters: 0",
+ "PublicDescription": "Number of SSE/AVX computational scalar single precision and double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 1 computational operation. Applies to SSE* and AVX* scalar single precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT RSQRT RCP FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.",
"SampleAfterValue": "1000003",
"UMask": "0x3"
},
@@ -167,7 +159,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc7",
"EventName": "FP_ARITH_INST_RETIRED.SCALAR_DOUBLE",
- "PublicDescription": "Number of SSE/AVX computational scalar double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 1 computational operation. Applies to SSE* and AVX* scalar double precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events. Available PDIST counters: 0",
+ "PublicDescription": "Number of SSE/AVX computational scalar double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 1 computational operation. Applies to SSE* and AVX* scalar double precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.",
"SampleAfterValue": "100003",
"UMask": "0x1"
},
@@ -176,7 +168,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc7",
"EventName": "FP_ARITH_INST_RETIRED.SCALAR_SINGLE",
- "PublicDescription": "Number of SSE/AVX computational scalar single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 1 computational operation. Applies to SSE* and AVX* scalar single precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT RSQRT RCP FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events. Available PDIST counters: 0",
+ "PublicDescription": "Number of SSE/AVX computational scalar single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 1 computational operation. Applies to SSE* and AVX* scalar single precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT RSQRT RCP FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.",
"SampleAfterValue": "100003",
"UMask": "0x2"
},
@@ -185,7 +177,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc7",
"EventName": "FP_ARITH_INST_RETIRED.VECTOR",
- "PublicDescription": "Number of any Vector retired FP arithmetic instructions. The DAZ and FTZ flags in the MXCSR register need to be set when using these events. Available PDIST counters: 0",
+ "PublicDescription": "Number of any Vector retired FP arithmetic instructions. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.",
"SampleAfterValue": "1000003",
"UMask": "0xfc"
},
@@ -194,7 +186,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xcf",
"EventName": "FP_ARITH_INST_RETIRED2.128B_PACKED_HALF",
- "PublicDescription": "FP_ARITH_INST_RETIRED2.128B_PACKED_HALF Available PDIST counters: 0",
"SampleAfterValue": "100003",
"UMask": "0x4"
},
@@ -203,7 +194,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xcf",
"EventName": "FP_ARITH_INST_RETIRED2.256B_PACKED_HALF",
- "PublicDescription": "FP_ARITH_INST_RETIRED2.256B_PACKED_HALF Available PDIST counters: 0",
"SampleAfterValue": "100003",
"UMask": "0x8"
},
@@ -212,7 +202,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xcf",
"EventName": "FP_ARITH_INST_RETIRED2.512B_PACKED_HALF",
- "PublicDescription": "FP_ARITH_INST_RETIRED2.512B_PACKED_HALF Available PDIST counters: 0",
"SampleAfterValue": "100003",
"UMask": "0x10"
},
@@ -221,7 +210,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xcf",
"EventName": "FP_ARITH_INST_RETIRED2.COMPLEX_SCALAR_HALF",
- "PublicDescription": "FP_ARITH_INST_RETIRED2.COMPLEX_SCALAR_HALF Available PDIST counters: 0",
"SampleAfterValue": "100003",
"UMask": "0x2"
},
@@ -230,7 +218,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xcf",
"EventName": "FP_ARITH_INST_RETIRED2.SCALAR",
- "PublicDescription": "FP_ARITH_INST_RETIRED2.SCALAR Available PDIST counters: 0",
+ "PublicDescription": "FP_ARITH_INST_RETIRED2.SCALAR",
"SampleAfterValue": "100003",
"UMask": "0x3"
},
@@ -239,7 +227,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xcf",
"EventName": "FP_ARITH_INST_RETIRED2.SCALAR_HALF",
- "PublicDescription": "FP_ARITH_INST_RETIRED2.SCALAR_HALF Available PDIST counters: 0",
"SampleAfterValue": "100003",
"UMask": "0x1"
},
@@ -248,7 +235,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xcf",
"EventName": "FP_ARITH_INST_RETIRED2.VECTOR",
- "PublicDescription": "FP_ARITH_INST_RETIRED2.VECTOR Available PDIST counters: 0",
+ "PublicDescription": "FP_ARITH_INST_RETIRED2.VECTOR",
"SampleAfterValue": "100003",
"UMask": "0x1c"
}
diff --git a/tools/perf/pmu-events/arch/x86/graniterapids/frontend.json b/tools/perf/pmu-events/arch/x86/graniterapids/frontend.json
index b7cd92fbecd5..d580d305c926 100644
--- a/tools/perf/pmu-events/arch/x86/graniterapids/frontend.json
+++ b/tools/perf/pmu-events/arch/x86/graniterapids/frontend.json
@@ -4,7 +4,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x60",
"EventName": "BACLEARS.ANY",
- "PublicDescription": "Number of times the front-end is resteered when it finds a branch instruction in a fetch line. This is called Unknown Branch which occurs for the first time a branch instruction is fetched or when the branch is not tracked by the BPU (Branch Prediction Unit) anymore. Available PDIST counters: 0",
+ "PublicDescription": "Number of times the front-end is resteered when it finds a branch instruction in a fetch line. This is called Unknown Branch which occurs for the first time a branch instruction is fetched or when the branch is not tracked by the BPU (Branch Prediction Unit) anymore.",
"SampleAfterValue": "100003",
"UMask": "0x1"
},
@@ -13,7 +13,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x87",
"EventName": "DECODE.LCP",
- "PublicDescription": "Counts cycles that the Instruction Length decoder (ILD) stalls occurred due to dynamically changing prefix length of the decoded instruction (by operand size prefix instruction 0x66, address size prefix instruction 0x67 or REX.W for Intel64). Count is proportional to the number of prefixes in a 16B-line. This may result in a three-cycle penalty for each LCP (Length changing prefix) in a 16-byte chunk. Available PDIST counters: 0",
+ "PublicDescription": "Counts cycles that the Instruction Length decoder (ILD) stalls occurred due to dynamically changing prefix length of the decoded instruction (by operand size prefix instruction 0x66, address size prefix instruction 0x67 or REX.W for Intel64). Count is proportional to the number of prefixes in a 16B-line. This may result in a three-cycle penalty for each LCP (Length changing prefix) in a 16-byte chunk.",
"SampleAfterValue": "500009",
"UMask": "0x1"
},
@@ -22,7 +22,6 @@
"Counter": "0,1,2,3",
"EventCode": "0x87",
"EventName": "DECODE.MS_BUSY",
- "PublicDescription": "Cycles the Microcode Sequencer is busy. Available PDIST counters: 0",
"SampleAfterValue": "500009",
"UMask": "0x2"
},
@@ -31,7 +30,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x61",
"EventName": "DSB2MITE_SWITCHES.PENALTY_CYCLES",
- "PublicDescription": "Decode Stream Buffer (DSB) is a Uop-cache that holds translations of previously fetched instructions that were decoded by the legacy x86 decode pipeline (MITE). This event counts fetch penalty cycles when a transition occurs from DSB to MITE. Available PDIST counters: 0",
+ "PublicDescription": "Decode Stream Buffer (DSB) is a Uop-cache that holds translations of previously fetched instructions that were decoded by the legacy x86 decode pipeline (MITE). This event counts fetch penalty cycles when a transition occurs from DSB to MITE.",
"SampleAfterValue": "100003",
"UMask": "0x2"
},
@@ -303,7 +302,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x80",
"EventName": "ICACHE_DATA.STALLS",
- "PublicDescription": "Counts cycles where a code line fetch is stalled due to an L1 instruction cache miss. The decode pipeline works at a 32 Byte granularity. Available PDIST counters: 0",
+ "PublicDescription": "Counts cycles where a code line fetch is stalled due to an L1 instruction cache miss. The decode pipeline works at a 32 Byte granularity.",
"SampleAfterValue": "500009",
"UMask": "0x4"
},
@@ -314,7 +313,6 @@
"EdgeDetect": "1",
"EventCode": "0x80",
"EventName": "ICACHE_DATA.STALL_PERIODS",
- "PublicDescription": "ICACHE_DATA.STALL_PERIODS Available PDIST counters: 0",
"SampleAfterValue": "500009",
"UMask": "0x4"
},
@@ -323,7 +321,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x83",
"EventName": "ICACHE_TAG.STALLS",
- "PublicDescription": "Counts cycles where a code fetch is stalled due to L1 instruction cache tag miss. Available PDIST counters: 0",
+ "PublicDescription": "Counts cycles where a code fetch is stalled due to L1 instruction cache tag miss.",
"SampleAfterValue": "200003",
"UMask": "0x4"
},
@@ -333,7 +331,7 @@
"CounterMask": "1",
"EventCode": "0x79",
"EventName": "IDQ.DSB_CYCLES_ANY",
- "PublicDescription": "Counts the number of cycles uops were delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of cycles uops were delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path.",
"SampleAfterValue": "2000003",
"UMask": "0x8"
},
@@ -343,7 +341,7 @@
"CounterMask": "6",
"EventCode": "0x79",
"EventName": "IDQ.DSB_CYCLES_OK",
- "PublicDescription": "Counts the number of cycles where optimal number of uops was delivered to the Instruction Decode Queue (IDQ) from the DSB (Decode Stream Buffer) path. Count includes uops that may 'bypass' the IDQ. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of cycles where optimal number of uops was delivered to the Instruction Decode Queue (IDQ) from the DSB (Decode Stream Buffer) path. Count includes uops that may 'bypass' the IDQ.",
"SampleAfterValue": "2000003",
"UMask": "0x8"
},
@@ -352,7 +350,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x79",
"EventName": "IDQ.DSB_UOPS",
- "PublicDescription": "Counts the number of uops delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of uops delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path.",
"SampleAfterValue": "2000003",
"UMask": "0x8"
},
@@ -362,7 +360,7 @@
"CounterMask": "1",
"EventCode": "0x79",
"EventName": "IDQ.MITE_CYCLES_ANY",
- "PublicDescription": "Counts the number of cycles uops were delivered to the Instruction Decode Queue (IDQ) from the MITE (legacy decode pipeline) path. During these cycles uops are not being delivered from the Decode Stream Buffer (DSB). Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of cycles uops were delivered to the Instruction Decode Queue (IDQ) from the MITE (legacy decode pipeline) path. During these cycles uops are not being delivered from the Decode Stream Buffer (DSB).",
"SampleAfterValue": "2000003",
"UMask": "0x4"
},
@@ -372,7 +370,7 @@
"CounterMask": "6",
"EventCode": "0x79",
"EventName": "IDQ.MITE_CYCLES_OK",
- "PublicDescription": "Counts the number of cycles where optimal number of uops was delivered to the Instruction Decode Queue (IDQ) from the MITE (legacy decode pipeline) path. During these cycles uops are not being delivered from the Decode Stream Buffer (DSB). Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of cycles where optimal number of uops was delivered to the Instruction Decode Queue (IDQ) from the MITE (legacy decode pipeline) path. During these cycles uops are not being delivered from the Decode Stream Buffer (DSB).",
"SampleAfterValue": "2000003",
"UMask": "0x4"
},
@@ -381,7 +379,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x79",
"EventName": "IDQ.MITE_UOPS",
- "PublicDescription": "Counts the number of uops delivered to Instruction Decode Queue (IDQ) from the MITE path. This also means that uops are not being delivered from the Decode Stream Buffer (DSB). Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of uops delivered to Instruction Decode Queue (IDQ) from the MITE path. This also means that uops are not being delivered from the Decode Stream Buffer (DSB).",
"SampleAfterValue": "2000003",
"UMask": "0x4"
},
@@ -391,7 +389,7 @@
"CounterMask": "1",
"EventCode": "0x79",
"EventName": "IDQ.MS_CYCLES_ANY",
- "PublicDescription": "Counts cycles during which uops are being delivered to Instruction Decode Queue (IDQ) while the Microcode Sequencer (MS) is busy. Uops maybe initiated by Decode Stream Buffer (DSB) or MITE. Available PDIST counters: 0",
+ "PublicDescription": "Counts cycles during which uops are being delivered to Instruction Decode Queue (IDQ) while the Microcode Sequencer (MS) is busy. Uops maybe initiated by Decode Stream Buffer (DSB) or MITE.",
"SampleAfterValue": "2000003",
"UMask": "0x20"
},
@@ -402,7 +400,7 @@
"EdgeDetect": "1",
"EventCode": "0x79",
"EventName": "IDQ.MS_SWITCHES",
- "PublicDescription": "Number of switches from DSB (Decode Stream Buffer) or MITE (legacy decode pipeline) to the Microcode Sequencer. Available PDIST counters: 0",
+ "PublicDescription": "Number of switches from DSB (Decode Stream Buffer) or MITE (legacy decode pipeline) to the Microcode Sequencer.",
"SampleAfterValue": "100003",
"UMask": "0x20"
},
@@ -411,7 +409,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x79",
"EventName": "IDQ.MS_UOPS",
- "PublicDescription": "Counts the number of uops initiated by MITE or Decode Stream Buffer (DSB) and delivered to Instruction Decode Queue (IDQ) while the Microcode Sequencer (MS) is busy. Counting includes uops that may 'bypass' the IDQ. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of uops initiated by MITE or Decode Stream Buffer (DSB) and delivered to Instruction Decode Queue (IDQ) while the Microcode Sequencer (MS) is busy. Counting includes uops that may 'bypass' the IDQ.",
"SampleAfterValue": "1000003",
"UMask": "0x20"
},
@@ -420,7 +418,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0x9c",
"EventName": "IDQ_BUBBLES.CORE",
- "PublicDescription": "This event counts a subset of the Topdown Slots event that when no operation was delivered to the back-end pipeline due to instruction fetch limitations when the back-end could have accepted more operations. Common examples include instruction cache misses or x86 instruction decode limitations. The count may be distributed among unhalted logical processors (hyper-threads) who share the same physical core, in processors that support Intel Hyper-Threading Technology. Software can use this event as the numerator for the Frontend Bound metric (or top-level category) of the Top-down Microarchitecture Analysis method. Available PDIST counters: 0",
+ "PublicDescription": "This event counts a subset of the Topdown Slots event that when no operation was delivered to the back-end pipeline due to instruction fetch limitations when the back-end could have accepted more operations. Common examples include instruction cache misses or x86 instruction decode limitations. The count may be distributed among unhalted logical processors (hyper-threads) who share the same physical core, in processors that support Intel Hyper-Threading Technology. Software can use this event as the numerator for the Frontend Bound metric (or top-level category) of the Top-down Microarchitecture Analysis method.",
"SampleAfterValue": "1000003",
"UMask": "0x1"
},
@@ -430,7 +428,7 @@
"CounterMask": "6",
"EventCode": "0x9c",
"EventName": "IDQ_BUBBLES.CYCLES_0_UOPS_DELIV.CORE",
- "PublicDescription": "Counts the number of cycles when no uops were delivered by the Instruction Decode Queue (IDQ) to the back-end of the pipeline when there was no back-end stalls. This event counts for one SMT thread in a given cycle. [This event is alias to IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE] Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of cycles when no uops were delivered by the Instruction Decode Queue (IDQ) to the back-end of the pipeline when there was no back-end stalls. This event counts for one SMT thread in a given cycle. [This event is alias to IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE]",
"SampleAfterValue": "1000003",
"UMask": "0x1"
},
@@ -441,7 +439,7 @@
"EventCode": "0x9c",
"EventName": "IDQ_BUBBLES.CYCLES_FE_WAS_OK",
"Invert": "1",
- "PublicDescription": "Counts the number of cycles when the optimal number of uops were delivered by the Instruction Decode Queue (IDQ) to the back-end of the pipeline when there was no back-end stalls. This event counts for one SMT thread in a given cycle. [This event is alias to IDQ_UOPS_NOT_DELIVERED.CYCLES_FE_WAS_OK] Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of cycles when the optimal number of uops were delivered by the Instruction Decode Queue (IDQ) to the back-end of the pipeline when there was no back-end stalls. This event counts for one SMT thread in a given cycle. [This event is alias to IDQ_UOPS_NOT_DELIVERED.CYCLES_FE_WAS_OK]",
"SampleAfterValue": "1000003",
"UMask": "0x1"
},
@@ -450,7 +448,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0x9c",
"EventName": "IDQ_UOPS_NOT_DELIVERED.CORE",
- "PublicDescription": "Counts the number of uops not delivered to by the Instruction Decode Queue (IDQ) to the back-end of the pipeline when there was no back-end stalls. This event counts for one SMT thread in a given cycle. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of uops not delivered to by the Instruction Decode Queue (IDQ) to the back-end of the pipeline when there was no back-end stalls. This event counts for one SMT thread in a given cycle.",
"SampleAfterValue": "1000003",
"UMask": "0x1"
},
@@ -460,7 +458,7 @@
"CounterMask": "6",
"EventCode": "0x9c",
"EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE",
- "PublicDescription": "Counts the number of cycles when no uops were delivered by the Instruction Decode Queue (IDQ) to the back-end of the pipeline when there was no back-end stalls. This event counts for one SMT thread in a given cycle. [This event is alias to IDQ_BUBBLES.CYCLES_0_UOPS_DELIV.CORE] Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of cycles when no uops were delivered by the Instruction Decode Queue (IDQ) to the back-end of the pipeline when there was no back-end stalls. This event counts for one SMT thread in a given cycle. [This event is alias to IDQ_BUBBLES.CYCLES_0_UOPS_DELIV.CORE]",
"SampleAfterValue": "1000003",
"UMask": "0x1"
},
@@ -471,7 +469,7 @@
"EventCode": "0x9c",
"EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_FE_WAS_OK",
"Invert": "1",
- "PublicDescription": "Counts the number of cycles when the optimal number of uops were delivered by the Instruction Decode Queue (IDQ) to the back-end of the pipeline when there was no back-end stalls. This event counts for one SMT thread in a given cycle. [This event is alias to IDQ_BUBBLES.CYCLES_FE_WAS_OK] Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of cycles when the optimal number of uops were delivered by the Instruction Decode Queue (IDQ) to the back-end of the pipeline when there was no back-end stalls. This event counts for one SMT thread in a given cycle. [This event is alias to IDQ_BUBBLES.CYCLES_FE_WAS_OK]",
"SampleAfterValue": "1000003",
"UMask": "0x1"
}
diff --git a/tools/perf/pmu-events/arch/x86/graniterapids/gnr-metrics.json b/tools/perf/pmu-events/arch/x86/graniterapids/gnr-metrics.json
index 9a620e1b8de8..cc3c834ca286 100644
--- a/tools/perf/pmu-events/arch/x86/graniterapids/gnr-metrics.json
+++ b/tools/perf/pmu-events/arch/x86/graniterapids/gnr-metrics.json
@@ -1,28 +1,28 @@
[
{
"BriefDescription": "C1 residency percent per core",
- "MetricExpr": "cstate_core@c1\\-residency@ / TSC",
+ "MetricExpr": "cstate_core@c1\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C1_Core_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C2 residency percent per package",
- "MetricExpr": "cstate_pkg@c2\\-residency@ / TSC",
+ "MetricExpr": "cstate_pkg@c2\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C2_Pkg_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C6 residency percent per core",
- "MetricExpr": "cstate_core@c6\\-residency@ / TSC",
+ "MetricExpr": "cstate_core@c6\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C6_Core_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C6 residency percent per package",
- "MetricExpr": "cstate_pkg@c6\\-residency@ / TSC",
+ "MetricExpr": "cstate_pkg@c6\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C6_Pkg_Residency",
"ScaleUnit": "100%"
@@ -381,7 +381,7 @@
{
"BriefDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend",
"DefaultMetricgroupName": "TopdownL1",
- "MetricExpr": "topdown\\-be\\-bound / (topdown\\-fe\\-bound + topdown\\-bad\\-spec + topdown\\-retiring + topdown\\-be\\-bound) + 0 * tma_info_thread_slots",
+ "MetricExpr": "topdown\\-be\\-bound / (topdown\\-fe\\-bound + topdown\\-bad\\-spec + topdown\\-retiring + topdown\\-be\\-bound)",
"MetricGroup": "BvOB;Default;TmaL1;TopdownL1;tma_L1_group",
"MetricName": "tma_backend_bound",
"MetricThreshold": "tma_backend_bound > 0.2",
@@ -416,39 +416,39 @@
"PublicDescription": "Total pipeline cost of instructions used for program control-flow - a subset of the Retiring category in TMA. Examples include function calls; loops and alignments. (A lower bound)"
},
{
+ "BriefDescription": "Total pipeline cost when the execution is compute-bound - an estimation",
+ "MetricExpr": "100 * (tma_core_bound * tma_divider / (tma_amx_busy + tma_divider + tma_ports_utilization + tma_serializing_operation) + tma_core_bound * tma_amx_busy / (tma_amx_busy + tma_divider + tma_ports_utilization + tma_serializing_operation) + tma_core_bound * (tma_ports_utilization / (tma_amx_busy + tma_divider + tma_ports_utilization + tma_serializing_operation)) * (tma_ports_utilized_3m / (tma_ports_utilized_0 + tma_ports_utilized_1 + tma_ports_utilized_2 + tma_ports_utilized_3m)))",
+ "MetricGroup": "BvCB;Cor;tma_issueComp",
+ "MetricName": "tma_bottleneck_compute_bound_est",
+ "MetricThreshold": "tma_bottleneck_compute_bound_est > 20",
+ "PublicDescription": "Total pipeline cost when the execution is compute-bound - an estimation. Covers Core Bound when High ILP as well as when long-latency execution units are busy. Related metrics: "
+ },
+ {
"BriefDescription": "Total pipeline cost of external Memory- or Cache-Bandwidth related bottlenecks",
- "MetricExpr": "100 * (tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_mem_bandwidth / (tma_mem_bandwidth + tma_mem_latency)) + tma_memory_bound * (tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_sq_full / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full)) + tma_memory_bound * (tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_fb_full / (tma_dtlb_load + tma_fb_full + tma_l1_latency_dependency + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)))",
+ "MetricExpr": "100 * (tma_memory_bound * (tma_dram_bound / (tma_cxl_mem_bound + tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_mem_bandwidth / (tma_mem_bandwidth + tma_mem_latency)) + tma_memory_bound * (tma_l3_bound / (tma_cxl_mem_bound + tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_sq_full / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full)) + tma_memory_bound * (tma_l1_bound / (tma_cxl_mem_bound + tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_fb_full / (tma_dtlb_load + tma_fb_full + tma_l1_latency_dependency + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)))",
"MetricGroup": "BvMB;Mem;MemoryBW;Offcore;tma_issueBW",
- "MetricName": "tma_bottleneck_cache_memory_bandwidth",
- "MetricThreshold": "tma_bottleneck_cache_memory_bandwidth > 20",
+ "MetricName": "tma_bottleneck_data_cache_memory_bandwidth",
+ "MetricThreshold": "tma_bottleneck_data_cache_memory_bandwidth > 20",
"PublicDescription": "Total pipeline cost of external Memory- or Cache-Bandwidth related bottlenecks. Related metrics: tma_fb_full, tma_info_system_dram_bw_use, tma_mem_bandwidth, tma_sq_full"
},
{
"BriefDescription": "Total pipeline cost of external Memory- or Cache-Latency related bottlenecks",
- "MetricExpr": "100 * (tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_mem_latency / (tma_mem_bandwidth + tma_mem_latency)) + tma_memory_bound * (tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_l3_hit_latency / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full)) + tma_memory_bound * tma_l2_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) + tma_memory_bound * (tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_l1_latency_dependency / (tma_dtlb_load + tma_fb_full + tma_l1_latency_dependency + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)) + tma_memory_bound * (tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_lock_latency / (tma_dtlb_load + tma_fb_full + tma_l1_latency_dependency + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)) + tma_memory_bound * (tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_split_loads / (tma_dtlb_load + tma_fb_full + tma_l1_latency_dependency + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)) + tma_memory_bound * (tma_store_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_split_stores / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency + tma_streaming_stores)) + tma_memory_bound * (tma_store_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_store_latency / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency + tma_streaming_stores)))",
+ "MetricExpr": "100 * (tma_memory_bound * (tma_dram_bound / (tma_cxl_mem_bound + tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_mem_latency / (tma_mem_bandwidth + tma_mem_latency)) + 0 / (tma_cxl_mem_bound + tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * tma_mem_latency / (tma_mem_bandwidth + tma_mem_latency) + tma_memory_bound * (tma_l3_bound / (tma_cxl_mem_bound + tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_l3_hit_latency / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full)) + tma_memory_bound * tma_l2_bound / (tma_cxl_mem_bound + tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) + tma_memory_bound * (tma_l1_bound / (tma_cxl_mem_bound + tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_l1_latency_dependency / (tma_dtlb_load + tma_fb_full + tma_l1_latency_dependency + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)) + tma_memory_bound * (tma_l1_bound / (tma_cxl_mem_bound + tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_lock_latency / (tma_dtlb_load + tma_fb_full + tma_l1_latency_dependency + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)) + tma_memory_bound * (tma_l1_bound / (tma_cxl_mem_bound + tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_split_loads / (tma_dtlb_load + tma_fb_full + tma_l1_latency_dependency + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)) + tma_memory_bound * (tma_store_bound / (tma_cxl_mem_bound + tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_split_stores / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency + tma_streaming_stores)) + tma_memory_bound * (tma_store_bound / (tma_cxl_mem_bound + tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_store_latency / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency + tma_streaming_stores)))",
"MetricGroup": "BvML;Mem;MemoryLat;Offcore;tma_issueLat",
- "MetricName": "tma_bottleneck_cache_memory_latency",
- "MetricThreshold": "tma_bottleneck_cache_memory_latency > 20",
+ "MetricName": "tma_bottleneck_data_cache_memory_latency",
+ "MetricThreshold": "tma_bottleneck_data_cache_memory_latency > 20",
"PublicDescription": "Total pipeline cost of external Memory- or Cache-Latency related bottlenecks. Related metrics: tma_l3_hit_latency, tma_mem_latency"
},
{
- "BriefDescription": "Total pipeline cost when the execution is compute-bound - an estimation",
- "MetricExpr": "100 * (tma_core_bound * tma_divider / (tma_amx_busy + tma_divider + tma_ports_utilization + tma_serializing_operation) + tma_core_bound * tma_amx_busy / (tma_amx_busy + tma_divider + tma_ports_utilization + tma_serializing_operation) + tma_core_bound * (tma_ports_utilization / (tma_amx_busy + tma_divider + tma_ports_utilization + tma_serializing_operation)) * (tma_ports_utilized_3m / (tma_ports_utilized_0 + tma_ports_utilized_1 + tma_ports_utilized_2 + tma_ports_utilized_3m)))",
- "MetricGroup": "BvCB;Cor;tma_issueComp",
- "MetricName": "tma_bottleneck_compute_bound_est",
- "MetricThreshold": "tma_bottleneck_compute_bound_est > 20",
- "PublicDescription": "Total pipeline cost when the execution is compute-bound - an estimation. Covers Core Bound when High ILP as well as when long-latency execution units are busy. Related metrics: "
- },
- {
"BriefDescription": "Total pipeline cost of instruction fetch bandwidth related bottlenecks (when the front-end could not sustain operations delivery to the back-end)",
- "MetricExpr": "100 * (tma_frontend_bound - (1 - 10 * tma_microcode_sequencer * tma_other_mispredicts / tma_branch_mispredicts) * tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches) - (1 - INST_RETIRED.REP_ITERATION / cpu@UOPS_RETIRED.MS\\,cmask\\=1@) * (tma_fetch_latency * (tma_ms_switches + tma_branch_resteers * (tma_clears_resteers + tma_mispredicts_resteers * tma_other_mispredicts / tma_branch_mispredicts) / (tma_clears_resteers + tma_mispredicts_resteers + tma_unknown_branches)) / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches) + tma_fetch_bandwidth * tma_ms / (tma_dsb + tma_mite + tma_ms))) - tma_bottleneck_big_code",
+ "MetricExpr": "100 * (tma_frontend_bound - (1 - 10 * tma_microcode_sequencer * tma_other_mispredicts / tma_branch_mispredicts) * tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches) - (1 - INST_RETIRED.REP_ITERATION / cpu@UOPS_RETIRED.MS\\,cmask\\=1@) * (tma_fetch_latency * (tma_ms_switches + tma_branch_resteers * (tma_clears_resteers + tma_mispredicts_resteers * tma_other_mispredicts / tma_branch_mispredicts) / (tma_clears_resteers + tma_mispredicts_resteers + tma_unknown_branches)) / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches) + tma_ms)) - tma_bottleneck_big_code",
"MetricGroup": "BvFB;Fed;FetchBW;Frontend",
"MetricName": "tma_bottleneck_instruction_fetch_bw",
"MetricThreshold": "tma_bottleneck_instruction_fetch_bw > 20"
},
{
"BriefDescription": "Total pipeline cost of irregular execution (e.g",
- "MetricExpr": "100 * ((1 - INST_RETIRED.REP_ITERATION / cpu@UOPS_RETIRED.MS\\,cmask\\=1@) * (tma_fetch_latency * (tma_ms_switches + tma_branch_resteers * (tma_clears_resteers + tma_mispredicts_resteers * tma_other_mispredicts / tma_branch_mispredicts) / (tma_clears_resteers + tma_mispredicts_resteers + tma_unknown_branches)) / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches) + tma_fetch_bandwidth * tma_ms / (tma_dsb + tma_mite + tma_ms)) + 10 * tma_microcode_sequencer * tma_other_mispredicts / tma_branch_mispredicts * tma_branch_mispredicts + tma_machine_clears * tma_other_nukes / tma_other_nukes + tma_core_bound * (tma_serializing_operation + RS.EMPTY_RESOURCE / tma_info_thread_clks * tma_ports_utilized_0) / (tma_amx_busy + tma_divider + tma_ports_utilization + tma_serializing_operation) + tma_microcode_sequencer / (tma_few_uops_instructions + tma_microcode_sequencer) * (tma_assists / tma_microcode_sequencer) * tma_heavy_operations)",
+ "MetricExpr": "100 * ((1 - INST_RETIRED.REP_ITERATION / cpu@UOPS_RETIRED.MS\\,cmask\\=1@) * (tma_fetch_latency * (tma_ms_switches + tma_branch_resteers * (tma_clears_resteers + tma_mispredicts_resteers * tma_other_mispredicts / tma_branch_mispredicts) / (tma_clears_resteers + tma_mispredicts_resteers + tma_unknown_branches)) / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches) + tma_ms) + 10 * tma_microcode_sequencer * tma_other_mispredicts / tma_branch_mispredicts * tma_branch_mispredicts + tma_machine_clears * tma_other_nukes / tma_other_nukes + tma_core_bound * (tma_serializing_operation + RS.EMPTY_RESOURCE / tma_info_thread_clks * tma_ports_utilized_0) / (tma_amx_busy + tma_divider + tma_ports_utilization + tma_serializing_operation) + tma_microcode_sequencer / (tma_few_uops_instructions + tma_microcode_sequencer) * (tma_assists / tma_microcode_sequencer) * tma_heavy_operations)",
"MetricGroup": "Bad;BvIO;Cor;Ret;tma_issueMS",
"MetricName": "tma_bottleneck_irregular_overhead",
"MetricThreshold": "tma_bottleneck_irregular_overhead > 10",
@@ -456,7 +456,7 @@
},
{
"BriefDescription": "Total pipeline cost of Memory Address Translation related bottlenecks (data-side TLBs)",
- "MetricExpr": "100 * (tma_memory_bound * (tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_dtlb_load / (tma_dtlb_load + tma_fb_full + tma_l1_latency_dependency + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)) + tma_memory_bound * (tma_store_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_dtlb_store / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency + tma_streaming_stores)))",
+ "MetricExpr": "100 * (tma_memory_bound * (tma_l1_bound / (tma_cxl_mem_bound + tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_dtlb_load / (tma_dtlb_load + tma_fb_full + tma_l1_latency_dependency + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)) + tma_memory_bound * (tma_store_bound / (tma_cxl_mem_bound + tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_dtlb_store / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency + tma_streaming_stores)))",
"MetricGroup": "BvMT;Mem;MemoryTLB;Offcore;tma_issueTLB",
"MetricName": "tma_bottleneck_memory_data_tlbs",
"MetricThreshold": "tma_bottleneck_memory_data_tlbs > 20",
@@ -464,7 +464,7 @@
},
{
"BriefDescription": "Total pipeline cost of Memory Synchronization related bottlenecks (data transfers and coherency updates across processors)",
- "MetricExpr": "100 * (tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_mem_latency / (tma_mem_bandwidth + tma_mem_latency)) * tma_remote_cache / (tma_local_mem + tma_remote_cache + tma_remote_mem) + tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_contested_accesses + tma_data_sharing) / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full) + tma_store_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * tma_false_sharing / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency + tma_streaming_stores - tma_store_latency)) + tma_machine_clears * (1 - tma_other_nukes / tma_other_nukes))",
+ "MetricExpr": "100 * (tma_memory_bound * (tma_dram_bound / (tma_cxl_mem_bound + tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_mem_latency / (tma_mem_bandwidth + tma_mem_latency)) * tma_remote_cache / (tma_local_mem + tma_remote_cache + tma_remote_mem) + tma_l3_bound / (tma_cxl_mem_bound + tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_contested_accesses + tma_data_sharing) / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full) + tma_store_bound / (tma_cxl_mem_bound + tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * tma_false_sharing / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency + tma_streaming_stores - tma_store_latency)) + tma_machine_clears * (1 - tma_other_nukes / tma_other_nukes))",
"MetricGroup": "BvMS;LockCont;Mem;Offcore;tma_issueSyncxn",
"MetricName": "tma_bottleneck_memory_synchronization",
"MetricThreshold": "tma_bottleneck_memory_synchronization > 10",
@@ -480,7 +480,7 @@
},
{
"BriefDescription": "Total pipeline cost of remaining bottlenecks in the back-end",
- "MetricExpr": "100 - (tma_bottleneck_big_code + tma_bottleneck_instruction_fetch_bw + tma_bottleneck_mispredictions + tma_bottleneck_cache_memory_bandwidth + tma_bottleneck_cache_memory_latency + tma_bottleneck_memory_data_tlbs + tma_bottleneck_memory_synchronization + tma_bottleneck_compute_bound_est + tma_bottleneck_irregular_overhead + tma_bottleneck_branching_overhead + tma_bottleneck_useful_work)",
+ "MetricExpr": "100 - (tma_bottleneck_big_code + tma_bottleneck_instruction_fetch_bw + tma_bottleneck_mispredictions + tma_bottleneck_data_cache_memory_bandwidth + tma_bottleneck_data_cache_memory_latency + tma_bottleneck_memory_data_tlbs + tma_bottleneck_memory_synchronization + tma_bottleneck_compute_bound_est + tma_bottleneck_irregular_overhead + tma_bottleneck_branching_overhead + tma_bottleneck_useful_work)",
"MetricGroup": "BvOB;Cor;Offcore",
"MetricName": "tma_bottleneck_other_bottlenecks",
"MetricThreshold": "tma_bottleneck_other_bottlenecks > 20",
@@ -496,7 +496,7 @@
{
"BriefDescription": "This metric represents fraction of slots the CPU has wasted due to Branch Misprediction",
"DefaultMetricgroupName": "TopdownL2",
- "MetricExpr": "topdown\\-br\\-mispredict / (topdown\\-fe\\-bound + topdown\\-bad\\-spec + topdown\\-retiring + topdown\\-be\\-bound) + 0 * tma_info_thread_slots",
+ "MetricExpr": "topdown\\-br\\-mispredict / (topdown\\-fe\\-bound + topdown\\-bad\\-spec + topdown\\-retiring + topdown\\-be\\-bound)",
"MetricGroup": "BadSpec;BrMispredicts;BvMP;Default;TmaL2;TopdownL2;tma_L2_group;tma_bad_speculation_group;tma_issueBM",
"MetricName": "tma_branch_mispredicts",
"MetricThreshold": "tma_branch_mispredicts > 0.1 & tma_bad_speculation > 0.15",
@@ -613,7 +613,6 @@
},
{
"BriefDescription": "This metric estimates fraction of cycles while the memory subsystem was handling synchronizations due to contested accesses",
- "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "(MEM_LOAD_L3_HIT_RETIRED.XSNP_MISS * min(MEM_LOAD_L3_HIT_RETIRED.XSNP_MISS:R, 74.6 * tma_info_system_core_frequency) + MEM_LOAD_L3_HIT_RETIRED.XSNP_FWD * min(MEM_LOAD_L3_HIT_RETIRED.XSNP_FWD:R, 76.6 * tma_info_system_core_frequency) * (OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM / (OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM + OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD))) * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS / 2) / tma_info_thread_clks",
"MetricGroup": "BvMS;DataSharing;LockCont;Offcore;Snoop;TopdownL4;tma_L4_group;tma_issueSyncxn;tma_l3_bound_group",
"MetricName": "tma_contested_accesses",
@@ -633,6 +632,15 @@
"ScaleUnit": "100%"
},
{
+ "BriefDescription": "This metric roughly estimates (based on idle latencies) how often the CPU was stalled on accesses to external CXL Memory by loads (e.g",
+ "MetricExpr": "(((1 - ((19 * (MEM_LOAD_L3_MISS_RETIRED.REMOTE_DRAM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS)) + 10 * (MEM_LOAD_L3_MISS_RETIRED.LOCAL_DRAM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) + MEM_LOAD_L3_MISS_RETIRED.REMOTE_FWD * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) + MEM_LOAD_L3_MISS_RETIRED.REMOTE_HITM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS))) / (19 * (MEM_LOAD_L3_MISS_RETIRED.REMOTE_DRAM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS)) + 10 * (MEM_LOAD_L3_MISS_RETIRED.LOCAL_DRAM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) + MEM_LOAD_L3_MISS_RETIRED.REMOTE_FWD * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) + MEM_LOAD_L3_MISS_RETIRED.REMOTE_HITM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS)) + (25 * (MEM_LOAD_RETIRED.LOCAL_CXL_MEM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) if #has_pmem > 0 else 0) + 33 * (MEM_LOAD_L3_MISS_RETIRED.REMOTE_CXL_MEM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) if #has_pmem > 0 else 0))) if #has_pmem > 0 else 1)) * (MEMORY_ACTIVITY.STALLS_L3_MISS / tma_info_thread_clks) if 1e6 * (MEM_LOAD_L3_MISS_RETIRED.REMOTE_CXL_MEM + MEM_LOAD_RETIRED.LOCAL_CXL_MEM) > MEM_LOAD_RETIRED.L1_MISS else 0) if #has_pmem > 0 else 0)",
+ "MetricGroup": "MemoryBound;Server;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
+ "MetricName": "tma_cxl_mem_bound",
+ "MetricThreshold": "tma_cxl_mem_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)",
+ "PublicDescription": "This metric roughly estimates (based on idle latencies) how often the CPU was stalled on accesses to external CXL Memory by loads (e.g. 3D-Xpoint (Crystal Ridge, a.k.a. IXP) memory, PMM - Persistent Memory Module [from CLX to SPR] or any other CXL Type3 Memory [EMR onwards]).",
+ "ScaleUnit": "100%"
+ },
+ {
"BriefDescription": "This metric estimates fraction of cycles while the memory subsystem was handling synchronizations due to data-sharing accesses",
"MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "(MEM_LOAD_L3_HIT_RETIRED.XSNP_NO_FWD * min(MEM_LOAD_L3_HIT_RETIRED.XSNP_NO_FWD:R, 74.6 * tma_info_system_core_frequency) + MEM_LOAD_L3_HIT_RETIRED.XSNP_FWD * min(MEM_LOAD_L3_HIT_RETIRED.XSNP_FWD:R, 74.6 * tma_info_system_core_frequency) * (1 - OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM / (OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM + OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD))) * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS / 2) / tma_info_thread_clks",
@@ -662,7 +670,7 @@
},
{
"BriefDescription": "This metric estimates how often the CPU was stalled on accesses to external memory (DRAM) by loads",
- "MetricExpr": "MEMORY_ACTIVITY.STALLS_L3_MISS / tma_info_thread_clks",
+ "MetricExpr": "(MEMORY_ACTIVITY.STALLS_L3_MISS / tma_info_thread_clks - tma_cxl_mem_bound if #has_pmem > 0 else MEMORY_ACTIVITY.STALLS_L3_MISS / tma_info_thread_clks)",
"MetricGroup": "MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
"MetricName": "tma_dram_bound",
"MetricThreshold": "tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)",
@@ -720,7 +728,7 @@
"MetricGroup": "BvMB;MemoryBW;TopdownL4;tma_L4_group;tma_issueBW;tma_issueSL;tma_issueSmSt;tma_l1_bound_group",
"MetricName": "tma_fb_full",
"MetricThreshold": "tma_fb_full > 0.3",
- "PublicDescription": "This metric does a *rough estimation* of how often L1D Fill Buffer unavailability limited additional L1D miss memory access requests to proceed. The higher the metric value; the deeper the memory hierarchy level the misses are satisfied from (metric values >1 are valid). Often it hints on approaching bandwidth limits (to L2 cache; L3 cache or external memory). Related metrics: tma_bottleneck_cache_memory_bandwidth, tma_info_system_dram_bw_use, tma_mem_bandwidth, tma_sq_full, tma_store_latency, tma_streaming_stores",
+ "PublicDescription": "This metric does a *rough estimation* of how often L1D Fill Buffer unavailability limited additional L1D miss memory access requests to proceed. The higher the metric value; the deeper the memory hierarchy level the misses are satisfied from (metric values >1 are valid). Often it hints on approaching bandwidth limits (to L2 cache; L3 cache or external memory). Related metrics: tma_bottleneck_data_cache_memory_bandwidth, tma_info_system_dram_bw_use, tma_mem_bandwidth, tma_sq_full, tma_store_latency, tma_streaming_stores",
"ScaleUnit": "100%"
},
{
@@ -848,7 +856,7 @@
{
"BriefDescription": "This metric represents fraction of slots where the CPU was retiring heavy-weight operations -- instructions that require two or more uops or micro-coded sequences",
"DefaultMetricgroupName": "TopdownL2",
- "MetricExpr": "topdown\\-heavy\\-ops / (topdown\\-fe\\-bound + topdown\\-bad\\-spec + topdown\\-retiring + topdown\\-be\\-bound) + 0 * tma_info_thread_slots",
+ "MetricExpr": "topdown\\-heavy\\-ops / (topdown\\-fe\\-bound + topdown\\-bad\\-spec + topdown\\-retiring + topdown\\-be\\-bound)",
"MetricGroup": "Default;Retire;TmaL2;TopdownL2;tma_L2_group;tma_retiring_group",
"MetricName": "tma_heavy_operations",
"MetricThreshold": "tma_heavy_operations > 0.1",
@@ -1395,19 +1403,19 @@
{
"BriefDescription": "Off-core accesses per kilo instruction for modified write requests",
"MetricExpr": "1e3 * OCR.MODIFIED_WRITE.ANY_RESPONSE / tma_info_inst_mix_instructions",
- "MetricGroup": "Offcore",
+ "MetricGroup": "Offcore;Server",
"MetricName": "tma_info_memory_mix_offcore_mwrite_any_pki"
},
{
"BriefDescription": "Off-core accesses per kilo instruction for reads-to-core requests (speculative; including in-core HW prefetches)",
"MetricExpr": "1e3 * OCR.READS_TO_CORE.ANY_RESPONSE / tma_info_inst_mix_instructions",
- "MetricGroup": "CacheHits;Offcore",
+ "MetricGroup": "CacheHits;Offcore;Server",
"MetricName": "tma_info_memory_mix_offcore_read_any_pki"
},
{
"BriefDescription": "L3 cache misses per kilo instruction for reads-to-core requests (speculative; including in-core HW prefetches)",
"MetricExpr": "1e3 * OCR.READS_TO_CORE.L3_MISS / tma_info_inst_mix_instructions",
- "MetricGroup": "Offcore",
+ "MetricGroup": "Offcore;Server",
"MetricName": "tma_info_memory_mix_offcore_read_l3m_pki"
},
{
@@ -1433,21 +1441,21 @@
{
"BriefDescription": "Average DRAM BW for Reads-to-Core (R2C) covering for memory attached to local- and remote-socket",
"MetricExpr": "64 * OCR.READS_TO_CORE.DRAM / 1e9 / tma_info_system_time",
- "MetricGroup": "HPC;Mem;MemoryBW;SoC",
+ "MetricGroup": "HPC;Mem;MemoryBW;Offcore;Server",
"MetricName": "tma_info_memory_soc_r2c_dram_bw",
"PublicDescription": "Average DRAM BW for Reads-to-Core (R2C) covering for memory attached to local- and remote-socket. See R2C_Offcore_BW."
},
{
"BriefDescription": "Average L3-cache miss BW for Reads-to-Core (R2C)",
"MetricExpr": "64 * OCR.READS_TO_CORE.L3_MISS / 1e9 / tma_info_system_time",
- "MetricGroup": "HPC;Mem;MemoryBW;SoC",
+ "MetricGroup": "HPC;Mem;MemoryBW;Offcore;Server",
"MetricName": "tma_info_memory_soc_r2c_l3m_bw",
"PublicDescription": "Average L3-cache miss BW for Reads-to-Core (R2C). This covering going to DRAM or other memory off-chip memory tears. See R2C_Offcore_BW."
},
{
"BriefDescription": "Average Off-core access BW for Reads-to-Core (R2C)",
"MetricExpr": "64 * OCR.READS_TO_CORE.ANY_RESPONSE / 1e9 / tma_info_system_time",
- "MetricGroup": "HPC;Mem;MemoryBW;SoC",
+ "MetricGroup": "HPC;Mem;MemoryBW;Offcore;Server",
"MetricName": "tma_info_memory_soc_r2c_offcore_bw",
"PublicDescription": "Average Off-core access BW for Reads-to-Core (R2C). R2C account for demand or prefetch load/RFO/code access that fill data into the Core caches."
},
@@ -1491,7 +1499,7 @@
"MetricName": "tma_info_memory_tlb_store_stlb_mpki"
},
{
- "BriefDescription": "",
+ "BriefDescription": "Mem;Backend;CacheHits",
"MetricExpr": "UOPS_EXECUTED.THREAD / (UOPS_EXECUTED.CORE_CYCLES_GE_1 / 2 if #SMT_on else cpu@UOPS_EXECUTED.THREAD\\,cmask\\=1@)",
"MetricGroup": "Cor;Pipeline;PortsUtil;SMT",
"MetricName": "tma_info_pipeline_execute"
@@ -1538,7 +1546,7 @@
},
{
"BriefDescription": "Measured Average Core Frequency for unhalted processors [GHz]",
- "MetricExpr": "tma_info_system_turbo_utilization * TSC / 1e9 / tma_info_system_time",
+ "MetricExpr": "tma_info_system_turbo_utilization * msr@tsc@ / 1e9 / tma_info_system_time",
"MetricGroup": "Power;Summary",
"MetricName": "tma_info_system_core_frequency"
},
@@ -1550,16 +1558,28 @@
},
{
"BriefDescription": "Average number of utilized CPUs",
- "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / TSC",
+ "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / msr@tsc@",
"MetricGroup": "Summary",
"MetricName": "tma_info_system_cpus_utilized"
},
{
+ "BriefDescription": "Average 3DXP Memory Bandwidth Use for reads [GB / sec]",
+ "MetricExpr": "(64 * UNC_CXLCM_RxC_PACK_BUF_INSERTS.MEM_DATA / 1e9 / tma_info_system_time if #has_pmem > 0 else 0)",
+ "MetricGroup": "MemOffcore;MemoryBW;Server;SoC",
+ "MetricName": "tma_info_system_cxl_mem_read_bw"
+ },
+ {
+ "BriefDescription": "Average 3DXP Memory Bandwidth Use for Writes [GB / sec]",
+ "MetricExpr": "(64 * UNC_CXLDP_TxC_AGF_INSERTS.M2S_DATA / 1e9 / tma_info_system_time if #has_pmem > 0 else 0)",
+ "MetricGroup": "MemOffcore;MemoryBW;Server;SoC",
+ "MetricName": "tma_info_system_cxl_mem_write_bw"
+ },
+ {
"BriefDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]",
"MetricExpr": "64 * (UNC_M_CAS_COUNT_SCH0.RD + UNC_M_CAS_COUNT_SCH1.RD + UNC_M_CAS_COUNT_SCH0.WR + UNC_M_CAS_COUNT_SCH1.WR) / 1e9 / tma_info_system_time",
"MetricGroup": "HPC;MemOffcore;MemoryBW;SoC;tma_issueBW",
"MetricName": "tma_info_system_dram_bw_use",
- "PublicDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]. Related metrics: tma_bottleneck_cache_memory_bandwidth, tma_fb_full, tma_mem_bandwidth, tma_sq_full"
+ "PublicDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]. Related metrics: tma_bottleneck_data_cache_memory_bandwidth, tma_fb_full, tma_mem_bandwidth, tma_sq_full"
},
{
"BriefDescription": "Giga Floating Point Operations Per Second",
@@ -1771,12 +1791,12 @@
"ScaleUnit": "100%"
},
{
- "BriefDescription": "This metric([SKL+] roughly; [LNL]) estimates fraction of cycles with demand load accesses that hit the L1D cache",
+ "BriefDescription": "This metric ([SKL+] roughly; [LNL]) estimates fraction of cycles with demand load accesses that hit the L1D cache",
"MetricExpr": "min(2 * (MEM_INST_RETIRED.ALL_LOADS - MEM_LOAD_RETIRED.FB_HIT - MEM_LOAD_RETIRED.L1_MISS) * 20 / 100, max(CYCLE_ACTIVITY.CYCLES_MEM_ANY - MEMORY_ACTIVITY.CYCLES_L1D_MISS, 0)) / tma_info_thread_clks",
"MetricGroup": "BvML;MemoryLat;TopdownL4;tma_L4_group;tma_l1_bound_group",
"MetricName": "tma_l1_latency_dependency",
"MetricThreshold": "tma_l1_latency_dependency > 0.1 & (tma_l1_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
- "PublicDescription": "This metric([SKL+] roughly; [LNL]) estimates fraction of cycles with demand load accesses that hit the L1D cache. The short latency of the L1D cache may be exposed in pointer-chasing memory access patterns as an example. Sample with: MEM_LOAD_RETIRED.L1_HIT",
+ "PublicDescription": "This metric ([SKL+] roughly; [LNL]) estimates fraction of cycles with demand load accesses that hit the L1D cache. The short latency of the L1D cache may be exposed in pointer-chasing memory access patterns as an example. Sample with: MEM_LOAD_RETIRED.L1_HIT",
"ScaleUnit": "100%"
},
{
@@ -1790,7 +1810,6 @@
},
{
"BriefDescription": "This metric represents fraction of cycles with demand load accesses that hit the L2 cache under unloaded scenarios (possibly L2 latency limited)",
- "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "MEM_LOAD_RETIRED.L2_HIT * min(MEM_LOAD_RETIRED.L2_HIT:R, 4.4 * tma_info_system_core_frequency) * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS / 2) / tma_info_thread_clks",
"MetricGroup": "MemoryLat;TopdownL4;tma_L4_group;tma_l2_bound_group",
"MetricName": "tma_l2_hit_latency",
@@ -1809,12 +1828,11 @@
},
{
"BriefDescription": "This metric estimates fraction of cycles with demand load accesses that hit the L3 cache under unloaded scenarios (possibly L3 latency limited)",
- "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "MEM_LOAD_RETIRED.L3_HIT * min(MEM_LOAD_RETIRED.L3_HIT:R, 32.6 * tma_info_system_core_frequency) * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS / 2) / tma_info_thread_clks",
"MetricGroup": "BvML;MemoryLat;TopdownL4;tma_L4_group;tma_issueLat;tma_l3_bound_group",
"MetricName": "tma_l3_hit_latency",
"MetricThreshold": "tma_l3_hit_latency > 0.1 & (tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
- "PublicDescription": "This metric estimates fraction of cycles with demand load accesses that hit the L3 cache under unloaded scenarios (possibly L3 latency limited). Avoiding private cache misses (i.e. L2 misses/L3 hits) will improve the latency; reduce contention with sibling physical cores and increase performance. Note the value of this node may overlap with its siblings. Sample with: MEM_LOAD_RETIRED.L3_HIT_PS. Related metrics: tma_bottleneck_cache_memory_latency, tma_mem_latency",
+ "PublicDescription": "This metric estimates fraction of cycles with demand load accesses that hit the L3 cache under unloaded scenarios (possibly L3 latency limited). Avoiding private cache misses (i.e. L2 misses/L3 hits) will improve the latency; reduce contention with sibling physical cores and increase performance. Note the value of this node may overlap with its siblings. Sample with: MEM_LOAD_RETIRED.L3_HIT_PS. Related metrics: tma_bottleneck_data_cache_memory_latency, tma_mem_latency",
"ScaleUnit": "100%"
},
{
@@ -1897,6 +1915,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles the CPU spent handling cache misses due to lock operations",
+ "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "MEM_INST_RETIRED.LOCK_LOADS * MEM_INST_RETIRED.LOCK_LOADS:R / tma_info_thread_clks",
"MetricGroup": "LockCont;Offcore;TopdownL4;tma_L4_group;tma_issueRFO;tma_l1_bound_group",
"MetricName": "tma_lock_latency",
@@ -1929,7 +1948,7 @@
"MetricGroup": "BvMB;MemoryBW;Offcore;TopdownL4;tma_L4_group;tma_dram_bound_group;tma_issueBW",
"MetricName": "tma_mem_bandwidth",
"MetricThreshold": "tma_mem_bandwidth > 0.2 & (tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
- "PublicDescription": "This metric estimates fraction of cycles where the core's performance was likely hurt due to approaching bandwidth limits of external memory - DRAM ([SPR-HBM] and/or HBM). The underlying heuristic assumes that a similar off-core traffic is generated by all IA cores. This metric does not aggregate non-data-read requests by this logical processor; requests from other IA Logical Processors/Physical Cores/sockets; or other non-IA devices like GPU; hence the maximum external memory bandwidth limits may or may not be approached when this metric is flagged (see Uncore counters for that). Related metrics: tma_bottleneck_cache_memory_bandwidth, tma_fb_full, tma_info_system_dram_bw_use, tma_sq_full",
+ "PublicDescription": "This metric estimates fraction of cycles where the core's performance was likely hurt due to approaching bandwidth limits of external memory - DRAM ([SPR-HBM] and/or HBM). The underlying heuristic assumes that a similar off-core traffic is generated by all IA cores. This metric does not aggregate non-data-read requests by this logical processor; requests from other IA Logical Processors/Physical Cores/sockets; or other non-IA devices like GPU; hence the maximum external memory bandwidth limits may or may not be approached when this metric is flagged (see Uncore counters for that). Related metrics: tma_bottleneck_data_cache_memory_bandwidth, tma_fb_full, tma_info_system_dram_bw_use, tma_sq_full",
"ScaleUnit": "100%"
},
{
@@ -1938,13 +1957,13 @@
"MetricGroup": "BvML;MemoryLat;Offcore;TopdownL4;tma_L4_group;tma_dram_bound_group;tma_issueLat",
"MetricName": "tma_mem_latency",
"MetricThreshold": "tma_mem_latency > 0.1 & (tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
- "PublicDescription": "This metric estimates fraction of cycles where the performance was likely hurt due to latency from external memory - DRAM ([SPR-HBM] and/or HBM). This metric does not aggregate requests from other Logical Processors/Physical Cores/sockets (see Uncore counters for that). Related metrics: tma_bottleneck_cache_memory_latency, tma_l3_hit_latency",
+ "PublicDescription": "This metric estimates fraction of cycles where the performance was likely hurt due to latency from external memory - DRAM ([SPR-HBM] and/or HBM). This metric does not aggregate requests from other Logical Processors/Physical Cores/sockets (see Uncore counters for that). Related metrics: tma_bottleneck_data_cache_memory_latency, tma_l3_hit_latency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "This metric represents fraction of slots the Memory subsystem within the Backend was a bottleneck",
"DefaultMetricgroupName": "TopdownL2",
- "MetricExpr": "topdown\\-mem\\-bound / (topdown\\-fe\\-bound + topdown\\-bad\\-spec + topdown\\-retiring + topdown\\-be\\-bound) + 0 * tma_info_thread_slots",
+ "MetricExpr": "topdown\\-mem\\-bound / (topdown\\-fe\\-bound + topdown\\-bad\\-spec + topdown\\-retiring + topdown\\-be\\-bound)",
"MetricGroup": "Backend;Default;TmaL2;TopdownL2;tma_L2_group;tma_backend_bound_group",
"MetricName": "tma_memory_bound",
"MetricThreshold": "tma_memory_bound > 0.2 & tma_backend_bound > 0.2",
@@ -1954,7 +1973,6 @@
},
{
"BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to LFENCE Instructions.",
- "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "13 * MISC2_RETIRED.LFENCE / tma_info_thread_clks",
"MetricGroup": "TopdownL4;tma_L4_group;tma_serializing_operation_group",
"MetricName": "tma_memory_fence",
@@ -2007,7 +2025,7 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles in which CPU was likely limited due to the Microcode Sequencer (MS) unit - see Microcode_Sequencer node for details.",
- "MetricExpr": "max(IDQ.MS_CYCLES_ANY, cpu@UOPS_RETIRED.MS\\,cmask\\=1@ / (UOPS_RETIRED.SLOTS / UOPS_ISSUED.ANY)) / tma_info_core_core_clks / 2",
+ "MetricExpr": "max(IDQ.MS_CYCLES_ANY, cpu@UOPS_RETIRED.MS\\,cmask\\=1@ / (UOPS_RETIRED.SLOTS / UOPS_ISSUED.ANY)) / tma_info_core_core_clks / 2.4",
"MetricGroup": "MicroSeq;TopdownL3;tma_L3_group;tma_fetch_bandwidth_group",
"MetricName": "tma_ms",
"MetricThreshold": "tma_ms > 0.05 & tma_fetch_bandwidth > 0.2",
@@ -2042,6 +2060,7 @@
},
{
"BriefDescription": "This metric represents the remaining light uops fraction the CPU has executed - remaining means not covered by other sibling nodes",
+ "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "max(0, tma_light_operations - (tma_fp_arith + tma_int_operations + tma_memory_operations + tma_fused_instructions + tma_non_fused_branches))",
"MetricGroup": "Pipeline;TopdownL3;tma_L3_group;tma_light_operations_group",
"MetricName": "tma_other_light_ops",
@@ -2103,6 +2122,7 @@
},
{
"BriefDescription": "This metric estimates fraction of cycles the CPU performance was potentially limited due to Core computation issues (non divider-related)",
+ "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "((tma_ports_utilized_0 * tma_info_thread_clks + (EXE_ACTIVITY.1_PORTS_UTIL + tma_retiring * EXE_ACTIVITY.2_3_PORTS_UTIL)) / tma_info_thread_clks if ARITH.DIV_ACTIVE < CYCLE_ACTIVITY.STALLS_TOTAL - EXE_ACTIVITY.BOUND_ON_LOADS else (EXE_ACTIVITY.1_PORTS_UTIL + tma_retiring * EXE_ACTIVITY.2_3_PORTS_UTIL) / tma_info_thread_clks)",
"MetricGroup": "PortsUtil;TopdownL3;tma_L3_group;tma_core_bound_group",
"MetricName": "tma_ports_utilization",
@@ -2112,6 +2132,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles CPU executed no uops on any execution port (Logical Processor cycles since ICL, Physical Core cycles otherwise)",
+ "MetricConstraint": "NO_THRESHOLD_AND_NMI",
"MetricExpr": "max(EXE_ACTIVITY.EXE_BOUND_0_PORTS - RESOURCE_STALLS.SCOREBOARD, 0) / tma_info_thread_clks",
"MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_ports_utilization_group",
"MetricName": "tma_ports_utilized_0",
@@ -2121,6 +2142,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles where the CPU executed total of 1 uop per cycle on all execution ports (Logical Processor cycles since ICL, Physical Core cycles otherwise)",
+ "MetricConstraint": "NO_THRESHOLD_AND_NMI",
"MetricExpr": "EXE_ACTIVITY.1_PORTS_UTIL / tma_info_thread_clks",
"MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_issueL1;tma_ports_utilization_group",
"MetricName": "tma_ports_utilized_1",
@@ -2130,7 +2152,6 @@
},
{
"BriefDescription": "This metric represents fraction of cycles CPU executed total of 2 uops per cycle on all execution ports (Logical Processor cycles since ICL, Physical Core cycles otherwise)",
- "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "EXE_ACTIVITY.2_PORTS_UTIL / tma_info_thread_clks",
"MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_issue2P;tma_ports_utilization_group",
"MetricName": "tma_ports_utilized_2",
@@ -2140,7 +2161,6 @@
},
{
"BriefDescription": "This metric represents fraction of cycles CPU executed total of 3 or more uops per cycle on all execution ports (Logical Processor cycles since ICL, Physical Core cycles otherwise)",
- "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "UOPS_EXECUTED.CYCLES_GE_3 / tma_info_thread_clks",
"MetricGroup": "BvCB;PortsUtil;TopdownL4;tma_L4_group;tma_ports_utilization_group",
"MetricName": "tma_ports_utilized_3m",
@@ -2150,7 +2170,7 @@
},
{
"BriefDescription": "This metric estimates fraction of cycles while the memory subsystem was handling loads from remote cache in other sockets including synchronizations issues",
- "MetricExpr": "(MEM_LOAD_L3_MISS_RETIRED.REMOTE_HITM * MEM_LOAD_L3_MISS_RETIRED.REMOTE_HITM:R + MEM_LOAD_L3_MISS_RETIRED.REMOTE_FWD * MEM_LOAD_L3_MISS_RETIRED.REMOTE_FWD:R) * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS / 2) / tma_info_thread_clks",
+ "MetricExpr": "(MEM_LOAD_L3_MISS_RETIRED.REMOTE_HITM * PEBS + MEM_LOAD_L3_MISS_RETIRED.REMOTE_FWD * PEBS) * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS / 2) / tma_info_thread_clks",
"MetricGroup": "Offcore;Server;Snoop;TopdownL5;tma_L5_group;tma_issueSyncxn;tma_mem_latency_group",
"MetricName": "tma_remote_cache",
"MetricThreshold": "tma_remote_cache > 0.05 & (tma_mem_latency > 0.1 & (tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)))",
@@ -2159,7 +2179,7 @@
},
{
"BriefDescription": "This metric estimates fraction of cycles while the memory subsystem was handling loads from remote memory",
- "MetricExpr": "MEM_LOAD_L3_MISS_RETIRED.REMOTE_DRAM * MEM_LOAD_L3_MISS_RETIRED.REMOTE_DRAM:R * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS / 2) / tma_info_thread_clks",
+ "MetricExpr": "MEM_LOAD_L3_MISS_RETIRED.REMOTE_DRAM * PEBS * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS / 2) / tma_info_thread_clks",
"MetricGroup": "Server;Snoop;TopdownL5;tma_L5_group;tma_mem_latency_group",
"MetricName": "tma_remote_mem",
"MetricThreshold": "tma_remote_mem > 0.1 & (tma_mem_latency > 0.1 & (tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)))",
@@ -2177,7 +2197,7 @@
{
"BriefDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired",
"DefaultMetricgroupName": "TopdownL1",
- "MetricExpr": "topdown\\-retiring / (topdown\\-fe\\-bound + topdown\\-bad\\-spec + topdown\\-retiring + topdown\\-be\\-bound) + 0 * tma_info_thread_slots",
+ "MetricExpr": "topdown\\-retiring / (topdown\\-fe\\-bound + topdown\\-bad\\-spec + topdown\\-retiring + topdown\\-be\\-bound)",
"MetricGroup": "BvUW;Default;TmaL1;TopdownL1;tma_L1_group",
"MetricName": "tma_retiring",
"MetricThreshold": "tma_retiring > 0.7 | tma_heavy_operations > 0.1",
@@ -2205,7 +2225,6 @@
},
{
"BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to PAUSE Instructions",
- "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "CPU_CLK_UNHALTED.PAUSE / tma_info_thread_clks",
"MetricGroup": "TopdownL4;tma_L4_group;tma_serializing_operation_group",
"MetricName": "tma_slow_pause",
@@ -2237,7 +2256,7 @@
"MetricGroup": "BvMB;MemoryBW;Offcore;TopdownL4;tma_L4_group;tma_issueBW;tma_l3_bound_group",
"MetricName": "tma_sq_full",
"MetricThreshold": "tma_sq_full > 0.3 & (tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
- "PublicDescription": "This metric measures fraction of cycles where the Super Queue (SQ) was full taking into account all request-types and both hardware SMT threads (Logical Processors). Related metrics: tma_bottleneck_cache_memory_bandwidth, tma_fb_full, tma_info_system_dram_bw_use, tma_mem_bandwidth",
+ "PublicDescription": "This metric measures fraction of cycles where the Super Queue (SQ) was full taking into account all request-types and both hardware SMT threads (Logical Processors). Related metrics: tma_bottleneck_data_cache_memory_bandwidth, tma_fb_full, tma_info_system_dram_bw_use, tma_mem_bandwidth",
"ScaleUnit": "100%"
},
{
diff --git a/tools/perf/pmu-events/arch/x86/graniterapids/memory.json b/tools/perf/pmu-events/arch/x86/graniterapids/memory.json
index 4db39f304c2c..96f40390becf 100644
--- a/tools/perf/pmu-events/arch/x86/graniterapids/memory.json
+++ b/tools/perf/pmu-events/arch/x86/graniterapids/memory.json
@@ -5,7 +5,6 @@
"CounterMask": "2",
"EventCode": "0xa3",
"EventName": "CYCLE_ACTIVITY.CYCLES_L3_MISS",
- "PublicDescription": "Cycles while L3 cache miss demand load is outstanding. Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x2"
},
@@ -15,7 +14,6 @@
"CounterMask": "6",
"EventCode": "0xa3",
"EventName": "CYCLE_ACTIVITY.STALLS_L3_MISS",
- "PublicDescription": "Execution stalls while L3 cache miss demand load is outstanding. Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x6"
},
@@ -24,7 +22,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc3",
"EventName": "MACHINE_CLEARS.MEMORY_ORDERING",
- "PublicDescription": "Counts the number of Machine Clears detected dye to memory ordering. Memory Ordering Machine Clears may apply when a memory read may not conform to the memory ordering rules of the x86 architecture Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of Machine Clears detected dye to memory ordering. Memory Ordering Machine Clears may apply when a memory read may not conform to the memory ordering rules of the x86 architecture",
"SampleAfterValue": "100003",
"UMask": "0x2"
},
@@ -34,7 +32,6 @@
"CounterMask": "2",
"EventCode": "0x47",
"EventName": "MEMORY_ACTIVITY.CYCLES_L1D_MISS",
- "PublicDescription": "Cycles while L1 cache miss demand load is outstanding. Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x2"
},
@@ -44,7 +41,6 @@
"CounterMask": "3",
"EventCode": "0x47",
"EventName": "MEMORY_ACTIVITY.STALLS_L1D_MISS",
- "PublicDescription": "Execution stalls while L1 cache miss demand load is outstanding. Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x3"
},
@@ -54,7 +50,7 @@
"CounterMask": "5",
"EventCode": "0x47",
"EventName": "MEMORY_ACTIVITY.STALLS_L2_MISS",
- "PublicDescription": "Execution stalls while L2 cache miss demand cacheable load request is outstanding (will not count for uncacheable demand requests e.g. bus lock). Available PDIST counters: 0",
+ "PublicDescription": "Execution stalls while L2 cache miss demand cacheable load request is outstanding (will not count for uncacheable demand requests e.g. bus lock).",
"SampleAfterValue": "1000003",
"UMask": "0x5"
},
@@ -64,7 +60,7 @@
"CounterMask": "9",
"EventCode": "0x47",
"EventName": "MEMORY_ACTIVITY.STALLS_L3_MISS",
- "PublicDescription": "Execution stalls while L3 cache miss demand cacheable load request is outstanding (will not count for uncacheable demand requests e.g. bus lock). Available PDIST counters: 0",
+ "PublicDescription": "Execution stalls while L3 cache miss demand cacheable load request is outstanding (will not count for uncacheable demand requests e.g. bus lock).",
"SampleAfterValue": "1000003",
"UMask": "0x9"
},
@@ -412,7 +408,6 @@
"Counter": "0,1,2,3",
"EventCode": "0x21",
"EventName": "OFFCORE_REQUESTS.L3_MISS_DEMAND_DATA_RD",
- "PublicDescription": "Counts demand data read requests that miss the L3 cache. Available PDIST counters: 0",
"SampleAfterValue": "100003",
"UMask": "0x10"
},
@@ -422,7 +417,7 @@
"CounterMask": "1",
"EventCode": "0x20",
"EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_L3_MISS_DEMAND_DATA_RD",
- "PublicDescription": "Cycles with at least 1 Demand Data Read requests who miss L3 cache in the superQ. Available PDIST counters: 0",
+ "PublicDescription": "Cycles with at least 1 Demand Data Read requests who miss L3 cache in the superQ.",
"SampleAfterValue": "1000003",
"UMask": "0x10"
},
@@ -431,7 +426,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x20",
"EventName": "OFFCORE_REQUESTS_OUTSTANDING.L3_MISS_DEMAND_DATA_RD",
- "PublicDescription": "For every cycle, increments by the number of demand data read requests pending that are known to have missed the L3 cache. Note that this does not capture all elapsed cycles while requests are outstanding - only cycles from when the requests were known by the requesting core to have missed the L3 cache. Available PDIST counters: 0",
+ "PublicDescription": "For every cycle, increments by the number of demand data read requests pending that are known to have missed the L3 cache. Note that this does not capture all elapsed cycles while requests are outstanding - only cycles from when the requests were known by the requesting core to have missed the L3 cache.",
"SampleAfterValue": "2000003",
"UMask": "0x10"
},
@@ -449,7 +444,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc9",
"EventName": "RTM_RETIRED.ABORTED_EVENTS",
- "PublicDescription": "Counts the number of times an RTM execution aborted due to none of the previous 3 categories (e.g. interrupt). Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of times an RTM execution aborted due to none of the previous 3 categories (e.g. interrupt).",
"SampleAfterValue": "100003",
"UMask": "0x80"
},
@@ -458,7 +453,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc9",
"EventName": "RTM_RETIRED.ABORTED_MEM",
- "PublicDescription": "Counts the number of times an RTM execution aborted due to various memory events (e.g. read/write capacity and conflicts). Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of times an RTM execution aborted due to various memory events (e.g. read/write capacity and conflicts).",
"SampleAfterValue": "100003",
"UMask": "0x8"
},
@@ -467,7 +462,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc9",
"EventName": "RTM_RETIRED.ABORTED_MEMTYPE",
- "PublicDescription": "Counts the number of times an RTM execution aborted due to incompatible memory type. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of times an RTM execution aborted due to incompatible memory type.",
"SampleAfterValue": "100003",
"UMask": "0x40"
},
@@ -476,7 +471,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc9",
"EventName": "RTM_RETIRED.ABORTED_UNFRIENDLY",
- "PublicDescription": "Counts the number of times an RTM execution aborted due to HLE-unfriendly instructions. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of times an RTM execution aborted due to HLE-unfriendly instructions.",
"SampleAfterValue": "100003",
"UMask": "0x20"
},
@@ -485,7 +480,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc9",
"EventName": "RTM_RETIRED.COMMIT",
- "PublicDescription": "Counts the number of times RTM commit succeeded. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of times RTM commit succeeded.",
"SampleAfterValue": "100003",
"UMask": "0x2"
},
@@ -494,7 +489,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc9",
"EventName": "RTM_RETIRED.START",
- "PublicDescription": "Counts the number of times we entered an RTM region. Does not count nested transactions. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of times we entered an RTM region. Does not count nested transactions.",
"SampleAfterValue": "100003",
"UMask": "0x1"
},
@@ -503,7 +498,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x54",
"EventName": "TX_MEM.ABORT_CAPACITY_READ",
- "PublicDescription": "Speculatively counts the number of Transactional Synchronization Extensions (TSX) aborts due to a data capacity limitation for transactional reads Available PDIST counters: 0",
+ "PublicDescription": "Speculatively counts the number of Transactional Synchronization Extensions (TSX) aborts due to a data capacity limitation for transactional reads",
"SampleAfterValue": "100003",
"UMask": "0x80"
},
@@ -512,7 +507,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x54",
"EventName": "TX_MEM.ABORT_CAPACITY_WRITE",
- "PublicDescription": "Speculatively counts the number of Transactional Synchronization Extensions (TSX) aborts due to a data capacity limitation for transactional writes. Available PDIST counters: 0",
+ "PublicDescription": "Speculatively counts the number of Transactional Synchronization Extensions (TSX) aborts due to a data capacity limitation for transactional writes.",
"SampleAfterValue": "100003",
"UMask": "0x2"
},
@@ -521,7 +516,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x54",
"EventName": "TX_MEM.ABORT_CONFLICT",
- "PublicDescription": "Counts the number of times a TSX line had a cache conflict. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of times a TSX line had a cache conflict.",
"SampleAfterValue": "100003",
"UMask": "0x1"
}
diff --git a/tools/perf/pmu-events/arch/x86/graniterapids/other.json b/tools/perf/pmu-events/arch/x86/graniterapids/other.json
index 8b7aa4caec46..c0747750b1a8 100644
--- a/tools/perf/pmu-events/arch/x86/graniterapids/other.json
+++ b/tools/perf/pmu-events/arch/x86/graniterapids/other.json
@@ -4,7 +4,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc1",
"EventName": "ASSISTS.HARDWARE",
- "PublicDescription": "Count all other hardware assists or traps that are not necessarily architecturally exposed (through a software handler) beyond FP; SSE-AVX mix and A/D assists who are counted by dedicated sub-events. This includes, but not limited to, assists at EXE or MEM uop writeback like AVX* load/store/gather/scatter (non-FP GSSE-assist ) , assists generated by ROB like PEBS and RTIT, Uncore trap, RAR (Remote Action Request) and CET (Control flow Enforcement Technology) assists. the event also counts for Machine Ordering count. Available PDIST counters: 0",
+ "PublicDescription": "Count all other hardware assists or traps that are not necessarily architecturally exposed (through a software handler) beyond FP; SSE-AVX mix and A/D assists who are counted by dedicated sub-events. This includes, but not limited to, assists at EXE or MEM uop writeback like AVX* load/store/gather/scatter (non-FP GSSE-assist ) , assists generated by ROB like PEBS and RTIT, Uncore trap, RAR (Remote Action Request) and CET (Control flow Enforcement Technology) assists. the event also counts for Machine Ordering count.",
"SampleAfterValue": "100003",
"UMask": "0x4"
},
@@ -13,11 +13,35 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc1",
"EventName": "ASSISTS.PAGE_FAULT",
- "PublicDescription": "ASSISTS.PAGE_FAULT Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x8"
},
{
+ "BriefDescription": "HW_INTERRUPTS.MASKED",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xcb",
+ "EventName": "HW_INTERRUPTS.MASKED",
+ "SampleAfterValue": "100003",
+ "UMask": "0x2"
+ },
+ {
+ "BriefDescription": "HW_INTERRUPTS.PENDING_AND_MASKED",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xcb",
+ "EventName": "HW_INTERRUPTS.PENDING_AND_MASKED",
+ "SampleAfterValue": "100003",
+ "UMask": "0x4"
+ },
+ {
+ "BriefDescription": "Number of hardware interrupts received by the processor.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xcb",
+ "EventName": "HW_INTERRUPTS.RECEIVED",
+ "PublicDescription": "Counts the number of hardware interruptions received by the processor.",
+ "SampleAfterValue": "203",
+ "UMask": "0x1"
+ },
+ {
"BriefDescription": "Counts streaming stores that have any type of response.",
"Counter": "0,1,2,3",
"EventCode": "0x2A,0x2B",
@@ -34,7 +58,7 @@
"CounterMask": "1",
"EventCode": "0x2d",
"EventName": "XQ.FULL_CYCLES",
- "PublicDescription": "number of cycles when the thread is active and the uncore cannot take any further requests (for example prefetches, loads or stores initiated by the Core that miss the L2 cache). Available PDIST counters: 0",
+ "PublicDescription": "number of cycles when the thread is active and the uncore cannot take any further requests (for example prefetches, loads or stores initiated by the Core that miss the L2 cache).",
"SampleAfterValue": "1000003",
"UMask": "0x1"
}
diff --git a/tools/perf/pmu-events/arch/x86/graniterapids/pipeline.json b/tools/perf/pmu-events/arch/x86/graniterapids/pipeline.json
index 27af3bd6bacf..0fef8fd61974 100644
--- a/tools/perf/pmu-events/arch/x86/graniterapids/pipeline.json
+++ b/tools/perf/pmu-events/arch/x86/graniterapids/pipeline.json
@@ -5,7 +5,7 @@
"CounterMask": "1",
"EventCode": "0xb0",
"EventName": "ARITH.DIV_ACTIVE",
- "PublicDescription": "Counts cycles when divide unit is busy executing divide or square root operations. Accounts for integer and floating-point operations. Available PDIST counters: 0",
+ "PublicDescription": "Counts cycles when divide unit is busy executing divide or square root operations. Accounts for integer and floating-point operations.",
"SampleAfterValue": "1000003",
"UMask": "0x9"
},
@@ -15,7 +15,6 @@
"CounterMask": "1",
"EventCode": "0xb0",
"EventName": "ARITH.IDIV_ACTIVE",
- "PublicDescription": "This event counts the cycles the integer divider is busy. Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x8"
},
@@ -24,7 +23,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc1",
"EventName": "ASSISTS.ANY",
- "PublicDescription": "Counts the number of occurrences where a microcode assist is invoked by hardware. Examples include AD (page Access Dirty), FP and AVX related assists. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of occurrences where a microcode assist is invoked by hardware. Examples include AD (page Access Dirty), FP and AVX related assists.",
"SampleAfterValue": "100003",
"UMask": "0x1b"
},
@@ -271,7 +270,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xec",
"EventName": "CPU_CLK_UNHALTED.C01",
- "PublicDescription": "Counts core clocks when the thread is in the C0.1 light-weight slower wakeup time but more power saving optimized state. This state can be entered via the TPAUSE or UMWAIT instructions. Available PDIST counters: 0",
+ "PublicDescription": "Counts core clocks when the thread is in the C0.1 light-weight slower wakeup time but more power saving optimized state. This state can be entered via the TPAUSE or UMWAIT instructions.",
"SampleAfterValue": "2000003",
"UMask": "0x10"
},
@@ -280,7 +279,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xec",
"EventName": "CPU_CLK_UNHALTED.C02",
- "PublicDescription": "Counts core clocks when the thread is in the C0.2 light-weight faster wakeup time but less power saving optimized state. This state can be entered via the TPAUSE or UMWAIT instructions. Available PDIST counters: 0",
+ "PublicDescription": "Counts core clocks when the thread is in the C0.2 light-weight faster wakeup time but less power saving optimized state. This state can be entered via the TPAUSE or UMWAIT instructions.",
"SampleAfterValue": "2000003",
"UMask": "0x20"
},
@@ -289,7 +288,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xec",
"EventName": "CPU_CLK_UNHALTED.C0_WAIT",
- "PublicDescription": "Counts core clocks when the thread is in the C0.1 or C0.2 power saving optimized states (TPAUSE or UMWAIT instructions) or running the PAUSE instruction. Available PDIST counters: 0",
+ "PublicDescription": "Counts core clocks when the thread is in the C0.1 or C0.2 power saving optimized states (TPAUSE or UMWAIT instructions) or running the PAUSE instruction.",
"SampleAfterValue": "2000003",
"UMask": "0x70"
},
@@ -298,7 +297,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xec",
"EventName": "CPU_CLK_UNHALTED.DISTRIBUTED",
- "PublicDescription": "This event distributes cycle counts between active hyperthreads, i.e., those in C0. A hyperthread becomes inactive when it executes the HLT or MWAIT instructions. If all other hyperthreads are inactive (or disabled or do not exist), all counts are attributed to this hyperthread. To obtain the full count when the Core is active, sum the counts from each hyperthread. Available PDIST counters: 0",
+ "PublicDescription": "This event distributes cycle counts between active hyperthreads, i.e., those in C0. A hyperthread becomes inactive when it executes the HLT or MWAIT instructions. If all other hyperthreads are inactive (or disabled or do not exist), all counts are attributed to this hyperthread. To obtain the full count when the Core is active, sum the counts from each hyperthread.",
"SampleAfterValue": "2000003",
"UMask": "0x2"
},
@@ -307,7 +306,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0x3c",
"EventName": "CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE",
- "PublicDescription": "Counts Core crystal clock cycles when current thread is unhalted and the other thread is halted. Available PDIST counters: 0",
+ "PublicDescription": "Counts Core crystal clock cycles when current thread is unhalted and the other thread is halted.",
"SampleAfterValue": "25003",
"UMask": "0x2"
},
@@ -316,7 +315,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xec",
"EventName": "CPU_CLK_UNHALTED.PAUSE",
- "PublicDescription": "CPU_CLK_UNHALTED.PAUSE Available PDIST counters: 0",
"SampleAfterValue": "2000003",
"UMask": "0x40"
},
@@ -327,7 +325,6 @@
"EdgeDetect": "1",
"EventCode": "0xec",
"EventName": "CPU_CLK_UNHALTED.PAUSE_INST",
- "PublicDescription": "CPU_CLK_UNHALTED.PAUSE_INST Available PDIST counters: 0",
"SampleAfterValue": "2000003",
"UMask": "0x40"
},
@@ -336,7 +333,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0x3c",
"EventName": "CPU_CLK_UNHALTED.REF_DISTRIBUTED",
- "PublicDescription": "This event distributes Core crystal clock cycle counts between active hyperthreads, i.e., those in C0 sleep-state. A hyperthread becomes inactive when it executes the HLT or MWAIT instructions. If one thread is active in a core, all counts are attributed to this hyperthread. To obtain the full count when the Core is active, sum the counts from each hyperthread. Available PDIST counters: 0",
+ "PublicDescription": "This event distributes Core crystal clock cycle counts between active hyperthreads, i.e., those in C0 sleep-state. A hyperthread becomes inactive when it executes the HLT or MWAIT instructions. If one thread is active in a core, all counts are attributed to this hyperthread. To obtain the full count when the Core is active, sum the counts from each hyperthread.",
"SampleAfterValue": "2000003",
"UMask": "0x8"
},
@@ -344,7 +341,7 @@
"BriefDescription": "Reference cycles when the core is not in halt state.",
"Counter": "Fixed counter 2",
"EventName": "CPU_CLK_UNHALTED.REF_TSC",
- "PublicDescription": "Counts the number of reference cycles when the core is not in a halt state. The core enters the halt state when it is running the HLT instruction or the MWAIT instruction. This event is not affected by core frequency changes (for example, P states, TM2 transitions) but has the same incrementing frequency as the time stamp counter. This event can approximate elapsed time while the core was not in a halt state. It is counted on a dedicated fixed counter, leaving the eight programmable counters available for other events. Note: On all current platforms this event stops counting during 'throttling (TM)' states duty off periods the processor is 'halted'. The counter update is done at a lower clock rate then the core clock the overflow status bit for this counter may appear 'sticky'. After the counter has overflowed and software clears the overflow status bit and resets the counter to less than MAX. The reset value to the counter is not clocked immediately so the overflow status bit will flip 'high (1)' and generate another PMI (if enabled) after which the reset value gets clocked into the counter. Therefore, software will get the interrupt, read the overflow status bit '1 for bit 34 while the counter value is less than MAX. Software should ignore this case. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of reference cycles when the core is not in a halt state. The core enters the halt state when it is running the HLT instruction or the MWAIT instruction. This event is not affected by core frequency changes (for example, P states, TM2 transitions) but has the same incrementing frequency as the time stamp counter. This event can approximate elapsed time while the core was not in a halt state. It is counted on a dedicated fixed counter, leaving the eight programmable counters available for other events. Note: On all current platforms this event stops counting during 'throttling (TM)' states duty off periods the processor is 'halted'. The counter update is done at a lower clock rate then the core clock the overflow status bit for this counter may appear 'sticky'. After the counter has overflowed and software clears the overflow status bit and resets the counter to less than MAX. The reset value to the counter is not clocked immediately so the overflow status bit will flip 'high (1)' and generate another PMI (if enabled) after which the reset value gets clocked into the counter. Therefore, software will get the interrupt, read the overflow status bit '1 for bit 34 while the counter value is less than MAX. Software should ignore this case.",
"SampleAfterValue": "2000003",
"UMask": "0x3"
},
@@ -353,7 +350,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0x3c",
"EventName": "CPU_CLK_UNHALTED.REF_TSC_P",
- "PublicDescription": "Counts the number of reference cycles when the core is not in a halt state. The core enters the halt state when it is running the HLT instruction or the MWAIT instruction. This event is not affected by core frequency changes (for example, P states, TM2 transitions) but has the same incrementing frequency as the time stamp counter. This event can approximate elapsed time while the core was not in a halt state. It is counted on a dedicated fixed counter, leaving the four (eight when Hyperthreading is disabled) programmable counters available for other events. Note: On all current platforms this event stops counting during 'throttling (TM)' states duty off periods the processor is 'halted'. The counter update is done at a lower clock rate then the core clock the overflow status bit for this counter may appear 'sticky'. After the counter has overflowed and software clears the overflow status bit and resets the counter to less than MAX. The reset value to the counter is not clocked immediately so the overflow status bit will flip 'high (1)' and generate another PMI (if enabled) after which the reset value gets clocked into the counter. Therefore, software will get the interrupt, read the overflow status bit '1 for bit 34 while the counter value is less than MAX. Software should ignore this case. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of reference cycles when the core is not in a halt state. The core enters the halt state when it is running the HLT instruction or the MWAIT instruction. This event is not affected by core frequency changes (for example, P states, TM2 transitions) but has the same incrementing frequency as the time stamp counter. This event can approximate elapsed time while the core was not in a halt state. It is counted on a dedicated fixed counter, leaving the four (eight when Hyperthreading is disabled) programmable counters available for other events. Note: On all current platforms this event stops counting during 'throttling (TM)' states duty off periods the processor is 'halted'. The counter update is done at a lower clock rate then the core clock the overflow status bit for this counter may appear 'sticky'. After the counter has overflowed and software clears the overflow status bit and resets the counter to less than MAX. The reset value to the counter is not clocked immediately so the overflow status bit will flip 'high (1)' and generate another PMI (if enabled) after which the reset value gets clocked into the counter. Therefore, software will get the interrupt, read the overflow status bit '1 for bit 34 while the counter value is less than MAX. Software should ignore this case.",
"SampleAfterValue": "2000003",
"UMask": "0x1"
},
@@ -361,7 +358,7 @@
"BriefDescription": "Core cycles when the thread is not in halt state",
"Counter": "Fixed counter 1",
"EventName": "CPU_CLK_UNHALTED.THREAD",
- "PublicDescription": "Counts the number of core cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. This event is a component in many key event ratios. The core frequency may change from time to time due to transitions associated with Enhanced Intel SpeedStep Technology or TM2. For this reason this event may have a changing ratio with regards to time. When the core frequency is constant, this event can approximate elapsed time while the core was not in the halt state. It is counted on a dedicated fixed counter, leaving the eight programmable counters available for other events. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of core cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. This event is a component in many key event ratios. The core frequency may change from time to time due to transitions associated with Enhanced Intel SpeedStep Technology or TM2. For this reason this event may have a changing ratio with regards to time. When the core frequency is constant, this event can approximate elapsed time while the core was not in the halt state. It is counted on a dedicated fixed counter, leaving the eight programmable counters available for other events.",
"SampleAfterValue": "2000003",
"UMask": "0x2"
},
@@ -370,7 +367,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0x3c",
"EventName": "CPU_CLK_UNHALTED.THREAD_P",
- "PublicDescription": "This is an architectural event that counts the number of thread cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. The core frequency may change from time to time due to power or thermal throttling. For this reason, this event may have a changing ratio with regards to wall clock time. Available PDIST counters: 0",
+ "PublicDescription": "This is an architectural event that counts the number of thread cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. The core frequency may change from time to time due to power or thermal throttling. For this reason, this event may have a changing ratio with regards to wall clock time.",
"SampleAfterValue": "2000003"
},
{
@@ -379,7 +376,6 @@
"CounterMask": "8",
"EventCode": "0xa3",
"EventName": "CYCLE_ACTIVITY.CYCLES_L1D_MISS",
- "PublicDescription": "Cycles while L1 cache miss demand load is outstanding. Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x8"
},
@@ -389,7 +385,6 @@
"CounterMask": "1",
"EventCode": "0xa3",
"EventName": "CYCLE_ACTIVITY.CYCLES_L2_MISS",
- "PublicDescription": "Cycles while L2 cache miss demand load is outstanding. Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x1"
},
@@ -399,7 +394,6 @@
"CounterMask": "16",
"EventCode": "0xa3",
"EventName": "CYCLE_ACTIVITY.CYCLES_MEM_ANY",
- "PublicDescription": "Cycles while memory subsystem has an outstanding load. Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x10"
},
@@ -409,7 +403,6 @@
"CounterMask": "12",
"EventCode": "0xa3",
"EventName": "CYCLE_ACTIVITY.STALLS_L1D_MISS",
- "PublicDescription": "Execution stalls while L1 cache miss demand load is outstanding. Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0xc"
},
@@ -419,7 +412,6 @@
"CounterMask": "5",
"EventCode": "0xa3",
"EventName": "CYCLE_ACTIVITY.STALLS_L2_MISS",
- "PublicDescription": "Execution stalls while L2 cache miss demand load is outstanding. Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x5"
},
@@ -429,7 +421,6 @@
"CounterMask": "4",
"EventCode": "0xa3",
"EventName": "CYCLE_ACTIVITY.STALLS_TOTAL",
- "PublicDescription": "Total execution stalls. Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x4"
},
@@ -438,7 +429,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xb7",
"EventName": "EXE.AMX_BUSY",
- "PublicDescription": "Counts the cycles where the AMX (Advance Matrix Extension) unit is busy performing an operation. Available PDIST counters: 0",
"SampleAfterValue": "2000003",
"UMask": "0x2"
},
@@ -447,7 +437,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xa6",
"EventName": "EXE_ACTIVITY.1_PORTS_UTIL",
- "PublicDescription": "Counts cycles during which a total of 1 uop was executed on all ports and Reservation Station (RS) was not empty. Available PDIST counters: 0",
+ "PublicDescription": "Counts cycles during which a total of 1 uop was executed on all ports and Reservation Station (RS) was not empty.",
"SampleAfterValue": "2000003",
"UMask": "0x2"
},
@@ -456,7 +446,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xa6",
"EventName": "EXE_ACTIVITY.2_3_PORTS_UTIL",
- "PublicDescription": "Cycles total of 2 or 3 uops are executed on all ports and Reservation Station (RS) was not empty. Available PDIST counters: 0",
"SampleAfterValue": "2000003",
"UMask": "0xc"
},
@@ -465,7 +454,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xa6",
"EventName": "EXE_ACTIVITY.2_PORTS_UTIL",
- "PublicDescription": "Counts cycles during which a total of 2 uops were executed on all ports and Reservation Station (RS) was not empty. Available PDIST counters: 0",
+ "PublicDescription": "Counts cycles during which a total of 2 uops were executed on all ports and Reservation Station (RS) was not empty.",
"SampleAfterValue": "2000003",
"UMask": "0x4"
},
@@ -474,7 +463,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xa6",
"EventName": "EXE_ACTIVITY.3_PORTS_UTIL",
- "PublicDescription": "Cycles total of 3 uops are executed on all ports and Reservation Station (RS) was not empty. Available PDIST counters: 0",
+ "PublicDescription": "Cycles total of 3 uops are executed on all ports and Reservation Station (RS) was not empty.",
"SampleAfterValue": "2000003",
"UMask": "0x8"
},
@@ -483,7 +472,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xa6",
"EventName": "EXE_ACTIVITY.4_PORTS_UTIL",
- "PublicDescription": "Cycles total of 4 uops are executed on all ports and Reservation Station (RS) was not empty. Available PDIST counters: 0",
+ "PublicDescription": "Cycles total of 4 uops are executed on all ports and Reservation Station (RS) was not empty.",
"SampleAfterValue": "2000003",
"UMask": "0x10"
},
@@ -493,7 +482,6 @@
"CounterMask": "5",
"EventCode": "0xa6",
"EventName": "EXE_ACTIVITY.BOUND_ON_LOADS",
- "PublicDescription": "Execution stalls while memory subsystem has an outstanding load. Available PDIST counters: 0",
"SampleAfterValue": "2000003",
"UMask": "0x21"
},
@@ -503,7 +491,7 @@
"CounterMask": "2",
"EventCode": "0xa6",
"EventName": "EXE_ACTIVITY.BOUND_ON_STORES",
- "PublicDescription": "Counts cycles where the Store Buffer was full and no loads caused an execution stall. Available PDIST counters: 0",
+ "PublicDescription": "Counts cycles where the Store Buffer was full and no loads caused an execution stall.",
"SampleAfterValue": "1000003",
"UMask": "0x40"
},
@@ -512,7 +500,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xa6",
"EventName": "EXE_ACTIVITY.EXE_BOUND_0_PORTS",
- "PublicDescription": "Number of cycles total of 0 uops executed on all ports, Reservation Station (RS) was not empty, the Store Buffer (SB) was not full and there was no outstanding load. Available PDIST counters: 0",
+ "PublicDescription": "Number of cycles total of 0 uops executed on all ports, Reservation Station (RS) was not empty, the Store Buffer (SB) was not full and there was no outstanding load.",
"SampleAfterValue": "1000003",
"UMask": "0x80"
},
@@ -521,7 +509,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x75",
"EventName": "INST_DECODED.DECODERS",
- "PublicDescription": "Number of decoders utilized in a cycle when the MITE (legacy decode pipeline) fetches instructions. Available PDIST counters: 0",
+ "PublicDescription": "Number of decoders utilized in a cycle when the MITE (legacy decode pipeline) fetches instructions.",
"SampleAfterValue": "2000003",
"UMask": "0x1"
},
@@ -546,7 +534,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc0",
"EventName": "INST_RETIRED.MACRO_FUSED",
- "PublicDescription": "INST_RETIRED.MACRO_FUSED Available PDIST counters: 0",
"SampleAfterValue": "2000003",
"UMask": "0x10"
},
@@ -555,7 +542,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc0",
"EventName": "INST_RETIRED.NOP",
- "PublicDescription": "Counts all retired NOP or ENDBR32/64 or PREFETCHIT0/1 instructions Available PDIST counters: 0",
+ "PublicDescription": "Counts all retired NOP or ENDBR32/64 or PREFETCHIT0/1 instructions",
"SampleAfterValue": "2000003",
"UMask": "0x2"
},
@@ -572,7 +559,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc0",
"EventName": "INST_RETIRED.REP_ITERATION",
- "PublicDescription": "Number of iterations of Repeat (REP) string retired instructions such as MOVS, CMPS, and SCAS. Each has a byte, word, and doubleword version and string instructions can be repeated using a repetition prefix, REP, that allows their architectural execution to be repeated a number of times as specified by the RCX register. Note the number of iterations is implementation-dependent. Available PDIST counters: 0",
+ "PublicDescription": "Number of iterations of Repeat (REP) string retired instructions such as MOVS, CMPS, and SCAS. Each has a byte, word, and doubleword version and string instructions can be repeated using a repetition prefix, REP, that allows their architectural execution to be repeated a number of times as specified by the RCX register. Note the number of iterations is implementation-dependent.",
"SampleAfterValue": "2000003",
"UMask": "0x8"
},
@@ -583,7 +570,7 @@
"EdgeDetect": "1",
"EventCode": "0xad",
"EventName": "INT_MISC.CLEARS_COUNT",
- "PublicDescription": "Counts the number of speculative clears due to any type of branch misprediction or machine clears Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of speculative clears due to any type of branch misprediction or machine clears",
"SampleAfterValue": "500009",
"UMask": "0x1"
},
@@ -592,7 +579,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xad",
"EventName": "INT_MISC.CLEAR_RESTEER_CYCLES",
- "PublicDescription": "Cycles after recovery from a branch misprediction or machine clear till the first uop is issued from the resteered path. Available PDIST counters: 0",
+ "PublicDescription": "Cycles after recovery from a branch misprediction or machine clear till the first uop is issued from the resteered path.",
"SampleAfterValue": "500009",
"UMask": "0x80"
},
@@ -601,7 +588,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xad",
"EventName": "INT_MISC.MBA_STALLS",
- "PublicDescription": "INT_MISC.MBA_STALLS Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x20"
},
@@ -610,7 +596,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xad",
"EventName": "INT_MISC.RECOVERY_CYCLES",
- "PublicDescription": "Counts core cycles when the Resource allocator was stalled due to recovery from an earlier branch misprediction or machine clear event. Available PDIST counters: 0",
+ "PublicDescription": "Counts core cycles when the Resource allocator was stalled due to recovery from an earlier branch misprediction or machine clear event.",
"SampleAfterValue": "500009",
"UMask": "0x1"
},
@@ -621,7 +607,6 @@
"EventName": "INT_MISC.UNKNOWN_BRANCH_CYCLES",
"MSRIndex": "0x3F7",
"MSRValue": "0x7",
- "PublicDescription": "Bubble cycles of BAClear (Unknown Branch). Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x40"
},
@@ -630,7 +615,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xad",
"EventName": "INT_MISC.UOP_DROPPING",
- "PublicDescription": "Estimated number of Top-down Microarchitecture Analysis slots that got dropped due to non front-end reasons Available PDIST counters: 0",
+ "PublicDescription": "Estimated number of Top-down Microarchitecture Analysis slots that got dropped due to non front-end reasons",
"SampleAfterValue": "1000003",
"UMask": "0x10"
},
@@ -639,7 +624,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xe7",
"EventName": "INT_VEC_RETIRED.128BIT",
- "PublicDescription": "INT_VEC_RETIRED.128BIT Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x13"
},
@@ -648,7 +632,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xe7",
"EventName": "INT_VEC_RETIRED.256BIT",
- "PublicDescription": "INT_VEC_RETIRED.256BIT Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0xac"
},
@@ -657,7 +640,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xe7",
"EventName": "INT_VEC_RETIRED.ADD_128",
- "PublicDescription": "Number of retired integer ADD/SUB (regular or horizontal), SAD 128-bit vector instructions. Available PDIST counters: 0",
+ "PublicDescription": "Number of retired integer ADD/SUB (regular or horizontal), SAD 128-bit vector instructions.",
"SampleAfterValue": "1000003",
"UMask": "0x3"
},
@@ -666,7 +649,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xe7",
"EventName": "INT_VEC_RETIRED.ADD_256",
- "PublicDescription": "Number of retired integer ADD/SUB (regular or horizontal), SAD 256-bit vector instructions. Available PDIST counters: 0",
+ "PublicDescription": "Number of retired integer ADD/SUB (regular or horizontal), SAD 256-bit vector instructions.",
"SampleAfterValue": "1000003",
"UMask": "0xc"
},
@@ -675,7 +658,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xe7",
"EventName": "INT_VEC_RETIRED.MUL_256",
- "PublicDescription": "INT_VEC_RETIRED.MUL_256 Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x80"
},
@@ -684,7 +666,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xe7",
"EventName": "INT_VEC_RETIRED.SHUFFLES",
- "PublicDescription": "INT_VEC_RETIRED.SHUFFLES Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x40"
},
@@ -693,7 +674,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xe7",
"EventName": "INT_VEC_RETIRED.VNNI_128",
- "PublicDescription": "INT_VEC_RETIRED.VNNI_128 Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x10"
},
@@ -702,7 +682,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xe7",
"EventName": "INT_VEC_RETIRED.VNNI_256",
- "PublicDescription": "INT_VEC_RETIRED.VNNI_256 Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x20"
},
@@ -711,7 +690,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x03",
"EventName": "LD_BLOCKS.ADDRESS_ALIAS",
- "PublicDescription": "Counts the number of times a load got blocked due to false dependencies in MOB due to partial compare on address. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of times a load got blocked due to false dependencies in MOB due to partial compare on address.",
"SampleAfterValue": "100003",
"UMask": "0x4"
},
@@ -720,7 +699,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x03",
"EventName": "LD_BLOCKS.NO_SR",
- "PublicDescription": "Counts the number of times that split load operations are temporarily blocked because all resources for handling the split accesses are in use. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of times that split load operations are temporarily blocked because all resources for handling the split accesses are in use.",
"SampleAfterValue": "100003",
"UMask": "0x88"
},
@@ -729,7 +708,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x03",
"EventName": "LD_BLOCKS.STORE_FORWARD",
- "PublicDescription": "Counts the number of times where store forwarding was prevented for a load operation. The most common case is a load blocked due to the address of memory access (partially) overlapping with a preceding uncompleted store. Note: See the table of not supported store forwards in the Optimization Guide. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of times where store forwarding was prevented for a load operation. The most common case is a load blocked due to the address of memory access (partially) overlapping with a preceding uncompleted store. Note: See the table of not supported store forwards in the Optimization Guide.",
"SampleAfterValue": "100003",
"UMask": "0x82"
},
@@ -738,7 +717,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x4c",
"EventName": "LOAD_HIT_PREFETCH.SWPF",
- "PublicDescription": "Counts all software-prefetch load dispatches that hit the fill buffer (FB) allocated for the software prefetch. It can also be incremented by some lock instructions. So it should only be used with profiling so that the locks can be excluded by ASM (Assembly File) inspection of the nearby instructions. Available PDIST counters: 0",
+ "PublicDescription": "Counts all software-prefetch load dispatches that hit the fill buffer (FB) allocated for the software prefetch. It can also be incremented by some lock instructions. So it should only be used with profiling so that the locks can be excluded by ASM (Assembly File) inspection of the nearby instructions.",
"SampleAfterValue": "100003",
"UMask": "0x1"
},
@@ -748,7 +727,7 @@
"CounterMask": "1",
"EventCode": "0xa8",
"EventName": "LSD.CYCLES_ACTIVE",
- "PublicDescription": "Counts the cycles when at least one uop is delivered by the LSD (Loop-stream detector). Available PDIST counters: 0",
+ "PublicDescription": "Counts the cycles when at least one uop is delivered by the LSD (Loop-stream detector).",
"SampleAfterValue": "2000003",
"UMask": "0x1"
},
@@ -758,7 +737,7 @@
"CounterMask": "6",
"EventCode": "0xa8",
"EventName": "LSD.CYCLES_OK",
- "PublicDescription": "Counts the cycles when optimal number of uops is delivered by the LSD (Loop-stream detector). Available PDIST counters: 0",
+ "PublicDescription": "Counts the cycles when optimal number of uops is delivered by the LSD (Loop-stream detector).",
"SampleAfterValue": "2000003",
"UMask": "0x1"
},
@@ -767,7 +746,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xa8",
"EventName": "LSD.UOPS",
- "PublicDescription": "Counts the number of uops delivered to the back-end by the LSD(Loop Stream Detector). Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of uops delivered to the back-end by the LSD(Loop Stream Detector).",
"SampleAfterValue": "2000003",
"UMask": "0x1"
},
@@ -778,7 +757,7 @@
"EdgeDetect": "1",
"EventCode": "0xc3",
"EventName": "MACHINE_CLEARS.COUNT",
- "PublicDescription": "Counts the number of machine clears (nukes) of any type. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of machine clears (nukes) of any type.",
"SampleAfterValue": "100003",
"UMask": "0x1"
},
@@ -787,7 +766,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc3",
"EventName": "MACHINE_CLEARS.SMC",
- "PublicDescription": "Counts self-modifying code (SMC) detected, which causes a machine clear. Available PDIST counters: 0",
+ "PublicDescription": "Counts self-modifying code (SMC) detected, which causes a machine clear.",
"SampleAfterValue": "100003",
"UMask": "0x4"
},
@@ -796,7 +775,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xe0",
"EventName": "MISC2_RETIRED.LFENCE",
- "PublicDescription": "number of LFENCE retired instructions Available PDIST counters: 0",
+ "PublicDescription": "number of LFENCE retired instructions",
"SampleAfterValue": "400009",
"UMask": "0x20"
},
@@ -805,7 +784,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xcc",
"EventName": "MISC_RETIRED.LBR_INSERTS",
- "PublicDescription": "Increments when an entry is added to the Last Branch Record (LBR) array (or removed from the array in case of RETURNs in call stack mode). The event requires LBR enable via IA32_DEBUGCTL MSR and branch type selection via MSR_LBR_SELECT. Available PDIST counters: 0",
+ "PublicDescription": "Increments when an entry is added to the Last Branch Record (LBR) array (or removed from the array in case of RETURNs in call stack mode). The event requires LBR enable via IA32_DEBUGCTL MSR and branch type selection via MSR_LBR_SELECT.",
"SampleAfterValue": "100003",
"UMask": "0x20"
},
@@ -814,7 +793,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xa2",
"EventName": "RESOURCE_STALLS.SB",
- "PublicDescription": "Counts allocation stall cycles caused by the store buffer (SB) being full. This counts cycles that the pipeline back-end blocked uop delivery from the front-end. Available PDIST counters: 0",
+ "PublicDescription": "Counts allocation stall cycles caused by the store buffer (SB) being full. This counts cycles that the pipeline back-end blocked uop delivery from the front-end.",
"SampleAfterValue": "100003",
"UMask": "0x8"
},
@@ -823,7 +802,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xa2",
"EventName": "RESOURCE_STALLS.SCOREBOARD",
- "PublicDescription": "Counts cycles where the pipeline is stalled due to serializing operations. Available PDIST counters: 0",
"SampleAfterValue": "100003",
"UMask": "0x2"
},
@@ -832,7 +810,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xa5",
"EventName": "RS.EMPTY",
- "PublicDescription": "Counts cycles during which the reservation station (RS) is empty for this logical processor. This is usually caused when the front-end pipeline runs into starvation periods (e.g. branch mispredictions or i-cache misses) Available PDIST counters: 0",
+ "PublicDescription": "Counts cycles during which the reservation station (RS) is empty for this logical processor. This is usually caused when the front-end pipeline runs into starvation periods (e.g. branch mispredictions or i-cache misses)",
"SampleAfterValue": "1000003",
"UMask": "0x7"
},
@@ -844,7 +822,7 @@
"EventCode": "0xa5",
"EventName": "RS.EMPTY_COUNT",
"Invert": "1",
- "PublicDescription": "Counts end of periods where the Reservation Station (RS) was empty. Could be useful to closely sample on front-end latency issues (see the FRONTEND_RETIRED event of designated precise events) Available PDIST counters: 0",
+ "PublicDescription": "Counts end of periods where the Reservation Station (RS) was empty. Could be useful to closely sample on front-end latency issues (see the FRONTEND_RETIRED event of designated precise events)",
"SampleAfterValue": "100003",
"UMask": "0x7"
},
@@ -853,7 +831,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xa5",
"EventName": "RS.EMPTY_RESOURCE",
- "PublicDescription": "Cycles when RS was empty and a resource allocation stall is asserted Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x1"
},
@@ -862,7 +839,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xa4",
"EventName": "TOPDOWN.BACKEND_BOUND_SLOTS",
- "PublicDescription": "This event counts a subset of the Topdown Slots event that were not consumed by the back-end pipeline due to lack of back-end resources, as a result of memory subsystem delays, execution units limitations, or other conditions. The count is distributed among unhalted logical processors (hyper-threads) who share the same physical core, in processors that support Intel Hyper-Threading Technology. Software can use this event as the numerator for the Backend Bound metric (or top-level category) of the Top-down Microarchitecture Analysis method. Available PDIST counters: 0",
+ "PublicDescription": "This event counts a subset of the Topdown Slots event that were not consumed by the back-end pipeline due to lack of back-end resources, as a result of memory subsystem delays, execution units limitations, or other conditions. The count is distributed among unhalted logical processors (hyper-threads) who share the same physical core, in processors that support Intel Hyper-Threading Technology. Software can use this event as the numerator for the Backend Bound metric (or top-level category) of the Top-down Microarchitecture Analysis method.",
"SampleAfterValue": "10000003",
"UMask": "0x2"
},
@@ -871,7 +848,7 @@
"Counter": "0",
"EventCode": "0xa4",
"EventName": "TOPDOWN.BAD_SPEC_SLOTS",
- "PublicDescription": "Number of slots of TMA method that were wasted due to incorrect speculation. It covers all types of control-flow or data-related mis-speculations. Available PDIST counters: 0",
+ "PublicDescription": "Number of slots of TMA method that were wasted due to incorrect speculation. It covers all types of control-flow or data-related mis-speculations.",
"SampleAfterValue": "10000003",
"UMask": "0x4"
},
@@ -880,7 +857,7 @@
"Counter": "0",
"EventCode": "0xa4",
"EventName": "TOPDOWN.BR_MISPREDICT_SLOTS",
- "PublicDescription": "Number of TMA slots that were wasted due to incorrect speculation by (any type of) branch mispredictions. This event estimates number of speculative operations that were issued but not retired as well as the out-of-order engine recovery past a branch misprediction. Available PDIST counters: 0",
+ "PublicDescription": "Number of TMA slots that were wasted due to incorrect speculation by (any type of) branch mispredictions. This event estimates number of speculative operations that were issued but not retired as well as the out-of-order engine recovery past a branch misprediction.",
"SampleAfterValue": "10000003",
"UMask": "0x8"
},
@@ -889,7 +866,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xa4",
"EventName": "TOPDOWN.MEMORY_BOUND_SLOTS",
- "PublicDescription": "TOPDOWN.MEMORY_BOUND_SLOTS Available PDIST counters: 0",
"SampleAfterValue": "10000003",
"UMask": "0x10"
},
@@ -897,7 +873,7 @@
"BriefDescription": "TMA slots available for an unhalted logical processor. Fixed counter - architectural event",
"Counter": "Fixed counter 3",
"EventName": "TOPDOWN.SLOTS",
- "PublicDescription": "Number of available slots for an unhalted logical processor. The event increments by machine-width of the narrowest pipeline as employed by the Top-down Microarchitecture Analysis method (TMA). The count is distributed among unhalted logical processors (hyper-threads) who share the same physical core. Software can use this event as the denominator for the top-level metrics of the TMA method. This architectural event is counted on a designated fixed counter (Fixed Counter 3). Available PDIST counters: 0",
+ "PublicDescription": "Number of available slots for an unhalted logical processor. The event increments by machine-width of the narrowest pipeline as employed by the Top-down Microarchitecture Analysis method (TMA). The count is distributed among unhalted logical processors (hyper-threads) who share the same physical core. Software can use this event as the denominator for the top-level metrics of the TMA method. This architectural event is counted on a designated fixed counter (Fixed Counter 3).",
"SampleAfterValue": "10000003",
"UMask": "0x4"
},
@@ -906,7 +882,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xa4",
"EventName": "TOPDOWN.SLOTS_P",
- "PublicDescription": "Counts the number of available slots for an unhalted logical processor. The event increments by machine-width of the narrowest pipeline as employed by the Top-down Microarchitecture Analysis method. The count is distributed among unhalted logical processors (hyper-threads) who share the same physical core. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of available slots for an unhalted logical processor. The event increments by machine-width of the narrowest pipeline as employed by the Top-down Microarchitecture Analysis method. The count is distributed among unhalted logical processors (hyper-threads) who share the same physical core.",
"SampleAfterValue": "10000003",
"UMask": "0x1"
},
@@ -915,7 +891,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x76",
"EventName": "UOPS_DECODED.DEC0_UOPS",
- "PublicDescription": "This event counts the number of not dec-by-all uops decoded by decoder 0. Available PDIST counters: 0",
+ "PublicDescription": "This event counts the number of not dec-by-all uops decoded by decoder 0.",
"SampleAfterValue": "1000003",
"UMask": "0x1"
},
@@ -924,7 +900,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xb2",
"EventName": "UOPS_DISPATCHED.PORT_0",
- "PublicDescription": "Number of uops dispatch to execution port 0. Available PDIST counters: 0",
+ "PublicDescription": "Number of uops dispatch to execution port 0.",
"SampleAfterValue": "2000003",
"UMask": "0x1"
},
@@ -933,7 +909,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xb2",
"EventName": "UOPS_DISPATCHED.PORT_1",
- "PublicDescription": "Number of uops dispatch to execution port 1. Available PDIST counters: 0",
+ "PublicDescription": "Number of uops dispatch to execution port 1.",
"SampleAfterValue": "2000003",
"UMask": "0x2"
},
@@ -942,7 +918,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xb2",
"EventName": "UOPS_DISPATCHED.PORT_2_3_10",
- "PublicDescription": "Number of uops dispatch to execution ports 2, 3 and 10 Available PDIST counters: 0",
+ "PublicDescription": "Number of uops dispatch to execution ports 2, 3 and 10",
"SampleAfterValue": "2000003",
"UMask": "0x4"
},
@@ -951,7 +927,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xb2",
"EventName": "UOPS_DISPATCHED.PORT_4_9",
- "PublicDescription": "Number of uops dispatch to execution ports 4 and 9 Available PDIST counters: 0",
+ "PublicDescription": "Number of uops dispatch to execution ports 4 and 9",
"SampleAfterValue": "2000003",
"UMask": "0x10"
},
@@ -960,7 +936,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xb2",
"EventName": "UOPS_DISPATCHED.PORT_5_11",
- "PublicDescription": "Number of uops dispatch to execution ports 5 and 11 Available PDIST counters: 0",
+ "PublicDescription": "Number of uops dispatch to execution ports 5 and 11",
"SampleAfterValue": "2000003",
"UMask": "0x20"
},
@@ -969,7 +945,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xb2",
"EventName": "UOPS_DISPATCHED.PORT_6",
- "PublicDescription": "Number of uops dispatch to execution port 6. Available PDIST counters: 0",
+ "PublicDescription": "Number of uops dispatch to execution port 6.",
"SampleAfterValue": "2000003",
"UMask": "0x40"
},
@@ -978,7 +954,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xb2",
"EventName": "UOPS_DISPATCHED.PORT_7_8",
- "PublicDescription": "Number of uops dispatch to execution ports 7 and 8. Available PDIST counters: 0",
+ "PublicDescription": "Number of uops dispatch to execution ports 7 and 8.",
"SampleAfterValue": "2000003",
"UMask": "0x80"
},
@@ -987,7 +963,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xb1",
"EventName": "UOPS_EXECUTED.CORE",
- "PublicDescription": "Counts the number of uops executed from any thread. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of uops executed from any thread.",
"SampleAfterValue": "2000003",
"UMask": "0x2"
},
@@ -997,7 +973,7 @@
"CounterMask": "1",
"EventCode": "0xb1",
"EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_1",
- "PublicDescription": "Counts cycles when at least 1 micro-op is executed from any thread on physical core. Available PDIST counters: 0",
+ "PublicDescription": "Counts cycles when at least 1 micro-op is executed from any thread on physical core.",
"SampleAfterValue": "2000003",
"UMask": "0x2"
},
@@ -1007,7 +983,7 @@
"CounterMask": "2",
"EventCode": "0xb1",
"EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_2",
- "PublicDescription": "Counts cycles when at least 2 micro-ops are executed from any thread on physical core. Available PDIST counters: 0",
+ "PublicDescription": "Counts cycles when at least 2 micro-ops are executed from any thread on physical core.",
"SampleAfterValue": "2000003",
"UMask": "0x2"
},
@@ -1017,7 +993,7 @@
"CounterMask": "3",
"EventCode": "0xb1",
"EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_3",
- "PublicDescription": "Counts cycles when at least 3 micro-ops are executed from any thread on physical core. Available PDIST counters: 0",
+ "PublicDescription": "Counts cycles when at least 3 micro-ops are executed from any thread on physical core.",
"SampleAfterValue": "2000003",
"UMask": "0x2"
},
@@ -1027,7 +1003,7 @@
"CounterMask": "4",
"EventCode": "0xb1",
"EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_4",
- "PublicDescription": "Counts cycles when at least 4 micro-ops are executed from any thread on physical core. Available PDIST counters: 0",
+ "PublicDescription": "Counts cycles when at least 4 micro-ops are executed from any thread on physical core.",
"SampleAfterValue": "2000003",
"UMask": "0x2"
},
@@ -1037,7 +1013,7 @@
"CounterMask": "1",
"EventCode": "0xb1",
"EventName": "UOPS_EXECUTED.CYCLES_GE_1",
- "PublicDescription": "Cycles where at least 1 uop was executed per-thread. Available PDIST counters: 0",
+ "PublicDescription": "Cycles where at least 1 uop was executed per-thread.",
"SampleAfterValue": "2000003",
"UMask": "0x1"
},
@@ -1047,7 +1023,7 @@
"CounterMask": "2",
"EventCode": "0xb1",
"EventName": "UOPS_EXECUTED.CYCLES_GE_2",
- "PublicDescription": "Cycles where at least 2 uops were executed per-thread. Available PDIST counters: 0",
+ "PublicDescription": "Cycles where at least 2 uops were executed per-thread.",
"SampleAfterValue": "2000003",
"UMask": "0x1"
},
@@ -1057,7 +1033,7 @@
"CounterMask": "3",
"EventCode": "0xb1",
"EventName": "UOPS_EXECUTED.CYCLES_GE_3",
- "PublicDescription": "Cycles where at least 3 uops were executed per-thread. Available PDIST counters: 0",
+ "PublicDescription": "Cycles where at least 3 uops were executed per-thread.",
"SampleAfterValue": "2000003",
"UMask": "0x1"
},
@@ -1067,7 +1043,7 @@
"CounterMask": "4",
"EventCode": "0xb1",
"EventName": "UOPS_EXECUTED.CYCLES_GE_4",
- "PublicDescription": "Cycles where at least 4 uops were executed per-thread. Available PDIST counters: 0",
+ "PublicDescription": "Cycles where at least 4 uops were executed per-thread.",
"SampleAfterValue": "2000003",
"UMask": "0x1"
},
@@ -1078,7 +1054,7 @@
"EventCode": "0xb1",
"EventName": "UOPS_EXECUTED.STALLS",
"Invert": "1",
- "PublicDescription": "Counts cycles during which no uops were dispatched from the Reservation Station (RS) per thread. Available PDIST counters: 0",
+ "PublicDescription": "Counts cycles during which no uops were dispatched from the Reservation Station (RS) per thread.",
"SampleAfterValue": "2000003",
"UMask": "0x1"
},
@@ -1087,7 +1063,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xb1",
"EventName": "UOPS_EXECUTED.THREAD",
- "PublicDescription": "Counts the number of uops to be executed per-thread each cycle. Available PDIST counters: 0",
"SampleAfterValue": "2000003",
"UMask": "0x1"
},
@@ -1096,7 +1071,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xb1",
"EventName": "UOPS_EXECUTED.X87",
- "PublicDescription": "Counts the number of x87 uops executed. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of x87 uops executed.",
"SampleAfterValue": "2000003",
"UMask": "0x10"
},
@@ -1105,7 +1080,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xae",
"EventName": "UOPS_ISSUED.ANY",
- "PublicDescription": "Counts the number of uops that the Resource Allocation Table (RAT) issues to the Reservation Station (RS). Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of uops that the Resource Allocation Table (RAT) issues to the Reservation Station (RS).",
"SampleAfterValue": "2000003",
"UMask": "0x1"
},
@@ -1115,7 +1090,6 @@
"CounterMask": "1",
"EventCode": "0xae",
"EventName": "UOPS_ISSUED.CYCLES",
- "PublicDescription": "UOPS_ISSUED.CYCLES Available PDIST counters: 0",
"SampleAfterValue": "2000003",
"UMask": "0x1"
},
@@ -1125,7 +1099,7 @@
"CounterMask": "1",
"EventCode": "0xc2",
"EventName": "UOPS_RETIRED.CYCLES",
- "PublicDescription": "Counts cycles where at least one uop has retired. Available PDIST counters: 0",
+ "PublicDescription": "Counts cycles where at least one uop has retired.",
"SampleAfterValue": "1000003",
"UMask": "0x2"
},
@@ -1134,7 +1108,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc2",
"EventName": "UOPS_RETIRED.HEAVY",
- "PublicDescription": "Counts the number of retired micro-operations (uops) except the last uop of each instruction. An instruction that is decoded into less than two uops does not contribute to the count. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of retired micro-operations (uops) except the last uop of each instruction. An instruction that is decoded into less than two uops does not contribute to the count.",
"SampleAfterValue": "2000003",
"UMask": "0x1"
},
@@ -1145,7 +1119,6 @@
"EventName": "UOPS_RETIRED.MS",
"MSRIndex": "0x3F7",
"MSRValue": "0x8",
- "PublicDescription": "UOPS_RETIRED.MS Available PDIST counters: 0",
"SampleAfterValue": "2000003",
"UMask": "0x4"
},
@@ -1154,7 +1127,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc2",
"EventName": "UOPS_RETIRED.SLOTS",
- "PublicDescription": "This event counts a subset of the Topdown Slots event that are utilized by operations that eventually get retired (committed) by the processor pipeline. Usually, this event positively correlates with higher performance for example, as measured by the instructions-per-cycle metric. Software can use this event as the numerator for the Retiring metric (or top-level category) of the Top-down Microarchitecture Analysis method. Available PDIST counters: 0",
+ "PublicDescription": "This event counts a subset of the Topdown Slots event that are utilized by operations that eventually get retired (committed) by the processor pipeline. Usually, this event positively correlates with higher performance for example, as measured by the instructions-per-cycle metric. Software can use this event as the numerator for the Retiring metric (or top-level category) of the Top-down Microarchitecture Analysis method.",
"SampleAfterValue": "2000003",
"UMask": "0x2"
},
@@ -1165,7 +1138,7 @@
"EventCode": "0xc2",
"EventName": "UOPS_RETIRED.STALLS",
"Invert": "1",
- "PublicDescription": "This event counts cycles without actually retired uops. Available PDIST counters: 0",
+ "PublicDescription": "This event counts cycles without actually retired uops.",
"SampleAfterValue": "1000003",
"UMask": "0x2"
}
diff --git a/tools/perf/pmu-events/arch/x86/graniterapids/uncore-cache.json b/tools/perf/pmu-events/arch/x86/graniterapids/uncore-cache.json
index b782f6d54fc2..721fc42797b1 100644
--- a/tools/perf/pmu-events/arch/x86/graniterapids/uncore-cache.json
+++ b/tools/perf/pmu-events/arch/x86/graniterapids/uncore-cache.json
@@ -10,6 +10,15 @@
"Unit": "CHACMS"
},
{
+ "BriefDescription": "UNC_CHACMS_DISTRESS_ASSERTED",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x35",
+ "EventName": "UNC_CHACMS_DISTRESS_ASSERTED",
+ "PerPkg": "1",
+ "PortMask": "0x000",
+ "Unit": "CHACMS"
+ },
+ {
"BriefDescription": "Counts the number of cycles FAST trigger is received from the global FAST distress wire.",
"Counter": "0,1,2,3",
"EventCode": "0x34",
diff --git a/tools/perf/pmu-events/arch/x86/graniterapids/uncore-interconnect.json b/tools/perf/pmu-events/arch/x86/graniterapids/uncore-interconnect.json
index 6667fbc50452..5eb1145f204f 100644
--- a/tools/perf/pmu-events/arch/x86/graniterapids/uncore-interconnect.json
+++ b/tools/perf/pmu-events/arch/x86/graniterapids/uncore-interconnect.json
@@ -833,12 +833,20 @@
"Counter": "0,1,2,3",
"EventCode": "0x1F",
"EventName": "UNC_I_MISC1.LOST_FWD",
- "Experimental": "1",
"PerPkg": "1",
"UMask": "0x10",
"Unit": "IRP"
},
{
+ "BriefDescription": "Misc Events - Set 1 : Received Invalid : Secondary received a transfer that did not have sufficient MESI state",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x1F",
+ "EventName": "UNC_I_MISC1.SEC_RCVD_INVLD",
+ "PerPkg": "1",
+ "UMask": "0x20",
+ "Unit": "IRP"
+ },
+ {
"BriefDescription": "Snoop Hit E/S responses",
"Counter": "0,1,2,3",
"EventCode": "0x12",
diff --git a/tools/perf/pmu-events/arch/x86/graniterapids/uncore-io.json b/tools/perf/pmu-events/arch/x86/graniterapids/uncore-io.json
index f4f956966e16..2ea2637df3fb 100644
--- a/tools/perf/pmu-events/arch/x86/graniterapids/uncore-io.json
+++ b/tools/perf/pmu-events/arch/x86/graniterapids/uncore-io.json
@@ -1321,7 +1321,6 @@
"FCMask": "0x01",
"PerPkg": "1",
"PortMask": "0x0FF",
- "PublicDescription": "-",
"UMask": "0x4",
"Unit": "IIO"
},
diff --git a/tools/perf/pmu-events/arch/x86/graniterapids/uncore-memory.json b/tools/perf/pmu-events/arch/x86/graniterapids/uncore-memory.json
index b991f6e1afbe..f559e27e2815 100644
--- a/tools/perf/pmu-events/arch/x86/graniterapids/uncore-memory.json
+++ b/tools/perf/pmu-events/arch/x86/graniterapids/uncore-memory.json
@@ -60,6 +60,33 @@
"BriefDescription": "CAS count for SubChannel 0 regular reads",
"Counter": "0,1,2,3",
"EventCode": "0x05",
+ "EventName": "UNC_M_CAS_COUNT_SCH0.RD_NON_UNDERFILL",
+ "PerPkg": "1",
+ "UMask": "0xc3",
+ "Unit": "IMC"
+ },
+ {
+ "BriefDescription": "CAS count for SubChannel 0 auto-precharge reads",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x05",
+ "EventName": "UNC_M_CAS_COUNT_SCH0.RD_PRE_REG",
+ "PerPkg": "1",
+ "UMask": "0xc2",
+ "Unit": "IMC"
+ },
+ {
+ "BriefDescription": "CAS count for SubChannel 0 auto-precharge underfill reads",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x05",
+ "EventName": "UNC_M_CAS_COUNT_SCH0.RD_PRE_UNDERFILL",
+ "PerPkg": "1",
+ "UMask": "0xc8",
+ "Unit": "IMC"
+ },
+ {
+ "BriefDescription": "CAS count for SubChannel 0 regular reads",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x05",
"EventName": "UNC_M_CAS_COUNT_SCH0.RD_REG",
"PerPkg": "1",
"UMask": "0xc1",
@@ -75,6 +102,15 @@
"Unit": "IMC"
},
{
+ "BriefDescription": "CAS count for SubChannel 0 underfill reads",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x05",
+ "EventName": "UNC_M_CAS_COUNT_SCH0.RD_UNDERFILL_ALL",
+ "PerPkg": "1",
+ "UMask": "0xcc",
+ "Unit": "IMC"
+ },
+ {
"BriefDescription": "CAS count for SubChannel 0, all writes",
"Counter": "0,1,2,3",
"EventCode": "0x05",
@@ -125,6 +161,33 @@
"BriefDescription": "CAS count for SubChannel 1 regular reads",
"Counter": "0,1,2,3",
"EventCode": "0x06",
+ "EventName": "UNC_M_CAS_COUNT_SCH1.RD_NON_UNDERFILL",
+ "PerPkg": "1",
+ "UMask": "0xc3",
+ "Unit": "IMC"
+ },
+ {
+ "BriefDescription": "CAS count for SubChannel 1 auto-precharge reads",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x06",
+ "EventName": "UNC_M_CAS_COUNT_SCH1.RD_PRE_REG",
+ "PerPkg": "1",
+ "UMask": "0xc2",
+ "Unit": "IMC"
+ },
+ {
+ "BriefDescription": "CAS count for SubChannel 1 auto-precharge underfill reads",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x06",
+ "EventName": "UNC_M_CAS_COUNT_SCH1.RD_PRE_UNDERFILL",
+ "PerPkg": "1",
+ "UMask": "0xc8",
+ "Unit": "IMC"
+ },
+ {
+ "BriefDescription": "CAS count for SubChannel 1 regular reads",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x06",
"EventName": "UNC_M_CAS_COUNT_SCH1.RD_REG",
"PerPkg": "1",
"UMask": "0xc1",
@@ -140,6 +203,15 @@
"Unit": "IMC"
},
{
+ "BriefDescription": "CAS count for SubChannel 1 underfill reads",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x06",
+ "EventName": "UNC_M_CAS_COUNT_SCH1.RD_UNDERFILL_ALL",
+ "PerPkg": "1",
+ "UMask": "0xcc",
+ "Unit": "IMC"
+ },
+ {
"BriefDescription": "CAS count for SubChannel 1, all writes",
"Counter": "0,1,2,3",
"EventCode": "0x06",
@@ -189,13 +261,52 @@
"Unit": "IMC"
},
{
+ "BriefDescription": "PMMNT is sending REF* commands while being in specified Refresh rate",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x72",
+ "EventName": "UNC_M_MNTCMD_REFRATE.REFAB1X",
+ "Experimental": "1",
+ "PerPkg": "1",
+ "UMask": "0x1",
+ "Unit": "IMC"
+ },
+ {
+ "BriefDescription": "PMMNT is sending REF* commands while being in specified Refresh rate",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x72",
+ "EventName": "UNC_M_MNTCMD_REFRATE.REFAB2X",
+ "Experimental": "1",
+ "PerPkg": "1",
+ "UMask": "0x4",
+ "Unit": "IMC"
+ },
+ {
+ "BriefDescription": "PMMNT is sending REF* commands while being in specified Refresh rate",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x72",
+ "EventName": "UNC_M_MNTCMD_REFRATE.REFSB1X",
+ "Experimental": "1",
+ "PerPkg": "1",
+ "UMask": "0x2",
+ "Unit": "IMC"
+ },
+ {
+ "BriefDescription": "PMMNT is sending REF* commands while being in specified Refresh rate",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x72",
+ "EventName": "UNC_M_MNTCMD_REFRATE.REFSB2X",
+ "Experimental": "1",
+ "PerPkg": "1",
+ "UMask": "0x8",
+ "Unit": "IMC"
+ },
+ {
"BriefDescription": "# of cycles MR4 temp readings forced 2x refresh",
"Counter": "0,1,2,3",
"EventCode": "0xA7",
"EventName": "UNC_M_MR4_2XREF_CYCLES.SCH0_DIMM0",
"Experimental": "1",
"PerPkg": "1",
- "PublicDescription": "-",
"UMask": "0x1",
"Unit": "IMC"
},
@@ -206,7 +317,6 @@
"EventName": "UNC_M_MR4_2XREF_CYCLES.SCH0_DIMM1",
"Experimental": "1",
"PerPkg": "1",
- "PublicDescription": "-",
"UMask": "0x2",
"Unit": "IMC"
},
@@ -217,7 +327,6 @@
"EventName": "UNC_M_MR4_2XREF_CYCLES.SCH1_DIMM0",
"Experimental": "1",
"PerPkg": "1",
- "PublicDescription": "-",
"UMask": "0x4",
"Unit": "IMC"
},
@@ -228,7 +337,6 @@
"EventName": "UNC_M_MR4_2XREF_CYCLES.SCH1_DIMM1",
"Experimental": "1",
"PerPkg": "1",
- "PublicDescription": "-",
"UMask": "0x8",
"Unit": "IMC"
},
@@ -239,7 +347,6 @@
"EventName": "UNC_M_PDC_MR4ACTIVE_CYCLES.SCH0_DIMM0",
"Experimental": "1",
"PerPkg": "1",
- "PublicDescription": "-",
"UMask": "0x1",
"Unit": "IMC"
},
@@ -250,7 +357,6 @@
"EventName": "UNC_M_PDC_MR4ACTIVE_CYCLES.SCH0_DIMM1",
"Experimental": "1",
"PerPkg": "1",
- "PublicDescription": "-",
"UMask": "0x2",
"Unit": "IMC"
},
@@ -261,7 +367,6 @@
"EventName": "UNC_M_PDC_MR4ACTIVE_CYCLES.SCH1_DIMM0",
"Experimental": "1",
"PerPkg": "1",
- "PublicDescription": "-",
"UMask": "0x4",
"Unit": "IMC"
},
@@ -272,7 +377,6 @@
"EventName": "UNC_M_PDC_MR4ACTIVE_CYCLES.SCH1_DIMM1",
"Experimental": "1",
"PerPkg": "1",
- "PublicDescription": "-",
"UMask": "0x8",
"Unit": "IMC"
},
@@ -283,7 +387,6 @@
"EventName": "UNC_M_POWERDOWN_CYCLES.SCH0_RANK0",
"Experimental": "1",
"PerPkg": "1",
- "PublicDescription": "-",
"UMask": "0x1",
"Unit": "IMC"
},
@@ -294,7 +397,6 @@
"EventName": "UNC_M_POWERDOWN_CYCLES.SCH0_RANK1",
"Experimental": "1",
"PerPkg": "1",
- "PublicDescription": "-",
"UMask": "0x2",
"Unit": "IMC"
},
@@ -305,7 +407,6 @@
"EventName": "UNC_M_POWERDOWN_CYCLES.SCH0_RANK2",
"Experimental": "1",
"PerPkg": "1",
- "PublicDescription": "-",
"UMask": "0x4",
"Unit": "IMC"
},
@@ -316,7 +417,6 @@
"EventName": "UNC_M_POWERDOWN_CYCLES.SCH0_RANK3",
"Experimental": "1",
"PerPkg": "1",
- "PublicDescription": "-",
"UMask": "0x8",
"Unit": "IMC"
},
@@ -327,7 +427,6 @@
"EventName": "UNC_M_POWERDOWN_CYCLES.SCH1_RANK0",
"Experimental": "1",
"PerPkg": "1",
- "PublicDescription": "-",
"UMask": "0x10",
"Unit": "IMC"
},
@@ -338,7 +437,6 @@
"EventName": "UNC_M_POWERDOWN_CYCLES.SCH1_RANK1",
"Experimental": "1",
"PerPkg": "1",
- "PublicDescription": "-",
"UMask": "0x20",
"Unit": "IMC"
},
@@ -349,7 +447,6 @@
"EventName": "UNC_M_POWERDOWN_CYCLES.SCH1_RANK2",
"Experimental": "1",
"PerPkg": "1",
- "PublicDescription": "-",
"UMask": "0x40",
"Unit": "IMC"
},
@@ -360,7 +457,6 @@
"EventName": "UNC_M_POWERDOWN_CYCLES.SCH1_RANK3",
"Experimental": "1",
"PerPkg": "1",
- "PublicDescription": "-",
"UMask": "0x80",
"Unit": "IMC"
},
@@ -371,7 +467,6 @@
"EventName": "UNC_M_POWER_CHANNEL_PPD_CYCLES",
"Experimental": "1",
"PerPkg": "1",
- "PublicDescription": "-",
"Unit": "IMC"
},
{
@@ -381,7 +476,6 @@
"EventName": "UNC_M_POWER_CRITICAL_THROTTLE_CYCLES.SLOT0",
"Experimental": "1",
"PerPkg": "1",
- "PublicDescription": "-",
"UMask": "0x1",
"Unit": "IMC"
},
@@ -392,7 +486,6 @@
"EventName": "UNC_M_POWER_CRITICAL_THROTTLE_CYCLES.SLOT1",
"Experimental": "1",
"PerPkg": "1",
- "PublicDescription": "-",
"UMask": "0x2",
"Unit": "IMC"
},
@@ -423,7 +516,6 @@
"EventName": "UNC_M_POWER_THROTTLE_CYCLES.MR4BLKEN",
"Experimental": "1",
"PerPkg": "1",
- "PublicDescription": "-",
"UMask": "0x8",
"Unit": "IMC"
},
@@ -434,7 +526,6 @@
"EventName": "UNC_M_POWER_THROTTLE_CYCLES.RAPLBLK",
"Experimental": "1",
"PerPkg": "1",
- "PublicDescription": "-",
"UMask": "0x4",
"Unit": "IMC"
},
@@ -615,7 +706,6 @@
"EventName": "UNC_M_SELF_REFRESH.ENTER_SUCCESS",
"Experimental": "1",
"PerPkg": "1",
- "PublicDescription": "UNC_M_SELF_REFRESH.ENTER_SUCCESS",
"UMask": "0x2",
"Unit": "IMC"
},
@@ -626,7 +716,6 @@
"EventName": "UNC_M_SELF_REFRESH.ENTER_SUCCESS_CYCLES",
"Experimental": "1",
"PerPkg": "1",
- "PublicDescription": "-",
"UMask": "0x1",
"Unit": "IMC"
},
@@ -637,7 +726,6 @@
"EventName": "UNC_M_THROTTLE_CRIT_CYCLES.SLOT0",
"Experimental": "1",
"PerPkg": "1",
- "PublicDescription": "-",
"UMask": "0x1",
"Unit": "IMC"
},
@@ -648,7 +736,6 @@
"EventName": "UNC_M_THROTTLE_CRIT_CYCLES.SLOT1",
"Experimental": "1",
"PerPkg": "1",
- "PublicDescription": "-",
"UMask": "0x2",
"Unit": "IMC"
},
@@ -659,7 +746,6 @@
"EventName": "UNC_M_THROTTLE_HIGH_CYCLES.SLOT0",
"Experimental": "1",
"PerPkg": "1",
- "PublicDescription": "-",
"UMask": "0x1",
"Unit": "IMC"
},
@@ -670,7 +756,6 @@
"EventName": "UNC_M_THROTTLE_HIGH_CYCLES.SLOT1",
"Experimental": "1",
"PerPkg": "1",
- "PublicDescription": "-",
"UMask": "0x2",
"Unit": "IMC"
},
@@ -681,7 +766,6 @@
"EventName": "UNC_M_THROTTLE_LOW_CYCLES.SLOT0",
"Experimental": "1",
"PerPkg": "1",
- "PublicDescription": "-",
"UMask": "0x1",
"Unit": "IMC"
},
@@ -692,7 +776,6 @@
"EventName": "UNC_M_THROTTLE_LOW_CYCLES.SLOT1",
"Experimental": "1",
"PerPkg": "1",
- "PublicDescription": "-",
"UMask": "0x2",
"Unit": "IMC"
},
@@ -703,7 +786,6 @@
"EventName": "UNC_M_THROTTLE_MID_CYCLES.SLOT0",
"Experimental": "1",
"PerPkg": "1",
- "PublicDescription": "-",
"UMask": "0x1",
"Unit": "IMC"
},
@@ -714,7 +796,6 @@
"EventName": "UNC_M_THROTTLE_MID_CYCLES.SLOT1",
"Experimental": "1",
"PerPkg": "1",
- "PublicDescription": "-",
"UMask": "0x2",
"Unit": "IMC"
},
diff --git a/tools/perf/pmu-events/arch/x86/graniterapids/virtual-memory.json b/tools/perf/pmu-events/arch/x86/graniterapids/virtual-memory.json
index 3d3f88600e26..609a9549cbf3 100644
--- a/tools/perf/pmu-events/arch/x86/graniterapids/virtual-memory.json
+++ b/tools/perf/pmu-events/arch/x86/graniterapids/virtual-memory.json
@@ -4,7 +4,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x12",
"EventName": "DTLB_LOAD_MISSES.STLB_HIT",
- "PublicDescription": "Counts loads that miss the DTLB (Data TLB) and hit the STLB (Second level TLB). Available PDIST counters: 0",
+ "PublicDescription": "Counts loads that miss the DTLB (Data TLB) and hit the STLB (Second level TLB).",
"SampleAfterValue": "100003",
"UMask": "0x20"
},
@@ -14,7 +14,7 @@
"CounterMask": "1",
"EventCode": "0x12",
"EventName": "DTLB_LOAD_MISSES.WALK_ACTIVE",
- "PublicDescription": "Counts cycles when at least one PMH (Page Miss Handler) is busy with a page walk for a demand load. Available PDIST counters: 0",
+ "PublicDescription": "Counts cycles when at least one PMH (Page Miss Handler) is busy with a page walk for a demand load.",
"SampleAfterValue": "100003",
"UMask": "0x10"
},
@@ -23,7 +23,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x12",
"EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED",
- "PublicDescription": "Counts completed page walks (all page sizes) caused by demand data loads. This implies it missed in the DTLB and further levels of TLB. The page walk can end with or without a fault. Available PDIST counters: 0",
+ "PublicDescription": "Counts completed page walks (all page sizes) caused by demand data loads. This implies it missed in the DTLB and further levels of TLB. The page walk can end with or without a fault.",
"SampleAfterValue": "100003",
"UMask": "0xe"
},
@@ -32,7 +32,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x12",
"EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_1G",
- "PublicDescription": "Counts completed page walks (1G sizes) caused by demand data loads. This implies address translations missed in the DTLB and further levels of TLB. The page walk can end with or without a fault. Available PDIST counters: 0",
+ "PublicDescription": "Counts completed page walks (1G sizes) caused by demand data loads. This implies address translations missed in the DTLB and further levels of TLB. The page walk can end with or without a fault.",
"SampleAfterValue": "100003",
"UMask": "0x8"
},
@@ -41,7 +41,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x12",
"EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_2M_4M",
- "PublicDescription": "Counts completed page walks (2M/4M sizes) caused by demand data loads. This implies address translations missed in the DTLB and further levels of TLB. The page walk can end with or without a fault. Available PDIST counters: 0",
+ "PublicDescription": "Counts completed page walks (2M/4M sizes) caused by demand data loads. This implies address translations missed in the DTLB and further levels of TLB. The page walk can end with or without a fault.",
"SampleAfterValue": "100003",
"UMask": "0x4"
},
@@ -50,7 +50,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x12",
"EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_4K",
- "PublicDescription": "Counts completed page walks (4K sizes) caused by demand data loads. This implies address translations missed in the DTLB and further levels of TLB. The page walk can end with or without a fault. Available PDIST counters: 0",
+ "PublicDescription": "Counts completed page walks (4K sizes) caused by demand data loads. This implies address translations missed in the DTLB and further levels of TLB. The page walk can end with or without a fault.",
"SampleAfterValue": "100003",
"UMask": "0x2"
},
@@ -59,7 +59,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x12",
"EventName": "DTLB_LOAD_MISSES.WALK_PENDING",
- "PublicDescription": "Counts the number of page walks outstanding for a demand load in the PMH (Page Miss Handler) each cycle. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of page walks outstanding for a demand load in the PMH (Page Miss Handler) each cycle.",
"SampleAfterValue": "100003",
"UMask": "0x10"
},
@@ -68,7 +68,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x13",
"EventName": "DTLB_STORE_MISSES.STLB_HIT",
- "PublicDescription": "Counts stores that miss the DTLB (Data TLB) and hit the STLB (2nd Level TLB). Available PDIST counters: 0",
+ "PublicDescription": "Counts stores that miss the DTLB (Data TLB) and hit the STLB (2nd Level TLB).",
"SampleAfterValue": "100003",
"UMask": "0x20"
},
@@ -78,7 +78,7 @@
"CounterMask": "1",
"EventCode": "0x13",
"EventName": "DTLB_STORE_MISSES.WALK_ACTIVE",
- "PublicDescription": "Counts cycles when at least one PMH (Page Miss Handler) is busy with a page walk for a store. Available PDIST counters: 0",
+ "PublicDescription": "Counts cycles when at least one PMH (Page Miss Handler) is busy with a page walk for a store.",
"SampleAfterValue": "100003",
"UMask": "0x10"
},
@@ -87,7 +87,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x13",
"EventName": "DTLB_STORE_MISSES.WALK_COMPLETED",
- "PublicDescription": "Counts completed page walks (all page sizes) caused by demand data stores. This implies it missed in the DTLB and further levels of TLB. The page walk can end with or without a fault. Available PDIST counters: 0",
+ "PublicDescription": "Counts completed page walks (all page sizes) caused by demand data stores. This implies it missed in the DTLB and further levels of TLB. The page walk can end with or without a fault.",
"SampleAfterValue": "100003",
"UMask": "0xe"
},
@@ -96,7 +96,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x13",
"EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_1G",
- "PublicDescription": "Counts completed page walks (1G sizes) caused by demand data stores. This implies address translations missed in the DTLB and further levels of TLB. The page walk can end with or without a fault. Available PDIST counters: 0",
+ "PublicDescription": "Counts completed page walks (1G sizes) caused by demand data stores. This implies address translations missed in the DTLB and further levels of TLB. The page walk can end with or without a fault.",
"SampleAfterValue": "100003",
"UMask": "0x8"
},
@@ -105,7 +105,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x13",
"EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_2M_4M",
- "PublicDescription": "Counts completed page walks (2M/4M sizes) caused by demand data stores. This implies address translations missed in the DTLB and further levels of TLB. The page walk can end with or without a fault. Available PDIST counters: 0",
+ "PublicDescription": "Counts completed page walks (2M/4M sizes) caused by demand data stores. This implies address translations missed in the DTLB and further levels of TLB. The page walk can end with or without a fault.",
"SampleAfterValue": "100003",
"UMask": "0x4"
},
@@ -114,7 +114,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x13",
"EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_4K",
- "PublicDescription": "Counts completed page walks (4K sizes) caused by demand data stores. This implies address translations missed in the DTLB and further levels of TLB. The page walk can end with or without a fault. Available PDIST counters: 0",
+ "PublicDescription": "Counts completed page walks (4K sizes) caused by demand data stores. This implies address translations missed in the DTLB and further levels of TLB. The page walk can end with or without a fault.",
"SampleAfterValue": "100003",
"UMask": "0x2"
},
@@ -123,7 +123,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x13",
"EventName": "DTLB_STORE_MISSES.WALK_PENDING",
- "PublicDescription": "Counts the number of page walks outstanding for a store in the PMH (Page Miss Handler) each cycle. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of page walks outstanding for a store in the PMH (Page Miss Handler) each cycle.",
"SampleAfterValue": "100003",
"UMask": "0x10"
},
@@ -132,7 +132,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x11",
"EventName": "ITLB_MISSES.STLB_HIT",
- "PublicDescription": "Counts instruction fetch requests that miss the ITLB (Instruction TLB) and hit the STLB (Second-level TLB). Available PDIST counters: 0",
+ "PublicDescription": "Counts instruction fetch requests that miss the ITLB (Instruction TLB) and hit the STLB (Second-level TLB).",
"SampleAfterValue": "100003",
"UMask": "0x20"
},
@@ -142,7 +142,7 @@
"CounterMask": "1",
"EventCode": "0x11",
"EventName": "ITLB_MISSES.WALK_ACTIVE",
- "PublicDescription": "Counts cycles when at least one PMH (Page Miss Handler) is busy with a page walk for a code (instruction fetch) request. Available PDIST counters: 0",
+ "PublicDescription": "Counts cycles when at least one PMH (Page Miss Handler) is busy with a page walk for a code (instruction fetch) request.",
"SampleAfterValue": "100003",
"UMask": "0x10"
},
@@ -151,7 +151,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x11",
"EventName": "ITLB_MISSES.WALK_COMPLETED",
- "PublicDescription": "Counts completed page walks (all page sizes) caused by a code fetch. This implies it missed in the ITLB (Instruction TLB) and further levels of TLB. The page walk can end with or without a fault. Available PDIST counters: 0",
+ "PublicDescription": "Counts completed page walks (all page sizes) caused by a code fetch. This implies it missed in the ITLB (Instruction TLB) and further levels of TLB. The page walk can end with or without a fault.",
"SampleAfterValue": "100003",
"UMask": "0xe"
},
@@ -160,7 +160,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x11",
"EventName": "ITLB_MISSES.WALK_COMPLETED_2M_4M",
- "PublicDescription": "Counts completed page walks (2M/4M page sizes) caused by a code fetch. This implies it missed in the ITLB (Instruction TLB) and further levels of TLB. The page walk can end with or without a fault. Available PDIST counters: 0",
+ "PublicDescription": "Counts completed page walks (2M/4M page sizes) caused by a code fetch. This implies it missed in the ITLB (Instruction TLB) and further levels of TLB. The page walk can end with or without a fault.",
"SampleAfterValue": "100003",
"UMask": "0x4"
},
@@ -169,7 +169,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x11",
"EventName": "ITLB_MISSES.WALK_COMPLETED_4K",
- "PublicDescription": "Counts completed page walks (4K page sizes) caused by a code fetch. This implies it missed in the ITLB (Instruction TLB) and further levels of TLB. The page walk can end with or without a fault. Available PDIST counters: 0",
+ "PublicDescription": "Counts completed page walks (4K page sizes) caused by a code fetch. This implies it missed in the ITLB (Instruction TLB) and further levels of TLB. The page walk can end with or without a fault.",
"SampleAfterValue": "100003",
"UMask": "0x2"
},
@@ -178,7 +178,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x11",
"EventName": "ITLB_MISSES.WALK_PENDING",
- "PublicDescription": "Counts the number of page walks outstanding for an outstanding code (instruction fetch) request in the PMH (Page Miss Handler) each cycle. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of page walks outstanding for an outstanding code (instruction fetch) request in the PMH (Page Miss Handler) each cycle.",
"SampleAfterValue": "100003",
"UMask": "0x10"
}
diff --git a/tools/perf/pmu-events/arch/x86/haswell/hsw-metrics.json b/tools/perf/pmu-events/arch/x86/haswell/hsw-metrics.json
index b26ea70a3628..aebd82ced1cf 100644
--- a/tools/perf/pmu-events/arch/x86/haswell/hsw-metrics.json
+++ b/tools/perf/pmu-events/arch/x86/haswell/hsw-metrics.json
@@ -1,49 +1,49 @@
[
{
"BriefDescription": "C2 residency percent per package",
- "MetricExpr": "cstate_pkg@c2\\-residency@ / TSC",
+ "MetricExpr": "cstate_pkg@c2\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C2_Pkg_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C3 residency percent per core",
- "MetricExpr": "cstate_core@c3\\-residency@ / TSC",
+ "MetricExpr": "cstate_core@c3\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C3_Core_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C3 residency percent per package",
- "MetricExpr": "cstate_pkg@c3\\-residency@ / TSC",
+ "MetricExpr": "cstate_pkg@c3\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C3_Pkg_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C6 residency percent per core",
- "MetricExpr": "cstate_core@c6\\-residency@ / TSC",
+ "MetricExpr": "cstate_core@c6\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C6_Core_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C6 residency percent per package",
- "MetricExpr": "cstate_pkg@c6\\-residency@ / TSC",
+ "MetricExpr": "cstate_pkg@c6\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C6_Pkg_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C7 residency percent per core",
- "MetricExpr": "cstate_core@c7\\-residency@ / TSC",
+ "MetricExpr": "cstate_core@c7\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C7_Core_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C7 residency percent per package",
- "MetricExpr": "cstate_pkg@c7\\-residency@ / TSC",
+ "MetricExpr": "cstate_pkg@c7\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C7_Pkg_Residency",
"ScaleUnit": "100%"
@@ -80,7 +80,6 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution ports for ALU operations.",
- "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "(UOPS_DISPATCHED_PORT.PORT_0 + UOPS_DISPATCHED_PORT.PORT_1 + UOPS_DISPATCHED_PORT.PORT_5 + UOPS_DISPATCHED_PORT.PORT_6) / tma_info_thread_slots",
"MetricGroup": "TopdownL5;tma_L5_group;tma_ports_utilized_3m_group",
"MetricName": "tma_alu_op_utilization",
@@ -98,7 +97,6 @@
},
{
"BriefDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend",
- "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "1 - (tma_frontend_bound + tma_bad_speculation + tma_retiring)",
"MetricGroup": "BvOB;TmaL1;TopdownL1;tma_L1_group",
"MetricName": "tma_backend_bound",
@@ -139,7 +137,6 @@
},
{
"BriefDescription": "This metric estimates fraction of cycles the CPU retired uops originated from CISC (complex instruction set computer) instruction",
- "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "max(0, tma_microcode_sequencer - tma_assists)",
"MetricGroup": "TopdownL4;tma_L4_group;tma_microcode_sequencer_group",
"MetricName": "tma_cisc",
@@ -509,7 +506,7 @@
},
{
"BriefDescription": "Measured Average Core Frequency for unhalted processors [GHz]",
- "MetricExpr": "tma_info_system_turbo_utilization * TSC / 1e9 / tma_info_system_time",
+ "MetricExpr": "tma_info_system_turbo_utilization * msr@tsc@ / 1e9 / tma_info_system_time",
"MetricGroup": "Power;Summary",
"MetricName": "tma_info_system_core_frequency"
},
@@ -521,7 +518,7 @@
},
{
"BriefDescription": "Average number of utilized CPUs",
- "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / TSC",
+ "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / msr@tsc@",
"MetricGroup": "Summary",
"MetricName": "tma_info_system_cpus_utilized"
},
@@ -696,7 +693,6 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port for Load operations",
- "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "(UOPS_DISPATCHED_PORT.PORT_2 + UOPS_DISPATCHED_PORT.PORT_3 + UOPS_DISPATCHED_PORT.PORT_7 - UOPS_DISPATCHED_PORT.PORT_4) / (2 * tma_info_core_core_clks)",
"MetricGroup": "TopdownL5;tma_L5_group;tma_ports_utilized_3m_group",
"MetricName": "tma_load_op_utilization",
@@ -746,7 +742,7 @@
{
"BriefDescription": "This metric represents fraction of slots the Memory subsystem within the Backend was a bottleneck",
"MetricConstraint": "NO_GROUP_EVENTS",
- "MetricExpr": "((min(CPU_CLK_UNHALTED.THREAD, CYCLE_ACTIVITY.STALLS_LDM_PENDING) + RESOURCE_STALLS.SB) / (min(CPU_CLK_UNHALTED.THREAD, CYCLE_ACTIVITY.CYCLES_NO_EXECUTE) + (cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ - (cpu@UOPS_EXECUTED.CORE\\,cmask\\=3@ if tma_info_thread_ipc > 1.8 else cpu@UOPS_EXECUTED.CORE\\,cmask\\=2@)) / 2 - (RS_EVENTS.EMPTY_CYCLES if tma_fetch_latency > 0.1 else 0) + RESOURCE_STALLS.SB) if #SMT_on else min(CPU_CLK_UNHALTED.THREAD, CYCLE_ACTIVITY.CYCLES_NO_EXECUTE) + cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ - (cpu@UOPS_EXECUTED.CORE\\,cmask\\=3@ if tma_info_thread_ipc > 1.8 else cpu@UOPS_EXECUTED.CORE\\,cmask\\=2@) - (RS_EVENTS.EMPTY_CYCLES if tma_fetch_latency > 0.1 else 0) + RESOURCE_STALLS.SB) * tma_backend_bound",
+ "MetricExpr": "(min(CPU_CLK_UNHALTED.THREAD, CYCLE_ACTIVITY.STALLS_LDM_PENDING) + RESOURCE_STALLS.SB) / (min(CPU_CLK_UNHALTED.THREAD, CYCLE_ACTIVITY.CYCLES_NO_EXECUTE) + (cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ - (cpu@UOPS_EXECUTED.CORE\\,cmask\\=3@ if tma_info_thread_ipc > 1.8 else cpu@UOPS_EXECUTED.CORE\\,cmask\\=2@)) / 2 - (RS_EVENTS.EMPTY_CYCLES if tma_fetch_latency > 0.1 else 0) + RESOURCE_STALLS.SB if #SMT_on else min(CPU_CLK_UNHALTED.THREAD, CYCLE_ACTIVITY.CYCLES_NO_EXECUTE) + cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ - (cpu@UOPS_EXECUTED.CORE\\,cmask\\=3@ if tma_info_thread_ipc > 1.8 else cpu@UOPS_EXECUTED.CORE\\,cmask\\=2@) - (RS_EVENTS.EMPTY_CYCLES if tma_fetch_latency > 0.1 else 0) + RESOURCE_STALLS.SB) * tma_backend_bound",
"MetricGroup": "Backend;TmaL2;TopdownL2;tma_L2_group;tma_backend_bound_group",
"MetricName": "tma_memory_bound",
"MetricThreshold": "tma_memory_bound > 0.2 & tma_backend_bound > 0.2",
@@ -856,7 +852,7 @@
{
"BriefDescription": "This metric estimates fraction of cycles the CPU performance was potentially limited due to Core computation issues (non divider-related)",
"MetricConstraint": "NO_GROUP_EVENTS",
- "MetricExpr": "(min(CPU_CLK_UNHALTED.THREAD, CYCLE_ACTIVITY.CYCLES_NO_EXECUTE) + (cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ - (cpu@UOPS_EXECUTED.CORE\\,cmask\\=3@ if tma_info_thread_ipc > 1.8 else cpu@UOPS_EXECUTED.CORE\\,cmask\\=2@)) / 2 - (RS_EVENTS.EMPTY_CYCLES if tma_fetch_latency > 0.1 else 0) + RESOURCE_STALLS.SB if #SMT_on else min(CPU_CLK_UNHALTED.THREAD, CYCLE_ACTIVITY.CYCLES_NO_EXECUTE) + cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ - (cpu@UOPS_EXECUTED.CORE\\,cmask\\=3@ if tma_info_thread_ipc > 1.8 else cpu@UOPS_EXECUTED.CORE\\,cmask\\=2@) - (RS_EVENTS.EMPTY_CYCLES if tma_fetch_latency > 0.1 else 0) + RESOURCE_STALLS.SB - RESOURCE_STALLS.SB - min(CPU_CLK_UNHALTED.THREAD, CYCLE_ACTIVITY.STALLS_LDM_PENDING)) / tma_info_thread_clks",
+ "MetricExpr": "((min(CPU_CLK_UNHALTED.THREAD, CYCLE_ACTIVITY.CYCLES_NO_EXECUTE) + (cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ - (cpu@UOPS_EXECUTED.CORE\\,cmask\\=3@ if tma_info_thread_ipc > 1.8 else cpu@UOPS_EXECUTED.CORE\\,cmask\\=2@)) / 2 - (RS_EVENTS.EMPTY_CYCLES if tma_fetch_latency > 0.1 else 0) + RESOURCE_STALLS.SB if #SMT_on else min(CPU_CLK_UNHALTED.THREAD, CYCLE_ACTIVITY.CYCLES_NO_EXECUTE) + cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ - (cpu@UOPS_EXECUTED.CORE\\,cmask\\=3@ if tma_info_thread_ipc > 1.8 else cpu@UOPS_EXECUTED.CORE\\,cmask\\=2@) - (RS_EVENTS.EMPTY_CYCLES if tma_fetch_latency > 0.1 else 0) + RESOURCE_STALLS.SB) - RESOURCE_STALLS.SB - min(CPU_CLK_UNHALTED.THREAD, CYCLE_ACTIVITY.STALLS_LDM_PENDING)) / tma_info_thread_clks",
"MetricGroup": "PortsUtil;TopdownL3;tma_L3_group;tma_core_bound_group",
"MetricName": "tma_ports_utilization",
"MetricThreshold": "tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2)",
@@ -865,7 +861,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles CPU executed no uops on any execution port (Logical Processor cycles since ICL, Physical Core cycles otherwise)",
- "MetricExpr": "(cpu@UOPS_EXECUTED.CORE\\,inv\\,cmask\\=1@ / 2 if #SMT_on else (min(CPU_CLK_UNHALTED.THREAD, CYCLE_ACTIVITY.CYCLES_NO_EXECUTE) - (RS_EVENTS.EMPTY_CYCLES if tma_fetch_latency > 0.1 else 0)) / tma_info_core_core_clks)",
+ "MetricExpr": "(cpu@UOPS_EXECUTED.CORE\\,inv\\,cmask\\=1@ / 2 if #SMT_on else min(CPU_CLK_UNHALTED.THREAD, CYCLE_ACTIVITY.CYCLES_NO_EXECUTE) - (RS_EVENTS.EMPTY_CYCLES if tma_fetch_latency > 0.1 else 0)) / tma_info_core_core_clks",
"MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_ports_utilization_group",
"MetricName": "tma_ports_utilized_0",
"MetricThreshold": "tma_ports_utilized_0 > 0.2 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))",
@@ -874,7 +870,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles where the CPU executed total of 1 uop per cycle on all execution ports (Logical Processor cycles since ICL, Physical Core cycles otherwise)",
- "MetricExpr": "((cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ - cpu@UOPS_EXECUTED.CORE\\,cmask\\=2@) / 2 if #SMT_on else (cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ - cpu@UOPS_EXECUTED.CORE\\,cmask\\=2@) / tma_info_core_core_clks)",
+ "MetricExpr": "((cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ - cpu@UOPS_EXECUTED.CORE\\,cmask\\=2@) / 2 if #SMT_on else cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ - cpu@UOPS_EXECUTED.CORE\\,cmask\\=2@) / tma_info_core_core_clks",
"MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_issueL1;tma_ports_utilization_group",
"MetricName": "tma_ports_utilized_1",
"MetricThreshold": "tma_ports_utilized_1 > 0.2 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))",
@@ -883,7 +879,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles CPU executed total of 2 uops per cycle on all execution ports (Logical Processor cycles since ICL, Physical Core cycles otherwise)",
- "MetricExpr": "((cpu@UOPS_EXECUTED.CORE\\,cmask\\=2@ - cpu@UOPS_EXECUTED.CORE\\,cmask\\=3@) / 2 if #SMT_on else (cpu@UOPS_EXECUTED.CORE\\,cmask\\=2@ - cpu@UOPS_EXECUTED.CORE\\,cmask\\=3@) / tma_info_core_core_clks)",
+ "MetricExpr": "((cpu@UOPS_EXECUTED.CORE\\,cmask\\=2@ - cpu@UOPS_EXECUTED.CORE\\,cmask\\=3@) / 2 if #SMT_on else cpu@UOPS_EXECUTED.CORE\\,cmask\\=2@ - cpu@UOPS_EXECUTED.CORE\\,cmask\\=3@) / tma_info_core_core_clks",
"MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_issue2P;tma_ports_utilization_group",
"MetricName": "tma_ports_utilized_2",
"MetricThreshold": "tma_ports_utilized_2 > 0.15 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))",
diff --git a/tools/perf/pmu-events/arch/x86/haswellx/hsx-metrics.json b/tools/perf/pmu-events/arch/x86/haswellx/hsx-metrics.json
index 8245a98ad4b9..b8845f8a28b9 100644
--- a/tools/perf/pmu-events/arch/x86/haswellx/hsx-metrics.json
+++ b/tools/perf/pmu-events/arch/x86/haswellx/hsx-metrics.json
@@ -1,49 +1,49 @@
[
{
"BriefDescription": "C2 residency percent per package",
- "MetricExpr": "cstate_pkg@c2\\-residency@ / TSC",
+ "MetricExpr": "cstate_pkg@c2\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C2_Pkg_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C3 residency percent per core",
- "MetricExpr": "cstate_core@c3\\-residency@ / TSC",
+ "MetricExpr": "cstate_core@c3\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C3_Core_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C3 residency percent per package",
- "MetricExpr": "cstate_pkg@c3\\-residency@ / TSC",
+ "MetricExpr": "cstate_pkg@c3\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C3_Pkg_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C6 residency percent per core",
- "MetricExpr": "cstate_core@c6\\-residency@ / TSC",
+ "MetricExpr": "cstate_core@c6\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C6_Core_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C6 residency percent per package",
- "MetricExpr": "cstate_pkg@c6\\-residency@ / TSC",
+ "MetricExpr": "cstate_pkg@c6\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C6_Pkg_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C7 residency percent per core",
- "MetricExpr": "cstate_core@c7\\-residency@ / TSC",
+ "MetricExpr": "cstate_core@c7\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C7_Core_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C7 residency percent per package",
- "MetricExpr": "cstate_pkg@c7\\-residency@ / TSC",
+ "MetricExpr": "cstate_pkg@c7\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C7_Pkg_Residency",
"ScaleUnit": "100%"
@@ -282,7 +282,6 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution ports for ALU operations.",
- "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "(UOPS_DISPATCHED_PORT.PORT_0 + UOPS_DISPATCHED_PORT.PORT_1 + UOPS_DISPATCHED_PORT.PORT_5 + UOPS_DISPATCHED_PORT.PORT_6) / tma_info_thread_slots",
"MetricGroup": "TopdownL5;tma_L5_group;tma_ports_utilized_3m_group",
"MetricName": "tma_alu_op_utilization",
@@ -300,7 +299,6 @@
},
{
"BriefDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend",
- "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "1 - (tma_frontend_bound + tma_bad_speculation + tma_retiring)",
"MetricGroup": "BvOB;TmaL1;TopdownL1;tma_L1_group",
"MetricName": "tma_backend_bound",
@@ -341,7 +339,6 @@
},
{
"BriefDescription": "This metric estimates fraction of cycles the CPU retired uops originated from CISC (complex instruction set computer) instruction",
- "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "max(0, tma_microcode_sequencer - tma_assists)",
"MetricGroup": "TopdownL4;tma_L4_group;tma_microcode_sequencer_group",
"MetricName": "tma_cisc",
@@ -711,7 +708,7 @@
},
{
"BriefDescription": "Measured Average Core Frequency for unhalted processors [GHz]",
- "MetricExpr": "tma_info_system_turbo_utilization * TSC / 1e9 / tma_info_system_time",
+ "MetricExpr": "tma_info_system_turbo_utilization * msr@tsc@ / 1e9 / tma_info_system_time",
"MetricGroup": "Power;Summary",
"MetricName": "tma_info_system_core_frequency"
},
@@ -723,7 +720,7 @@
},
{
"BriefDescription": "Average number of utilized CPUs",
- "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / TSC",
+ "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / msr@tsc@",
"MetricGroup": "Summary",
"MetricName": "tma_info_system_cpus_utilized"
},
@@ -756,6 +753,7 @@
},
{
"BriefDescription": "Average number of parallel data read requests to external memory",
+ "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "UNC_C_TOR_OCCUPANCY.MISS_OPCODE@filter_opc\\=0x182@ / UNC_C_TOR_OCCUPANCY.MISS_OPCODE@filter_opc\\=0x182\\,thresh\\=1@",
"MetricGroup": "Mem;MemoryBW;SoC",
"MetricName": "tma_info_system_mem_parallel_reads",
@@ -918,7 +916,6 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port for Load operations",
- "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "(UOPS_DISPATCHED_PORT.PORT_2 + UOPS_DISPATCHED_PORT.PORT_3 + UOPS_DISPATCHED_PORT.PORT_7 - UOPS_DISPATCHED_PORT.PORT_4) / (2 * tma_info_core_core_clks)",
"MetricGroup": "TopdownL5;tma_L5_group;tma_ports_utilized_3m_group",
"MetricName": "tma_load_op_utilization",
@@ -928,6 +925,7 @@
},
{
"BriefDescription": "This metric estimates fraction of cycles while the memory subsystem was handling loads from local memory",
+ "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "200 * (MEM_LOAD_UOPS_L3_MISS_RETIRED.LOCAL_DRAM * (1 + MEM_LOAD_UOPS_RETIRED.HIT_LFB / (MEM_LOAD_UOPS_RETIRED.L2_HIT + MEM_LOAD_UOPS_RETIRED.L3_HIT + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HITM + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_MISS + MEM_LOAD_UOPS_L3_MISS_RETIRED.LOCAL_DRAM + MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_DRAM + MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_HITM + MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_FWD))) / tma_info_thread_clks",
"MetricGroup": "Server;TopdownL5;tma_L5_group;tma_mem_latency_group",
"MetricName": "tma_local_mem",
@@ -977,7 +975,7 @@
{
"BriefDescription": "This metric represents fraction of slots the Memory subsystem within the Backend was a bottleneck",
"MetricConstraint": "NO_GROUP_EVENTS",
- "MetricExpr": "((min(CPU_CLK_UNHALTED.THREAD, CYCLE_ACTIVITY.STALLS_LDM_PENDING) + RESOURCE_STALLS.SB) / (min(CPU_CLK_UNHALTED.THREAD, CYCLE_ACTIVITY.CYCLES_NO_EXECUTE) + (cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ - (cpu@UOPS_EXECUTED.CORE\\,cmask\\=3@ if tma_info_thread_ipc > 1.8 else cpu@UOPS_EXECUTED.CORE\\,cmask\\=2@)) / 2 - (RS_EVENTS.EMPTY_CYCLES if tma_fetch_latency > 0.1 else 0) + RESOURCE_STALLS.SB) if #SMT_on else min(CPU_CLK_UNHALTED.THREAD, CYCLE_ACTIVITY.CYCLES_NO_EXECUTE) + cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ - (cpu@UOPS_EXECUTED.CORE\\,cmask\\=3@ if tma_info_thread_ipc > 1.8 else cpu@UOPS_EXECUTED.CORE\\,cmask\\=2@) - (RS_EVENTS.EMPTY_CYCLES if tma_fetch_latency > 0.1 else 0) + RESOURCE_STALLS.SB) * tma_backend_bound",
+ "MetricExpr": "(min(CPU_CLK_UNHALTED.THREAD, CYCLE_ACTIVITY.STALLS_LDM_PENDING) + RESOURCE_STALLS.SB) / (min(CPU_CLK_UNHALTED.THREAD, CYCLE_ACTIVITY.CYCLES_NO_EXECUTE) + (cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ - (cpu@UOPS_EXECUTED.CORE\\,cmask\\=3@ if tma_info_thread_ipc > 1.8 else cpu@UOPS_EXECUTED.CORE\\,cmask\\=2@)) / 2 - (RS_EVENTS.EMPTY_CYCLES if tma_fetch_latency > 0.1 else 0) + RESOURCE_STALLS.SB if #SMT_on else min(CPU_CLK_UNHALTED.THREAD, CYCLE_ACTIVITY.CYCLES_NO_EXECUTE) + cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ - (cpu@UOPS_EXECUTED.CORE\\,cmask\\=3@ if tma_info_thread_ipc > 1.8 else cpu@UOPS_EXECUTED.CORE\\,cmask\\=2@) - (RS_EVENTS.EMPTY_CYCLES if tma_fetch_latency > 0.1 else 0) + RESOURCE_STALLS.SB) * tma_backend_bound",
"MetricGroup": "Backend;TmaL2;TopdownL2;tma_L2_group;tma_backend_bound_group",
"MetricName": "tma_memory_bound",
"MetricThreshold": "tma_memory_bound > 0.2 & tma_backend_bound > 0.2",
@@ -1087,7 +1085,7 @@
{
"BriefDescription": "This metric estimates fraction of cycles the CPU performance was potentially limited due to Core computation issues (non divider-related)",
"MetricConstraint": "NO_GROUP_EVENTS",
- "MetricExpr": "(min(CPU_CLK_UNHALTED.THREAD, CYCLE_ACTIVITY.CYCLES_NO_EXECUTE) + (cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ - (cpu@UOPS_EXECUTED.CORE\\,cmask\\=3@ if tma_info_thread_ipc > 1.8 else cpu@UOPS_EXECUTED.CORE\\,cmask\\=2@)) / 2 - (RS_EVENTS.EMPTY_CYCLES if tma_fetch_latency > 0.1 else 0) + RESOURCE_STALLS.SB if #SMT_on else min(CPU_CLK_UNHALTED.THREAD, CYCLE_ACTIVITY.CYCLES_NO_EXECUTE) + cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ - (cpu@UOPS_EXECUTED.CORE\\,cmask\\=3@ if tma_info_thread_ipc > 1.8 else cpu@UOPS_EXECUTED.CORE\\,cmask\\=2@) - (RS_EVENTS.EMPTY_CYCLES if tma_fetch_latency > 0.1 else 0) + RESOURCE_STALLS.SB - RESOURCE_STALLS.SB - min(CPU_CLK_UNHALTED.THREAD, CYCLE_ACTIVITY.STALLS_LDM_PENDING)) / tma_info_thread_clks",
+ "MetricExpr": "((min(CPU_CLK_UNHALTED.THREAD, CYCLE_ACTIVITY.CYCLES_NO_EXECUTE) + (cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ - (cpu@UOPS_EXECUTED.CORE\\,cmask\\=3@ if tma_info_thread_ipc > 1.8 else cpu@UOPS_EXECUTED.CORE\\,cmask\\=2@)) / 2 - (RS_EVENTS.EMPTY_CYCLES if tma_fetch_latency > 0.1 else 0) + RESOURCE_STALLS.SB if #SMT_on else min(CPU_CLK_UNHALTED.THREAD, CYCLE_ACTIVITY.CYCLES_NO_EXECUTE) + cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ - (cpu@UOPS_EXECUTED.CORE\\,cmask\\=3@ if tma_info_thread_ipc > 1.8 else cpu@UOPS_EXECUTED.CORE\\,cmask\\=2@) - (RS_EVENTS.EMPTY_CYCLES if tma_fetch_latency > 0.1 else 0) + RESOURCE_STALLS.SB) - RESOURCE_STALLS.SB - min(CPU_CLK_UNHALTED.THREAD, CYCLE_ACTIVITY.STALLS_LDM_PENDING)) / tma_info_thread_clks",
"MetricGroup": "PortsUtil;TopdownL3;tma_L3_group;tma_core_bound_group",
"MetricName": "tma_ports_utilization",
"MetricThreshold": "tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2)",
@@ -1096,7 +1094,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles CPU executed no uops on any execution port (Logical Processor cycles since ICL, Physical Core cycles otherwise)",
- "MetricExpr": "(cpu@UOPS_EXECUTED.CORE\\,inv\\,cmask\\=1@ / 2 if #SMT_on else (min(CPU_CLK_UNHALTED.THREAD, CYCLE_ACTIVITY.CYCLES_NO_EXECUTE) - (RS_EVENTS.EMPTY_CYCLES if tma_fetch_latency > 0.1 else 0)) / tma_info_core_core_clks)",
+ "MetricExpr": "(cpu@UOPS_EXECUTED.CORE\\,inv\\,cmask\\=1@ / 2 if #SMT_on else min(CPU_CLK_UNHALTED.THREAD, CYCLE_ACTIVITY.CYCLES_NO_EXECUTE) - (RS_EVENTS.EMPTY_CYCLES if tma_fetch_latency > 0.1 else 0)) / tma_info_core_core_clks",
"MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_ports_utilization_group",
"MetricName": "tma_ports_utilized_0",
"MetricThreshold": "tma_ports_utilized_0 > 0.2 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))",
@@ -1105,7 +1103,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles where the CPU executed total of 1 uop per cycle on all execution ports (Logical Processor cycles since ICL, Physical Core cycles otherwise)",
- "MetricExpr": "((cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ - cpu@UOPS_EXECUTED.CORE\\,cmask\\=2@) / 2 if #SMT_on else (cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ - cpu@UOPS_EXECUTED.CORE\\,cmask\\=2@) / tma_info_core_core_clks)",
+ "MetricExpr": "((cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ - cpu@UOPS_EXECUTED.CORE\\,cmask\\=2@) / 2 if #SMT_on else cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ - cpu@UOPS_EXECUTED.CORE\\,cmask\\=2@) / tma_info_core_core_clks",
"MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_issueL1;tma_ports_utilization_group",
"MetricName": "tma_ports_utilized_1",
"MetricThreshold": "tma_ports_utilized_1 > 0.2 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))",
@@ -1114,7 +1112,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles CPU executed total of 2 uops per cycle on all execution ports (Logical Processor cycles since ICL, Physical Core cycles otherwise)",
- "MetricExpr": "((cpu@UOPS_EXECUTED.CORE\\,cmask\\=2@ - cpu@UOPS_EXECUTED.CORE\\,cmask\\=3@) / 2 if #SMT_on else (cpu@UOPS_EXECUTED.CORE\\,cmask\\=2@ - cpu@UOPS_EXECUTED.CORE\\,cmask\\=3@) / tma_info_core_core_clks)",
+ "MetricExpr": "((cpu@UOPS_EXECUTED.CORE\\,cmask\\=2@ - cpu@UOPS_EXECUTED.CORE\\,cmask\\=3@) / 2 if #SMT_on else cpu@UOPS_EXECUTED.CORE\\,cmask\\=2@ - cpu@UOPS_EXECUTED.CORE\\,cmask\\=3@) / tma_info_core_core_clks",
"MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_issue2P;tma_ports_utilization_group",
"MetricName": "tma_ports_utilized_2",
"MetricThreshold": "tma_ports_utilized_2 > 0.15 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))",
@@ -1141,6 +1139,7 @@
},
{
"BriefDescription": "This metric estimates fraction of cycles while the memory subsystem was handling loads from remote memory",
+ "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "310 * (MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_DRAM * (1 + MEM_LOAD_UOPS_RETIRED.HIT_LFB / (MEM_LOAD_UOPS_RETIRED.L2_HIT + MEM_LOAD_UOPS_RETIRED.L3_HIT + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HITM + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_MISS + MEM_LOAD_UOPS_L3_MISS_RETIRED.LOCAL_DRAM + MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_DRAM + MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_HITM + MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_FWD))) / tma_info_thread_clks",
"MetricGroup": "Server;Snoop;TopdownL5;tma_L5_group;tma_mem_latency_group",
"MetricName": "tma_remote_mem",
diff --git a/tools/perf/pmu-events/arch/x86/icelake/icl-metrics.json b/tools/perf/pmu-events/arch/x86/icelake/icl-metrics.json
index c5bfdb2f288b..cf9ed3edb694 100644
--- a/tools/perf/pmu-events/arch/x86/icelake/icl-metrics.json
+++ b/tools/perf/pmu-events/arch/x86/icelake/icl-metrics.json
@@ -1,63 +1,63 @@
[
{
"BriefDescription": "C10 residency percent per package",
- "MetricExpr": "cstate_pkg@c10\\-residency@ / TSC",
+ "MetricExpr": "cstate_pkg@c10\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C10_Pkg_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C2 residency percent per package",
- "MetricExpr": "cstate_pkg@c2\\-residency@ / TSC",
+ "MetricExpr": "cstate_pkg@c2\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C2_Pkg_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C3 residency percent per package",
- "MetricExpr": "cstate_pkg@c3\\-residency@ / TSC",
+ "MetricExpr": "cstate_pkg@c3\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C3_Pkg_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C6 residency percent per core",
- "MetricExpr": "cstate_core@c6\\-residency@ / TSC",
+ "MetricExpr": "cstate_core@c6\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C6_Core_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C6 residency percent per package",
- "MetricExpr": "cstate_pkg@c6\\-residency@ / TSC",
+ "MetricExpr": "cstate_pkg@c6\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C6_Pkg_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C7 residency percent per core",
- "MetricExpr": "cstate_core@c7\\-residency@ / TSC",
+ "MetricExpr": "cstate_core@c7\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C7_Core_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C7 residency percent per package",
- "MetricExpr": "cstate_pkg@c7\\-residency@ / TSC",
+ "MetricExpr": "cstate_pkg@c7\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C7_Pkg_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C8 residency percent per package",
- "MetricExpr": "cstate_pkg@c8\\-residency@ / TSC",
+ "MetricExpr": "cstate_pkg@c8\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C8_Pkg_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C9 residency percent per package",
- "MetricExpr": "cstate_pkg@c9\\-residency@ / TSC",
+ "MetricExpr": "cstate_pkg@c9\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C9_Pkg_Residency",
"ScaleUnit": "100%"
@@ -85,7 +85,6 @@
},
{
"BriefDescription": "This metric estimates how often memory load accesses were aliased by preceding stores (in program order) with a 4K address offset",
- "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "LD_BLOCKS_PARTIAL.ADDRESS_ALIAS / tma_info_thread_clks",
"MetricGroup": "TopdownL4;tma_L4_group;tma_l1_bound_group",
"MetricName": "tma_4k_aliasing",
@@ -134,6 +133,7 @@
},
{
"BriefDescription": "Total pipeline cost of instruction fetch related bottlenecks by large code footprint programs (i-side cache; TLB and BTB misses)",
+ "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "100 * tma_fetch_latency * (tma_itlb_misses + tma_icache_misses + tma_unknown_branches) / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches)",
"MetricGroup": "BigFootprint;BvBC;Fed;Frontend;IcMiss;MemoryTLB",
"MetricName": "tma_bottleneck_big_code",
@@ -148,39 +148,44 @@
"PublicDescription": "Total pipeline cost of instructions used for program control-flow - a subset of the Retiring category in TMA. Examples include function calls; loops and alignments. (A lower bound)"
},
{
+ "BriefDescription": "Total pipeline cost when the execution is compute-bound - an estimation",
+ "MetricConstraint": "NO_GROUP_EVENTS",
+ "MetricExpr": "100 * (tma_core_bound * tma_divider / (tma_divider + tma_ports_utilization + tma_serializing_operation) + tma_core_bound * (tma_ports_utilization / (tma_divider + tma_ports_utilization + tma_serializing_operation)) * (tma_ports_utilized_3m / (tma_ports_utilized_0 + tma_ports_utilized_1 + tma_ports_utilized_2 + tma_ports_utilized_3m)))",
+ "MetricGroup": "BvCB;Cor;tma_issueComp",
+ "MetricName": "tma_bottleneck_compute_bound_est",
+ "MetricThreshold": "tma_bottleneck_compute_bound_est > 20",
+ "PublicDescription": "Total pipeline cost when the execution is compute-bound - an estimation. Covers Core Bound when High ILP as well as when long-latency execution units are busy. Related metrics: "
+ },
+ {
"BriefDescription": "Total pipeline cost of external Memory- or Cache-Bandwidth related bottlenecks",
+ "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "100 * (tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_mem_bandwidth / (tma_mem_bandwidth + tma_mem_latency)) + tma_memory_bound * (tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_sq_full / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full)) + tma_memory_bound * (tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_fb_full / (tma_4k_aliasing + tma_dtlb_load + tma_fb_full + tma_l1_latency_dependency + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)))",
"MetricGroup": "BvMB;Mem;MemoryBW;Offcore;tma_issueBW",
- "MetricName": "tma_bottleneck_cache_memory_bandwidth",
- "MetricThreshold": "tma_bottleneck_cache_memory_bandwidth > 20",
+ "MetricName": "tma_bottleneck_data_cache_memory_bandwidth",
+ "MetricThreshold": "tma_bottleneck_data_cache_memory_bandwidth > 20",
"PublicDescription": "Total pipeline cost of external Memory- or Cache-Bandwidth related bottlenecks. Related metrics: tma_fb_full, tma_info_system_dram_bw_use, tma_mem_bandwidth, tma_sq_full"
},
{
"BriefDescription": "Total pipeline cost of external Memory- or Cache-Latency related bottlenecks",
+ "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "100 * (tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_mem_latency / (tma_mem_bandwidth + tma_mem_latency)) + tma_memory_bound * (tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_l3_hit_latency / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full)) + tma_memory_bound * tma_l2_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) + tma_memory_bound * (tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_l1_latency_dependency / (tma_4k_aliasing + tma_dtlb_load + tma_fb_full + tma_l1_latency_dependency + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)) + tma_memory_bound * (tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_lock_latency / (tma_4k_aliasing + tma_dtlb_load + tma_fb_full + tma_l1_latency_dependency + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)) + tma_memory_bound * (tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_split_loads / (tma_4k_aliasing + tma_dtlb_load + tma_fb_full + tma_l1_latency_dependency + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)) + tma_memory_bound * (tma_store_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_split_stores / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency + tma_streaming_stores)) + tma_memory_bound * (tma_store_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_store_latency / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency + tma_streaming_stores)))",
"MetricGroup": "BvML;Mem;MemoryLat;Offcore;tma_issueLat",
- "MetricName": "tma_bottleneck_cache_memory_latency",
- "MetricThreshold": "tma_bottleneck_cache_memory_latency > 20",
+ "MetricName": "tma_bottleneck_data_cache_memory_latency",
+ "MetricThreshold": "tma_bottleneck_data_cache_memory_latency > 20",
"PublicDescription": "Total pipeline cost of external Memory- or Cache-Latency related bottlenecks. Related metrics: tma_l3_hit_latency, tma_mem_latency"
},
{
- "BriefDescription": "Total pipeline cost when the execution is compute-bound - an estimation",
- "MetricExpr": "100 * (tma_core_bound * tma_divider / (tma_divider + tma_ports_utilization + tma_serializing_operation) + tma_core_bound * (tma_ports_utilization / (tma_divider + tma_ports_utilization + tma_serializing_operation)) * (tma_ports_utilized_3m / (tma_ports_utilized_0 + tma_ports_utilized_1 + tma_ports_utilized_2 + tma_ports_utilized_3m)))",
- "MetricGroup": "BvCB;Cor;tma_issueComp",
- "MetricName": "tma_bottleneck_compute_bound_est",
- "MetricThreshold": "tma_bottleneck_compute_bound_est > 20",
- "PublicDescription": "Total pipeline cost when the execution is compute-bound - an estimation. Covers Core Bound when High ILP as well as when long-latency execution units are busy. Related metrics: "
- },
- {
"BriefDescription": "Total pipeline cost of instruction fetch bandwidth related bottlenecks (when the front-end could not sustain operations delivery to the back-end)",
- "MetricExpr": "100 * (tma_frontend_bound - (1 - 10 * tma_microcode_sequencer * tma_other_mispredicts / tma_branch_mispredicts) * tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches) - tma_microcode_sequencer / (tma_few_uops_instructions + tma_microcode_sequencer) * (tma_assists / tma_microcode_sequencer) * (tma_fetch_latency * (tma_ms_switches + tma_branch_resteers * (tma_clears_resteers + 10 * tma_microcode_sequencer * tma_other_mispredicts / tma_branch_mispredicts * tma_mispredicts_resteers) / (tma_clears_resteers + tma_mispredicts_resteers + tma_unknown_branches)) / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches) + tma_fetch_bandwidth * tma_ms / (tma_dsb + tma_lsd + tma_mite + tma_ms))) - tma_bottleneck_big_code",
+ "MetricConstraint": "NO_GROUP_EVENTS",
+ "MetricExpr": "100 * (tma_frontend_bound - (1 - 10 * tma_microcode_sequencer * tma_other_mispredicts / tma_branch_mispredicts) * tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches) - tma_microcode_sequencer / (tma_few_uops_instructions + tma_microcode_sequencer) * (tma_assists / tma_microcode_sequencer) * (tma_fetch_latency * (tma_ms_switches + tma_branch_resteers * (tma_clears_resteers + 10 * tma_microcode_sequencer * tma_other_mispredicts / tma_branch_mispredicts * tma_mispredicts_resteers) / (tma_clears_resteers + tma_mispredicts_resteers + tma_unknown_branches)) / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches) + tma_ms)) - tma_bottleneck_big_code",
"MetricGroup": "BvFB;Fed;FetchBW;Frontend",
"MetricName": "tma_bottleneck_instruction_fetch_bw",
"MetricThreshold": "tma_bottleneck_instruction_fetch_bw > 20"
},
{
"BriefDescription": "Total pipeline cost of irregular execution (e.g",
- "MetricExpr": "100 * (tma_microcode_sequencer / (tma_few_uops_instructions + tma_microcode_sequencer) * (tma_assists / tma_microcode_sequencer) * (tma_fetch_latency * (tma_ms_switches + tma_branch_resteers * (tma_clears_resteers + 10 * tma_microcode_sequencer * tma_other_mispredicts / tma_branch_mispredicts * tma_mispredicts_resteers) / (tma_clears_resteers + tma_mispredicts_resteers + tma_unknown_branches)) / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches) + tma_fetch_bandwidth * tma_ms / (tma_dsb + tma_lsd + tma_mite + tma_ms)) + 10 * tma_microcode_sequencer * tma_other_mispredicts / tma_branch_mispredicts * tma_branch_mispredicts + tma_machine_clears * tma_other_nukes / tma_other_nukes + tma_core_bound * (tma_serializing_operation + tma_core_bound * RS_EVENTS.EMPTY_CYCLES / tma_info_thread_clks * tma_ports_utilized_0) / (tma_divider + tma_ports_utilization + tma_serializing_operation) + tma_microcode_sequencer / (tma_few_uops_instructions + tma_microcode_sequencer) * (tma_assists / tma_microcode_sequencer) * tma_heavy_operations)",
+ "MetricConstraint": "NO_GROUP_EVENTS",
+ "MetricExpr": "100 * (tma_microcode_sequencer / (tma_few_uops_instructions + tma_microcode_sequencer) * (tma_assists / tma_microcode_sequencer) * (tma_fetch_latency * (tma_ms_switches + tma_branch_resteers * (tma_clears_resteers + 10 * tma_microcode_sequencer * tma_other_mispredicts / tma_branch_mispredicts * tma_mispredicts_resteers) / (tma_clears_resteers + tma_mispredicts_resteers + tma_unknown_branches)) / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches) + tma_ms) + 10 * tma_microcode_sequencer * tma_other_mispredicts / tma_branch_mispredicts * tma_branch_mispredicts + tma_machine_clears * tma_other_nukes / tma_other_nukes + tma_core_bound * (tma_serializing_operation + tma_core_bound * RS_EVENTS.EMPTY_CYCLES / tma_info_thread_clks * tma_ports_utilized_0) / (tma_divider + tma_ports_utilization + tma_serializing_operation) + tma_microcode_sequencer / (tma_few_uops_instructions + tma_microcode_sequencer) * (tma_assists / tma_microcode_sequencer) * tma_heavy_operations)",
"MetricGroup": "Bad;BvIO;Cor;Ret;tma_issueMS",
"MetricName": "tma_bottleneck_irregular_overhead",
"MetricThreshold": "tma_bottleneck_irregular_overhead > 10",
@@ -188,6 +193,7 @@
},
{
"BriefDescription": "Total pipeline cost of Memory Address Translation related bottlenecks (data-side TLBs)",
+ "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "100 * (tma_memory_bound * (tma_l1_bound / max(tma_memory_bound, tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_dtlb_load / max(tma_l1_bound, tma_4k_aliasing + tma_dtlb_load + tma_fb_full + tma_l1_latency_dependency + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)) + tma_memory_bound * (tma_store_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_dtlb_store / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency + tma_streaming_stores)))",
"MetricGroup": "BvMT;Mem;MemoryTLB;Offcore;tma_issueTLB",
"MetricName": "tma_bottleneck_memory_data_tlbs",
@@ -196,6 +202,7 @@
},
{
"BriefDescription": "Total pipeline cost of Memory Synchronization related bottlenecks (data transfers and coherency updates across processors)",
+ "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "100 * (tma_memory_bound * (tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_contested_accesses + tma_data_sharing) / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full) + tma_store_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * tma_false_sharing / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency + tma_streaming_stores - tma_store_latency)) + tma_machine_clears * (1 - tma_other_nukes / tma_other_nukes))",
"MetricGroup": "BvMS;LockCont;Mem;Offcore;tma_issueSyncxn",
"MetricName": "tma_bottleneck_memory_synchronization",
@@ -204,6 +211,7 @@
},
{
"BriefDescription": "Total pipeline cost of Branch Misprediction related bottlenecks",
+ "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "100 * (1 - 10 * tma_microcode_sequencer * tma_other_mispredicts / tma_branch_mispredicts) * (tma_branch_mispredicts + tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches))",
"MetricGroup": "Bad;BadSpec;BrMispredicts;BvMP;tma_issueBM",
"MetricName": "tma_bottleneck_mispredictions",
@@ -212,7 +220,8 @@
},
{
"BriefDescription": "Total pipeline cost of remaining bottlenecks in the back-end",
- "MetricExpr": "100 - (tma_bottleneck_big_code + tma_bottleneck_instruction_fetch_bw + tma_bottleneck_mispredictions + tma_bottleneck_cache_memory_bandwidth + tma_bottleneck_cache_memory_latency + tma_bottleneck_memory_data_tlbs + tma_bottleneck_memory_synchronization + tma_bottleneck_compute_bound_est + tma_bottleneck_irregular_overhead + tma_bottleneck_branching_overhead + tma_bottleneck_useful_work)",
+ "MetricConstraint": "NO_GROUP_EVENTS",
+ "MetricExpr": "100 - (tma_bottleneck_big_code + tma_bottleneck_instruction_fetch_bw + tma_bottleneck_mispredictions + tma_bottleneck_data_cache_memory_bandwidth + tma_bottleneck_data_cache_memory_latency + tma_bottleneck_memory_data_tlbs + tma_bottleneck_memory_synchronization + tma_bottleneck_compute_bound_est + tma_bottleneck_irregular_overhead + tma_bottleneck_branching_overhead + tma_bottleneck_useful_work)",
"MetricGroup": "BvOB;Cor;Offcore",
"MetricName": "tma_bottleneck_other_bottlenecks",
"MetricThreshold": "tma_bottleneck_other_bottlenecks > 20",
@@ -220,6 +229,7 @@
},
{
"BriefDescription": "Total pipeline cost of \"useful operations\" - the portion of Retiring category not covered by Branching_Overhead nor Irregular_Overhead.",
+ "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "100 * (tma_retiring - (BR_INST_RETIRED.ALL_BRANCHES + 2 * BR_INST_RETIRED.NEAR_CALL + INST_RETIRED.NOP) / tma_info_thread_slots - tma_microcode_sequencer / (tma_few_uops_instructions + tma_microcode_sequencer) * (tma_assists / tma_microcode_sequencer) * tma_heavy_operations)",
"MetricGroup": "BvUW;Ret",
"MetricName": "tma_bottleneck_useful_work",
@@ -427,7 +437,7 @@
"MetricGroup": "BvMB;MemoryBW;TopdownL4;tma_L4_group;tma_issueBW;tma_issueSL;tma_issueSmSt;tma_l1_bound_group",
"MetricName": "tma_fb_full",
"MetricThreshold": "tma_fb_full > 0.3",
- "PublicDescription": "This metric does a *rough estimation* of how often L1D Fill Buffer unavailability limited additional L1D miss memory access requests to proceed. The higher the metric value; the deeper the memory hierarchy level the misses are satisfied from (metric values >1 are valid). Often it hints on approaching bandwidth limits (to L2 cache; L3 cache or external memory). Related metrics: tma_bottleneck_cache_memory_bandwidth, tma_info_system_dram_bw_use, tma_mem_bandwidth, tma_sq_full, tma_store_latency, tma_streaming_stores",
+ "PublicDescription": "This metric does a *rough estimation* of how often L1D Fill Buffer unavailability limited additional L1D miss memory access requests to proceed. The higher the metric value; the deeper the memory hierarchy level the misses are satisfied from (metric values >1 are valid). Often it hints on approaching bandwidth limits (to L2 cache; L3 cache or external memory). Related metrics: tma_bottleneck_data_cache_memory_bandwidth, tma_info_system_dram_bw_use, tma_mem_bandwidth, tma_sq_full, tma_store_latency, tma_streaming_stores",
"ScaleUnit": "100%"
},
{
@@ -619,6 +629,7 @@
},
{
"BriefDescription": "Total pipeline cost of DSB (uop cache) hits - subset of the Instruction_Fetch_BW Bottleneck",
+ "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "100 * (tma_frontend_bound * (tma_fetch_bandwidth / (tma_fetch_bandwidth + tma_fetch_latency)) * (tma_dsb / (tma_dsb + tma_lsd + tma_mite + tma_ms)))",
"MetricGroup": "DSB;Fed;FetchBW;tma_issueFB",
"MetricName": "tma_info_botlnk_l2_dsb_bandwidth",
@@ -1068,7 +1079,7 @@
"MetricName": "tma_info_memory_tlb_store_stlb_mpki"
},
{
- "BriefDescription": "",
+ "BriefDescription": "Mem;Backend;CacheHits",
"MetricExpr": "UOPS_EXECUTED.THREAD / (UOPS_EXECUTED.CORE_CYCLES_GE_1 / 2 if #SMT_on else cpu@UOPS_EXECUTED.THREAD\\,cmask\\=1@)",
"MetricGroup": "Cor;Pipeline;PortsUtil;SMT",
"MetricName": "tma_info_pipeline_execute"
@@ -1092,6 +1103,12 @@
"MetricName": "tma_info_pipeline_fetch_mite"
},
{
+ "BriefDescription": "Average number of uops fetched from MS per cycle",
+ "MetricExpr": "IDQ.MS_UOPS / cpu@IDQ.MS_UOPS\\,cmask\\=1@",
+ "MetricGroup": "Fed;FetchLat;MicroSeq",
+ "MetricName": "tma_info_pipeline_fetch_ms"
+ },
+ {
"BriefDescription": "Instructions per a microcode Assist invocation",
"MetricExpr": "INST_RETIRED.ANY / ASSISTS.ANY",
"MetricGroup": "MicroSeq;Pipeline;Ret;Retire",
@@ -1107,7 +1124,7 @@
},
{
"BriefDescription": "Measured Average Core Frequency for unhalted processors [GHz]",
- "MetricExpr": "tma_info_system_turbo_utilization * TSC / 1e9 / tma_info_system_time",
+ "MetricExpr": "tma_info_system_turbo_utilization * msr@tsc@ / 1e9 / tma_info_system_time",
"MetricGroup": "Power;Summary",
"MetricName": "tma_info_system_core_frequency"
},
@@ -1119,7 +1136,7 @@
},
{
"BriefDescription": "Average number of utilized CPUs",
- "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / TSC",
+ "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / msr@tsc@",
"MetricGroup": "Summary",
"MetricName": "tma_info_system_cpus_utilized"
},
@@ -1128,7 +1145,7 @@
"MetricExpr": "64 * (UNC_ARB_TRK_REQUESTS.ALL + UNC_ARB_COH_TRK_REQUESTS.ALL) / 1e6 / tma_info_system_time / 1e3",
"MetricGroup": "HPC;MemOffcore;MemoryBW;SoC;tma_issueBW",
"MetricName": "tma_info_system_dram_bw_use",
- "PublicDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]. Related metrics: tma_bottleneck_cache_memory_bandwidth, tma_fb_full, tma_mem_bandwidth, tma_sq_full"
+ "PublicDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]. Related metrics: tma_bottleneck_data_cache_memory_bandwidth, tma_fb_full, tma_mem_bandwidth, tma_sq_full"
},
{
"BriefDescription": "Giga Floating Point Operations Per Second",
@@ -1296,12 +1313,12 @@
"ScaleUnit": "100%"
},
{
- "BriefDescription": "This metric([SKL+] roughly; [LNL]) estimates fraction of cycles with demand load accesses that hit the L1D cache",
+ "BriefDescription": "This metric ([SKL+] roughly; [LNL]) estimates fraction of cycles with demand load accesses that hit the L1D cache",
"MetricExpr": "min(2 * (MEM_INST_RETIRED.ALL_LOADS - MEM_LOAD_RETIRED.FB_HIT - MEM_LOAD_RETIRED.L1_MISS) * 20 / 100, max(CYCLE_ACTIVITY.CYCLES_MEM_ANY - CYCLE_ACTIVITY.CYCLES_L1D_MISS, 0)) / tma_info_thread_clks",
"MetricGroup": "BvML;MemoryLat;TopdownL4;tma_L4_group;tma_l1_bound_group",
"MetricName": "tma_l1_latency_dependency",
"MetricThreshold": "tma_l1_latency_dependency > 0.1 & (tma_l1_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
- "PublicDescription": "This metric([SKL+] roughly; [LNL]) estimates fraction of cycles with demand load accesses that hit the L1D cache. The short latency of the L1D cache may be exposed in pointer-chasing memory access patterns as an example. Sample with: MEM_LOAD_RETIRED.L1_HIT",
+ "PublicDescription": "This metric ([SKL+] roughly; [LNL]) estimates fraction of cycles with demand load accesses that hit the L1D cache. The short latency of the L1D cache may be exposed in pointer-chasing memory access patterns as an example. Sample with: MEM_LOAD_RETIRED.L1_HIT",
"ScaleUnit": "100%"
},
{
@@ -1325,7 +1342,6 @@
},
{
"BriefDescription": "This metric estimates how often the CPU was stalled due to loads accesses to L3 cache or contended with a sibling Core",
- "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "(CYCLE_ACTIVITY.STALLS_L2_MISS - CYCLE_ACTIVITY.STALLS_L3_MISS) / tma_info_thread_clks",
"MetricGroup": "CacheHits;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
"MetricName": "tma_l3_bound",
@@ -1339,7 +1355,7 @@
"MetricGroup": "BvML;MemoryLat;TopdownL4;tma_L4_group;tma_issueLat;tma_l3_bound_group",
"MetricName": "tma_l3_hit_latency",
"MetricThreshold": "tma_l3_hit_latency > 0.1 & (tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
- "PublicDescription": "This metric estimates fraction of cycles with demand load accesses that hit the L3 cache under unloaded scenarios (possibly L3 latency limited). Avoiding private cache misses (i.e. L2 misses/L3 hits) will improve the latency; reduce contention with sibling physical cores and increase performance. Note the value of this node may overlap with its siblings. Sample with: MEM_LOAD_RETIRED.L3_HIT_PS. Related metrics: tma_bottleneck_cache_memory_latency, tma_mem_latency",
+ "PublicDescription": "This metric estimates fraction of cycles with demand load accesses that hit the L3 cache under unloaded scenarios (possibly L3 latency limited). Avoiding private cache misses (i.e. L2 misses/L3 hits) will improve the latency; reduce contention with sibling physical cores and increase performance. Note the value of this node may overlap with its siblings. Sample with: MEM_LOAD_RETIRED.L3_HIT_PS. Related metrics: tma_bottleneck_data_cache_memory_latency, tma_mem_latency",
"ScaleUnit": "100%"
},
{
@@ -1445,7 +1461,7 @@
"MetricGroup": "BvMB;MemoryBW;Offcore;TopdownL4;tma_L4_group;tma_dram_bound_group;tma_issueBW",
"MetricName": "tma_mem_bandwidth",
"MetricThreshold": "tma_mem_bandwidth > 0.2 & (tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
- "PublicDescription": "This metric estimates fraction of cycles where the core's performance was likely hurt due to approaching bandwidth limits of external memory - DRAM ([SPR-HBM] and/or HBM). The underlying heuristic assumes that a similar off-core traffic is generated by all IA cores. This metric does not aggregate non-data-read requests by this logical processor; requests from other IA Logical Processors/Physical Cores/sockets; or other non-IA devices like GPU; hence the maximum external memory bandwidth limits may or may not be approached when this metric is flagged (see Uncore counters for that). Related metrics: tma_bottleneck_cache_memory_bandwidth, tma_fb_full, tma_info_system_dram_bw_use, tma_sq_full",
+ "PublicDescription": "This metric estimates fraction of cycles where the core's performance was likely hurt due to approaching bandwidth limits of external memory - DRAM ([SPR-HBM] and/or HBM). The underlying heuristic assumes that a similar off-core traffic is generated by all IA cores. This metric does not aggregate non-data-read requests by this logical processor; requests from other IA Logical Processors/Physical Cores/sockets; or other non-IA devices like GPU; hence the maximum external memory bandwidth limits may or may not be approached when this metric is flagged (see Uncore counters for that). Related metrics: tma_bottleneck_data_cache_memory_bandwidth, tma_fb_full, tma_info_system_dram_bw_use, tma_sq_full",
"ScaleUnit": "100%"
},
{
@@ -1454,7 +1470,7 @@
"MetricGroup": "BvML;MemoryLat;Offcore;TopdownL4;tma_L4_group;tma_dram_bound_group;tma_issueLat",
"MetricName": "tma_mem_latency",
"MetricThreshold": "tma_mem_latency > 0.1 & (tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
- "PublicDescription": "This metric estimates fraction of cycles where the performance was likely hurt due to latency from external memory - DRAM ([SPR-HBM] and/or HBM). This metric does not aggregate requests from other Logical Processors/Physical Cores/sockets (see Uncore counters for that). Related metrics: tma_bottleneck_cache_memory_latency, tma_l3_hit_latency",
+ "PublicDescription": "This metric estimates fraction of cycles where the performance was likely hurt due to latency from external memory - DRAM ([SPR-HBM] and/or HBM). This metric does not aggregate requests from other Logical Processors/Physical Cores/sockets (see Uncore counters for that). Related metrics: tma_bottleneck_data_cache_memory_latency, tma_l3_hit_latency",
"ScaleUnit": "100%"
},
{
@@ -1522,7 +1538,7 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles in which CPU was likely limited due to the Microcode Sequencer (MS) unit - see Microcode_Sequencer node for details.",
- "MetricExpr": "cpu@IDQ.MS_UOPS\\,cmask\\=1@ / tma_info_core_core_clks / 2",
+ "MetricExpr": "cpu@IDQ.MS_UOPS\\,cmask\\=1@ / tma_info_core_core_clks / 3.3",
"MetricGroup": "MicroSeq;TopdownL3;tma_L3_group;tma_fetch_bandwidth_group",
"MetricName": "tma_ms",
"MetricThreshold": "tma_ms > 0.05 & tma_fetch_bandwidth > 0.2",
@@ -1656,7 +1672,7 @@
{
"BriefDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired",
"DefaultMetricgroupName": "TopdownL1",
- "MetricExpr": "topdown\\-retiring / (topdown\\-fe\\-bound + topdown\\-bad\\-spec + topdown\\-retiring + topdown\\-be\\-bound) + 0 * tma_info_thread_slots",
+ "MetricExpr": "topdown\\-retiring / (topdown\\-fe\\-bound + topdown\\-bad\\-spec + topdown\\-retiring + topdown\\-be\\-bound)",
"MetricGroup": "BvUW;Default;TmaL1;TopdownL1;tma_L1_group",
"MetricName": "tma_retiring",
"MetricThreshold": "tma_retiring > 0.7 | tma_heavy_operations > 0.1",
@@ -1693,7 +1709,6 @@
},
{
"BriefDescription": "This metric represents rate of split store accesses",
- "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "MEM_INST_RETIRED.SPLIT_STORES / tma_info_core_core_clks",
"MetricGroup": "TopdownL4;tma_L4_group;tma_issueSpSt;tma_store_bound_group",
"MetricName": "tma_split_stores",
@@ -1707,7 +1722,7 @@
"MetricGroup": "BvMB;MemoryBW;Offcore;TopdownL4;tma_L4_group;tma_issueBW;tma_l3_bound_group",
"MetricName": "tma_sq_full",
"MetricThreshold": "tma_sq_full > 0.3 & (tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
- "PublicDescription": "This metric measures fraction of cycles where the Super Queue (SQ) was full taking into account all request-types and both hardware SMT threads (Logical Processors). Related metrics: tma_bottleneck_cache_memory_bandwidth, tma_fb_full, tma_info_system_dram_bw_use, tma_mem_bandwidth",
+ "PublicDescription": "This metric measures fraction of cycles where the Super Queue (SQ) was full taking into account all request-types and both hardware SMT threads (Logical Processors). Related metrics: tma_bottleneck_data_cache_memory_bandwidth, tma_fb_full, tma_info_system_dram_bw_use, tma_mem_bandwidth",
"ScaleUnit": "100%"
},
{
@@ -1721,7 +1736,6 @@
},
{
"BriefDescription": "This metric roughly estimates fraction of cycles when the memory subsystem had loads blocked since they could not forward data from earlier (in program order) overlapping stores",
- "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "13 * LD_BLOCKS.STORE_FORWARD / tma_info_thread_clks",
"MetricGroup": "TopdownL4;tma_L4_group;tma_l1_bound_group",
"MetricName": "tma_store_fwd_blk",
diff --git a/tools/perf/pmu-events/arch/x86/icelakex/icx-metrics.json b/tools/perf/pmu-events/arch/x86/icelakex/icx-metrics.json
index a886a0cfee07..f58eec2a1788 100644
--- a/tools/perf/pmu-events/arch/x86/icelakex/icx-metrics.json
+++ b/tools/perf/pmu-events/arch/x86/icelakex/icx-metrics.json
@@ -1,28 +1,28 @@
[
{
"BriefDescription": "C1 residency percent per core",
- "MetricExpr": "cstate_core@c1\\-residency@ / TSC",
+ "MetricExpr": "cstate_core@c1\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C1_Core_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C2 residency percent per package",
- "MetricExpr": "cstate_pkg@c2\\-residency@ / TSC",
+ "MetricExpr": "cstate_pkg@c2\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C2_Pkg_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C6 residency percent per core",
- "MetricExpr": "cstate_core@c6\\-residency@ / TSC",
+ "MetricExpr": "cstate_core@c6\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C6_Core_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C6 residency percent per package",
- "MetricExpr": "cstate_pkg@c6\\-residency@ / TSC",
+ "MetricExpr": "cstate_pkg@c6\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C6_Pkg_Residency",
"ScaleUnit": "100%"
@@ -79,6 +79,12 @@
"ScaleUnit": "1MB/s"
},
{
+ "BriefDescription": "Bandwidth of inbound IO reads that are initiated by end device controllers that are requesting memory from the CPU and miss the L3 cache",
+ "MetricExpr": "UNC_CHA_TOR_INSERTS.IO_MISS_PCIRDCUR * 64 / 1e6 / duration_time",
+ "MetricName": "io_bandwidth_read_l3_miss",
+ "ScaleUnit": "1MB/s"
+ },
+ {
"BriefDescription": "Bandwidth of IO reads that are initiated by end device controllers that are requesting memory from the local CPU socket",
"MetricExpr": "UNC_CHA_TOR_INSERTS.IO_PCIRDCUR_LOCAL * 64 / 1e6 / duration_time",
"MetricName": "io_bandwidth_read_local",
@@ -97,6 +103,12 @@
"ScaleUnit": "1MB/s"
},
{
+ "BriefDescription": "Bandwidth of inbound IO writes that are initiated by end device controllers that are writing memory to the CPU",
+ "MetricExpr": "(UNC_CHA_TOR_INSERTS.IO_MISS_ITOM + UNC_CHA_TOR_INSERTS.IO_MISS_ITOMCACHENEAR) * 64 / 1e6 / duration_time",
+ "MetricName": "io_bandwidth_write_l3_miss",
+ "ScaleUnit": "1MB/s"
+ },
+ {
"BriefDescription": "Bandwidth of IO writes that are initiated by end device controllers that are writing memory to the local CPU socket",
"MetricExpr": "(UNC_CHA_TOR_INSERTS.IO_ITOM_LOCAL + UNC_CHA_TOR_INSERTS.IO_ITOMCACHENEAR_LOCAL) * 64 / 1e6 / duration_time",
"MetricName": "io_bandwidth_write_local",
@@ -109,6 +121,24 @@
"ScaleUnit": "1MB/s"
},
{
+ "BriefDescription": "The percent of inbound full cache line writes initiated by IO that miss the L3 cache",
+ "MetricExpr": "UNC_CHA_TOR_INSERTS.IO_MISS_ITOM / UNC_CHA_TOR_INSERTS.IO_ITOM",
+ "MetricName": "io_full_write_l3_miss",
+ "ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "The percent of inbound partial writes initiated by IO that miss the L3 cache",
+ "MetricExpr": "(UNC_CHA_TOR_INSERTS.IO_MISS_ITOMCACHENEAR + UNC_CHA_TOR_INSERTS.IO_MISS_RFO) / (UNC_CHA_TOR_INSERTS.IO_ITOMCACHENEAR + UNC_CHA_TOR_INSERTS.IO_RFO)",
+ "MetricName": "io_partial_write_l3_miss",
+ "ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "The percent of inbound reads initiated by IO that miss the L3 cache",
+ "MetricExpr": "UNC_CHA_TOR_INSERTS.IO_MISS_PCIRDCUR / UNC_CHA_TOR_INSERTS.IO_PCIRDCUR",
+ "MetricName": "io_read_l3_miss",
+ "ScaleUnit": "100%"
+ },
+ {
"BriefDescription": "Ratio of number of completed page walks (for 2 megabyte and 4 megabyte page sizes) caused by a code fetch to the total number of completed instructions",
"MetricExpr": "ITLB_MISSES.WALK_COMPLETED_2M_4M / INST_RETIRED.ANY",
"MetricName": "itlb_2nd_level_large_page_mpi",
@@ -331,7 +361,6 @@
},
{
"BriefDescription": "This metric estimates how often memory load accesses were aliased by preceding stores (in program order) with a 4K address offset",
- "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "LD_BLOCKS_PARTIAL.ADDRESS_ALIAS / tma_info_thread_clks",
"MetricGroup": "TopdownL4;tma_L4_group;tma_l1_bound_group",
"MetricName": "tma_4k_aliasing",
@@ -380,6 +409,7 @@
},
{
"BriefDescription": "Total pipeline cost of instruction fetch related bottlenecks by large code footprint programs (i-side cache; TLB and BTB misses)",
+ "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "100 * tma_fetch_latency * (tma_itlb_misses + tma_icache_misses + tma_unknown_branches) / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches)",
"MetricGroup": "BigFootprint;BvBC;Fed;Frontend;IcMiss;MemoryTLB",
"MetricName": "tma_bottleneck_big_code",
@@ -394,39 +424,44 @@
"PublicDescription": "Total pipeline cost of instructions used for program control-flow - a subset of the Retiring category in TMA. Examples include function calls; loops and alignments. (A lower bound)"
},
{
+ "BriefDescription": "Total pipeline cost when the execution is compute-bound - an estimation",
+ "MetricConstraint": "NO_GROUP_EVENTS",
+ "MetricExpr": "100 * (tma_core_bound * tma_divider / (tma_divider + tma_ports_utilization + tma_serializing_operation) + tma_core_bound * (tma_ports_utilization / (tma_divider + tma_ports_utilization + tma_serializing_operation)) * (tma_ports_utilized_3m / (tma_ports_utilized_0 + tma_ports_utilized_1 + tma_ports_utilized_2 + tma_ports_utilized_3m)))",
+ "MetricGroup": "BvCB;Cor;tma_issueComp",
+ "MetricName": "tma_bottleneck_compute_bound_est",
+ "MetricThreshold": "tma_bottleneck_compute_bound_est > 20",
+ "PublicDescription": "Total pipeline cost when the execution is compute-bound - an estimation. Covers Core Bound when High ILP as well as when long-latency execution units are busy. Related metrics: "
+ },
+ {
"BriefDescription": "Total pipeline cost of external Memory- or Cache-Bandwidth related bottlenecks",
- "MetricExpr": "100 * (tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_mem_bandwidth / (tma_mem_bandwidth + tma_mem_latency)) + tma_memory_bound * (tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_sq_full / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full)) + tma_memory_bound * (tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_fb_full / (tma_4k_aliasing + tma_dtlb_load + tma_fb_full + tma_l1_latency_dependency + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)))",
+ "MetricConstraint": "NO_GROUP_EVENTS",
+ "MetricExpr": "100 * (tma_memory_bound * (tma_dram_bound / (tma_cxl_mem_bound + tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_mem_bandwidth / (tma_mem_bandwidth + tma_mem_latency)) + tma_memory_bound * (tma_l3_bound / (tma_cxl_mem_bound + tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_sq_full / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full)) + tma_memory_bound * (tma_l1_bound / (tma_cxl_mem_bound + tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_fb_full / (tma_4k_aliasing + tma_dtlb_load + tma_fb_full + tma_l1_latency_dependency + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)))",
"MetricGroup": "BvMB;Mem;MemoryBW;Offcore;tma_issueBW",
- "MetricName": "tma_bottleneck_cache_memory_bandwidth",
- "MetricThreshold": "tma_bottleneck_cache_memory_bandwidth > 20",
+ "MetricName": "tma_bottleneck_data_cache_memory_bandwidth",
+ "MetricThreshold": "tma_bottleneck_data_cache_memory_bandwidth > 20",
"PublicDescription": "Total pipeline cost of external Memory- or Cache-Bandwidth related bottlenecks. Related metrics: tma_fb_full, tma_info_system_dram_bw_use, tma_mem_bandwidth, tma_sq_full"
},
{
"BriefDescription": "Total pipeline cost of external Memory- or Cache-Latency related bottlenecks",
- "MetricExpr": "100 * (tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_mem_latency / (tma_mem_bandwidth + tma_mem_latency)) + tma_memory_bound * (tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_l3_hit_latency / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full)) + tma_memory_bound * tma_l2_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) + tma_memory_bound * (tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_l1_latency_dependency / (tma_4k_aliasing + tma_dtlb_load + tma_fb_full + tma_l1_latency_dependency + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)) + tma_memory_bound * (tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_lock_latency / (tma_4k_aliasing + tma_dtlb_load + tma_fb_full + tma_l1_latency_dependency + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)) + tma_memory_bound * (tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_split_loads / (tma_4k_aliasing + tma_dtlb_load + tma_fb_full + tma_l1_latency_dependency + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)) + tma_memory_bound * (tma_store_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_split_stores / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency + tma_streaming_stores)) + tma_memory_bound * (tma_store_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_store_latency / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency + tma_streaming_stores)))",
+ "MetricConstraint": "NO_GROUP_EVENTS",
+ "MetricExpr": "100 * (tma_memory_bound * (tma_dram_bound / (tma_cxl_mem_bound + tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_mem_latency / (tma_mem_bandwidth + tma_mem_latency)) + tma_memory_bound * (tma_l3_bound / (tma_cxl_mem_bound + tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_l3_hit_latency / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full)) + tma_memory_bound * tma_l2_bound / (tma_cxl_mem_bound + tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) + tma_memory_bound * (tma_l1_bound / (tma_cxl_mem_bound + tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_l1_latency_dependency / (tma_4k_aliasing + tma_dtlb_load + tma_fb_full + tma_l1_latency_dependency + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)) + tma_memory_bound * (tma_l1_bound / (tma_cxl_mem_bound + tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_lock_latency / (tma_4k_aliasing + tma_dtlb_load + tma_fb_full + tma_l1_latency_dependency + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)) + tma_memory_bound * (tma_l1_bound / (tma_cxl_mem_bound + tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_split_loads / (tma_4k_aliasing + tma_dtlb_load + tma_fb_full + tma_l1_latency_dependency + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)) + tma_memory_bound * (tma_store_bound / (tma_cxl_mem_bound + tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_split_stores / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency + tma_streaming_stores)) + tma_memory_bound * (tma_store_bound / (tma_cxl_mem_bound + tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_store_latency / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency + tma_streaming_stores)))",
"MetricGroup": "BvML;Mem;MemoryLat;Offcore;tma_issueLat",
- "MetricName": "tma_bottleneck_cache_memory_latency",
- "MetricThreshold": "tma_bottleneck_cache_memory_latency > 20",
+ "MetricName": "tma_bottleneck_data_cache_memory_latency",
+ "MetricThreshold": "tma_bottleneck_data_cache_memory_latency > 20",
"PublicDescription": "Total pipeline cost of external Memory- or Cache-Latency related bottlenecks. Related metrics: tma_l3_hit_latency, tma_mem_latency"
},
{
- "BriefDescription": "Total pipeline cost when the execution is compute-bound - an estimation",
- "MetricExpr": "100 * (tma_core_bound * tma_divider / (tma_divider + tma_ports_utilization + tma_serializing_operation) + tma_core_bound * (tma_ports_utilization / (tma_divider + tma_ports_utilization + tma_serializing_operation)) * (tma_ports_utilized_3m / (tma_ports_utilized_0 + tma_ports_utilized_1 + tma_ports_utilized_2 + tma_ports_utilized_3m)))",
- "MetricGroup": "BvCB;Cor;tma_issueComp",
- "MetricName": "tma_bottleneck_compute_bound_est",
- "MetricThreshold": "tma_bottleneck_compute_bound_est > 20",
- "PublicDescription": "Total pipeline cost when the execution is compute-bound - an estimation. Covers Core Bound when High ILP as well as when long-latency execution units are busy. Related metrics: "
- },
- {
"BriefDescription": "Total pipeline cost of instruction fetch bandwidth related bottlenecks (when the front-end could not sustain operations delivery to the back-end)",
- "MetricExpr": "100 * (tma_frontend_bound - (1 - 10 * tma_microcode_sequencer * tma_other_mispredicts / tma_branch_mispredicts) * tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches) - tma_microcode_sequencer / (tma_few_uops_instructions + tma_microcode_sequencer) * (tma_assists / tma_microcode_sequencer) * (tma_fetch_latency * (tma_ms_switches + tma_branch_resteers * (tma_clears_resteers + 10 * tma_microcode_sequencer * tma_other_mispredicts / tma_branch_mispredicts * tma_mispredicts_resteers) / (tma_clears_resteers + tma_mispredicts_resteers + tma_unknown_branches)) / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches) + tma_fetch_bandwidth * tma_ms / (tma_dsb + tma_mite + tma_ms))) - tma_bottleneck_big_code",
+ "MetricConstraint": "NO_GROUP_EVENTS",
+ "MetricExpr": "100 * (tma_frontend_bound - (1 - 10 * tma_microcode_sequencer * tma_other_mispredicts / tma_branch_mispredicts) * tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches) - tma_microcode_sequencer / (tma_few_uops_instructions + tma_microcode_sequencer) * (tma_assists / tma_microcode_sequencer) * (tma_fetch_latency * (tma_ms_switches + tma_branch_resteers * (tma_clears_resteers + 10 * tma_microcode_sequencer * tma_other_mispredicts / tma_branch_mispredicts * tma_mispredicts_resteers) / (tma_clears_resteers + tma_mispredicts_resteers + tma_unknown_branches)) / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches) + tma_ms)) - tma_bottleneck_big_code",
"MetricGroup": "BvFB;Fed;FetchBW;Frontend",
"MetricName": "tma_bottleneck_instruction_fetch_bw",
"MetricThreshold": "tma_bottleneck_instruction_fetch_bw > 20"
},
{
"BriefDescription": "Total pipeline cost of irregular execution (e.g",
- "MetricExpr": "100 * (tma_microcode_sequencer / (tma_few_uops_instructions + tma_microcode_sequencer) * (tma_assists / tma_microcode_sequencer) * (tma_fetch_latency * (tma_ms_switches + tma_branch_resteers * (tma_clears_resteers + 10 * tma_microcode_sequencer * tma_other_mispredicts / tma_branch_mispredicts * tma_mispredicts_resteers) / (tma_clears_resteers + tma_mispredicts_resteers + tma_unknown_branches)) / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches) + tma_fetch_bandwidth * tma_ms / (tma_dsb + tma_mite + tma_ms)) + 10 * tma_microcode_sequencer * tma_other_mispredicts / tma_branch_mispredicts * tma_branch_mispredicts + tma_machine_clears * tma_other_nukes / tma_other_nukes + tma_core_bound * (tma_serializing_operation + tma_core_bound * RS_EVENTS.EMPTY_CYCLES / tma_info_thread_clks * tma_ports_utilized_0) / (tma_divider + tma_ports_utilization + tma_serializing_operation) + tma_microcode_sequencer / (tma_few_uops_instructions + tma_microcode_sequencer) * (tma_assists / tma_microcode_sequencer) * tma_heavy_operations)",
+ "MetricConstraint": "NO_GROUP_EVENTS",
+ "MetricExpr": "100 * (tma_microcode_sequencer / (tma_few_uops_instructions + tma_microcode_sequencer) * (tma_assists / tma_microcode_sequencer) * (tma_fetch_latency * (tma_ms_switches + tma_branch_resteers * (tma_clears_resteers + 10 * tma_microcode_sequencer * tma_other_mispredicts / tma_branch_mispredicts * tma_mispredicts_resteers) / (tma_clears_resteers + tma_mispredicts_resteers + tma_unknown_branches)) / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches) + tma_ms) + 10 * tma_microcode_sequencer * tma_other_mispredicts / tma_branch_mispredicts * tma_branch_mispredicts + tma_machine_clears * tma_other_nukes / tma_other_nukes + tma_core_bound * (tma_serializing_operation + tma_core_bound * RS_EVENTS.EMPTY_CYCLES / tma_info_thread_clks * tma_ports_utilized_0) / (tma_divider + tma_ports_utilization + tma_serializing_operation) + tma_microcode_sequencer / (tma_few_uops_instructions + tma_microcode_sequencer) * (tma_assists / tma_microcode_sequencer) * tma_heavy_operations)",
"MetricGroup": "Bad;BvIO;Cor;Ret;tma_issueMS",
"MetricName": "tma_bottleneck_irregular_overhead",
"MetricThreshold": "tma_bottleneck_irregular_overhead > 10",
@@ -434,7 +469,8 @@
},
{
"BriefDescription": "Total pipeline cost of Memory Address Translation related bottlenecks (data-side TLBs)",
- "MetricExpr": "100 * (tma_memory_bound * (tma_l1_bound / max(tma_memory_bound, tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_dtlb_load / max(tma_l1_bound, tma_4k_aliasing + tma_dtlb_load + tma_fb_full + tma_l1_latency_dependency + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)) + tma_memory_bound * (tma_store_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_dtlb_store / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency + tma_streaming_stores)))",
+ "MetricConstraint": "NO_GROUP_EVENTS",
+ "MetricExpr": "100 * (tma_memory_bound * (tma_l1_bound / max(tma_memory_bound, tma_cxl_mem_bound + tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_dtlb_load / max(tma_l1_bound, tma_4k_aliasing + tma_dtlb_load + tma_fb_full + tma_l1_latency_dependency + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)) + tma_memory_bound * (tma_store_bound / (tma_cxl_mem_bound + tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_dtlb_store / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency + tma_streaming_stores)))",
"MetricGroup": "BvMT;Mem;MemoryTLB;Offcore;tma_issueTLB",
"MetricName": "tma_bottleneck_memory_data_tlbs",
"MetricThreshold": "tma_bottleneck_memory_data_tlbs > 20",
@@ -442,7 +478,8 @@
},
{
"BriefDescription": "Total pipeline cost of Memory Synchronization related bottlenecks (data transfers and coherency updates across processors)",
- "MetricExpr": "100 * (tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_mem_latency / (tma_mem_bandwidth + tma_mem_latency)) * tma_remote_cache / (tma_local_mem + tma_remote_cache + tma_remote_mem) + tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_contested_accesses + tma_data_sharing) / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full) + tma_store_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * tma_false_sharing / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency + tma_streaming_stores - tma_store_latency)) + tma_machine_clears * (1 - tma_other_nukes / tma_other_nukes))",
+ "MetricConstraint": "NO_GROUP_EVENTS",
+ "MetricExpr": "100 * (tma_memory_bound * (tma_dram_bound / (tma_cxl_mem_bound + tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_mem_latency / (tma_mem_bandwidth + tma_mem_latency)) * tma_remote_cache / (tma_local_mem + tma_remote_cache + tma_remote_mem) + tma_l3_bound / (tma_cxl_mem_bound + tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_contested_accesses + tma_data_sharing) / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full) + tma_store_bound / (tma_cxl_mem_bound + tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * tma_false_sharing / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency + tma_streaming_stores - tma_store_latency)) + tma_machine_clears * (1 - tma_other_nukes / tma_other_nukes))",
"MetricGroup": "BvMS;LockCont;Mem;Offcore;tma_issueSyncxn",
"MetricName": "tma_bottleneck_memory_synchronization",
"MetricThreshold": "tma_bottleneck_memory_synchronization > 10",
@@ -450,6 +487,7 @@
},
{
"BriefDescription": "Total pipeline cost of Branch Misprediction related bottlenecks",
+ "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "100 * (1 - 10 * tma_microcode_sequencer * tma_other_mispredicts / tma_branch_mispredicts) * (tma_branch_mispredicts + tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches))",
"MetricGroup": "Bad;BadSpec;BrMispredicts;BvMP;tma_issueBM",
"MetricName": "tma_bottleneck_mispredictions",
@@ -458,7 +496,8 @@
},
{
"BriefDescription": "Total pipeline cost of remaining bottlenecks in the back-end",
- "MetricExpr": "100 - (tma_bottleneck_big_code + tma_bottleneck_instruction_fetch_bw + tma_bottleneck_mispredictions + tma_bottleneck_cache_memory_bandwidth + tma_bottleneck_cache_memory_latency + tma_bottleneck_memory_data_tlbs + tma_bottleneck_memory_synchronization + tma_bottleneck_compute_bound_est + tma_bottleneck_irregular_overhead + tma_bottleneck_branching_overhead + tma_bottleneck_useful_work)",
+ "MetricConstraint": "NO_GROUP_EVENTS",
+ "MetricExpr": "100 - (tma_bottleneck_big_code + tma_bottleneck_instruction_fetch_bw + tma_bottleneck_mispredictions + tma_bottleneck_data_cache_memory_bandwidth + tma_bottleneck_data_cache_memory_latency + tma_bottleneck_memory_data_tlbs + tma_bottleneck_memory_synchronization + tma_bottleneck_compute_bound_est + tma_bottleneck_irregular_overhead + tma_bottleneck_branching_overhead + tma_bottleneck_useful_work)",
"MetricGroup": "BvOB;Cor;Offcore",
"MetricName": "tma_bottleneck_other_bottlenecks",
"MetricThreshold": "tma_bottleneck_other_bottlenecks > 20",
@@ -466,6 +505,7 @@
},
{
"BriefDescription": "Total pipeline cost of \"useful operations\" - the portion of Retiring category not covered by Branching_Overhead nor Irregular_Overhead.",
+ "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "100 * (tma_retiring - (BR_INST_RETIRED.ALL_BRANCHES + 2 * BR_INST_RETIRED.NEAR_CALL + INST_RETIRED.NOP) / tma_info_thread_slots - tma_microcode_sequencer / (tma_few_uops_instructions + tma_microcode_sequencer) * (tma_assists / tma_microcode_sequencer) * tma_heavy_operations)",
"MetricGroup": "BvUW;Ret",
"MetricName": "tma_bottleneck_useful_work",
@@ -585,6 +625,15 @@
"ScaleUnit": "100%"
},
{
+ "BriefDescription": "This metric roughly estimates (based on idle latencies) how often the CPU was stalled on accesses to external CXL Memory by loads (e.g",
+ "MetricExpr": "(((1 - ((19 * (MEM_LOAD_L3_MISS_RETIRED.REMOTE_DRAM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS)) + 10 * (MEM_LOAD_L3_MISS_RETIRED.LOCAL_DRAM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) + MEM_LOAD_L3_MISS_RETIRED.REMOTE_FWD * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) + MEM_LOAD_L3_MISS_RETIRED.REMOTE_HITM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS))) / (19 * (MEM_LOAD_L3_MISS_RETIRED.REMOTE_DRAM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS)) + 10 * (MEM_LOAD_L3_MISS_RETIRED.LOCAL_DRAM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) + MEM_LOAD_L3_MISS_RETIRED.REMOTE_FWD * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) + MEM_LOAD_L3_MISS_RETIRED.REMOTE_HITM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS)) + (25 * (MEM_LOAD_RETIRED.LOCAL_PMM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) if #has_pmem > 0 else 0) + 33 * (MEM_LOAD_L3_MISS_RETIRED.REMOTE_PMM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) if #has_pmem > 0 else 0))) if #has_pmem > 0 else 1)) * (CYCLE_ACTIVITY.STALLS_L3_MISS / tma_info_thread_clks + (CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS) / tma_info_thread_clks - tma_l2_bound) if 1e6 * (MEM_LOAD_L3_MISS_RETIRED.REMOTE_PMM + MEM_LOAD_RETIRED.LOCAL_PMM) > MEM_LOAD_RETIRED.L1_MISS else 0) if #has_pmem > 0 else 0)",
+ "MetricGroup": "MemoryBound;Server;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
+ "MetricName": "tma_cxl_mem_bound",
+ "MetricThreshold": "tma_cxl_mem_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)",
+ "PublicDescription": "This metric roughly estimates (based on idle latencies) how often the CPU was stalled on accesses to external CXL Memory by loads (e.g. 3D-Xpoint (Crystal Ridge, a.k.a. IXP) memory, PMM - Persistent Memory Module [from CLX to SPR] or any other CXL Type3 Memory [EMR onwards]).",
+ "ScaleUnit": "100%"
+ },
+ {
"BriefDescription": "This metric estimates fraction of cycles while the memory subsystem was handling synchronizations due to data-sharing accesses",
"MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "43.5 * tma_info_system_core_frequency * (MEM_LOAD_L3_HIT_RETIRED.XSNP_HIT + MEM_LOAD_L3_HIT_RETIRED.XSNP_HITM * (1 - OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM / (OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM + OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD))) * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS / 2) / tma_info_thread_clks",
@@ -615,7 +664,7 @@
{
"BriefDescription": "This metric estimates how often the CPU was stalled on accesses to external memory (DRAM) by loads",
"MetricConstraint": "NO_GROUP_EVENTS",
- "MetricExpr": "CYCLE_ACTIVITY.STALLS_L3_MISS / tma_info_thread_clks + (CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS) / tma_info_thread_clks - tma_l2_bound",
+ "MetricExpr": "(CYCLE_ACTIVITY.STALLS_L3_MISS / tma_info_thread_clks + (CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS) / tma_info_thread_clks - tma_l2_bound - tma_cxl_mem_bound if #has_pmem > 0 else CYCLE_ACTIVITY.STALLS_L3_MISS / tma_info_thread_clks + (CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS) / tma_info_thread_clks - tma_l2_bound)",
"MetricGroup": "MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
"MetricName": "tma_dram_bound",
"MetricThreshold": "tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)",
@@ -673,7 +722,7 @@
"MetricGroup": "BvMB;MemoryBW;TopdownL4;tma_L4_group;tma_issueBW;tma_issueSL;tma_issueSmSt;tma_l1_bound_group",
"MetricName": "tma_fb_full",
"MetricThreshold": "tma_fb_full > 0.3",
- "PublicDescription": "This metric does a *rough estimation* of how often L1D Fill Buffer unavailability limited additional L1D miss memory access requests to proceed. The higher the metric value; the deeper the memory hierarchy level the misses are satisfied from (metric values >1 are valid). Often it hints on approaching bandwidth limits (to L2 cache; L3 cache or external memory). Related metrics: tma_bottleneck_cache_memory_bandwidth, tma_info_system_dram_bw_use, tma_mem_bandwidth, tma_sq_full, tma_store_latency, tma_streaming_stores",
+ "PublicDescription": "This metric does a *rough estimation* of how often L1D Fill Buffer unavailability limited additional L1D miss memory access requests to proceed. The higher the metric value; the deeper the memory hierarchy level the misses are satisfied from (metric values >1 are valid). Often it hints on approaching bandwidth limits (to L2 cache; L3 cache or external memory). Related metrics: tma_bottleneck_data_cache_memory_bandwidth, tma_info_system_dram_bw_use, tma_mem_bandwidth, tma_sq_full, tma_store_latency, tma_streaming_stores",
"ScaleUnit": "100%"
},
{
@@ -865,6 +914,7 @@
},
{
"BriefDescription": "Total pipeline cost of DSB (uop cache) hits - subset of the Instruction_Fetch_BW Bottleneck",
+ "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "100 * (tma_frontend_bound * (tma_fetch_bandwidth / (tma_fetch_bandwidth + tma_fetch_latency)) * (tma_dsb / (tma_dsb + tma_mite + tma_ms)))",
"MetricGroup": "DSB;Fed;FetchBW;tma_issueFB",
"MetricName": "tma_info_botlnk_l2_dsb_bandwidth",
@@ -1320,7 +1370,7 @@
"MetricName": "tma_info_memory_tlb_store_stlb_mpki"
},
{
- "BriefDescription": "",
+ "BriefDescription": "Mem;Backend;CacheHits",
"MetricExpr": "UOPS_EXECUTED.THREAD / (UOPS_EXECUTED.CORE_CYCLES_GE_1 / 2 if #SMT_on else cpu@UOPS_EXECUTED.THREAD\\,cmask\\=1@)",
"MetricGroup": "Cor;Pipeline;PortsUtil;SMT",
"MetricName": "tma_info_pipeline_execute"
@@ -1338,6 +1388,12 @@
"MetricName": "tma_info_pipeline_fetch_mite"
},
{
+ "BriefDescription": "Average number of uops fetched from MS per cycle",
+ "MetricExpr": "IDQ.MS_UOPS / cpu@IDQ.MS_UOPS\\,cmask\\=1@",
+ "MetricGroup": "Fed;FetchLat;MicroSeq",
+ "MetricName": "tma_info_pipeline_fetch_ms"
+ },
+ {
"BriefDescription": "Instructions per a microcode Assist invocation",
"MetricExpr": "INST_RETIRED.ANY / ASSISTS.ANY",
"MetricGroup": "MicroSeq;Pipeline;Ret;Retire",
@@ -1353,7 +1409,7 @@
},
{
"BriefDescription": "Measured Average Core Frequency for unhalted processors [GHz]",
- "MetricExpr": "tma_info_system_turbo_utilization * TSC / 1e9 / tma_info_system_time",
+ "MetricExpr": "tma_info_system_turbo_utilization * msr@tsc@ / 1e9 / tma_info_system_time",
"MetricGroup": "Power;Summary",
"MetricName": "tma_info_system_core_frequency"
},
@@ -1365,16 +1421,28 @@
},
{
"BriefDescription": "Average number of utilized CPUs",
- "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / TSC",
+ "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / msr@tsc@",
"MetricGroup": "Summary",
"MetricName": "tma_info_system_cpus_utilized"
},
{
+ "BriefDescription": "Average 3DXP Memory Bandwidth Use for reads [GB / sec]",
+ "MetricExpr": "(64 * UNC_M_PMM_RPQ_INSERTS / 1e9 / tma_info_system_time if #has_pmem > 0 else 0)",
+ "MetricGroup": "MemOffcore;MemoryBW;Server;SoC",
+ "MetricName": "tma_info_system_cxl_mem_read_bw"
+ },
+ {
+ "BriefDescription": "Average 3DXP Memory Bandwidth Use for Writes [GB / sec]",
+ "MetricExpr": "(64 * UNC_M_PMM_WPQ_INSERTS / 1e9 / tma_info_system_time if #has_pmem > 0 else 0)",
+ "MetricGroup": "MemOffcore;MemoryBW;Server;SoC",
+ "MetricName": "tma_info_system_cxl_mem_write_bw"
+ },
+ {
"BriefDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]",
"MetricExpr": "64 * (UNC_M_CAS_COUNT.RD + UNC_M_CAS_COUNT.WR) / 1e9 / tma_info_system_time",
"MetricGroup": "HPC;MemOffcore;MemoryBW;SoC;tma_issueBW",
"MetricName": "tma_info_system_dram_bw_use",
- "PublicDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]. Related metrics: tma_bottleneck_cache_memory_bandwidth, tma_fb_full, tma_mem_bandwidth, tma_sq_full"
+ "PublicDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]. Related metrics: tma_bottleneck_data_cache_memory_bandwidth, tma_fb_full, tma_mem_bandwidth, tma_sq_full"
},
{
"BriefDescription": "Giga Floating Point Operations Per Second",
@@ -1433,12 +1501,20 @@
},
{
"BriefDescription": "Average number of parallel data read requests to external memory",
+ "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD / UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD@thresh\\=1@",
"MetricGroup": "Mem;MemoryBW;SoC",
"MetricName": "tma_info_system_mem_parallel_reads",
"PublicDescription": "Average number of parallel data read requests to external memory. Accounts for demand loads and L1/L2 prefetches"
},
{
+ "BriefDescription": "Average latency of data read request to external 3D X-Point memory [in nanoseconds]",
+ "MetricExpr": "(1e9 * (UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_PMM / UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PMM) / cha_0@event\\=0x0@ if #has_pmem > 0 else 0)",
+ "MetricGroup": "MemOffcore;MemoryLat;Server;SoC",
+ "MetricName": "tma_info_system_mem_pmm_read_latency",
+ "PublicDescription": "Average latency of data read request to external 3D X-Point memory [in nanoseconds]. Accounts for demand loads and L1/L2 data-read prefetches"
+ },
+ {
"BriefDescription": "Average latency of data read request to external memory (in nanoseconds)",
"MetricExpr": "1e9 * (UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD / UNC_CHA_TOR_INSERTS.IA_MISS_DRD) / (tma_info_system_socket_clks / tma_info_system_time)",
"MetricGroup": "Mem;MemoryLat;SoC",
@@ -1590,12 +1666,12 @@
"ScaleUnit": "100%"
},
{
- "BriefDescription": "This metric([SKL+] roughly; [LNL]) estimates fraction of cycles with demand load accesses that hit the L1D cache",
+ "BriefDescription": "This metric ([SKL+] roughly; [LNL]) estimates fraction of cycles with demand load accesses that hit the L1D cache",
"MetricExpr": "min(2 * (MEM_INST_RETIRED.ALL_LOADS - MEM_LOAD_RETIRED.FB_HIT - MEM_LOAD_RETIRED.L1_MISS) * 20 / 100, max(CYCLE_ACTIVITY.CYCLES_MEM_ANY - CYCLE_ACTIVITY.CYCLES_L1D_MISS, 0)) / tma_info_thread_clks",
"MetricGroup": "BvML;MemoryLat;TopdownL4;tma_L4_group;tma_l1_bound_group",
"MetricName": "tma_l1_latency_dependency",
"MetricThreshold": "tma_l1_latency_dependency > 0.1 & (tma_l1_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
- "PublicDescription": "This metric([SKL+] roughly; [LNL]) estimates fraction of cycles with demand load accesses that hit the L1D cache. The short latency of the L1D cache may be exposed in pointer-chasing memory access patterns as an example. Sample with: MEM_LOAD_RETIRED.L1_HIT",
+ "PublicDescription": "This metric ([SKL+] roughly; [LNL]) estimates fraction of cycles with demand load accesses that hit the L1D cache. The short latency of the L1D cache may be exposed in pointer-chasing memory access patterns as an example. Sample with: MEM_LOAD_RETIRED.L1_HIT",
"ScaleUnit": "100%"
},
{
@@ -1619,7 +1695,6 @@
},
{
"BriefDescription": "This metric estimates how often the CPU was stalled due to loads accesses to L3 cache or contended with a sibling Core",
- "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "(CYCLE_ACTIVITY.STALLS_L2_MISS - CYCLE_ACTIVITY.STALLS_L3_MISS) / tma_info_thread_clks",
"MetricGroup": "CacheHits;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
"MetricName": "tma_l3_bound",
@@ -1633,7 +1708,7 @@
"MetricGroup": "BvML;MemoryLat;TopdownL4;tma_L4_group;tma_issueLat;tma_l3_bound_group",
"MetricName": "tma_l3_hit_latency",
"MetricThreshold": "tma_l3_hit_latency > 0.1 & (tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
- "PublicDescription": "This metric estimates fraction of cycles with demand load accesses that hit the L3 cache under unloaded scenarios (possibly L3 latency limited). Avoiding private cache misses (i.e. L2 misses/L3 hits) will improve the latency; reduce contention with sibling physical cores and increase performance. Note the value of this node may overlap with its siblings. Sample with: MEM_LOAD_RETIRED.L3_HIT_PS. Related metrics: tma_bottleneck_cache_memory_latency, tma_mem_latency",
+ "PublicDescription": "This metric estimates fraction of cycles with demand load accesses that hit the L3 cache under unloaded scenarios (possibly L3 latency limited). Avoiding private cache misses (i.e. L2 misses/L3 hits) will improve the latency; reduce contention with sibling physical cores and increase performance. Note the value of this node may overlap with its siblings. Sample with: MEM_LOAD_RETIRED.L3_HIT_PS. Related metrics: tma_bottleneck_data_cache_memory_latency, tma_mem_latency",
"ScaleUnit": "100%"
},
{
@@ -1739,7 +1814,7 @@
"MetricGroup": "BvMB;MemoryBW;Offcore;TopdownL4;tma_L4_group;tma_dram_bound_group;tma_issueBW",
"MetricName": "tma_mem_bandwidth",
"MetricThreshold": "tma_mem_bandwidth > 0.2 & (tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
- "PublicDescription": "This metric estimates fraction of cycles where the core's performance was likely hurt due to approaching bandwidth limits of external memory - DRAM ([SPR-HBM] and/or HBM). The underlying heuristic assumes that a similar off-core traffic is generated by all IA cores. This metric does not aggregate non-data-read requests by this logical processor; requests from other IA Logical Processors/Physical Cores/sockets; or other non-IA devices like GPU; hence the maximum external memory bandwidth limits may or may not be approached when this metric is flagged (see Uncore counters for that). Related metrics: tma_bottleneck_cache_memory_bandwidth, tma_fb_full, tma_info_system_dram_bw_use, tma_sq_full",
+ "PublicDescription": "This metric estimates fraction of cycles where the core's performance was likely hurt due to approaching bandwidth limits of external memory - DRAM ([SPR-HBM] and/or HBM). The underlying heuristic assumes that a similar off-core traffic is generated by all IA cores. This metric does not aggregate non-data-read requests by this logical processor; requests from other IA Logical Processors/Physical Cores/sockets; or other non-IA devices like GPU; hence the maximum external memory bandwidth limits may or may not be approached when this metric is flagged (see Uncore counters for that). Related metrics: tma_bottleneck_data_cache_memory_bandwidth, tma_fb_full, tma_info_system_dram_bw_use, tma_sq_full",
"ScaleUnit": "100%"
},
{
@@ -1748,7 +1823,7 @@
"MetricGroup": "BvML;MemoryLat;Offcore;TopdownL4;tma_L4_group;tma_dram_bound_group;tma_issueLat",
"MetricName": "tma_mem_latency",
"MetricThreshold": "tma_mem_latency > 0.1 & (tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
- "PublicDescription": "This metric estimates fraction of cycles where the performance was likely hurt due to latency from external memory - DRAM ([SPR-HBM] and/or HBM). This metric does not aggregate requests from other Logical Processors/Physical Cores/sockets (see Uncore counters for that). Related metrics: tma_bottleneck_cache_memory_latency, tma_l3_hit_latency",
+ "PublicDescription": "This metric estimates fraction of cycles where the performance was likely hurt due to latency from external memory - DRAM ([SPR-HBM] and/or HBM). This metric does not aggregate requests from other Logical Processors/Physical Cores/sockets (see Uncore counters for that). Related metrics: tma_bottleneck_data_cache_memory_latency, tma_l3_hit_latency",
"ScaleUnit": "100%"
},
{
@@ -1816,7 +1891,7 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles in which CPU was likely limited due to the Microcode Sequencer (MS) unit - see Microcode_Sequencer node for details.",
- "MetricExpr": "cpu@IDQ.MS_UOPS\\,cmask\\=1@ / tma_info_core_core_clks / 2",
+ "MetricExpr": "cpu@IDQ.MS_UOPS\\,cmask\\=1@ / tma_info_core_core_clks / 3.3",
"MetricGroup": "MicroSeq;TopdownL3;tma_L3_group;tma_fetch_bandwidth_group",
"MetricName": "tma_ms",
"MetricThreshold": "tma_ms > 0.05 & tma_fetch_bandwidth > 0.2",
@@ -1968,7 +2043,7 @@
{
"BriefDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired",
"DefaultMetricgroupName": "TopdownL1",
- "MetricExpr": "topdown\\-retiring / (topdown\\-fe\\-bound + topdown\\-bad\\-spec + topdown\\-retiring + topdown\\-be\\-bound) + 0 * tma_info_thread_slots",
+ "MetricExpr": "topdown\\-retiring / (topdown\\-fe\\-bound + topdown\\-bad\\-spec + topdown\\-retiring + topdown\\-be\\-bound)",
"MetricGroup": "BvUW;Default;TmaL1;TopdownL1;tma_L1_group",
"MetricName": "tma_retiring",
"MetricThreshold": "tma_retiring > 0.7 | tma_heavy_operations > 0.1",
@@ -2005,7 +2080,6 @@
},
{
"BriefDescription": "This metric represents rate of split store accesses",
- "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "MEM_INST_RETIRED.SPLIT_STORES / tma_info_core_core_clks",
"MetricGroup": "TopdownL4;tma_L4_group;tma_issueSpSt;tma_store_bound_group",
"MetricName": "tma_split_stores",
@@ -2019,7 +2093,7 @@
"MetricGroup": "BvMB;MemoryBW;Offcore;TopdownL4;tma_L4_group;tma_issueBW;tma_l3_bound_group",
"MetricName": "tma_sq_full",
"MetricThreshold": "tma_sq_full > 0.3 & (tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
- "PublicDescription": "This metric measures fraction of cycles where the Super Queue (SQ) was full taking into account all request-types and both hardware SMT threads (Logical Processors). Related metrics: tma_bottleneck_cache_memory_bandwidth, tma_fb_full, tma_info_system_dram_bw_use, tma_mem_bandwidth",
+ "PublicDescription": "This metric measures fraction of cycles where the Super Queue (SQ) was full taking into account all request-types and both hardware SMT threads (Logical Processors). Related metrics: tma_bottleneck_data_cache_memory_bandwidth, tma_fb_full, tma_info_system_dram_bw_use, tma_mem_bandwidth",
"ScaleUnit": "100%"
},
{
@@ -2033,7 +2107,6 @@
},
{
"BriefDescription": "This metric roughly estimates fraction of cycles when the memory subsystem had loads blocked since they could not forward data from earlier (in program order) overlapping stores",
- "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "13 * LD_BLOCKS.STORE_FORWARD / tma_info_thread_clks",
"MetricGroup": "TopdownL4;tma_L4_group;tma_l1_bound_group",
"MetricName": "tma_store_fwd_blk",
diff --git a/tools/perf/pmu-events/arch/x86/icelakex/uncore-cache.json b/tools/perf/pmu-events/arch/x86/icelakex/uncore-cache.json
index 6f84ad47276d..1c225192ba34 100644
--- a/tools/perf/pmu-events/arch/x86/icelakex/uncore-cache.json
+++ b/tools/perf/pmu-events/arch/x86/icelakex/uncore-cache.json
@@ -6050,7 +6050,7 @@
"EventName": "UNC_CHA_SNOOP_RESP.RSPIFWD",
"Experimental": "1",
"PerPkg": "1",
- "PublicDescription": "Counts when a a transaction with the opcode type RspIFwd Snoop Response was received which indicates a remote caching agent forwarded the data and the requesting agent is able to acquire the data in E (Exclusive) or M (modified) states. This is commonly returned with RFO (the Read for Ownership issued before a write) transactions. The snoop could have either been to a cacheline in the M,E,F (Modified, Exclusive or Forward) states.",
+ "PublicDescription": "Counts when a transaction with the opcode type RspIFwd Snoop Response was received which indicates a remote caching agent forwarded the data and the requesting agent is able to acquire the data in E (Exclusive) or M (modified) states. This is commonly returned with RFO (the Read for Ownership issued before a write) transactions. The snoop could have either been to a cacheline in the M,E,F (Modified, Exclusive or Forward) states.",
"UMask": "0x4",
"Unit": "CHA"
},
@@ -6072,7 +6072,7 @@
"EventName": "UNC_CHA_SNOOP_RESP.RSPSFWD",
"Experimental": "1",
"PerPkg": "1",
- "PublicDescription": "Counts when a a transaction with the opcode type RspSFwd Snoop Response was received which indicates a remote caching agent forwarded the data but held on to its current copy. This is common for data and code reads that hit in a remote socket in E (Exclusive) or F (Forward) state.",
+ "PublicDescription": "Counts when a transaction with the opcode type RspSFwd Snoop Response was received which indicates a remote caching agent forwarded the data but held on to its current copy. This is common for data and code reads that hit in a remote socket in E (Exclusive) or F (Forward) state.",
"UMask": "0x8",
"Unit": "CHA"
},
diff --git a/tools/perf/pmu-events/arch/x86/ivybridge/ivb-metrics.json b/tools/perf/pmu-events/arch/x86/ivybridge/ivb-metrics.json
index de651ff9f846..969cb519eec1 100644
--- a/tools/perf/pmu-events/arch/x86/ivybridge/ivb-metrics.json
+++ b/tools/perf/pmu-events/arch/x86/ivybridge/ivb-metrics.json
@@ -1,49 +1,49 @@
[
{
"BriefDescription": "C2 residency percent per package",
- "MetricExpr": "cstate_pkg@c2\\-residency@ / TSC",
+ "MetricExpr": "cstate_pkg@c2\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C2_Pkg_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C3 residency percent per core",
- "MetricExpr": "cstate_core@c3\\-residency@ / TSC",
+ "MetricExpr": "cstate_core@c3\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C3_Core_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C3 residency percent per package",
- "MetricExpr": "cstate_pkg@c3\\-residency@ / TSC",
+ "MetricExpr": "cstate_pkg@c3\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C3_Pkg_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C6 residency percent per core",
- "MetricExpr": "cstate_core@c6\\-residency@ / TSC",
+ "MetricExpr": "cstate_core@c6\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C6_Core_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C6 residency percent per package",
- "MetricExpr": "cstate_pkg@c6\\-residency@ / TSC",
+ "MetricExpr": "cstate_pkg@c6\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C6_Pkg_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C7 residency percent per core",
- "MetricExpr": "cstate_core@c7\\-residency@ / TSC",
+ "MetricExpr": "cstate_core@c7\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C7_Core_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C7 residency percent per package",
- "MetricExpr": "cstate_pkg@c7\\-residency@ / TSC",
+ "MetricExpr": "cstate_pkg@c7\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C7_Pkg_Residency",
"ScaleUnit": "100%"
@@ -80,7 +80,6 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution ports for ALU operations.",
- "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "(UOPS_DISPATCHED_PORT.PORT_0 + UOPS_DISPATCHED_PORT.PORT_1 + UOPS_DISPATCHED_PORT.PORT_5) / (3 * tma_info_core_core_clks)",
"MetricGroup": "TopdownL5;tma_L5_group;tma_ports_utilized_3m_group",
"MetricName": "tma_alu_op_utilization",
@@ -98,7 +97,6 @@
},
{
"BriefDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend",
- "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "1 - (tma_frontend_bound + tma_bad_speculation + tma_retiring)",
"MetricGroup": "BvOB;TmaL1;TopdownL1;tma_L1_group",
"MetricName": "tma_backend_bound",
@@ -139,7 +137,6 @@
},
{
"BriefDescription": "This metric estimates fraction of cycles the CPU retired uops originated from CISC (complex instruction set computer) instruction",
- "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "max(0, tma_microcode_sequencer - tma_assists)",
"MetricGroup": "TopdownL4;tma_L4_group;tma_microcode_sequencer_group",
"MetricName": "tma_cisc",
@@ -561,7 +558,7 @@
"MetricThreshold": "tma_info_memory_tlb_page_walks_utilization > 0.5"
},
{
- "BriefDescription": "",
+ "BriefDescription": "Mem;Backend;CacheHits",
"MetricExpr": "UOPS_EXECUTED.THREAD / (cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ / 2 if #SMT_on else UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC)",
"MetricGroup": "Cor;Pipeline;PortsUtil;SMT",
"MetricName": "tma_info_pipeline_execute"
@@ -574,7 +571,7 @@
},
{
"BriefDescription": "Measured Average Core Frequency for unhalted processors [GHz]",
- "MetricExpr": "tma_info_system_turbo_utilization * TSC / 1e9 / tma_info_system_time",
+ "MetricExpr": "tma_info_system_turbo_utilization * msr@tsc@ / 1e9 / tma_info_system_time",
"MetricGroup": "Power;Summary",
"MetricName": "tma_info_system_core_frequency"
},
@@ -586,7 +583,7 @@
},
{
"BriefDescription": "Average number of utilized CPUs",
- "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / TSC",
+ "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / msr@tsc@",
"MetricGroup": "Summary",
"MetricName": "tma_info_system_cpus_utilized"
},
@@ -775,7 +772,6 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port for Load operations",
- "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "(UOPS_DISPATCHED_PORT.PORT_2 + UOPS_DISPATCHED_PORT.PORT_3 - UOPS_DISPATCHED_PORT.PORT_4) / (2 * tma_info_core_core_clks)",
"MetricGroup": "TopdownL5;tma_L5_group;tma_ports_utilized_3m_group",
"MetricName": "tma_load_op_utilization",
@@ -926,7 +922,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles CPU executed no uops on any execution port (Logical Processor cycles since ICL, Physical Core cycles otherwise)",
- "MetricExpr": "(cpu@UOPS_EXECUTED.CORE\\,inv\\,cmask\\=1@ / 2 if #SMT_on else (min(CPU_CLK_UNHALTED.THREAD, CYCLE_ACTIVITY.CYCLES_NO_EXECUTE) - (RS_EVENTS.EMPTY_CYCLES if tma_fetch_latency > 0.1 else 0)) / tma_info_core_core_clks)",
+ "MetricExpr": "(cpu@UOPS_EXECUTED.CORE\\,inv\\,cmask\\=1@ / 2 if #SMT_on else min(CPU_CLK_UNHALTED.THREAD, CYCLE_ACTIVITY.CYCLES_NO_EXECUTE) - (RS_EVENTS.EMPTY_CYCLES if tma_fetch_latency > 0.1 else 0)) / tma_info_core_core_clks",
"MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_ports_utilization_group",
"MetricName": "tma_ports_utilized_0",
"MetricThreshold": "tma_ports_utilized_0 > 0.2 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))",
@@ -935,7 +931,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles where the CPU executed total of 1 uop per cycle on all execution ports (Logical Processor cycles since ICL, Physical Core cycles otherwise)",
- "MetricExpr": "((cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ - cpu@UOPS_EXECUTED.CORE\\,cmask\\=2@) / 2 if #SMT_on else (UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC - UOPS_EXECUTED.CYCLES_GE_2_UOPS_EXEC) / tma_info_core_core_clks)",
+ "MetricExpr": "((cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ - cpu@UOPS_EXECUTED.CORE\\,cmask\\=2@) / 2 if #SMT_on else UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC - UOPS_EXECUTED.CYCLES_GE_2_UOPS_EXEC) / tma_info_core_core_clks",
"MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_issueL1;tma_ports_utilization_group",
"MetricName": "tma_ports_utilized_1",
"MetricThreshold": "tma_ports_utilized_1 > 0.2 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))",
@@ -944,7 +940,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles CPU executed total of 2 uops per cycle on all execution ports (Logical Processor cycles since ICL, Physical Core cycles otherwise)",
- "MetricExpr": "((cpu@UOPS_EXECUTED.CORE\\,cmask\\=2@ - cpu@UOPS_EXECUTED.CORE\\,cmask\\=3@) / 2 if #SMT_on else (UOPS_EXECUTED.CYCLES_GE_2_UOPS_EXEC - UOPS_EXECUTED.CYCLES_GE_3_UOPS_EXEC) / tma_info_core_core_clks)",
+ "MetricExpr": "((cpu@UOPS_EXECUTED.CORE\\,cmask\\=2@ - cpu@UOPS_EXECUTED.CORE\\,cmask\\=3@) / 2 if #SMT_on else UOPS_EXECUTED.CYCLES_GE_2_UOPS_EXEC - UOPS_EXECUTED.CYCLES_GE_3_UOPS_EXEC) / tma_info_core_core_clks",
"MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_issue2P;tma_ports_utilization_group",
"MetricName": "tma_ports_utilized_2",
"MetricThreshold": "tma_ports_utilized_2 > 0.15 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))",
diff --git a/tools/perf/pmu-events/arch/x86/ivytown/ivt-metrics.json b/tools/perf/pmu-events/arch/x86/ivytown/ivt-metrics.json
index 714d5e6d21e7..1cdd197ac883 100644
--- a/tools/perf/pmu-events/arch/x86/ivytown/ivt-metrics.json
+++ b/tools/perf/pmu-events/arch/x86/ivytown/ivt-metrics.json
@@ -1,49 +1,49 @@
[
{
"BriefDescription": "C2 residency percent per package",
- "MetricExpr": "cstate_pkg@c2\\-residency@ / TSC",
+ "MetricExpr": "cstate_pkg@c2\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C2_Pkg_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C3 residency percent per core",
- "MetricExpr": "cstate_core@c3\\-residency@ / TSC",
+ "MetricExpr": "cstate_core@c3\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C3_Core_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C3 residency percent per package",
- "MetricExpr": "cstate_pkg@c3\\-residency@ / TSC",
+ "MetricExpr": "cstate_pkg@c3\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C3_Pkg_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C6 residency percent per core",
- "MetricExpr": "cstate_core@c6\\-residency@ / TSC",
+ "MetricExpr": "cstate_core@c6\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C6_Core_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C6 residency percent per package",
- "MetricExpr": "cstate_pkg@c6\\-residency@ / TSC",
+ "MetricExpr": "cstate_pkg@c6\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C6_Pkg_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C7 residency percent per core",
- "MetricExpr": "cstate_core@c7\\-residency@ / TSC",
+ "MetricExpr": "cstate_core@c7\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C7_Core_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C7 residency percent per package",
- "MetricExpr": "cstate_pkg@c7\\-residency@ / TSC",
+ "MetricExpr": "cstate_pkg@c7\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C7_Pkg_Residency",
"ScaleUnit": "100%"
@@ -80,7 +80,6 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution ports for ALU operations.",
- "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "(UOPS_DISPATCHED_PORT.PORT_0 + UOPS_DISPATCHED_PORT.PORT_1 + UOPS_DISPATCHED_PORT.PORT_5) / (3 * tma_info_core_core_clks)",
"MetricGroup": "TopdownL5;tma_L5_group;tma_ports_utilized_3m_group",
"MetricName": "tma_alu_op_utilization",
@@ -98,7 +97,6 @@
},
{
"BriefDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend",
- "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "1 - (tma_frontend_bound + tma_bad_speculation + tma_retiring)",
"MetricGroup": "BvOB;TmaL1;TopdownL1;tma_L1_group",
"MetricName": "tma_backend_bound",
@@ -139,7 +137,6 @@
},
{
"BriefDescription": "This metric estimates fraction of cycles the CPU retired uops originated from CISC (complex instruction set computer) instruction",
- "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "max(0, tma_microcode_sequencer - tma_assists)",
"MetricGroup": "TopdownL4;tma_L4_group;tma_microcode_sequencer_group",
"MetricName": "tma_cisc",
@@ -561,7 +558,7 @@
"MetricThreshold": "tma_info_memory_tlb_page_walks_utilization > 0.5"
},
{
- "BriefDescription": "",
+ "BriefDescription": "Mem;Backend;CacheHits",
"MetricExpr": "UOPS_EXECUTED.THREAD / (cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ / 2 if #SMT_on else UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC)",
"MetricGroup": "Cor;Pipeline;PortsUtil;SMT",
"MetricName": "tma_info_pipeline_execute"
@@ -574,7 +571,7 @@
},
{
"BriefDescription": "Measured Average Core Frequency for unhalted processors [GHz]",
- "MetricExpr": "tma_info_system_turbo_utilization * TSC / 1e9 / tma_info_system_time",
+ "MetricExpr": "tma_info_system_turbo_utilization * msr@tsc@ / 1e9 / tma_info_system_time",
"MetricGroup": "Power;Summary",
"MetricName": "tma_info_system_core_frequency"
},
@@ -586,7 +583,7 @@
},
{
"BriefDescription": "Average number of utilized CPUs",
- "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / TSC",
+ "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / msr@tsc@",
"MetricGroup": "Summary",
"MetricName": "tma_info_system_cpus_utilized"
},
@@ -626,6 +623,7 @@
},
{
"BriefDescription": "Average number of parallel data read requests to external memory",
+ "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "UNC_C_TOR_OCCUPANCY.MISS_OPCODE@filter_opc\\=0x182@ / UNC_C_TOR_OCCUPANCY.MISS_OPCODE@filter_opc\\=0x182\\,thresh\\=1@",
"MetricGroup": "Mem;MemoryBW;SoC",
"MetricName": "tma_info_system_mem_parallel_reads",
@@ -795,7 +793,6 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port for Load operations",
- "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "(UOPS_DISPATCHED_PORT.PORT_2 + UOPS_DISPATCHED_PORT.PORT_3 - UOPS_DISPATCHED_PORT.PORT_4) / (2 * tma_info_core_core_clks)",
"MetricGroup": "TopdownL5;tma_L5_group;tma_ports_utilized_3m_group",
"MetricName": "tma_load_op_utilization",
@@ -805,6 +802,7 @@
},
{
"BriefDescription": "This metric estimates fraction of cycles while the memory subsystem was handling loads from local memory",
+ "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "200 * (MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM * (1 + MEM_LOAD_UOPS_RETIRED.HIT_LFB / (MEM_LOAD_UOPS_RETIRED.L2_HIT + MEM_LOAD_UOPS_RETIRED.LLC_HIT + MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT + MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM + MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_MISS + MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM + MEM_LOAD_UOPS_LLC_MISS_RETIRED.REMOTE_DRAM + MEM_LOAD_UOPS_LLC_MISS_RETIRED.REMOTE_HITM + MEM_LOAD_UOPS_LLC_MISS_RETIRED.REMOTE_FWD))) / tma_info_thread_clks",
"MetricGroup": "Server;TopdownL5;tma_L5_group;tma_mem_latency_group",
"MetricName": "tma_local_mem",
@@ -955,7 +953,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles CPU executed no uops on any execution port (Logical Processor cycles since ICL, Physical Core cycles otherwise)",
- "MetricExpr": "(cpu@UOPS_EXECUTED.CORE\\,inv\\,cmask\\=1@ / 2 if #SMT_on else (min(CPU_CLK_UNHALTED.THREAD, CYCLE_ACTIVITY.CYCLES_NO_EXECUTE) - (RS_EVENTS.EMPTY_CYCLES if tma_fetch_latency > 0.1 else 0)) / tma_info_core_core_clks)",
+ "MetricExpr": "(cpu@UOPS_EXECUTED.CORE\\,inv\\,cmask\\=1@ / 2 if #SMT_on else min(CPU_CLK_UNHALTED.THREAD, CYCLE_ACTIVITY.CYCLES_NO_EXECUTE) - (RS_EVENTS.EMPTY_CYCLES if tma_fetch_latency > 0.1 else 0)) / tma_info_core_core_clks",
"MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_ports_utilization_group",
"MetricName": "tma_ports_utilized_0",
"MetricThreshold": "tma_ports_utilized_0 > 0.2 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))",
@@ -964,7 +962,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles where the CPU executed total of 1 uop per cycle on all execution ports (Logical Processor cycles since ICL, Physical Core cycles otherwise)",
- "MetricExpr": "((cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ - cpu@UOPS_EXECUTED.CORE\\,cmask\\=2@) / 2 if #SMT_on else (UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC - UOPS_EXECUTED.CYCLES_GE_2_UOPS_EXEC) / tma_info_core_core_clks)",
+ "MetricExpr": "((cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ - cpu@UOPS_EXECUTED.CORE\\,cmask\\=2@) / 2 if #SMT_on else UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC - UOPS_EXECUTED.CYCLES_GE_2_UOPS_EXEC) / tma_info_core_core_clks",
"MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_issueL1;tma_ports_utilization_group",
"MetricName": "tma_ports_utilized_1",
"MetricThreshold": "tma_ports_utilized_1 > 0.2 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))",
@@ -973,7 +971,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles CPU executed total of 2 uops per cycle on all execution ports (Logical Processor cycles since ICL, Physical Core cycles otherwise)",
- "MetricExpr": "((cpu@UOPS_EXECUTED.CORE\\,cmask\\=2@ - cpu@UOPS_EXECUTED.CORE\\,cmask\\=3@) / 2 if #SMT_on else (UOPS_EXECUTED.CYCLES_GE_2_UOPS_EXEC - UOPS_EXECUTED.CYCLES_GE_3_UOPS_EXEC) / tma_info_core_core_clks)",
+ "MetricExpr": "((cpu@UOPS_EXECUTED.CORE\\,cmask\\=2@ - cpu@UOPS_EXECUTED.CORE\\,cmask\\=3@) / 2 if #SMT_on else UOPS_EXECUTED.CYCLES_GE_2_UOPS_EXEC - UOPS_EXECUTED.CYCLES_GE_3_UOPS_EXEC) / tma_info_core_core_clks",
"MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_issue2P;tma_ports_utilization_group",
"MetricName": "tma_ports_utilized_2",
"MetricThreshold": "tma_ports_utilized_2 > 0.15 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))",
@@ -1000,6 +998,7 @@
},
{
"BriefDescription": "This metric estimates fraction of cycles while the memory subsystem was handling loads from remote memory",
+ "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "310 * (MEM_LOAD_UOPS_LLC_MISS_RETIRED.REMOTE_DRAM * (1 + MEM_LOAD_UOPS_RETIRED.HIT_LFB / (MEM_LOAD_UOPS_RETIRED.L2_HIT + MEM_LOAD_UOPS_RETIRED.LLC_HIT + MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT + MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM + MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_MISS + MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM + MEM_LOAD_UOPS_LLC_MISS_RETIRED.REMOTE_DRAM + MEM_LOAD_UOPS_LLC_MISS_RETIRED.REMOTE_HITM + MEM_LOAD_UOPS_LLC_MISS_RETIRED.REMOTE_FWD))) / tma_info_thread_clks",
"MetricGroup": "Server;Snoop;TopdownL5;tma_L5_group;tma_mem_latency_group",
"MetricName": "tma_remote_mem",
diff --git a/tools/perf/pmu-events/arch/x86/jaketown/jkt-metrics.json b/tools/perf/pmu-events/arch/x86/jaketown/jkt-metrics.json
index 6f636ea0f216..250c73b21385 100644
--- a/tools/perf/pmu-events/arch/x86/jaketown/jkt-metrics.json
+++ b/tools/perf/pmu-events/arch/x86/jaketown/jkt-metrics.json
@@ -1,49 +1,49 @@
[
{
"BriefDescription": "C2 residency percent per package",
- "MetricExpr": "cstate_pkg@c2\\-residency@ / TSC",
+ "MetricExpr": "cstate_pkg@c2\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C2_Pkg_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C3 residency percent per core",
- "MetricExpr": "cstate_core@c3\\-residency@ / TSC",
+ "MetricExpr": "cstate_core@c3\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C3_Core_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C3 residency percent per package",
- "MetricExpr": "cstate_pkg@c3\\-residency@ / TSC",
+ "MetricExpr": "cstate_pkg@c3\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C3_Pkg_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C6 residency percent per core",
- "MetricExpr": "cstate_core@c6\\-residency@ / TSC",
+ "MetricExpr": "cstate_core@c6\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C6_Core_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C6 residency percent per package",
- "MetricExpr": "cstate_pkg@c6\\-residency@ / TSC",
+ "MetricExpr": "cstate_pkg@c6\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C6_Pkg_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C7 residency percent per core",
- "MetricExpr": "cstate_core@c7\\-residency@ / TSC",
+ "MetricExpr": "cstate_core@c7\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C7_Core_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C7 residency percent per package",
- "MetricExpr": "cstate_pkg@c7\\-residency@ / TSC",
+ "MetricExpr": "cstate_pkg@c7\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C7_Pkg_Residency",
"ScaleUnit": "100%"
@@ -71,7 +71,6 @@
},
{
"BriefDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend",
- "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "1 - (tma_frontend_bound + tma_bad_speculation + tma_retiring)",
"MetricGroup": "BvOB;TmaL1;TopdownL1;tma_L1_group",
"MetricName": "tma_backend_bound",
@@ -296,7 +295,7 @@
},
{
"BriefDescription": "Measured Average Core Frequency for unhalted processors [GHz]",
- "MetricExpr": "tma_info_system_turbo_utilization * TSC / 1e9 / tma_info_system_time",
+ "MetricExpr": "tma_info_system_turbo_utilization * msr@tsc@ / 1e9 / tma_info_system_time",
"MetricGroup": "Power;Summary",
"MetricName": "tma_info_system_core_frequency"
},
@@ -308,7 +307,7 @@
},
{
"BriefDescription": "Average number of utilized CPUs",
- "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / TSC",
+ "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / msr@tsc@",
"MetricGroup": "Summary",
"MetricName": "tma_info_system_cpus_utilized"
},
@@ -348,6 +347,7 @@
},
{
"BriefDescription": "Average number of parallel data read requests to external memory",
+ "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "UNC_C_TOR_OCCUPANCY.MISS_OPCODE@filter_opc\\=0x182@ / UNC_C_TOR_OCCUPANCY.MISS_OPCODE@filter_opc\\=0x182\\,thresh\\=1@",
"MetricGroup": "Mem;MemoryBW;SoC",
"MetricName": "tma_info_system_mem_parallel_reads",
diff --git a/tools/perf/pmu-events/arch/x86/lunarlake/cache.json b/tools/perf/pmu-events/arch/x86/lunarlake/cache.json
index ff37d49611c3..3d2616be8ec1 100644
--- a/tools/perf/pmu-events/arch/x86/lunarlake/cache.json
+++ b/tools/perf/pmu-events/arch/x86/lunarlake/cache.json
@@ -29,6 +29,16 @@
"Unit": "cpu_core"
},
{
+ "BriefDescription": "Cachelines replaced into the L1 d-cache. Successful replacements only (not blocked) and exclude WB-miss case",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0x51",
+ "EventName": "L1D.L1_REPLACEMENT",
+ "PublicDescription": "Counts cachelines replaced into the L1 d-cache.",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x4",
+ "Unit": "cpu_core"
+ },
+ {
"BriefDescription": "Cachelines replaced into the L0 and L1 d-cache. Successful replacements only (not blocked) and exclude WB-miss case",
"Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0x51",
@@ -233,7 +243,7 @@
"Unit": "cpu_atom"
},
{
- "BriefDescription": "Counts the number of L2 prefetches initiated by either the L2 Stream or AMP that were throttled due to exceeding the XQ threshold set by either XQ_THRESOLD_DTP or XQ_THRESHOLD. Counts on a per core basis.",
+ "BriefDescription": "Counts the number of L2 prefetches initiated by either the L2 Stream or AMP that were throttled due to exceeding the XQ threshold set by either XQ_THRESHOLD_DTP or XQ_THRESHOLD. Counts on a per core basis.",
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0x28",
"EventName": "L2_PREFETCHES_THROTTLED.XQ_THRESH",
@@ -454,7 +464,7 @@
"Unit": "cpu_atom"
},
{
- "BriefDescription": "Counts the number of LLC prefetches throttled due to exceeding the XQ threshold set by either XQ_THRESOLD_DTP or LLC_XQ_THRESHOLD. Counts on a per core basis.",
+ "BriefDescription": "Counts the number of LLC prefetches throttled due to exceeding the XQ threshold set by either XQ_THRESHOLD_DTP or LLC_XQ_THRESHOLD. Counts on a per core basis.",
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0x29",
"EventName": "LLC_PREFETCHES_THROTTLED.XQ_THRESH",
@@ -592,7 +602,7 @@
"Data_LA": "1",
"EventCode": "0xd0",
"EventName": "MEM_INST_RETIRED.ALL_LOADS",
- "PublicDescription": "Counts Instructions with at least one architecturally visible load retired. Available PDIST counters: 0",
+ "PublicDescription": "Counts Instructions with at least one architecturally visible load retired. Available PDIST counters: 0,1",
"SampleAfterValue": "1000003",
"UMask": "0x81",
"Unit": "cpu_core"
@@ -603,7 +613,7 @@
"Data_LA": "1",
"EventCode": "0xd0",
"EventName": "MEM_INST_RETIRED.ALL_STORES",
- "PublicDescription": "Counts all retired store instructions. Available PDIST counters: 0",
+ "PublicDescription": "Counts all retired store instructions. Available PDIST counters: 0,1",
"SampleAfterValue": "1000003",
"UMask": "0x82",
"Unit": "cpu_core"
@@ -613,7 +623,7 @@
"Counter": "0,1,2,3",
"EventCode": "0xd0",
"EventName": "MEM_INST_RETIRED.ALL_SWPF",
- "PublicDescription": "Counts all retired software prefetch instructions. Available PDIST counters: 0",
+ "PublicDescription": "Counts all retired software prefetch instructions. Available PDIST counters: 0,1",
"SampleAfterValue": "1000003",
"UMask": "0x84",
"Unit": "cpu_core"
@@ -624,7 +634,7 @@
"Data_LA": "1",
"EventCode": "0xd0",
"EventName": "MEM_INST_RETIRED.ANY",
- "PublicDescription": "Counts all retired memory instructions - loads and stores. Available PDIST counters: 0",
+ "PublicDescription": "Counts all retired memory instructions - loads and stores. Available PDIST counters: 0,1",
"SampleAfterValue": "1000003",
"UMask": "0x87",
"Unit": "cpu_core"
@@ -635,7 +645,7 @@
"Data_LA": "1",
"EventCode": "0xd0",
"EventName": "MEM_INST_RETIRED.LOCK_LOADS",
- "PublicDescription": "Counts retired load instructions with locked access. Available PDIST counters: 0",
+ "PublicDescription": "Counts retired load instructions with locked access. Available PDIST counters: 0,1",
"SampleAfterValue": "100007",
"UMask": "0x21",
"Unit": "cpu_core"
@@ -646,7 +656,7 @@
"Data_LA": "1",
"EventCode": "0xd0",
"EventName": "MEM_INST_RETIRED.SPLIT_LOADS",
- "PublicDescription": "Counts retired load instructions that split across a cacheline boundary. Available PDIST counters: 0",
+ "PublicDescription": "Counts retired load instructions that split across a cacheline boundary. Available PDIST counters: 0,1",
"SampleAfterValue": "100003",
"UMask": "0x41",
"Unit": "cpu_core"
@@ -657,18 +667,29 @@
"Data_LA": "1",
"EventCode": "0xd0",
"EventName": "MEM_INST_RETIRED.SPLIT_STORES",
- "PublicDescription": "Counts retired store instructions that split across a cacheline boundary. Available PDIST counters: 0",
+ "PublicDescription": "Counts retired store instructions that split across a cacheline boundary. Available PDIST counters: 0,1",
"SampleAfterValue": "100003",
"UMask": "0x42",
"Unit": "cpu_core"
},
{
+ "BriefDescription": "Retired instructions that hit the STLB.",
+ "Counter": "0,1,2,3",
+ "Data_LA": "1",
+ "EventCode": "0xd0",
+ "EventName": "MEM_INST_RETIRED.STLB_HIT_ANY",
+ "PublicDescription": "Number of retired instructions with a clean hit in the 2nd-level TLB (STLB). Available PDIST counters: 0,1",
+ "SampleAfterValue": "100003",
+ "UMask": "0xf",
+ "Unit": "cpu_core"
+ },
+ {
"BriefDescription": "Retired load instructions that hit the STLB.",
"Counter": "0,1,2,3",
"Data_LA": "1",
"EventCode": "0xd0",
"EventName": "MEM_INST_RETIRED.STLB_HIT_LOADS",
- "PublicDescription": "Number of retired load instructions with a clean hit in the 2nd-level TLB (STLB). Available PDIST counters: 0",
+ "PublicDescription": "Number of retired load instructions with a clean hit in the 2nd-level TLB (STLB). Available PDIST counters: 0,1",
"SampleAfterValue": "100003",
"UMask": "0x9",
"Unit": "cpu_core"
@@ -679,18 +700,39 @@
"Data_LA": "1",
"EventCode": "0xd0",
"EventName": "MEM_INST_RETIRED.STLB_HIT_STORES",
- "PublicDescription": "Number of retired store instructions that hit in the 2nd-level TLB (STLB). Available PDIST counters: 0",
+ "PublicDescription": "Number of retired store instructions that hit in the 2nd-level TLB (STLB). Available PDIST counters: 0,1",
"SampleAfterValue": "100003",
"UMask": "0xa",
"Unit": "cpu_core"
},
{
+ "BriefDescription": "Retired SWPF instructions that hit the STLB.",
+ "Counter": "0,1,2,3",
+ "EventCode": "0xd0",
+ "EventName": "MEM_INST_RETIRED.STLB_HIT_SWPF",
+ "PublicDescription": "Number of retired SWPF instructions that hit in the 2nd-level TLB (STLB). Available PDIST counters: 0,1",
+ "SampleAfterValue": "1000003",
+ "UMask": "0xc",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Retired instructions that miss the STLB.",
+ "Counter": "0,1,2,3",
+ "Data_LA": "1",
+ "EventCode": "0xd0",
+ "EventName": "MEM_INST_RETIRED.STLB_MISS_ANY",
+ "PublicDescription": "Retired instructions that miss the STLB. Available PDIST counters: 0,1",
+ "SampleAfterValue": "100003",
+ "UMask": "0x17",
+ "Unit": "cpu_core"
+ },
+ {
"BriefDescription": "Retired load instructions that miss the STLB.",
"Counter": "0,1,2,3",
"Data_LA": "1",
"EventCode": "0xd0",
"EventName": "MEM_INST_RETIRED.STLB_MISS_LOADS",
- "PublicDescription": "Number of retired load instructions that (start a) miss in the 2nd-level TLB (STLB). Available PDIST counters: 0",
+ "PublicDescription": "Number of retired load instructions that (start a) miss in the 2nd-level TLB (STLB). Available PDIST counters: 0,1",
"SampleAfterValue": "100003",
"UMask": "0x11",
"Unit": "cpu_core"
@@ -701,18 +743,28 @@
"Data_LA": "1",
"EventCode": "0xd0",
"EventName": "MEM_INST_RETIRED.STLB_MISS_STORES",
- "PublicDescription": "Number of retired store instructions that (start a) miss in the 2nd-level TLB (STLB). Available PDIST counters: 0",
+ "PublicDescription": "Number of retired store instructions that (start a) miss in the 2nd-level TLB (STLB). Available PDIST counters: 0,1",
"SampleAfterValue": "100003",
"UMask": "0x12",
"Unit": "cpu_core"
},
{
+ "BriefDescription": "Retired SWPF instructions that miss the STLB.",
+ "Counter": "0,1,2,3",
+ "EventCode": "0xd0",
+ "EventName": "MEM_INST_RETIRED.STLB_MISS_SWPF",
+ "PublicDescription": "Number of retired SWPF instructions that (start a) miss in the 2nd-level TLB (STLB). Available PDIST counters: 0,1",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x14",
+ "Unit": "cpu_core"
+ },
+ {
"BriefDescription": "Retired load instructions whose data sources were a cross-core Snoop hits and forwards data from an in on-package core cache (induced by NI$)",
"Counter": "0,1,2,3",
"Data_LA": "1",
"EventCode": "0xd2",
"EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_FWD",
- "PublicDescription": "Counts retired load instructions whose data sources were a cross-core Snoop hits and forwards data from an in on-package core cache (induced by NI$) Available PDIST counters: 0",
+ "PublicDescription": "Counts retired load instructions whose data sources were a cross-core Snoop hits and forwards data from an in on-package core cache (induced by NI$) Available PDIST counters: 0,1",
"SampleAfterValue": "20011",
"UMask": "0x10",
"Unit": "cpu_core"
@@ -723,7 +775,7 @@
"Data_LA": "1",
"EventCode": "0xd2",
"EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_HITM",
- "PublicDescription": "Counts retired load instructions whose data sources were HitM responses from shared L3, Hit-with-FWD is normally excluded. Available PDIST counters: 0",
+ "PublicDescription": "Counts retired load instructions whose data sources were HitM responses from shared L3, Hit-with-FWD is normally excluded. Available PDIST counters: 0,1",
"SampleAfterValue": "20011",
"UMask": "0x4",
"Unit": "cpu_core"
@@ -734,7 +786,7 @@
"Data_LA": "1",
"EventCode": "0xd2",
"EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_MISS",
- "PublicDescription": "Counts the retired load instructions whose data sources were L3 hit and cross-core snoop missed in on-pkg core cache. Available PDIST counters: 0",
+ "PublicDescription": "Counts the retired load instructions whose data sources were L3 hit and cross-core snoop missed in on-pkg core cache. Available PDIST counters: 0,1",
"SampleAfterValue": "20011",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -745,7 +797,7 @@
"Data_LA": "1",
"EventCode": "0xd2",
"EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_NO_FWD",
- "PublicDescription": "Counts retired load instructions whose data sources were L3 and cross-core snoop hits in on-pkg core cache. Available PDIST counters: 0",
+ "PublicDescription": "Counts retired load instructions whose data sources were L3 and cross-core snoop hits in on-pkg core cache. Available PDIST counters: 0,1",
"SampleAfterValue": "20011",
"UMask": "0x2",
"Unit": "cpu_core"
@@ -756,7 +808,7 @@
"Data_LA": "1",
"EventCode": "0xd3",
"EventName": "MEM_LOAD_L3_MISS_RETIRED.MEMSIDE_CACHE",
- "PublicDescription": "Retired load instructions which data source is memory side cache. Available PDIST counters: 0",
+ "PublicDescription": "Retired load instructions which data source is memory side cache. Available PDIST counters: 0,1",
"SampleAfterValue": "100007",
"Unit": "cpu_core"
},
@@ -766,7 +818,7 @@
"Data_LA": "1",
"EventCode": "0xd4",
"EventName": "MEM_LOAD_MISC_RETIRED.UC",
- "PublicDescription": "Retired instructions with at least one load to uncacheable memory-type, or at least one cache-line split locked access (Bus Lock). Available PDIST counters: 0",
+ "PublicDescription": "Retired instructions with at least one load to uncacheable memory-type, or at least one cache-line split locked access (Bus Lock). Available PDIST counters: 0,1",
"SampleAfterValue": "100007",
"UMask": "0x4",
"Unit": "cpu_core"
@@ -777,7 +829,7 @@
"Data_LA": "1",
"EventCode": "0xd1",
"EventName": "MEM_LOAD_RETIRED.FB_HIT",
- "PublicDescription": "Counts retired load instructions with at least one uop was load missed in L1 but hit FB (Fill Buffers) due to preceding miss to the same cache line with data not ready. Available PDIST counters: 0",
+ "PublicDescription": "Counts retired load instructions with at least one uop was load missed in L1 but hit FB (Fill Buffers) due to preceding miss to the same cache line with data not ready. Available PDIST counters: 0,1",
"SampleAfterValue": "100007",
"UMask": "0x40",
"Unit": "cpu_core"
@@ -788,7 +840,7 @@
"Data_LA": "1",
"EventCode": "0xd1",
"EventName": "MEM_LOAD_RETIRED.L1_HIT",
- "PublicDescription": "Counts retired load instructions with at least one uop that hit in the L1 data cache. This event includes all SW prefetches and lock instructions regardless of the data source. Available PDIST counters: 0",
+ "PublicDescription": "Counts retired load instructions with at least one uop that hit in the L1 data cache. This event includes all SW prefetches and lock instructions regardless of the data source. Available PDIST counters: 0,1",
"SampleAfterValue": "1000003",
"UMask": "0x101",
"Unit": "cpu_core"
@@ -799,7 +851,7 @@
"Data_LA": "1",
"EventCode": "0xd1",
"EventName": "MEM_LOAD_RETIRED.L1_HIT_L0",
- "PublicDescription": "Counts retired load instructions with at least one uop that hit in the Level 0 of the L1 data cache. This event includes all SW prefetches and lock instructions regardless of the data source. Available PDIST counters: 0",
+ "PublicDescription": "Counts retired load instructions with at least one uop that hit in the Level 0 of the L1 data cache. This event includes all SW prefetches and lock instructions regardless of the data source. Available PDIST counters: 0,1",
"SampleAfterValue": "1000003",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -809,7 +861,7 @@
"Counter": "0,1,2,3",
"EventCode": "0xd1",
"EventName": "MEM_LOAD_RETIRED.L1_HIT_L1",
- "PublicDescription": "Counts retired load instructions with at least one uop that hit in the Level 1 of the L1 data cache. Available PDIST counters: 0",
+ "PublicDescription": "Counts retired load instructions with at least one uop that hit in the Level 1 of the L1 data cache. Available PDIST counters: 0,1",
"SampleAfterValue": "1000003",
"Unit": "cpu_core"
},
@@ -819,7 +871,7 @@
"Data_LA": "1",
"EventCode": "0xd1",
"EventName": "MEM_LOAD_RETIRED.L1_MISS",
- "PublicDescription": "Counts retired load instructions with at least one uop that missed in the L1 cache. Available PDIST counters: 0",
+ "PublicDescription": "Counts retired load instructions with at least one uop that missed in the L1 cache. Available PDIST counters: 0,1",
"SampleAfterValue": "200003",
"UMask": "0x8",
"Unit": "cpu_core"
@@ -830,7 +882,7 @@
"Data_LA": "1",
"EventCode": "0xd1",
"EventName": "MEM_LOAD_RETIRED.L2_HIT",
- "PublicDescription": "Counts retired load instructions with L2 cache hits as data sources. Available PDIST counters: 0",
+ "PublicDescription": "Counts retired load instructions with L2 cache hits as data sources. Available PDIST counters: 0,1",
"SampleAfterValue": "200003",
"UMask": "0x2",
"Unit": "cpu_core"
@@ -841,7 +893,7 @@
"Data_LA": "1",
"EventCode": "0xd1",
"EventName": "MEM_LOAD_RETIRED.L2_MISS",
- "PublicDescription": "Counts retired load instructions missed L2 cache as data sources. Available PDIST counters: 0",
+ "PublicDescription": "Counts retired load instructions missed L2 cache as data sources. Available PDIST counters: 0,1",
"SampleAfterValue": "100021",
"UMask": "0x10",
"Unit": "cpu_core"
@@ -852,7 +904,7 @@
"Data_LA": "1",
"EventCode": "0xd1",
"EventName": "MEM_LOAD_RETIRED.L3_HIT",
- "PublicDescription": "Counts retired load instructions with at least one uop that hit in the L3 cache. Available PDIST counters: 0",
+ "PublicDescription": "Counts retired load instructions with at least one uop that hit in the L3 cache. Available PDIST counters: 0,1",
"SampleAfterValue": "100021",
"UMask": "0x4",
"Unit": "cpu_core"
@@ -863,7 +915,7 @@
"Data_LA": "1",
"EventCode": "0xd1",
"EventName": "MEM_LOAD_RETIRED.L3_MISS",
- "PublicDescription": "Counts retired load instructions with at least one uop that missed in the L3 cache. Available PDIST counters: 0",
+ "PublicDescription": "Counts retired load instructions with at least one uop that missed in the L3 cache. Available PDIST counters: 0,1",
"SampleAfterValue": "50021",
"UMask": "0x20",
"Unit": "cpu_core"
@@ -1037,7 +1089,7 @@
},
{
"BriefDescription": "Counts the number of tagged load uops retired that exceed the latency threshold defined in MEC_CR_PEBS_LD_LAT_THRESHOLD - Only counts with PEBS enabled",
- "Counter": "0,1,2,3,4,5,6,7",
+ "Counter": "0,1",
"Data_LA": "1",
"EventCode": "0xd0",
"EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_128",
@@ -1049,7 +1101,7 @@
},
{
"BriefDescription": "Counts the number of tagged load uops retired that exceed the latency threshold defined in MEC_CR_PEBS_LD_LAT_THRESHOLD - Only counts with PEBS enabled",
- "Counter": "0,1,2,3,4,5,6,7",
+ "Counter": "0,1",
"Data_LA": "1",
"EventCode": "0xd0",
"EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_16",
@@ -1061,7 +1113,7 @@
},
{
"BriefDescription": "Counts the number of tagged load uops retired that exceed the latency threshold defined in MEC_CR_PEBS_LD_LAT_THRESHOLD - Only counts with PEBS enabled",
- "Counter": "0,1,2,3,4,5,6,7",
+ "Counter": "0,1",
"Data_LA": "1",
"EventCode": "0xd0",
"EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_256",
@@ -1073,7 +1125,7 @@
},
{
"BriefDescription": "Counts the number of tagged load uops retired that exceed the latency threshold defined in MEC_CR_PEBS_LD_LAT_THRESHOLD - Only counts with PEBS enabled",
- "Counter": "0,1,2,3,4,5,6,7",
+ "Counter": "0,1",
"Data_LA": "1",
"EventCode": "0xd0",
"EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_32",
@@ -1085,7 +1137,7 @@
},
{
"BriefDescription": "Counts the number of tagged load uops retired that exceed the latency threshold defined in MEC_CR_PEBS_LD_LAT_THRESHOLD - Only counts with PEBS enabled",
- "Counter": "0,1,2,3,4,5,6,7",
+ "Counter": "0,1",
"Data_LA": "1",
"EventCode": "0xd0",
"EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_4",
@@ -1097,7 +1149,7 @@
},
{
"BriefDescription": "Counts the number of tagged load uops retired that exceed the latency threshold defined in MEC_CR_PEBS_LD_LAT_THRESHOLD - Only counts with PEBS enabled",
- "Counter": "0,1,2,3,4,5,6,7",
+ "Counter": "0,1",
"Data_LA": "1",
"EventCode": "0xd0",
"EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_512",
@@ -1109,7 +1161,7 @@
},
{
"BriefDescription": "Counts the number of tagged load uops retired that exceed the latency threshold defined in MEC_CR_PEBS_LD_LAT_THRESHOLD - Only counts with PEBS enabled",
- "Counter": "0,1,2,3,4,5,6,7",
+ "Counter": "0,1",
"Data_LA": "1",
"EventCode": "0xd0",
"EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_64",
@@ -1121,7 +1173,7 @@
},
{
"BriefDescription": "Counts the number of tagged load uops retired that exceed the latency threshold defined in MEC_CR_PEBS_LD_LAT_THRESHOLD - Only counts with PEBS enabled",
- "Counter": "0,1,2,3,4,5,6,7",
+ "Counter": "0,1",
"Data_LA": "1",
"EventCode": "0xd0",
"EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_8",
@@ -1254,6 +1306,18 @@
"Unit": "cpu_atom"
},
{
+ "BriefDescription": "Counts writebacks of modified cachelines that were supplied by the L3 cache.",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0x2A,0x2B",
+ "EventName": "OCR.COREWB_M.L3_HIT",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x7E001E00008",
+ "PublicDescription": "Counts writebacks of modified cachelines that were supplied by the L3 cache. Available PDIST counters: 0",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1",
+ "Unit": "cpu_core"
+ },
+ {
"BriefDescription": "Counts writebacks of non-modified cachelines that have any type of response.",
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xB7",
@@ -1266,6 +1330,18 @@
"Unit": "cpu_atom"
},
{
+ "BriefDescription": "Counts writebacks of non-modified cachelines that were supplied by the L3 cache.",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0x2A,0x2B",
+ "EventName": "OCR.COREWB_NONM.L3_HIT",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x7E001E01000",
+ "PublicDescription": "Counts writebacks of non-modified cachelines that were supplied by the L3 cache. Available PDIST counters: 0",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1",
+ "Unit": "cpu_core"
+ },
+ {
"BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that have any type of response.",
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xB7",
@@ -1303,7 +1379,7 @@
},
{
"BriefDescription": "Counts demand data reads that have any type of response.",
- "Counter": "0,1,2,3",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0x2A,0x2B",
"EventName": "OCR.DEMAND_DATA_RD.ANY_RESPONSE",
"MSRIndex": "0x1a6,0x1a7",
@@ -1315,7 +1391,7 @@
},
{
"BriefDescription": "Counts demand data reads that were supplied by the L3 cache where a snoop hit in another cores caches, data forwarding is required as the data is modified.",
- "Counter": "0,1,2,3",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0x2A,0x2B",
"EventName": "OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM",
"MSRIndex": "0x1a6,0x1a7",
@@ -1327,7 +1403,7 @@
},
{
"BriefDescription": "Counts demand data reads that were supplied by the L3 cache where a snoop hit in another cores caches which forwarded the unmodified data to the requesting core.",
- "Counter": "0,1,2,3",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0x2A,0x2B",
"EventName": "OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD",
"MSRIndex": "0x1a6,0x1a7",
@@ -1363,7 +1439,7 @@
},
{
"BriefDescription": "Counts demand read for ownership (RFO) requests and software prefetches for exclusive ownership (PREFETCHW) that have any type of response.",
- "Counter": "0,1,2,3",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0x2A,0x2B",
"EventName": "OCR.DEMAND_RFO.ANY_RESPONSE",
"MSRIndex": "0x1a6,0x1a7",
@@ -1375,7 +1451,7 @@
},
{
"BriefDescription": "Counts demand read for ownership (RFO) requests and software prefetches for exclusive ownership (PREFETCHW) that were supplied by the L3 cache where a snoop hit in another cores caches, data forwarding is required as the data is modified.",
- "Counter": "0,1,2,3",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0x2A,0x2B",
"EventName": "OCR.DEMAND_RFO.L3_HIT.SNOOP_HITM",
"MSRIndex": "0x1a6,0x1a7",
@@ -1386,6 +1462,18 @@
"Unit": "cpu_core"
},
{
+ "BriefDescription": "Counts all data read, code read, RFO and ITOM requests including demands and prefetches to the core caches (L1 or L2) that were supplied by the L3 cache.",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0x2A,0x2B",
+ "EventName": "OCR.READS_TO_CORE.L3_HIT",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x7E001E04477",
+ "PublicDescription": "Counts all data read, code read, RFO and ITOM requests including demands and prefetches to the core caches (L1 or L2) that were supplied by the L3 cache. Available PDIST counters: 0",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1",
+ "Unit": "cpu_core"
+ },
+ {
"BriefDescription": "Any memory transaction that reached the SQ.",
"Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0x21",
diff --git a/tools/perf/pmu-events/arch/x86/lunarlake/frontend.json b/tools/perf/pmu-events/arch/x86/lunarlake/frontend.json
index e2facc4086e9..b21d602e9f1a 100644
--- a/tools/perf/pmu-events/arch/x86/lunarlake/frontend.json
+++ b/tools/perf/pmu-events/arch/x86/lunarlake/frontend.json
@@ -108,7 +108,7 @@
"EventName": "FRONTEND_RETIRED.ANY_ANT",
"MSRIndex": "0x3F7",
"MSRValue": "0x9",
- "PublicDescription": "Always Not Taken (ANT) conditional retired branches (no BTB entry and not mispredicted) Available PDIST counters: 0",
+ "PublicDescription": "Always Not Taken (ANT) conditional retired branches (no BTB entry and not mispredicted) Available PDIST counters: 0,1",
"SampleAfterValue": "100007",
"UMask": "0x3",
"Unit": "cpu_core"
@@ -120,7 +120,7 @@
"EventName": "FRONTEND_RETIRED.ANY_DSB_MISS",
"MSRIndex": "0x3F7",
"MSRValue": "0x1",
- "PublicDescription": "Counts retired Instructions that experienced DSB (Decode stream buffer i.e. the decoded instruction-cache) miss. Available PDIST counters: 0",
+ "PublicDescription": "Counts retired Instructions that experienced DSB (Decode stream buffer i.e. the decoded instruction-cache) miss. Available PDIST counters: 0,1",
"SampleAfterValue": "100007",
"UMask": "0x3",
"Unit": "cpu_core"
@@ -169,7 +169,7 @@
"EventName": "FRONTEND_RETIRED.DSB_MISS",
"MSRIndex": "0x3F7",
"MSRValue": "0x11",
- "PublicDescription": "Number of retired Instructions that experienced a critical DSB (Decode stream buffer i.e. the decoded instruction-cache) miss. Critical means stalls were exposed to the back-end as a result of the DSB miss. Available PDIST counters: 0",
+ "PublicDescription": "Number of retired Instructions that experienced a critical DSB (Decode stream buffer i.e. the decoded instruction-cache) miss. Critical means stalls were exposed to the back-end as a result of the DSB miss. Available PDIST counters: 0,1",
"SampleAfterValue": "100007",
"UMask": "0x3",
"Unit": "cpu_core"
@@ -199,7 +199,7 @@
"EventName": "FRONTEND_RETIRED.ITLB_MISS",
"MSRIndex": "0x3F7",
"MSRValue": "0x14",
- "PublicDescription": "Counts retired Instructions that experienced iTLB (Instruction TLB) true miss. Available PDIST counters: 0",
+ "PublicDescription": "Counts retired Instructions that experienced iTLB (Instruction TLB) true miss. Available PDIST counters: 0,1",
"SampleAfterValue": "100007",
"UMask": "0x3",
"Unit": "cpu_core"
@@ -211,7 +211,7 @@
"EventName": "FRONTEND_RETIRED.L1I_MISS",
"MSRIndex": "0x3F7",
"MSRValue": "0x12",
- "PublicDescription": "Counts retired Instructions who experienced Instruction L1 Cache true miss. Available PDIST counters: 0",
+ "PublicDescription": "Counts retired Instructions who experienced Instruction L1 Cache true miss. Available PDIST counters: 0,1",
"SampleAfterValue": "100007",
"UMask": "0x3",
"Unit": "cpu_core"
@@ -223,7 +223,7 @@
"EventName": "FRONTEND_RETIRED.L2_MISS",
"MSRIndex": "0x3F7",
"MSRValue": "0x13",
- "PublicDescription": "Counts retired Instructions who experienced Instruction L2 Cache true miss. Available PDIST counters: 0",
+ "PublicDescription": "Counts retired Instructions who experienced Instruction L2 Cache true miss. Available PDIST counters: 0,1",
"SampleAfterValue": "100007",
"UMask": "0x3",
"Unit": "cpu_core"
@@ -235,7 +235,7 @@
"EventName": "FRONTEND_RETIRED.LATENCY_GE_128",
"MSRIndex": "0x3F7",
"MSRValue": "0x608006",
- "PublicDescription": "Counts retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 128 cycles which was not interrupted by a back-end stall. Available PDIST counters: 0",
+ "PublicDescription": "Counts retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 128 cycles which was not interrupted by a back-end stall. Available PDIST counters: 0,1",
"SampleAfterValue": "100007",
"UMask": "0x3",
"Unit": "cpu_core"
@@ -247,7 +247,7 @@
"EventName": "FRONTEND_RETIRED.LATENCY_GE_16",
"MSRIndex": "0x3F7",
"MSRValue": "0x601006",
- "PublicDescription": "Counts retired instructions that are delivered to the back-end after a front-end stall of at least 16 cycles. During this period the front-end delivered no uops. Available PDIST counters: 0",
+ "PublicDescription": "Counts retired instructions that are delivered to the back-end after a front-end stall of at least 16 cycles. During this period the front-end delivered no uops. Available PDIST counters: 0,1",
"SampleAfterValue": "100007",
"UMask": "0x3",
"Unit": "cpu_core"
@@ -259,7 +259,7 @@
"EventName": "FRONTEND_RETIRED.LATENCY_GE_2",
"MSRIndex": "0x3F7",
"MSRValue": "0x600206",
- "PublicDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of at least 2 cycles which was not interrupted by a back-end stall. Available PDIST counters: 0",
+ "PublicDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of at least 2 cycles which was not interrupted by a back-end stall. Available PDIST counters: 0,1",
"SampleAfterValue": "100007",
"UMask": "0x3",
"Unit": "cpu_core"
@@ -271,7 +271,7 @@
"EventName": "FRONTEND_RETIRED.LATENCY_GE_256",
"MSRIndex": "0x3F7",
"MSRValue": "0x610006",
- "PublicDescription": "Counts retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 256 cycles which was not interrupted by a back-end stall. Available PDIST counters: 0",
+ "PublicDescription": "Counts retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 256 cycles which was not interrupted by a back-end stall. Available PDIST counters: 0,1",
"SampleAfterValue": "100007",
"UMask": "0x3",
"Unit": "cpu_core"
@@ -283,7 +283,7 @@
"EventName": "FRONTEND_RETIRED.LATENCY_GE_2_BUBBLES_GE_1",
"MSRIndex": "0x3F7",
"MSRValue": "0x100206",
- "PublicDescription": "Counts retired instructions that are delivered to the back-end after the front-end had at least 1 bubble-slot for a period of 2 cycles. A bubble-slot is an empty issue-pipeline slot while there was no RAT stall. Available PDIST counters: 0",
+ "PublicDescription": "Counts retired instructions that are delivered to the back-end after the front-end had at least 1 bubble-slot for a period of 2 cycles. A bubble-slot is an empty issue-pipeline slot while there was no RAT stall. Available PDIST counters: 0,1",
"SampleAfterValue": "100007",
"UMask": "0x3",
"Unit": "cpu_core"
@@ -295,7 +295,7 @@
"EventName": "FRONTEND_RETIRED.LATENCY_GE_32",
"MSRIndex": "0x3F7",
"MSRValue": "0x602006",
- "PublicDescription": "Counts retired instructions that are delivered to the back-end after a front-end stall of at least 32 cycles. During this period the front-end delivered no uops. Available PDIST counters: 0",
+ "PublicDescription": "Counts retired instructions that are delivered to the back-end after a front-end stall of at least 32 cycles. During this period the front-end delivered no uops. Available PDIST counters: 0,1",
"SampleAfterValue": "100007",
"UMask": "0x3",
"Unit": "cpu_core"
@@ -307,7 +307,7 @@
"EventName": "FRONTEND_RETIRED.LATENCY_GE_4",
"MSRIndex": "0x3F7",
"MSRValue": "0x600406",
- "PublicDescription": "Counts retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 4 cycles which was not interrupted by a back-end stall. Available PDIST counters: 0",
+ "PublicDescription": "Counts retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 4 cycles which was not interrupted by a back-end stall. Available PDIST counters: 0,1",
"SampleAfterValue": "100007",
"UMask": "0x3",
"Unit": "cpu_core"
@@ -319,7 +319,7 @@
"EventName": "FRONTEND_RETIRED.LATENCY_GE_512",
"MSRIndex": "0x3F7",
"MSRValue": "0x620006",
- "PublicDescription": "Counts retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 512 cycles which was not interrupted by a back-end stall. Available PDIST counters: 0",
+ "PublicDescription": "Counts retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 512 cycles which was not interrupted by a back-end stall. Available PDIST counters: 0,1",
"SampleAfterValue": "100007",
"UMask": "0x3",
"Unit": "cpu_core"
@@ -331,7 +331,7 @@
"EventName": "FRONTEND_RETIRED.LATENCY_GE_64",
"MSRIndex": "0x3F7",
"MSRValue": "0x604006",
- "PublicDescription": "Counts retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 64 cycles which was not interrupted by a back-end stall. Available PDIST counters: 0",
+ "PublicDescription": "Counts retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 64 cycles which was not interrupted by a back-end stall. Available PDIST counters: 0,1",
"SampleAfterValue": "100007",
"UMask": "0x3",
"Unit": "cpu_core"
@@ -343,7 +343,7 @@
"EventName": "FRONTEND_RETIRED.LATENCY_GE_8",
"MSRIndex": "0x3F7",
"MSRValue": "0x600806",
- "PublicDescription": "Counts retired instructions that are delivered to the back-end after a front-end stall of at least 8 cycles. During this period the front-end delivered no uops. Available PDIST counters: 0",
+ "PublicDescription": "Counts retired instructions that are delivered to the back-end after a front-end stall of at least 8 cycles. During this period the front-end delivered no uops. Available PDIST counters: 0,1",
"SampleAfterValue": "100007",
"UMask": "0x3",
"Unit": "cpu_core"
@@ -355,7 +355,7 @@
"EventName": "FRONTEND_RETIRED.MISP_ANT",
"MSRIndex": "0x3F7",
"MSRValue": "0x9",
- "PublicDescription": "ANT retired branches that got just mispredicted Available PDIST counters: 0",
+ "PublicDescription": "ANT retired branches that got just mispredicted Available PDIST counters: 0,1",
"SampleAfterValue": "100007",
"UMask": "0x2",
"Unit": "cpu_core"
@@ -367,7 +367,7 @@
"EventName": "FRONTEND_RETIRED.MS_FLOWS",
"MSRIndex": "0x3F7",
"MSRValue": "0x8",
- "PublicDescription": "Counts flows delivered by the Microcode Sequencer Available PDIST counters: 0",
+ "PublicDescription": "Counts flows delivered by the Microcode Sequencer Available PDIST counters: 0,1",
"SampleAfterValue": "100007",
"UMask": "0x3",
"Unit": "cpu_core"
@@ -397,7 +397,7 @@
"EventName": "FRONTEND_RETIRED.STLB_MISS",
"MSRIndex": "0x3F7",
"MSRValue": "0x15",
- "PublicDescription": "Counts retired Instructions that experienced STLB (2nd level TLB) true miss. Available PDIST counters: 0",
+ "PublicDescription": "Counts retired Instructions that experienced STLB (2nd level TLB) true miss. Available PDIST counters: 0,1",
"SampleAfterValue": "100007",
"UMask": "0x3",
"Unit": "cpu_core"
@@ -409,7 +409,7 @@
"EventName": "FRONTEND_RETIRED.UNKNOWN_BRANCH",
"MSRIndex": "0x3F7",
"MSRValue": "0x17",
- "PublicDescription": "Number retired branch instructions that caused the front-end to be resteered when it finds the instruction in a fetch line. This is called Unknown Branch which occurs for the first time a branch instruction is fetched or when the branch is not tracked by the BPU (Branch Prediction Unit) anymore. Available PDIST counters: 0",
+ "PublicDescription": "Number retired branch instructions that caused the front-end to be resteered when it finds the instruction in a fetch line. This is called Unknown Branch which occurs for the first time a branch instruction is fetched or when the branch is not tracked by the BPU (Branch Prediction Unit) anymore. Available PDIST counters: 0,1",
"SampleAfterValue": "100007",
"UMask": "0x3",
"Unit": "cpu_core"
diff --git a/tools/perf/pmu-events/arch/x86/lunarlake/lnl-metrics.json b/tools/perf/pmu-events/arch/x86/lunarlake/lnl-metrics.json
index 3c740962e63e..06390a72110d 100644
--- a/tools/perf/pmu-events/arch/x86/lunarlake/lnl-metrics.json
+++ b/tools/perf/pmu-events/arch/x86/lunarlake/lnl-metrics.json
@@ -1,75 +1,47 @@
[
{
"BriefDescription": "C10 residency percent per package",
- "MetricExpr": "cstate_pkg@c10\\-residency@ / TSC",
+ "MetricExpr": "cstate_pkg@c10\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C10_Pkg_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C1 residency percent per core",
- "MetricExpr": "cstate_core@c1\\-residency@ / TSC",
+ "MetricExpr": "cstate_core@c1\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C1_Core_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C2 residency percent per package",
- "MetricExpr": "cstate_pkg@c2\\-residency@ / TSC",
+ "MetricExpr": "cstate_pkg@c2\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C2_Pkg_Residency",
"ScaleUnit": "100%"
},
{
- "BriefDescription": "C3 residency percent per package",
- "MetricExpr": "cstate_pkg@c3\\-residency@ / TSC",
- "MetricGroup": "Power",
- "MetricName": "C3_Pkg_Residency",
- "ScaleUnit": "100%"
- },
- {
"BriefDescription": "C6 residency percent per core",
- "MetricExpr": "cstate_core@c6\\-residency@ / TSC",
+ "MetricExpr": "cstate_core@c6\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C6_Core_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C6 residency percent per package",
- "MetricExpr": "cstate_pkg@c6\\-residency@ / TSC",
+ "MetricExpr": "cstate_pkg@c6\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C6_Pkg_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C7 residency percent per core",
- "MetricExpr": "cstate_core@c7\\-residency@ / TSC",
+ "MetricExpr": "cstate_core@c7\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C7_Core_Residency",
"ScaleUnit": "100%"
},
{
- "BriefDescription": "C7 residency percent per package",
- "MetricExpr": "cstate_pkg@c7\\-residency@ / TSC",
- "MetricGroup": "Power",
- "MetricName": "C7_Pkg_Residency",
- "ScaleUnit": "100%"
- },
- {
- "BriefDescription": "C8 residency percent per package",
- "MetricExpr": "cstate_pkg@c8\\-residency@ / TSC",
- "MetricGroup": "Power",
- "MetricName": "C8_Pkg_Residency",
- "ScaleUnit": "100%"
- },
- {
- "BriefDescription": "C9 residency percent per package",
- "MetricExpr": "cstate_pkg@c9\\-residency@ / TSC",
- "MetricGroup": "Power",
- "MetricName": "C9_Pkg_Residency",
- "ScaleUnit": "100%"
- },
- {
"BriefDescription": "Percentage of cycles spent in System Management Interrupts.",
"MetricExpr": "((msr@aperf@ - cycles) / msr@aperf@ if msr@smi@ > 0 else 0)",
"MetricGroup": "smi",
@@ -555,7 +527,7 @@
},
{
"BriefDescription": "Average CPU Utilization",
- "MetricExpr": "cpu_atom@CPU_CLK_UNHALTED.REF_TSC@ / TSC",
+ "MetricExpr": "cpu_atom@CPU_CLK_UNHALTED.REF_TSC@ / msr@tsc\\,cpu=cpu_atom@",
"MetricName": "tma_info_system_cpu_utilization",
"Unit": "cpu_atom"
},
@@ -725,6 +697,13 @@
"Unit": "cpu_atom"
},
{
+ "BriefDescription": "Uncore frequency per die [GHZ]",
+ "MetricExpr": "tma_info_system_socket_clks / #num_dies / duration_time / 1e9",
+ "MetricGroup": "SoC",
+ "MetricName": "UNCORE_FREQ",
+ "Unit": "cpu_core"
+ },
+ {
"BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution ports for ALU operations.",
"MetricExpr": "cpu_core@UOPS_DISPATCHED.ALU@ / (6 * tma_info_thread_clks)",
"MetricGroup": "TopdownL5;tma_L5_group;tma_ports_utilized_3m_group",
@@ -755,7 +734,7 @@
{
"BriefDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend",
"DefaultMetricgroupName": "TopdownL1",
- "MetricExpr": "cpu_core@topdown\\-be\\-bound@ / (cpu_core@topdown\\-fe\\-bound@ + cpu_core@topdown\\-bad\\-spec@ + cpu_core@topdown\\-retiring@ + cpu_core@topdown\\-be\\-bound@) + 0 * tma_info_thread_slots",
+ "MetricExpr": "cpu_core@topdown\\-be\\-bound@ / (cpu_core@topdown\\-fe\\-bound@ + cpu_core@topdown\\-bad\\-spec@ + cpu_core@topdown\\-retiring@ + cpu_core@topdown\\-be\\-bound@)",
"MetricGroup": "BvOB;Default;TmaL1;TopdownL1;tma_L1_group",
"MetricName": "tma_backend_bound",
"MetricThreshold": "tma_backend_bound > 0.2",
@@ -767,7 +746,7 @@
{
"BriefDescription": "This category represents fraction of slots wasted due to incorrect speculations",
"DefaultMetricgroupName": "TopdownL1",
- "MetricExpr": "cpu_core@topdown\\-bad\\-spec@ / (cpu_core@topdown\\-fe\\-bound@ + cpu_core@topdown\\-bad\\-spec@ + cpu_core@topdown\\-retiring@ + cpu_core@topdown\\-be\\-bound@) + 0 * tma_info_thread_slots",
+ "MetricExpr": "cpu_core@topdown\\-bad\\-spec@ / (cpu_core@topdown\\-fe\\-bound@ + cpu_core@topdown\\-bad\\-spec@ + cpu_core@topdown\\-retiring@ + cpu_core@topdown\\-be\\-bound@)",
"MetricGroup": "Default;TmaL1;TopdownL1;tma_L1_group",
"MetricName": "tma_bad_speculation",
"MetricThreshold": "tma_bad_speculation > 0.15",
@@ -794,35 +773,35 @@
"Unit": "cpu_core"
},
{
+ "BriefDescription": "Total pipeline cost when the execution is compute-bound - an estimation",
+ "MetricExpr": "100 * (tma_core_bound * tma_divider / (tma_divider + tma_ports_utilization + tma_serializing_operation) + tma_core_bound * (tma_ports_utilization / (tma_divider + tma_ports_utilization + tma_serializing_operation)) * (tma_ports_utilized_3m / (tma_ports_utilized_0 + tma_ports_utilized_1 + tma_ports_utilized_2 + tma_ports_utilized_3m)))",
+ "MetricGroup": "BvCB;Cor;tma_issueComp",
+ "MetricName": "tma_bottleneck_compute_bound_est",
+ "MetricThreshold": "tma_bottleneck_compute_bound_est > 20",
+ "PublicDescription": "Total pipeline cost when the execution is compute-bound - an estimation. Covers Core Bound when High ILP as well as when long-latency execution units are busy. Related metrics: ",
+ "Unit": "cpu_core"
+ },
+ {
"BriefDescription": "Total pipeline cost of external Memory- or Cache-Bandwidth related bottlenecks",
- "MetricExpr": "100 * (tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_mem_bandwidth / (tma_mem_bandwidth + tma_mem_latency)) + tma_memory_bound * (tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_sq_full / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full)) + tma_memory_bound * (tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_fb_full / (tma_dtlb_load + tma_fb_full + tma_l1_latency_capacity + tma_l1_latency_dependency + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)))",
+ "MetricExpr": "100 * (tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_mem_bandwidth / (tma_mem_bandwidth + tma_mem_latency)) + tma_memory_bound * (tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_sq_full / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full)) + tma_memory_bound * (tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_fb_full / (tma_dtlb_load + tma_fb_full + tma_l1_latency_capacity + tma_l1_latency_dependency + tma_lock_latency + tma_split_loads + tma_store_early_blk + tma_store_fwd_blk)))",
"MetricGroup": "BvMB;Mem;MemoryBW;Offcore;tma_issueBW",
- "MetricName": "tma_bottleneck_cache_memory_bandwidth",
- "MetricThreshold": "tma_bottleneck_cache_memory_bandwidth > 20",
+ "MetricName": "tma_bottleneck_data_cache_memory_bandwidth",
+ "MetricThreshold": "tma_bottleneck_data_cache_memory_bandwidth > 20",
"PublicDescription": "Total pipeline cost of external Memory- or Cache-Bandwidth related bottlenecks. Related metrics: tma_fb_full, tma_info_system_dram_bw_use, tma_mem_bandwidth, tma_sq_full",
"Unit": "cpu_core"
},
{
"BriefDescription": "Total pipeline cost of external Memory- or Cache-Latency related bottlenecks",
- "MetricExpr": "100 * (tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_mem_latency / (tma_mem_bandwidth + tma_mem_latency)) + tma_memory_bound * (tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_l3_hit_latency / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full)) + tma_memory_bound * tma_l2_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) + tma_memory_bound * (tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_l1_latency_dependency / (tma_dtlb_load + tma_fb_full + tma_l1_latency_capacity + tma_l1_latency_dependency + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)) + tma_memory_bound * (tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_l1_latency_capacity / (tma_dtlb_load + tma_fb_full + tma_l1_latency_capacity + tma_l1_latency_dependency + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)) + tma_memory_bound * (tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_lock_latency / (tma_dtlb_load + tma_fb_full + tma_l1_latency_capacity + tma_l1_latency_dependency + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)) + tma_memory_bound * (tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_split_loads / (tma_dtlb_load + tma_fb_full + tma_l1_latency_capacity + tma_l1_latency_dependency + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)) + tma_memory_bound * (tma_store_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_split_stores / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency + tma_streaming_stores)) + tma_memory_bound * (tma_store_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_store_latency / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency + tma_streaming_stores)))",
+ "MetricExpr": "100 * (tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_mem_latency / (tma_mem_bandwidth + tma_mem_latency)) + tma_memory_bound * (tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_l3_hit_latency / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full)) + tma_memory_bound * tma_l2_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) + tma_memory_bound * (tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_l1_latency_dependency / (tma_dtlb_load + tma_fb_full + tma_l1_latency_capacity + tma_l1_latency_dependency + tma_lock_latency + tma_split_loads + tma_store_early_blk + tma_store_fwd_blk)) + tma_memory_bound * (tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_l1_latency_capacity / (tma_dtlb_load + tma_fb_full + tma_l1_latency_capacity + tma_l1_latency_dependency + tma_lock_latency + tma_split_loads + tma_store_early_blk + tma_store_fwd_blk)) + tma_memory_bound * (tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_lock_latency / (tma_dtlb_load + tma_fb_full + tma_l1_latency_capacity + tma_l1_latency_dependency + tma_lock_latency + tma_split_loads + tma_store_early_blk + tma_store_fwd_blk)) + tma_memory_bound * (tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_split_loads / (tma_dtlb_load + tma_fb_full + tma_l1_latency_capacity + tma_l1_latency_dependency + tma_lock_latency + tma_split_loads + tma_store_early_blk + tma_store_fwd_blk)) + tma_memory_bound * (tma_store_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_split_stores / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency + tma_streaming_stores)) + tma_memory_bound * (tma_store_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_store_latency / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency + tma_streaming_stores)))",
"MetricGroup": "BvML;Mem;MemoryLat;Offcore;tma_issueLat",
- "MetricName": "tma_bottleneck_cache_memory_latency",
- "MetricThreshold": "tma_bottleneck_cache_memory_latency > 20",
+ "MetricName": "tma_bottleneck_data_cache_memory_latency",
+ "MetricThreshold": "tma_bottleneck_data_cache_memory_latency > 20",
"PublicDescription": "Total pipeline cost of external Memory- or Cache-Latency related bottlenecks. Related metrics: tma_l3_hit_latency, tma_mem_latency",
"Unit": "cpu_core"
},
{
- "BriefDescription": "Total pipeline cost when the execution is compute-bound - an estimation",
- "MetricExpr": "100 * (tma_core_bound * tma_divider / (tma_divider + tma_ports_utilization + tma_serializing_operation) + tma_core_bound * (tma_ports_utilization / (tma_divider + tma_ports_utilization + tma_serializing_operation)) * (tma_ports_utilized_3m / (tma_ports_utilized_0 + tma_ports_utilized_1 + tma_ports_utilized_2 + tma_ports_utilized_3m)))",
- "MetricGroup": "BvCB;Cor;tma_issueComp",
- "MetricName": "tma_bottleneck_compute_bound_est",
- "MetricThreshold": "tma_bottleneck_compute_bound_est > 20",
- "PublicDescription": "Total pipeline cost when the execution is compute-bound - an estimation. Covers Core Bound when High ILP as well as when long-latency execution units are busy. Related metrics: ",
- "Unit": "cpu_core"
- },
- {
"BriefDescription": "Total pipeline cost of instruction fetch bandwidth related bottlenecks (when the front-end could not sustain operations delivery to the back-end)",
- "MetricExpr": "100 * (tma_frontend_bound - (1 - 10 * tma_microcode_sequencer * tma_other_mispredicts / tma_branch_mispredicts) * tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches) - (1 - cpu_core@INST_RETIRED.REP_ITERATION@ / cpu_core@UOPS_RETIRED.MS\\,cmask\\=1@) * (tma_fetch_latency * (tma_ms_switches + tma_branch_resteers * (tma_clears_resteers + tma_mispredicts_resteers * tma_other_mispredicts / tma_branch_mispredicts) / (tma_clears_resteers + tma_mispredicts_resteers + tma_unknown_branches)) / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches) + tma_fetch_bandwidth * tma_ms / (tma_dsb + tma_lsd + tma_mite + tma_ms))) - tma_bottleneck_big_code",
+ "MetricExpr": "100 * (tma_frontend_bound - (1 - 10 * tma_microcode_sequencer * tma_other_mispredicts / tma_branch_mispredicts) * tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches) - (1 - cpu_core@INST_RETIRED.REP_ITERATION@ / cpu_core@UOPS_RETIRED.MS\\,cmask\\=1@) * (tma_fetch_latency * (tma_ms_switches + tma_branch_resteers * (tma_clears_resteers + tma_mispredicts_resteers * tma_other_mispredicts / tma_branch_mispredicts) / (tma_clears_resteers + tma_mispredicts_resteers + tma_unknown_branches)) / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches) + tma_ms)) - tma_bottleneck_big_code",
"MetricGroup": "BvFB;Fed;FetchBW;Frontend",
"MetricName": "tma_bottleneck_instruction_fetch_bw",
"MetricThreshold": "tma_bottleneck_instruction_fetch_bw > 20",
@@ -830,7 +809,7 @@
},
{
"BriefDescription": "Total pipeline cost of irregular execution (e.g",
- "MetricExpr": "100 * ((1 - cpu_core@INST_RETIRED.REP_ITERATION@ / cpu_core@UOPS_RETIRED.MS\\,cmask\\=1@) * (tma_fetch_latency * (tma_ms_switches + tma_branch_resteers * (tma_clears_resteers + tma_mispredicts_resteers * tma_other_mispredicts / tma_branch_mispredicts) / (tma_clears_resteers + tma_mispredicts_resteers + tma_unknown_branches)) / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches) + tma_fetch_bandwidth * tma_ms / (tma_dsb + tma_lsd + tma_mite + tma_ms)) + 10 * tma_microcode_sequencer * tma_other_mispredicts / tma_branch_mispredicts * tma_branch_mispredicts + tma_machine_clears * tma_other_nukes / tma_other_nukes + tma_core_bound * (tma_serializing_operation + cpu_core@RS.EMPTY_RESOURCE@ / tma_info_thread_clks * tma_ports_utilized_0) / (tma_divider + tma_ports_utilization + tma_serializing_operation) + tma_microcode_sequencer / (tma_microcode_sequencer + tma_few_uops_instructions) * (tma_assists / tma_microcode_sequencer) * tma_heavy_operations)",
+ "MetricExpr": "100 * ((1 - cpu_core@INST_RETIRED.REP_ITERATION@ / cpu_core@UOPS_RETIRED.MS\\,cmask\\=1@) * (tma_fetch_latency * (tma_ms_switches + tma_branch_resteers * (tma_clears_resteers + tma_mispredicts_resteers * tma_other_mispredicts / tma_branch_mispredicts) / (tma_clears_resteers + tma_mispredicts_resteers + tma_unknown_branches)) / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches) + tma_ms) + 10 * tma_microcode_sequencer * tma_other_mispredicts / tma_branch_mispredicts * tma_branch_mispredicts + tma_machine_clears * tma_other_nukes / tma_other_nukes + tma_core_bound * (tma_serializing_operation + cpu_core@RS.EMPTY_RESOURCE@ / tma_info_thread_clks * tma_ports_utilized_0) / (tma_divider + tma_ports_utilization + tma_serializing_operation) + tma_microcode_sequencer / (tma_microcode_sequencer + tma_few_uops_instructions) * (tma_assists / tma_microcode_sequencer) * tma_heavy_operations)",
"MetricGroup": "Bad;BvIO;Cor;Ret;tma_issueMS",
"MetricName": "tma_bottleneck_irregular_overhead",
"MetricThreshold": "tma_bottleneck_irregular_overhead > 10",
@@ -839,7 +818,7 @@
},
{
"BriefDescription": "Total pipeline cost of Memory Address Translation related bottlenecks (data-side TLBs)",
- "MetricExpr": "100 * (tma_memory_bound * (tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_dtlb_load / (tma_dtlb_load + tma_fb_full + tma_l1_latency_capacity + tma_l1_latency_dependency + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)) + tma_memory_bound * (tma_store_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_dtlb_store / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency + tma_streaming_stores)))",
+ "MetricExpr": "100 * (tma_memory_bound * (tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_dtlb_load / (tma_dtlb_load + tma_fb_full + tma_l1_latency_capacity + tma_l1_latency_dependency + tma_lock_latency + tma_split_loads + tma_store_early_blk + tma_store_fwd_blk)) + tma_memory_bound * (tma_store_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_dtlb_store / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency + tma_streaming_stores)))",
"MetricGroup": "BvMT;Mem;MemoryTLB;Offcore;tma_issueTLB",
"MetricName": "tma_bottleneck_memory_data_tlbs",
"MetricThreshold": "tma_bottleneck_memory_data_tlbs > 20",
@@ -866,7 +845,7 @@
},
{
"BriefDescription": "Total pipeline cost of remaining bottlenecks in the back-end",
- "MetricExpr": "100 - (tma_bottleneck_big_code + tma_bottleneck_instruction_fetch_bw + tma_bottleneck_mispredictions + tma_bottleneck_cache_memory_bandwidth + tma_bottleneck_cache_memory_latency + tma_bottleneck_memory_data_tlbs + tma_bottleneck_memory_synchronization + tma_bottleneck_compute_bound_est + tma_bottleneck_irregular_overhead + tma_bottleneck_branching_overhead + tma_bottleneck_useful_work)",
+ "MetricExpr": "100 - (tma_bottleneck_big_code + tma_bottleneck_instruction_fetch_bw + tma_bottleneck_mispredictions + tma_bottleneck_data_cache_memory_bandwidth + tma_bottleneck_data_cache_memory_latency + tma_bottleneck_memory_data_tlbs + tma_bottleneck_memory_synchronization + tma_bottleneck_compute_bound_est + tma_bottleneck_irregular_overhead + tma_bottleneck_branching_overhead + tma_bottleneck_useful_work)",
"MetricGroup": "BvOB;Cor;Offcore",
"MetricName": "tma_bottleneck_other_bottlenecks",
"MetricThreshold": "tma_bottleneck_other_bottlenecks > 20",
@@ -883,7 +862,7 @@
},
{
"BriefDescription": "This metric represents fraction of slots the CPU has wasted due to Branch Misprediction",
- "MetricExpr": "cpu_core@topdown\\-br\\-mispredict@ / (cpu_core@topdown\\-fe\\-bound@ + cpu_core@topdown\\-bad\\-spec@ + cpu_core@topdown\\-retiring@ + cpu_core@topdown\\-be\\-bound@) + 0 * tma_info_thread_slots",
+ "MetricExpr": "cpu_core@topdown\\-br\\-mispredict@ / (cpu_core@topdown\\-fe\\-bound@ + cpu_core@topdown\\-bad\\-spec@ + cpu_core@topdown\\-retiring@ + cpu_core@topdown\\-be\\-bound@)",
"MetricGroup": "BadSpec;BrMispredicts;BvMP;TmaL2;TopdownL2;tma_L2_group;tma_bad_speculation_group;tma_issueBM",
"MetricName": "tma_branch_mispredicts",
"MetricThreshold": "tma_branch_mispredicts > 0.1 & tma_bad_speculation > 0.15",
@@ -1023,7 +1002,6 @@
},
{
"BriefDescription": "This metric estimates fraction of cycles while the memory subsystem was handling synchronizations due to contested accesses",
- "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "(cpu_core@MEM_LOAD_L3_HIT_RETIRED.XSNP_MISS@ * min(cpu_core@MEM_LOAD_L3_HIT_RETIRED.XSNP_MISS@R, 24 * tma_info_system_core_frequency) + cpu_core@MEM_LOAD_L3_HIT_RETIRED.XSNP_HITM@ * min(cpu_core@MEM_LOAD_L3_HIT_RETIRED.XSNP_HITM@R, 25 * tma_info_system_core_frequency)) * (1 + cpu_core@MEM_LOAD_RETIRED.FB_HIT@ / cpu_core@MEM_LOAD_RETIRED.L1_MISS@ / 2) / tma_info_thread_clks",
"MetricGroup": "BvMS;DataSharing;LockCont;Offcore;Snoop;TopdownL4;tma_L4_group;tma_issueSyncxn;tma_l3_bound_group",
"MetricName": "tma_contested_accesses",
@@ -1076,7 +1054,7 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles in which CPU was likely limited due to DSB (decoded uop cache) fetch pipeline",
- "MetricExpr": "(cpu_core@IDQ.DSB_UOPS\\,cmask\\=0x8\\,inv\\=0x1@ + cpu_core@IDQ.DSB_UOPS@ / (cpu_core@IDQ.DSB_UOPS@ + cpu_core@IDQ.MITE_UOPS@) * (cpu_core@IDQ_BUBBLES.CYCLES_0_UOPS_DELIV.CORE@ - cpu_core@IDQ_BUBBLES.FETCH_LATENCY@)) / tma_info_thread_clks",
+ "MetricExpr": "(cpu_core@IDQ.DSB_UOPS\\,cmask\\=0x8\\,inv\\=0x1@ / 2 + cpu_core@IDQ.DSB_UOPS@ / (cpu_core@IDQ.DSB_UOPS@ + cpu_core@IDQ.MITE_UOPS@) * (cpu_core@IDQ_BUBBLES.STARVATION_CYCLES@ - cpu_core@IDQ_BUBBLES.FETCH_LATENCY@)) / tma_info_thread_clks",
"MetricGroup": "DSB;FetchBW;TopdownL3;tma_L3_group;tma_fetch_bandwidth_group",
"MetricName": "tma_dsb",
"MetricThreshold": "tma_dsb > 0.15 & tma_fetch_bandwidth > 0.2",
@@ -1130,7 +1108,7 @@
"MetricGroup": "BvMB;MemoryBW;TopdownL4;tma_L4_group;tma_issueBW;tma_issueSL;tma_issueSmSt;tma_l1_bound_group",
"MetricName": "tma_fb_full",
"MetricThreshold": "tma_fb_full > 0.3",
- "PublicDescription": "This metric does a *rough estimation* of how often L1D Fill Buffer unavailability limited additional L1D miss memory access requests to proceed. The higher the metric value; the deeper the memory hierarchy level the misses are satisfied from (metric values >1 are valid). Often it hints on approaching bandwidth limits (to L2 cache; L3 cache or external memory). Related metrics: tma_bottleneck_cache_memory_bandwidth, tma_info_system_dram_bw_use, tma_mem_bandwidth, tma_sq_full, tma_store_latency, tma_streaming_stores",
+ "PublicDescription": "This metric does a *rough estimation* of how often L1D Fill Buffer unavailability limited additional L1D miss memory access requests to proceed. The higher the metric value; the deeper the memory hierarchy level the misses are satisfied from (metric values >1 are valid). Often it hints on approaching bandwidth limits (to L2 cache; L3 cache or external memory). Related metrics: tma_bottleneck_data_cache_memory_bandwidth, tma_info_system_dram_bw_use, tma_mem_bandwidth, tma_sq_full, tma_store_latency, tma_streaming_stores",
"ScaleUnit": "100%",
"Unit": "cpu_core"
},
@@ -1147,7 +1125,7 @@
},
{
"BriefDescription": "This metric represents fraction of slots the CPU was stalled due to Frontend latency issues",
- "MetricExpr": "cpu_core@topdown\\-fetch\\-lat@ / (cpu_core@topdown\\-fe\\-bound@ + cpu_core@topdown\\-bad\\-spec@ + cpu_core@topdown\\-retiring@ + cpu_core@topdown\\-be\\-bound@) + 0 * tma_info_thread_slots",
+ "MetricExpr": "cpu_core@topdown\\-fetch\\-lat@ / (cpu_core@topdown\\-fe\\-bound@ + cpu_core@topdown\\-bad\\-spec@ + cpu_core@topdown\\-retiring@ + cpu_core@topdown\\-be\\-bound@)",
"MetricGroup": "Frontend;TmaL2;TopdownL2;tma_L2_group;tma_frontend_bound_group",
"MetricName": "tma_fetch_latency",
"MetricThreshold": "tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15",
@@ -1197,7 +1175,7 @@
},
{
"BriefDescription": "This metric approximates arithmetic floating-point (FP) scalar uops fraction the CPU has retired",
- "MetricExpr": "cpu_core@FP_ARITH_INST_RETIRED.SCALAR@ / (tma_retiring * tma_info_thread_slots)",
+ "MetricExpr": "cpu_core@FP_ARITH_OPS_RETIRED.SCALAR@ / (tma_retiring * tma_info_thread_slots)",
"MetricGroup": "Compute;Flops;TopdownL4;tma_L4_group;tma_fp_arith_group;tma_issue2P",
"MetricName": "tma_fp_scalar",
"MetricThreshold": "tma_fp_scalar > 0.1 & (tma_fp_arith > 0.2 & tma_light_operations > 0.6)",
@@ -1207,7 +1185,7 @@
},
{
"BriefDescription": "This metric approximates arithmetic floating-point (FP) vector uops fraction the CPU has retired aggregated across all vector widths",
- "MetricExpr": "cpu_core@FP_ARITH_INST_RETIRED.VECTOR@ / (tma_retiring * tma_info_thread_slots)",
+ "MetricExpr": "cpu_core@FP_ARITH_OPS_RETIRED.VECTOR@ / (tma_retiring * tma_info_thread_slots)",
"MetricGroup": "Compute;Flops;TopdownL4;tma_L4_group;tma_fp_arith_group;tma_issue2P",
"MetricName": "tma_fp_vector",
"MetricThreshold": "tma_fp_vector > 0.1 & (tma_fp_arith > 0.2 & tma_light_operations > 0.6)",
@@ -1217,7 +1195,7 @@
},
{
"BriefDescription": "This metric approximates arithmetic FP vector uops fraction the CPU has retired for 128-bit wide vectors",
- "MetricExpr": "(cpu_core@FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE@ + cpu_core@FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE@) / (tma_retiring * tma_info_thread_slots)",
+ "MetricExpr": "(cpu_core@FP_ARITH_OPS_RETIRED.128B_PACKED_DOUBLE@ + cpu_core@FP_ARITH_OPS_RETIRED.128B_PACKED_SINGLE@) / (tma_retiring * tma_info_thread_slots)",
"MetricGroup": "Compute;Flops;TopdownL5;tma_L5_group;tma_fp_vector_group;tma_issue2P",
"MetricName": "tma_fp_vector_128b",
"MetricThreshold": "tma_fp_vector_128b > 0.1 & (tma_fp_vector > 0.1 & (tma_fp_arith > 0.2 & tma_light_operations > 0.6))",
@@ -1227,7 +1205,7 @@
},
{
"BriefDescription": "This metric approximates arithmetic FP vector uops fraction the CPU has retired for 256-bit wide vectors",
- "MetricExpr": "cpu_core@FP_ARITH_INST_RETIRED.VECTOR\\,umask\\=0x30@ / (tma_retiring * tma_info_thread_slots)",
+ "MetricExpr": "cpu_core@FP_ARITH_OPS_RETIRED.VECTOR\\,umask\\=0x30@ / (tma_retiring * tma_info_thread_slots)",
"MetricGroup": "Compute;Flops;TopdownL5;tma_L5_group;tma_fp_vector_group;tma_issue2P",
"MetricName": "tma_fp_vector_256b",
"MetricThreshold": "tma_fp_vector_256b > 0.1 & (tma_fp_vector > 0.1 & (tma_fp_arith > 0.2 & tma_light_operations > 0.6))",
@@ -1238,7 +1216,7 @@
{
"BriefDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend",
"DefaultMetricgroupName": "TopdownL1",
- "MetricExpr": "cpu_core@topdown\\-fe\\-bound@ / (cpu_core@topdown\\-fe\\-bound@ + cpu_core@topdown\\-bad\\-spec@ + cpu_core@topdown\\-retiring@ + cpu_core@topdown\\-be\\-bound@) + 0 * tma_info_thread_slots",
+ "MetricExpr": "cpu_core@topdown\\-fe\\-bound@ / (cpu_core@topdown\\-fe\\-bound@ + cpu_core@topdown\\-bad\\-spec@ + cpu_core@topdown\\-retiring@ + cpu_core@topdown\\-be\\-bound@)",
"MetricGroup": "BvFB;BvIO;Default;PGO;TmaL1;TopdownL1;tma_L1_group",
"MetricName": "tma_frontend_bound",
"MetricThreshold": "tma_frontend_bound > 0.15",
@@ -1259,7 +1237,7 @@
},
{
"BriefDescription": "This metric represents fraction of slots where the CPU was retiring heavy-weight operations -- instructions that require two or more uops or micro-coded sequences",
- "MetricExpr": "cpu_core@topdown\\-heavy\\-ops@ / (cpu_core@topdown\\-fe\\-bound@ + cpu_core@topdown\\-bad\\-spec@ + cpu_core@topdown\\-retiring@ + cpu_core@topdown\\-be\\-bound@) + 0 * tma_info_thread_slots",
+ "MetricExpr": "cpu_core@topdown\\-heavy\\-ops@ / (cpu_core@topdown\\-fe\\-bound@ + cpu_core@topdown\\-bad\\-spec@ + cpu_core@topdown\\-retiring@ + cpu_core@topdown\\-be\\-bound@)",
"MetricGroup": "Retire;TmaL2;TopdownL2;tma_L2_group;tma_retiring_group",
"MetricName": "tma_heavy_operations",
"MetricThreshold": "tma_heavy_operations > 0.1",
@@ -1437,7 +1415,7 @@
},
{
"BriefDescription": "Floating Point Operations Per Cycle",
- "MetricExpr": "(cpu_core@FP_ARITH_INST_RETIRED.SCALAR@ + 2 * cpu_core@FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE@ + 4 * cpu_core@FP_ARITH_INST_RETIRED.4_FLOPS@ + 8 * cpu_core@FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE@) / tma_info_thread_clks",
+ "MetricExpr": "(cpu_core@FP_ARITH_OPS_RETIRED.SCALAR@ + 2 * cpu_core@FP_ARITH_OPS_RETIRED.128B_PACKED_DOUBLE@ + 4 * cpu_core@FP_ARITH_OPS_RETIRED.4_FLOPS@ + 8 * cpu_core@FP_ARITH_OPS_RETIRED.256B_PACKED_SINGLE@) / tma_info_thread_clks",
"MetricGroup": "Flops;Ret",
"MetricName": "tma_info_core_flopc",
"Unit": "cpu_core"
@@ -1578,7 +1556,7 @@
},
{
"BriefDescription": "Instructions per FP Arithmetic instruction (lower number means higher occurrence rate)",
- "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / (cpu_core@FP_ARITH_INST_RETIRED.SCALAR@ + cpu_core@FP_ARITH_INST_RETIRED.VECTOR@)",
+ "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / (cpu_core@FP_ARITH_OPS_RETIRED.SCALAR@ + cpu_core@FP_ARITH_OPS_RETIRED.VECTOR@)",
"MetricGroup": "Flops;InsType",
"MetricName": "tma_info_inst_mix_iparith",
"MetricThreshold": "tma_info_inst_mix_iparith < 10",
@@ -1587,7 +1565,7 @@
},
{
"BriefDescription": "Instructions per FP Arithmetic AVX/SSE 128-bit instruction (lower number means higher occurrence rate)",
- "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / (cpu_core@FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE@ + cpu_core@FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE@)",
+ "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / (cpu_core@FP_ARITH_OPS_RETIRED.128B_PACKED_DOUBLE@ + cpu_core@FP_ARITH_OPS_RETIRED.128B_PACKED_SINGLE@)",
"MetricGroup": "Flops;FpVector;InsType",
"MetricName": "tma_info_inst_mix_iparith_avx128",
"MetricThreshold": "tma_info_inst_mix_iparith_avx128 < 10",
@@ -1596,7 +1574,7 @@
},
{
"BriefDescription": "Instructions per FP Arithmetic AVX* 256-bit instruction (lower number means higher occurrence rate)",
- "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / (cpu_core@FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE@ + cpu_core@FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE@)",
+ "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / (cpu_core@FP_ARITH_OPS_RETIRED.256B_PACKED_DOUBLE@ + cpu_core@FP_ARITH_OPS_RETIRED.256B_PACKED_SINGLE@)",
"MetricGroup": "Flops;FpVector;InsType",
"MetricName": "tma_info_inst_mix_iparith_avx256",
"MetricThreshold": "tma_info_inst_mix_iparith_avx256 < 10",
@@ -1605,7 +1583,7 @@
},
{
"BriefDescription": "Instructions per FP Arithmetic Scalar Double-Precision instruction (lower number means higher occurrence rate)",
- "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / cpu_core@FP_ARITH_INST_RETIRED.SCALAR_DOUBLE@",
+ "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / cpu_core@FP_ARITH_OPS_RETIRED.SCALAR_DOUBLE@",
"MetricGroup": "Flops;FpScalar;InsType",
"MetricName": "tma_info_inst_mix_iparith_scalar_dp",
"MetricThreshold": "tma_info_inst_mix_iparith_scalar_dp < 10",
@@ -1614,7 +1592,7 @@
},
{
"BriefDescription": "Instructions per FP Arithmetic Scalar Single-Precision instruction (lower number means higher occurrence rate)",
- "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / cpu_core@FP_ARITH_INST_RETIRED.SCALAR_SINGLE@",
+ "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / cpu_core@FP_ARITH_OPS_RETIRED.SCALAR_SINGLE@",
"MetricGroup": "Flops;FpScalar;InsType",
"MetricName": "tma_info_inst_mix_iparith_scalar_sp",
"MetricThreshold": "tma_info_inst_mix_iparith_scalar_sp < 10",
@@ -1639,7 +1617,7 @@
},
{
"BriefDescription": "Instructions per Floating Point (FP) Operation (lower number means higher occurrence rate)",
- "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / (cpu_core@FP_ARITH_INST_RETIRED.SCALAR@ + 2 * cpu_core@FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE@ + 4 * cpu_core@FP_ARITH_INST_RETIRED.4_FLOPS@ + 8 * cpu_core@FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE@)",
+ "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / (cpu_core@FP_ARITH_OPS_RETIRED.SCALAR@ + 2 * cpu_core@FP_ARITH_OPS_RETIRED.128B_PACKED_DOUBLE@ + 4 * cpu_core@FP_ARITH_OPS_RETIRED.4_FLOPS@ + 8 * cpu_core@FP_ARITH_OPS_RETIRED.256B_PACKED_SINGLE@)",
"MetricGroup": "Flops;InsType",
"MetricName": "tma_info_inst_mix_ipflop",
"MetricThreshold": "tma_info_inst_mix_ipflop < 10",
@@ -1694,7 +1672,7 @@
},
{
"BriefDescription": "Average per-thread data fill bandwidth to the L1 data cache [GB / sec]",
- "MetricExpr": "64 * cpu_core@L1D.REPLACEMENT@ / 1e9 / tma_info_system_time",
+ "MetricExpr": "64 * cpu_core@L1D.L1_REPLACEMENT@ / 1e9 / tma_info_system_time",
"MetricGroup": "Mem;MemoryBW",
"MetricName": "tma_info_memory_l1d_cache_fill_bw",
"Unit": "cpu_core"
@@ -1707,6 +1685,13 @@
"Unit": "cpu_core"
},
{
+ "BriefDescription": "L0 cache true misses per kilo instruction for retired demand loads",
+ "MetricExpr": "1e3 * (cpu_core@MEM_LOAD_RETIRED.L1_MISS@ + cpu_core@MEM_LOAD_RETIRED.L1_HIT_L1@) / cpu_core@INST_RETIRED.ANY@",
+ "MetricGroup": "CacheHits;Mem",
+ "MetricName": "tma_info_memory_l1dl0_mpki",
+ "Unit": "cpu_core"
+ },
+ {
"BriefDescription": "L1 cache true misses per kilo instruction for retired demand loads",
"MetricExpr": "1e3 * cpu_core@MEM_LOAD_RETIRED.L1_MISS@ / cpu_core@INST_RETIRED.ANY@",
"MetricGroup": "CacheHits;Mem",
@@ -1922,6 +1907,13 @@
"Unit": "cpu_core"
},
{
+ "BriefDescription": "Average number of uops fetched from MS per cycle",
+ "MetricExpr": "cpu_core@IDQ.MS_UOPS@ / cpu_core@IDQ.MS_UOPS\\,cmask\\=1@",
+ "MetricGroup": "Fed;FetchLat;MicroSeq",
+ "MetricName": "tma_info_pipeline_fetch_ms",
+ "Unit": "cpu_core"
+ },
+ {
"BriefDescription": "Instructions per a microcode Assist invocation",
"MetricExpr": "cpu_core@INST_RETIRED.ANY@ / cpu_core@ASSISTS.ANY@",
"MetricGroup": "MicroSeq;Pipeline;Ret;Retire",
@@ -1955,7 +1947,7 @@
},
{
"BriefDescription": "Measured Average Core Frequency for unhalted processors [GHz]",
- "MetricExpr": "tma_info_system_turbo_utilization * TSC / 1e9 / tma_info_system_time",
+ "MetricExpr": "tma_info_system_turbo_utilization * msr@tsc\\,cpu=cpu_core@ / 1e9 / tma_info_system_time",
"MetricGroup": "Power;Summary",
"MetricName": "tma_info_system_core_frequency",
"Unit": "cpu_core"
@@ -1969,14 +1961,22 @@
},
{
"BriefDescription": "Average number of utilized CPUs",
- "MetricExpr": "cpu_core@CPU_CLK_UNHALTED.REF_TSC@ / TSC",
+ "MetricExpr": "cpu_core@CPU_CLK_UNHALTED.REF_TSC@ / msr@tsc\\,cpu=cpu_core@",
"MetricGroup": "Summary",
"MetricName": "tma_info_system_cpus_utilized",
"Unit": "cpu_core"
},
{
+ "BriefDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]",
+ "MetricExpr": "32 * UNC_M_TOTAL_DATA / 1e9 / tma_info_system_time",
+ "MetricGroup": "HPC;MemOffcore;MemoryBW;SoC;tma_issueBW",
+ "MetricName": "tma_info_system_dram_bw_use",
+ "PublicDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]. Related metrics: tma_bottleneck_data_cache_memory_bandwidth, tma_fb_full, tma_mem_bandwidth, tma_sq_full",
+ "Unit": "cpu_core"
+ },
+ {
"BriefDescription": "Giga Floating Point Operations Per Second",
- "MetricExpr": "(cpu_core@FP_ARITH_INST_RETIRED.SCALAR@ + 2 * cpu_core@FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE@ + 4 * cpu_core@FP_ARITH_INST_RETIRED.4_FLOPS@ + 8 * cpu_core@FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE@) / 1e9 / tma_info_system_time",
+ "MetricExpr": "(cpu_core@FP_ARITH_OPS_RETIRED.SCALAR@ + 2 * cpu_core@FP_ARITH_OPS_RETIRED.128B_PACKED_DOUBLE@ + 4 * cpu_core@FP_ARITH_OPS_RETIRED.4_FLOPS@ + 8 * cpu_core@FP_ARITH_OPS_RETIRED.256B_PACKED_SINGLE@) / 1e9 / tma_info_system_time",
"MetricGroup": "Cor;Flops;HPC",
"MetricName": "tma_info_system_gflops",
"PublicDescription": "Giga Floating Point Operations Per Second. Aggregate across all supported options of: FP precisions, scalar and vector instructions, vector-width",
@@ -2021,6 +2021,13 @@
"Unit": "cpu_core"
},
{
+ "BriefDescription": "Socket actual clocks when any core is active on that socket",
+ "MetricExpr": "UNC_CLOCK.SOCKET",
+ "MetricGroup": "SoC",
+ "MetricName": "tma_info_system_socket_clks",
+ "Unit": "cpu_core"
+ },
+ {
"BriefDescription": "Run duration time in seconds",
"MetricExpr": "duration_time",
"MetricGroup": "Summary",
@@ -2036,6 +2043,13 @@
"Unit": "cpu_core"
},
{
+ "BriefDescription": "Measured Average Uncore Frequency for the SoC [GHz]",
+ "MetricExpr": "tma_info_system_socket_clks / 1e9 / tma_info_system_time",
+ "MetricGroup": "SoC",
+ "MetricName": "tma_info_system_uncore_frequency",
+ "Unit": "cpu_core"
+ },
+ {
"BriefDescription": "Per-Logical Processor actual clocks when the Logical Processor is active.",
"MetricExpr": "cpu_core@CPU_CLK_UNHALTED.THREAD@",
"MetricGroup": "Pipeline",
@@ -2156,12 +2170,12 @@
"Unit": "cpu_core"
},
{
- "BriefDescription": "This metric([SKL+] roughly; [LNL]) estimates fraction of cycles with demand load accesses that hit the L1D cache",
- "MetricExpr": "4 * cpu_core@DEPENDENT_LOADS.ANY@ / tma_info_thread_clks",
+ "BriefDescription": "This metric ([SKL+] roughly; [LNL]) estimates fraction of cycles with demand load accesses that hit the L1D cache",
+ "MetricExpr": "4 * cpu_core@DEPENDENT_LOADS.ANY\\,cmask\\=1@ / tma_info_thread_clks",
"MetricGroup": "BvML;MemoryLat;TopdownL4;tma_L4_group;tma_l1_bound_group",
"MetricName": "tma_l1_latency_dependency",
"MetricThreshold": "tma_l1_latency_dependency > 0.1 & (tma_l1_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
- "PublicDescription": "This metric([SKL+] roughly; [LNL]) estimates fraction of cycles with demand load accesses that hit the L1D cache. The short latency of the L1D cache may be exposed in pointer-chasing memory access patterns as an example. Sample with: MEM_LOAD_UOPS_RETIRED.L1_HIT_PS",
+ "PublicDescription": "This metric ([SKL+] roughly; [LNL]) estimates fraction of cycles with demand load accesses that hit the L1D cache. The short latency of the L1D cache may be exposed in pointer-chasing memory access patterns as an example. Sample with: MEM_LOAD_UOPS_RETIRED.L1_HIT_PS",
"ScaleUnit": "100%",
"Unit": "cpu_core"
},
@@ -2177,7 +2191,6 @@
},
{
"BriefDescription": "This metric represents fraction of cycles with demand load accesses that hit the L2 cache under unloaded scenarios (possibly L2 latency limited)",
- "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "cpu_core@MEM_LOAD_RETIRED.L2_HIT@ * min(cpu_core@MEM_LOAD_RETIRED.L2_HIT@R, 3 * tma_info_system_core_frequency) * (1 + cpu_core@MEM_LOAD_RETIRED.FB_HIT@ / cpu_core@MEM_LOAD_RETIRED.L1_MISS@ / 2) / tma_info_thread_clks",
"MetricGroup": "MemoryLat;TopdownL4;tma_L4_group;tma_l2_bound_group",
"MetricName": "tma_l2_hit_latency",
@@ -2198,12 +2211,11 @@
},
{
"BriefDescription": "This metric estimates fraction of cycles with demand load accesses that hit the L3 cache under unloaded scenarios (possibly L3 latency limited)",
- "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "cpu_core@MEM_LOAD_RETIRED.L3_HIT@ * min(cpu_core@MEM_LOAD_RETIRED.L3_HIT@R, 9 * tma_info_system_core_frequency) * (1 + cpu_core@MEM_LOAD_RETIRED.FB_HIT@ / cpu_core@MEM_LOAD_RETIRED.L1_MISS@ / 2) / tma_info_thread_clks",
"MetricGroup": "BvML;MemoryLat;TopdownL4;tma_L4_group;tma_issueLat;tma_l3_bound_group",
"MetricName": "tma_l3_hit_latency",
"MetricThreshold": "tma_l3_hit_latency > 0.1 & (tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
- "PublicDescription": "This metric estimates fraction of cycles with demand load accesses that hit the L3 cache under unloaded scenarios (possibly L3 latency limited). Avoiding private cache misses (i.e. L2 misses/L3 hits) will improve the latency; reduce contention with sibling physical cores and increase performance. Note the value of this node may overlap with its siblings. Sample with: MEM_LOAD_RETIRED.L3_HIT_PS. Related metrics: tma_bottleneck_cache_memory_latency, tma_mem_latency",
+ "PublicDescription": "This metric estimates fraction of cycles with demand load accesses that hit the L3 cache under unloaded scenarios (possibly L3 latency limited). Avoiding private cache misses (i.e. L2 misses/L3 hits) will improve the latency; reduce contention with sibling physical cores and increase performance. Note the value of this node may overlap with its siblings. Sample with: MEM_LOAD_RETIRED.L3_HIT_PS. Related metrics: tma_bottleneck_data_cache_memory_latency, tma_mem_latency",
"ScaleUnit": "100%",
"Unit": "cpu_core"
},
@@ -2285,6 +2297,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles the CPU spent handling cache misses due to lock operations",
+ "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "cpu_core@MEM_INST_RETIRED.LOCK_LOADS@ * cpu_core@MEM_INST_RETIRED.LOCK_LOADS@R / tma_info_thread_clks",
"MetricGroup": "LockCont;Offcore;TopdownL4;tma_L4_group;tma_issueRFO;tma_l1_bound_group",
"MetricName": "tma_lock_latency",
@@ -2295,7 +2308,7 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles in which CPU was likely limited due to LSD (Loop Stream Detector) unit",
- "MetricExpr": "cpu_core@LSD.UOPS\\,cmask\\=0x8\\,inv\\=0x1@ / tma_info_thread_clks",
+ "MetricExpr": "cpu_core@LSD.UOPS\\,cmask\\=0x8\\,inv\\=0x1@ / tma_info_thread_clks / 2",
"MetricGroup": "FetchBW;LSD;TopdownL3;tma_L3_group;tma_fetch_bandwidth_group",
"MetricName": "tma_lsd",
"MetricThreshold": "tma_lsd > 0.15 & tma_fetch_bandwidth > 0.2",
@@ -2320,7 +2333,7 @@
"MetricGroup": "BvMB;MemoryBW;Offcore;TopdownL4;tma_L4_group;tma_dram_bound_group;tma_issueBW",
"MetricName": "tma_mem_bandwidth",
"MetricThreshold": "tma_mem_bandwidth > 0.2 & (tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
- "PublicDescription": "This metric estimates fraction of cycles where the core's performance was likely hurt due to approaching bandwidth limits of external memory - DRAM ([SPR-HBM] and/or HBM). The underlying heuristic assumes that a similar off-core traffic is generated by all IA cores. This metric does not aggregate non-data-read requests by this logical processor; requests from other IA Logical Processors/Physical Cores/sockets; or other non-IA devices like GPU; hence the maximum external memory bandwidth limits may or may not be approached when this metric is flagged (see Uncore counters for that). Related metrics: tma_bottleneck_cache_memory_bandwidth, tma_fb_full, tma_info_system_dram_bw_use, tma_sq_full",
+ "PublicDescription": "This metric estimates fraction of cycles where the core's performance was likely hurt due to approaching bandwidth limits of external memory - DRAM ([SPR-HBM] and/or HBM). The underlying heuristic assumes that a similar off-core traffic is generated by all IA cores. This metric does not aggregate non-data-read requests by this logical processor; requests from other IA Logical Processors/Physical Cores/sockets; or other non-IA devices like GPU; hence the maximum external memory bandwidth limits may or may not be approached when this metric is flagged (see Uncore counters for that). Related metrics: tma_bottleneck_data_cache_memory_bandwidth, tma_fb_full, tma_info_system_dram_bw_use, tma_sq_full",
"ScaleUnit": "100%",
"Unit": "cpu_core"
},
@@ -2330,13 +2343,13 @@
"MetricGroup": "BvML;MemoryLat;Offcore;TopdownL4;tma_L4_group;tma_dram_bound_group;tma_issueLat",
"MetricName": "tma_mem_latency",
"MetricThreshold": "tma_mem_latency > 0.1 & (tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
- "PublicDescription": "This metric estimates fraction of cycles where the performance was likely hurt due to latency from external memory - DRAM ([SPR-HBM] and/or HBM). This metric does not aggregate requests from other Logical Processors/Physical Cores/sockets (see Uncore counters for that). Related metrics: tma_bottleneck_cache_memory_latency, tma_l3_hit_latency",
+ "PublicDescription": "This metric estimates fraction of cycles where the performance was likely hurt due to latency from external memory - DRAM ([SPR-HBM] and/or HBM). This metric does not aggregate requests from other Logical Processors/Physical Cores/sockets (see Uncore counters for that). Related metrics: tma_bottleneck_data_cache_memory_latency, tma_l3_hit_latency",
"ScaleUnit": "100%",
"Unit": "cpu_core"
},
{
"BriefDescription": "This metric represents fraction of slots the Memory subsystem within the Backend was a bottleneck",
- "MetricExpr": "cpu_core@topdown\\-mem\\-bound@ / (cpu_core@topdown\\-fe\\-bound@ + cpu_core@topdown\\-bad\\-spec@ + cpu_core@topdown\\-retiring@ + cpu_core@topdown\\-be\\-bound@) + 0 * tma_info_thread_slots",
+ "MetricExpr": "cpu_core@topdown\\-mem\\-bound@ / (cpu_core@topdown\\-fe\\-bound@ + cpu_core@topdown\\-bad\\-spec@ + cpu_core@topdown\\-retiring@ + cpu_core@topdown\\-be\\-bound@)",
"MetricGroup": "Backend;TmaL2;TopdownL2;tma_L2_group;tma_backend_bound_group",
"MetricName": "tma_memory_bound",
"MetricThreshold": "tma_memory_bound > 0.2 & tma_backend_bound > 0.2",
@@ -2347,7 +2360,6 @@
},
{
"BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to LFENCE Instructions.",
- "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "13 * cpu_core@MISC2_RETIRED.LFENCE@ / tma_info_thread_clks",
"MetricGroup": "TopdownL4;tma_L4_group;tma_serializing_operation_group",
"MetricName": "tma_memory_fence",
@@ -2386,7 +2398,7 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles in which CPU was likely limited due to the MITE pipeline (the legacy decode pipeline)",
- "MetricExpr": "(cpu_core@IDQ.MITE_UOPS\\,cmask\\=0x8\\,inv\\=0x1@ / 2 + cpu_core@IDQ.MITE_UOPS@ / (cpu_core@IDQ.DSB_UOPS@ + cpu_core@IDQ.MITE_UOPS@) * (cpu_core@IDQ_BUBBLES.CYCLES_0_UOPS_DELIV.CORE@ - cpu_core@IDQ_BUBBLES.FETCH_LATENCY@)) / tma_info_thread_clks",
+ "MetricExpr": "(cpu_core@IDQ.MITE_UOPS\\,cmask\\=0x8\\,inv\\=0x1@ / 2 + cpu_core@IDQ.MITE_UOPS@ / (cpu_core@IDQ.DSB_UOPS@ + cpu_core@IDQ.MITE_UOPS@) * (cpu_core@IDQ_BUBBLES.STARVATION_CYCLES@ - cpu_core@IDQ_BUBBLES.FETCH_LATENCY@)) / tma_info_thread_clks",
"MetricGroup": "DSBmiss;FetchBW;TopdownL3;tma_L3_group;tma_fetch_bandwidth_group",
"MetricName": "tma_mite",
"MetricThreshold": "tma_mite > 0.1 & tma_fetch_bandwidth > 0.2",
@@ -2406,7 +2418,7 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles in which CPU was likely limited due to the Microcode Sequencer (MS) unit - see Microcode_Sequencer node for details.",
- "MetricExpr": "cpu_core@IDQ.MS_CYCLES_ANY@ / tma_info_thread_clks",
+ "MetricExpr": "cpu_core@IDQ.MS_CYCLES_ANY@ / tma_info_thread_clks / 1.8",
"MetricGroup": "MicroSeq;TopdownL3;tma_L3_group;tma_fetch_bandwidth_group",
"MetricName": "tma_ms",
"MetricThreshold": "tma_ms > 0.05 & tma_fetch_bandwidth > 0.2",
@@ -2445,7 +2457,8 @@
},
{
"BriefDescription": "This metric represents the remaining light uops fraction the CPU has executed - remaining means not covered by other sibling nodes",
- "MetricExpr": "max(0, tma_light_operations - (tma_x87_use + (cpu_core@FP_ARITH_INST_RETIRED.SCALAR@ + cpu_core@FP_ARITH_INST_RETIRED.VECTOR@) / (tma_retiring * tma_info_thread_slots) + (cpu_core@INT_VEC_RETIRED.ADD_128@ + cpu_core@INT_VEC_RETIRED.VNNI_128@ + cpu_core@INT_VEC_RETIRED.ADD_256@ + cpu_core@INT_VEC_RETIRED.MUL_256@ + cpu_core@INT_VEC_RETIRED.VNNI_256@) / (tma_retiring * tma_info_thread_slots) + tma_memory_operations + tma_fused_instructions + tma_non_fused_branches))",
+ "MetricConstraint": "NO_GROUP_EVENTS",
+ "MetricExpr": "max(0, tma_light_operations - (tma_x87_use + (cpu_core@FP_ARITH_OPS_RETIRED.SCALAR@ + cpu_core@FP_ARITH_OPS_RETIRED.VECTOR@) / (tma_retiring * tma_info_thread_slots) + (cpu_core@INT_VEC_RETIRED.ADD_128@ + cpu_core@INT_VEC_RETIRED.VNNI_128@ + cpu_core@INT_VEC_RETIRED.ADD_256@ + cpu_core@INT_VEC_RETIRED.MUL_256@ + cpu_core@INT_VEC_RETIRED.VNNI_256@) / (tma_retiring * tma_info_thread_slots) + tma_memory_operations + tma_fused_instructions + tma_non_fused_branches))",
"MetricGroup": "Pipeline;TopdownL3;tma_L3_group;tma_light_operations_group",
"MetricName": "tma_other_light_ops",
"MetricThreshold": "tma_other_light_ops > 0.3 & tma_light_operations > 0.6",
@@ -2483,6 +2496,7 @@
},
{
"BriefDescription": "This metric estimates fraction of cycles the CPU performance was potentially limited due to Core computation issues (non divider-related)",
+ "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "((cpu_core@EXE_ACTIVITY.EXE_BOUND_0_PORTS@ + (cpu_core@EXE_ACTIVITY.1_PORTS_UTIL@ + tma_retiring * cpu_core@EXE_ACTIVITY.2_3_PORTS_UTIL@)) / tma_info_thread_clks if cpu_core@ARITH.DIV_ACTIVE@ < cpu_core@CYCLE_ACTIVITY.STALLS_TOTAL@ - cpu_core@EXE_ACTIVITY.BOUND_ON_LOADS@ else (cpu_core@EXE_ACTIVITY.1_PORTS_UTIL@ + tma_retiring * cpu_core@EXE_ACTIVITY.2_3_PORTS_UTIL@) / tma_info_thread_clks)",
"MetricGroup": "PortsUtil;TopdownL3;tma_L3_group;tma_core_bound_group",
"MetricName": "tma_ports_utilization",
@@ -2493,6 +2507,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles CPU executed no uops on any execution port (Logical Processor cycles since ICL, Physical Core cycles otherwise)",
+ "MetricConstraint": "NO_THRESHOLD_AND_NMI",
"MetricExpr": "cpu_core@EXE_ACTIVITY.EXE_BOUND_0_PORTS@ / tma_info_thread_clks",
"MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_ports_utilization_group",
"MetricName": "tma_ports_utilized_0",
@@ -2503,6 +2518,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles where the CPU executed total of 1 uop per cycle on all execution ports (Logical Processor cycles since ICL, Physical Core cycles otherwise)",
+ "MetricConstraint": "NO_THRESHOLD_AND_NMI",
"MetricExpr": "cpu_core@EXE_ACTIVITY.1_PORTS_UTIL@ / tma_info_thread_clks",
"MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_issueL1;tma_ports_utilization_group",
"MetricName": "tma_ports_utilized_1",
@@ -2513,7 +2529,6 @@
},
{
"BriefDescription": "This metric represents fraction of cycles CPU executed total of 2 uops per cycle on all execution ports (Logical Processor cycles since ICL, Physical Core cycles otherwise)",
- "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "cpu_core@EXE_ACTIVITY.2_PORTS_UTIL@ / tma_info_thread_clks",
"MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_issue2P;tma_ports_utilization_group",
"MetricName": "tma_ports_utilized_2",
@@ -2524,7 +2539,6 @@
},
{
"BriefDescription": "This metric represents fraction of cycles CPU executed total of 3 or more uops per cycle on all execution ports (Logical Processor cycles since ICL, Physical Core cycles otherwise)",
- "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "cpu_core@UOPS_EXECUTED.CYCLES_GE_3@ / tma_info_thread_clks",
"MetricGroup": "BvCB;PortsUtil;TopdownL4;tma_L4_group;tma_ports_utilization_group",
"MetricName": "tma_ports_utilized_3m",
@@ -2545,7 +2559,7 @@
{
"BriefDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired",
"DefaultMetricgroupName": "TopdownL1",
- "MetricExpr": "cpu_core@topdown\\-retiring@ / (cpu_core@topdown\\-fe\\-bound@ + cpu_core@topdown\\-bad\\-spec@ + cpu_core@topdown\\-retiring@ + cpu_core@topdown\\-be\\-bound@) + 0 * tma_info_thread_slots",
+ "MetricExpr": "cpu_core@topdown\\-retiring@ / (cpu_core@topdown\\-fe\\-bound@ + cpu_core@topdown\\-bad\\-spec@ + cpu_core@topdown\\-retiring@ + cpu_core@topdown\\-be\\-bound@)",
"MetricGroup": "BvUW;Default;TmaL1;TopdownL1;tma_L1_group",
"MetricName": "tma_retiring",
"MetricThreshold": "tma_retiring > 0.7 | tma_heavy_operations > 0.1",
@@ -2576,7 +2590,6 @@
},
{
"BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to PAUSE Instructions",
- "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "cpu_core@CPU_CLK_UNHALTED.PAUSE@ / tma_info_thread_clks",
"MetricGroup": "TopdownL4;tma_L4_group;tma_serializing_operation_group",
"MetricName": "tma_slow_pause",
@@ -2611,7 +2624,7 @@
"MetricGroup": "BvMB;MemoryBW;Offcore;TopdownL4;tma_L4_group;tma_issueBW;tma_l3_bound_group",
"MetricName": "tma_sq_full",
"MetricThreshold": "tma_sq_full > 0.3 & (tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
- "PublicDescription": "This metric measures fraction of cycles where the Super Queue (SQ) was full taking into account all request-types and both hardware SMT threads (Logical Processors). Related metrics: tma_bottleneck_cache_memory_bandwidth, tma_fb_full, tma_info_system_dram_bw_use, tma_mem_bandwidth",
+ "PublicDescription": "This metric measures fraction of cycles where the Super Queue (SQ) was full taking into account all request-types and both hardware SMT threads (Logical Processors). Related metrics: tma_bottleneck_data_cache_memory_bandwidth, tma_fb_full, tma_info_system_dram_bw_use, tma_mem_bandwidth",
"ScaleUnit": "100%",
"Unit": "cpu_core"
},
@@ -2626,6 +2639,15 @@
"Unit": "cpu_core"
},
{
+ "BriefDescription": "This metric estimates clocks wasted due to loads blocked due to unknown store address (did not do memory disambiguation) or due to unknown store data",
+ "MetricExpr": "7 * cpu_core@LD_BLOCKS.STORE_EARLY\\,cmask\\=1@ / tma_info_thread_clks",
+ "MetricGroup": "TopdownL4;tma_L4_group;tma_l1_bound_group",
+ "MetricName": "tma_store_early_blk",
+ "MetricThreshold": "tma_store_early_blk > 0.2",
+ "ScaleUnit": "100%",
+ "Unit": "cpu_core"
+ },
+ {
"BriefDescription": "This metric roughly estimates fraction of cycles when the memory subsystem had loads blocked since they could not forward data from earlier (in program order) overlapping stores",
"MetricExpr": "13 * cpu_core@LD_BLOCKS.STORE_FORWARD@ / tma_info_thread_clks",
"MetricGroup": "TopdownL4;tma_L4_group;tma_l1_bound_group",
diff --git a/tools/perf/pmu-events/arch/x86/lunarlake/memory.json b/tools/perf/pmu-events/arch/x86/lunarlake/memory.json
index 8021a1c7dd3b..caa387e10259 100644
--- a/tools/perf/pmu-events/arch/x86/lunarlake/memory.json
+++ b/tools/perf/pmu-events/arch/x86/lunarlake/memory.json
@@ -163,7 +163,7 @@
"EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_1024",
"MSRIndex": "0x3F6",
"MSRValue": "0x400",
- "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 1024 cycles. Reported latency may be longer than just the memory latency. Available PDIST counters: 0",
+ "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 1024 cycles. Reported latency may be longer than just the memory latency.",
"SampleAfterValue": "53",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -176,7 +176,7 @@
"EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_128",
"MSRIndex": "0x3F6",
"MSRValue": "0x80",
- "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 128 cycles. Reported latency may be longer than just the memory latency. Available PDIST counters: 0",
+ "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 128 cycles. Reported latency may be longer than just the memory latency.",
"SampleAfterValue": "1009",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -189,7 +189,7 @@
"EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_16",
"MSRIndex": "0x3F6",
"MSRValue": "0x10",
- "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 16 cycles. Reported latency may be longer than just the memory latency. Available PDIST counters: 0",
+ "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 16 cycles. Reported latency may be longer than just the memory latency.",
"SampleAfterValue": "20011",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -202,7 +202,7 @@
"EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_2048",
"MSRIndex": "0x3F6",
"MSRValue": "0x800",
- "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 2048 cycles. Reported latency may be longer than just the memory latency. Available PDIST counters: 0",
+ "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 2048 cycles. Reported latency may be longer than just the memory latency.",
"SampleAfterValue": "23",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -215,7 +215,7 @@
"EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_256",
"MSRIndex": "0x3F6",
"MSRValue": "0x100",
- "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 256 cycles. Reported latency may be longer than just the memory latency. Available PDIST counters: 0",
+ "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 256 cycles. Reported latency may be longer than just the memory latency.",
"SampleAfterValue": "503",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -228,7 +228,7 @@
"EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_32",
"MSRIndex": "0x3F6",
"MSRValue": "0x20",
- "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 32 cycles. Reported latency may be longer than just the memory latency. Available PDIST counters: 0",
+ "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 32 cycles. Reported latency may be longer than just the memory latency.",
"SampleAfterValue": "100007",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -241,7 +241,7 @@
"EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_4",
"MSRIndex": "0x3F6",
"MSRValue": "0x4",
- "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 4 cycles. Reported latency may be longer than just the memory latency. Available PDIST counters: 0",
+ "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 4 cycles. Reported latency may be longer than just the memory latency.",
"SampleAfterValue": "100003",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -254,7 +254,7 @@
"EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_512",
"MSRIndex": "0x3F6",
"MSRValue": "0x200",
- "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 512 cycles. Reported latency may be longer than just the memory latency. Available PDIST counters: 0",
+ "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 512 cycles. Reported latency may be longer than just the memory latency.",
"SampleAfterValue": "101",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -267,7 +267,7 @@
"EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_64",
"MSRIndex": "0x3F6",
"MSRValue": "0x40",
- "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 64 cycles. Reported latency may be longer than just the memory latency. Available PDIST counters: 0",
+ "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 64 cycles. Reported latency may be longer than just the memory latency.",
"SampleAfterValue": "2003",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -280,7 +280,7 @@
"EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_8",
"MSRIndex": "0x3F6",
"MSRValue": "0x8",
- "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 8 cycles. Reported latency may be longer than just the memory latency. Available PDIST counters: 0",
+ "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 8 cycles. Reported latency may be longer than just the memory latency.",
"SampleAfterValue": "50021",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -291,7 +291,7 @@
"Data_LA": "1",
"EventCode": "0xcd",
"EventName": "MEM_TRANS_RETIRED.STORE_SAMPLE",
- "PublicDescription": "Counts Retired memory accesses with at least 1 store operation. This PEBS event is the precisely-distributed (PDist) trigger covering all stores uops for sampling by the PEBS Store Latency Facility. The facility is described in Intel SDM Volume 3 section 19.9.8 Available PDIST counters: 0",
+ "PublicDescription": "Counts Retired memory accesses with at least 1 store operation. This PEBS event is the precisely-distributed (PDist) trigger covering all stores uops for sampling by the PEBS Store Latency Facility. The facility is described in Intel SDM Volume 3 section 19.9.8 Available PDIST counters: 0,1",
"SampleAfterValue": "1000003",
"UMask": "0x2",
"Unit": "cpu_core"
@@ -352,7 +352,7 @@
},
{
"BriefDescription": "Counts demand data reads that were supplied by DRAM.",
- "Counter": "0,1,2,3",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0x2A,0x2B",
"EventName": "OCR.DEMAND_DATA_RD.DRAM",
"MSRIndex": "0x1a6,0x1a7",
@@ -376,7 +376,7 @@
},
{
"BriefDescription": "Counts demand data reads that were not supplied by the L3 cache and were supplied by the system memory (DRAM, MSC, or MMIO).",
- "Counter": "0,1,2,3",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0x2A,0x2B",
"EventName": "OCR.DEMAND_DATA_RD.L3_MISS",
"MSRIndex": "0x1a6,0x1a7",
@@ -412,7 +412,7 @@
},
{
"BriefDescription": "Counts demand read for ownership (RFO) requests and software prefetches for exclusive ownership (PREFETCHW) that were not supplied by the L3 cache and were supplied by the system memory (DRAM, MSC, or MMIO).",
- "Counter": "0,1,2,3",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0x2A,0x2B",
"EventName": "OCR.DEMAND_RFO.L3_MISS",
"MSRIndex": "0x1a6,0x1a7",
diff --git a/tools/perf/pmu-events/arch/x86/lunarlake/other.json b/tools/perf/pmu-events/arch/x86/lunarlake/other.json
index 59949f9541d8..164374edf293 100644
--- a/tools/perf/pmu-events/arch/x86/lunarlake/other.json
+++ b/tools/perf/pmu-events/arch/x86/lunarlake/other.json
@@ -151,7 +151,7 @@
},
{
"BriefDescription": "Counts streaming stores that have any type of response.",
- "Counter": "0,1,2,3",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0x2A,0x2B",
"EventName": "OCR.STREAMING_WR.ANY_RESPONSE",
"MSRIndex": "0x1a6,0x1a7",
@@ -178,6 +178,7 @@
"EventCode": "0xf4",
"EventName": "XQ_PROMOTION.ALL",
"SampleAfterValue": "1000003",
+ "UMask": "0x7",
"Unit": "cpu_atom"
},
{
diff --git a/tools/perf/pmu-events/arch/x86/lunarlake/pipeline.json b/tools/perf/pmu-events/arch/x86/lunarlake/pipeline.json
index 6ac410510628..97797f7b072e 100644
--- a/tools/perf/pmu-events/arch/x86/lunarlake/pipeline.json
+++ b/tools/perf/pmu-events/arch/x86/lunarlake/pipeline.json
@@ -21,8 +21,9 @@
"Unit": "cpu_core"
},
{
- "BriefDescription": "Counts the number of active floating point and integer dividers per cycle.",
+ "BriefDescription": "This event is deprecated.",
"Counter": "0,1,2,3,4,5,6,7",
+ "Deprecated": "1",
"EventCode": "0xcd",
"EventName": "ARITH.DIV_OCCUPANCY",
"SampleAfterValue": "1000003",
@@ -30,8 +31,9 @@
"Unit": "cpu_atom"
},
{
- "BriefDescription": "Counts the number of floating point and integer divider uops executed per cycle.",
+ "BriefDescription": "This event is deprecated.",
"Counter": "0,1,2,3,4,5,6,7",
+ "Deprecated": "1",
"EventCode": "0xcd",
"EventName": "ARITH.DIV_UOPS",
"SampleAfterValue": "1000003",
@@ -110,7 +112,7 @@
"Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0xc4",
"EventName": "BR_INST_RETIRED.ALL_BRANCHES",
- "PublicDescription": "Counts all branch instructions retired. Available PDIST counters: 0",
+ "PublicDescription": "Counts all branch instructions retired. Available PDIST counters: 0,1",
"SampleAfterValue": "400009",
"Unit": "cpu_core"
},
@@ -128,7 +130,7 @@
"Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0xc4",
"EventName": "BR_INST_RETIRED.COND",
- "PublicDescription": "Counts conditional branch instructions retired. Available PDIST counters: 0",
+ "PublicDescription": "Counts conditional branch instructions retired. Available PDIST counters: 0,1",
"SampleAfterValue": "400009",
"UMask": "0x111",
"Unit": "cpu_core"
@@ -147,7 +149,7 @@
"Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0xc4",
"EventName": "BR_INST_RETIRED.COND_NTAKEN",
- "PublicDescription": "Counts not taken branch instructions retired. Available PDIST counters: 0",
+ "PublicDescription": "Counts not taken branch instructions retired. Available PDIST counters: 0,1",
"SampleAfterValue": "400009",
"UMask": "0x10",
"Unit": "cpu_core"
@@ -166,7 +168,7 @@
"Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0xc4",
"EventName": "BR_INST_RETIRED.COND_TAKEN",
- "PublicDescription": "Counts taken conditional branch instructions retired. Available PDIST counters: 0",
+ "PublicDescription": "Counts taken conditional branch instructions retired. Available PDIST counters: 0,1",
"SampleAfterValue": "400009",
"UMask": "0x101",
"Unit": "cpu_core"
@@ -176,7 +178,7 @@
"Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0xc4",
"EventName": "BR_INST_RETIRED.COND_TAKEN_BWD",
- "PublicDescription": "Counts taken backward conditional branch instructions retired. Available PDIST counters: 0",
+ "PublicDescription": "Counts taken backward conditional branch instructions retired. Available PDIST counters: 0,1",
"SampleAfterValue": "400009",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -186,7 +188,7 @@
"Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0xc4",
"EventName": "BR_INST_RETIRED.COND_TAKEN_FWD",
- "PublicDescription": "Counts taken forward conditional branch instructions retired. Available PDIST counters: 0",
+ "PublicDescription": "Counts taken forward conditional branch instructions retired. Available PDIST counters: 0,1",
"SampleAfterValue": "400009",
"UMask": "0x102",
"Unit": "cpu_core"
@@ -205,7 +207,7 @@
"Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0xc4",
"EventName": "BR_INST_RETIRED.FAR_BRANCH",
- "PublicDescription": "Counts far branch instructions retired. Available PDIST counters: 0",
+ "PublicDescription": "Counts far branch instructions retired. Available PDIST counters: 0,1",
"SampleAfterValue": "100007",
"UMask": "0x40",
"Unit": "cpu_core"
@@ -224,7 +226,7 @@
"Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0xc4",
"EventName": "BR_INST_RETIRED.INDIRECT",
- "PublicDescription": "Counts near indirect branch instructions retired excluding returns. TSX abort is an indirect branch. Available PDIST counters: 0",
+ "PublicDescription": "Counts near indirect branch instructions retired excluding returns. TSX abort is an indirect branch. Available PDIST counters: 0,1",
"SampleAfterValue": "100003",
"UMask": "0x80",
"Unit": "cpu_core"
@@ -261,7 +263,7 @@
"Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0xc4",
"EventName": "BR_INST_RETIRED.NEAR_CALL",
- "PublicDescription": "Counts both direct and indirect near call instructions retired. Available PDIST counters: 0",
+ "PublicDescription": "Counts both direct and indirect near call instructions retired. Available PDIST counters: 0,1",
"SampleAfterValue": "100007",
"UMask": "0x2",
"Unit": "cpu_core"
@@ -280,13 +282,13 @@
"Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0xc4",
"EventName": "BR_INST_RETIRED.NEAR_RETURN",
- "PublicDescription": "Counts return instructions retired. Available PDIST counters: 0",
+ "PublicDescription": "Counts return instructions retired. Available PDIST counters: 0,1",
"SampleAfterValue": "100007",
"UMask": "0x8",
"Unit": "cpu_core"
},
{
- "BriefDescription": "Counts the number of taken branch instructions retired",
+ "BriefDescription": "Counts the number of near taken branch instructions retired",
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc4",
"EventName": "BR_INST_RETIRED.NEAR_TAKEN",
@@ -299,7 +301,7 @@
"Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0xc4",
"EventName": "BR_INST_RETIRED.NEAR_TAKEN",
- "PublicDescription": "Counts taken branch instructions retired. Available PDIST counters: 0",
+ "PublicDescription": "Counts taken branch instructions retired. Available PDIST counters: 0,1",
"SampleAfterValue": "400009",
"UMask": "0x20",
"Unit": "cpu_core"
@@ -327,7 +329,7 @@
"Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0xc5",
"EventName": "BR_MISP_RETIRED.ALL_BRANCHES",
- "PublicDescription": "Counts all the retired branch instructions that were mispredicted by the processor. A branch misprediction occurs when the processor incorrectly predicts the destination of the branch. When the misprediction is discovered at execution, all the instructions executed in the wrong (speculative) path must be discarded, and the processor must start fetching from the correct path. Available PDIST counters: 0",
+ "PublicDescription": "Counts all the retired branch instructions that were mispredicted by the processor. A branch misprediction occurs when the processor incorrectly predicts the destination of the branch. When the misprediction is discovered at execution, all the instructions executed in the wrong (speculative) path must be discarded, and the processor must start fetching from the correct path. Available PDIST counters: 0,1",
"SampleAfterValue": "400009",
"Unit": "cpu_core"
},
@@ -336,7 +338,7 @@
"Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0xc5",
"EventName": "BR_MISP_RETIRED.ALL_BRANCHES_COST",
- "PublicDescription": "All mispredicted branch instructions retired. This precise event may be used to get the misprediction cost via the Retire_Latency field of PEBS. It fires on the instruction that immediately follows the mispredicted branch. Available PDIST counters: 0",
+ "PublicDescription": "All mispredicted branch instructions retired. This precise event may be used to get the misprediction cost via the Retire_Latency field of PEBS. It fires on the instruction that immediately follows the mispredicted branch. Available PDIST counters: 0,1",
"SampleAfterValue": "400009",
"UMask": "0x44",
"Unit": "cpu_core"
@@ -355,7 +357,7 @@
"Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0xc5",
"EventName": "BR_MISP_RETIRED.COND",
- "PublicDescription": "Counts mispredicted conditional branch instructions retired. Available PDIST counters: 0",
+ "PublicDescription": "Counts mispredicted conditional branch instructions retired. Available PDIST counters: 0,1",
"SampleAfterValue": "400009",
"UMask": "0x111",
"Unit": "cpu_core"
@@ -365,7 +367,7 @@
"Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0xc5",
"EventName": "BR_MISP_RETIRED.COND_COST",
- "PublicDescription": "Mispredicted conditional branch instructions retired. This precise event may be used to get the misprediction cost via the Retire_Latency field of PEBS. It fires on the instruction that immediately follows the mispredicted branch. Available PDIST counters: 0",
+ "PublicDescription": "Mispredicted conditional branch instructions retired. This precise event may be used to get the misprediction cost via the Retire_Latency field of PEBS. It fires on the instruction that immediately follows the mispredicted branch. Available PDIST counters: 0,1",
"SampleAfterValue": "400009",
"UMask": "0x151",
"Unit": "cpu_core"
@@ -384,7 +386,7 @@
"Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0xc5",
"EventName": "BR_MISP_RETIRED.COND_NTAKEN",
- "PublicDescription": "Counts the number of conditional branch instructions retired that were mispredicted and the branch direction was not taken. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of conditional branch instructions retired that were mispredicted and the branch direction was not taken. Available PDIST counters: 0,1",
"SampleAfterValue": "400009",
"UMask": "0x10",
"Unit": "cpu_core"
@@ -394,7 +396,7 @@
"Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0xc5",
"EventName": "BR_MISP_RETIRED.COND_NTAKEN_COST",
- "PublicDescription": "Mispredicted non-taken conditional branch instructions retired. This precise event may be used to get the misprediction cost via the Retire_Latency field of PEBS. It fires on the instruction that immediately follows the mispredicted branch. Available PDIST counters: 0",
+ "PublicDescription": "Mispredicted non-taken conditional branch instructions retired. This precise event may be used to get the misprediction cost via the Retire_Latency field of PEBS. It fires on the instruction that immediately follows the mispredicted branch. Available PDIST counters: 0,1",
"SampleAfterValue": "400009",
"UMask": "0x50",
"Unit": "cpu_core"
@@ -413,7 +415,7 @@
"Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0xc5",
"EventName": "BR_MISP_RETIRED.COND_TAKEN",
- "PublicDescription": "Counts taken conditional mispredicted branch instructions retired. Available PDIST counters: 0",
+ "PublicDescription": "Counts taken conditional mispredicted branch instructions retired. Available PDIST counters: 0,1",
"SampleAfterValue": "400009",
"UMask": "0x101",
"Unit": "cpu_core"
@@ -423,7 +425,7 @@
"Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0xc5",
"EventName": "BR_MISP_RETIRED.COND_TAKEN_BWD",
- "PublicDescription": "Counts taken backward conditional mispredicted branch instructions retired. Available PDIST counters: 0",
+ "PublicDescription": "Counts taken backward conditional mispredicted branch instructions retired. Available PDIST counters: 0,1",
"SampleAfterValue": "400009",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -433,7 +435,7 @@
"Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0xc5",
"EventName": "BR_MISP_RETIRED.COND_TAKEN_BWD_COST",
- "PublicDescription": "number of branch instructions retired that were mispredicted and taken backward. This precise event may be used to get the misprediction cost via the Retire_Latency field of PEBS. It fires on the instruction that immediately follows the mispredicted branch. Available PDIST counters: 0",
+ "PublicDescription": "number of branch instructions retired that were mispredicted and taken backward. This precise event may be used to get the misprediction cost via the Retire_Latency field of PEBS. It fires on the instruction that immediately follows the mispredicted branch. Available PDIST counters: 0,1",
"SampleAfterValue": "400009",
"UMask": "0x8001",
"Unit": "cpu_core"
@@ -443,7 +445,7 @@
"Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0xc5",
"EventName": "BR_MISP_RETIRED.COND_TAKEN_COST",
- "PublicDescription": "Mispredicted taken conditional branch instructions retired. This precise event may be used to get the misprediction cost via the Retire_Latency field of PEBS. It fires on the instruction that immediately follows the mispredicted branch. Available PDIST counters: 0",
+ "PublicDescription": "Mispredicted taken conditional branch instructions retired. This precise event may be used to get the misprediction cost via the Retire_Latency field of PEBS. It fires on the instruction that immediately follows the mispredicted branch. Available PDIST counters: 0,1",
"SampleAfterValue": "400009",
"UMask": "0x141",
"Unit": "cpu_core"
@@ -453,7 +455,7 @@
"Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0xc5",
"EventName": "BR_MISP_RETIRED.COND_TAKEN_FWD",
- "PublicDescription": "Counts taken forward conditional mispredicted branch instructions retired. Available PDIST counters: 0",
+ "PublicDescription": "Counts taken forward conditional mispredicted branch instructions retired. Available PDIST counters: 0,1",
"SampleAfterValue": "400009",
"Unit": "cpu_core"
},
@@ -462,7 +464,7 @@
"Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0xc5",
"EventName": "BR_MISP_RETIRED.COND_TAKEN_FWD_COST",
- "PublicDescription": "number of branch instructions retired that were mispredicted and taken forward. This precise event may be used to get the misprediction cost via the Retire_Latency field of PEBS. It fires on the instruction that immediately follows the mispredicted branch. Available PDIST counters: 0",
+ "PublicDescription": "number of branch instructions retired that were mispredicted and taken forward. This precise event may be used to get the misprediction cost via the Retire_Latency field of PEBS. It fires on the instruction that immediately follows the mispredicted branch. Available PDIST counters: 0,1",
"SampleAfterValue": "400009",
"UMask": "0x8002",
"Unit": "cpu_core"
@@ -481,7 +483,7 @@
"Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0xc5",
"EventName": "BR_MISP_RETIRED.INDIRECT",
- "PublicDescription": "Counts miss-predicted near indirect branch instructions retired excluding returns. TSX abort is an indirect branch. Available PDIST counters: 0",
+ "PublicDescription": "Counts miss-predicted near indirect branch instructions retired excluding returns. TSX abort is an indirect branch. Available PDIST counters: 0,1",
"SampleAfterValue": "100003",
"UMask": "0x80",
"Unit": "cpu_core"
@@ -500,7 +502,7 @@
"Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0xc5",
"EventName": "BR_MISP_RETIRED.INDIRECT_CALL",
- "PublicDescription": "Counts retired mispredicted indirect (near taken) CALL instructions, including both register and memory indirect. Available PDIST counters: 0",
+ "PublicDescription": "Counts retired mispredicted indirect (near taken) CALL instructions, including both register and memory indirect. Available PDIST counters: 0,1",
"SampleAfterValue": "400009",
"UMask": "0x2",
"Unit": "cpu_core"
@@ -510,7 +512,7 @@
"Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0xc5",
"EventName": "BR_MISP_RETIRED.INDIRECT_CALL_COST",
- "PublicDescription": "Mispredicted indirect CALL retired. This precise event may be used to get the misprediction cost via the Retire_Latency field of PEBS. It fires on the instruction that immediately follows the mispredicted branch. Available PDIST counters: 0",
+ "PublicDescription": "Mispredicted indirect CALL retired. This precise event may be used to get the misprediction cost via the Retire_Latency field of PEBS. It fires on the instruction that immediately follows the mispredicted branch. Available PDIST counters: 0,1",
"SampleAfterValue": "400009",
"UMask": "0x42",
"Unit": "cpu_core"
@@ -520,7 +522,7 @@
"Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0xc5",
"EventName": "BR_MISP_RETIRED.INDIRECT_COST",
- "PublicDescription": "Mispredicted near indirect branch instructions retired (excluding returns). This precise event may be used to get the misprediction cost via the Retire_Latency field of PEBS. It fires on the instruction that immediately follows the mispredicted branch. Available PDIST counters: 0",
+ "PublicDescription": "Mispredicted near indirect branch instructions retired (excluding returns). This precise event may be used to get the misprediction cost via the Retire_Latency field of PEBS. It fires on the instruction that immediately follows the mispredicted branch. Available PDIST counters: 0,1",
"SampleAfterValue": "100003",
"UMask": "0xc0",
"Unit": "cpu_core"
@@ -548,7 +550,7 @@
"Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0xc5",
"EventName": "BR_MISP_RETIRED.NEAR_TAKEN",
- "PublicDescription": "Counts number of near branch instructions retired that were mispredicted and taken. Available PDIST counters: 0",
+ "PublicDescription": "Counts number of near branch instructions retired that were mispredicted and taken. Available PDIST counters: 0,1",
"SampleAfterValue": "400009",
"UMask": "0x20",
"Unit": "cpu_core"
@@ -558,7 +560,7 @@
"Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0xc5",
"EventName": "BR_MISP_RETIRED.NEAR_TAKEN_COST",
- "PublicDescription": "Mispredicted taken near branch instructions retired. This precise event may be used to get the misprediction cost via the Retire_Latency field of PEBS. It fires on the instruction that immediately follows the mispredicted branch. Available PDIST counters: 0",
+ "PublicDescription": "Mispredicted taken near branch instructions retired. This precise event may be used to get the misprediction cost via the Retire_Latency field of PEBS. It fires on the instruction that immediately follows the mispredicted branch. Available PDIST counters: 0,1",
"SampleAfterValue": "400009",
"UMask": "0x60",
"Unit": "cpu_core"
@@ -568,7 +570,7 @@
"Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0xc5",
"EventName": "BR_MISP_RETIRED.RET",
- "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts mispredicted return instructions retired. Available PDIST counters: 0",
+ "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts mispredicted return instructions retired. Available PDIST counters: 0,1",
"SampleAfterValue": "100007",
"UMask": "0x8",
"Unit": "cpu_core"
@@ -587,7 +589,7 @@
"Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0xc5",
"EventName": "BR_MISP_RETIRED.RET_COST",
- "PublicDescription": "Mispredicted ret instructions retired. This precise event may be used to get the misprediction cost via the Retire_Latency field of PEBS. It fires on the instruction that immediately follows the mispredicted branch. Available PDIST counters: 0",
+ "PublicDescription": "Mispredicted ret instructions retired. This precise event may be used to get the misprediction cost via the Retire_Latency field of PEBS. It fires on the instruction that immediately follows the mispredicted branch. Available PDIST counters: 0,1",
"SampleAfterValue": "100007",
"UMask": "0x48",
"Unit": "cpu_core"
@@ -906,7 +908,7 @@
"Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0xc0",
"EventName": "INST_RETIRED.ANY_P",
- "PublicDescription": "Counts the number of X86 instructions retired - an Architectural PerfMon event. Counting continues during hardware interrupts, traps, and inside interrupt handlers. Notes: INST_RETIRED.ANY is counted by a designated fixed counter freeing up programmable counters to count other events. INST_RETIRED.ANY_P is counted by a programmable counter. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of X86 instructions retired - an Architectural PerfMon event. Counting continues during hardware interrupts, traps, and inside interrupt handlers. Notes: INST_RETIRED.ANY is counted by a designated fixed counter freeing up programmable counters to count other events. INST_RETIRED.ANY_P is counted by a programmable counter. Available PDIST counters: 0,1",
"SampleAfterValue": "2000003",
"Unit": "cpu_core"
},
@@ -915,7 +917,7 @@
"Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0xc0",
"EventName": "INST_RETIRED.BR_FUSED",
- "PublicDescription": "retired macro-fused uops when there is a branch in the macro-fused pair (the two instructions that got macro-fused count once in this pmon) Available PDIST counters: 0",
+ "PublicDescription": "retired macro-fused uops when there is a branch in the macro-fused pair (the two instructions that got macro-fused count once in this pmon) Available PDIST counters: 0,1",
"SampleAfterValue": "1000003",
"UMask": "0x10",
"Unit": "cpu_core"
@@ -925,7 +927,7 @@
"Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0xc0",
"EventName": "INST_RETIRED.MACRO_FUSED",
- "PublicDescription": "INST_RETIRED.MACRO_FUSED Available PDIST counters: 0",
+ "PublicDescription": "INST_RETIRED.MACRO_FUSED Available PDIST counters: 0,1",
"SampleAfterValue": "2000003",
"UMask": "0x30",
"Unit": "cpu_core"
@@ -935,7 +937,7 @@
"Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0xc0",
"EventName": "INST_RETIRED.NOP",
- "PublicDescription": "Counts all retired NOP or ENDBR32/64 or PREFETCHIT0/1 instructions Available PDIST counters: 0",
+ "PublicDescription": "Counts all retired NOP or ENDBR32/64 or PREFETCHIT0/1 instructions Available PDIST counters: 0,1",
"SampleAfterValue": "2000003",
"UMask": "0x2",
"Unit": "cpu_core"
@@ -954,7 +956,7 @@
"Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0xc0",
"EventName": "INST_RETIRED.REP_ITERATION",
- "PublicDescription": "Number of iterations of Repeat (REP) string retired instructions such as MOVS, CMPS, and SCAS. Each has a byte, word, and doubleword version and string instructions can be repeated using a repetition prefix, REP, that allows their architectural execution to be repeated a number of times as specified by the RCX register. Note the number of iterations is implementation-dependent. Available PDIST counters: 0",
+ "PublicDescription": "Number of iterations of Repeat (REP) string retired instructions such as MOVS, CMPS, and SCAS. Each has a byte, word, and doubleword version and string instructions can be repeated using a repetition prefix, REP, that allows their architectural execution to be repeated a number of times as specified by the RCX register. Note the number of iterations is implementation-dependent. Available PDIST counters: 0,1",
"SampleAfterValue": "2000003",
"UMask": "0x8",
"Unit": "cpu_core"
@@ -1024,6 +1026,15 @@
"Unit": "cpu_core"
},
{
+ "BriefDescription": "Counts the number of uops executed on secondary integer ports 0,1,2,3.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xb3",
+ "EventName": "INT_UOPS_EXECUTED.2ND",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x80",
+ "Unit": "cpu_atom"
+ },
+ {
"BriefDescription": "Counts the number of uops executed on all Integer ports.",
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xb3",
@@ -1205,7 +1216,7 @@
"EventCode": "0x03",
"EventName": "LD_BLOCKS.ALL",
"SampleAfterValue": "1000003",
- "UMask": "0x10",
+ "UMask": "0x1f",
"Unit": "cpu_atom"
},
{
@@ -1228,6 +1239,15 @@
"Unit": "cpu_core"
},
{
+ "BriefDescription": "Counts the number of times a load got early blocked due to preceding store operation with unknown address or unknown data. Excluding in-line (immediate) wakeups",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0x03",
+ "EventName": "LD_BLOCKS.STORE_EARLY",
+ "SampleAfterValue": "100003",
+ "UMask": "0xa1",
+ "Unit": "cpu_core"
+ },
+ {
"BriefDescription": "Counts the number of occurrences a retired load gets blocked because its address partially overlaps with an older store (size mismatch) - unknown_sta/bad_forward",
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0x03",
@@ -1451,7 +1471,7 @@
"Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0xe4",
"EventName": "MISC_RETIRED.LBR_INSERTS",
- "PublicDescription": "LBR record is inserted Available PDIST counters: 0",
+ "PublicDescription": "LBR record is inserted Available PDIST counters: 0,1",
"SampleAfterValue": "1000003",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -1605,6 +1625,15 @@
"Unit": "cpu_atom"
},
{
+ "BriefDescription": "Counts the number of issue slots where no uop could issue due to an IQ scoreboard that stalls allocation until a specified older uop retires or (in the case of jump scoreboard) executes. Commonly executed instructions with IQ scoreboards include LFENCE and MFENCE.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x75",
+ "EventName": "SERIALIZATION.IQ_JEU_SCB",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x1",
+ "Unit": "cpu_atom"
+ },
+ {
"BriefDescription": "Counts the number of issue slots not consumed by the backend due to a micro-sequencer (MS) scoreboard, which stalls the front-end from issuing from the UROM until a specified older uop retires.",
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0x75",
diff --git a/tools/perf/pmu-events/arch/x86/lunarlake/uncore-interconnect.json b/tools/perf/pmu-events/arch/x86/lunarlake/uncore-interconnect.json
new file mode 100644
index 000000000000..69ef928d57f6
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/lunarlake/uncore-interconnect.json
@@ -0,0 +1,10 @@
+[
+ {
+ "BriefDescription": "This 48-bit fixed counter counts the UCLK cycles.",
+ "Counter": "FIXED",
+ "EventCode": "0xff",
+ "EventName": "UNC_CLOCK.SOCKET",
+ "PerPkg": "1",
+ "Unit": "SANTA"
+ }
+]
diff --git a/tools/perf/pmu-events/arch/x86/lunarlake/uncore-memory.json b/tools/perf/pmu-events/arch/x86/lunarlake/uncore-memory.json
index 7d63580302de..63c4aa2791e4 100644
--- a/tools/perf/pmu-events/arch/x86/lunarlake/uncore-memory.json
+++ b/tools/perf/pmu-events/arch/x86/lunarlake/uncore-memory.json
@@ -32,5 +32,13 @@
"Experimental": "1",
"PerPkg": "1",
"Unit": "iMC"
+ },
+ {
+ "BriefDescription": "Total number of read and write byte transfers to/from DRAM, in 32B chunk, per DDR channel. Counter increments by 1 after sending or receiving 32B chunk data.",
+ "Counter": "0,1,2,3,4",
+ "EventCode": "0x3C",
+ "EventName": "UNC_M_TOTAL_DATA",
+ "PerPkg": "1",
+ "Unit": "iMC"
}
]
diff --git a/tools/perf/pmu-events/arch/x86/mapfile.csv b/tools/perf/pmu-events/arch/x86/mapfile.csv
index 354ce241500b..3d0c57198056 100644
--- a/tools/perf/pmu-events/arch/x86/mapfile.csv
+++ b/tools/perf/pmu-events/arch/x86/mapfile.csv
@@ -1,7 +1,7 @@
Family-model,Version,Filename,EventType
-GenuineIntel-6-(97|9A|B7|BA|BF),v1.31,alderlake,core
-GenuineIntel-6-BE,v1.31,alderlaken,core
-GenuineIntel-6-C[56],v1.09,arrowlake,core
+GenuineIntel-6-(97|9A|B7|BA|BF),v1.35,alderlake,core
+GenuineIntel-6-BE,v1.35,alderlaken,core
+GenuineIntel-6-C[56],v1.14,arrowlake,core
GenuineIntel-6-(1C|26|27|35|36),v5,bonnell,core
GenuineIntel-6-(3D|47),v30,broadwell,core
GenuineIntel-6-56,v12,broadwellde,core
@@ -9,28 +9,28 @@ GenuineIntel-6-4F,v23,broadwellx,core
GenuineIntel-6-55-[56789ABCDEF],v1.25,cascadelakex,core
GenuineIntel-6-DD,v1.00,clearwaterforest,core
GenuineIntel-6-9[6C],v1.05,elkhartlake,core
-GenuineIntel-6-CF,v1.14,emeraldrapids,core
+GenuineIntel-6-CF,v1.20,emeraldrapids,core
GenuineIntel-6-5[CF],v13,goldmont,core
GenuineIntel-6-7A,v1.01,goldmontplus,core
-GenuineIntel-6-B6,v1.09,grandridge,core
-GenuineIntel-6-A[DE],v1.10,graniterapids,core
+GenuineIntel-6-B6,v1.10,grandridge,core
+GenuineIntel-6-A[DE],v1.16,graniterapids,core
GenuineIntel-6-(3C|45|46),v36,haswell,core
GenuineIntel-6-3F,v29,haswellx,core
GenuineIntel-6-7[DE],v1.24,icelake,core
-GenuineIntel-6-6[AC],v1.28,icelakex,core
+GenuineIntel-6-6[AC],v1.30,icelakex,core
GenuineIntel-6-3A,v24,ivybridge,core
GenuineIntel-6-3E,v24,ivytown,core
GenuineIntel-6-2D,v24,jaketown,core
GenuineIntel-6-(57|85),v16,knightslanding,core
-GenuineIntel-6-BD,v1.14,lunarlake,core
-GenuineIntel-6-(AA|AC|B5),v1.14,meteorlake,core
+GenuineIntel-6-BD,v1.19,lunarlake,core
+GenuineIntel-6-(AA|AC|B5),v1.18,meteorlake,core
GenuineIntel-6-1[AEF],v4,nehalemep,core
GenuineIntel-6-2E,v4,nehalemex,core
-GenuineIntel-6-CC,v1.00,pantherlake,core
+GenuineIntel-6-CC,v1.02,pantherlake,core
GenuineIntel-6-A7,v1.04,rocketlake,core
GenuineIntel-6-2A,v19,sandybridge,core
-GenuineIntel-6-8F,v1.28,sapphirerapids,core
-GenuineIntel-6-AF,v1.11,sierraforest,core
+GenuineIntel-6-8F,v1.35,sapphirerapids,core
+GenuineIntel-6-AF,v1.13,sierraforest,core
GenuineIntel-6-(37|4A|4C|4D|5A),v15,silvermont,core
GenuineIntel-6-(4E|5E|8E|9E|A5|A6),v59,skylake,core
GenuineIntel-6-55-[01234],v1.37,skylakex,core
diff --git a/tools/perf/pmu-events/arch/x86/meteorlake/cache.json b/tools/perf/pmu-events/arch/x86/meteorlake/cache.json
index 82b115183924..d3fc04b2ffbd 100644
--- a/tools/perf/pmu-events/arch/x86/meteorlake/cache.json
+++ b/tools/perf/pmu-events/arch/x86/meteorlake/cache.json
@@ -14,7 +14,6 @@
"Counter": "0,1,2,3",
"EventCode": "0x51",
"EventName": "L1D.HWPF_MISS",
- "PublicDescription": "L1D.HWPF_MISS Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x20",
"Unit": "cpu_core"
@@ -24,7 +23,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x51",
"EventName": "L1D.REPLACEMENT",
- "PublicDescription": "Counts L1D data line replacements including opportunistic replacements, and replacements that require stall-for-replace or block-for-replace. Available PDIST counters: 0",
+ "PublicDescription": "Counts L1D data line replacements including opportunistic replacements, and replacements that require stall-for-replace or block-for-replace.",
"SampleAfterValue": "100003",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -34,7 +33,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x48",
"EventName": "L1D_PEND_MISS.FB_FULL",
- "PublicDescription": "Counts number of cycles a demand request has waited due to L1D Fill Buffer (FB) unavailability. Demand requests include cacheable/uncacheable demand load, store, lock or SW prefetch accesses. Available PDIST counters: 0",
+ "PublicDescription": "Counts number of cycles a demand request has waited due to L1D Fill Buffer (FB) unavailability. Demand requests include cacheable/uncacheable demand load, store, lock or SW prefetch accesses.",
"SampleAfterValue": "1000003",
"UMask": "0x2",
"Unit": "cpu_core"
@@ -46,7 +45,7 @@
"EdgeDetect": "1",
"EventCode": "0x48",
"EventName": "L1D_PEND_MISS.FB_FULL_PERIODS",
- "PublicDescription": "Counts number of phases a demand request has waited due to L1D Fill Buffer (FB) unavailability. Demand requests include cacheable/uncacheable demand load, store, lock or SW prefetch accesses. Available PDIST counters: 0",
+ "PublicDescription": "Counts number of phases a demand request has waited due to L1D Fill Buffer (FB) unavailability. Demand requests include cacheable/uncacheable demand load, store, lock or SW prefetch accesses.",
"SampleAfterValue": "1000003",
"UMask": "0x2",
"Unit": "cpu_core"
@@ -56,7 +55,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x48",
"EventName": "L1D_PEND_MISS.L2_STALLS",
- "PublicDescription": "Counts number of cycles a demand request has waited due to L1D due to lack of L2 resources. Demand requests include cacheable/uncacheable demand load, store, lock or SW prefetch accesses. Available PDIST counters: 0",
+ "PublicDescription": "Counts number of cycles a demand request has waited due to L1D due to lack of L2 resources. Demand requests include cacheable/uncacheable demand load, store, lock or SW prefetch accesses.",
"SampleAfterValue": "1000003",
"UMask": "0x4",
"Unit": "cpu_core"
@@ -66,7 +65,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x48",
"EventName": "L1D_PEND_MISS.PENDING",
- "PublicDescription": "Counts number of L1D misses that are outstanding in each cycle, that is each cycle the number of Fill Buffers (FB) outstanding required by Demand Reads. FB either is held by demand loads, or it is held by non-demand loads and gets hit at least once by demand. The valid outstanding interval is defined until the FB deallocation by one of the following ways: from FB allocation, if FB is allocated by demand from the demand Hit FB, if it is allocated by hardware or software prefetch. Note: In the L1D, a Demand Read contains cacheable or noncacheable demand loads, including ones causing cache-line splits and reads due to page walks resulted from any request type. Available PDIST counters: 0",
+ "PublicDescription": "Counts number of L1D misses that are outstanding in each cycle, that is each cycle the number of Fill Buffers (FB) outstanding required by Demand Reads. FB either is held by demand loads, or it is held by non-demand loads and gets hit at least once by demand. The valid outstanding interval is defined until the FB deallocation by one of the following ways: from FB allocation, if FB is allocated by demand from the demand Hit FB, if it is allocated by hardware or software prefetch. Note: In the L1D, a Demand Read contains cacheable or noncacheable demand loads, including ones causing cache-line splits and reads due to page walks resulted from any request type.",
"SampleAfterValue": "1000003",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -77,7 +76,7 @@
"CounterMask": "1",
"EventCode": "0x48",
"EventName": "L1D_PEND_MISS.PENDING_CYCLES",
- "PublicDescription": "Counts duration of L1D miss outstanding in cycles. Available PDIST counters: 0",
+ "PublicDescription": "Counts duration of L1D miss outstanding in cycles.",
"SampleAfterValue": "1000003",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -87,7 +86,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x25",
"EventName": "L2_LINES_IN.ALL",
- "PublicDescription": "Counts the number of L2 cache lines filling the L2. Counting does not cover rejects. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of L2 cache lines filling the L2. Counting does not cover rejects.",
"SampleAfterValue": "100003",
"UMask": "0x1f",
"Unit": "cpu_core"
@@ -147,7 +146,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x26",
"EventName": "L2_LINES_OUT.NON_SILENT",
- "PublicDescription": "Counts the number of lines that are evicted by L2 cache when triggered by an L2 cache fill. Those lines are in Modified state. Modified lines are written back to L3 Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of lines that are evicted by L2 cache when triggered by an L2 cache fill. Those lines are in Modified state. Modified lines are written back to L3",
"SampleAfterValue": "200003",
"UMask": "0x2",
"Unit": "cpu_core"
@@ -167,7 +166,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x26",
"EventName": "L2_LINES_OUT.SILENT",
- "PublicDescription": "Counts the number of lines that are silently dropped by L2 cache. These lines are typically in Shared or Exclusive state. A non-threaded event. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of lines that are silently dropped by L2 cache. These lines are typically in Shared or Exclusive state. A non-threaded event.",
"SampleAfterValue": "200003",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -177,7 +176,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x26",
"EventName": "L2_LINES_OUT.USELESS_HWPF",
- "PublicDescription": "Counts the number of cache lines that have been prefetched by the L2 hardware prefetcher but not used by demand access when evicted from the L2 cache Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of cache lines that have been prefetched by the L2 hardware prefetcher but not used by demand access when evicted from the L2 cache",
"SampleAfterValue": "200003",
"UMask": "0x4",
"Unit": "cpu_core"
@@ -187,7 +186,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_REQUEST.ALL",
- "PublicDescription": "Counts all requests that were hit or true misses in L2 cache. True-miss excludes misses that were merged with ongoing L2 misses. [This event is alias to L2_RQSTS.REFERENCES] Available PDIST counters: 0",
+ "PublicDescription": "Counts all requests that were hit or true misses in L2 cache. True-miss excludes misses that were merged with ongoing L2 misses. [This event is alias to L2_RQSTS.REFERENCES]",
"SampleAfterValue": "200003",
"UMask": "0xff",
"Unit": "cpu_core"
@@ -206,7 +205,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_REQUEST.HIT",
- "PublicDescription": "Counts all requests that hit L2 cache. [This event is alias to L2_RQSTS.HIT] Available PDIST counters: 0",
+ "PublicDescription": "Counts all requests that hit L2 cache. [This event is alias to L2_RQSTS.HIT]",
"SampleAfterValue": "200003",
"UMask": "0xdf",
"Unit": "cpu_core"
@@ -225,7 +224,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_REQUEST.MISS",
- "PublicDescription": "Counts read requests of any type with true-miss in the L2 cache. True-miss excludes L2 misses that were merged with ongoing L2 misses. [This event is alias to L2_RQSTS.MISS] Available PDIST counters: 0",
+ "PublicDescription": "Counts read requests of any type with true-miss in the L2 cache. True-miss excludes L2 misses that were merged with ongoing L2 misses. [This event is alias to L2_RQSTS.MISS]",
"SampleAfterValue": "200003",
"UMask": "0x3f",
"Unit": "cpu_core"
@@ -244,7 +243,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_RQSTS.ALL_CODE_RD",
- "PublicDescription": "Counts the total number of L2 code requests. Available PDIST counters: 0",
+ "PublicDescription": "Counts the total number of L2 code requests.",
"SampleAfterValue": "200003",
"UMask": "0xe4",
"Unit": "cpu_core"
@@ -254,7 +253,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_RQSTS.ALL_DEMAND_DATA_RD",
- "PublicDescription": "Counts Demand Data Read requests accessing the L2 cache. These requests may hit or miss L2 cache. True-miss exclude misses that were merged with ongoing L2 misses. An access is counted once. Available PDIST counters: 0",
+ "PublicDescription": "Counts Demand Data Read requests accessing the L2 cache. These requests may hit or miss L2 cache. True-miss exclude misses that were merged with ongoing L2 misses. An access is counted once.",
"SampleAfterValue": "200003",
"UMask": "0xe1",
"Unit": "cpu_core"
@@ -264,7 +263,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_RQSTS.ALL_DEMAND_MISS",
- "PublicDescription": "Counts demand requests that miss L2 cache. Available PDIST counters: 0",
+ "PublicDescription": "Counts demand requests that miss L2 cache.",
"SampleAfterValue": "200003",
"UMask": "0x27",
"Unit": "cpu_core"
@@ -274,7 +273,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_RQSTS.ALL_DEMAND_REFERENCES",
- "PublicDescription": "Counts demand requests to L2 cache. Available PDIST counters: 0",
+ "PublicDescription": "Counts demand requests to L2 cache.",
"SampleAfterValue": "200003",
"UMask": "0xe7",
"Unit": "cpu_core"
@@ -284,7 +283,6 @@
"Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_RQSTS.ALL_HWPF",
- "PublicDescription": "L2_RQSTS.ALL_HWPF Available PDIST counters: 0",
"SampleAfterValue": "200003",
"UMask": "0xf0",
"Unit": "cpu_core"
@@ -294,7 +292,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_RQSTS.ALL_RFO",
- "PublicDescription": "Counts the total number of RFO (read for ownership) requests to L2 cache. L2 RFO requests include both L1D demand RFO misses as well as L1D RFO prefetches. Available PDIST counters: 0",
+ "PublicDescription": "Counts the total number of RFO (read for ownership) requests to L2 cache. L2 RFO requests include both L1D demand RFO misses as well as L1D RFO prefetches.",
"SampleAfterValue": "200003",
"UMask": "0xe2",
"Unit": "cpu_core"
@@ -304,7 +302,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_RQSTS.CODE_RD_HIT",
- "PublicDescription": "Counts L2 cache hits when fetching instructions, code reads. Available PDIST counters: 0",
+ "PublicDescription": "Counts L2 cache hits when fetching instructions, code reads.",
"SampleAfterValue": "200003",
"UMask": "0xc4",
"Unit": "cpu_core"
@@ -314,7 +312,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_RQSTS.CODE_RD_MISS",
- "PublicDescription": "Counts L2 cache misses when fetching instructions. Available PDIST counters: 0",
+ "PublicDescription": "Counts L2 cache misses when fetching instructions.",
"SampleAfterValue": "200003",
"UMask": "0x24",
"Unit": "cpu_core"
@@ -324,7 +322,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_RQSTS.DEMAND_DATA_RD_HIT",
- "PublicDescription": "Counts the number of demand Data Read requests initiated by load instructions that hit L2 cache. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of demand Data Read requests initiated by load instructions that hit L2 cache.",
"SampleAfterValue": "200003",
"UMask": "0xc1",
"Unit": "cpu_core"
@@ -334,7 +332,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_RQSTS.DEMAND_DATA_RD_MISS",
- "PublicDescription": "Counts demand Data Read requests with true-miss in the L2 cache. True-miss excludes misses that were merged with ongoing L2 misses. An access is counted once. Available PDIST counters: 0",
+ "PublicDescription": "Counts demand Data Read requests with true-miss in the L2 cache. True-miss excludes misses that were merged with ongoing L2 misses. An access is counted once.",
"SampleAfterValue": "200003",
"UMask": "0x21",
"Unit": "cpu_core"
@@ -344,7 +342,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_RQSTS.HIT",
- "PublicDescription": "Counts all requests that hit L2 cache. [This event is alias to L2_REQUEST.HIT] Available PDIST counters: 0",
+ "PublicDescription": "Counts all requests that hit L2 cache. [This event is alias to L2_REQUEST.HIT]",
"SampleAfterValue": "200003",
"UMask": "0xdf",
"Unit": "cpu_core"
@@ -354,7 +352,6 @@
"Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_RQSTS.HWPF_MISS",
- "PublicDescription": "L2_RQSTS.HWPF_MISS Available PDIST counters: 0",
"SampleAfterValue": "200003",
"UMask": "0x30",
"Unit": "cpu_core"
@@ -364,7 +361,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_RQSTS.MISS",
- "PublicDescription": "Counts read requests of any type with true-miss in the L2 cache. True-miss excludes L2 misses that were merged with ongoing L2 misses. [This event is alias to L2_REQUEST.MISS] Available PDIST counters: 0",
+ "PublicDescription": "Counts read requests of any type with true-miss in the L2 cache. True-miss excludes L2 misses that were merged with ongoing L2 misses. [This event is alias to L2_REQUEST.MISS]",
"SampleAfterValue": "200003",
"UMask": "0x3f",
"Unit": "cpu_core"
@@ -374,7 +371,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_RQSTS.REFERENCES",
- "PublicDescription": "Counts all requests that were hit or true misses in L2 cache. True-miss excludes misses that were merged with ongoing L2 misses. [This event is alias to L2_REQUEST.ALL] Available PDIST counters: 0",
+ "PublicDescription": "Counts all requests that were hit or true misses in L2 cache. True-miss excludes misses that were merged with ongoing L2 misses. [This event is alias to L2_REQUEST.ALL]",
"SampleAfterValue": "200003",
"UMask": "0xff",
"Unit": "cpu_core"
@@ -384,7 +381,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_RQSTS.RFO_HIT",
- "PublicDescription": "Counts the RFO (Read-for-Ownership) requests that hit L2 cache. Available PDIST counters: 0",
+ "PublicDescription": "Counts the RFO (Read-for-Ownership) requests that hit L2 cache.",
"SampleAfterValue": "200003",
"UMask": "0xc2",
"Unit": "cpu_core"
@@ -394,7 +391,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_RQSTS.RFO_MISS",
- "PublicDescription": "Counts the RFO (Read-for-Ownership) requests that miss L2 cache. Available PDIST counters: 0",
+ "PublicDescription": "Counts the RFO (Read-for-Ownership) requests that miss L2 cache.",
"SampleAfterValue": "200003",
"UMask": "0x22",
"Unit": "cpu_core"
@@ -404,7 +401,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_RQSTS.SWPF_HIT",
- "PublicDescription": "Counts Software prefetch requests that hit the L2 cache. Accounts for PREFETCHNTA and PREFETCHT0/1/2 instructions when FB is not full. Available PDIST counters: 0",
+ "PublicDescription": "Counts Software prefetch requests that hit the L2 cache. Accounts for PREFETCHNTA and PREFETCHT0/1/2 instructions when FB is not full.",
"SampleAfterValue": "200003",
"UMask": "0xc8",
"Unit": "cpu_core"
@@ -414,7 +411,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_RQSTS.SWPF_MISS",
- "PublicDescription": "Counts Software prefetch requests that miss the L2 cache. Accounts for PREFETCHNTA and PREFETCHT0/1/2 instructions when FB is not full. Available PDIST counters: 0",
+ "PublicDescription": "Counts Software prefetch requests that miss the L2 cache. Accounts for PREFETCHNTA and PREFETCHT0/1/2 instructions when FB is not full.",
"SampleAfterValue": "200003",
"UMask": "0x28",
"Unit": "cpu_core"
@@ -424,7 +421,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x23",
"EventName": "L2_TRANS.L2_WB",
- "PublicDescription": "Counts L2 writebacks that access L2 cache. Available PDIST counters: 0",
+ "PublicDescription": "Counts L2 writebacks that access L2 cache.",
"SampleAfterValue": "200003",
"UMask": "0x40",
"Unit": "cpu_core"
@@ -434,7 +431,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x42",
"EventName": "LOCK_CYCLES.CACHE_LOCK_DURATION",
- "PublicDescription": "This event counts the number of cycles when the L1D is locked. It is a superset of the 0x1 mask (BUS_LOCK_CLOCKS.BUS_LOCK_DURATION). Available PDIST counters: 0",
+ "PublicDescription": "This event counts the number of cycles when the L1D is locked. It is a superset of the 0x1 mask (BUS_LOCK_CLOCKS.BUS_LOCK_DURATION).",
"SampleAfterValue": "2000003",
"UMask": "0x2",
"Unit": "cpu_core"
@@ -454,7 +451,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0x2e",
"EventName": "LONGEST_LAT_CACHE.MISS",
- "PublicDescription": "Counts core-originated cacheable requests that miss the L3 cache (Longest Latency cache). Requests include data and code reads, Reads-for-Ownership (RFOs), speculative accesses and hardware prefetches to the L1 and L2. It does not include hardware prefetches to the L3, and may not count other types of requests to the L3. Available PDIST counters: 0",
+ "PublicDescription": "Counts core-originated cacheable requests that miss the L3 cache (Longest Latency cache). Requests include data and code reads, Reads-for-Ownership (RFOs), speculative accesses and hardware prefetches to the L1 and L2. It does not include hardware prefetches to the L3, and may not count other types of requests to the L3.",
"SampleAfterValue": "100003",
"UMask": "0x41",
"Unit": "cpu_core"
@@ -474,7 +471,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0x2e",
"EventName": "LONGEST_LAT_CACHE.REFERENCE",
- "PublicDescription": "Counts core-originated cacheable requests to the L3 cache (Longest Latency cache). Requests include data and code reads, Reads-for-Ownership (RFOs), speculative accesses and hardware prefetches to the L1 and L2. It does not include hardware prefetches to the L3, and may not count other types of requests to the L3. Available PDIST counters: 0",
+ "PublicDescription": "Counts core-originated cacheable requests to the L3 cache (Longest Latency cache). Requests include data and code reads, Reads-for-Ownership (RFOs), speculative accesses and hardware prefetches to the L1 and L2. It does not include hardware prefetches to the L3, and may not count other types of requests to the L3.",
"SampleAfterValue": "100003",
"UMask": "0x4f",
"Unit": "cpu_core"
@@ -695,7 +692,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x43",
"EventName": "MEM_LOAD_COMPLETED.L1_MISS_ANY",
- "PublicDescription": "Number of completed demand load requests that missed the L1 data cache including shadow misses (FB hits, merge to an ongoing L1D miss) Available PDIST counters: 0",
+ "PublicDescription": "Number of completed demand load requests that missed the L1 data cache including shadow misses (FB hits, merge to an ongoing L1D miss)",
"SampleAfterValue": "1000003",
"UMask": "0xfd",
"Unit": "cpu_core"
@@ -947,7 +944,6 @@
"Counter": "0,1,2,3",
"EventCode": "0x44",
"EventName": "MEM_STORE_RETIRED.L2_HIT",
- "PublicDescription": "MEM_STORE_RETIRED.L2_HIT Available PDIST counters: 0",
"SampleAfterValue": "200003",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -1177,12 +1173,36 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xe5",
"EventName": "MEM_UOP_RETIRED.ANY",
- "PublicDescription": "Number of retired micro-operations (uops) for load or store memory accesses Available PDIST counters: 0",
+ "PublicDescription": "Number of retired micro-operations (uops) for load or store memory accesses",
"SampleAfterValue": "1000003",
"UMask": "0x3",
"Unit": "cpu_core"
},
{
+ "BriefDescription": "Counts writebacks of modified cachelines that hit in the L3 or were snooped from another core's caches.",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x2A,0x2B",
+ "EventName": "OCR.COREWB_M.L3_HIT",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x3F803C0008",
+ "PublicDescription": "Counts writebacks of modified cachelines that hit in the L3 or were snooped from another core's caches. Available PDIST counters: 0",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Counts writebacks of non-modified cachelines that hit in the L3 or were snooped from another core's caches.",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x2A,0x2B",
+ "EventName": "OCR.COREWB_NONM.L3_HIT",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x3F803C1000",
+ "PublicDescription": "Counts writebacks of non-modified cachelines that hit in the L3 or were snooped from another core's caches. Available PDIST counters: 0",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1",
+ "Unit": "cpu_core"
+ },
+ {
"BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that have any type of response.",
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xB7",
@@ -1399,11 +1419,23 @@
"Unit": "cpu_core"
},
{
+ "BriefDescription": "Counts all data read, code read, RFO and ITOM requests including demands and prefetches to the core caches (L1 or L2) that hit in the L3 or were snooped from another core's caches.",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x2A,0x2B",
+ "EventName": "OCR.READS_TO_CORE.L3_HIT",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x3F803C4477",
+ "PublicDescription": "Counts all data read, code read, RFO and ITOM requests including demands and prefetches to the core caches (L1 or L2) that hit in the L3 or were snooped from another core's caches. Available PDIST counters: 0",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1",
+ "Unit": "cpu_core"
+ },
+ {
"BriefDescription": "Any memory transaction that reached the SQ.",
"Counter": "0,1,2,3",
"EventCode": "0x21",
"EventName": "OFFCORE_REQUESTS.ALL_REQUESTS",
- "PublicDescription": "Counts memory transactions reached the super queue including requests initiated by the core, all L3 prefetches, page walks, etc.. Available PDIST counters: 0",
+ "PublicDescription": "Counts memory transactions reached the super queue including requests initiated by the core, all L3 prefetches, page walks, etc..",
"SampleAfterValue": "100003",
"UMask": "0x80",
"Unit": "cpu_core"
@@ -1413,7 +1445,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x21",
"EventName": "OFFCORE_REQUESTS.DATA_RD",
- "PublicDescription": "Counts the demand and prefetch data reads. All Core Data Reads include cacheable 'Demands' and L2 prefetchers (not L3 prefetchers). Counting also covers reads due to page walks resulted from any request type. Available PDIST counters: 0",
+ "PublicDescription": "Counts the demand and prefetch data reads. All Core Data Reads include cacheable 'Demands' and L2 prefetchers (not L3 prefetchers). Counting also covers reads due to page walks resulted from any request type.",
"SampleAfterValue": "100003",
"UMask": "0x8",
"Unit": "cpu_core"
@@ -1423,7 +1455,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x21",
"EventName": "OFFCORE_REQUESTS.DEMAND_CODE_RD",
- "PublicDescription": "Counts both cacheable and Non-Cacheable code read requests. Available PDIST counters: 0",
+ "PublicDescription": "Counts both cacheable and Non-Cacheable code read requests.",
"SampleAfterValue": "100003",
"UMask": "0x2",
"Unit": "cpu_core"
@@ -1433,7 +1465,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x21",
"EventName": "OFFCORE_REQUESTS.DEMAND_DATA_RD",
- "PublicDescription": "Counts the Demand Data Read requests sent to uncore. Use it in conjunction with OFFCORE_REQUESTS_OUTSTANDING to determine average latency in the uncore. Available PDIST counters: 0",
+ "PublicDescription": "Counts the Demand Data Read requests sent to uncore. Use it in conjunction with OFFCORE_REQUESTS_OUTSTANDING to determine average latency in the uncore.",
"SampleAfterValue": "100003",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -1443,7 +1475,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x21",
"EventName": "OFFCORE_REQUESTS.DEMAND_RFO",
- "PublicDescription": "Counts the demand RFO (read for ownership) requests including regular RFOs, locks, ItoM. Available PDIST counters: 0",
+ "PublicDescription": "Counts the demand RFO (read for ownership) requests including regular RFOs, locks, ItoM.",
"SampleAfterValue": "100003",
"UMask": "0x4",
"Unit": "cpu_core"
@@ -1454,7 +1486,7 @@
"CounterMask": "1",
"EventCode": "0x20",
"EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD",
- "PublicDescription": "Counts cycles when offcore outstanding cacheable Core Data Read transactions are present in the super queue. A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation). See corresponding Umask under OFFCORE_REQUESTS. Available PDIST counters: 0",
+ "PublicDescription": "Counts cycles when offcore outstanding cacheable Core Data Read transactions are present in the super queue. A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation). See corresponding Umask under OFFCORE_REQUESTS.",
"SampleAfterValue": "1000003",
"UMask": "0x8",
"Unit": "cpu_core"
@@ -1465,7 +1497,7 @@
"CounterMask": "1",
"EventCode": "0x20",
"EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_CODE_RD",
- "PublicDescription": "Counts the number of offcore outstanding Code Reads transactions in the super queue every cycle. The 'Offcore outstanding' state of the transaction lasts from the L2 miss until the sending transaction completion to requestor (SQ deallocation). See the corresponding Umask under OFFCORE_REQUESTS. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of offcore outstanding Code Reads transactions in the super queue every cycle. The 'Offcore outstanding' state of the transaction lasts from the L2 miss until the sending transaction completion to requestor (SQ deallocation). See the corresponding Umask under OFFCORE_REQUESTS.",
"SampleAfterValue": "1000003",
"UMask": "0x2",
"Unit": "cpu_core"
@@ -1476,7 +1508,6 @@
"CounterMask": "1",
"EventCode": "0x20",
"EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_DATA_RD",
- "PublicDescription": "Cycles where at least 1 outstanding demand data read request is pending. Available PDIST counters: 0",
"SampleAfterValue": "2000003",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -1487,7 +1518,7 @@
"CounterMask": "1",
"EventCode": "0x20",
"EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO",
- "PublicDescription": "Counts the number of offcore outstanding demand rfo Reads transactions in the super queue every cycle. The 'Offcore outstanding' state of the transaction lasts from the L2 miss until the sending transaction completion to requestor (SQ deallocation). See the corresponding Umask under OFFCORE_REQUESTS. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of offcore outstanding demand rfo Reads transactions in the super queue every cycle. The 'Offcore outstanding' state of the transaction lasts from the L2 miss until the sending transaction completion to requestor (SQ deallocation). See the corresponding Umask under OFFCORE_REQUESTS.",
"SampleAfterValue": "1000003",
"UMask": "0x4",
"Unit": "cpu_core"
@@ -1497,7 +1528,6 @@
"Counter": "0,1,2,3",
"EventCode": "0x20",
"EventName": "OFFCORE_REQUESTS_OUTSTANDING.DATA_RD",
- "PublicDescription": "OFFCORE_REQUESTS_OUTSTANDING.DATA_RD Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x8",
"Unit": "cpu_core"
@@ -1507,7 +1537,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x20",
"EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_CODE_RD",
- "PublicDescription": "Counts the number of offcore outstanding Code Reads transactions in the super queue every cycle. The 'Offcore outstanding' state of the transaction lasts from the L2 miss until the sending transaction completion to requestor (SQ deallocation). See the corresponding Umask under OFFCORE_REQUESTS. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of offcore outstanding Code Reads transactions in the super queue every cycle. The 'Offcore outstanding' state of the transaction lasts from the L2 miss until the sending transaction completion to requestor (SQ deallocation). See the corresponding Umask under OFFCORE_REQUESTS.",
"SampleAfterValue": "1000003",
"UMask": "0x2",
"Unit": "cpu_core"
@@ -1517,7 +1547,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x20",
"EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD",
- "PublicDescription": "For every cycle, increments by the number of outstanding demand data read requests pending. Requests are considered outstanding from the time they miss the core's L2 cache until the transaction completion message is sent to the requestor. Available PDIST counters: 0",
+ "PublicDescription": "For every cycle, increments by the number of outstanding demand data read requests pending. Requests are considered outstanding from the time they miss the core's L2 cache until the transaction completion message is sent to the requestor.",
"SampleAfterValue": "1000003",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -1527,7 +1557,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x20",
"EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_RFO",
- "PublicDescription": "Counts the number of off-core outstanding read-for-ownership (RFO) store transactions every cycle. An RFO transaction is considered to be in the Off-core outstanding state between L2 cache miss and transaction completion. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of off-core outstanding read-for-ownership (RFO) store transactions every cycle. An RFO transaction is considered to be in the Off-core outstanding state between L2 cache miss and transaction completion.",
"SampleAfterValue": "1000003",
"UMask": "0x4",
"Unit": "cpu_core"
@@ -1537,7 +1567,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x2c",
"EventName": "SQ_MISC.BUS_LOCK",
- "PublicDescription": "Counts the more expensive bus lock needed to enforce cache coherency for certain memory accesses that need to be done atomically. Can be created by issuing an atomic instruction (via the LOCK prefix) which causes a cache line split or accesses uncacheable memory. Available PDIST counters: 0",
+ "PublicDescription": "Counts the more expensive bus lock needed to enforce cache coherency for certain memory accesses that need to be done atomically. Can be created by issuing an atomic instruction (via the LOCK prefix) which causes a cache line split or accesses uncacheable memory.",
"SampleAfterValue": "100003",
"UMask": "0x10",
"Unit": "cpu_core"
@@ -1547,7 +1577,6 @@
"Counter": "0,1,2,3",
"EventCode": "0x40",
"EventName": "SW_PREFETCH_ACCESS.ANY",
- "PublicDescription": "Counts the number of PREFETCHNTA, PREFETCHW, PREFETCHT0, PREFETCHT1 or PREFETCHT2 instructions executed. Available PDIST counters: 0",
"SampleAfterValue": "100003",
"UMask": "0xf",
"Unit": "cpu_core"
@@ -1557,7 +1586,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x40",
"EventName": "SW_PREFETCH_ACCESS.NTA",
- "PublicDescription": "Counts the number of PREFETCHNTA instructions executed. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of PREFETCHNTA instructions executed.",
"SampleAfterValue": "100003",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -1567,7 +1596,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x40",
"EventName": "SW_PREFETCH_ACCESS.PREFETCHW",
- "PublicDescription": "Counts the number of PREFETCHW instructions executed. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of PREFETCHW instructions executed.",
"SampleAfterValue": "100003",
"UMask": "0x8",
"Unit": "cpu_core"
@@ -1577,7 +1606,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x40",
"EventName": "SW_PREFETCH_ACCESS.T0",
- "PublicDescription": "Counts the number of PREFETCHT0 instructions executed. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of PREFETCHT0 instructions executed.",
"SampleAfterValue": "100003",
"UMask": "0x2",
"Unit": "cpu_core"
@@ -1587,7 +1616,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x40",
"EventName": "SW_PREFETCH_ACCESS.T1_T2",
- "PublicDescription": "Counts the number of PREFETCHT1 or PREFETCHT2 instructions executed. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of PREFETCHT1 or PREFETCHT2 instructions executed.",
"SampleAfterValue": "100003",
"UMask": "0x4",
"Unit": "cpu_core"
diff --git a/tools/perf/pmu-events/arch/x86/meteorlake/floating-point.json b/tools/perf/pmu-events/arch/x86/meteorlake/floating-point.json
index ae9778aa755b..28dc5e06ee31 100644
--- a/tools/perf/pmu-events/arch/x86/meteorlake/floating-point.json
+++ b/tools/perf/pmu-events/arch/x86/meteorlake/floating-point.json
@@ -15,7 +15,6 @@
"CounterMask": "1",
"EventCode": "0xb0",
"EventName": "ARITH.FPDIV_ACTIVE",
- "PublicDescription": "This event counts the cycles the floating point divider is busy. Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -25,7 +24,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc1",
"EventName": "ASSISTS.FP",
- "PublicDescription": "Counts all microcode Floating Point assists. Available PDIST counters: 0",
+ "PublicDescription": "Counts all microcode Floating Point assists.",
"SampleAfterValue": "100003",
"UMask": "0x2",
"Unit": "cpu_core"
@@ -35,7 +34,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc1",
"EventName": "ASSISTS.SSE_AVX_MIX",
- "PublicDescription": "ASSISTS.SSE_AVX_MIX Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x10",
"Unit": "cpu_core"
@@ -45,7 +43,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xb3",
"EventName": "FP_ARITH_DISPATCHED.PORT_0",
- "PublicDescription": "FP_ARITH_DISPATCHED.PORT_0 [This event is alias to FP_ARITH_DISPATCHED.V0] Available PDIST counters: 0",
"SampleAfterValue": "2000003",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -55,7 +52,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xb3",
"EventName": "FP_ARITH_DISPATCHED.PORT_1",
- "PublicDescription": "FP_ARITH_DISPATCHED.PORT_1 [This event is alias to FP_ARITH_DISPATCHED.V1] Available PDIST counters: 0",
"SampleAfterValue": "2000003",
"UMask": "0x2",
"Unit": "cpu_core"
@@ -65,7 +61,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xb3",
"EventName": "FP_ARITH_DISPATCHED.PORT_5",
- "PublicDescription": "FP_ARITH_DISPATCHED.PORT_5 [This event is alias to FP_ARITH_DISPATCHED.V2] Available PDIST counters: 0",
"SampleAfterValue": "2000003",
"UMask": "0x4",
"Unit": "cpu_core"
@@ -75,7 +70,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xb3",
"EventName": "FP_ARITH_DISPATCHED.V0",
- "PublicDescription": "FP_ARITH_DISPATCHED.V0 [This event is alias to FP_ARITH_DISPATCHED.PORT_0] Available PDIST counters: 0",
"SampleAfterValue": "2000003",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -85,7 +79,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xb3",
"EventName": "FP_ARITH_DISPATCHED.V1",
- "PublicDescription": "FP_ARITH_DISPATCHED.V1 [This event is alias to FP_ARITH_DISPATCHED.PORT_1] Available PDIST counters: 0",
"SampleAfterValue": "2000003",
"UMask": "0x2",
"Unit": "cpu_core"
@@ -95,7 +88,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xb3",
"EventName": "FP_ARITH_DISPATCHED.V2",
- "PublicDescription": "FP_ARITH_DISPATCHED.V2 [This event is alias to FP_ARITH_DISPATCHED.PORT_5] Available PDIST counters: 0",
"SampleAfterValue": "2000003",
"UMask": "0x4",
"Unit": "cpu_core"
@@ -105,7 +97,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc7",
"EventName": "FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE",
- "PublicDescription": "Number of SSE/AVX computational 128-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 2 computation operations, one for each element. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events. Available PDIST counters: 0",
+ "PublicDescription": "Number of SSE/AVX computational 128-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 2 computation operations, one for each element. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.",
"SampleAfterValue": "100003",
"UMask": "0x4",
"Unit": "cpu_core"
@@ -115,7 +107,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc7",
"EventName": "FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE",
- "PublicDescription": "Number of SSE/AVX computational 128-bit packed single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 4 computation operations, one for each element. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT RSQRT RCP DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events. Available PDIST counters: 0",
+ "PublicDescription": "Number of SSE/AVX computational 128-bit packed single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 4 computation operations, one for each element. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT RSQRT RCP DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.",
"SampleAfterValue": "100003",
"UMask": "0x8",
"Unit": "cpu_core"
@@ -125,7 +117,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc7",
"EventName": "FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE",
- "PublicDescription": "Number of SSE/AVX computational 256-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 4 computation operations, one for each element. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events. Available PDIST counters: 0",
+ "PublicDescription": "Number of SSE/AVX computational 256-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 4 computation operations, one for each element. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.",
"SampleAfterValue": "100003",
"UMask": "0x10",
"Unit": "cpu_core"
@@ -135,7 +127,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc7",
"EventName": "FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE",
- "PublicDescription": "Number of SSE/AVX computational 256-bit packed single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 8 computation operations, one for each element. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT RSQRT RCP DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events. Available PDIST counters: 0",
+ "PublicDescription": "Number of SSE/AVX computational 256-bit packed single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 8 computation operations, one for each element. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT RSQRT RCP DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.",
"SampleAfterValue": "100003",
"UMask": "0x20",
"Unit": "cpu_core"
@@ -145,7 +137,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc7",
"EventName": "FP_ARITH_INST_RETIRED.4_FLOPS",
- "PublicDescription": "Number of SSE/AVX computational 128-bit packed single precision and 256-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 2 or/and 4 computation operations, one for each element. Applies to SSE* and AVX* packed single precision floating-point and packed double precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX RCP14 RSQRT14 SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events. Available PDIST counters: 0",
+ "PublicDescription": "Number of SSE/AVX computational 128-bit packed single precision and 256-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 2 or/and 4 computation operations, one for each element. Applies to SSE* and AVX* packed single precision floating-point and packed double precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX RCP14 RSQRT14 SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.",
"SampleAfterValue": "100003",
"UMask": "0x18",
"Unit": "cpu_core"
@@ -155,7 +147,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc7",
"EventName": "FP_ARITH_INST_RETIRED.SCALAR",
- "PublicDescription": "Number of SSE/AVX computational scalar single precision and double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 1 computational operation. Applies to SSE* and AVX* scalar single precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT RSQRT RCP FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events. Available PDIST counters: 0",
+ "PublicDescription": "Number of SSE/AVX computational scalar single precision and double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 1 computational operation. Applies to SSE* and AVX* scalar single precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT RSQRT RCP FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.",
"SampleAfterValue": "1000003",
"UMask": "0x3",
"Unit": "cpu_core"
@@ -165,7 +157,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc7",
"EventName": "FP_ARITH_INST_RETIRED.SCALAR_DOUBLE",
- "PublicDescription": "Number of SSE/AVX computational scalar double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 1 computational operation. Applies to SSE* and AVX* scalar double precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events. Available PDIST counters: 0",
+ "PublicDescription": "Number of SSE/AVX computational scalar double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 1 computational operation. Applies to SSE* and AVX* scalar double precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.",
"SampleAfterValue": "100003",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -175,7 +167,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc7",
"EventName": "FP_ARITH_INST_RETIRED.SCALAR_SINGLE",
- "PublicDescription": "Number of SSE/AVX computational scalar single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 1 computational operation. Applies to SSE* and AVX* scalar single precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT RSQRT RCP FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events. Available PDIST counters: 0",
+ "PublicDescription": "Number of SSE/AVX computational scalar single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 1 computational operation. Applies to SSE* and AVX* scalar single precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT RSQRT RCP FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.",
"SampleAfterValue": "100003",
"UMask": "0x2",
"Unit": "cpu_core"
@@ -185,7 +177,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc7",
"EventName": "FP_ARITH_INST_RETIRED.VECTOR",
- "PublicDescription": "Number of any Vector retired FP arithmetic instructions. The DAZ and FTZ flags in the MXCSR register need to be set when using these events. Available PDIST counters: 0",
+ "PublicDescription": "Number of any Vector retired FP arithmetic instructions. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.",
"SampleAfterValue": "1000003",
"UMask": "0xfc",
"Unit": "cpu_core"
diff --git a/tools/perf/pmu-events/arch/x86/meteorlake/frontend.json b/tools/perf/pmu-events/arch/x86/meteorlake/frontend.json
index 82727022efb6..6484834b1127 100644
--- a/tools/perf/pmu-events/arch/x86/meteorlake/frontend.json
+++ b/tools/perf/pmu-events/arch/x86/meteorlake/frontend.json
@@ -14,7 +14,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x60",
"EventName": "BACLEARS.ANY",
- "PublicDescription": "Number of times the front-end is resteered when it finds a branch instruction in a fetch line. This is called Unknown Branch which occurs for the first time a branch instruction is fetched or when the branch is not tracked by the BPU (Branch Prediction Unit) anymore. Available PDIST counters: 0",
+ "PublicDescription": "Number of times the front-end is resteered when it finds a branch instruction in a fetch line. This is called Unknown Branch which occurs for the first time a branch instruction is fetched or when the branch is not tracked by the BPU (Branch Prediction Unit) anymore.",
"SampleAfterValue": "100003",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -24,7 +24,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x87",
"EventName": "DECODE.LCP",
- "PublicDescription": "Counts cycles that the Instruction Length decoder (ILD) stalls occurred due to dynamically changing prefix length of the decoded instruction (by operand size prefix instruction 0x66, address size prefix instruction 0x67 or REX.W for Intel64). Count is proportional to the number of prefixes in a 16B-line. This may result in a three-cycle penalty for each LCP (Length changing prefix) in a 16-byte chunk. Available PDIST counters: 0",
+ "PublicDescription": "Counts cycles that the Instruction Length decoder (ILD) stalls occurred due to dynamically changing prefix length of the decoded instruction (by operand size prefix instruction 0x66, address size prefix instruction 0x67 or REX.W for Intel64). Count is proportional to the number of prefixes in a 16B-line. This may result in a three-cycle penalty for each LCP (Length changing prefix) in a 16-byte chunk.",
"SampleAfterValue": "500009",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -34,7 +34,6 @@
"Counter": "0,1,2,3",
"EventCode": "0x87",
"EventName": "DECODE.MS_BUSY",
- "PublicDescription": "Cycles the Microcode Sequencer is busy. Available PDIST counters: 0",
"SampleAfterValue": "500009",
"UMask": "0x2",
"Unit": "cpu_core"
@@ -44,7 +43,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x61",
"EventName": "DSB2MITE_SWITCHES.PENALTY_CYCLES",
- "PublicDescription": "Decode Stream Buffer (DSB) is a Uop-cache that holds translations of previously fetched instructions that were decoded by the legacy x86 decode pipeline (MITE). This event counts fetch penalty cycles when a transition occurs from DSB to MITE. Available PDIST counters: 0",
+ "PublicDescription": "Decode Stream Buffer (DSB) is a Uop-cache that holds translations of previously fetched instructions that were decoded by the legacy x86 decode pipeline (MITE). This event counts fetch penalty cycles when a transition occurs from DSB to MITE.",
"SampleAfterValue": "100003",
"UMask": "0x2",
"Unit": "cpu_core"
@@ -405,7 +404,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x80",
"EventName": "ICACHE_DATA.STALLS",
- "PublicDescription": "Counts cycles where a code line fetch is stalled due to an L1 instruction cache miss. The decode pipeline works at a 32 Byte granularity. Available PDIST counters: 0",
+ "PublicDescription": "Counts cycles where a code line fetch is stalled due to an L1 instruction cache miss. The decode pipeline works at a 32 Byte granularity.",
"SampleAfterValue": "500009",
"UMask": "0x4",
"Unit": "cpu_core"
@@ -417,7 +416,6 @@
"EdgeDetect": "1",
"EventCode": "0x80",
"EventName": "ICACHE_DATA.STALL_PERIODS",
- "PublicDescription": "ICACHE_DATA.STALL_PERIODS Available PDIST counters: 0",
"SampleAfterValue": "500009",
"UMask": "0x4",
"Unit": "cpu_core"
@@ -427,7 +425,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x83",
"EventName": "ICACHE_TAG.STALLS",
- "PublicDescription": "Counts cycles where a code fetch is stalled due to L1 instruction cache tag miss. Available PDIST counters: 0",
+ "PublicDescription": "Counts cycles where a code fetch is stalled due to L1 instruction cache tag miss.",
"SampleAfterValue": "200003",
"UMask": "0x4",
"Unit": "cpu_core"
@@ -438,7 +436,7 @@
"CounterMask": "1",
"EventCode": "0x79",
"EventName": "IDQ.DSB_CYCLES_ANY",
- "PublicDescription": "Counts the number of cycles uops were delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of cycles uops were delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path.",
"SampleAfterValue": "2000003",
"UMask": "0x8",
"Unit": "cpu_core"
@@ -449,7 +447,7 @@
"CounterMask": "6",
"EventCode": "0x79",
"EventName": "IDQ.DSB_CYCLES_OK",
- "PublicDescription": "Counts the number of cycles where optimal number of uops was delivered to the Instruction Decode Queue (IDQ) from the DSB (Decode Stream Buffer) path. Count includes uops that may 'bypass' the IDQ. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of cycles where optimal number of uops was delivered to the Instruction Decode Queue (IDQ) from the DSB (Decode Stream Buffer) path. Count includes uops that may 'bypass' the IDQ.",
"SampleAfterValue": "2000003",
"UMask": "0x8",
"Unit": "cpu_core"
@@ -459,7 +457,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x79",
"EventName": "IDQ.DSB_UOPS",
- "PublicDescription": "Counts the number of uops delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of uops delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path.",
"SampleAfterValue": "2000003",
"UMask": "0x8",
"Unit": "cpu_core"
@@ -470,7 +468,7 @@
"CounterMask": "1",
"EventCode": "0x79",
"EventName": "IDQ.MITE_CYCLES_ANY",
- "PublicDescription": "Counts the number of cycles uops were delivered to the Instruction Decode Queue (IDQ) from the MITE (legacy decode pipeline) path. During these cycles uops are not being delivered from the Decode Stream Buffer (DSB). Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of cycles uops were delivered to the Instruction Decode Queue (IDQ) from the MITE (legacy decode pipeline) path. During these cycles uops are not being delivered from the Decode Stream Buffer (DSB).",
"SampleAfterValue": "2000003",
"UMask": "0x4",
"Unit": "cpu_core"
@@ -481,7 +479,7 @@
"CounterMask": "6",
"EventCode": "0x79",
"EventName": "IDQ.MITE_CYCLES_OK",
- "PublicDescription": "Counts the number of cycles where optimal number of uops was delivered to the Instruction Decode Queue (IDQ) from the MITE (legacy decode pipeline) path. During these cycles uops are not being delivered from the Decode Stream Buffer (DSB). Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of cycles where optimal number of uops was delivered to the Instruction Decode Queue (IDQ) from the MITE (legacy decode pipeline) path. During these cycles uops are not being delivered from the Decode Stream Buffer (DSB).",
"SampleAfterValue": "2000003",
"UMask": "0x4",
"Unit": "cpu_core"
@@ -491,7 +489,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x79",
"EventName": "IDQ.MITE_UOPS",
- "PublicDescription": "Counts the number of uops delivered to Instruction Decode Queue (IDQ) from the MITE path. This also means that uops are not being delivered from the Decode Stream Buffer (DSB). Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of uops delivered to Instruction Decode Queue (IDQ) from the MITE path. This also means that uops are not being delivered from the Decode Stream Buffer (DSB).",
"SampleAfterValue": "2000003",
"UMask": "0x4",
"Unit": "cpu_core"
@@ -502,7 +500,7 @@
"CounterMask": "1",
"EventCode": "0x79",
"EventName": "IDQ.MS_CYCLES_ANY",
- "PublicDescription": "Counts cycles during which uops are being delivered to Instruction Decode Queue (IDQ) while the Microcode Sequencer (MS) is busy. Uops maybe initiated by Decode Stream Buffer (DSB) or MITE. Available PDIST counters: 0",
+ "PublicDescription": "Counts cycles during which uops are being delivered to Instruction Decode Queue (IDQ) while the Microcode Sequencer (MS) is busy. Uops maybe initiated by Decode Stream Buffer (DSB) or MITE.",
"SampleAfterValue": "2000003",
"UMask": "0x20",
"Unit": "cpu_core"
@@ -514,7 +512,7 @@
"EdgeDetect": "1",
"EventCode": "0x79",
"EventName": "IDQ.MS_SWITCHES",
- "PublicDescription": "Number of switches from DSB (Decode Stream Buffer) or MITE (legacy decode pipeline) to the Microcode Sequencer. Available PDIST counters: 0",
+ "PublicDescription": "Number of switches from DSB (Decode Stream Buffer) or MITE (legacy decode pipeline) to the Microcode Sequencer.",
"SampleAfterValue": "100003",
"UMask": "0x20",
"Unit": "cpu_core"
@@ -524,7 +522,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x79",
"EventName": "IDQ.MS_UOPS",
- "PublicDescription": "Counts the number of uops initiated by MITE or Decode Stream Buffer (DSB) and delivered to Instruction Decode Queue (IDQ) while the Microcode Sequencer (MS) is busy. Counting includes uops that may 'bypass' the IDQ. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of uops initiated by MITE or Decode Stream Buffer (DSB) and delivered to Instruction Decode Queue (IDQ) while the Microcode Sequencer (MS) is busy. Counting includes uops that may 'bypass' the IDQ.",
"SampleAfterValue": "1000003",
"UMask": "0x20",
"Unit": "cpu_core"
@@ -534,7 +532,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0x9c",
"EventName": "IDQ_BUBBLES.CORE",
- "PublicDescription": "This event counts a subset of the Topdown Slots event that when no operation was delivered to the back-end pipeline due to instruction fetch limitations when the back-end could have accepted more operations. Common examples include instruction cache misses or x86 instruction decode limitations. The count may be distributed among unhalted logical processors (hyper-threads) who share the same physical core, in processors that support Intel Hyper-Threading Technology. Software can use this event as the numerator for the Frontend Bound metric (or top-level category) of the Top-down Microarchitecture Analysis method. Available PDIST counters: 0",
+ "PublicDescription": "This event counts a subset of the Topdown Slots event that when no operation was delivered to the back-end pipeline due to instruction fetch limitations when the back-end could have accepted more operations. Common examples include instruction cache misses or x86 instruction decode limitations. The count may be distributed among unhalted logical processors (hyper-threads) who share the same physical core, in processors that support Intel Hyper-Threading Technology. Software can use this event as the numerator for the Frontend Bound metric (or top-level category) of the Top-down Microarchitecture Analysis method.",
"SampleAfterValue": "1000003",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -545,7 +543,7 @@
"CounterMask": "6",
"EventCode": "0x9c",
"EventName": "IDQ_BUBBLES.CYCLES_0_UOPS_DELIV.CORE",
- "PublicDescription": "Counts the number of cycles when no uops were delivered by the Instruction Decode Queue (IDQ) to the back-end of the pipeline when there was no back-end stalls. This event counts for one SMT thread in a given cycle. [This event is alias to IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE] Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of cycles when no uops were delivered by the Instruction Decode Queue (IDQ) to the back-end of the pipeline when there was no back-end stalls. This event counts for one SMT thread in a given cycle. [This event is alias to IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE]",
"SampleAfterValue": "1000003",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -557,7 +555,7 @@
"EventCode": "0x9c",
"EventName": "IDQ_BUBBLES.CYCLES_FE_WAS_OK",
"Invert": "1",
- "PublicDescription": "Counts the number of cycles when the optimal number of uops were delivered by the Instruction Decode Queue (IDQ) to the back-end of the pipeline when there was no back-end stalls. This event counts for one SMT thread in a given cycle. [This event is alias to IDQ_UOPS_NOT_DELIVERED.CYCLES_FE_WAS_OK] Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of cycles when the optimal number of uops were delivered by the Instruction Decode Queue (IDQ) to the back-end of the pipeline when there was no back-end stalls. This event counts for one SMT thread in a given cycle. [This event is alias to IDQ_UOPS_NOT_DELIVERED.CYCLES_FE_WAS_OK]",
"SampleAfterValue": "1000003",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -567,7 +565,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0x9c",
"EventName": "IDQ_UOPS_NOT_DELIVERED.CORE",
- "PublicDescription": "Counts the number of uops not delivered to by the Instruction Decode Queue (IDQ) to the back-end of the pipeline when there was no back-end stalls. This event counts for one SMT thread in a given cycle. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of uops not delivered to by the Instruction Decode Queue (IDQ) to the back-end of the pipeline when there was no back-end stalls. This event counts for one SMT thread in a given cycle.",
"SampleAfterValue": "1000003",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -578,7 +576,7 @@
"CounterMask": "6",
"EventCode": "0x9c",
"EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE",
- "PublicDescription": "Counts the number of cycles when no uops were delivered by the Instruction Decode Queue (IDQ) to the back-end of the pipeline when there was no back-end stalls. This event counts for one SMT thread in a given cycle. [This event is alias to IDQ_BUBBLES.CYCLES_0_UOPS_DELIV.CORE] Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of cycles when no uops were delivered by the Instruction Decode Queue (IDQ) to the back-end of the pipeline when there was no back-end stalls. This event counts for one SMT thread in a given cycle. [This event is alias to IDQ_BUBBLES.CYCLES_0_UOPS_DELIV.CORE]",
"SampleAfterValue": "1000003",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -590,7 +588,7 @@
"EventCode": "0x9c",
"EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_FE_WAS_OK",
"Invert": "1",
- "PublicDescription": "Counts the number of cycles when the optimal number of uops were delivered by the Instruction Decode Queue (IDQ) to the back-end of the pipeline when there was no back-end stalls. This event counts for one SMT thread in a given cycle. [This event is alias to IDQ_BUBBLES.CYCLES_FE_WAS_OK] Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of cycles when the optimal number of uops were delivered by the Instruction Decode Queue (IDQ) to the back-end of the pipeline when there was no back-end stalls. This event counts for one SMT thread in a given cycle. [This event is alias to IDQ_BUBBLES.CYCLES_FE_WAS_OK]",
"SampleAfterValue": "1000003",
"UMask": "0x1",
"Unit": "cpu_core"
diff --git a/tools/perf/pmu-events/arch/x86/meteorlake/memory.json b/tools/perf/pmu-events/arch/x86/meteorlake/memory.json
index 17b94f810d5a..f0cbeda4d5ca 100644
--- a/tools/perf/pmu-events/arch/x86/meteorlake/memory.json
+++ b/tools/perf/pmu-events/arch/x86/meteorlake/memory.json
@@ -5,7 +5,6 @@
"CounterMask": "2",
"EventCode": "0xa3",
"EventName": "CYCLE_ACTIVITY.CYCLES_L3_MISS",
- "PublicDescription": "Cycles while L3 cache miss demand load is outstanding. Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x2",
"Unit": "cpu_core"
@@ -16,7 +15,6 @@
"CounterMask": "6",
"EventCode": "0xa3",
"EventName": "CYCLE_ACTIVITY.STALLS_L3_MISS",
- "PublicDescription": "Execution stalls while L3 cache miss demand load is outstanding. Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x6",
"Unit": "cpu_core"
@@ -90,7 +88,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc3",
"EventName": "MACHINE_CLEARS.MEMORY_ORDERING",
- "PublicDescription": "Counts the number of Machine Clears detected dye to memory ordering. Memory Ordering Machine Clears may apply when a memory read may not conform to the memory ordering rules of the x86 architecture Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of Machine Clears detected dye to memory ordering. Memory Ordering Machine Clears may apply when a memory read may not conform to the memory ordering rules of the x86 architecture",
"SampleAfterValue": "100003",
"UMask": "0x2",
"Unit": "cpu_core"
@@ -101,7 +99,6 @@
"CounterMask": "2",
"EventCode": "0x47",
"EventName": "MEMORY_ACTIVITY.CYCLES_L1D_MISS",
- "PublicDescription": "Cycles while L1 cache miss demand load is outstanding. Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x2",
"Unit": "cpu_core"
@@ -112,7 +109,6 @@
"CounterMask": "3",
"EventCode": "0x47",
"EventName": "MEMORY_ACTIVITY.STALLS_L1D_MISS",
- "PublicDescription": "Execution stalls while L1 cache miss demand load is outstanding. Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x3",
"Unit": "cpu_core"
@@ -123,7 +119,7 @@
"CounterMask": "5",
"EventCode": "0x47",
"EventName": "MEMORY_ACTIVITY.STALLS_L2_MISS",
- "PublicDescription": "Execution stalls while L2 cache miss demand cacheable load request is outstanding (will not count for uncacheable demand requests e.g. bus lock). Available PDIST counters: 0",
+ "PublicDescription": "Execution stalls while L2 cache miss demand cacheable load request is outstanding (will not count for uncacheable demand requests e.g. bus lock).",
"SampleAfterValue": "1000003",
"UMask": "0x5",
"Unit": "cpu_core"
@@ -134,7 +130,7 @@
"CounterMask": "9",
"EventCode": "0x47",
"EventName": "MEMORY_ACTIVITY.STALLS_L3_MISS",
- "PublicDescription": "Execution stalls while L3 cache miss demand cacheable load request is outstanding (will not count for uncacheable demand requests e.g. bus lock). Available PDIST counters: 0",
+ "PublicDescription": "Execution stalls while L3 cache miss demand cacheable load request is outstanding (will not count for uncacheable demand requests e.g. bus lock).",
"SampleAfterValue": "1000003",
"UMask": "0x9",
"Unit": "cpu_core"
@@ -411,7 +407,6 @@
"Counter": "0,1,2,3",
"EventCode": "0x21",
"EventName": "OFFCORE_REQUESTS.L3_MISS_DEMAND_DATA_RD",
- "PublicDescription": "Counts demand data read requests that miss the L3 cache. Available PDIST counters: 0",
"SampleAfterValue": "100003",
"UMask": "0x10",
"Unit": "cpu_core"
@@ -422,7 +417,7 @@
"CounterMask": "1",
"EventCode": "0x20",
"EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_L3_MISS_DEMAND_DATA_RD",
- "PublicDescription": "Cycles with at least 1 Demand Data Read requests who miss L3 cache in the superQ. Available PDIST counters: 0",
+ "PublicDescription": "Cycles with at least 1 Demand Data Read requests who miss L3 cache in the superQ.",
"SampleAfterValue": "1000003",
"UMask": "0x10",
"Unit": "cpu_core"
@@ -432,7 +427,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x20",
"EventName": "OFFCORE_REQUESTS_OUTSTANDING.L3_MISS_DEMAND_DATA_RD",
- "PublicDescription": "For every cycle, increments by the number of demand data read requests pending that are known to have missed the L3 cache. Note that this does not capture all elapsed cycles while requests are outstanding - only cycles from when the requests were known by the requesting core to have missed the L3 cache. Available PDIST counters: 0",
+ "PublicDescription": "For every cycle, increments by the number of demand data read requests pending that are known to have missed the L3 cache. Note that this does not capture all elapsed cycles while requests are outstanding - only cycles from when the requests were known by the requesting core to have missed the L3 cache.",
"SampleAfterValue": "2000003",
"UMask": "0x10",
"Unit": "cpu_core"
diff --git a/tools/perf/pmu-events/arch/x86/meteorlake/mtl-metrics.json b/tools/perf/pmu-events/arch/x86/meteorlake/mtl-metrics.json
index 0088be169f9b..948c16a1f95b 100644
--- a/tools/perf/pmu-events/arch/x86/meteorlake/mtl-metrics.json
+++ b/tools/perf/pmu-events/arch/x86/meteorlake/mtl-metrics.json
@@ -1,56 +1,56 @@
[
{
"BriefDescription": "C10 residency percent per package",
- "MetricExpr": "cstate_pkg@c10\\-residency@ / TSC",
+ "MetricExpr": "cstate_pkg@c10\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C10_Pkg_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C1 residency percent per core",
- "MetricExpr": "cstate_core@c1\\-residency@ / TSC",
+ "MetricExpr": "cstate_core@c1\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C1_Core_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C2 residency percent per package",
- "MetricExpr": "cstate_pkg@c2\\-residency@ / TSC",
+ "MetricExpr": "cstate_pkg@c2\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C2_Pkg_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C3 residency percent per package",
- "MetricExpr": "cstate_pkg@c3\\-residency@ / TSC",
+ "MetricExpr": "cstate_pkg@c3\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C3_Pkg_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C6 residency percent per core",
- "MetricExpr": "cstate_core@c6\\-residency@ / TSC",
+ "MetricExpr": "cstate_core@c6\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C6_Core_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C6 residency percent per package",
- "MetricExpr": "cstate_pkg@c6\\-residency@ / TSC",
+ "MetricExpr": "cstate_pkg@c6\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C6_Pkg_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C7 residency percent per core",
- "MetricExpr": "cstate_core@c7\\-residency@ / TSC",
+ "MetricExpr": "cstate_core@c7\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C7_Core_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C8 residency percent per package",
- "MetricExpr": "cstate_pkg@c8\\-residency@ / TSC",
+ "MetricExpr": "cstate_pkg@c8\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C8_Pkg_Residency",
"ScaleUnit": "100%"
@@ -541,7 +541,7 @@
},
{
"BriefDescription": "Average CPU Utilization",
- "MetricExpr": "cpu_atom@CPU_CLK_UNHALTED.REF_TSC@ / TSC",
+ "MetricExpr": "cpu_atom@CPU_CLK_UNHALTED.REF_TSC@ / msr@tsc\\,cpu=cpu_atom@",
"MetricName": "tma_info_system_cpu_utilization",
"Unit": "cpu_atom"
},
@@ -748,7 +748,7 @@
{
"BriefDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend",
"DefaultMetricgroupName": "TopdownL1",
- "MetricExpr": "cpu_core@topdown\\-be\\-bound@ / (cpu_core@topdown\\-fe\\-bound@ + cpu_core@topdown\\-bad\\-spec@ + cpu_core@topdown\\-retiring@ + cpu_core@topdown\\-be\\-bound@) + 0 * tma_info_thread_slots",
+ "MetricExpr": "cpu_core@topdown\\-be\\-bound@ / (cpu_core@topdown\\-fe\\-bound@ + cpu_core@topdown\\-bad\\-spec@ + cpu_core@topdown\\-retiring@ + cpu_core@topdown\\-be\\-bound@)",
"MetricGroup": "BvOB;Default;TmaL1;TopdownL1;tma_L1_group",
"MetricName": "tma_backend_bound",
"MetricThreshold": "tma_backend_bound > 0.2",
@@ -787,11 +787,20 @@
"Unit": "cpu_core"
},
{
+ "BriefDescription": "Total pipeline cost when the execution is compute-bound - an estimation",
+ "MetricExpr": "100 * (tma_core_bound * tma_divider / (tma_divider + tma_ports_utilization + tma_serializing_operation) + tma_core_bound * (tma_ports_utilization / (tma_divider + tma_ports_utilization + tma_serializing_operation)) * (tma_ports_utilized_3m / (tma_ports_utilized_0 + tma_ports_utilized_1 + tma_ports_utilized_2 + tma_ports_utilized_3m)))",
+ "MetricGroup": "BvCB;Cor;tma_issueComp",
+ "MetricName": "tma_bottleneck_compute_bound_est",
+ "MetricThreshold": "tma_bottleneck_compute_bound_est > 20",
+ "PublicDescription": "Total pipeline cost when the execution is compute-bound - an estimation. Covers Core Bound when High ILP as well as when long-latency execution units are busy. Related metrics: ",
+ "Unit": "cpu_core"
+ },
+ {
"BriefDescription": "Total pipeline cost of external Memory- or Cache-Bandwidth related bottlenecks",
"MetricExpr": "100 * (tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_mem_bandwidth / (tma_mem_bandwidth + tma_mem_latency)) + tma_memory_bound * (tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_sq_full / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full)) + tma_memory_bound * (tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_fb_full / (tma_dtlb_load + tma_fb_full + tma_l1_latency_dependency + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)))",
"MetricGroup": "BvMB;Mem;MemoryBW;Offcore;tma_issueBW",
- "MetricName": "tma_bottleneck_cache_memory_bandwidth",
- "MetricThreshold": "tma_bottleneck_cache_memory_bandwidth > 20",
+ "MetricName": "tma_bottleneck_data_cache_memory_bandwidth",
+ "MetricThreshold": "tma_bottleneck_data_cache_memory_bandwidth > 20",
"PublicDescription": "Total pipeline cost of external Memory- or Cache-Bandwidth related bottlenecks. Related metrics: tma_fb_full, tma_info_system_dram_bw_use, tma_mem_bandwidth, tma_sq_full",
"Unit": "cpu_core"
},
@@ -799,23 +808,14 @@
"BriefDescription": "Total pipeline cost of external Memory- or Cache-Latency related bottlenecks",
"MetricExpr": "100 * (tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_mem_latency / (tma_mem_bandwidth + tma_mem_latency)) + tma_memory_bound * (tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_l3_hit_latency / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full)) + tma_memory_bound * tma_l2_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) + tma_memory_bound * (tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_l1_latency_dependency / (tma_dtlb_load + tma_fb_full + tma_l1_latency_dependency + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)) + tma_memory_bound * (tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_lock_latency / (tma_dtlb_load + tma_fb_full + tma_l1_latency_dependency + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)) + tma_memory_bound * (tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_split_loads / (tma_dtlb_load + tma_fb_full + tma_l1_latency_dependency + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)) + tma_memory_bound * (tma_store_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_split_stores / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency + tma_streaming_stores)) + tma_memory_bound * (tma_store_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_store_latency / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency + tma_streaming_stores)))",
"MetricGroup": "BvML;Mem;MemoryLat;Offcore;tma_issueLat",
- "MetricName": "tma_bottleneck_cache_memory_latency",
- "MetricThreshold": "tma_bottleneck_cache_memory_latency > 20",
+ "MetricName": "tma_bottleneck_data_cache_memory_latency",
+ "MetricThreshold": "tma_bottleneck_data_cache_memory_latency > 20",
"PublicDescription": "Total pipeline cost of external Memory- or Cache-Latency related bottlenecks. Related metrics: tma_l3_hit_latency, tma_mem_latency",
"Unit": "cpu_core"
},
{
- "BriefDescription": "Total pipeline cost when the execution is compute-bound - an estimation",
- "MetricExpr": "100 * (tma_core_bound * tma_divider / (tma_divider + tma_ports_utilization + tma_serializing_operation) + tma_core_bound * (tma_ports_utilization / (tma_divider + tma_ports_utilization + tma_serializing_operation)) * (tma_ports_utilized_3m / (tma_ports_utilized_0 + tma_ports_utilized_1 + tma_ports_utilized_2 + tma_ports_utilized_3m)))",
- "MetricGroup": "BvCB;Cor;tma_issueComp",
- "MetricName": "tma_bottleneck_compute_bound_est",
- "MetricThreshold": "tma_bottleneck_compute_bound_est > 20",
- "PublicDescription": "Total pipeline cost when the execution is compute-bound - an estimation. Covers Core Bound when High ILP as well as when long-latency execution units are busy. Related metrics: ",
- "Unit": "cpu_core"
- },
- {
"BriefDescription": "Total pipeline cost of instruction fetch bandwidth related bottlenecks (when the front-end could not sustain operations delivery to the back-end)",
- "MetricExpr": "100 * (tma_frontend_bound - (1 - 10 * tma_microcode_sequencer * tma_other_mispredicts / tma_branch_mispredicts) * tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches) - (1 - cpu_core@INST_RETIRED.REP_ITERATION@ / cpu_core@UOPS_RETIRED.MS\\,cmask\\=1@) * (tma_fetch_latency * (tma_ms_switches + tma_branch_resteers * (tma_clears_resteers + tma_mispredicts_resteers * tma_other_mispredicts / tma_branch_mispredicts) / (tma_clears_resteers + tma_mispredicts_resteers + tma_unknown_branches)) / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches) + tma_fetch_bandwidth * tma_ms / (tma_dsb + tma_lsd + tma_mite + tma_ms))) - tma_bottleneck_big_code",
+ "MetricExpr": "100 * (tma_frontend_bound - (1 - 10 * tma_microcode_sequencer * tma_other_mispredicts / tma_branch_mispredicts) * tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches) - (1 - cpu_core@INST_RETIRED.REP_ITERATION@ / cpu_core@UOPS_RETIRED.MS\\,cmask\\=1@) * (tma_fetch_latency * (tma_ms_switches + tma_branch_resteers * (tma_clears_resteers + tma_mispredicts_resteers * tma_other_mispredicts / tma_branch_mispredicts) / (tma_clears_resteers + tma_mispredicts_resteers + tma_unknown_branches)) / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches) + tma_ms)) - tma_bottleneck_big_code",
"MetricGroup": "BvFB;Fed;FetchBW;Frontend",
"MetricName": "tma_bottleneck_instruction_fetch_bw",
"MetricThreshold": "tma_bottleneck_instruction_fetch_bw > 20",
@@ -823,7 +823,7 @@
},
{
"BriefDescription": "Total pipeline cost of irregular execution (e.g",
- "MetricExpr": "100 * ((1 - cpu_core@INST_RETIRED.REP_ITERATION@ / cpu_core@UOPS_RETIRED.MS\\,cmask\\=1@) * (tma_fetch_latency * (tma_ms_switches + tma_branch_resteers * (tma_clears_resteers + tma_mispredicts_resteers * tma_other_mispredicts / tma_branch_mispredicts) / (tma_clears_resteers + tma_mispredicts_resteers + tma_unknown_branches)) / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches) + tma_fetch_bandwidth * tma_ms / (tma_dsb + tma_lsd + tma_mite + tma_ms)) + 10 * tma_microcode_sequencer * tma_other_mispredicts / tma_branch_mispredicts * tma_branch_mispredicts + tma_machine_clears * tma_other_nukes / tma_other_nukes + tma_core_bound * (tma_serializing_operation + cpu_core@RS.EMPTY_RESOURCE@ / tma_info_thread_clks * tma_ports_utilized_0) / (tma_divider + tma_ports_utilization + tma_serializing_operation) + tma_microcode_sequencer / (tma_few_uops_instructions + tma_microcode_sequencer) * (tma_assists / tma_microcode_sequencer) * tma_heavy_operations)",
+ "MetricExpr": "100 * ((1 - cpu_core@INST_RETIRED.REP_ITERATION@ / cpu_core@UOPS_RETIRED.MS\\,cmask\\=1@) * (tma_fetch_latency * (tma_ms_switches + tma_branch_resteers * (tma_clears_resteers + tma_mispredicts_resteers * tma_other_mispredicts / tma_branch_mispredicts) / (tma_clears_resteers + tma_mispredicts_resteers + tma_unknown_branches)) / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches) + tma_ms) + 10 * tma_microcode_sequencer * tma_other_mispredicts / tma_branch_mispredicts * tma_branch_mispredicts + tma_machine_clears * tma_other_nukes / tma_other_nukes + tma_core_bound * (tma_serializing_operation + cpu_core@RS.EMPTY_RESOURCE@ / tma_info_thread_clks * tma_ports_utilized_0) / (tma_divider + tma_ports_utilization + tma_serializing_operation) + tma_microcode_sequencer / (tma_few_uops_instructions + tma_microcode_sequencer) * (tma_assists / tma_microcode_sequencer) * tma_heavy_operations)",
"MetricGroup": "Bad;BvIO;Cor;Ret;tma_issueMS",
"MetricName": "tma_bottleneck_irregular_overhead",
"MetricThreshold": "tma_bottleneck_irregular_overhead > 10",
@@ -859,7 +859,7 @@
},
{
"BriefDescription": "Total pipeline cost of remaining bottlenecks in the back-end",
- "MetricExpr": "100 - (tma_bottleneck_big_code + tma_bottleneck_instruction_fetch_bw + tma_bottleneck_mispredictions + tma_bottleneck_cache_memory_bandwidth + tma_bottleneck_cache_memory_latency + tma_bottleneck_memory_data_tlbs + tma_bottleneck_memory_synchronization + tma_bottleneck_compute_bound_est + tma_bottleneck_irregular_overhead + tma_bottleneck_branching_overhead + tma_bottleneck_useful_work)",
+ "MetricExpr": "100 - (tma_bottleneck_big_code + tma_bottleneck_instruction_fetch_bw + tma_bottleneck_mispredictions + tma_bottleneck_data_cache_memory_bandwidth + tma_bottleneck_data_cache_memory_latency + tma_bottleneck_memory_data_tlbs + tma_bottleneck_memory_synchronization + tma_bottleneck_compute_bound_est + tma_bottleneck_irregular_overhead + tma_bottleneck_branching_overhead + tma_bottleneck_useful_work)",
"MetricGroup": "BvOB;Cor;Offcore",
"MetricName": "tma_bottleneck_other_bottlenecks",
"MetricThreshold": "tma_bottleneck_other_bottlenecks > 20",
@@ -876,7 +876,7 @@
},
{
"BriefDescription": "This metric represents fraction of slots the CPU has wasted due to Branch Misprediction",
- "MetricExpr": "cpu_core@topdown\\-br\\-mispredict@ / (cpu_core@topdown\\-fe\\-bound@ + cpu_core@topdown\\-bad\\-spec@ + cpu_core@topdown\\-retiring@ + cpu_core@topdown\\-be\\-bound@) + 0 * tma_info_thread_slots",
+ "MetricExpr": "cpu_core@topdown\\-br\\-mispredict@ / (cpu_core@topdown\\-fe\\-bound@ + cpu_core@topdown\\-bad\\-spec@ + cpu_core@topdown\\-retiring@ + cpu_core@topdown\\-be\\-bound@)",
"MetricGroup": "BadSpec;BrMispredicts;BvMP;TmaL2;TopdownL2;tma_L2_group;tma_bad_speculation_group;tma_issueBM",
"MetricName": "tma_branch_mispredicts",
"MetricThreshold": "tma_branch_mispredicts > 0.1 & tma_bad_speculation > 0.15",
@@ -1007,7 +1007,6 @@
},
{
"BriefDescription": "This metric estimates fraction of cycles while the memory subsystem was handling synchronizations due to contested accesses",
- "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "(cpu_core@MEM_LOAD_L3_HIT_RETIRED.XSNP_MISS@ * min(cpu_core@MEM_LOAD_L3_HIT_RETIRED.XSNP_MISS@R, 24 * tma_info_system_core_frequency) + cpu_core@MEM_LOAD_L3_HIT_RETIRED.XSNP_FWD@ * min(cpu_core@MEM_LOAD_L3_HIT_RETIRED.XSNP_FWD@R, 25 * tma_info_system_core_frequency) * (cpu_core@OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM@ / (cpu_core@OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM@ + cpu_core@OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD@))) * (1 + cpu_core@MEM_LOAD_RETIRED.FB_HIT@ / cpu_core@MEM_LOAD_RETIRED.L1_MISS@ / 2) / tma_info_thread_clks",
"MetricGroup": "BvMS;DataSharing;LockCont;Offcore;Snoop;TopdownL4;tma_L4_group;tma_issueSyncxn;tma_l3_bound_group",
"MetricName": "tma_contested_accesses",
@@ -1124,7 +1123,7 @@
"MetricGroup": "BvMB;MemoryBW;TopdownL4;tma_L4_group;tma_issueBW;tma_issueSL;tma_issueSmSt;tma_l1_bound_group",
"MetricName": "tma_fb_full",
"MetricThreshold": "tma_fb_full > 0.3",
- "PublicDescription": "This metric does a *rough estimation* of how often L1D Fill Buffer unavailability limited additional L1D miss memory access requests to proceed. The higher the metric value; the deeper the memory hierarchy level the misses are satisfied from (metric values >1 are valid). Often it hints on approaching bandwidth limits (to L2 cache; L3 cache or external memory). Related metrics: tma_bottleneck_cache_memory_bandwidth, tma_info_system_dram_bw_use, tma_mem_bandwidth, tma_sq_full, tma_store_latency, tma_streaming_stores",
+ "PublicDescription": "This metric does a *rough estimation* of how often L1D Fill Buffer unavailability limited additional L1D miss memory access requests to proceed. The higher the metric value; the deeper the memory hierarchy level the misses are satisfied from (metric values >1 are valid). Often it hints on approaching bandwidth limits (to L2 cache; L3 cache or external memory). Related metrics: tma_bottleneck_data_cache_memory_bandwidth, tma_info_system_dram_bw_use, tma_mem_bandwidth, tma_sq_full, tma_store_latency, tma_streaming_stores",
"ScaleUnit": "100%",
"Unit": "cpu_core"
},
@@ -1253,7 +1252,7 @@
},
{
"BriefDescription": "This metric represents fraction of slots where the CPU was retiring heavy-weight operations -- instructions that require two or more uops or micro-coded sequences",
- "MetricExpr": "cpu_core@topdown\\-heavy\\-ops@ / (cpu_core@topdown\\-fe\\-bound@ + cpu_core@topdown\\-bad\\-spec@ + cpu_core@topdown\\-retiring@ + cpu_core@topdown\\-be\\-bound@) + 0 * tma_info_thread_slots",
+ "MetricExpr": "cpu_core@topdown\\-heavy\\-ops@ / (cpu_core@topdown\\-fe\\-bound@ + cpu_core@topdown\\-bad\\-spec@ + cpu_core@topdown\\-retiring@ + cpu_core@topdown\\-be\\-bound@)",
"MetricGroup": "Retire;TmaL2;TopdownL2;tma_L2_group;tma_retiring_group",
"MetricName": "tma_heavy_operations",
"MetricThreshold": "tma_heavy_operations > 0.1",
@@ -1923,7 +1922,7 @@
"Unit": "cpu_core"
},
{
- "BriefDescription": "",
+ "BriefDescription": "Mem;Backend;CacheHits",
"MetricExpr": "cpu_core@UOPS_EXECUTED.THREAD@ / (cpu_core@UOPS_EXECUTED.CORE_CYCLES_GE_1@ / 2 if #SMT_on else cpu_core@UOPS_EXECUTED.THREAD\\,cmask\\=1@)",
"MetricGroup": "Cor;Pipeline;PortsUtil;SMT",
"MetricName": "tma_info_pipeline_execute",
@@ -1984,7 +1983,7 @@
},
{
"BriefDescription": "Measured Average Core Frequency for unhalted processors [GHz]",
- "MetricExpr": "tma_info_system_turbo_utilization * TSC / 1e9 / tma_info_system_time",
+ "MetricExpr": "tma_info_system_turbo_utilization * msr@tsc\\,cpu=cpu_core@ / 1e9 / tma_info_system_time",
"MetricGroup": "Power;Summary",
"MetricName": "tma_info_system_core_frequency",
"Unit": "cpu_core"
@@ -1998,7 +1997,7 @@
},
{
"BriefDescription": "Average number of utilized CPUs",
- "MetricExpr": "cpu_core@CPU_CLK_UNHALTED.REF_TSC@ / TSC",
+ "MetricExpr": "cpu_core@CPU_CLK_UNHALTED.REF_TSC@ / msr@tsc\\,cpu=cpu_core@",
"MetricGroup": "Summary",
"MetricName": "tma_info_system_cpus_utilized",
"Unit": "cpu_core"
@@ -2008,7 +2007,7 @@
"MetricExpr": "64 * (UNC_HAC_ARB_TRK_REQUESTS.ALL + UNC_HAC_ARB_COH_TRK_REQUESTS.ALL) / 1e9 / tma_info_system_time",
"MetricGroup": "HPC;MemOffcore;MemoryBW;SoC;tma_issueBW",
"MetricName": "tma_info_system_dram_bw_use",
- "PublicDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]. Related metrics: tma_bottleneck_cache_memory_bandwidth, tma_fb_full, tma_mem_bandwidth, tma_sq_full",
+ "PublicDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]. Related metrics: tma_bottleneck_data_cache_memory_bandwidth, tma_fb_full, tma_mem_bandwidth, tma_sq_full",
"Unit": "cpu_core"
},
{
@@ -2095,6 +2094,13 @@
"Unit": "cpu_core"
},
{
+ "BriefDescription": "Measured Average Uncore Frequency for the SoC [GHz]",
+ "MetricExpr": "tma_info_system_socket_clks / 1e9 / tma_info_system_time",
+ "MetricGroup": "SoC",
+ "MetricName": "tma_info_system_uncore_frequency",
+ "Unit": "cpu_core"
+ },
+ {
"BriefDescription": "Per-Logical Processor actual clocks when the Logical Processor is active.",
"MetricExpr": "cpu_core@CPU_CLK_UNHALTED.THREAD@",
"MetricGroup": "Pipeline",
@@ -2213,12 +2219,12 @@
"Unit": "cpu_core"
},
{
- "BriefDescription": "This metric([SKL+] roughly; [LNL]) estimates fraction of cycles with demand load accesses that hit the L1D cache",
+ "BriefDescription": "This metric ([SKL+] roughly; [LNL]) estimates fraction of cycles with demand load accesses that hit the L1D cache",
"MetricExpr": "min(2 * (cpu_core@MEM_INST_RETIRED.ALL_LOADS@ - cpu_core@MEM_LOAD_RETIRED.FB_HIT@ - cpu_core@MEM_LOAD_RETIRED.L1_MISS@) * 20 / 100, max(cpu_core@CYCLE_ACTIVITY.CYCLES_MEM_ANY@ - cpu_core@MEMORY_ACTIVITY.CYCLES_L1D_MISS@, 0)) / tma_info_thread_clks",
"MetricGroup": "BvML;MemoryLat;TopdownL4;tma_L4_group;tma_l1_bound_group",
"MetricName": "tma_l1_latency_dependency",
"MetricThreshold": "tma_l1_latency_dependency > 0.1 & (tma_l1_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
- "PublicDescription": "This metric([SKL+] roughly; [LNL]) estimates fraction of cycles with demand load accesses that hit the L1D cache. The short latency of the L1D cache may be exposed in pointer-chasing memory access patterns as an example. Sample with: MEM_LOAD_RETIRED.L1_HIT",
+ "PublicDescription": "This metric ([SKL+] roughly; [LNL]) estimates fraction of cycles with demand load accesses that hit the L1D cache. The short latency of the L1D cache may be exposed in pointer-chasing memory access patterns as an example. Sample with: MEM_LOAD_RETIRED.L1_HIT",
"ScaleUnit": "100%",
"Unit": "cpu_core"
},
@@ -2234,7 +2240,6 @@
},
{
"BriefDescription": "This metric represents fraction of cycles with demand load accesses that hit the L2 cache under unloaded scenarios (possibly L2 latency limited)",
- "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "cpu_core@MEM_LOAD_RETIRED.L2_HIT@ * min(cpu_core@MEM_LOAD_RETIRED.L2_HIT@R, 3 * tma_info_system_core_frequency) * (1 + cpu_core@MEM_LOAD_RETIRED.FB_HIT@ / cpu_core@MEM_LOAD_RETIRED.L1_MISS@ / 2) / tma_info_thread_clks",
"MetricGroup": "MemoryLat;TopdownL4;tma_L4_group;tma_l2_bound_group",
"MetricName": "tma_l2_hit_latency",
@@ -2255,12 +2260,11 @@
},
{
"BriefDescription": "This metric estimates fraction of cycles with demand load accesses that hit the L3 cache under unloaded scenarios (possibly L3 latency limited)",
- "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "cpu_core@MEM_LOAD_RETIRED.L3_HIT@ * min(cpu_core@MEM_LOAD_RETIRED.L3_HIT@R, 9 * tma_info_system_core_frequency) * (1 + cpu_core@MEM_LOAD_RETIRED.FB_HIT@ / cpu_core@MEM_LOAD_RETIRED.L1_MISS@ / 2) / tma_info_thread_clks",
"MetricGroup": "BvML;MemoryLat;TopdownL4;tma_L4_group;tma_issueLat;tma_l3_bound_group",
"MetricName": "tma_l3_hit_latency",
"MetricThreshold": "tma_l3_hit_latency > 0.1 & (tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
- "PublicDescription": "This metric estimates fraction of cycles with demand load accesses that hit the L3 cache under unloaded scenarios (possibly L3 latency limited). Avoiding private cache misses (i.e. L2 misses/L3 hits) will improve the latency; reduce contention with sibling physical cores and increase performance. Note the value of this node may overlap with its siblings. Sample with: MEM_LOAD_RETIRED.L3_HIT_PS. Related metrics: tma_bottleneck_cache_memory_latency, tma_mem_latency",
+ "PublicDescription": "This metric estimates fraction of cycles with demand load accesses that hit the L3 cache under unloaded scenarios (possibly L3 latency limited). Avoiding private cache misses (i.e. L2 misses/L3 hits) will improve the latency; reduce contention with sibling physical cores and increase performance. Note the value of this node may overlap with its siblings. Sample with: MEM_LOAD_RETIRED.L3_HIT_PS. Related metrics: tma_bottleneck_data_cache_memory_latency, tma_mem_latency",
"ScaleUnit": "100%",
"Unit": "cpu_core"
},
@@ -2342,6 +2346,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles the CPU spent handling cache misses due to lock operations",
+ "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "cpu_core@MEM_INST_RETIRED.LOCK_LOADS@ * cpu_core@MEM_INST_RETIRED.LOCK_LOADS@R / tma_info_thread_clks",
"MetricGroup": "LockCont;Offcore;TopdownL4;tma_L4_group;tma_issueRFO;tma_l1_bound_group",
"MetricName": "tma_lock_latency",
@@ -2377,7 +2382,7 @@
"MetricGroup": "BvMB;MemoryBW;Offcore;TopdownL4;tma_L4_group;tma_dram_bound_group;tma_issueBW",
"MetricName": "tma_mem_bandwidth",
"MetricThreshold": "tma_mem_bandwidth > 0.2 & (tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
- "PublicDescription": "This metric estimates fraction of cycles where the core's performance was likely hurt due to approaching bandwidth limits of external memory - DRAM ([SPR-HBM] and/or HBM). The underlying heuristic assumes that a similar off-core traffic is generated by all IA cores. This metric does not aggregate non-data-read requests by this logical processor; requests from other IA Logical Processors/Physical Cores/sockets; or other non-IA devices like GPU; hence the maximum external memory bandwidth limits may or may not be approached when this metric is flagged (see Uncore counters for that). Related metrics: tma_bottleneck_cache_memory_bandwidth, tma_fb_full, tma_info_system_dram_bw_use, tma_sq_full",
+ "PublicDescription": "This metric estimates fraction of cycles where the core's performance was likely hurt due to approaching bandwidth limits of external memory - DRAM ([SPR-HBM] and/or HBM). The underlying heuristic assumes that a similar off-core traffic is generated by all IA cores. This metric does not aggregate non-data-read requests by this logical processor; requests from other IA Logical Processors/Physical Cores/sockets; or other non-IA devices like GPU; hence the maximum external memory bandwidth limits may or may not be approached when this metric is flagged (see Uncore counters for that). Related metrics: tma_bottleneck_data_cache_memory_bandwidth, tma_fb_full, tma_info_system_dram_bw_use, tma_sq_full",
"ScaleUnit": "100%",
"Unit": "cpu_core"
},
@@ -2387,13 +2392,13 @@
"MetricGroup": "BvML;MemoryLat;Offcore;TopdownL4;tma_L4_group;tma_dram_bound_group;tma_issueLat",
"MetricName": "tma_mem_latency",
"MetricThreshold": "tma_mem_latency > 0.1 & (tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
- "PublicDescription": "This metric estimates fraction of cycles where the performance was likely hurt due to latency from external memory - DRAM ([SPR-HBM] and/or HBM). This metric does not aggregate requests from other Logical Processors/Physical Cores/sockets (see Uncore counters for that). Related metrics: tma_bottleneck_cache_memory_latency, tma_l3_hit_latency",
+ "PublicDescription": "This metric estimates fraction of cycles where the performance was likely hurt due to latency from external memory - DRAM ([SPR-HBM] and/or HBM). This metric does not aggregate requests from other Logical Processors/Physical Cores/sockets (see Uncore counters for that). Related metrics: tma_bottleneck_data_cache_memory_latency, tma_l3_hit_latency",
"ScaleUnit": "100%",
"Unit": "cpu_core"
},
{
"BriefDescription": "This metric represents fraction of slots the Memory subsystem within the Backend was a bottleneck",
- "MetricExpr": "cpu_core@topdown\\-mem\\-bound@ / (cpu_core@topdown\\-fe\\-bound@ + cpu_core@topdown\\-bad\\-spec@ + cpu_core@topdown\\-retiring@ + cpu_core@topdown\\-be\\-bound@) + 0 * tma_info_thread_slots",
+ "MetricExpr": "cpu_core@topdown\\-mem\\-bound@ / (cpu_core@topdown\\-fe\\-bound@ + cpu_core@topdown\\-bad\\-spec@ + cpu_core@topdown\\-retiring@ + cpu_core@topdown\\-be\\-bound@)",
"MetricGroup": "Backend;TmaL2;TopdownL2;tma_L2_group;tma_backend_bound_group",
"MetricName": "tma_memory_bound",
"MetricThreshold": "tma_memory_bound > 0.2 & tma_backend_bound > 0.2",
@@ -2404,7 +2409,6 @@
},
{
"BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to LFENCE Instructions.",
- "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "13 * cpu_core@MISC2_RETIRED.LFENCE@ / tma_info_thread_clks",
"MetricGroup": "TopdownL4;tma_L4_group;tma_serializing_operation_group",
"MetricName": "tma_memory_fence",
@@ -2463,7 +2467,7 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles in which CPU was likely limited due to the Microcode Sequencer (MS) unit - see Microcode_Sequencer node for details.",
- "MetricExpr": "max(cpu_core@IDQ.MS_CYCLES_ANY@, cpu_core@UOPS_RETIRED.MS\\,cmask\\=1@ / (cpu_core@UOPS_RETIRED.SLOTS@ / cpu_core@UOPS_ISSUED.ANY@)) / tma_info_core_core_clks / 2",
+ "MetricExpr": "max(cpu_core@IDQ.MS_CYCLES_ANY@, cpu_core@UOPS_RETIRED.MS\\,cmask\\=1@ / (cpu_core@UOPS_RETIRED.SLOTS@ / cpu_core@UOPS_ISSUED.ANY@)) / tma_info_core_core_clks / 2.4",
"MetricGroup": "MicroSeq;TopdownL3;tma_L3_group;tma_fetch_bandwidth_group",
"MetricName": "tma_ms",
"MetricThreshold": "tma_ms > 0.05 & tma_fetch_bandwidth > 0.2",
@@ -2502,6 +2506,7 @@
},
{
"BriefDescription": "This metric represents the remaining light uops fraction the CPU has executed - remaining means not covered by other sibling nodes",
+ "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "max(0, tma_light_operations - (tma_fp_arith + tma_int_operations + tma_memory_operations + tma_fused_instructions + tma_non_fused_branches))",
"MetricGroup": "Pipeline;TopdownL3;tma_L3_group;tma_light_operations_group",
"MetricName": "tma_other_light_ops",
@@ -2570,6 +2575,7 @@
},
{
"BriefDescription": "This metric estimates fraction of cycles the CPU performance was potentially limited due to Core computation issues (non divider-related)",
+ "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "((tma_ports_utilized_0 * tma_info_thread_clks + (cpu_core@EXE_ACTIVITY.1_PORTS_UTIL@ + tma_retiring * cpu_core@EXE_ACTIVITY.2_3_PORTS_UTIL@)) / tma_info_thread_clks if cpu_core@ARITH.DIV_ACTIVE@ < cpu_core@CYCLE_ACTIVITY.STALLS_TOTAL@ - cpu_core@EXE_ACTIVITY.BOUND_ON_LOADS@ else (cpu_core@EXE_ACTIVITY.1_PORTS_UTIL@ + tma_retiring * cpu_core@EXE_ACTIVITY.2_3_PORTS_UTIL@) / tma_info_thread_clks)",
"MetricGroup": "PortsUtil;TopdownL3;tma_L3_group;tma_core_bound_group",
"MetricName": "tma_ports_utilization",
@@ -2580,6 +2586,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles CPU executed no uops on any execution port (Logical Processor cycles since ICL, Physical Core cycles otherwise)",
+ "MetricConstraint": "NO_THRESHOLD_AND_NMI",
"MetricExpr": "max(cpu_core@EXE_ACTIVITY.EXE_BOUND_0_PORTS@ - cpu_core@RESOURCE_STALLS.SCOREBOARD@, 0) / tma_info_thread_clks",
"MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_ports_utilization_group",
"MetricName": "tma_ports_utilized_0",
@@ -2590,6 +2597,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles where the CPU executed total of 1 uop per cycle on all execution ports (Logical Processor cycles since ICL, Physical Core cycles otherwise)",
+ "MetricConstraint": "NO_THRESHOLD_AND_NMI",
"MetricExpr": "cpu_core@EXE_ACTIVITY.1_PORTS_UTIL@ / tma_info_thread_clks",
"MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_issueL1;tma_ports_utilization_group",
"MetricName": "tma_ports_utilized_1",
@@ -2600,7 +2608,6 @@
},
{
"BriefDescription": "This metric represents fraction of cycles CPU executed total of 2 uops per cycle on all execution ports (Logical Processor cycles since ICL, Physical Core cycles otherwise)",
- "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "cpu_core@EXE_ACTIVITY.2_PORTS_UTIL@ / tma_info_thread_clks",
"MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_issue2P;tma_ports_utilization_group",
"MetricName": "tma_ports_utilized_2",
@@ -2611,7 +2618,6 @@
},
{
"BriefDescription": "This metric represents fraction of cycles CPU executed total of 3 or more uops per cycle on all execution ports (Logical Processor cycles since ICL, Physical Core cycles otherwise)",
- "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "cpu_core@UOPS_EXECUTED.CYCLES_GE_3@ / tma_info_thread_clks",
"MetricGroup": "BvCB;PortsUtil;TopdownL4;tma_L4_group;tma_ports_utilization_group",
"MetricName": "tma_ports_utilized_3m",
@@ -2632,7 +2638,7 @@
{
"BriefDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired",
"DefaultMetricgroupName": "TopdownL1",
- "MetricExpr": "cpu_core@topdown\\-retiring@ / (cpu_core@topdown\\-fe\\-bound@ + cpu_core@topdown\\-bad\\-spec@ + cpu_core@topdown\\-retiring@ + cpu_core@topdown\\-be\\-bound@) + 0 * tma_info_thread_slots",
+ "MetricExpr": "cpu_core@topdown\\-retiring@ / (cpu_core@topdown\\-fe\\-bound@ + cpu_core@topdown\\-bad\\-spec@ + cpu_core@topdown\\-retiring@ + cpu_core@topdown\\-be\\-bound@)",
"MetricGroup": "BvUW;Default;TmaL1;TopdownL1;tma_L1_group",
"MetricName": "tma_retiring",
"MetricThreshold": "tma_retiring > 0.7 | tma_heavy_operations > 0.1",
@@ -2663,7 +2669,6 @@
},
{
"BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to PAUSE Instructions",
- "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "cpu_core@CPU_CLK_UNHALTED.PAUSE@ / tma_info_thread_clks",
"MetricGroup": "TopdownL4;tma_L4_group;tma_serializing_operation_group",
"MetricName": "tma_slow_pause",
@@ -2698,7 +2703,7 @@
"MetricGroup": "BvMB;MemoryBW;Offcore;TopdownL4;tma_L4_group;tma_issueBW;tma_l3_bound_group",
"MetricName": "tma_sq_full",
"MetricThreshold": "tma_sq_full > 0.3 & (tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
- "PublicDescription": "This metric measures fraction of cycles where the Super Queue (SQ) was full taking into account all request-types and both hardware SMT threads (Logical Processors). Related metrics: tma_bottleneck_cache_memory_bandwidth, tma_fb_full, tma_info_system_dram_bw_use, tma_mem_bandwidth",
+ "PublicDescription": "This metric measures fraction of cycles where the Super Queue (SQ) was full taking into account all request-types and both hardware SMT threads (Logical Processors). Related metrics: tma_bottleneck_data_cache_memory_bandwidth, tma_fb_full, tma_info_system_dram_bw_use, tma_mem_bandwidth",
"ScaleUnit": "100%",
"Unit": "cpu_core"
},
diff --git a/tools/perf/pmu-events/arch/x86/meteorlake/other.json b/tools/perf/pmu-events/arch/x86/meteorlake/other.json
index cb21bb933617..8320ffd83c51 100644
--- a/tools/perf/pmu-events/arch/x86/meteorlake/other.json
+++ b/tools/perf/pmu-events/arch/x86/meteorlake/other.json
@@ -4,7 +4,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc1",
"EventName": "ASSISTS.HARDWARE",
- "PublicDescription": "Count all other hardware assists or traps that are not necessarily architecturally exposed (through a software handler) beyond FP; SSE-AVX mix and A/D assists who are counted by dedicated sub-events. This includes, but not limited to, assists at EXE or MEM uop writeback like AVX* load/store/gather/scatter (non-FP GSSE-assist ) , assists generated by ROB like PEBS and RTIT, Uncore trap, RAR (Remote Action Request) and CET (Control flow Enforcement Technology) assists. the event also counts for Machine Ordering count. Available PDIST counters: 0",
+ "PublicDescription": "Count all other hardware assists or traps that are not necessarily architecturally exposed (through a software handler) beyond FP; SSE-AVX mix and A/D assists who are counted by dedicated sub-events. This includes, but not limited to, assists at EXE or MEM uop writeback like AVX* load/store/gather/scatter (non-FP GSSE-assist ) , assists generated by ROB like PEBS and RTIT, Uncore trap, RAR (Remote Action Request) and CET (Control flow Enforcement Technology) assists. the event also counts for Machine Ordering count.",
"SampleAfterValue": "100003",
"UMask": "0x4",
"Unit": "cpu_core"
@@ -14,7 +14,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc1",
"EventName": "ASSISTS.PAGE_FAULT",
- "PublicDescription": "ASSISTS.PAGE_FAULT Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x8",
"Unit": "cpu_core"
@@ -83,7 +82,7 @@
"CounterMask": "1",
"EventCode": "0x2d",
"EventName": "XQ.FULL_CYCLES",
- "PublicDescription": "number of cycles when the thread is active and the uncore cannot take any further requests (for example prefetches, loads or stores initiated by the Core that miss the L2 cache). Available PDIST counters: 0",
+ "PublicDescription": "number of cycles when the thread is active and the uncore cannot take any further requests (for example prefetches, loads or stores initiated by the Core that miss the L2 cache).",
"SampleAfterValue": "1000003",
"UMask": "0x1",
"Unit": "cpu_core"
diff --git a/tools/perf/pmu-events/arch/x86/meteorlake/pipeline.json b/tools/perf/pmu-events/arch/x86/meteorlake/pipeline.json
index 22b25708e799..bfdaabe9377d 100644
--- a/tools/perf/pmu-events/arch/x86/meteorlake/pipeline.json
+++ b/tools/perf/pmu-events/arch/x86/meteorlake/pipeline.json
@@ -15,7 +15,7 @@
"CounterMask": "1",
"EventCode": "0xb0",
"EventName": "ARITH.DIV_ACTIVE",
- "PublicDescription": "Counts cycles when divide unit is busy executing divide or square root operations. Accounts for integer and floating-point operations. Available PDIST counters: 0",
+ "PublicDescription": "Counts cycles when divide unit is busy executing divide or square root operations. Accounts for integer and floating-point operations.",
"SampleAfterValue": "1000003",
"UMask": "0x9",
"Unit": "cpu_core"
@@ -26,7 +26,6 @@
"CounterMask": "1",
"EventCode": "0xb0",
"EventName": "ARITH.IDIV_ACTIVE",
- "PublicDescription": "This event counts the cycles the integer divider is busy. Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x8",
"Unit": "cpu_core"
@@ -36,7 +35,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc1",
"EventName": "ASSISTS.ANY",
- "PublicDescription": "Counts the number of occurrences where a microcode assist is invoked by hardware. Examples include AD (page Access Dirty), FP and AVX related assists. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of occurrences where a microcode assist is invoked by hardware. Examples include AD (page Access Dirty), FP and AVX related assists.",
"SampleAfterValue": "100003",
"UMask": "0x1b",
"Unit": "cpu_core"
@@ -44,6 +43,7 @@
{
"BriefDescription": "Counts the total number of branch instructions retired for all branch types.",
"Counter": "0,1,2,3,4,5,6,7",
+ "Errata": "MTL012, MTL013",
"EventCode": "0xc4",
"EventName": "BR_INST_RETIRED.ALL_BRANCHES",
"PublicDescription": "Counts the total number of instructions in which the instruction pointer (IP) of the processor is resteered due to a branch instruction and the branch instruction successfully retires. All branch type instructions are accounted for.",
@@ -62,6 +62,7 @@
{
"BriefDescription": "Counts the number of retired JCC (Jump on Conditional Code) branch instructions retired, includes both taken and not taken branches.",
"Counter": "0,1,2,3,4,5,6,7",
+ "Errata": "MTL013",
"EventCode": "0xc4",
"EventName": "BR_INST_RETIRED.COND",
"SampleAfterValue": "200003",
@@ -110,6 +111,7 @@
{
"BriefDescription": "Counts the number of far branch instructions retired, includes far jump, far call and return, and interrupt call and return.",
"Counter": "0,1,2,3,4,5,6,7",
+ "Errata": "MTL013",
"EventCode": "0xc4",
"EventName": "BR_INST_RETIRED.FAR_BRANCH",
"SampleAfterValue": "200003",
@@ -129,6 +131,7 @@
{
"BriefDescription": "Counts the number of near indirect JMP and near indirect CALL branch instructions retired.",
"Counter": "0,1,2,3,4,5,6,7",
+ "Errata": "MTL013",
"EventCode": "0xc4",
"EventName": "BR_INST_RETIRED.INDIRECT",
"SampleAfterValue": "200003",
@@ -148,6 +151,7 @@
{
"BriefDescription": "Counts the number of near indirect CALL branch instructions retired.",
"Counter": "0,1,2,3,4,5,6,7",
+ "Errata": "MTL013",
"EventCode": "0xc4",
"EventName": "BR_INST_RETIRED.INDIRECT_CALL",
"SampleAfterValue": "200003",
@@ -167,6 +171,7 @@
"BriefDescription": "This event is deprecated. Refer to new event BR_INST_RETIRED.INDIRECT_CALL",
"Counter": "0,1,2,3,4,5,6,7",
"Deprecated": "1",
+ "Errata": "MTL013",
"EventCode": "0xc4",
"EventName": "BR_INST_RETIRED.IND_CALL",
"SampleAfterValue": "200003",
@@ -176,6 +181,7 @@
{
"BriefDescription": "Counts the number of near CALL branch instructions retired.",
"Counter": "0,1,2,3,4,5,6,7",
+ "Errata": "MTL012, MTL013",
"EventCode": "0xc4",
"EventName": "BR_INST_RETIRED.NEAR_CALL",
"SampleAfterValue": "200003",
@@ -214,6 +220,7 @@
{
"BriefDescription": "Counts the number of near taken branch instructions retired.",
"Counter": "0,1,2,3,4,5,6,7",
+ "Errata": "MTL013",
"EventCode": "0xc4",
"EventName": "BR_INST_RETIRED.NEAR_TAKEN",
"SampleAfterValue": "200003",
@@ -484,7 +491,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xec",
"EventName": "CPU_CLK_UNHALTED.C01",
- "PublicDescription": "Counts core clocks when the thread is in the C0.1 light-weight slower wakeup time but more power saving optimized state. This state can be entered via the TPAUSE or UMWAIT instructions. Available PDIST counters: 0",
+ "PublicDescription": "Counts core clocks when the thread is in the C0.1 light-weight slower wakeup time but more power saving optimized state. This state can be entered via the TPAUSE or UMWAIT instructions.",
"SampleAfterValue": "2000003",
"UMask": "0x10",
"Unit": "cpu_core"
@@ -494,7 +501,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xec",
"EventName": "CPU_CLK_UNHALTED.C02",
- "PublicDescription": "Counts core clocks when the thread is in the C0.2 light-weight faster wakeup time but less power saving optimized state. This state can be entered via the TPAUSE or UMWAIT instructions. Available PDIST counters: 0",
+ "PublicDescription": "Counts core clocks when the thread is in the C0.2 light-weight faster wakeup time but less power saving optimized state. This state can be entered via the TPAUSE or UMWAIT instructions.",
"SampleAfterValue": "2000003",
"UMask": "0x20",
"Unit": "cpu_core"
@@ -504,7 +511,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xec",
"EventName": "CPU_CLK_UNHALTED.C0_WAIT",
- "PublicDescription": "Counts core clocks when the thread is in the C0.1 or C0.2 power saving optimized states (TPAUSE or UMWAIT instructions) or running the PAUSE instruction. Available PDIST counters: 0",
+ "PublicDescription": "Counts core clocks when the thread is in the C0.1 or C0.2 power saving optimized states (TPAUSE or UMWAIT instructions) or running the PAUSE instruction.",
"SampleAfterValue": "2000003",
"UMask": "0x70",
"Unit": "cpu_core"
@@ -530,7 +537,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xec",
"EventName": "CPU_CLK_UNHALTED.DISTRIBUTED",
- "PublicDescription": "This event distributes cycle counts between active hyperthreads, i.e., those in C0. A hyperthread becomes inactive when it executes the HLT or MWAIT instructions. If all other hyperthreads are inactive (or disabled or do not exist), all counts are attributed to this hyperthread. To obtain the full count when the Core is active, sum the counts from each hyperthread. Available PDIST counters: 0",
+ "PublicDescription": "This event distributes cycle counts between active hyperthreads, i.e., those in C0. A hyperthread becomes inactive when it executes the HLT or MWAIT instructions. If all other hyperthreads are inactive (or disabled or do not exist), all counts are attributed to this hyperthread. To obtain the full count when the Core is active, sum the counts from each hyperthread.",
"SampleAfterValue": "2000003",
"UMask": "0x2",
"Unit": "cpu_core"
@@ -540,7 +547,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0x3c",
"EventName": "CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE",
- "PublicDescription": "Counts Core crystal clock cycles when current thread is unhalted and the other thread is halted. Available PDIST counters: 0",
+ "PublicDescription": "Counts Core crystal clock cycles when current thread is unhalted and the other thread is halted.",
"SampleAfterValue": "25003",
"UMask": "0x2",
"Unit": "cpu_core"
@@ -550,7 +557,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xec",
"EventName": "CPU_CLK_UNHALTED.PAUSE",
- "PublicDescription": "CPU_CLK_UNHALTED.PAUSE Available PDIST counters: 0",
"SampleAfterValue": "2000003",
"UMask": "0x40",
"Unit": "cpu_core"
@@ -562,7 +568,6 @@
"EdgeDetect": "1",
"EventCode": "0xec",
"EventName": "CPU_CLK_UNHALTED.PAUSE_INST",
- "PublicDescription": "CPU_CLK_UNHALTED.PAUSE_INST Available PDIST counters: 0",
"SampleAfterValue": "2000003",
"UMask": "0x40",
"Unit": "cpu_core"
@@ -572,7 +577,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0x3c",
"EventName": "CPU_CLK_UNHALTED.REF_DISTRIBUTED",
- "PublicDescription": "This event distributes Core crystal clock cycle counts between active hyperthreads, i.e., those in C0 sleep-state. A hyperthread becomes inactive when it executes the HLT or MWAIT instructions. If one thread is active in a core, all counts are attributed to this hyperthread. To obtain the full count when the Core is active, sum the counts from each hyperthread. Available PDIST counters: 0",
+ "PublicDescription": "This event distributes Core crystal clock cycle counts between active hyperthreads, i.e., those in C0 sleep-state. A hyperthread becomes inactive when it executes the HLT or MWAIT instructions. If one thread is active in a core, all counts are attributed to this hyperthread. To obtain the full count when the Core is active, sum the counts from each hyperthread.",
"SampleAfterValue": "2000003",
"UMask": "0x8",
"Unit": "cpu_core"
@@ -589,7 +594,7 @@
"BriefDescription": "Reference cycles when the core is not in halt state.",
"Counter": "Fixed counter 2",
"EventName": "CPU_CLK_UNHALTED.REF_TSC",
- "PublicDescription": "Counts the number of reference cycles when the core is not in a halt state. The core enters the halt state when it is running the HLT instruction or the MWAIT instruction. This event is not affected by core frequency changes (for example, P states, TM2 transitions) but has the same incrementing frequency as the time stamp counter. This event can approximate elapsed time while the core was not in a halt state. It is counted on a dedicated fixed counter, leaving the eight programmable counters available for other events. Note: On all current platforms this event stops counting during 'throttling (TM)' states duty off periods the processor is 'halted'. The counter update is done at a lower clock rate then the core clock the overflow status bit for this counter may appear 'sticky'. After the counter has overflowed and software clears the overflow status bit and resets the counter to less than MAX. The reset value to the counter is not clocked immediately so the overflow status bit will flip 'high (1)' and generate another PMI (if enabled) after which the reset value gets clocked into the counter. Therefore, software will get the interrupt, read the overflow status bit '1 for bit 34 while the counter value is less than MAX. Software should ignore this case. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of reference cycles when the core is not in a halt state. The core enters the halt state when it is running the HLT instruction or the MWAIT instruction. This event is not affected by core frequency changes (for example, P states, TM2 transitions) but has the same incrementing frequency as the time stamp counter. This event can approximate elapsed time while the core was not in a halt state. It is counted on a dedicated fixed counter, leaving the eight programmable counters available for other events. Note: On all current platforms this event stops counting during 'throttling (TM)' states duty off periods the processor is 'halted'. The counter update is done at a lower clock rate then the core clock the overflow status bit for this counter may appear 'sticky'. After the counter has overflowed and software clears the overflow status bit and resets the counter to less than MAX. The reset value to the counter is not clocked immediately so the overflow status bit will flip 'high (1)' and generate another PMI (if enabled) after which the reset value gets clocked into the counter. Therefore, software will get the interrupt, read the overflow status bit '1 for bit 34 while the counter value is less than MAX. Software should ignore this case.",
"SampleAfterValue": "2000003",
"UMask": "0x3",
"Unit": "cpu_core"
@@ -609,7 +614,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0x3c",
"EventName": "CPU_CLK_UNHALTED.REF_TSC_P",
- "PublicDescription": "Counts the number of reference cycles when the core is not in a halt state. The core enters the halt state when it is running the HLT instruction or the MWAIT instruction. This event is not affected by core frequency changes (for example, P states, TM2 transitions) but has the same incrementing frequency as the time stamp counter. This event can approximate elapsed time while the core was not in a halt state. It is counted on a dedicated fixed counter, leaving the four (eight when Hyperthreading is disabled) programmable counters available for other events. Note: On all current platforms this event stops counting during 'throttling (TM)' states duty off periods the processor is 'halted'. The counter update is done at a lower clock rate then the core clock the overflow status bit for this counter may appear 'sticky'. After the counter has overflowed and software clears the overflow status bit and resets the counter to less than MAX. The reset value to the counter is not clocked immediately so the overflow status bit will flip 'high (1)' and generate another PMI (if enabled) after which the reset value gets clocked into the counter. Therefore, software will get the interrupt, read the overflow status bit '1 for bit 34 while the counter value is less than MAX. Software should ignore this case. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of reference cycles when the core is not in a halt state. The core enters the halt state when it is running the HLT instruction or the MWAIT instruction. This event is not affected by core frequency changes (for example, P states, TM2 transitions) but has the same incrementing frequency as the time stamp counter. This event can approximate elapsed time while the core was not in a halt state. It is counted on a dedicated fixed counter, leaving the four (eight when Hyperthreading is disabled) programmable counters available for other events. Note: On all current platforms this event stops counting during 'throttling (TM)' states duty off periods the processor is 'halted'. The counter update is done at a lower clock rate then the core clock the overflow status bit for this counter may appear 'sticky'. After the counter has overflowed and software clears the overflow status bit and resets the counter to less than MAX. The reset value to the counter is not clocked immediately so the overflow status bit will flip 'high (1)' and generate another PMI (if enabled) after which the reset value gets clocked into the counter. Therefore, software will get the interrupt, read the overflow status bit '1 for bit 34 while the counter value is less than MAX. Software should ignore this case.",
"SampleAfterValue": "2000003",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -626,7 +631,7 @@
"BriefDescription": "Core cycles when the thread is not in halt state",
"Counter": "Fixed counter 1",
"EventName": "CPU_CLK_UNHALTED.THREAD",
- "PublicDescription": "Counts the number of core cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. This event is a component in many key event ratios. The core frequency may change from time to time due to transitions associated with Enhanced Intel SpeedStep Technology or TM2. For this reason this event may have a changing ratio with regards to time. When the core frequency is constant, this event can approximate elapsed time while the core was not in the halt state. It is counted on a dedicated fixed counter, leaving the eight programmable counters available for other events. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of core cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. This event is a component in many key event ratios. The core frequency may change from time to time due to transitions associated with Enhanced Intel SpeedStep Technology or TM2. For this reason this event may have a changing ratio with regards to time. When the core frequency is constant, this event can approximate elapsed time while the core was not in the halt state. It is counted on a dedicated fixed counter, leaving the eight programmable counters available for other events.",
"SampleAfterValue": "2000003",
"UMask": "0x2",
"Unit": "cpu_core"
@@ -644,7 +649,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0x3c",
"EventName": "CPU_CLK_UNHALTED.THREAD_P",
- "PublicDescription": "This is an architectural event that counts the number of thread cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. The core frequency may change from time to time due to power or thermal throttling. For this reason, this event may have a changing ratio with regards to wall clock time. Available PDIST counters: 0",
+ "PublicDescription": "This is an architectural event that counts the number of thread cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. The core frequency may change from time to time due to power or thermal throttling. For this reason, this event may have a changing ratio with regards to wall clock time.",
"SampleAfterValue": "2000003",
"Unit": "cpu_core"
},
@@ -654,7 +659,6 @@
"CounterMask": "8",
"EventCode": "0xa3",
"EventName": "CYCLE_ACTIVITY.CYCLES_L1D_MISS",
- "PublicDescription": "Cycles while L1 cache miss demand load is outstanding. Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x8",
"Unit": "cpu_core"
@@ -665,7 +669,6 @@
"CounterMask": "1",
"EventCode": "0xa3",
"EventName": "CYCLE_ACTIVITY.CYCLES_L2_MISS",
- "PublicDescription": "Cycles while L2 cache miss demand load is outstanding. Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -676,7 +679,6 @@
"CounterMask": "16",
"EventCode": "0xa3",
"EventName": "CYCLE_ACTIVITY.CYCLES_MEM_ANY",
- "PublicDescription": "Cycles while memory subsystem has an outstanding load. Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x10",
"Unit": "cpu_core"
@@ -687,7 +689,6 @@
"CounterMask": "12",
"EventCode": "0xa3",
"EventName": "CYCLE_ACTIVITY.STALLS_L1D_MISS",
- "PublicDescription": "Execution stalls while L1 cache miss demand load is outstanding. Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0xc",
"Unit": "cpu_core"
@@ -698,7 +699,6 @@
"CounterMask": "5",
"EventCode": "0xa3",
"EventName": "CYCLE_ACTIVITY.STALLS_L2_MISS",
- "PublicDescription": "Execution stalls while L2 cache miss demand load is outstanding. Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x5",
"Unit": "cpu_core"
@@ -709,7 +709,6 @@
"CounterMask": "4",
"EventCode": "0xa3",
"EventName": "CYCLE_ACTIVITY.STALLS_TOTAL",
- "PublicDescription": "Total execution stalls. Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x4",
"Unit": "cpu_core"
@@ -719,7 +718,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xa6",
"EventName": "EXE_ACTIVITY.1_PORTS_UTIL",
- "PublicDescription": "Counts cycles during which a total of 1 uop was executed on all ports and Reservation Station (RS) was not empty. Available PDIST counters: 0",
+ "PublicDescription": "Counts cycles during which a total of 1 uop was executed on all ports and Reservation Station (RS) was not empty.",
"SampleAfterValue": "2000003",
"UMask": "0x2",
"Unit": "cpu_core"
@@ -729,7 +728,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xa6",
"EventName": "EXE_ACTIVITY.2_3_PORTS_UTIL",
- "PublicDescription": "Cycles total of 2 or 3 uops are executed on all ports and Reservation Station (RS) was not empty. Available PDIST counters: 0",
"SampleAfterValue": "2000003",
"UMask": "0xc",
"Unit": "cpu_core"
@@ -739,7 +737,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xa6",
"EventName": "EXE_ACTIVITY.2_PORTS_UTIL",
- "PublicDescription": "Counts cycles during which a total of 2 uops were executed on all ports and Reservation Station (RS) was not empty. Available PDIST counters: 0",
+ "PublicDescription": "Counts cycles during which a total of 2 uops were executed on all ports and Reservation Station (RS) was not empty.",
"SampleAfterValue": "2000003",
"UMask": "0x4",
"Unit": "cpu_core"
@@ -749,7 +747,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xa6",
"EventName": "EXE_ACTIVITY.3_PORTS_UTIL",
- "PublicDescription": "Cycles total of 3 uops are executed on all ports and Reservation Station (RS) was not empty. Available PDIST counters: 0",
+ "PublicDescription": "Cycles total of 3 uops are executed on all ports and Reservation Station (RS) was not empty.",
"SampleAfterValue": "2000003",
"UMask": "0x8",
"Unit": "cpu_core"
@@ -759,7 +757,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xa6",
"EventName": "EXE_ACTIVITY.4_PORTS_UTIL",
- "PublicDescription": "Cycles total of 4 uops are executed on all ports and Reservation Station (RS) was not empty. Available PDIST counters: 0",
+ "PublicDescription": "Cycles total of 4 uops are executed on all ports and Reservation Station (RS) was not empty.",
"SampleAfterValue": "2000003",
"UMask": "0x10",
"Unit": "cpu_core"
@@ -770,7 +768,6 @@
"CounterMask": "5",
"EventCode": "0xa6",
"EventName": "EXE_ACTIVITY.BOUND_ON_LOADS",
- "PublicDescription": "Execution stalls while memory subsystem has an outstanding load. Available PDIST counters: 0",
"SampleAfterValue": "2000003",
"UMask": "0x21",
"Unit": "cpu_core"
@@ -781,7 +778,7 @@
"CounterMask": "2",
"EventCode": "0xa6",
"EventName": "EXE_ACTIVITY.BOUND_ON_STORES",
- "PublicDescription": "Counts cycles where the Store Buffer was full and no loads caused an execution stall. Available PDIST counters: 0",
+ "PublicDescription": "Counts cycles where the Store Buffer was full and no loads caused an execution stall.",
"SampleAfterValue": "1000003",
"UMask": "0x40",
"Unit": "cpu_core"
@@ -791,7 +788,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xa6",
"EventName": "EXE_ACTIVITY.EXE_BOUND_0_PORTS",
- "PublicDescription": "Number of cycles total of 0 uops executed on all ports, Reservation Station (RS) was not empty, the Store Buffer (SB) was not full and there was no outstanding load. Available PDIST counters: 0",
+ "PublicDescription": "Number of cycles total of 0 uops executed on all ports, Reservation Station (RS) was not empty, the Store Buffer (SB) was not full and there was no outstanding load.",
"SampleAfterValue": "1000003",
"UMask": "0x80",
"Unit": "cpu_core"
@@ -801,7 +798,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x75",
"EventName": "INST_DECODED.DECODERS",
- "PublicDescription": "Number of decoders utilized in a cycle when the MITE (legacy decode pipeline) fetches instructions. Available PDIST counters: 0",
+ "PublicDescription": "Number of decoders utilized in a cycle when the MITE (legacy decode pipeline) fetches instructions.",
"SampleAfterValue": "2000003",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -846,7 +843,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc0",
"EventName": "INST_RETIRED.MACRO_FUSED",
- "PublicDescription": "INST_RETIRED.MACRO_FUSED Available PDIST counters: 0",
"SampleAfterValue": "2000003",
"UMask": "0x10",
"Unit": "cpu_core"
@@ -856,7 +852,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc0",
"EventName": "INST_RETIRED.NOP",
- "PublicDescription": "Counts all retired NOP or ENDBR32/64 or PREFETCHIT0/1 instructions Available PDIST counters: 0",
+ "PublicDescription": "Counts all retired NOP or ENDBR32/64 or PREFETCHIT0/1 instructions",
"SampleAfterValue": "2000003",
"UMask": "0x2",
"Unit": "cpu_core"
@@ -875,7 +871,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc0",
"EventName": "INST_RETIRED.REP_ITERATION",
- "PublicDescription": "Number of iterations of Repeat (REP) string retired instructions such as MOVS, CMPS, and SCAS. Each has a byte, word, and doubleword version and string instructions can be repeated using a repetition prefix, REP, that allows their architectural execution to be repeated a number of times as specified by the RCX register. Note the number of iterations is implementation-dependent. Available PDIST counters: 0",
+ "PublicDescription": "Number of iterations of Repeat (REP) string retired instructions such as MOVS, CMPS, and SCAS. Each has a byte, word, and doubleword version and string instructions can be repeated using a repetition prefix, REP, that allows their architectural execution to be repeated a number of times as specified by the RCX register. Note the number of iterations is implementation-dependent.",
"SampleAfterValue": "2000003",
"UMask": "0x8",
"Unit": "cpu_core"
@@ -887,7 +883,7 @@
"EdgeDetect": "1",
"EventCode": "0xad",
"EventName": "INT_MISC.CLEARS_COUNT",
- "PublicDescription": "Counts the number of speculative clears due to any type of branch misprediction or machine clears Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of speculative clears due to any type of branch misprediction or machine clears",
"SampleAfterValue": "500009",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -897,7 +893,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xad",
"EventName": "INT_MISC.CLEAR_RESTEER_CYCLES",
- "PublicDescription": "Cycles after recovery from a branch misprediction or machine clear till the first uop is issued from the resteered path. Available PDIST counters: 0",
+ "PublicDescription": "Cycles after recovery from a branch misprediction or machine clear till the first uop is issued from the resteered path.",
"SampleAfterValue": "500009",
"UMask": "0x80",
"Unit": "cpu_core"
@@ -907,7 +903,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xad",
"EventName": "INT_MISC.RECOVERY_CYCLES",
- "PublicDescription": "Counts core cycles when the Resource allocator was stalled due to recovery from an earlier branch misprediction or machine clear event. Available PDIST counters: 0",
+ "PublicDescription": "Counts core cycles when the Resource allocator was stalled due to recovery from an earlier branch misprediction or machine clear event.",
"SampleAfterValue": "500009",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -919,7 +915,6 @@
"EventName": "INT_MISC.UNKNOWN_BRANCH_CYCLES",
"MSRIndex": "0x3F7",
"MSRValue": "0x7",
- "PublicDescription": "Bubble cycles of BAClear (Unknown Branch). Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x40",
"Unit": "cpu_core"
@@ -929,7 +924,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xad",
"EventName": "INT_MISC.UOP_DROPPING",
- "PublicDescription": "Estimated number of Top-down Microarchitecture Analysis slots that got dropped due to non front-end reasons Available PDIST counters: 0",
+ "PublicDescription": "Estimated number of Top-down Microarchitecture Analysis slots that got dropped due to non front-end reasons",
"SampleAfterValue": "1000003",
"UMask": "0x10",
"Unit": "cpu_core"
@@ -939,7 +934,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xe7",
"EventName": "INT_VEC_RETIRED.128BIT",
- "PublicDescription": "INT_VEC_RETIRED.128BIT Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x13",
"Unit": "cpu_core"
@@ -949,7 +943,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xe7",
"EventName": "INT_VEC_RETIRED.256BIT",
- "PublicDescription": "INT_VEC_RETIRED.256BIT Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0xac",
"Unit": "cpu_core"
@@ -959,7 +952,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xe7",
"EventName": "INT_VEC_RETIRED.ADD_128",
- "PublicDescription": "Number of retired integer ADD/SUB (regular or horizontal), SAD 128-bit vector instructions. Available PDIST counters: 0",
+ "PublicDescription": "Number of retired integer ADD/SUB (regular or horizontal), SAD 128-bit vector instructions.",
"SampleAfterValue": "1000003",
"UMask": "0x3",
"Unit": "cpu_core"
@@ -969,7 +962,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xe7",
"EventName": "INT_VEC_RETIRED.ADD_256",
- "PublicDescription": "Number of retired integer ADD/SUB (regular or horizontal), SAD 256-bit vector instructions. Available PDIST counters: 0",
+ "PublicDescription": "Number of retired integer ADD/SUB (regular or horizontal), SAD 256-bit vector instructions.",
"SampleAfterValue": "1000003",
"UMask": "0xc",
"Unit": "cpu_core"
@@ -979,7 +972,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xe7",
"EventName": "INT_VEC_RETIRED.MUL_256",
- "PublicDescription": "INT_VEC_RETIRED.MUL_256 Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x80",
"Unit": "cpu_core"
@@ -989,7 +981,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xe7",
"EventName": "INT_VEC_RETIRED.SHUFFLES",
- "PublicDescription": "INT_VEC_RETIRED.SHUFFLES Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x40",
"Unit": "cpu_core"
@@ -999,7 +990,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xe7",
"EventName": "INT_VEC_RETIRED.VNNI_128",
- "PublicDescription": "INT_VEC_RETIRED.VNNI_128 Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x10",
"Unit": "cpu_core"
@@ -1009,7 +999,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xe7",
"EventName": "INT_VEC_RETIRED.VNNI_256",
- "PublicDescription": "INT_VEC_RETIRED.VNNI_256 Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x20",
"Unit": "cpu_core"
@@ -1028,7 +1017,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x03",
"EventName": "LD_BLOCKS.ADDRESS_ALIAS",
- "PublicDescription": "Counts the number of times a load got blocked due to false dependencies in MOB due to partial compare on address. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of times a load got blocked due to false dependencies in MOB due to partial compare on address.",
"SampleAfterValue": "100003",
"UMask": "0x4",
"Unit": "cpu_core"
@@ -1047,7 +1036,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x03",
"EventName": "LD_BLOCKS.NO_SR",
- "PublicDescription": "Counts the number of times that split load operations are temporarily blocked because all resources for handling the split accesses are in use. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of times that split load operations are temporarily blocked because all resources for handling the split accesses are in use.",
"SampleAfterValue": "100003",
"UMask": "0x88",
"Unit": "cpu_core"
@@ -1066,7 +1055,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x03",
"EventName": "LD_BLOCKS.STORE_FORWARD",
- "PublicDescription": "Counts the number of times where store forwarding was prevented for a load operation. The most common case is a load blocked due to the address of memory access (partially) overlapping with a preceding uncompleted store. Note: See the table of not supported store forwards in the Optimization Guide. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of times where store forwarding was prevented for a load operation. The most common case is a load blocked due to the address of memory access (partially) overlapping with a preceding uncompleted store. Note: See the table of not supported store forwards in the Optimization Guide.",
"SampleAfterValue": "100003",
"UMask": "0x82",
"Unit": "cpu_core"
@@ -1076,7 +1065,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x4c",
"EventName": "LOAD_HIT_PREFETCH.SWPF",
- "PublicDescription": "Counts all software-prefetch load dispatches that hit the fill buffer (FB) allocated for the software prefetch. It can also be incremented by some lock instructions. So it should only be used with profiling so that the locks can be excluded by ASM (Assembly File) inspection of the nearby instructions. Available PDIST counters: 0",
+ "PublicDescription": "Counts all software-prefetch load dispatches that hit the fill buffer (FB) allocated for the software prefetch. It can also be incremented by some lock instructions. So it should only be used with profiling so that the locks can be excluded by ASM (Assembly File) inspection of the nearby instructions.",
"SampleAfterValue": "100003",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -1087,7 +1076,7 @@
"CounterMask": "1",
"EventCode": "0xa8",
"EventName": "LSD.CYCLES_ACTIVE",
- "PublicDescription": "Counts the cycles when at least one uop is delivered by the LSD (Loop-stream detector). Available PDIST counters: 0",
+ "PublicDescription": "Counts the cycles when at least one uop is delivered by the LSD (Loop-stream detector).",
"SampleAfterValue": "2000003",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -1098,7 +1087,7 @@
"CounterMask": "6",
"EventCode": "0xa8",
"EventName": "LSD.CYCLES_OK",
- "PublicDescription": "Counts the cycles when optimal number of uops is delivered by the LSD (Loop-stream detector). Available PDIST counters: 0",
+ "PublicDescription": "Counts the cycles when optimal number of uops is delivered by the LSD (Loop-stream detector).",
"SampleAfterValue": "2000003",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -1108,7 +1097,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xa8",
"EventName": "LSD.UOPS",
- "PublicDescription": "Counts the number of uops delivered to the back-end by the LSD(Loop Stream Detector). Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of uops delivered to the back-end by the LSD(Loop Stream Detector).",
"SampleAfterValue": "2000003",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -1128,7 +1117,7 @@
"EdgeDetect": "1",
"EventCode": "0xc3",
"EventName": "MACHINE_CLEARS.COUNT",
- "PublicDescription": "Counts the number of machine clears (nukes) of any type. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of machine clears (nukes) of any type.",
"SampleAfterValue": "100003",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -1184,7 +1173,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc3",
"EventName": "MACHINE_CLEARS.SMC",
- "PublicDescription": "Counts self-modifying code (SMC) detected, which causes a machine clear. Available PDIST counters: 0",
+ "PublicDescription": "Counts self-modifying code (SMC) detected, which causes a machine clear.",
"SampleAfterValue": "100003",
"UMask": "0x4",
"Unit": "cpu_core"
@@ -1194,7 +1183,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xe0",
"EventName": "MISC2_RETIRED.LFENCE",
- "PublicDescription": "number of LFENCE retired instructions Available PDIST counters: 0",
+ "PublicDescription": "number of LFENCE retired instructions",
"SampleAfterValue": "400009",
"UMask": "0x20",
"Unit": "cpu_core"
@@ -1213,7 +1202,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xcc",
"EventName": "MISC_RETIRED.LBR_INSERTS",
- "PublicDescription": "Increments when an entry is added to the Last Branch Record (LBR) array (or removed from the array in case of RETURNs in call stack mode). The event requires LBR enable via IA32_DEBUGCTL MSR and branch type selection via MSR_LBR_SELECT. Available PDIST counters: 0",
+ "PublicDescription": "Increments when an entry is added to the Last Branch Record (LBR) array (or removed from the array in case of RETURNs in call stack mode). The event requires LBR enable via IA32_DEBUGCTL MSR and branch type selection via MSR_LBR_SELECT.",
"SampleAfterValue": "100003",
"UMask": "0x20",
"Unit": "cpu_core"
@@ -1223,7 +1212,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xa2",
"EventName": "RESOURCE_STALLS.SB",
- "PublicDescription": "Counts allocation stall cycles caused by the store buffer (SB) being full. This counts cycles that the pipeline back-end blocked uop delivery from the front-end. Available PDIST counters: 0",
+ "PublicDescription": "Counts allocation stall cycles caused by the store buffer (SB) being full. This counts cycles that the pipeline back-end blocked uop delivery from the front-end.",
"SampleAfterValue": "100003",
"UMask": "0x8",
"Unit": "cpu_core"
@@ -1233,7 +1222,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xa2",
"EventName": "RESOURCE_STALLS.SCOREBOARD",
- "PublicDescription": "Counts cycles where the pipeline is stalled due to serializing operations. Available PDIST counters: 0",
"SampleAfterValue": "100003",
"UMask": "0x2",
"Unit": "cpu_core"
@@ -1243,7 +1231,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xa5",
"EventName": "RS.EMPTY",
- "PublicDescription": "Counts cycles during which the reservation station (RS) is empty for this logical processor. This is usually caused when the front-end pipeline runs into starvation periods (e.g. branch mispredictions or i-cache misses) Available PDIST counters: 0",
+ "PublicDescription": "Counts cycles during which the reservation station (RS) is empty for this logical processor. This is usually caused when the front-end pipeline runs into starvation periods (e.g. branch mispredictions or i-cache misses)",
"SampleAfterValue": "1000003",
"UMask": "0x7",
"Unit": "cpu_core"
@@ -1256,7 +1244,7 @@
"EventCode": "0xa5",
"EventName": "RS.EMPTY_COUNT",
"Invert": "1",
- "PublicDescription": "Counts end of periods where the Reservation Station (RS) was empty. Could be useful to closely sample on front-end latency issues (see the FRONTEND_RETIRED event of designated precise events) Available PDIST counters: 0",
+ "PublicDescription": "Counts end of periods where the Reservation Station (RS) was empty. Could be useful to closely sample on front-end latency issues (see the FRONTEND_RETIRED event of designated precise events)",
"SampleAfterValue": "100003",
"UMask": "0x7",
"Unit": "cpu_core"
@@ -1266,7 +1254,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xa5",
"EventName": "RS.EMPTY_RESOURCE",
- "PublicDescription": "Cycles when RS was empty and a resource allocation stall is asserted Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -1285,7 +1272,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xa4",
"EventName": "TOPDOWN.BACKEND_BOUND_SLOTS",
- "PublicDescription": "This event counts a subset of the Topdown Slots event that were not consumed by the back-end pipeline due to lack of back-end resources, as a result of memory subsystem delays, execution units limitations, or other conditions. The count is distributed among unhalted logical processors (hyper-threads) who share the same physical core, in processors that support Intel Hyper-Threading Technology. Software can use this event as the numerator for the Backend Bound metric (or top-level category) of the Top-down Microarchitecture Analysis method. Available PDIST counters: 0",
+ "PublicDescription": "This event counts a subset of the Topdown Slots event that were not consumed by the back-end pipeline due to lack of back-end resources, as a result of memory subsystem delays, execution units limitations, or other conditions. The count is distributed among unhalted logical processors (hyper-threads) who share the same physical core, in processors that support Intel Hyper-Threading Technology. Software can use this event as the numerator for the Backend Bound metric (or top-level category) of the Top-down Microarchitecture Analysis method.",
"SampleAfterValue": "10000003",
"UMask": "0x2",
"Unit": "cpu_core"
@@ -1295,7 +1282,7 @@
"Counter": "0",
"EventCode": "0xa4",
"EventName": "TOPDOWN.BAD_SPEC_SLOTS",
- "PublicDescription": "Number of slots of TMA method that were wasted due to incorrect speculation. It covers all types of control-flow or data-related mis-speculations. Available PDIST counters: 0",
+ "PublicDescription": "Number of slots of TMA method that were wasted due to incorrect speculation. It covers all types of control-flow or data-related mis-speculations.",
"SampleAfterValue": "10000003",
"UMask": "0x4",
"Unit": "cpu_core"
@@ -1305,7 +1292,7 @@
"Counter": "0",
"EventCode": "0xa4",
"EventName": "TOPDOWN.BR_MISPREDICT_SLOTS",
- "PublicDescription": "Number of TMA slots that were wasted due to incorrect speculation by (any type of) branch mispredictions. This event estimates number of speculative operations that were issued but not retired as well as the out-of-order engine recovery past a branch misprediction. Available PDIST counters: 0",
+ "PublicDescription": "Number of TMA slots that were wasted due to incorrect speculation by (any type of) branch mispredictions. This event estimates number of speculative operations that were issued but not retired as well as the out-of-order engine recovery past a branch misprediction.",
"SampleAfterValue": "10000003",
"UMask": "0x8",
"Unit": "cpu_core"
@@ -1315,7 +1302,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xa4",
"EventName": "TOPDOWN.MEMORY_BOUND_SLOTS",
- "PublicDescription": "TOPDOWN.MEMORY_BOUND_SLOTS Available PDIST counters: 0",
"SampleAfterValue": "10000003",
"UMask": "0x10",
"Unit": "cpu_core"
@@ -1324,7 +1310,7 @@
"BriefDescription": "TMA slots available for an unhalted logical processor. Fixed counter - architectural event",
"Counter": "Fixed counter 3",
"EventName": "TOPDOWN.SLOTS",
- "PublicDescription": "Number of available slots for an unhalted logical processor. The event increments by machine-width of the narrowest pipeline as employed by the Top-down Microarchitecture Analysis method (TMA). The count is distributed among unhalted logical processors (hyper-threads) who share the same physical core. Software can use this event as the denominator for the top-level metrics of the TMA method. This architectural event is counted on a designated fixed counter (Fixed Counter 3). Available PDIST counters: 0",
+ "PublicDescription": "Number of available slots for an unhalted logical processor. The event increments by machine-width of the narrowest pipeline as employed by the Top-down Microarchitecture Analysis method (TMA). The count is distributed among unhalted logical processors (hyper-threads) who share the same physical core. Software can use this event as the denominator for the top-level metrics of the TMA method. This architectural event is counted on a designated fixed counter (Fixed Counter 3).",
"SampleAfterValue": "10000003",
"UMask": "0x4",
"Unit": "cpu_core"
@@ -1334,7 +1320,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xa4",
"EventName": "TOPDOWN.SLOTS_P",
- "PublicDescription": "Counts the number of available slots for an unhalted logical processor. The event increments by machine-width of the narrowest pipeline as employed by the Top-down Microarchitecture Analysis method. The count is distributed among unhalted logical processors (hyper-threads) who share the same physical core. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of available slots for an unhalted logical processor. The event increments by machine-width of the narrowest pipeline as employed by the Top-down Microarchitecture Analysis method. The count is distributed among unhalted logical processors (hyper-threads) who share the same physical core.",
"SampleAfterValue": "10000003",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -1591,7 +1577,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x76",
"EventName": "UOPS_DECODED.DEC0_UOPS",
- "PublicDescription": "This event counts the number of not dec-by-all uops decoded by decoder 0. Available PDIST counters: 0",
+ "PublicDescription": "This event counts the number of not dec-by-all uops decoded by decoder 0.",
"SampleAfterValue": "1000003",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -1601,7 +1587,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xb2",
"EventName": "UOPS_DISPATCHED.PORT_0",
- "PublicDescription": "Number of uops dispatch to execution port 0. Available PDIST counters: 0",
+ "PublicDescription": "Number of uops dispatch to execution port 0.",
"SampleAfterValue": "2000003",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -1611,7 +1597,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xb2",
"EventName": "UOPS_DISPATCHED.PORT_1",
- "PublicDescription": "Number of uops dispatch to execution port 1. Available PDIST counters: 0",
+ "PublicDescription": "Number of uops dispatch to execution port 1.",
"SampleAfterValue": "2000003",
"UMask": "0x2",
"Unit": "cpu_core"
@@ -1621,7 +1607,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xb2",
"EventName": "UOPS_DISPATCHED.PORT_2_3_10",
- "PublicDescription": "Number of uops dispatch to execution ports 2, 3 and 10 Available PDIST counters: 0",
+ "PublicDescription": "Number of uops dispatch to execution ports 2, 3 and 10",
"SampleAfterValue": "2000003",
"UMask": "0x4",
"Unit": "cpu_core"
@@ -1631,7 +1617,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xb2",
"EventName": "UOPS_DISPATCHED.PORT_4_9",
- "PublicDescription": "Number of uops dispatch to execution ports 4 and 9 Available PDIST counters: 0",
+ "PublicDescription": "Number of uops dispatch to execution ports 4 and 9",
"SampleAfterValue": "2000003",
"UMask": "0x10",
"Unit": "cpu_core"
@@ -1641,7 +1627,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xb2",
"EventName": "UOPS_DISPATCHED.PORT_5_11",
- "PublicDescription": "Number of uops dispatch to execution ports 5 and 11 Available PDIST counters: 0",
+ "PublicDescription": "Number of uops dispatch to execution ports 5 and 11",
"SampleAfterValue": "2000003",
"UMask": "0x20",
"Unit": "cpu_core"
@@ -1651,7 +1637,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xb2",
"EventName": "UOPS_DISPATCHED.PORT_6",
- "PublicDescription": "Number of uops dispatch to execution port 6. Available PDIST counters: 0",
+ "PublicDescription": "Number of uops dispatch to execution port 6.",
"SampleAfterValue": "2000003",
"UMask": "0x40",
"Unit": "cpu_core"
@@ -1661,7 +1647,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xb2",
"EventName": "UOPS_DISPATCHED.PORT_7_8",
- "PublicDescription": "Number of uops dispatch to execution ports 7 and 8. Available PDIST counters: 0",
+ "PublicDescription": "Number of uops dispatch to execution ports 7 and 8.",
"SampleAfterValue": "2000003",
"UMask": "0x80",
"Unit": "cpu_core"
@@ -1671,7 +1657,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xb1",
"EventName": "UOPS_EXECUTED.CORE",
- "PublicDescription": "Counts the number of uops executed from any thread. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of uops executed from any thread.",
"SampleAfterValue": "2000003",
"UMask": "0x2",
"Unit": "cpu_core"
@@ -1682,7 +1668,7 @@
"CounterMask": "1",
"EventCode": "0xb1",
"EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_1",
- "PublicDescription": "Counts cycles when at least 1 micro-op is executed from any thread on physical core. Available PDIST counters: 0",
+ "PublicDescription": "Counts cycles when at least 1 micro-op is executed from any thread on physical core.",
"SampleAfterValue": "2000003",
"UMask": "0x2",
"Unit": "cpu_core"
@@ -1693,7 +1679,7 @@
"CounterMask": "2",
"EventCode": "0xb1",
"EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_2",
- "PublicDescription": "Counts cycles when at least 2 micro-ops are executed from any thread on physical core. Available PDIST counters: 0",
+ "PublicDescription": "Counts cycles when at least 2 micro-ops are executed from any thread on physical core.",
"SampleAfterValue": "2000003",
"UMask": "0x2",
"Unit": "cpu_core"
@@ -1704,7 +1690,7 @@
"CounterMask": "3",
"EventCode": "0xb1",
"EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_3",
- "PublicDescription": "Counts cycles when at least 3 micro-ops are executed from any thread on physical core. Available PDIST counters: 0",
+ "PublicDescription": "Counts cycles when at least 3 micro-ops are executed from any thread on physical core.",
"SampleAfterValue": "2000003",
"UMask": "0x2",
"Unit": "cpu_core"
@@ -1715,7 +1701,7 @@
"CounterMask": "4",
"EventCode": "0xb1",
"EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_4",
- "PublicDescription": "Counts cycles when at least 4 micro-ops are executed from any thread on physical core. Available PDIST counters: 0",
+ "PublicDescription": "Counts cycles when at least 4 micro-ops are executed from any thread on physical core.",
"SampleAfterValue": "2000003",
"UMask": "0x2",
"Unit": "cpu_core"
@@ -1726,7 +1712,7 @@
"CounterMask": "1",
"EventCode": "0xb1",
"EventName": "UOPS_EXECUTED.CYCLES_GE_1",
- "PublicDescription": "Cycles where at least 1 uop was executed per-thread. Available PDIST counters: 0",
+ "PublicDescription": "Cycles where at least 1 uop was executed per-thread.",
"SampleAfterValue": "2000003",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -1737,7 +1723,7 @@
"CounterMask": "2",
"EventCode": "0xb1",
"EventName": "UOPS_EXECUTED.CYCLES_GE_2",
- "PublicDescription": "Cycles where at least 2 uops were executed per-thread. Available PDIST counters: 0",
+ "PublicDescription": "Cycles where at least 2 uops were executed per-thread.",
"SampleAfterValue": "2000003",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -1748,7 +1734,7 @@
"CounterMask": "3",
"EventCode": "0xb1",
"EventName": "UOPS_EXECUTED.CYCLES_GE_3",
- "PublicDescription": "Cycles where at least 3 uops were executed per-thread. Available PDIST counters: 0",
+ "PublicDescription": "Cycles where at least 3 uops were executed per-thread.",
"SampleAfterValue": "2000003",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -1759,7 +1745,7 @@
"CounterMask": "4",
"EventCode": "0xb1",
"EventName": "UOPS_EXECUTED.CYCLES_GE_4",
- "PublicDescription": "Cycles where at least 4 uops were executed per-thread. Available PDIST counters: 0",
+ "PublicDescription": "Cycles where at least 4 uops were executed per-thread.",
"SampleAfterValue": "2000003",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -1771,7 +1757,7 @@
"EventCode": "0xb1",
"EventName": "UOPS_EXECUTED.STALLS",
"Invert": "1",
- "PublicDescription": "Counts cycles during which no uops were dispatched from the Reservation Station (RS) per thread. Available PDIST counters: 0",
+ "PublicDescription": "Counts cycles during which no uops were dispatched from the Reservation Station (RS) per thread.",
"SampleAfterValue": "2000003",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -1781,7 +1767,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xb1",
"EventName": "UOPS_EXECUTED.THREAD",
- "PublicDescription": "Counts the number of uops to be executed per-thread each cycle. Available PDIST counters: 0",
"SampleAfterValue": "2000003",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -1791,7 +1776,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xb1",
"EventName": "UOPS_EXECUTED.X87",
- "PublicDescription": "Counts the number of x87 uops executed. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of x87 uops executed.",
"SampleAfterValue": "2000003",
"UMask": "0x10",
"Unit": "cpu_core"
@@ -1810,7 +1795,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xae",
"EventName": "UOPS_ISSUED.ANY",
- "PublicDescription": "Counts the number of uops that the Resource Allocation Table (RAT) issues to the Reservation Station (RS). Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of uops that the Resource Allocation Table (RAT) issues to the Reservation Station (RS).",
"SampleAfterValue": "2000003",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -1821,7 +1806,6 @@
"CounterMask": "1",
"EventCode": "0xae",
"EventName": "UOPS_ISSUED.CYCLES",
- "PublicDescription": "UOPS_ISSUED.CYCLES Available PDIST counters: 0",
"SampleAfterValue": "2000003",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -1840,7 +1824,7 @@
"CounterMask": "1",
"EventCode": "0xc2",
"EventName": "UOPS_RETIRED.CYCLES",
- "PublicDescription": "Counts cycles where at least one uop has retired. Available PDIST counters: 0",
+ "PublicDescription": "Counts cycles where at least one uop has retired.",
"SampleAfterValue": "1000003",
"UMask": "0x2",
"Unit": "cpu_core"
@@ -1850,7 +1834,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc2",
"EventName": "UOPS_RETIRED.HEAVY",
- "PublicDescription": "Counts the number of retired micro-operations (uops) except the last uop of each instruction. An instruction that is decoded into less than two uops does not contribute to the count. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of retired micro-operations (uops) except the last uop of each instruction. An instruction that is decoded into less than two uops does not contribute to the count.",
"SampleAfterValue": "2000003",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -1880,7 +1864,6 @@
"EventName": "UOPS_RETIRED.MS",
"MSRIndex": "0x3F7",
"MSRValue": "0x8",
- "PublicDescription": "UOPS_RETIRED.MS Available PDIST counters: 0",
"SampleAfterValue": "2000003",
"UMask": "0x4",
"Unit": "cpu_core"
@@ -1890,7 +1873,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc2",
"EventName": "UOPS_RETIRED.SLOTS",
- "PublicDescription": "This event counts a subset of the Topdown Slots event that are utilized by operations that eventually get retired (committed) by the processor pipeline. Usually, this event positively correlates with higher performance for example, as measured by the instructions-per-cycle metric. Software can use this event as the numerator for the Retiring metric (or top-level category) of the Top-down Microarchitecture Analysis method. Available PDIST counters: 0",
+ "PublicDescription": "This event counts a subset of the Topdown Slots event that are utilized by operations that eventually get retired (committed) by the processor pipeline. Usually, this event positively correlates with higher performance for example, as measured by the instructions-per-cycle metric. Software can use this event as the numerator for the Retiring metric (or top-level category) of the Top-down Microarchitecture Analysis method.",
"SampleAfterValue": "2000003",
"UMask": "0x2",
"Unit": "cpu_core"
@@ -1902,7 +1885,7 @@
"EventCode": "0xc2",
"EventName": "UOPS_RETIRED.STALLS",
"Invert": "1",
- "PublicDescription": "This event counts cycles without actually retired uops. Available PDIST counters: 0",
+ "PublicDescription": "This event counts cycles without actually retired uops.",
"SampleAfterValue": "1000003",
"UMask": "0x2",
"Unit": "cpu_core"
diff --git a/tools/perf/pmu-events/arch/x86/meteorlake/virtual-memory.json b/tools/perf/pmu-events/arch/x86/meteorlake/virtual-memory.json
index f300129e9e2d..305b96b26a4e 100644
--- a/tools/perf/pmu-events/arch/x86/meteorlake/virtual-memory.json
+++ b/tools/perf/pmu-events/arch/x86/meteorlake/virtual-memory.json
@@ -13,7 +13,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x12",
"EventName": "DTLB_LOAD_MISSES.STLB_HIT",
- "PublicDescription": "Counts loads that miss the DTLB (Data TLB) and hit the STLB (Second level TLB). Available PDIST counters: 0",
+ "PublicDescription": "Counts loads that miss the DTLB (Data TLB) and hit the STLB (Second level TLB).",
"SampleAfterValue": "100003",
"UMask": "0x20",
"Unit": "cpu_core"
@@ -24,7 +24,7 @@
"CounterMask": "1",
"EventCode": "0x12",
"EventName": "DTLB_LOAD_MISSES.WALK_ACTIVE",
- "PublicDescription": "Counts cycles when at least one PMH (Page Miss Handler) is busy with a page walk for a demand load. Available PDIST counters: 0",
+ "PublicDescription": "Counts cycles when at least one PMH (Page Miss Handler) is busy with a page walk for a demand load.",
"SampleAfterValue": "100003",
"UMask": "0x10",
"Unit": "cpu_core"
@@ -43,7 +43,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x12",
"EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED",
- "PublicDescription": "Counts completed page walks (all page sizes) caused by demand data loads. This implies it missed in the DTLB and further levels of TLB. The page walk can end with or without a fault. Available PDIST counters: 0",
+ "PublicDescription": "Counts completed page walks (all page sizes) caused by demand data loads. This implies it missed in the DTLB and further levels of TLB. The page walk can end with or without a fault.",
"SampleAfterValue": "100003",
"UMask": "0xe",
"Unit": "cpu_core"
@@ -53,7 +53,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x12",
"EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_1G",
- "PublicDescription": "Counts completed page walks (1G sizes) caused by demand data loads. This implies address translations missed in the DTLB and further levels of TLB. The page walk can end with or without a fault. Available PDIST counters: 0",
+ "PublicDescription": "Counts completed page walks (1G sizes) caused by demand data loads. This implies address translations missed in the DTLB and further levels of TLB. The page walk can end with or without a fault.",
"SampleAfterValue": "100003",
"UMask": "0x8",
"Unit": "cpu_core"
@@ -73,7 +73,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x12",
"EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_2M_4M",
- "PublicDescription": "Counts completed page walks (2M/4M sizes) caused by demand data loads. This implies address translations missed in the DTLB and further levels of TLB. The page walk can end with or without a fault. Available PDIST counters: 0",
+ "PublicDescription": "Counts completed page walks (2M/4M sizes) caused by demand data loads. This implies address translations missed in the DTLB and further levels of TLB. The page walk can end with or without a fault.",
"SampleAfterValue": "100003",
"UMask": "0x4",
"Unit": "cpu_core"
@@ -93,7 +93,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x12",
"EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_4K",
- "PublicDescription": "Counts completed page walks (4K sizes) caused by demand data loads. This implies address translations missed in the DTLB and further levels of TLB. The page walk can end with or without a fault. Available PDIST counters: 0",
+ "PublicDescription": "Counts completed page walks (4K sizes) caused by demand data loads. This implies address translations missed in the DTLB and further levels of TLB. The page walk can end with or without a fault.",
"SampleAfterValue": "100003",
"UMask": "0x2",
"Unit": "cpu_core"
@@ -113,7 +113,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x12",
"EventName": "DTLB_LOAD_MISSES.WALK_PENDING",
- "PublicDescription": "Counts the number of page walks outstanding for a demand load in the PMH (Page Miss Handler) each cycle. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of page walks outstanding for a demand load in the PMH (Page Miss Handler) each cycle.",
"SampleAfterValue": "100003",
"UMask": "0x10",
"Unit": "cpu_core"
@@ -132,7 +132,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x13",
"EventName": "DTLB_STORE_MISSES.STLB_HIT",
- "PublicDescription": "Counts stores that miss the DTLB (Data TLB) and hit the STLB (2nd Level TLB). Available PDIST counters: 0",
+ "PublicDescription": "Counts stores that miss the DTLB (Data TLB) and hit the STLB (2nd Level TLB).",
"SampleAfterValue": "100003",
"UMask": "0x20",
"Unit": "cpu_core"
@@ -143,7 +143,7 @@
"CounterMask": "1",
"EventCode": "0x13",
"EventName": "DTLB_STORE_MISSES.WALK_ACTIVE",
- "PublicDescription": "Counts cycles when at least one PMH (Page Miss Handler) is busy with a page walk for a store. Available PDIST counters: 0",
+ "PublicDescription": "Counts cycles when at least one PMH (Page Miss Handler) is busy with a page walk for a store.",
"SampleAfterValue": "100003",
"UMask": "0x10",
"Unit": "cpu_core"
@@ -162,7 +162,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x13",
"EventName": "DTLB_STORE_MISSES.WALK_COMPLETED",
- "PublicDescription": "Counts completed page walks (all page sizes) caused by demand data stores. This implies it missed in the DTLB and further levels of TLB. The page walk can end with or without a fault. Available PDIST counters: 0",
+ "PublicDescription": "Counts completed page walks (all page sizes) caused by demand data stores. This implies it missed in the DTLB and further levels of TLB. The page walk can end with or without a fault.",
"SampleAfterValue": "100003",
"UMask": "0xe",
"Unit": "cpu_core"
@@ -172,7 +172,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x13",
"EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_1G",
- "PublicDescription": "Counts completed page walks (1G sizes) caused by demand data stores. This implies address translations missed in the DTLB and further levels of TLB. The page walk can end with or without a fault. Available PDIST counters: 0",
+ "PublicDescription": "Counts completed page walks (1G sizes) caused by demand data stores. This implies address translations missed in the DTLB and further levels of TLB. The page walk can end with or without a fault.",
"SampleAfterValue": "100003",
"UMask": "0x8",
"Unit": "cpu_core"
@@ -192,7 +192,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x13",
"EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_2M_4M",
- "PublicDescription": "Counts completed page walks (2M/4M sizes) caused by demand data stores. This implies address translations missed in the DTLB and further levels of TLB. The page walk can end with or without a fault. Available PDIST counters: 0",
+ "PublicDescription": "Counts completed page walks (2M/4M sizes) caused by demand data stores. This implies address translations missed in the DTLB and further levels of TLB. The page walk can end with or without a fault.",
"SampleAfterValue": "100003",
"UMask": "0x4",
"Unit": "cpu_core"
@@ -212,7 +212,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x13",
"EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_4K",
- "PublicDescription": "Counts completed page walks (4K sizes) caused by demand data stores. This implies address translations missed in the DTLB and further levels of TLB. The page walk can end with or without a fault. Available PDIST counters: 0",
+ "PublicDescription": "Counts completed page walks (4K sizes) caused by demand data stores. This implies address translations missed in the DTLB and further levels of TLB. The page walk can end with or without a fault.",
"SampleAfterValue": "100003",
"UMask": "0x2",
"Unit": "cpu_core"
@@ -232,7 +232,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x13",
"EventName": "DTLB_STORE_MISSES.WALK_PENDING",
- "PublicDescription": "Counts the number of page walks outstanding for a store in the PMH (Page Miss Handler) each cycle. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of page walks outstanding for a store in the PMH (Page Miss Handler) each cycle.",
"SampleAfterValue": "100003",
"UMask": "0x10",
"Unit": "cpu_core"
@@ -260,7 +260,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x11",
"EventName": "ITLB_MISSES.STLB_HIT",
- "PublicDescription": "Counts instruction fetch requests that miss the ITLB (Instruction TLB) and hit the STLB (Second-level TLB). Available PDIST counters: 0",
+ "PublicDescription": "Counts instruction fetch requests that miss the ITLB (Instruction TLB) and hit the STLB (Second-level TLB).",
"SampleAfterValue": "100003",
"UMask": "0x20",
"Unit": "cpu_core"
@@ -271,7 +271,7 @@
"CounterMask": "1",
"EventCode": "0x11",
"EventName": "ITLB_MISSES.WALK_ACTIVE",
- "PublicDescription": "Counts cycles when at least one PMH (Page Miss Handler) is busy with a page walk for a code (instruction fetch) request. Available PDIST counters: 0",
+ "PublicDescription": "Counts cycles when at least one PMH (Page Miss Handler) is busy with a page walk for a code (instruction fetch) request.",
"SampleAfterValue": "100003",
"UMask": "0x10",
"Unit": "cpu_core"
@@ -291,7 +291,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x11",
"EventName": "ITLB_MISSES.WALK_COMPLETED",
- "PublicDescription": "Counts completed page walks (all page sizes) caused by a code fetch. This implies it missed in the ITLB (Instruction TLB) and further levels of TLB. The page walk can end with or without a fault. Available PDIST counters: 0",
+ "PublicDescription": "Counts completed page walks (all page sizes) caused by a code fetch. This implies it missed in the ITLB (Instruction TLB) and further levels of TLB. The page walk can end with or without a fault.",
"SampleAfterValue": "100003",
"UMask": "0xe",
"Unit": "cpu_core"
@@ -311,7 +311,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x11",
"EventName": "ITLB_MISSES.WALK_COMPLETED_2M_4M",
- "PublicDescription": "Counts completed page walks (2M/4M page sizes) caused by a code fetch. This implies it missed in the ITLB (Instruction TLB) and further levels of TLB. The page walk can end with or without a fault. Available PDIST counters: 0",
+ "PublicDescription": "Counts completed page walks (2M/4M page sizes) caused by a code fetch. This implies it missed in the ITLB (Instruction TLB) and further levels of TLB. The page walk can end with or without a fault.",
"SampleAfterValue": "100003",
"UMask": "0x4",
"Unit": "cpu_core"
@@ -331,7 +331,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x11",
"EventName": "ITLB_MISSES.WALK_COMPLETED_4K",
- "PublicDescription": "Counts completed page walks (4K page sizes) caused by a code fetch. This implies it missed in the ITLB (Instruction TLB) and further levels of TLB. The page walk can end with or without a fault. Available PDIST counters: 0",
+ "PublicDescription": "Counts completed page walks (4K page sizes) caused by a code fetch. This implies it missed in the ITLB (Instruction TLB) and further levels of TLB. The page walk can end with or without a fault.",
"SampleAfterValue": "100003",
"UMask": "0x2",
"Unit": "cpu_core"
@@ -351,7 +351,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x11",
"EventName": "ITLB_MISSES.WALK_PENDING",
- "PublicDescription": "Counts the number of page walks outstanding for an outstanding code (instruction fetch) request in the PMH (Page Miss Handler) each cycle. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of page walks outstanding for an outstanding code (instruction fetch) request in the PMH (Page Miss Handler) each cycle.",
"SampleAfterValue": "100003",
"UMask": "0x10",
"Unit": "cpu_core"
diff --git a/tools/perf/pmu-events/arch/x86/pantherlake/cache.json b/tools/perf/pmu-events/arch/x86/pantherlake/cache.json
index c84f3d9fdb10..91f5ab908926 100644
--- a/tools/perf/pmu-events/arch/x86/pantherlake/cache.json
+++ b/tools/perf/pmu-events/arch/x86/pantherlake/cache.json
@@ -1,5 +1,136 @@
[
{
+ "BriefDescription": "Counts the number of cache lines replaced in L0 data cache.",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0x51",
+ "EventName": "L1D.L0_REPLACEMENT",
+ "PublicDescription": "Counts L0 data line replacements including opportunistic replacements, and replacements that require stall-for-replace or block-for-replace.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Cachelines replaced into the L1 d-cache. Successful replacements only (not blocked) and exclude WB-miss case",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0x51",
+ "EventName": "L1D.L1_REPLACEMENT",
+ "PublicDescription": "Counts cachelines replaced into the L1 d-cache.",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x4",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Cachelines replaced into the L0 and L1 d-cache. Successful replacements only (not blocked) and exclude WB-miss case",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0x51",
+ "EventName": "L1D.REPLACEMENT",
+ "PublicDescription": "Counts cachelines replaced into the L0 and L1 d-cache.",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x5",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Number of cycles a demand request has waited due to L1D Fill Buffer (FB) unavailability.",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0x49",
+ "EventName": "L1D_MISS.FB_FULL",
+ "PublicDescription": "Counts number of cycles a demand request has waited due to L1D Fill Buffer (FB) unavailability. Demand requests include cacheable/uncacheable demand load, store, lock or SW prefetch accesses.",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x2",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Number of cycles a demand request has waited due to L1D due to lack of L2 resources.",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0x49",
+ "EventName": "L1D_MISS.L2_STALLS",
+ "PublicDescription": "Counts number of cycles a demand request has waited due to L1D due to lack of L2 resources. Demand requests include cacheable/uncacheable demand load, store, lock or SW prefetch accesses.",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x4",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Number of demand requests that missed L1D cache",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0x49",
+ "EventName": "L1D_MISS.LOAD",
+ "PublicDescription": "Count occurrences (rising-edge) of DCACHE_PENDING sub-event0. Impl. sends per-port binary inc-bit the occupancy increases* (at FB alloc or promotion).",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x1",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Number of L1D misses that are outstanding",
+ "Counter": "2",
+ "EventCode": "0x48",
+ "EventName": "L1D_PENDING.LOAD",
+ "PublicDescription": "Counts number of L1D misses that are outstanding in each cycle, that is each cycle the number of Fill Buffers (FB) outstanding required by Demand Reads. FB either is held by demand loads, or it is held by non-demand loads and gets hit at least once by demand. The valid outstanding interval is defined until the FB deallocation by one of the following ways: from FB allocation, if FB is allocated by demand from the demand Hit FB, if it is allocated by hardware or software prefetch. Note: In the L1D, a Demand Read contains cacheable or noncacheable demand loads, including ones causing cache-line splits and reads due to page walks resulted from any request type.",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x1",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Cycles with L1D load Misses outstanding.",
+ "Counter": "2",
+ "CounterMask": "1",
+ "EventCode": "0x48",
+ "EventName": "L1D_PENDING.LOAD_CYCLES",
+ "PublicDescription": "Counts duration of L1D miss outstanding in cycles.",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x1",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "L2 cache lines filling L2",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0x25",
+ "EventName": "L2_LINES_IN.ALL",
+ "PublicDescription": "Counts the number of L2 cache lines filling the L2. Counting does not cover rejects.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1f",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Counts the number of cache lines filled into the L2 cache that are in Exclusive state",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x25",
+ "EventName": "L2_LINES_IN.E",
+ "PublicDescription": "Counts the number of cache lines filled into the L2 cache that are in Exclusive state. Counts on a per core basis.",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x4",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Modified cache lines that are evicted by L2 cache when triggered by an L2 cache fill.",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0x26",
+ "EventName": "L2_LINES_OUT.NON_SILENT",
+ "PublicDescription": "Counts the number of lines that are evicted by L2 cache when triggered by an L2 cache fill. Those lines are in Modified state. Modified lines are written back to L3",
+ "SampleAfterValue": "200003",
+ "UMask": "0x2",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Non-modified cache lines that are silently dropped by L2 cache.",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0x26",
+ "EventName": "L2_LINES_OUT.SILENT",
+ "PublicDescription": "Counts the number of lines that are silently dropped by L2 cache. These lines are typically in Shared or Exclusive state. A non-threaded event.",
+ "SampleAfterValue": "200003",
+ "UMask": "0x1",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Cache lines that have been L2 hardware prefetched but not used by demand accesses",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0x26",
+ "EventName": "L2_LINES_OUT.USELESS_HWPF",
+ "PublicDescription": "Counts the number of cache lines that have been prefetched by the L2 hardware prefetcher but not used by demand access when evicted from the L2 cache",
+ "SampleAfterValue": "200003",
+ "UMask": "0x4",
+ "Unit": "cpu_core"
+ },
+ {
"BriefDescription": "Counts the number of L2 cache accesses from front door requests for Code Read, Data Read, RFO, ITOM, and L2 Prefetches. Does not include rejects or recycles, per core event.",
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0x24",
@@ -9,6 +140,35 @@
"Unit": "cpu_atom"
},
{
+ "BriefDescription": "All accesses to L2 cache [This event is alias to L2_RQSTS.REFERENCES, L2_RQSTS.ANY]",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0x24",
+ "EventName": "L2_REQUEST.ALL",
+ "PublicDescription": "Counts all requests that were hit or true misses in L2 cache. True-miss excludes misses that were merged with ongoing L2 misses. [This event is alias to L2_RQSTS.REFERENCES, L2_RQSTS.ANY]",
+ "SampleAfterValue": "200003",
+ "UMask": "0xff",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Counts the number of L2 cache accesses from front door requests that resulted in a Hit. Does not include rejects or recycles, per core event.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x24",
+ "EventName": "L2_REQUEST.HIT",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x1bf",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Read requests with true-miss in L2 cache [This event is alias to L2_RQSTS.MISS]",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0x24",
+ "EventName": "L2_REQUEST.MISS",
+ "PublicDescription": "Counts read requests of any type with true-miss in the L2 cache. True-miss excludes L2 misses that were merged with ongoing L2 misses. [This event is alias to L2_RQSTS.MISS]",
+ "SampleAfterValue": "200003",
+ "UMask": "0x3f",
+ "Unit": "cpu_core"
+ },
+ {
"BriefDescription": "L2 code requests",
"Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0x24",
@@ -29,6 +189,106 @@
"Unit": "cpu_core"
},
{
+ "BriefDescription": "All accesses to L2 cache [This event is alias to L2_RQSTS.REFERENCES, L2_REQUEST.ALL]",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0x24",
+ "EventName": "L2_RQSTS.ANY",
+ "PublicDescription": "Counts all requests that were hit or true misses in L2 cache. True-miss excludes misses that were merged with ongoing L2 misses. [This event is alias to L2_RQSTS.REFERENCES, L2_REQUEST.ALL]",
+ "SampleAfterValue": "200003",
+ "UMask": "0xff",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "L2 cache hits when fetching instructions, code reads.",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0x24",
+ "EventName": "L2_RQSTS.CODE_RD_HIT",
+ "PublicDescription": "Counts L2 cache hits when fetching instructions, code reads.",
+ "SampleAfterValue": "200003",
+ "UMask": "0x44",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "L2 cache misses when fetching instructions",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0x24",
+ "EventName": "L2_RQSTS.CODE_RD_MISS",
+ "PublicDescription": "Counts L2 cache misses when fetching instructions.",
+ "SampleAfterValue": "200003",
+ "UMask": "0x24",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Demand Data Read requests that hit L2 cache",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0x24",
+ "EventName": "L2_RQSTS.DEMAND_DATA_RD_HIT",
+ "PublicDescription": "Counts the number of demand Data Read requests initiated by load instructions that hit L2 cache.",
+ "SampleAfterValue": "200003",
+ "UMask": "0x41",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Demand Data Read miss L2 cache",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0x24",
+ "EventName": "L2_RQSTS.DEMAND_DATA_RD_MISS",
+ "PublicDescription": "Counts demand Data Read requests with true-miss in the L2 cache. True-miss excludes misses that were merged with ongoing L2 misses. An access is counted once.",
+ "SampleAfterValue": "200003",
+ "UMask": "0x21",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Read requests with true-miss in L2 cache [This event is alias to L2_REQUEST.MISS]",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0x24",
+ "EventName": "L2_RQSTS.MISS",
+ "PublicDescription": "Counts read requests of any type with true-miss in the L2 cache. True-miss excludes L2 misses that were merged with ongoing L2 misses. [This event is alias to L2_REQUEST.MISS]",
+ "SampleAfterValue": "200003",
+ "UMask": "0x3f",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "All accesses to L2 cache [This event is alias to L2_REQUEST.ALL,L2_RQSTS.ANY]",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0x24",
+ "EventName": "L2_RQSTS.REFERENCES",
+ "PublicDescription": "Counts all requests that were hit or true misses in L2 cache. True-miss excludes misses that were merged with ongoing L2 misses. [This event is alias to L2_REQUEST.ALL,L2_RQSTS.ANY]",
+ "SampleAfterValue": "200003",
+ "UMask": "0xff",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "RFO requests that hit L2 cache",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0x24",
+ "EventName": "L2_RQSTS.RFO_HIT",
+ "PublicDescription": "Counts the RFO (Read-for-Ownership) requests that hit L2 cache.",
+ "SampleAfterValue": "200003",
+ "UMask": "0x42",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "RFO requests that miss L2 cache",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0x24",
+ "EventName": "L2_RQSTS.RFO_MISS",
+ "PublicDescription": "Counts the RFO (Read-for-Ownership) requests that miss L2 cache.",
+ "SampleAfterValue": "200003",
+ "UMask": "0x22",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Cycles when L1D is locked",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0x42",
+ "EventName": "LOCK_CYCLES.CACHE_LOCK_DURATION",
+ "PublicDescription": "This event counts the number of cycles when the L1D is locked.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x2",
+ "Unit": "cpu_core"
+ },
+ {
"BriefDescription": "Counts the number of cacheable memory requests that miss in the LLC. Counts on a per core basis.",
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0x2e",
@@ -69,12 +329,76 @@
"Unit": "cpu_core"
},
{
+ "BriefDescription": "Counts the number of unhalted cycles when the core is stalled due to an L1 demand load miss.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x34",
+ "EventName": "MEM_BOUND_STALLS_LOAD.ALL",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x7f",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of cycles the core is stalled due to a demand load which hit in the L2 cache.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x34",
+ "EventName": "MEM_BOUND_STALLS_LOAD.L2_HIT",
+ "PublicDescription": "Counts the number of cycles a core is stalled due to a demand load which hit in the L2 cache.",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x1",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of cycles the core is stalled due to a demand load which hit in the L2 cache.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x34",
+ "EventName": "MEM_BOUND_STALLS_LOAD.L2_MISS",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x7e",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of unhalted cycles when the core is stalled due to a demand load miss which hit in the LLC.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x34",
+ "EventName": "MEM_BOUND_STALLS_LOAD.LLC_HIT",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x6",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of unhalted cycles when the core is stalled due to a demand load miss which missed all the local caches.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x34",
+ "EventName": "MEM_BOUND_STALLS_LOAD.LLC_MISS",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x38",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of unhalted cycles when the core is stalled due to a demand load miss which missed all the caches. DRAM, MMIO or other LOCAL memory type provides the data.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x34",
+ "EventName": "MEM_BOUND_STALLS_LOAD.LLC_MISS_LOCALMEM",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x10",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of unhalted cycles when the core is stalled due to a demand load miss which missed all the caches, a snoop was required, and hits in other core or module on same die. Another core provides the data with a fwd, no fwd, or hitM.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x34",
+ "EventName": "MEM_BOUND_STALLS_LOAD.LLC_MISS_OTHERMOD",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x8",
+ "Unit": "cpu_atom"
+ },
+ {
"BriefDescription": "Counts all retired load instructions.",
"Counter": "0,1,2,3",
"Data_LA": "1",
"EventCode": "0xd0",
"EventName": "MEM_INST_RETIRED.ALL_LOADS",
- "PublicDescription": "Counts Instructions with at least one architecturally visible load retired. Available PDIST counters: 0",
+ "PublicDescription": "Counts Instructions with at least one architecturally visible load retired. Available PDIST counters: 0,1",
"SampleAfterValue": "1000003",
"UMask": "0x81",
"Unit": "cpu_core"
@@ -85,16 +409,462 @@
"Data_LA": "1",
"EventCode": "0xd0",
"EventName": "MEM_INST_RETIRED.ALL_STORES",
- "PublicDescription": "Counts all retired store instructions. Available PDIST counters: 0",
+ "PublicDescription": "Counts all retired store instructions. Available PDIST counters: 0,1",
"SampleAfterValue": "1000003",
"UMask": "0x82",
"Unit": "cpu_core"
},
{
+ "BriefDescription": "Retired software prefetch instructions.",
+ "Counter": "0,1,2,3",
+ "EventCode": "0xd0",
+ "EventName": "MEM_INST_RETIRED.ALL_SWPF",
+ "PublicDescription": "Counts all retired software prefetch instructions. Available PDIST counters: 0,1",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x84",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "All retired memory instructions.",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "Data_LA": "1",
+ "EventCode": "0xd0",
+ "EventName": "MEM_INST_RETIRED.ANY",
+ "PublicDescription": "Counts all retired memory instructions - loads and stores. Available PDIST counters: 0,1",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x87",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Retired load instructions with locked access.",
+ "Counter": "0,1,2,3",
+ "Data_LA": "1",
+ "EventCode": "0xd0",
+ "EventName": "MEM_INST_RETIRED.LOCK_LOADS",
+ "PublicDescription": "Counts retired load instructions with locked access. Available PDIST counters: 0,1",
+ "SampleAfterValue": "100007",
+ "UMask": "0x21",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Retired load instructions that split across a cacheline boundary.",
+ "Counter": "0,1,2,3",
+ "Data_LA": "1",
+ "EventCode": "0xd0",
+ "EventName": "MEM_INST_RETIRED.SPLIT_LOADS",
+ "PublicDescription": "Counts retired load instructions that split across a cacheline boundary. Available PDIST counters: 0,1",
+ "SampleAfterValue": "100003",
+ "UMask": "0x41",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Retired store instructions that split across a cacheline boundary.",
+ "Counter": "0,1,2,3",
+ "Data_LA": "1",
+ "EventCode": "0xd0",
+ "EventName": "MEM_INST_RETIRED.SPLIT_STORES",
+ "PublicDescription": "Counts retired store instructions that split across a cacheline boundary. Available PDIST counters: 0,1",
+ "SampleAfterValue": "100003",
+ "UMask": "0x42",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Retired instructions that hit the STLB.",
+ "Counter": "0,1,2,3",
+ "Data_LA": "1",
+ "EventCode": "0xd0",
+ "EventName": "MEM_INST_RETIRED.STLB_HIT_ANY",
+ "PublicDescription": "Number of retired instructions with a clean hit in the 2nd-level TLB (STLB). Available PDIST counters: 0,1",
+ "SampleAfterValue": "100003",
+ "UMask": "0xf",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Retired load instructions that hit the STLB.",
+ "Counter": "0,1,2,3",
+ "Data_LA": "1",
+ "EventCode": "0xd0",
+ "EventName": "MEM_INST_RETIRED.STLB_HIT_LOADS",
+ "PublicDescription": "Number of retired load instructions with a clean hit in the 2nd-level TLB (STLB). Available PDIST counters: 0,1",
+ "SampleAfterValue": "100003",
+ "UMask": "0x9",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Retired store instructions that hit the STLB.",
+ "Counter": "0,1,2,3",
+ "Data_LA": "1",
+ "EventCode": "0xd0",
+ "EventName": "MEM_INST_RETIRED.STLB_HIT_STORES",
+ "PublicDescription": "Number of retired store instructions that hit in the 2nd-level TLB (STLB). Available PDIST counters: 0,1",
+ "SampleAfterValue": "100003",
+ "UMask": "0xa",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Retired SWPF instructions that hit the STLB.",
+ "Counter": "0,1,2,3",
+ "EventCode": "0xd0",
+ "EventName": "MEM_INST_RETIRED.STLB_HIT_SWPF",
+ "PublicDescription": "Number of retired SWPF instructions that hit in the 2nd-level TLB (STLB). Available PDIST counters: 0,1",
+ "SampleAfterValue": "1000003",
+ "UMask": "0xc",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Retired instructions that miss the STLB.",
+ "Counter": "0,1,2,3",
+ "Data_LA": "1",
+ "EventCode": "0xd0",
+ "EventName": "MEM_INST_RETIRED.STLB_MISS_ANY",
+ "PublicDescription": "Number of retired instructions that (start a) miss in the 2nd-level TLB (STLB). Available PDIST counters: 0,1",
+ "SampleAfterValue": "100003",
+ "UMask": "0x17",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Retired load instructions that miss the STLB.",
+ "Counter": "0,1,2,3",
+ "Data_LA": "1",
+ "EventCode": "0xd0",
+ "EventName": "MEM_INST_RETIRED.STLB_MISS_LOADS",
+ "PublicDescription": "Number of retired load instructions that (start a) miss in the 2nd-level TLB (STLB). Available PDIST counters: 0,1",
+ "SampleAfterValue": "100003",
+ "UMask": "0x11",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Retired store instructions that miss the STLB.",
+ "Counter": "0,1,2,3",
+ "Data_LA": "1",
+ "EventCode": "0xd0",
+ "EventName": "MEM_INST_RETIRED.STLB_MISS_STORES",
+ "PublicDescription": "Number of retired store instructions that (start a) miss in the 2nd-level TLB (STLB). Available PDIST counters: 0,1",
+ "SampleAfterValue": "100003",
+ "UMask": "0x12",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Retired SWPF instructions that miss the STLB.",
+ "Counter": "0,1,2,3",
+ "EventCode": "0xd0",
+ "EventName": "MEM_INST_RETIRED.STLB_MISS_SWPF",
+ "PublicDescription": "Number of retired SWPF instructions that (start a) miss in the 2nd-level TLB (STLB). Available PDIST counters: 0,1",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x14",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Retired load instructions whose data sources were a cross-core Snoop hits and forwards data from an in on-package core cache (induced by NI$)",
+ "Counter": "0,1,2,3",
+ "Data_LA": "1",
+ "EventCode": "0xd2",
+ "EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_FWD",
+ "PublicDescription": "Counts retired load instructions whose data sources were a cross-core Snoop hits and forwards data from an in on-package core cache (induced by NI$) Available PDIST counters: 0,1",
+ "SampleAfterValue": "20011",
+ "UMask": "0x10",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Retired load instructions whose data sources were HitM responses from shared L3, Hit-with-FWD is normally excluded.",
+ "Counter": "0,1,2,3",
+ "Data_LA": "1",
+ "EventCode": "0xd2",
+ "EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_HITM",
+ "PublicDescription": "Counts retired load instructions whose data sources were HitM responses from shared L3, Hit-with-FWD is normally excluded. Available PDIST counters: 0,1",
+ "SampleAfterValue": "20011",
+ "UMask": "0x4",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Retired load instructions whose data sources were L3 hit and cross-core snoop missed in on-pkg core cache.",
+ "Counter": "0,1,2,3",
+ "Data_LA": "1",
+ "EventCode": "0xd2",
+ "EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_MISS",
+ "PublicDescription": "Counts the retired load instructions whose data sources were L3 hit and cross-core snoop missed in on-pkg core cache. Available PDIST counters: 0,1",
+ "SampleAfterValue": "20011",
+ "UMask": "0x1",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Retired load instructions whose data sources were L3 and cross-core snoop hits in on-pkg core cache",
+ "Counter": "0,1,2,3",
+ "Data_LA": "1",
+ "EventCode": "0xd2",
+ "EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_NO_FWD",
+ "PublicDescription": "Counts retired load instructions whose data sources were L3 and cross-core snoop hits in on-pkg core cache. Available PDIST counters: 0,1",
+ "SampleAfterValue": "20011",
+ "UMask": "0x2",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Retired load instructions which data source is memory side cache.",
+ "Counter": "0,1,2,3",
+ "Data_LA": "1",
+ "EventCode": "0xd3",
+ "EventName": "MEM_LOAD_L3_MISS_RETIRED.MEMSIDE_CACHE",
+ "PublicDescription": "Retired load instructions which data source is memory side cache. Available PDIST counters: 0,1",
+ "SampleAfterValue": "100007",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Retired instructions with at least 1 uncacheable load or lock.",
+ "Counter": "0,1,2,3",
+ "Data_LA": "1",
+ "EventCode": "0xd4",
+ "EventName": "MEM_LOAD_MISC_RETIRED.UC",
+ "PublicDescription": "Retired instructions with at least one load to uncacheable memory-type, or at least one cache-line split locked access (Bus Lock). Available PDIST counters: 0,1",
+ "SampleAfterValue": "100007",
+ "UMask": "0x4",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Number of completed demand load requests that missed the L1, but hit the FB(fill buffer), because a preceding miss to the same cacheline initiated the line to be brought into L1, but data is not yet ready in L1.",
+ "Counter": "0,1,2,3",
+ "Data_LA": "1",
+ "EventCode": "0xd1",
+ "EventName": "MEM_LOAD_RETIRED.FB_HIT",
+ "PublicDescription": "Counts retired load instructions with at least one uop was load missed in L1 but hit FB (Fill Buffers) due to preceding miss to the same cache line with data not ready. Available PDIST counters: 0,1",
+ "SampleAfterValue": "100007",
+ "UMask": "0x40",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Retired load instructions with L1 cache hits as data sources",
+ "Counter": "0,1,2,3",
+ "Data_LA": "1",
+ "EventCode": "0xd1",
+ "EventName": "MEM_LOAD_RETIRED.L1_HIT",
+ "PublicDescription": "Counts retired load instructions with at least one uop that hit in the L1 data cache. This event includes all SW prefetches and lock instructions regardless of the data source. Available PDIST counters: 0,1",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x101",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Counts retired load instructions with at least one uop that hit in the Level 0 of the L1 data cache. This event includes all SW prefetches and lock instructions regardless of the data source.",
+ "Counter": "0,1,2,3",
+ "Data_LA": "1",
+ "EventCode": "0xd1",
+ "EventName": "MEM_LOAD_RETIRED.L1_HIT_L0",
+ "PublicDescription": "Counts retired load instructions with at least one uop that hit in the Level 0 of the L1 data cache. This event includes all SW prefetches and lock instructions regardless of the data source. Available PDIST counters: 0,1",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x1",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Counts retired load instructions with at least one uop that hit in the Level 1 of the L1 data cache.",
+ "Counter": "0,1,2,3",
+ "EventCode": "0xd1",
+ "EventName": "MEM_LOAD_RETIRED.L1_HIT_L1",
+ "PublicDescription": "Counts retired load instructions with at least one uop that hit in the Level 1 of the L1 data cache. Available PDIST counters: 0,1",
+ "SampleAfterValue": "1000003",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Retired load instructions missed L1 cache as data sources",
+ "Counter": "0,1,2,3",
+ "Data_LA": "1",
+ "EventCode": "0xd1",
+ "EventName": "MEM_LOAD_RETIRED.L1_MISS",
+ "PublicDescription": "Counts retired load instructions with at least one uop that missed in the L1 cache. Available PDIST counters: 0,1",
+ "SampleAfterValue": "200003",
+ "UMask": "0x8",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Retired load instructions with L2 cache hits as data sources",
+ "Counter": "0,1,2,3",
+ "Data_LA": "1",
+ "EventCode": "0xd1",
+ "EventName": "MEM_LOAD_RETIRED.L2_HIT",
+ "PublicDescription": "Counts retired load instructions with L2 cache hits as data sources. Available PDIST counters: 0,1",
+ "SampleAfterValue": "200003",
+ "UMask": "0x2",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Retired load instructions missed L2 cache as data sources",
+ "Counter": "0,1,2,3",
+ "Data_LA": "1",
+ "EventCode": "0xd1",
+ "EventName": "MEM_LOAD_RETIRED.L2_MISS",
+ "PublicDescription": "Counts retired load instructions missed L2 cache as data sources. Available PDIST counters: 0,1",
+ "SampleAfterValue": "100021",
+ "UMask": "0x10",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Retired load instructions with L3 cache hits as data sources",
+ "Counter": "0,1,2,3",
+ "Data_LA": "1",
+ "EventCode": "0xd1",
+ "EventName": "MEM_LOAD_RETIRED.L3_HIT",
+ "PublicDescription": "Counts retired load instructions with at least one uop that hit in the L3 cache. Available PDIST counters: 0,1",
+ "SampleAfterValue": "100021",
+ "UMask": "0x4",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Retired load instructions missed L3 cache as data sources",
+ "Counter": "0,1,2,3",
+ "Data_LA": "1",
+ "EventCode": "0xd1",
+ "EventName": "MEM_LOAD_RETIRED.L3_MISS",
+ "PublicDescription": "Counts retired load instructions with at least one uop that missed in the L3 cache. Available PDIST counters: 0,1",
+ "SampleAfterValue": "50021",
+ "UMask": "0x20",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Counts the number of load ops retired that miss the L3 cache and hit in DRAM",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xd3",
+ "EventName": "MEM_LOAD_UOPS_L3_MISS_RETIRED.LOCAL_DRAM",
+ "PublicDescription": "Counts the number of load ops retired that miss the L3 cache and hit in DRAM Available PDIST counters: 0,1",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x1",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of load ops retired that miss the L3 cache and hit in memside cache.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xd3",
+ "EventName": "MEM_LOAD_UOPS_L3_MISS_RETIRED.MEMSIDE_CACHE",
+ "PublicDescription": "Counts the number of load ops retired that miss the L3 cache and hit in memside cache. Available PDIST counters: 0,1",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x40",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of load ops retired that hit in the L3 cache in which a snoop was required and modified data was forwarded.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xd4",
+ "EventName": "MEM_LOAD_UOPS_MISC_RETIRED.L3_HIT_SNOOP_HITM",
+ "PublicDescription": "Counts the number of load ops retired that hit in the L3 cache in which a snoop was required and modified data was forwarded. Available PDIST counters: 0,1",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x8",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of load ops retired that hit the L1 data cache.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xd1",
+ "EventName": "MEM_LOAD_UOPS_RETIRED.L1_HIT",
+ "PublicDescription": "Counts the number of load ops retired that hit the L1 data cache. Available PDIST counters: 0,1",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x1",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of load ops retired that miss in the L1 data cache.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xd1",
+ "EventName": "MEM_LOAD_UOPS_RETIRED.L1_MISS",
+ "PublicDescription": "Counts the number of load ops retired that miss in the L1 data cache. Available PDIST counters: 0,1",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x40",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of load ops retired that hit in the L2 cache.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xd1",
+ "EventName": "MEM_LOAD_UOPS_RETIRED.L2_HIT",
+ "PublicDescription": "Counts the number of load ops retired that hit in the L2 cache. Available PDIST counters: 0,1",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x2",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of load ops retired that miss in the L2 cache.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xd1",
+ "EventName": "MEM_LOAD_UOPS_RETIRED.L2_MISS",
+ "PublicDescription": "Counts the number of load ops retired that miss in the L2 cache. Available PDIST counters: 0,1",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x80",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of load ops retired that hit in the L3 cache.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xd1",
+ "EventName": "MEM_LOAD_UOPS_RETIRED.L3_HIT",
+ "PublicDescription": "Counts the number of load ops retired that hit in the L3 cache. Available PDIST counters: 0,1",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x1c",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of cycles that uops are blocked for any of the following reasons: load buffer, store buffer or RSV full.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x04",
+ "EventName": "MEM_SCHEDULER_BLOCK.ALL",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x7",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of cycles that uops are blocked due to a load buffer full condition.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x04",
+ "EventName": "MEM_SCHEDULER_BLOCK.LD_BUF",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x2",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of cycles that uops are blocked due to an RSV full condition.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x04",
+ "EventName": "MEM_SCHEDULER_BLOCK.RSV",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x4",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of cycles that uops are blocked due to a store buffer full condition.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x04",
+ "EventName": "MEM_SCHEDULER_BLOCK.ST_BUF",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x1",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "MEM_STORE_RETIRED.L2_HIT",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0x44",
+ "EventName": "MEM_STORE_RETIRED.L2_HIT",
+ "SampleAfterValue": "200003",
+ "UMask": "0x1",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Number of cache-lines required by retired stores whose Data Source is: Memory Side Cache",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0x44",
+ "EventName": "MEM_STORE_RETIRED.MEMSIDE_CACHE",
+ "SampleAfterValue": "100021",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Counts the number of memory uops retired. A single uop that performs both a load AND a store will be counted as 1, not 2 (e.g. ADD [mem], CONST).",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xd0",
+ "EventName": "MEM_UOPS_RETIRED.ALL",
+ "PublicDescription": "Counts the number of memory uops retired. A single uop that performs both a load AND a store will be counted as 1, not 2 (e.g. ADD [mem], CONST). Available PDIST counters: 0,1",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x83",
+ "Unit": "cpu_atom"
+ },
+ {
"BriefDescription": "Counts the number of load ops retired.",
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xd0",
"EventName": "MEM_UOPS_RETIRED.ALL_LOADS",
+ "PublicDescription": "Counts the number of load ops retired. Available PDIST counters: 0,1",
"SampleAfterValue": "1000003",
"UMask": "0x81",
"Unit": "cpu_atom"
@@ -104,130 +874,246 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xd0",
"EventName": "MEM_UOPS_RETIRED.ALL_STORES",
+ "PublicDescription": "Counts the number of store ops retired. Available PDIST counters: 0,1",
"SampleAfterValue": "1000003",
"UMask": "0x82",
"Unit": "cpu_atom"
},
{
"BriefDescription": "Counts the number of tagged load uops retired that exceed the latency threshold defined in MEC_CR_PEBS_LD_LAT_THRESHOLD - Only counts with PEBS enabled.",
- "Counter": "0,1",
+ "Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xd0",
"EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_1024",
"MSRIndex": "0x3F6",
"MSRValue": "0x400",
+ "PublicDescription": "Counts the number of tagged load uops retired that exceed the latency threshold defined in MEC_CR_PEBS_LD_LAT_THRESHOLD - Only counts with PEBS enabled. Available PDIST counters: 0,1",
"SampleAfterValue": "1000003",
"UMask": "0x5",
"Unit": "cpu_atom"
},
{
"BriefDescription": "Counts the number of tagged load uops retired that exceed the latency threshold defined in MEC_CR_PEBS_LD_LAT_THRESHOLD - Only counts with PEBS enabled.",
- "Counter": "0,1",
+ "Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xd0",
"EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_128",
"MSRIndex": "0x3F6",
"MSRValue": "0x80",
+ "PublicDescription": "Counts the number of tagged load uops retired that exceed the latency threshold defined in MEC_CR_PEBS_LD_LAT_THRESHOLD - Only counts with PEBS enabled. Available PDIST counters: 0,1",
"SampleAfterValue": "1000003",
"UMask": "0x5",
"Unit": "cpu_atom"
},
{
"BriefDescription": "Counts the number of tagged load uops retired that exceed the latency threshold defined in MEC_CR_PEBS_LD_LAT_THRESHOLD - Only counts with PEBS enabled.",
- "Counter": "0,1",
+ "Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xd0",
"EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_16",
"MSRIndex": "0x3F6",
"MSRValue": "0x10",
+ "PublicDescription": "Counts the number of tagged load uops retired that exceed the latency threshold defined in MEC_CR_PEBS_LD_LAT_THRESHOLD - Only counts with PEBS enabled. Available PDIST counters: 0,1",
"SampleAfterValue": "1000003",
"UMask": "0x5",
"Unit": "cpu_atom"
},
{
"BriefDescription": "Counts the number of tagged load uops retired that exceed the latency threshold defined in MEC_CR_PEBS_LD_LAT_THRESHOLD - Only counts with PEBS enabled.",
- "Counter": "0,1",
+ "Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xd0",
"EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_2048",
"MSRIndex": "0x3F6",
"MSRValue": "0x800",
+ "PublicDescription": "Counts the number of tagged load uops retired that exceed the latency threshold defined in MEC_CR_PEBS_LD_LAT_THRESHOLD - Only counts with PEBS enabled. Available PDIST counters: 0,1",
"SampleAfterValue": "1000003",
"UMask": "0x5",
"Unit": "cpu_atom"
},
{
"BriefDescription": "Counts the number of tagged load uops retired that exceed the latency threshold defined in MEC_CR_PEBS_LD_LAT_THRESHOLD - Only counts with PEBS enabled.",
- "Counter": "0,1",
+ "Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xd0",
"EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_256",
"MSRIndex": "0x3F6",
"MSRValue": "0x100",
+ "PublicDescription": "Counts the number of tagged load uops retired that exceed the latency threshold defined in MEC_CR_PEBS_LD_LAT_THRESHOLD - Only counts with PEBS enabled. Available PDIST counters: 0,1",
"SampleAfterValue": "1000003",
"UMask": "0x5",
"Unit": "cpu_atom"
},
{
"BriefDescription": "Counts the number of tagged load uops retired that exceed the latency threshold defined in MEC_CR_PEBS_LD_LAT_THRESHOLD - Only counts with PEBS enabled.",
- "Counter": "0,1",
+ "Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xd0",
"EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_32",
"MSRIndex": "0x3F6",
"MSRValue": "0x20",
+ "PublicDescription": "Counts the number of tagged load uops retired that exceed the latency threshold defined in MEC_CR_PEBS_LD_LAT_THRESHOLD - Only counts with PEBS enabled. Available PDIST counters: 0,1",
"SampleAfterValue": "1000003",
"UMask": "0x5",
"Unit": "cpu_atom"
},
{
"BriefDescription": "Counts the number of tagged load uops retired that exceed the latency threshold defined in MEC_CR_PEBS_LD_LAT_THRESHOLD - Only counts with PEBS enabled.",
- "Counter": "0,1",
+ "Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xd0",
"EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_4",
"MSRIndex": "0x3F6",
"MSRValue": "0x4",
+ "PublicDescription": "Counts the number of tagged load uops retired that exceed the latency threshold defined in MEC_CR_PEBS_LD_LAT_THRESHOLD - Only counts with PEBS enabled. Available PDIST counters: 0,1",
"SampleAfterValue": "1000003",
"UMask": "0x5",
"Unit": "cpu_atom"
},
{
"BriefDescription": "Counts the number of tagged load uops retired that exceed the latency threshold defined in MEC_CR_PEBS_LD_LAT_THRESHOLD - Only counts with PEBS enabled.",
- "Counter": "0,1",
+ "Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xd0",
"EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_512",
"MSRIndex": "0x3F6",
"MSRValue": "0x200",
+ "PublicDescription": "Counts the number of tagged load uops retired that exceed the latency threshold defined in MEC_CR_PEBS_LD_LAT_THRESHOLD - Only counts with PEBS enabled. Available PDIST counters: 0,1",
"SampleAfterValue": "1000003",
"UMask": "0x5",
"Unit": "cpu_atom"
},
{
"BriefDescription": "Counts the number of tagged load uops retired that exceed the latency threshold defined in MEC_CR_PEBS_LD_LAT_THRESHOLD - Only counts with PEBS enabled.",
- "Counter": "0,1",
+ "Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xd0",
"EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_64",
"MSRIndex": "0x3F6",
"MSRValue": "0x40",
+ "PublicDescription": "Counts the number of tagged load uops retired that exceed the latency threshold defined in MEC_CR_PEBS_LD_LAT_THRESHOLD - Only counts with PEBS enabled. Available PDIST counters: 0,1",
"SampleAfterValue": "1000003",
"UMask": "0x5",
"Unit": "cpu_atom"
},
{
"BriefDescription": "Counts the number of tagged load uops retired that exceed the latency threshold defined in MEC_CR_PEBS_LD_LAT_THRESHOLD - Only counts with PEBS enabled.",
- "Counter": "0,1",
+ "Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xd0",
"EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_8",
"MSRIndex": "0x3F6",
"MSRValue": "0x8",
+ "PublicDescription": "Counts the number of tagged load uops retired that exceed the latency threshold defined in MEC_CR_PEBS_LD_LAT_THRESHOLD - Only counts with PEBS enabled. Available PDIST counters: 0,1",
"SampleAfterValue": "1000003",
"UMask": "0x5",
"Unit": "cpu_atom"
},
{
+ "BriefDescription": "Counts the number of load uops retired that performed one or more locks",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xd0",
+ "EventName": "MEM_UOPS_RETIRED.LOCK_LOADS",
+ "PublicDescription": "Counts the number of load uops retired that performed one or more locks Available PDIST counters: 0,1",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x21",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of memory uops retired that were splits.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xd0",
+ "EventName": "MEM_UOPS_RETIRED.SPLIT",
+ "PublicDescription": "Counts the number of memory uops retired that were splits. Available PDIST counters: 0,1",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x43",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of retired split load uops.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xd0",
+ "EventName": "MEM_UOPS_RETIRED.SPLIT_LOADS",
+ "PublicDescription": "Counts the number of retired split load uops. Available PDIST counters: 0,1",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x41",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of retired split store uops.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xd0",
+ "EventName": "MEM_UOPS_RETIRED.SPLIT_STORES",
+ "PublicDescription": "Counts the number of retired split store uops. Available PDIST counters: 0,1",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x42",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of memory uops retired that missed in the second level TLB.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xd0",
+ "EventName": "MEM_UOPS_RETIRED.STLB_MISS",
+ "PublicDescription": "Counts the number of memory uops retired that missed in the second level TLB. Available PDIST counters: 0,1",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x13",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of load uops retired that miss in the second Level TLB.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xd0",
+ "EventName": "MEM_UOPS_RETIRED.STLB_MISS_LOADS",
+ "PublicDescription": "Counts the number of load uops retired that miss in the second Level TLB. Available PDIST counters: 0,1",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x11",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of store uops retired that miss in the second level TLB.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xd0",
+ "EventName": "MEM_UOPS_RETIRED.STLB_MISS_STORES",
+ "PublicDescription": "Counts the number of store uops retired that miss in the second level TLB. Available PDIST counters: 0,1",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x12",
+ "Unit": "cpu_atom"
+ },
+ {
"BriefDescription": "Counts the number of stores uops retired same as MEM_UOPS_RETIRED.ALL_STORES",
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xd0",
"EventName": "MEM_UOPS_RETIRED.STORE_LATENCY",
+ "PublicDescription": "Counts the number of stores uops retired same as MEM_UOPS_RETIRED.ALL_STORES Available PDIST counters: 0,1",
"SampleAfterValue": "1000003",
"UMask": "0x6",
"Unit": "cpu_atom"
},
{
+ "BriefDescription": "Retired memory uops for any access",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0xe5",
+ "EventName": "MEM_UOP_RETIRED.ANY",
+ "PublicDescription": "Number of retired micro-operations (uops) for load or store memory accesses",
+ "SampleAfterValue": "1000003",
+ "UMask": "0xf",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Counts writebacks of modified cachelines that were supplied by the L3 cache.",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0x2A,0x2B",
+ "EventName": "OCR.COREWB_M.L3_HIT",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x7E001E00008",
+ "PublicDescription": "Counts writebacks of modified cachelines that were supplied by the L3 cache. Available PDIST counters: 0",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Counts writebacks of non-modified cachelines that were supplied by the L3 cache.",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0x2A,0x2B",
+ "EventName": "OCR.COREWB_NONM.L3_HIT",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x7E001E01000",
+ "PublicDescription": "Counts writebacks of non-modified cachelines that were supplied by the L3 cache. Available PDIST counters: 0",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1",
+ "Unit": "cpu_core"
+ },
+ {
"BriefDescription": "Counts demand data reads that have any type of response.",
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xB7",
@@ -241,7 +1127,7 @@
},
{
"BriefDescription": "Counts demand data reads that have any type of response.",
- "Counter": "0,1,2,3",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0x2A,0x2B",
"EventName": "OCR.DEMAND_DATA_RD.ANY_RESPONSE",
"MSRIndex": "0x1a6,0x1a7",
@@ -252,6 +1138,30 @@
"Unit": "cpu_core"
},
{
+ "BriefDescription": "Counts demand data reads that were supplied by the L3 cache where a snoop hit in another cores caches, data forwarding is required as the data is modified.",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0x2A,0x2B",
+ "EventName": "OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x40001E00001",
+ "PublicDescription": "Counts demand data reads that were supplied by the L3 cache where a snoop hit in another cores caches, data forwarding is required as the data is modified. Available PDIST counters: 0",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Counts demand data reads that were supplied by the L3 cache where a snoop hit in another cores caches which forwarded the unmodified data to the requesting core.",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0x2A,0x2B",
+ "EventName": "OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x20001E00001",
+ "PublicDescription": "Counts demand data reads that were supplied by the L3 cache where a snoop hit in another cores caches which forwarded the unmodified data to the requesting core. Available PDIST counters: 0",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1",
+ "Unit": "cpu_core"
+ },
+ {
"BriefDescription": "Counts demand read for ownership (RFO) requests and software prefetches for exclusive ownership (PREFETCHW) that have any type of response.",
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xB7",
@@ -265,7 +1175,7 @@
},
{
"BriefDescription": "Counts demand read for ownership (RFO) requests and software prefetches for exclusive ownership (PREFETCHW) that have any type of response.",
- "Counter": "0,1,2,3",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0x2A,0x2B",
"EventName": "OCR.DEMAND_RFO.ANY_RESPONSE",
"MSRIndex": "0x1a6,0x1a7",
@@ -274,5 +1184,230 @@
"SampleAfterValue": "100003",
"UMask": "0x1",
"Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Counts demand read for ownership (RFO) requests and software prefetches for exclusive ownership (PREFETCHW) that were supplied by the L3 cache where a snoop hit in another cores caches, data forwarding is required as the data is modified.",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0x2A,0x2B",
+ "EventName": "OCR.DEMAND_RFO.L3_HIT.SNOOP_HITM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x40001E00002",
+ "PublicDescription": "Counts demand read for ownership (RFO) requests and software prefetches for exclusive ownership (PREFETCHW) that were supplied by the L3 cache where a snoop hit in another cores caches, data forwarding is required as the data is modified. Available PDIST counters: 0",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Counts all data read, code read, RFO and ITOM requests including demands and prefetches to the core caches (L1 or L2) that were supplied by the L3 cache.",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0x2A,0x2B",
+ "EventName": "OCR.READS_TO_CORE.L3_HIT",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x7E001E04477",
+ "PublicDescription": "Counts all data read, code read, RFO and ITOM requests including demands and prefetches to the core caches (L1 or L2) that were supplied by the L3 cache. Available PDIST counters: 0",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Any memory transaction that reached the SQ.",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0x21",
+ "EventName": "OFFCORE_REQUESTS.ALL_REQUESTS",
+ "PublicDescription": "Counts memory transactions reached the super queue including requests initiated by the core, all L3 prefetches, page walks, etc..",
+ "SampleAfterValue": "100003",
+ "UMask": "0x80",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Demand and prefetch data reads",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0x21",
+ "EventName": "OFFCORE_REQUESTS.DATA_RD",
+ "PublicDescription": "Counts the demand and prefetch data reads. All Core Data Reads include cacheable 'Demands' and L2 prefetchers (not L3 prefetchers). Counting also covers reads due to page walks resulted from any request type.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x8",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Cacheable and Non-Cacheable code read requests",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0x21",
+ "EventName": "OFFCORE_REQUESTS.DEMAND_CODE_RD",
+ "PublicDescription": "Counts both cacheable and Non-Cacheable code read requests.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x2",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Demand Data Read requests sent to uncore",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0x21",
+ "EventName": "OFFCORE_REQUESTS.DEMAND_DATA_RD",
+ "PublicDescription": "Counts the Demand Data Read requests sent to uncore. Use it in conjunction with OFFCORE_REQUESTS_OUTSTANDING to determine average latency in the uncore.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Demand RFO requests including regular RFOs, locks, ItoM",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0x21",
+ "EventName": "OFFCORE_REQUESTS.DEMAND_RFO",
+ "PublicDescription": "Counts the demand RFO (read for ownership) requests including regular RFOs, locks, ItoM.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x4",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Cycles when offcore outstanding cacheable Core Data Read transactions are present in SuperQueue (SQ), queue to uncore.",
+ "Counter": "0,1,2,3",
+ "CounterMask": "1",
+ "EventCode": "0x20",
+ "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD",
+ "PublicDescription": "Counts cycles when offcore outstanding cacheable Core Data Read transactions are present in the super queue. A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation). See corresponding Umask under OFFCORE_REQUESTS.",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x8",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Cycles with offcore outstanding Code Reads transactions in the SuperQueue (SQ), queue to uncore.",
+ "Counter": "0,1,2,3",
+ "CounterMask": "1",
+ "EventCode": "0x20",
+ "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_CODE_RD",
+ "PublicDescription": "Counts the number of offcore outstanding Code Reads transactions in the super queue every cycle. The 'Offcore outstanding' state of the transaction lasts from the L2 miss until the sending transaction completion to requestor (SQ deallocation). See the corresponding Umask under OFFCORE_REQUESTS.",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x2",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Cycles where at least 1 outstanding demand data read request is pending.",
+ "Counter": "0,1,2,3",
+ "CounterMask": "1",
+ "EventCode": "0x20",
+ "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_DATA_RD",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x1",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Cycles with offcore outstanding demand rfo reads transactions in SuperQueue (SQ), queue to uncore.",
+ "Counter": "0,1,2,3",
+ "CounterMask": "1",
+ "EventCode": "0x20",
+ "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO",
+ "PublicDescription": "Counts the number of offcore outstanding demand rfo Reads transactions in the super queue every cycle. The 'Offcore outstanding' state of the transaction lasts from the L2 miss until the sending transaction completion to requestor (SQ deallocation). See the corresponding Umask under OFFCORE_REQUESTS.",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x4",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Offcore outstanding cacheable Core Data Read transactions in SuperQueue (SQ), queue to uncore",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x20",
+ "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DATA_RD",
+ "PublicDescription": "Counts the number of offcore outstanding cacheable Core Data Read transactions in the super queue every cycle. A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation). See corresponding Umask under OFFCORE_REQUESTS.",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x8",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Offcore outstanding Code Reads transactions in the SuperQueue (SQ), queue to uncore, every cycle.",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x20",
+ "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_CODE_RD",
+ "PublicDescription": "Counts the number of offcore outstanding Code Reads transactions in the super queue every cycle. The 'Offcore outstanding' state of the transaction lasts from the L2 miss until the sending transaction completion to requestor (SQ deallocation). See the corresponding Umask under OFFCORE_REQUESTS.",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x2",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "For every cycle, increments by the number of outstanding demand data read requests pending.",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x20",
+ "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD",
+ "PublicDescription": "For every cycle, increments by the number of outstanding demand data read requests pending. Requests are considered outstanding from the time they miss the core's L2 cache until the transaction completion message is sent to the requestor.",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x1",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Store Read transactions pending for off-core. Highly correlated.",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x20",
+ "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_RFO",
+ "PublicDescription": "Counts the number of off-core outstanding read-for-ownership (RFO) store transactions every cycle. An RFO transaction is considered to be in the Off-core outstanding state between L2 cache miss and transaction completion.",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x4",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Counts bus locks, accounts for cache line split locks and UC locks.",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0x2c",
+ "EventName": "SQ_MISC.BUS_LOCK",
+ "PublicDescription": "Counts the more expensive bus lock needed to enforce cache coherency for certain memory accesses that need to be done atomically. Can be created by issuing an atomic instruction (via the LOCK prefix) which causes a cache line split or accesses uncacheable memory.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x10",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Counts the number of PREFETCHNTA, PREFETCHW, PREFETCHT0, PREFETCHT1 or PREFETCHT2 instructions executed.",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0x40",
+ "EventName": "SW_PREFETCH_ACCESS.ANY",
+ "SampleAfterValue": "100003",
+ "UMask": "0xf",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Number of PREFETCHNTA instructions executed.",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0x40",
+ "EventName": "SW_PREFETCH_ACCESS.NTA",
+ "PublicDescription": "Counts the number of PREFETCHNTA instructions executed.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Number of PREFETCHW instructions executed.",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0x40",
+ "EventName": "SW_PREFETCH_ACCESS.PREFETCHW",
+ "PublicDescription": "Counts the number of PREFETCHW instructions executed.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x8",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Number of PREFETCHT0 instructions executed.",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0x40",
+ "EventName": "SW_PREFETCH_ACCESS.T0",
+ "PublicDescription": "Counts the number of PREFETCHT0 instructions executed.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x2",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Number of PREFETCHT1 or PREFETCHT2 instructions executed.",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0x40",
+ "EventName": "SW_PREFETCH_ACCESS.T1_T2",
+ "PublicDescription": "Counts the number of PREFETCHT1 or PREFETCHT2 instructions executed.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x4",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to an icache miss",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x71",
+ "EventName": "TOPDOWN_FE_BOUND.ICACHE",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x20",
+ "Unit": "cpu_atom"
}
]
diff --git a/tools/perf/pmu-events/arch/x86/pantherlake/counter.json b/tools/perf/pmu-events/arch/x86/pantherlake/counter.json
index 69f158a97707..432b6946ccbc 100644
--- a/tools/perf/pmu-events/arch/x86/pantherlake/counter.json
+++ b/tools/perf/pmu-events/arch/x86/pantherlake/counter.json
@@ -1,12 +1,17 @@
[
{
"Unit": "cpu_atom",
- "CountersNumFixed": "3",
- "CountersNumGeneric": "39"
+ "CountersNumFixed": "7",
+ "CountersNumGeneric": "8"
},
{
"Unit": "cpu_core",
"CountersNumFixed": "4",
"CountersNumGeneric": "10"
+ },
+ {
+ "Unit": "iMC",
+ "CountersNumFixed": "0",
+ "CountersNumGeneric": "5"
}
] \ No newline at end of file
diff --git a/tools/perf/pmu-events/arch/x86/pantherlake/floating-point.json b/tools/perf/pmu-events/arch/x86/pantherlake/floating-point.json
new file mode 100644
index 000000000000..e306a45b22ee
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/pantherlake/floating-point.json
@@ -0,0 +1,359 @@
+[
+ {
+ "BriefDescription": "Cycles when floating-point divide unit is busy executing divide or square root operations.",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "CounterMask": "1",
+ "EventCode": "0xb0",
+ "EventName": "ARITH.FPDIV_ACTIVE",
+ "PublicDescription": "Counts cycles when divide unit is busy executing divide or square root operations. Accounts for floating-point operations only.",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x1",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Counts all microcode FP assists.",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0xc1",
+ "EventName": "ASSISTS.FP",
+ "PublicDescription": "Counts all microcode Floating Point assists.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x2",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "ASSISTS.SSE_AVX_MIX",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0xc1",
+ "EventName": "ASSISTS.SSE_AVX_MIX",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x10",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Number of FP-arith-uops dispatched on 1st VEC port (port 0). FP-arith-uops are of type ADD* / SUB* / MUL / FMA* / DPP.",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0xb3",
+ "EventName": "FP_ARITH_DISPATCHED.V0",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x1",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Number of FP-arith-uops dispatched on 2nd VEC port (port 1)",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0xb3",
+ "EventName": "FP_ARITH_DISPATCHED.V1",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x2",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Number of FP-arith-uops dispatched on 3rd VEC port (port 5)",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0xb3",
+ "EventName": "FP_ARITH_DISPATCHED.V2",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x4",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Number of FP-arith-uops dispatched on 4th VEC port",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0xb3",
+ "EventName": "FP_ARITH_DISPATCHED.V3",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x8",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Counts number of SSE/AVX computational 128-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 2 computation operations, one for each element. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0xc7",
+ "EventName": "FP_ARITH_OPS_RETIRED.128B_PACKED_DOUBLE",
+ "PublicDescription": "Number of SSE/AVX computational 128-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 2 computation operations, one for each element. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x4",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Number of SSE/AVX computational 128-bit packed single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 4 computation operations, one for each element. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP14 RSQRT14 SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0xc7",
+ "EventName": "FP_ARITH_OPS_RETIRED.128B_PACKED_SINGLE",
+ "PublicDescription": "Number of SSE/AVX computational 128-bit packed single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 4 computation operations, one for each element. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT RSQRT RCP DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x8",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Counts number of SSE/AVX computational 256-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 4 computation operations, one for each element. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0xc7",
+ "EventName": "FP_ARITH_OPS_RETIRED.256B_PACKED_DOUBLE",
+ "PublicDescription": "Number of SSE/AVX computational 256-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 4 computation operations, one for each element. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x10",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Counts number of SSE/AVX computational 256-bit packed single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 8 computation operations, one for each element. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT RSQRT RCP DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0xc7",
+ "EventName": "FP_ARITH_OPS_RETIRED.256B_PACKED_SINGLE",
+ "PublicDescription": "Number of SSE/AVX computational 256-bit packed single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 8 computation operations, one for each element. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT RSQRT RCP DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x20",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Number of SSE/AVX computational 128-bit packed single and 256-bit packed double precision FP instructions retired; some instructions will count twice as noted below. Each count represents 2 or/and 4 computation operations, 1 for each element. Applies to SSE* and AVX* packed single precision and packed double precision FP instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX RCP14 RSQRT14 SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB count twice as they perform 2 calculations per element.",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0xc7",
+ "EventName": "FP_ARITH_OPS_RETIRED.4_FLOPS",
+ "PublicDescription": "Number of SSE/AVX computational 128-bit packed single precision and 256-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 2 or/and 4 computation operations, one for each element. Applies to SSE* and AVX* packed single precision floating-point and packed double precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX RCP14 RSQRT14 SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x18",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Number of SSE/AVX computational scalar floating-point instructions retired; some instructions will count twice as noted below. Applies to SSE* and AVX* scalar, double and single precision floating-point: ADD SUB MUL DIV MIN MAX RCP14 RSQRT14 RANGE SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0xc7",
+ "EventName": "FP_ARITH_OPS_RETIRED.SCALAR",
+ "PublicDescription": "Number of SSE/AVX computational scalar single precision and double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 1 computational operation. Applies to SSE* and AVX* scalar single precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT RSQRT RCP FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x3",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Counts number of SSE/AVX computational scalar double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 1 computational operation. Applies to SSE* and AVX* scalar double precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0xc7",
+ "EventName": "FP_ARITH_OPS_RETIRED.SCALAR_DOUBLE",
+ "PublicDescription": "Number of SSE/AVX computational scalar double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 1 computational operation. Applies to SSE* and AVX* scalar double precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Counts number of SSE/AVX computational scalar single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 1 computational operation. Applies to SSE* and AVX* scalar single precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT RSQRT RCP FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0xc7",
+ "EventName": "FP_ARITH_OPS_RETIRED.SCALAR_SINGLE",
+ "PublicDescription": "Number of SSE/AVX computational scalar single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 1 computational operation. Applies to SSE* and AVX* scalar single precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT RSQRT RCP FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x2",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Number of any Vector retired FP arithmetic instructions",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0xc7",
+ "EventName": "FP_ARITH_OPS_RETIRED.VECTOR",
+ "PublicDescription": "Number of any Vector retired FP arithmetic instructions. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x3c",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "FP_ARITH_OPS_RETIRED.VECTOR_128B",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0xc7",
+ "EventName": "FP_ARITH_OPS_RETIRED.VECTOR_128B",
+ "SampleAfterValue": "100003",
+ "UMask": "0xc",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "FP_ARITH_OPS_RETIRED.VECTOR_256B",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0xc7",
+ "EventName": "FP_ARITH_OPS_RETIRED.VECTOR_256B",
+ "SampleAfterValue": "100003",
+ "UMask": "0x30",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Counts the number of all types of floating point operations per uop with all default weighting",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xc8",
+ "EventName": "FP_FLOPS_RETIRED.ALL",
+ "PublicDescription": "Counts the number of all types of floating point operations per uop with all default weighting Available PDIST counters: 0,1",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x3",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of floating point operations that produce 32 bit single precision results",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xc8",
+ "EventName": "FP_FLOPS_RETIRED.FP32",
+ "PublicDescription": "Counts the number of floating point operations that produce 32 bit single precision results Available PDIST counters: 0,1",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x2",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of floating point operations that produce 64 bit double precision results",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xc8",
+ "EventName": "FP_FLOPS_RETIRED.FP64",
+ "PublicDescription": "Counts the number of floating point operations that produce 64 bit double precision results Available PDIST counters: 0,1",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x1",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of retired instructions whose sources are a packed 128 bit double precision floating point. This may be SSE or AVX.128 operations.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xc7",
+ "EventName": "FP_INST_RETIRED.128B_DP",
+ "PublicDescription": "Counts the number of retired instructions whose sources are a packed 128 bit double precision floating point. This may be SSE or AVX.128 operations. Available PDIST counters: 0,1",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x8",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of retired instructions whose sources are a packed 128 bit single precision floating point. This may be SSE or AVX.128 operations.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xc7",
+ "EventName": "FP_INST_RETIRED.128B_SP",
+ "PublicDescription": "Counts the number of retired instructions whose sources are a packed 128 bit single precision floating point. This may be SSE or AVX.128 operations. Available PDIST counters: 0,1",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x4",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of retired instructions whose sources are a packed 256 bit double precision floating point.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xc7",
+ "EventName": "FP_INST_RETIRED.256B_DP",
+ "PublicDescription": "Counts the number of retired instructions whose sources are a packed 256 bit double precision floating point. Available PDIST counters: 0,1",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x20",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of retired instructions whose sources are a packed 256 bit single precision floating point.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xc7",
+ "EventName": "FP_INST_RETIRED.256B_SP",
+ "PublicDescription": "Counts the number of retired instructions whose sources are a packed 256 bit single precision floating point. Available PDIST counters: 0,1",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x10",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of retired instructions whose sources are a scalar 32bit single precision floating point.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xc7",
+ "EventName": "FP_INST_RETIRED.32B_SP",
+ "PublicDescription": "Counts the number of retired instructions whose sources are a scalar 32bit single precision floating point. Available PDIST counters: 0,1",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x1",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of retired instructions whose sources are a scalar 64 bit double precision floating point.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xc7",
+ "EventName": "FP_INST_RETIRED.64B_DP",
+ "PublicDescription": "Counts the number of retired instructions whose sources are a scalar 64 bit double precision floating point. Available PDIST counters: 0,1",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x2",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the total number of floating point retired instructions.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xc7",
+ "EventName": "FP_INST_RETIRED.ALL",
+ "PublicDescription": "Counts the total number of floating point retired instructions. Available PDIST counters: 0,1",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x3f",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of uops executed on all floating point ports.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xb2",
+ "EventName": "FP_VINT_UOPS_EXECUTED.ALL",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x1f",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of uops executed on floating point and vector integer port 0.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xb2",
+ "EventName": "FP_VINT_UOPS_EXECUTED.P0",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x2",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of uops executed on floating point and vector integer port 1.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xb2",
+ "EventName": "FP_VINT_UOPS_EXECUTED.P1",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x4",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of uops executed on floating point and vector integer port 2.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xb2",
+ "EventName": "FP_VINT_UOPS_EXECUTED.P2",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x8",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of uops executed on floating point and vector integer port 3.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xb2",
+ "EventName": "FP_VINT_UOPS_EXECUTED.P3",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x10",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of uops executed on floating point and vector integer port 0, 1, 2, 3.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xb2",
+ "EventName": "FP_VINT_UOPS_EXECUTED.PRIMARY",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x1e",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of uops executed on floating point and vector integer store data port.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xb2",
+ "EventName": "FP_VINT_UOPS_EXECUTED.STD",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x1",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of floating point operations retired that required microcode assist.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xc3",
+ "EventName": "MACHINE_CLEARS.FP_ASSIST",
+ "PublicDescription": "Counts the number of floating point operations retired that required microcode assist, which is not a reflection of the number of FP operations, instructions or uops.",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x4",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of floating point divide uops retired (x87 and sse, including x87 sqrt).",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xc2",
+ "EventName": "UOPS_RETIRED.FPDIV",
+ "PublicDescription": "Counts the number of floating point divide uops retired (x87 and sse, including x87 sqrt). Available PDIST counters: 0,1",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x40",
+ "Unit": "cpu_atom"
+ }
+]
diff --git a/tools/perf/pmu-events/arch/x86/pantherlake/frontend.json b/tools/perf/pmu-events/arch/x86/pantherlake/frontend.json
index aedf631e3c0f..d36faa683d3f 100644
--- a/tools/perf/pmu-events/arch/x86/pantherlake/frontend.json
+++ b/tools/perf/pmu-events/arch/x86/pantherlake/frontend.json
@@ -1,5 +1,370 @@
[
{
+ "BriefDescription": "Counts the total number of BACLEARS due to all branch types including conditional and unconditional jumps, returns, and indirect branches.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xe6",
+ "EventName": "BACLEARS.ANY",
+ "PublicDescription": "Counts the total number of BACLEARS, which occur when the Branch Target Buffer (BTB) prediction or lack thereof, was corrected by a later branch predictor in the frontend. Includes BACLEARS due to all branch types including conditional and unconditional jumps, returns, and indirect branches.",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x1",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Clears due to Unknown Branches.",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0x60",
+ "EventName": "BACLEARS.ANY",
+ "PublicDescription": "Number of times the front-end is resteered when it finds a branch instruction in a fetch line. This is called Unknown Branch which occurs for the first time a branch instruction is fetched or when the branch is not tracked by the BPU (Branch Prediction Unit) anymore.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Counts the number of BACLEARS due to a conditional jump.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xe6",
+ "EventName": "BACLEARS.COND",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x10",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of BACLEARS due to an indirect branch.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xe6",
+ "EventName": "BACLEARS.INDIRECT",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x2",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of BACLEARS due to a return branch.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xe6",
+ "EventName": "BACLEARS.RETURN",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x8",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of BACLEARS due to a direct, unconditional jump.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xe6",
+ "EventName": "BACLEARS.UNCOND",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x4",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Stalls caused by changing prefix length of the instruction.",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0x87",
+ "EventName": "DECODE.LCP",
+ "PublicDescription": "Counts cycles that the Instruction Length decoder (ILD) stalls occurred due to dynamically changing prefix length of the decoded instruction (by operand size prefix instruction 0x66, address size prefix instruction 0x67 or REX.W for Intel64). Count is proportional to the number of prefixes in a 16B-line. This may result in a three-cycle penalty for each LCP (Length changing prefix) in a 16-byte chunk.",
+ "SampleAfterValue": "500009",
+ "UMask": "0x1",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Cycles the Microcode Sequencer is busy.",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0x87",
+ "EventName": "DECODE.MS_BUSY",
+ "SampleAfterValue": "500009",
+ "UMask": "0x2",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "DSB-to-MITE switch true penalty cycles.",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0x61",
+ "EventName": "DSB2MITE_SWITCHES.PENALTY_CYCLES",
+ "PublicDescription": "Decode Stream Buffer (DSB) is a Uop-cache that holds translations of previously fetched instructions that were decoded by the legacy x86 decode pipeline (MITE). This event counts fetch penalty cycles when a transition occurs from DSB to MITE.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x2",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Retired ANT branches",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0xc6",
+ "EventName": "FRONTEND_RETIRED.ANY_ANT",
+ "MSRIndex": "0x3F7",
+ "MSRValue": "0x9",
+ "PublicDescription": "Always Not Taken (ANT) conditional retired branches (no BTB entry and not mispredicted) Available PDIST counters: 0,1",
+ "SampleAfterValue": "100007",
+ "UMask": "0x3",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Retired Instructions who experienced DSB miss.",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0xc6",
+ "EventName": "FRONTEND_RETIRED.ANY_DSB_MISS",
+ "MSRIndex": "0x3F7",
+ "MSRValue": "0x1",
+ "PublicDescription": "Counts retired Instructions that experienced DSB (Decode stream buffer i.e. the decoded instruction-cache) miss. Available PDIST counters: 0,1",
+ "SampleAfterValue": "100007",
+ "UMask": "0x3",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Retired Instructions who experienced a critical DSB miss.",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0xc6",
+ "EventName": "FRONTEND_RETIRED.DSB_MISS",
+ "MSRIndex": "0x3F7",
+ "MSRValue": "0x11",
+ "PublicDescription": "Number of retired Instructions that experienced a critical DSB (Decode stream buffer i.e. the decoded instruction-cache) miss. Critical means stalls were exposed to the back-end as a result of the DSB miss. Available PDIST counters: 0,1",
+ "SampleAfterValue": "100007",
+ "UMask": "0x3",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Counts the number of instructions retired that were tagged because empty issue slots were seen before the uop due to ITLB miss",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xc6",
+ "EventName": "FRONTEND_RETIRED.ITLB_MISS",
+ "PublicDescription": "Counts the number of instructions retired that were tagged because empty issue slots were seen before the uop due to ITLB miss Available PDIST counters: 0,1",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x10",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Retired Instructions who experienced iTLB true miss.",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0xc6",
+ "EventName": "FRONTEND_RETIRED.ITLB_MISS",
+ "MSRIndex": "0x3F7",
+ "MSRValue": "0x14",
+ "PublicDescription": "Counts retired Instructions that experienced iTLB (Instruction TLB) true miss. Available PDIST counters: 0,1",
+ "SampleAfterValue": "100007",
+ "UMask": "0x3",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Retired Instructions who experienced Instruction L1 Cache true miss.",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0xc6",
+ "EventName": "FRONTEND_RETIRED.L1I_MISS",
+ "MSRIndex": "0x3F7",
+ "MSRValue": "0x12",
+ "PublicDescription": "Counts retired Instructions who experienced Instruction L1 Cache true miss. Available PDIST counters: 0,1",
+ "SampleAfterValue": "100007",
+ "UMask": "0x3",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Retired Instructions who experienced Instruction L2 Cache true miss.",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0xc6",
+ "EventName": "FRONTEND_RETIRED.L2_MISS",
+ "MSRIndex": "0x3F7",
+ "MSRValue": "0x13",
+ "PublicDescription": "Counts retired Instructions who experienced Instruction L2 Cache true miss. Available PDIST counters: 0,1",
+ "SampleAfterValue": "100007",
+ "UMask": "0x3",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 128 cycles which was not interrupted by a back-end stall.",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0xc6",
+ "EventName": "FRONTEND_RETIRED.LATENCY_GE_128",
+ "MSRIndex": "0x3F7",
+ "MSRValue": "0x608006",
+ "PublicDescription": "Counts retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 128 cycles which was not interrupted by a back-end stall. Available PDIST counters: 0,1",
+ "SampleAfterValue": "100007",
+ "UMask": "0x3",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 16 cycles which was not interrupted by a back-end stall.",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0xc6",
+ "EventName": "FRONTEND_RETIRED.LATENCY_GE_16",
+ "MSRIndex": "0x3F7",
+ "MSRValue": "0x601006",
+ "PublicDescription": "Counts retired instructions that are delivered to the back-end after a front-end stall of at least 16 cycles. During this period the front-end delivered no uops. Available PDIST counters: 0,1",
+ "SampleAfterValue": "100007",
+ "UMask": "0x3",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Retired instructions after front-end starvation of at least 2 cycles",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0xc6",
+ "EventName": "FRONTEND_RETIRED.LATENCY_GE_2",
+ "MSRIndex": "0x3F7",
+ "MSRValue": "0x600206",
+ "PublicDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of at least 2 cycles which was not interrupted by a back-end stall. Available PDIST counters: 0,1",
+ "SampleAfterValue": "100007",
+ "UMask": "0x3",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 256 cycles which was not interrupted by a back-end stall.",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0xc6",
+ "EventName": "FRONTEND_RETIRED.LATENCY_GE_256",
+ "MSRIndex": "0x3F7",
+ "MSRValue": "0x610006",
+ "PublicDescription": "Counts retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 256 cycles which was not interrupted by a back-end stall. Available PDIST counters: 0,1",
+ "SampleAfterValue": "100007",
+ "UMask": "0x3",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Retired instructions that are fetched after an interval where the front-end had at least 1 bubble-slot for a period of 2 cycles which was not interrupted by a back-end stall.",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0xc6",
+ "EventName": "FRONTEND_RETIRED.LATENCY_GE_2_BUBBLES_GE_1",
+ "MSRIndex": "0x3F7",
+ "MSRValue": "0x100206",
+ "PublicDescription": "Counts retired instructions that are delivered to the back-end after the front-end had at least 1 bubble-slot for a period of 2 cycles. A bubble-slot is an empty issue-pipeline slot while there was no RAT stall. Available PDIST counters: 0,1",
+ "SampleAfterValue": "100007",
+ "UMask": "0x3",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 32 cycles which was not interrupted by a back-end stall.",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0xc6",
+ "EventName": "FRONTEND_RETIRED.LATENCY_GE_32",
+ "MSRIndex": "0x3F7",
+ "MSRValue": "0x602006",
+ "PublicDescription": "Counts retired instructions that are delivered to the back-end after a front-end stall of at least 32 cycles. During this period the front-end delivered no uops. Available PDIST counters: 0,1",
+ "SampleAfterValue": "100007",
+ "UMask": "0x3",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 4 cycles which was not interrupted by a back-end stall.",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0xc6",
+ "EventName": "FRONTEND_RETIRED.LATENCY_GE_4",
+ "MSRIndex": "0x3F7",
+ "MSRValue": "0x600406",
+ "PublicDescription": "Counts retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 4 cycles which was not interrupted by a back-end stall. Available PDIST counters: 0,1",
+ "SampleAfterValue": "100007",
+ "UMask": "0x3",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 512 cycles which was not interrupted by a back-end stall.",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0xc6",
+ "EventName": "FRONTEND_RETIRED.LATENCY_GE_512",
+ "MSRIndex": "0x3F7",
+ "MSRValue": "0x620006",
+ "PublicDescription": "Counts retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 512 cycles which was not interrupted by a back-end stall. Available PDIST counters: 0,1",
+ "SampleAfterValue": "100007",
+ "UMask": "0x3",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 64 cycles which was not interrupted by a back-end stall.",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0xc6",
+ "EventName": "FRONTEND_RETIRED.LATENCY_GE_64",
+ "MSRIndex": "0x3F7",
+ "MSRValue": "0x604006",
+ "PublicDescription": "Counts retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 64 cycles which was not interrupted by a back-end stall. Available PDIST counters: 0,1",
+ "SampleAfterValue": "100007",
+ "UMask": "0x3",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 8 cycles which was not interrupted by a back-end stall.",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0xc6",
+ "EventName": "FRONTEND_RETIRED.LATENCY_GE_8",
+ "MSRIndex": "0x3F7",
+ "MSRValue": "0x600806",
+ "PublicDescription": "Counts retired instructions that are delivered to the back-end after a front-end stall of at least 8 cycles. During this period the front-end delivered no uops. Available PDIST counters: 0,1",
+ "SampleAfterValue": "100007",
+ "UMask": "0x3",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Mispredicted Retired ANT branches",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0xc6",
+ "EventName": "FRONTEND_RETIRED.MISP_ANT",
+ "MSRIndex": "0x3F7",
+ "MSRValue": "0x9",
+ "PublicDescription": "ANT retired branches that got just mispredicted Available PDIST counters: 0,1",
+ "SampleAfterValue": "100007",
+ "UMask": "0x2",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Counts flows delivered by the Microcode Sequencer",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0xc6",
+ "EventName": "FRONTEND_RETIRED.MS_FLOWS",
+ "MSRIndex": "0x3F7",
+ "MSRValue": "0x8",
+ "PublicDescription": "Counts flows delivered by the Microcode Sequencer Available PDIST counters: 0,1",
+ "SampleAfterValue": "100007",
+ "UMask": "0x3",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Retired Instructions who experienced STLB (2nd level TLB) true miss.",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0xc6",
+ "EventName": "FRONTEND_RETIRED.STLB_MISS",
+ "MSRIndex": "0x3F7",
+ "MSRValue": "0x15",
+ "PublicDescription": "Counts retired Instructions that experienced STLB (2nd level TLB) true miss. Available PDIST counters: 0,1",
+ "SampleAfterValue": "100007",
+ "UMask": "0x3",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Retired instructions that caused clears due to being Unknown Branches.",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0xc6",
+ "EventName": "FRONTEND_RETIRED.UNKNOWN_BRANCH",
+ "MSRIndex": "0x3F7",
+ "MSRValue": "0x17",
+ "PublicDescription": "Number retired branch instructions that caused the front-end to be resteered when it finds the instruction in a fetch line. This is called Unknown Branch which occurs for the first time a branch instruction is fetched or when the branch is not tracked by the BPU (Branch Prediction Unit) anymore. Available PDIST counters: 0,1",
+ "SampleAfterValue": "100007",
+ "UMask": "0x3",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Counts the number of instructions retired that were tagged because empty issue slots were seen before the uop due to Instruction L1 cache miss, that hit in the L2 cache.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xc9",
+ "EventName": "FRONTEND_RETIRED_SOURCE.ICACHE_L2_HIT",
+ "PublicDescription": "Counts the number of instructions retired that were tagged because empty issue slots were seen before the uop due to Instruction L1 cache miss, that hit in the L2 cache. Available PDIST counters: 0,1",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x1",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of instructions retired that were tagged because empty issue slots were seen before the uop due to ITLB miss that hit in the second level TLB.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xc9",
+ "EventName": "FRONTEND_RETIRED_SOURCE.ITLB_STLB_HIT",
+ "PublicDescription": "Counts the number of instructions retired that were tagged because empty issue slots were seen before the uop due to ITLB miss that hit in the second level TLB. Available PDIST counters: 0,1",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x10",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of instructions retired that were tagged because empty issue slots were seen before the uop due to ITLB miss that also missed the second level TLB.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xc9",
+ "EventName": "FRONTEND_RETIRED_SOURCE.ITLB_STLB_MISS",
+ "PublicDescription": "Counts the number of instructions retired that were tagged because empty issue slots were seen before the uop due to ITLB miss that also missed the second level TLB. Available PDIST counters: 0,1",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x20",
+ "Unit": "cpu_atom"
+ },
+ {
"BriefDescription": "Counts every time the code stream enters into a new cache line by walking sequential from the previous line or being redirected by a jump.",
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0x80",
@@ -9,6 +374,15 @@
"Unit": "cpu_atom"
},
{
+ "BriefDescription": "Counts every time the code stream enters into a new cache line by walking sequential from the previous line or being redirected by a jump and the instruction cache registers bytes are present.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x80",
+ "EventName": "ICACHE.HIT",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x1",
+ "Unit": "cpu_atom"
+ },
+ {
"BriefDescription": "Counts every time the code stream enters into a new cache line by walking sequential from the previous line or being redirected by a jump and the instruction cache registers bytes are not present. -",
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0x80",
@@ -18,6 +392,134 @@
"Unit": "cpu_atom"
},
{
+ "BriefDescription": "Cycles where a code fetch is stalled due to L1 instruction cache miss.",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0x80",
+ "EventName": "ICACHE_DATA.STALLS",
+ "PublicDescription": "Counts cycles where a code line fetch is stalled due to an L1 instruction cache miss. The decode pipeline works at a 32 Byte granularity.",
+ "SampleAfterValue": "500009",
+ "UMask": "0x4",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "ICACHE_DATA.STALL_PERIODS",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "CounterMask": "1",
+ "EdgeDetect": "1",
+ "EventCode": "0x80",
+ "EventName": "ICACHE_DATA.STALL_PERIODS",
+ "SampleAfterValue": "500009",
+ "UMask": "0x4",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Cycles where a code fetch is stalled due to L1 instruction cache tag miss.",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0x83",
+ "EventName": "ICACHE_TAG.STALLS",
+ "PublicDescription": "Counts cycles where a code fetch is stalled due to L1 instruction cache tag miss.",
+ "SampleAfterValue": "200003",
+ "UMask": "0x4",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Cycles Decode Stream Buffer (DSB) is delivering any Uop",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "CounterMask": "1",
+ "EventCode": "0x79",
+ "EventName": "IDQ.DSB_CYCLES_ANY",
+ "PublicDescription": "Counts the number of cycles uops were delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x8",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Cycles DSB is delivering optimal number of Uops",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "CounterMask": "8",
+ "EventCode": "0x79",
+ "EventName": "IDQ.DSB_CYCLES_OK",
+ "PublicDescription": "Counts the number of cycles where optimal number of uops was delivered to the Instruction Decode Queue (IDQ) from the DSB (Decode Stream Buffer) path. Count includes uops that may 'bypass' the IDQ.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x8",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0x79",
+ "EventName": "IDQ.DSB_UOPS",
+ "PublicDescription": "Counts the number of uops delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x8",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Cycles MITE is delivering any Uop",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "CounterMask": "1",
+ "EventCode": "0x79",
+ "EventName": "IDQ.MITE_CYCLES_ANY",
+ "PublicDescription": "Counts the number of cycles uops were delivered to the Instruction Decode Queue (IDQ) from the MITE (legacy decode pipeline) path. During these cycles uops are not being delivered from the Decode Stream Buffer (DSB).",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x4",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Cycles MITE is delivering optimal number of Uops",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "CounterMask": "8",
+ "EventCode": "0x79",
+ "EventName": "IDQ.MITE_CYCLES_OK",
+ "PublicDescription": "Counts the number of cycles where optimal number of uops was delivered to the Instruction Decode Queue (IDQ) from the MITE (legacy decode pipeline) path. During these cycles uops are not being delivered from the Decode Stream Buffer (DSB).",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x4",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) from MITE path",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0x79",
+ "EventName": "IDQ.MITE_UOPS",
+ "PublicDescription": "Counts the number of uops delivered to Instruction Decode Queue (IDQ) from the MITE path. This also means that uops are not being delivered from the Decode Stream Buffer (DSB).",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x4",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Cycles when uops are being delivered to IDQ while MS is busy",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "CounterMask": "1",
+ "EventCode": "0x79",
+ "EventName": "IDQ.MS_CYCLES_ANY",
+ "PublicDescription": "Counts cycles during which uops are being delivered to Instruction Decode Queue (IDQ) while the Microcode Sequencer (MS) is busy. Uops maybe initiated by Decode Stream Buffer (DSB) or MITE.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x20",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Number of switches from DSB or MITE to the MS",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "CounterMask": "1",
+ "EdgeDetect": "1",
+ "EventCode": "0x79",
+ "EventName": "IDQ.MS_SWITCHES",
+ "PublicDescription": "Number of switches from DSB (Decode Stream Buffer) or MITE (legacy decode pipeline) to the Microcode Sequencer.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x20",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Uops initiated by MITE or Decode Stream Buffer (DSB) and delivered to Instruction Decode Queue (IDQ) while Microcode Sequencer (MS) is busy",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0x79",
+ "EventName": "IDQ.MS_UOPS",
+ "PublicDescription": "Counts the number of uops initiated by MITE or Decode Stream Buffer (DSB) and delivered to Instruction Decode Queue (IDQ) while the Microcode Sequencer (MS) is busy. Counting includes uops that may 'bypass' the IDQ.",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x20",
+ "Unit": "cpu_core"
+ },
+ {
"BriefDescription": "This event counts a subset of the Topdown Slots event that when no operation was delivered to the back-end pipeline due to instruction fetch limitations when the back-end could have accepted more operations. Common examples include instruction cache misses or x86 instruction decode limitations.",
"Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0x9c",
@@ -26,5 +528,38 @@
"SampleAfterValue": "1000003",
"UMask": "0x1",
"Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Cycles when optimal number of uops was delivered to the back-end when the back-end is not stalled",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "CounterMask": "1",
+ "EventCode": "0x9c",
+ "EventName": "IDQ_BUBBLES.CYCLES_FE_WAS_OK",
+ "Invert": "1",
+ "PublicDescription": "Counts the number of cycles when the optimal number of uops were delivered by the Instruction Decode Queue (IDQ) to the back-end of the pipeline when there was no back-end stalls.",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x1",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Cycles when no uops are delivered by the IDQ for 2 or more cycles when backend of the machine is not stalled - normally indicating a Fetch Latency issue",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0x9c",
+ "EventName": "IDQ_BUBBLES.FETCH_LATENCY",
+ "PublicDescription": "Counts the number of cycles when no uops were delivered by the Instruction Decode Queue (IDQ) to the back-end of the pipeline when there was no back-end stalls for 2 or more cycles - normally indicating a Fetch Latency issue.",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x4",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Cycles when no uops are not delivered by the IDQ when backend of the machine is not stalled",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "CounterMask": "8",
+ "EventCode": "0x9c",
+ "EventName": "IDQ_BUBBLES.STARVATION_CYCLES",
+ "PublicDescription": "Counts the number of cycles when no uops were delivered by the Instruction Decode Queue (IDQ) to the back-end of the pipeline when there was no back-end stalls.",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x1",
+ "Unit": "cpu_core"
}
]
diff --git a/tools/perf/pmu-events/arch/x86/pantherlake/memory.json b/tools/perf/pmu-events/arch/x86/pantherlake/memory.json
index 47daee8cc00f..3d31e620383d 100644
--- a/tools/perf/pmu-events/arch/x86/pantherlake/memory.json
+++ b/tools/perf/pmu-events/arch/x86/pantherlake/memory.json
@@ -1,5 +1,42 @@
[
{
+ "BriefDescription": "Counts the number of cycles that the head (oldest load) of the load buffer is stalled due to a core bound stall including a store address match, a DTLB miss or a page walk that detains the load from retiring.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x05",
+ "EventName": "LD_HEAD.L1_BOUND_AT_RET",
+ "SampleAfterValue": "1000003",
+ "UMask": "0xf4",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of cycles that the head (oldest load) of the load buffer is stalled due to request buffers full or lock in progress.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x05",
+ "EventName": "LD_HEAD.WCB_FULL",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x2",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of machine clears due to memory ordering caused by a snoop from an external agent. Does not count internally generated machine clears such as those due to memory disambiguation.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xc3",
+ "EventName": "MACHINE_CLEARS.MEMORY_ORDERING",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x2",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Number of machine clears due to memory ordering conflicts.",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0xc3",
+ "EventName": "MACHINE_CLEARS.MEMORY_ORDERING",
+ "PublicDescription": "Counts the number of Machine Clears detected dye to memory ordering. Memory Ordering Machine Clears may apply when a memory read may not conform to the memory ordering rules of the x86 architecture",
+ "SampleAfterValue": "100003",
+ "UMask": "0x2",
+ "Unit": "cpu_core"
+ },
+ {
"BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 1024 cycles.",
"Counter": "2,3,4,5,6,7,8,9",
"Data_LA": "1",
@@ -7,7 +44,7 @@
"EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_1024",
"MSRIndex": "0x3F6",
"MSRValue": "0x400",
- "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 1024 cycles. Reported latency may be longer than just the memory latency. Available PDIST counters: 0",
+ "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 1024 cycles. Reported latency may be longer than just the memory latency. Available PDIST counters: 0,1",
"SampleAfterValue": "53",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -20,7 +57,7 @@
"EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_128",
"MSRIndex": "0x3F6",
"MSRValue": "0x80",
- "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 128 cycles. Reported latency may be longer than just the memory latency. Available PDIST counters: 0",
+ "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 128 cycles. Reported latency may be longer than just the memory latency. Available PDIST counters: 0,1",
"SampleAfterValue": "1009",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -33,7 +70,7 @@
"EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_16",
"MSRIndex": "0x3F6",
"MSRValue": "0x10",
- "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 16 cycles. Reported latency may be longer than just the memory latency. Available PDIST counters: 0",
+ "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 16 cycles. Reported latency may be longer than just the memory latency. Available PDIST counters: 0,1",
"SampleAfterValue": "20011",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -46,7 +83,7 @@
"EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_2048",
"MSRIndex": "0x3F6",
"MSRValue": "0x800",
- "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 2048 cycles. Reported latency may be longer than just the memory latency. Available PDIST counters: 0",
+ "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 2048 cycles. Reported latency may be longer than just the memory latency. Available PDIST counters: 0,1",
"SampleAfterValue": "23",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -59,7 +96,7 @@
"EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_256",
"MSRIndex": "0x3F6",
"MSRValue": "0x100",
- "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 256 cycles. Reported latency may be longer than just the memory latency. Available PDIST counters: 0",
+ "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 256 cycles. Reported latency may be longer than just the memory latency. Available PDIST counters: 0,1",
"SampleAfterValue": "503",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -72,7 +109,7 @@
"EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_32",
"MSRIndex": "0x3F6",
"MSRValue": "0x20",
- "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 32 cycles. Reported latency may be longer than just the memory latency. Available PDIST counters: 0",
+ "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 32 cycles. Reported latency may be longer than just the memory latency. Available PDIST counters: 0,1",
"SampleAfterValue": "100007",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -85,7 +122,7 @@
"EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_4",
"MSRIndex": "0x3F6",
"MSRValue": "0x4",
- "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 4 cycles. Reported latency may be longer than just the memory latency. Available PDIST counters: 0",
+ "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 4 cycles. Reported latency may be longer than just the memory latency. Available PDIST counters: 0,1",
"SampleAfterValue": "100003",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -98,7 +135,7 @@
"EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_512",
"MSRIndex": "0x3F6",
"MSRValue": "0x200",
- "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 512 cycles. Reported latency may be longer than just the memory latency. Available PDIST counters: 0",
+ "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 512 cycles. Reported latency may be longer than just the memory latency. Available PDIST counters: 0,1",
"SampleAfterValue": "101",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -111,7 +148,7 @@
"EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_64",
"MSRIndex": "0x3F6",
"MSRValue": "0x40",
- "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 64 cycles. Reported latency may be longer than just the memory latency. Available PDIST counters: 0",
+ "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 64 cycles. Reported latency may be longer than just the memory latency. Available PDIST counters: 0,1",
"SampleAfterValue": "2003",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -124,7 +161,7 @@
"EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_8",
"MSRIndex": "0x3F6",
"MSRValue": "0x8",
- "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 8 cycles. Reported latency may be longer than just the memory latency. Available PDIST counters: 0",
+ "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 8 cycles. Reported latency may be longer than just the memory latency. Available PDIST counters: 0,1",
"SampleAfterValue": "50021",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -135,12 +172,32 @@
"Data_LA": "1",
"EventCode": "0xcd",
"EventName": "MEM_TRANS_RETIRED.STORE_SAMPLE",
- "PublicDescription": "Counts Retired memory accesses with at least 1 store operation. This PEBS event is the precisely-distributed (PDist) trigger covering all stores uops for sampling by the PEBS Store Latency Facility. The facility is described in Intel SDM Volume 3 section 19.9.8 Available PDIST counters: 0",
+ "PublicDescription": "Counts Retired memory accesses with at least 1 store operation. This PEBS event is the precisely-distributed (PDist) trigger covering all stores uops for sampling by the PEBS Store Latency Facility. The facility is described in Intel SDM Volume 3 section 19.9.8 Available PDIST counters: 0,1",
"SampleAfterValue": "1000003",
"UMask": "0x2",
"Unit": "cpu_core"
},
{
+ "BriefDescription": "Counts misaligned loads that are 4K page splits.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x13",
+ "EventName": "MISALIGN_MEM_REF.LOAD_PAGE_SPLIT",
+ "PublicDescription": "Counts misaligned loads that are 4K page splits. Available PDIST counters: 0,1",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x2",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts misaligned stores that are 4K page splits.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x13",
+ "EventName": "MISALIGN_MEM_REF.STORE_PAGE_SPLIT",
+ "PublicDescription": "Counts misaligned stores that are 4K page splits. Available PDIST counters: 0,1",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x4",
+ "Unit": "cpu_atom"
+ },
+ {
"BriefDescription": "Counts demand data reads that were supplied by DRAM.",
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xB7",
@@ -154,7 +211,7 @@
},
{
"BriefDescription": "Counts demand data reads that were supplied by DRAM.",
- "Counter": "0,1,2,3",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0x2A,0x2B",
"EventName": "OCR.DEMAND_DATA_RD.DRAM",
"MSRIndex": "0x1a6,0x1a7",
@@ -178,7 +235,7 @@
},
{
"BriefDescription": "Counts demand data reads that were not supplied by the L3 cache.",
- "Counter": "0,1,2,3",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0x2A,0x2B",
"EventName": "OCR.DEMAND_DATA_RD.L3_MISS",
"MSRIndex": "0x1a6,0x1a7",
@@ -202,7 +259,7 @@
},
{
"BriefDescription": "Counts demand read for ownership (RFO) requests and software prefetches for exclusive ownership (PREFETCHW) that were not supplied by the L3 cache.",
- "Counter": "0,1,2,3",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0x2A,0x2B",
"EventName": "OCR.DEMAND_RFO.L3_MISS",
"MSRIndex": "0x1a6,0x1a7",
@@ -211,5 +268,35 @@
"SampleAfterValue": "100003",
"UMask": "0x1",
"Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Counts demand data read requests that miss the L3 cache.",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0x21",
+ "EventName": "OFFCORE_REQUESTS.L3_MISS_DEMAND_DATA_RD",
+ "SampleAfterValue": "100003",
+ "UMask": "0x10",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Cycles where data return is pending for a Demand Data Read request who miss L3 cache.",
+ "Counter": "0,1,2,3",
+ "CounterMask": "1",
+ "EventCode": "0x20",
+ "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_L3_MISS_DEMAND_DATA_RD",
+ "PublicDescription": "Cycles with at least 1 Demand Data Read requests who miss L3 cache in the superQ.",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x10",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "For every cycle, increments by the number of demand data read requests pending that are known to have missed the L3 cache.",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x20",
+ "EventName": "OFFCORE_REQUESTS_OUTSTANDING.L3_MISS_DEMAND_DATA_RD",
+ "PublicDescription": "For every cycle, increments by the number of demand data read requests pending that are known to have missed the L3 cache. Note that this does not capture all elapsed cycles while requests are outstanding - only cycles from when the requests were known by the requesting core to have missed the L3 cache.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x10",
+ "Unit": "cpu_core"
}
]
diff --git a/tools/perf/pmu-events/arch/x86/pantherlake/other.json b/tools/perf/pmu-events/arch/x86/pantherlake/other.json
new file mode 100644
index 000000000000..d49651d4f112
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/pantherlake/other.json
@@ -0,0 +1,44 @@
+[
+ {
+ "BriefDescription": "Count all other hardware assists or traps that are not necessarily architecturally exposed (through a software handler) beyond FP; SSE-AVX mix and A/D assists who are counted by dedicated sub-events.",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0xc1",
+ "EventName": "ASSISTS.HARDWARE",
+ "PublicDescription": "Count all other hardware assists or traps that are not necessarily architecturally exposed (through a software handler) beyond FP; SSE-AVX mix and A/D assists who are counted by dedicated sub-events. This includes, but not limited to, assists at EXE or MEM uop writeback like AVX* load/store/gather/scatter (non-FP GSSE-assist ) , assists generated by ROB like PEBS and RTIT, Uncore trap, RAR (Remote Action Request) and CET (Control flow Enforcement Technology) assists.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x4",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "ASSISTS.PAGE_FAULT",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0xc1",
+ "EventName": "ASSISTS.PAGE_FAULT",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x8",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Counts streaming stores that have any type of response.",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0x2A,0x2B",
+ "EventName": "OCR.STREAMING_WR.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x10800",
+ "PublicDescription": "Counts streaming stores that have any type of response. Available PDIST counters: 0",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Cycles the uncore cannot take further requests",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "CounterMask": "1",
+ "EventCode": "0x2d",
+ "EventName": "XQ.FULL",
+ "PublicDescription": "number of cycles when the thread is active and the uncore cannot take any further requests (for example prefetches, loads or stores initiated by the Core that miss the L2 cache).",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x1",
+ "Unit": "cpu_core"
+ }
+]
diff --git a/tools/perf/pmu-events/arch/x86/pantherlake/pipeline.json b/tools/perf/pmu-events/arch/x86/pantherlake/pipeline.json
index 2caf2f85327f..fb87d30c403d 100644
--- a/tools/perf/pmu-events/arch/x86/pantherlake/pipeline.json
+++ b/tools/perf/pmu-events/arch/x86/pantherlake/pipeline.json
@@ -1,10 +1,51 @@
[
{
+ "BriefDescription": "Cycles when divide unit is busy executing divide or square root operations.",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "CounterMask": "1",
+ "EventCode": "0xb0",
+ "EventName": "ARITH.DIV_ACTIVE",
+ "PublicDescription": "Counts cycles when divide unit is busy executing divide or square root operations. Accounts for integer and floating-point operations.",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x9",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Cycles when integer divide unit is busy executing divide or square root operations.",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "CounterMask": "1",
+ "EventCode": "0xb0",
+ "EventName": "ARITH.IDIV_ACTIVE",
+ "PublicDescription": "Counts cycles when divide unit is busy executing divide or square root operations. Accounts for integer operations only.",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x8",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Number of occurrences where a microcode assist is invoked by hardware.",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0xc1",
+ "EventName": "ASSISTS.ANY",
+ "PublicDescription": "Counts the number of occurrences where a microcode assist is invoked by hardware. Examples include AD (page Access Dirty), FP and AVX related assists.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1f",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Counts cycles where the pipeline is stalled due to serializing operations.",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0xa2",
+ "EventName": "BE_STALLS.SCOREBOARD",
+ "SampleAfterValue": "100003",
+ "UMask": "0x2",
+ "Unit": "cpu_core"
+ },
+ {
"BriefDescription": "Counts the total number of branch instructions retired for all branch types.",
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc4",
"EventName": "BR_INST_RETIRED.ALL_BRANCHES",
- "PublicDescription": "Counts the total number of instructions in which the instruction pointer (IP) of the processor is resteered due to a branch instruction and the branch instruction successfully retires. All branch type instructions are accounted for.",
+ "PublicDescription": "Counts the total number of instructions in which the instruction pointer (IP) of the processor is resteered due to a branch instruction and the branch instruction successfully retires. All branch type instructions are accounted for. Available PDIST counters: 0,1",
"SampleAfterValue": "1000003",
"Unit": "cpu_atom"
},
@@ -13,8 +54,242 @@
"Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0xc4",
"EventName": "BR_INST_RETIRED.ALL_BRANCHES",
- "PublicDescription": "Counts all branch instructions retired. Available PDIST counters: 0",
+ "PublicDescription": "Counts all branch instructions retired. Available PDIST counters: 0,1",
+ "SampleAfterValue": "400009",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Conditional branch instructions retired.",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0xc4",
+ "EventName": "BR_INST_RETIRED.COND",
+ "PublicDescription": "Counts conditional branch instructions retired. Available PDIST counters: 0,1",
+ "SampleAfterValue": "400009",
+ "UMask": "0x7",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Counts the number of not taken conditional branch instructions retired.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xc4",
+ "EventName": "BR_INST_RETIRED.COND_NTAKEN",
+ "PublicDescription": "Counts the number of not taken conditional branch instructions retired. Available PDIST counters: 0,1",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x4",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Not taken branch instructions retired.",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0xc4",
+ "EventName": "BR_INST_RETIRED.COND_NTAKEN",
+ "PublicDescription": "Counts not taken branch instructions retired. Available PDIST counters: 0,1",
+ "SampleAfterValue": "400009",
+ "UMask": "0x4",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Taken conditional branch instructions retired.",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0xc4",
+ "EventName": "BR_INST_RETIRED.COND_TAKEN",
+ "PublicDescription": "Counts taken conditional branch instructions retired. Available PDIST counters: 0,1",
+ "SampleAfterValue": "400009",
+ "UMask": "0x3",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Taken backward conditional branch instructions retired.",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0xc4",
+ "EventName": "BR_INST_RETIRED.COND_TAKEN_BWD",
+ "PublicDescription": "Counts taken backward conditional branch instructions retired. Available PDIST counters: 0,1",
"SampleAfterValue": "400009",
+ "UMask": "0x1",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Taken forward conditional branch instructions retired.",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0xc4",
+ "EventName": "BR_INST_RETIRED.COND_TAKEN_FWD",
+ "PublicDescription": "Counts taken forward conditional branch instructions retired. Available PDIST counters: 0,1",
+ "SampleAfterValue": "400009",
+ "UMask": "0x2",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Far branch instructions retired.",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0xc4",
+ "EventName": "BR_INST_RETIRED.FAR_BRANCH",
+ "PublicDescription": "Counts far branch instructions retired. Available PDIST counters: 0,1",
+ "SampleAfterValue": "100007",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "This event is deprecated. [This event is alias to BR_INST_RETIRED.NEAR_INDIRECT]",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "Deprecated": "1",
+ "EventCode": "0xc4",
+ "EventName": "BR_INST_RETIRED.INDIRECT",
+ "PublicDescription": "This event is deprecated. [This event is alias to BR_INST_RETIRED.NEAR_INDIRECT] Available PDIST counters: 0,1",
+ "SampleAfterValue": "100003",
+ "UMask": "0x50",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Counts the number of near CALL branch instructions retired.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xc4",
+ "EventName": "BR_INST_RETIRED.NEAR_CALL",
+ "PublicDescription": "Counts the number of near CALL branch instructions retired. Available PDIST counters: 0,1",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x30",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Direct and indirect near call instructions retired.",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0xc4",
+ "EventName": "BR_INST_RETIRED.NEAR_CALL",
+ "PublicDescription": "Counts both direct and indirect near call instructions retired. Available PDIST counters: 0,1",
+ "SampleAfterValue": "100007",
+ "UMask": "0x30",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "near relative call instructions retired. [This event is alias to BR_INST_RETIRED.NEAR_REL_CALL]",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0xc4",
+ "EventName": "BR_INST_RETIRED.NEAR_DIRECT_CALL",
+ "PublicDescription": "Counts near relative call instructions retired. [This event is alias to BR_INST_RETIRED.NEAR_REL_CALL] Available PDIST counters: 0,1",
+ "SampleAfterValue": "100007",
+ "UMask": "0x20",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "near relative jump instructions retired. [This event is alias to BR_INST_RETIRED.NEAR_REL_JMP]",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0xc4",
+ "EventName": "BR_INST_RETIRED.NEAR_DIRECT_JMP",
+ "PublicDescription": "Counts near relative jump instructions retired. [This event is alias to BR_INST_RETIRED.NEAR_REL_JMP] Available PDIST counters: 0,1",
+ "SampleAfterValue": "100007",
+ "UMask": "0x80",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Indirect near branch instructions retired (excluding returns) [This event is alias to BR_INST_RETIRED.INDIRECT]",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0xc4",
+ "EventName": "BR_INST_RETIRED.NEAR_INDIRECT",
+ "PublicDescription": "Counts near indirect branch instructions retired excluding returns. TSX abort is an indirect branch. [This event is alias to BR_INST_RETIRED.INDIRECT] Available PDIST counters: 0,1",
+ "SampleAfterValue": "100003",
+ "UMask": "0x50",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Indirect near call instructions retired. [This event is alias to BR_INST_RETIRED.NEAR_IND_CALL]",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0xc4",
+ "EventName": "BR_INST_RETIRED.NEAR_INDIRECT_CALL",
+ "PublicDescription": "Counts indirect near call instructions retired. [This event is alias to BR_INST_RETIRED.NEAR_IND_CALL] Available PDIST counters: 0,1",
+ "SampleAfterValue": "100007",
+ "UMask": "0x10",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Indirect near jump instructions retired. [This event is alias to BR_INST_RETIRED.NEAR_IND_JMP]",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0xc4",
+ "EventName": "BR_INST_RETIRED.NEAR_INDIRECT_JMP",
+ "PublicDescription": "Counts indirect near jump instructions retired. [This event is alias to BR_INST_RETIRED.NEAR_IND_JMP] Available PDIST counters: 0,1",
+ "SampleAfterValue": "100007",
+ "UMask": "0x40",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "This event is deprecated. [This event is alias to BR_INST_RETIRED.NEAR_INDIRECT_CALL]",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "Deprecated": "1",
+ "EventCode": "0xc4",
+ "EventName": "BR_INST_RETIRED.NEAR_IND_CALL",
+ "PublicDescription": "This event is deprecated. [This event is alias to BR_INST_RETIRED.NEAR_INDIRECT_CALL] Available PDIST counters: 0,1",
+ "SampleAfterValue": "100007",
+ "UMask": "0x10",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "This event is deprecated. [This event is alias to BR_INST_RETIRED.NEAR_INDIRECT_JMP]",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "Deprecated": "1",
+ "EventCode": "0xc4",
+ "EventName": "BR_INST_RETIRED.NEAR_IND_JMP",
+ "PublicDescription": "This event is deprecated. [This event is alias to BR_INST_RETIRED.NEAR_INDIRECT_JMP] Available PDIST counters: 0,1",
+ "SampleAfterValue": "100007",
+ "UMask": "0x40",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Indirect and Direct Relative near jump instructions retired.",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0xc4",
+ "EventName": "BR_INST_RETIRED.NEAR_JMP",
+ "PublicDescription": "Counts near jump instructions retired. Available PDIST counters: 0,1",
+ "SampleAfterValue": "100007",
+ "UMask": "0xc0",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "This event is deprecated. [This event is alias to BR_INST_RETIRED.NEAR_DIRECT_CALL]",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "Deprecated": "1",
+ "EventCode": "0xc4",
+ "EventName": "BR_INST_RETIRED.NEAR_REL_CALL",
+ "PublicDescription": "This event is deprecated. [This event is alias to BR_INST_RETIRED.NEAR_DIRECT_CALL] Available PDIST counters: 0,1",
+ "SampleAfterValue": "100007",
+ "UMask": "0x20",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "This event is deprecated. [This event is alias to BR_INST_RETIRED.NEAR_DIRECT_JMP]",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "Deprecated": "1",
+ "EventCode": "0xc4",
+ "EventName": "BR_INST_RETIRED.NEAR_REL_JMP",
+ "PublicDescription": "This event is deprecated. [This event is alias to BR_INST_RETIRED.NEAR_DIRECT_JMP] Available PDIST counters: 0,1",
+ "SampleAfterValue": "100007",
+ "UMask": "0x80",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Return instructions retired.",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0xc4",
+ "EventName": "BR_INST_RETIRED.NEAR_RETURN",
+ "PublicDescription": "Counts return instructions retired. Available PDIST counters: 0,1",
+ "SampleAfterValue": "100007",
+ "UMask": "0x8",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Counts the number of near taken branch instructions retired.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xc4",
+ "EventName": "BR_INST_RETIRED.NEAR_TAKEN",
+ "PublicDescription": "Counts the number of near taken branch instructions retired. Available PDIST counters: 0,1",
+ "SampleAfterValue": "1000003",
+ "UMask": "0xfb",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Taken branch instructions retired.",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0xc4",
+ "EventName": "BR_INST_RETIRED.NEAR_TAKEN",
+ "PublicDescription": "Counts taken branch instructions retired. Available PDIST counters: 0,1",
+ "SampleAfterValue": "400009",
+ "UMask": "0xfb",
"Unit": "cpu_core"
},
{
@@ -22,7 +297,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc5",
"EventName": "BR_MISP_RETIRED.ALL_BRANCHES",
- "PublicDescription": "Counts the total number of mispredicted branch instructions retired. All branch type instructions are accounted for. Prediction of the branch target address enables the processor to begin executing instructions before the non-speculative execution path is known. The branch prediction unit (BPU) predicts the target address based on the instruction pointer (IP) of the branch and on the execution path through which execution reached this IP. A branch misprediction occurs when the prediction is wrong, and results in discarding all instructions executed in the speculative path and re-fetching from the correct path.",
+ "PublicDescription": "Counts the total number of mispredicted branch instructions retired. All branch type instructions are accounted for. Prediction of the branch target address enables the processor to begin executing instructions before the non-speculative execution path is known. The branch prediction unit (BPU) predicts the target address based on the instruction pointer (IP) of the branch and on the execution path through which execution reached this IP. A branch misprediction occurs when the prediction is wrong, and results in discarding all instructions executed in the speculative path and re-fetching from the correct path. Available PDIST counters: 0,1",
"SampleAfterValue": "1000003",
"Unit": "cpu_atom"
},
@@ -31,15 +306,459 @@
"Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0xc5",
"EventName": "BR_MISP_RETIRED.ALL_BRANCHES",
- "PublicDescription": "Counts all the retired branch instructions that were mispredicted by the processor. A branch misprediction occurs when the processor incorrectly predicts the destination of the branch. When the misprediction is discovered at execution, all the instructions executed in the wrong (speculative) path must be discarded, and the processor must start fetching from the correct path. Available PDIST counters: 0",
+ "PublicDescription": "Counts all the retired branch instructions that were mispredicted by the processor. A branch misprediction occurs when the processor incorrectly predicts the destination of the branch. When the misprediction is discovered at execution, all the instructions executed in the wrong (speculative) path must be discarded, and the processor must start fetching from the correct path. Available PDIST counters: 0,1",
+ "SampleAfterValue": "400009",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "This event is deprecated. [This event is alias to BR_MISP_RETIRED.ALL_BRANCHES_TPEBS]",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "Deprecated": "1",
+ "EventCode": "0xc5",
+ "EventName": "BR_MISP_RETIRED.ALL_BRANCHES_COST",
+ "PublicDescription": "This event is deprecated. [This event is alias to BR_MISP_RETIRED.ALL_BRANCHES_TPEBS] Available PDIST counters: 0,1",
+ "SampleAfterValue": "400009",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "All mispredicted branch instructions retired. This precise event may be used to get the misprediction cost via the Retire_Latency field of PEBS. It fires on the instruction that immediately follows the mispredicted branch. [This event is alias to BR_MISP_RETIRED.ALL_BRANCHES_COST]",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0xc5",
+ "EventName": "BR_MISP_RETIRED.ALL_BRANCHES_TPEBS",
+ "PublicDescription": "All mispredicted branch instructions retired. This precise event may be used to get the misprediction cost via the Retire_Latency field of PEBS. It fires on the instruction that immediately follows the mispredicted branch. [This event is alias to BR_MISP_RETIRED.ALL_BRANCHES_COST] Available PDIST counters: 0,1",
+ "SampleAfterValue": "400009",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "This event is deprecated. [This event is alias to BR_MISP_RETIRED.NEAR_INDIRECT]",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "Deprecated": "1",
+ "EventCode": "0xc5",
+ "EventName": "BR_MISP_RETIRED.ALL_NEAR_IND",
+ "PublicDescription": "This event is deprecated. [This event is alias to BR_MISP_RETIRED.NEAR_INDIRECT] Available PDIST counters: 0,1",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x50",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Mispredicted conditional branch instructions retired.",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0xc5",
+ "EventName": "BR_MISP_RETIRED.COND",
+ "PublicDescription": "Counts mispredicted conditional branch instructions retired. Available PDIST counters: 0,1",
+ "SampleAfterValue": "400009",
+ "UMask": "0x7",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "This event is deprecated. [This event is alias to BR_MISP_RETIRED.COND_TPEBS]",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "Deprecated": "1",
+ "EventCode": "0xc5",
+ "EventName": "BR_MISP_RETIRED.COND_COST",
+ "PublicDescription": "This event is deprecated. [This event is alias to BR_MISP_RETIRED.COND_TPEBS] Available PDIST counters: 0,1",
+ "SampleAfterValue": "400009",
+ "UMask": "0x8007",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Counts the number of mispredicted not taken conditional branch instructions retired.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xc5",
+ "EventName": "BR_MISP_RETIRED.COND_NTAKEN",
+ "PublicDescription": "Counts the number of mispredicted not taken conditional branch instructions retired. Available PDIST counters: 0,1",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x4",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Mispredicted non-taken conditional branch instructions retired.",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0xc5",
+ "EventName": "BR_MISP_RETIRED.COND_NTAKEN",
+ "PublicDescription": "Counts the number of conditional branch instructions retired that were mispredicted and the branch direction was not taken. Available PDIST counters: 0,1",
+ "SampleAfterValue": "400009",
+ "UMask": "0x4",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "This event is deprecated. [This event is alias to BR_MISP_RETIRED.COND_NTAKEN_TPEBS]",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "Deprecated": "1",
+ "EventCode": "0xc5",
+ "EventName": "BR_MISP_RETIRED.COND_NTAKEN_COST",
+ "PublicDescription": "This event is deprecated. [This event is alias to BR_MISP_RETIRED.COND_NTAKEN_TPEBS] Available PDIST counters: 0,1",
+ "SampleAfterValue": "400009",
+ "UMask": "0x8004",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Mispredicted non-taken conditional branch instructions retired. This precise event may be used to get the misprediction cost via the Retire_Latency field of PEBS. It fires on the instruction that immediately follows the mispredicted branch. [This event is alias to BR_MISP_RETIRED.COND_NTAKEN_COST]",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0xc5",
+ "EventName": "BR_MISP_RETIRED.COND_NTAKEN_TPEBS",
+ "PublicDescription": "Mispredicted non-taken conditional branch instructions retired. This precise event may be used to get the misprediction cost via the Retire_Latency field of PEBS. It fires on the instruction that immediately follows the mispredicted branch. [This event is alias to BR_MISP_RETIRED.COND_NTAKEN_COST] Available PDIST counters: 0,1",
+ "SampleAfterValue": "400009",
+ "UMask": "0x8004",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Counts the number of mispredicted taken conditional branch instructions retired.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xc5",
+ "EventName": "BR_MISP_RETIRED.COND_TAKEN",
+ "PublicDescription": "Counts the number of mispredicted taken conditional branch instructions retired. Available PDIST counters: 0,1",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x3",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "number of branch instructions retired that were mispredicted and taken.",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0xc5",
+ "EventName": "BR_MISP_RETIRED.COND_TAKEN",
+ "PublicDescription": "Counts taken conditional mispredicted branch instructions retired. Available PDIST counters: 0,1",
+ "SampleAfterValue": "400009",
+ "UMask": "0x3",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "number of branch instructions retired that were mispredicted and taken backward.",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0xc5",
+ "EventName": "BR_MISP_RETIRED.COND_TAKEN_BWD",
+ "PublicDescription": "Counts taken backward conditional mispredicted branch instructions retired. Available PDIST counters: 0,1",
+ "SampleAfterValue": "400009",
+ "UMask": "0x1",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "This event is deprecated. [This event is alias to BR_MISP_RETIRED.COND_TAKEN_BWD_TPEBS]",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "Deprecated": "1",
+ "EventCode": "0xc5",
+ "EventName": "BR_MISP_RETIRED.COND_TAKEN_BWD_COST",
+ "PublicDescription": "This event is deprecated. [This event is alias to BR_MISP_RETIRED.COND_TAKEN_BWD_TPEBS] Available PDIST counters: 0,1",
+ "SampleAfterValue": "400009",
+ "UMask": "0x8001",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "number of branch instructions retired that were mispredicted and taken backward. This precise event may be used to get the misprediction cost via the Retire_Latency field of PEBS. It fires on the instruction that immediately follows the mispredicted branch. [This event is alias to BR_MISP_RETIRED.COND_TAKEN_BWD_COST]",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0xc5",
+ "EventName": "BR_MISP_RETIRED.COND_TAKEN_BWD_TPEBS",
+ "PublicDescription": "number of branch instructions retired that were mispredicted and taken backward. This precise event may be used to get the misprediction cost via the Retire_Latency field of PEBS. It fires on the instruction that immediately follows the mispredicted branch. [This event is alias to BR_MISP_RETIRED.COND_TAKEN_BWD_COST] Available PDIST counters: 0,1",
+ "SampleAfterValue": "400009",
+ "UMask": "0x8001",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "This event is deprecated. [This event is alias to BR_MISP_RETIRED.COND_TAKEN_TPEBS]",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "Deprecated": "1",
+ "EventCode": "0xc5",
+ "EventName": "BR_MISP_RETIRED.COND_TAKEN_COST",
+ "PublicDescription": "This event is deprecated. [This event is alias to BR_MISP_RETIRED.COND_TAKEN_TPEBS] Available PDIST counters: 0,1",
+ "SampleAfterValue": "400009",
+ "UMask": "0x8003",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "number of branch instructions retired that were mispredicted and taken forward.",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0xc5",
+ "EventName": "BR_MISP_RETIRED.COND_TAKEN_FWD",
+ "PublicDescription": "Counts taken forward conditional mispredicted branch instructions retired. Available PDIST counters: 0,1",
+ "SampleAfterValue": "400009",
+ "UMask": "0x2",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "This event is deprecated. [This event is alias to BR_MISP_RETIRED.COND_TAKEN_FWD_TPEBS]",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "Deprecated": "1",
+ "EventCode": "0xc5",
+ "EventName": "BR_MISP_RETIRED.COND_TAKEN_FWD_COST",
+ "PublicDescription": "This event is deprecated. [This event is alias to BR_MISP_RETIRED.COND_TAKEN_FWD_TPEBS] Available PDIST counters: 0,1",
+ "SampleAfterValue": "400009",
+ "UMask": "0x8002",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "number of branch instructions retired that were mispredicted and taken forward. This precise event may be used to get the misprediction cost via the Retire_Latency field of PEBS. It fires on the instruction that immediately follows the mispredicted branch. [This event is alias to BR_MISP_RETIRED.COND_TAKEN_FWD_COST]",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0xc5",
+ "EventName": "BR_MISP_RETIRED.COND_TAKEN_FWD_TPEBS",
+ "PublicDescription": "number of branch instructions retired that were mispredicted and taken forward. This precise event may be used to get the misprediction cost via the Retire_Latency field of PEBS. It fires on the instruction that immediately follows the mispredicted branch. [This event is alias to BR_MISP_RETIRED.COND_TAKEN_FWD_COST] Available PDIST counters: 0,1",
+ "SampleAfterValue": "400009",
+ "UMask": "0x8002",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Mispredicted taken conditional branch instructions retired. This precise event may be used to get the misprediction cost via the Retire_Latency field of PEBS. It fires on the instruction that immediately follows the mispredicted branch. [This event is alias to BR_MISP_RETIRED.COND_TAKEN_COST]",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0xc5",
+ "EventName": "BR_MISP_RETIRED.COND_TAKEN_TPEBS",
+ "PublicDescription": "Mispredicted taken conditional branch instructions retired. This precise event may be used to get the misprediction cost via the Retire_Latency field of PEBS. It fires on the instruction that immediately follows the mispredicted branch. [This event is alias to BR_MISP_RETIRED.COND_TAKEN_COST] Available PDIST counters: 0,1",
+ "SampleAfterValue": "400009",
+ "UMask": "0x8003",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Mispredicted conditional branch instructions retired. This precise event may be used to get the misprediction cost via the Retire_Latency field of PEBS. It fires on the instruction that immediately follows the mispredicted branch. [This event is alias to BR_MISP_RETIRED.COND_COST]",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0xc5",
+ "EventName": "BR_MISP_RETIRED.COND_TPEBS",
+ "PublicDescription": "Mispredicted conditional branch instructions retired. This precise event may be used to get the misprediction cost via the Retire_Latency field of PEBS. It fires on the instruction that immediately follows the mispredicted branch. [This event is alias to BR_MISP_RETIRED.COND_COST] Available PDIST counters: 0,1",
+ "SampleAfterValue": "400009",
+ "UMask": "0x8007",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "This event is deprecated. [This event is alias to BR_MISP_RETIRED.NEAR_INDIRECT]",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "Deprecated": "1",
+ "EventCode": "0xc5",
+ "EventName": "BR_MISP_RETIRED.INDIRECT",
+ "PublicDescription": "This event is deprecated. [This event is alias to BR_MISP_RETIRED.NEAR_INDIRECT] Available PDIST counters: 0,1",
+ "SampleAfterValue": "100003",
+ "UMask": "0x50",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "This event is deprecated. [This event is alias to BR_MISP_RETIRED.NEAR_INDIRECT_CALL]",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "Deprecated": "1",
+ "EventCode": "0xc5",
+ "EventName": "BR_MISP_RETIRED.INDIRECT_CALL",
+ "PublicDescription": "This event is deprecated. [This event is alias to BR_MISP_RETIRED.NEAR_INDIRECT_CALL] Available PDIST counters: 0,1",
+ "SampleAfterValue": "400009",
+ "UMask": "0x10",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "This event is deprecated. [This event is alias to BR_MISP_RETIRED.NEAR_INDIRECT_CALL_TPEBS]",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "Deprecated": "1",
+ "EventCode": "0xc5",
+ "EventName": "BR_MISP_RETIRED.INDIRECT_CALL_COST",
+ "PublicDescription": "This event is deprecated. [This event is alias to BR_MISP_RETIRED.NEAR_INDIRECT_CALL_TPEBS] Available PDIST counters: 0,1",
+ "SampleAfterValue": "400009",
+ "UMask": "0x8010",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "This event is deprecated. [This event is alias to BR_MISP_RETIRED.NEAR_INDIRECT_TPEBS]",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "Deprecated": "1",
+ "EventCode": "0xc5",
+ "EventName": "BR_MISP_RETIRED.INDIRECT_COST",
+ "PublicDescription": "This event is deprecated. [This event is alias to BR_MISP_RETIRED.NEAR_INDIRECT_TPEBS] Available PDIST counters: 0,1",
+ "SampleAfterValue": "100003",
+ "UMask": "0x8050",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "This event is deprecated. [This event is alias to BR_MISP_RETIRED.NEAR_INDIRECT_JMP]",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "Deprecated": "1",
+ "EventCode": "0xc5",
+ "EventName": "BR_MISP_RETIRED.INDIRECT_JMP",
+ "PublicDescription": "This event is deprecated. [This event is alias to BR_MISP_RETIRED.NEAR_INDIRECT_JMP] Available PDIST counters: 0,1",
+ "SampleAfterValue": "100003",
+ "UMask": "0x40",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "This event is deprecated. [This event is alias to BR_MISP_RETIRED.NEAR_INDIRECT_JMP_TPEBS]",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "Deprecated": "1",
+ "EventCode": "0xc5",
+ "EventName": "BR_MISP_RETIRED.INDIRECT_JMP_COST",
+ "PublicDescription": "This event is deprecated. [This event is alias to BR_MISP_RETIRED.NEAR_INDIRECT_JMP_TPEBS] Available PDIST counters: 0,1",
+ "SampleAfterValue": "100003",
+ "UMask": "0x8040",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Counts the number of mispredicted near indirect JMP and near indirect CALL branch instructions retired. [This event is alias to BR_MISP_RETIRED.ALL_NEAR_IND]",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xc5",
+ "EventName": "BR_MISP_RETIRED.NEAR_INDIRECT",
+ "PublicDescription": "Counts the number of mispredicted near indirect JMP and near indirect CALL branch instructions retired. [This event is alias to BR_MISP_RETIRED.ALL_NEAR_IND] Available PDIST counters: 0,1",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x50",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Miss-predicted near indirect branch instructions retired (excluding returns) [This event is alias to BR_MISP_RETIRED.INDIRECT]",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0xc5",
+ "EventName": "BR_MISP_RETIRED.NEAR_INDIRECT",
+ "PublicDescription": "Counts miss-predicted near indirect branch instructions retired excluding returns. TSX abort is an indirect branch. [This event is alias to BR_MISP_RETIRED.INDIRECT] Available PDIST counters: 0,1",
+ "SampleAfterValue": "100003",
+ "UMask": "0x50",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Mispredicted indirect CALL retired. [This event is alias to BR_MISP_RETIRED.INDIRECT_CALL]",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0xc5",
+ "EventName": "BR_MISP_RETIRED.NEAR_INDIRECT_CALL",
+ "PublicDescription": "Counts retired mispredicted indirect (near taken) CALL instructions, including both register and memory indirect. [This event is alias to BR_MISP_RETIRED.INDIRECT_CALL] Available PDIST counters: 0,1",
+ "SampleAfterValue": "400009",
+ "UMask": "0x10",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Mispredicted indirect CALL retired. This precise event may be used to get the misprediction cost via the Retire_Latency field of PEBS. It fires on the instruction that immediately follows the mispredicted branch. [This event is alias to BR_MISP_RETIRED.INDIRECT_CALL_COST]",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0xc5",
+ "EventName": "BR_MISP_RETIRED.NEAR_INDIRECT_CALL_TPEBS",
+ "PublicDescription": "Mispredicted indirect CALL retired. This precise event may be used to get the misprediction cost via the Retire_Latency field of PEBS. It fires on the instruction that immediately follows the mispredicted branch. [This event is alias to BR_MISP_RETIRED.INDIRECT_CALL_COST] Available PDIST counters: 0,1",
+ "SampleAfterValue": "400009",
+ "UMask": "0x8010",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Miss-predicted near indirect jump instructions retired. [This event is alias to BR_MISP_RETIRED.INDIRECT_JMP]",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0xc5",
+ "EventName": "BR_MISP_RETIRED.NEAR_INDIRECT_JMP",
+ "PublicDescription": "Miss-predicted near indirect jump instructions retired. [This event is alias to BR_MISP_RETIRED.INDIRECT_JMP] Available PDIST counters: 0,1",
+ "SampleAfterValue": "100003",
+ "UMask": "0x40",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Miss-predicted near indirect jump instructions retired. Precise cost. [This event is alias to BR_MISP_RETIRED.INDIRECT_JMP_COST]",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0xc5",
+ "EventName": "BR_MISP_RETIRED.NEAR_INDIRECT_JMP_TPEBS",
+ "PublicDescription": "Miss-predicted near indirect jump instructions retired. Precise cost. [This event is alias to BR_MISP_RETIRED.INDIRECT_JMP_COST] Available PDIST counters: 0,1",
+ "SampleAfterValue": "100003",
+ "UMask": "0x8040",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Mispredicted near indirect branch instructions retired (excluding returns). This precise event may be used to get the misprediction cost via the Retire_Latency field of PEBS. It fires on the instruction that immediately follows the mispredicted branch. [This event is alias to BR_MISP_RETIRED.INDIRECT_COST]",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0xc5",
+ "EventName": "BR_MISP_RETIRED.NEAR_INDIRECT_TPEBS",
+ "PublicDescription": "Mispredicted near indirect branch instructions retired (excluding returns). This precise event may be used to get the misprediction cost via the Retire_Latency field of PEBS. It fires on the instruction that immediately follows the mispredicted branch. [This event is alias to BR_MISP_RETIRED.INDIRECT_COST] Available PDIST counters: 0,1",
+ "SampleAfterValue": "100003",
+ "UMask": "0x8050",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "This event counts the number of mispredicted ret instructions retired. Non PEBS [This event is alias to BR_MISP_RETIRED.RET]",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0xc5",
+ "EventName": "BR_MISP_RETIRED.NEAR_RETURN",
+ "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts mispredicted return instructions retired. [This event is alias to BR_MISP_RETIRED.RET] Available PDIST counters: 0,1",
+ "SampleAfterValue": "100007",
+ "UMask": "0x8",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Mispredicted ret instructions retired. This precise event may be used to get the misprediction cost via the Retire_Latency field of PEBS. It fires on the instruction that immediately follows the mispredicted branch. [This event is alias to BR_MISP_RETIRED.RET_COST]",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0xc5",
+ "EventName": "BR_MISP_RETIRED.NEAR_RETURN_TPEBS",
+ "PublicDescription": "Mispredicted ret instructions retired. This precise event may be used to get the misprediction cost via the Retire_Latency field of PEBS. It fires on the instruction that immediately follows the mispredicted branch. [This event is alias to BR_MISP_RETIRED.RET_COST] Available PDIST counters: 0,1",
+ "SampleAfterValue": "100007",
+ "UMask": "0x8008",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Number of near branch instructions retired that were mispredicted and taken.",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0xc5",
+ "EventName": "BR_MISP_RETIRED.NEAR_TAKEN",
+ "PublicDescription": "Counts number of near branch instructions retired that were mispredicted and taken. Available PDIST counters: 0,1",
"SampleAfterValue": "400009",
+ "UMask": "0xfb",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "This event is deprecated. [This event is alias to BR_MISP_RETIRED.NEAR_TAKEN_TPEBS]",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "Deprecated": "1",
+ "EventCode": "0xc5",
+ "EventName": "BR_MISP_RETIRED.NEAR_TAKEN_COST",
+ "PublicDescription": "This event is deprecated. [This event is alias to BR_MISP_RETIRED.NEAR_TAKEN_TPEBS] Available PDIST counters: 0,1",
+ "SampleAfterValue": "400009",
+ "UMask": "0x80fb",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Mispredicted taken near branch instructions retired. This precise event may be used to get the misprediction cost via the Retire_Latency field of PEBS. It fires on the instruction that immediately follows the mispredicted branch. [This event is alias to BR_MISP_RETIRED.NEAR_TAKEN_COST]",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0xc5",
+ "EventName": "BR_MISP_RETIRED.NEAR_TAKEN_TPEBS",
+ "PublicDescription": "Mispredicted taken near branch instructions retired. This precise event may be used to get the misprediction cost via the Retire_Latency field of PEBS. It fires on the instruction that immediately follows the mispredicted branch. [This event is alias to BR_MISP_RETIRED.NEAR_TAKEN_COST] Available PDIST counters: 0,1",
+ "SampleAfterValue": "400009",
+ "UMask": "0x80fb",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "This event is deprecated. [This event is alias to BR_MISP_RETIRED.NEAR_RETURN]",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "Deprecated": "1",
+ "EventCode": "0xc5",
+ "EventName": "BR_MISP_RETIRED.RET",
+ "PublicDescription": "This event is deprecated. [This event is alias to BR_MISP_RETIRED.NEAR_RETURN] Available PDIST counters: 0,1",
+ "SampleAfterValue": "100007",
+ "UMask": "0x8",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "This event is deprecated. [This event is alias to BR_MISP_RETIRED.NEAR_RETURN_TPEBS]",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "Deprecated": "1",
+ "EventCode": "0xc5",
+ "EventName": "BR_MISP_RETIRED.RET_COST",
+ "PublicDescription": "This event is deprecated. [This event is alias to BR_MISP_RETIRED.NEAR_RETURN_TPEBS] Available PDIST counters: 0,1",
+ "SampleAfterValue": "100007",
+ "UMask": "0x8008",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Core clocks when the thread is in the C0.1 light-weight slower wakeup time but more power saving optimized state.",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0xec",
+ "EventName": "CPU_CLK_UNHALTED.C01",
+ "PublicDescription": "Counts core clocks when the thread is in the C0.1 light-weight slower wakeup time but more power saving optimized state. This state can be entered via the TPAUSE or UMWAIT instructions.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x10",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Core clocks when the thread is in the C0.2 light-weight faster wakeup time but less power saving optimized state.",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0xec",
+ "EventName": "CPU_CLK_UNHALTED.C02",
+ "PublicDescription": "Counts core clocks when the thread is in the C0.2 light-weight faster wakeup time but less power saving optimized state. This state can be entered via the TPAUSE or UMWAIT instructions.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x20",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Core clocks when the thread is in the C0.1 or C0.2 or running a PAUSE in C0 ACPI state.",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0xec",
+ "EventName": "CPU_CLK_UNHALTED.C0_WAIT",
+ "PublicDescription": "Counts core clocks when the thread is in the C0.1 or C0.2 power saving optimized states (TPAUSE or UMWAIT instructions) or running the PAUSE instruction.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x70",
"Unit": "cpu_core"
},
{
"BriefDescription": "Fixed Counter: Counts the number of unhalted core clock cycles.",
"Counter": "Fixed counter 1",
"EventName": "CPU_CLK_UNHALTED.CORE",
- "SampleAfterValue": "1000003",
+ "SampleAfterValue": "2000003",
"UMask": "0x2",
"Unit": "cpu_atom"
},
@@ -57,7 +776,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0x3c",
"EventName": "CPU_CLK_UNHALTED.CORE_P",
- "SampleAfterValue": "1000003",
+ "SampleAfterValue": "2000003",
"Unit": "cpu_atom"
},
{
@@ -70,10 +789,30 @@
"Unit": "cpu_core"
},
{
+ "BriefDescription": "Core clocks when a PAUSE is pending.",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0xec",
+ "EventName": "CPU_CLK_UNHALTED.PAUSE",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x40",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Number of Pause instructions",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "CounterMask": "1",
+ "EdgeDetect": "1",
+ "EventCode": "0xec",
+ "EventName": "CPU_CLK_UNHALTED.PAUSE_INST",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x40",
+ "Unit": "cpu_core"
+ },
+ {
"BriefDescription": "Fixed Counter: Counts the number of unhalted reference clock cycles.",
"Counter": "Fixed counter 2",
"EventName": "CPU_CLK_UNHALTED.REF_TSC",
- "SampleAfterValue": "1000003",
+ "SampleAfterValue": "2000003",
"UMask": "0x3",
"Unit": "cpu_atom"
},
@@ -92,7 +831,7 @@
"EventCode": "0x3c",
"EventName": "CPU_CLK_UNHALTED.REF_TSC_P",
"PublicDescription": "Counts the number of reference cycles that the core is not in a halt state. The core enters the halt state when it is running the HLT instruction. This event is not affected by core frequency changes and increments at a fixed frequency that is also used for the Time Stamp Counter (TSC). This event uses a programmable general purpose performance counter.",
- "SampleAfterValue": "1000003",
+ "SampleAfterValue": "2000003",
"UMask": "0x1",
"Unit": "cpu_atom"
},
@@ -110,7 +849,7 @@
"BriefDescription": "Fixed Counter: Counts the number of unhalted core clock cycles.",
"Counter": "Fixed counter 1",
"EventName": "CPU_CLK_UNHALTED.THREAD",
- "SampleAfterValue": "1000003",
+ "SampleAfterValue": "2000003",
"UMask": "0x2",
"Unit": "cpu_atom"
},
@@ -128,7 +867,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0x3c",
"EventName": "CPU_CLK_UNHALTED.THREAD_P",
- "SampleAfterValue": "1000003",
+ "SampleAfterValue": "2000003",
"Unit": "cpu_atom"
},
{
@@ -141,11 +880,130 @@
"Unit": "cpu_core"
},
{
+ "BriefDescription": "Cycles while memory subsystem has an outstanding load.",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "CounterMask": "16",
+ "EventCode": "0xa3",
+ "EventName": "CYCLE_ACTIVITY.CYCLES_MEM_ANY",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x10",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Total execution stalls.",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "CounterMask": "4",
+ "EventCode": "0xa3",
+ "EventName": "CYCLE_ACTIVITY.STALLS_TOTAL",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x4",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Count number of times a load is depending on another load that had just write back its data or in previous or 2 cycles back. This event supports in-direct dependency through a single uop.",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0x02",
+ "EventName": "DEPENDENT_LOADS.ANY",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x7",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Cycles total of 1 uop is executed on all ports and Reservation Station was not empty.",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0xa6",
+ "EventName": "EXE_ACTIVITY.1_PORTS_UTIL",
+ "PublicDescription": "Counts cycles during which a total of 1 uop was executed on all ports and Reservation Station (RS) was not empty.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x2",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Cycles total of 2 or 3 uops are executed on all ports and Reservation Station (RS) was not empty.",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0xa6",
+ "EventName": "EXE_ACTIVITY.2_3_PORTS_UTIL",
+ "SampleAfterValue": "2000003",
+ "UMask": "0xc",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Cycles total of 2 uops are executed on all ports and Reservation Station was not empty.",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0xa6",
+ "EventName": "EXE_ACTIVITY.2_PORTS_UTIL",
+ "PublicDescription": "Counts cycles during which a total of 2 uops were executed on all ports and Reservation Station (RS) was not empty.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x4",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Cycles total of 3 uops are executed on all ports and Reservation Station was not empty.",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0xa6",
+ "EventName": "EXE_ACTIVITY.3_PORTS_UTIL",
+ "PublicDescription": "Cycles total of 3 uops are executed on all ports and Reservation Station (RS) was not empty.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x8",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Cycles total of 4 uops are executed on all ports and Reservation Station was not empty.",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0xa6",
+ "EventName": "EXE_ACTIVITY.4_PORTS_UTIL",
+ "PublicDescription": "Cycles total of 4 uops are executed on all ports and Reservation Station (RS) was not empty.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x10",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Execution stalls while memory subsystem has an outstanding load.",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "CounterMask": "5",
+ "EventCode": "0xa6",
+ "EventName": "EXE_ACTIVITY.BOUND_ON_LOADS",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x21",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Cycles where the Store Buffer was full and no loads caused an execution stall.",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "CounterMask": "2",
+ "EventCode": "0xa6",
+ "EventName": "EXE_ACTIVITY.BOUND_ON_STORES",
+ "PublicDescription": "Counts cycles where the Store Buffer was full and no loads caused an execution stall.",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x40",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Cycles no uop executed while RS was not empty, the SB was not full and there was no outstanding load.",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0xa6",
+ "EventName": "EXE_ACTIVITY.EXE_BOUND_0_PORTS",
+ "PublicDescription": "Number of cycles total of 0 uops executed on all ports, Reservation Station (RS) was not empty, the Store Buffer (SB) was not full and there was no outstanding load.",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x80",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Instruction decoders utilized in a cycle",
+ "Counter": "2",
+ "EventCode": "0x75",
+ "EventName": "INST_DECODED.DECODERS",
+ "PublicDescription": "Number of decoders utilized in a cycle when the MITE (legacy decode pipeline) fetches instructions.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x1",
+ "Unit": "cpu_core"
+ },
+ {
"BriefDescription": "Fixed Counter: Counts the number of instructions retired.",
"Counter": "Fixed counter 0",
"EventName": "INST_RETIRED.ANY",
"PublicDescription": "Fixed Counter: Counts the number of instructions retired. Available PDIST counters: 32",
- "SampleAfterValue": "1000003",
+ "SampleAfterValue": "2000003",
"UMask": "0x1",
"Unit": "cpu_atom"
},
@@ -163,7 +1021,8 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc0",
"EventName": "INST_RETIRED.ANY_P",
- "SampleAfterValue": "1000003",
+ "PublicDescription": "Counts the number of instructions retired. Available PDIST counters: 0,1",
+ "SampleAfterValue": "2000003",
"Unit": "cpu_atom"
},
{
@@ -171,15 +1030,364 @@
"Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0xc0",
"EventName": "INST_RETIRED.ANY_P",
- "PublicDescription": "Counts the number of X86 instructions retired - an Architectural PerfMon event. Counting continues during hardware interrupts, traps, and inside interrupt handlers. Notes: INST_RETIRED.ANY is counted by a designated fixed counter freeing up programmable counters to count other events. INST_RETIRED.ANY_P is counted by a programmable counter. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of X86 instructions retired - an Architectural PerfMon event. Counting continues during hardware interrupts, traps, and inside interrupt handlers. Notes: INST_RETIRED.ANY is counted by a designated fixed counter freeing up programmable counters to count other events. INST_RETIRED.ANY_P is counted by a programmable counter. Available PDIST counters: 0,1",
"SampleAfterValue": "2000003",
"Unit": "cpu_core"
},
{
+ "BriefDescription": "retired macro-fused uops when there is a branch in the macro-fused pair (the two instructions that got macro-fused count once in this pmon)",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0xc0",
+ "EventName": "INST_RETIRED.BR_FUSED",
+ "PublicDescription": "retired macro-fused uops when there is a branch in the macro-fused pair (the two instructions that got macro-fused count once in this pmon) Available PDIST counters: 0,1",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x10",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "INST_RETIRED.MACRO_FUSED",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0xc0",
+ "EventName": "INST_RETIRED.MACRO_FUSED",
+ "PublicDescription": "INST_RETIRED.MACRO_FUSED Available PDIST counters: 0,1",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x30",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Retired NOP instructions.",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0xc0",
+ "EventName": "INST_RETIRED.NOP",
+ "PublicDescription": "Counts all retired NOP or ENDBR32/64 or PREFETCHIT0/1 instructions Available PDIST counters: 0,1",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x2",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Precise instruction retired with PEBS precise-distribution",
+ "Counter": "Fixed counter 0",
+ "EventName": "INST_RETIRED.PREC_DIST",
+ "PublicDescription": "A version of INST_RETIRED that allows for a precise distribution of samples across instructions retired. It utilizes the Precise Distribution of Instructions Retired (PDIR++) feature to fix bias in how retired instructions get sampled. Use on Fixed Counter 0. Available PDIST counters: 32",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x1",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Iterations of Repeat string retired instructions.",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0xc0",
+ "EventName": "INST_RETIRED.REP_ITERATION",
+ "PublicDescription": "Number of iterations of Repeat (REP) string retired instructions such as MOVS, CMPS, and SCAS. Each has a byte, word, and doubleword version and string instructions can be repeated using a repetition prefix, REP, that allows their architectural execution to be repeated a number of times as specified by the RCX register. Note the number of iterations is implementation-dependent. Available PDIST counters: 0,1",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x8",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Bubble cycles of BPClear.",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0xad",
+ "EventName": "INT_MISC.BPCLEAR_CYCLES",
+ "MSRIndex": "0x3F7",
+ "MSRValue": "0xB",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x40",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Clears speculative count",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "CounterMask": "1",
+ "EdgeDetect": "1",
+ "EventCode": "0xad",
+ "EventName": "INT_MISC.CLEARS_COUNT",
+ "PublicDescription": "Counts the number of speculative clears due to any type of branch misprediction or machine clears",
+ "SampleAfterValue": "500009",
+ "UMask": "0x1",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Counts cycles after recovery from a branch misprediction or machine clear till the first uop is issued from the resteered path.",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0xad",
+ "EventName": "INT_MISC.CLEAR_RESTEER_CYCLES",
+ "PublicDescription": "Cycles after recovery from a branch misprediction or machine clear till the first uop is issued from the resteered path.",
+ "SampleAfterValue": "500009",
+ "UMask": "0x80",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Core cycles the allocator was stalled due to recovery from earlier clear event for this thread",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0xad",
+ "EventName": "INT_MISC.RECOVERY_CYCLES",
+ "PublicDescription": "Counts core cycles when the Resource allocator was stalled due to recovery from an earlier branch misprediction or machine clear event.",
+ "SampleAfterValue": "500009",
+ "UMask": "0x1",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Bubble cycles of BAClear (Unknown Branch).",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0xad",
+ "EventName": "INT_MISC.UNKNOWN_BRANCH_CYCLES",
+ "MSRIndex": "0x3F7",
+ "MSRValue": "0x7",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x40",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "TMA slots where uops got dropped",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0xad",
+ "EventName": "INT_MISC.UOP_DROPPING",
+ "PublicDescription": "Estimated number of Top-down Microarchitecture Analysis slots that got dropped due to non front-end reasons",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x10",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Counts the number of uops executed on secondary integer ports 0,1,2,3.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xb3",
+ "EventName": "INT_UOPS_EXECUTED.2ND",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x80",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of uops executed on all Integer ports.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xb3",
+ "EventName": "INT_UOPS_EXECUTED.ALL",
+ "SampleAfterValue": "1000003",
+ "UMask": "0xff",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of uops executed on a load port.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xb3",
+ "EventName": "INT_UOPS_EXECUTED.LD",
+ "PublicDescription": "Counts the number of uops executed on a load port. This event counts for integer uops even if the destination is FP/vector",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x1",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of uops executed on integer port 0.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xb3",
+ "EventName": "INT_UOPS_EXECUTED.P0",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x8",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of uops executed on integer port 1.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xb3",
+ "EventName": "INT_UOPS_EXECUTED.P1",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x10",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of uops executed on integer port 2.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xb3",
+ "EventName": "INT_UOPS_EXECUTED.P2",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x20",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of uops executed on integer port 3.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xb3",
+ "EventName": "INT_UOPS_EXECUTED.P3",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x40",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of uops executed on integer port 0,1, 2, 3.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xb3",
+ "EventName": "INT_UOPS_EXECUTED.PRIMARY",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x78",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of uops executed on a Store address port.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xb3",
+ "EventName": "INT_UOPS_EXECUTED.STA",
+ "PublicDescription": "Counts the number of uops executed on a Store address port. This event counts integer uops even if the data source is FP/vector",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x2",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of uops executed on an integer store data and jump port.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xb3",
+ "EventName": "INT_UOPS_EXECUTED.STD_JMP",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x4",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Number of vector integer instructions retired of 128-bit vector-width.",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0xe7",
+ "EventName": "INT_VEC_RETIRED.128BIT",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x13",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Number of vector integer instructions retired of 256-bit vector-width.",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0xe7",
+ "EventName": "INT_VEC_RETIRED.256BIT",
+ "SampleAfterValue": "1000003",
+ "UMask": "0xac",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "integer ADD, SUB, SAD 128-bit vector instructions.",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0xe7",
+ "EventName": "INT_VEC_RETIRED.ADD_128",
+ "PublicDescription": "Number of retired integer ADD/SUB (regular or horizontal), SAD 128-bit vector instructions.",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x3",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "integer ADD, SUB, SAD 256-bit vector instructions.",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0xe7",
+ "EventName": "INT_VEC_RETIRED.ADD_256",
+ "PublicDescription": "Number of retired integer ADD/SUB (regular or horizontal), SAD 256-bit vector instructions.",
+ "SampleAfterValue": "1000003",
+ "UMask": "0xc",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "INT_VEC_RETIRED.MUL_256",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0xe7",
+ "EventName": "INT_VEC_RETIRED.MUL_256",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x80",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "INT_VEC_RETIRED.SHUFFLES",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0xe7",
+ "EventName": "INT_VEC_RETIRED.SHUFFLES",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x40",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "INT_VEC_RETIRED.VNNI_128",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0xe7",
+ "EventName": "INT_VEC_RETIRED.VNNI_128",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x10",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "INT_VEC_RETIRED.VNNI_256",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0xe7",
+ "EventName": "INT_VEC_RETIRED.VNNI_256",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x20",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Counts the number of retired loads that are blocked because it initially appears to be store forward blocked, but subsequently is shown not to be blocked based on 4K alias check.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x03",
+ "EventName": "LD_BLOCKS.ADDRESS_ALIAS",
+ "PublicDescription": "Counts the number of retired loads that are blocked because it initially appears to be store forward blocked, but subsequently is shown not to be blocked based on 4K alias check. Available PDIST counters: 0,1",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x4",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "False dependencies in MOB due to partial compare on address.",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0x03",
+ "EventName": "LD_BLOCKS.ADDRESS_ALIAS",
+ "PublicDescription": "Counts the number of times a load got blocked due to false dependencies in MOB due to partial compare on address.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x4",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Counts the number of retired loads that are blocked for any of the following reasons: DTLB miss, address alias, store forward or data unknown (includes memory disambiguation blocks and ESP consuming load blocks).",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x03",
+ "EventName": "LD_BLOCKS.ALL",
+ "PublicDescription": "Counts the number of retired loads that are blocked for any of the following reasons: DTLB miss, address alias, store forward or data unknown (includes memory disambiguation blocks and ESP consuming load blocks). Available PDIST counters: 0,1",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x1f",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Bank conflicts in DCU due to limited lookup ports.",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0x03",
+ "EventName": "LD_BLOCKS.BANK_CONFLICT",
+ "PublicDescription": "Counts the number of times a load got blocked due to bank conflicts in DCU",
+ "SampleAfterValue": "100003",
+ "UMask": "0x40",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Counts the number of retired loads that are blocked because its address exactly matches an older store whose data is not ready.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x03",
+ "EventName": "LD_BLOCKS.DATA_UNKNOWN",
+ "PublicDescription": "Counts the number of retired loads that are blocked because its address exactly matches an older store whose data is not ready. Available PDIST counters: 0,1",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x1",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "The number of times that split load operations are temporarily blocked because all resources for handling the split accesses are in use.",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0x03",
+ "EventName": "LD_BLOCKS.NO_SR",
+ "PublicDescription": "Counts the number of times that split load operations are temporarily blocked because all resources for handling the split accesses are in use.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x88",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Counts the number of times a load got early blocked due to preceding store operation with unknown address or unknown data. Excluding in-line (immediate) wakeups",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0x03",
+ "EventName": "LD_BLOCKS.STORE_EARLY",
+ "SampleAfterValue": "100003",
+ "UMask": "0xa1",
+ "Unit": "cpu_core"
+ },
+ {
"BriefDescription": "Counts the number of retired loads that are blocked because its address partially overlapped with an older store.",
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0x03",
"EventName": "LD_BLOCKS.STORE_FORWARD",
+ "PublicDescription": "Counts the number of retired loads that are blocked because its address partially overlapped with an older store. Available PDIST counters: 0,1",
"SampleAfterValue": "1000003",
"UMask": "0x2",
"Unit": "cpu_atom"
@@ -195,10 +1403,138 @@
"Unit": "cpu_core"
},
{
+ "BriefDescription": "Cycles Uops delivered by the LSD, but didn't come from the decoder.",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "CounterMask": "1",
+ "EventCode": "0xa8",
+ "EventName": "LSD.CYCLES_ACTIVE",
+ "PublicDescription": "Counts the cycles when at least one uop is delivered by the LSD (Loop-stream detector).",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x1",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Cycles optimal number of Uops delivered by the LSD, but did not come from the decoder.",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "CounterMask": "8",
+ "EventCode": "0xa8",
+ "EventName": "LSD.CYCLES_OK",
+ "PublicDescription": "Counts the cycles when optimal number of uops is delivered by the LSD (Loop-stream detector).",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x1",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Number of Uops delivered by the LSD.",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0xa8",
+ "EventName": "LSD.UOPS",
+ "PublicDescription": "Counts the number of uops delivered to the back-end by the LSD(Loop Stream Detector).",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x1",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Number of machine clears (nukes) of any type.",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "CounterMask": "1",
+ "EdgeDetect": "1",
+ "EventCode": "0xc3",
+ "EventName": "MACHINE_CLEARS.COUNT",
+ "PublicDescription": "Counts the number of machine clears (nukes) of any type.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Counts the number of machine clears due to memory ordering in which an internal load passes an older store within the same CPU.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xc3",
+ "EventName": "MACHINE_CLEARS.DISAMBIGUATION",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x8",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of machine clears due to a page fault. Counts both I-Side and D-Side (Loads/Stores) page faults. A page fault occurs when either the page is not present, or an access violation occurs.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xc3",
+ "EventName": "MACHINE_CLEARS.PAGE_FAULT",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x20",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of machine clears due to program modifying data (self modifying code) within 1K of a recently fetched code page.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xc3",
+ "EventName": "MACHINE_CLEARS.SMC",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x1",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Self-modifying code (SMC) detected.",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0xc3",
+ "EventName": "MACHINE_CLEARS.SMC",
+ "PublicDescription": "Counts self-modifying code (SMC) detected, which causes a machine clear.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x4",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Counts cycles where no execution is happening due to loads waiting for L1 cache (that is: no execution & load in flight & no load missed L1 cache)",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0x46",
+ "EventName": "MEMORY_STALLS.L1",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x1",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Counts cycles where no execution is happening due to loads waiting for L2 cache (that is: no execution & load in flight & load missed L1 & no load missed L2 cache)",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0x46",
+ "EventName": "MEMORY_STALLS.L2",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x2",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Counts cycles where no execution is happening due to loads waiting for L3 cache (that is: no execution & load in flight & load missed L1 & load missed L2 cache & no load missed L3 Cache)",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0x46",
+ "EventName": "MEMORY_STALLS.L3",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x4",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Counts cycles where no execution is happening due to loads waiting for Memory (that is: no execution & load in flight & a load missed L3 cache)",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0x46",
+ "EventName": "MEMORY_STALLS.MEM",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x8",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "LFENCE instructions retired",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0xe0",
+ "EventName": "MISC2_RETIRED.LFENCE",
+ "PublicDescription": "number of LFENCE retired instructions",
+ "SampleAfterValue": "400009",
+ "UMask": "0x20",
+ "Unit": "cpu_core"
+ },
+ {
"BriefDescription": "Counts the number of LBR entries recorded. Requires LBRs to be enabled in IA32_LBR_CTL.",
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xe4",
"EventName": "MISC_RETIRED.LBR_INSERTS",
+ "PublicDescription": "Counts the number of LBR entries recorded. Requires LBRs to be enabled in IA32_LBR_CTL. Available PDIST counters: 0,1",
"SampleAfterValue": "1000003",
"UMask": "0x1",
"Unit": "cpu_atom"
@@ -208,12 +1544,102 @@
"Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0xe4",
"EventName": "MISC_RETIRED.LBR_INSERTS",
- "PublicDescription": "LBR record is inserted Available PDIST counters: 0",
+ "PublicDescription": "LBR record is inserted Available PDIST counters: 0,1",
"SampleAfterValue": "1000003",
"UMask": "0x1",
"Unit": "cpu_core"
},
{
+ "BriefDescription": "Counts the number of LFENCE instructions retired.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xe0",
+ "EventName": "MISC_RETIRED1.LFENCE",
+ "PublicDescription": "Counts the number of LFENCE instructions retired. Available PDIST counters: 0,1",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x2",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of accesses to KeyLocker cache.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xe1",
+ "EventName": "MISC_RETIRED2.KEYLOCKER_ACCESS",
+ "PublicDescription": "Counts the number of accesses to KeyLocker cache. Available PDIST counters: 0,1",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x10",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of misses to KeyLocker cache.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xe1",
+ "EventName": "MISC_RETIRED2.KEYLOCKER_MISS",
+ "PublicDescription": "Counts the number of misses to KeyLocker cache. Available PDIST counters: 0,1",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x11",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Cycles when Reservation Station (RS) is empty for the thread.",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0xa5",
+ "EventName": "RS.EMPTY",
+ "PublicDescription": "Counts cycles during which the reservation station (RS) is empty for this logical processor. This is usually caused when the front-end pipeline runs into starvation periods (e.g. branch mispredictions or i-cache misses)",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x7",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Counts end of periods where the Reservation Station (RS) was empty.",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "CounterMask": "1",
+ "EdgeDetect": "1",
+ "EventCode": "0xa5",
+ "EventName": "RS.EMPTY_COUNT",
+ "Invert": "1",
+ "PublicDescription": "Counts end of periods where the Reservation Station (RS) was empty. Could be useful to closely sample on front-end latency issues (see the FRONTEND_RETIRED event of designated precise events)",
+ "SampleAfterValue": "100003",
+ "UMask": "0x7",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Cycles when RS was empty and a resource allocation stall is asserted",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0xa5",
+ "EventName": "RS.EMPTY_RESOURCE",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x1",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Counts the number of issue slots in a UMWAIT or TPAUSE instruction where no uop issues due to the instruction putting the CPU into the C0.1 activity state. For Tremont, UMWAIT and TPAUSE will only put the CPU into C0.1 activity state (not C0.2 activity state).",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x75",
+ "EventName": "SERIALIZATION.C01_MS_SCB",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x4",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of issue slots where no uop could issue due to an IQ scoreboard that stalls allocation until a specified older uop retires or (in the case of jump scoreboard) executes. Commonly executed instructions with IQ scoreboards include LFENCE and MFENCE.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x75",
+ "EventName": "SERIALIZATION.IQ_JEU_SCB",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x1",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of issue slots not consumed by the backend due to a micro-sequencer (MS) scoreboard, which stalls the front-end from issuing from the UROM until a specified older uop retires.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x75",
+ "EventName": "SERIALIZATION.NON_C01_MS_SCB",
+ "PublicDescription": "Counts the number of issue slots not consumed by the backend due to a micro-sequencer (MS) scoreboard, which stalls the front-end from issuing from the UROM until a specified older uop retires. The most commonly executed instruction with an MS scoreboard is PAUSE.",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x2",
+ "Unit": "cpu_atom"
+ },
+ {
"BriefDescription": "This event counts a subset of the Topdown Slots event that were not consumed by the back-end pipeline due to lack of back-end resources, as a result of memory subsystem delays, execution units limitations, or other conditions.",
"Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0xa4",
@@ -224,6 +1650,35 @@
"Unit": "cpu_core"
},
{
+ "BriefDescription": "TMA slots wasted due to incorrect speculations.",
+ "Counter": "0",
+ "EventCode": "0xa4",
+ "EventName": "TOPDOWN.BAD_SPEC_SLOTS",
+ "PublicDescription": "Number of slots of TMA method that were wasted due to incorrect speculation. It covers all types of control-flow or data-related mis-speculations.",
+ "SampleAfterValue": "10000003",
+ "UMask": "0x4",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "TMA slots wasted due to incorrect speculation by branch mispredictions",
+ "Counter": "0",
+ "EventCode": "0xa4",
+ "EventName": "TOPDOWN.BR_MISPREDICT_SLOTS",
+ "PublicDescription": "Number of TMA slots that were wasted due to incorrect speculation by (any type of) branch mispredictions. This event estimates number of speculative operations that were issued but not retired as well as the out-of-order engine recovery past a branch misprediction.",
+ "SampleAfterValue": "10000003",
+ "UMask": "0x8",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "TOPDOWN.MEMORY_BOUND_SLOTS",
+ "Counter": "3",
+ "EventCode": "0xa4",
+ "EventName": "TOPDOWN.MEMORY_BOUND_SLOTS",
+ "SampleAfterValue": "10000003",
+ "UMask": "0x10",
+ "Unit": "cpu_core"
+ },
+ {
"BriefDescription": "TMA slots available for an unhalted logical processor. Fixed counter - architectural event",
"Counter": "Fixed counter 3",
"EventName": "TOPDOWN.SLOTS",
@@ -244,7 +1699,7 @@
},
{
"BriefDescription": "Fixed Counter: Counts the number of issue slots that were not consumed by the backend because allocation is stalled due to a mispredicted jump or a machine clear.",
- "Counter": "36",
+ "Counter": "Fixed counter 4",
"EventName": "TOPDOWN_BAD_SPECULATION.ALL",
"SampleAfterValue": "1000003",
"UMask": "0x5",
@@ -260,6 +1715,42 @@
"Unit": "cpu_atom"
},
{
+ "BriefDescription": "Counts the number of issue slots every cycle that were not consumed by the backend due to Fast Nukes such as Memory Ordering Machine clears and MRN nukes",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x73",
+ "EventName": "TOPDOWN_BAD_SPECULATION.FASTNUKE",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x2",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of issue slots every cycle that were not consumed by the backend due to a branch mispredict that resulted in LSD exit.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x73",
+ "EventName": "TOPDOWN_BAD_SPECULATION.LSD_MISPREDICT",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x8",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of issue slots every cycle that were not consumed by the backend due to Branch Mispredict",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x73",
+ "EventName": "TOPDOWN_BAD_SPECULATION.MISPREDICT",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x4",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of issue slots every cycle that were not consumed by the backend due to a machine clear (nuke).",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x73",
+ "EventName": "TOPDOWN_BAD_SPECULATION.NUKE",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x1",
+ "Unit": "cpu_atom"
+ },
+ {
"BriefDescription": "Counts the number of retirement slots not consumed due to backend stalls. [This event is alias to TOPDOWN_BE_BOUND.ALL_P]",
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xa4",
@@ -269,6 +1760,15 @@
"Unit": "cpu_atom"
},
{
+ "BriefDescription": "Counts the number of issue slots every cycle that were not consumed by the backend due to due to certain allocation restrictions.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x74",
+ "EventName": "TOPDOWN_BE_BOUND.ALLOC_RESTRICTIONS",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x1",
+ "Unit": "cpu_atom"
+ },
+ {
"BriefDescription": "Counts the number of retirement slots not consumed due to backend stalls. [This event is alias to TOPDOWN_BE_BOUND.ALL]",
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xa4",
@@ -278,8 +1778,35 @@
"Unit": "cpu_atom"
},
{
+ "BriefDescription": "Counts the number of issue slots every cycle that were not consumed by the backend due to LSD entry.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x74",
+ "EventName": "TOPDOWN_BE_BOUND.LSD",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x80",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of issue slots every cycle that were not consumed by the backend due to memory reservation stall (scheduler not being able to accept another uop). This could be caused by RSV full or load/store buffer block.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x74",
+ "EventName": "TOPDOWN_BE_BOUND.MEM_SCHEDULER",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x2",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of issue slots every cycle that were not consumed by the backend due to iq/jeu scoreboards or ms scb",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x74",
+ "EventName": "TOPDOWN_BE_BOUND.SERIALIZATION",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x10",
+ "Unit": "cpu_atom"
+ },
+ {
"BriefDescription": "Fixed Counter: Counts the number of retirement slots not consumed due to front end stalls.",
- "Counter": "37",
+ "Counter": "Fixed counter 5",
"EventName": "TOPDOWN_FE_BOUND.ALL",
"SampleAfterValue": "1000003",
"UMask": "0x6",
@@ -295,8 +1822,80 @@
"Unit": "cpu_atom"
},
{
+ "BriefDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to BAClear",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x71",
+ "EventName": "TOPDOWN_FE_BOUND.BRANCH_DETECT",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x2",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to BTClear",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x71",
+ "EventName": "TOPDOWN_FE_BOUND.BRANCH_RESTEER",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x40",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to ms",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x71",
+ "EventName": "TOPDOWN_FE_BOUND.CISC",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x1",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to decode stall",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x71",
+ "EventName": "TOPDOWN_FE_BOUND.DECODE",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x8",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to latency related stalls including BACLEARs, BTCLEARs, ITLB misses, and ICache misses.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x71",
+ "EventName": "TOPDOWN_FE_BOUND.FRONTEND_LATENCY",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x72",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to itlb miss",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x71",
+ "EventName": "TOPDOWN_FE_BOUND.ITLB_MISS",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x10",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend that do not categorize into any other common frontend stall",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x71",
+ "EventName": "TOPDOWN_FE_BOUND.OTHER",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x80",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to predecode wrong",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x71",
+ "EventName": "TOPDOWN_FE_BOUND.PREDECODE",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x4",
+ "Unit": "cpu_atom"
+ },
+ {
"BriefDescription": "Fixed Counter: Counts the number of consumed retirement slots.",
- "Counter": "38",
+ "Counter": "Fixed counter 6",
"EventName": "TOPDOWN_RETIRING.ALL",
"SampleAfterValue": "1000003",
"UMask": "0x7",
@@ -313,6 +1912,256 @@
"Unit": "cpu_atom"
},
{
+ "BriefDescription": "Number of non dec-by-all uops decoded by decoder",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0x76",
+ "EventName": "UOPS_DECODED.DEC0_UOPS",
+ "PublicDescription": "This event counts the number of not dec-by-all uops decoded by decoder 0.",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x1",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Uops executed on INT EU ALU ports.",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0xb2",
+ "EventName": "UOPS_DISPATCHED.ALU",
+ "PublicDescription": "Number of ALU integer uops dispatch to execution.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x2",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Uops executed on any INT EU ports",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0xb2",
+ "EventName": "UOPS_DISPATCHED.INT_EU_ALL",
+ "PublicDescription": "Number of integer uops dispatched to execution.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x1",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Number of Uops dispatched/executed by any of the 3 JEUs (all ups that hold the JEU including macro; micro jumps; fetch-from-eip)",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0xb2",
+ "EventName": "UOPS_DISPATCHED.JMP",
+ "PublicDescription": "Number of jump uops dispatch to execution",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x40",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Uops executed on Load ports",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0xb2",
+ "EventName": "UOPS_DISPATCHED.LOAD",
+ "PublicDescription": "Number of Load uops dispatched to execution.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x4",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Number of (shift) 1-cycle Uops dispatched/executed by any of the Shift Eus",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0xb2",
+ "EventName": "UOPS_DISPATCHED.SHIFT",
+ "PublicDescription": "Number of SHIFT integer uops dispatch to execution",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x20",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Number of Uops dispatched/executed by Slow EU (e.g. 3+ cycles LEA, >1 cycles shift, iDIVs, CR; *H operation)",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0xb2",
+ "EventName": "UOPS_DISPATCHED.SLOW",
+ "PublicDescription": "Number of Slow integer uops dispatch to execution.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x8",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Number of Uops dispatched on STA ports",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0xb2",
+ "EventName": "UOPS_DISPATCHED.STA",
+ "PublicDescription": "Number of STA (Store Address) uops dispatch to execution",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x80",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Uops executed on STD ports",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0xb2",
+ "EventName": "UOPS_DISPATCHED.STD",
+ "PublicDescription": "Number of STD (Store Data) uops dispatch to execution",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x10",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Cycles where at least 1 uop was executed per-thread",
+ "Counter": "3",
+ "CounterMask": "1",
+ "EventCode": "0xb1",
+ "EventName": "UOPS_EXECUTED.CYCLES_GE_1",
+ "PublicDescription": "Cycles where at least 1 uop was executed per-thread.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x1",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Cycles where at least 2 uops were executed per-thread",
+ "Counter": "3",
+ "CounterMask": "2",
+ "EventCode": "0xb1",
+ "EventName": "UOPS_EXECUTED.CYCLES_GE_2",
+ "PublicDescription": "Cycles where at least 2 uops were executed per-thread.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x1",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Cycles where at least 3 uops were executed per-thread",
+ "Counter": "3",
+ "CounterMask": "3",
+ "EventCode": "0xb1",
+ "EventName": "UOPS_EXECUTED.CYCLES_GE_3",
+ "PublicDescription": "Cycles where at least 3 uops were executed per-thread.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x1",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Cycles where at least 4 uops were executed per-thread",
+ "Counter": "3",
+ "CounterMask": "4",
+ "EventCode": "0xb1",
+ "EventName": "UOPS_EXECUTED.CYCLES_GE_4",
+ "PublicDescription": "Cycles where at least 4 uops were executed per-thread.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x1",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Counts number of cycles no uops were dispatched to be executed on this thread.",
+ "Counter": "3",
+ "CounterMask": "1",
+ "EventCode": "0xb1",
+ "EventName": "UOPS_EXECUTED.STALLS",
+ "Invert": "1",
+ "PublicDescription": "Counts cycles during which no uops were dispatched from the Reservation Station (RS) per thread.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x1",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Counts the number of uops to be executed per-thread each cycle.",
+ "Counter": "3",
+ "EventCode": "0xb1",
+ "EventName": "UOPS_EXECUTED.THREAD",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x1",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Counts the number of x87 uops dispatched.",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0xb1",
+ "EventName": "UOPS_EXECUTED.X87",
+ "PublicDescription": "Counts the number of x87 uops executed.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x10",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Uops that RAT issues to RS",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0xae",
+ "EventName": "UOPS_ISSUED.ANY",
+ "PublicDescription": "Counts the number of uops that the Resource Allocation Table (RAT) issues to the Reservation Station (RS).",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x1",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "UOPS_ISSUED.CYCLES",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "CounterMask": "1",
+ "EventCode": "0xae",
+ "EventName": "UOPS_ISSUED.CYCLES",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x1",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Cycles with retired uop(s).",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "CounterMask": "1",
+ "EventCode": "0xc2",
+ "EventName": "UOPS_RETIRED.CYCLES",
+ "PublicDescription": "Counts cycles where at least one uop has retired.",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x2",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Retired uops except the last uop of each instruction.",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0xc2",
+ "EventName": "UOPS_RETIRED.HEAVY",
+ "PublicDescription": "Counts the number of retired micro-operations (uops) except the last uop of each instruction. An instruction that is decoded into less than two uops does not contribute to the count.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x1",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Counts the number of integer divide uops retired.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xc2",
+ "EventName": "UOPS_RETIRED.IDIV",
+ "PublicDescription": "Counts the number of integer divide uops retired. Available PDIST counters: 0,1",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x80",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of uops that are from the complex flows issued by the micro-sequencer (MS). This includes uops from flows due to complex instructions, faults, assists, and inserted flows.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xc2",
+ "EventName": "UOPS_RETIRED.MS",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x4",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "UOPS_RETIRED.MS",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0xc2",
+ "EventName": "UOPS_RETIRED.MS",
+ "MSRIndex": "0x3F7",
+ "MSRValue": "0x8",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x4",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Number of non-speculative switches to the Microcode Sequencer (MS)",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "CounterMask": "1",
+ "EdgeDetect": "1",
+ "EventCode": "0xc2",
+ "EventName": "UOPS_RETIRED.MS_SWITCHES",
+ "MSRIndex": "0x3F7",
+ "MSRValue": "0x8",
+ "PublicDescription": "Switches to the Microcode Sequencer",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x4",
+ "Unit": "cpu_core"
+ },
+ {
"BriefDescription": "This event counts a subset of the Topdown Slots event that are utilized by operations that eventually get retired (committed) by the processor pipeline. Usually, this event positively correlates with higher performance for example, as measured by the instructions-per-cycle metric.",
"Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0xc2",
@@ -321,5 +2170,25 @@
"SampleAfterValue": "2000003",
"UMask": "0x2",
"Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Cycles without actually retired uops.",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "CounterMask": "1",
+ "EventCode": "0xc2",
+ "EventName": "UOPS_RETIRED.STALLS",
+ "Invert": "1",
+ "PublicDescription": "This event counts cycles without actually retired uops.",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x2",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Counts the number of x87 uops retired, includes those in ms flows.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xc2",
+ "EventName": "UOPS_RETIRED.X87",
+ "SampleAfterValue": "1000003",
+ "Unit": "cpu_atom"
}
]
diff --git a/tools/perf/pmu-events/arch/x86/pantherlake/uncore-memory.json b/tools/perf/pmu-events/arch/x86/pantherlake/uncore-memory.json
new file mode 100644
index 000000000000..a881b99be5f3
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/pantherlake/uncore-memory.json
@@ -0,0 +1,26 @@
+[
+ {
+ "BriefDescription": "Read CAS command sent to DRAM",
+ "Counter": "0,1,2,3,4",
+ "EventCode": "0x22",
+ "EventName": "UNC_M_CAS_COUNT_RD",
+ "PerPkg": "1",
+ "Unit": "iMC"
+ },
+ {
+ "BriefDescription": "Write CAS command sent to DRAM",
+ "Counter": "0,1,2,3,4",
+ "EventCode": "0x23",
+ "EventName": "UNC_M_CAS_COUNT_WR",
+ "PerPkg": "1",
+ "Unit": "iMC"
+ },
+ {
+ "BriefDescription": "Total number of read and write byte transfers to/from DRAM, in 32B chunk, per DDR channel. Counter increments by 1 after sending or receiving 32B chunk data.",
+ "Counter": "0,1,2,3,4",
+ "EventCode": "0x3C",
+ "EventName": "UNC_M_TOTAL_DATA",
+ "PerPkg": "1",
+ "Unit": "iMC"
+ }
+]
diff --git a/tools/perf/pmu-events/arch/x86/pantherlake/virtual-memory.json b/tools/perf/pmu-events/arch/x86/pantherlake/virtual-memory.json
index 690c5dff9d9e..8d56c16b2a39 100644
--- a/tools/perf/pmu-events/arch/x86/pantherlake/virtual-memory.json
+++ b/tools/perf/pmu-events/arch/x86/pantherlake/virtual-memory.json
@@ -1,5 +1,44 @@
[
{
+ "BriefDescription": "Counts the number of page walks initiated by a demand load that missed the first and second level TLBs.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x08",
+ "EventName": "DTLB_LOAD_MISSES.MISS_CAUSED_WALK",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x1",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of first level TLB misses but second level hits due to a demand load that did not start a page walk. Accounts for all page sizes. Will result in a DTLB write from STLB.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x08",
+ "EventName": "DTLB_LOAD_MISSES.STLB_HIT",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x20",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Loads that miss the DTLB and hit the STLB.",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0x12",
+ "EventName": "DTLB_LOAD_MISSES.STLB_HIT",
+ "PublicDescription": "Counts loads that miss the DTLB (Data TLB) and hit the STLB (Second level TLB).",
+ "SampleAfterValue": "100003",
+ "UMask": "0x320",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Cycles when at least one PMH is busy with a page walk for a demand load.",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "CounterMask": "1",
+ "EventCode": "0x12",
+ "EventName": "DTLB_LOAD_MISSES.WALK_ACTIVE",
+ "PublicDescription": "Counts cycles when at least one PMH (Page Miss Handler) is busy with a page walk for a demand load.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x10",
+ "Unit": "cpu_core"
+ },
+ {
"BriefDescription": "Counts the number of page walks completed due to load DTLB misses to any page size.",
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0x08",
@@ -20,6 +59,86 @@
"Unit": "cpu_core"
},
{
+ "BriefDescription": "Page walks completed due to a demand data load to a 1G page.",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0x12",
+ "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_1G",
+ "PublicDescription": "Counts completed page walks (1G sizes) caused by demand data loads. This implies address translations missed in the DTLB and further levels of TLB. The page walk can end with or without a fault.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x8",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Page walks completed due to a demand data load to a 2M/4M page.",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0x12",
+ "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_2M_4M",
+ "PublicDescription": "Counts completed page walks (2M/4M sizes) caused by demand data loads. This implies address translations missed in the DTLB and further levels of TLB. The page walk can end with or without a fault.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x4",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Page walks completed due to a demand data load to a 4K page.",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0x12",
+ "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_4K",
+ "PublicDescription": "Counts completed page walks (4K sizes) caused by demand data loads. This implies address translations missed in the DTLB and further levels of TLB. The page walk can end with or without a fault.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x2",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Number of page walks outstanding for a demand load in the PMH each cycle.",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0x12",
+ "EventName": "DTLB_LOAD_MISSES.WALK_PENDING",
+ "PublicDescription": "Counts the number of page walks outstanding for a demand load in the PMH (Page Miss Handler) each cycle.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x10",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Counts the number of page walks initiated by a store that missed the first and second level TLBs.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x49",
+ "EventName": "DTLB_STORE_MISSES.MISS_CAUSED_WALK",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x1",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of first level TLB misses but second level hits due to stores that did not start a page walk. Accounts for all page sizes. Will result in a DTLB write from STLB.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x49",
+ "EventName": "DTLB_STORE_MISSES.STLB_HIT",
+ "PublicDescription": "Counts the number of first level TLB misses but second level hits due to a demand load that did not start a page walk. Accounts for all page sizes. Will result in a DTLB write from STLB.",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x20",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Stores that miss the DTLB and hit the STLB.",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0x13",
+ "EventName": "DTLB_STORE_MISSES.STLB_HIT",
+ "PublicDescription": "Counts stores that miss the DTLB (Data TLB) and hit the STLB (2nd Level TLB).",
+ "SampleAfterValue": "100003",
+ "UMask": "0x320",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Cycles when at least one PMH is busy with a page walk for a store.",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "CounterMask": "1",
+ "EventCode": "0x13",
+ "EventName": "DTLB_STORE_MISSES.WALK_ACTIVE",
+ "PublicDescription": "Counts cycles when at least one PMH (Page Miss Handler) is busy with a page walk for a store.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x10",
+ "Unit": "cpu_core"
+ },
+ {
"BriefDescription": "Counts the number of page walks completed due to store DTLB misses to any page size.",
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0x49",
@@ -40,6 +159,85 @@
"Unit": "cpu_core"
},
{
+ "BriefDescription": "Page walks completed due to a demand data store to a 1G page.",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0x13",
+ "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_1G",
+ "PublicDescription": "Counts completed page walks (1G sizes) caused by demand data stores. This implies address translations missed in the DTLB and further levels of TLB. The page walk can end with or without a fault.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x8",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Page walks completed due to a demand data store to a 2M/4M page.",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0x13",
+ "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_2M_4M",
+ "PublicDescription": "Counts completed page walks (2M/4M sizes) caused by demand data stores. This implies address translations missed in the DTLB and further levels of TLB. The page walk can end with or without a fault.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x4",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Page walks completed due to a demand data store to a 4K page.",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0x13",
+ "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_4K",
+ "PublicDescription": "Counts completed page walks (4K sizes) caused by demand data stores. This implies address translations missed in the DTLB and further levels of TLB. The page walk can end with or without a fault.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x2",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Number of page walks outstanding for a store in the PMH each cycle.",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0x13",
+ "EventName": "DTLB_STORE_MISSES.WALK_PENDING",
+ "PublicDescription": "Counts the number of page walks outstanding for a store in the PMH (Page Miss Handler) each cycle.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x10",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Counts the number of page walks initiated by a instruction fetch that missed the first and second level TLBs.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x85",
+ "EventName": "ITLB_MISSES.MISS_CAUSED_WALK",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x1",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of first level TLB misses but second level hits due to an instruction fetch that did not start a page walk. Account for all pages sizes. Will result in an ITLB write from STLB.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x85",
+ "EventName": "ITLB_MISSES.STLB_HIT",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x20",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Instruction fetch requests that miss the ITLB and hit the STLB.",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0x11",
+ "EventName": "ITLB_MISSES.STLB_HIT",
+ "PublicDescription": "Counts instruction fetch requests that miss the ITLB (Instruction TLB) and hit the STLB (Second-level TLB).",
+ "SampleAfterValue": "100003",
+ "UMask": "0x120",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Cycles when at least one PMH is busy with a page walk for code (instruction fetch) request.",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "CounterMask": "1",
+ "EventCode": "0x11",
+ "EventName": "ITLB_MISSES.WALK_ACTIVE",
+ "PublicDescription": "Counts cycles when at least one PMH (Page Miss Handler) is busy with a page walk for a code (instruction fetch) request.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x10",
+ "Unit": "cpu_core"
+ },
+ {
"BriefDescription": "Counts the number of page walks completed due to instruction fetch misses to any page size.",
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0x85",
@@ -58,5 +256,55 @@
"SampleAfterValue": "100003",
"UMask": "0xe",
"Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Code miss in all TLB levels causes a page walk that completes. (2M/4M)",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0x11",
+ "EventName": "ITLB_MISSES.WALK_COMPLETED_2M_4M",
+ "PublicDescription": "Counts completed page walks (2M/4M page sizes) caused by a code fetch. This implies it missed in the ITLB (Instruction TLB) and further levels of TLB. The page walk can end with or without a fault.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x4",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Code miss in all TLB levels causes a page walk that completes. (4K)",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0x11",
+ "EventName": "ITLB_MISSES.WALK_COMPLETED_4K",
+ "PublicDescription": "Counts completed page walks (4K page sizes) caused by a code fetch. This implies it missed in the ITLB (Instruction TLB) and further levels of TLB. The page walk can end with or without a fault.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x2",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Counts the number of page walks outstanding for iside in PMH every cycle.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x85",
+ "EventName": "ITLB_MISSES.WALK_PENDING",
+ "PublicDescription": "Counts the number of page walks outstanding for iside in PMH every cycle. A PMH page walk is outstanding from page walk start till PMH becomes idle again (ready to serve next walk). Includes EPT-walk intervals. Walks could be counted by edge detecting on this event, but would count restarted suspended walks.",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x10",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Number of page walks outstanding for an outstanding code request in the PMH each cycle.",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0x11",
+ "EventName": "ITLB_MISSES.WALK_PENDING",
+ "PublicDescription": "Counts the number of page walks outstanding for an outstanding code (instruction fetch) request in the PMH (Page Miss Handler) each cycle.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x10",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Counts the number of retired loads that are blocked due to a first level TLB miss.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x03",
+ "EventName": "LD_BLOCKS.DTLB_MISS",
+ "PublicDescription": "Counts the number of retired loads that are blocked due to a first level TLB miss. Available PDIST counters: 0,1",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x8",
+ "Unit": "cpu_atom"
}
]
diff --git a/tools/perf/pmu-events/arch/x86/rocketlake/rkl-metrics.json b/tools/perf/pmu-events/arch/x86/rocketlake/rkl-metrics.json
index 71737a1a1997..79dc34157481 100644
--- a/tools/perf/pmu-events/arch/x86/rocketlake/rkl-metrics.json
+++ b/tools/perf/pmu-events/arch/x86/rocketlake/rkl-metrics.json
@@ -1,63 +1,63 @@
[
{
"BriefDescription": "C10 residency percent per package",
- "MetricExpr": "cstate_pkg@c10\\-residency@ / TSC",
+ "MetricExpr": "cstate_pkg@c10\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C10_Pkg_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C2 residency percent per package",
- "MetricExpr": "cstate_pkg@c2\\-residency@ / TSC",
+ "MetricExpr": "cstate_pkg@c2\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C2_Pkg_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C3 residency percent per package",
- "MetricExpr": "cstate_pkg@c3\\-residency@ / TSC",
+ "MetricExpr": "cstate_pkg@c3\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C3_Pkg_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C6 residency percent per core",
- "MetricExpr": "cstate_core@c6\\-residency@ / TSC",
+ "MetricExpr": "cstate_core@c6\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C6_Core_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C6 residency percent per package",
- "MetricExpr": "cstate_pkg@c6\\-residency@ / TSC",
+ "MetricExpr": "cstate_pkg@c6\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C6_Pkg_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C7 residency percent per core",
- "MetricExpr": "cstate_core@c7\\-residency@ / TSC",
+ "MetricExpr": "cstate_core@c7\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C7_Core_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C7 residency percent per package",
- "MetricExpr": "cstate_pkg@c7\\-residency@ / TSC",
+ "MetricExpr": "cstate_pkg@c7\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C7_Pkg_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C8 residency percent per package",
- "MetricExpr": "cstate_pkg@c8\\-residency@ / TSC",
+ "MetricExpr": "cstate_pkg@c8\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C8_Pkg_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C9 residency percent per package",
- "MetricExpr": "cstate_pkg@c9\\-residency@ / TSC",
+ "MetricExpr": "cstate_pkg@c9\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C9_Pkg_Residency",
"ScaleUnit": "100%"
@@ -85,7 +85,6 @@
},
{
"BriefDescription": "This metric estimates how often memory load accesses were aliased by preceding stores (in program order) with a 4K address offset",
- "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "LD_BLOCKS_PARTIAL.ADDRESS_ALIAS / tma_info_thread_clks",
"MetricGroup": "TopdownL4;tma_L4_group;tma_l1_bound_group",
"MetricName": "tma_4k_aliasing",
@@ -134,6 +133,7 @@
},
{
"BriefDescription": "Total pipeline cost of instruction fetch related bottlenecks by large code footprint programs (i-side cache; TLB and BTB misses)",
+ "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "100 * tma_fetch_latency * (tma_itlb_misses + tma_icache_misses + tma_unknown_branches) / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches)",
"MetricGroup": "BigFootprint;BvBC;Fed;Frontend;IcMiss;MemoryTLB",
"MetricName": "tma_bottleneck_big_code",
@@ -148,39 +148,44 @@
"PublicDescription": "Total pipeline cost of instructions used for program control-flow - a subset of the Retiring category in TMA. Examples include function calls; loops and alignments. (A lower bound)"
},
{
+ "BriefDescription": "Total pipeline cost when the execution is compute-bound - an estimation",
+ "MetricConstraint": "NO_GROUP_EVENTS",
+ "MetricExpr": "100 * (tma_core_bound * tma_divider / (tma_divider + tma_ports_utilization + tma_serializing_operation) + tma_core_bound * (tma_ports_utilization / (tma_divider + tma_ports_utilization + tma_serializing_operation)) * (tma_ports_utilized_3m / (tma_ports_utilized_0 + tma_ports_utilized_1 + tma_ports_utilized_2 + tma_ports_utilized_3m)))",
+ "MetricGroup": "BvCB;Cor;tma_issueComp",
+ "MetricName": "tma_bottleneck_compute_bound_est",
+ "MetricThreshold": "tma_bottleneck_compute_bound_est > 20",
+ "PublicDescription": "Total pipeline cost when the execution is compute-bound - an estimation. Covers Core Bound when High ILP as well as when long-latency execution units are busy. Related metrics: "
+ },
+ {
"BriefDescription": "Total pipeline cost of external Memory- or Cache-Bandwidth related bottlenecks",
+ "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "100 * (tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_mem_bandwidth / (tma_mem_bandwidth + tma_mem_latency)) + tma_memory_bound * (tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_sq_full / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full)) + tma_memory_bound * (tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_fb_full / (tma_4k_aliasing + tma_dtlb_load + tma_fb_full + tma_l1_latency_dependency + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)))",
"MetricGroup": "BvMB;Mem;MemoryBW;Offcore;tma_issueBW",
- "MetricName": "tma_bottleneck_cache_memory_bandwidth",
- "MetricThreshold": "tma_bottleneck_cache_memory_bandwidth > 20",
+ "MetricName": "tma_bottleneck_data_cache_memory_bandwidth",
+ "MetricThreshold": "tma_bottleneck_data_cache_memory_bandwidth > 20",
"PublicDescription": "Total pipeline cost of external Memory- or Cache-Bandwidth related bottlenecks. Related metrics: tma_fb_full, tma_info_system_dram_bw_use, tma_mem_bandwidth, tma_sq_full"
},
{
"BriefDescription": "Total pipeline cost of external Memory- or Cache-Latency related bottlenecks",
+ "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "100 * (tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_mem_latency / (tma_mem_bandwidth + tma_mem_latency)) + tma_memory_bound * (tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_l3_hit_latency / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full)) + tma_memory_bound * tma_l2_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) + tma_memory_bound * (tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_l1_latency_dependency / (tma_4k_aliasing + tma_dtlb_load + tma_fb_full + tma_l1_latency_dependency + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)) + tma_memory_bound * (tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_lock_latency / (tma_4k_aliasing + tma_dtlb_load + tma_fb_full + tma_l1_latency_dependency + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)) + tma_memory_bound * (tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_split_loads / (tma_4k_aliasing + tma_dtlb_load + tma_fb_full + tma_l1_latency_dependency + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)) + tma_memory_bound * (tma_store_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_split_stores / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency + tma_streaming_stores)) + tma_memory_bound * (tma_store_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_store_latency / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency + tma_streaming_stores)))",
"MetricGroup": "BvML;Mem;MemoryLat;Offcore;tma_issueLat",
- "MetricName": "tma_bottleneck_cache_memory_latency",
- "MetricThreshold": "tma_bottleneck_cache_memory_latency > 20",
+ "MetricName": "tma_bottleneck_data_cache_memory_latency",
+ "MetricThreshold": "tma_bottleneck_data_cache_memory_latency > 20",
"PublicDescription": "Total pipeline cost of external Memory- or Cache-Latency related bottlenecks. Related metrics: tma_l3_hit_latency, tma_mem_latency"
},
{
- "BriefDescription": "Total pipeline cost when the execution is compute-bound - an estimation",
- "MetricExpr": "100 * (tma_core_bound * tma_divider / (tma_divider + tma_ports_utilization + tma_serializing_operation) + tma_core_bound * (tma_ports_utilization / (tma_divider + tma_ports_utilization + tma_serializing_operation)) * (tma_ports_utilized_3m / (tma_ports_utilized_0 + tma_ports_utilized_1 + tma_ports_utilized_2 + tma_ports_utilized_3m)))",
- "MetricGroup": "BvCB;Cor;tma_issueComp",
- "MetricName": "tma_bottleneck_compute_bound_est",
- "MetricThreshold": "tma_bottleneck_compute_bound_est > 20",
- "PublicDescription": "Total pipeline cost when the execution is compute-bound - an estimation. Covers Core Bound when High ILP as well as when long-latency execution units are busy. Related metrics: "
- },
- {
"BriefDescription": "Total pipeline cost of instruction fetch bandwidth related bottlenecks (when the front-end could not sustain operations delivery to the back-end)",
- "MetricExpr": "100 * (tma_frontend_bound - (1 - 10 * tma_microcode_sequencer * tma_other_mispredicts / tma_branch_mispredicts) * tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches) - tma_microcode_sequencer / (tma_few_uops_instructions + tma_microcode_sequencer) * (tma_assists / tma_microcode_sequencer) * (tma_fetch_latency * (tma_ms_switches + tma_branch_resteers * (tma_clears_resteers + 10 * tma_microcode_sequencer * tma_other_mispredicts / tma_branch_mispredicts * tma_mispredicts_resteers) / (tma_clears_resteers + tma_mispredicts_resteers + tma_unknown_branches)) / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches) + tma_fetch_bandwidth * tma_ms / (tma_dsb + tma_lsd + tma_mite + tma_ms))) - tma_bottleneck_big_code",
+ "MetricConstraint": "NO_GROUP_EVENTS",
+ "MetricExpr": "100 * (tma_frontend_bound - (1 - 10 * tma_microcode_sequencer * tma_other_mispredicts / tma_branch_mispredicts) * tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches) - tma_microcode_sequencer / (tma_few_uops_instructions + tma_microcode_sequencer) * (tma_assists / tma_microcode_sequencer) * (tma_fetch_latency * (tma_ms_switches + tma_branch_resteers * (tma_clears_resteers + 10 * tma_microcode_sequencer * tma_other_mispredicts / tma_branch_mispredicts * tma_mispredicts_resteers) / (tma_clears_resteers + tma_mispredicts_resteers + tma_unknown_branches)) / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches) + tma_ms)) - tma_bottleneck_big_code",
"MetricGroup": "BvFB;Fed;FetchBW;Frontend",
"MetricName": "tma_bottleneck_instruction_fetch_bw",
"MetricThreshold": "tma_bottleneck_instruction_fetch_bw > 20"
},
{
"BriefDescription": "Total pipeline cost of irregular execution (e.g",
- "MetricExpr": "100 * (tma_microcode_sequencer / (tma_few_uops_instructions + tma_microcode_sequencer) * (tma_assists / tma_microcode_sequencer) * (tma_fetch_latency * (tma_ms_switches + tma_branch_resteers * (tma_clears_resteers + 10 * tma_microcode_sequencer * tma_other_mispredicts / tma_branch_mispredicts * tma_mispredicts_resteers) / (tma_clears_resteers + tma_mispredicts_resteers + tma_unknown_branches)) / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches) + tma_fetch_bandwidth * tma_ms / (tma_dsb + tma_lsd + tma_mite + tma_ms)) + 10 * tma_microcode_sequencer * tma_other_mispredicts / tma_branch_mispredicts * tma_branch_mispredicts + tma_machine_clears * tma_other_nukes / tma_other_nukes + tma_core_bound * (tma_serializing_operation + tma_core_bound * RS_EVENTS.EMPTY_CYCLES / tma_info_thread_clks * tma_ports_utilized_0) / (tma_divider + tma_ports_utilization + tma_serializing_operation) + tma_microcode_sequencer / (tma_few_uops_instructions + tma_microcode_sequencer) * (tma_assists / tma_microcode_sequencer) * tma_heavy_operations)",
+ "MetricConstraint": "NO_GROUP_EVENTS",
+ "MetricExpr": "100 * (tma_microcode_sequencer / (tma_few_uops_instructions + tma_microcode_sequencer) * (tma_assists / tma_microcode_sequencer) * (tma_fetch_latency * (tma_ms_switches + tma_branch_resteers * (tma_clears_resteers + 10 * tma_microcode_sequencer * tma_other_mispredicts / tma_branch_mispredicts * tma_mispredicts_resteers) / (tma_clears_resteers + tma_mispredicts_resteers + tma_unknown_branches)) / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches) + tma_ms) + 10 * tma_microcode_sequencer * tma_other_mispredicts / tma_branch_mispredicts * tma_branch_mispredicts + tma_machine_clears * tma_other_nukes / tma_other_nukes + tma_core_bound * (tma_serializing_operation + tma_core_bound * RS_EVENTS.EMPTY_CYCLES / tma_info_thread_clks * tma_ports_utilized_0) / (tma_divider + tma_ports_utilization + tma_serializing_operation) + tma_microcode_sequencer / (tma_few_uops_instructions + tma_microcode_sequencer) * (tma_assists / tma_microcode_sequencer) * tma_heavy_operations)",
"MetricGroup": "Bad;BvIO;Cor;Ret;tma_issueMS",
"MetricName": "tma_bottleneck_irregular_overhead",
"MetricThreshold": "tma_bottleneck_irregular_overhead > 10",
@@ -188,6 +193,7 @@
},
{
"BriefDescription": "Total pipeline cost of Memory Address Translation related bottlenecks (data-side TLBs)",
+ "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "100 * (tma_memory_bound * (tma_l1_bound / max(tma_memory_bound, tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_dtlb_load / max(tma_l1_bound, tma_4k_aliasing + tma_dtlb_load + tma_fb_full + tma_l1_latency_dependency + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)) + tma_memory_bound * (tma_store_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_dtlb_store / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency + tma_streaming_stores)))",
"MetricGroup": "BvMT;Mem;MemoryTLB;Offcore;tma_issueTLB",
"MetricName": "tma_bottleneck_memory_data_tlbs",
@@ -196,6 +202,7 @@
},
{
"BriefDescription": "Total pipeline cost of Memory Synchronization related bottlenecks (data transfers and coherency updates across processors)",
+ "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "100 * (tma_memory_bound * (tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_contested_accesses + tma_data_sharing) / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full) + tma_store_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * tma_false_sharing / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency + tma_streaming_stores - tma_store_latency)) + tma_machine_clears * (1 - tma_other_nukes / tma_other_nukes))",
"MetricGroup": "BvMS;LockCont;Mem;Offcore;tma_issueSyncxn",
"MetricName": "tma_bottleneck_memory_synchronization",
@@ -204,6 +211,7 @@
},
{
"BriefDescription": "Total pipeline cost of Branch Misprediction related bottlenecks",
+ "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "100 * (1 - 10 * tma_microcode_sequencer * tma_other_mispredicts / tma_branch_mispredicts) * (tma_branch_mispredicts + tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches))",
"MetricGroup": "Bad;BadSpec;BrMispredicts;BvMP;tma_issueBM",
"MetricName": "tma_bottleneck_mispredictions",
@@ -212,7 +220,8 @@
},
{
"BriefDescription": "Total pipeline cost of remaining bottlenecks in the back-end",
- "MetricExpr": "100 - (tma_bottleneck_big_code + tma_bottleneck_instruction_fetch_bw + tma_bottleneck_mispredictions + tma_bottleneck_cache_memory_bandwidth + tma_bottleneck_cache_memory_latency + tma_bottleneck_memory_data_tlbs + tma_bottleneck_memory_synchronization + tma_bottleneck_compute_bound_est + tma_bottleneck_irregular_overhead + tma_bottleneck_branching_overhead + tma_bottleneck_useful_work)",
+ "MetricConstraint": "NO_GROUP_EVENTS",
+ "MetricExpr": "100 - (tma_bottleneck_big_code + tma_bottleneck_instruction_fetch_bw + tma_bottleneck_mispredictions + tma_bottleneck_data_cache_memory_bandwidth + tma_bottleneck_data_cache_memory_latency + tma_bottleneck_memory_data_tlbs + tma_bottleneck_memory_synchronization + tma_bottleneck_compute_bound_est + tma_bottleneck_irregular_overhead + tma_bottleneck_branching_overhead + tma_bottleneck_useful_work)",
"MetricGroup": "BvOB;Cor;Offcore",
"MetricName": "tma_bottleneck_other_bottlenecks",
"MetricThreshold": "tma_bottleneck_other_bottlenecks > 20",
@@ -220,6 +229,7 @@
},
{
"BriefDescription": "Total pipeline cost of \"useful operations\" - the portion of Retiring category not covered by Branching_Overhead nor Irregular_Overhead.",
+ "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "100 * (tma_retiring - (BR_INST_RETIRED.ALL_BRANCHES + 2 * BR_INST_RETIRED.NEAR_CALL + INST_RETIRED.NOP) / tma_info_thread_slots - tma_microcode_sequencer / (tma_few_uops_instructions + tma_microcode_sequencer) * (tma_assists / tma_microcode_sequencer) * tma_heavy_operations)",
"MetricGroup": "BvUW;Ret",
"MetricName": "tma_bottleneck_useful_work",
@@ -427,7 +437,7 @@
"MetricGroup": "BvMB;MemoryBW;TopdownL4;tma_L4_group;tma_issueBW;tma_issueSL;tma_issueSmSt;tma_l1_bound_group",
"MetricName": "tma_fb_full",
"MetricThreshold": "tma_fb_full > 0.3",
- "PublicDescription": "This metric does a *rough estimation* of how often L1D Fill Buffer unavailability limited additional L1D miss memory access requests to proceed. The higher the metric value; the deeper the memory hierarchy level the misses are satisfied from (metric values >1 are valid). Often it hints on approaching bandwidth limits (to L2 cache; L3 cache or external memory). Related metrics: tma_bottleneck_cache_memory_bandwidth, tma_info_system_dram_bw_use, tma_mem_bandwidth, tma_sq_full, tma_store_latency, tma_streaming_stores",
+ "PublicDescription": "This metric does a *rough estimation* of how often L1D Fill Buffer unavailability limited additional L1D miss memory access requests to proceed. The higher the metric value; the deeper the memory hierarchy level the misses are satisfied from (metric values >1 are valid). Often it hints on approaching bandwidth limits (to L2 cache; L3 cache or external memory). Related metrics: tma_bottleneck_data_cache_memory_bandwidth, tma_info_system_dram_bw_use, tma_mem_bandwidth, tma_sq_full, tma_store_latency, tma_streaming_stores",
"ScaleUnit": "100%"
},
{
@@ -619,6 +629,7 @@
},
{
"BriefDescription": "Total pipeline cost of DSB (uop cache) hits - subset of the Instruction_Fetch_BW Bottleneck",
+ "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "100 * (tma_frontend_bound * (tma_fetch_bandwidth / (tma_fetch_bandwidth + tma_fetch_latency)) * (tma_dsb / (tma_dsb + tma_lsd + tma_mite + tma_ms)))",
"MetricGroup": "DSB;Fed;FetchBW;tma_issueFB",
"MetricName": "tma_info_botlnk_l2_dsb_bandwidth",
@@ -1074,7 +1085,7 @@
"MetricName": "tma_info_memory_tlb_store_stlb_mpki"
},
{
- "BriefDescription": "",
+ "BriefDescription": "Mem;Backend;CacheHits",
"MetricExpr": "UOPS_EXECUTED.THREAD / (UOPS_EXECUTED.CORE_CYCLES_GE_1 / 2 if #SMT_on else cpu@UOPS_EXECUTED.THREAD\\,cmask\\=1@)",
"MetricGroup": "Cor;Pipeline;PortsUtil;SMT",
"MetricName": "tma_info_pipeline_execute"
@@ -1098,6 +1109,12 @@
"MetricName": "tma_info_pipeline_fetch_mite"
},
{
+ "BriefDescription": "Average number of uops fetched from MS per cycle",
+ "MetricExpr": "IDQ.MS_UOPS / cpu@IDQ.MS_UOPS\\,cmask\\=1@",
+ "MetricGroup": "Fed;FetchLat;MicroSeq",
+ "MetricName": "tma_info_pipeline_fetch_ms"
+ },
+ {
"BriefDescription": "Instructions per a microcode Assist invocation",
"MetricExpr": "INST_RETIRED.ANY / ASSISTS.ANY",
"MetricGroup": "MicroSeq;Pipeline;Ret;Retire",
@@ -1113,7 +1130,7 @@
},
{
"BriefDescription": "Measured Average Core Frequency for unhalted processors [GHz]",
- "MetricExpr": "tma_info_system_turbo_utilization * TSC / 1e9 / tma_info_system_time",
+ "MetricExpr": "tma_info_system_turbo_utilization * msr@tsc@ / 1e9 / tma_info_system_time",
"MetricGroup": "Power;Summary",
"MetricName": "tma_info_system_core_frequency"
},
@@ -1125,7 +1142,7 @@
},
{
"BriefDescription": "Average number of utilized CPUs",
- "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / TSC",
+ "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / msr@tsc@",
"MetricGroup": "Summary",
"MetricName": "tma_info_system_cpus_utilized"
},
@@ -1134,7 +1151,7 @@
"MetricExpr": "64 * (UNC_ARB_TRK_REQUESTS.ALL + UNC_ARB_COH_TRK_REQUESTS.ALL) / 1e6 / tma_info_system_time / 1e3",
"MetricGroup": "HPC;MemOffcore;MemoryBW;SoC;tma_issueBW",
"MetricName": "tma_info_system_dram_bw_use",
- "PublicDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]. Related metrics: tma_bottleneck_cache_memory_bandwidth, tma_fb_full, tma_mem_bandwidth, tma_sq_full"
+ "PublicDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]. Related metrics: tma_bottleneck_data_cache_memory_bandwidth, tma_fb_full, tma_mem_bandwidth, tma_sq_full"
},
{
"BriefDescription": "Giga Floating Point Operations Per Second",
@@ -1165,6 +1182,7 @@
},
{
"BriefDescription": "Average number of parallel data read requests to external memory",
+ "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "UNC_ARB_DAT_OCCUPANCY.RD / UNC_ARB_DAT_OCCUPANCY.RD@cmask\\=1@",
"MetricGroup": "Mem;MemoryBW;SoC",
"MetricName": "tma_info_system_mem_parallel_reads",
@@ -1316,12 +1334,12 @@
"ScaleUnit": "100%"
},
{
- "BriefDescription": "This metric([SKL+] roughly; [LNL]) estimates fraction of cycles with demand load accesses that hit the L1D cache",
+ "BriefDescription": "This metric ([SKL+] roughly; [LNL]) estimates fraction of cycles with demand load accesses that hit the L1D cache",
"MetricExpr": "min(2 * (MEM_INST_RETIRED.ALL_LOADS - MEM_LOAD_RETIRED.FB_HIT - MEM_LOAD_RETIRED.L1_MISS) * 20 / 100, max(CYCLE_ACTIVITY.CYCLES_MEM_ANY - CYCLE_ACTIVITY.CYCLES_L1D_MISS, 0)) / tma_info_thread_clks",
"MetricGroup": "BvML;MemoryLat;TopdownL4;tma_L4_group;tma_l1_bound_group",
"MetricName": "tma_l1_latency_dependency",
"MetricThreshold": "tma_l1_latency_dependency > 0.1 & (tma_l1_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
- "PublicDescription": "This metric([SKL+] roughly; [LNL]) estimates fraction of cycles with demand load accesses that hit the L1D cache. The short latency of the L1D cache may be exposed in pointer-chasing memory access patterns as an example. Sample with: MEM_LOAD_RETIRED.L1_HIT",
+ "PublicDescription": "This metric ([SKL+] roughly; [LNL]) estimates fraction of cycles with demand load accesses that hit the L1D cache. The short latency of the L1D cache may be exposed in pointer-chasing memory access patterns as an example. Sample with: MEM_LOAD_RETIRED.L1_HIT",
"ScaleUnit": "100%"
},
{
@@ -1345,7 +1363,6 @@
},
{
"BriefDescription": "This metric estimates how often the CPU was stalled due to loads accesses to L3 cache or contended with a sibling Core",
- "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "(CYCLE_ACTIVITY.STALLS_L2_MISS - CYCLE_ACTIVITY.STALLS_L3_MISS) / tma_info_thread_clks",
"MetricGroup": "CacheHits;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
"MetricName": "tma_l3_bound",
@@ -1359,7 +1376,7 @@
"MetricGroup": "BvML;MemoryLat;TopdownL4;tma_L4_group;tma_issueLat;tma_l3_bound_group",
"MetricName": "tma_l3_hit_latency",
"MetricThreshold": "tma_l3_hit_latency > 0.1 & (tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
- "PublicDescription": "This metric estimates fraction of cycles with demand load accesses that hit the L3 cache under unloaded scenarios (possibly L3 latency limited). Avoiding private cache misses (i.e. L2 misses/L3 hits) will improve the latency; reduce contention with sibling physical cores and increase performance. Note the value of this node may overlap with its siblings. Sample with: MEM_LOAD_RETIRED.L3_HIT_PS. Related metrics: tma_bottleneck_cache_memory_latency, tma_mem_latency",
+ "PublicDescription": "This metric estimates fraction of cycles with demand load accesses that hit the L3 cache under unloaded scenarios (possibly L3 latency limited). Avoiding private cache misses (i.e. L2 misses/L3 hits) will improve the latency; reduce contention with sibling physical cores and increase performance. Note the value of this node may overlap with its siblings. Sample with: MEM_LOAD_RETIRED.L3_HIT_PS. Related metrics: tma_bottleneck_data_cache_memory_latency, tma_mem_latency",
"ScaleUnit": "100%"
},
{
@@ -1465,7 +1482,7 @@
"MetricGroup": "BvMB;MemoryBW;Offcore;TopdownL4;tma_L4_group;tma_dram_bound_group;tma_issueBW",
"MetricName": "tma_mem_bandwidth",
"MetricThreshold": "tma_mem_bandwidth > 0.2 & (tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
- "PublicDescription": "This metric estimates fraction of cycles where the core's performance was likely hurt due to approaching bandwidth limits of external memory - DRAM ([SPR-HBM] and/or HBM). The underlying heuristic assumes that a similar off-core traffic is generated by all IA cores. This metric does not aggregate non-data-read requests by this logical processor; requests from other IA Logical Processors/Physical Cores/sockets; or other non-IA devices like GPU; hence the maximum external memory bandwidth limits may or may not be approached when this metric is flagged (see Uncore counters for that). Related metrics: tma_bottleneck_cache_memory_bandwidth, tma_fb_full, tma_info_system_dram_bw_use, tma_sq_full",
+ "PublicDescription": "This metric estimates fraction of cycles where the core's performance was likely hurt due to approaching bandwidth limits of external memory - DRAM ([SPR-HBM] and/or HBM). The underlying heuristic assumes that a similar off-core traffic is generated by all IA cores. This metric does not aggregate non-data-read requests by this logical processor; requests from other IA Logical Processors/Physical Cores/sockets; or other non-IA devices like GPU; hence the maximum external memory bandwidth limits may or may not be approached when this metric is flagged (see Uncore counters for that). Related metrics: tma_bottleneck_data_cache_memory_bandwidth, tma_fb_full, tma_info_system_dram_bw_use, tma_sq_full",
"ScaleUnit": "100%"
},
{
@@ -1474,7 +1491,7 @@
"MetricGroup": "BvML;MemoryLat;Offcore;TopdownL4;tma_L4_group;tma_dram_bound_group;tma_issueLat",
"MetricName": "tma_mem_latency",
"MetricThreshold": "tma_mem_latency > 0.1 & (tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
- "PublicDescription": "This metric estimates fraction of cycles where the performance was likely hurt due to latency from external memory - DRAM ([SPR-HBM] and/or HBM). This metric does not aggregate requests from other Logical Processors/Physical Cores/sockets (see Uncore counters for that). Related metrics: tma_bottleneck_cache_memory_latency, tma_l3_hit_latency",
+ "PublicDescription": "This metric estimates fraction of cycles where the performance was likely hurt due to latency from external memory - DRAM ([SPR-HBM] and/or HBM). This metric does not aggregate requests from other Logical Processors/Physical Cores/sockets (see Uncore counters for that). Related metrics: tma_bottleneck_data_cache_memory_latency, tma_l3_hit_latency",
"ScaleUnit": "100%"
},
{
@@ -1542,7 +1559,7 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles in which CPU was likely limited due to the Microcode Sequencer (MS) unit - see Microcode_Sequencer node for details.",
- "MetricExpr": "cpu@IDQ.MS_UOPS\\,cmask\\=1@ / tma_info_core_core_clks / 2",
+ "MetricExpr": "cpu@IDQ.MS_UOPS\\,cmask\\=1@ / tma_info_core_core_clks / 3.3",
"MetricGroup": "MicroSeq;TopdownL3;tma_L3_group;tma_fetch_bandwidth_group",
"MetricName": "tma_ms",
"MetricThreshold": "tma_ms > 0.05 & tma_fetch_bandwidth > 0.2",
@@ -1676,7 +1693,7 @@
{
"BriefDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired",
"DefaultMetricgroupName": "TopdownL1",
- "MetricExpr": "topdown\\-retiring / (topdown\\-fe\\-bound + topdown\\-bad\\-spec + topdown\\-retiring + topdown\\-be\\-bound) + 0 * tma_info_thread_slots",
+ "MetricExpr": "topdown\\-retiring / (topdown\\-fe\\-bound + topdown\\-bad\\-spec + topdown\\-retiring + topdown\\-be\\-bound)",
"MetricGroup": "BvUW;Default;TmaL1;TopdownL1;tma_L1_group",
"MetricName": "tma_retiring",
"MetricThreshold": "tma_retiring > 0.7 | tma_heavy_operations > 0.1",
@@ -1713,7 +1730,6 @@
},
{
"BriefDescription": "This metric represents rate of split store accesses",
- "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "MEM_INST_RETIRED.SPLIT_STORES / tma_info_core_core_clks",
"MetricGroup": "TopdownL4;tma_L4_group;tma_issueSpSt;tma_store_bound_group",
"MetricName": "tma_split_stores",
@@ -1727,7 +1743,7 @@
"MetricGroup": "BvMB;MemoryBW;Offcore;TopdownL4;tma_L4_group;tma_issueBW;tma_l3_bound_group",
"MetricName": "tma_sq_full",
"MetricThreshold": "tma_sq_full > 0.3 & (tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
- "PublicDescription": "This metric measures fraction of cycles where the Super Queue (SQ) was full taking into account all request-types and both hardware SMT threads (Logical Processors). Related metrics: tma_bottleneck_cache_memory_bandwidth, tma_fb_full, tma_info_system_dram_bw_use, tma_mem_bandwidth",
+ "PublicDescription": "This metric measures fraction of cycles where the Super Queue (SQ) was full taking into account all request-types and both hardware SMT threads (Logical Processors). Related metrics: tma_bottleneck_data_cache_memory_bandwidth, tma_fb_full, tma_info_system_dram_bw_use, tma_mem_bandwidth",
"ScaleUnit": "100%"
},
{
@@ -1741,7 +1757,6 @@
},
{
"BriefDescription": "This metric roughly estimates fraction of cycles when the memory subsystem had loads blocked since they could not forward data from earlier (in program order) overlapping stores",
- "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "13 * LD_BLOCKS.STORE_FORWARD / tma_info_thread_clks",
"MetricGroup": "TopdownL4;tma_L4_group;tma_l1_bound_group",
"MetricName": "tma_store_fwd_blk",
diff --git a/tools/perf/pmu-events/arch/x86/sandybridge/snb-metrics.json b/tools/perf/pmu-events/arch/x86/sandybridge/snb-metrics.json
index 823d8b7c4224..d40761903429 100644
--- a/tools/perf/pmu-events/arch/x86/sandybridge/snb-metrics.json
+++ b/tools/perf/pmu-events/arch/x86/sandybridge/snb-metrics.json
@@ -1,49 +1,49 @@
[
{
"BriefDescription": "C2 residency percent per package",
- "MetricExpr": "cstate_pkg@c2\\-residency@ / TSC",
+ "MetricExpr": "cstate_pkg@c2\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C2_Pkg_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C3 residency percent per core",
- "MetricExpr": "cstate_core@c3\\-residency@ / TSC",
+ "MetricExpr": "cstate_core@c3\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C3_Core_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C3 residency percent per package",
- "MetricExpr": "cstate_pkg@c3\\-residency@ / TSC",
+ "MetricExpr": "cstate_pkg@c3\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C3_Pkg_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C6 residency percent per core",
- "MetricExpr": "cstate_core@c6\\-residency@ / TSC",
+ "MetricExpr": "cstate_core@c6\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C6_Core_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C6 residency percent per package",
- "MetricExpr": "cstate_pkg@c6\\-residency@ / TSC",
+ "MetricExpr": "cstate_pkg@c6\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C6_Pkg_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C7 residency percent per core",
- "MetricExpr": "cstate_core@c7\\-residency@ / TSC",
+ "MetricExpr": "cstate_core@c7\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C7_Core_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C7 residency percent per package",
- "MetricExpr": "cstate_pkg@c7\\-residency@ / TSC",
+ "MetricExpr": "cstate_pkg@c7\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C7_Pkg_Residency",
"ScaleUnit": "100%"
@@ -71,7 +71,6 @@
},
{
"BriefDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend",
- "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "1 - (tma_frontend_bound + tma_bad_speculation + tma_retiring)",
"MetricGroup": "BvOB;TmaL1;TopdownL1;tma_L1_group",
"MetricName": "tma_backend_bound",
@@ -296,7 +295,7 @@
},
{
"BriefDescription": "Measured Average Core Frequency for unhalted processors [GHz]",
- "MetricExpr": "tma_info_system_turbo_utilization * TSC / 1e9 / tma_info_system_time",
+ "MetricExpr": "tma_info_system_turbo_utilization * msr@tsc@ / 1e9 / tma_info_system_time",
"MetricGroup": "Power;Summary",
"MetricName": "tma_info_system_core_frequency"
},
@@ -308,7 +307,7 @@
},
{
"BriefDescription": "Average number of utilized CPUs",
- "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / TSC",
+ "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / msr@tsc@",
"MetricGroup": "Summary",
"MetricName": "tma_info_system_cpus_utilized"
},
diff --git a/tools/perf/pmu-events/arch/x86/sapphirerapids/cache.json b/tools/perf/pmu-events/arch/x86/sapphirerapids/cache.json
index 21db53f9e9d6..c66324d41a89 100644
--- a/tools/perf/pmu-events/arch/x86/sapphirerapids/cache.json
+++ b/tools/perf/pmu-events/arch/x86/sapphirerapids/cache.json
@@ -1,10 +1,72 @@
[
{
+ "BriefDescription": "Hit snoop reply with data, line invalidated.",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x27",
+ "EventName": "CORE_SNOOP_RESPONSE.I_FWD_FE",
+ "PublicDescription": "Counts responses to snoops indicating the line will now be (I)nvalidated: removed from this core's cache, after the data is forwarded back to the requestor and indicating the data was found unmodified in the (FE) Forward or Exclusive State in this cores caches cache. A single snoop response from the core counts on all hyperthreads of the core.",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x20"
+ },
+ {
+ "BriefDescription": "HitM snoop reply with data, line invalidated.",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x27",
+ "EventName": "CORE_SNOOP_RESPONSE.I_FWD_M",
+ "PublicDescription": "Counts responses to snoops indicating the line will now be (I)nvalidated: removed from this core's caches, after the data is forwarded back to the requestor, and indicating the data was found modified(M) in this cores caches cache (aka HitM response). A single snoop response from the core counts on all hyperthreads of the core.",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x10"
+ },
+ {
+ "BriefDescription": "Hit snoop reply without sending the data, line invalidated.",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x27",
+ "EventName": "CORE_SNOOP_RESPONSE.I_HIT_FSE",
+ "PublicDescription": "Counts responses to snoops indicating the line will now be (I)nvalidated in this core's caches without forwarded back to the requestor. The line was in Forward, Shared or Exclusive (FSE) state in this cores caches. A single snoop response from the core counts on all hyperthreads of the core.",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x2"
+ },
+ {
+ "BriefDescription": "Line not found snoop reply",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x27",
+ "EventName": "CORE_SNOOP_RESPONSE.MISS",
+ "PublicDescription": "Counts responses to snoops indicating that the data was not found (IHitI) in this core's caches. A single snoop response from the core counts on all hyperthreads of the Core.",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Hit snoop reply with data, line kept in Shared state.",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x27",
+ "EventName": "CORE_SNOOP_RESPONSE.S_FWD_FE",
+ "PublicDescription": "Counts responses to snoops indicating the line may be kept on this core in the (S)hared state, after the data is forwarded back to the requestor, initially the data was found in the cache in the (FS) Forward or Shared state. A single snoop response from the core counts on all hyperthreads of the core.",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x40"
+ },
+ {
+ "BriefDescription": "HitM snoop reply with data, line kept in Shared state",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x27",
+ "EventName": "CORE_SNOOP_RESPONSE.S_FWD_M",
+ "PublicDescription": "Counts responses to snoops indicating the line may be kept on this core in the (S)hared state, after the data is forwarded back to the requestor, initially the data was found in the cache in the (M)odified state. A single snoop response from the core counts on all hyperthreads of the core.",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x8"
+ },
+ {
+ "BriefDescription": "Hit snoop reply without sending the data, line kept in Shared state.",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x27",
+ "EventName": "CORE_SNOOP_RESPONSE.S_HIT_FSE",
+ "PublicDescription": "Counts responses to snoops indicating the line was kept on this core in the (S)hared state, and that the data was found unmodified but not forwarded back to the requestor, initially the data was found in the cache in the (FSE) Forward, Shared state or Exclusive state. A single snoop response from the core counts on all hyperthreads of the core.",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x4"
+ },
+ {
"BriefDescription": "L1D.HWPF_MISS",
"Counter": "0,1,2,3",
"EventCode": "0x51",
"EventName": "L1D.HWPF_MISS",
- "PublicDescription": "L1D.HWPF_MISS Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x20"
},
@@ -13,7 +75,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x51",
"EventName": "L1D.REPLACEMENT",
- "PublicDescription": "Counts L1D data line replacements including opportunistic replacements, and replacements that require stall-for-replace or block-for-replace. Available PDIST counters: 0",
+ "PublicDescription": "Counts L1D data line replacements including opportunistic replacements, and replacements that require stall-for-replace or block-for-replace.",
"SampleAfterValue": "100003",
"UMask": "0x1"
},
@@ -22,7 +84,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x48",
"EventName": "L1D_PEND_MISS.FB_FULL",
- "PublicDescription": "Counts number of cycles a demand request has waited due to L1D Fill Buffer (FB) unavailability. Demand requests include cacheable/uncacheable demand load, store, lock or SW prefetch accesses. Available PDIST counters: 0",
+ "PublicDescription": "Counts number of cycles a demand request has waited due to L1D Fill Buffer (FB) unavailability. Demand requests include cacheable/uncacheable demand load, store, lock or SW prefetch accesses.",
"SampleAfterValue": "1000003",
"UMask": "0x2"
},
@@ -33,7 +95,7 @@
"EdgeDetect": "1",
"EventCode": "0x48",
"EventName": "L1D_PEND_MISS.FB_FULL_PERIODS",
- "PublicDescription": "Counts number of phases a demand request has waited due to L1D Fill Buffer (FB) unavailability. Demand requests include cacheable/uncacheable demand load, store, lock or SW prefetch accesses. Available PDIST counters: 0",
+ "PublicDescription": "Counts number of phases a demand request has waited due to L1D Fill Buffer (FB) unavailability. Demand requests include cacheable/uncacheable demand load, store, lock or SW prefetch accesses.",
"SampleAfterValue": "1000003",
"UMask": "0x2"
},
@@ -43,7 +105,6 @@
"Deprecated": "1",
"EventCode": "0x48",
"EventName": "L1D_PEND_MISS.L2_STALL",
- "PublicDescription": "This event is deprecated. Refer to new event L1D_PEND_MISS.L2_STALLS Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x4"
},
@@ -52,7 +113,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x48",
"EventName": "L1D_PEND_MISS.L2_STALLS",
- "PublicDescription": "Counts number of cycles a demand request has waited due to L1D due to lack of L2 resources. Demand requests include cacheable/uncacheable demand load, store, lock or SW prefetch accesses. Available PDIST counters: 0",
+ "PublicDescription": "Counts number of cycles a demand request has waited due to L1D due to lack of L2 resources. Demand requests include cacheable/uncacheable demand load, store, lock or SW prefetch accesses.",
"SampleAfterValue": "1000003",
"UMask": "0x4"
},
@@ -61,7 +122,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x48",
"EventName": "L1D_PEND_MISS.PENDING",
- "PublicDescription": "Counts number of L1D misses that are outstanding in each cycle, that is each cycle the number of Fill Buffers (FB) outstanding required by Demand Reads. FB either is held by demand loads, or it is held by non-demand loads and gets hit at least once by demand. The valid outstanding interval is defined until the FB deallocation by one of the following ways: from FB allocation, if FB is allocated by demand from the demand Hit FB, if it is allocated by hardware or software prefetch. Note: In the L1D, a Demand Read contains cacheable or noncacheable demand loads, including ones causing cache-line splits and reads due to page walks resulted from any request type. Available PDIST counters: 0",
+ "PublicDescription": "Counts number of L1D misses that are outstanding in each cycle, that is each cycle the number of Fill Buffers (FB) outstanding required by Demand Reads. FB either is held by demand loads, or it is held by non-demand loads and gets hit at least once by demand. The valid outstanding interval is defined until the FB deallocation by one of the following ways: from FB allocation, if FB is allocated by demand from the demand Hit FB, if it is allocated by hardware or software prefetch. Note: In the L1D, a Demand Read contains cacheable or noncacheable demand loads, including ones causing cache-line splits and reads due to page walks resulted from any request type.",
"SampleAfterValue": "1000003",
"UMask": "0x1"
},
@@ -71,7 +132,7 @@
"CounterMask": "1",
"EventCode": "0x48",
"EventName": "L1D_PEND_MISS.PENDING_CYCLES",
- "PublicDescription": "Counts duration of L1D miss outstanding in cycles. Available PDIST counters: 0",
+ "PublicDescription": "Counts duration of L1D miss outstanding in cycles.",
"SampleAfterValue": "1000003",
"UMask": "0x1"
},
@@ -80,7 +141,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x25",
"EventName": "L2_LINES_IN.ALL",
- "PublicDescription": "Counts the number of L2 cache lines filling the L2. Counting does not cover rejects. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of L2 cache lines filling the L2. Counting does not cover rejects.",
"SampleAfterValue": "100003",
"UMask": "0x1f"
},
@@ -89,7 +150,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x26",
"EventName": "L2_LINES_OUT.NON_SILENT",
- "PublicDescription": "Counts the number of lines that are evicted by L2 cache when triggered by an L2 cache fill. Those lines are in Modified state. Modified lines are written back to L3 Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of lines that are evicted by L2 cache when triggered by an L2 cache fill. Those lines are in Modified state. Modified lines are written back to L3",
"SampleAfterValue": "200003",
"UMask": "0x2"
},
@@ -98,7 +159,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x26",
"EventName": "L2_LINES_OUT.SILENT",
- "PublicDescription": "Counts the number of lines that are silently dropped by L2 cache. These lines are typically in Shared or Exclusive state. A non-threaded event. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of lines that are silently dropped by L2 cache. These lines are typically in Shared or Exclusive state. A non-threaded event.",
"SampleAfterValue": "200003",
"UMask": "0x1"
},
@@ -107,7 +168,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x26",
"EventName": "L2_LINES_OUT.USELESS_HWPF",
- "PublicDescription": "Counts the number of cache lines that have been prefetched by the L2 hardware prefetcher but not used by demand access when evicted from the L2 cache Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of cache lines that have been prefetched by the L2 hardware prefetcher but not used by demand access when evicted from the L2 cache",
"SampleAfterValue": "200003",
"UMask": "0x4"
},
@@ -116,7 +177,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_REQUEST.ALL",
- "PublicDescription": "Counts all requests that were hit or true misses in L2 cache. True-miss excludes misses that were merged with ongoing L2 misses. [This event is alias to L2_RQSTS.REFERENCES] Available PDIST counters: 0",
+ "PublicDescription": "Counts all requests that were hit or true misses in L2 cache. True-miss excludes misses that were merged with ongoing L2 misses. [This event is alias to L2_RQSTS.REFERENCES]",
"SampleAfterValue": "200003",
"UMask": "0xff"
},
@@ -125,7 +186,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_REQUEST.MISS",
- "PublicDescription": "Counts read requests of any type with true-miss in the L2 cache. True-miss excludes L2 misses that were merged with ongoing L2 misses. [This event is alias to L2_RQSTS.MISS] Available PDIST counters: 0",
+ "PublicDescription": "Counts read requests of any type with true-miss in the L2 cache. True-miss excludes L2 misses that were merged with ongoing L2 misses. [This event is alias to L2_RQSTS.MISS]",
"SampleAfterValue": "200003",
"UMask": "0x3f"
},
@@ -134,7 +195,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_RQSTS.ALL_CODE_RD",
- "PublicDescription": "Counts the total number of L2 code requests. Available PDIST counters: 0",
+ "PublicDescription": "Counts the total number of L2 code requests.",
"SampleAfterValue": "200003",
"UMask": "0xe4"
},
@@ -143,7 +204,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_RQSTS.ALL_DEMAND_DATA_RD",
- "PublicDescription": "Counts Demand Data Read requests accessing the L2 cache. These requests may hit or miss L2 cache. True-miss exclude misses that were merged with ongoing L2 misses. An access is counted once. Available PDIST counters: 0",
+ "PublicDescription": "Counts Demand Data Read requests accessing the L2 cache. These requests may hit or miss L2 cache. True-miss exclude misses that were merged with ongoing L2 misses. An access is counted once.",
"SampleAfterValue": "200003",
"UMask": "0xe1"
},
@@ -152,7 +213,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_RQSTS.ALL_DEMAND_MISS",
- "PublicDescription": "Counts demand requests that miss L2 cache. Available PDIST counters: 0",
+ "PublicDescription": "Counts demand requests that miss L2 cache.",
"SampleAfterValue": "200003",
"UMask": "0x27"
},
@@ -161,7 +222,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_RQSTS.ALL_DEMAND_REFERENCES",
- "PublicDescription": "Counts demand requests to L2 cache. Available PDIST counters: 0",
+ "PublicDescription": "Counts demand requests to L2 cache.",
"SampleAfterValue": "200003",
"UMask": "0xe7"
},
@@ -170,7 +231,6 @@
"Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_RQSTS.ALL_HWPF",
- "PublicDescription": "L2_RQSTS.ALL_HWPF Available PDIST counters: 0",
"SampleAfterValue": "200003",
"UMask": "0xf0"
},
@@ -179,7 +239,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_RQSTS.ALL_RFO",
- "PublicDescription": "Counts the total number of RFO (read for ownership) requests to L2 cache. L2 RFO requests include both L1D demand RFO misses as well as L1D RFO prefetches. Available PDIST counters: 0",
+ "PublicDescription": "Counts the total number of RFO (read for ownership) requests to L2 cache. L2 RFO requests include both L1D demand RFO misses as well as L1D RFO prefetches.",
"SampleAfterValue": "200003",
"UMask": "0xe2"
},
@@ -188,7 +248,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_RQSTS.CODE_RD_HIT",
- "PublicDescription": "Counts L2 cache hits when fetching instructions, code reads. Available PDIST counters: 0",
+ "PublicDescription": "Counts L2 cache hits when fetching instructions, code reads.",
"SampleAfterValue": "200003",
"UMask": "0xc4"
},
@@ -197,7 +257,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_RQSTS.CODE_RD_MISS",
- "PublicDescription": "Counts L2 cache misses when fetching instructions. Available PDIST counters: 0",
+ "PublicDescription": "Counts L2 cache misses when fetching instructions.",
"SampleAfterValue": "200003",
"UMask": "0x24"
},
@@ -206,7 +266,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_RQSTS.DEMAND_DATA_RD_HIT",
- "PublicDescription": "Counts the number of demand Data Read requests initiated by load instructions that hit L2 cache. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of demand Data Read requests initiated by load instructions that hit L2 cache.",
"SampleAfterValue": "200003",
"UMask": "0xc1"
},
@@ -215,7 +275,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_RQSTS.DEMAND_DATA_RD_MISS",
- "PublicDescription": "Counts demand Data Read requests with true-miss in the L2 cache. True-miss excludes misses that were merged with ongoing L2 misses. An access is counted once. Available PDIST counters: 0",
+ "PublicDescription": "Counts demand Data Read requests with true-miss in the L2 cache. True-miss excludes misses that were merged with ongoing L2 misses. An access is counted once.",
"SampleAfterValue": "200003",
"UMask": "0x21"
},
@@ -224,7 +284,6 @@
"Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_RQSTS.HWPF_MISS",
- "PublicDescription": "L2_RQSTS.HWPF_MISS Available PDIST counters: 0",
"SampleAfterValue": "200003",
"UMask": "0x30"
},
@@ -233,7 +292,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_RQSTS.MISS",
- "PublicDescription": "Counts read requests of any type with true-miss in the L2 cache. True-miss excludes L2 misses that were merged with ongoing L2 misses. [This event is alias to L2_REQUEST.MISS] Available PDIST counters: 0",
+ "PublicDescription": "Counts read requests of any type with true-miss in the L2 cache. True-miss excludes L2 misses that were merged with ongoing L2 misses. [This event is alias to L2_REQUEST.MISS]",
"SampleAfterValue": "200003",
"UMask": "0x3f"
},
@@ -242,7 +301,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_RQSTS.REFERENCES",
- "PublicDescription": "Counts all requests that were hit or true misses in L2 cache. True-miss excludes misses that were merged with ongoing L2 misses. [This event is alias to L2_REQUEST.ALL] Available PDIST counters: 0",
+ "PublicDescription": "Counts all requests that were hit or true misses in L2 cache. True-miss excludes misses that were merged with ongoing L2 misses. [This event is alias to L2_REQUEST.ALL]",
"SampleAfterValue": "200003",
"UMask": "0xff"
},
@@ -251,7 +310,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_RQSTS.RFO_HIT",
- "PublicDescription": "Counts the RFO (Read-for-Ownership) requests that hit L2 cache. Available PDIST counters: 0",
+ "PublicDescription": "Counts the RFO (Read-for-Ownership) requests that hit L2 cache.",
"SampleAfterValue": "200003",
"UMask": "0xc2"
},
@@ -260,7 +319,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_RQSTS.RFO_MISS",
- "PublicDescription": "Counts the RFO (Read-for-Ownership) requests that miss L2 cache. Available PDIST counters: 0",
+ "PublicDescription": "Counts the RFO (Read-for-Ownership) requests that miss L2 cache.",
"SampleAfterValue": "200003",
"UMask": "0x22"
},
@@ -269,7 +328,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_RQSTS.SWPF_HIT",
- "PublicDescription": "Counts Software prefetch requests that hit the L2 cache. Accounts for PREFETCHNTA and PREFETCHT0/1/2 instructions when FB is not full. Available PDIST counters: 0",
+ "PublicDescription": "Counts Software prefetch requests that hit the L2 cache. Accounts for PREFETCHNTA and PREFETCHT0/1/2 instructions when FB is not full.",
"SampleAfterValue": "200003",
"UMask": "0xc8"
},
@@ -278,7 +337,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_RQSTS.SWPF_MISS",
- "PublicDescription": "Counts Software prefetch requests that miss the L2 cache. Accounts for PREFETCHNTA and PREFETCHT0/1/2 instructions when FB is not full. Available PDIST counters: 0",
+ "PublicDescription": "Counts Software prefetch requests that miss the L2 cache. Accounts for PREFETCHNTA and PREFETCHT0/1/2 instructions when FB is not full.",
"SampleAfterValue": "200003",
"UMask": "0x28"
},
@@ -287,7 +346,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x23",
"EventName": "L2_TRANS.L2_WB",
- "PublicDescription": "Counts L2 writebacks that access L2 cache. Available PDIST counters: 0",
+ "PublicDescription": "Counts L2 writebacks that access L2 cache.",
"SampleAfterValue": "200003",
"UMask": "0x40"
},
@@ -296,7 +355,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0x2e",
"EventName": "LONGEST_LAT_CACHE.MISS",
- "PublicDescription": "Counts core-originated cacheable requests that miss the L3 cache (Longest Latency cache). Requests include data and code reads, Reads-for-Ownership (RFOs), speculative accesses and hardware prefetches to the L1 and L2. It does not include hardware prefetches to the L3, and may not count other types of requests to the L3. Available PDIST counters: 0",
+ "PublicDescription": "Counts core-originated cacheable requests that miss the L3 cache (Longest Latency cache). Requests include data and code reads, Reads-for-Ownership (RFOs), speculative accesses and hardware prefetches to the L1 and L2. It does not include hardware prefetches to the L3, and may not count other types of requests to the L3.",
"SampleAfterValue": "100003",
"UMask": "0x41"
},
@@ -305,7 +364,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0x2e",
"EventName": "LONGEST_LAT_CACHE.REFERENCE",
- "PublicDescription": "Counts core-originated cacheable requests to the L3 cache (Longest Latency cache). Requests include data and code reads, Reads-for-Ownership (RFOs), speculative accesses and hardware prefetches to the L1 and L2. It does not include hardware prefetches to the L3, and may not count other types of requests to the L3. Available PDIST counters: 0",
+ "PublicDescription": "Counts core-originated cacheable requests to the L3 cache (Longest Latency cache). Requests include data and code reads, Reads-for-Ownership (RFOs), speculative accesses and hardware prefetches to the L1 and L2. It does not include hardware prefetches to the L3, and may not count other types of requests to the L3.",
"SampleAfterValue": "100003",
"UMask": "0x4f"
},
@@ -394,7 +453,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x43",
"EventName": "MEM_LOAD_COMPLETED.L1_MISS_ANY",
- "PublicDescription": "Number of completed demand load requests that missed the L1 data cache including shadow misses (FB hits, merge to an ongoing L1D miss) Available PDIST counters: 0",
+ "PublicDescription": "Number of completed demand load requests that missed the L1 data cache including shadow misses (FB hits, merge to an ongoing L1D miss)",
"SampleAfterValue": "1000003",
"UMask": "0xfd"
},
@@ -582,7 +641,6 @@
"Counter": "0,1,2,3",
"EventCode": "0x44",
"EventName": "MEM_STORE_RETIRED.L2_HIT",
- "PublicDescription": "MEM_STORE_RETIRED.L2_HIT Available PDIST counters: 0",
"SampleAfterValue": "200003",
"UMask": "0x1"
},
@@ -591,7 +649,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xe5",
"EventName": "MEM_UOP_RETIRED.ANY",
- "PublicDescription": "Number of retired micro-operations (uops) for load or store memory accesses Available PDIST counters: 0",
+ "PublicDescription": "Number of retired micro-operations (uops) for load or store memory accesses",
"SampleAfterValue": "1000003",
"UMask": "0x3"
},
@@ -1073,7 +1131,6 @@
"Counter": "0,1,2,3",
"EventCode": "0x21",
"EventName": "OFFCORE_REQUESTS.ALL_REQUESTS",
- "PublicDescription": "OFFCORE_REQUESTS.ALL_REQUESTS Available PDIST counters: 0",
"SampleAfterValue": "100003",
"UMask": "0x80"
},
@@ -1082,7 +1139,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x21",
"EventName": "OFFCORE_REQUESTS.DATA_RD",
- "PublicDescription": "Counts the demand and prefetch data reads. All Core Data Reads include cacheable 'Demands' and L2 prefetchers (not L3 prefetchers). Counting also covers reads due to page walks resulted from any request type. Available PDIST counters: 0",
+ "PublicDescription": "Counts the demand and prefetch data reads. All Core Data Reads include cacheable 'Demands' and L2 prefetchers (not L3 prefetchers). Counting also covers reads due to page walks resulted from any request type.",
"SampleAfterValue": "100003",
"UMask": "0x8"
},
@@ -1091,7 +1148,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x21",
"EventName": "OFFCORE_REQUESTS.DEMAND_CODE_RD",
- "PublicDescription": "Counts both cacheable and non-cacheable code read requests. Available PDIST counters: 0",
+ "PublicDescription": "Counts both cacheable and non-cacheable code read requests.",
"SampleAfterValue": "100003",
"UMask": "0x2"
},
@@ -1100,7 +1157,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x21",
"EventName": "OFFCORE_REQUESTS.DEMAND_DATA_RD",
- "PublicDescription": "Counts the Demand Data Read requests sent to uncore. Use it in conjunction with OFFCORE_REQUESTS_OUTSTANDING to determine average latency in the uncore. Available PDIST counters: 0",
+ "PublicDescription": "Counts the Demand Data Read requests sent to uncore. Use it in conjunction with OFFCORE_REQUESTS_OUTSTANDING to determine average latency in the uncore.",
"SampleAfterValue": "100003",
"UMask": "0x1"
},
@@ -1109,7 +1166,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x21",
"EventName": "OFFCORE_REQUESTS.DEMAND_RFO",
- "PublicDescription": "Counts the demand RFO (read for ownership) requests including regular RFOs, locks, ItoM. Available PDIST counters: 0",
+ "PublicDescription": "Counts the demand RFO (read for ownership) requests including regular RFOs, locks, ItoM.",
"SampleAfterValue": "100003",
"UMask": "0x4"
},
@@ -1119,7 +1176,6 @@
"Deprecated": "1",
"EventCode": "0x20",
"EventName": "OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD",
- "PublicDescription": "This event is deprecated. Refer to new event OFFCORE_REQUESTS_OUTSTANDING.DATA_RD Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x8"
},
@@ -1129,7 +1185,6 @@
"CounterMask": "1",
"EventCode": "0x20",
"EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD",
- "PublicDescription": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x8"
},
@@ -1139,7 +1194,7 @@
"CounterMask": "1",
"EventCode": "0x20",
"EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_CODE_RD",
- "PublicDescription": "Counts the number of offcore outstanding Code Reads transactions in the super queue every cycle. The 'Offcore outstanding' state of the transaction lasts from the L2 miss until the sending transaction completion to requestor (SQ deallocation). See the corresponding Umask under OFFCORE_REQUESTS. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of offcore outstanding Code Reads transactions in the super queue every cycle. The 'Offcore outstanding' state of the transaction lasts from the L2 miss until the sending transaction completion to requestor (SQ deallocation). See the corresponding Umask under OFFCORE_REQUESTS.",
"SampleAfterValue": "1000003",
"UMask": "0x2"
},
@@ -1149,7 +1204,6 @@
"CounterMask": "1",
"EventCode": "0x20",
"EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_DATA_RD",
- "PublicDescription": "Cycles where at least 1 outstanding demand data read request is pending. Available PDIST counters: 0",
"SampleAfterValue": "2000003",
"UMask": "0x1"
},
@@ -1159,7 +1213,6 @@
"CounterMask": "1",
"EventCode": "0x20",
"EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO",
- "PublicDescription": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x4"
},
@@ -1168,7 +1221,6 @@
"Counter": "0,1,2,3",
"EventCode": "0x20",
"EventName": "OFFCORE_REQUESTS_OUTSTANDING.DATA_RD",
- "PublicDescription": "OFFCORE_REQUESTS_OUTSTANDING.DATA_RD Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x8"
},
@@ -1177,7 +1229,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x20",
"EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_CODE_RD",
- "PublicDescription": "Counts the number of offcore outstanding Code Reads transactions in the super queue every cycle. The 'Offcore outstanding' state of the transaction lasts from the L2 miss until the sending transaction completion to requestor (SQ deallocation). See the corresponding Umask under OFFCORE_REQUESTS. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of offcore outstanding Code Reads transactions in the super queue every cycle. The 'Offcore outstanding' state of the transaction lasts from the L2 miss until the sending transaction completion to requestor (SQ deallocation). See the corresponding Umask under OFFCORE_REQUESTS.",
"SampleAfterValue": "1000003",
"UMask": "0x2"
},
@@ -1186,7 +1238,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x20",
"EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD",
- "PublicDescription": "For every cycle, increments by the number of outstanding demand data read requests pending. Requests are considered outstanding from the time they miss the core's L2 cache until the transaction completion message is sent to the requestor. Available PDIST counters: 0",
+ "PublicDescription": "For every cycle, increments by the number of outstanding demand data read requests pending. Requests are considered outstanding from the time they miss the core's L2 cache until the transaction completion message is sent to the requestor.",
"SampleAfterValue": "1000003",
"UMask": "0x1"
},
@@ -1195,7 +1247,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x2c",
"EventName": "SQ_MISC.BUS_LOCK",
- "PublicDescription": "Counts the more expensive bus lock needed to enforce cache coherency for certain memory accesses that need to be done atomically. Can be created by issuing an atomic instruction (via the LOCK prefix) which causes a cache line split or accesses uncacheable memory. Available PDIST counters: 0",
+ "PublicDescription": "Counts the more expensive bus lock needed to enforce cache coherency for certain memory accesses that need to be done atomically. Can be created by issuing an atomic instruction (via the LOCK prefix) which causes a cache line split or accesses uncacheable memory.",
"SampleAfterValue": "100003",
"UMask": "0x10"
},
@@ -1204,7 +1256,6 @@
"Counter": "0,1,2,3",
"EventCode": "0x40",
"EventName": "SW_PREFETCH_ACCESS.ANY",
- "PublicDescription": "Counts the number of PREFETCHNTA, PREFETCHW, PREFETCHT0, PREFETCHT1 or PREFETCHT2 instructions executed. Available PDIST counters: 0",
"SampleAfterValue": "100003",
"UMask": "0xf"
},
@@ -1213,7 +1264,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x40",
"EventName": "SW_PREFETCH_ACCESS.NTA",
- "PublicDescription": "Counts the number of PREFETCHNTA instructions executed. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of PREFETCHNTA instructions executed.",
"SampleAfterValue": "100003",
"UMask": "0x1"
},
@@ -1222,7 +1273,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x40",
"EventName": "SW_PREFETCH_ACCESS.PREFETCHW",
- "PublicDescription": "Counts the number of PREFETCHW instructions executed. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of PREFETCHW instructions executed.",
"SampleAfterValue": "100003",
"UMask": "0x8"
},
@@ -1231,7 +1282,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x40",
"EventName": "SW_PREFETCH_ACCESS.T0",
- "PublicDescription": "Counts the number of PREFETCHT0 instructions executed. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of PREFETCHT0 instructions executed.",
"SampleAfterValue": "100003",
"UMask": "0x2"
},
@@ -1240,7 +1291,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x40",
"EventName": "SW_PREFETCH_ACCESS.T1_T2",
- "PublicDescription": "Counts the number of PREFETCHT1 or PREFETCHT2 instructions executed. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of PREFETCHT1 or PREFETCHT2 instructions executed.",
"SampleAfterValue": "100003",
"UMask": "0x4"
}
diff --git a/tools/perf/pmu-events/arch/x86/sapphirerapids/floating-point.json b/tools/perf/pmu-events/arch/x86/sapphirerapids/floating-point.json
index 8c9207750c82..bc475e163227 100644
--- a/tools/perf/pmu-events/arch/x86/sapphirerapids/floating-point.json
+++ b/tools/perf/pmu-events/arch/x86/sapphirerapids/floating-point.json
@@ -5,7 +5,6 @@
"CounterMask": "1",
"EventCode": "0xb0",
"EventName": "ARITH.FPDIV_ACTIVE",
- "PublicDescription": "ARITH.FPDIV_ACTIVE Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x1"
},
@@ -14,7 +13,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc1",
"EventName": "ASSISTS.FP",
- "PublicDescription": "Counts all microcode Floating Point assists. Available PDIST counters: 0",
+ "PublicDescription": "Counts all microcode Floating Point assists.",
"SampleAfterValue": "100003",
"UMask": "0x2"
},
@@ -23,7 +22,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc1",
"EventName": "ASSISTS.SSE_AVX_MIX",
- "PublicDescription": "ASSISTS.SSE_AVX_MIX Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x10"
},
@@ -32,7 +30,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xb3",
"EventName": "FP_ARITH_DISPATCHED.PORT_0",
- "PublicDescription": "FP_ARITH_DISPATCHED.PORT_0 [This event is alias to FP_ARITH_DISPATCHED.V0] Available PDIST counters: 0",
"SampleAfterValue": "2000003",
"UMask": "0x1"
},
@@ -41,7 +38,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xb3",
"EventName": "FP_ARITH_DISPATCHED.PORT_1",
- "PublicDescription": "FP_ARITH_DISPATCHED.PORT_1 [This event is alias to FP_ARITH_DISPATCHED.V1] Available PDIST counters: 0",
"SampleAfterValue": "2000003",
"UMask": "0x2"
},
@@ -50,7 +46,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xb3",
"EventName": "FP_ARITH_DISPATCHED.PORT_5",
- "PublicDescription": "FP_ARITH_DISPATCHED.PORT_5 [This event is alias to FP_ARITH_DISPATCHED.V2] Available PDIST counters: 0",
"SampleAfterValue": "2000003",
"UMask": "0x4"
},
@@ -59,7 +54,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xb3",
"EventName": "FP_ARITH_DISPATCHED.V0",
- "PublicDescription": "FP_ARITH_DISPATCHED.V0 [This event is alias to FP_ARITH_DISPATCHED.PORT_0] Available PDIST counters: 0",
"SampleAfterValue": "2000003",
"UMask": "0x1"
},
@@ -68,7 +62,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xb3",
"EventName": "FP_ARITH_DISPATCHED.V1",
- "PublicDescription": "FP_ARITH_DISPATCHED.V1 [This event is alias to FP_ARITH_DISPATCHED.PORT_1] Available PDIST counters: 0",
"SampleAfterValue": "2000003",
"UMask": "0x2"
},
@@ -77,7 +70,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xb3",
"EventName": "FP_ARITH_DISPATCHED.V2",
- "PublicDescription": "FP_ARITH_DISPATCHED.V2 [This event is alias to FP_ARITH_DISPATCHED.PORT_5] Available PDIST counters: 0",
"SampleAfterValue": "2000003",
"UMask": "0x4"
},
@@ -86,7 +78,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc7",
"EventName": "FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE",
- "PublicDescription": "Number of SSE/AVX computational 128-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 2 computation operations, one for each element. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events. Available PDIST counters: 0",
+ "PublicDescription": "Number of SSE/AVX computational 128-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 2 computation operations, one for each element. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.",
"SampleAfterValue": "100003",
"UMask": "0x4"
},
@@ -95,7 +87,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc7",
"EventName": "FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE",
- "PublicDescription": "Number of SSE/AVX computational 128-bit packed single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 4 computation operations, one for each element. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT RSQRT RCP DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events. Available PDIST counters: 0",
+ "PublicDescription": "Number of SSE/AVX computational 128-bit packed single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 4 computation operations, one for each element. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT RSQRT RCP DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.",
"SampleAfterValue": "100003",
"UMask": "0x8"
},
@@ -104,7 +96,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc7",
"EventName": "FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE",
- "PublicDescription": "Number of SSE/AVX computational 256-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 4 computation operations, one for each element. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events. Available PDIST counters: 0",
+ "PublicDescription": "Number of SSE/AVX computational 256-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 4 computation operations, one for each element. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.",
"SampleAfterValue": "100003",
"UMask": "0x10"
},
@@ -113,7 +105,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc7",
"EventName": "FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE",
- "PublicDescription": "Number of SSE/AVX computational 256-bit packed single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 8 computation operations, one for each element. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT RSQRT RCP DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events. Available PDIST counters: 0",
+ "PublicDescription": "Number of SSE/AVX computational 256-bit packed single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 8 computation operations, one for each element. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT RSQRT RCP DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.",
"SampleAfterValue": "100003",
"UMask": "0x20"
},
@@ -122,7 +114,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc7",
"EventName": "FP_ARITH_INST_RETIRED.4_FLOPS",
- "PublicDescription": "Number of SSE/AVX computational 128-bit packed single precision and 256-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 2 or/and 4 computation operations, one for each element. Applies to SSE* and AVX* packed single precision floating-point and packed double precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX RCP14 RSQRT14 SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events. Available PDIST counters: 0",
+ "PublicDescription": "Number of SSE/AVX computational 128-bit packed single precision and 256-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 2 or/and 4 computation operations, one for each element. Applies to SSE* and AVX* packed single precision floating-point and packed double precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX RCP14 RSQRT14 SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.",
"SampleAfterValue": "100003",
"UMask": "0x18"
},
@@ -131,7 +123,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc7",
"EventName": "FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE",
- "PublicDescription": "Number of SSE/AVX computational 512-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 8 computation operations, one for each element. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT RSQRT14 RCP14 FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events. Available PDIST counters: 0",
+ "PublicDescription": "Number of SSE/AVX computational 512-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 8 computation operations, one for each element. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT RSQRT14 RCP14 FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.",
"SampleAfterValue": "100003",
"UMask": "0x40"
},
@@ -140,7 +132,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc7",
"EventName": "FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE",
- "PublicDescription": "Number of SSE/AVX computational 512-bit packed single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 16 computation operations, one for each element. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT RSQRT14 RCP14 FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events. Available PDIST counters: 0",
+ "PublicDescription": "Number of SSE/AVX computational 512-bit packed single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 16 computation operations, one for each element. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT RSQRT14 RCP14 FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.",
"SampleAfterValue": "100003",
"UMask": "0x80"
},
@@ -149,7 +141,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc7",
"EventName": "FP_ARITH_INST_RETIRED.8_FLOPS",
- "PublicDescription": "Number of SSE/AVX computational 256-bit packed single precision and 512-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 8 computation operations, one for each element. Applies to SSE* and AVX* packed single precision and double precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT RSQRT RSQRT14 RCP RCP14 DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events. Available PDIST counters: 0",
+ "PublicDescription": "Number of SSE/AVX computational 256-bit packed single precision and 512-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 8 computation operations, one for each element. Applies to SSE* and AVX* packed single precision and double precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT RSQRT RSQRT14 RCP RCP14 DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.",
"SampleAfterValue": "100003",
"UMask": "0x60"
},
@@ -158,7 +150,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc7",
"EventName": "FP_ARITH_INST_RETIRED.SCALAR",
- "PublicDescription": "Number of SSE/AVX computational scalar single precision and double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 1 computational operation. Applies to SSE* and AVX* scalar single precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT RSQRT RCP FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events. Available PDIST counters: 0",
+ "PublicDescription": "Number of SSE/AVX computational scalar single precision and double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 1 computational operation. Applies to SSE* and AVX* scalar single precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT RSQRT RCP FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.",
"SampleAfterValue": "1000003",
"UMask": "0x3"
},
@@ -167,7 +159,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc7",
"EventName": "FP_ARITH_INST_RETIRED.SCALAR_DOUBLE",
- "PublicDescription": "Number of SSE/AVX computational scalar double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 1 computational operation. Applies to SSE* and AVX* scalar double precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events. Available PDIST counters: 0",
+ "PublicDescription": "Number of SSE/AVX computational scalar double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 1 computational operation. Applies to SSE* and AVX* scalar double precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.",
"SampleAfterValue": "100003",
"UMask": "0x1"
},
@@ -176,7 +168,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc7",
"EventName": "FP_ARITH_INST_RETIRED.SCALAR_SINGLE",
- "PublicDescription": "Number of SSE/AVX computational scalar single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 1 computational operation. Applies to SSE* and AVX* scalar single precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT RSQRT RCP FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events. Available PDIST counters: 0",
+ "PublicDescription": "Number of SSE/AVX computational scalar single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 1 computational operation. Applies to SSE* and AVX* scalar single precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT RSQRT RCP FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.",
"SampleAfterValue": "100003",
"UMask": "0x2"
},
@@ -185,7 +177,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc7",
"EventName": "FP_ARITH_INST_RETIRED.VECTOR",
- "PublicDescription": "Number of any Vector retired FP arithmetic instructions. The DAZ and FTZ flags in the MXCSR register need to be set when using these events. Available PDIST counters: 0",
+ "PublicDescription": "Number of any Vector retired FP arithmetic instructions. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.",
"SampleAfterValue": "1000003",
"UMask": "0xfc"
},
@@ -194,7 +186,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xcf",
"EventName": "FP_ARITH_INST_RETIRED2.128B_PACKED_HALF",
- "PublicDescription": "FP_ARITH_INST_RETIRED2.128B_PACKED_HALF Available PDIST counters: 0",
"SampleAfterValue": "100003",
"UMask": "0x4"
},
@@ -203,7 +194,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xcf",
"EventName": "FP_ARITH_INST_RETIRED2.256B_PACKED_HALF",
- "PublicDescription": "FP_ARITH_INST_RETIRED2.256B_PACKED_HALF Available PDIST counters: 0",
"SampleAfterValue": "100003",
"UMask": "0x8"
},
@@ -212,7 +202,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xcf",
"EventName": "FP_ARITH_INST_RETIRED2.512B_PACKED_HALF",
- "PublicDescription": "FP_ARITH_INST_RETIRED2.512B_PACKED_HALF Available PDIST counters: 0",
"SampleAfterValue": "100003",
"UMask": "0x10"
},
@@ -221,7 +210,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xcf",
"EventName": "FP_ARITH_INST_RETIRED2.COMPLEX_SCALAR_HALF",
- "PublicDescription": "FP_ARITH_INST_RETIRED2.COMPLEX_SCALAR_HALF Available PDIST counters: 0",
"SampleAfterValue": "100003",
"UMask": "0x2"
},
@@ -230,7 +218,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xcf",
"EventName": "FP_ARITH_INST_RETIRED2.SCALAR",
- "PublicDescription": "FP_ARITH_INST_RETIRED2.SCALAR Available PDIST counters: 0",
+ "PublicDescription": "FP_ARITH_INST_RETIRED2.SCALAR",
"SampleAfterValue": "100003",
"UMask": "0x3"
},
@@ -239,7 +227,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xcf",
"EventName": "FP_ARITH_INST_RETIRED2.SCALAR_HALF",
- "PublicDescription": "FP_ARITH_INST_RETIRED2.SCALAR_HALF Available PDIST counters: 0",
"SampleAfterValue": "100003",
"UMask": "0x1"
},
@@ -248,7 +235,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xcf",
"EventName": "FP_ARITH_INST_RETIRED2.VECTOR",
- "PublicDescription": "FP_ARITH_INST_RETIRED2.VECTOR Available PDIST counters: 0",
+ "PublicDescription": "FP_ARITH_INST_RETIRED2.VECTOR",
"SampleAfterValue": "100003",
"UMask": "0x1c"
}
diff --git a/tools/perf/pmu-events/arch/x86/sapphirerapids/frontend.json b/tools/perf/pmu-events/arch/x86/sapphirerapids/frontend.json
index 9fe9d62b867a..793c486ffabe 100644
--- a/tools/perf/pmu-events/arch/x86/sapphirerapids/frontend.json
+++ b/tools/perf/pmu-events/arch/x86/sapphirerapids/frontend.json
@@ -4,7 +4,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x60",
"EventName": "BACLEARS.ANY",
- "PublicDescription": "Number of times the front-end is resteered when it finds a branch instruction in a fetch line. This is called Unknown Branch which occurs for the first time a branch instruction is fetched or when the branch is not tracked by the BPU (Branch Prediction Unit) anymore. Available PDIST counters: 0",
+ "PublicDescription": "Number of times the front-end is resteered when it finds a branch instruction in a fetch line. This is called Unknown Branch which occurs for the first time a branch instruction is fetched or when the branch is not tracked by the BPU (Branch Prediction Unit) anymore.",
"SampleAfterValue": "100003",
"UMask": "0x1"
},
@@ -13,7 +13,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x87",
"EventName": "DECODE.LCP",
- "PublicDescription": "Counts cycles that the Instruction Length decoder (ILD) stalls occurred due to dynamically changing prefix length of the decoded instruction (by operand size prefix instruction 0x66, address size prefix instruction 0x67 or REX.W for Intel64). Count is proportional to the number of prefixes in a 16B-line. This may result in a three-cycle penalty for each LCP (Length changing prefix) in a 16-byte chunk. Available PDIST counters: 0",
+ "PublicDescription": "Counts cycles that the Instruction Length decoder (ILD) stalls occurred due to dynamically changing prefix length of the decoded instruction (by operand size prefix instruction 0x66, address size prefix instruction 0x67 or REX.W for Intel64). Count is proportional to the number of prefixes in a 16B-line. This may result in a three-cycle penalty for each LCP (Length changing prefix) in a 16-byte chunk.",
"SampleAfterValue": "500009",
"UMask": "0x1"
},
@@ -22,7 +22,6 @@
"Counter": "0,1,2,3",
"EventCode": "0x87",
"EventName": "DECODE.MS_BUSY",
- "PublicDescription": "Cycles the Microcode Sequencer is busy. Available PDIST counters: 0",
"SampleAfterValue": "500009",
"UMask": "0x2"
},
@@ -31,7 +30,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x61",
"EventName": "DSB2MITE_SWITCHES.PENALTY_CYCLES",
- "PublicDescription": "Decode Stream Buffer (DSB) is a Uop-cache that holds translations of previously fetched instructions that were decoded by the legacy x86 decode pipeline (MITE). This event counts fetch penalty cycles when a transition occurs from DSB to MITE. Available PDIST counters: 0",
+ "PublicDescription": "Decode Stream Buffer (DSB) is a Uop-cache that holds translations of previously fetched instructions that were decoded by the legacy x86 decode pipeline (MITE). This event counts fetch penalty cycles when a transition occurs from DSB to MITE.",
"SampleAfterValue": "100003",
"UMask": "0x2"
},
@@ -249,7 +248,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x80",
"EventName": "ICACHE_DATA.STALLS",
- "PublicDescription": "Counts cycles where a code line fetch is stalled due to an L1 instruction cache miss. The decode pipeline works at a 32 Byte granularity. Available PDIST counters: 0",
+ "PublicDescription": "Counts cycles where a code line fetch is stalled due to an L1 instruction cache miss. The decode pipeline works at a 32 Byte granularity.",
"SampleAfterValue": "500009",
"UMask": "0x4"
},
@@ -260,7 +259,6 @@
"EdgeDetect": "1",
"EventCode": "0x80",
"EventName": "ICACHE_DATA.STALL_PERIODS",
- "PublicDescription": "ICACHE_DATA.STALL_PERIODS Available PDIST counters: 0",
"SampleAfterValue": "500009",
"UMask": "0x4"
},
@@ -269,7 +267,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x83",
"EventName": "ICACHE_TAG.STALLS",
- "PublicDescription": "Counts cycles where a code fetch is stalled due to L1 instruction cache tag miss. Available PDIST counters: 0",
+ "PublicDescription": "Counts cycles where a code fetch is stalled due to L1 instruction cache tag miss.",
"SampleAfterValue": "200003",
"UMask": "0x4"
},
@@ -279,7 +277,7 @@
"CounterMask": "1",
"EventCode": "0x79",
"EventName": "IDQ.DSB_CYCLES_ANY",
- "PublicDescription": "Counts the number of cycles uops were delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of cycles uops were delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path.",
"SampleAfterValue": "2000003",
"UMask": "0x8"
},
@@ -289,7 +287,7 @@
"CounterMask": "6",
"EventCode": "0x79",
"EventName": "IDQ.DSB_CYCLES_OK",
- "PublicDescription": "Counts the number of cycles where optimal number of uops was delivered to the Instruction Decode Queue (IDQ) from the DSB (Decode Stream Buffer) path. Count includes uops that may 'bypass' the IDQ. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of cycles where optimal number of uops was delivered to the Instruction Decode Queue (IDQ) from the DSB (Decode Stream Buffer) path. Count includes uops that may 'bypass' the IDQ.",
"SampleAfterValue": "2000003",
"UMask": "0x8"
},
@@ -298,7 +296,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x79",
"EventName": "IDQ.DSB_UOPS",
- "PublicDescription": "Counts the number of uops delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of uops delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path.",
"SampleAfterValue": "2000003",
"UMask": "0x8"
},
@@ -308,7 +306,7 @@
"CounterMask": "1",
"EventCode": "0x79",
"EventName": "IDQ.MITE_CYCLES_ANY",
- "PublicDescription": "Counts the number of cycles uops were delivered to the Instruction Decode Queue (IDQ) from the MITE (legacy decode pipeline) path. During these cycles uops are not being delivered from the Decode Stream Buffer (DSB). Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of cycles uops were delivered to the Instruction Decode Queue (IDQ) from the MITE (legacy decode pipeline) path. During these cycles uops are not being delivered from the Decode Stream Buffer (DSB).",
"SampleAfterValue": "2000003",
"UMask": "0x4"
},
@@ -318,7 +316,7 @@
"CounterMask": "6",
"EventCode": "0x79",
"EventName": "IDQ.MITE_CYCLES_OK",
- "PublicDescription": "Counts the number of cycles where optimal number of uops was delivered to the Instruction Decode Queue (IDQ) from the MITE (legacy decode pipeline) path. During these cycles uops are not being delivered from the Decode Stream Buffer (DSB). Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of cycles where optimal number of uops was delivered to the Instruction Decode Queue (IDQ) from the MITE (legacy decode pipeline) path. During these cycles uops are not being delivered from the Decode Stream Buffer (DSB).",
"SampleAfterValue": "2000003",
"UMask": "0x4"
},
@@ -327,7 +325,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x79",
"EventName": "IDQ.MITE_UOPS",
- "PublicDescription": "Counts the number of uops delivered to Instruction Decode Queue (IDQ) from the MITE path. This also means that uops are not being delivered from the Decode Stream Buffer (DSB). Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of uops delivered to Instruction Decode Queue (IDQ) from the MITE path. This also means that uops are not being delivered from the Decode Stream Buffer (DSB).",
"SampleAfterValue": "2000003",
"UMask": "0x4"
},
@@ -337,7 +335,7 @@
"CounterMask": "1",
"EventCode": "0x79",
"EventName": "IDQ.MS_CYCLES_ANY",
- "PublicDescription": "Counts cycles during which uops are being delivered to Instruction Decode Queue (IDQ) while the Microcode Sequencer (MS) is busy. Uops maybe initiated by Decode Stream Buffer (DSB) or MITE. Available PDIST counters: 0",
+ "PublicDescription": "Counts cycles during which uops are being delivered to Instruction Decode Queue (IDQ) while the Microcode Sequencer (MS) is busy. Uops maybe initiated by Decode Stream Buffer (DSB) or MITE.",
"SampleAfterValue": "2000003",
"UMask": "0x20"
},
@@ -348,7 +346,7 @@
"EdgeDetect": "1",
"EventCode": "0x79",
"EventName": "IDQ.MS_SWITCHES",
- "PublicDescription": "Number of switches from DSB (Decode Stream Buffer) or MITE (legacy decode pipeline) to the Microcode Sequencer. Available PDIST counters: 0",
+ "PublicDescription": "Number of switches from DSB (Decode Stream Buffer) or MITE (legacy decode pipeline) to the Microcode Sequencer.",
"SampleAfterValue": "100003",
"UMask": "0x20"
},
@@ -357,7 +355,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x79",
"EventName": "IDQ.MS_UOPS",
- "PublicDescription": "Counts the total number of uops delivered by the Microcode Sequencer (MS). Available PDIST counters: 0",
+ "PublicDescription": "Counts the total number of uops delivered by the Microcode Sequencer (MS).",
"SampleAfterValue": "1000003",
"UMask": "0x20"
},
@@ -366,7 +364,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0x9c",
"EventName": "IDQ_BUBBLES.CORE",
- "PublicDescription": "Counts the number of uops not delivered to by the Instruction Decode Queue (IDQ) to the back-end of the pipeline when there was no back-end stalls. This event counts for one SMT thread in a given cycle. [This event is alias to IDQ_UOPS_NOT_DELIVERED.CORE] Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of uops not delivered to by the Instruction Decode Queue (IDQ) to the back-end of the pipeline when there was no back-end stalls. This event counts for one SMT thread in a given cycle. [This event is alias to IDQ_UOPS_NOT_DELIVERED.CORE]",
"SampleAfterValue": "1000003",
"UMask": "0x1"
},
@@ -376,7 +374,7 @@
"CounterMask": "6",
"EventCode": "0x9c",
"EventName": "IDQ_BUBBLES.CYCLES_0_UOPS_DELIV.CORE",
- "PublicDescription": "Counts the number of cycles when no uops were delivered by the Instruction Decode Queue (IDQ) to the back-end of the pipeline when there was no back-end stalls. This event counts for one SMT thread in a given cycle. [This event is alias to IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE] Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of cycles when no uops were delivered by the Instruction Decode Queue (IDQ) to the back-end of the pipeline when there was no back-end stalls. This event counts for one SMT thread in a given cycle. [This event is alias to IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE]",
"SampleAfterValue": "1000003",
"UMask": "0x1"
},
@@ -387,7 +385,7 @@
"EventCode": "0x9c",
"EventName": "IDQ_BUBBLES.CYCLES_FE_WAS_OK",
"Invert": "1",
- "PublicDescription": "Counts the number of cycles when the optimal number of uops were delivered by the Instruction Decode Queue (IDQ) to the back-end of the pipeline when there was no back-end stalls. This event counts for one SMT thread in a given cycle. [This event is alias to IDQ_UOPS_NOT_DELIVERED.CYCLES_FE_WAS_OK] Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of cycles when the optimal number of uops were delivered by the Instruction Decode Queue (IDQ) to the back-end of the pipeline when there was no back-end stalls. This event counts for one SMT thread in a given cycle. [This event is alias to IDQ_UOPS_NOT_DELIVERED.CYCLES_FE_WAS_OK]",
"SampleAfterValue": "1000003",
"UMask": "0x1"
},
@@ -396,7 +394,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0x9c",
"EventName": "IDQ_UOPS_NOT_DELIVERED.CORE",
- "PublicDescription": "Counts the number of uops not delivered to by the Instruction Decode Queue (IDQ) to the back-end of the pipeline when there was no back-end stalls. This event counts for one SMT thread in a given cycle. [This event is alias to IDQ_BUBBLES.CORE] Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of uops not delivered to by the Instruction Decode Queue (IDQ) to the back-end of the pipeline when there was no back-end stalls. This event counts for one SMT thread in a given cycle. [This event is alias to IDQ_BUBBLES.CORE]",
"SampleAfterValue": "1000003",
"UMask": "0x1"
},
@@ -406,7 +404,7 @@
"CounterMask": "6",
"EventCode": "0x9c",
"EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE",
- "PublicDescription": "Counts the number of cycles when no uops were delivered by the Instruction Decode Queue (IDQ) to the back-end of the pipeline when there was no back-end stalls. This event counts for one SMT thread in a given cycle. [This event is alias to IDQ_BUBBLES.CYCLES_0_UOPS_DELIV.CORE] Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of cycles when no uops were delivered by the Instruction Decode Queue (IDQ) to the back-end of the pipeline when there was no back-end stalls. This event counts for one SMT thread in a given cycle. [This event is alias to IDQ_BUBBLES.CYCLES_0_UOPS_DELIV.CORE]",
"SampleAfterValue": "1000003",
"UMask": "0x1"
},
@@ -417,7 +415,7 @@
"EventCode": "0x9c",
"EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_FE_WAS_OK",
"Invert": "1",
- "PublicDescription": "Counts the number of cycles when the optimal number of uops were delivered by the Instruction Decode Queue (IDQ) to the back-end of the pipeline when there was no back-end stalls. This event counts for one SMT thread in a given cycle. [This event is alias to IDQ_BUBBLES.CYCLES_FE_WAS_OK] Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of cycles when the optimal number of uops were delivered by the Instruction Decode Queue (IDQ) to the back-end of the pipeline when there was no back-end stalls. This event counts for one SMT thread in a given cycle. [This event is alias to IDQ_BUBBLES.CYCLES_FE_WAS_OK]",
"SampleAfterValue": "1000003",
"UMask": "0x1"
}
diff --git a/tools/perf/pmu-events/arch/x86/sapphirerapids/memory.json b/tools/perf/pmu-events/arch/x86/sapphirerapids/memory.json
index 7c3f9b76d367..5e6c1f05c981 100644
--- a/tools/perf/pmu-events/arch/x86/sapphirerapids/memory.json
+++ b/tools/perf/pmu-events/arch/x86/sapphirerapids/memory.json
@@ -5,7 +5,6 @@
"CounterMask": "6",
"EventCode": "0xa3",
"EventName": "CYCLE_ACTIVITY.STALLS_L3_MISS",
- "PublicDescription": "Execution stalls while L3 cache miss demand load is outstanding. Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x6"
},
@@ -14,7 +13,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc3",
"EventName": "MACHINE_CLEARS.MEMORY_ORDERING",
- "PublicDescription": "Counts the number of Machine Clears detected dye to memory ordering. Memory Ordering Machine Clears may apply when a memory read may not conform to the memory ordering rules of the x86 architecture Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of Machine Clears detected dye to memory ordering. Memory Ordering Machine Clears may apply when a memory read may not conform to the memory ordering rules of the x86 architecture",
"SampleAfterValue": "100003",
"UMask": "0x2"
},
@@ -24,7 +23,6 @@
"CounterMask": "2",
"EventCode": "0x47",
"EventName": "MEMORY_ACTIVITY.CYCLES_L1D_MISS",
- "PublicDescription": "Cycles while L1 cache miss demand load is outstanding. Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x2"
},
@@ -34,7 +32,6 @@
"CounterMask": "3",
"EventCode": "0x47",
"EventName": "MEMORY_ACTIVITY.STALLS_L1D_MISS",
- "PublicDescription": "Execution stalls while L1 cache miss demand load is outstanding. Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x3"
},
@@ -44,7 +41,7 @@
"CounterMask": "5",
"EventCode": "0x47",
"EventName": "MEMORY_ACTIVITY.STALLS_L2_MISS",
- "PublicDescription": "Execution stalls while L2 cache miss demand cacheable load request is outstanding (will not count for uncacheable demand requests e.g. bus lock). Available PDIST counters: 0",
+ "PublicDescription": "Execution stalls while L2 cache miss demand cacheable load request is outstanding (will not count for uncacheable demand requests e.g. bus lock).",
"SampleAfterValue": "1000003",
"UMask": "0x5"
},
@@ -54,7 +51,7 @@
"CounterMask": "9",
"EventCode": "0x47",
"EventName": "MEMORY_ACTIVITY.STALLS_L3_MISS",
- "PublicDescription": "Execution stalls while L3 cache miss demand cacheable load request is outstanding (will not count for uncacheable demand requests e.g. bus lock). Available PDIST counters: 0",
+ "PublicDescription": "Execution stalls while L3 cache miss demand cacheable load request is outstanding (will not count for uncacheable demand requests e.g. bus lock).",
"SampleAfterValue": "1000003",
"UMask": "0x9"
},
@@ -478,7 +475,6 @@
"Counter": "0,1,2,3",
"EventCode": "0x21",
"EventName": "OFFCORE_REQUESTS.L3_MISS_DEMAND_DATA_RD",
- "PublicDescription": "Counts demand data read requests that miss the L3 cache. Available PDIST counters: 0",
"SampleAfterValue": "100003",
"UMask": "0x10"
},
@@ -487,7 +483,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x20",
"EventName": "OFFCORE_REQUESTS_OUTSTANDING.L3_MISS_DEMAND_DATA_RD",
- "PublicDescription": "For every cycle, increments by the number of demand data read requests pending that are known to have missed the L3 cache. Note that this does not capture all elapsed cycles while requests are outstanding - only cycles from when the requests were known by the requesting core to have missed the L3 cache. Available PDIST counters: 0",
+ "PublicDescription": "For every cycle, increments by the number of demand data read requests pending that are known to have missed the L3 cache. Note that this does not capture all elapsed cycles while requests are outstanding - only cycles from when the requests were known by the requesting core to have missed the L3 cache.",
"SampleAfterValue": "2000003",
"UMask": "0x10"
},
@@ -505,7 +501,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc9",
"EventName": "RTM_RETIRED.ABORTED_EVENTS",
- "PublicDescription": "Counts the number of times an RTM execution aborted due to none of the previous 3 categories (e.g. interrupt). Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of times an RTM execution aborted due to none of the previous 3 categories (e.g. interrupt).",
"SampleAfterValue": "100003",
"UMask": "0x80"
},
@@ -514,7 +510,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc9",
"EventName": "RTM_RETIRED.ABORTED_MEM",
- "PublicDescription": "Counts the number of times an RTM execution aborted due to various memory events (e.g. read/write capacity and conflicts). Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of times an RTM execution aborted due to various memory events (e.g. read/write capacity and conflicts).",
"SampleAfterValue": "100003",
"UMask": "0x8"
},
@@ -523,7 +519,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc9",
"EventName": "RTM_RETIRED.ABORTED_MEMTYPE",
- "PublicDescription": "Counts the number of times an RTM execution aborted due to incompatible memory type. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of times an RTM execution aborted due to incompatible memory type.",
"SampleAfterValue": "100003",
"UMask": "0x40"
},
@@ -532,7 +528,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc9",
"EventName": "RTM_RETIRED.ABORTED_UNFRIENDLY",
- "PublicDescription": "Counts the number of times an RTM execution aborted due to HLE-unfriendly instructions. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of times an RTM execution aborted due to HLE-unfriendly instructions.",
"SampleAfterValue": "100003",
"UMask": "0x20"
},
@@ -541,7 +537,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc9",
"EventName": "RTM_RETIRED.COMMIT",
- "PublicDescription": "Counts the number of times RTM commit succeeded. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of times RTM commit succeeded.",
"SampleAfterValue": "100003",
"UMask": "0x2"
},
@@ -550,7 +546,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc9",
"EventName": "RTM_RETIRED.START",
- "PublicDescription": "Counts the number of times we entered an RTM region. Does not count nested transactions. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of times we entered an RTM region. Does not count nested transactions.",
"SampleAfterValue": "100003",
"UMask": "0x1"
},
@@ -559,7 +555,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x54",
"EventName": "TX_MEM.ABORT_CAPACITY_READ",
- "PublicDescription": "Speculatively counts the number of Transactional Synchronization Extensions (TSX) aborts due to a data capacity limitation for transactional reads Available PDIST counters: 0",
+ "PublicDescription": "Speculatively counts the number of Transactional Synchronization Extensions (TSX) aborts due to a data capacity limitation for transactional reads",
"SampleAfterValue": "100003",
"UMask": "0x80"
},
@@ -568,7 +564,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x54",
"EventName": "TX_MEM.ABORT_CAPACITY_WRITE",
- "PublicDescription": "Speculatively counts the number of Transactional Synchronization Extensions (TSX) aborts due to a data capacity limitation for transactional writes. Available PDIST counters: 0",
+ "PublicDescription": "Speculatively counts the number of Transactional Synchronization Extensions (TSX) aborts due to a data capacity limitation for transactional writes.",
"SampleAfterValue": "100003",
"UMask": "0x2"
},
@@ -577,7 +573,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x54",
"EventName": "TX_MEM.ABORT_CONFLICT",
- "PublicDescription": "Counts the number of times a TSX line had a cache conflict. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of times a TSX line had a cache conflict.",
"SampleAfterValue": "100003",
"UMask": "0x1"
}
diff --git a/tools/perf/pmu-events/arch/x86/sapphirerapids/other.json b/tools/perf/pmu-events/arch/x86/sapphirerapids/other.json
index a58d65556609..21f49f609ed4 100644
--- a/tools/perf/pmu-events/arch/x86/sapphirerapids/other.json
+++ b/tools/perf/pmu-events/arch/x86/sapphirerapids/other.json
@@ -4,11 +4,35 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc1",
"EventName": "ASSISTS.PAGE_FAULT",
- "PublicDescription": "ASSISTS.PAGE_FAULT Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x8"
},
{
+ "BriefDescription": "HW_INTERRUPTS.MASKED",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xcb",
+ "EventName": "HW_INTERRUPTS.MASKED",
+ "SampleAfterValue": "100003",
+ "UMask": "0x2"
+ },
+ {
+ "BriefDescription": "HW_INTERRUPTS.PENDING_AND_MASKED",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xcb",
+ "EventName": "HW_INTERRUPTS.PENDING_AND_MASKED",
+ "SampleAfterValue": "100003",
+ "UMask": "0x4"
+ },
+ {
+ "BriefDescription": "Number of hardware interrupts received by the processor.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xcb",
+ "EventName": "HW_INTERRUPTS.RECEIVED",
+ "PublicDescription": "Counts the number of hardware interruptions received by the processor.",
+ "SampleAfterValue": "203",
+ "UMask": "0x1"
+ },
+ {
"BriefDescription": "Counts streaming stores that have any type of response.",
"Counter": "0,1,2,3",
"EventCode": "0x2A,0x2B",
@@ -25,7 +49,7 @@
"CounterMask": "1",
"EventCode": "0x2d",
"EventName": "XQ.FULL_CYCLES",
- "PublicDescription": "number of cycles when the thread is active and the uncore cannot take any further requests (for example prefetches, loads or stores initiated by the Core that miss the L2 cache). Available PDIST counters: 0",
+ "PublicDescription": "number of cycles when the thread is active and the uncore cannot take any further requests (for example prefetches, loads or stores initiated by the Core that miss the L2 cache).",
"SampleAfterValue": "1000003",
"UMask": "0x1"
}
diff --git a/tools/perf/pmu-events/arch/x86/sapphirerapids/pipeline.json b/tools/perf/pmu-events/arch/x86/sapphirerapids/pipeline.json
index 48bec483b49a..1fa7957956df 100644
--- a/tools/perf/pmu-events/arch/x86/sapphirerapids/pipeline.json
+++ b/tools/perf/pmu-events/arch/x86/sapphirerapids/pipeline.json
@@ -6,7 +6,6 @@
"Deprecated": "1",
"EventCode": "0xb0",
"EventName": "ARITH.DIVIDER_ACTIVE",
- "PublicDescription": "This event is deprecated. Refer to new event ARITH.DIV_ACTIVE Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x9"
},
@@ -16,7 +15,7 @@
"CounterMask": "1",
"EventCode": "0xb0",
"EventName": "ARITH.DIV_ACTIVE",
- "PublicDescription": "Counts cycles when divide unit is busy executing divide or square root operations. Accounts for integer and floating-point operations. Available PDIST counters: 0",
+ "PublicDescription": "Counts cycles when divide unit is busy executing divide or square root operations. Accounts for integer and floating-point operations.",
"SampleAfterValue": "1000003",
"UMask": "0x9"
},
@@ -27,7 +26,6 @@
"Deprecated": "1",
"EventCode": "0xb0",
"EventName": "ARITH.FP_DIVIDER_ACTIVE",
- "PublicDescription": "This event is deprecated. Refer to new event ARITH.FPDIV_ACTIVE Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x1"
},
@@ -37,7 +35,6 @@
"CounterMask": "1",
"EventCode": "0xb0",
"EventName": "ARITH.IDIV_ACTIVE",
- "PublicDescription": "This event counts the cycles the integer divider is busy. Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x8"
},
@@ -48,7 +45,6 @@
"Deprecated": "1",
"EventCode": "0xb0",
"EventName": "ARITH.INT_DIVIDER_ACTIVE",
- "PublicDescription": "This event is deprecated. Refer to new event ARITH.IDIV_ACTIVE Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x8"
},
@@ -57,7 +53,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc1",
"EventName": "ASSISTS.ANY",
- "PublicDescription": "Counts the number of occurrences where a microcode assist is invoked by hardware. Examples include AD (page Access Dirty), FP and AVX related assists. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of occurrences where a microcode assist is invoked by hardware. Examples include AD (page Access Dirty), FP and AVX related assists.",
"SampleAfterValue": "100003",
"UMask": "0x1b"
},
@@ -217,7 +213,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xec",
"EventName": "CPU_CLK_UNHALTED.C01",
- "PublicDescription": "Counts core clocks when the thread is in the C0.1 light-weight slower wakeup time but more power saving optimized state. This state can be entered via the TPAUSE or UMWAIT instructions. Available PDIST counters: 0",
+ "PublicDescription": "Counts core clocks when the thread is in the C0.1 light-weight slower wakeup time but more power saving optimized state. This state can be entered via the TPAUSE or UMWAIT instructions.",
"SampleAfterValue": "2000003",
"UMask": "0x10"
},
@@ -226,7 +222,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xec",
"EventName": "CPU_CLK_UNHALTED.C02",
- "PublicDescription": "Counts core clocks when the thread is in the C0.2 light-weight faster wakeup time but less power saving optimized state. This state can be entered via the TPAUSE or UMWAIT instructions. Available PDIST counters: 0",
+ "PublicDescription": "Counts core clocks when the thread is in the C0.2 light-weight faster wakeup time but less power saving optimized state. This state can be entered via the TPAUSE or UMWAIT instructions.",
"SampleAfterValue": "2000003",
"UMask": "0x20"
},
@@ -235,7 +231,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xec",
"EventName": "CPU_CLK_UNHALTED.C0_WAIT",
- "PublicDescription": "Counts core clocks when the thread is in the C0.1 or C0.2 power saving optimized states (TPAUSE or UMWAIT instructions) or running the PAUSE instruction. Available PDIST counters: 0",
+ "PublicDescription": "Counts core clocks when the thread is in the C0.1 or C0.2 power saving optimized states (TPAUSE or UMWAIT instructions) or running the PAUSE instruction.",
"SampleAfterValue": "2000003",
"UMask": "0x70"
},
@@ -244,7 +240,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xec",
"EventName": "CPU_CLK_UNHALTED.DISTRIBUTED",
- "PublicDescription": "This event distributes cycle counts between active hyperthreads, i.e., those in C0. A hyperthread becomes inactive when it executes the HLT or MWAIT instructions. If all other hyperthreads are inactive (or disabled or do not exist), all counts are attributed to this hyperthread. To obtain the full count when the Core is active, sum the counts from each hyperthread. Available PDIST counters: 0",
+ "PublicDescription": "This event distributes cycle counts between active hyperthreads, i.e., those in C0. A hyperthread becomes inactive when it executes the HLT or MWAIT instructions. If all other hyperthreads are inactive (or disabled or do not exist), all counts are attributed to this hyperthread. To obtain the full count when the Core is active, sum the counts from each hyperthread.",
"SampleAfterValue": "2000003",
"UMask": "0x2"
},
@@ -253,7 +249,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0x3c",
"EventName": "CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE",
- "PublicDescription": "Counts Core crystal clock cycles when current thread is unhalted and the other thread is halted. Available PDIST counters: 0",
+ "PublicDescription": "Counts Core crystal clock cycles when current thread is unhalted and the other thread is halted.",
"SampleAfterValue": "25003",
"UMask": "0x2"
},
@@ -262,7 +258,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xec",
"EventName": "CPU_CLK_UNHALTED.PAUSE",
- "PublicDescription": "CPU_CLK_UNHALTED.PAUSE Available PDIST counters: 0",
"SampleAfterValue": "2000003",
"UMask": "0x40"
},
@@ -273,7 +268,6 @@
"EdgeDetect": "1",
"EventCode": "0xec",
"EventName": "CPU_CLK_UNHALTED.PAUSE_INST",
- "PublicDescription": "CPU_CLK_UNHALTED.PAUSE_INST Available PDIST counters: 0",
"SampleAfterValue": "2000003",
"UMask": "0x40"
},
@@ -282,7 +276,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0x3c",
"EventName": "CPU_CLK_UNHALTED.REF_DISTRIBUTED",
- "PublicDescription": "This event distributes Core crystal clock cycle counts between active hyperthreads, i.e., those in C0 sleep-state. A hyperthread becomes inactive when it executes the HLT or MWAIT instructions. If one thread is active in a core, all counts are attributed to this hyperthread. To obtain the full count when the Core is active, sum the counts from each hyperthread. Available PDIST counters: 0",
+ "PublicDescription": "This event distributes Core crystal clock cycle counts between active hyperthreads, i.e., those in C0 sleep-state. A hyperthread becomes inactive when it executes the HLT or MWAIT instructions. If one thread is active in a core, all counts are attributed to this hyperthread. To obtain the full count when the Core is active, sum the counts from each hyperthread.",
"SampleAfterValue": "2000003",
"UMask": "0x8"
},
@@ -299,7 +293,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0x3c",
"EventName": "CPU_CLK_UNHALTED.REF_TSC_P",
- "PublicDescription": "Counts the number of reference cycles when the core is not in a halt state. The core enters the halt state when it is running the HLT instruction or the MWAIT instruction. This event is not affected by core frequency changes (for example, P states, TM2 transitions) but has the same incrementing frequency as the time stamp counter. This event can approximate elapsed time while the core was not in a halt state. It is counted on a dedicated fixed counter, leaving the four (eight when Hyperthreading is disabled) programmable counters available for other events. Note: On all current platforms this event stops counting during 'throttling (TM)' states duty off periods the processor is 'halted'. The counter update is done at a lower clock rate then the core clock the overflow status bit for this counter may appear 'sticky'. After the counter has overflowed and software clears the overflow status bit and resets the counter to less than MAX. The reset value to the counter is not clocked immediately so the overflow status bit will flip 'high (1)' and generate another PMI (if enabled) after which the reset value gets clocked into the counter. Therefore, software will get the interrupt, read the overflow status bit '1 for bit 34 while the counter value is less than MAX. Software should ignore this case. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of reference cycles when the core is not in a halt state. The core enters the halt state when it is running the HLT instruction or the MWAIT instruction. This event is not affected by core frequency changes (for example, P states, TM2 transitions) but has the same incrementing frequency as the time stamp counter. This event can approximate elapsed time while the core was not in a halt state. It is counted on a dedicated fixed counter, leaving the four (eight when Hyperthreading is disabled) programmable counters available for other events. Note: On all current platforms this event stops counting during 'throttling (TM)' states duty off periods the processor is 'halted'. The counter update is done at a lower clock rate then the core clock the overflow status bit for this counter may appear 'sticky'. After the counter has overflowed and software clears the overflow status bit and resets the counter to less than MAX. The reset value to the counter is not clocked immediately so the overflow status bit will flip 'high (1)' and generate another PMI (if enabled) after which the reset value gets clocked into the counter. Therefore, software will get the interrupt, read the overflow status bit '1 for bit 34 while the counter value is less than MAX. Software should ignore this case.",
"SampleAfterValue": "2000003",
"UMask": "0x1"
},
@@ -316,7 +310,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0x3c",
"EventName": "CPU_CLK_UNHALTED.THREAD_P",
- "PublicDescription": "This is an architectural event that counts the number of thread cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. The core frequency may change from time to time due to power or thermal throttling. For this reason, this event may have a changing ratio with regards to wall clock time. Available PDIST counters: 0",
+ "PublicDescription": "This is an architectural event that counts the number of thread cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. The core frequency may change from time to time due to power or thermal throttling. For this reason, this event may have a changing ratio with regards to wall clock time.",
"SampleAfterValue": "2000003"
},
{
@@ -325,7 +319,6 @@
"CounterMask": "8",
"EventCode": "0xa3",
"EventName": "CYCLE_ACTIVITY.CYCLES_L1D_MISS",
- "PublicDescription": "Cycles while L1 cache miss demand load is outstanding. Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x8"
},
@@ -335,7 +328,6 @@
"CounterMask": "1",
"EventCode": "0xa3",
"EventName": "CYCLE_ACTIVITY.CYCLES_L2_MISS",
- "PublicDescription": "Cycles while L2 cache miss demand load is outstanding. Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x1"
},
@@ -345,7 +337,6 @@
"CounterMask": "16",
"EventCode": "0xa3",
"EventName": "CYCLE_ACTIVITY.CYCLES_MEM_ANY",
- "PublicDescription": "Cycles while memory subsystem has an outstanding load. Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x10"
},
@@ -355,7 +346,6 @@
"CounterMask": "12",
"EventCode": "0xa3",
"EventName": "CYCLE_ACTIVITY.STALLS_L1D_MISS",
- "PublicDescription": "Execution stalls while L1 cache miss demand load is outstanding. Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0xc"
},
@@ -365,7 +355,6 @@
"CounterMask": "5",
"EventCode": "0xa3",
"EventName": "CYCLE_ACTIVITY.STALLS_L2_MISS",
- "PublicDescription": "Execution stalls while L2 cache miss demand load is outstanding. Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x5"
},
@@ -375,7 +364,6 @@
"CounterMask": "4",
"EventCode": "0xa3",
"EventName": "CYCLE_ACTIVITY.STALLS_TOTAL",
- "PublicDescription": "Total execution stalls. Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x4"
},
@@ -384,7 +372,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xb7",
"EventName": "EXE.AMX_BUSY",
- "PublicDescription": "Counts the cycles where the AMX (Advance Matrix Extension) unit is busy performing an operation. Available PDIST counters: 0",
"SampleAfterValue": "2000003",
"UMask": "0x2"
},
@@ -393,7 +380,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xa6",
"EventName": "EXE_ACTIVITY.1_PORTS_UTIL",
- "PublicDescription": "Counts cycles during which a total of 1 uop was executed on all ports and Reservation Station (RS) was not empty. Available PDIST counters: 0",
+ "PublicDescription": "Counts cycles during which a total of 1 uop was executed on all ports and Reservation Station (RS) was not empty.",
"SampleAfterValue": "2000003",
"UMask": "0x2"
},
@@ -402,7 +389,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xa6",
"EventName": "EXE_ACTIVITY.2_3_PORTS_UTIL",
- "PublicDescription": "Cycles total of 2 or 3 uops are executed on all ports and Reservation Station (RS) was not empty. Available PDIST counters: 0",
"SampleAfterValue": "2000003",
"UMask": "0xc"
},
@@ -411,7 +397,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xa6",
"EventName": "EXE_ACTIVITY.2_PORTS_UTIL",
- "PublicDescription": "Counts cycles during which a total of 2 uops were executed on all ports and Reservation Station (RS) was not empty. Available PDIST counters: 0",
+ "PublicDescription": "Counts cycles during which a total of 2 uops were executed on all ports and Reservation Station (RS) was not empty.",
"SampleAfterValue": "2000003",
"UMask": "0x4"
},
@@ -420,7 +406,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xa6",
"EventName": "EXE_ACTIVITY.3_PORTS_UTIL",
- "PublicDescription": "Cycles total of 3 uops are executed on all ports and Reservation Station (RS) was not empty. Available PDIST counters: 0",
+ "PublicDescription": "Cycles total of 3 uops are executed on all ports and Reservation Station (RS) was not empty.",
"SampleAfterValue": "2000003",
"UMask": "0x8"
},
@@ -429,7 +415,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xa6",
"EventName": "EXE_ACTIVITY.4_PORTS_UTIL",
- "PublicDescription": "Cycles total of 4 uops are executed on all ports and Reservation Station (RS) was not empty. Available PDIST counters: 0",
+ "PublicDescription": "Cycles total of 4 uops are executed on all ports and Reservation Station (RS) was not empty.",
"SampleAfterValue": "2000003",
"UMask": "0x10"
},
@@ -439,7 +425,6 @@
"CounterMask": "5",
"EventCode": "0xa6",
"EventName": "EXE_ACTIVITY.BOUND_ON_LOADS",
- "PublicDescription": "Execution stalls while memory subsystem has an outstanding load. Available PDIST counters: 0",
"SampleAfterValue": "2000003",
"UMask": "0x21"
},
@@ -449,7 +434,7 @@
"CounterMask": "2",
"EventCode": "0xa6",
"EventName": "EXE_ACTIVITY.BOUND_ON_STORES",
- "PublicDescription": "Counts cycles where the Store Buffer was full and no loads caused an execution stall. Available PDIST counters: 0",
+ "PublicDescription": "Counts cycles where the Store Buffer was full and no loads caused an execution stall.",
"SampleAfterValue": "1000003",
"UMask": "0x40"
},
@@ -458,7 +443,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xa6",
"EventName": "EXE_ACTIVITY.EXE_BOUND_0_PORTS",
- "PublicDescription": "Number of cycles total of 0 uops executed on all ports, Reservation Station (RS) was not empty, the Store Buffer (SB) was not full and there was no outstanding load. Available PDIST counters: 0",
+ "PublicDescription": "Number of cycles total of 0 uops executed on all ports, Reservation Station (RS) was not empty, the Store Buffer (SB) was not full and there was no outstanding load.",
"SampleAfterValue": "1000003",
"UMask": "0x80"
},
@@ -467,7 +452,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x75",
"EventName": "INST_DECODED.DECODERS",
- "PublicDescription": "Number of decoders utilized in a cycle when the MITE (legacy decode pipeline) fetches instructions. Available PDIST counters: 0",
+ "PublicDescription": "Number of decoders utilized in a cycle when the MITE (legacy decode pipeline) fetches instructions.",
"SampleAfterValue": "2000003",
"UMask": "0x1"
},
@@ -492,7 +477,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc0",
"EventName": "INST_RETIRED.MACRO_FUSED",
- "PublicDescription": "INST_RETIRED.MACRO_FUSED Available PDIST counters: 0",
"SampleAfterValue": "2000003",
"UMask": "0x10"
},
@@ -501,7 +485,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc0",
"EventName": "INST_RETIRED.NOP",
- "PublicDescription": "Counts all retired NOP or ENDBR32/64 instructions Available PDIST counters: 0",
+ "PublicDescription": "Counts all retired NOP or ENDBR32/64 instructions",
"SampleAfterValue": "2000003",
"UMask": "0x2"
},
@@ -518,7 +502,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc0",
"EventName": "INST_RETIRED.REP_ITERATION",
- "PublicDescription": "Number of iterations of Repeat (REP) string retired instructions such as MOVS, CMPS, and SCAS. Each has a byte, word, and doubleword version and string instructions can be repeated using a repetition prefix, REP, that allows their architectural execution to be repeated a number of times as specified by the RCX register. Note the number of iterations is implementation-dependent. Available PDIST counters: 0",
+ "PublicDescription": "Number of iterations of Repeat (REP) string retired instructions such as MOVS, CMPS, and SCAS. Each has a byte, word, and doubleword version and string instructions can be repeated using a repetition prefix, REP, that allows their architectural execution to be repeated a number of times as specified by the RCX register. Note the number of iterations is implementation-dependent.",
"SampleAfterValue": "2000003",
"UMask": "0x8"
},
@@ -529,7 +513,7 @@
"EdgeDetect": "1",
"EventCode": "0xad",
"EventName": "INT_MISC.CLEARS_COUNT",
- "PublicDescription": "Counts the number of speculative clears due to any type of branch misprediction or machine clears Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of speculative clears due to any type of branch misprediction or machine clears",
"SampleAfterValue": "500009",
"UMask": "0x1"
},
@@ -538,7 +522,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xad",
"EventName": "INT_MISC.CLEAR_RESTEER_CYCLES",
- "PublicDescription": "Cycles after recovery from a branch misprediction or machine clear till the first uop is issued from the resteered path. Available PDIST counters: 0",
+ "PublicDescription": "Cycles after recovery from a branch misprediction or machine clear till the first uop is issued from the resteered path.",
"SampleAfterValue": "500009",
"UMask": "0x80"
},
@@ -547,7 +531,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xad",
"EventName": "INT_MISC.MBA_STALLS",
- "PublicDescription": "INT_MISC.MBA_STALLS Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x20"
},
@@ -556,7 +539,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xad",
"EventName": "INT_MISC.RECOVERY_CYCLES",
- "PublicDescription": "Counts core cycles when the Resource allocator was stalled due to recovery from an earlier branch misprediction or machine clear event. Available PDIST counters: 0",
+ "PublicDescription": "Counts core cycles when the Resource allocator was stalled due to recovery from an earlier branch misprediction or machine clear event.",
"SampleAfterValue": "500009",
"UMask": "0x1"
},
@@ -567,7 +550,6 @@
"EventName": "INT_MISC.UNKNOWN_BRANCH_CYCLES",
"MSRIndex": "0x3F7",
"MSRValue": "0x7",
- "PublicDescription": "Bubble cycles of BAClear (Unknown Branch). Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x40"
},
@@ -576,7 +558,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xad",
"EventName": "INT_MISC.UOP_DROPPING",
- "PublicDescription": "Estimated number of Top-down Microarchitecture Analysis slots that got dropped due to non front-end reasons Available PDIST counters: 0",
+ "PublicDescription": "Estimated number of Top-down Microarchitecture Analysis slots that got dropped due to non front-end reasons",
"SampleAfterValue": "1000003",
"UMask": "0x10"
},
@@ -585,7 +567,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xe7",
"EventName": "INT_VEC_RETIRED.128BIT",
- "PublicDescription": "INT_VEC_RETIRED.128BIT Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x13"
},
@@ -594,7 +575,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xe7",
"EventName": "INT_VEC_RETIRED.256BIT",
- "PublicDescription": "INT_VEC_RETIRED.256BIT Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0xac"
},
@@ -603,7 +583,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xe7",
"EventName": "INT_VEC_RETIRED.ADD_128",
- "PublicDescription": "Number of retired integer ADD/SUB (regular or horizontal), SAD 128-bit vector instructions. Available PDIST counters: 0",
+ "PublicDescription": "Number of retired integer ADD/SUB (regular or horizontal), SAD 128-bit vector instructions.",
"SampleAfterValue": "1000003",
"UMask": "0x3"
},
@@ -612,7 +592,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xe7",
"EventName": "INT_VEC_RETIRED.ADD_256",
- "PublicDescription": "Number of retired integer ADD/SUB (regular or horizontal), SAD 256-bit vector instructions. Available PDIST counters: 0",
+ "PublicDescription": "Number of retired integer ADD/SUB (regular or horizontal), SAD 256-bit vector instructions.",
"SampleAfterValue": "1000003",
"UMask": "0xc"
},
@@ -621,7 +601,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xe7",
"EventName": "INT_VEC_RETIRED.MUL_256",
- "PublicDescription": "INT_VEC_RETIRED.MUL_256 Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x80"
},
@@ -630,7 +609,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xe7",
"EventName": "INT_VEC_RETIRED.SHUFFLES",
- "PublicDescription": "INT_VEC_RETIRED.SHUFFLES Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x40"
},
@@ -639,7 +617,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xe7",
"EventName": "INT_VEC_RETIRED.VNNI_128",
- "PublicDescription": "INT_VEC_RETIRED.VNNI_128 Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x10"
},
@@ -648,7 +625,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xe7",
"EventName": "INT_VEC_RETIRED.VNNI_256",
- "PublicDescription": "INT_VEC_RETIRED.VNNI_256 Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x20"
},
@@ -657,7 +633,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x03",
"EventName": "LD_BLOCKS.ADDRESS_ALIAS",
- "PublicDescription": "Counts the number of times a load got blocked due to false dependencies in MOB due to partial compare on address. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of times a load got blocked due to false dependencies in MOB due to partial compare on address.",
"SampleAfterValue": "100003",
"UMask": "0x4"
},
@@ -666,7 +642,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x03",
"EventName": "LD_BLOCKS.NO_SR",
- "PublicDescription": "Counts the number of times that split load operations are temporarily blocked because all resources for handling the split accesses are in use. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of times that split load operations are temporarily blocked because all resources for handling the split accesses are in use.",
"SampleAfterValue": "100003",
"UMask": "0x88"
},
@@ -675,7 +651,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x03",
"EventName": "LD_BLOCKS.STORE_FORWARD",
- "PublicDescription": "Counts the number of times where store forwarding was prevented for a load operation. The most common case is a load blocked due to the address of memory access (partially) overlapping with a preceding uncompleted store. Note: See the table of not supported store forwards in the Optimization Guide. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of times where store forwarding was prevented for a load operation. The most common case is a load blocked due to the address of memory access (partially) overlapping with a preceding uncompleted store. Note: See the table of not supported store forwards in the Optimization Guide.",
"SampleAfterValue": "100003",
"UMask": "0x82"
},
@@ -684,7 +660,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x4c",
"EventName": "LOAD_HIT_PREFETCH.SWPF",
- "PublicDescription": "Counts all software-prefetch load dispatches that hit the fill buffer (FB) allocated for the software prefetch. It can also be incremented by some lock instructions. So it should only be used with profiling so that the locks can be excluded by ASM (Assembly File) inspection of the nearby instructions. Available PDIST counters: 0",
+ "PublicDescription": "Counts all software-prefetch load dispatches that hit the fill buffer (FB) allocated for the software prefetch. It can also be incremented by some lock instructions. So it should only be used with profiling so that the locks can be excluded by ASM (Assembly File) inspection of the nearby instructions.",
"SampleAfterValue": "100003",
"UMask": "0x1"
},
@@ -694,7 +670,7 @@
"CounterMask": "1",
"EventCode": "0xa8",
"EventName": "LSD.CYCLES_ACTIVE",
- "PublicDescription": "Counts the cycles when at least one uop is delivered by the LSD (Loop-stream detector). Available PDIST counters: 0",
+ "PublicDescription": "Counts the cycles when at least one uop is delivered by the LSD (Loop-stream detector).",
"SampleAfterValue": "2000003",
"UMask": "0x1"
},
@@ -704,7 +680,7 @@
"CounterMask": "6",
"EventCode": "0xa8",
"EventName": "LSD.CYCLES_OK",
- "PublicDescription": "Counts the cycles when optimal number of uops is delivered by the LSD (Loop-stream detector). Available PDIST counters: 0",
+ "PublicDescription": "Counts the cycles when optimal number of uops is delivered by the LSD (Loop-stream detector).",
"SampleAfterValue": "2000003",
"UMask": "0x1"
},
@@ -713,7 +689,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xa8",
"EventName": "LSD.UOPS",
- "PublicDescription": "Counts the number of uops delivered to the back-end by the LSD(Loop Stream Detector). Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of uops delivered to the back-end by the LSD(Loop Stream Detector).",
"SampleAfterValue": "2000003",
"UMask": "0x1"
},
@@ -724,7 +700,7 @@
"EdgeDetect": "1",
"EventCode": "0xc3",
"EventName": "MACHINE_CLEARS.COUNT",
- "PublicDescription": "Counts the number of machine clears (nukes) of any type. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of machine clears (nukes) of any type.",
"SampleAfterValue": "100003",
"UMask": "0x1"
},
@@ -733,7 +709,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc3",
"EventName": "MACHINE_CLEARS.SMC",
- "PublicDescription": "Counts self-modifying code (SMC) detected, which causes a machine clear. Available PDIST counters: 0",
+ "PublicDescription": "Counts self-modifying code (SMC) detected, which causes a machine clear.",
"SampleAfterValue": "100003",
"UMask": "0x4"
},
@@ -742,7 +718,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xe0",
"EventName": "MISC2_RETIRED.LFENCE",
- "PublicDescription": "number of LFENCE retired instructions Available PDIST counters: 0",
+ "PublicDescription": "number of LFENCE retired instructions",
"SampleAfterValue": "400009",
"UMask": "0x20"
},
@@ -751,7 +727,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xcc",
"EventName": "MISC_RETIRED.LBR_INSERTS",
- "PublicDescription": "Increments when an entry is added to the Last Branch Record (LBR) array (or removed from the array in case of RETURNs in call stack mode). The event requires LBR enable via IA32_DEBUGCTL MSR and branch type selection via MSR_LBR_SELECT. Available PDIST counters: 0",
+ "PublicDescription": "Increments when an entry is added to the Last Branch Record (LBR) array (or removed from the array in case of RETURNs in call stack mode). The event requires LBR enable via IA32_DEBUGCTL MSR and branch type selection via MSR_LBR_SELECT.",
"SampleAfterValue": "100003",
"UMask": "0x20"
},
@@ -760,7 +736,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xa2",
"EventName": "RESOURCE_STALLS.SB",
- "PublicDescription": "Counts allocation stall cycles caused by the store buffer (SB) being full. This counts cycles that the pipeline back-end blocked uop delivery from the front-end. Available PDIST counters: 0",
+ "PublicDescription": "Counts allocation stall cycles caused by the store buffer (SB) being full. This counts cycles that the pipeline back-end blocked uop delivery from the front-end.",
"SampleAfterValue": "100003",
"UMask": "0x8"
},
@@ -769,7 +745,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xa2",
"EventName": "RESOURCE_STALLS.SCOREBOARD",
- "PublicDescription": "Counts cycles where the pipeline is stalled due to serializing operations. Available PDIST counters: 0",
"SampleAfterValue": "100003",
"UMask": "0x2"
},
@@ -778,7 +753,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xa5",
"EventName": "RS.EMPTY",
- "PublicDescription": "Counts cycles during which the reservation station (RS) is empty for this logical processor. This is usually caused when the front-end pipeline runs into starvation periods (e.g. branch mispredictions or i-cache misses) Available PDIST counters: 0",
+ "PublicDescription": "Counts cycles during which the reservation station (RS) is empty for this logical processor. This is usually caused when the front-end pipeline runs into starvation periods (e.g. branch mispredictions or i-cache misses)",
"SampleAfterValue": "1000003",
"UMask": "0x7"
},
@@ -790,7 +765,7 @@
"EventCode": "0xa5",
"EventName": "RS.EMPTY_COUNT",
"Invert": "1",
- "PublicDescription": "Counts end of periods where the Reservation Station (RS) was empty. Could be useful to closely sample on front-end latency issues (see the FRONTEND_RETIRED event of designated precise events) Available PDIST counters: 0",
+ "PublicDescription": "Counts end of periods where the Reservation Station (RS) was empty. Could be useful to closely sample on front-end latency issues (see the FRONTEND_RETIRED event of designated precise events)",
"SampleAfterValue": "100003",
"UMask": "0x7"
},
@@ -799,7 +774,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xa5",
"EventName": "RS.EMPTY_RESOURCE",
- "PublicDescription": "Cycles when Reservation Station (RS) is empty due to a resource in the back-end Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x1"
},
@@ -812,7 +786,6 @@
"EventCode": "0xa5",
"EventName": "RS_EMPTY.COUNT",
"Invert": "1",
- "PublicDescription": "This event is deprecated. Refer to new event RS.EMPTY_COUNT Available PDIST counters: 0",
"SampleAfterValue": "100003",
"UMask": "0x7"
},
@@ -822,7 +795,6 @@
"Deprecated": "1",
"EventCode": "0xa5",
"EventName": "RS_EMPTY.CYCLES",
- "PublicDescription": "This event is deprecated. Refer to new event RS.EMPTY Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x7"
},
@@ -831,7 +803,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xa4",
"EventName": "TOPDOWN.BACKEND_BOUND_SLOTS",
- "PublicDescription": "Number of slots in TMA method where no micro-operations were being issued from front-end to back-end of the machine due to lack of back-end resources. Available PDIST counters: 0",
+ "PublicDescription": "Number of slots in TMA method where no micro-operations were being issued from front-end to back-end of the machine due to lack of back-end resources.",
"SampleAfterValue": "10000003",
"UMask": "0x2"
},
@@ -840,7 +812,7 @@
"Counter": "0",
"EventCode": "0xa4",
"EventName": "TOPDOWN.BAD_SPEC_SLOTS",
- "PublicDescription": "Number of slots of TMA method that were wasted due to incorrect speculation. It covers all types of control-flow or data-related mis-speculations. Available PDIST counters: 0",
+ "PublicDescription": "Number of slots of TMA method that were wasted due to incorrect speculation. It covers all types of control-flow or data-related mis-speculations.",
"SampleAfterValue": "10000003",
"UMask": "0x4"
},
@@ -849,7 +821,7 @@
"Counter": "0",
"EventCode": "0xa4",
"EventName": "TOPDOWN.BR_MISPREDICT_SLOTS",
- "PublicDescription": "Number of TMA slots that were wasted due to incorrect speculation by (any type of) branch mispredictions. This event estimates number of speculative operations that were issued but not retired as well as the out-of-order engine recovery past a branch misprediction. Available PDIST counters: 0",
+ "PublicDescription": "Number of TMA slots that were wasted due to incorrect speculation by (any type of) branch mispredictions. This event estimates number of speculative operations that were issued but not retired as well as the out-of-order engine recovery past a branch misprediction.",
"SampleAfterValue": "10000003",
"UMask": "0x8"
},
@@ -858,7 +830,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xa4",
"EventName": "TOPDOWN.MEMORY_BOUND_SLOTS",
- "PublicDescription": "TOPDOWN.MEMORY_BOUND_SLOTS Available PDIST counters: 0",
"SampleAfterValue": "10000003",
"UMask": "0x10"
},
@@ -875,7 +846,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xa4",
"EventName": "TOPDOWN.SLOTS_P",
- "PublicDescription": "Counts the number of available slots for an unhalted logical processor. The event increments by machine-width of the narrowest pipeline as employed by the Top-down Microarchitecture Analysis method. The count is distributed among unhalted logical processors (hyper-threads) who share the same physical core. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of available slots for an unhalted logical processor. The event increments by machine-width of the narrowest pipeline as employed by the Top-down Microarchitecture Analysis method. The count is distributed among unhalted logical processors (hyper-threads) who share the same physical core.",
"SampleAfterValue": "10000003",
"UMask": "0x1"
},
@@ -884,7 +855,6 @@
"Counter": "0,1,2,3",
"EventCode": "0x76",
"EventName": "UOPS_DECODED.DEC0_UOPS",
- "PublicDescription": "UOPS_DECODED.DEC0_UOPS Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x1"
},
@@ -893,7 +863,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xb2",
"EventName": "UOPS_DISPATCHED.PORT_0",
- "PublicDescription": "Number of uops dispatch to execution port 0. Available PDIST counters: 0",
+ "PublicDescription": "Number of uops dispatch to execution port 0.",
"SampleAfterValue": "2000003",
"UMask": "0x1"
},
@@ -902,7 +872,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xb2",
"EventName": "UOPS_DISPATCHED.PORT_1",
- "PublicDescription": "Number of uops dispatch to execution port 1. Available PDIST counters: 0",
+ "PublicDescription": "Number of uops dispatch to execution port 1.",
"SampleAfterValue": "2000003",
"UMask": "0x2"
},
@@ -911,7 +881,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xb2",
"EventName": "UOPS_DISPATCHED.PORT_2_3_10",
- "PublicDescription": "Number of uops dispatch to execution ports 2, 3 and 10 Available PDIST counters: 0",
+ "PublicDescription": "Number of uops dispatch to execution ports 2, 3 and 10",
"SampleAfterValue": "2000003",
"UMask": "0x4"
},
@@ -920,7 +890,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xb2",
"EventName": "UOPS_DISPATCHED.PORT_4_9",
- "PublicDescription": "Number of uops dispatch to execution ports 4 and 9 Available PDIST counters: 0",
+ "PublicDescription": "Number of uops dispatch to execution ports 4 and 9",
"SampleAfterValue": "2000003",
"UMask": "0x10"
},
@@ -929,7 +899,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xb2",
"EventName": "UOPS_DISPATCHED.PORT_5_11",
- "PublicDescription": "Number of uops dispatch to execution ports 5 and 11 Available PDIST counters: 0",
+ "PublicDescription": "Number of uops dispatch to execution ports 5 and 11",
"SampleAfterValue": "2000003",
"UMask": "0x20"
},
@@ -938,7 +908,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xb2",
"EventName": "UOPS_DISPATCHED.PORT_6",
- "PublicDescription": "Number of uops dispatch to execution port 6. Available PDIST counters: 0",
+ "PublicDescription": "Number of uops dispatch to execution port 6.",
"SampleAfterValue": "2000003",
"UMask": "0x40"
},
@@ -947,7 +917,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xb2",
"EventName": "UOPS_DISPATCHED.PORT_7_8",
- "PublicDescription": "Number of uops dispatch to execution ports 7 and 8. Available PDIST counters: 0",
+ "PublicDescription": "Number of uops dispatch to execution ports 7 and 8.",
"SampleAfterValue": "2000003",
"UMask": "0x80"
},
@@ -956,7 +926,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xb1",
"EventName": "UOPS_EXECUTED.CORE",
- "PublicDescription": "Counts the number of uops executed from any thread. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of uops executed from any thread.",
"SampleAfterValue": "2000003",
"UMask": "0x2"
},
@@ -966,7 +936,7 @@
"CounterMask": "1",
"EventCode": "0xb1",
"EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_1",
- "PublicDescription": "Counts cycles when at least 1 micro-op is executed from any thread on physical core. Available PDIST counters: 0",
+ "PublicDescription": "Counts cycles when at least 1 micro-op is executed from any thread on physical core.",
"SampleAfterValue": "2000003",
"UMask": "0x2"
},
@@ -976,7 +946,7 @@
"CounterMask": "2",
"EventCode": "0xb1",
"EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_2",
- "PublicDescription": "Counts cycles when at least 2 micro-ops are executed from any thread on physical core. Available PDIST counters: 0",
+ "PublicDescription": "Counts cycles when at least 2 micro-ops are executed from any thread on physical core.",
"SampleAfterValue": "2000003",
"UMask": "0x2"
},
@@ -986,7 +956,7 @@
"CounterMask": "3",
"EventCode": "0xb1",
"EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_3",
- "PublicDescription": "Counts cycles when at least 3 micro-ops are executed from any thread on physical core. Available PDIST counters: 0",
+ "PublicDescription": "Counts cycles when at least 3 micro-ops are executed from any thread on physical core.",
"SampleAfterValue": "2000003",
"UMask": "0x2"
},
@@ -996,7 +966,7 @@
"CounterMask": "4",
"EventCode": "0xb1",
"EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_4",
- "PublicDescription": "Counts cycles when at least 4 micro-ops are executed from any thread on physical core. Available PDIST counters: 0",
+ "PublicDescription": "Counts cycles when at least 4 micro-ops are executed from any thread on physical core.",
"SampleAfterValue": "2000003",
"UMask": "0x2"
},
@@ -1006,7 +976,7 @@
"CounterMask": "1",
"EventCode": "0xb1",
"EventName": "UOPS_EXECUTED.CYCLES_GE_1",
- "PublicDescription": "Cycles where at least 1 uop was executed per-thread. Available PDIST counters: 0",
+ "PublicDescription": "Cycles where at least 1 uop was executed per-thread.",
"SampleAfterValue": "2000003",
"UMask": "0x1"
},
@@ -1016,7 +986,7 @@
"CounterMask": "2",
"EventCode": "0xb1",
"EventName": "UOPS_EXECUTED.CYCLES_GE_2",
- "PublicDescription": "Cycles where at least 2 uops were executed per-thread. Available PDIST counters: 0",
+ "PublicDescription": "Cycles where at least 2 uops were executed per-thread.",
"SampleAfterValue": "2000003",
"UMask": "0x1"
},
@@ -1026,7 +996,7 @@
"CounterMask": "3",
"EventCode": "0xb1",
"EventName": "UOPS_EXECUTED.CYCLES_GE_3",
- "PublicDescription": "Cycles where at least 3 uops were executed per-thread. Available PDIST counters: 0",
+ "PublicDescription": "Cycles where at least 3 uops were executed per-thread.",
"SampleAfterValue": "2000003",
"UMask": "0x1"
},
@@ -1036,7 +1006,7 @@
"CounterMask": "4",
"EventCode": "0xb1",
"EventName": "UOPS_EXECUTED.CYCLES_GE_4",
- "PublicDescription": "Cycles where at least 4 uops were executed per-thread. Available PDIST counters: 0",
+ "PublicDescription": "Cycles where at least 4 uops were executed per-thread.",
"SampleAfterValue": "2000003",
"UMask": "0x1"
},
@@ -1047,7 +1017,7 @@
"EventCode": "0xb1",
"EventName": "UOPS_EXECUTED.STALLS",
"Invert": "1",
- "PublicDescription": "Counts cycles during which no uops were dispatched from the Reservation Station (RS) per thread. Available PDIST counters: 0",
+ "PublicDescription": "Counts cycles during which no uops were dispatched from the Reservation Station (RS) per thread.",
"SampleAfterValue": "2000003",
"UMask": "0x1"
},
@@ -1059,7 +1029,6 @@
"EventCode": "0xb1",
"EventName": "UOPS_EXECUTED.STALL_CYCLES",
"Invert": "1",
- "PublicDescription": "This event is deprecated. Refer to new event UOPS_EXECUTED.STALLS Available PDIST counters: 0",
"SampleAfterValue": "2000003",
"UMask": "0x1"
},
@@ -1068,7 +1037,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xb1",
"EventName": "UOPS_EXECUTED.THREAD",
- "PublicDescription": "Counts the number of uops to be executed per-thread each cycle. Available PDIST counters: 0",
"SampleAfterValue": "2000003",
"UMask": "0x1"
},
@@ -1077,7 +1045,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xb1",
"EventName": "UOPS_EXECUTED.X87",
- "PublicDescription": "Counts the number of x87 uops executed. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of x87 uops executed.",
"SampleAfterValue": "2000003",
"UMask": "0x10"
},
@@ -1086,7 +1054,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xae",
"EventName": "UOPS_ISSUED.ANY",
- "PublicDescription": "Counts the number of uops that the Resource Allocation Table (RAT) issues to the Reservation Station (RS). Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of uops that the Resource Allocation Table (RAT) issues to the Reservation Station (RS).",
"SampleAfterValue": "2000003",
"UMask": "0x1"
},
@@ -1096,7 +1064,6 @@
"CounterMask": "1",
"EventCode": "0xae",
"EventName": "UOPS_ISSUED.CYCLES",
- "PublicDescription": "UOPS_ISSUED.CYCLES Available PDIST counters: 0",
"SampleAfterValue": "2000003",
"UMask": "0x1"
},
@@ -1106,7 +1073,7 @@
"CounterMask": "1",
"EventCode": "0xc2",
"EventName": "UOPS_RETIRED.CYCLES",
- "PublicDescription": "Counts cycles where at least one uop has retired. Available PDIST counters: 0",
+ "PublicDescription": "Counts cycles where at least one uop has retired.",
"SampleAfterValue": "1000003",
"UMask": "0x2"
},
@@ -1115,7 +1082,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc2",
"EventName": "UOPS_RETIRED.HEAVY",
- "PublicDescription": "Counts the number of retired micro-operations (uops) except the last uop of each instruction. An instruction that is decoded into less than two uops does not contribute to the count. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of retired micro-operations (uops) except the last uop of each instruction. An instruction that is decoded into less than two uops does not contribute to the count.",
"SampleAfterValue": "2000003",
"UMask": "0x1"
},
@@ -1126,7 +1093,6 @@
"EventName": "UOPS_RETIRED.MS",
"MSRIndex": "0x3F7",
"MSRValue": "0x8",
- "PublicDescription": "UOPS_RETIRED.MS Available PDIST counters: 0",
"SampleAfterValue": "2000003",
"UMask": "0x4"
},
@@ -1135,7 +1101,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc2",
"EventName": "UOPS_RETIRED.SLOTS",
- "PublicDescription": "Counts the retirement slots used each cycle. Available PDIST counters: 0",
+ "PublicDescription": "Counts the retirement slots used each cycle.",
"SampleAfterValue": "2000003",
"UMask": "0x2"
},
@@ -1146,7 +1112,7 @@
"EventCode": "0xc2",
"EventName": "UOPS_RETIRED.STALLS",
"Invert": "1",
- "PublicDescription": "This event counts cycles without actually retired uops. Available PDIST counters: 0",
+ "PublicDescription": "This event counts cycles without actually retired uops.",
"SampleAfterValue": "1000003",
"UMask": "0x2"
},
@@ -1158,7 +1124,6 @@
"EventCode": "0xc2",
"EventName": "UOPS_RETIRED.STALL_CYCLES",
"Invert": "1",
- "PublicDescription": "This event is deprecated. Refer to new event UOPS_RETIRED.STALLS Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x2"
}
diff --git a/tools/perf/pmu-events/arch/x86/sapphirerapids/spr-metrics.json b/tools/perf/pmu-events/arch/x86/sapphirerapids/spr-metrics.json
index fe3f288be10e..b6a28227a58a 100644
--- a/tools/perf/pmu-events/arch/x86/sapphirerapids/spr-metrics.json
+++ b/tools/perf/pmu-events/arch/x86/sapphirerapids/spr-metrics.json
@@ -1,28 +1,28 @@
[
{
"BriefDescription": "C1 residency percent per core",
- "MetricExpr": "cstate_core@c1\\-residency@ / TSC",
+ "MetricExpr": "cstate_core@c1\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C1_Core_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C2 residency percent per package",
- "MetricExpr": "cstate_pkg@c2\\-residency@ / TSC",
+ "MetricExpr": "cstate_pkg@c2\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C2_Pkg_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C6 residency percent per core",
- "MetricExpr": "cstate_core@c6\\-residency@ / TSC",
+ "MetricExpr": "cstate_core@c6\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C6_Core_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C6 residency percent per package",
- "MetricExpr": "cstate_pkg@c6\\-residency@ / TSC",
+ "MetricExpr": "cstate_pkg@c6\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C6_Pkg_Residency",
"ScaleUnit": "100%"
@@ -40,6 +40,18 @@
"ScaleUnit": "1per_instr"
},
{
+ "BriefDescription": "The average number of cores that are in cstate C0 as observed by the power control unit (PCU)",
+ "MetricExpr": "UNC_P_POWER_STATE_OCCUPANCY_CORES_C0 / UNC_P_CLOCKTICKS * #num_packages",
+ "MetricGroup": "cpu_cstate",
+ "MetricName": "cpu_cstate_c0"
+ },
+ {
+ "BriefDescription": "The average number of cores are in cstate C6 as observed by the power control unit (PCU)",
+ "MetricExpr": "UNC_P_POWER_STATE_OCCUPANCY_CORES_C6 / UNC_P_CLOCKTICKS * #num_packages",
+ "MetricGroup": "cpu_cstate",
+ "MetricName": "cpu_cstate_c6"
+ },
+ {
"BriefDescription": "CPU operating frequency (in GHz)",
"MetricExpr": "CPU_CLK_UNHALTED.THREAD / CPU_CLK_UNHALTED.REF_TSC * #SYSTEM_TSC_FREQ / 1e9",
"MetricName": "cpu_operating_frequency",
@@ -91,6 +103,12 @@
"ScaleUnit": "1MB/s"
},
{
+ "BriefDescription": "Bandwidth of inbound IO reads that are initiated by end device controllers that are requesting memory from the CPU and miss the L3 cache",
+ "MetricExpr": "UNC_CHA_TOR_INSERTS.IO_MISS_PCIRDCUR * 64 / 1e6 / duration_time",
+ "MetricName": "io_bandwidth_read_l3_miss",
+ "ScaleUnit": "1MB/s"
+ },
+ {
"BriefDescription": "Bandwidth of IO reads that are initiated by end device controllers that are requesting memory from the local CPU socket",
"MetricExpr": "UNC_CHA_TOR_INSERTS.IO_PCIRDCUR_LOCAL * 64 / 1e6 / duration_time",
"MetricName": "io_bandwidth_read_local",
@@ -109,6 +127,12 @@
"ScaleUnit": "1MB/s"
},
{
+ "BriefDescription": "Bandwidth of inbound IO writes that are initiated by end device controllers that are writing memory to the CPU",
+ "MetricExpr": "(UNC_CHA_TOR_INSERTS.IO_MISS_ITOM + UNC_CHA_TOR_INSERTS.IO_MISS_ITOMCACHENEAR) * 64 / 1e6 / duration_time",
+ "MetricName": "io_bandwidth_write_l3_miss",
+ "ScaleUnit": "1MB/s"
+ },
+ {
"BriefDescription": "Bandwidth of IO writes that are initiated by end device controllers that are writing memory to the local CPU socket",
"MetricExpr": "(UNC_CHA_TOR_INSERTS.IO_ITOM_LOCAL + UNC_CHA_TOR_INSERTS.IO_ITOMCACHENEAR_LOCAL) * 64 / 1e6 / duration_time",
"MetricName": "io_bandwidth_write_local",
@@ -123,19 +147,19 @@
{
"BriefDescription": "Percentage of inbound full cacheline writes initiated by end device controllers that miss the L3 cache",
"MetricExpr": "UNC_CHA_TOR_INSERTS.IO_MISS_ITOM / UNC_CHA_TOR_INSERTS.IO_ITOM",
- "MetricName": "io_percent_of_inbound_full_writes_that_miss_l3",
+ "MetricName": "io_full_write_l3_miss",
"ScaleUnit": "100%"
},
{
"BriefDescription": "Percentage of inbound partial cacheline writes initiated by end device controllers that miss the L3 cache",
"MetricExpr": "(UNC_CHA_TOR_INSERTS.IO_MISS_ITOMCACHENEAR + UNC_CHA_TOR_INSERTS.IO_MISS_RFO) / (UNC_CHA_TOR_INSERTS.IO_ITOMCACHENEAR + UNC_CHA_TOR_INSERTS.IO_RFO)",
- "MetricName": "io_percent_of_inbound_partial_writes_that_miss_l3",
+ "MetricName": "io_partial_write_l3_miss",
"ScaleUnit": "100%"
},
{
"BriefDescription": "Percentage of inbound reads initiated by end device controllers that miss the L3 cache",
"MetricExpr": "UNC_CHA_TOR_INSERTS.IO_MISS_PCIRDCUR / UNC_CHA_TOR_INSERTS.IO_PCIRDCUR",
- "MetricName": "io_percent_of_inbound_reads_that_miss_l3",
+ "MetricName": "io_read_l3_miss",
"ScaleUnit": "100%"
},
{
@@ -395,7 +419,7 @@
{
"BriefDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend",
"DefaultMetricgroupName": "TopdownL1",
- "MetricExpr": "topdown\\-be\\-bound / (topdown\\-fe\\-bound + topdown\\-bad\\-spec + topdown\\-retiring + topdown\\-be\\-bound) + 0 * tma_info_thread_slots",
+ "MetricExpr": "topdown\\-be\\-bound / (topdown\\-fe\\-bound + topdown\\-bad\\-spec + topdown\\-retiring + topdown\\-be\\-bound)",
"MetricGroup": "BvOB;Default;TmaL1;TopdownL1;tma_L1_group",
"MetricName": "tma_backend_bound",
"MetricThreshold": "tma_backend_bound > 0.2",
@@ -430,39 +454,39 @@
"PublicDescription": "Total pipeline cost of instructions used for program control-flow - a subset of the Retiring category in TMA. Examples include function calls; loops and alignments. (A lower bound)"
},
{
+ "BriefDescription": "Total pipeline cost when the execution is compute-bound - an estimation",
+ "MetricExpr": "100 * (tma_core_bound * tma_divider / (tma_amx_busy + tma_divider + tma_ports_utilization + tma_serializing_operation) + tma_core_bound * tma_amx_busy / (tma_amx_busy + tma_divider + tma_ports_utilization + tma_serializing_operation) + tma_core_bound * (tma_ports_utilization / (tma_amx_busy + tma_divider + tma_ports_utilization + tma_serializing_operation)) * (tma_ports_utilized_3m / (tma_ports_utilized_0 + tma_ports_utilized_1 + tma_ports_utilized_2 + tma_ports_utilized_3m)))",
+ "MetricGroup": "BvCB;Cor;tma_issueComp",
+ "MetricName": "tma_bottleneck_compute_bound_est",
+ "MetricThreshold": "tma_bottleneck_compute_bound_est > 20",
+ "PublicDescription": "Total pipeline cost when the execution is compute-bound - an estimation. Covers Core Bound when High ILP as well as when long-latency execution units are busy. Related metrics: "
+ },
+ {
"BriefDescription": "Total pipeline cost of external Memory- or Cache-Bandwidth related bottlenecks",
- "MetricExpr": "100 * (tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_mem_bandwidth / (tma_mem_bandwidth + tma_mem_latency)) + tma_memory_bound * (tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_sq_full / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full)) + tma_memory_bound * (tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_fb_full / (tma_dtlb_load + tma_fb_full + tma_l1_latency_dependency + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)))",
+ "MetricExpr": "100 * (tma_memory_bound * (tma_dram_bound / (tma_cxl_mem_bound + tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_mem_bandwidth / (tma_mem_bandwidth + tma_mem_latency)) + tma_memory_bound * (tma_l3_bound / (tma_cxl_mem_bound + tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_sq_full / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full)) + tma_memory_bound * (tma_l1_bound / (tma_cxl_mem_bound + tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_fb_full / (tma_dtlb_load + tma_fb_full + tma_l1_latency_dependency + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)))",
"MetricGroup": "BvMB;Mem;MemoryBW;Offcore;tma_issueBW",
- "MetricName": "tma_bottleneck_cache_memory_bandwidth",
- "MetricThreshold": "tma_bottleneck_cache_memory_bandwidth > 20",
+ "MetricName": "tma_bottleneck_data_cache_memory_bandwidth",
+ "MetricThreshold": "tma_bottleneck_data_cache_memory_bandwidth > 20",
"PublicDescription": "Total pipeline cost of external Memory- or Cache-Bandwidth related bottlenecks. Related metrics: tma_fb_full, tma_info_system_dram_bw_use, tma_mem_bandwidth, tma_sq_full"
},
{
"BriefDescription": "Total pipeline cost of external Memory- or Cache-Latency related bottlenecks",
- "MetricExpr": "100 * (tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_mem_latency / (tma_mem_bandwidth + tma_mem_latency)) + tma_memory_bound * (tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_l3_hit_latency / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full)) + tma_memory_bound * tma_l2_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) + tma_memory_bound * (tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_l1_latency_dependency / (tma_dtlb_load + tma_fb_full + tma_l1_latency_dependency + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)) + tma_memory_bound * (tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_lock_latency / (tma_dtlb_load + tma_fb_full + tma_l1_latency_dependency + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)) + tma_memory_bound * (tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_split_loads / (tma_dtlb_load + tma_fb_full + tma_l1_latency_dependency + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)) + tma_memory_bound * (tma_store_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_split_stores / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency + tma_streaming_stores)) + tma_memory_bound * (tma_store_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_store_latency / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency + tma_streaming_stores)))",
+ "MetricExpr": "100 * (tma_memory_bound * (tma_dram_bound / (tma_cxl_mem_bound + tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_mem_latency / (tma_mem_bandwidth + tma_mem_latency)) + tma_memory_bound * (tma_l3_bound / (tma_cxl_mem_bound + tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_l3_hit_latency / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full)) + tma_memory_bound * tma_l2_bound / (tma_cxl_mem_bound + tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) + tma_memory_bound * (tma_l1_bound / (tma_cxl_mem_bound + tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_l1_latency_dependency / (tma_dtlb_load + tma_fb_full + tma_l1_latency_dependency + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)) + tma_memory_bound * (tma_l1_bound / (tma_cxl_mem_bound + tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_lock_latency / (tma_dtlb_load + tma_fb_full + tma_l1_latency_dependency + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)) + tma_memory_bound * (tma_l1_bound / (tma_cxl_mem_bound + tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_split_loads / (tma_dtlb_load + tma_fb_full + tma_l1_latency_dependency + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)) + tma_memory_bound * (tma_store_bound / (tma_cxl_mem_bound + tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_split_stores / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency + tma_streaming_stores)) + tma_memory_bound * (tma_store_bound / (tma_cxl_mem_bound + tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_store_latency / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency + tma_streaming_stores)))",
"MetricGroup": "BvML;Mem;MemoryLat;Offcore;tma_issueLat",
- "MetricName": "tma_bottleneck_cache_memory_latency",
- "MetricThreshold": "tma_bottleneck_cache_memory_latency > 20",
+ "MetricName": "tma_bottleneck_data_cache_memory_latency",
+ "MetricThreshold": "tma_bottleneck_data_cache_memory_latency > 20",
"PublicDescription": "Total pipeline cost of external Memory- or Cache-Latency related bottlenecks. Related metrics: tma_l3_hit_latency, tma_mem_latency"
},
{
- "BriefDescription": "Total pipeline cost when the execution is compute-bound - an estimation",
- "MetricExpr": "100 * (tma_core_bound * tma_divider / (tma_amx_busy + tma_divider + tma_ports_utilization + tma_serializing_operation) + tma_core_bound * tma_amx_busy / (tma_amx_busy + tma_divider + tma_ports_utilization + tma_serializing_operation) + tma_core_bound * (tma_ports_utilization / (tma_amx_busy + tma_divider + tma_ports_utilization + tma_serializing_operation)) * (tma_ports_utilized_3m / (tma_ports_utilized_0 + tma_ports_utilized_1 + tma_ports_utilized_2 + tma_ports_utilized_3m)))",
- "MetricGroup": "BvCB;Cor;tma_issueComp",
- "MetricName": "tma_bottleneck_compute_bound_est",
- "MetricThreshold": "tma_bottleneck_compute_bound_est > 20",
- "PublicDescription": "Total pipeline cost when the execution is compute-bound - an estimation. Covers Core Bound when High ILP as well as when long-latency execution units are busy. Related metrics: "
- },
- {
"BriefDescription": "Total pipeline cost of instruction fetch bandwidth related bottlenecks (when the front-end could not sustain operations delivery to the back-end)",
- "MetricExpr": "100 * (tma_frontend_bound - (1 - 10 * tma_microcode_sequencer * tma_other_mispredicts / tma_branch_mispredicts) * tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches) - (1 - INST_RETIRED.REP_ITERATION / cpu@UOPS_RETIRED.MS\\,cmask\\=1@) * (tma_fetch_latency * (tma_ms_switches + tma_branch_resteers * (tma_clears_resteers + tma_mispredicts_resteers * tma_other_mispredicts / tma_branch_mispredicts) / (tma_clears_resteers + tma_mispredicts_resteers + tma_unknown_branches)) / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches) + tma_fetch_bandwidth * tma_ms / (tma_dsb + tma_mite + tma_ms))) - tma_bottleneck_big_code",
+ "MetricExpr": "100 * (tma_frontend_bound - (1 - 10 * tma_microcode_sequencer * tma_other_mispredicts / tma_branch_mispredicts) * tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches) - (1 - INST_RETIRED.REP_ITERATION / cpu@UOPS_RETIRED.MS\\,cmask\\=1@) * (tma_fetch_latency * (tma_ms_switches + tma_branch_resteers * (tma_clears_resteers + tma_mispredicts_resteers * tma_other_mispredicts / tma_branch_mispredicts) / (tma_clears_resteers + tma_mispredicts_resteers + tma_unknown_branches)) / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches) + tma_ms)) - tma_bottleneck_big_code",
"MetricGroup": "BvFB;Fed;FetchBW;Frontend",
"MetricName": "tma_bottleneck_instruction_fetch_bw",
"MetricThreshold": "tma_bottleneck_instruction_fetch_bw > 20"
},
{
"BriefDescription": "Total pipeline cost of irregular execution (e.g",
- "MetricExpr": "100 * ((1 - INST_RETIRED.REP_ITERATION / cpu@UOPS_RETIRED.MS\\,cmask\\=1@) * (tma_fetch_latency * (tma_ms_switches + tma_branch_resteers * (tma_clears_resteers + tma_mispredicts_resteers * tma_other_mispredicts / tma_branch_mispredicts) / (tma_clears_resteers + tma_mispredicts_resteers + tma_unknown_branches)) / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches) + tma_fetch_bandwidth * tma_ms / (tma_dsb + tma_mite + tma_ms)) + 10 * tma_microcode_sequencer * tma_other_mispredicts / tma_branch_mispredicts * tma_branch_mispredicts + tma_machine_clears * tma_other_nukes / tma_other_nukes + tma_core_bound * (tma_serializing_operation + RS.EMPTY_RESOURCE / tma_info_thread_clks * tma_ports_utilized_0) / (tma_amx_busy + tma_divider + tma_ports_utilization + tma_serializing_operation) + tma_microcode_sequencer / (tma_few_uops_instructions + tma_microcode_sequencer) * (tma_assists / tma_microcode_sequencer) * tma_heavy_operations)",
+ "MetricExpr": "100 * ((1 - INST_RETIRED.REP_ITERATION / cpu@UOPS_RETIRED.MS\\,cmask\\=1@) * (tma_fetch_latency * (tma_ms_switches + tma_branch_resteers * (tma_clears_resteers + tma_mispredicts_resteers * tma_other_mispredicts / tma_branch_mispredicts) / (tma_clears_resteers + tma_mispredicts_resteers + tma_unknown_branches)) / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches) + tma_ms) + 10 * tma_microcode_sequencer * tma_other_mispredicts / tma_branch_mispredicts * tma_branch_mispredicts + tma_machine_clears * tma_other_nukes / tma_other_nukes + tma_core_bound * (tma_serializing_operation + RS.EMPTY_RESOURCE / tma_info_thread_clks * tma_ports_utilized_0) / (tma_amx_busy + tma_divider + tma_ports_utilization + tma_serializing_operation) + tma_microcode_sequencer / (tma_few_uops_instructions + tma_microcode_sequencer) * (tma_assists / tma_microcode_sequencer) * tma_heavy_operations)",
"MetricGroup": "Bad;BvIO;Cor;Ret;tma_issueMS",
"MetricName": "tma_bottleneck_irregular_overhead",
"MetricThreshold": "tma_bottleneck_irregular_overhead > 10",
@@ -470,7 +494,7 @@
},
{
"BriefDescription": "Total pipeline cost of Memory Address Translation related bottlenecks (data-side TLBs)",
- "MetricExpr": "100 * (tma_memory_bound * (tma_l1_bound / max(tma_memory_bound, tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_dtlb_load / max(tma_l1_bound, tma_dtlb_load + tma_fb_full + tma_l1_latency_dependency + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)) + tma_memory_bound * (tma_store_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_dtlb_store / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency + tma_streaming_stores)))",
+ "MetricExpr": "100 * (tma_memory_bound * (tma_l1_bound / max(tma_memory_bound, tma_cxl_mem_bound + tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_dtlb_load / max(tma_l1_bound, tma_dtlb_load + tma_fb_full + tma_l1_latency_dependency + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)) + tma_memory_bound * (tma_store_bound / (tma_cxl_mem_bound + tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_dtlb_store / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency + tma_streaming_stores)))",
"MetricGroup": "BvMT;Mem;MemoryTLB;Offcore;tma_issueTLB",
"MetricName": "tma_bottleneck_memory_data_tlbs",
"MetricThreshold": "tma_bottleneck_memory_data_tlbs > 20",
@@ -478,7 +502,7 @@
},
{
"BriefDescription": "Total pipeline cost of Memory Synchronization related bottlenecks (data transfers and coherency updates across processors)",
- "MetricExpr": "100 * (tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_mem_latency / (tma_mem_bandwidth + tma_mem_latency)) * tma_remote_cache / (tma_local_mem + tma_remote_cache + tma_remote_mem) + tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_contested_accesses + tma_data_sharing) / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full) + tma_store_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * tma_false_sharing / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency + tma_streaming_stores - tma_store_latency)) + tma_machine_clears * (1 - tma_other_nukes / tma_other_nukes))",
+ "MetricExpr": "100 * (tma_memory_bound * (tma_dram_bound / (tma_cxl_mem_bound + tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_mem_latency / (tma_mem_bandwidth + tma_mem_latency)) * tma_remote_cache / (tma_local_mem + tma_remote_cache + tma_remote_mem) + tma_l3_bound / (tma_cxl_mem_bound + tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_contested_accesses + tma_data_sharing) / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full) + tma_store_bound / (tma_cxl_mem_bound + tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * tma_false_sharing / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency + tma_streaming_stores - tma_store_latency)) + tma_machine_clears * (1 - tma_other_nukes / tma_other_nukes))",
"MetricGroup": "BvMS;LockCont;Mem;Offcore;tma_issueSyncxn",
"MetricName": "tma_bottleneck_memory_synchronization",
"MetricThreshold": "tma_bottleneck_memory_synchronization > 10",
@@ -494,7 +518,7 @@
},
{
"BriefDescription": "Total pipeline cost of remaining bottlenecks in the back-end",
- "MetricExpr": "100 - (tma_bottleneck_big_code + tma_bottleneck_instruction_fetch_bw + tma_bottleneck_mispredictions + tma_bottleneck_cache_memory_bandwidth + tma_bottleneck_cache_memory_latency + tma_bottleneck_memory_data_tlbs + tma_bottleneck_memory_synchronization + tma_bottleneck_compute_bound_est + tma_bottleneck_irregular_overhead + tma_bottleneck_branching_overhead + tma_bottleneck_useful_work)",
+ "MetricExpr": "100 - (tma_bottleneck_big_code + tma_bottleneck_instruction_fetch_bw + tma_bottleneck_mispredictions + tma_bottleneck_data_cache_memory_bandwidth + tma_bottleneck_data_cache_memory_latency + tma_bottleneck_memory_data_tlbs + tma_bottleneck_memory_synchronization + tma_bottleneck_compute_bound_est + tma_bottleneck_irregular_overhead + tma_bottleneck_branching_overhead + tma_bottleneck_useful_work)",
"MetricGroup": "BvOB;Cor;Offcore",
"MetricName": "tma_bottleneck_other_bottlenecks",
"MetricThreshold": "tma_bottleneck_other_bottlenecks > 20",
@@ -510,7 +534,7 @@
{
"BriefDescription": "This metric represents fraction of slots the CPU has wasted due to Branch Misprediction",
"DefaultMetricgroupName": "TopdownL2",
- "MetricExpr": "topdown\\-br\\-mispredict / (topdown\\-fe\\-bound + topdown\\-bad\\-spec + topdown\\-retiring + topdown\\-be\\-bound) + 0 * tma_info_thread_slots",
+ "MetricExpr": "topdown\\-br\\-mispredict / (topdown\\-fe\\-bound + topdown\\-bad\\-spec + topdown\\-retiring + topdown\\-be\\-bound)",
"MetricGroup": "BadSpec;BrMispredicts;BvMP;Default;TmaL2;TopdownL2;tma_L2_group;tma_bad_speculation_group;tma_issueBM",
"MetricName": "tma_branch_mispredicts",
"MetricThreshold": "tma_branch_mispredicts > 0.1 & tma_bad_speculation > 0.15",
@@ -611,7 +635,6 @@
},
{
"BriefDescription": "This metric estimates fraction of cycles while the memory subsystem was handling synchronizations due to contested accesses",
- "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "(76.6 * tma_info_system_core_frequency * (MEM_LOAD_L3_HIT_RETIRED.XSNP_FWD * (OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM / (OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM + OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD))) + 74.6 * tma_info_system_core_frequency * MEM_LOAD_L3_HIT_RETIRED.XSNP_MISS) * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS / 2) / tma_info_thread_clks",
"MetricGroup": "BvMS;DataSharing;LockCont;Offcore;Snoop;TopdownL4;tma_L4_group;tma_issueSyncxn;tma_l3_bound_group",
"MetricName": "tma_contested_accesses",
@@ -631,6 +654,15 @@
"ScaleUnit": "100%"
},
{
+ "BriefDescription": "This metric roughly estimates (based on idle latencies) how often the CPU was stalled on accesses to external CXL Memory by loads (e.g",
+ "MetricExpr": "(((1 - ((19 * (MEM_LOAD_L3_MISS_RETIRED.REMOTE_DRAM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS)) + 10 * (MEM_LOAD_L3_MISS_RETIRED.LOCAL_DRAM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) + MEM_LOAD_L3_MISS_RETIRED.REMOTE_FWD * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) + MEM_LOAD_L3_MISS_RETIRED.REMOTE_HITM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS))) / (19 * (MEM_LOAD_L3_MISS_RETIRED.REMOTE_DRAM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS)) + 10 * (MEM_LOAD_L3_MISS_RETIRED.LOCAL_DRAM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) + MEM_LOAD_L3_MISS_RETIRED.REMOTE_FWD * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) + MEM_LOAD_L3_MISS_RETIRED.REMOTE_HITM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS)) + (25 * (MEM_LOAD_RETIRED.LOCAL_PMM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) if #has_pmem > 0 else 0) + 33 * (MEM_LOAD_L3_MISS_RETIRED.REMOTE_PMM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) if #has_pmem > 0 else 0))) if #has_pmem > 0 else 1)) * (MEMORY_ACTIVITY.STALLS_L3_MISS / tma_info_thread_clks) if 1e6 * (MEM_LOAD_L3_MISS_RETIRED.REMOTE_PMM + MEM_LOAD_RETIRED.LOCAL_PMM) > MEM_LOAD_RETIRED.L1_MISS else 0) if #has_pmem > 0 else 0)",
+ "MetricGroup": "MemoryBound;Server;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
+ "MetricName": "tma_cxl_mem_bound",
+ "MetricThreshold": "tma_cxl_mem_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)",
+ "PublicDescription": "This metric roughly estimates (based on idle latencies) how often the CPU was stalled on accesses to external CXL Memory by loads (e.g. 3D-Xpoint (Crystal Ridge, a.k.a. IXP) memory, PMM - Persistent Memory Module [from CLX to SPR] or any other CXL Type3 Memory [EMR onwards]).",
+ "ScaleUnit": "100%"
+ },
+ {
"BriefDescription": "This metric estimates fraction of cycles while the memory subsystem was handling synchronizations due to data-sharing accesses",
"MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "74.6 * tma_info_system_core_frequency * (MEM_LOAD_L3_HIT_RETIRED.XSNP_NO_FWD + MEM_LOAD_L3_HIT_RETIRED.XSNP_FWD * (1 - OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM / (OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM + OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD))) * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS / 2) / tma_info_thread_clks",
@@ -660,7 +692,7 @@
},
{
"BriefDescription": "This metric estimates how often the CPU was stalled on accesses to external memory (DRAM) by loads",
- "MetricExpr": "MEMORY_ACTIVITY.STALLS_L3_MISS / tma_info_thread_clks",
+ "MetricExpr": "(MEMORY_ACTIVITY.STALLS_L3_MISS / tma_info_thread_clks - tma_cxl_mem_bound if #has_pmem > 0 else MEMORY_ACTIVITY.STALLS_L3_MISS / tma_info_thread_clks)",
"MetricGroup": "MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
"MetricName": "tma_dram_bound",
"MetricThreshold": "tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)",
@@ -718,7 +750,7 @@
"MetricGroup": "BvMB;MemoryBW;TopdownL4;tma_L4_group;tma_issueBW;tma_issueSL;tma_issueSmSt;tma_l1_bound_group",
"MetricName": "tma_fb_full",
"MetricThreshold": "tma_fb_full > 0.3",
- "PublicDescription": "This metric does a *rough estimation* of how often L1D Fill Buffer unavailability limited additional L1D miss memory access requests to proceed. The higher the metric value; the deeper the memory hierarchy level the misses are satisfied from (metric values >1 are valid). Often it hints on approaching bandwidth limits (to L2 cache; L3 cache or external memory). Related metrics: tma_bottleneck_cache_memory_bandwidth, tma_info_system_dram_bw_use, tma_mem_bandwidth, tma_sq_full, tma_store_latency, tma_streaming_stores",
+ "PublicDescription": "This metric does a *rough estimation* of how often L1D Fill Buffer unavailability limited additional L1D miss memory access requests to proceed. The higher the metric value; the deeper the memory hierarchy level the misses are satisfied from (metric values >1 are valid). Often it hints on approaching bandwidth limits (to L2 cache; L3 cache or external memory). Related metrics: tma_bottleneck_data_cache_memory_bandwidth, tma_info_system_dram_bw_use, tma_mem_bandwidth, tma_sq_full, tma_store_latency, tma_streaming_stores",
"ScaleUnit": "100%"
},
{
@@ -846,7 +878,7 @@
{
"BriefDescription": "This metric represents fraction of slots where the CPU was retiring heavy-weight operations -- instructions that require two or more uops or micro-coded sequences",
"DefaultMetricgroupName": "TopdownL2",
- "MetricExpr": "topdown\\-heavy\\-ops / (topdown\\-fe\\-bound + topdown\\-bad\\-spec + topdown\\-retiring + topdown\\-be\\-bound) + 0 * tma_info_thread_slots",
+ "MetricExpr": "topdown\\-heavy\\-ops / (topdown\\-fe\\-bound + topdown\\-bad\\-spec + topdown\\-retiring + topdown\\-be\\-bound)",
"MetricGroup": "Default;Retire;TmaL2;TopdownL2;tma_L2_group;tma_retiring_group",
"MetricName": "tma_heavy_operations",
"MetricThreshold": "tma_heavy_operations > 0.1",
@@ -1357,19 +1389,19 @@
{
"BriefDescription": "Off-core accesses per kilo instruction for modified write requests",
"MetricExpr": "1e3 * OCR.MODIFIED_WRITE.ANY_RESPONSE / tma_info_inst_mix_instructions",
- "MetricGroup": "Offcore",
+ "MetricGroup": "Offcore;Server",
"MetricName": "tma_info_memory_mix_offcore_mwrite_any_pki"
},
{
"BriefDescription": "Off-core accesses per kilo instruction for reads-to-core requests (speculative; including in-core HW prefetches)",
"MetricExpr": "1e3 * OCR.READS_TO_CORE.ANY_RESPONSE / tma_info_inst_mix_instructions",
- "MetricGroup": "CacheHits;Offcore",
+ "MetricGroup": "CacheHits;Offcore;Server",
"MetricName": "tma_info_memory_mix_offcore_read_any_pki"
},
{
"BriefDescription": "L3 cache misses per kilo instruction for reads-to-core requests (speculative; including in-core HW prefetches)",
"MetricExpr": "1e3 * OCR.READS_TO_CORE.L3_MISS / tma_info_inst_mix_instructions",
- "MetricGroup": "Offcore",
+ "MetricGroup": "Offcore;Server",
"MetricName": "tma_info_memory_mix_offcore_read_l3m_pki"
},
{
@@ -1395,21 +1427,21 @@
{
"BriefDescription": "Average DRAM BW for Reads-to-Core (R2C) covering for memory attached to local- and remote-socket",
"MetricExpr": "64 * OCR.READS_TO_CORE.DRAM / 1e9 / tma_info_system_time",
- "MetricGroup": "HPC;Mem;MemoryBW;SoC",
+ "MetricGroup": "HPC;Mem;MemoryBW;Offcore;Server",
"MetricName": "tma_info_memory_soc_r2c_dram_bw",
"PublicDescription": "Average DRAM BW for Reads-to-Core (R2C) covering for memory attached to local- and remote-socket. See R2C_Offcore_BW."
},
{
"BriefDescription": "Average L3-cache miss BW for Reads-to-Core (R2C)",
"MetricExpr": "64 * OCR.READS_TO_CORE.L3_MISS / 1e9 / tma_info_system_time",
- "MetricGroup": "HPC;Mem;MemoryBW;SoC",
+ "MetricGroup": "HPC;Mem;MemoryBW;Offcore;Server",
"MetricName": "tma_info_memory_soc_r2c_l3m_bw",
"PublicDescription": "Average L3-cache miss BW for Reads-to-Core (R2C). This covering going to DRAM or other memory off-chip memory tears. See R2C_Offcore_BW."
},
{
"BriefDescription": "Average Off-core access BW for Reads-to-Core (R2C)",
"MetricExpr": "64 * OCR.READS_TO_CORE.ANY_RESPONSE / 1e9 / tma_info_system_time",
- "MetricGroup": "HPC;Mem;MemoryBW;SoC",
+ "MetricGroup": "HPC;Mem;MemoryBW;Offcore;Server",
"MetricName": "tma_info_memory_soc_r2c_offcore_bw",
"PublicDescription": "Average Off-core access BW for Reads-to-Core (R2C). R2C account for demand or prefetch load/RFO/code access that fill data into the Core caches."
},
@@ -1439,7 +1471,7 @@
"MetricName": "tma_info_memory_tlb_store_stlb_mpki"
},
{
- "BriefDescription": "",
+ "BriefDescription": "Mem;Backend;CacheHits",
"MetricExpr": "UOPS_EXECUTED.THREAD / (UOPS_EXECUTED.CORE_CYCLES_GE_1 / 2 if #SMT_on else cpu@UOPS_EXECUTED.THREAD\\,cmask\\=1@)",
"MetricGroup": "Cor;Pipeline;PortsUtil;SMT",
"MetricName": "tma_info_pipeline_execute"
@@ -1486,7 +1518,7 @@
},
{
"BriefDescription": "Measured Average Core Frequency for unhalted processors [GHz]",
- "MetricExpr": "tma_info_system_turbo_utilization * TSC / 1e9 / tma_info_system_time",
+ "MetricExpr": "tma_info_system_turbo_utilization * msr@tsc@ / 1e9 / tma_info_system_time",
"MetricGroup": "Power;Summary",
"MetricName": "tma_info_system_core_frequency"
},
@@ -1498,16 +1530,28 @@
},
{
"BriefDescription": "Average number of utilized CPUs",
- "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / TSC",
+ "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / msr@tsc@",
"MetricGroup": "Summary",
"MetricName": "tma_info_system_cpus_utilized"
},
{
+ "BriefDescription": "Average 3DXP Memory Bandwidth Use for reads [GB / sec]",
+ "MetricExpr": "(64 * UNC_M_PMM_RPQ_INSERTS / 1e9 / tma_info_system_time if #has_pmem > 0 else 0)",
+ "MetricGroup": "MemOffcore;MemoryBW;Server;SoC",
+ "MetricName": "tma_info_system_cxl_mem_read_bw"
+ },
+ {
+ "BriefDescription": "Average 3DXP Memory Bandwidth Use for Writes [GB / sec]",
+ "MetricExpr": "(64 * UNC_M_PMM_WPQ_INSERTS / 1e9 / tma_info_system_time if #has_pmem > 0 else 0)",
+ "MetricGroup": "MemOffcore;MemoryBW;Server;SoC",
+ "MetricName": "tma_info_system_cxl_mem_write_bw"
+ },
+ {
"BriefDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]",
"MetricExpr": "64 * (UNC_M_CAS_COUNT.RD + UNC_M_CAS_COUNT.WR) / 1e9 / tma_info_system_time",
"MetricGroup": "HPC;MemOffcore;MemoryBW;SoC;tma_issueBW",
"MetricName": "tma_info_system_dram_bw_use",
- "PublicDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]. Related metrics: tma_bottleneck_cache_memory_bandwidth, tma_fb_full, tma_mem_bandwidth, tma_sq_full"
+ "PublicDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]. Related metrics: tma_bottleneck_data_cache_memory_bandwidth, tma_fb_full, tma_mem_bandwidth, tma_sq_full"
},
{
"BriefDescription": "Giga Floating Point Operations Per Second",
@@ -1572,8 +1616,14 @@
"PublicDescription": "Average number of parallel data read requests to external memory. Accounts for demand loads and L1/L2 prefetches"
},
{
+ "BriefDescription": "Average latency of data read request to external 3D X-Point memory [in nanoseconds]",
+ "MetricExpr": "(1e9 * (UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_PMM / UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PMM) / uncore_cha_0@event\\=0x1@ if #has_pmem > 0 else 0)",
+ "MetricGroup": "MemOffcore;MemoryLat;Server;SoC",
+ "MetricName": "tma_info_system_mem_pmm_read_latency",
+ "PublicDescription": "Average latency of data read request to external 3D X-Point memory [in nanoseconds]. Accounts for demand loads and L1/L2 data-read prefetches"
+ },
+ {
"BriefDescription": "Average latency of data read request to external memory (in nanoseconds)",
- "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "1e9 * (UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD / UNC_CHA_TOR_INSERTS.IA_MISS_DRD) / (tma_info_system_socket_clks / tma_info_system_time)",
"MetricGroup": "Mem;MemoryLat;SoC",
"MetricName": "tma_info_system_mem_read_latency",
@@ -1734,12 +1784,12 @@
"ScaleUnit": "100%"
},
{
- "BriefDescription": "This metric([SKL+] roughly; [LNL]) estimates fraction of cycles with demand load accesses that hit the L1D cache",
+ "BriefDescription": "This metric ([SKL+] roughly; [LNL]) estimates fraction of cycles with demand load accesses that hit the L1D cache",
"MetricExpr": "min(2 * (MEM_INST_RETIRED.ALL_LOADS - MEM_LOAD_RETIRED.FB_HIT - MEM_LOAD_RETIRED.L1_MISS) * 20 / 100, max(CYCLE_ACTIVITY.CYCLES_MEM_ANY - MEMORY_ACTIVITY.CYCLES_L1D_MISS, 0)) / tma_info_thread_clks",
"MetricGroup": "BvML;MemoryLat;TopdownL4;tma_L4_group;tma_l1_bound_group",
"MetricName": "tma_l1_latency_dependency",
"MetricThreshold": "tma_l1_latency_dependency > 0.1 & (tma_l1_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
- "PublicDescription": "This metric([SKL+] roughly; [LNL]) estimates fraction of cycles with demand load accesses that hit the L1D cache. The short latency of the L1D cache may be exposed in pointer-chasing memory access patterns as an example. Sample with: MEM_LOAD_RETIRED.L1_HIT",
+ "PublicDescription": "This metric ([SKL+] roughly; [LNL]) estimates fraction of cycles with demand load accesses that hit the L1D cache. The short latency of the L1D cache may be exposed in pointer-chasing memory access patterns as an example. Sample with: MEM_LOAD_RETIRED.L1_HIT",
"ScaleUnit": "100%"
},
{
@@ -1753,7 +1803,6 @@
},
{
"BriefDescription": "This metric represents fraction of cycles with demand load accesses that hit the L2 cache under unloaded scenarios (possibly L2 latency limited)",
- "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "4.4 * tma_info_system_core_frequency * MEM_LOAD_RETIRED.L2_HIT * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS / 2) / tma_info_thread_clks",
"MetricGroup": "MemoryLat;TopdownL4;tma_L4_group;tma_l2_bound_group",
"MetricName": "tma_l2_hit_latency",
@@ -1772,12 +1821,11 @@
},
{
"BriefDescription": "This metric estimates fraction of cycles with demand load accesses that hit the L3 cache under unloaded scenarios (possibly L3 latency limited)",
- "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "32.6 * tma_info_system_core_frequency * (MEM_LOAD_RETIRED.L3_HIT * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS / 2)) / tma_info_thread_clks",
"MetricGroup": "BvML;MemoryLat;TopdownL4;tma_L4_group;tma_issueLat;tma_l3_bound_group",
"MetricName": "tma_l3_hit_latency",
"MetricThreshold": "tma_l3_hit_latency > 0.1 & (tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
- "PublicDescription": "This metric estimates fraction of cycles with demand load accesses that hit the L3 cache under unloaded scenarios (possibly L3 latency limited). Avoiding private cache misses (i.e. L2 misses/L3 hits) will improve the latency; reduce contention with sibling physical cores and increase performance. Note the value of this node may overlap with its siblings. Sample with: MEM_LOAD_RETIRED.L3_HIT_PS. Related metrics: tma_bottleneck_cache_memory_latency, tma_mem_latency",
+ "PublicDescription": "This metric estimates fraction of cycles with demand load accesses that hit the L3 cache under unloaded scenarios (possibly L3 latency limited). Avoiding private cache misses (i.e. L2 misses/L3 hits) will improve the latency; reduce contention with sibling physical cores and increase performance. Note the value of this node may overlap with its siblings. Sample with: MEM_LOAD_RETIRED.L3_HIT_PS. Related metrics: tma_bottleneck_data_cache_memory_latency, tma_mem_latency",
"ScaleUnit": "100%"
},
{
@@ -1860,6 +1908,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles the CPU spent handling cache misses due to lock operations",
+ "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "(16 * max(0, MEM_INST_RETIRED.LOCK_LOADS - L2_RQSTS.ALL_RFO) + MEM_INST_RETIRED.LOCK_LOADS / MEM_INST_RETIRED.ALL_STORES * (10 * L2_RQSTS.RFO_HIT + min(CPU_CLK_UNHALTED.THREAD, OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO))) / tma_info_thread_clks",
"MetricGroup": "LockCont;Offcore;TopdownL4;tma_L4_group;tma_issueRFO;tma_l1_bound_group",
"MetricName": "tma_lock_latency",
@@ -1892,7 +1941,7 @@
"MetricGroup": "BvMB;MemoryBW;Offcore;TopdownL4;tma_L4_group;tma_dram_bound_group;tma_issueBW",
"MetricName": "tma_mem_bandwidth",
"MetricThreshold": "tma_mem_bandwidth > 0.2 & (tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
- "PublicDescription": "This metric estimates fraction of cycles where the core's performance was likely hurt due to approaching bandwidth limits of external memory - DRAM ([SPR-HBM] and/or HBM). The underlying heuristic assumes that a similar off-core traffic is generated by all IA cores. This metric does not aggregate non-data-read requests by this logical processor; requests from other IA Logical Processors/Physical Cores/sockets; or other non-IA devices like GPU; hence the maximum external memory bandwidth limits may or may not be approached when this metric is flagged (see Uncore counters for that). Related metrics: tma_bottleneck_cache_memory_bandwidth, tma_fb_full, tma_info_system_dram_bw_use, tma_sq_full",
+ "PublicDescription": "This metric estimates fraction of cycles where the core's performance was likely hurt due to approaching bandwidth limits of external memory - DRAM ([SPR-HBM] and/or HBM). The underlying heuristic assumes that a similar off-core traffic is generated by all IA cores. This metric does not aggregate non-data-read requests by this logical processor; requests from other IA Logical Processors/Physical Cores/sockets; or other non-IA devices like GPU; hence the maximum external memory bandwidth limits may or may not be approached when this metric is flagged (see Uncore counters for that). Related metrics: tma_bottleneck_data_cache_memory_bandwidth, tma_fb_full, tma_info_system_dram_bw_use, tma_sq_full",
"ScaleUnit": "100%"
},
{
@@ -1901,13 +1950,13 @@
"MetricGroup": "BvML;MemoryLat;Offcore;TopdownL4;tma_L4_group;tma_dram_bound_group;tma_issueLat",
"MetricName": "tma_mem_latency",
"MetricThreshold": "tma_mem_latency > 0.1 & (tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
- "PublicDescription": "This metric estimates fraction of cycles where the performance was likely hurt due to latency from external memory - DRAM ([SPR-HBM] and/or HBM). This metric does not aggregate requests from other Logical Processors/Physical Cores/sockets (see Uncore counters for that). Related metrics: tma_bottleneck_cache_memory_latency, tma_l3_hit_latency",
+ "PublicDescription": "This metric estimates fraction of cycles where the performance was likely hurt due to latency from external memory - DRAM ([SPR-HBM] and/or HBM). This metric does not aggregate requests from other Logical Processors/Physical Cores/sockets (see Uncore counters for that). Related metrics: tma_bottleneck_data_cache_memory_latency, tma_l3_hit_latency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "This metric represents fraction of slots the Memory subsystem within the Backend was a bottleneck",
"DefaultMetricgroupName": "TopdownL2",
- "MetricExpr": "topdown\\-mem\\-bound / (topdown\\-fe\\-bound + topdown\\-bad\\-spec + topdown\\-retiring + topdown\\-be\\-bound) + 0 * tma_info_thread_slots",
+ "MetricExpr": "topdown\\-mem\\-bound / (topdown\\-fe\\-bound + topdown\\-bad\\-spec + topdown\\-retiring + topdown\\-be\\-bound)",
"MetricGroup": "Backend;Default;TmaL2;TopdownL2;tma_L2_group;tma_backend_bound_group",
"MetricName": "tma_memory_bound",
"MetricThreshold": "tma_memory_bound > 0.2 & tma_backend_bound > 0.2",
@@ -1917,7 +1966,6 @@
},
{
"BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to LFENCE Instructions.",
- "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "13 * MISC2_RETIRED.LFENCE / tma_info_thread_clks",
"MetricGroup": "TopdownL4;tma_L4_group;tma_serializing_operation_group",
"MetricName": "tma_memory_fence",
@@ -1970,7 +2018,7 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles in which CPU was likely limited due to the Microcode Sequencer (MS) unit - see Microcode_Sequencer node for details.",
- "MetricExpr": "max(IDQ.MS_CYCLES_ANY, cpu@UOPS_RETIRED.MS\\,cmask\\=1@ / (UOPS_RETIRED.SLOTS / UOPS_ISSUED.ANY)) / tma_info_core_core_clks / 2",
+ "MetricExpr": "max(IDQ.MS_CYCLES_ANY, cpu@UOPS_RETIRED.MS\\,cmask\\=1@ / (UOPS_RETIRED.SLOTS / UOPS_ISSUED.ANY)) / tma_info_core_core_clks / 2.4",
"MetricGroup": "MicroSeq;TopdownL3;tma_L3_group;tma_fetch_bandwidth_group",
"MetricName": "tma_ms",
"MetricThreshold": "tma_ms > 0.05 & tma_fetch_bandwidth > 0.2",
@@ -2005,6 +2053,7 @@
},
{
"BriefDescription": "This metric represents the remaining light uops fraction the CPU has executed - remaining means not covered by other sibling nodes",
+ "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "max(0, tma_light_operations - (tma_fp_arith + tma_int_operations + tma_memory_operations + tma_fused_instructions + tma_non_fused_branches))",
"MetricGroup": "Pipeline;TopdownL3;tma_L3_group;tma_light_operations_group",
"MetricName": "tma_other_light_ops",
@@ -2066,6 +2115,7 @@
},
{
"BriefDescription": "This metric estimates fraction of cycles the CPU performance was potentially limited due to Core computation issues (non divider-related)",
+ "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "((tma_ports_utilized_0 * tma_info_thread_clks + (EXE_ACTIVITY.1_PORTS_UTIL + tma_retiring * EXE_ACTIVITY.2_3_PORTS_UTIL)) / tma_info_thread_clks if ARITH.DIV_ACTIVE < CYCLE_ACTIVITY.STALLS_TOTAL - EXE_ACTIVITY.BOUND_ON_LOADS else (EXE_ACTIVITY.1_PORTS_UTIL + tma_retiring * EXE_ACTIVITY.2_3_PORTS_UTIL) / tma_info_thread_clks)",
"MetricGroup": "PortsUtil;TopdownL3;tma_L3_group;tma_core_bound_group",
"MetricName": "tma_ports_utilization",
@@ -2075,6 +2125,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles CPU executed no uops on any execution port (Logical Processor cycles since ICL, Physical Core cycles otherwise)",
+ "MetricConstraint": "NO_THRESHOLD_AND_NMI",
"MetricExpr": "(EXE_ACTIVITY.EXE_BOUND_0_PORTS + max(RS.EMPTY_RESOURCE - RESOURCE_STALLS.SCOREBOARD, 0)) / tma_info_thread_clks * (CYCLE_ACTIVITY.STALLS_TOTAL - EXE_ACTIVITY.BOUND_ON_LOADS) / tma_info_thread_clks",
"MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_ports_utilization_group",
"MetricName": "tma_ports_utilized_0",
@@ -2084,6 +2135,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles where the CPU executed total of 1 uop per cycle on all execution ports (Logical Processor cycles since ICL, Physical Core cycles otherwise)",
+ "MetricConstraint": "NO_THRESHOLD_AND_NMI",
"MetricExpr": "EXE_ACTIVITY.1_PORTS_UTIL / tma_info_thread_clks",
"MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_issueL1;tma_ports_utilization_group",
"MetricName": "tma_ports_utilized_1",
@@ -2093,7 +2145,6 @@
},
{
"BriefDescription": "This metric represents fraction of cycles CPU executed total of 2 uops per cycle on all execution ports (Logical Processor cycles since ICL, Physical Core cycles otherwise)",
- "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "EXE_ACTIVITY.2_PORTS_UTIL / tma_info_thread_clks",
"MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_issue2P;tma_ports_utilization_group",
"MetricName": "tma_ports_utilized_2",
@@ -2103,7 +2154,6 @@
},
{
"BriefDescription": "This metric represents fraction of cycles CPU executed total of 3 or more uops per cycle on all execution ports (Logical Processor cycles since ICL, Physical Core cycles otherwise)",
- "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "UOPS_EXECUTED.CYCLES_GE_3 / tma_info_thread_clks",
"MetricGroup": "BvCB;PortsUtil;TopdownL4;tma_L4_group;tma_ports_utilization_group",
"MetricName": "tma_ports_utilized_3m",
@@ -2132,7 +2182,7 @@
{
"BriefDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired",
"DefaultMetricgroupName": "TopdownL1",
- "MetricExpr": "topdown\\-retiring / (topdown\\-fe\\-bound + topdown\\-bad\\-spec + topdown\\-retiring + topdown\\-be\\-bound) + 0 * tma_info_thread_slots",
+ "MetricExpr": "topdown\\-retiring / (topdown\\-fe\\-bound + topdown\\-bad\\-spec + topdown\\-retiring + topdown\\-be\\-bound)",
"MetricGroup": "BvUW;Default;TmaL1;TopdownL1;tma_L1_group",
"MetricName": "tma_retiring",
"MetricThreshold": "tma_retiring > 0.7 | tma_heavy_operations > 0.1",
@@ -2160,7 +2210,6 @@
},
{
"BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to PAUSE Instructions",
- "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "CPU_CLK_UNHALTED.PAUSE / tma_info_thread_clks",
"MetricGroup": "TopdownL4;tma_L4_group;tma_serializing_operation_group",
"MetricName": "tma_slow_pause",
@@ -2192,7 +2241,7 @@
"MetricGroup": "BvMB;MemoryBW;Offcore;TopdownL4;tma_L4_group;tma_issueBW;tma_l3_bound_group",
"MetricName": "tma_sq_full",
"MetricThreshold": "tma_sq_full > 0.3 & (tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
- "PublicDescription": "This metric measures fraction of cycles where the Super Queue (SQ) was full taking into account all request-types and both hardware SMT threads (Logical Processors). Related metrics: tma_bottleneck_cache_memory_bandwidth, tma_fb_full, tma_info_system_dram_bw_use, tma_mem_bandwidth",
+ "PublicDescription": "This metric measures fraction of cycles where the Super Queue (SQ) was full taking into account all request-types and both hardware SMT threads (Logical Processors). Related metrics: tma_bottleneck_data_cache_memory_bandwidth, tma_fb_full, tma_info_system_dram_bw_use, tma_mem_bandwidth",
"ScaleUnit": "100%"
},
{
diff --git a/tools/perf/pmu-events/arch/x86/sapphirerapids/uncore-cache.json b/tools/perf/pmu-events/arch/x86/sapphirerapids/uncore-cache.json
index a38db3e253f2..1bdda3c3ccbf 100644
--- a/tools/perf/pmu-events/arch/x86/sapphirerapids/uncore-cache.json
+++ b/tools/perf/pmu-events/arch/x86/sapphirerapids/uncore-cache.json
@@ -312,6 +312,17 @@
"Unit": "CHA"
},
{
+ "BriefDescription": "Distress signal asserted : DPT Remote",
+ "Counter": "0,1,2,3",
+ "EventCode": "0xaf",
+ "EventName": "UNC_CHA_DISTRESS_ASSERTED.DPT_NONLOCAL",
+ "Experimental": "1",
+ "PerPkg": "1",
+ "PublicDescription": "Distress signal asserted : DPT Remote : Counts the number of cycles either the local or incoming distress signals are asserted. : Dynamic Prefetch Throttle received by this tile",
+ "UMask": "0x8",
+ "Unit": "CHA"
+ },
+ {
"BriefDescription": "Egress Blocking due to Ordering requirements : Down",
"Counter": "0,1,2,3",
"EventCode": "0xba",
diff --git a/tools/perf/pmu-events/arch/x86/sapphirerapids/uncore-memory.json b/tools/perf/pmu-events/arch/x86/sapphirerapids/uncore-memory.json
index 68be01dad7c9..30044177ccf8 100644
--- a/tools/perf/pmu-events/arch/x86/sapphirerapids/uncore-memory.json
+++ b/tools/perf/pmu-events/arch/x86/sapphirerapids/uncore-memory.json
@@ -2770,6 +2770,88 @@
"Unit": "iMC"
},
{
+ "BriefDescription": "Number of DRAM Refreshes Issued",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x45",
+ "EventName": "UNC_M_DRAM_REFRESH.HIGH",
+ "Experimental": "1",
+ "PerPkg": "1",
+ "PublicDescription": "Number of DRAM Refreshes Issued : Counts the number of refreshes issued.",
+ "UMask": "0x24",
+ "Unit": "iMC"
+ },
+ {
+ "BriefDescription": "Number of DRAM Refreshes Issued",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x45",
+ "EventName": "UNC_M_DRAM_REFRESH.HIGH_ALL",
+ "Experimental": "1",
+ "PerPkg": "1",
+ "UMask": "0x24",
+ "Unit": "iMC"
+ },
+ {
+ "BriefDescription": "Number of DRAM Refreshes Issued",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x45",
+ "EventName": "UNC_M_DRAM_REFRESH.HIGH_PCH0",
+ "Experimental": "1",
+ "PerPkg": "1",
+ "UMask": "0x4",
+ "Unit": "iMC"
+ },
+ {
+ "BriefDescription": "Number of DRAM Refreshes Issued",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x45",
+ "EventName": "UNC_M_DRAM_REFRESH.HIGH_PCH1",
+ "Experimental": "1",
+ "PerPkg": "1",
+ "UMask": "0x20",
+ "Unit": "iMC"
+ },
+ {
+ "BriefDescription": "Number of DRAM Refreshes Issued",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x45",
+ "EventName": "UNC_M_DRAM_REFRESH.PANIC",
+ "Experimental": "1",
+ "PerPkg": "1",
+ "PublicDescription": "Number of DRAM Refreshes Issued : Counts the number of refreshes issued.",
+ "UMask": "0x12",
+ "Unit": "iMC"
+ },
+ {
+ "BriefDescription": "Number of DRAM Refreshes Issued",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x45",
+ "EventName": "UNC_M_DRAM_REFRESH.PANIC_ALL",
+ "Experimental": "1",
+ "PerPkg": "1",
+ "UMask": "0x12",
+ "Unit": "iMC"
+ },
+ {
+ "BriefDescription": "Number of DRAM Refreshes Issued",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x45",
+ "EventName": "UNC_M_DRAM_REFRESH.PANIC_PCH0",
+ "Experimental": "1",
+ "PerPkg": "1",
+ "UMask": "0x2",
+ "Unit": "iMC"
+ },
+ {
+ "BriefDescription": "Number of DRAM Refreshes Issued",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x45",
+ "EventName": "UNC_M_DRAM_REFRESH.PANIC_PCH1",
+ "Experimental": "1",
+ "PerPkg": "1",
+ "UMask": "0x10",
+ "Unit": "iMC"
+ },
+ {
"BriefDescription": "ECC Correctable Errors",
"Counter": "0,1,2,3",
"EventCode": "0x09",
@@ -3048,6 +3130,28 @@
"Unit": "iMC"
},
{
+ "BriefDescription": "Throttle Cycles for Rank 0",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x46",
+ "EventName": "UNC_M_POWER_THROTTLE_CYCLES.SLOT0",
+ "Experimental": "1",
+ "PerPkg": "1",
+ "PublicDescription": "Throttle Cycles for Rank 0 : Counts the number of cycles while the iMC is being throttled by either thermal constraints or by the PCU throttling. It is not possible to distinguish between the two. This can be filtered by rank. If multiple ranks are selected and are being throttled at the same time, the counter will only increment by 1. : Thermal throttling is performed per DIMM. We support 3 DIMMs per channel. This ID allows us to filter by ID.",
+ "UMask": "0x1",
+ "Unit": "iMC"
+ },
+ {
+ "BriefDescription": "Throttle Cycles for Rank 0",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x46",
+ "EventName": "UNC_M_POWER_THROTTLE_CYCLES.SLOT1",
+ "Experimental": "1",
+ "PerPkg": "1",
+ "PublicDescription": "Throttle Cycles for Rank 0 : Counts the number of cycles while the iMC is being throttled by either thermal constraints or by the PCU throttling. It is not possible to distinguish between the two. This can be filtered by rank. If multiple ranks are selected and are being throttled at the same time, the counter will only increment by 1.",
+ "UMask": "0x2",
+ "Unit": "iMC"
+ },
+ {
"BriefDescription": "Precharge due to read, write, underfill, or PGT.",
"Counter": "0,1,2,3",
"EventCode": "0x03",
diff --git a/tools/perf/pmu-events/arch/x86/sapphirerapids/uncore-power.json b/tools/perf/pmu-events/arch/x86/sapphirerapids/uncore-power.json
index 9482ddaea4d1..71c35b165a3e 100644
--- a/tools/perf/pmu-events/arch/x86/sapphirerapids/uncore-power.json
+++ b/tools/perf/pmu-events/arch/x86/sapphirerapids/uncore-power.json
@@ -178,7 +178,6 @@
"Counter": "0,1,2,3",
"EventCode": "0x35",
"EventName": "UNC_P_POWER_STATE_OCCUPANCY_CORES_C0",
- "Experimental": "1",
"PerPkg": "1",
"PublicDescription": "Number of cores in C0 : This is an occupancy event that tracks the number of cores that are in the chosen C-State. It can be used by itself to get the average number of cores in that C-state with thresholding to generate histograms, or with other PCU events and occupancy triggering to capture other details.",
"Unit": "PCU"
@@ -198,7 +197,6 @@
"Counter": "0,1,2,3",
"EventCode": "0x37",
"EventName": "UNC_P_POWER_STATE_OCCUPANCY_CORES_C6",
- "Experimental": "1",
"PerPkg": "1",
"PublicDescription": "Number of cores in C6 : This is an occupancy event that tracks the number of cores that are in the chosen C-State. It can be used by itself to get the average number of cores in that C-state with thresholding to generate histograms, or with other PCU events and occupancy triggering to capture other details.",
"Unit": "PCU"
diff --git a/tools/perf/pmu-events/arch/x86/sapphirerapids/virtual-memory.json b/tools/perf/pmu-events/arch/x86/sapphirerapids/virtual-memory.json
index 3d3f88600e26..609a9549cbf3 100644
--- a/tools/perf/pmu-events/arch/x86/sapphirerapids/virtual-memory.json
+++ b/tools/perf/pmu-events/arch/x86/sapphirerapids/virtual-memory.json
@@ -4,7 +4,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x12",
"EventName": "DTLB_LOAD_MISSES.STLB_HIT",
- "PublicDescription": "Counts loads that miss the DTLB (Data TLB) and hit the STLB (Second level TLB). Available PDIST counters: 0",
+ "PublicDescription": "Counts loads that miss the DTLB (Data TLB) and hit the STLB (Second level TLB).",
"SampleAfterValue": "100003",
"UMask": "0x20"
},
@@ -14,7 +14,7 @@
"CounterMask": "1",
"EventCode": "0x12",
"EventName": "DTLB_LOAD_MISSES.WALK_ACTIVE",
- "PublicDescription": "Counts cycles when at least one PMH (Page Miss Handler) is busy with a page walk for a demand load. Available PDIST counters: 0",
+ "PublicDescription": "Counts cycles when at least one PMH (Page Miss Handler) is busy with a page walk for a demand load.",
"SampleAfterValue": "100003",
"UMask": "0x10"
},
@@ -23,7 +23,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x12",
"EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED",
- "PublicDescription": "Counts completed page walks (all page sizes) caused by demand data loads. This implies it missed in the DTLB and further levels of TLB. The page walk can end with or without a fault. Available PDIST counters: 0",
+ "PublicDescription": "Counts completed page walks (all page sizes) caused by demand data loads. This implies it missed in the DTLB and further levels of TLB. The page walk can end with or without a fault.",
"SampleAfterValue": "100003",
"UMask": "0xe"
},
@@ -32,7 +32,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x12",
"EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_1G",
- "PublicDescription": "Counts completed page walks (1G sizes) caused by demand data loads. This implies address translations missed in the DTLB and further levels of TLB. The page walk can end with or without a fault. Available PDIST counters: 0",
+ "PublicDescription": "Counts completed page walks (1G sizes) caused by demand data loads. This implies address translations missed in the DTLB and further levels of TLB. The page walk can end with or without a fault.",
"SampleAfterValue": "100003",
"UMask": "0x8"
},
@@ -41,7 +41,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x12",
"EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_2M_4M",
- "PublicDescription": "Counts completed page walks (2M/4M sizes) caused by demand data loads. This implies address translations missed in the DTLB and further levels of TLB. The page walk can end with or without a fault. Available PDIST counters: 0",
+ "PublicDescription": "Counts completed page walks (2M/4M sizes) caused by demand data loads. This implies address translations missed in the DTLB and further levels of TLB. The page walk can end with or without a fault.",
"SampleAfterValue": "100003",
"UMask": "0x4"
},
@@ -50,7 +50,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x12",
"EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_4K",
- "PublicDescription": "Counts completed page walks (4K sizes) caused by demand data loads. This implies address translations missed in the DTLB and further levels of TLB. The page walk can end with or without a fault. Available PDIST counters: 0",
+ "PublicDescription": "Counts completed page walks (4K sizes) caused by demand data loads. This implies address translations missed in the DTLB and further levels of TLB. The page walk can end with or without a fault.",
"SampleAfterValue": "100003",
"UMask": "0x2"
},
@@ -59,7 +59,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x12",
"EventName": "DTLB_LOAD_MISSES.WALK_PENDING",
- "PublicDescription": "Counts the number of page walks outstanding for a demand load in the PMH (Page Miss Handler) each cycle. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of page walks outstanding for a demand load in the PMH (Page Miss Handler) each cycle.",
"SampleAfterValue": "100003",
"UMask": "0x10"
},
@@ -68,7 +68,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x13",
"EventName": "DTLB_STORE_MISSES.STLB_HIT",
- "PublicDescription": "Counts stores that miss the DTLB (Data TLB) and hit the STLB (2nd Level TLB). Available PDIST counters: 0",
+ "PublicDescription": "Counts stores that miss the DTLB (Data TLB) and hit the STLB (2nd Level TLB).",
"SampleAfterValue": "100003",
"UMask": "0x20"
},
@@ -78,7 +78,7 @@
"CounterMask": "1",
"EventCode": "0x13",
"EventName": "DTLB_STORE_MISSES.WALK_ACTIVE",
- "PublicDescription": "Counts cycles when at least one PMH (Page Miss Handler) is busy with a page walk for a store. Available PDIST counters: 0",
+ "PublicDescription": "Counts cycles when at least one PMH (Page Miss Handler) is busy with a page walk for a store.",
"SampleAfterValue": "100003",
"UMask": "0x10"
},
@@ -87,7 +87,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x13",
"EventName": "DTLB_STORE_MISSES.WALK_COMPLETED",
- "PublicDescription": "Counts completed page walks (all page sizes) caused by demand data stores. This implies it missed in the DTLB and further levels of TLB. The page walk can end with or without a fault. Available PDIST counters: 0",
+ "PublicDescription": "Counts completed page walks (all page sizes) caused by demand data stores. This implies it missed in the DTLB and further levels of TLB. The page walk can end with or without a fault.",
"SampleAfterValue": "100003",
"UMask": "0xe"
},
@@ -96,7 +96,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x13",
"EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_1G",
- "PublicDescription": "Counts completed page walks (1G sizes) caused by demand data stores. This implies address translations missed in the DTLB and further levels of TLB. The page walk can end with or without a fault. Available PDIST counters: 0",
+ "PublicDescription": "Counts completed page walks (1G sizes) caused by demand data stores. This implies address translations missed in the DTLB and further levels of TLB. The page walk can end with or without a fault.",
"SampleAfterValue": "100003",
"UMask": "0x8"
},
@@ -105,7 +105,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x13",
"EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_2M_4M",
- "PublicDescription": "Counts completed page walks (2M/4M sizes) caused by demand data stores. This implies address translations missed in the DTLB and further levels of TLB. The page walk can end with or without a fault. Available PDIST counters: 0",
+ "PublicDescription": "Counts completed page walks (2M/4M sizes) caused by demand data stores. This implies address translations missed in the DTLB and further levels of TLB. The page walk can end with or without a fault.",
"SampleAfterValue": "100003",
"UMask": "0x4"
},
@@ -114,7 +114,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x13",
"EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_4K",
- "PublicDescription": "Counts completed page walks (4K sizes) caused by demand data stores. This implies address translations missed in the DTLB and further levels of TLB. The page walk can end with or without a fault. Available PDIST counters: 0",
+ "PublicDescription": "Counts completed page walks (4K sizes) caused by demand data stores. This implies address translations missed in the DTLB and further levels of TLB. The page walk can end with or without a fault.",
"SampleAfterValue": "100003",
"UMask": "0x2"
},
@@ -123,7 +123,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x13",
"EventName": "DTLB_STORE_MISSES.WALK_PENDING",
- "PublicDescription": "Counts the number of page walks outstanding for a store in the PMH (Page Miss Handler) each cycle. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of page walks outstanding for a store in the PMH (Page Miss Handler) each cycle.",
"SampleAfterValue": "100003",
"UMask": "0x10"
},
@@ -132,7 +132,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x11",
"EventName": "ITLB_MISSES.STLB_HIT",
- "PublicDescription": "Counts instruction fetch requests that miss the ITLB (Instruction TLB) and hit the STLB (Second-level TLB). Available PDIST counters: 0",
+ "PublicDescription": "Counts instruction fetch requests that miss the ITLB (Instruction TLB) and hit the STLB (Second-level TLB).",
"SampleAfterValue": "100003",
"UMask": "0x20"
},
@@ -142,7 +142,7 @@
"CounterMask": "1",
"EventCode": "0x11",
"EventName": "ITLB_MISSES.WALK_ACTIVE",
- "PublicDescription": "Counts cycles when at least one PMH (Page Miss Handler) is busy with a page walk for a code (instruction fetch) request. Available PDIST counters: 0",
+ "PublicDescription": "Counts cycles when at least one PMH (Page Miss Handler) is busy with a page walk for a code (instruction fetch) request.",
"SampleAfterValue": "100003",
"UMask": "0x10"
},
@@ -151,7 +151,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x11",
"EventName": "ITLB_MISSES.WALK_COMPLETED",
- "PublicDescription": "Counts completed page walks (all page sizes) caused by a code fetch. This implies it missed in the ITLB (Instruction TLB) and further levels of TLB. The page walk can end with or without a fault. Available PDIST counters: 0",
+ "PublicDescription": "Counts completed page walks (all page sizes) caused by a code fetch. This implies it missed in the ITLB (Instruction TLB) and further levels of TLB. The page walk can end with or without a fault.",
"SampleAfterValue": "100003",
"UMask": "0xe"
},
@@ -160,7 +160,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x11",
"EventName": "ITLB_MISSES.WALK_COMPLETED_2M_4M",
- "PublicDescription": "Counts completed page walks (2M/4M page sizes) caused by a code fetch. This implies it missed in the ITLB (Instruction TLB) and further levels of TLB. The page walk can end with or without a fault. Available PDIST counters: 0",
+ "PublicDescription": "Counts completed page walks (2M/4M page sizes) caused by a code fetch. This implies it missed in the ITLB (Instruction TLB) and further levels of TLB. The page walk can end with or without a fault.",
"SampleAfterValue": "100003",
"UMask": "0x4"
},
@@ -169,7 +169,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x11",
"EventName": "ITLB_MISSES.WALK_COMPLETED_4K",
- "PublicDescription": "Counts completed page walks (4K page sizes) caused by a code fetch. This implies it missed in the ITLB (Instruction TLB) and further levels of TLB. The page walk can end with or without a fault. Available PDIST counters: 0",
+ "PublicDescription": "Counts completed page walks (4K page sizes) caused by a code fetch. This implies it missed in the ITLB (Instruction TLB) and further levels of TLB. The page walk can end with or without a fault.",
"SampleAfterValue": "100003",
"UMask": "0x2"
},
@@ -178,7 +178,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x11",
"EventName": "ITLB_MISSES.WALK_PENDING",
- "PublicDescription": "Counts the number of page walks outstanding for an outstanding code (instruction fetch) request in the PMH (Page Miss Handler) each cycle. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of page walks outstanding for an outstanding code (instruction fetch) request in the PMH (Page Miss Handler) each cycle.",
"SampleAfterValue": "100003",
"UMask": "0x10"
}
diff --git a/tools/perf/pmu-events/arch/x86/sierraforest/cache.json b/tools/perf/pmu-events/arch/x86/sierraforest/cache.json
index 877052db1490..de0e7661a52d 100644
--- a/tools/perf/pmu-events/arch/x86/sierraforest/cache.json
+++ b/tools/perf/pmu-events/arch/x86/sierraforest/cache.json
@@ -163,6 +163,14 @@
"UMask": "0x6"
},
{
+ "BriefDescription": "Counts the number of unhalted cycles when the core is stalled due to a demand load miss which hit in the LLC, no snoop was required. LLC provides the data. If the core has access to an L3 cache, an LLC hit refers to an L3 cache hit, otherwise it counts zeros.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x34",
+ "EventName": "MEM_BOUND_STALLS_LOAD.LLC_HIT_NOSNOOP",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x2"
+ },
+ {
"BriefDescription": "Counts the number of unhalted cycles when the core is stalled due to a demand load miss which missed all the local caches. If the core has access to an L3 cache, an LLC miss refers to an L3 cache miss, otherwise it is an L2 cache miss.",
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0x34",
@@ -179,6 +187,14 @@
"UMask": "0x80"
},
{
+ "BriefDescription": "Counts the total number of load ops retired that miss the L3 cache.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xd3",
+ "EventName": "MEM_LOAD_UOPS_L3_MISS_RETIRED.ALL",
+ "SampleAfterValue": "1000003",
+ "UMask": "0xff"
+ },
+ {
"BriefDescription": "Counts the number of load ops retired that miss the L3 cache and hit in DRAM",
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xd3",
@@ -187,6 +203,31 @@
"UMask": "0x1"
},
{
+ "BriefDescription": "Counts the number of load ops retired that miss the L3 cache and hit in a Remote DRAM",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xd3",
+ "EventName": "MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_DRAM_OR_NOFWD",
+ "PublicDescription": "Counts the number of load ops retired that miss the L3 cache and hit in a Remote DRAM, OR had a Remote snoop miss/no fwd and hit in the Local Dram",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x2"
+ },
+ {
+ "BriefDescription": "Counts the number of load ops retired that miss the L3 cache and hit in a Remote Cache and modified data was forwarded",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xd3",
+ "EventName": "MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_FWD_HITM",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x8"
+ },
+ {
+ "BriefDescription": "Counts the number of load ops retired that miss the L3 cache and hit in a Remote Cache and non-modified data was forwarded",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xd3",
+ "EventName": "MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_FWD_NONM",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x4"
+ },
+ {
"BriefDescription": "Counts the number of load ops retired that hit the L1 data cache.",
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xd1",
diff --git a/tools/perf/pmu-events/arch/x86/sierraforest/srf-metrics.json b/tools/perf/pmu-events/arch/x86/sierraforest/srf-metrics.json
index b9f3c611d87b..ca2c55917e55 100644
--- a/tools/perf/pmu-events/arch/x86/sierraforest/srf-metrics.json
+++ b/tools/perf/pmu-events/arch/x86/sierraforest/srf-metrics.json
@@ -1,56 +1,56 @@
[
{
"BriefDescription": "C10 residency percent per package",
- "MetricExpr": "cstate_pkg@c10\\-residency@ / TSC",
+ "MetricExpr": "cstate_pkg@c10\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C10_Pkg_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C1 residency percent per core",
- "MetricExpr": "cstate_core@c1\\-residency@ / TSC",
+ "MetricExpr": "cstate_core@c1\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C1_Core_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C2 residency percent per package",
- "MetricExpr": "cstate_pkg@c2\\-residency@ / TSC",
+ "MetricExpr": "cstate_pkg@c2\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C2_Pkg_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C3 residency percent per package",
- "MetricExpr": "cstate_pkg@c3\\-residency@ / TSC",
+ "MetricExpr": "cstate_pkg@c3\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C3_Pkg_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C6 residency percent per core",
- "MetricExpr": "cstate_core@c6\\-residency@ / TSC",
+ "MetricExpr": "cstate_core@c6\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C6_Core_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C6 residency percent per package",
- "MetricExpr": "cstate_pkg@c6\\-residency@ / TSC",
+ "MetricExpr": "cstate_pkg@c6\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C6_Pkg_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C7 residency percent per core",
- "MetricExpr": "cstate_core@c7\\-residency@ / TSC",
+ "MetricExpr": "cstate_core@c7\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C7_Core_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C8 residency percent per package",
- "MetricExpr": "cstate_pkg@c8\\-residency@ / TSC",
+ "MetricExpr": "cstate_pkg@c8\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C8_Pkg_Residency",
"ScaleUnit": "100%"
@@ -735,7 +735,7 @@
},
{
"BriefDescription": "Average CPU Utilization",
- "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / TSC",
+ "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / msr@tsc@",
"MetricName": "tma_info_system_cpu_utilization"
},
{
@@ -747,7 +747,7 @@
},
{
"BriefDescription": "Fraction of cycles spent in Kernel mode",
- "MetricExpr": "cpu@CPU_CLK_UNHALTED.CORE_P@k / CPU_CLK_UNHALTED.CORE",
+ "MetricExpr": "CPU_CLK_UNHALTED.CORE_P:k / CPU_CLK_UNHALTED.CORE",
"MetricGroup": "Summary",
"MetricName": "tma_info_system_kernel_utilization"
},
diff --git a/tools/perf/pmu-events/arch/x86/sierraforest/uncore-cache.json b/tools/perf/pmu-events/arch/x86/sierraforest/uncore-cache.json
index 3d1fb5f0417e..35b1763b0020 100644
--- a/tools/perf/pmu-events/arch/x86/sierraforest/uncore-cache.json
+++ b/tools/perf/pmu-events/arch/x86/sierraforest/uncore-cache.json
@@ -10,6 +10,15 @@
"Unit": "CHACMS"
},
{
+ "BriefDescription": "UNC_CHACMS_DISTRESS_ASSERTED",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x35",
+ "EventName": "UNC_CHACMS_DISTRESS_ASSERTED",
+ "PerPkg": "1",
+ "PortMask": "0x000",
+ "Unit": "CHACMS"
+ },
+ {
"BriefDescription": "Counts the number of cycles FAST trigger is received from the global FAST distress wire.",
"Counter": "0,1,2,3",
"EventCode": "0x34",
diff --git a/tools/perf/pmu-events/arch/x86/sierraforest/uncore-interconnect.json b/tools/perf/pmu-events/arch/x86/sierraforest/uncore-interconnect.json
index 952b6de3fefc..251e5d20fefe 100644
--- a/tools/perf/pmu-events/arch/x86/sierraforest/uncore-interconnect.json
+++ b/tools/perf/pmu-events/arch/x86/sierraforest/uncore-interconnect.json
@@ -839,12 +839,20 @@
"Counter": "0,1,2,3",
"EventCode": "0x1F",
"EventName": "UNC_I_MISC1.LOST_FWD",
- "Experimental": "1",
"PerPkg": "1",
"UMask": "0x10",
"Unit": "IRP"
},
{
+ "BriefDescription": "Misc Events - Set 1 : Received Invalid : Secondary received a transfer that did not have sufficient MESI state",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x1F",
+ "EventName": "UNC_I_MISC1.SEC_RCVD_INVLD",
+ "PerPkg": "1",
+ "UMask": "0x20",
+ "Unit": "IRP"
+ },
+ {
"BriefDescription": "Snoop Hit E/S responses",
"Counter": "0,1,2,3",
"EventCode": "0x12",
diff --git a/tools/perf/pmu-events/arch/x86/sierraforest/uncore-io.json b/tools/perf/pmu-events/arch/x86/sierraforest/uncore-io.json
index f4f956966e16..2ea2637df3fb 100644
--- a/tools/perf/pmu-events/arch/x86/sierraforest/uncore-io.json
+++ b/tools/perf/pmu-events/arch/x86/sierraforest/uncore-io.json
@@ -1321,7 +1321,6 @@
"FCMask": "0x01",
"PerPkg": "1",
"PortMask": "0x0FF",
- "PublicDescription": "-",
"UMask": "0x4",
"Unit": "IIO"
},
diff --git a/tools/perf/pmu-events/arch/x86/sierraforest/uncore-memory.json b/tools/perf/pmu-events/arch/x86/sierraforest/uncore-memory.json
index c7e9dbe02eb0..a9fd7a34b24b 100644
--- a/tools/perf/pmu-events/arch/x86/sierraforest/uncore-memory.json
+++ b/tools/perf/pmu-events/arch/x86/sierraforest/uncore-memory.json
@@ -60,6 +60,33 @@
"BriefDescription": "CAS count for SubChannel 0 regular reads",
"Counter": "0,1,2,3",
"EventCode": "0x05",
+ "EventName": "UNC_M_CAS_COUNT_SCH0.RD_NON_UNDERFILL",
+ "PerPkg": "1",
+ "UMask": "0xc3",
+ "Unit": "IMC"
+ },
+ {
+ "BriefDescription": "CAS count for SubChannel 0 auto-precharge reads",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x05",
+ "EventName": "UNC_M_CAS_COUNT_SCH0.RD_PRE_REG",
+ "PerPkg": "1",
+ "UMask": "0xc2",
+ "Unit": "IMC"
+ },
+ {
+ "BriefDescription": "CAS count for SubChannel 0 auto-precharge underfill reads",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x05",
+ "EventName": "UNC_M_CAS_COUNT_SCH0.RD_PRE_UNDERFILL",
+ "PerPkg": "1",
+ "UMask": "0xc8",
+ "Unit": "IMC"
+ },
+ {
+ "BriefDescription": "CAS count for SubChannel 0 regular reads",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x05",
"EventName": "UNC_M_CAS_COUNT_SCH0.RD_REG",
"PerPkg": "1",
"UMask": "0xc1",
@@ -75,6 +102,15 @@
"Unit": "IMC"
},
{
+ "BriefDescription": "CAS count for SubChannel 0 underfill reads",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x05",
+ "EventName": "UNC_M_CAS_COUNT_SCH0.RD_UNDERFILL_ALL",
+ "PerPkg": "1",
+ "UMask": "0xcc",
+ "Unit": "IMC"
+ },
+ {
"BriefDescription": "CAS count for SubChannel 0, all writes",
"Counter": "0,1,2,3",
"EventCode": "0x05",
@@ -125,6 +161,33 @@
"BriefDescription": "CAS count for SubChannel 1 regular reads",
"Counter": "0,1,2,3",
"EventCode": "0x06",
+ "EventName": "UNC_M_CAS_COUNT_SCH1.RD_NON_UNDERFILL",
+ "PerPkg": "1",
+ "UMask": "0xc3",
+ "Unit": "IMC"
+ },
+ {
+ "BriefDescription": "CAS count for SubChannel 1 auto-precharge reads",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x06",
+ "EventName": "UNC_M_CAS_COUNT_SCH1.RD_PRE_REG",
+ "PerPkg": "1",
+ "UMask": "0xc2",
+ "Unit": "IMC"
+ },
+ {
+ "BriefDescription": "CAS count for SubChannel 1 auto-precharge underfill reads",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x06",
+ "EventName": "UNC_M_CAS_COUNT_SCH1.RD_PRE_UNDERFILL",
+ "PerPkg": "1",
+ "UMask": "0xc8",
+ "Unit": "IMC"
+ },
+ {
+ "BriefDescription": "CAS count for SubChannel 1 regular reads",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x06",
"EventName": "UNC_M_CAS_COUNT_SCH1.RD_REG",
"PerPkg": "1",
"UMask": "0xc1",
@@ -140,6 +203,15 @@
"Unit": "IMC"
},
{
+ "BriefDescription": "CAS count for SubChannel 1 underfill reads",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x06",
+ "EventName": "UNC_M_CAS_COUNT_SCH1.RD_UNDERFILL_ALL",
+ "PerPkg": "1",
+ "UMask": "0xcc",
+ "Unit": "IMC"
+ },
+ {
"BriefDescription": "CAS count for SubChannel 1, all writes",
"Counter": "0,1,2,3",
"EventCode": "0x06",
@@ -195,7 +267,6 @@
"EventName": "UNC_M_MR4_2XREF_CYCLES.SCH0_DIMM0",
"Experimental": "1",
"PerPkg": "1",
- "PublicDescription": "-",
"UMask": "0x1",
"Unit": "IMC"
},
@@ -206,7 +277,6 @@
"EventName": "UNC_M_MR4_2XREF_CYCLES.SCH0_DIMM1",
"Experimental": "1",
"PerPkg": "1",
- "PublicDescription": "-",
"UMask": "0x2",
"Unit": "IMC"
},
@@ -217,7 +287,6 @@
"EventName": "UNC_M_MR4_2XREF_CYCLES.SCH1_DIMM0",
"Experimental": "1",
"PerPkg": "1",
- "PublicDescription": "-",
"UMask": "0x4",
"Unit": "IMC"
},
@@ -228,7 +297,6 @@
"EventName": "UNC_M_MR4_2XREF_CYCLES.SCH1_DIMM1",
"Experimental": "1",
"PerPkg": "1",
- "PublicDescription": "-",
"UMask": "0x8",
"Unit": "IMC"
},
@@ -239,7 +307,6 @@
"EventName": "UNC_M_PDC_MR4ACTIVE_CYCLES.SCH0_DIMM0",
"Experimental": "1",
"PerPkg": "1",
- "PublicDescription": "-",
"UMask": "0x1",
"Unit": "IMC"
},
@@ -250,7 +317,6 @@
"EventName": "UNC_M_PDC_MR4ACTIVE_CYCLES.SCH0_DIMM1",
"Experimental": "1",
"PerPkg": "1",
- "PublicDescription": "-",
"UMask": "0x2",
"Unit": "IMC"
},
@@ -261,7 +327,6 @@
"EventName": "UNC_M_PDC_MR4ACTIVE_CYCLES.SCH1_DIMM0",
"Experimental": "1",
"PerPkg": "1",
- "PublicDescription": "-",
"UMask": "0x4",
"Unit": "IMC"
},
@@ -272,7 +337,6 @@
"EventName": "UNC_M_PDC_MR4ACTIVE_CYCLES.SCH1_DIMM1",
"Experimental": "1",
"PerPkg": "1",
- "PublicDescription": "-",
"UMask": "0x8",
"Unit": "IMC"
},
@@ -283,7 +347,6 @@
"EventName": "UNC_M_POWERDOWN_CYCLES.SCH0_RANK0",
"Experimental": "1",
"PerPkg": "1",
- "PublicDescription": "-",
"UMask": "0x1",
"Unit": "IMC"
},
@@ -294,7 +357,6 @@
"EventName": "UNC_M_POWERDOWN_CYCLES.SCH0_RANK1",
"Experimental": "1",
"PerPkg": "1",
- "PublicDescription": "-",
"UMask": "0x2",
"Unit": "IMC"
},
@@ -305,7 +367,6 @@
"EventName": "UNC_M_POWERDOWN_CYCLES.SCH0_RANK2",
"Experimental": "1",
"PerPkg": "1",
- "PublicDescription": "-",
"UMask": "0x4",
"Unit": "IMC"
},
@@ -316,7 +377,6 @@
"EventName": "UNC_M_POWERDOWN_CYCLES.SCH0_RANK3",
"Experimental": "1",
"PerPkg": "1",
- "PublicDescription": "-",
"UMask": "0x8",
"Unit": "IMC"
},
@@ -327,7 +387,6 @@
"EventName": "UNC_M_POWERDOWN_CYCLES.SCH1_RANK0",
"Experimental": "1",
"PerPkg": "1",
- "PublicDescription": "-",
"UMask": "0x10",
"Unit": "IMC"
},
@@ -338,7 +397,6 @@
"EventName": "UNC_M_POWERDOWN_CYCLES.SCH1_RANK1",
"Experimental": "1",
"PerPkg": "1",
- "PublicDescription": "-",
"UMask": "0x20",
"Unit": "IMC"
},
@@ -349,7 +407,6 @@
"EventName": "UNC_M_POWERDOWN_CYCLES.SCH1_RANK2",
"Experimental": "1",
"PerPkg": "1",
- "PublicDescription": "-",
"UMask": "0x40",
"Unit": "IMC"
},
@@ -360,7 +417,6 @@
"EventName": "UNC_M_POWERDOWN_CYCLES.SCH1_RANK3",
"Experimental": "1",
"PerPkg": "1",
- "PublicDescription": "-",
"UMask": "0x80",
"Unit": "IMC"
},
@@ -371,7 +427,6 @@
"EventName": "UNC_M_POWER_CHANNEL_PPD_CYCLES",
"Experimental": "1",
"PerPkg": "1",
- "PublicDescription": "-",
"Unit": "IMC"
},
{
@@ -381,7 +436,6 @@
"EventName": "UNC_M_POWER_CRITICAL_THROTTLE_CYCLES.SLOT0",
"Experimental": "1",
"PerPkg": "1",
- "PublicDescription": "-",
"UMask": "0x1",
"Unit": "IMC"
},
@@ -392,7 +446,6 @@
"EventName": "UNC_M_POWER_CRITICAL_THROTTLE_CYCLES.SLOT1",
"Experimental": "1",
"PerPkg": "1",
- "PublicDescription": "-",
"UMask": "0x2",
"Unit": "IMC"
},
@@ -423,7 +476,6 @@
"EventName": "UNC_M_POWER_THROTTLE_CYCLES.MR4BLKEN",
"Experimental": "1",
"PerPkg": "1",
- "PublicDescription": "-",
"UMask": "0x8",
"Unit": "IMC"
},
@@ -434,7 +486,6 @@
"EventName": "UNC_M_POWER_THROTTLE_CYCLES.RAPLBLK",
"Experimental": "1",
"PerPkg": "1",
- "PublicDescription": "-",
"UMask": "0x4",
"Unit": "IMC"
},
@@ -617,7 +668,6 @@
"EventName": "UNC_M_SELF_REFRESH.ENTER_SUCCESS",
"Experimental": "1",
"PerPkg": "1",
- "PublicDescription": "UNC_M_SELF_REFRESH.ENTER_SUCCESS",
"UMask": "0x2",
"Unit": "IMC"
},
@@ -628,7 +678,6 @@
"EventName": "UNC_M_SELF_REFRESH.ENTER_SUCCESS_CYCLES",
"Experimental": "1",
"PerPkg": "1",
- "PublicDescription": "-",
"UMask": "0x1",
"Unit": "IMC"
},
@@ -639,7 +688,6 @@
"EventName": "UNC_M_THROTTLE_CRIT_CYCLES.SLOT0",
"Experimental": "1",
"PerPkg": "1",
- "PublicDescription": "-",
"UMask": "0x1",
"Unit": "IMC"
},
@@ -650,7 +698,6 @@
"EventName": "UNC_M_THROTTLE_CRIT_CYCLES.SLOT1",
"Experimental": "1",
"PerPkg": "1",
- "PublicDescription": "-",
"UMask": "0x2",
"Unit": "IMC"
},
@@ -661,7 +708,6 @@
"EventName": "UNC_M_THROTTLE_HIGH_CYCLES.SLOT0",
"Experimental": "1",
"PerPkg": "1",
- "PublicDescription": "-",
"UMask": "0x1",
"Unit": "IMC"
},
@@ -672,7 +718,6 @@
"EventName": "UNC_M_THROTTLE_HIGH_CYCLES.SLOT1",
"Experimental": "1",
"PerPkg": "1",
- "PublicDescription": "-",
"UMask": "0x2",
"Unit": "IMC"
},
@@ -683,7 +728,6 @@
"EventName": "UNC_M_THROTTLE_LOW_CYCLES.SLOT0",
"Experimental": "1",
"PerPkg": "1",
- "PublicDescription": "-",
"UMask": "0x1",
"Unit": "IMC"
},
@@ -694,7 +738,6 @@
"EventName": "UNC_M_THROTTLE_LOW_CYCLES.SLOT1",
"Experimental": "1",
"PerPkg": "1",
- "PublicDescription": "-",
"UMask": "0x2",
"Unit": "IMC"
},
@@ -705,7 +748,6 @@
"EventName": "UNC_M_THROTTLE_MID_CYCLES.SLOT0",
"Experimental": "1",
"PerPkg": "1",
- "PublicDescription": "-",
"UMask": "0x1",
"Unit": "IMC"
},
@@ -716,7 +758,6 @@
"EventName": "UNC_M_THROTTLE_MID_CYCLES.SLOT1",
"Experimental": "1",
"PerPkg": "1",
- "PublicDescription": "-",
"UMask": "0x2",
"Unit": "IMC"
},
diff --git a/tools/perf/pmu-events/arch/x86/skylake/skl-metrics.json b/tools/perf/pmu-events/arch/x86/skylake/skl-metrics.json
index 2d3a037e88b5..707bd8790ed2 100644
--- a/tools/perf/pmu-events/arch/x86/skylake/skl-metrics.json
+++ b/tools/perf/pmu-events/arch/x86/skylake/skl-metrics.json
@@ -1,49 +1,49 @@
[
{
"BriefDescription": "C2 residency percent per package",
- "MetricExpr": "cstate_pkg@c2\\-residency@ / TSC",
+ "MetricExpr": "cstate_pkg@c2\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C2_Pkg_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C3 residency percent per core",
- "MetricExpr": "cstate_core@c3\\-residency@ / TSC",
+ "MetricExpr": "cstate_core@c3\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C3_Core_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C3 residency percent per package",
- "MetricExpr": "cstate_pkg@c3\\-residency@ / TSC",
+ "MetricExpr": "cstate_pkg@c3\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C3_Pkg_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C6 residency percent per core",
- "MetricExpr": "cstate_core@c6\\-residency@ / TSC",
+ "MetricExpr": "cstate_core@c6\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C6_Core_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C6 residency percent per package",
- "MetricExpr": "cstate_pkg@c6\\-residency@ / TSC",
+ "MetricExpr": "cstate_pkg@c6\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C6_Pkg_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C7 residency percent per core",
- "MetricExpr": "cstate_core@c7\\-residency@ / TSC",
+ "MetricExpr": "cstate_core@c7\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C7_Core_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C7 residency percent per package",
- "MetricExpr": "cstate_pkg@c7\\-residency@ / TSC",
+ "MetricExpr": "cstate_pkg@c7\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C7_Pkg_Residency",
"ScaleUnit": "100%"
@@ -80,6 +80,7 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution ports for ALU operations.",
+ "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "(UOPS_DISPATCHED_PORT.PORT_0 + UOPS_DISPATCHED_PORT.PORT_1 + UOPS_DISPATCHED_PORT.PORT_5 + UOPS_DISPATCHED_PORT.PORT_6) / tma_info_thread_slots",
"MetricGroup": "TopdownL5;tma_L5_group;tma_ports_utilized_3m_group",
"MetricName": "tma_alu_op_utilization",
@@ -117,6 +118,7 @@
},
{
"BriefDescription": "Total pipeline cost of instruction fetch related bottlenecks by large code footprint programs (i-side cache; TLB and BTB misses)",
+ "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "100 * tma_fetch_latency * (tma_itlb_misses + tma_icache_misses + tma_unknown_branches) / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches)",
"MetricGroup": "BigFootprint;BvBC;Fed;Frontend;IcMiss;MemoryTLB",
"MetricName": "tma_bottleneck_big_code",
@@ -131,31 +133,35 @@
"PublicDescription": "Total pipeline cost of instructions used for program control-flow - a subset of the Retiring category in TMA. Examples include function calls; loops and alignments. (A lower bound)"
},
{
+ "BriefDescription": "Total pipeline cost when the execution is compute-bound - an estimation",
+ "MetricConstraint": "NO_GROUP_EVENTS",
+ "MetricExpr": "100 * (tma_core_bound * tma_divider / (tma_divider + tma_ports_utilization + tma_serializing_operation) + tma_core_bound * (tma_ports_utilization / (tma_divider + tma_ports_utilization + tma_serializing_operation)) * (tma_ports_utilized_3m / (tma_ports_utilized_0 + tma_ports_utilized_1 + tma_ports_utilized_2 + tma_ports_utilized_3m)))",
+ "MetricGroup": "BvCB;Cor;tma_issueComp",
+ "MetricName": "tma_bottleneck_compute_bound_est",
+ "MetricThreshold": "tma_bottleneck_compute_bound_est > 20",
+ "PublicDescription": "Total pipeline cost when the execution is compute-bound - an estimation. Covers Core Bound when High ILP as well as when long-latency execution units are busy. Related metrics: "
+ },
+ {
"BriefDescription": "Total pipeline cost of external Memory- or Cache-Bandwidth related bottlenecks",
+ "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "100 * (tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_mem_bandwidth / (tma_mem_bandwidth + tma_mem_latency)) + tma_memory_bound * (tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_sq_full / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full)) + tma_memory_bound * (tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_fb_full / (tma_4k_aliasing + tma_dtlb_load + tma_fb_full + tma_l1_latency_dependency + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)))",
"MetricGroup": "BvMB;Mem;MemoryBW;Offcore;tma_issueBW",
- "MetricName": "tma_bottleneck_cache_memory_bandwidth",
- "MetricThreshold": "tma_bottleneck_cache_memory_bandwidth > 20",
+ "MetricName": "tma_bottleneck_data_cache_memory_bandwidth",
+ "MetricThreshold": "tma_bottleneck_data_cache_memory_bandwidth > 20",
"PublicDescription": "Total pipeline cost of external Memory- or Cache-Bandwidth related bottlenecks. Related metrics: tma_fb_full, tma_info_system_dram_bw_use, tma_mem_bandwidth, tma_sq_full"
},
{
"BriefDescription": "Total pipeline cost of external Memory- or Cache-Latency related bottlenecks",
+ "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "100 * (tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_mem_latency / (tma_mem_bandwidth + tma_mem_latency)) + tma_memory_bound * (tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_l3_hit_latency / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full)) + tma_memory_bound * tma_l2_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) + tma_memory_bound * (tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_l1_latency_dependency / (tma_4k_aliasing + tma_dtlb_load + tma_fb_full + tma_l1_latency_dependency + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)) + tma_memory_bound * (tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_lock_latency / (tma_4k_aliasing + tma_dtlb_load + tma_fb_full + tma_l1_latency_dependency + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)) + tma_memory_bound * (tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_split_loads / (tma_4k_aliasing + tma_dtlb_load + tma_fb_full + tma_l1_latency_dependency + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)) + tma_memory_bound * (tma_store_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_split_stores / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency)) + tma_memory_bound * (tma_store_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_store_latency / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency)))",
"MetricGroup": "BvML;Mem;MemoryLat;Offcore;tma_issueLat",
- "MetricName": "tma_bottleneck_cache_memory_latency",
- "MetricThreshold": "tma_bottleneck_cache_memory_latency > 20",
+ "MetricName": "tma_bottleneck_data_cache_memory_latency",
+ "MetricThreshold": "tma_bottleneck_data_cache_memory_latency > 20",
"PublicDescription": "Total pipeline cost of external Memory- or Cache-Latency related bottlenecks. Related metrics: tma_l3_hit_latency, tma_mem_latency"
},
{
- "BriefDescription": "Total pipeline cost when the execution is compute-bound - an estimation",
- "MetricExpr": "100 * (tma_core_bound * tma_divider / (tma_divider + tma_ports_utilization + tma_serializing_operation) + tma_core_bound * (tma_ports_utilization / (tma_divider + tma_ports_utilization + tma_serializing_operation)) * (tma_ports_utilized_3m / (tma_ports_utilized_0 + tma_ports_utilized_1 + tma_ports_utilized_2 + tma_ports_utilized_3m)))",
- "MetricGroup": "BvCB;Cor;tma_issueComp",
- "MetricName": "tma_bottleneck_compute_bound_est",
- "MetricThreshold": "tma_bottleneck_compute_bound_est > 20",
- "PublicDescription": "Total pipeline cost when the execution is compute-bound - an estimation. Covers Core Bound when High ILP as well as when long-latency execution units are busy. Related metrics: "
- },
- {
"BriefDescription": "Total pipeline cost of instruction fetch bandwidth related bottlenecks (when the front-end could not sustain operations delivery to the back-end)",
+ "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "100 * (tma_frontend_bound - (1 - 10 * tma_microcode_sequencer * tma_other_mispredicts / tma_branch_mispredicts) * tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches) - tma_microcode_sequencer / (tma_few_uops_instructions + tma_microcode_sequencer) * (tma_assists / tma_microcode_sequencer) * tma_fetch_latency * (tma_ms_switches + tma_branch_resteers * (tma_clears_resteers + tma_mispredicts_resteers * (10 * tma_microcode_sequencer * tma_other_mispredicts / tma_branch_mispredicts)) / (tma_clears_resteers + tma_mispredicts_resteers + tma_unknown_branches)) / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches)) - tma_bottleneck_big_code",
"MetricGroup": "BvFB;Fed;FetchBW;Frontend",
"MetricName": "tma_bottleneck_instruction_fetch_bw",
@@ -163,6 +169,7 @@
},
{
"BriefDescription": "Total pipeline cost of irregular execution (e.g",
+ "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "100 * (tma_microcode_sequencer / (tma_few_uops_instructions + tma_microcode_sequencer) * (tma_assists / tma_microcode_sequencer) * tma_fetch_latency * (tma_ms_switches + tma_branch_resteers * (tma_clears_resteers + tma_mispredicts_resteers * (10 * tma_microcode_sequencer * tma_other_mispredicts / tma_branch_mispredicts)) / (tma_clears_resteers + tma_mispredicts_resteers + tma_unknown_branches)) / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches) + 10 * tma_microcode_sequencer * tma_other_mispredicts / tma_branch_mispredicts * tma_branch_mispredicts + tma_machine_clears * tma_other_nukes / tma_other_nukes + tma_core_bound * (tma_serializing_operation + tma_core_bound * RS_EVENTS.EMPTY_CYCLES / tma_info_thread_clks * tma_ports_utilized_0) / (tma_divider + tma_ports_utilization + tma_serializing_operation) + tma_microcode_sequencer / (tma_few_uops_instructions + tma_microcode_sequencer) * (tma_assists / tma_microcode_sequencer) * tma_heavy_operations)",
"MetricGroup": "Bad;BvIO;Cor;Ret;tma_issueMS",
"MetricName": "tma_bottleneck_irregular_overhead",
@@ -171,6 +178,7 @@
},
{
"BriefDescription": "Total pipeline cost of Memory Address Translation related bottlenecks (data-side TLBs)",
+ "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "100 * (tma_memory_bound * (tma_l1_bound / max(tma_memory_bound, tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_dtlb_load / max(tma_l1_bound, tma_4k_aliasing + tma_dtlb_load + tma_fb_full + tma_l1_latency_dependency + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)) + tma_memory_bound * (tma_store_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_dtlb_store / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency)))",
"MetricGroup": "BvMT;Mem;MemoryTLB;Offcore;tma_issueTLB",
"MetricName": "tma_bottleneck_memory_data_tlbs",
@@ -179,6 +187,7 @@
},
{
"BriefDescription": "Total pipeline cost of Memory Synchronization related bottlenecks (data transfers and coherency updates across processors)",
+ "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "100 * (tma_memory_bound * (tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_contested_accesses + tma_data_sharing) / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full) + tma_store_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * tma_false_sharing / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency - tma_store_latency)) + tma_machine_clears * (1 - tma_other_nukes / tma_other_nukes))",
"MetricGroup": "BvMS;LockCont;Mem;Offcore;tma_issueSyncxn",
"MetricName": "tma_bottleneck_memory_synchronization",
@@ -187,6 +196,7 @@
},
{
"BriefDescription": "Total pipeline cost of Branch Misprediction related bottlenecks",
+ "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "100 * (1 - 10 * tma_microcode_sequencer * tma_other_mispredicts / tma_branch_mispredicts) * (tma_branch_mispredicts + tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches))",
"MetricGroup": "Bad;BadSpec;BrMispredicts;BvMP;tma_issueBM",
"MetricName": "tma_bottleneck_mispredictions",
@@ -195,7 +205,8 @@
},
{
"BriefDescription": "Total pipeline cost of remaining bottlenecks in the back-end",
- "MetricExpr": "100 - (tma_bottleneck_big_code + tma_bottleneck_instruction_fetch_bw + tma_bottleneck_mispredictions + tma_bottleneck_cache_memory_bandwidth + tma_bottleneck_cache_memory_latency + tma_bottleneck_memory_data_tlbs + tma_bottleneck_memory_synchronization + tma_bottleneck_compute_bound_est + tma_bottleneck_irregular_overhead + tma_bottleneck_branching_overhead + tma_bottleneck_useful_work)",
+ "MetricConstraint": "NO_GROUP_EVENTS",
+ "MetricExpr": "100 - (tma_bottleneck_big_code + tma_bottleneck_instruction_fetch_bw + tma_bottleneck_mispredictions + tma_bottleneck_data_cache_memory_bandwidth + tma_bottleneck_data_cache_memory_latency + tma_bottleneck_memory_data_tlbs + tma_bottleneck_memory_synchronization + tma_bottleneck_compute_bound_est + tma_bottleneck_irregular_overhead + tma_bottleneck_branching_overhead + tma_bottleneck_useful_work)",
"MetricGroup": "BvOB;Cor;Offcore",
"MetricName": "tma_bottleneck_other_bottlenecks",
"MetricThreshold": "tma_bottleneck_other_bottlenecks > 20",
@@ -203,6 +214,7 @@
},
{
"BriefDescription": "Total pipeline cost of \"useful operations\" - the portion of Retiring category not covered by Branching_Overhead nor Irregular_Overhead.",
+ "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "100 * (tma_retiring - (BR_INST_RETIRED.ALL_BRANCHES + 2 * BR_INST_RETIRED.NEAR_CALL + INST_RETIRED.NOP) / tma_info_thread_slots - tma_microcode_sequencer / (tma_few_uops_instructions + tma_microcode_sequencer) * (tma_assists / tma_microcode_sequencer) * tma_heavy_operations)",
"MetricGroup": "BvUW;Ret",
"MetricName": "tma_bottleneck_useful_work",
@@ -230,6 +242,7 @@
},
{
"BriefDescription": "This metric estimates fraction of cycles the CPU retired uops originated from CISC (complex instruction set computer) instruction",
+ "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "max(0, tma_microcode_sequencer - tma_assists)",
"MetricGroup": "TopdownL4;tma_L4_group;tma_microcode_sequencer_group",
"MetricName": "tma_cisc",
@@ -391,7 +404,7 @@
"MetricGroup": "BvMB;MemoryBW;TopdownL4;tma_L4_group;tma_issueBW;tma_issueSL;tma_issueSmSt;tma_l1_bound_group",
"MetricName": "tma_fb_full",
"MetricThreshold": "tma_fb_full > 0.3",
- "PublicDescription": "This metric does a *rough estimation* of how often L1D Fill Buffer unavailability limited additional L1D miss memory access requests to proceed. The higher the metric value; the deeper the memory hierarchy level the misses are satisfied from (metric values >1 are valid). Often it hints on approaching bandwidth limits (to L2 cache; L3 cache or external memory). Related metrics: tma_bottleneck_cache_memory_bandwidth, tma_info_system_dram_bw_use, tma_mem_bandwidth, tma_sq_full, tma_store_latency, tma_streaming_stores",
+ "PublicDescription": "This metric does a *rough estimation* of how often L1D Fill Buffer unavailability limited additional L1D miss memory access requests to proceed. The higher the metric value; the deeper the memory hierarchy level the misses are satisfied from (metric values >1 are valid). Often it hints on approaching bandwidth limits (to L2 cache; L3 cache or external memory). Related metrics: tma_bottleneck_data_cache_memory_bandwidth, tma_info_system_dram_bw_use, tma_mem_bandwidth, tma_sq_full, tma_store_latency, tma_streaming_stores",
"ScaleUnit": "100%"
},
{
@@ -454,7 +467,6 @@
},
{
"BriefDescription": "This metric approximates arithmetic floating-point (FP) vector uops fraction the CPU has retired aggregated across all vector widths",
- "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "FP_ARITH_INST_RETIRED.VECTOR / UOPS_RETIRED.RETIRE_SLOTS",
"MetricGroup": "Compute;Flops;TopdownL4;tma_L4_group;tma_fp_arith_group;tma_issue2P",
"MetricName": "tma_fp_vector",
@@ -520,6 +532,7 @@
},
{
"BriefDescription": "Branch Misprediction Cost: Cycles representing fraction of TMA slots wasted per non-speculative branch misprediction (retired JEClear)",
+ "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "tma_bottleneck_mispredictions * tma_info_thread_slots / 4 / BR_MISP_RETIRED.ALL_BRANCHES / 100",
"MetricGroup": "Bad;BrMispredicts;tma_issueBM",
"MetricName": "tma_info_bad_spec_branch_misprediction_cost",
@@ -555,6 +568,7 @@
},
{
"BriefDescription": "Total pipeline cost of DSB (uop cache) hits - subset of the Instruction_Fetch_BW Bottleneck",
+ "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "100 * (tma_frontend_bound * (tma_fetch_bandwidth / (tma_fetch_bandwidth + tma_fetch_latency)) * (tma_dsb / (tma_dsb + tma_mite)))",
"MetricGroup": "DSB;Fed;FetchBW;tma_issueFB",
"MetricName": "tma_info_botlnk_l2_dsb_bandwidth",
@@ -572,6 +586,7 @@
},
{
"BriefDescription": "Total pipeline cost of Instruction Cache misses - subset of the Big_Code Bottleneck",
+ "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "100 * (tma_fetch_latency * tma_icache_misses / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches))",
"MetricGroup": "Fed;FetchLat;IcMiss;tma_issueFL",
"MetricName": "tma_info_botlnk_l2_ic_misses",
@@ -713,7 +728,6 @@
},
{
"BriefDescription": "Instructions per FP Arithmetic instruction (lower number means higher occurrence rate)",
- "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "INST_RETIRED.ANY / (FP_ARITH_INST_RETIRED.SCALAR + FP_ARITH_INST_RETIRED.VECTOR)",
"MetricGroup": "Flops;InsType",
"MetricName": "tma_info_inst_mix_iparith",
@@ -975,7 +989,7 @@
"MetricName": "tma_info_memory_tlb_store_stlb_mpki"
},
{
- "BriefDescription": "",
+ "BriefDescription": "Mem;Backend;CacheHits",
"MetricExpr": "UOPS_EXECUTED.THREAD / (UOPS_EXECUTED.CORE_CYCLES_GE_1 / 2 if #SMT_on else cpu@UOPS_EXECUTED.THREAD\\,cmask\\=1@)",
"MetricGroup": "Cor;Pipeline;PortsUtil;SMT",
"MetricName": "tma_info_pipeline_execute"
@@ -993,6 +1007,12 @@
"MetricName": "tma_info_pipeline_fetch_mite"
},
{
+ "BriefDescription": "Average number of uops fetched from MS per cycle",
+ "MetricExpr": "IDQ.MS_UOPS / cpu@IDQ.MS_UOPS\\,cmask\\=1@",
+ "MetricGroup": "Fed;FetchLat;MicroSeq",
+ "MetricName": "tma_info_pipeline_fetch_ms"
+ },
+ {
"BriefDescription": "Instructions per a microcode Assist invocation",
"MetricExpr": "INST_RETIRED.ANY / (FP_ASSIST.ANY + OTHER_ASSISTS.ANY)",
"MetricGroup": "MicroSeq;Pipeline;Ret;Retire",
@@ -1008,7 +1028,7 @@
},
{
"BriefDescription": "Measured Average Core Frequency for unhalted processors [GHz]",
- "MetricExpr": "tma_info_system_turbo_utilization * TSC / 1e9 / tma_info_system_time",
+ "MetricExpr": "tma_info_system_turbo_utilization * msr@tsc@ / 1e9 / tma_info_system_time",
"MetricGroup": "Power;Summary",
"MetricName": "tma_info_system_core_frequency"
},
@@ -1020,7 +1040,7 @@
},
{
"BriefDescription": "Average number of utilized CPUs",
- "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / TSC",
+ "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / msr@tsc@",
"MetricGroup": "Summary",
"MetricName": "tma_info_system_cpus_utilized"
},
@@ -1029,7 +1049,7 @@
"MetricExpr": "64 * (UNC_ARB_TRK_REQUESTS.ALL + UNC_ARB_COH_TRK_REQUESTS.ALL) / 1e6 / tma_info_system_time / 1e3",
"MetricGroup": "HPC;MemOffcore;MemoryBW;SoC;tma_issueBW",
"MetricName": "tma_info_system_dram_bw_use",
- "PublicDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]. Related metrics: tma_bottleneck_cache_memory_bandwidth, tma_fb_full, tma_mem_bandwidth, tma_sq_full"
+ "PublicDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]. Related metrics: tma_bottleneck_data_cache_memory_bandwidth, tma_fb_full, tma_mem_bandwidth, tma_sq_full"
},
{
"BriefDescription": "Giga Floating Point Operations Per Second",
@@ -1175,12 +1195,13 @@
"ScaleUnit": "100%"
},
{
- "BriefDescription": "This metric([SKL+] roughly; [LNL]) estimates fraction of cycles with demand load accesses that hit the L1D cache",
+ "BriefDescription": "This metric ([SKL+] roughly; [LNL]) estimates fraction of cycles with demand load accesses that hit the L1D cache",
+ "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "min(2 * (MEM_INST_RETIRED.ALL_LOADS - MEM_LOAD_RETIRED.FB_HIT - MEM_LOAD_RETIRED.L1_MISS) * 20 / 100, max(CYCLE_ACTIVITY.CYCLES_MEM_ANY - CYCLE_ACTIVITY.CYCLES_L1D_MISS, 0)) / tma_info_thread_clks",
"MetricGroup": "BvML;MemoryLat;TopdownL4;tma_L4_group;tma_l1_bound_group",
"MetricName": "tma_l1_latency_dependency",
"MetricThreshold": "tma_l1_latency_dependency > 0.1 & (tma_l1_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
- "PublicDescription": "This metric([SKL+] roughly; [LNL]) estimates fraction of cycles with demand load accesses that hit the L1D cache. The short latency of the L1D cache may be exposed in pointer-chasing memory access patterns as an example. Sample with: MEM_LOAD_RETIRED.L1_HIT",
+ "PublicDescription": "This metric ([SKL+] roughly; [LNL]) estimates fraction of cycles with demand load accesses that hit the L1D cache. The short latency of the L1D cache may be exposed in pointer-chasing memory access patterns as an example. Sample with: MEM_LOAD_RETIRED.L1_HIT",
"ScaleUnit": "100%"
},
{
@@ -1217,7 +1238,7 @@
"MetricGroup": "BvML;MemoryLat;TopdownL4;tma_L4_group;tma_issueLat;tma_l3_bound_group",
"MetricName": "tma_l3_hit_latency",
"MetricThreshold": "tma_l3_hit_latency > 0.1 & (tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
- "PublicDescription": "This metric estimates fraction of cycles with demand load accesses that hit the L3 cache under unloaded scenarios (possibly L3 latency limited). Avoiding private cache misses (i.e. L2 misses/L3 hits) will improve the latency; reduce contention with sibling physical cores and increase performance. Note the value of this node may overlap with its siblings. Sample with: MEM_LOAD_RETIRED.L3_HIT_PS. Related metrics: tma_bottleneck_cache_memory_latency, tma_mem_latency",
+ "PublicDescription": "This metric estimates fraction of cycles with demand load accesses that hit the L3 cache under unloaded scenarios (possibly L3 latency limited). Avoiding private cache misses (i.e. L2 misses/L3 hits) will improve the latency; reduce contention with sibling physical cores and increase performance. Note the value of this node may overlap with its siblings. Sample with: MEM_LOAD_RETIRED.L3_HIT_PS. Related metrics: tma_bottleneck_data_cache_memory_latency, tma_mem_latency",
"ScaleUnit": "100%"
},
{
@@ -1241,6 +1262,7 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port for Load operations",
+ "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "(UOPS_DISPATCHED_PORT.PORT_2 + UOPS_DISPATCHED_PORT.PORT_3 + UOPS_DISPATCHED_PORT.PORT_7 - UOPS_DISPATCHED_PORT.PORT_4) / (2 * tma_info_core_core_clks)",
"MetricGroup": "TopdownL5;tma_L5_group;tma_ports_utilized_3m_group",
"MetricName": "tma_load_op_utilization",
@@ -1267,6 +1289,7 @@
},
{
"BriefDescription": "This metric estimates the fraction of cycles to walk the memory paging structures to cache translation of 1 GB pages for data load accesses.",
+ "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "tma_load_stlb_miss * DTLB_LOAD_MISSES.WALK_COMPLETED_1G / (DTLB_LOAD_MISSES.WALK_COMPLETED_4K + DTLB_LOAD_MISSES.WALK_COMPLETED_2M_4M + DTLB_LOAD_MISSES.WALK_COMPLETED_1G)",
"MetricGroup": "MemoryTLB;TopdownL6;tma_L6_group;tma_load_stlb_miss_group",
"MetricName": "tma_load_stlb_miss_1g",
@@ -1275,6 +1298,7 @@
},
{
"BriefDescription": "This metric estimates the fraction of cycles to walk the memory paging structures to cache translation of 2 or 4 MB pages for data load accesses.",
+ "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "tma_load_stlb_miss * DTLB_LOAD_MISSES.WALK_COMPLETED_2M_4M / (DTLB_LOAD_MISSES.WALK_COMPLETED_4K + DTLB_LOAD_MISSES.WALK_COMPLETED_2M_4M + DTLB_LOAD_MISSES.WALK_COMPLETED_1G)",
"MetricGroup": "MemoryTLB;TopdownL6;tma_L6_group;tma_load_stlb_miss_group",
"MetricName": "tma_load_stlb_miss_2m",
@@ -1283,6 +1307,7 @@
},
{
"BriefDescription": "This metric estimates the fraction of cycles to walk the memory paging structures to cache translation of 4 KB pages for data load accesses.",
+ "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "tma_load_stlb_miss * DTLB_LOAD_MISSES.WALK_COMPLETED_4K / (DTLB_LOAD_MISSES.WALK_COMPLETED_4K + DTLB_LOAD_MISSES.WALK_COMPLETED_2M_4M + DTLB_LOAD_MISSES.WALK_COMPLETED_1G)",
"MetricGroup": "MemoryTLB;TopdownL6;tma_L6_group;tma_load_stlb_miss_group",
"MetricName": "tma_load_stlb_miss_4k",
@@ -1291,6 +1316,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles the CPU spent handling cache misses due to lock operations",
+ "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "(12 * max(0, MEM_INST_RETIRED.LOCK_LOADS - L2_RQSTS.ALL_RFO) + MEM_INST_RETIRED.LOCK_LOADS / MEM_INST_RETIRED.ALL_STORES * (9 * L2_RQSTS.RFO_HIT + min(CPU_CLK_UNHALTED.THREAD, OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO))) / tma_info_thread_clks",
"MetricGroup": "LockCont;Offcore;TopdownL4;tma_L4_group;tma_issueRFO;tma_l1_bound_group",
"MetricName": "tma_lock_latency",
@@ -1315,7 +1341,7 @@
"MetricGroup": "BvMB;MemoryBW;Offcore;TopdownL4;tma_L4_group;tma_dram_bound_group;tma_issueBW",
"MetricName": "tma_mem_bandwidth",
"MetricThreshold": "tma_mem_bandwidth > 0.2 & (tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
- "PublicDescription": "This metric estimates fraction of cycles where the core's performance was likely hurt due to approaching bandwidth limits of external memory - DRAM ([SPR-HBM] and/or HBM). The underlying heuristic assumes that a similar off-core traffic is generated by all IA cores. This metric does not aggregate non-data-read requests by this logical processor; requests from other IA Logical Processors/Physical Cores/sockets; or other non-IA devices like GPU; hence the maximum external memory bandwidth limits may or may not be approached when this metric is flagged (see Uncore counters for that). Related metrics: tma_bottleneck_cache_memory_bandwidth, tma_fb_full, tma_info_system_dram_bw_use, tma_sq_full",
+ "PublicDescription": "This metric estimates fraction of cycles where the core's performance was likely hurt due to approaching bandwidth limits of external memory - DRAM ([SPR-HBM] and/or HBM). The underlying heuristic assumes that a similar off-core traffic is generated by all IA cores. This metric does not aggregate non-data-read requests by this logical processor; requests from other IA Logical Processors/Physical Cores/sockets; or other non-IA devices like GPU; hence the maximum external memory bandwidth limits may or may not be approached when this metric is flagged (see Uncore counters for that). Related metrics: tma_bottleneck_data_cache_memory_bandwidth, tma_fb_full, tma_info_system_dram_bw_use, tma_sq_full",
"ScaleUnit": "100%"
},
{
@@ -1324,7 +1350,7 @@
"MetricGroup": "BvML;MemoryLat;Offcore;TopdownL4;tma_L4_group;tma_dram_bound_group;tma_issueLat",
"MetricName": "tma_mem_latency",
"MetricThreshold": "tma_mem_latency > 0.1 & (tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
- "PublicDescription": "This metric estimates fraction of cycles where the performance was likely hurt due to latency from external memory - DRAM ([SPR-HBM] and/or HBM). This metric does not aggregate requests from other Logical Processors/Physical Cores/sockets (see Uncore counters for that). Related metrics: tma_bottleneck_cache_memory_latency, tma_l3_hit_latency",
+ "PublicDescription": "This metric estimates fraction of cycles where the performance was likely hurt due to latency from external memory - DRAM ([SPR-HBM] and/or HBM). This metric does not aggregate requests from other Logical Processors/Physical Cores/sockets (see Uncore counters for that). Related metrics: tma_bottleneck_data_cache_memory_latency, tma_l3_hit_latency",
"ScaleUnit": "100%"
},
{
@@ -1348,7 +1374,6 @@
},
{
"BriefDescription": "This metric represents fraction of slots the CPU was retiring uops fetched by the Microcode Sequencer (MS) unit",
- "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / UOPS_ISSUED.ANY * IDQ.MS_UOPS / tma_info_thread_slots",
"MetricGroup": "MicroSeq;TopdownL3;tma_L3_group;tma_heavy_operations_group;tma_issueMC;tma_issueMS",
"MetricName": "tma_microcode_sequencer",
@@ -1358,6 +1383,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to Branch Resteers as a result of Branch Misprediction at execution stage",
+ "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "BR_MISP_RETIRED.ALL_BRANCHES / (BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT) * INT_MISC.CLEAR_RESTEER_CYCLES / tma_info_thread_clks",
"MetricGroup": "BadSpec;BrMispredicts;BvMP;TopdownL4;tma_L4_group;tma_branch_resteers_group;tma_issueBM",
"MetricName": "tma_mispredicts_resteers",
@@ -1412,6 +1438,7 @@
},
{
"BriefDescription": "This metric represents the remaining light uops fraction the CPU has executed - remaining means not covered by other sibling nodes",
+ "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "max(0, tma_light_operations - (tma_fp_arith + tma_memory_operations + tma_fused_instructions + tma_non_fused_branches))",
"MetricGroup": "Pipeline;TopdownL3;tma_L3_group;tma_light_operations_group",
"MetricName": "tma_other_light_ops",
@@ -1421,6 +1448,7 @@
},
{
"BriefDescription": "This metric estimates fraction of slots the CPU was stalled due to other cases of misprediction (non-retired x86 branches or other types).",
+ "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "max(tma_branch_mispredicts * (1 - BR_MISP_RETIRED.ALL_BRANCHES / (INT_MISC.CLEARS_COUNT - MACHINE_CLEARS.COUNT)), 0.0001)",
"MetricGroup": "BrMispredicts;BvIO;TopdownL3;tma_L3_group;tma_branch_mispredicts_group",
"MetricName": "tma_other_mispredicts",
@@ -1429,6 +1457,7 @@
},
{
"BriefDescription": "This metric represents fraction of slots the CPU has wasted due to Nukes (Machine Clears) not related to memory ordering.",
+ "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "max(tma_machine_clears * (1 - MACHINE_CLEARS.MEMORY_ORDERING / MACHINE_CLEARS.COUNT), 0.0001)",
"MetricGroup": "BvIO;Machine_Clears;TopdownL3;tma_L3_group;tma_machine_clears_group",
"MetricName": "tma_other_nukes",
@@ -1509,6 +1538,7 @@
},
{
"BriefDescription": "This metric estimates fraction of cycles the CPU performance was potentially limited due to Core computation issues (non divider-related)",
+ "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "((tma_ports_utilized_0 * tma_info_thread_clks + (EXE_ACTIVITY.1_PORTS_UTIL + tma_retiring * EXE_ACTIVITY.2_PORTS_UTIL)) / tma_info_thread_clks if ARITH.DIVIDER_ACTIVE < CYCLE_ACTIVITY.STALLS_TOTAL - CYCLE_ACTIVITY.STALLS_MEM_ANY else (EXE_ACTIVITY.1_PORTS_UTIL + tma_retiring * EXE_ACTIVITY.2_PORTS_UTIL) / tma_info_thread_clks)",
"MetricGroup": "PortsUtil;TopdownL3;tma_L3_group;tma_core_bound_group",
"MetricName": "tma_ports_utilization",
@@ -1595,7 +1625,7 @@
"MetricGroup": "BvMB;MemoryBW;Offcore;TopdownL4;tma_L4_group;tma_issueBW;tma_l3_bound_group",
"MetricName": "tma_sq_full",
"MetricThreshold": "tma_sq_full > 0.3 & (tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
- "PublicDescription": "This metric measures fraction of cycles where the Super Queue (SQ) was full taking into account all request-types and both hardware SMT threads (Logical Processors). Related metrics: tma_bottleneck_cache_memory_bandwidth, tma_fb_full, tma_info_system_dram_bw_use, tma_mem_bandwidth",
+ "PublicDescription": "This metric measures fraction of cycles where the Super Queue (SQ) was full taking into account all request-types and both hardware SMT threads (Logical Processors). Related metrics: tma_bottleneck_data_cache_memory_bandwidth, tma_fb_full, tma_info_system_dram_bw_use, tma_mem_bandwidth",
"ScaleUnit": "100%"
},
{
@@ -1652,6 +1682,7 @@
},
{
"BriefDescription": "This metric estimates the fraction of cycles to walk the memory paging structures to cache translation of 1 GB pages for data store accesses.",
+ "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "tma_store_stlb_miss * DTLB_STORE_MISSES.WALK_COMPLETED_1G / (DTLB_STORE_MISSES.WALK_COMPLETED_4K + DTLB_STORE_MISSES.WALK_COMPLETED_2M_4M + DTLB_STORE_MISSES.WALK_COMPLETED_1G)",
"MetricGroup": "MemoryTLB;TopdownL6;tma_L6_group;tma_store_stlb_miss_group",
"MetricName": "tma_store_stlb_miss_1g",
@@ -1660,6 +1691,7 @@
},
{
"BriefDescription": "This metric estimates the fraction of cycles to walk the memory paging structures to cache translation of 2 or 4 MB pages for data store accesses.",
+ "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "tma_store_stlb_miss * DTLB_STORE_MISSES.WALK_COMPLETED_2M_4M / (DTLB_STORE_MISSES.WALK_COMPLETED_4K + DTLB_STORE_MISSES.WALK_COMPLETED_2M_4M + DTLB_STORE_MISSES.WALK_COMPLETED_1G)",
"MetricGroup": "MemoryTLB;TopdownL6;tma_L6_group;tma_store_stlb_miss_group",
"MetricName": "tma_store_stlb_miss_2m",
@@ -1668,6 +1700,7 @@
},
{
"BriefDescription": "This metric estimates the fraction of cycles to walk the memory paging structures to cache translation of 4 KB pages for data store accesses.",
+ "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "tma_store_stlb_miss * DTLB_STORE_MISSES.WALK_COMPLETED_4K / (DTLB_STORE_MISSES.WALK_COMPLETED_4K + DTLB_STORE_MISSES.WALK_COMPLETED_2M_4M + DTLB_STORE_MISSES.WALK_COMPLETED_1G)",
"MetricGroup": "MemoryTLB;TopdownL6;tma_L6_group;tma_store_stlb_miss_group",
"MetricName": "tma_store_stlb_miss_4k",
diff --git a/tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json b/tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json
index 7cc7b076c3e2..56c2caaafef4 100644
--- a/tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json
+++ b/tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json
@@ -1,49 +1,49 @@
[
{
"BriefDescription": "C2 residency percent per package",
- "MetricExpr": "cstate_pkg@c2\\-residency@ / TSC",
+ "MetricExpr": "cstate_pkg@c2\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C2_Pkg_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C3 residency percent per core",
- "MetricExpr": "cstate_core@c3\\-residency@ / TSC",
+ "MetricExpr": "cstate_core@c3\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C3_Core_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C3 residency percent per package",
- "MetricExpr": "cstate_pkg@c3\\-residency@ / TSC",
+ "MetricExpr": "cstate_pkg@c3\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C3_Pkg_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C6 residency percent per core",
- "MetricExpr": "cstate_core@c6\\-residency@ / TSC",
+ "MetricExpr": "cstate_core@c6\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C6_Core_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C6 residency percent per package",
- "MetricExpr": "cstate_pkg@c6\\-residency@ / TSC",
+ "MetricExpr": "cstate_pkg@c6\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C6_Pkg_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C7 residency percent per core",
- "MetricExpr": "cstate_core@c7\\-residency@ / TSC",
+ "MetricExpr": "cstate_core@c7\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C7_Core_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C7 residency percent per package",
- "MetricExpr": "cstate_pkg@c7\\-residency@ / TSC",
+ "MetricExpr": "cstate_pkg@c7\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C7_Pkg_Residency",
"ScaleUnit": "100%"
@@ -301,6 +301,7 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution ports for ALU operations.",
+ "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "(UOPS_DISPATCHED_PORT.PORT_0 + UOPS_DISPATCHED_PORT.PORT_1 + UOPS_DISPATCHED_PORT.PORT_5 + UOPS_DISPATCHED_PORT.PORT_6) / tma_info_thread_slots",
"MetricGroup": "TopdownL5;tma_L5_group;tma_ports_utilized_3m_group",
"MetricName": "tma_alu_op_utilization",
@@ -338,6 +339,7 @@
},
{
"BriefDescription": "Total pipeline cost of instruction fetch related bottlenecks by large code footprint programs (i-side cache; TLB and BTB misses)",
+ "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "100 * tma_fetch_latency * (tma_itlb_misses + tma_icache_misses + tma_unknown_branches) / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches)",
"MetricGroup": "BigFootprint;BvBC;Fed;Frontend;IcMiss;MemoryTLB",
"MetricName": "tma_bottleneck_big_code",
@@ -352,31 +354,35 @@
"PublicDescription": "Total pipeline cost of instructions used for program control-flow - a subset of the Retiring category in TMA. Examples include function calls; loops and alignments. (A lower bound)"
},
{
+ "BriefDescription": "Total pipeline cost when the execution is compute-bound - an estimation",
+ "MetricConstraint": "NO_GROUP_EVENTS",
+ "MetricExpr": "100 * (tma_core_bound * tma_divider / (tma_divider + tma_ports_utilization + tma_serializing_operation) + tma_core_bound * (tma_ports_utilization / (tma_divider + tma_ports_utilization + tma_serializing_operation)) * (tma_ports_utilized_3m / (tma_ports_utilized_0 + tma_ports_utilized_1 + tma_ports_utilized_2 + tma_ports_utilized_3m)))",
+ "MetricGroup": "BvCB;Cor;tma_issueComp",
+ "MetricName": "tma_bottleneck_compute_bound_est",
+ "MetricThreshold": "tma_bottleneck_compute_bound_est > 20",
+ "PublicDescription": "Total pipeline cost when the execution is compute-bound - an estimation. Covers Core Bound when High ILP as well as when long-latency execution units are busy. Related metrics: "
+ },
+ {
"BriefDescription": "Total pipeline cost of external Memory- or Cache-Bandwidth related bottlenecks",
+ "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "100 * (tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_mem_bandwidth / (tma_mem_bandwidth + tma_mem_latency)) + tma_memory_bound * (tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_sq_full / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full)) + tma_memory_bound * (tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_fb_full / (tma_4k_aliasing + tma_dtlb_load + tma_fb_full + tma_l1_latency_dependency + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)))",
"MetricGroup": "BvMB;Mem;MemoryBW;Offcore;tma_issueBW",
- "MetricName": "tma_bottleneck_cache_memory_bandwidth",
- "MetricThreshold": "tma_bottleneck_cache_memory_bandwidth > 20",
+ "MetricName": "tma_bottleneck_data_cache_memory_bandwidth",
+ "MetricThreshold": "tma_bottleneck_data_cache_memory_bandwidth > 20",
"PublicDescription": "Total pipeline cost of external Memory- or Cache-Bandwidth related bottlenecks. Related metrics: tma_fb_full, tma_info_system_dram_bw_use, tma_mem_bandwidth, tma_sq_full"
},
{
"BriefDescription": "Total pipeline cost of external Memory- or Cache-Latency related bottlenecks",
+ "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "100 * (tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_mem_latency / (tma_mem_bandwidth + tma_mem_latency)) + tma_memory_bound * (tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_l3_hit_latency / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full)) + tma_memory_bound * tma_l2_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) + tma_memory_bound * (tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_l1_latency_dependency / (tma_4k_aliasing + tma_dtlb_load + tma_fb_full + tma_l1_latency_dependency + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)) + tma_memory_bound * (tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_lock_latency / (tma_4k_aliasing + tma_dtlb_load + tma_fb_full + tma_l1_latency_dependency + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)) + tma_memory_bound * (tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_split_loads / (tma_4k_aliasing + tma_dtlb_load + tma_fb_full + tma_l1_latency_dependency + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)) + tma_memory_bound * (tma_store_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_split_stores / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency)) + tma_memory_bound * (tma_store_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_store_latency / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency)))",
"MetricGroup": "BvML;Mem;MemoryLat;Offcore;tma_issueLat",
- "MetricName": "tma_bottleneck_cache_memory_latency",
- "MetricThreshold": "tma_bottleneck_cache_memory_latency > 20",
+ "MetricName": "tma_bottleneck_data_cache_memory_latency",
+ "MetricThreshold": "tma_bottleneck_data_cache_memory_latency > 20",
"PublicDescription": "Total pipeline cost of external Memory- or Cache-Latency related bottlenecks. Related metrics: tma_l3_hit_latency, tma_mem_latency"
},
{
- "BriefDescription": "Total pipeline cost when the execution is compute-bound - an estimation",
- "MetricExpr": "100 * (tma_core_bound * tma_divider / (tma_divider + tma_ports_utilization + tma_serializing_operation) + tma_core_bound * (tma_ports_utilization / (tma_divider + tma_ports_utilization + tma_serializing_operation)) * (tma_ports_utilized_3m / (tma_ports_utilized_0 + tma_ports_utilized_1 + tma_ports_utilized_2 + tma_ports_utilized_3m)))",
- "MetricGroup": "BvCB;Cor;tma_issueComp",
- "MetricName": "tma_bottleneck_compute_bound_est",
- "MetricThreshold": "tma_bottleneck_compute_bound_est > 20",
- "PublicDescription": "Total pipeline cost when the execution is compute-bound - an estimation. Covers Core Bound when High ILP as well as when long-latency execution units are busy. Related metrics: "
- },
- {
"BriefDescription": "Total pipeline cost of instruction fetch bandwidth related bottlenecks (when the front-end could not sustain operations delivery to the back-end)",
+ "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "100 * (tma_frontend_bound - (1 - 10 * tma_microcode_sequencer * tma_other_mispredicts / tma_branch_mispredicts) * tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches) - tma_microcode_sequencer / (tma_few_uops_instructions + tma_microcode_sequencer) * (tma_assists / tma_microcode_sequencer) * tma_fetch_latency * (tma_ms_switches + tma_branch_resteers * (tma_clears_resteers + tma_mispredicts_resteers * (10 * tma_microcode_sequencer * tma_other_mispredicts / tma_branch_mispredicts)) / (tma_clears_resteers + tma_mispredicts_resteers + tma_unknown_branches)) / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches)) - tma_bottleneck_big_code",
"MetricGroup": "BvFB;Fed;FetchBW;Frontend",
"MetricName": "tma_bottleneck_instruction_fetch_bw",
@@ -384,6 +390,7 @@
},
{
"BriefDescription": "Total pipeline cost of irregular execution (e.g",
+ "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "100 * (tma_microcode_sequencer / (tma_few_uops_instructions + tma_microcode_sequencer) * (tma_assists / tma_microcode_sequencer) * tma_fetch_latency * (tma_ms_switches + tma_branch_resteers * (tma_clears_resteers + tma_mispredicts_resteers * (10 * tma_microcode_sequencer * tma_other_mispredicts / tma_branch_mispredicts)) / (tma_clears_resteers + tma_mispredicts_resteers + tma_unknown_branches)) / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches) + 10 * tma_microcode_sequencer * tma_other_mispredicts / tma_branch_mispredicts * tma_branch_mispredicts + tma_machine_clears * tma_other_nukes / tma_other_nukes + tma_core_bound * (tma_serializing_operation + tma_core_bound * RS_EVENTS.EMPTY_CYCLES / tma_info_thread_clks * tma_ports_utilized_0) / (tma_divider + tma_ports_utilization + tma_serializing_operation) + tma_microcode_sequencer / (tma_few_uops_instructions + tma_microcode_sequencer) * (tma_assists / tma_microcode_sequencer) * tma_heavy_operations)",
"MetricGroup": "Bad;BvIO;Cor;Ret;tma_issueMS",
"MetricName": "tma_bottleneck_irregular_overhead",
@@ -392,6 +399,7 @@
},
{
"BriefDescription": "Total pipeline cost of Memory Address Translation related bottlenecks (data-side TLBs)",
+ "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "100 * (tma_memory_bound * (tma_l1_bound / max(tma_memory_bound, tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_dtlb_load / max(tma_l1_bound, tma_4k_aliasing + tma_dtlb_load + tma_fb_full + tma_l1_latency_dependency + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)) + tma_memory_bound * (tma_store_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_dtlb_store / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency)))",
"MetricGroup": "BvMT;Mem;MemoryTLB;Offcore;tma_issueTLB",
"MetricName": "tma_bottleneck_memory_data_tlbs",
@@ -400,6 +408,7 @@
},
{
"BriefDescription": "Total pipeline cost of Memory Synchronization related bottlenecks (data transfers and coherency updates across processors)",
+ "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "100 * (tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_mem_latency / (tma_mem_bandwidth + tma_mem_latency)) * tma_remote_cache / (tma_local_mem + tma_remote_cache + tma_remote_mem) + tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_contested_accesses + tma_data_sharing) / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full) + tma_store_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * tma_false_sharing / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency - tma_store_latency)) + tma_machine_clears * (1 - tma_other_nukes / tma_other_nukes))",
"MetricGroup": "BvMS;LockCont;Mem;Offcore;tma_issueSyncxn",
"MetricName": "tma_bottleneck_memory_synchronization",
@@ -408,6 +417,7 @@
},
{
"BriefDescription": "Total pipeline cost of Branch Misprediction related bottlenecks",
+ "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "100 * (1 - 10 * tma_microcode_sequencer * tma_other_mispredicts / tma_branch_mispredicts) * (tma_branch_mispredicts + tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches))",
"MetricGroup": "Bad;BadSpec;BrMispredicts;BvMP;tma_issueBM",
"MetricName": "tma_bottleneck_mispredictions",
@@ -416,7 +426,8 @@
},
{
"BriefDescription": "Total pipeline cost of remaining bottlenecks in the back-end",
- "MetricExpr": "100 - (tma_bottleneck_big_code + tma_bottleneck_instruction_fetch_bw + tma_bottleneck_mispredictions + tma_bottleneck_cache_memory_bandwidth + tma_bottleneck_cache_memory_latency + tma_bottleneck_memory_data_tlbs + tma_bottleneck_memory_synchronization + tma_bottleneck_compute_bound_est + tma_bottleneck_irregular_overhead + tma_bottleneck_branching_overhead + tma_bottleneck_useful_work)",
+ "MetricConstraint": "NO_GROUP_EVENTS",
+ "MetricExpr": "100 - (tma_bottleneck_big_code + tma_bottleneck_instruction_fetch_bw + tma_bottleneck_mispredictions + tma_bottleneck_data_cache_memory_bandwidth + tma_bottleneck_data_cache_memory_latency + tma_bottleneck_memory_data_tlbs + tma_bottleneck_memory_synchronization + tma_bottleneck_compute_bound_est + tma_bottleneck_irregular_overhead + tma_bottleneck_branching_overhead + tma_bottleneck_useful_work)",
"MetricGroup": "BvOB;Cor;Offcore",
"MetricName": "tma_bottleneck_other_bottlenecks",
"MetricThreshold": "tma_bottleneck_other_bottlenecks > 20",
@@ -424,6 +435,7 @@
},
{
"BriefDescription": "Total pipeline cost of \"useful operations\" - the portion of Retiring category not covered by Branching_Overhead nor Irregular_Overhead.",
+ "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "100 * (tma_retiring - (BR_INST_RETIRED.ALL_BRANCHES + 2 * BR_INST_RETIRED.NEAR_CALL + INST_RETIRED.NOP) / tma_info_thread_slots - tma_microcode_sequencer / (tma_few_uops_instructions + tma_microcode_sequencer) * (tma_assists / tma_microcode_sequencer) * tma_heavy_operations)",
"MetricGroup": "BvUW;Ret",
"MetricName": "tma_bottleneck_useful_work",
@@ -451,6 +463,7 @@
},
{
"BriefDescription": "This metric estimates fraction of cycles the CPU retired uops originated from CISC (complex instruction set computer) instruction",
+ "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "max(0, tma_microcode_sequencer - tma_assists)",
"MetricGroup": "TopdownL4;tma_L4_group;tma_microcode_sequencer_group",
"MetricName": "tma_cisc",
@@ -612,7 +625,7 @@
"MetricGroup": "BvMB;MemoryBW;TopdownL4;tma_L4_group;tma_issueBW;tma_issueSL;tma_issueSmSt;tma_l1_bound_group",
"MetricName": "tma_fb_full",
"MetricThreshold": "tma_fb_full > 0.3",
- "PublicDescription": "This metric does a *rough estimation* of how often L1D Fill Buffer unavailability limited additional L1D miss memory access requests to proceed. The higher the metric value; the deeper the memory hierarchy level the misses are satisfied from (metric values >1 are valid). Often it hints on approaching bandwidth limits (to L2 cache; L3 cache or external memory). Related metrics: tma_bottleneck_cache_memory_bandwidth, tma_info_system_dram_bw_use, tma_mem_bandwidth, tma_sq_full, tma_store_latency, tma_streaming_stores",
+ "PublicDescription": "This metric does a *rough estimation* of how often L1D Fill Buffer unavailability limited additional L1D miss memory access requests to proceed. The higher the metric value; the deeper the memory hierarchy level the misses are satisfied from (metric values >1 are valid). Often it hints on approaching bandwidth limits (to L2 cache; L3 cache or external memory). Related metrics: tma_bottleneck_data_cache_memory_bandwidth, tma_info_system_dram_bw_use, tma_mem_bandwidth, tma_sq_full, tma_store_latency, tma_streaming_stores",
"ScaleUnit": "100%"
},
{
@@ -675,7 +688,6 @@
},
{
"BriefDescription": "This metric approximates arithmetic floating-point (FP) vector uops fraction the CPU has retired aggregated across all vector widths",
- "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "cpu@FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE\\,umask\\=0xfc@ / UOPS_RETIRED.RETIRE_SLOTS",
"MetricGroup": "Compute;Flops;TopdownL4;tma_L4_group;tma_fp_arith_group;tma_issue2P",
"MetricName": "tma_fp_vector",
@@ -750,6 +762,7 @@
},
{
"BriefDescription": "Branch Misprediction Cost: Cycles representing fraction of TMA slots wasted per non-speculative branch misprediction (retired JEClear)",
+ "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "tma_bottleneck_mispredictions * tma_info_thread_slots / 4 / BR_MISP_RETIRED.ALL_BRANCHES / 100",
"MetricGroup": "Bad;BrMispredicts;tma_issueBM",
"MetricName": "tma_info_bad_spec_branch_misprediction_cost",
@@ -785,6 +798,7 @@
},
{
"BriefDescription": "Total pipeline cost of DSB (uop cache) hits - subset of the Instruction_Fetch_BW Bottleneck",
+ "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "100 * (tma_frontend_bound * (tma_fetch_bandwidth / (tma_fetch_bandwidth + tma_fetch_latency)) * (tma_dsb / (tma_dsb + tma_mite)))",
"MetricGroup": "DSB;Fed;FetchBW;tma_issueFB",
"MetricName": "tma_info_botlnk_l2_dsb_bandwidth",
@@ -802,6 +816,7 @@
},
{
"BriefDescription": "Total pipeline cost of Instruction Cache misses - subset of the Big_Code Bottleneck",
+ "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "100 * (tma_fetch_latency * tma_icache_misses / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches))",
"MetricGroup": "Fed;FetchLat;IcMiss;tma_issueFL",
"MetricName": "tma_info_botlnk_l2_ic_misses",
@@ -943,7 +958,6 @@
},
{
"BriefDescription": "Instructions per FP Arithmetic instruction (lower number means higher occurrence rate)",
- "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "INST_RETIRED.ANY / (FP_ARITH_INST_RETIRED.SCALAR + cpu@FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE\\,umask\\=0xfc@)",
"MetricGroup": "Flops;InsType",
"MetricName": "tma_info_inst_mix_iparith",
@@ -1225,7 +1239,7 @@
"MetricName": "tma_info_memory_tlb_store_stlb_mpki"
},
{
- "BriefDescription": "",
+ "BriefDescription": "Mem;Backend;CacheHits",
"MetricExpr": "UOPS_EXECUTED.THREAD / (UOPS_EXECUTED.CORE_CYCLES_GE_1 / 2 if #SMT_on else cpu@UOPS_EXECUTED.THREAD\\,cmask\\=1@)",
"MetricGroup": "Cor;Pipeline;PortsUtil;SMT",
"MetricName": "tma_info_pipeline_execute"
@@ -1243,6 +1257,12 @@
"MetricName": "tma_info_pipeline_fetch_mite"
},
{
+ "BriefDescription": "Average number of uops fetched from MS per cycle",
+ "MetricExpr": "IDQ.MS_UOPS / cpu@IDQ.MS_UOPS\\,cmask\\=1@",
+ "MetricGroup": "Fed;FetchLat;MicroSeq",
+ "MetricName": "tma_info_pipeline_fetch_ms"
+ },
+ {
"BriefDescription": "Instructions per a microcode Assist invocation",
"MetricExpr": "INST_RETIRED.ANY / (FP_ASSIST.ANY + OTHER_ASSISTS.ANY)",
"MetricGroup": "MicroSeq;Pipeline;Ret;Retire",
@@ -1258,7 +1278,7 @@
},
{
"BriefDescription": "Measured Average Core Frequency for unhalted processors [GHz]",
- "MetricExpr": "tma_info_system_turbo_utilization * TSC / 1e9 / tma_info_system_time",
+ "MetricExpr": "tma_info_system_turbo_utilization * msr@tsc@ / 1e9 / tma_info_system_time",
"MetricGroup": "Power;Summary",
"MetricName": "tma_info_system_core_frequency"
},
@@ -1270,7 +1290,7 @@
},
{
"BriefDescription": "Average number of utilized CPUs",
- "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / TSC",
+ "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / msr@tsc@",
"MetricGroup": "Summary",
"MetricName": "tma_info_system_cpus_utilized"
},
@@ -1279,7 +1299,7 @@
"MetricExpr": "64 * (UNC_M_CAS_COUNT.RD + UNC_M_CAS_COUNT.WR) / 1e9 / tma_info_system_time",
"MetricGroup": "HPC;MemOffcore;MemoryBW;SoC;tma_issueBW",
"MetricName": "tma_info_system_dram_bw_use",
- "PublicDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]. Related metrics: tma_bottleneck_cache_memory_bandwidth, tma_fb_full, tma_mem_bandwidth, tma_sq_full"
+ "PublicDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]. Related metrics: tma_bottleneck_data_cache_memory_bandwidth, tma_fb_full, tma_mem_bandwidth, tma_sq_full"
},
{
"BriefDescription": "Giga Floating Point Operations Per Second",
@@ -1475,12 +1495,13 @@
"ScaleUnit": "100%"
},
{
- "BriefDescription": "This metric([SKL+] roughly; [LNL]) estimates fraction of cycles with demand load accesses that hit the L1D cache",
+ "BriefDescription": "This metric ([SKL+] roughly; [LNL]) estimates fraction of cycles with demand load accesses that hit the L1D cache",
+ "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "min(2 * (MEM_INST_RETIRED.ALL_LOADS - MEM_LOAD_RETIRED.FB_HIT - MEM_LOAD_RETIRED.L1_MISS) * 20 / 100, max(CYCLE_ACTIVITY.CYCLES_MEM_ANY - CYCLE_ACTIVITY.CYCLES_L1D_MISS, 0)) / tma_info_thread_clks",
"MetricGroup": "BvML;MemoryLat;TopdownL4;tma_L4_group;tma_l1_bound_group",
"MetricName": "tma_l1_latency_dependency",
"MetricThreshold": "tma_l1_latency_dependency > 0.1 & (tma_l1_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
- "PublicDescription": "This metric([SKL+] roughly; [LNL]) estimates fraction of cycles with demand load accesses that hit the L1D cache. The short latency of the L1D cache may be exposed in pointer-chasing memory access patterns as an example. Sample with: MEM_LOAD_RETIRED.L1_HIT",
+ "PublicDescription": "This metric ([SKL+] roughly; [LNL]) estimates fraction of cycles with demand load accesses that hit the L1D cache. The short latency of the L1D cache may be exposed in pointer-chasing memory access patterns as an example. Sample with: MEM_LOAD_RETIRED.L1_HIT",
"ScaleUnit": "100%"
},
{
@@ -1517,7 +1538,7 @@
"MetricGroup": "BvML;MemoryLat;TopdownL4;tma_L4_group;tma_issueLat;tma_l3_bound_group",
"MetricName": "tma_l3_hit_latency",
"MetricThreshold": "tma_l3_hit_latency > 0.1 & (tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
- "PublicDescription": "This metric estimates fraction of cycles with demand load accesses that hit the L3 cache under unloaded scenarios (possibly L3 latency limited). Avoiding private cache misses (i.e. L2 misses/L3 hits) will improve the latency; reduce contention with sibling physical cores and increase performance. Note the value of this node may overlap with its siblings. Sample with: MEM_LOAD_RETIRED.L3_HIT_PS. Related metrics: tma_bottleneck_cache_memory_latency, tma_mem_latency",
+ "PublicDescription": "This metric estimates fraction of cycles with demand load accesses that hit the L3 cache under unloaded scenarios (possibly L3 latency limited). Avoiding private cache misses (i.e. L2 misses/L3 hits) will improve the latency; reduce contention with sibling physical cores and increase performance. Note the value of this node may overlap with its siblings. Sample with: MEM_LOAD_RETIRED.L3_HIT_PS. Related metrics: tma_bottleneck_data_cache_memory_latency, tma_mem_latency",
"ScaleUnit": "100%"
},
{
@@ -1541,6 +1562,7 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port for Load operations",
+ "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "(UOPS_DISPATCHED_PORT.PORT_2 + UOPS_DISPATCHED_PORT.PORT_3 + UOPS_DISPATCHED_PORT.PORT_7 - UOPS_DISPATCHED_PORT.PORT_4) / (2 * tma_info_core_core_clks)",
"MetricGroup": "TopdownL5;tma_L5_group;tma_ports_utilized_3m_group",
"MetricName": "tma_load_op_utilization",
@@ -1567,6 +1589,7 @@
},
{
"BriefDescription": "This metric estimates the fraction of cycles to walk the memory paging structures to cache translation of 1 GB pages for data load accesses.",
+ "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "tma_load_stlb_miss * DTLB_LOAD_MISSES.WALK_COMPLETED_1G / (DTLB_LOAD_MISSES.WALK_COMPLETED_4K + DTLB_LOAD_MISSES.WALK_COMPLETED_2M_4M + DTLB_LOAD_MISSES.WALK_COMPLETED_1G)",
"MetricGroup": "MemoryTLB;TopdownL6;tma_L6_group;tma_load_stlb_miss_group",
"MetricName": "tma_load_stlb_miss_1g",
@@ -1575,6 +1598,7 @@
},
{
"BriefDescription": "This metric estimates the fraction of cycles to walk the memory paging structures to cache translation of 2 or 4 MB pages for data load accesses.",
+ "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "tma_load_stlb_miss * DTLB_LOAD_MISSES.WALK_COMPLETED_2M_4M / (DTLB_LOAD_MISSES.WALK_COMPLETED_4K + DTLB_LOAD_MISSES.WALK_COMPLETED_2M_4M + DTLB_LOAD_MISSES.WALK_COMPLETED_1G)",
"MetricGroup": "MemoryTLB;TopdownL6;tma_L6_group;tma_load_stlb_miss_group",
"MetricName": "tma_load_stlb_miss_2m",
@@ -1583,6 +1607,7 @@
},
{
"BriefDescription": "This metric estimates the fraction of cycles to walk the memory paging structures to cache translation of 4 KB pages for data load accesses.",
+ "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "tma_load_stlb_miss * DTLB_LOAD_MISSES.WALK_COMPLETED_4K / (DTLB_LOAD_MISSES.WALK_COMPLETED_4K + DTLB_LOAD_MISSES.WALK_COMPLETED_2M_4M + DTLB_LOAD_MISSES.WALK_COMPLETED_1G)",
"MetricGroup": "MemoryTLB;TopdownL6;tma_L6_group;tma_load_stlb_miss_group",
"MetricName": "tma_load_stlb_miss_4k",
@@ -1600,6 +1625,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles the CPU spent handling cache misses due to lock operations",
+ "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "(12 * max(0, MEM_INST_RETIRED.LOCK_LOADS - L2_RQSTS.ALL_RFO) + MEM_INST_RETIRED.LOCK_LOADS / MEM_INST_RETIRED.ALL_STORES * (11 * L2_RQSTS.RFO_HIT + min(CPU_CLK_UNHALTED.THREAD, OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO))) / tma_info_thread_clks",
"MetricGroup": "LockCont;Offcore;TopdownL4;tma_L4_group;tma_issueRFO;tma_l1_bound_group",
"MetricName": "tma_lock_latency",
@@ -1624,7 +1650,7 @@
"MetricGroup": "BvMB;MemoryBW;Offcore;TopdownL4;tma_L4_group;tma_dram_bound_group;tma_issueBW",
"MetricName": "tma_mem_bandwidth",
"MetricThreshold": "tma_mem_bandwidth > 0.2 & (tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
- "PublicDescription": "This metric estimates fraction of cycles where the core's performance was likely hurt due to approaching bandwidth limits of external memory - DRAM ([SPR-HBM] and/or HBM). The underlying heuristic assumes that a similar off-core traffic is generated by all IA cores. This metric does not aggregate non-data-read requests by this logical processor; requests from other IA Logical Processors/Physical Cores/sockets; or other non-IA devices like GPU; hence the maximum external memory bandwidth limits may or may not be approached when this metric is flagged (see Uncore counters for that). Related metrics: tma_bottleneck_cache_memory_bandwidth, tma_fb_full, tma_info_system_dram_bw_use, tma_sq_full",
+ "PublicDescription": "This metric estimates fraction of cycles where the core's performance was likely hurt due to approaching bandwidth limits of external memory - DRAM ([SPR-HBM] and/or HBM). The underlying heuristic assumes that a similar off-core traffic is generated by all IA cores. This metric does not aggregate non-data-read requests by this logical processor; requests from other IA Logical Processors/Physical Cores/sockets; or other non-IA devices like GPU; hence the maximum external memory bandwidth limits may or may not be approached when this metric is flagged (see Uncore counters for that). Related metrics: tma_bottleneck_data_cache_memory_bandwidth, tma_fb_full, tma_info_system_dram_bw_use, tma_sq_full",
"ScaleUnit": "100%"
},
{
@@ -1633,7 +1659,7 @@
"MetricGroup": "BvML;MemoryLat;Offcore;TopdownL4;tma_L4_group;tma_dram_bound_group;tma_issueLat",
"MetricName": "tma_mem_latency",
"MetricThreshold": "tma_mem_latency > 0.1 & (tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
- "PublicDescription": "This metric estimates fraction of cycles where the performance was likely hurt due to latency from external memory - DRAM ([SPR-HBM] and/or HBM). This metric does not aggregate requests from other Logical Processors/Physical Cores/sockets (see Uncore counters for that). Related metrics: tma_bottleneck_cache_memory_latency, tma_l3_hit_latency",
+ "PublicDescription": "This metric estimates fraction of cycles where the performance was likely hurt due to latency from external memory - DRAM ([SPR-HBM] and/or HBM). This metric does not aggregate requests from other Logical Processors/Physical Cores/sockets (see Uncore counters for that). Related metrics: tma_bottleneck_data_cache_memory_latency, tma_l3_hit_latency",
"ScaleUnit": "100%"
},
{
@@ -1657,7 +1683,6 @@
},
{
"BriefDescription": "This metric represents fraction of slots the CPU was retiring uops fetched by the Microcode Sequencer (MS) unit",
- "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / UOPS_ISSUED.ANY * IDQ.MS_UOPS / tma_info_thread_slots",
"MetricGroup": "MicroSeq;TopdownL3;tma_L3_group;tma_heavy_operations_group;tma_issueMC;tma_issueMS",
"MetricName": "tma_microcode_sequencer",
@@ -1667,6 +1692,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to Branch Resteers as a result of Branch Misprediction at execution stage",
+ "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "BR_MISP_RETIRED.ALL_BRANCHES / (BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT) * INT_MISC.CLEAR_RESTEER_CYCLES / tma_info_thread_clks",
"MetricGroup": "BadSpec;BrMispredicts;BvMP;TopdownL4;tma_L4_group;tma_branch_resteers_group;tma_issueBM",
"MetricName": "tma_mispredicts_resteers",
@@ -1721,6 +1747,7 @@
},
{
"BriefDescription": "This metric represents the remaining light uops fraction the CPU has executed - remaining means not covered by other sibling nodes",
+ "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "max(0, tma_light_operations - (tma_fp_arith + tma_memory_operations + tma_fused_instructions + tma_non_fused_branches))",
"MetricGroup": "Pipeline;TopdownL3;tma_L3_group;tma_light_operations_group",
"MetricName": "tma_other_light_ops",
@@ -1730,6 +1757,7 @@
},
{
"BriefDescription": "This metric estimates fraction of slots the CPU was stalled due to other cases of misprediction (non-retired x86 branches or other types).",
+ "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "max(tma_branch_mispredicts * (1 - BR_MISP_RETIRED.ALL_BRANCHES / (INT_MISC.CLEARS_COUNT - MACHINE_CLEARS.COUNT)), 0.0001)",
"MetricGroup": "BrMispredicts;BvIO;TopdownL3;tma_L3_group;tma_branch_mispredicts_group",
"MetricName": "tma_other_mispredicts",
@@ -1738,6 +1766,7 @@
},
{
"BriefDescription": "This metric represents fraction of slots the CPU has wasted due to Nukes (Machine Clears) not related to memory ordering.",
+ "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "max(tma_machine_clears * (1 - MACHINE_CLEARS.MEMORY_ORDERING / MACHINE_CLEARS.COUNT), 0.0001)",
"MetricGroup": "BvIO;Machine_Clears;TopdownL3;tma_L3_group;tma_machine_clears_group",
"MetricName": "tma_other_nukes",
@@ -1818,6 +1847,7 @@
},
{
"BriefDescription": "This metric estimates fraction of cycles the CPU performance was potentially limited due to Core computation issues (non divider-related)",
+ "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "((tma_ports_utilized_0 * tma_info_thread_clks + (EXE_ACTIVITY.1_PORTS_UTIL + tma_retiring * EXE_ACTIVITY.2_PORTS_UTIL)) / tma_info_thread_clks if ARITH.DIVIDER_ACTIVE < CYCLE_ACTIVITY.STALLS_TOTAL - CYCLE_ACTIVITY.STALLS_MEM_ANY else (EXE_ACTIVITY.1_PORTS_UTIL + tma_retiring * EXE_ACTIVITY.2_PORTS_UTIL) / tma_info_thread_clks)",
"MetricGroup": "PortsUtil;TopdownL3;tma_L3_group;tma_core_bound_group",
"MetricName": "tma_ports_utilization",
@@ -1923,7 +1953,7 @@
"MetricGroup": "BvMB;MemoryBW;Offcore;TopdownL4;tma_L4_group;tma_issueBW;tma_l3_bound_group",
"MetricName": "tma_sq_full",
"MetricThreshold": "tma_sq_full > 0.3 & (tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
- "PublicDescription": "This metric measures fraction of cycles where the Super Queue (SQ) was full taking into account all request-types and both hardware SMT threads (Logical Processors). Related metrics: tma_bottleneck_cache_memory_bandwidth, tma_fb_full, tma_info_system_dram_bw_use, tma_mem_bandwidth",
+ "PublicDescription": "This metric measures fraction of cycles where the Super Queue (SQ) was full taking into account all request-types and both hardware SMT threads (Logical Processors). Related metrics: tma_bottleneck_data_cache_memory_bandwidth, tma_fb_full, tma_info_system_dram_bw_use, tma_mem_bandwidth",
"ScaleUnit": "100%"
},
{
@@ -1980,6 +2010,7 @@
},
{
"BriefDescription": "This metric estimates the fraction of cycles to walk the memory paging structures to cache translation of 1 GB pages for data store accesses.",
+ "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "tma_store_stlb_miss * DTLB_STORE_MISSES.WALK_COMPLETED_1G / (DTLB_STORE_MISSES.WALK_COMPLETED_4K + DTLB_STORE_MISSES.WALK_COMPLETED_2M_4M + DTLB_STORE_MISSES.WALK_COMPLETED_1G)",
"MetricGroup": "MemoryTLB;TopdownL6;tma_L6_group;tma_store_stlb_miss_group",
"MetricName": "tma_store_stlb_miss_1g",
@@ -1988,6 +2019,7 @@
},
{
"BriefDescription": "This metric estimates the fraction of cycles to walk the memory paging structures to cache translation of 2 or 4 MB pages for data store accesses.",
+ "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "tma_store_stlb_miss * DTLB_STORE_MISSES.WALK_COMPLETED_2M_4M / (DTLB_STORE_MISSES.WALK_COMPLETED_4K + DTLB_STORE_MISSES.WALK_COMPLETED_2M_4M + DTLB_STORE_MISSES.WALK_COMPLETED_1G)",
"MetricGroup": "MemoryTLB;TopdownL6;tma_L6_group;tma_store_stlb_miss_group",
"MetricName": "tma_store_stlb_miss_2m",
@@ -1996,6 +2028,7 @@
},
{
"BriefDescription": "This metric estimates the fraction of cycles to walk the memory paging structures to cache translation of 4 KB pages for data store accesses.",
+ "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "tma_store_stlb_miss * DTLB_STORE_MISSES.WALK_COMPLETED_4K / (DTLB_STORE_MISSES.WALK_COMPLETED_4K + DTLB_STORE_MISSES.WALK_COMPLETED_2M_4M + DTLB_STORE_MISSES.WALK_COMPLETED_1G)",
"MetricGroup": "MemoryTLB;TopdownL6;tma_L6_group;tma_store_stlb_miss_group",
"MetricName": "tma_store_stlb_miss_4k",
diff --git a/tools/perf/pmu-events/arch/x86/tigerlake/tgl-metrics.json b/tools/perf/pmu-events/arch/x86/tigerlake/tgl-metrics.json
index 2db7a70f7a07..908da985c594 100644
--- a/tools/perf/pmu-events/arch/x86/tigerlake/tgl-metrics.json
+++ b/tools/perf/pmu-events/arch/x86/tigerlake/tgl-metrics.json
@@ -1,63 +1,63 @@
[
{
"BriefDescription": "C10 residency percent per package",
- "MetricExpr": "cstate_pkg@c10\\-residency@ / TSC",
+ "MetricExpr": "cstate_pkg@c10\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C10_Pkg_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C2 residency percent per package",
- "MetricExpr": "cstate_pkg@c2\\-residency@ / TSC",
+ "MetricExpr": "cstate_pkg@c2\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C2_Pkg_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C3 residency percent per package",
- "MetricExpr": "cstate_pkg@c3\\-residency@ / TSC",
+ "MetricExpr": "cstate_pkg@c3\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C3_Pkg_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C6 residency percent per core",
- "MetricExpr": "cstate_core@c6\\-residency@ / TSC",
+ "MetricExpr": "cstate_core@c6\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C6_Core_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C6 residency percent per package",
- "MetricExpr": "cstate_pkg@c6\\-residency@ / TSC",
+ "MetricExpr": "cstate_pkg@c6\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C6_Pkg_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C7 residency percent per core",
- "MetricExpr": "cstate_core@c7\\-residency@ / TSC",
+ "MetricExpr": "cstate_core@c7\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C7_Core_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C7 residency percent per package",
- "MetricExpr": "cstate_pkg@c7\\-residency@ / TSC",
+ "MetricExpr": "cstate_pkg@c7\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C7_Pkg_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C8 residency percent per package",
- "MetricExpr": "cstate_pkg@c8\\-residency@ / TSC",
+ "MetricExpr": "cstate_pkg@c8\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C8_Pkg_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C9 residency percent per package",
- "MetricExpr": "cstate_pkg@c9\\-residency@ / TSC",
+ "MetricExpr": "cstate_pkg@c9\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C9_Pkg_Residency",
"ScaleUnit": "100%"
@@ -85,7 +85,6 @@
},
{
"BriefDescription": "This metric estimates how often memory load accesses were aliased by preceding stores (in program order) with a 4K address offset",
- "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "LD_BLOCKS_PARTIAL.ADDRESS_ALIAS / tma_info_thread_clks",
"MetricGroup": "TopdownL4;tma_L4_group;tma_l1_bound_group",
"MetricName": "tma_4k_aliasing",
@@ -134,6 +133,7 @@
},
{
"BriefDescription": "Total pipeline cost of instruction fetch related bottlenecks by large code footprint programs (i-side cache; TLB and BTB misses)",
+ "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "100 * tma_fetch_latency * (tma_itlb_misses + tma_icache_misses + tma_unknown_branches) / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches)",
"MetricGroup": "BigFootprint;BvBC;Fed;Frontend;IcMiss;MemoryTLB",
"MetricName": "tma_bottleneck_big_code",
@@ -148,39 +148,44 @@
"PublicDescription": "Total pipeline cost of instructions used for program control-flow - a subset of the Retiring category in TMA. Examples include function calls; loops and alignments. (A lower bound)"
},
{
+ "BriefDescription": "Total pipeline cost when the execution is compute-bound - an estimation",
+ "MetricConstraint": "NO_GROUP_EVENTS",
+ "MetricExpr": "100 * (tma_core_bound * tma_divider / (tma_divider + tma_ports_utilization + tma_serializing_operation) + tma_core_bound * (tma_ports_utilization / (tma_divider + tma_ports_utilization + tma_serializing_operation)) * (tma_ports_utilized_3m / (tma_ports_utilized_0 + tma_ports_utilized_1 + tma_ports_utilized_2 + tma_ports_utilized_3m)))",
+ "MetricGroup": "BvCB;Cor;tma_issueComp",
+ "MetricName": "tma_bottleneck_compute_bound_est",
+ "MetricThreshold": "tma_bottleneck_compute_bound_est > 20",
+ "PublicDescription": "Total pipeline cost when the execution is compute-bound - an estimation. Covers Core Bound when High ILP as well as when long-latency execution units are busy. Related metrics: "
+ },
+ {
"BriefDescription": "Total pipeline cost of external Memory- or Cache-Bandwidth related bottlenecks",
+ "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "100 * (tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_mem_bandwidth / (tma_mem_bandwidth + tma_mem_latency)) + tma_memory_bound * (tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_sq_full / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full)) + tma_memory_bound * (tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_fb_full / (tma_4k_aliasing + tma_dtlb_load + tma_fb_full + tma_l1_latency_dependency + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)))",
"MetricGroup": "BvMB;Mem;MemoryBW;Offcore;tma_issueBW",
- "MetricName": "tma_bottleneck_cache_memory_bandwidth",
- "MetricThreshold": "tma_bottleneck_cache_memory_bandwidth > 20",
+ "MetricName": "tma_bottleneck_data_cache_memory_bandwidth",
+ "MetricThreshold": "tma_bottleneck_data_cache_memory_bandwidth > 20",
"PublicDescription": "Total pipeline cost of external Memory- or Cache-Bandwidth related bottlenecks. Related metrics: tma_fb_full, tma_info_system_dram_bw_use, tma_mem_bandwidth, tma_sq_full"
},
{
"BriefDescription": "Total pipeline cost of external Memory- or Cache-Latency related bottlenecks",
+ "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "100 * (tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_mem_latency / (tma_mem_bandwidth + tma_mem_latency)) + tma_memory_bound * (tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_l3_hit_latency / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full)) + tma_memory_bound * tma_l2_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) + tma_memory_bound * (tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_l1_latency_dependency / (tma_4k_aliasing + tma_dtlb_load + tma_fb_full + tma_l1_latency_dependency + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)) + tma_memory_bound * (tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_lock_latency / (tma_4k_aliasing + tma_dtlb_load + tma_fb_full + tma_l1_latency_dependency + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)) + tma_memory_bound * (tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_split_loads / (tma_4k_aliasing + tma_dtlb_load + tma_fb_full + tma_l1_latency_dependency + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)) + tma_memory_bound * (tma_store_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_split_stores / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency + tma_streaming_stores)) + tma_memory_bound * (tma_store_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_store_latency / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency + tma_streaming_stores)))",
"MetricGroup": "BvML;Mem;MemoryLat;Offcore;tma_issueLat",
- "MetricName": "tma_bottleneck_cache_memory_latency",
- "MetricThreshold": "tma_bottleneck_cache_memory_latency > 20",
+ "MetricName": "tma_bottleneck_data_cache_memory_latency",
+ "MetricThreshold": "tma_bottleneck_data_cache_memory_latency > 20",
"PublicDescription": "Total pipeline cost of external Memory- or Cache-Latency related bottlenecks. Related metrics: tma_l3_hit_latency, tma_mem_latency"
},
{
- "BriefDescription": "Total pipeline cost when the execution is compute-bound - an estimation",
- "MetricExpr": "100 * (tma_core_bound * tma_divider / (tma_divider + tma_ports_utilization + tma_serializing_operation) + tma_core_bound * (tma_ports_utilization / (tma_divider + tma_ports_utilization + tma_serializing_operation)) * (tma_ports_utilized_3m / (tma_ports_utilized_0 + tma_ports_utilized_1 + tma_ports_utilized_2 + tma_ports_utilized_3m)))",
- "MetricGroup": "BvCB;Cor;tma_issueComp",
- "MetricName": "tma_bottleneck_compute_bound_est",
- "MetricThreshold": "tma_bottleneck_compute_bound_est > 20",
- "PublicDescription": "Total pipeline cost when the execution is compute-bound - an estimation. Covers Core Bound when High ILP as well as when long-latency execution units are busy. Related metrics: "
- },
- {
"BriefDescription": "Total pipeline cost of instruction fetch bandwidth related bottlenecks (when the front-end could not sustain operations delivery to the back-end)",
- "MetricExpr": "100 * (tma_frontend_bound - (1 - 10 * tma_microcode_sequencer * tma_other_mispredicts / tma_branch_mispredicts) * tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches) - tma_microcode_sequencer / (tma_few_uops_instructions + tma_microcode_sequencer) * (tma_assists / tma_microcode_sequencer) * (tma_fetch_latency * (tma_ms_switches + tma_branch_resteers * (tma_clears_resteers + 10 * tma_microcode_sequencer * tma_other_mispredicts / tma_branch_mispredicts * tma_mispredicts_resteers) / (tma_clears_resteers + tma_mispredicts_resteers + tma_unknown_branches)) / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches) + tma_fetch_bandwidth * tma_ms / (tma_dsb + tma_lsd + tma_mite + tma_ms))) - tma_bottleneck_big_code",
+ "MetricConstraint": "NO_GROUP_EVENTS",
+ "MetricExpr": "100 * (tma_frontend_bound - (1 - 10 * tma_microcode_sequencer * tma_other_mispredicts / tma_branch_mispredicts) * tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches) - tma_microcode_sequencer / (tma_few_uops_instructions + tma_microcode_sequencer) * (tma_assists / tma_microcode_sequencer) * (tma_fetch_latency * (tma_ms_switches + tma_branch_resteers * (tma_clears_resteers + 10 * tma_microcode_sequencer * tma_other_mispredicts / tma_branch_mispredicts * tma_mispredicts_resteers) / (tma_clears_resteers + tma_mispredicts_resteers + tma_unknown_branches)) / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches) + tma_ms)) - tma_bottleneck_big_code",
"MetricGroup": "BvFB;Fed;FetchBW;Frontend",
"MetricName": "tma_bottleneck_instruction_fetch_bw",
"MetricThreshold": "tma_bottleneck_instruction_fetch_bw > 20"
},
{
"BriefDescription": "Total pipeline cost of irregular execution (e.g",
- "MetricExpr": "100 * (tma_microcode_sequencer / (tma_few_uops_instructions + tma_microcode_sequencer) * (tma_assists / tma_microcode_sequencer) * (tma_fetch_latency * (tma_ms_switches + tma_branch_resteers * (tma_clears_resteers + 10 * tma_microcode_sequencer * tma_other_mispredicts / tma_branch_mispredicts * tma_mispredicts_resteers) / (tma_clears_resteers + tma_mispredicts_resteers + tma_unknown_branches)) / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches) + tma_fetch_bandwidth * tma_ms / (tma_dsb + tma_lsd + tma_mite + tma_ms)) + 10 * tma_microcode_sequencer * tma_other_mispredicts / tma_branch_mispredicts * tma_branch_mispredicts + tma_machine_clears * tma_other_nukes / tma_other_nukes + tma_core_bound * (tma_serializing_operation + tma_core_bound * RS_EVENTS.EMPTY_CYCLES / tma_info_thread_clks * tma_ports_utilized_0) / (tma_divider + tma_ports_utilization + tma_serializing_operation) + tma_microcode_sequencer / (tma_few_uops_instructions + tma_microcode_sequencer) * (tma_assists / tma_microcode_sequencer) * tma_heavy_operations)",
+ "MetricConstraint": "NO_GROUP_EVENTS",
+ "MetricExpr": "100 * (tma_microcode_sequencer / (tma_few_uops_instructions + tma_microcode_sequencer) * (tma_assists / tma_microcode_sequencer) * (tma_fetch_latency * (tma_ms_switches + tma_branch_resteers * (tma_clears_resteers + 10 * tma_microcode_sequencer * tma_other_mispredicts / tma_branch_mispredicts * tma_mispredicts_resteers) / (tma_clears_resteers + tma_mispredicts_resteers + tma_unknown_branches)) / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches) + tma_ms) + 10 * tma_microcode_sequencer * tma_other_mispredicts / tma_branch_mispredicts * tma_branch_mispredicts + tma_machine_clears * tma_other_nukes / tma_other_nukes + tma_core_bound * (tma_serializing_operation + tma_core_bound * RS_EVENTS.EMPTY_CYCLES / tma_info_thread_clks * tma_ports_utilized_0) / (tma_divider + tma_ports_utilization + tma_serializing_operation) + tma_microcode_sequencer / (tma_few_uops_instructions + tma_microcode_sequencer) * (tma_assists / tma_microcode_sequencer) * tma_heavy_operations)",
"MetricGroup": "Bad;BvIO;Cor;Ret;tma_issueMS",
"MetricName": "tma_bottleneck_irregular_overhead",
"MetricThreshold": "tma_bottleneck_irregular_overhead > 10",
@@ -188,6 +193,7 @@
},
{
"BriefDescription": "Total pipeline cost of Memory Address Translation related bottlenecks (data-side TLBs)",
+ "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "100 * (tma_memory_bound * (tma_l1_bound / max(tma_memory_bound, tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_dtlb_load / max(tma_l1_bound, tma_4k_aliasing + tma_dtlb_load + tma_fb_full + tma_l1_latency_dependency + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)) + tma_memory_bound * (tma_store_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_dtlb_store / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency + tma_streaming_stores)))",
"MetricGroup": "BvMT;Mem;MemoryTLB;Offcore;tma_issueTLB",
"MetricName": "tma_bottleneck_memory_data_tlbs",
@@ -196,6 +202,7 @@
},
{
"BriefDescription": "Total pipeline cost of Memory Synchronization related bottlenecks (data transfers and coherency updates across processors)",
+ "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "100 * (tma_memory_bound * (tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_contested_accesses + tma_data_sharing) / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full) + tma_store_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * tma_false_sharing / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency + tma_streaming_stores - tma_store_latency)) + tma_machine_clears * (1 - tma_other_nukes / tma_other_nukes))",
"MetricGroup": "BvMS;LockCont;Mem;Offcore;tma_issueSyncxn",
"MetricName": "tma_bottleneck_memory_synchronization",
@@ -204,6 +211,7 @@
},
{
"BriefDescription": "Total pipeline cost of Branch Misprediction related bottlenecks",
+ "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "100 * (1 - 10 * tma_microcode_sequencer * tma_other_mispredicts / tma_branch_mispredicts) * (tma_branch_mispredicts + tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches))",
"MetricGroup": "Bad;BadSpec;BrMispredicts;BvMP;tma_issueBM",
"MetricName": "tma_bottleneck_mispredictions",
@@ -212,7 +220,8 @@
},
{
"BriefDescription": "Total pipeline cost of remaining bottlenecks in the back-end",
- "MetricExpr": "100 - (tma_bottleneck_big_code + tma_bottleneck_instruction_fetch_bw + tma_bottleneck_mispredictions + tma_bottleneck_cache_memory_bandwidth + tma_bottleneck_cache_memory_latency + tma_bottleneck_memory_data_tlbs + tma_bottleneck_memory_synchronization + tma_bottleneck_compute_bound_est + tma_bottleneck_irregular_overhead + tma_bottleneck_branching_overhead + tma_bottleneck_useful_work)",
+ "MetricConstraint": "NO_GROUP_EVENTS",
+ "MetricExpr": "100 - (tma_bottleneck_big_code + tma_bottleneck_instruction_fetch_bw + tma_bottleneck_mispredictions + tma_bottleneck_data_cache_memory_bandwidth + tma_bottleneck_data_cache_memory_latency + tma_bottleneck_memory_data_tlbs + tma_bottleneck_memory_synchronization + tma_bottleneck_compute_bound_est + tma_bottleneck_irregular_overhead + tma_bottleneck_branching_overhead + tma_bottleneck_useful_work)",
"MetricGroup": "BvOB;Cor;Offcore",
"MetricName": "tma_bottleneck_other_bottlenecks",
"MetricThreshold": "tma_bottleneck_other_bottlenecks > 20",
@@ -220,6 +229,7 @@
},
{
"BriefDescription": "Total pipeline cost of \"useful operations\" - the portion of Retiring category not covered by Branching_Overhead nor Irregular_Overhead.",
+ "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "100 * (tma_retiring - (BR_INST_RETIRED.ALL_BRANCHES + 2 * BR_INST_RETIRED.NEAR_CALL + INST_RETIRED.NOP) / tma_info_thread_slots - tma_microcode_sequencer / (tma_few_uops_instructions + tma_microcode_sequencer) * (tma_assists / tma_microcode_sequencer) * tma_heavy_operations)",
"MetricGroup": "BvUW;Ret",
"MetricName": "tma_bottleneck_useful_work",
@@ -427,7 +437,7 @@
"MetricGroup": "BvMB;MemoryBW;TopdownL4;tma_L4_group;tma_issueBW;tma_issueSL;tma_issueSmSt;tma_l1_bound_group",
"MetricName": "tma_fb_full",
"MetricThreshold": "tma_fb_full > 0.3",
- "PublicDescription": "This metric does a *rough estimation* of how often L1D Fill Buffer unavailability limited additional L1D miss memory access requests to proceed. The higher the metric value; the deeper the memory hierarchy level the misses are satisfied from (metric values >1 are valid). Often it hints on approaching bandwidth limits (to L2 cache; L3 cache or external memory). Related metrics: tma_bottleneck_cache_memory_bandwidth, tma_info_system_dram_bw_use, tma_mem_bandwidth, tma_sq_full, tma_store_latency, tma_streaming_stores",
+ "PublicDescription": "This metric does a *rough estimation* of how often L1D Fill Buffer unavailability limited additional L1D miss memory access requests to proceed. The higher the metric value; the deeper the memory hierarchy level the misses are satisfied from (metric values >1 are valid). Often it hints on approaching bandwidth limits (to L2 cache; L3 cache or external memory). Related metrics: tma_bottleneck_data_cache_memory_bandwidth, tma_info_system_dram_bw_use, tma_mem_bandwidth, tma_sq_full, tma_store_latency, tma_streaming_stores",
"ScaleUnit": "100%"
},
{
@@ -619,6 +629,7 @@
},
{
"BriefDescription": "Total pipeline cost of DSB (uop cache) hits - subset of the Instruction_Fetch_BW Bottleneck",
+ "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "100 * (tma_frontend_bound * (tma_fetch_bandwidth / (tma_fetch_bandwidth + tma_fetch_latency)) * (tma_dsb / (tma_dsb + tma_lsd + tma_mite + tma_ms)))",
"MetricGroup": "DSB;Fed;FetchBW;tma_issueFB",
"MetricName": "tma_info_botlnk_l2_dsb_bandwidth",
@@ -1074,7 +1085,7 @@
"MetricName": "tma_info_memory_tlb_store_stlb_mpki"
},
{
- "BriefDescription": "",
+ "BriefDescription": "Mem;Backend;CacheHits",
"MetricExpr": "UOPS_EXECUTED.THREAD / (UOPS_EXECUTED.CORE_CYCLES_GE_1 / 2 if #SMT_on else cpu@UOPS_EXECUTED.THREAD\\,cmask\\=1@)",
"MetricGroup": "Cor;Pipeline;PortsUtil;SMT",
"MetricName": "tma_info_pipeline_execute"
@@ -1098,6 +1109,12 @@
"MetricName": "tma_info_pipeline_fetch_mite"
},
{
+ "BriefDescription": "Average number of uops fetched from MS per cycle",
+ "MetricExpr": "IDQ.MS_UOPS / cpu@IDQ.MS_UOPS\\,cmask\\=1@",
+ "MetricGroup": "Fed;FetchLat;MicroSeq",
+ "MetricName": "tma_info_pipeline_fetch_ms"
+ },
+ {
"BriefDescription": "Instructions per a microcode Assist invocation",
"MetricExpr": "INST_RETIRED.ANY / ASSISTS.ANY",
"MetricGroup": "MicroSeq;Pipeline;Ret;Retire",
@@ -1113,7 +1130,7 @@
},
{
"BriefDescription": "Measured Average Core Frequency for unhalted processors [GHz]",
- "MetricExpr": "tma_info_system_turbo_utilization * TSC / 1e9 / tma_info_system_time",
+ "MetricExpr": "tma_info_system_turbo_utilization * msr@tsc@ / 1e9 / tma_info_system_time",
"MetricGroup": "Power;Summary",
"MetricName": "tma_info_system_core_frequency"
},
@@ -1125,7 +1142,7 @@
},
{
"BriefDescription": "Average number of utilized CPUs",
- "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / TSC",
+ "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / msr@tsc@",
"MetricGroup": "Summary",
"MetricName": "tma_info_system_cpus_utilized"
},
@@ -1134,7 +1151,7 @@
"MetricExpr": "64 * (UNC_ARB_TRK_REQUESTS.ALL + UNC_ARB_COH_TRK_REQUESTS.ALL) / 1e6 / tma_info_system_time / 1e3",
"MetricGroup": "HPC;MemOffcore;MemoryBW;SoC;tma_issueBW",
"MetricName": "tma_info_system_dram_bw_use",
- "PublicDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]. Related metrics: tma_bottleneck_cache_memory_bandwidth, tma_fb_full, tma_mem_bandwidth, tma_sq_full"
+ "PublicDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]. Related metrics: tma_bottleneck_data_cache_memory_bandwidth, tma_fb_full, tma_mem_bandwidth, tma_sq_full"
},
{
"BriefDescription": "Giga Floating Point Operations Per Second",
@@ -1165,6 +1182,7 @@
},
{
"BriefDescription": "Average number of parallel data read requests to external memory",
+ "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "UNC_ARB_DAT_OCCUPANCY.RD / UNC_ARB_DAT_OCCUPANCY.RD@cmask\\=1@",
"MetricGroup": "Mem;MemoryBW;SoC",
"MetricName": "tma_info_system_mem_parallel_reads",
@@ -1316,12 +1334,12 @@
"ScaleUnit": "100%"
},
{
- "BriefDescription": "This metric([SKL+] roughly; [LNL]) estimates fraction of cycles with demand load accesses that hit the L1D cache",
+ "BriefDescription": "This metric ([SKL+] roughly; [LNL]) estimates fraction of cycles with demand load accesses that hit the L1D cache",
"MetricExpr": "min(2 * (MEM_INST_RETIRED.ALL_LOADS - MEM_LOAD_RETIRED.FB_HIT - MEM_LOAD_RETIRED.L1_MISS) * 20 / 100, max(CYCLE_ACTIVITY.CYCLES_MEM_ANY - CYCLE_ACTIVITY.CYCLES_L1D_MISS, 0)) / tma_info_thread_clks",
"MetricGroup": "BvML;MemoryLat;TopdownL4;tma_L4_group;tma_l1_bound_group",
"MetricName": "tma_l1_latency_dependency",
"MetricThreshold": "tma_l1_latency_dependency > 0.1 & (tma_l1_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
- "PublicDescription": "This metric([SKL+] roughly; [LNL]) estimates fraction of cycles with demand load accesses that hit the L1D cache. The short latency of the L1D cache may be exposed in pointer-chasing memory access patterns as an example. Sample with: MEM_LOAD_RETIRED.L1_HIT",
+ "PublicDescription": "This metric ([SKL+] roughly; [LNL]) estimates fraction of cycles with demand load accesses that hit the L1D cache. The short latency of the L1D cache may be exposed in pointer-chasing memory access patterns as an example. Sample with: MEM_LOAD_RETIRED.L1_HIT",
"ScaleUnit": "100%"
},
{
@@ -1345,7 +1363,6 @@
},
{
"BriefDescription": "This metric estimates how often the CPU was stalled due to loads accesses to L3 cache or contended with a sibling Core",
- "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "(CYCLE_ACTIVITY.STALLS_L2_MISS - CYCLE_ACTIVITY.STALLS_L3_MISS) / tma_info_thread_clks",
"MetricGroup": "CacheHits;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
"MetricName": "tma_l3_bound",
@@ -1359,7 +1376,7 @@
"MetricGroup": "BvML;MemoryLat;TopdownL4;tma_L4_group;tma_issueLat;tma_l3_bound_group",
"MetricName": "tma_l3_hit_latency",
"MetricThreshold": "tma_l3_hit_latency > 0.1 & (tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
- "PublicDescription": "This metric estimates fraction of cycles with demand load accesses that hit the L3 cache under unloaded scenarios (possibly L3 latency limited). Avoiding private cache misses (i.e. L2 misses/L3 hits) will improve the latency; reduce contention with sibling physical cores and increase performance. Note the value of this node may overlap with its siblings. Sample with: MEM_LOAD_RETIRED.L3_HIT_PS. Related metrics: tma_bottleneck_cache_memory_latency, tma_mem_latency",
+ "PublicDescription": "This metric estimates fraction of cycles with demand load accesses that hit the L3 cache under unloaded scenarios (possibly L3 latency limited). Avoiding private cache misses (i.e. L2 misses/L3 hits) will improve the latency; reduce contention with sibling physical cores and increase performance. Note the value of this node may overlap with its siblings. Sample with: MEM_LOAD_RETIRED.L3_HIT_PS. Related metrics: tma_bottleneck_data_cache_memory_latency, tma_mem_latency",
"ScaleUnit": "100%"
},
{
@@ -1465,7 +1482,7 @@
"MetricGroup": "BvMB;MemoryBW;Offcore;TopdownL4;tma_L4_group;tma_dram_bound_group;tma_issueBW",
"MetricName": "tma_mem_bandwidth",
"MetricThreshold": "tma_mem_bandwidth > 0.2 & (tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
- "PublicDescription": "This metric estimates fraction of cycles where the core's performance was likely hurt due to approaching bandwidth limits of external memory - DRAM ([SPR-HBM] and/or HBM). The underlying heuristic assumes that a similar off-core traffic is generated by all IA cores. This metric does not aggregate non-data-read requests by this logical processor; requests from other IA Logical Processors/Physical Cores/sockets; or other non-IA devices like GPU; hence the maximum external memory bandwidth limits may or may not be approached when this metric is flagged (see Uncore counters for that). Related metrics: tma_bottleneck_cache_memory_bandwidth, tma_fb_full, tma_info_system_dram_bw_use, tma_sq_full",
+ "PublicDescription": "This metric estimates fraction of cycles where the core's performance was likely hurt due to approaching bandwidth limits of external memory - DRAM ([SPR-HBM] and/or HBM). The underlying heuristic assumes that a similar off-core traffic is generated by all IA cores. This metric does not aggregate non-data-read requests by this logical processor; requests from other IA Logical Processors/Physical Cores/sockets; or other non-IA devices like GPU; hence the maximum external memory bandwidth limits may or may not be approached when this metric is flagged (see Uncore counters for that). Related metrics: tma_bottleneck_data_cache_memory_bandwidth, tma_fb_full, tma_info_system_dram_bw_use, tma_sq_full",
"ScaleUnit": "100%"
},
{
@@ -1474,7 +1491,7 @@
"MetricGroup": "BvML;MemoryLat;Offcore;TopdownL4;tma_L4_group;tma_dram_bound_group;tma_issueLat",
"MetricName": "tma_mem_latency",
"MetricThreshold": "tma_mem_latency > 0.1 & (tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
- "PublicDescription": "This metric estimates fraction of cycles where the performance was likely hurt due to latency from external memory - DRAM ([SPR-HBM] and/or HBM). This metric does not aggregate requests from other Logical Processors/Physical Cores/sockets (see Uncore counters for that). Related metrics: tma_bottleneck_cache_memory_latency, tma_l3_hit_latency",
+ "PublicDescription": "This metric estimates fraction of cycles where the performance was likely hurt due to latency from external memory - DRAM ([SPR-HBM] and/or HBM). This metric does not aggregate requests from other Logical Processors/Physical Cores/sockets (see Uncore counters for that). Related metrics: tma_bottleneck_data_cache_memory_latency, tma_l3_hit_latency",
"ScaleUnit": "100%"
},
{
@@ -1542,7 +1559,7 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles in which CPU was likely limited due to the Microcode Sequencer (MS) unit - see Microcode_Sequencer node for details.",
- "MetricExpr": "cpu@IDQ.MS_UOPS\\,cmask\\=1@ / tma_info_core_core_clks / 2",
+ "MetricExpr": "cpu@IDQ.MS_UOPS\\,cmask\\=1@ / tma_info_core_core_clks / 3.3",
"MetricGroup": "MicroSeq;TopdownL3;tma_L3_group;tma_fetch_bandwidth_group",
"MetricName": "tma_ms",
"MetricThreshold": "tma_ms > 0.05 & tma_fetch_bandwidth > 0.2",
@@ -1676,7 +1693,7 @@
{
"BriefDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired",
"DefaultMetricgroupName": "TopdownL1",
- "MetricExpr": "topdown\\-retiring / (topdown\\-fe\\-bound + topdown\\-bad\\-spec + topdown\\-retiring + topdown\\-be\\-bound) + 0 * tma_info_thread_slots",
+ "MetricExpr": "topdown\\-retiring / (topdown\\-fe\\-bound + topdown\\-bad\\-spec + topdown\\-retiring + topdown\\-be\\-bound)",
"MetricGroup": "BvUW;Default;TmaL1;TopdownL1;tma_L1_group",
"MetricName": "tma_retiring",
"MetricThreshold": "tma_retiring > 0.7 | tma_heavy_operations > 0.1",
@@ -1713,7 +1730,6 @@
},
{
"BriefDescription": "This metric represents rate of split store accesses",
- "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "MEM_INST_RETIRED.SPLIT_STORES / tma_info_core_core_clks",
"MetricGroup": "TopdownL4;tma_L4_group;tma_issueSpSt;tma_store_bound_group",
"MetricName": "tma_split_stores",
@@ -1727,7 +1743,7 @@
"MetricGroup": "BvMB;MemoryBW;Offcore;TopdownL4;tma_L4_group;tma_issueBW;tma_l3_bound_group",
"MetricName": "tma_sq_full",
"MetricThreshold": "tma_sq_full > 0.3 & (tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
- "PublicDescription": "This metric measures fraction of cycles where the Super Queue (SQ) was full taking into account all request-types and both hardware SMT threads (Logical Processors). Related metrics: tma_bottleneck_cache_memory_bandwidth, tma_fb_full, tma_info_system_dram_bw_use, tma_mem_bandwidth",
+ "PublicDescription": "This metric measures fraction of cycles where the Super Queue (SQ) was full taking into account all request-types and both hardware SMT threads (Logical Processors). Related metrics: tma_bottleneck_data_cache_memory_bandwidth, tma_fb_full, tma_info_system_dram_bw_use, tma_mem_bandwidth",
"ScaleUnit": "100%"
},
{
@@ -1741,7 +1757,6 @@
},
{
"BriefDescription": "This metric roughly estimates fraction of cycles when the memory subsystem had loads blocked since they could not forward data from earlier (in program order) overlapping stores",
- "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "13 * LD_BLOCKS.STORE_FORWARD / tma_info_thread_clks",
"MetricGroup": "TopdownL4;tma_L4_group;tma_l1_bound_group",
"MetricName": "tma_store_fwd_blk",
diff --git a/tools/perf/pmu-events/empty-pmu-events.c b/tools/perf/pmu-events/empty-pmu-events.c
index 041c598b16d8..76c395cf513c 100644
--- a/tools/perf/pmu-events/empty-pmu-events.c
+++ b/tools/perf/pmu-events/empty-pmu-events.c
@@ -19,236 +19,2753 @@ struct pmu_table_entry {
};
static const char *const big_c_string =
-/* offset=0 */ "software\000"
-/* offset=9 */ "cpu-clock\000software\000Per-CPU high-resolution timer based event\000config=0\000\00000\000\000\000\000\000"
-/* offset=87 */ "task-clock\000software\000Per-task high-resolution timer based event\000config=1\000\00000\000\000\000\000\000"
-/* offset=167 */ "faults\000software\000Number of page faults [This event is an alias of page-faults]\000config=2\000\00000\000\000\000\000\000"
-/* offset=262 */ "page-faults\000software\000Number of page faults [This event is an alias of faults]\000config=2\000\00000\000\000\000\000\000"
-/* offset=357 */ "context-switches\000software\000Number of context switches [This event is an alias of cs]\000config=3\000\00000\000\000\000\000\000"
-/* offset=458 */ "cs\000software\000Number of context switches [This event is an alias of context-switches]\000config=3\000\00000\000\000\000\000\000"
-/* offset=559 */ "cpu-migrations\000software\000Number of times a process has migrated to a new CPU [This event is an alias of migrations]\000config=4\000\00000\000\000\000\000\000"
-/* offset=691 */ "migrations\000software\000Number of times a process has migrated to a new CPU [This event is an alias of cpu-migrations]\000config=4\000\00000\000\000\000\000\000"
-/* offset=823 */ "minor-faults\000software\000Number of minor page faults. Minor faults don't require I/O to handle\000config=5\000\00000\000\000\000\000\000"
-/* offset=932 */ "major-faults\000software\000Number of major page faults. Major faults require I/O to handle\000config=6\000\00000\000\000\000\000\000"
-/* offset=1035 */ "alignment-faults\000software\000Number of kernel handled memory alignment faults\000config=7\000\00000\000\000\000\000\000"
-/* offset=1127 */ "emulation-faults\000software\000Number of kernel handled unimplemented instruction faults handled through emulation\000config=8\000\00000\000\000\000\000\000"
-/* offset=1254 */ "dummy\000software\000A placeholder event that doesn't count anything\000config=9\000\00000\000\000\000\000\000"
-/* offset=1334 */ "bpf-output\000software\000An event used by BPF programs to write to the perf ring buffer\000config=0xa\000\00000\000\000\000\000\000"
-/* offset=1436 */ "cgroup-switches\000software\000Number of context switches to a task in a different cgroup\000config=0xb\000\00000\000\000\000\000\000"
-/* offset=1539 */ "tool\000"
-/* offset=1544 */ "duration_time\000tool\000Wall clock interval time in nanoseconds\000config=1\000\00000\000\000\000\000\000"
-/* offset=1620 */ "user_time\000tool\000User (non-kernel) time in nanoseconds\000config=2\000\00000\000\000\000\000\000"
-/* offset=1690 */ "system_time\000tool\000System/kernel time in nanoseconds\000config=3\000\00000\000\000\000\000\000"
-/* offset=1758 */ "has_pmem\000tool\0001 if persistent memory installed otherwise 0\000config=4\000\00000\000\000\000\000\000"
-/* offset=1834 */ "num_cores\000tool\000Number of cores. A core consists of 1 or more thread, with each thread being associated with a logical Linux CPU\000config=5\000\00000\000\000\000\000\000"
-/* offset=1979 */ "num_cpus\000tool\000Number of logical Linux CPUs. There may be multiple such CPUs on a core\000config=6\000\00000\000\000\000\000\000"
-/* offset=2082 */ "num_cpus_online\000tool\000Number of online logical Linux CPUs. There may be multiple such CPUs on a core\000config=7\000\00000\000\000\000\000\000"
-/* offset=2199 */ "num_dies\000tool\000Number of dies. Each die has 1 or more cores\000config=8\000\00000\000\000\000\000\000"
-/* offset=2275 */ "num_packages\000tool\000Number of packages. Each package has 1 or more die\000config=9\000\00000\000\000\000\000\000"
-/* offset=2361 */ "slots\000tool\000Number of functional units that in parallel can execute parts of an instruction\000config=0xa\000\00000\000\000\000\000\000"
-/* offset=2471 */ "smt_on\000tool\0001 if simultaneous multithreading (aka hyperthreading) is enable otherwise 0\000config=0xb\000\00000\000\000\000\000\000"
-/* offset=2578 */ "system_tsc_freq\000tool\000The amount a Time Stamp Counter (TSC) increases per second\000config=0xc\000\00000\000\000\000\000\000"
-/* offset=2677 */ "default_core\000"
-/* offset=2690 */ "bp_l1_btb_correct\000branch\000L1 BTB Correction\000event=0x8a\000\00000\000\000\000\000\000"
-/* offset=2752 */ "bp_l2_btb_correct\000branch\000L2 BTB Correction\000event=0x8b\000\00000\000\000\000\000\000"
-/* offset=2814 */ "l3_cache_rd\000cache\000L3 cache access, read\000event=0x40\000\00000\000\000\000\000Attributable Level 3 cache access, read\000"
-/* offset=2912 */ "segment_reg_loads.any\000other\000Number of segment register loads\000event=6,period=200000,umask=0x80\000\00000\000\000\000\000\000"
-/* offset=3014 */ "dispatch_blocked.any\000other\000Memory cluster signals to block micro-op dispatch for any reason\000event=9,period=200000,umask=0x20\000\00000\000\000\000\000\000"
-/* offset=3147 */ "eist_trans\000other\000Number of Enhanced Intel SpeedStep(R) Technology (EIST) transitions\000event=0x3a,period=200000\000\00000\000\000\000\000\000"
-/* offset=3265 */ "hisi_sccl,ddrc\000"
-/* offset=3280 */ "uncore_hisi_ddrc.flux_wcmd\000uncore\000DDRC write commands\000event=2\000\00000\000\000\000\000\000"
-/* offset=3350 */ "uncore_cbox\000"
-/* offset=3362 */ "unc_cbo_xsnp_response.miss_eviction\000uncore\000A cross-core snoop resulted from L3 Eviction which misses in some processor core\000event=0x22,umask=0x81\000\00000\000\000\000\000\000"
-/* offset=3516 */ "event-hyphen\000uncore\000UNC_CBO_HYPHEN\000event=0xe0\000\00000\000\000\000\000\000"
-/* offset=3570 */ "event-two-hyph\000uncore\000UNC_CBO_TWO_HYPH\000event=0xc0\000\00000\000\000\000\000\000"
-/* offset=3628 */ "hisi_sccl,l3c\000"
-/* offset=3642 */ "uncore_hisi_l3c.rd_hit_cpipe\000uncore\000Total read hits\000event=7\000\00000\000\000\000\000\000"
-/* offset=3710 */ "uncore_imc_free_running\000"
-/* offset=3734 */ "uncore_imc_free_running.cache_miss\000uncore\000Total cache misses\000event=0x12\000\00000\000\000\000\000\000"
-/* offset=3814 */ "uncore_imc\000"
-/* offset=3825 */ "uncore_imc.cache_hits\000uncore\000Total cache hits\000event=0x34\000\00000\000\000\000\000\000"
-/* offset=3890 */ "uncore_sys_ddr_pmu\000"
-/* offset=3909 */ "sys_ddr_pmu.write_cycles\000uncore\000ddr write-cycles event\000event=0x2b\000v8\00000\000\000\000\000\000"
-/* offset=3985 */ "uncore_sys_ccn_pmu\000"
-/* offset=4004 */ "sys_ccn_pmu.read_cycles\000uncore\000ccn read-cycles event\000config=0x2c\0000x01\00000\000\000\000\000\000"
-/* offset=4081 */ "uncore_sys_cmn_pmu\000"
-/* offset=4100 */ "sys_cmn_pmu.hnf_cache_miss\000uncore\000Counts total cache misses in first lookup result (high priority)\000eventid=1,type=5\000(434|436|43c|43a).*\00000\000\000\000\000\000"
-/* offset=4243 */ "CPI\000\0001 / IPC\000\000\000\000\000\000\000\00000"
-/* offset=4265 */ "IPC\000group1\000inst_retired.any / cpu_clk_unhalted.thread\000\000\000\000\000\000\000\00000"
-/* offset=4328 */ "Frontend_Bound_SMT\000\000idq_uops_not_delivered.core / (4 * (cpu_clk_unhalted.thread / 2 * (1 + cpu_clk_unhalted.one_thread_active / cpu_clk_unhalted.ref_xclk)))\000\000\000\000\000\000\000\00000"
-/* offset=4494 */ "dcache_miss_cpi\000\000l1d\\-loads\\-misses / inst_retired.any\000\000\000\000\000\000\000\00000"
-/* offset=4558 */ "icache_miss_cycles\000\000l1i\\-loads\\-misses / inst_retired.any\000\000\000\000\000\000\000\00000"
-/* offset=4625 */ "cache_miss_cycles\000group1\000dcache_miss_cpi + icache_miss_cycles\000\000\000\000\000\000\000\00000"
-/* offset=4696 */ "DCache_L2_All_Hits\000\000l2_rqsts.demand_data_rd_hit + l2_rqsts.pf_hit + l2_rqsts.rfo_hit\000\000\000\000\000\000\000\00000"
-/* offset=4790 */ "DCache_L2_All_Miss\000\000max(l2_rqsts.all_demand_data_rd - l2_rqsts.demand_data_rd_hit, 0) + l2_rqsts.pf_miss + l2_rqsts.rfo_miss\000\000\000\000\000\000\000\00000"
-/* offset=4924 */ "DCache_L2_All\000\000DCache_L2_All_Hits + DCache_L2_All_Miss\000\000\000\000\000\000\000\00000"
-/* offset=4988 */ "DCache_L2_Hits\000\000d_ratio(DCache_L2_All_Hits, DCache_L2_All)\000\000\000\000\000\000\000\00000"
-/* offset=5056 */ "DCache_L2_Misses\000\000d_ratio(DCache_L2_All_Miss, DCache_L2_All)\000\000\000\000\000\000\000\00000"
-/* offset=5126 */ "M1\000\000ipc + M2\000\000\000\000\000\000\000\00000"
-/* offset=5148 */ "M2\000\000ipc + M1\000\000\000\000\000\000\000\00000"
-/* offset=5170 */ "M3\000\0001 / M3\000\000\000\000\000\000\000\00000"
-/* offset=5190 */ "L1D_Cache_Fill_BW\000\00064 * l1d.replacement / 1e9 / duration_time\000\000\000\000\000\000\000\00000"
+/* offset=0 */ "default_core\000"
+/* offset=13 */ "l1-dcache\000legacy cache\000Level 1 data cache read accesses\000legacy-cache-config=0\000\00010\000\000\000\000\000"
+/* offset=99 */ "l1-dcache-load\000legacy cache\000Level 1 data cache read accesses\000legacy-cache-config=0\000\00010\000\000\000\000\000"
+/* offset=190 */ "l1-dcache-load-refs\000legacy cache\000Level 1 data cache read accesses\000legacy-cache-config=0\000\00010\000\000\000\000\000"
+/* offset=286 */ "l1-dcache-load-reference\000legacy cache\000Level 1 data cache read accesses\000legacy-cache-config=0\000\00010\000\000\000\000\000"
+/* offset=387 */ "l1-dcache-load-ops\000legacy cache\000Level 1 data cache read accesses\000legacy-cache-config=0\000\00010\000\000\000\000\000"
+/* offset=482 */ "l1-dcache-load-access\000legacy cache\000Level 1 data cache read accesses\000legacy-cache-config=0\000\00010\000\000\000\000\000"
+/* offset=580 */ "l1-dcache-load-misses\000legacy cache\000Level 1 data cache read misses\000legacy-cache-config=0x10000\000\00000\000\000\000\000\000"
+/* offset=682 */ "l1-dcache-load-miss\000legacy cache\000Level 1 data cache read misses\000legacy-cache-config=0x10000\000\00010\000\000\000\000\000"
+/* offset=782 */ "l1-dcache-loads\000legacy cache\000Level 1 data cache read accesses\000legacy-cache-config=0\000\00000\000\000\000\000\000"
+/* offset=874 */ "l1-dcache-loads-refs\000legacy cache\000Level 1 data cache read accesses\000legacy-cache-config=0\000\00010\000\000\000\000\000"
+/* offset=971 */ "l1-dcache-loads-reference\000legacy cache\000Level 1 data cache read accesses\000legacy-cache-config=0\000\00010\000\000\000\000\000"
+/* offset=1073 */ "l1-dcache-loads-ops\000legacy cache\000Level 1 data cache read accesses\000legacy-cache-config=0\000\00010\000\000\000\000\000"
+/* offset=1169 */ "l1-dcache-loads-access\000legacy cache\000Level 1 data cache read accesses\000legacy-cache-config=0\000\00010\000\000\000\000\000"
+/* offset=1268 */ "l1-dcache-loads-misses\000legacy cache\000Level 1 data cache read misses\000legacy-cache-config=0x10000\000\00010\000\000\000\000\000"
+/* offset=1371 */ "l1-dcache-loads-miss\000legacy cache\000Level 1 data cache read misses\000legacy-cache-config=0x10000\000\00010\000\000\000\000\000"
+/* offset=1472 */ "l1-dcache-read\000legacy cache\000Level 1 data cache read accesses\000legacy-cache-config=0\000\00010\000\000\000\000\000"
+/* offset=1563 */ "l1-dcache-read-refs\000legacy cache\000Level 1 data cache read accesses\000legacy-cache-config=0\000\00010\000\000\000\000\000"
+/* offset=1659 */ "l1-dcache-read-reference\000legacy cache\000Level 1 data cache read accesses\000legacy-cache-config=0\000\00010\000\000\000\000\000"
+/* offset=1760 */ "l1-dcache-read-ops\000legacy cache\000Level 1 data cache read accesses\000legacy-cache-config=0\000\00010\000\000\000\000\000"
+/* offset=1855 */ "l1-dcache-read-access\000legacy cache\000Level 1 data cache read accesses\000legacy-cache-config=0\000\00010\000\000\000\000\000"
+/* offset=1953 */ "l1-dcache-read-misses\000legacy cache\000Level 1 data cache read misses\000legacy-cache-config=0x10000\000\00010\000\000\000\000\000"
+/* offset=2055 */ "l1-dcache-read-miss\000legacy cache\000Level 1 data cache read misses\000legacy-cache-config=0x10000\000\00010\000\000\000\000\000"
+/* offset=2155 */ "l1-dcache-store\000legacy cache\000Level 1 data cache write accesses\000legacy-cache-config=0x100\000\00010\000\000\000\000\000"
+/* offset=2252 */ "l1-dcache-store-refs\000legacy cache\000Level 1 data cache write accesses\000legacy-cache-config=0x100\000\00010\000\000\000\000\000"
+/* offset=2354 */ "l1-dcache-store-reference\000legacy cache\000Level 1 data cache write accesses\000legacy-cache-config=0x100\000\00010\000\000\000\000\000"
+/* offset=2461 */ "l1-dcache-store-ops\000legacy cache\000Level 1 data cache write accesses\000legacy-cache-config=0x100\000\00010\000\000\000\000\000"
+/* offset=2562 */ "l1-dcache-store-access\000legacy cache\000Level 1 data cache write accesses\000legacy-cache-config=0x100\000\00010\000\000\000\000\000"
+/* offset=2666 */ "l1-dcache-store-misses\000legacy cache\000Level 1 data cache write misses\000legacy-cache-config=0x10100\000\00000\000\000\000\000\000"
+/* offset=2770 */ "l1-dcache-store-miss\000legacy cache\000Level 1 data cache write misses\000legacy-cache-config=0x10100\000\00010\000\000\000\000\000"
+/* offset=2872 */ "l1-dcache-stores\000legacy cache\000Level 1 data cache write accesses\000legacy-cache-config=0x100\000\00000\000\000\000\000\000"
+/* offset=2970 */ "l1-dcache-stores-refs\000legacy cache\000Level 1 data cache write accesses\000legacy-cache-config=0x100\000\00010\000\000\000\000\000"
+/* offset=3073 */ "l1-dcache-stores-reference\000legacy cache\000Level 1 data cache write accesses\000legacy-cache-config=0x100\000\00010\000\000\000\000\000"
+/* offset=3181 */ "l1-dcache-stores-ops\000legacy cache\000Level 1 data cache write accesses\000legacy-cache-config=0x100\000\00010\000\000\000\000\000"
+/* offset=3283 */ "l1-dcache-stores-access\000legacy cache\000Level 1 data cache write accesses\000legacy-cache-config=0x100\000\00010\000\000\000\000\000"
+/* offset=3388 */ "l1-dcache-stores-misses\000legacy cache\000Level 1 data cache write misses\000legacy-cache-config=0x10100\000\00010\000\000\000\000\000"
+/* offset=3493 */ "l1-dcache-stores-miss\000legacy cache\000Level 1 data cache write misses\000legacy-cache-config=0x10100\000\00010\000\000\000\000\000"
+/* offset=3596 */ "l1-dcache-write\000legacy cache\000Level 1 data cache write accesses\000legacy-cache-config=0x100\000\00010\000\000\000\000\000"
+/* offset=3693 */ "l1-dcache-write-refs\000legacy cache\000Level 1 data cache write accesses\000legacy-cache-config=0x100\000\00010\000\000\000\000\000"
+/* offset=3795 */ "l1-dcache-write-reference\000legacy cache\000Level 1 data cache write accesses\000legacy-cache-config=0x100\000\00010\000\000\000\000\000"
+/* offset=3902 */ "l1-dcache-write-ops\000legacy cache\000Level 1 data cache write accesses\000legacy-cache-config=0x100\000\00010\000\000\000\000\000"
+/* offset=4003 */ "l1-dcache-write-access\000legacy cache\000Level 1 data cache write accesses\000legacy-cache-config=0x100\000\00010\000\000\000\000\000"
+/* offset=4107 */ "l1-dcache-write-misses\000legacy cache\000Level 1 data cache write misses\000legacy-cache-config=0x10100\000\00010\000\000\000\000\000"
+/* offset=4211 */ "l1-dcache-write-miss\000legacy cache\000Level 1 data cache write misses\000legacy-cache-config=0x10100\000\00010\000\000\000\000\000"
+/* offset=4313 */ "l1-dcache-prefetch\000legacy cache\000Level 1 data cache prefetch accesses\000legacy-cache-config=0x200\000\00010\000\000\000\000\000"
+/* offset=4416 */ "l1-dcache-prefetch-refs\000legacy cache\000Level 1 data cache prefetch accesses\000legacy-cache-config=0x200\000\00010\000\000\000\000\000"
+/* offset=4524 */ "l1-dcache-prefetch-reference\000legacy cache\000Level 1 data cache prefetch accesses\000legacy-cache-config=0x200\000\00010\000\000\000\000\000"
+/* offset=4637 */ "l1-dcache-prefetch-ops\000legacy cache\000Level 1 data cache prefetch accesses\000legacy-cache-config=0x200\000\00010\000\000\000\000\000"
+/* offset=4744 */ "l1-dcache-prefetch-access\000legacy cache\000Level 1 data cache prefetch accesses\000legacy-cache-config=0x200\000\00010\000\000\000\000\000"
+/* offset=4854 */ "l1-dcache-prefetch-misses\000legacy cache\000Level 1 data cache prefetch misses\000legacy-cache-config=0x10200\000\00000\000\000\000\000\000"
+/* offset=4964 */ "l1-dcache-prefetch-miss\000legacy cache\000Level 1 data cache prefetch misses\000legacy-cache-config=0x10200\000\00010\000\000\000\000\000"
+/* offset=5072 */ "l1-dcache-prefetches\000legacy cache\000Level 1 data cache prefetch accesses\000legacy-cache-config=0x200\000\00000\000\000\000\000\000"
+/* offset=5177 */ "l1-dcache-prefetches-refs\000legacy cache\000Level 1 data cache prefetch accesses\000legacy-cache-config=0x200\000\00010\000\000\000\000\000"
+/* offset=5287 */ "l1-dcache-prefetches-reference\000legacy cache\000Level 1 data cache prefetch accesses\000legacy-cache-config=0x200\000\00010\000\000\000\000\000"
+/* offset=5402 */ "l1-dcache-prefetches-ops\000legacy cache\000Level 1 data cache prefetch accesses\000legacy-cache-config=0x200\000\00010\000\000\000\000\000"
+/* offset=5511 */ "l1-dcache-prefetches-access\000legacy cache\000Level 1 data cache prefetch accesses\000legacy-cache-config=0x200\000\00010\000\000\000\000\000"
+/* offset=5623 */ "l1-dcache-prefetches-misses\000legacy cache\000Level 1 data cache prefetch misses\000legacy-cache-config=0x10200\000\00010\000\000\000\000\000"
+/* offset=5735 */ "l1-dcache-prefetches-miss\000legacy cache\000Level 1 data cache prefetch misses\000legacy-cache-config=0x10200\000\00010\000\000\000\000\000"
+/* offset=5845 */ "l1-dcache-speculative-read\000legacy cache\000Level 1 data cache prefetch accesses\000legacy-cache-config=0x200\000\00010\000\000\000\000\000"
+/* offset=5956 */ "l1-dcache-speculative-read-refs\000legacy cache\000Level 1 data cache prefetch accesses\000legacy-cache-config=0x200\000\00010\000\000\000\000\000"
+/* offset=6072 */ "l1-dcache-speculative-read-reference\000legacy cache\000Level 1 data cache prefetch accesses\000legacy-cache-config=0x200\000\00010\000\000\000\000\000"
+/* offset=6193 */ "l1-dcache-speculative-read-ops\000legacy cache\000Level 1 data cache prefetch accesses\000legacy-cache-config=0x200\000\00010\000\000\000\000\000"
+/* offset=6308 */ "l1-dcache-speculative-read-access\000legacy cache\000Level 1 data cache prefetch accesses\000legacy-cache-config=0x200\000\00010\000\000\000\000\000"
+/* offset=6426 */ "l1-dcache-speculative-read-misses\000legacy cache\000Level 1 data cache prefetch misses\000legacy-cache-config=0x10200\000\00010\000\000\000\000\000"
+/* offset=6544 */ "l1-dcache-speculative-read-miss\000legacy cache\000Level 1 data cache prefetch misses\000legacy-cache-config=0x10200\000\00010\000\000\000\000\000"
+/* offset=6660 */ "l1-dcache-speculative-load\000legacy cache\000Level 1 data cache prefetch accesses\000legacy-cache-config=0x200\000\00010\000\000\000\000\000"
+/* offset=6771 */ "l1-dcache-speculative-load-refs\000legacy cache\000Level 1 data cache prefetch accesses\000legacy-cache-config=0x200\000\00010\000\000\000\000\000"
+/* offset=6887 */ "l1-dcache-speculative-load-reference\000legacy cache\000Level 1 data cache prefetch accesses\000legacy-cache-config=0x200\000\00010\000\000\000\000\000"
+/* offset=7008 */ "l1-dcache-speculative-load-ops\000legacy cache\000Level 1 data cache prefetch accesses\000legacy-cache-config=0x200\000\00010\000\000\000\000\000"
+/* offset=7123 */ "l1-dcache-speculative-load-access\000legacy cache\000Level 1 data cache prefetch accesses\000legacy-cache-config=0x200\000\00010\000\000\000\000\000"
+/* offset=7241 */ "l1-dcache-speculative-load-misses\000legacy cache\000Level 1 data cache prefetch misses\000legacy-cache-config=0x10200\000\00010\000\000\000\000\000"
+/* offset=7359 */ "l1-dcache-speculative-load-miss\000legacy cache\000Level 1 data cache prefetch misses\000legacy-cache-config=0x10200\000\00010\000\000\000\000\000"
+/* offset=7475 */ "l1-dcache-refs\000legacy cache\000Level 1 data cache read accesses\000legacy-cache-config=0\000\00010\000\000\000\000\000"
+/* offset=7566 */ "l1-dcache-reference\000legacy cache\000Level 1 data cache read accesses\000legacy-cache-config=0\000\00010\000\000\000\000\000"
+/* offset=7662 */ "l1-dcache-ops\000legacy cache\000Level 1 data cache read accesses\000legacy-cache-config=0\000\00010\000\000\000\000\000"
+/* offset=7752 */ "l1-dcache-access\000legacy cache\000Level 1 data cache read accesses\000legacy-cache-config=0\000\00010\000\000\000\000\000"
+/* offset=7845 */ "l1-dcache-misses\000legacy cache\000Level 1 data cache read misses\000legacy-cache-config=0x10000\000\00010\000\000\000\000\000"
+/* offset=7942 */ "l1-dcache-miss\000legacy cache\000Level 1 data cache read misses\000legacy-cache-config=0x10000\000\00010\000\000\000\000\000"
+/* offset=8037 */ "l1-d\000legacy cache\000Level 1 data cache read accesses\000legacy-cache-config=0\000\00010\000\000\000\000\000"
+/* offset=8118 */ "l1-d-load\000legacy cache\000Level 1 data cache read accesses\000legacy-cache-config=0\000\00010\000\000\000\000\000"
+/* offset=8204 */ "l1-d-load-refs\000legacy cache\000Level 1 data cache read accesses\000legacy-cache-config=0\000\00010\000\000\000\000\000"
+/* offset=8295 */ "l1-d-load-reference\000legacy cache\000Level 1 data cache read accesses\000legacy-cache-config=0\000\00010\000\000\000\000\000"
+/* offset=8391 */ "l1-d-load-ops\000legacy cache\000Level 1 data cache read accesses\000legacy-cache-config=0\000\00010\000\000\000\000\000"
+/* offset=8481 */ "l1-d-load-access\000legacy cache\000Level 1 data cache read accesses\000legacy-cache-config=0\000\00010\000\000\000\000\000"
+/* offset=8574 */ "l1-d-load-misses\000legacy cache\000Level 1 data cache read misses\000legacy-cache-config=0x10000\000\00010\000\000\000\000\000"
+/* offset=8671 */ "l1-d-load-miss\000legacy cache\000Level 1 data cache read misses\000legacy-cache-config=0x10000\000\00010\000\000\000\000\000"
+/* offset=8766 */ "l1-d-loads\000legacy cache\000Level 1 data cache read accesses\000legacy-cache-config=0\000\00010\000\000\000\000\000"
+/* offset=8853 */ "l1-d-loads-refs\000legacy cache\000Level 1 data cache read accesses\000legacy-cache-config=0\000\00010\000\000\000\000\000"
+/* offset=8945 */ "l1-d-loads-reference\000legacy cache\000Level 1 data cache read accesses\000legacy-cache-config=0\000\00010\000\000\000\000\000"
+/* offset=9042 */ "l1-d-loads-ops\000legacy cache\000Level 1 data cache read accesses\000legacy-cache-config=0\000\00010\000\000\000\000\000"
+/* offset=9133 */ "l1-d-loads-access\000legacy cache\000Level 1 data cache read accesses\000legacy-cache-config=0\000\00010\000\000\000\000\000"
+/* offset=9227 */ "l1-d-loads-misses\000legacy cache\000Level 1 data cache read misses\000legacy-cache-config=0x10000\000\00010\000\000\000\000\000"
+/* offset=9325 */ "l1-d-loads-miss\000legacy cache\000Level 1 data cache read misses\000legacy-cache-config=0x10000\000\00010\000\000\000\000\000"
+/* offset=9421 */ "l1-d-read\000legacy cache\000Level 1 data cache read accesses\000legacy-cache-config=0\000\00010\000\000\000\000\000"
+/* offset=9507 */ "l1-d-read-refs\000legacy cache\000Level 1 data cache read accesses\000legacy-cache-config=0\000\00010\000\000\000\000\000"
+/* offset=9598 */ "l1-d-read-reference\000legacy cache\000Level 1 data cache read accesses\000legacy-cache-config=0\000\00010\000\000\000\000\000"
+/* offset=9694 */ "l1-d-read-ops\000legacy cache\000Level 1 data cache read accesses\000legacy-cache-config=0\000\00010\000\000\000\000\000"
+/* offset=9784 */ "l1-d-read-access\000legacy cache\000Level 1 data cache read accesses\000legacy-cache-config=0\000\00010\000\000\000\000\000"
+/* offset=9877 */ "l1-d-read-misses\000legacy cache\000Level 1 data cache read misses\000legacy-cache-config=0x10000\000\00010\000\000\000\000\000"
+/* offset=9974 */ "l1-d-read-miss\000legacy cache\000Level 1 data cache read misses\000legacy-cache-config=0x10000\000\00010\000\000\000\000\000"
+/* offset=10069 */ "l1-d-store\000legacy cache\000Level 1 data cache write accesses\000legacy-cache-config=0x100\000\00010\000\000\000\000\000"
+/* offset=10161 */ "l1-d-store-refs\000legacy cache\000Level 1 data cache write accesses\000legacy-cache-config=0x100\000\00010\000\000\000\000\000"
+/* offset=10258 */ "l1-d-store-reference\000legacy cache\000Level 1 data cache write accesses\000legacy-cache-config=0x100\000\00010\000\000\000\000\000"
+/* offset=10360 */ "l1-d-store-ops\000legacy cache\000Level 1 data cache write accesses\000legacy-cache-config=0x100\000\00010\000\000\000\000\000"
+/* offset=10456 */ "l1-d-store-access\000legacy cache\000Level 1 data cache write accesses\000legacy-cache-config=0x100\000\00010\000\000\000\000\000"
+/* offset=10555 */ "l1-d-store-misses\000legacy cache\000Level 1 data cache write misses\000legacy-cache-config=0x10100\000\00010\000\000\000\000\000"
+/* offset=10654 */ "l1-d-store-miss\000legacy cache\000Level 1 data cache write misses\000legacy-cache-config=0x10100\000\00010\000\000\000\000\000"
+/* offset=10751 */ "l1-d-stores\000legacy cache\000Level 1 data cache write accesses\000legacy-cache-config=0x100\000\00010\000\000\000\000\000"
+/* offset=10844 */ "l1-d-stores-refs\000legacy cache\000Level 1 data cache write accesses\000legacy-cache-config=0x100\000\00010\000\000\000\000\000"
+/* offset=10942 */ "l1-d-stores-reference\000legacy cache\000Level 1 data cache write accesses\000legacy-cache-config=0x100\000\00010\000\000\000\000\000"
+/* offset=11045 */ "l1-d-stores-ops\000legacy cache\000Level 1 data cache write accesses\000legacy-cache-config=0x100\000\00010\000\000\000\000\000"
+/* offset=11142 */ "l1-d-stores-access\000legacy cache\000Level 1 data cache write accesses\000legacy-cache-config=0x100\000\00010\000\000\000\000\000"
+/* offset=11242 */ "l1-d-stores-misses\000legacy cache\000Level 1 data cache write misses\000legacy-cache-config=0x10100\000\00010\000\000\000\000\000"
+/* offset=11342 */ "l1-d-stores-miss\000legacy cache\000Level 1 data cache write misses\000legacy-cache-config=0x10100\000\00010\000\000\000\000\000"
+/* offset=11440 */ "l1-d-write\000legacy cache\000Level 1 data cache write accesses\000legacy-cache-config=0x100\000\00010\000\000\000\000\000"
+/* offset=11532 */ "l1-d-write-refs\000legacy cache\000Level 1 data cache write accesses\000legacy-cache-config=0x100\000\00010\000\000\000\000\000"
+/* offset=11629 */ "l1-d-write-reference\000legacy cache\000Level 1 data cache write accesses\000legacy-cache-config=0x100\000\00010\000\000\000\000\000"
+/* offset=11731 */ "l1-d-write-ops\000legacy cache\000Level 1 data cache write accesses\000legacy-cache-config=0x100\000\00010\000\000\000\000\000"
+/* offset=11827 */ "l1-d-write-access\000legacy cache\000Level 1 data cache write accesses\000legacy-cache-config=0x100\000\00010\000\000\000\000\000"
+/* offset=11926 */ "l1-d-write-misses\000legacy cache\000Level 1 data cache write misses\000legacy-cache-config=0x10100\000\00010\000\000\000\000\000"
+/* offset=12025 */ "l1-d-write-miss\000legacy cache\000Level 1 data cache write misses\000legacy-cache-config=0x10100\000\00010\000\000\000\000\000"
+/* offset=12122 */ "l1-d-prefetch\000legacy cache\000Level 1 data cache prefetch accesses\000legacy-cache-config=0x200\000\00010\000\000\000\000\000"
+/* offset=12220 */ "l1-d-prefetch-refs\000legacy cache\000Level 1 data cache prefetch accesses\000legacy-cache-config=0x200\000\00010\000\000\000\000\000"
+/* offset=12323 */ "l1-d-prefetch-reference\000legacy cache\000Level 1 data cache prefetch accesses\000legacy-cache-config=0x200\000\00010\000\000\000\000\000"
+/* offset=12431 */ "l1-d-prefetch-ops\000legacy cache\000Level 1 data cache prefetch accesses\000legacy-cache-config=0x200\000\00010\000\000\000\000\000"
+/* offset=12533 */ "l1-d-prefetch-access\000legacy cache\000Level 1 data cache prefetch accesses\000legacy-cache-config=0x200\000\00010\000\000\000\000\000"
+/* offset=12638 */ "l1-d-prefetch-misses\000legacy cache\000Level 1 data cache prefetch misses\000legacy-cache-config=0x10200\000\00010\000\000\000\000\000"
+/* offset=12743 */ "l1-d-prefetch-miss\000legacy cache\000Level 1 data cache prefetch misses\000legacy-cache-config=0x10200\000\00010\000\000\000\000\000"
+/* offset=12846 */ "l1-d-prefetches\000legacy cache\000Level 1 data cache prefetch accesses\000legacy-cache-config=0x200\000\00010\000\000\000\000\000"
+/* offset=12946 */ "l1-d-prefetches-refs\000legacy cache\000Level 1 data cache prefetch accesses\000legacy-cache-config=0x200\000\00010\000\000\000\000\000"
+/* offset=13051 */ "l1-d-prefetches-reference\000legacy cache\000Level 1 data cache prefetch accesses\000legacy-cache-config=0x200\000\00010\000\000\000\000\000"
+/* offset=13161 */ "l1-d-prefetches-ops\000legacy cache\000Level 1 data cache prefetch accesses\000legacy-cache-config=0x200\000\00010\000\000\000\000\000"
+/* offset=13265 */ "l1-d-prefetches-access\000legacy cache\000Level 1 data cache prefetch accesses\000legacy-cache-config=0x200\000\00010\000\000\000\000\000"
+/* offset=13372 */ "l1-d-prefetches-misses\000legacy cache\000Level 1 data cache prefetch misses\000legacy-cache-config=0x10200\000\00010\000\000\000\000\000"
+/* offset=13479 */ "l1-d-prefetches-miss\000legacy cache\000Level 1 data cache prefetch misses\000legacy-cache-config=0x10200\000\00010\000\000\000\000\000"
+/* offset=13584 */ "l1-d-speculative-read\000legacy cache\000Level 1 data cache prefetch accesses\000legacy-cache-config=0x200\000\00010\000\000\000\000\000"
+/* offset=13690 */ "l1-d-speculative-read-refs\000legacy cache\000Level 1 data cache prefetch accesses\000legacy-cache-config=0x200\000\00010\000\000\000\000\000"
+/* offset=13801 */ "l1-d-speculative-read-reference\000legacy cache\000Level 1 data cache prefetch accesses\000legacy-cache-config=0x200\000\00010\000\000\000\000\000"
+/* offset=13917 */ "l1-d-speculative-read-ops\000legacy cache\000Level 1 data cache prefetch accesses\000legacy-cache-config=0x200\000\00010\000\000\000\000\000"
+/* offset=14027 */ "l1-d-speculative-read-access\000legacy cache\000Level 1 data cache prefetch accesses\000legacy-cache-config=0x200\000\00010\000\000\000\000\000"
+/* offset=14140 */ "l1-d-speculative-read-misses\000legacy cache\000Level 1 data cache prefetch misses\000legacy-cache-config=0x10200\000\00010\000\000\000\000\000"
+/* offset=14253 */ "l1-d-speculative-read-miss\000legacy cache\000Level 1 data cache prefetch misses\000legacy-cache-config=0x10200\000\00010\000\000\000\000\000"
+/* offset=14364 */ "l1-d-speculative-load\000legacy cache\000Level 1 data cache prefetch accesses\000legacy-cache-config=0x200\000\00010\000\000\000\000\000"
+/* offset=14470 */ "l1-d-speculative-load-refs\000legacy cache\000Level 1 data cache prefetch accesses\000legacy-cache-config=0x200\000\00010\000\000\000\000\000"
+/* offset=14581 */ "l1-d-speculative-load-reference\000legacy cache\000Level 1 data cache prefetch accesses\000legacy-cache-config=0x200\000\00010\000\000\000\000\000"
+/* offset=14697 */ "l1-d-speculative-load-ops\000legacy cache\000Level 1 data cache prefetch accesses\000legacy-cache-config=0x200\000\00010\000\000\000\000\000"
+/* offset=14807 */ "l1-d-speculative-load-access\000legacy cache\000Level 1 data cache prefetch accesses\000legacy-cache-config=0x200\000\00010\000\000\000\000\000"
+/* offset=14920 */ "l1-d-speculative-load-misses\000legacy cache\000Level 1 data cache prefetch misses\000legacy-cache-config=0x10200\000\00010\000\000\000\000\000"
+/* offset=15033 */ "l1-d-speculative-load-miss\000legacy cache\000Level 1 data cache prefetch misses\000legacy-cache-config=0x10200\000\00010\000\000\000\000\000"
+/* offset=15144 */ "l1-d-refs\000legacy cache\000Level 1 data cache read accesses\000legacy-cache-config=0\000\00010\000\000\000\000\000"
+/* offset=15230 */ "l1-d-reference\000legacy cache\000Level 1 data cache read accesses\000legacy-cache-config=0\000\00010\000\000\000\000\000"
+/* offset=15321 */ "l1-d-ops\000legacy cache\000Level 1 data cache read accesses\000legacy-cache-config=0\000\00010\000\000\000\000\000"
+/* offset=15406 */ "l1-d-access\000legacy cache\000Level 1 data cache read accesses\000legacy-cache-config=0\000\00010\000\000\000\000\000"
+/* offset=15494 */ "l1-d-misses\000legacy cache\000Level 1 data cache read misses\000legacy-cache-config=0x10000\000\00010\000\000\000\000\000"
+/* offset=15586 */ "l1-d-miss\000legacy cache\000Level 1 data cache read misses\000legacy-cache-config=0x10000\000\00010\000\000\000\000\000"
+/* offset=15676 */ "l1d\000legacy cache\000Level 1 data cache read accesses\000legacy-cache-config=0\000\00010\000\000\000\000\000"
+/* offset=15756 */ "l1d-load\000legacy cache\000Level 1 data cache read accesses\000legacy-cache-config=0\000\00010\000\000\000\000\000"
+/* offset=15841 */ "l1d-load-refs\000legacy cache\000Level 1 data cache read accesses\000legacy-cache-config=0\000\00010\000\000\000\000\000"
+/* offset=15931 */ "l1d-load-reference\000legacy cache\000Level 1 data cache read accesses\000legacy-cache-config=0\000\00010\000\000\000\000\000"
+/* offset=16026 */ "l1d-load-ops\000legacy cache\000Level 1 data cache read accesses\000legacy-cache-config=0\000\00010\000\000\000\000\000"
+/* offset=16115 */ "l1d-load-access\000legacy cache\000Level 1 data cache read accesses\000legacy-cache-config=0\000\00010\000\000\000\000\000"
+/* offset=16207 */ "l1d-load-misses\000legacy cache\000Level 1 data cache read misses\000legacy-cache-config=0x10000\000\00010\000\000\000\000\000"
+/* offset=16303 */ "l1d-load-miss\000legacy cache\000Level 1 data cache read misses\000legacy-cache-config=0x10000\000\00010\000\000\000\000\000"
+/* offset=16397 */ "l1d-loads\000legacy cache\000Level 1 data cache read accesses\000legacy-cache-config=0\000\00010\000\000\000\000\000"
+/* offset=16483 */ "l1d-loads-refs\000legacy cache\000Level 1 data cache read accesses\000legacy-cache-config=0\000\00010\000\000\000\000\000"
+/* offset=16574 */ "l1d-loads-reference\000legacy cache\000Level 1 data cache read accesses\000legacy-cache-config=0\000\00010\000\000\000\000\000"
+/* offset=16670 */ "l1d-loads-ops\000legacy cache\000Level 1 data cache read accesses\000legacy-cache-config=0\000\00010\000\000\000\000\000"
+/* offset=16760 */ "l1d-loads-access\000legacy cache\000Level 1 data cache read accesses\000legacy-cache-config=0\000\00010\000\000\000\000\000"
+/* offset=16853 */ "l1d-loads-misses\000legacy cache\000Level 1 data cache read misses\000legacy-cache-config=0x10000\000\00010\000\000\000\000\000"
+/* offset=16950 */ "l1d-loads-miss\000legacy cache\000Level 1 data cache read misses\000legacy-cache-config=0x10000\000\00010\000\000\000\000\000"
+/* offset=17045 */ "l1d-read\000legacy cache\000Level 1 data cache read accesses\000legacy-cache-config=0\000\00010\000\000\000\000\000"
+/* offset=17130 */ "l1d-read-refs\000legacy cache\000Level 1 data cache read accesses\000legacy-cache-config=0\000\00010\000\000\000\000\000"
+/* offset=17220 */ "l1d-read-reference\000legacy cache\000Level 1 data cache read accesses\000legacy-cache-config=0\000\00010\000\000\000\000\000"
+/* offset=17315 */ "l1d-read-ops\000legacy cache\000Level 1 data cache read accesses\000legacy-cache-config=0\000\00010\000\000\000\000\000"
+/* offset=17404 */ "l1d-read-access\000legacy cache\000Level 1 data cache read accesses\000legacy-cache-config=0\000\00010\000\000\000\000\000"
+/* offset=17496 */ "l1d-read-misses\000legacy cache\000Level 1 data cache read misses\000legacy-cache-config=0x10000\000\00010\000\000\000\000\000"
+/* offset=17592 */ "l1d-read-miss\000legacy cache\000Level 1 data cache read misses\000legacy-cache-config=0x10000\000\00010\000\000\000\000\000"
+/* offset=17686 */ "l1d-store\000legacy cache\000Level 1 data cache write accesses\000legacy-cache-config=0x100\000\00010\000\000\000\000\000"
+/* offset=17777 */ "l1d-store-refs\000legacy cache\000Level 1 data cache write accesses\000legacy-cache-config=0x100\000\00010\000\000\000\000\000"
+/* offset=17873 */ "l1d-store-reference\000legacy cache\000Level 1 data cache write accesses\000legacy-cache-config=0x100\000\00010\000\000\000\000\000"
+/* offset=17974 */ "l1d-store-ops\000legacy cache\000Level 1 data cache write accesses\000legacy-cache-config=0x100\000\00010\000\000\000\000\000"
+/* offset=18069 */ "l1d-store-access\000legacy cache\000Level 1 data cache write accesses\000legacy-cache-config=0x100\000\00010\000\000\000\000\000"
+/* offset=18167 */ "l1d-store-misses\000legacy cache\000Level 1 data cache write misses\000legacy-cache-config=0x10100\000\00010\000\000\000\000\000"
+/* offset=18265 */ "l1d-store-miss\000legacy cache\000Level 1 data cache write misses\000legacy-cache-config=0x10100\000\00010\000\000\000\000\000"
+/* offset=18361 */ "l1d-stores\000legacy cache\000Level 1 data cache write accesses\000legacy-cache-config=0x100\000\00010\000\000\000\000\000"
+/* offset=18453 */ "l1d-stores-refs\000legacy cache\000Level 1 data cache write accesses\000legacy-cache-config=0x100\000\00010\000\000\000\000\000"
+/* offset=18550 */ "l1d-stores-reference\000legacy cache\000Level 1 data cache write accesses\000legacy-cache-config=0x100\000\00010\000\000\000\000\000"
+/* offset=18652 */ "l1d-stores-ops\000legacy cache\000Level 1 data cache write accesses\000legacy-cache-config=0x100\000\00010\000\000\000\000\000"
+/* offset=18748 */ "l1d-stores-access\000legacy cache\000Level 1 data cache write accesses\000legacy-cache-config=0x100\000\00010\000\000\000\000\000"
+/* offset=18847 */ "l1d-stores-misses\000legacy cache\000Level 1 data cache write misses\000legacy-cache-config=0x10100\000\00010\000\000\000\000\000"
+/* offset=18946 */ "l1d-stores-miss\000legacy cache\000Level 1 data cache write misses\000legacy-cache-config=0x10100\000\00010\000\000\000\000\000"
+/* offset=19043 */ "l1d-write\000legacy cache\000Level 1 data cache write accesses\000legacy-cache-config=0x100\000\00010\000\000\000\000\000"
+/* offset=19134 */ "l1d-write-refs\000legacy cache\000Level 1 data cache write accesses\000legacy-cache-config=0x100\000\00010\000\000\000\000\000"
+/* offset=19230 */ "l1d-write-reference\000legacy cache\000Level 1 data cache write accesses\000legacy-cache-config=0x100\000\00010\000\000\000\000\000"
+/* offset=19331 */ "l1d-write-ops\000legacy cache\000Level 1 data cache write accesses\000legacy-cache-config=0x100\000\00010\000\000\000\000\000"
+/* offset=19426 */ "l1d-write-access\000legacy cache\000Level 1 data cache write accesses\000legacy-cache-config=0x100\000\00010\000\000\000\000\000"
+/* offset=19524 */ "l1d-write-misses\000legacy cache\000Level 1 data cache write misses\000legacy-cache-config=0x10100\000\00010\000\000\000\000\000"
+/* offset=19622 */ "l1d-write-miss\000legacy cache\000Level 1 data cache write misses\000legacy-cache-config=0x10100\000\00010\000\000\000\000\000"
+/* offset=19718 */ "l1d-prefetch\000legacy cache\000Level 1 data cache prefetch accesses\000legacy-cache-config=0x200\000\00010\000\000\000\000\000"
+/* offset=19815 */ "l1d-prefetch-refs\000legacy cache\000Level 1 data cache prefetch accesses\000legacy-cache-config=0x200\000\00010\000\000\000\000\000"
+/* offset=19917 */ "l1d-prefetch-reference\000legacy cache\000Level 1 data cache prefetch accesses\000legacy-cache-config=0x200\000\00010\000\000\000\000\000"
+/* offset=20024 */ "l1d-prefetch-ops\000legacy cache\000Level 1 data cache prefetch accesses\000legacy-cache-config=0x200\000\00010\000\000\000\000\000"
+/* offset=20125 */ "l1d-prefetch-access\000legacy cache\000Level 1 data cache prefetch accesses\000legacy-cache-config=0x200\000\00010\000\000\000\000\000"
+/* offset=20229 */ "l1d-prefetch-misses\000legacy cache\000Level 1 data cache prefetch misses\000legacy-cache-config=0x10200\000\00010\000\000\000\000\000"
+/* offset=20333 */ "l1d-prefetch-miss\000legacy cache\000Level 1 data cache prefetch misses\000legacy-cache-config=0x10200\000\00010\000\000\000\000\000"
+/* offset=20435 */ "l1d-prefetches\000legacy cache\000Level 1 data cache prefetch accesses\000legacy-cache-config=0x200\000\00010\000\000\000\000\000"
+/* offset=20534 */ "l1d-prefetches-refs\000legacy cache\000Level 1 data cache prefetch accesses\000legacy-cache-config=0x200\000\00010\000\000\000\000\000"
+/* offset=20638 */ "l1d-prefetches-reference\000legacy cache\000Level 1 data cache prefetch accesses\000legacy-cache-config=0x200\000\00010\000\000\000\000\000"
+/* offset=20747 */ "l1d-prefetches-ops\000legacy cache\000Level 1 data cache prefetch accesses\000legacy-cache-config=0x200\000\00010\000\000\000\000\000"
+/* offset=20850 */ "l1d-prefetches-access\000legacy cache\000Level 1 data cache prefetch accesses\000legacy-cache-config=0x200\000\00010\000\000\000\000\000"
+/* offset=20956 */ "l1d-prefetches-misses\000legacy cache\000Level 1 data cache prefetch misses\000legacy-cache-config=0x10200\000\00010\000\000\000\000\000"
+/* offset=21062 */ "l1d-prefetches-miss\000legacy cache\000Level 1 data cache prefetch misses\000legacy-cache-config=0x10200\000\00010\000\000\000\000\000"
+/* offset=21166 */ "l1d-speculative-read\000legacy cache\000Level 1 data cache prefetch accesses\000legacy-cache-config=0x200\000\00010\000\000\000\000\000"
+/* offset=21271 */ "l1d-speculative-read-refs\000legacy cache\000Level 1 data cache prefetch accesses\000legacy-cache-config=0x200\000\00010\000\000\000\000\000"
+/* offset=21381 */ "l1d-speculative-read-reference\000legacy cache\000Level 1 data cache prefetch accesses\000legacy-cache-config=0x200\000\00010\000\000\000\000\000"
+/* offset=21496 */ "l1d-speculative-read-ops\000legacy cache\000Level 1 data cache prefetch accesses\000legacy-cache-config=0x200\000\00010\000\000\000\000\000"
+/* offset=21605 */ "l1d-speculative-read-access\000legacy cache\000Level 1 data cache prefetch accesses\000legacy-cache-config=0x200\000\00010\000\000\000\000\000"
+/* offset=21717 */ "l1d-speculative-read-misses\000legacy cache\000Level 1 data cache prefetch misses\000legacy-cache-config=0x10200\000\00010\000\000\000\000\000"
+/* offset=21829 */ "l1d-speculative-read-miss\000legacy cache\000Level 1 data cache prefetch misses\000legacy-cache-config=0x10200\000\00010\000\000\000\000\000"
+/* offset=21939 */ "l1d-speculative-load\000legacy cache\000Level 1 data cache prefetch accesses\000legacy-cache-config=0x200\000\00010\000\000\000\000\000"
+/* offset=22044 */ "l1d-speculative-load-refs\000legacy cache\000Level 1 data cache prefetch accesses\000legacy-cache-config=0x200\000\00010\000\000\000\000\000"
+/* offset=22154 */ "l1d-speculative-load-reference\000legacy cache\000Level 1 data cache prefetch accesses\000legacy-cache-config=0x200\000\00010\000\000\000\000\000"
+/* offset=22269 */ "l1d-speculative-load-ops\000legacy cache\000Level 1 data cache prefetch accesses\000legacy-cache-config=0x200\000\00010\000\000\000\000\000"
+/* offset=22378 */ "l1d-speculative-load-access\000legacy cache\000Level 1 data cache prefetch accesses\000legacy-cache-config=0x200\000\00010\000\000\000\000\000"
+/* offset=22490 */ "l1d-speculative-load-misses\000legacy cache\000Level 1 data cache prefetch misses\000legacy-cache-config=0x10200\000\00010\000\000\000\000\000"
+/* offset=22602 */ "l1d-speculative-load-miss\000legacy cache\000Level 1 data cache prefetch misses\000legacy-cache-config=0x10200\000\00010\000\000\000\000\000"
+/* offset=22712 */ "l1d-refs\000legacy cache\000Level 1 data cache read accesses\000legacy-cache-config=0\000\00010\000\000\000\000\000"
+/* offset=22797 */ "l1d-reference\000legacy cache\000Level 1 data cache read accesses\000legacy-cache-config=0\000\00010\000\000\000\000\000"
+/* offset=22887 */ "l1d-ops\000legacy cache\000Level 1 data cache read accesses\000legacy-cache-config=0\000\00010\000\000\000\000\000"
+/* offset=22971 */ "l1d-access\000legacy cache\000Level 1 data cache read accesses\000legacy-cache-config=0\000\00010\000\000\000\000\000"
+/* offset=23058 */ "l1d-misses\000legacy cache\000Level 1 data cache read misses\000legacy-cache-config=0x10000\000\00010\000\000\000\000\000"
+/* offset=23149 */ "l1d-miss\000legacy cache\000Level 1 data cache read misses\000legacy-cache-config=0x10000\000\00010\000\000\000\000\000"
+/* offset=23238 */ "l1-data\000legacy cache\000Level 1 data cache read accesses\000legacy-cache-config=0\000\00010\000\000\000\000\000"
+/* offset=23322 */ "l1-data-load\000legacy cache\000Level 1 data cache read accesses\000legacy-cache-config=0\000\00010\000\000\000\000\000"
+/* offset=23411 */ "l1-data-load-refs\000legacy cache\000Level 1 data cache read accesses\000legacy-cache-config=0\000\00010\000\000\000\000\000"
+/* offset=23505 */ "l1-data-load-reference\000legacy cache\000Level 1 data cache read accesses\000legacy-cache-config=0\000\00010\000\000\000\000\000"
+/* offset=23604 */ "l1-data-load-ops\000legacy cache\000Level 1 data cache read accesses\000legacy-cache-config=0\000\00010\000\000\000\000\000"
+/* offset=23697 */ "l1-data-load-access\000legacy cache\000Level 1 data cache read accesses\000legacy-cache-config=0\000\00010\000\000\000\000\000"
+/* offset=23793 */ "l1-data-load-misses\000legacy cache\000Level 1 data cache read misses\000legacy-cache-config=0x10000\000\00010\000\000\000\000\000"
+/* offset=23893 */ "l1-data-load-miss\000legacy cache\000Level 1 data cache read misses\000legacy-cache-config=0x10000\000\00010\000\000\000\000\000"
+/* offset=23991 */ "l1-data-loads\000legacy cache\000Level 1 data cache read accesses\000legacy-cache-config=0\000\00010\000\000\000\000\000"
+/* offset=24081 */ "l1-data-loads-refs\000legacy cache\000Level 1 data cache read accesses\000legacy-cache-config=0\000\00010\000\000\000\000\000"
+/* offset=24176 */ "l1-data-loads-reference\000legacy cache\000Level 1 data cache read accesses\000legacy-cache-config=0\000\00010\000\000\000\000\000"
+/* offset=24276 */ "l1-data-loads-ops\000legacy cache\000Level 1 data cache read accesses\000legacy-cache-config=0\000\00010\000\000\000\000\000"
+/* offset=24370 */ "l1-data-loads-access\000legacy cache\000Level 1 data cache read accesses\000legacy-cache-config=0\000\00010\000\000\000\000\000"
+/* offset=24467 */ "l1-data-loads-misses\000legacy cache\000Level 1 data cache read misses\000legacy-cache-config=0x10000\000\00010\000\000\000\000\000"
+/* offset=24568 */ "l1-data-loads-miss\000legacy cache\000Level 1 data cache read misses\000legacy-cache-config=0x10000\000\00010\000\000\000\000\000"
+/* offset=24667 */ "l1-data-read\000legacy cache\000Level 1 data cache read accesses\000legacy-cache-config=0\000\00010\000\000\000\000\000"
+/* offset=24756 */ "l1-data-read-refs\000legacy cache\000Level 1 data cache read accesses\000legacy-cache-config=0\000\00010\000\000\000\000\000"
+/* offset=24850 */ "l1-data-read-reference\000legacy cache\000Level 1 data cache read accesses\000legacy-cache-config=0\000\00010\000\000\000\000\000"
+/* offset=24949 */ "l1-data-read-ops\000legacy cache\000Level 1 data cache read accesses\000legacy-cache-config=0\000\00010\000\000\000\000\000"
+/* offset=25042 */ "l1-data-read-access\000legacy cache\000Level 1 data cache read accesses\000legacy-cache-config=0\000\00010\000\000\000\000\000"
+/* offset=25138 */ "l1-data-read-misses\000legacy cache\000Level 1 data cache read misses\000legacy-cache-config=0x10000\000\00010\000\000\000\000\000"
+/* offset=25238 */ "l1-data-read-miss\000legacy cache\000Level 1 data cache read misses\000legacy-cache-config=0x10000\000\00010\000\000\000\000\000"
+/* offset=25336 */ "l1-data-store\000legacy cache\000Level 1 data cache write accesses\000legacy-cache-config=0x100\000\00010\000\000\000\000\000"
+/* offset=25431 */ "l1-data-store-refs\000legacy cache\000Level 1 data cache write accesses\000legacy-cache-config=0x100\000\00010\000\000\000\000\000"
+/* offset=25531 */ "l1-data-store-reference\000legacy cache\000Level 1 data cache write accesses\000legacy-cache-config=0x100\000\00010\000\000\000\000\000"
+/* offset=25636 */ "l1-data-store-ops\000legacy cache\000Level 1 data cache write accesses\000legacy-cache-config=0x100\000\00010\000\000\000\000\000"
+/* offset=25735 */ "l1-data-store-access\000legacy cache\000Level 1 data cache write accesses\000legacy-cache-config=0x100\000\00010\000\000\000\000\000"
+/* offset=25837 */ "l1-data-store-misses\000legacy cache\000Level 1 data cache write misses\000legacy-cache-config=0x10100\000\00010\000\000\000\000\000"
+/* offset=25939 */ "l1-data-store-miss\000legacy cache\000Level 1 data cache write misses\000legacy-cache-config=0x10100\000\00010\000\000\000\000\000"
+/* offset=26039 */ "l1-data-stores\000legacy cache\000Level 1 data cache write accesses\000legacy-cache-config=0x100\000\00010\000\000\000\000\000"
+/* offset=26135 */ "l1-data-stores-refs\000legacy cache\000Level 1 data cache write accesses\000legacy-cache-config=0x100\000\00010\000\000\000\000\000"
+/* offset=26236 */ "l1-data-stores-reference\000legacy cache\000Level 1 data cache write accesses\000legacy-cache-config=0x100\000\00010\000\000\000\000\000"
+/* offset=26342 */ "l1-data-stores-ops\000legacy cache\000Level 1 data cache write accesses\000legacy-cache-config=0x100\000\00010\000\000\000\000\000"
+/* offset=26442 */ "l1-data-stores-access\000legacy cache\000Level 1 data cache write accesses\000legacy-cache-config=0x100\000\00010\000\000\000\000\000"
+/* offset=26545 */ "l1-data-stores-misses\000legacy cache\000Level 1 data cache write misses\000legacy-cache-config=0x10100\000\00010\000\000\000\000\000"
+/* offset=26648 */ "l1-data-stores-miss\000legacy cache\000Level 1 data cache write misses\000legacy-cache-config=0x10100\000\00010\000\000\000\000\000"
+/* offset=26749 */ "l1-data-write\000legacy cache\000Level 1 data cache write accesses\000legacy-cache-config=0x100\000\00010\000\000\000\000\000"
+/* offset=26844 */ "l1-data-write-refs\000legacy cache\000Level 1 data cache write accesses\000legacy-cache-config=0x100\000\00010\000\000\000\000\000"
+/* offset=26944 */ "l1-data-write-reference\000legacy cache\000Level 1 data cache write accesses\000legacy-cache-config=0x100\000\00010\000\000\000\000\000"
+/* offset=27049 */ "l1-data-write-ops\000legacy cache\000Level 1 data cache write accesses\000legacy-cache-config=0x100\000\00010\000\000\000\000\000"
+/* offset=27148 */ "l1-data-write-access\000legacy cache\000Level 1 data cache write accesses\000legacy-cache-config=0x100\000\00010\000\000\000\000\000"
+/* offset=27250 */ "l1-data-write-misses\000legacy cache\000Level 1 data cache write misses\000legacy-cache-config=0x10100\000\00010\000\000\000\000\000"
+/* offset=27352 */ "l1-data-write-miss\000legacy cache\000Level 1 data cache write misses\000legacy-cache-config=0x10100\000\00010\000\000\000\000\000"
+/* offset=27452 */ "l1-data-prefetch\000legacy cache\000Level 1 data cache prefetch accesses\000legacy-cache-config=0x200\000\00010\000\000\000\000\000"
+/* offset=27553 */ "l1-data-prefetch-refs\000legacy cache\000Level 1 data cache prefetch accesses\000legacy-cache-config=0x200\000\00010\000\000\000\000\000"
+/* offset=27659 */ "l1-data-prefetch-reference\000legacy cache\000Level 1 data cache prefetch accesses\000legacy-cache-config=0x200\000\00010\000\000\000\000\000"
+/* offset=27770 */ "l1-data-prefetch-ops\000legacy cache\000Level 1 data cache prefetch accesses\000legacy-cache-config=0x200\000\00010\000\000\000\000\000"
+/* offset=27875 */ "l1-data-prefetch-access\000legacy cache\000Level 1 data cache prefetch accesses\000legacy-cache-config=0x200\000\00010\000\000\000\000\000"
+/* offset=27983 */ "l1-data-prefetch-misses\000legacy cache\000Level 1 data cache prefetch misses\000legacy-cache-config=0x10200\000\00010\000\000\000\000\000"
+/* offset=28091 */ "l1-data-prefetch-miss\000legacy cache\000Level 1 data cache prefetch misses\000legacy-cache-config=0x10200\000\00010\000\000\000\000\000"
+/* offset=28197 */ "l1-data-prefetches\000legacy cache\000Level 1 data cache prefetch accesses\000legacy-cache-config=0x200\000\00010\000\000\000\000\000"
+/* offset=28300 */ "l1-data-prefetches-refs\000legacy cache\000Level 1 data cache prefetch accesses\000legacy-cache-config=0x200\000\00010\000\000\000\000\000"
+/* offset=28408 */ "l1-data-prefetches-reference\000legacy cache\000Level 1 data cache prefetch accesses\000legacy-cache-config=0x200\000\00010\000\000\000\000\000"
+/* offset=28521 */ "l1-data-prefetches-ops\000legacy cache\000Level 1 data cache prefetch accesses\000legacy-cache-config=0x200\000\00010\000\000\000\000\000"
+/* offset=28628 */ "l1-data-prefetches-access\000legacy cache\000Level 1 data cache prefetch accesses\000legacy-cache-config=0x200\000\00010\000\000\000\000\000"
+/* offset=28738 */ "l1-data-prefetches-misses\000legacy cache\000Level 1 data cache prefetch misses\000legacy-cache-config=0x10200\000\00010\000\000\000\000\000"
+/* offset=28848 */ "l1-data-prefetches-miss\000legacy cache\000Level 1 data cache prefetch misses\000legacy-cache-config=0x10200\000\00010\000\000\000\000\000"
+/* offset=28956 */ "l1-data-speculative-read\000legacy cache\000Level 1 data cache prefetch accesses\000legacy-cache-config=0x200\000\00010\000\000\000\000\000"
+/* offset=29065 */ "l1-data-speculative-read-refs\000legacy cache\000Level 1 data cache prefetch accesses\000legacy-cache-config=0x200\000\00010\000\000\000\000\000"
+/* offset=29179 */ "l1-data-speculative-read-reference\000legacy cache\000Level 1 data cache prefetch accesses\000legacy-cache-config=0x200\000\00010\000\000\000\000\000"
+/* offset=29298 */ "l1-data-speculative-read-ops\000legacy cache\000Level 1 data cache prefetch accesses\000legacy-cache-config=0x200\000\00010\000\000\000\000\000"
+/* offset=29411 */ "l1-data-speculative-read-access\000legacy cache\000Level 1 data cache prefetch accesses\000legacy-cache-config=0x200\000\00010\000\000\000\000\000"
+/* offset=29527 */ "l1-data-speculative-read-misses\000legacy cache\000Level 1 data cache prefetch misses\000legacy-cache-config=0x10200\000\00010\000\000\000\000\000"
+/* offset=29643 */ "l1-data-speculative-read-miss\000legacy cache\000Level 1 data cache prefetch misses\000legacy-cache-config=0x10200\000\00010\000\000\000\000\000"
+/* offset=29757 */ "l1-data-speculative-load\000legacy cache\000Level 1 data cache prefetch accesses\000legacy-cache-config=0x200\000\00010\000\000\000\000\000"
+/* offset=29866 */ "l1-data-speculative-load-refs\000legacy cache\000Level 1 data cache prefetch accesses\000legacy-cache-config=0x200\000\00010\000\000\000\000\000"
+/* offset=29980 */ "l1-data-speculative-load-reference\000legacy cache\000Level 1 data cache prefetch accesses\000legacy-cache-config=0x200\000\00010\000\000\000\000\000"
+/* offset=30099 */ "l1-data-speculative-load-ops\000legacy cache\000Level 1 data cache prefetch accesses\000legacy-cache-config=0x200\000\00010\000\000\000\000\000"
+/* offset=30212 */ "l1-data-speculative-load-access\000legacy cache\000Level 1 data cache prefetch accesses\000legacy-cache-config=0x200\000\00010\000\000\000\000\000"
+/* offset=30328 */ "l1-data-speculative-load-misses\000legacy cache\000Level 1 data cache prefetch misses\000legacy-cache-config=0x10200\000\00010\000\000\000\000\000"
+/* offset=30444 */ "l1-data-speculative-load-miss\000legacy cache\000Level 1 data cache prefetch misses\000legacy-cache-config=0x10200\000\00010\000\000\000\000\000"
+/* offset=30558 */ "l1-data-refs\000legacy cache\000Level 1 data cache read accesses\000legacy-cache-config=0\000\00010\000\000\000\000\000"
+/* offset=30647 */ "l1-data-reference\000legacy cache\000Level 1 data cache read accesses\000legacy-cache-config=0\000\00010\000\000\000\000\000"
+/* offset=30741 */ "l1-data-ops\000legacy cache\000Level 1 data cache read accesses\000legacy-cache-config=0\000\00010\000\000\000\000\000"
+/* offset=30829 */ "l1-data-access\000legacy cache\000Level 1 data cache read accesses\000legacy-cache-config=0\000\00010\000\000\000\000\000"
+/* offset=30920 */ "l1-data-misses\000legacy cache\000Level 1 data cache read misses\000legacy-cache-config=0x10000\000\00010\000\000\000\000\000"
+/* offset=31015 */ "l1-data-miss\000legacy cache\000Level 1 data cache read misses\000legacy-cache-config=0x10000\000\00010\000\000\000\000\000"
+/* offset=31108 */ "l1-icache\000legacy cache\000Level 1 instruction cache read accesses\000legacy-cache-config=1\000\00010\000\000\000\000\000"
+/* offset=31201 */ "l1-icache-load\000legacy cache\000Level 1 instruction cache read accesses\000legacy-cache-config=1\000\00010\000\000\000\000\000"
+/* offset=31299 */ "l1-icache-load-refs\000legacy cache\000Level 1 instruction cache read accesses\000legacy-cache-config=1\000\00010\000\000\000\000\000"
+/* offset=31402 */ "l1-icache-load-reference\000legacy cache\000Level 1 instruction cache read accesses\000legacy-cache-config=1\000\00010\000\000\000\000\000"
+/* offset=31510 */ "l1-icache-load-ops\000legacy cache\000Level 1 instruction cache read accesses\000legacy-cache-config=1\000\00010\000\000\000\000\000"
+/* offset=31612 */ "l1-icache-load-access\000legacy cache\000Level 1 instruction cache read accesses\000legacy-cache-config=1\000\00010\000\000\000\000\000"
+/* offset=31717 */ "l1-icache-load-misses\000legacy cache\000Level 1 instruction cache read misses\000legacy-cache-config=0x10001\000\00000\000\000\000\000\000"
+/* offset=31826 */ "l1-icache-load-miss\000legacy cache\000Level 1 instruction cache read misses\000legacy-cache-config=0x10001\000\00010\000\000\000\000\000"
+/* offset=31933 */ "l1-icache-loads\000legacy cache\000Level 1 instruction cache read accesses\000legacy-cache-config=1\000\00000\000\000\000\000\000"
+/* offset=32032 */ "l1-icache-loads-refs\000legacy cache\000Level 1 instruction cache read accesses\000legacy-cache-config=1\000\00010\000\000\000\000\000"
+/* offset=32136 */ "l1-icache-loads-reference\000legacy cache\000Level 1 instruction cache read accesses\000legacy-cache-config=1\000\00010\000\000\000\000\000"
+/* offset=32245 */ "l1-icache-loads-ops\000legacy cache\000Level 1 instruction cache read accesses\000legacy-cache-config=1\000\00010\000\000\000\000\000"
+/* offset=32348 */ "l1-icache-loads-access\000legacy cache\000Level 1 instruction cache read accesses\000legacy-cache-config=1\000\00010\000\000\000\000\000"
+/* offset=32454 */ "l1-icache-loads-misses\000legacy cache\000Level 1 instruction cache read misses\000legacy-cache-config=0x10001\000\00010\000\000\000\000\000"
+/* offset=32564 */ "l1-icache-loads-miss\000legacy cache\000Level 1 instruction cache read misses\000legacy-cache-config=0x10001\000\00010\000\000\000\000\000"
+/* offset=32672 */ "l1-icache-read\000legacy cache\000Level 1 instruction cache read accesses\000legacy-cache-config=1\000\00010\000\000\000\000\000"
+/* offset=32770 */ "l1-icache-read-refs\000legacy cache\000Level 1 instruction cache read accesses\000legacy-cache-config=1\000\00010\000\000\000\000\000"
+/* offset=32873 */ "l1-icache-read-reference\000legacy cache\000Level 1 instruction cache read accesses\000legacy-cache-config=1\000\00010\000\000\000\000\000"
+/* offset=32981 */ "l1-icache-read-ops\000legacy cache\000Level 1 instruction cache read accesses\000legacy-cache-config=1\000\00010\000\000\000\000\000"
+/* offset=33083 */ "l1-icache-read-access\000legacy cache\000Level 1 instruction cache read accesses\000legacy-cache-config=1\000\00010\000\000\000\000\000"
+/* offset=33188 */ "l1-icache-read-misses\000legacy cache\000Level 1 instruction cache read misses\000legacy-cache-config=0x10001\000\00010\000\000\000\000\000"
+/* offset=33297 */ "l1-icache-read-miss\000legacy cache\000Level 1 instruction cache read misses\000legacy-cache-config=0x10001\000\00010\000\000\000\000\000"
+/* offset=33404 */ "l1-icache-prefetch\000legacy cache\000Level 1 instruction cache prefetch accesses\000legacy-cache-config=0x201\000\00010\000\000\000\000\000"
+/* offset=33514 */ "l1-icache-prefetch-refs\000legacy cache\000Level 1 instruction cache prefetch accesses\000legacy-cache-config=0x201\000\00010\000\000\000\000\000"
+/* offset=33629 */ "l1-icache-prefetch-reference\000legacy cache\000Level 1 instruction cache prefetch accesses\000legacy-cache-config=0x201\000\00010\000\000\000\000\000"
+/* offset=33749 */ "l1-icache-prefetch-ops\000legacy cache\000Level 1 instruction cache prefetch accesses\000legacy-cache-config=0x201\000\00010\000\000\000\000\000"
+/* offset=33863 */ "l1-icache-prefetch-access\000legacy cache\000Level 1 instruction cache prefetch accesses\000legacy-cache-config=0x201\000\00010\000\000\000\000\000"
+/* offset=33980 */ "l1-icache-prefetch-misses\000legacy cache\000Level 1 instruction cache prefetch misses\000legacy-cache-config=0x10201\000\00000\000\000\000\000\000"
+/* offset=34097 */ "l1-icache-prefetch-miss\000legacy cache\000Level 1 instruction cache prefetch misses\000legacy-cache-config=0x10201\000\00010\000\000\000\000\000"
+/* offset=34212 */ "l1-icache-prefetches\000legacy cache\000Level 1 instruction cache prefetch accesses\000legacy-cache-config=0x201\000\00000\000\000\000\000\000"
+/* offset=34324 */ "l1-icache-prefetches-refs\000legacy cache\000Level 1 instruction cache prefetch accesses\000legacy-cache-config=0x201\000\00010\000\000\000\000\000"
+/* offset=34441 */ "l1-icache-prefetches-reference\000legacy cache\000Level 1 instruction cache prefetch accesses\000legacy-cache-config=0x201\000\00010\000\000\000\000\000"
+/* offset=34563 */ "l1-icache-prefetches-ops\000legacy cache\000Level 1 instruction cache prefetch accesses\000legacy-cache-config=0x201\000\00010\000\000\000\000\000"
+/* offset=34679 */ "l1-icache-prefetches-access\000legacy cache\000Level 1 instruction cache prefetch accesses\000legacy-cache-config=0x201\000\00010\000\000\000\000\000"
+/* offset=34798 */ "l1-icache-prefetches-misses\000legacy cache\000Level 1 instruction cache prefetch misses\000legacy-cache-config=0x10201\000\00010\000\000\000\000\000"
+/* offset=34917 */ "l1-icache-prefetches-miss\000legacy cache\000Level 1 instruction cache prefetch misses\000legacy-cache-config=0x10201\000\00010\000\000\000\000\000"
+/* offset=35034 */ "l1-icache-speculative-read\000legacy cache\000Level 1 instruction cache prefetch accesses\000legacy-cache-config=0x201\000\00010\000\000\000\000\000"
+/* offset=35152 */ "l1-icache-speculative-read-refs\000legacy cache\000Level 1 instruction cache prefetch accesses\000legacy-cache-config=0x201\000\00010\000\000\000\000\000"
+/* offset=35275 */ "l1-icache-speculative-read-reference\000legacy cache\000Level 1 instruction cache prefetch accesses\000legacy-cache-config=0x201\000\00010\000\000\000\000\000"
+/* offset=35403 */ "l1-icache-speculative-read-ops\000legacy cache\000Level 1 instruction cache prefetch accesses\000legacy-cache-config=0x201\000\00010\000\000\000\000\000"
+/* offset=35525 */ "l1-icache-speculative-read-access\000legacy cache\000Level 1 instruction cache prefetch accesses\000legacy-cache-config=0x201\000\00010\000\000\000\000\000"
+/* offset=35650 */ "l1-icache-speculative-read-misses\000legacy cache\000Level 1 instruction cache prefetch misses\000legacy-cache-config=0x10201\000\00010\000\000\000\000\000"
+/* offset=35775 */ "l1-icache-speculative-read-miss\000legacy cache\000Level 1 instruction cache prefetch misses\000legacy-cache-config=0x10201\000\00010\000\000\000\000\000"
+/* offset=35898 */ "l1-icache-speculative-load\000legacy cache\000Level 1 instruction cache prefetch accesses\000legacy-cache-config=0x201\000\00010\000\000\000\000\000"
+/* offset=36016 */ "l1-icache-speculative-load-refs\000legacy cache\000Level 1 instruction cache prefetch accesses\000legacy-cache-config=0x201\000\00010\000\000\000\000\000"
+/* offset=36139 */ "l1-icache-speculative-load-reference\000legacy cache\000Level 1 instruction cache prefetch accesses\000legacy-cache-config=0x201\000\00010\000\000\000\000\000"
+/* offset=36267 */ "l1-icache-speculative-load-ops\000legacy cache\000Level 1 instruction cache prefetch accesses\000legacy-cache-config=0x201\000\00010\000\000\000\000\000"
+/* offset=36389 */ "l1-icache-speculative-load-access\000legacy cache\000Level 1 instruction cache prefetch accesses\000legacy-cache-config=0x201\000\00010\000\000\000\000\000"
+/* offset=36514 */ "l1-icache-speculative-load-misses\000legacy cache\000Level 1 instruction cache prefetch misses\000legacy-cache-config=0x10201\000\00010\000\000\000\000\000"
+/* offset=36639 */ "l1-icache-speculative-load-miss\000legacy cache\000Level 1 instruction cache prefetch misses\000legacy-cache-config=0x10201\000\00010\000\000\000\000\000"
+/* offset=36762 */ "l1-icache-refs\000legacy cache\000Level 1 instruction cache read accesses\000legacy-cache-config=1\000\00010\000\000\000\000\000"
+/* offset=36860 */ "l1-icache-reference\000legacy cache\000Level 1 instruction cache read accesses\000legacy-cache-config=1\000\00010\000\000\000\000\000"
+/* offset=36963 */ "l1-icache-ops\000legacy cache\000Level 1 instruction cache read accesses\000legacy-cache-config=1\000\00010\000\000\000\000\000"
+/* offset=37060 */ "l1-icache-access\000legacy cache\000Level 1 instruction cache read accesses\000legacy-cache-config=1\000\00010\000\000\000\000\000"
+/* offset=37160 */ "l1-icache-misses\000legacy cache\000Level 1 instruction cache read misses\000legacy-cache-config=0x10001\000\00010\000\000\000\000\000"
+/* offset=37264 */ "l1-icache-miss\000legacy cache\000Level 1 instruction cache read misses\000legacy-cache-config=0x10001\000\00010\000\000\000\000\000"
+/* offset=37366 */ "l1-i\000legacy cache\000Level 1 instruction cache read accesses\000legacy-cache-config=1\000\00010\000\000\000\000\000"
+/* offset=37454 */ "l1-i-load\000legacy cache\000Level 1 instruction cache read accesses\000legacy-cache-config=1\000\00010\000\000\000\000\000"
+/* offset=37547 */ "l1-i-load-refs\000legacy cache\000Level 1 instruction cache read accesses\000legacy-cache-config=1\000\00010\000\000\000\000\000"
+/* offset=37645 */ "l1-i-load-reference\000legacy cache\000Level 1 instruction cache read accesses\000legacy-cache-config=1\000\00010\000\000\000\000\000"
+/* offset=37748 */ "l1-i-load-ops\000legacy cache\000Level 1 instruction cache read accesses\000legacy-cache-config=1\000\00010\000\000\000\000\000"
+/* offset=37845 */ "l1-i-load-access\000legacy cache\000Level 1 instruction cache read accesses\000legacy-cache-config=1\000\00010\000\000\000\000\000"
+/* offset=37945 */ "l1-i-load-misses\000legacy cache\000Level 1 instruction cache read misses\000legacy-cache-config=0x10001\000\00010\000\000\000\000\000"
+/* offset=38049 */ "l1-i-load-miss\000legacy cache\000Level 1 instruction cache read misses\000legacy-cache-config=0x10001\000\00010\000\000\000\000\000"
+/* offset=38151 */ "l1-i-loads\000legacy cache\000Level 1 instruction cache read accesses\000legacy-cache-config=1\000\00010\000\000\000\000\000"
+/* offset=38245 */ "l1-i-loads-refs\000legacy cache\000Level 1 instruction cache read accesses\000legacy-cache-config=1\000\00010\000\000\000\000\000"
+/* offset=38344 */ "l1-i-loads-reference\000legacy cache\000Level 1 instruction cache read accesses\000legacy-cache-config=1\000\00010\000\000\000\000\000"
+/* offset=38448 */ "l1-i-loads-ops\000legacy cache\000Level 1 instruction cache read accesses\000legacy-cache-config=1\000\00010\000\000\000\000\000"
+/* offset=38546 */ "l1-i-loads-access\000legacy cache\000Level 1 instruction cache read accesses\000legacy-cache-config=1\000\00010\000\000\000\000\000"
+/* offset=38647 */ "l1-i-loads-misses\000legacy cache\000Level 1 instruction cache read misses\000legacy-cache-config=0x10001\000\00010\000\000\000\000\000"
+/* offset=38752 */ "l1-i-loads-miss\000legacy cache\000Level 1 instruction cache read misses\000legacy-cache-config=0x10001\000\00010\000\000\000\000\000"
+/* offset=38855 */ "l1-i-read\000legacy cache\000Level 1 instruction cache read accesses\000legacy-cache-config=1\000\00010\000\000\000\000\000"
+/* offset=38948 */ "l1-i-read-refs\000legacy cache\000Level 1 instruction cache read accesses\000legacy-cache-config=1\000\00010\000\000\000\000\000"
+/* offset=39046 */ "l1-i-read-reference\000legacy cache\000Level 1 instruction cache read accesses\000legacy-cache-config=1\000\00010\000\000\000\000\000"
+/* offset=39149 */ "l1-i-read-ops\000legacy cache\000Level 1 instruction cache read accesses\000legacy-cache-config=1\000\00010\000\000\000\000\000"
+/* offset=39246 */ "l1-i-read-access\000legacy cache\000Level 1 instruction cache read accesses\000legacy-cache-config=1\000\00010\000\000\000\000\000"
+/* offset=39346 */ "l1-i-read-misses\000legacy cache\000Level 1 instruction cache read misses\000legacy-cache-config=0x10001\000\00010\000\000\000\000\000"
+/* offset=39450 */ "l1-i-read-miss\000legacy cache\000Level 1 instruction cache read misses\000legacy-cache-config=0x10001\000\00010\000\000\000\000\000"
+/* offset=39552 */ "l1-i-prefetch\000legacy cache\000Level 1 instruction cache prefetch accesses\000legacy-cache-config=0x201\000\00010\000\000\000\000\000"
+/* offset=39657 */ "l1-i-prefetch-refs\000legacy cache\000Level 1 instruction cache prefetch accesses\000legacy-cache-config=0x201\000\00010\000\000\000\000\000"
+/* offset=39767 */ "l1-i-prefetch-reference\000legacy cache\000Level 1 instruction cache prefetch accesses\000legacy-cache-config=0x201\000\00010\000\000\000\000\000"
+/* offset=39882 */ "l1-i-prefetch-ops\000legacy cache\000Level 1 instruction cache prefetch accesses\000legacy-cache-config=0x201\000\00010\000\000\000\000\000"
+/* offset=39991 */ "l1-i-prefetch-access\000legacy cache\000Level 1 instruction cache prefetch accesses\000legacy-cache-config=0x201\000\00010\000\000\000\000\000"
+/* offset=40103 */ "l1-i-prefetch-misses\000legacy cache\000Level 1 instruction cache prefetch misses\000legacy-cache-config=0x10201\000\00010\000\000\000\000\000"
+/* offset=40215 */ "l1-i-prefetch-miss\000legacy cache\000Level 1 instruction cache prefetch misses\000legacy-cache-config=0x10201\000\00010\000\000\000\000\000"
+/* offset=40325 */ "l1-i-prefetches\000legacy cache\000Level 1 instruction cache prefetch accesses\000legacy-cache-config=0x201\000\00010\000\000\000\000\000"
+/* offset=40432 */ "l1-i-prefetches-refs\000legacy cache\000Level 1 instruction cache prefetch accesses\000legacy-cache-config=0x201\000\00010\000\000\000\000\000"
+/* offset=40544 */ "l1-i-prefetches-reference\000legacy cache\000Level 1 instruction cache prefetch accesses\000legacy-cache-config=0x201\000\00010\000\000\000\000\000"
+/* offset=40661 */ "l1-i-prefetches-ops\000legacy cache\000Level 1 instruction cache prefetch accesses\000legacy-cache-config=0x201\000\00010\000\000\000\000\000"
+/* offset=40772 */ "l1-i-prefetches-access\000legacy cache\000Level 1 instruction cache prefetch accesses\000legacy-cache-config=0x201\000\00010\000\000\000\000\000"
+/* offset=40886 */ "l1-i-prefetches-misses\000legacy cache\000Level 1 instruction cache prefetch misses\000legacy-cache-config=0x10201\000\00010\000\000\000\000\000"
+/* offset=41000 */ "l1-i-prefetches-miss\000legacy cache\000Level 1 instruction cache prefetch misses\000legacy-cache-config=0x10201\000\00010\000\000\000\000\000"
+/* offset=41112 */ "l1-i-speculative-read\000legacy cache\000Level 1 instruction cache prefetch accesses\000legacy-cache-config=0x201\000\00010\000\000\000\000\000"
+/* offset=41225 */ "l1-i-speculative-read-refs\000legacy cache\000Level 1 instruction cache prefetch accesses\000legacy-cache-config=0x201\000\00010\000\000\000\000\000"
+/* offset=41343 */ "l1-i-speculative-read-reference\000legacy cache\000Level 1 instruction cache prefetch accesses\000legacy-cache-config=0x201\000\00010\000\000\000\000\000"
+/* offset=41466 */ "l1-i-speculative-read-ops\000legacy cache\000Level 1 instruction cache prefetch accesses\000legacy-cache-config=0x201\000\00010\000\000\000\000\000"
+/* offset=41583 */ "l1-i-speculative-read-access\000legacy cache\000Level 1 instruction cache prefetch accesses\000legacy-cache-config=0x201\000\00010\000\000\000\000\000"
+/* offset=41703 */ "l1-i-speculative-read-misses\000legacy cache\000Level 1 instruction cache prefetch misses\000legacy-cache-config=0x10201\000\00010\000\000\000\000\000"
+/* offset=41823 */ "l1-i-speculative-read-miss\000legacy cache\000Level 1 instruction cache prefetch misses\000legacy-cache-config=0x10201\000\00010\000\000\000\000\000"
+/* offset=41941 */ "l1-i-speculative-load\000legacy cache\000Level 1 instruction cache prefetch accesses\000legacy-cache-config=0x201\000\00010\000\000\000\000\000"
+/* offset=42054 */ "l1-i-speculative-load-refs\000legacy cache\000Level 1 instruction cache prefetch accesses\000legacy-cache-config=0x201\000\00010\000\000\000\000\000"
+/* offset=42172 */ "l1-i-speculative-load-reference\000legacy cache\000Level 1 instruction cache prefetch accesses\000legacy-cache-config=0x201\000\00010\000\000\000\000\000"
+/* offset=42295 */ "l1-i-speculative-load-ops\000legacy cache\000Level 1 instruction cache prefetch accesses\000legacy-cache-config=0x201\000\00010\000\000\000\000\000"
+/* offset=42412 */ "l1-i-speculative-load-access\000legacy cache\000Level 1 instruction cache prefetch accesses\000legacy-cache-config=0x201\000\00010\000\000\000\000\000"
+/* offset=42532 */ "l1-i-speculative-load-misses\000legacy cache\000Level 1 instruction cache prefetch misses\000legacy-cache-config=0x10201\000\00010\000\000\000\000\000"
+/* offset=42652 */ "l1-i-speculative-load-miss\000legacy cache\000Level 1 instruction cache prefetch misses\000legacy-cache-config=0x10201\000\00010\000\000\000\000\000"
+/* offset=42770 */ "l1-i-refs\000legacy cache\000Level 1 instruction cache read accesses\000legacy-cache-config=1\000\00010\000\000\000\000\000"
+/* offset=42863 */ "l1-i-reference\000legacy cache\000Level 1 instruction cache read accesses\000legacy-cache-config=1\000\00010\000\000\000\000\000"
+/* offset=42961 */ "l1-i-ops\000legacy cache\000Level 1 instruction cache read accesses\000legacy-cache-config=1\000\00010\000\000\000\000\000"
+/* offset=43053 */ "l1-i-access\000legacy cache\000Level 1 instruction cache read accesses\000legacy-cache-config=1\000\00010\000\000\000\000\000"
+/* offset=43148 */ "l1-i-misses\000legacy cache\000Level 1 instruction cache read misses\000legacy-cache-config=0x10001\000\00010\000\000\000\000\000"
+/* offset=43247 */ "l1-i-miss\000legacy cache\000Level 1 instruction cache read misses\000legacy-cache-config=0x10001\000\00010\000\000\000\000\000"
+/* offset=43344 */ "l1i\000legacy cache\000Level 1 instruction cache read accesses\000legacy-cache-config=1\000\00010\000\000\000\000\000"
+/* offset=43431 */ "l1i-load\000legacy cache\000Level 1 instruction cache read accesses\000legacy-cache-config=1\000\00010\000\000\000\000\000"
+/* offset=43523 */ "l1i-load-refs\000legacy cache\000Level 1 instruction cache read accesses\000legacy-cache-config=1\000\00010\000\000\000\000\000"
+/* offset=43620 */ "l1i-load-reference\000legacy cache\000Level 1 instruction cache read accesses\000legacy-cache-config=1\000\00010\000\000\000\000\000"
+/* offset=43722 */ "l1i-load-ops\000legacy cache\000Level 1 instruction cache read accesses\000legacy-cache-config=1\000\00010\000\000\000\000\000"
+/* offset=43818 */ "l1i-load-access\000legacy cache\000Level 1 instruction cache read accesses\000legacy-cache-config=1\000\00010\000\000\000\000\000"
+/* offset=43917 */ "l1i-load-misses\000legacy cache\000Level 1 instruction cache read misses\000legacy-cache-config=0x10001\000\00010\000\000\000\000\000"
+/* offset=44020 */ "l1i-load-miss\000legacy cache\000Level 1 instruction cache read misses\000legacy-cache-config=0x10001\000\00010\000\000\000\000\000"
+/* offset=44121 */ "l1i-loads\000legacy cache\000Level 1 instruction cache read accesses\000legacy-cache-config=1\000\00010\000\000\000\000\000"
+/* offset=44214 */ "l1i-loads-refs\000legacy cache\000Level 1 instruction cache read accesses\000legacy-cache-config=1\000\00010\000\000\000\000\000"
+/* offset=44312 */ "l1i-loads-reference\000legacy cache\000Level 1 instruction cache read accesses\000legacy-cache-config=1\000\00010\000\000\000\000\000"
+/* offset=44415 */ "l1i-loads-ops\000legacy cache\000Level 1 instruction cache read accesses\000legacy-cache-config=1\000\00010\000\000\000\000\000"
+/* offset=44512 */ "l1i-loads-access\000legacy cache\000Level 1 instruction cache read accesses\000legacy-cache-config=1\000\00010\000\000\000\000\000"
+/* offset=44612 */ "l1i-loads-misses\000legacy cache\000Level 1 instruction cache read misses\000legacy-cache-config=0x10001\000\00010\000\000\000\000\000"
+/* offset=44716 */ "l1i-loads-miss\000legacy cache\000Level 1 instruction cache read misses\000legacy-cache-config=0x10001\000\00010\000\000\000\000\000"
+/* offset=44818 */ "l1i-read\000legacy cache\000Level 1 instruction cache read accesses\000legacy-cache-config=1\000\00010\000\000\000\000\000"
+/* offset=44910 */ "l1i-read-refs\000legacy cache\000Level 1 instruction cache read accesses\000legacy-cache-config=1\000\00010\000\000\000\000\000"
+/* offset=45007 */ "l1i-read-reference\000legacy cache\000Level 1 instruction cache read accesses\000legacy-cache-config=1\000\00010\000\000\000\000\000"
+/* offset=45109 */ "l1i-read-ops\000legacy cache\000Level 1 instruction cache read accesses\000legacy-cache-config=1\000\00010\000\000\000\000\000"
+/* offset=45205 */ "l1i-read-access\000legacy cache\000Level 1 instruction cache read accesses\000legacy-cache-config=1\000\00010\000\000\000\000\000"
+/* offset=45304 */ "l1i-read-misses\000legacy cache\000Level 1 instruction cache read misses\000legacy-cache-config=0x10001\000\00010\000\000\000\000\000"
+/* offset=45407 */ "l1i-read-miss\000legacy cache\000Level 1 instruction cache read misses\000legacy-cache-config=0x10001\000\00010\000\000\000\000\000"
+/* offset=45508 */ "l1i-prefetch\000legacy cache\000Level 1 instruction cache prefetch accesses\000legacy-cache-config=0x201\000\00010\000\000\000\000\000"
+/* offset=45612 */ "l1i-prefetch-refs\000legacy cache\000Level 1 instruction cache prefetch accesses\000legacy-cache-config=0x201\000\00010\000\000\000\000\000"
+/* offset=45721 */ "l1i-prefetch-reference\000legacy cache\000Level 1 instruction cache prefetch accesses\000legacy-cache-config=0x201\000\00010\000\000\000\000\000"
+/* offset=45835 */ "l1i-prefetch-ops\000legacy cache\000Level 1 instruction cache prefetch accesses\000legacy-cache-config=0x201\000\00010\000\000\000\000\000"
+/* offset=45943 */ "l1i-prefetch-access\000legacy cache\000Level 1 instruction cache prefetch accesses\000legacy-cache-config=0x201\000\00010\000\000\000\000\000"
+/* offset=46054 */ "l1i-prefetch-misses\000legacy cache\000Level 1 instruction cache prefetch misses\000legacy-cache-config=0x10201\000\00010\000\000\000\000\000"
+/* offset=46165 */ "l1i-prefetch-miss\000legacy cache\000Level 1 instruction cache prefetch misses\000legacy-cache-config=0x10201\000\00010\000\000\000\000\000"
+/* offset=46274 */ "l1i-prefetches\000legacy cache\000Level 1 instruction cache prefetch accesses\000legacy-cache-config=0x201\000\00010\000\000\000\000\000"
+/* offset=46380 */ "l1i-prefetches-refs\000legacy cache\000Level 1 instruction cache prefetch accesses\000legacy-cache-config=0x201\000\00010\000\000\000\000\000"
+/* offset=46491 */ "l1i-prefetches-reference\000legacy cache\000Level 1 instruction cache prefetch accesses\000legacy-cache-config=0x201\000\00010\000\000\000\000\000"
+/* offset=46607 */ "l1i-prefetches-ops\000legacy cache\000Level 1 instruction cache prefetch accesses\000legacy-cache-config=0x201\000\00010\000\000\000\000\000"
+/* offset=46717 */ "l1i-prefetches-access\000legacy cache\000Level 1 instruction cache prefetch accesses\000legacy-cache-config=0x201\000\00010\000\000\000\000\000"
+/* offset=46830 */ "l1i-prefetches-misses\000legacy cache\000Level 1 instruction cache prefetch misses\000legacy-cache-config=0x10201\000\00010\000\000\000\000\000"
+/* offset=46943 */ "l1i-prefetches-miss\000legacy cache\000Level 1 instruction cache prefetch misses\000legacy-cache-config=0x10201\000\00010\000\000\000\000\000"
+/* offset=47054 */ "l1i-speculative-read\000legacy cache\000Level 1 instruction cache prefetch accesses\000legacy-cache-config=0x201\000\00010\000\000\000\000\000"
+/* offset=47166 */ "l1i-speculative-read-refs\000legacy cache\000Level 1 instruction cache prefetch accesses\000legacy-cache-config=0x201\000\00010\000\000\000\000\000"
+/* offset=47283 */ "l1i-speculative-read-reference\000legacy cache\000Level 1 instruction cache prefetch accesses\000legacy-cache-config=0x201\000\00010\000\000\000\000\000"
+/* offset=47405 */ "l1i-speculative-read-ops\000legacy cache\000Level 1 instruction cache prefetch accesses\000legacy-cache-config=0x201\000\00010\000\000\000\000\000"
+/* offset=47521 */ "l1i-speculative-read-access\000legacy cache\000Level 1 instruction cache prefetch accesses\000legacy-cache-config=0x201\000\00010\000\000\000\000\000"
+/* offset=47640 */ "l1i-speculative-read-misses\000legacy cache\000Level 1 instruction cache prefetch misses\000legacy-cache-config=0x10201\000\00010\000\000\000\000\000"
+/* offset=47759 */ "l1i-speculative-read-miss\000legacy cache\000Level 1 instruction cache prefetch misses\000legacy-cache-config=0x10201\000\00010\000\000\000\000\000"
+/* offset=47876 */ "l1i-speculative-load\000legacy cache\000Level 1 instruction cache prefetch accesses\000legacy-cache-config=0x201\000\00010\000\000\000\000\000"
+/* offset=47988 */ "l1i-speculative-load-refs\000legacy cache\000Level 1 instruction cache prefetch accesses\000legacy-cache-config=0x201\000\00010\000\000\000\000\000"
+/* offset=48105 */ "l1i-speculative-load-reference\000legacy cache\000Level 1 instruction cache prefetch accesses\000legacy-cache-config=0x201\000\00010\000\000\000\000\000"
+/* offset=48227 */ "l1i-speculative-load-ops\000legacy cache\000Level 1 instruction cache prefetch accesses\000legacy-cache-config=0x201\000\00010\000\000\000\000\000"
+/* offset=48343 */ "l1i-speculative-load-access\000legacy cache\000Level 1 instruction cache prefetch accesses\000legacy-cache-config=0x201\000\00010\000\000\000\000\000"
+/* offset=48462 */ "l1i-speculative-load-misses\000legacy cache\000Level 1 instruction cache prefetch misses\000legacy-cache-config=0x10201\000\00010\000\000\000\000\000"
+/* offset=48581 */ "l1i-speculative-load-miss\000legacy cache\000Level 1 instruction cache prefetch misses\000legacy-cache-config=0x10201\000\00010\000\000\000\000\000"
+/* offset=48698 */ "l1i-refs\000legacy cache\000Level 1 instruction cache read accesses\000legacy-cache-config=1\000\00010\000\000\000\000\000"
+/* offset=48790 */ "l1i-reference\000legacy cache\000Level 1 instruction cache read accesses\000legacy-cache-config=1\000\00010\000\000\000\000\000"
+/* offset=48887 */ "l1i-ops\000legacy cache\000Level 1 instruction cache read accesses\000legacy-cache-config=1\000\00010\000\000\000\000\000"
+/* offset=48978 */ "l1i-access\000legacy cache\000Level 1 instruction cache read accesses\000legacy-cache-config=1\000\00010\000\000\000\000\000"
+/* offset=49072 */ "l1i-misses\000legacy cache\000Level 1 instruction cache read misses\000legacy-cache-config=0x10001\000\00010\000\000\000\000\000"
+/* offset=49170 */ "l1i-miss\000legacy cache\000Level 1 instruction cache read misses\000legacy-cache-config=0x10001\000\00010\000\000\000\000\000"
+/* offset=49266 */ "l1-instruction\000legacy cache\000Level 1 instruction cache read accesses\000legacy-cache-config=1\000\00010\000\000\000\000\000"
+/* offset=49364 */ "l1-instruction-load\000legacy cache\000Level 1 instruction cache read accesses\000legacy-cache-config=1\000\00010\000\000\000\000\000"
+/* offset=49467 */ "l1-instruction-load-refs\000legacy cache\000Level 1 instruction cache read accesses\000legacy-cache-config=1\000\00010\000\000\000\000\000"
+/* offset=49575 */ "l1-instruction-load-reference\000legacy cache\000Level 1 instruction cache read accesses\000legacy-cache-config=1\000\00010\000\000\000\000\000"
+/* offset=49688 */ "l1-instruction-load-ops\000legacy cache\000Level 1 instruction cache read accesses\000legacy-cache-config=1\000\00010\000\000\000\000\000"
+/* offset=49795 */ "l1-instruction-load-access\000legacy cache\000Level 1 instruction cache read accesses\000legacy-cache-config=1\000\00010\000\000\000\000\000"
+/* offset=49905 */ "l1-instruction-load-misses\000legacy cache\000Level 1 instruction cache read misses\000legacy-cache-config=0x10001\000\00010\000\000\000\000\000"
+/* offset=50019 */ "l1-instruction-load-miss\000legacy cache\000Level 1 instruction cache read misses\000legacy-cache-config=0x10001\000\00010\000\000\000\000\000"
+/* offset=50131 */ "l1-instruction-loads\000legacy cache\000Level 1 instruction cache read accesses\000legacy-cache-config=1\000\00010\000\000\000\000\000"
+/* offset=50235 */ "l1-instruction-loads-refs\000legacy cache\000Level 1 instruction cache read accesses\000legacy-cache-config=1\000\00010\000\000\000\000\000"
+/* offset=50344 */ "l1-instruction-loads-reference\000legacy cache\000Level 1 instruction cache read accesses\000legacy-cache-config=1\000\00010\000\000\000\000\000"
+/* offset=50458 */ "l1-instruction-loads-ops\000legacy cache\000Level 1 instruction cache read accesses\000legacy-cache-config=1\000\00010\000\000\000\000\000"
+/* offset=50566 */ "l1-instruction-loads-access\000legacy cache\000Level 1 instruction cache read accesses\000legacy-cache-config=1\000\00010\000\000\000\000\000"
+/* offset=50677 */ "l1-instruction-loads-misses\000legacy cache\000Level 1 instruction cache read misses\000legacy-cache-config=0x10001\000\00010\000\000\000\000\000"
+/* offset=50792 */ "l1-instruction-loads-miss\000legacy cache\000Level 1 instruction cache read misses\000legacy-cache-config=0x10001\000\00010\000\000\000\000\000"
+/* offset=50905 */ "l1-instruction-read\000legacy cache\000Level 1 instruction cache read accesses\000legacy-cache-config=1\000\00010\000\000\000\000\000"
+/* offset=51008 */ "l1-instruction-read-refs\000legacy cache\000Level 1 instruction cache read accesses\000legacy-cache-config=1\000\00010\000\000\000\000\000"
+/* offset=51116 */ "l1-instruction-read-reference\000legacy cache\000Level 1 instruction cache read accesses\000legacy-cache-config=1\000\00010\000\000\000\000\000"
+/* offset=51229 */ "l1-instruction-read-ops\000legacy cache\000Level 1 instruction cache read accesses\000legacy-cache-config=1\000\00010\000\000\000\000\000"
+/* offset=51336 */ "l1-instruction-read-access\000legacy cache\000Level 1 instruction cache read accesses\000legacy-cache-config=1\000\00010\000\000\000\000\000"
+/* offset=51446 */ "l1-instruction-read-misses\000legacy cache\000Level 1 instruction cache read misses\000legacy-cache-config=0x10001\000\00010\000\000\000\000\000"
+/* offset=51560 */ "l1-instruction-read-miss\000legacy cache\000Level 1 instruction cache read misses\000legacy-cache-config=0x10001\000\00010\000\000\000\000\000"
+/* offset=51672 */ "l1-instruction-prefetch\000legacy cache\000Level 1 instruction cache prefetch accesses\000legacy-cache-config=0x201\000\00010\000\000\000\000\000"
+/* offset=51787 */ "l1-instruction-prefetch-refs\000legacy cache\000Level 1 instruction cache prefetch accesses\000legacy-cache-config=0x201\000\00010\000\000\000\000\000"
+/* offset=51907 */ "l1-instruction-prefetch-reference\000legacy cache\000Level 1 instruction cache prefetch accesses\000legacy-cache-config=0x201\000\00010\000\000\000\000\000"
+/* offset=52032 */ "l1-instruction-prefetch-ops\000legacy cache\000Level 1 instruction cache prefetch accesses\000legacy-cache-config=0x201\000\00010\000\000\000\000\000"
+/* offset=52151 */ "l1-instruction-prefetch-access\000legacy cache\000Level 1 instruction cache prefetch accesses\000legacy-cache-config=0x201\000\00010\000\000\000\000\000"
+/* offset=52273 */ "l1-instruction-prefetch-misses\000legacy cache\000Level 1 instruction cache prefetch misses\000legacy-cache-config=0x10201\000\00010\000\000\000\000\000"
+/* offset=52395 */ "l1-instruction-prefetch-miss\000legacy cache\000Level 1 instruction cache prefetch misses\000legacy-cache-config=0x10201\000\00010\000\000\000\000\000"
+/* offset=52515 */ "l1-instruction-prefetches\000legacy cache\000Level 1 instruction cache prefetch accesses\000legacy-cache-config=0x201\000\00010\000\000\000\000\000"
+/* offset=52632 */ "l1-instruction-prefetches-refs\000legacy cache\000Level 1 instruction cache prefetch accesses\000legacy-cache-config=0x201\000\00010\000\000\000\000\000"
+/* offset=52754 */ "l1-instruction-prefetches-reference\000legacy cache\000Level 1 instruction cache prefetch accesses\000legacy-cache-config=0x201\000\00010\000\000\000\000\000"
+/* offset=52881 */ "l1-instruction-prefetches-ops\000legacy cache\000Level 1 instruction cache prefetch accesses\000legacy-cache-config=0x201\000\00010\000\000\000\000\000"
+/* offset=53002 */ "l1-instruction-prefetches-access\000legacy cache\000Level 1 instruction cache prefetch accesses\000legacy-cache-config=0x201\000\00010\000\000\000\000\000"
+/* offset=53126 */ "l1-instruction-prefetches-misses\000legacy cache\000Level 1 instruction cache prefetch misses\000legacy-cache-config=0x10201\000\00010\000\000\000\000\000"
+/* offset=53250 */ "l1-instruction-prefetches-miss\000legacy cache\000Level 1 instruction cache prefetch misses\000legacy-cache-config=0x10201\000\00010\000\000\000\000\000"
+/* offset=53372 */ "l1-instruction-speculative-read\000legacy cache\000Level 1 instruction cache prefetch accesses\000legacy-cache-config=0x201\000\00010\000\000\000\000\000"
+/* offset=53495 */ "l1-instruction-speculative-read-refs\000legacy cache\000Level 1 instruction cache prefetch accesses\000legacy-cache-config=0x201\000\00010\000\000\000\000\000"
+/* offset=53623 */ "l1-instruction-speculative-read-reference\000legacy cache\000Level 1 instruction cache prefetch accesses\000legacy-cache-config=0x201\000\00010\000\000\000\000\000"
+/* offset=53756 */ "l1-instruction-speculative-read-ops\000legacy cache\000Level 1 instruction cache prefetch accesses\000legacy-cache-config=0x201\000\00010\000\000\000\000\000"
+/* offset=53883 */ "l1-instruction-speculative-read-access\000legacy cache\000Level 1 instruction cache prefetch accesses\000legacy-cache-config=0x201\000\00010\000\000\000\000\000"
+/* offset=54013 */ "l1-instruction-speculative-read-misses\000legacy cache\000Level 1 instruction cache prefetch misses\000legacy-cache-config=0x10201\000\00010\000\000\000\000\000"
+/* offset=54143 */ "l1-instruction-speculative-read-miss\000legacy cache\000Level 1 instruction cache prefetch misses\000legacy-cache-config=0x10201\000\00010\000\000\000\000\000"
+/* offset=54271 */ "l1-instruction-speculative-load\000legacy cache\000Level 1 instruction cache prefetch accesses\000legacy-cache-config=0x201\000\00010\000\000\000\000\000"
+/* offset=54394 */ "l1-instruction-speculative-load-refs\000legacy cache\000Level 1 instruction cache prefetch accesses\000legacy-cache-config=0x201\000\00010\000\000\000\000\000"
+/* offset=54522 */ "l1-instruction-speculative-load-reference\000legacy cache\000Level 1 instruction cache prefetch accesses\000legacy-cache-config=0x201\000\00010\000\000\000\000\000"
+/* offset=54655 */ "l1-instruction-speculative-load-ops\000legacy cache\000Level 1 instruction cache prefetch accesses\000legacy-cache-config=0x201\000\00010\000\000\000\000\000"
+/* offset=54782 */ "l1-instruction-speculative-load-access\000legacy cache\000Level 1 instruction cache prefetch accesses\000legacy-cache-config=0x201\000\00010\000\000\000\000\000"
+/* offset=54912 */ "l1-instruction-speculative-load-misses\000legacy cache\000Level 1 instruction cache prefetch misses\000legacy-cache-config=0x10201\000\00010\000\000\000\000\000"
+/* offset=55042 */ "l1-instruction-speculative-load-miss\000legacy cache\000Level 1 instruction cache prefetch misses\000legacy-cache-config=0x10201\000\00010\000\000\000\000\000"
+/* offset=55170 */ "l1-instruction-refs\000legacy cache\000Level 1 instruction cache read accesses\000legacy-cache-config=1\000\00010\000\000\000\000\000"
+/* offset=55273 */ "l1-instruction-reference\000legacy cache\000Level 1 instruction cache read accesses\000legacy-cache-config=1\000\00010\000\000\000\000\000"
+/* offset=55381 */ "l1-instruction-ops\000legacy cache\000Level 1 instruction cache read accesses\000legacy-cache-config=1\000\00010\000\000\000\000\000"
+/* offset=55483 */ "l1-instruction-access\000legacy cache\000Level 1 instruction cache read accesses\000legacy-cache-config=1\000\00010\000\000\000\000\000"
+/* offset=55588 */ "l1-instruction-misses\000legacy cache\000Level 1 instruction cache read misses\000legacy-cache-config=0x10001\000\00010\000\000\000\000\000"
+/* offset=55697 */ "l1-instruction-miss\000legacy cache\000Level 1 instruction cache read misses\000legacy-cache-config=0x10001\000\00010\000\000\000\000\000"
+/* offset=55804 */ "llc\000legacy cache\000Last level cache read accesses\000legacy-cache-config=2\000\00010\000\000\000\000\000"
+/* offset=55882 */ "llc-load\000legacy cache\000Last level cache read accesses\000legacy-cache-config=2\000\00010\000\000\000\000\000"
+/* offset=55965 */ "llc-load-refs\000legacy cache\000Last level cache read accesses\000legacy-cache-config=2\000\00010\000\000\000\000\000"
+/* offset=56053 */ "llc-load-reference\000legacy cache\000Last level cache read accesses\000legacy-cache-config=2\000\00010\000\000\000\000\000"
+/* offset=56146 */ "llc-load-ops\000legacy cache\000Last level cache read accesses\000legacy-cache-config=2\000\00010\000\000\000\000\000"
+/* offset=56233 */ "llc-load-access\000legacy cache\000Last level cache read accesses\000legacy-cache-config=2\000\00010\000\000\000\000\000"
+/* offset=56323 */ "llc-load-misses\000legacy cache\000Last level cache read misses\000legacy-cache-config=0x10002\000\00000\000\000\000\000\000"
+/* offset=56417 */ "llc-load-miss\000legacy cache\000Last level cache read misses\000legacy-cache-config=0x10002\000\00010\000\000\000\000\000"
+/* offset=56509 */ "llc-loads\000legacy cache\000Last level cache read accesses\000legacy-cache-config=2\000\00000\000\000\000\000\000"
+/* offset=56593 */ "llc-loads-refs\000legacy cache\000Last level cache read accesses\000legacy-cache-config=2\000\00010\000\000\000\000\000"
+/* offset=56682 */ "llc-loads-reference\000legacy cache\000Last level cache read accesses\000legacy-cache-config=2\000\00010\000\000\000\000\000"
+/* offset=56776 */ "llc-loads-ops\000legacy cache\000Last level cache read accesses\000legacy-cache-config=2\000\00010\000\000\000\000\000"
+/* offset=56864 */ "llc-loads-access\000legacy cache\000Last level cache read accesses\000legacy-cache-config=2\000\00010\000\000\000\000\000"
+/* offset=56955 */ "llc-loads-misses\000legacy cache\000Last level cache read misses\000legacy-cache-config=0x10002\000\00010\000\000\000\000\000"
+/* offset=57050 */ "llc-loads-miss\000legacy cache\000Last level cache read misses\000legacy-cache-config=0x10002\000\00010\000\000\000\000\000"
+/* offset=57143 */ "llc-read\000legacy cache\000Last level cache read accesses\000legacy-cache-config=2\000\00010\000\000\000\000\000"
+/* offset=57226 */ "llc-read-refs\000legacy cache\000Last level cache read accesses\000legacy-cache-config=2\000\00010\000\000\000\000\000"
+/* offset=57314 */ "llc-read-reference\000legacy cache\000Last level cache read accesses\000legacy-cache-config=2\000\00010\000\000\000\000\000"
+/* offset=57407 */ "llc-read-ops\000legacy cache\000Last level cache read accesses\000legacy-cache-config=2\000\00010\000\000\000\000\000"
+/* offset=57494 */ "llc-read-access\000legacy cache\000Last level cache read accesses\000legacy-cache-config=2\000\00010\000\000\000\000\000"
+/* offset=57584 */ "llc-read-misses\000legacy cache\000Last level cache read misses\000legacy-cache-config=0x10002\000\00010\000\000\000\000\000"
+/* offset=57678 */ "llc-read-miss\000legacy cache\000Last level cache read misses\000legacy-cache-config=0x10002\000\00010\000\000\000\000\000"
+/* offset=57770 */ "llc-store\000legacy cache\000Last level cache write accesses\000legacy-cache-config=0x102\000\00010\000\000\000\000\000"
+/* offset=57859 */ "llc-store-refs\000legacy cache\000Last level cache write accesses\000legacy-cache-config=0x102\000\00010\000\000\000\000\000"
+/* offset=57953 */ "llc-store-reference\000legacy cache\000Last level cache write accesses\000legacy-cache-config=0x102\000\00010\000\000\000\000\000"
+/* offset=58052 */ "llc-store-ops\000legacy cache\000Last level cache write accesses\000legacy-cache-config=0x102\000\00010\000\000\000\000\000"
+/* offset=58145 */ "llc-store-access\000legacy cache\000Last level cache write accesses\000legacy-cache-config=0x102\000\00010\000\000\000\000\000"
+/* offset=58241 */ "llc-store-misses\000legacy cache\000Last level cache write misses\000legacy-cache-config=0x10102\000\00000\000\000\000\000\000"
+/* offset=58337 */ "llc-store-miss\000legacy cache\000Last level cache write misses\000legacy-cache-config=0x10102\000\00010\000\000\000\000\000"
+/* offset=58431 */ "llc-stores\000legacy cache\000Last level cache write accesses\000legacy-cache-config=0x102\000\00000\000\000\000\000\000"
+/* offset=58521 */ "llc-stores-refs\000legacy cache\000Last level cache write accesses\000legacy-cache-config=0x102\000\00010\000\000\000\000\000"
+/* offset=58616 */ "llc-stores-reference\000legacy cache\000Last level cache write accesses\000legacy-cache-config=0x102\000\00010\000\000\000\000\000"
+/* offset=58716 */ "llc-stores-ops\000legacy cache\000Last level cache write accesses\000legacy-cache-config=0x102\000\00010\000\000\000\000\000"
+/* offset=58810 */ "llc-stores-access\000legacy cache\000Last level cache write accesses\000legacy-cache-config=0x102\000\00010\000\000\000\000\000"
+/* offset=58907 */ "llc-stores-misses\000legacy cache\000Last level cache write misses\000legacy-cache-config=0x10102\000\00010\000\000\000\000\000"
+/* offset=59004 */ "llc-stores-miss\000legacy cache\000Last level cache write misses\000legacy-cache-config=0x10102\000\00010\000\000\000\000\000"
+/* offset=59099 */ "llc-write\000legacy cache\000Last level cache write accesses\000legacy-cache-config=0x102\000\00010\000\000\000\000\000"
+/* offset=59188 */ "llc-write-refs\000legacy cache\000Last level cache write accesses\000legacy-cache-config=0x102\000\00010\000\000\000\000\000"
+/* offset=59282 */ "llc-write-reference\000legacy cache\000Last level cache write accesses\000legacy-cache-config=0x102\000\00010\000\000\000\000\000"
+/* offset=59381 */ "llc-write-ops\000legacy cache\000Last level cache write accesses\000legacy-cache-config=0x102\000\00010\000\000\000\000\000"
+/* offset=59474 */ "llc-write-access\000legacy cache\000Last level cache write accesses\000legacy-cache-config=0x102\000\00010\000\000\000\000\000"
+/* offset=59570 */ "llc-write-misses\000legacy cache\000Last level cache write misses\000legacy-cache-config=0x10102\000\00010\000\000\000\000\000"
+/* offset=59666 */ "llc-write-miss\000legacy cache\000Last level cache write misses\000legacy-cache-config=0x10102\000\00010\000\000\000\000\000"
+/* offset=59760 */ "llc-prefetch\000legacy cache\000Last level cache prefetch accesses\000legacy-cache-config=0x202\000\00010\000\000\000\000\000"
+/* offset=59855 */ "llc-prefetch-refs\000legacy cache\000Last level cache prefetch accesses\000legacy-cache-config=0x202\000\00010\000\000\000\000\000"
+/* offset=59955 */ "llc-prefetch-reference\000legacy cache\000Last level cache prefetch accesses\000legacy-cache-config=0x202\000\00010\000\000\000\000\000"
+/* offset=60060 */ "llc-prefetch-ops\000legacy cache\000Last level cache prefetch accesses\000legacy-cache-config=0x202\000\00010\000\000\000\000\000"
+/* offset=60159 */ "llc-prefetch-access\000legacy cache\000Last level cache prefetch accesses\000legacy-cache-config=0x202\000\00010\000\000\000\000\000"
+/* offset=60261 */ "llc-prefetch-misses\000legacy cache\000Last level cache prefetch misses\000legacy-cache-config=0x10202\000\00000\000\000\000\000\000"
+/* offset=60363 */ "llc-prefetch-miss\000legacy cache\000Last level cache prefetch misses\000legacy-cache-config=0x10202\000\00010\000\000\000\000\000"
+/* offset=60463 */ "llc-prefetches\000legacy cache\000Last level cache prefetch accesses\000legacy-cache-config=0x202\000\00000\000\000\000\000\000"
+/* offset=60560 */ "llc-prefetches-refs\000legacy cache\000Last level cache prefetch accesses\000legacy-cache-config=0x202\000\00010\000\000\000\000\000"
+/* offset=60662 */ "llc-prefetches-reference\000legacy cache\000Last level cache prefetch accesses\000legacy-cache-config=0x202\000\00010\000\000\000\000\000"
+/* offset=60769 */ "llc-prefetches-ops\000legacy cache\000Last level cache prefetch accesses\000legacy-cache-config=0x202\000\00010\000\000\000\000\000"
+/* offset=60870 */ "llc-prefetches-access\000legacy cache\000Last level cache prefetch accesses\000legacy-cache-config=0x202\000\00010\000\000\000\000\000"
+/* offset=60974 */ "llc-prefetches-misses\000legacy cache\000Last level cache prefetch misses\000legacy-cache-config=0x10202\000\00010\000\000\000\000\000"
+/* offset=61078 */ "llc-prefetches-miss\000legacy cache\000Last level cache prefetch misses\000legacy-cache-config=0x10202\000\00010\000\000\000\000\000"
+/* offset=61180 */ "llc-speculative-read\000legacy cache\000Last level cache prefetch accesses\000legacy-cache-config=0x202\000\00010\000\000\000\000\000"
+/* offset=61283 */ "llc-speculative-read-refs\000legacy cache\000Last level cache prefetch accesses\000legacy-cache-config=0x202\000\00010\000\000\000\000\000"
+/* offset=61391 */ "llc-speculative-read-reference\000legacy cache\000Last level cache prefetch accesses\000legacy-cache-config=0x202\000\00010\000\000\000\000\000"
+/* offset=61504 */ "llc-speculative-read-ops\000legacy cache\000Last level cache prefetch accesses\000legacy-cache-config=0x202\000\00010\000\000\000\000\000"
+/* offset=61611 */ "llc-speculative-read-access\000legacy cache\000Last level cache prefetch accesses\000legacy-cache-config=0x202\000\00010\000\000\000\000\000"
+/* offset=61721 */ "llc-speculative-read-misses\000legacy cache\000Last level cache prefetch misses\000legacy-cache-config=0x10202\000\00010\000\000\000\000\000"
+/* offset=61831 */ "llc-speculative-read-miss\000legacy cache\000Last level cache prefetch misses\000legacy-cache-config=0x10202\000\00010\000\000\000\000\000"
+/* offset=61939 */ "llc-speculative-load\000legacy cache\000Last level cache prefetch accesses\000legacy-cache-config=0x202\000\00010\000\000\000\000\000"
+/* offset=62042 */ "llc-speculative-load-refs\000legacy cache\000Last level cache prefetch accesses\000legacy-cache-config=0x202\000\00010\000\000\000\000\000"
+/* offset=62150 */ "llc-speculative-load-reference\000legacy cache\000Last level cache prefetch accesses\000legacy-cache-config=0x202\000\00010\000\000\000\000\000"
+/* offset=62263 */ "llc-speculative-load-ops\000legacy cache\000Last level cache prefetch accesses\000legacy-cache-config=0x202\000\00010\000\000\000\000\000"
+/* offset=62370 */ "llc-speculative-load-access\000legacy cache\000Last level cache prefetch accesses\000legacy-cache-config=0x202\000\00010\000\000\000\000\000"
+/* offset=62480 */ "llc-speculative-load-misses\000legacy cache\000Last level cache prefetch misses\000legacy-cache-config=0x10202\000\00010\000\000\000\000\000"
+/* offset=62590 */ "llc-speculative-load-miss\000legacy cache\000Last level cache prefetch misses\000legacy-cache-config=0x10202\000\00010\000\000\000\000\000"
+/* offset=62698 */ "llc-refs\000legacy cache\000Last level cache read accesses\000legacy-cache-config=2\000\00010\000\000\000\000\000"
+/* offset=62781 */ "llc-reference\000legacy cache\000Last level cache read accesses\000legacy-cache-config=2\000\00010\000\000\000\000\000"
+/* offset=62869 */ "llc-ops\000legacy cache\000Last level cache read accesses\000legacy-cache-config=2\000\00010\000\000\000\000\000"
+/* offset=62951 */ "llc-access\000legacy cache\000Last level cache read accesses\000legacy-cache-config=2\000\00010\000\000\000\000\000"
+/* offset=63036 */ "llc-misses\000legacy cache\000Last level cache read misses\000legacy-cache-config=0x10002\000\00010\000\000\000\000\000"
+/* offset=63125 */ "llc-miss\000legacy cache\000Last level cache read misses\000legacy-cache-config=0x10002\000\00010\000\000\000\000\000"
+/* offset=63212 */ "l2\000legacy cache\000Level 2 (or higher) last level cache read accesses\000legacy-cache-config=2\000\00010\000\000\000\000\000"
+/* offset=63309 */ "l2-load\000legacy cache\000Level 2 (or higher) last level cache read accesses\000legacy-cache-config=2\000\00010\000\000\000\000\000"
+/* offset=63411 */ "l2-load-refs\000legacy cache\000Level 2 (or higher) last level cache read accesses\000legacy-cache-config=2\000\00010\000\000\000\000\000"
+/* offset=63518 */ "l2-load-reference\000legacy cache\000Level 2 (or higher) last level cache read accesses\000legacy-cache-config=2\000\00010\000\000\000\000\000"
+/* offset=63630 */ "l2-load-ops\000legacy cache\000Level 2 (or higher) last level cache read accesses\000legacy-cache-config=2\000\00010\000\000\000\000\000"
+/* offset=63736 */ "l2-load-access\000legacy cache\000Level 2 (or higher) last level cache read accesses\000legacy-cache-config=2\000\00010\000\000\000\000\000"
+/* offset=63845 */ "l2-load-misses\000legacy cache\000Level 2 (or higher) last level cache read misses\000legacy-cache-config=0x10002\000\00010\000\000\000\000\000"
+/* offset=63958 */ "l2-load-miss\000legacy cache\000Level 2 (or higher) last level cache read misses\000legacy-cache-config=0x10002\000\00010\000\000\000\000\000"
+/* offset=64069 */ "l2-loads\000legacy cache\000Level 2 (or higher) last level cache read accesses\000legacy-cache-config=2\000\00010\000\000\000\000\000"
+/* offset=64172 */ "l2-loads-refs\000legacy cache\000Level 2 (or higher) last level cache read accesses\000legacy-cache-config=2\000\00010\000\000\000\000\000"
+/* offset=64280 */ "l2-loads-reference\000legacy cache\000Level 2 (or higher) last level cache read accesses\000legacy-cache-config=2\000\00010\000\000\000\000\000"
+/* offset=64393 */ "l2-loads-ops\000legacy cache\000Level 2 (or higher) last level cache read accesses\000legacy-cache-config=2\000\00010\000\000\000\000\000"
+/* offset=64500 */ "l2-loads-access\000legacy cache\000Level 2 (or higher) last level cache read accesses\000legacy-cache-config=2\000\00010\000\000\000\000\000"
+/* offset=64610 */ "l2-loads-misses\000legacy cache\000Level 2 (or higher) last level cache read misses\000legacy-cache-config=0x10002\000\00010\000\000\000\000\000"
+/* offset=64724 */ "l2-loads-miss\000legacy cache\000Level 2 (or higher) last level cache read misses\000legacy-cache-config=0x10002\000\00010\000\000\000\000\000"
+/* offset=64836 */ "l2-read\000legacy cache\000Level 2 (or higher) last level cache read accesses\000legacy-cache-config=2\000\00010\000\000\000\000\000"
+/* offset=64938 */ "l2-read-refs\000legacy cache\000Level 2 (or higher) last level cache read accesses\000legacy-cache-config=2\000\00010\000\000\000\000\000"
+/* offset=65045 */ "l2-read-reference\000legacy cache\000Level 2 (or higher) last level cache read accesses\000legacy-cache-config=2\000\00010\000\000\000\000\000"
+/* offset=65157 */ "l2-read-ops\000legacy cache\000Level 2 (or higher) last level cache read accesses\000legacy-cache-config=2\000\00010\000\000\000\000\000"
+/* offset=65263 */ "l2-read-access\000legacy cache\000Level 2 (or higher) last level cache read accesses\000legacy-cache-config=2\000\00010\000\000\000\000\000"
+/* offset=65372 */ "l2-read-misses\000legacy cache\000Level 2 (or higher) last level cache read misses\000legacy-cache-config=0x10002\000\00010\000\000\000\000\000"
+/* offset=65485 */ "l2-read-miss\000legacy cache\000Level 2 (or higher) last level cache read misses\000legacy-cache-config=0x10002\000\00010\000\000\000\000\000"
+/* offset=65596 */ "l2-store\000legacy cache\000Level 2 (or higher) last level cache write accesses\000legacy-cache-config=0x102\000\00010\000\000\000\000\000"
+/* offset=65704 */ "l2-store-refs\000legacy cache\000Level 2 (or higher) last level cache write accesses\000legacy-cache-config=0x102\000\00010\000\000\000\000\000"
+/* offset=65817 */ "l2-store-reference\000legacy cache\000Level 2 (or higher) last level cache write accesses\000legacy-cache-config=0x102\000\00010\000\000\000\000\000"
+/* offset=65935 */ "l2-store-ops\000legacy cache\000Level 2 (or higher) last level cache write accesses\000legacy-cache-config=0x102\000\00010\000\000\000\000\000"
+/* offset=66047 */ "l2-store-access\000legacy cache\000Level 2 (or higher) last level cache write accesses\000legacy-cache-config=0x102\000\00010\000\000\000\000\000"
+/* offset=66162 */ "l2-store-misses\000legacy cache\000Level 2 (or higher) last level cache write misses\000legacy-cache-config=0x10102\000\00010\000\000\000\000\000"
+/* offset=66277 */ "l2-store-miss\000legacy cache\000Level 2 (or higher) last level cache write misses\000legacy-cache-config=0x10102\000\00010\000\000\000\000\000"
+/* offset=66390 */ "l2-stores\000legacy cache\000Level 2 (or higher) last level cache write accesses\000legacy-cache-config=0x102\000\00010\000\000\000\000\000"
+/* offset=66499 */ "l2-stores-refs\000legacy cache\000Level 2 (or higher) last level cache write accesses\000legacy-cache-config=0x102\000\00010\000\000\000\000\000"
+/* offset=66613 */ "l2-stores-reference\000legacy cache\000Level 2 (or higher) last level cache write accesses\000legacy-cache-config=0x102\000\00010\000\000\000\000\000"
+/* offset=66732 */ "l2-stores-ops\000legacy cache\000Level 2 (or higher) last level cache write accesses\000legacy-cache-config=0x102\000\00010\000\000\000\000\000"
+/* offset=66845 */ "l2-stores-access\000legacy cache\000Level 2 (or higher) last level cache write accesses\000legacy-cache-config=0x102\000\00010\000\000\000\000\000"
+/* offset=66961 */ "l2-stores-misses\000legacy cache\000Level 2 (or higher) last level cache write misses\000legacy-cache-config=0x10102\000\00010\000\000\000\000\000"
+/* offset=67077 */ "l2-stores-miss\000legacy cache\000Level 2 (or higher) last level cache write misses\000legacy-cache-config=0x10102\000\00010\000\000\000\000\000"
+/* offset=67191 */ "l2-write\000legacy cache\000Level 2 (or higher) last level cache write accesses\000legacy-cache-config=0x102\000\00010\000\000\000\000\000"
+/* offset=67299 */ "l2-write-refs\000legacy cache\000Level 2 (or higher) last level cache write accesses\000legacy-cache-config=0x102\000\00010\000\000\000\000\000"
+/* offset=67412 */ "l2-write-reference\000legacy cache\000Level 2 (or higher) last level cache write accesses\000legacy-cache-config=0x102\000\00010\000\000\000\000\000"
+/* offset=67530 */ "l2-write-ops\000legacy cache\000Level 2 (or higher) last level cache write accesses\000legacy-cache-config=0x102\000\00010\000\000\000\000\000"
+/* offset=67642 */ "l2-write-access\000legacy cache\000Level 2 (or higher) last level cache write accesses\000legacy-cache-config=0x102\000\00010\000\000\000\000\000"
+/* offset=67757 */ "l2-write-misses\000legacy cache\000Level 2 (or higher) last level cache write misses\000legacy-cache-config=0x10102\000\00010\000\000\000\000\000"
+/* offset=67872 */ "l2-write-miss\000legacy cache\000Level 2 (or higher) last level cache write misses\000legacy-cache-config=0x10102\000\00010\000\000\000\000\000"
+/* offset=67985 */ "l2-prefetch\000legacy cache\000Level 2 (or higher) last level cache prefetch accesses\000legacy-cache-config=0x202\000\00010\000\000\000\000\000"
+/* offset=68099 */ "l2-prefetch-refs\000legacy cache\000Level 2 (or higher) last level cache prefetch accesses\000legacy-cache-config=0x202\000\00010\000\000\000\000\000"
+/* offset=68218 */ "l2-prefetch-reference\000legacy cache\000Level 2 (or higher) last level cache prefetch accesses\000legacy-cache-config=0x202\000\00010\000\000\000\000\000"
+/* offset=68342 */ "l2-prefetch-ops\000legacy cache\000Level 2 (or higher) last level cache prefetch accesses\000legacy-cache-config=0x202\000\00010\000\000\000\000\000"
+/* offset=68460 */ "l2-prefetch-access\000legacy cache\000Level 2 (or higher) last level cache prefetch accesses\000legacy-cache-config=0x202\000\00010\000\000\000\000\000"
+/* offset=68581 */ "l2-prefetch-misses\000legacy cache\000Level 2 (or higher) last level cache prefetch misses\000legacy-cache-config=0x10202\000\00010\000\000\000\000\000"
+/* offset=68702 */ "l2-prefetch-miss\000legacy cache\000Level 2 (or higher) last level cache prefetch misses\000legacy-cache-config=0x10202\000\00010\000\000\000\000\000"
+/* offset=68821 */ "l2-prefetches\000legacy cache\000Level 2 (or higher) last level cache prefetch accesses\000legacy-cache-config=0x202\000\00010\000\000\000\000\000"
+/* offset=68937 */ "l2-prefetches-refs\000legacy cache\000Level 2 (or higher) last level cache prefetch accesses\000legacy-cache-config=0x202\000\00010\000\000\000\000\000"
+/* offset=69058 */ "l2-prefetches-reference\000legacy cache\000Level 2 (or higher) last level cache prefetch accesses\000legacy-cache-config=0x202\000\00010\000\000\000\000\000"
+/* offset=69184 */ "l2-prefetches-ops\000legacy cache\000Level 2 (or higher) last level cache prefetch accesses\000legacy-cache-config=0x202\000\00010\000\000\000\000\000"
+/* offset=69304 */ "l2-prefetches-access\000legacy cache\000Level 2 (or higher) last level cache prefetch accesses\000legacy-cache-config=0x202\000\00010\000\000\000\000\000"
+/* offset=69427 */ "l2-prefetches-misses\000legacy cache\000Level 2 (or higher) last level cache prefetch misses\000legacy-cache-config=0x10202\000\00010\000\000\000\000\000"
+/* offset=69550 */ "l2-prefetches-miss\000legacy cache\000Level 2 (or higher) last level cache prefetch misses\000legacy-cache-config=0x10202\000\00010\000\000\000\000\000"
+/* offset=69671 */ "l2-speculative-read\000legacy cache\000Level 2 (or higher) last level cache prefetch accesses\000legacy-cache-config=0x202\000\00010\000\000\000\000\000"
+/* offset=69793 */ "l2-speculative-read-refs\000legacy cache\000Level 2 (or higher) last level cache prefetch accesses\000legacy-cache-config=0x202\000\00010\000\000\000\000\000"
+/* offset=69920 */ "l2-speculative-read-reference\000legacy cache\000Level 2 (or higher) last level cache prefetch accesses\000legacy-cache-config=0x202\000\00010\000\000\000\000\000"
+/* offset=70052 */ "l2-speculative-read-ops\000legacy cache\000Level 2 (or higher) last level cache prefetch accesses\000legacy-cache-config=0x202\000\00010\000\000\000\000\000"
+/* offset=70178 */ "l2-speculative-read-access\000legacy cache\000Level 2 (or higher) last level cache prefetch accesses\000legacy-cache-config=0x202\000\00010\000\000\000\000\000"
+/* offset=70307 */ "l2-speculative-read-misses\000legacy cache\000Level 2 (or higher) last level cache prefetch misses\000legacy-cache-config=0x10202\000\00010\000\000\000\000\000"
+/* offset=70436 */ "l2-speculative-read-miss\000legacy cache\000Level 2 (or higher) last level cache prefetch misses\000legacy-cache-config=0x10202\000\00010\000\000\000\000\000"
+/* offset=70563 */ "l2-speculative-load\000legacy cache\000Level 2 (or higher) last level cache prefetch accesses\000legacy-cache-config=0x202\000\00010\000\000\000\000\000"
+/* offset=70685 */ "l2-speculative-load-refs\000legacy cache\000Level 2 (or higher) last level cache prefetch accesses\000legacy-cache-config=0x202\000\00010\000\000\000\000\000"
+/* offset=70812 */ "l2-speculative-load-reference\000legacy cache\000Level 2 (or higher) last level cache prefetch accesses\000legacy-cache-config=0x202\000\00010\000\000\000\000\000"
+/* offset=70944 */ "l2-speculative-load-ops\000legacy cache\000Level 2 (or higher) last level cache prefetch accesses\000legacy-cache-config=0x202\000\00010\000\000\000\000\000"
+/* offset=71070 */ "l2-speculative-load-access\000legacy cache\000Level 2 (or higher) last level cache prefetch accesses\000legacy-cache-config=0x202\000\00010\000\000\000\000\000"
+/* offset=71199 */ "l2-speculative-load-misses\000legacy cache\000Level 2 (or higher) last level cache prefetch misses\000legacy-cache-config=0x10202\000\00010\000\000\000\000\000"
+/* offset=71328 */ "l2-speculative-load-miss\000legacy cache\000Level 2 (or higher) last level cache prefetch misses\000legacy-cache-config=0x10202\000\00010\000\000\000\000\000"
+/* offset=71455 */ "l2-refs\000legacy cache\000Level 2 (or higher) last level cache read accesses\000legacy-cache-config=2\000\00010\000\000\000\000\000"
+/* offset=71557 */ "l2-reference\000legacy cache\000Level 2 (or higher) last level cache read accesses\000legacy-cache-config=2\000\00010\000\000\000\000\000"
+/* offset=71664 */ "l2-ops\000legacy cache\000Level 2 (or higher) last level cache read accesses\000legacy-cache-config=2\000\00010\000\000\000\000\000"
+/* offset=71765 */ "l2-access\000legacy cache\000Level 2 (or higher) last level cache read accesses\000legacy-cache-config=2\000\00010\000\000\000\000\000"
+/* offset=71869 */ "l2-misses\000legacy cache\000Level 2 (or higher) last level cache read misses\000legacy-cache-config=0x10002\000\00010\000\000\000\000\000"
+/* offset=71977 */ "l2-miss\000legacy cache\000Level 2 (or higher) last level cache read misses\000legacy-cache-config=0x10002\000\00010\000\000\000\000\000"
+/* offset=72083 */ "dtlb\000legacy cache\000Data TLB read accesses\000legacy-cache-config=3\000\00010\000\000\000\000\000"
+/* offset=72154 */ "dtlb-load\000legacy cache\000Data TLB read accesses\000legacy-cache-config=3\000\00010\000\000\000\000\000"
+/* offset=72230 */ "dtlb-load-refs\000legacy cache\000Data TLB read accesses\000legacy-cache-config=3\000\00010\000\000\000\000\000"
+/* offset=72311 */ "dtlb-load-reference\000legacy cache\000Data TLB read accesses\000legacy-cache-config=3\000\00010\000\000\000\000\000"
+/* offset=72397 */ "dtlb-load-ops\000legacy cache\000Data TLB read accesses\000legacy-cache-config=3\000\00010\000\000\000\000\000"
+/* offset=72477 */ "dtlb-load-access\000legacy cache\000Data TLB read accesses\000legacy-cache-config=3\000\00010\000\000\000\000\000"
+/* offset=72560 */ "dtlb-load-misses\000legacy cache\000Data TLB read misses\000legacy-cache-config=0x10003\000\00000\000\000\000\000\000"
+/* offset=72647 */ "dtlb-load-miss\000legacy cache\000Data TLB read misses\000legacy-cache-config=0x10003\000\00010\000\000\000\000\000"
+/* offset=72732 */ "dtlb-loads\000legacy cache\000Data TLB read accesses\000legacy-cache-config=3\000\00000\000\000\000\000\000"
+/* offset=72809 */ "dtlb-loads-refs\000legacy cache\000Data TLB read accesses\000legacy-cache-config=3\000\00010\000\000\000\000\000"
+/* offset=72891 */ "dtlb-loads-reference\000legacy cache\000Data TLB read accesses\000legacy-cache-config=3\000\00010\000\000\000\000\000"
+/* offset=72978 */ "dtlb-loads-ops\000legacy cache\000Data TLB read accesses\000legacy-cache-config=3\000\00010\000\000\000\000\000"
+/* offset=73059 */ "dtlb-loads-access\000legacy cache\000Data TLB read accesses\000legacy-cache-config=3\000\00010\000\000\000\000\000"
+/* offset=73143 */ "dtlb-loads-misses\000legacy cache\000Data TLB read misses\000legacy-cache-config=0x10003\000\00010\000\000\000\000\000"
+/* offset=73231 */ "dtlb-loads-miss\000legacy cache\000Data TLB read misses\000legacy-cache-config=0x10003\000\00010\000\000\000\000\000"
+/* offset=73317 */ "dtlb-read\000legacy cache\000Data TLB read accesses\000legacy-cache-config=3\000\00010\000\000\000\000\000"
+/* offset=73393 */ "dtlb-read-refs\000legacy cache\000Data TLB read accesses\000legacy-cache-config=3\000\00010\000\000\000\000\000"
+/* offset=73474 */ "dtlb-read-reference\000legacy cache\000Data TLB read accesses\000legacy-cache-config=3\000\00010\000\000\000\000\000"
+/* offset=73560 */ "dtlb-read-ops\000legacy cache\000Data TLB read accesses\000legacy-cache-config=3\000\00010\000\000\000\000\000"
+/* offset=73640 */ "dtlb-read-access\000legacy cache\000Data TLB read accesses\000legacy-cache-config=3\000\00010\000\000\000\000\000"
+/* offset=73723 */ "dtlb-read-misses\000legacy cache\000Data TLB read misses\000legacy-cache-config=0x10003\000\00010\000\000\000\000\000"
+/* offset=73810 */ "dtlb-read-miss\000legacy cache\000Data TLB read misses\000legacy-cache-config=0x10003\000\00010\000\000\000\000\000"
+/* offset=73895 */ "dtlb-store\000legacy cache\000Data TLB write accesses\000legacy-cache-config=0x103\000\00010\000\000\000\000\000"
+/* offset=73977 */ "dtlb-store-refs\000legacy cache\000Data TLB write accesses\000legacy-cache-config=0x103\000\00010\000\000\000\000\000"
+/* offset=74064 */ "dtlb-store-reference\000legacy cache\000Data TLB write accesses\000legacy-cache-config=0x103\000\00010\000\000\000\000\000"
+/* offset=74156 */ "dtlb-store-ops\000legacy cache\000Data TLB write accesses\000legacy-cache-config=0x103\000\00010\000\000\000\000\000"
+/* offset=74242 */ "dtlb-store-access\000legacy cache\000Data TLB write accesses\000legacy-cache-config=0x103\000\00010\000\000\000\000\000"
+/* offset=74331 */ "dtlb-store-misses\000legacy cache\000Data TLB write misses\000legacy-cache-config=0x10103\000\00000\000\000\000\000\000"
+/* offset=74420 */ "dtlb-store-miss\000legacy cache\000Data TLB write misses\000legacy-cache-config=0x10103\000\00010\000\000\000\000\000"
+/* offset=74507 */ "dtlb-stores\000legacy cache\000Data TLB write accesses\000legacy-cache-config=0x103\000\00000\000\000\000\000\000"
+/* offset=74590 */ "dtlb-stores-refs\000legacy cache\000Data TLB write accesses\000legacy-cache-config=0x103\000\00010\000\000\000\000\000"
+/* offset=74678 */ "dtlb-stores-reference\000legacy cache\000Data TLB write accesses\000legacy-cache-config=0x103\000\00010\000\000\000\000\000"
+/* offset=74771 */ "dtlb-stores-ops\000legacy cache\000Data TLB write accesses\000legacy-cache-config=0x103\000\00010\000\000\000\000\000"
+/* offset=74858 */ "dtlb-stores-access\000legacy cache\000Data TLB write accesses\000legacy-cache-config=0x103\000\00010\000\000\000\000\000"
+/* offset=74948 */ "dtlb-stores-misses\000legacy cache\000Data TLB write misses\000legacy-cache-config=0x10103\000\00010\000\000\000\000\000"
+/* offset=75038 */ "dtlb-stores-miss\000legacy cache\000Data TLB write misses\000legacy-cache-config=0x10103\000\00010\000\000\000\000\000"
+/* offset=75126 */ "dtlb-write\000legacy cache\000Data TLB write accesses\000legacy-cache-config=0x103\000\00010\000\000\000\000\000"
+/* offset=75208 */ "dtlb-write-refs\000legacy cache\000Data TLB write accesses\000legacy-cache-config=0x103\000\00010\000\000\000\000\000"
+/* offset=75295 */ "dtlb-write-reference\000legacy cache\000Data TLB write accesses\000legacy-cache-config=0x103\000\00010\000\000\000\000\000"
+/* offset=75387 */ "dtlb-write-ops\000legacy cache\000Data TLB write accesses\000legacy-cache-config=0x103\000\00010\000\000\000\000\000"
+/* offset=75473 */ "dtlb-write-access\000legacy cache\000Data TLB write accesses\000legacy-cache-config=0x103\000\00010\000\000\000\000\000"
+/* offset=75562 */ "dtlb-write-misses\000legacy cache\000Data TLB write misses\000legacy-cache-config=0x10103\000\00010\000\000\000\000\000"
+/* offset=75651 */ "dtlb-write-miss\000legacy cache\000Data TLB write misses\000legacy-cache-config=0x10103\000\00010\000\000\000\000\000"
+/* offset=75738 */ "dtlb-prefetch\000legacy cache\000Data TLB prefetch accesses\000legacy-cache-config=0x203\000\00010\000\000\000\000\000"
+/* offset=75826 */ "dtlb-prefetch-refs\000legacy cache\000Data TLB prefetch accesses\000legacy-cache-config=0x203\000\00010\000\000\000\000\000"
+/* offset=75919 */ "dtlb-prefetch-reference\000legacy cache\000Data TLB prefetch accesses\000legacy-cache-config=0x203\000\00010\000\000\000\000\000"
+/* offset=76017 */ "dtlb-prefetch-ops\000legacy cache\000Data TLB prefetch accesses\000legacy-cache-config=0x203\000\00010\000\000\000\000\000"
+/* offset=76109 */ "dtlb-prefetch-access\000legacy cache\000Data TLB prefetch accesses\000legacy-cache-config=0x203\000\00010\000\000\000\000\000"
+/* offset=76204 */ "dtlb-prefetch-misses\000legacy cache\000Data TLB prefetch misses\000legacy-cache-config=0x10203\000\00000\000\000\000\000\000"
+/* offset=76299 */ "dtlb-prefetch-miss\000legacy cache\000Data TLB prefetch misses\000legacy-cache-config=0x10203\000\00010\000\000\000\000\000"
+/* offset=76392 */ "dtlb-prefetches\000legacy cache\000Data TLB prefetch accesses\000legacy-cache-config=0x203\000\00000\000\000\000\000\000"
+/* offset=76482 */ "dtlb-prefetches-refs\000legacy cache\000Data TLB prefetch accesses\000legacy-cache-config=0x203\000\00010\000\000\000\000\000"
+/* offset=76577 */ "dtlb-prefetches-reference\000legacy cache\000Data TLB prefetch accesses\000legacy-cache-config=0x203\000\00010\000\000\000\000\000"
+/* offset=76677 */ "dtlb-prefetches-ops\000legacy cache\000Data TLB prefetch accesses\000legacy-cache-config=0x203\000\00010\000\000\000\000\000"
+/* offset=76771 */ "dtlb-prefetches-access\000legacy cache\000Data TLB prefetch accesses\000legacy-cache-config=0x203\000\00010\000\000\000\000\000"
+/* offset=76868 */ "dtlb-prefetches-misses\000legacy cache\000Data TLB prefetch misses\000legacy-cache-config=0x10203\000\00010\000\000\000\000\000"
+/* offset=76965 */ "dtlb-prefetches-miss\000legacy cache\000Data TLB prefetch misses\000legacy-cache-config=0x10203\000\00010\000\000\000\000\000"
+/* offset=77060 */ "dtlb-speculative-read\000legacy cache\000Data TLB prefetch accesses\000legacy-cache-config=0x203\000\00010\000\000\000\000\000"
+/* offset=77156 */ "dtlb-speculative-read-refs\000legacy cache\000Data TLB prefetch accesses\000legacy-cache-config=0x203\000\00010\000\000\000\000\000"
+/* offset=77257 */ "dtlb-speculative-read-reference\000legacy cache\000Data TLB prefetch accesses\000legacy-cache-config=0x203\000\00010\000\000\000\000\000"
+/* offset=77363 */ "dtlb-speculative-read-ops\000legacy cache\000Data TLB prefetch accesses\000legacy-cache-config=0x203\000\00010\000\000\000\000\000"
+/* offset=77463 */ "dtlb-speculative-read-access\000legacy cache\000Data TLB prefetch accesses\000legacy-cache-config=0x203\000\00010\000\000\000\000\000"
+/* offset=77566 */ "dtlb-speculative-read-misses\000legacy cache\000Data TLB prefetch misses\000legacy-cache-config=0x10203\000\00010\000\000\000\000\000"
+/* offset=77669 */ "dtlb-speculative-read-miss\000legacy cache\000Data TLB prefetch misses\000legacy-cache-config=0x10203\000\00010\000\000\000\000\000"
+/* offset=77770 */ "dtlb-speculative-load\000legacy cache\000Data TLB prefetch accesses\000legacy-cache-config=0x203\000\00010\000\000\000\000\000"
+/* offset=77866 */ "dtlb-speculative-load-refs\000legacy cache\000Data TLB prefetch accesses\000legacy-cache-config=0x203\000\00010\000\000\000\000\000"
+/* offset=77967 */ "dtlb-speculative-load-reference\000legacy cache\000Data TLB prefetch accesses\000legacy-cache-config=0x203\000\00010\000\000\000\000\000"
+/* offset=78073 */ "dtlb-speculative-load-ops\000legacy cache\000Data TLB prefetch accesses\000legacy-cache-config=0x203\000\00010\000\000\000\000\000"
+/* offset=78173 */ "dtlb-speculative-load-access\000legacy cache\000Data TLB prefetch accesses\000legacy-cache-config=0x203\000\00010\000\000\000\000\000"
+/* offset=78276 */ "dtlb-speculative-load-misses\000legacy cache\000Data TLB prefetch misses\000legacy-cache-config=0x10203\000\00010\000\000\000\000\000"
+/* offset=78379 */ "dtlb-speculative-load-miss\000legacy cache\000Data TLB prefetch misses\000legacy-cache-config=0x10203\000\00010\000\000\000\000\000"
+/* offset=78480 */ "dtlb-refs\000legacy cache\000Data TLB read accesses\000legacy-cache-config=3\000\00010\000\000\000\000\000"
+/* offset=78556 */ "dtlb-reference\000legacy cache\000Data TLB read accesses\000legacy-cache-config=3\000\00010\000\000\000\000\000"
+/* offset=78637 */ "dtlb-ops\000legacy cache\000Data TLB read accesses\000legacy-cache-config=3\000\00010\000\000\000\000\000"
+/* offset=78712 */ "dtlb-access\000legacy cache\000Data TLB read accesses\000legacy-cache-config=3\000\00010\000\000\000\000\000"
+/* offset=78790 */ "dtlb-misses\000legacy cache\000Data TLB read misses\000legacy-cache-config=0x10003\000\00010\000\000\000\000\000"
+/* offset=78872 */ "dtlb-miss\000legacy cache\000Data TLB read misses\000legacy-cache-config=0x10003\000\00010\000\000\000\000\000"
+/* offset=78952 */ "d-tlb\000legacy cache\000Data TLB read accesses\000legacy-cache-config=3\000\00010\000\000\000\000\000"
+/* offset=79024 */ "d-tlb-load\000legacy cache\000Data TLB read accesses\000legacy-cache-config=3\000\00010\000\000\000\000\000"
+/* offset=79101 */ "d-tlb-load-refs\000legacy cache\000Data TLB read accesses\000legacy-cache-config=3\000\00010\000\000\000\000\000"
+/* offset=79183 */ "d-tlb-load-reference\000legacy cache\000Data TLB read accesses\000legacy-cache-config=3\000\00010\000\000\000\000\000"
+/* offset=79270 */ "d-tlb-load-ops\000legacy cache\000Data TLB read accesses\000legacy-cache-config=3\000\00010\000\000\000\000\000"
+/* offset=79351 */ "d-tlb-load-access\000legacy cache\000Data TLB read accesses\000legacy-cache-config=3\000\00010\000\000\000\000\000"
+/* offset=79435 */ "d-tlb-load-misses\000legacy cache\000Data TLB read misses\000legacy-cache-config=0x10003\000\00010\000\000\000\000\000"
+/* offset=79523 */ "d-tlb-load-miss\000legacy cache\000Data TLB read misses\000legacy-cache-config=0x10003\000\00010\000\000\000\000\000"
+/* offset=79609 */ "d-tlb-loads\000legacy cache\000Data TLB read accesses\000legacy-cache-config=3\000\00010\000\000\000\000\000"
+/* offset=79687 */ "d-tlb-loads-refs\000legacy cache\000Data TLB read accesses\000legacy-cache-config=3\000\00010\000\000\000\000\000"
+/* offset=79770 */ "d-tlb-loads-reference\000legacy cache\000Data TLB read accesses\000legacy-cache-config=3\000\00010\000\000\000\000\000"
+/* offset=79858 */ "d-tlb-loads-ops\000legacy cache\000Data TLB read accesses\000legacy-cache-config=3\000\00010\000\000\000\000\000"
+/* offset=79940 */ "d-tlb-loads-access\000legacy cache\000Data TLB read accesses\000legacy-cache-config=3\000\00010\000\000\000\000\000"
+/* offset=80025 */ "d-tlb-loads-misses\000legacy cache\000Data TLB read misses\000legacy-cache-config=0x10003\000\00010\000\000\000\000\000"
+/* offset=80114 */ "d-tlb-loads-miss\000legacy cache\000Data TLB read misses\000legacy-cache-config=0x10003\000\00010\000\000\000\000\000"
+/* offset=80201 */ "d-tlb-read\000legacy cache\000Data TLB read accesses\000legacy-cache-config=3\000\00010\000\000\000\000\000"
+/* offset=80278 */ "d-tlb-read-refs\000legacy cache\000Data TLB read accesses\000legacy-cache-config=3\000\00010\000\000\000\000\000"
+/* offset=80360 */ "d-tlb-read-reference\000legacy cache\000Data TLB read accesses\000legacy-cache-config=3\000\00010\000\000\000\000\000"
+/* offset=80447 */ "d-tlb-read-ops\000legacy cache\000Data TLB read accesses\000legacy-cache-config=3\000\00010\000\000\000\000\000"
+/* offset=80528 */ "d-tlb-read-access\000legacy cache\000Data TLB read accesses\000legacy-cache-config=3\000\00010\000\000\000\000\000"
+/* offset=80612 */ "d-tlb-read-misses\000legacy cache\000Data TLB read misses\000legacy-cache-config=0x10003\000\00010\000\000\000\000\000"
+/* offset=80700 */ "d-tlb-read-miss\000legacy cache\000Data TLB read misses\000legacy-cache-config=0x10003\000\00010\000\000\000\000\000"
+/* offset=80786 */ "d-tlb-store\000legacy cache\000Data TLB write accesses\000legacy-cache-config=0x103\000\00010\000\000\000\000\000"
+/* offset=80869 */ "d-tlb-store-refs\000legacy cache\000Data TLB write accesses\000legacy-cache-config=0x103\000\00010\000\000\000\000\000"
+/* offset=80957 */ "d-tlb-store-reference\000legacy cache\000Data TLB write accesses\000legacy-cache-config=0x103\000\00010\000\000\000\000\000"
+/* offset=81050 */ "d-tlb-store-ops\000legacy cache\000Data TLB write accesses\000legacy-cache-config=0x103\000\00010\000\000\000\000\000"
+/* offset=81137 */ "d-tlb-store-access\000legacy cache\000Data TLB write accesses\000legacy-cache-config=0x103\000\00010\000\000\000\000\000"
+/* offset=81227 */ "d-tlb-store-misses\000legacy cache\000Data TLB write misses\000legacy-cache-config=0x10103\000\00010\000\000\000\000\000"
+/* offset=81317 */ "d-tlb-store-miss\000legacy cache\000Data TLB write misses\000legacy-cache-config=0x10103\000\00010\000\000\000\000\000"
+/* offset=81405 */ "d-tlb-stores\000legacy cache\000Data TLB write accesses\000legacy-cache-config=0x103\000\00010\000\000\000\000\000"
+/* offset=81489 */ "d-tlb-stores-refs\000legacy cache\000Data TLB write accesses\000legacy-cache-config=0x103\000\00010\000\000\000\000\000"
+/* offset=81578 */ "d-tlb-stores-reference\000legacy cache\000Data TLB write accesses\000legacy-cache-config=0x103\000\00010\000\000\000\000\000"
+/* offset=81672 */ "d-tlb-stores-ops\000legacy cache\000Data TLB write accesses\000legacy-cache-config=0x103\000\00010\000\000\000\000\000"
+/* offset=81760 */ "d-tlb-stores-access\000legacy cache\000Data TLB write accesses\000legacy-cache-config=0x103\000\00010\000\000\000\000\000"
+/* offset=81851 */ "d-tlb-stores-misses\000legacy cache\000Data TLB write misses\000legacy-cache-config=0x10103\000\00010\000\000\000\000\000"
+/* offset=81942 */ "d-tlb-stores-miss\000legacy cache\000Data TLB write misses\000legacy-cache-config=0x10103\000\00010\000\000\000\000\000"
+/* offset=82031 */ "d-tlb-write\000legacy cache\000Data TLB write accesses\000legacy-cache-config=0x103\000\00010\000\000\000\000\000"
+/* offset=82114 */ "d-tlb-write-refs\000legacy cache\000Data TLB write accesses\000legacy-cache-config=0x103\000\00010\000\000\000\000\000"
+/* offset=82202 */ "d-tlb-write-reference\000legacy cache\000Data TLB write accesses\000legacy-cache-config=0x103\000\00010\000\000\000\000\000"
+/* offset=82295 */ "d-tlb-write-ops\000legacy cache\000Data TLB write accesses\000legacy-cache-config=0x103\000\00010\000\000\000\000\000"
+/* offset=82382 */ "d-tlb-write-access\000legacy cache\000Data TLB write accesses\000legacy-cache-config=0x103\000\00010\000\000\000\000\000"
+/* offset=82472 */ "d-tlb-write-misses\000legacy cache\000Data TLB write misses\000legacy-cache-config=0x10103\000\00010\000\000\000\000\000"
+/* offset=82562 */ "d-tlb-write-miss\000legacy cache\000Data TLB write misses\000legacy-cache-config=0x10103\000\00010\000\000\000\000\000"
+/* offset=82650 */ "d-tlb-prefetch\000legacy cache\000Data TLB prefetch accesses\000legacy-cache-config=0x203\000\00010\000\000\000\000\000"
+/* offset=82739 */ "d-tlb-prefetch-refs\000legacy cache\000Data TLB prefetch accesses\000legacy-cache-config=0x203\000\00010\000\000\000\000\000"
+/* offset=82833 */ "d-tlb-prefetch-reference\000legacy cache\000Data TLB prefetch accesses\000legacy-cache-config=0x203\000\00010\000\000\000\000\000"
+/* offset=82932 */ "d-tlb-prefetch-ops\000legacy cache\000Data TLB prefetch accesses\000legacy-cache-config=0x203\000\00010\000\000\000\000\000"
+/* offset=83025 */ "d-tlb-prefetch-access\000legacy cache\000Data TLB prefetch accesses\000legacy-cache-config=0x203\000\00010\000\000\000\000\000"
+/* offset=83121 */ "d-tlb-prefetch-misses\000legacy cache\000Data TLB prefetch misses\000legacy-cache-config=0x10203\000\00010\000\000\000\000\000"
+/* offset=83217 */ "d-tlb-prefetch-miss\000legacy cache\000Data TLB prefetch misses\000legacy-cache-config=0x10203\000\00010\000\000\000\000\000"
+/* offset=83311 */ "d-tlb-prefetches\000legacy cache\000Data TLB prefetch accesses\000legacy-cache-config=0x203\000\00010\000\000\000\000\000"
+/* offset=83402 */ "d-tlb-prefetches-refs\000legacy cache\000Data TLB prefetch accesses\000legacy-cache-config=0x203\000\00010\000\000\000\000\000"
+/* offset=83498 */ "d-tlb-prefetches-reference\000legacy cache\000Data TLB prefetch accesses\000legacy-cache-config=0x203\000\00010\000\000\000\000\000"
+/* offset=83599 */ "d-tlb-prefetches-ops\000legacy cache\000Data TLB prefetch accesses\000legacy-cache-config=0x203\000\00010\000\000\000\000\000"
+/* offset=83694 */ "d-tlb-prefetches-access\000legacy cache\000Data TLB prefetch accesses\000legacy-cache-config=0x203\000\00010\000\000\000\000\000"
+/* offset=83792 */ "d-tlb-prefetches-misses\000legacy cache\000Data TLB prefetch misses\000legacy-cache-config=0x10203\000\00010\000\000\000\000\000"
+/* offset=83890 */ "d-tlb-prefetches-miss\000legacy cache\000Data TLB prefetch misses\000legacy-cache-config=0x10203\000\00010\000\000\000\000\000"
+/* offset=83986 */ "d-tlb-speculative-read\000legacy cache\000Data TLB prefetch accesses\000legacy-cache-config=0x203\000\00010\000\000\000\000\000"
+/* offset=84083 */ "d-tlb-speculative-read-refs\000legacy cache\000Data TLB prefetch accesses\000legacy-cache-config=0x203\000\00010\000\000\000\000\000"
+/* offset=84185 */ "d-tlb-speculative-read-reference\000legacy cache\000Data TLB prefetch accesses\000legacy-cache-config=0x203\000\00010\000\000\000\000\000"
+/* offset=84292 */ "d-tlb-speculative-read-ops\000legacy cache\000Data TLB prefetch accesses\000legacy-cache-config=0x203\000\00010\000\000\000\000\000"
+/* offset=84393 */ "d-tlb-speculative-read-access\000legacy cache\000Data TLB prefetch accesses\000legacy-cache-config=0x203\000\00010\000\000\000\000\000"
+/* offset=84497 */ "d-tlb-speculative-read-misses\000legacy cache\000Data TLB prefetch misses\000legacy-cache-config=0x10203\000\00010\000\000\000\000\000"
+/* offset=84601 */ "d-tlb-speculative-read-miss\000legacy cache\000Data TLB prefetch misses\000legacy-cache-config=0x10203\000\00010\000\000\000\000\000"
+/* offset=84703 */ "d-tlb-speculative-load\000legacy cache\000Data TLB prefetch accesses\000legacy-cache-config=0x203\000\00010\000\000\000\000\000"
+/* offset=84800 */ "d-tlb-speculative-load-refs\000legacy cache\000Data TLB prefetch accesses\000legacy-cache-config=0x203\000\00010\000\000\000\000\000"
+/* offset=84902 */ "d-tlb-speculative-load-reference\000legacy cache\000Data TLB prefetch accesses\000legacy-cache-config=0x203\000\00010\000\000\000\000\000"
+/* offset=85009 */ "d-tlb-speculative-load-ops\000legacy cache\000Data TLB prefetch accesses\000legacy-cache-config=0x203\000\00010\000\000\000\000\000"
+/* offset=85110 */ "d-tlb-speculative-load-access\000legacy cache\000Data TLB prefetch accesses\000legacy-cache-config=0x203\000\00010\000\000\000\000\000"
+/* offset=85214 */ "d-tlb-speculative-load-misses\000legacy cache\000Data TLB prefetch misses\000legacy-cache-config=0x10203\000\00010\000\000\000\000\000"
+/* offset=85318 */ "d-tlb-speculative-load-miss\000legacy cache\000Data TLB prefetch misses\000legacy-cache-config=0x10203\000\00010\000\000\000\000\000"
+/* offset=85420 */ "d-tlb-refs\000legacy cache\000Data TLB read accesses\000legacy-cache-config=3\000\00010\000\000\000\000\000"
+/* offset=85497 */ "d-tlb-reference\000legacy cache\000Data TLB read accesses\000legacy-cache-config=3\000\00010\000\000\000\000\000"
+/* offset=85579 */ "d-tlb-ops\000legacy cache\000Data TLB read accesses\000legacy-cache-config=3\000\00010\000\000\000\000\000"
+/* offset=85655 */ "d-tlb-access\000legacy cache\000Data TLB read accesses\000legacy-cache-config=3\000\00010\000\000\000\000\000"
+/* offset=85734 */ "d-tlb-misses\000legacy cache\000Data TLB read misses\000legacy-cache-config=0x10003\000\00010\000\000\000\000\000"
+/* offset=85817 */ "d-tlb-miss\000legacy cache\000Data TLB read misses\000legacy-cache-config=0x10003\000\00010\000\000\000\000\000"
+/* offset=85898 */ "data-tlb\000legacy cache\000Data TLB read accesses\000legacy-cache-config=3\000\00010\000\000\000\000\000"
+/* offset=85973 */ "data-tlb-load\000legacy cache\000Data TLB read accesses\000legacy-cache-config=3\000\00010\000\000\000\000\000"
+/* offset=86053 */ "data-tlb-load-refs\000legacy cache\000Data TLB read accesses\000legacy-cache-config=3\000\00010\000\000\000\000\000"
+/* offset=86138 */ "data-tlb-load-reference\000legacy cache\000Data TLB read accesses\000legacy-cache-config=3\000\00010\000\000\000\000\000"
+/* offset=86228 */ "data-tlb-load-ops\000legacy cache\000Data TLB read accesses\000legacy-cache-config=3\000\00010\000\000\000\000\000"
+/* offset=86312 */ "data-tlb-load-access\000legacy cache\000Data TLB read accesses\000legacy-cache-config=3\000\00010\000\000\000\000\000"
+/* offset=86399 */ "data-tlb-load-misses\000legacy cache\000Data TLB read misses\000legacy-cache-config=0x10003\000\00010\000\000\000\000\000"
+/* offset=86490 */ "data-tlb-load-miss\000legacy cache\000Data TLB read misses\000legacy-cache-config=0x10003\000\00010\000\000\000\000\000"
+/* offset=86579 */ "data-tlb-loads\000legacy cache\000Data TLB read accesses\000legacy-cache-config=3\000\00010\000\000\000\000\000"
+/* offset=86660 */ "data-tlb-loads-refs\000legacy cache\000Data TLB read accesses\000legacy-cache-config=3\000\00010\000\000\000\000\000"
+/* offset=86746 */ "data-tlb-loads-reference\000legacy cache\000Data TLB read accesses\000legacy-cache-config=3\000\00010\000\000\000\000\000"
+/* offset=86837 */ "data-tlb-loads-ops\000legacy cache\000Data TLB read accesses\000legacy-cache-config=3\000\00010\000\000\000\000\000"
+/* offset=86922 */ "data-tlb-loads-access\000legacy cache\000Data TLB read accesses\000legacy-cache-config=3\000\00010\000\000\000\000\000"
+/* offset=87010 */ "data-tlb-loads-misses\000legacy cache\000Data TLB read misses\000legacy-cache-config=0x10003\000\00010\000\000\000\000\000"
+/* offset=87102 */ "data-tlb-loads-miss\000legacy cache\000Data TLB read misses\000legacy-cache-config=0x10003\000\00010\000\000\000\000\000"
+/* offset=87192 */ "data-tlb-read\000legacy cache\000Data TLB read accesses\000legacy-cache-config=3\000\00010\000\000\000\000\000"
+/* offset=87272 */ "data-tlb-read-refs\000legacy cache\000Data TLB read accesses\000legacy-cache-config=3\000\00010\000\000\000\000\000"
+/* offset=87357 */ "data-tlb-read-reference\000legacy cache\000Data TLB read accesses\000legacy-cache-config=3\000\00010\000\000\000\000\000"
+/* offset=87447 */ "data-tlb-read-ops\000legacy cache\000Data TLB read accesses\000legacy-cache-config=3\000\00010\000\000\000\000\000"
+/* offset=87531 */ "data-tlb-read-access\000legacy cache\000Data TLB read accesses\000legacy-cache-config=3\000\00010\000\000\000\000\000"
+/* offset=87618 */ "data-tlb-read-misses\000legacy cache\000Data TLB read misses\000legacy-cache-config=0x10003\000\00010\000\000\000\000\000"
+/* offset=87709 */ "data-tlb-read-miss\000legacy cache\000Data TLB read misses\000legacy-cache-config=0x10003\000\00010\000\000\000\000\000"
+/* offset=87798 */ "data-tlb-store\000legacy cache\000Data TLB write accesses\000legacy-cache-config=0x103\000\00010\000\000\000\000\000"
+/* offset=87884 */ "data-tlb-store-refs\000legacy cache\000Data TLB write accesses\000legacy-cache-config=0x103\000\00010\000\000\000\000\000"
+/* offset=87975 */ "data-tlb-store-reference\000legacy cache\000Data TLB write accesses\000legacy-cache-config=0x103\000\00010\000\000\000\000\000"
+/* offset=88071 */ "data-tlb-store-ops\000legacy cache\000Data TLB write accesses\000legacy-cache-config=0x103\000\00010\000\000\000\000\000"
+/* offset=88161 */ "data-tlb-store-access\000legacy cache\000Data TLB write accesses\000legacy-cache-config=0x103\000\00010\000\000\000\000\000"
+/* offset=88254 */ "data-tlb-store-misses\000legacy cache\000Data TLB write misses\000legacy-cache-config=0x10103\000\00010\000\000\000\000\000"
+/* offset=88347 */ "data-tlb-store-miss\000legacy cache\000Data TLB write misses\000legacy-cache-config=0x10103\000\00010\000\000\000\000\000"
+/* offset=88438 */ "data-tlb-stores\000legacy cache\000Data TLB write accesses\000legacy-cache-config=0x103\000\00010\000\000\000\000\000"
+/* offset=88525 */ "data-tlb-stores-refs\000legacy cache\000Data TLB write accesses\000legacy-cache-config=0x103\000\00010\000\000\000\000\000"
+/* offset=88617 */ "data-tlb-stores-reference\000legacy cache\000Data TLB write accesses\000legacy-cache-config=0x103\000\00010\000\000\000\000\000"
+/* offset=88714 */ "data-tlb-stores-ops\000legacy cache\000Data TLB write accesses\000legacy-cache-config=0x103\000\00010\000\000\000\000\000"
+/* offset=88805 */ "data-tlb-stores-access\000legacy cache\000Data TLB write accesses\000legacy-cache-config=0x103\000\00010\000\000\000\000\000"
+/* offset=88899 */ "data-tlb-stores-misses\000legacy cache\000Data TLB write misses\000legacy-cache-config=0x10103\000\00010\000\000\000\000\000"
+/* offset=88993 */ "data-tlb-stores-miss\000legacy cache\000Data TLB write misses\000legacy-cache-config=0x10103\000\00010\000\000\000\000\000"
+/* offset=89085 */ "data-tlb-write\000legacy cache\000Data TLB write accesses\000legacy-cache-config=0x103\000\00010\000\000\000\000\000"
+/* offset=89171 */ "data-tlb-write-refs\000legacy cache\000Data TLB write accesses\000legacy-cache-config=0x103\000\00010\000\000\000\000\000"
+/* offset=89262 */ "data-tlb-write-reference\000legacy cache\000Data TLB write accesses\000legacy-cache-config=0x103\000\00010\000\000\000\000\000"
+/* offset=89358 */ "data-tlb-write-ops\000legacy cache\000Data TLB write accesses\000legacy-cache-config=0x103\000\00010\000\000\000\000\000"
+/* offset=89448 */ "data-tlb-write-access\000legacy cache\000Data TLB write accesses\000legacy-cache-config=0x103\000\00010\000\000\000\000\000"
+/* offset=89541 */ "data-tlb-write-misses\000legacy cache\000Data TLB write misses\000legacy-cache-config=0x10103\000\00010\000\000\000\000\000"
+/* offset=89634 */ "data-tlb-write-miss\000legacy cache\000Data TLB write misses\000legacy-cache-config=0x10103\000\00010\000\000\000\000\000"
+/* offset=89725 */ "data-tlb-prefetch\000legacy cache\000Data TLB prefetch accesses\000legacy-cache-config=0x203\000\00010\000\000\000\000\000"
+/* offset=89817 */ "data-tlb-prefetch-refs\000legacy cache\000Data TLB prefetch accesses\000legacy-cache-config=0x203\000\00010\000\000\000\000\000"
+/* offset=89914 */ "data-tlb-prefetch-reference\000legacy cache\000Data TLB prefetch accesses\000legacy-cache-config=0x203\000\00010\000\000\000\000\000"
+/* offset=90016 */ "data-tlb-prefetch-ops\000legacy cache\000Data TLB prefetch accesses\000legacy-cache-config=0x203\000\00010\000\000\000\000\000"
+/* offset=90112 */ "data-tlb-prefetch-access\000legacy cache\000Data TLB prefetch accesses\000legacy-cache-config=0x203\000\00010\000\000\000\000\000"
+/* offset=90211 */ "data-tlb-prefetch-misses\000legacy cache\000Data TLB prefetch misses\000legacy-cache-config=0x10203\000\00010\000\000\000\000\000"
+/* offset=90310 */ "data-tlb-prefetch-miss\000legacy cache\000Data TLB prefetch misses\000legacy-cache-config=0x10203\000\00010\000\000\000\000\000"
+/* offset=90407 */ "data-tlb-prefetches\000legacy cache\000Data TLB prefetch accesses\000legacy-cache-config=0x203\000\00010\000\000\000\000\000"
+/* offset=90501 */ "data-tlb-prefetches-refs\000legacy cache\000Data TLB prefetch accesses\000legacy-cache-config=0x203\000\00010\000\000\000\000\000"
+/* offset=90600 */ "data-tlb-prefetches-reference\000legacy cache\000Data TLB prefetch accesses\000legacy-cache-config=0x203\000\00010\000\000\000\000\000"
+/* offset=90704 */ "data-tlb-prefetches-ops\000legacy cache\000Data TLB prefetch accesses\000legacy-cache-config=0x203\000\00010\000\000\000\000\000"
+/* offset=90802 */ "data-tlb-prefetches-access\000legacy cache\000Data TLB prefetch accesses\000legacy-cache-config=0x203\000\00010\000\000\000\000\000"
+/* offset=90903 */ "data-tlb-prefetches-misses\000legacy cache\000Data TLB prefetch misses\000legacy-cache-config=0x10203\000\00010\000\000\000\000\000"
+/* offset=91004 */ "data-tlb-prefetches-miss\000legacy cache\000Data TLB prefetch misses\000legacy-cache-config=0x10203\000\00010\000\000\000\000\000"
+/* offset=91103 */ "data-tlb-speculative-read\000legacy cache\000Data TLB prefetch accesses\000legacy-cache-config=0x203\000\00010\000\000\000\000\000"
+/* offset=91203 */ "data-tlb-speculative-read-refs\000legacy cache\000Data TLB prefetch accesses\000legacy-cache-config=0x203\000\00010\000\000\000\000\000"
+/* offset=91308 */ "data-tlb-speculative-read-reference\000legacy cache\000Data TLB prefetch accesses\000legacy-cache-config=0x203\000\00010\000\000\000\000\000"
+/* offset=91418 */ "data-tlb-speculative-read-ops\000legacy cache\000Data TLB prefetch accesses\000legacy-cache-config=0x203\000\00010\000\000\000\000\000"
+/* offset=91522 */ "data-tlb-speculative-read-access\000legacy cache\000Data TLB prefetch accesses\000legacy-cache-config=0x203\000\00010\000\000\000\000\000"
+/* offset=91629 */ "data-tlb-speculative-read-misses\000legacy cache\000Data TLB prefetch misses\000legacy-cache-config=0x10203\000\00010\000\000\000\000\000"
+/* offset=91736 */ "data-tlb-speculative-read-miss\000legacy cache\000Data TLB prefetch misses\000legacy-cache-config=0x10203\000\00010\000\000\000\000\000"
+/* offset=91841 */ "data-tlb-speculative-load\000legacy cache\000Data TLB prefetch accesses\000legacy-cache-config=0x203\000\00010\000\000\000\000\000"
+/* offset=91941 */ "data-tlb-speculative-load-refs\000legacy cache\000Data TLB prefetch accesses\000legacy-cache-config=0x203\000\00010\000\000\000\000\000"
+/* offset=92046 */ "data-tlb-speculative-load-reference\000legacy cache\000Data TLB prefetch accesses\000legacy-cache-config=0x203\000\00010\000\000\000\000\000"
+/* offset=92156 */ "data-tlb-speculative-load-ops\000legacy cache\000Data TLB prefetch accesses\000legacy-cache-config=0x203\000\00010\000\000\000\000\000"
+/* offset=92260 */ "data-tlb-speculative-load-access\000legacy cache\000Data TLB prefetch accesses\000legacy-cache-config=0x203\000\00010\000\000\000\000\000"
+/* offset=92367 */ "data-tlb-speculative-load-misses\000legacy cache\000Data TLB prefetch misses\000legacy-cache-config=0x10203\000\00010\000\000\000\000\000"
+/* offset=92474 */ "data-tlb-speculative-load-miss\000legacy cache\000Data TLB prefetch misses\000legacy-cache-config=0x10203\000\00010\000\000\000\000\000"
+/* offset=92579 */ "data-tlb-refs\000legacy cache\000Data TLB read accesses\000legacy-cache-config=3\000\00010\000\000\000\000\000"
+/* offset=92659 */ "data-tlb-reference\000legacy cache\000Data TLB read accesses\000legacy-cache-config=3\000\00010\000\000\000\000\000"
+/* offset=92744 */ "data-tlb-ops\000legacy cache\000Data TLB read accesses\000legacy-cache-config=3\000\00010\000\000\000\000\000"
+/* offset=92823 */ "data-tlb-access\000legacy cache\000Data TLB read accesses\000legacy-cache-config=3\000\00010\000\000\000\000\000"
+/* offset=92905 */ "data-tlb-misses\000legacy cache\000Data TLB read misses\000legacy-cache-config=0x10003\000\00010\000\000\000\000\000"
+/* offset=92991 */ "data-tlb-miss\000legacy cache\000Data TLB read misses\000legacy-cache-config=0x10003\000\00010\000\000\000\000\000"
+/* offset=93075 */ "itlb\000legacy cache\000Instruction TLB read accesses\000legacy-cache-config=4\000\00010\000\000\000\000\000"
+/* offset=93153 */ "itlb-load\000legacy cache\000Instruction TLB read accesses\000legacy-cache-config=4\000\00010\000\000\000\000\000"
+/* offset=93236 */ "itlb-load-refs\000legacy cache\000Instruction TLB read accesses\000legacy-cache-config=4\000\00010\000\000\000\000\000"
+/* offset=93324 */ "itlb-load-reference\000legacy cache\000Instruction TLB read accesses\000legacy-cache-config=4\000\00010\000\000\000\000\000"
+/* offset=93417 */ "itlb-load-ops\000legacy cache\000Instruction TLB read accesses\000legacy-cache-config=4\000\00010\000\000\000\000\000"
+/* offset=93504 */ "itlb-load-access\000legacy cache\000Instruction TLB read accesses\000legacy-cache-config=4\000\00010\000\000\000\000\000"
+/* offset=93594 */ "itlb-load-misses\000legacy cache\000Instruction TLB read misses\000legacy-cache-config=0x10004\000\00000\000\000\000\000\000"
+/* offset=93688 */ "itlb-load-miss\000legacy cache\000Instruction TLB read misses\000legacy-cache-config=0x10004\000\00010\000\000\000\000\000"
+/* offset=93780 */ "itlb-loads\000legacy cache\000Instruction TLB read accesses\000legacy-cache-config=4\000\00000\000\000\000\000\000"
+/* offset=93864 */ "itlb-loads-refs\000legacy cache\000Instruction TLB read accesses\000legacy-cache-config=4\000\00010\000\000\000\000\000"
+/* offset=93953 */ "itlb-loads-reference\000legacy cache\000Instruction TLB read accesses\000legacy-cache-config=4\000\00010\000\000\000\000\000"
+/* offset=94047 */ "itlb-loads-ops\000legacy cache\000Instruction TLB read accesses\000legacy-cache-config=4\000\00010\000\000\000\000\000"
+/* offset=94135 */ "itlb-loads-access\000legacy cache\000Instruction TLB read accesses\000legacy-cache-config=4\000\00010\000\000\000\000\000"
+/* offset=94226 */ "itlb-loads-misses\000legacy cache\000Instruction TLB read misses\000legacy-cache-config=0x10004\000\00010\000\000\000\000\000"
+/* offset=94321 */ "itlb-loads-miss\000legacy cache\000Instruction TLB read misses\000legacy-cache-config=0x10004\000\00010\000\000\000\000\000"
+/* offset=94414 */ "itlb-read\000legacy cache\000Instruction TLB read accesses\000legacy-cache-config=4\000\00010\000\000\000\000\000"
+/* offset=94497 */ "itlb-read-refs\000legacy cache\000Instruction TLB read accesses\000legacy-cache-config=4\000\00010\000\000\000\000\000"
+/* offset=94585 */ "itlb-read-reference\000legacy cache\000Instruction TLB read accesses\000legacy-cache-config=4\000\00010\000\000\000\000\000"
+/* offset=94678 */ "itlb-read-ops\000legacy cache\000Instruction TLB read accesses\000legacy-cache-config=4\000\00010\000\000\000\000\000"
+/* offset=94765 */ "itlb-read-access\000legacy cache\000Instruction TLB read accesses\000legacy-cache-config=4\000\00010\000\000\000\000\000"
+/* offset=94855 */ "itlb-read-misses\000legacy cache\000Instruction TLB read misses\000legacy-cache-config=0x10004\000\00010\000\000\000\000\000"
+/* offset=94949 */ "itlb-read-miss\000legacy cache\000Instruction TLB read misses\000legacy-cache-config=0x10004\000\00010\000\000\000\000\000"
+/* offset=95041 */ "itlb-refs\000legacy cache\000Instruction TLB read accesses\000legacy-cache-config=4\000\00010\000\000\000\000\000"
+/* offset=95124 */ "itlb-reference\000legacy cache\000Instruction TLB read accesses\000legacy-cache-config=4\000\00010\000\000\000\000\000"
+/* offset=95212 */ "itlb-ops\000legacy cache\000Instruction TLB read accesses\000legacy-cache-config=4\000\00010\000\000\000\000\000"
+/* offset=95294 */ "itlb-access\000legacy cache\000Instruction TLB read accesses\000legacy-cache-config=4\000\00010\000\000\000\000\000"
+/* offset=95379 */ "itlb-misses\000legacy cache\000Instruction TLB read misses\000legacy-cache-config=0x10004\000\00010\000\000\000\000\000"
+/* offset=95468 */ "itlb-miss\000legacy cache\000Instruction TLB read misses\000legacy-cache-config=0x10004\000\00010\000\000\000\000\000"
+/* offset=95555 */ "i-tlb\000legacy cache\000Instruction TLB read accesses\000legacy-cache-config=4\000\00010\000\000\000\000\000"
+/* offset=95634 */ "i-tlb-load\000legacy cache\000Instruction TLB read accesses\000legacy-cache-config=4\000\00010\000\000\000\000\000"
+/* offset=95718 */ "i-tlb-load-refs\000legacy cache\000Instruction TLB read accesses\000legacy-cache-config=4\000\00010\000\000\000\000\000"
+/* offset=95807 */ "i-tlb-load-reference\000legacy cache\000Instruction TLB read accesses\000legacy-cache-config=4\000\00010\000\000\000\000\000"
+/* offset=95901 */ "i-tlb-load-ops\000legacy cache\000Instruction TLB read accesses\000legacy-cache-config=4\000\00010\000\000\000\000\000"
+/* offset=95989 */ "i-tlb-load-access\000legacy cache\000Instruction TLB read accesses\000legacy-cache-config=4\000\00010\000\000\000\000\000"
+/* offset=96080 */ "i-tlb-load-misses\000legacy cache\000Instruction TLB read misses\000legacy-cache-config=0x10004\000\00010\000\000\000\000\000"
+/* offset=96175 */ "i-tlb-load-miss\000legacy cache\000Instruction TLB read misses\000legacy-cache-config=0x10004\000\00010\000\000\000\000\000"
+/* offset=96268 */ "i-tlb-loads\000legacy cache\000Instruction TLB read accesses\000legacy-cache-config=4\000\00010\000\000\000\000\000"
+/* offset=96353 */ "i-tlb-loads-refs\000legacy cache\000Instruction TLB read accesses\000legacy-cache-config=4\000\00010\000\000\000\000\000"
+/* offset=96443 */ "i-tlb-loads-reference\000legacy cache\000Instruction TLB read accesses\000legacy-cache-config=4\000\00010\000\000\000\000\000"
+/* offset=96538 */ "i-tlb-loads-ops\000legacy cache\000Instruction TLB read accesses\000legacy-cache-config=4\000\00010\000\000\000\000\000"
+/* offset=96627 */ "i-tlb-loads-access\000legacy cache\000Instruction TLB read accesses\000legacy-cache-config=4\000\00010\000\000\000\000\000"
+/* offset=96719 */ "i-tlb-loads-misses\000legacy cache\000Instruction TLB read misses\000legacy-cache-config=0x10004\000\00010\000\000\000\000\000"
+/* offset=96815 */ "i-tlb-loads-miss\000legacy cache\000Instruction TLB read misses\000legacy-cache-config=0x10004\000\00010\000\000\000\000\000"
+/* offset=96909 */ "i-tlb-read\000legacy cache\000Instruction TLB read accesses\000legacy-cache-config=4\000\00010\000\000\000\000\000"
+/* offset=96993 */ "i-tlb-read-refs\000legacy cache\000Instruction TLB read accesses\000legacy-cache-config=4\000\00010\000\000\000\000\000"
+/* offset=97082 */ "i-tlb-read-reference\000legacy cache\000Instruction TLB read accesses\000legacy-cache-config=4\000\00010\000\000\000\000\000"
+/* offset=97176 */ "i-tlb-read-ops\000legacy cache\000Instruction TLB read accesses\000legacy-cache-config=4\000\00010\000\000\000\000\000"
+/* offset=97264 */ "i-tlb-read-access\000legacy cache\000Instruction TLB read accesses\000legacy-cache-config=4\000\00010\000\000\000\000\000"
+/* offset=97355 */ "i-tlb-read-misses\000legacy cache\000Instruction TLB read misses\000legacy-cache-config=0x10004\000\00010\000\000\000\000\000"
+/* offset=97450 */ "i-tlb-read-miss\000legacy cache\000Instruction TLB read misses\000legacy-cache-config=0x10004\000\00010\000\000\000\000\000"
+/* offset=97543 */ "i-tlb-refs\000legacy cache\000Instruction TLB read accesses\000legacy-cache-config=4\000\00010\000\000\000\000\000"
+/* offset=97627 */ "i-tlb-reference\000legacy cache\000Instruction TLB read accesses\000legacy-cache-config=4\000\00010\000\000\000\000\000"
+/* offset=97716 */ "i-tlb-ops\000legacy cache\000Instruction TLB read accesses\000legacy-cache-config=4\000\00010\000\000\000\000\000"
+/* offset=97799 */ "i-tlb-access\000legacy cache\000Instruction TLB read accesses\000legacy-cache-config=4\000\00010\000\000\000\000\000"
+/* offset=97885 */ "i-tlb-misses\000legacy cache\000Instruction TLB read misses\000legacy-cache-config=0x10004\000\00010\000\000\000\000\000"
+/* offset=97975 */ "i-tlb-miss\000legacy cache\000Instruction TLB read misses\000legacy-cache-config=0x10004\000\00010\000\000\000\000\000"
+/* offset=98063 */ "instruction-tlb\000legacy cache\000Instruction TLB read accesses\000legacy-cache-config=4\000\00010\000\000\000\000\000"
+/* offset=98152 */ "instruction-tlb-load\000legacy cache\000Instruction TLB read accesses\000legacy-cache-config=4\000\00010\000\000\000\000\000"
+/* offset=98246 */ "instruction-tlb-load-refs\000legacy cache\000Instruction TLB read accesses\000legacy-cache-config=4\000\00010\000\000\000\000\000"
+/* offset=98345 */ "instruction-tlb-load-reference\000legacy cache\000Instruction TLB read accesses\000legacy-cache-config=4\000\00010\000\000\000\000\000"
+/* offset=98449 */ "instruction-tlb-load-ops\000legacy cache\000Instruction TLB read accesses\000legacy-cache-config=4\000\00010\000\000\000\000\000"
+/* offset=98547 */ "instruction-tlb-load-access\000legacy cache\000Instruction TLB read accesses\000legacy-cache-config=4\000\00010\000\000\000\000\000"
+/* offset=98648 */ "instruction-tlb-load-misses\000legacy cache\000Instruction TLB read misses\000legacy-cache-config=0x10004\000\00010\000\000\000\000\000"
+/* offset=98753 */ "instruction-tlb-load-miss\000legacy cache\000Instruction TLB read misses\000legacy-cache-config=0x10004\000\00010\000\000\000\000\000"
+/* offset=98856 */ "instruction-tlb-loads\000legacy cache\000Instruction TLB read accesses\000legacy-cache-config=4\000\00010\000\000\000\000\000"
+/* offset=98951 */ "instruction-tlb-loads-refs\000legacy cache\000Instruction TLB read accesses\000legacy-cache-config=4\000\00010\000\000\000\000\000"
+/* offset=99051 */ "instruction-tlb-loads-reference\000legacy cache\000Instruction TLB read accesses\000legacy-cache-config=4\000\00010\000\000\000\000\000"
+/* offset=99156 */ "instruction-tlb-loads-ops\000legacy cache\000Instruction TLB read accesses\000legacy-cache-config=4\000\00010\000\000\000\000\000"
+/* offset=99255 */ "instruction-tlb-loads-access\000legacy cache\000Instruction TLB read accesses\000legacy-cache-config=4\000\00010\000\000\000\000\000"
+/* offset=99357 */ "instruction-tlb-loads-misses\000legacy cache\000Instruction TLB read misses\000legacy-cache-config=0x10004\000\00010\000\000\000\000\000"
+/* offset=99463 */ "instruction-tlb-loads-miss\000legacy cache\000Instruction TLB read misses\000legacy-cache-config=0x10004\000\00010\000\000\000\000\000"
+/* offset=99567 */ "instruction-tlb-read\000legacy cache\000Instruction TLB read accesses\000legacy-cache-config=4\000\00010\000\000\000\000\000"
+/* offset=99661 */ "instruction-tlb-read-refs\000legacy cache\000Instruction TLB read accesses\000legacy-cache-config=4\000\00010\000\000\000\000\000"
+/* offset=99760 */ "instruction-tlb-read-reference\000legacy cache\000Instruction TLB read accesses\000legacy-cache-config=4\000\00010\000\000\000\000\000"
+/* offset=99864 */ "instruction-tlb-read-ops\000legacy cache\000Instruction TLB read accesses\000legacy-cache-config=4\000\00010\000\000\000\000\000"
+/* offset=99962 */ "instruction-tlb-read-access\000legacy cache\000Instruction TLB read accesses\000legacy-cache-config=4\000\00010\000\000\000\000\000"
+/* offset=100063 */ "instruction-tlb-read-misses\000legacy cache\000Instruction TLB read misses\000legacy-cache-config=0x10004\000\00010\000\000\000\000\000"
+/* offset=100168 */ "instruction-tlb-read-miss\000legacy cache\000Instruction TLB read misses\000legacy-cache-config=0x10004\000\00010\000\000\000\000\000"
+/* offset=100271 */ "instruction-tlb-refs\000legacy cache\000Instruction TLB read accesses\000legacy-cache-config=4\000\00010\000\000\000\000\000"
+/* offset=100365 */ "instruction-tlb-reference\000legacy cache\000Instruction TLB read accesses\000legacy-cache-config=4\000\00010\000\000\000\000\000"
+/* offset=100464 */ "instruction-tlb-ops\000legacy cache\000Instruction TLB read accesses\000legacy-cache-config=4\000\00010\000\000\000\000\000"
+/* offset=100557 */ "instruction-tlb-access\000legacy cache\000Instruction TLB read accesses\000legacy-cache-config=4\000\00010\000\000\000\000\000"
+/* offset=100653 */ "instruction-tlb-misses\000legacy cache\000Instruction TLB read misses\000legacy-cache-config=0x10004\000\00010\000\000\000\000\000"
+/* offset=100753 */ "instruction-tlb-miss\000legacy cache\000Instruction TLB read misses\000legacy-cache-config=0x10004\000\00010\000\000\000\000\000"
+/* offset=100851 */ "branch\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00010\000\000\000\000\000"
+/* offset=100938 */ "branch-load\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00010\000\000\000\000\000"
+/* offset=101030 */ "branch-load-refs\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00010\000\000\000\000\000"
+/* offset=101127 */ "branch-load-reference\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00010\000\000\000\000\000"
+/* offset=101229 */ "branch-load-ops\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00010\000\000\000\000\000"
+/* offset=101325 */ "branch-load-access\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00010\000\000\000\000\000"
+/* offset=101424 */ "branch-load-misses\000legacy cache\000Branch prediction unit read misses\000legacy-cache-config=0x10005\000\00000\000\000\000\000\000"
+/* offset=101527 */ "branch-load-miss\000legacy cache\000Branch prediction unit read misses\000legacy-cache-config=0x10005\000\00010\000\000\000\000\000"
+/* offset=101628 */ "branch-loads\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00000\000\000\000\000\000"
+/* offset=101721 */ "branch-loads-refs\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00010\000\000\000\000\000"
+/* offset=101819 */ "branch-loads-reference\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00010\000\000\000\000\000"
+/* offset=101922 */ "branch-loads-ops\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00010\000\000\000\000\000"
+/* offset=102019 */ "branch-loads-access\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00010\000\000\000\000\000"
+/* offset=102119 */ "branch-loads-misses\000legacy cache\000Branch prediction unit read misses\000legacy-cache-config=0x10005\000\00010\000\000\000\000\000"
+/* offset=102223 */ "branch-loads-miss\000legacy cache\000Branch prediction unit read misses\000legacy-cache-config=0x10005\000\00010\000\000\000\000\000"
+/* offset=102325 */ "branch-read\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00010\000\000\000\000\000"
+/* offset=102417 */ "branch-read-refs\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00010\000\000\000\000\000"
+/* offset=102514 */ "branch-read-reference\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00010\000\000\000\000\000"
+/* offset=102616 */ "branch-read-ops\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00010\000\000\000\000\000"
+/* offset=102712 */ "branch-read-access\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00010\000\000\000\000\000"
+/* offset=102811 */ "branch-read-misses\000legacy cache\000Branch prediction unit read misses\000legacy-cache-config=0x10005\000\00010\000\000\000\000\000"
+/* offset=102914 */ "branch-read-miss\000legacy cache\000Branch prediction unit read misses\000legacy-cache-config=0x10005\000\00010\000\000\000\000\000"
+/* offset=103015 */ "branch-refs\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00010\000\000\000\000\000"
+/* offset=103107 */ "branch-reference\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00010\000\000\000\000\000"
+/* offset=103204 */ "branch-ops\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00010\000\000\000\000\000"
+/* offset=103295 */ "branch-access\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00010\000\000\000\000\000"
+/* offset=103389 */ "branch-miss\000legacy cache\000Branch prediction unit read misses\000legacy-cache-config=0x10005\000\00010\000\000\000\000\000"
+/* offset=103485 */ "branches-load\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00010\000\000\000\000\000"
+/* offset=103579 */ "branches-load-refs\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00010\000\000\000\000\000"
+/* offset=103678 */ "branches-load-reference\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00010\000\000\000\000\000"
+/* offset=103782 */ "branches-load-ops\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00010\000\000\000\000\000"
+/* offset=103880 */ "branches-load-access\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00010\000\000\000\000\000"
+/* offset=103981 */ "branches-load-misses\000legacy cache\000Branch prediction unit read misses\000legacy-cache-config=0x10005\000\00010\000\000\000\000\000"
+/* offset=104086 */ "branches-load-miss\000legacy cache\000Branch prediction unit read misses\000legacy-cache-config=0x10005\000\00010\000\000\000\000\000"
+/* offset=104189 */ "branches-loads\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00010\000\000\000\000\000"
+/* offset=104284 */ "branches-loads-refs\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00010\000\000\000\000\000"
+/* offset=104384 */ "branches-loads-reference\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00010\000\000\000\000\000"
+/* offset=104489 */ "branches-loads-ops\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00010\000\000\000\000\000"
+/* offset=104588 */ "branches-loads-access\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00010\000\000\000\000\000"
+/* offset=104690 */ "branches-loads-misses\000legacy cache\000Branch prediction unit read misses\000legacy-cache-config=0x10005\000\00010\000\000\000\000\000"
+/* offset=104796 */ "branches-loads-miss\000legacy cache\000Branch prediction unit read misses\000legacy-cache-config=0x10005\000\00010\000\000\000\000\000"
+/* offset=104900 */ "branches-read\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00010\000\000\000\000\000"
+/* offset=104994 */ "branches-read-refs\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00010\000\000\000\000\000"
+/* offset=105093 */ "branches-read-reference\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00010\000\000\000\000\000"
+/* offset=105197 */ "branches-read-ops\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00010\000\000\000\000\000"
+/* offset=105295 */ "branches-read-access\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00010\000\000\000\000\000"
+/* offset=105396 */ "branches-read-misses\000legacy cache\000Branch prediction unit read misses\000legacy-cache-config=0x10005\000\00010\000\000\000\000\000"
+/* offset=105501 */ "branches-read-miss\000legacy cache\000Branch prediction unit read misses\000legacy-cache-config=0x10005\000\00010\000\000\000\000\000"
+/* offset=105604 */ "branches-refs\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00010\000\000\000\000\000"
+/* offset=105698 */ "branches-reference\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00010\000\000\000\000\000"
+/* offset=105797 */ "branches-ops\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00010\000\000\000\000\000"
+/* offset=105890 */ "branches-access\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00010\000\000\000\000\000"
+/* offset=105986 */ "branches-misses\000legacy cache\000Branch prediction unit read misses\000legacy-cache-config=0x10005\000\00010\000\000\000\000\000"
+/* offset=106086 */ "branches-miss\000legacy cache\000Branch prediction unit read misses\000legacy-cache-config=0x10005\000\00010\000\000\000\000\000"
+/* offset=106184 */ "bpu\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00010\000\000\000\000\000"
+/* offset=106268 */ "bpu-load\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00010\000\000\000\000\000"
+/* offset=106357 */ "bpu-load-refs\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00010\000\000\000\000\000"
+/* offset=106451 */ "bpu-load-reference\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00010\000\000\000\000\000"
+/* offset=106550 */ "bpu-load-ops\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00010\000\000\000\000\000"
+/* offset=106643 */ "bpu-load-access\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00010\000\000\000\000\000"
+/* offset=106739 */ "bpu-load-misses\000legacy cache\000Branch prediction unit read misses\000legacy-cache-config=0x10005\000\00010\000\000\000\000\000"
+/* offset=106839 */ "bpu-load-miss\000legacy cache\000Branch prediction unit read misses\000legacy-cache-config=0x10005\000\00010\000\000\000\000\000"
+/* offset=106937 */ "bpu-loads\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00010\000\000\000\000\000"
+/* offset=107027 */ "bpu-loads-refs\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00010\000\000\000\000\000"
+/* offset=107122 */ "bpu-loads-reference\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00010\000\000\000\000\000"
+/* offset=107222 */ "bpu-loads-ops\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00010\000\000\000\000\000"
+/* offset=107316 */ "bpu-loads-access\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00010\000\000\000\000\000"
+/* offset=107413 */ "bpu-loads-misses\000legacy cache\000Branch prediction unit read misses\000legacy-cache-config=0x10005\000\00010\000\000\000\000\000"
+/* offset=107514 */ "bpu-loads-miss\000legacy cache\000Branch prediction unit read misses\000legacy-cache-config=0x10005\000\00010\000\000\000\000\000"
+/* offset=107613 */ "bpu-read\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00010\000\000\000\000\000"
+/* offset=107702 */ "bpu-read-refs\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00010\000\000\000\000\000"
+/* offset=107796 */ "bpu-read-reference\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00010\000\000\000\000\000"
+/* offset=107895 */ "bpu-read-ops\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00010\000\000\000\000\000"
+/* offset=107988 */ "bpu-read-access\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00010\000\000\000\000\000"
+/* offset=108084 */ "bpu-read-misses\000legacy cache\000Branch prediction unit read misses\000legacy-cache-config=0x10005\000\00010\000\000\000\000\000"
+/* offset=108184 */ "bpu-read-miss\000legacy cache\000Branch prediction unit read misses\000legacy-cache-config=0x10005\000\00010\000\000\000\000\000"
+/* offset=108282 */ "bpu-refs\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00010\000\000\000\000\000"
+/* offset=108371 */ "bpu-reference\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00010\000\000\000\000\000"
+/* offset=108465 */ "bpu-ops\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00010\000\000\000\000\000"
+/* offset=108553 */ "bpu-access\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00010\000\000\000\000\000"
+/* offset=108644 */ "bpu-misses\000legacy cache\000Branch prediction unit read misses\000legacy-cache-config=0x10005\000\00010\000\000\000\000\000"
+/* offset=108739 */ "bpu-miss\000legacy cache\000Branch prediction unit read misses\000legacy-cache-config=0x10005\000\00010\000\000\000\000\000"
+/* offset=108832 */ "btb\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00010\000\000\000\000\000"
+/* offset=108916 */ "btb-load\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00010\000\000\000\000\000"
+/* offset=109005 */ "btb-load-refs\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00010\000\000\000\000\000"
+/* offset=109099 */ "btb-load-reference\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00010\000\000\000\000\000"
+/* offset=109198 */ "btb-load-ops\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00010\000\000\000\000\000"
+/* offset=109291 */ "btb-load-access\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00010\000\000\000\000\000"
+/* offset=109387 */ "btb-load-misses\000legacy cache\000Branch prediction unit read misses\000legacy-cache-config=0x10005\000\00010\000\000\000\000\000"
+/* offset=109487 */ "btb-load-miss\000legacy cache\000Branch prediction unit read misses\000legacy-cache-config=0x10005\000\00010\000\000\000\000\000"
+/* offset=109585 */ "btb-loads\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00010\000\000\000\000\000"
+/* offset=109675 */ "btb-loads-refs\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00010\000\000\000\000\000"
+/* offset=109770 */ "btb-loads-reference\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00010\000\000\000\000\000"
+/* offset=109870 */ "btb-loads-ops\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00010\000\000\000\000\000"
+/* offset=109964 */ "btb-loads-access\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00010\000\000\000\000\000"
+/* offset=110061 */ "btb-loads-misses\000legacy cache\000Branch prediction unit read misses\000legacy-cache-config=0x10005\000\00010\000\000\000\000\000"
+/* offset=110162 */ "btb-loads-miss\000legacy cache\000Branch prediction unit read misses\000legacy-cache-config=0x10005\000\00010\000\000\000\000\000"
+/* offset=110261 */ "btb-read\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00010\000\000\000\000\000"
+/* offset=110350 */ "btb-read-refs\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00010\000\000\000\000\000"
+/* offset=110444 */ "btb-read-reference\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00010\000\000\000\000\000"
+/* offset=110543 */ "btb-read-ops\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00010\000\000\000\000\000"
+/* offset=110636 */ "btb-read-access\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00010\000\000\000\000\000"
+/* offset=110732 */ "btb-read-misses\000legacy cache\000Branch prediction unit read misses\000legacy-cache-config=0x10005\000\00010\000\000\000\000\000"
+/* offset=110832 */ "btb-read-miss\000legacy cache\000Branch prediction unit read misses\000legacy-cache-config=0x10005\000\00010\000\000\000\000\000"
+/* offset=110930 */ "btb-refs\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00010\000\000\000\000\000"
+/* offset=111019 */ "btb-reference\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00010\000\000\000\000\000"
+/* offset=111113 */ "btb-ops\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00010\000\000\000\000\000"
+/* offset=111201 */ "btb-access\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00010\000\000\000\000\000"
+/* offset=111292 */ "btb-misses\000legacy cache\000Branch prediction unit read misses\000legacy-cache-config=0x10005\000\00010\000\000\000\000\000"
+/* offset=111387 */ "btb-miss\000legacy cache\000Branch prediction unit read misses\000legacy-cache-config=0x10005\000\00010\000\000\000\000\000"
+/* offset=111480 */ "bpc\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00010\000\000\000\000\000"
+/* offset=111564 */ "bpc-load\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00010\000\000\000\000\000"
+/* offset=111653 */ "bpc-load-refs\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00010\000\000\000\000\000"
+/* offset=111747 */ "bpc-load-reference\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00010\000\000\000\000\000"
+/* offset=111846 */ "bpc-load-ops\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00010\000\000\000\000\000"
+/* offset=111939 */ "bpc-load-access\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00010\000\000\000\000\000"
+/* offset=112035 */ "bpc-load-misses\000legacy cache\000Branch prediction unit read misses\000legacy-cache-config=0x10005\000\00010\000\000\000\000\000"
+/* offset=112135 */ "bpc-load-miss\000legacy cache\000Branch prediction unit read misses\000legacy-cache-config=0x10005\000\00010\000\000\000\000\000"
+/* offset=112233 */ "bpc-loads\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00010\000\000\000\000\000"
+/* offset=112323 */ "bpc-loads-refs\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00010\000\000\000\000\000"
+/* offset=112418 */ "bpc-loads-reference\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00010\000\000\000\000\000"
+/* offset=112518 */ "bpc-loads-ops\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00010\000\000\000\000\000"
+/* offset=112612 */ "bpc-loads-access\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00010\000\000\000\000\000"
+/* offset=112709 */ "bpc-loads-misses\000legacy cache\000Branch prediction unit read misses\000legacy-cache-config=0x10005\000\00010\000\000\000\000\000"
+/* offset=112810 */ "bpc-loads-miss\000legacy cache\000Branch prediction unit read misses\000legacy-cache-config=0x10005\000\00010\000\000\000\000\000"
+/* offset=112909 */ "bpc-read\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00010\000\000\000\000\000"
+/* offset=112998 */ "bpc-read-refs\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00010\000\000\000\000\000"
+/* offset=113092 */ "bpc-read-reference\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00010\000\000\000\000\000"
+/* offset=113191 */ "bpc-read-ops\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00010\000\000\000\000\000"
+/* offset=113284 */ "bpc-read-access\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00010\000\000\000\000\000"
+/* offset=113380 */ "bpc-read-misses\000legacy cache\000Branch prediction unit read misses\000legacy-cache-config=0x10005\000\00010\000\000\000\000\000"
+/* offset=113480 */ "bpc-read-miss\000legacy cache\000Branch prediction unit read misses\000legacy-cache-config=0x10005\000\00010\000\000\000\000\000"
+/* offset=113578 */ "bpc-refs\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00010\000\000\000\000\000"
+/* offset=113667 */ "bpc-reference\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00010\000\000\000\000\000"
+/* offset=113761 */ "bpc-ops\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00010\000\000\000\000\000"
+/* offset=113849 */ "bpc-access\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00010\000\000\000\000\000"
+/* offset=113940 */ "bpc-misses\000legacy cache\000Branch prediction unit read misses\000legacy-cache-config=0x10005\000\00010\000\000\000\000\000"
+/* offset=114035 */ "bpc-miss\000legacy cache\000Branch prediction unit read misses\000legacy-cache-config=0x10005\000\00010\000\000\000\000\000"
+/* offset=114128 */ "node\000legacy cache\000Local memory read accesses\000legacy-cache-config=6\000\00010\000\000\000\000\000"
+/* offset=114203 */ "node-load\000legacy cache\000Local memory read accesses\000legacy-cache-config=6\000\00010\000\000\000\000\000"
+/* offset=114283 */ "node-load-refs\000legacy cache\000Local memory read accesses\000legacy-cache-config=6\000\00010\000\000\000\000\000"
+/* offset=114368 */ "node-load-reference\000legacy cache\000Local memory read accesses\000legacy-cache-config=6\000\00010\000\000\000\000\000"
+/* offset=114458 */ "node-load-ops\000legacy cache\000Local memory read accesses\000legacy-cache-config=6\000\00010\000\000\000\000\000"
+/* offset=114542 */ "node-load-access\000legacy cache\000Local memory read accesses\000legacy-cache-config=6\000\00010\000\000\000\000\000"
+/* offset=114629 */ "node-load-misses\000legacy cache\000Local memory read misses\000legacy-cache-config=0x10006\000\00000\000\000\000\000\000"
+/* offset=114720 */ "node-load-miss\000legacy cache\000Local memory read misses\000legacy-cache-config=0x10006\000\00010\000\000\000\000\000"
+/* offset=114809 */ "node-loads\000legacy cache\000Local memory read accesses\000legacy-cache-config=6\000\00000\000\000\000\000\000"
+/* offset=114890 */ "node-loads-refs\000legacy cache\000Local memory read accesses\000legacy-cache-config=6\000\00010\000\000\000\000\000"
+/* offset=114976 */ "node-loads-reference\000legacy cache\000Local memory read accesses\000legacy-cache-config=6\000\00010\000\000\000\000\000"
+/* offset=115067 */ "node-loads-ops\000legacy cache\000Local memory read accesses\000legacy-cache-config=6\000\00010\000\000\000\000\000"
+/* offset=115152 */ "node-loads-access\000legacy cache\000Local memory read accesses\000legacy-cache-config=6\000\00010\000\000\000\000\000"
+/* offset=115240 */ "node-loads-misses\000legacy cache\000Local memory read misses\000legacy-cache-config=0x10006\000\00010\000\000\000\000\000"
+/* offset=115332 */ "node-loads-miss\000legacy cache\000Local memory read misses\000legacy-cache-config=0x10006\000\00010\000\000\000\000\000"
+/* offset=115422 */ "node-read\000legacy cache\000Local memory read accesses\000legacy-cache-config=6\000\00010\000\000\000\000\000"
+/* offset=115502 */ "node-read-refs\000legacy cache\000Local memory read accesses\000legacy-cache-config=6\000\00010\000\000\000\000\000"
+/* offset=115587 */ "node-read-reference\000legacy cache\000Local memory read accesses\000legacy-cache-config=6\000\00010\000\000\000\000\000"
+/* offset=115677 */ "node-read-ops\000legacy cache\000Local memory read accesses\000legacy-cache-config=6\000\00010\000\000\000\000\000"
+/* offset=115761 */ "node-read-access\000legacy cache\000Local memory read accesses\000legacy-cache-config=6\000\00010\000\000\000\000\000"
+/* offset=115848 */ "node-read-misses\000legacy cache\000Local memory read misses\000legacy-cache-config=0x10006\000\00010\000\000\000\000\000"
+/* offset=115939 */ "node-read-miss\000legacy cache\000Local memory read misses\000legacy-cache-config=0x10006\000\00010\000\000\000\000\000"
+/* offset=116028 */ "node-store\000legacy cache\000Local memory write accesses\000legacy-cache-config=0x106\000\00010\000\000\000\000\000"
+/* offset=116114 */ "node-store-refs\000legacy cache\000Local memory write accesses\000legacy-cache-config=0x106\000\00010\000\000\000\000\000"
+/* offset=116205 */ "node-store-reference\000legacy cache\000Local memory write accesses\000legacy-cache-config=0x106\000\00010\000\000\000\000\000"
+/* offset=116301 */ "node-store-ops\000legacy cache\000Local memory write accesses\000legacy-cache-config=0x106\000\00010\000\000\000\000\000"
+/* offset=116391 */ "node-store-access\000legacy cache\000Local memory write accesses\000legacy-cache-config=0x106\000\00010\000\000\000\000\000"
+/* offset=116484 */ "node-store-misses\000legacy cache\000Local memory write misses\000legacy-cache-config=0x10106\000\00000\000\000\000\000\000"
+/* offset=116577 */ "node-store-miss\000legacy cache\000Local memory write misses\000legacy-cache-config=0x10106\000\00010\000\000\000\000\000"
+/* offset=116668 */ "node-stores\000legacy cache\000Local memory write accesses\000legacy-cache-config=0x106\000\00000\000\000\000\000\000"
+/* offset=116755 */ "node-stores-refs\000legacy cache\000Local memory write accesses\000legacy-cache-config=0x106\000\00010\000\000\000\000\000"
+/* offset=116847 */ "node-stores-reference\000legacy cache\000Local memory write accesses\000legacy-cache-config=0x106\000\00010\000\000\000\000\000"
+/* offset=116944 */ "node-stores-ops\000legacy cache\000Local memory write accesses\000legacy-cache-config=0x106\000\00010\000\000\000\000\000"
+/* offset=117035 */ "node-stores-access\000legacy cache\000Local memory write accesses\000legacy-cache-config=0x106\000\00010\000\000\000\000\000"
+/* offset=117129 */ "node-stores-misses\000legacy cache\000Local memory write misses\000legacy-cache-config=0x10106\000\00010\000\000\000\000\000"
+/* offset=117223 */ "node-stores-miss\000legacy cache\000Local memory write misses\000legacy-cache-config=0x10106\000\00010\000\000\000\000\000"
+/* offset=117315 */ "node-write\000legacy cache\000Local memory write accesses\000legacy-cache-config=0x106\000\00010\000\000\000\000\000"
+/* offset=117401 */ "node-write-refs\000legacy cache\000Local memory write accesses\000legacy-cache-config=0x106\000\00010\000\000\000\000\000"
+/* offset=117492 */ "node-write-reference\000legacy cache\000Local memory write accesses\000legacy-cache-config=0x106\000\00010\000\000\000\000\000"
+/* offset=117588 */ "node-write-ops\000legacy cache\000Local memory write accesses\000legacy-cache-config=0x106\000\00010\000\000\000\000\000"
+/* offset=117678 */ "node-write-access\000legacy cache\000Local memory write accesses\000legacy-cache-config=0x106\000\00010\000\000\000\000\000"
+/* offset=117771 */ "node-write-misses\000legacy cache\000Local memory write misses\000legacy-cache-config=0x10106\000\00010\000\000\000\000\000"
+/* offset=117864 */ "node-write-miss\000legacy cache\000Local memory write misses\000legacy-cache-config=0x10106\000\00010\000\000\000\000\000"
+/* offset=117955 */ "node-prefetch\000legacy cache\000Local memory prefetch accesses\000legacy-cache-config=0x206\000\00010\000\000\000\000\000"
+/* offset=118047 */ "node-prefetch-refs\000legacy cache\000Local memory prefetch accesses\000legacy-cache-config=0x206\000\00010\000\000\000\000\000"
+/* offset=118144 */ "node-prefetch-reference\000legacy cache\000Local memory prefetch accesses\000legacy-cache-config=0x206\000\00010\000\000\000\000\000"
+/* offset=118246 */ "node-prefetch-ops\000legacy cache\000Local memory prefetch accesses\000legacy-cache-config=0x206\000\00010\000\000\000\000\000"
+/* offset=118342 */ "node-prefetch-access\000legacy cache\000Local memory prefetch accesses\000legacy-cache-config=0x206\000\00010\000\000\000\000\000"
+/* offset=118441 */ "node-prefetch-misses\000legacy cache\000Local memory prefetch misses\000legacy-cache-config=0x10206\000\00000\000\000\000\000\000"
+/* offset=118540 */ "node-prefetch-miss\000legacy cache\000Local memory prefetch misses\000legacy-cache-config=0x10206\000\00010\000\000\000\000\000"
+/* offset=118637 */ "node-prefetches\000legacy cache\000Local memory prefetch accesses\000legacy-cache-config=0x206\000\00000\000\000\000\000\000"
+/* offset=118731 */ "node-prefetches-refs\000legacy cache\000Local memory prefetch accesses\000legacy-cache-config=0x206\000\00010\000\000\000\000\000"
+/* offset=118830 */ "node-prefetches-reference\000legacy cache\000Local memory prefetch accesses\000legacy-cache-config=0x206\000\00010\000\000\000\000\000"
+/* offset=118934 */ "node-prefetches-ops\000legacy cache\000Local memory prefetch accesses\000legacy-cache-config=0x206\000\00010\000\000\000\000\000"
+/* offset=119032 */ "node-prefetches-access\000legacy cache\000Local memory prefetch accesses\000legacy-cache-config=0x206\000\00010\000\000\000\000\000"
+/* offset=119133 */ "node-prefetches-misses\000legacy cache\000Local memory prefetch misses\000legacy-cache-config=0x10206\000\00010\000\000\000\000\000"
+/* offset=119234 */ "node-prefetches-miss\000legacy cache\000Local memory prefetch misses\000legacy-cache-config=0x10206\000\00010\000\000\000\000\000"
+/* offset=119333 */ "node-speculative-read\000legacy cache\000Local memory prefetch accesses\000legacy-cache-config=0x206\000\00010\000\000\000\000\000"
+/* offset=119433 */ "node-speculative-read-refs\000legacy cache\000Local memory prefetch accesses\000legacy-cache-config=0x206\000\00010\000\000\000\000\000"
+/* offset=119538 */ "node-speculative-read-reference\000legacy cache\000Local memory prefetch accesses\000legacy-cache-config=0x206\000\00010\000\000\000\000\000"
+/* offset=119648 */ "node-speculative-read-ops\000legacy cache\000Local memory prefetch accesses\000legacy-cache-config=0x206\000\00010\000\000\000\000\000"
+/* offset=119752 */ "node-speculative-read-access\000legacy cache\000Local memory prefetch accesses\000legacy-cache-config=0x206\000\00010\000\000\000\000\000"
+/* offset=119859 */ "node-speculative-read-misses\000legacy cache\000Local memory prefetch misses\000legacy-cache-config=0x10206\000\00010\000\000\000\000\000"
+/* offset=119966 */ "node-speculative-read-miss\000legacy cache\000Local memory prefetch misses\000legacy-cache-config=0x10206\000\00010\000\000\000\000\000"
+/* offset=120071 */ "node-speculative-load\000legacy cache\000Local memory prefetch accesses\000legacy-cache-config=0x206\000\00010\000\000\000\000\000"
+/* offset=120171 */ "node-speculative-load-refs\000legacy cache\000Local memory prefetch accesses\000legacy-cache-config=0x206\000\00010\000\000\000\000\000"
+/* offset=120276 */ "node-speculative-load-reference\000legacy cache\000Local memory prefetch accesses\000legacy-cache-config=0x206\000\00010\000\000\000\000\000"
+/* offset=120386 */ "node-speculative-load-ops\000legacy cache\000Local memory prefetch accesses\000legacy-cache-config=0x206\000\00010\000\000\000\000\000"
+/* offset=120490 */ "node-speculative-load-access\000legacy cache\000Local memory prefetch accesses\000legacy-cache-config=0x206\000\00010\000\000\000\000\000"
+/* offset=120597 */ "node-speculative-load-misses\000legacy cache\000Local memory prefetch misses\000legacy-cache-config=0x10206\000\00010\000\000\000\000\000"
+/* offset=120704 */ "node-speculative-load-miss\000legacy cache\000Local memory prefetch misses\000legacy-cache-config=0x10206\000\00010\000\000\000\000\000"
+/* offset=120809 */ "node-refs\000legacy cache\000Local memory read accesses\000legacy-cache-config=6\000\00010\000\000\000\000\000"
+/* offset=120889 */ "node-reference\000legacy cache\000Local memory read accesses\000legacy-cache-config=6\000\00010\000\000\000\000\000"
+/* offset=120974 */ "node-ops\000legacy cache\000Local memory read accesses\000legacy-cache-config=6\000\00010\000\000\000\000\000"
+/* offset=121053 */ "node-access\000legacy cache\000Local memory read accesses\000legacy-cache-config=6\000\00010\000\000\000\000\000"
+/* offset=121135 */ "node-misses\000legacy cache\000Local memory read misses\000legacy-cache-config=0x10006\000\00010\000\000\000\000\000"
+/* offset=121221 */ "node-miss\000legacy cache\000Local memory read misses\000legacy-cache-config=0x10006\000\00010\000\000\000\000\000"
+/* offset=121305 */ "cpu-cycles\000legacy hardware\000Total cycles. Be wary of what happens during CPU frequency scaling [This event is an alias of cycles]\000legacy-hardware-config=0\000\00000\000\000\000\000\000"
+/* offset=121467 */ "cycles\000legacy hardware\000Total cycles. Be wary of what happens during CPU frequency scaling [This event is an alias of cpu-cycles]\000legacy-hardware-config=0\000\00000\000\000\000\000\000"
+/* offset=121629 */ "instructions\000legacy hardware\000Retired instructions. Be careful, these can be affected by various issues, most notably hardware interrupt counts\000legacy-hardware-config=1\000\00000\000\000\000\000\000"
+/* offset=121805 */ "cache-references\000legacy hardware\000Cache accesses. Usually this indicates Last Level Cache accesses but this may vary depending on your CPU. This may include prefetches and coherency messages; again this depends on the design of your CPU\000legacy-hardware-config=2\000\00000\000\000\000\000\000"
+/* offset=122075 */ "cache-misses\000legacy hardware\000Cache misses. Usually this indicates Last Level Cache misses; this is intended to be used in conjunction with the PERF_COUNT_HW_CACHE_REFERENCES event to calculate cache miss rates\000legacy-hardware-config=3\000\00000\000\000\000\000\000"
+/* offset=122318 */ "branches\000legacy hardware\000Retired branch instructions [This event is an alias of branch-instructions]\000legacy-hardware-config=4\000\00000\000\000\000\000\000"
+/* offset=122452 */ "branch-instructions\000legacy hardware\000Retired branch instructions [This event is an alias of branches]\000legacy-hardware-config=4\000\00000\000\000\000\000\000"
+/* offset=122586 */ "branch-misses\000legacy hardware\000Mispredicted branch instructions\000legacy-hardware-config=5\000\00000\000\000\000\000\000"
+/* offset=122682 */ "bus-cycles\000legacy hardware\000Bus cycles, which can be different from total cycles\000legacy-hardware-config=6\000\00000\000\000\000\000\000"
+/* offset=122795 */ "stalled-cycles-frontend\000legacy hardware\000Stalled cycles during issue [This event is an alias of idle-cycles-frontend]\000legacy-hardware-config=7\000\00000\000\000\000\000\000"
+/* offset=122945 */ "idle-cycles-frontend\000legacy hardware\000Stalled cycles during issue [This event is an alias of stalled-cycles-fronted]\000legacy-hardware-config=7\000\00000\000\000\000\000\000"
+/* offset=123094 */ "stalled-cycles-backend\000legacy hardware\000Stalled cycles during retirement [This event is an alias of idle-cycles-backend]\000legacy-hardware-config=8\000\00000\000\000\000\000\000"
+/* offset=123247 */ "idle-cycles-backend\000legacy hardware\000Stalled cycles during retirement [This event is an alias of stalled-cycles-backend]\000legacy-hardware-config=8\000\00000\000\000\000\000\000"
+/* offset=123400 */ "ref-cycles\000legacy hardware\000Total cycles; not affected by CPU frequency scaling\000legacy-hardware-config=9\000\00000\000\000\000\000\000"
+/* offset=123512 */ "software\000"
+/* offset=123521 */ "cpu-clock\000software\000Per-CPU high-resolution timer based event\000config=0\000\000001e-6msec\000\000\000\000\000"
+/* offset=123607 */ "task-clock\000software\000Per-task high-resolution timer based event\000config=1\000\000001e-6msec\000\000\000\000\000"
+/* offset=123695 */ "faults\000software\000Number of page faults [This event is an alias of page-faults]\000config=2\000\00000\000\000\000\000\000"
+/* offset=123790 */ "page-faults\000software\000Number of page faults [This event is an alias of faults]\000config=2\000\00000\000\000\000\000\000"
+/* offset=123885 */ "context-switches\000software\000Number of context switches [This event is an alias of cs]\000config=3\000\00000\000\000\000\000\000"
+/* offset=123986 */ "cs\000software\000Number of context switches [This event is an alias of context-switches]\000config=3\000\00000\000\000\000\000\000"
+/* offset=124087 */ "cpu-migrations\000software\000Number of times a process has migrated to a new CPU [This event is an alias of migrations]\000config=4\000\00000\000\000\000\000\000"
+/* offset=124219 */ "migrations\000software\000Number of times a process has migrated to a new CPU [This event is an alias of cpu-migrations]\000config=4\000\00000\000\000\000\000\000"
+/* offset=124351 */ "minor-faults\000software\000Number of minor page faults. Minor faults don't require I/O to handle\000config=5\000\00000\000\000\000\000\000"
+/* offset=124460 */ "major-faults\000software\000Number of major page faults. Major faults require I/O to handle\000config=6\000\00000\000\000\000\000\000"
+/* offset=124563 */ "alignment-faults\000software\000Number of kernel handled memory alignment faults\000config=7\000\00000\000\000\000\000\000"
+/* offset=124655 */ "emulation-faults\000software\000Number of kernel handled unimplemented instruction faults handled through emulation\000config=8\000\00000\000\000\000\000\000"
+/* offset=124782 */ "dummy\000software\000A placeholder event that doesn't count anything\000config=9\000\00000\000\000\000\000\000"
+/* offset=124862 */ "bpf-output\000software\000An event used by BPF programs to write to the perf ring buffer\000config=0xa\000\00000\000\000\000\000\000"
+/* offset=124964 */ "cgroup-switches\000software\000Number of context switches to a task in a different cgroup\000config=0xb\000\00000\000\000\000\000\000"
+/* offset=125067 */ "tool\000"
+/* offset=125072 */ "duration_time\000tool\000Wall clock interval time in nanoseconds\000config=1\000\00000\000\000\000\000\000"
+/* offset=125148 */ "user_time\000tool\000User (non-kernel) time in nanoseconds\000config=2\000\00000\000\000\000\000\000"
+/* offset=125218 */ "system_time\000tool\000System/kernel time in nanoseconds\000config=3\000\00000\000\000\000\000\000"
+/* offset=125286 */ "has_pmem\000tool\0001 if persistent memory installed otherwise 0\000config=4\000\00000\000\000\000\000\000"
+/* offset=125362 */ "num_cores\000tool\000Number of cores. A core consists of 1 or more thread, with each thread being associated with a logical Linux CPU\000config=5\000\00000\000\000\000\000\000"
+/* offset=125507 */ "num_cpus\000tool\000Number of logical Linux CPUs. There may be multiple such CPUs on a core\000config=6\000\00000\000\000\000\000\000"
+/* offset=125610 */ "num_cpus_online\000tool\000Number of online logical Linux CPUs. There may be multiple such CPUs on a core\000config=7\000\00000\000\000\000\000\000"
+/* offset=125727 */ "num_dies\000tool\000Number of dies. Each die has 1 or more cores\000config=8\000\00000\000\000\000\000\000"
+/* offset=125803 */ "num_packages\000tool\000Number of packages. Each package has 1 or more die\000config=9\000\00000\000\000\000\000\000"
+/* offset=125889 */ "slots\000tool\000Number of functional units that in parallel can execute parts of an instruction\000config=0xa\000\00000\000\000\000\000\000"
+/* offset=125999 */ "smt_on\000tool\0001 if simultaneous multithreading (aka hyperthreading) is enable otherwise 0\000config=0xb\000\00000\000\000\000\000\000"
+/* offset=126106 */ "system_tsc_freq\000tool\000The amount a Time Stamp Counter (TSC) increases per second\000config=0xc\000\00000\000\000\000\000\000"
+/* offset=126205 */ "core_wide\000tool\0001 if not SMT, if SMT are events being gathered on all SMT threads 1 otherwise 0\000config=0xd\000\00000\000\000\000\000\000"
+/* offset=126319 */ "target_cpu\000tool\0001 if CPUs being analyzed, 0 if threads/processes\000config=0xe\000\00000\000\000\000\000\000"
+/* offset=126403 */ "bp_l1_btb_correct\000branch\000L1 BTB Correction\000event=0x8a\000\00000\000\000\000\000\000"
+/* offset=126465 */ "bp_l2_btb_correct\000branch\000L2 BTB Correction\000event=0x8b\000\00000\000\000\000\000\000"
+/* offset=126527 */ "l3_cache_rd\000cache\000L3 cache access, read\000event=0x40\000\00000\000\000\000\000Attributable Level 3 cache access, read\000"
+/* offset=126625 */ "segment_reg_loads.any\000other\000Number of segment register loads\000event=6,period=200000,umask=0x80\000\00000\000\000\000\000\000"
+/* offset=126727 */ "dispatch_blocked.any\000other\000Memory cluster signals to block micro-op dispatch for any reason\000event=9,period=200000,umask=0x20\000\00000\000\000\000\000\000"
+/* offset=126860 */ "eist_trans\000other\000Number of Enhanced Intel SpeedStep(R) Technology (EIST) transitions\000event=0x3a,period=200000\000\00000\000\000\000\000\000"
+/* offset=126978 */ "hisi_sccl,ddrc\000"
+/* offset=126993 */ "uncore_hisi_ddrc.flux_wcmd\000uncore\000DDRC write commands\000event=2\000\00000\000\000\000\000\000"
+/* offset=127063 */ "uncore_cbox\000"
+/* offset=127075 */ "unc_cbo_xsnp_response.miss_eviction\000uncore\000A cross-core snoop resulted from L3 Eviction which misses in some processor core\000event=0x22,umask=0x81\000\00000\000\000\000\000\000"
+/* offset=127229 */ "event-hyphen\000uncore\000UNC_CBO_HYPHEN\000event=0xe0\000\00000\000\000\000\000\000"
+/* offset=127283 */ "event-two-hyph\000uncore\000UNC_CBO_TWO_HYPH\000event=0xc0\000\00000\000\000\000\000\000"
+/* offset=127341 */ "hisi_sccl,l3c\000"
+/* offset=127355 */ "uncore_hisi_l3c.rd_hit_cpipe\000uncore\000Total read hits\000event=7\000\00000\000\000\000\000\000"
+/* offset=127423 */ "uncore_imc_free_running\000"
+/* offset=127447 */ "uncore_imc_free_running.cache_miss\000uncore\000Total cache misses\000event=0x12\000\00000\000\000\000\000\000"
+/* offset=127527 */ "uncore_imc\000"
+/* offset=127538 */ "uncore_imc.cache_hits\000uncore\000Total cache hits\000event=0x34\000\00000\000\000\000\000\000"
+/* offset=127603 */ "uncore_sys_ddr_pmu\000"
+/* offset=127622 */ "sys_ddr_pmu.write_cycles\000uncore\000ddr write-cycles event\000event=0x2b\000v8\00000\000\000\000\000\000"
+/* offset=127698 */ "uncore_sys_ccn_pmu\000"
+/* offset=127717 */ "sys_ccn_pmu.read_cycles\000uncore\000ccn read-cycles event\000config=0x2c\0000x01\00000\000\000\000\000\000"
+/* offset=127794 */ "uncore_sys_cmn_pmu\000"
+/* offset=127813 */ "sys_cmn_pmu.hnf_cache_miss\000uncore\000Counts total cache misses in first lookup result (high priority)\000eventid=1,type=5\000(434|436|43c|43a).*\00000\000\000\000\000\000"
+/* offset=127956 */ "CPUs_utilized\000Default\000(software@cpu\\-clock\\,name\\=cpu\\-clock@ if #target_cpu else software@task\\-clock\\,name\\=task\\-clock@) / (duration_time * 1e9)\000\000Average CPU utilization\000\0001CPUs\000\000\000\000011"
+/* offset=128142 */ "cs_per_second\000Default\000software@context\\-switches\\,name\\=context\\-switches@ * 1e9 / (software@cpu\\-clock\\,name\\=cpu\\-clock@ if #target_cpu else software@task\\-clock\\,name\\=task\\-clock@)\000\000Context switches per CPU second\000\0001cs/sec\000\000\000\000011"
+/* offset=128375 */ "migrations_per_second\000Default\000software@cpu\\-migrations\\,name\\=cpu\\-migrations@ * 1e9 / (software@cpu\\-clock\\,name\\=cpu\\-clock@ if #target_cpu else software@task\\-clock\\,name\\=task\\-clock@)\000\000Process migrations to a new CPU per CPU second\000\0001migrations/sec\000\000\000\000011"
+/* offset=128635 */ "page_faults_per_second\000Default\000software@page\\-faults\\,name\\=page\\-faults@ * 1e9 / (software@cpu\\-clock\\,name\\=cpu\\-clock@ if #target_cpu else software@task\\-clock\\,name\\=task\\-clock@)\000\000Page faults per CPU second\000\0001faults/sec\000\000\000\000011"
+/* offset=128866 */ "insn_per_cycle\000Default\000instructions / cpu\\-cycles\000insn_per_cycle < 1\000Instructions Per Cycle\000\0001instructions\000\000\000\000001"
+/* offset=128979 */ "stalled_cycles_per_instruction\000Default\000max(stalled\\-cycles\\-frontend, stalled\\-cycles\\-backend) / instructions\000\000Max front or backend stalls per instruction\000\000\000\000\000\000001"
+/* offset=129143 */ "frontend_cycles_idle\000Default\000stalled\\-cycles\\-frontend / cpu\\-cycles\000frontend_cycles_idle > 0.1\000Frontend stalls per cycle\000\000\000\000\000\000001"
+/* offset=129273 */ "backend_cycles_idle\000Default\000stalled\\-cycles\\-backend / cpu\\-cycles\000backend_cycles_idle > 0.2\000Backend stalls per cycle\000\000\000\000\000\000001"
+/* offset=129399 */ "cycles_frequency\000Default\000cpu\\-cycles / (software@cpu\\-clock\\,name\\=cpu\\-clock@ if #target_cpu else software@task\\-clock\\,name\\=task\\-clock@)\000\000Cycles per CPU second\000\0001GHz\000\000\000\000011"
+/* offset=129575 */ "branch_frequency\000Default\000branches / (software@cpu\\-clock\\,name\\=cpu\\-clock@ if #target_cpu else software@task\\-clock\\,name\\=task\\-clock@)\000\000Branches per CPU second\000\0001000M/sec\000\000\000\000011"
+/* offset=129755 */ "branch_miss_rate\000Default\000branch\\-misses / branches\000branch_miss_rate > 0.05\000Branch miss rate\000\000100%\000\000\000\000001"
+/* offset=129859 */ "l1d_miss_rate\000Default2\000L1\\-dcache\\-load\\-misses / L1\\-dcache\\-loads\000l1d_miss_rate > 0.05\000L1D miss rate\000\000100%\000\000\000\000001"
+/* offset=129975 */ "llc_miss_rate\000Default2\000LLC\\-load\\-misses / LLC\\-loads\000llc_miss_rate > 0.05\000LLC miss rate\000\000100%\000\000\000\000001"
+/* offset=130076 */ "l1i_miss_rate\000Default3\000L1\\-icache\\-load\\-misses / L1\\-icache\\-loads\000l1i_miss_rate > 0.05\000L1I miss rate\000\000100%\000\000\000\000001"
+/* offset=130191 */ "dtlb_miss_rate\000Default3\000dTLB\\-load\\-misses / dTLB\\-loads\000dtlb_miss_rate > 0.05\000dTLB miss rate\000\000100%\000\000\000\000001"
+/* offset=130297 */ "itlb_miss_rate\000Default3\000iTLB\\-load\\-misses / iTLB\\-loads\000itlb_miss_rate > 0.05\000iTLB miss rate\000\000100%\000\000\000\000001"
+/* offset=130403 */ "l1_prefetch_miss_rate\000Default4\000L1\\-dcache\\-prefetch\\-misses / L1\\-dcache\\-prefetches\000l1_prefetch_miss_rate > 0.05\000L1 prefetch miss rate\000\000100%\000\000\000\000001"
+/* offset=130551 */ "CPI\000\0001 / IPC\000\000\000\000\000\000\000\000000"
+/* offset=130574 */ "IPC\000group1\000inst_retired.any / cpu_clk_unhalted.thread\000\000\000\000\000\000\000\000000"
+/* offset=130638 */ "Frontend_Bound_SMT\000\000idq_uops_not_delivered.core / (4 * (cpu_clk_unhalted.thread / 2 * (1 + cpu_clk_unhalted.one_thread_active / cpu_clk_unhalted.ref_xclk)))\000\000\000\000\000\000\000\000000"
+/* offset=130805 */ "dcache_miss_cpi\000\000l1d\\-loads\\-misses / inst_retired.any\000\000\000\000\000\000\000\000000"
+/* offset=130870 */ "icache_miss_cycles\000\000l1i\\-loads\\-misses / inst_retired.any\000\000\000\000\000\000\000\000000"
+/* offset=130938 */ "cache_miss_cycles\000group1\000dcache_miss_cpi + icache_miss_cycles\000\000\000\000\000\000\000\000000"
+/* offset=131010 */ "DCache_L2_All_Hits\000\000l2_rqsts.demand_data_rd_hit + l2_rqsts.pf_hit + l2_rqsts.rfo_hit\000\000\000\000\000\000\000\000000"
+/* offset=131105 */ "DCache_L2_All_Miss\000\000max(l2_rqsts.all_demand_data_rd - l2_rqsts.demand_data_rd_hit, 0) + l2_rqsts.pf_miss + l2_rqsts.rfo_miss\000\000\000\000\000\000\000\000000"
+/* offset=131240 */ "DCache_L2_All\000\000DCache_L2_All_Hits + DCache_L2_All_Miss\000\000\000\000\000\000\000\000000"
+/* offset=131305 */ "DCache_L2_Hits\000\000d_ratio(DCache_L2_All_Hits, DCache_L2_All)\000\000\000\000\000\000\000\000000"
+/* offset=131374 */ "DCache_L2_Misses\000\000d_ratio(DCache_L2_All_Miss, DCache_L2_All)\000\000\000\000\000\000\000\000000"
+/* offset=131445 */ "M1\000\000ipc + M2\000\000\000\000\000\000\000\000000"
+/* offset=131468 */ "M2\000\000ipc + M1\000\000\000\000\000\000\000\000000"
+/* offset=131491 */ "M3\000\0001 / M3\000\000\000\000\000\000\000\000000"
+/* offset=131512 */ "L1D_Cache_Fill_BW\000\00064 * l1d.replacement / 1e9 / duration_time\000\000\000\000\000\000\000\000000"
;
+static const struct compact_pmu_event pmu_events__common_default_core[] = {
+{ 111480 }, /* bpc\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00010\000\000\000\000\000 */
+{ 113849 }, /* bpc-access\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00010\000\000\000\000\000 */
+{ 111564 }, /* bpc-load\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00010\000\000\000\000\000 */
+{ 111939 }, /* bpc-load-access\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00010\000\000\000\000\000 */
+{ 112135 }, /* bpc-load-miss\000legacy cache\000Branch prediction unit read misses\000legacy-cache-config=0x10005\000\00010\000\000\000\000\000 */
+{ 112035 }, /* bpc-load-misses\000legacy cache\000Branch prediction unit read misses\000legacy-cache-config=0x10005\000\00010\000\000\000\000\000 */
+{ 111846 }, /* bpc-load-ops\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00010\000\000\000\000\000 */
+{ 111747 }, /* bpc-load-reference\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00010\000\000\000\000\000 */
+{ 111653 }, /* bpc-load-refs\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00010\000\000\000\000\000 */
+{ 112233 }, /* bpc-loads\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00010\000\000\000\000\000 */
+{ 112612 }, /* bpc-loads-access\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00010\000\000\000\000\000 */
+{ 112810 }, /* bpc-loads-miss\000legacy cache\000Branch prediction unit read misses\000legacy-cache-config=0x10005\000\00010\000\000\000\000\000 */
+{ 112709 }, /* bpc-loads-misses\000legacy cache\000Branch prediction unit read misses\000legacy-cache-config=0x10005\000\00010\000\000\000\000\000 */
+{ 112518 }, /* bpc-loads-ops\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00010\000\000\000\000\000 */
+{ 112418 }, /* bpc-loads-reference\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00010\000\000\000\000\000 */
+{ 112323 }, /* bpc-loads-refs\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00010\000\000\000\000\000 */
+{ 114035 }, /* bpc-miss\000legacy cache\000Branch prediction unit read misses\000legacy-cache-config=0x10005\000\00010\000\000\000\000\000 */
+{ 113940 }, /* bpc-misses\000legacy cache\000Branch prediction unit read misses\000legacy-cache-config=0x10005\000\00010\000\000\000\000\000 */
+{ 113761 }, /* bpc-ops\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00010\000\000\000\000\000 */
+{ 112909 }, /* bpc-read\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00010\000\000\000\000\000 */
+{ 113284 }, /* bpc-read-access\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00010\000\000\000\000\000 */
+{ 113480 }, /* bpc-read-miss\000legacy cache\000Branch prediction unit read misses\000legacy-cache-config=0x10005\000\00010\000\000\000\000\000 */
+{ 113380 }, /* bpc-read-misses\000legacy cache\000Branch prediction unit read misses\000legacy-cache-config=0x10005\000\00010\000\000\000\000\000 */
+{ 113191 }, /* bpc-read-ops\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00010\000\000\000\000\000 */
+{ 113092 }, /* bpc-read-reference\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00010\000\000\000\000\000 */
+{ 112998 }, /* bpc-read-refs\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00010\000\000\000\000\000 */
+{ 113667 }, /* bpc-reference\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00010\000\000\000\000\000 */
+{ 113578 }, /* bpc-refs\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00010\000\000\000\000\000 */
+{ 106184 }, /* bpu\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00010\000\000\000\000\000 */
+{ 108553 }, /* bpu-access\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00010\000\000\000\000\000 */
+{ 106268 }, /* bpu-load\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00010\000\000\000\000\000 */
+{ 106643 }, /* bpu-load-access\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00010\000\000\000\000\000 */
+{ 106839 }, /* bpu-load-miss\000legacy cache\000Branch prediction unit read misses\000legacy-cache-config=0x10005\000\00010\000\000\000\000\000 */
+{ 106739 }, /* bpu-load-misses\000legacy cache\000Branch prediction unit read misses\000legacy-cache-config=0x10005\000\00010\000\000\000\000\000 */
+{ 106550 }, /* bpu-load-ops\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00010\000\000\000\000\000 */
+{ 106451 }, /* bpu-load-reference\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00010\000\000\000\000\000 */
+{ 106357 }, /* bpu-load-refs\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00010\000\000\000\000\000 */
+{ 106937 }, /* bpu-loads\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00010\000\000\000\000\000 */
+{ 107316 }, /* bpu-loads-access\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00010\000\000\000\000\000 */
+{ 107514 }, /* bpu-loads-miss\000legacy cache\000Branch prediction unit read misses\000legacy-cache-config=0x10005\000\00010\000\000\000\000\000 */
+{ 107413 }, /* bpu-loads-misses\000legacy cache\000Branch prediction unit read misses\000legacy-cache-config=0x10005\000\00010\000\000\000\000\000 */
+{ 107222 }, /* bpu-loads-ops\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00010\000\000\000\000\000 */
+{ 107122 }, /* bpu-loads-reference\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00010\000\000\000\000\000 */
+{ 107027 }, /* bpu-loads-refs\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00010\000\000\000\000\000 */
+{ 108739 }, /* bpu-miss\000legacy cache\000Branch prediction unit read misses\000legacy-cache-config=0x10005\000\00010\000\000\000\000\000 */
+{ 108644 }, /* bpu-misses\000legacy cache\000Branch prediction unit read misses\000legacy-cache-config=0x10005\000\00010\000\000\000\000\000 */
+{ 108465 }, /* bpu-ops\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00010\000\000\000\000\000 */
+{ 107613 }, /* bpu-read\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00010\000\000\000\000\000 */
+{ 107988 }, /* bpu-read-access\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00010\000\000\000\000\000 */
+{ 108184 }, /* bpu-read-miss\000legacy cache\000Branch prediction unit read misses\000legacy-cache-config=0x10005\000\00010\000\000\000\000\000 */
+{ 108084 }, /* bpu-read-misses\000legacy cache\000Branch prediction unit read misses\000legacy-cache-config=0x10005\000\00010\000\000\000\000\000 */
+{ 107895 }, /* bpu-read-ops\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00010\000\000\000\000\000 */
+{ 107796 }, /* bpu-read-reference\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00010\000\000\000\000\000 */
+{ 107702 }, /* bpu-read-refs\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00010\000\000\000\000\000 */
+{ 108371 }, /* bpu-reference\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00010\000\000\000\000\000 */
+{ 108282 }, /* bpu-refs\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00010\000\000\000\000\000 */
+{ 100851 }, /* branch\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00010\000\000\000\000\000 */
+{ 103295 }, /* branch-access\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00010\000\000\000\000\000 */
+{ 122452 }, /* branch-instructions\000legacy hardware\000Retired branch instructions [This event is an alias of branches]\000legacy-hardware-config=4\000\00000\000\000\000\000\000 */
+{ 100938 }, /* branch-load\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00010\000\000\000\000\000 */
+{ 101325 }, /* branch-load-access\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00010\000\000\000\000\000 */
+{ 101527 }, /* branch-load-miss\000legacy cache\000Branch prediction unit read misses\000legacy-cache-config=0x10005\000\00010\000\000\000\000\000 */
+{ 101424 }, /* branch-load-misses\000legacy cache\000Branch prediction unit read misses\000legacy-cache-config=0x10005\000\00000\000\000\000\000\000 */
+{ 101229 }, /* branch-load-ops\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00010\000\000\000\000\000 */
+{ 101127 }, /* branch-load-reference\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00010\000\000\000\000\000 */
+{ 101030 }, /* branch-load-refs\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00010\000\000\000\000\000 */
+{ 101628 }, /* branch-loads\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00000\000\000\000\000\000 */
+{ 102019 }, /* branch-loads-access\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00010\000\000\000\000\000 */
+{ 102223 }, /* branch-loads-miss\000legacy cache\000Branch prediction unit read misses\000legacy-cache-config=0x10005\000\00010\000\000\000\000\000 */
+{ 102119 }, /* branch-loads-misses\000legacy cache\000Branch prediction unit read misses\000legacy-cache-config=0x10005\000\00010\000\000\000\000\000 */
+{ 101922 }, /* branch-loads-ops\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00010\000\000\000\000\000 */
+{ 101819 }, /* branch-loads-reference\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00010\000\000\000\000\000 */
+{ 101721 }, /* branch-loads-refs\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00010\000\000\000\000\000 */
+{ 103389 }, /* branch-miss\000legacy cache\000Branch prediction unit read misses\000legacy-cache-config=0x10005\000\00010\000\000\000\000\000 */
+{ 122586 }, /* branch-misses\000legacy hardware\000Mispredicted branch instructions\000legacy-hardware-config=5\000\00000\000\000\000\000\000 */
+{ 103204 }, /* branch-ops\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00010\000\000\000\000\000 */
+{ 102325 }, /* branch-read\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00010\000\000\000\000\000 */
+{ 102712 }, /* branch-read-access\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00010\000\000\000\000\000 */
+{ 102914 }, /* branch-read-miss\000legacy cache\000Branch prediction unit read misses\000legacy-cache-config=0x10005\000\00010\000\000\000\000\000 */
+{ 102811 }, /* branch-read-misses\000legacy cache\000Branch prediction unit read misses\000legacy-cache-config=0x10005\000\00010\000\000\000\000\000 */
+{ 102616 }, /* branch-read-ops\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00010\000\000\000\000\000 */
+{ 102514 }, /* branch-read-reference\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00010\000\000\000\000\000 */
+{ 102417 }, /* branch-read-refs\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00010\000\000\000\000\000 */
+{ 103107 }, /* branch-reference\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00010\000\000\000\000\000 */
+{ 103015 }, /* branch-refs\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00010\000\000\000\000\000 */
+{ 122318 }, /* branches\000legacy hardware\000Retired branch instructions [This event is an alias of branch-instructions]\000legacy-hardware-config=4\000\00000\000\000\000\000\000 */
+{ 105890 }, /* branches-access\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00010\000\000\000\000\000 */
+{ 103485 }, /* branches-load\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00010\000\000\000\000\000 */
+{ 103880 }, /* branches-load-access\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00010\000\000\000\000\000 */
+{ 104086 }, /* branches-load-miss\000legacy cache\000Branch prediction unit read misses\000legacy-cache-config=0x10005\000\00010\000\000\000\000\000 */
+{ 103981 }, /* branches-load-misses\000legacy cache\000Branch prediction unit read misses\000legacy-cache-config=0x10005\000\00010\000\000\000\000\000 */
+{ 103782 }, /* branches-load-ops\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00010\000\000\000\000\000 */
+{ 103678 }, /* branches-load-reference\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00010\000\000\000\000\000 */
+{ 103579 }, /* branches-load-refs\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00010\000\000\000\000\000 */
+{ 104189 }, /* branches-loads\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00010\000\000\000\000\000 */
+{ 104588 }, /* branches-loads-access\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00010\000\000\000\000\000 */
+{ 104796 }, /* branches-loads-miss\000legacy cache\000Branch prediction unit read misses\000legacy-cache-config=0x10005\000\00010\000\000\000\000\000 */
+{ 104690 }, /* branches-loads-misses\000legacy cache\000Branch prediction unit read misses\000legacy-cache-config=0x10005\000\00010\000\000\000\000\000 */
+{ 104489 }, /* branches-loads-ops\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00010\000\000\000\000\000 */
+{ 104384 }, /* branches-loads-reference\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00010\000\000\000\000\000 */
+{ 104284 }, /* branches-loads-refs\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00010\000\000\000\000\000 */
+{ 106086 }, /* branches-miss\000legacy cache\000Branch prediction unit read misses\000legacy-cache-config=0x10005\000\00010\000\000\000\000\000 */
+{ 105986 }, /* branches-misses\000legacy cache\000Branch prediction unit read misses\000legacy-cache-config=0x10005\000\00010\000\000\000\000\000 */
+{ 105797 }, /* branches-ops\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00010\000\000\000\000\000 */
+{ 104900 }, /* branches-read\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00010\000\000\000\000\000 */
+{ 105295 }, /* branches-read-access\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00010\000\000\000\000\000 */
+{ 105501 }, /* branches-read-miss\000legacy cache\000Branch prediction unit read misses\000legacy-cache-config=0x10005\000\00010\000\000\000\000\000 */
+{ 105396 }, /* branches-read-misses\000legacy cache\000Branch prediction unit read misses\000legacy-cache-config=0x10005\000\00010\000\000\000\000\000 */
+{ 105197 }, /* branches-read-ops\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00010\000\000\000\000\000 */
+{ 105093 }, /* branches-read-reference\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00010\000\000\000\000\000 */
+{ 104994 }, /* branches-read-refs\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00010\000\000\000\000\000 */
+{ 105698 }, /* branches-reference\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00010\000\000\000\000\000 */
+{ 105604 }, /* branches-refs\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00010\000\000\000\000\000 */
+{ 108832 }, /* btb\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00010\000\000\000\000\000 */
+{ 111201 }, /* btb-access\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00010\000\000\000\000\000 */
+{ 108916 }, /* btb-load\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00010\000\000\000\000\000 */
+{ 109291 }, /* btb-load-access\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00010\000\000\000\000\000 */
+{ 109487 }, /* btb-load-miss\000legacy cache\000Branch prediction unit read misses\000legacy-cache-config=0x10005\000\00010\000\000\000\000\000 */
+{ 109387 }, /* btb-load-misses\000legacy cache\000Branch prediction unit read misses\000legacy-cache-config=0x10005\000\00010\000\000\000\000\000 */
+{ 109198 }, /* btb-load-ops\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00010\000\000\000\000\000 */
+{ 109099 }, /* btb-load-reference\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00010\000\000\000\000\000 */
+{ 109005 }, /* btb-load-refs\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00010\000\000\000\000\000 */
+{ 109585 }, /* btb-loads\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00010\000\000\000\000\000 */
+{ 109964 }, /* btb-loads-access\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00010\000\000\000\000\000 */
+{ 110162 }, /* btb-loads-miss\000legacy cache\000Branch prediction unit read misses\000legacy-cache-config=0x10005\000\00010\000\000\000\000\000 */
+{ 110061 }, /* btb-loads-misses\000legacy cache\000Branch prediction unit read misses\000legacy-cache-config=0x10005\000\00010\000\000\000\000\000 */
+{ 109870 }, /* btb-loads-ops\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00010\000\000\000\000\000 */
+{ 109770 }, /* btb-loads-reference\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00010\000\000\000\000\000 */
+{ 109675 }, /* btb-loads-refs\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00010\000\000\000\000\000 */
+{ 111387 }, /* btb-miss\000legacy cache\000Branch prediction unit read misses\000legacy-cache-config=0x10005\000\00010\000\000\000\000\000 */
+{ 111292 }, /* btb-misses\000legacy cache\000Branch prediction unit read misses\000legacy-cache-config=0x10005\000\00010\000\000\000\000\000 */
+{ 111113 }, /* btb-ops\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00010\000\000\000\000\000 */
+{ 110261 }, /* btb-read\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00010\000\000\000\000\000 */
+{ 110636 }, /* btb-read-access\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00010\000\000\000\000\000 */
+{ 110832 }, /* btb-read-miss\000legacy cache\000Branch prediction unit read misses\000legacy-cache-config=0x10005\000\00010\000\000\000\000\000 */
+{ 110732 }, /* btb-read-misses\000legacy cache\000Branch prediction unit read misses\000legacy-cache-config=0x10005\000\00010\000\000\000\000\000 */
+{ 110543 }, /* btb-read-ops\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00010\000\000\000\000\000 */
+{ 110444 }, /* btb-read-reference\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00010\000\000\000\000\000 */
+{ 110350 }, /* btb-read-refs\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00010\000\000\000\000\000 */
+{ 111019 }, /* btb-reference\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00010\000\000\000\000\000 */
+{ 110930 }, /* btb-refs\000legacy cache\000Branch prediction unit read accesses\000legacy-cache-config=5\000\00010\000\000\000\000\000 */
+{ 122682 }, /* bus-cycles\000legacy hardware\000Bus cycles, which can be different from total cycles\000legacy-hardware-config=6\000\00000\000\000\000\000\000 */
+{ 122075 }, /* cache-misses\000legacy hardware\000Cache misses. Usually this indicates Last Level Cache misses; this is intended to be used in conjunction with the PERF_COUNT_HW_CACHE_REFERENCES event to calculate cache miss rates\000legacy-hardware-config=3\000\00000\000\000\000\000\000 */
+{ 121805 }, /* cache-references\000legacy hardware\000Cache accesses. Usually this indicates Last Level Cache accesses but this may vary depending on your CPU. This may include prefetches and coherency messages; again this depends on the design of your CPU\000legacy-hardware-config=2\000\00000\000\000\000\000\000 */
+{ 121305 }, /* cpu-cycles\000legacy hardware\000Total cycles. Be wary of what happens during CPU frequency scaling [This event is an alias of cycles]\000legacy-hardware-config=0\000\00000\000\000\000\000\000 */
+{ 121467 }, /* cycles\000legacy hardware\000Total cycles. Be wary of what happens during CPU frequency scaling [This event is an alias of cpu-cycles]\000legacy-hardware-config=0\000\00000\000\000\000\000\000 */
+{ 78952 }, /* d-tlb\000legacy cache\000Data TLB read accesses\000legacy-cache-config=3\000\00010\000\000\000\000\000 */
+{ 85655 }, /* d-tlb-access\000legacy cache\000Data TLB read accesses\000legacy-cache-config=3\000\00010\000\000\000\000\000 */
+{ 79024 }, /* d-tlb-load\000legacy cache\000Data TLB read accesses\000legacy-cache-config=3\000\00010\000\000\000\000\000 */
+{ 79351 }, /* d-tlb-load-access\000legacy cache\000Data TLB read accesses\000legacy-cache-config=3\000\00010\000\000\000\000\000 */
+{ 79523 }, /* d-tlb-load-miss\000legacy cache\000Data TLB read misses\000legacy-cache-config=0x10003\000\00010\000\000\000\000\000 */
+{ 79435 }, /* d-tlb-load-misses\000legacy cache\000Data TLB read misses\000legacy-cache-config=0x10003\000\00010\000\000\000\000\000 */
+{ 79270 }, /* d-tlb-load-ops\000legacy cache\000Data TLB read accesses\000legacy-cache-config=3\000\00010\000\000\000\000\000 */
+{ 79183 }, /* d-tlb-load-reference\000legacy cache\000Data TLB read accesses\000legacy-cache-config=3\000\00010\000\000\000\000\000 */
+{ 79101 }, /* d-tlb-load-refs\000legacy cache\000Data TLB read accesses\000legacy-cache-config=3\000\00010\000\000\000\000\000 */
+{ 79609 }, /* d-tlb-loads\000legacy cache\000Data TLB read accesses\000legacy-cache-config=3\000\00010\000\000\000\000\000 */
+{ 79940 }, /* d-tlb-loads-access\000legacy cache\000Data TLB read accesses\000legacy-cache-config=3\000\00010\000\000\000\000\000 */
+{ 80114 }, /* d-tlb-loads-miss\000legacy cache\000Data TLB read misses\000legacy-cache-config=0x10003\000\00010\000\000\000\000\000 */
+{ 80025 }, /* d-tlb-loads-misses\000legacy cache\000Data TLB read misses\000legacy-cache-config=0x10003\000\00010\000\000\000\000\000 */
+{ 79858 }, /* d-tlb-loads-ops\000legacy cache\000Data TLB read accesses\000legacy-cache-config=3\000\00010\000\000\000\000\000 */
+{ 79770 }, /* d-tlb-loads-reference\000legacy cache\000Data TLB read accesses\000legacy-cache-config=3\000\00010\000\000\000\000\000 */
+{ 79687 }, /* d-tlb-loads-refs\000legacy cache\000Data TLB read accesses\000legacy-cache-config=3\000\00010\000\000\000\000\000 */
+{ 85817 }, /* d-tlb-miss\000legacy cache\000Data TLB read misses\000legacy-cache-config=0x10003\000\00010\000\000\000\000\000 */
+{ 85734 }, /* d-tlb-misses\000legacy cache\000Data TLB read misses\000legacy-cache-config=0x10003\000\00010\000\000\000\000\000 */
+{ 85579 }, /* d-tlb-ops\000legacy cache\000Data TLB read accesses\000legacy-cache-config=3\000\00010\000\000\000\000\000 */
+{ 82650 }, /* d-tlb-prefetch\000legacy cache\000Data TLB prefetch accesses\000legacy-cache-config=0x203\000\00010\000\000\000\000\000 */
+{ 83025 }, /* d-tlb-prefetch-access\000legacy cache\000Data TLB prefetch accesses\000legacy-cache-config=0x203\000\00010\000\000\000\000\000 */
+{ 83217 }, /* d-tlb-prefetch-miss\000legacy cache\000Data TLB prefetch misses\000legacy-cache-config=0x10203\000\00010\000\000\000\000\000 */
+{ 83121 }, /* d-tlb-prefetch-misses\000legacy cache\000Data TLB prefetch misses\000legacy-cache-config=0x10203\000\00010\000\000\000\000\000 */
+{ 82932 }, /* d-tlb-prefetch-ops\000legacy cache\000Data TLB prefetch accesses\000legacy-cache-config=0x203\000\00010\000\000\000\000\000 */
+{ 82833 }, /* d-tlb-prefetch-reference\000legacy cache\000Data TLB prefetch accesses\000legacy-cache-config=0x203\000\00010\000\000\000\000\000 */
+{ 82739 }, /* d-tlb-prefetch-refs\000legacy cache\000Data TLB prefetch accesses\000legacy-cache-config=0x203\000\00010\000\000\000\000\000 */
+{ 83311 }, /* d-tlb-prefetches\000legacy cache\000Data TLB prefetch accesses\000legacy-cache-config=0x203\000\00010\000\000\000\000\000 */
+{ 83694 }, /* d-tlb-prefetches-access\000legacy cache\000Data TLB prefetch accesses\000legacy-cache-config=0x203\000\00010\000\000\000\000\000 */
+{ 83890 }, /* d-tlb-prefetches-miss\000legacy cache\000Data TLB prefetch misses\000legacy-cache-config=0x10203\000\00010\000\000\000\000\000 */
+{ 83792 }, /* d-tlb-prefetches-misses\000legacy cache\000Data TLB prefetch misses\000legacy-cache-config=0x10203\000\00010\000\000\000\000\000 */
+{ 83599 }, /* d-tlb-prefetches-ops\000legacy cache\000Data TLB prefetch accesses\000legacy-cache-config=0x203\000\00010\000\000\000\000\000 */
+{ 83498 }, /* d-tlb-prefetches-reference\000legacy cache\000Data TLB prefetch accesses\000legacy-cache-config=0x203\000\00010\000\000\000\000\000 */
+{ 83402 }, /* d-tlb-prefetches-refs\000legacy cache\000Data TLB prefetch accesses\000legacy-cache-config=0x203\000\00010\000\000\000\000\000 */
+{ 80201 }, /* d-tlb-read\000legacy cache\000Data TLB read accesses\000legacy-cache-config=3\000\00010\000\000\000\000\000 */
+{ 80528 }, /* d-tlb-read-access\000legacy cache\000Data TLB read accesses\000legacy-cache-config=3\000\00010\000\000\000\000\000 */
+{ 80700 }, /* d-tlb-read-miss\000legacy cache\000Data TLB read misses\000legacy-cache-config=0x10003\000\00010\000\000\000\000\000 */
+{ 80612 }, /* d-tlb-read-misses\000legacy cache\000Data TLB read misses\000legacy-cache-config=0x10003\000\00010\000\000\000\000\000 */
+{ 80447 }, /* d-tlb-read-ops\000legacy cache\000Data TLB read accesses\000legacy-cache-config=3\000\00010\000\000\000\000\000 */
+{ 80360 }, /* d-tlb-read-reference\000legacy cache\000Data TLB read accesses\000legacy-cache-config=3\000\00010\000\000\000\000\000 */
+{ 80278 }, /* d-tlb-read-refs\000legacy cache\000Data TLB read accesses\000legacy-cache-config=3\000\00010\000\000\000\000\000 */
+{ 85497 }, /* d-tlb-reference\000legacy cache\000Data TLB read accesses\000legacy-cache-config=3\000\00010\000\000\000\000\000 */
+{ 85420 }, /* d-tlb-refs\000legacy cache\000Data TLB read accesses\000legacy-cache-config=3\000\00010\000\000\000\000\000 */
+{ 84703 }, /* d-tlb-speculative-load\000legacy cache\000Data TLB prefetch accesses\000legacy-cache-config=0x203\000\00010\000\000\000\000\000 */
+{ 85110 }, /* d-tlb-speculative-load-access\000legacy cache\000Data TLB prefetch accesses\000legacy-cache-config=0x203\000\00010\000\000\000\000\000 */
+{ 85318 }, /* d-tlb-speculative-load-miss\000legacy cache\000Data TLB prefetch misses\000legacy-cache-config=0x10203\000\00010\000\000\000\000\000 */
+{ 85214 }, /* d-tlb-speculative-load-misses\000legacy cache\000Data TLB prefetch misses\000legacy-cache-config=0x10203\000\00010\000\000\000\000\000 */
+{ 85009 }, /* d-tlb-speculative-load-ops\000legacy cache\000Data TLB prefetch accesses\000legacy-cache-config=0x203\000\00010\000\000\000\000\000 */
+{ 84902 }, /* d-tlb-speculative-load-reference\000legacy cache\000Data TLB prefetch accesses\000legacy-cache-config=0x203\000\00010\000\000\000\000\000 */
+{ 84800 }, /* d-tlb-speculative-load-refs\000legacy cache\000Data TLB prefetch accesses\000legacy-cache-config=0x203\000\00010\000\000\000\000\000 */
+{ 83986 }, /* d-tlb-speculative-read\000legacy cache\000Data TLB prefetch accesses\000legacy-cache-config=0x203\000\00010\000\000\000\000\000 */
+{ 84393 }, /* d-tlb-speculative-read-access\000legacy cache\000Data TLB prefetch accesses\000legacy-cache-config=0x203\000\00010\000\000\000\000\000 */
+{ 84601 }, /* d-tlb-speculative-read-miss\000legacy cache\000Data TLB prefetch misses\000legacy-cache-config=0x10203\000\00010\000\000\000\000\000 */
+{ 84497 }, /* d-tlb-speculative-read-misses\000legacy cache\000Data TLB prefetch misses\000legacy-cache-config=0x10203\000\00010\000\000\000\000\000 */
+{ 84292 }, /* d-tlb-speculative-read-ops\000legacy cache\000Data TLB prefetch accesses\000legacy-cache-config=0x203\000\00010\000\000\000\000\000 */
+{ 84185 }, /* d-tlb-speculative-read-reference\000legacy cache\000Data TLB prefetch accesses\000legacy-cache-config=0x203\000\00010\000\000\000\000\000 */
+{ 84083 }, /* d-tlb-speculative-read-refs\000legacy cache\000Data TLB prefetch accesses\000legacy-cache-config=0x203\000\00010\000\000\000\000\000 */
+{ 80786 }, /* d-tlb-store\000legacy cache\000Data TLB write accesses\000legacy-cache-config=0x103\000\00010\000\000\000\000\000 */
+{ 81137 }, /* d-tlb-store-access\000legacy cache\000Data TLB write accesses\000legacy-cache-config=0x103\000\00010\000\000\000\000\000 */
+{ 81317 }, /* d-tlb-store-miss\000legacy cache\000Data TLB write misses\000legacy-cache-config=0x10103\000\00010\000\000\000\000\000 */
+{ 81227 }, /* d-tlb-store-misses\000legacy cache\000Data TLB write misses\000legacy-cache-config=0x10103\000\00010\000\000\000\000\000 */
+{ 81050 }, /* d-tlb-store-ops\000legacy cache\000Data TLB write accesses\000legacy-cache-config=0x103\000\00010\000\000\000\000\000 */
+{ 80957 }, /* d-tlb-store-reference\000legacy cache\000Data TLB write accesses\000legacy-cache-config=0x103\000\00010\000\000\000\000\000 */
+{ 80869 }, /* d-tlb-store-refs\000legacy cache\000Data TLB write accesses\000legacy-cache-config=0x103\000\00010\000\000\000\000\000 */
+{ 81405 }, /* d-tlb-stores\000legacy cache\000Data TLB write accesses\000legacy-cache-config=0x103\000\00010\000\000\000\000\000 */
+{ 81760 }, /* d-tlb-stores-access\000legacy cache\000Data TLB write accesses\000legacy-cache-config=0x103\000\00010\000\000\000\000\000 */
+{ 81942 }, /* d-tlb-stores-miss\000legacy cache\000Data TLB write misses\000legacy-cache-config=0x10103\000\00010\000\000\000\000\000 */
+{ 81851 }, /* d-tlb-stores-misses\000legacy cache\000Data TLB write misses\000legacy-cache-config=0x10103\000\00010\000\000\000\000\000 */
+{ 81672 }, /* d-tlb-stores-ops\000legacy cache\000Data TLB write accesses\000legacy-cache-config=0x103\000\00010\000\000\000\000\000 */
+{ 81578 }, /* d-tlb-stores-reference\000legacy cache\000Data TLB write accesses\000legacy-cache-config=0x103\000\00010\000\000\000\000\000 */
+{ 81489 }, /* d-tlb-stores-refs\000legacy cache\000Data TLB write accesses\000legacy-cache-config=0x103\000\00010\000\000\000\000\000 */
+{ 82031 }, /* d-tlb-write\000legacy cache\000Data TLB write accesses\000legacy-cache-config=0x103\000\00010\000\000\000\000\000 */
+{ 82382 }, /* d-tlb-write-access\000legacy cache\000Data TLB write accesses\000legacy-cache-config=0x103\000\00010\000\000\000\000\000 */
+{ 82562 }, /* d-tlb-write-miss\000legacy cache\000Data TLB write misses\000legacy-cache-config=0x10103\000\00010\000\000\000\000\000 */
+{ 82472 }, /* d-tlb-write-misses\000legacy cache\000Data TLB write misses\000legacy-cache-config=0x10103\000\00010\000\000\000\000\000 */
+{ 82295 }, /* d-tlb-write-ops\000legacy cache\000Data TLB write accesses\000legacy-cache-config=0x103\000\00010\000\000\000\000\000 */
+{ 82202 }, /* d-tlb-write-reference\000legacy cache\000Data TLB write accesses\000legacy-cache-config=0x103\000\00010\000\000\000\000\000 */
+{ 82114 }, /* d-tlb-write-refs\000legacy cache\000Data TLB write accesses\000legacy-cache-config=0x103\000\00010\000\000\000\000\000 */
+{ 85898 }, /* data-tlb\000legacy cache\000Data TLB read accesses\000legacy-cache-config=3\000\00010\000\000\000\000\000 */
+{ 92823 }, /* data-tlb-access\000legacy cache\000Data TLB read accesses\000legacy-cache-config=3\000\00010\000\000\000\000\000 */
+{ 85973 }, /* data-tlb-load\000legacy cache\000Data TLB read accesses\000legacy-cache-config=3\000\00010\000\000\000\000\000 */
+{ 86312 }, /* data-tlb-load-access\000legacy cache\000Data TLB read accesses\000legacy-cache-config=3\000\00010\000\000\000\000\000 */
+{ 86490 }, /* data-tlb-load-miss\000legacy cache\000Data TLB read misses\000legacy-cache-config=0x10003\000\00010\000\000\000\000\000 */
+{ 86399 }, /* data-tlb-load-misses\000legacy cache\000Data TLB read misses\000legacy-cache-config=0x10003\000\00010\000\000\000\000\000 */
+{ 86228 }, /* data-tlb-load-ops\000legacy cache\000Data TLB read accesses\000legacy-cache-config=3\000\00010\000\000\000\000\000 */
+{ 86138 }, /* data-tlb-load-reference\000legacy cache\000Data TLB read accesses\000legacy-cache-config=3\000\00010\000\000\000\000\000 */
+{ 86053 }, /* data-tlb-load-refs\000legacy cache\000Data TLB read accesses\000legacy-cache-config=3\000\00010\000\000\000\000\000 */
+{ 86579 }, /* data-tlb-loads\000legacy cache\000Data TLB read accesses\000legacy-cache-config=3\000\00010\000\000\000\000\000 */
+{ 86922 }, /* data-tlb-loads-access\000legacy cache\000Data TLB read accesses\000legacy-cache-config=3\000\00010\000\000\000\000\000 */
+{ 87102 }, /* data-tlb-loads-miss\000legacy cache\000Data TLB read misses\000legacy-cache-config=0x10003\000\00010\000\000\000\000\000 */
+{ 87010 }, /* data-tlb-loads-misses\000legacy cache\000Data TLB read misses\000legacy-cache-config=0x10003\000\00010\000\000\000\000\000 */
+{ 86837 }, /* data-tlb-loads-ops\000legacy cache\000Data TLB read accesses\000legacy-cache-config=3\000\00010\000\000\000\000\000 */
+{ 86746 }, /* data-tlb-loads-reference\000legacy cache\000Data TLB read accesses\000legacy-cache-config=3\000\00010\000\000\000\000\000 */
+{ 86660 }, /* data-tlb-loads-refs\000legacy cache\000Data TLB read accesses\000legacy-cache-config=3\000\00010\000\000\000\000\000 */
+{ 92991 }, /* data-tlb-miss\000legacy cache\000Data TLB read misses\000legacy-cache-config=0x10003\000\00010\000\000\000\000\000 */
+{ 92905 }, /* data-tlb-misses\000legacy cache\000Data TLB read misses\000legacy-cache-config=0x10003\000\00010\000\000\000\000\000 */
+{ 92744 }, /* data-tlb-ops\000legacy cache\000Data TLB read accesses\000legacy-cache-config=3\000\00010\000\000\000\000\000 */
+{ 89725 }, /* data-tlb-prefetch\000legacy cache\000Data TLB prefetch accesses\000legacy-cache-config=0x203\000\00010\000\000\000\000\000 */
+{ 90112 }, /* data-tlb-prefetch-access\000legacy cache\000Data TLB prefetch accesses\000legacy-cache-config=0x203\000\00010\000\000\000\000\000 */
+{ 90310 }, /* data-tlb-prefetch-miss\000legacy cache\000Data TLB prefetch misses\000legacy-cache-config=0x10203\000\00010\000\000\000\000\000 */
+{ 90211 }, /* data-tlb-prefetch-misses\000legacy cache\000Data TLB prefetch misses\000legacy-cache-config=0x10203\000\00010\000\000\000\000\000 */
+{ 90016 }, /* data-tlb-prefetch-ops\000legacy cache\000Data TLB prefetch accesses\000legacy-cache-config=0x203\000\00010\000\000\000\000\000 */
+{ 89914 }, /* data-tlb-prefetch-reference\000legacy cache\000Data TLB prefetch accesses\000legacy-cache-config=0x203\000\00010\000\000\000\000\000 */
+{ 89817 }, /* data-tlb-prefetch-refs\000legacy cache\000Data TLB prefetch accesses\000legacy-cache-config=0x203\000\00010\000\000\000\000\000 */
+{ 90407 }, /* data-tlb-prefetches\000legacy cache\000Data TLB prefetch accesses\000legacy-cache-config=0x203\000\00010\000\000\000\000\000 */
+{ 90802 }, /* data-tlb-prefetches-access\000legacy cache\000Data TLB prefetch accesses\000legacy-cache-config=0x203\000\00010\000\000\000\000\000 */
+{ 91004 }, /* data-tlb-prefetches-miss\000legacy cache\000Data TLB prefetch misses\000legacy-cache-config=0x10203\000\00010\000\000\000\000\000 */
+{ 90903 }, /* data-tlb-prefetches-misses\000legacy cache\000Data TLB prefetch misses\000legacy-cache-config=0x10203\000\00010\000\000\000\000\000 */
+{ 90704 }, /* data-tlb-prefetches-ops\000legacy cache\000Data TLB prefetch accesses\000legacy-cache-config=0x203\000\00010\000\000\000\000\000 */
+{ 90600 }, /* data-tlb-prefetches-reference\000legacy cache\000Data TLB prefetch accesses\000legacy-cache-config=0x203\000\00010\000\000\000\000\000 */
+{ 90501 }, /* data-tlb-prefetches-refs\000legacy cache\000Data TLB prefetch accesses\000legacy-cache-config=0x203\000\00010\000\000\000\000\000 */
+{ 87192 }, /* data-tlb-read\000legacy cache\000Data TLB read accesses\000legacy-cache-config=3\000\00010\000\000\000\000\000 */
+{ 87531 }, /* data-tlb-read-access\000legacy cache\000Data TLB read accesses\000legacy-cache-config=3\000\00010\000\000\000\000\000 */
+{ 87709 }, /* data-tlb-read-miss\000legacy cache\000Data TLB read misses\000legacy-cache-config=0x10003\000\00010\000\000\000\000\000 */
+{ 87618 }, /* data-tlb-read-misses\000legacy cache\000Data TLB read misses\000legacy-cache-config=0x10003\000\00010\000\000\000\000\000 */
+{ 87447 }, /* data-tlb-read-ops\000legacy cache\000Data TLB read accesses\000legacy-cache-config=3\000\00010\000\000\000\000\000 */
+{ 87357 }, /* data-tlb-read-reference\000legacy cache\000Data TLB read accesses\000legacy-cache-config=3\000\00010\000\000\000\000\000 */
+{ 87272 }, /* data-tlb-read-refs\000legacy cache\000Data TLB read accesses\000legacy-cache-config=3\000\00010\000\000\000\000\000 */
+{ 92659 }, /* data-tlb-reference\000legacy cache\000Data TLB read accesses\000legacy-cache-config=3\000\00010\000\000\000\000\000 */
+{ 92579 }, /* data-tlb-refs\000legacy cache\000Data TLB read accesses\000legacy-cache-config=3\000\00010\000\000\000\000\000 */
+{ 91841 }, /* data-tlb-speculative-load\000legacy cache\000Data TLB prefetch accesses\000legacy-cache-config=0x203\000\00010\000\000\000\000\000 */
+{ 92260 }, /* data-tlb-speculative-load-access\000legacy cache\000Data TLB prefetch accesses\000legacy-cache-config=0x203\000\00010\000\000\000\000\000 */
+{ 92474 }, /* data-tlb-speculative-load-miss\000legacy cache\000Data TLB prefetch misses\000legacy-cache-config=0x10203\000\00010\000\000\000\000\000 */
+{ 92367 }, /* data-tlb-speculative-load-misses\000legacy cache\000Data TLB prefetch misses\000legacy-cache-config=0x10203\000\00010\000\000\000\000\000 */
+{ 92156 }, /* data-tlb-speculative-load-ops\000legacy cache\000Data TLB prefetch accesses\000legacy-cache-config=0x203\000\00010\000\000\000\000\000 */
+{ 92046 }, /* data-tlb-speculative-load-reference\000legacy cache\000Data TLB prefetch accesses\000legacy-cache-config=0x203\000\00010\000\000\000\000\000 */
+{ 91941 }, /* data-tlb-speculative-load-refs\000legacy cache\000Data TLB prefetch accesses\000legacy-cache-config=0x203\000\00010\000\000\000\000\000 */
+{ 91103 }, /* data-tlb-speculative-read\000legacy cache\000Data TLB prefetch accesses\000legacy-cache-config=0x203\000\00010\000\000\000\000\000 */
+{ 91522 }, /* data-tlb-speculative-read-access\000legacy cache\000Data TLB prefetch accesses\000legacy-cache-config=0x203\000\00010\000\000\000\000\000 */
+{ 91736 }, /* data-tlb-speculative-read-miss\000legacy cache\000Data TLB prefetch misses\000legacy-cache-config=0x10203\000\00010\000\000\000\000\000 */
+{ 91629 }, /* data-tlb-speculative-read-misses\000legacy cache\000Data TLB prefetch misses\000legacy-cache-config=0x10203\000\00010\000\000\000\000\000 */
+{ 91418 }, /* data-tlb-speculative-read-ops\000legacy cache\000Data TLB prefetch accesses\000legacy-cache-config=0x203\000\00010\000\000\000\000\000 */
+{ 91308 }, /* data-tlb-speculative-read-reference\000legacy cache\000Data TLB prefetch accesses\000legacy-cache-config=0x203\000\00010\000\000\000\000\000 */
+{ 91203 }, /* data-tlb-speculative-read-refs\000legacy cache\000Data TLB prefetch accesses\000legacy-cache-config=0x203\000\00010\000\000\000\000\000 */
+{ 87798 }, /* data-tlb-store\000legacy cache\000Data TLB write accesses\000legacy-cache-config=0x103\000\00010\000\000\000\000\000 */
+{ 88161 }, /* data-tlb-store-access\000legacy cache\000Data TLB write accesses\000legacy-cache-config=0x103\000\00010\000\000\000\000\000 */
+{ 88347 }, /* data-tlb-store-miss\000legacy cache\000Data TLB write misses\000legacy-cache-config=0x10103\000\00010\000\000\000\000\000 */
+{ 88254 }, /* data-tlb-store-misses\000legacy cache\000Data TLB write misses\000legacy-cache-config=0x10103\000\00010\000\000\000\000\000 */
+{ 88071 }, /* data-tlb-store-ops\000legacy cache\000Data TLB write accesses\000legacy-cache-config=0x103\000\00010\000\000\000\000\000 */
+{ 87975 }, /* data-tlb-store-reference\000legacy cache\000Data TLB write accesses\000legacy-cache-config=0x103\000\00010\000\000\000\000\000 */
+{ 87884 }, /* data-tlb-store-refs\000legacy cache\000Data TLB write accesses\000legacy-cache-config=0x103\000\00010\000\000\000\000\000 */
+{ 88438 }, /* data-tlb-stores\000legacy cache\000Data TLB write accesses\000legacy-cache-config=0x103\000\00010\000\000\000\000\000 */
+{ 88805 }, /* data-tlb-stores-access\000legacy cache\000Data TLB write accesses\000legacy-cache-config=0x103\000\00010\000\000\000\000\000 */
+{ 88993 }, /* data-tlb-stores-miss\000legacy cache\000Data TLB write misses\000legacy-cache-config=0x10103\000\00010\000\000\000\000\000 */
+{ 88899 }, /* data-tlb-stores-misses\000legacy cache\000Data TLB write misses\000legacy-cache-config=0x10103\000\00010\000\000\000\000\000 */
+{ 88714 }, /* data-tlb-stores-ops\000legacy cache\000Data TLB write accesses\000legacy-cache-config=0x103\000\00010\000\000\000\000\000 */
+{ 88617 }, /* data-tlb-stores-reference\000legacy cache\000Data TLB write accesses\000legacy-cache-config=0x103\000\00010\000\000\000\000\000 */
+{ 88525 }, /* data-tlb-stores-refs\000legacy cache\000Data TLB write accesses\000legacy-cache-config=0x103\000\00010\000\000\000\000\000 */
+{ 89085 }, /* data-tlb-write\000legacy cache\000Data TLB write accesses\000legacy-cache-config=0x103\000\00010\000\000\000\000\000 */
+{ 89448 }, /* data-tlb-write-access\000legacy cache\000Data TLB write accesses\000legacy-cache-config=0x103\000\00010\000\000\000\000\000 */
+{ 89634 }, /* data-tlb-write-miss\000legacy cache\000Data TLB write misses\000legacy-cache-config=0x10103\000\00010\000\000\000\000\000 */
+{ 89541 }, /* data-tlb-write-misses\000legacy cache\000Data TLB write misses\000legacy-cache-config=0x10103\000\00010\000\000\000\000\000 */
+{ 89358 }, /* data-tlb-write-ops\000legacy cache\000Data TLB write accesses\000legacy-cache-config=0x103\000\00010\000\000\000\000\000 */
+{ 89262 }, /* data-tlb-write-reference\000legacy cache\000Data TLB write accesses\000legacy-cache-config=0x103\000\00010\000\000\000\000\000 */
+{ 89171 }, /* data-tlb-write-refs\000legacy cache\000Data TLB write accesses\000legacy-cache-config=0x103\000\00010\000\000\000\000\000 */
+{ 72083 }, /* dtlb\000legacy cache\000Data TLB read accesses\000legacy-cache-config=3\000\00010\000\000\000\000\000 */
+{ 78712 }, /* dtlb-access\000legacy cache\000Data TLB read accesses\000legacy-cache-config=3\000\00010\000\000\000\000\000 */
+{ 72154 }, /* dtlb-load\000legacy cache\000Data TLB read accesses\000legacy-cache-config=3\000\00010\000\000\000\000\000 */
+{ 72477 }, /* dtlb-load-access\000legacy cache\000Data TLB read accesses\000legacy-cache-config=3\000\00010\000\000\000\000\000 */
+{ 72647 }, /* dtlb-load-miss\000legacy cache\000Data TLB read misses\000legacy-cache-config=0x10003\000\00010\000\000\000\000\000 */
+{ 72560 }, /* dtlb-load-misses\000legacy cache\000Data TLB read misses\000legacy-cache-config=0x10003\000\00000\000\000\000\000\000 */
+{ 72397 }, /* dtlb-load-ops\000legacy cache\000Data TLB read accesses\000legacy-cache-config=3\000\00010\000\000\000\000\000 */
+{ 72311 }, /* dtlb-load-reference\000legacy cache\000Data TLB read accesses\000legacy-cache-config=3\000\00010\000\000\000\000\000 */
+{ 72230 }, /* dtlb-load-refs\000legacy cache\000Data TLB read accesses\000legacy-cache-config=3\000\00010\000\000\000\000\000 */
+{ 72732 }, /* dtlb-loads\000legacy cache\000Data TLB read accesses\000legacy-cache-config=3\000\00000\000\000\000\000\000 */
+{ 73059 }, /* dtlb-loads-access\000legacy cache\000Data TLB read accesses\000legacy-cache-config=3\000\00010\000\000\000\000\000 */
+{ 73231 }, /* dtlb-loads-miss\000legacy cache\000Data TLB read misses\000legacy-cache-config=0x10003\000\00010\000\000\000\000\000 */
+{ 73143 }, /* dtlb-loads-misses\000legacy cache\000Data TLB read misses\000legacy-cache-config=0x10003\000\00010\000\000\000\000\000 */
+{ 72978 }, /* dtlb-loads-ops\000legacy cache\000Data TLB read accesses\000legacy-cache-config=3\000\00010\000\000\000\000\000 */
+{ 72891 }, /* dtlb-loads-reference\000legacy cache\000Data TLB read accesses\000legacy-cache-config=3\000\00010\000\000\000\000\000 */
+{ 72809 }, /* dtlb-loads-refs\000legacy cache\000Data TLB read accesses\000legacy-cache-config=3\000\00010\000\000\000\000\000 */
+{ 78872 }, /* dtlb-miss\000legacy cache\000Data TLB read misses\000legacy-cache-config=0x10003\000\00010\000\000\000\000\000 */
+{ 78790 }, /* dtlb-misses\000legacy cache\000Data TLB read misses\000legacy-cache-config=0x10003\000\00010\000\000\000\000\000 */
+{ 78637 }, /* dtlb-ops\000legacy cache\000Data TLB read accesses\000legacy-cache-config=3\000\00010\000\000\000\000\000 */
+{ 75738 }, /* dtlb-prefetch\000legacy cache\000Data TLB prefetch accesses\000legacy-cache-config=0x203\000\00010\000\000\000\000\000 */
+{ 76109 }, /* dtlb-prefetch-access\000legacy cache\000Data TLB prefetch accesses\000legacy-cache-config=0x203\000\00010\000\000\000\000\000 */
+{ 76299 }, /* dtlb-prefetch-miss\000legacy cache\000Data TLB prefetch misses\000legacy-cache-config=0x10203\000\00010\000\000\000\000\000 */
+{ 76204 }, /* dtlb-prefetch-misses\000legacy cache\000Data TLB prefetch misses\000legacy-cache-config=0x10203\000\00000\000\000\000\000\000 */
+{ 76017 }, /* dtlb-prefetch-ops\000legacy cache\000Data TLB prefetch accesses\000legacy-cache-config=0x203\000\00010\000\000\000\000\000 */
+{ 75919 }, /* dtlb-prefetch-reference\000legacy cache\000Data TLB prefetch accesses\000legacy-cache-config=0x203\000\00010\000\000\000\000\000 */
+{ 75826 }, /* dtlb-prefetch-refs\000legacy cache\000Data TLB prefetch accesses\000legacy-cache-config=0x203\000\00010\000\000\000\000\000 */
+{ 76392 }, /* dtlb-prefetches\000legacy cache\000Data TLB prefetch accesses\000legacy-cache-config=0x203\000\00000\000\000\000\000\000 */
+{ 76771 }, /* dtlb-prefetches-access\000legacy cache\000Data TLB prefetch accesses\000legacy-cache-config=0x203\000\00010\000\000\000\000\000 */
+{ 76965 }, /* dtlb-prefetches-miss\000legacy cache\000Data TLB prefetch misses\000legacy-cache-config=0x10203\000\00010\000\000\000\000\000 */
+{ 76868 }, /* dtlb-prefetches-misses\000legacy cache\000Data TLB prefetch misses\000legacy-cache-config=0x10203\000\00010\000\000\000\000\000 */
+{ 76677 }, /* dtlb-prefetches-ops\000legacy cache\000Data TLB prefetch accesses\000legacy-cache-config=0x203\000\00010\000\000\000\000\000 */
+{ 76577 }, /* dtlb-prefetches-reference\000legacy cache\000Data TLB prefetch accesses\000legacy-cache-config=0x203\000\00010\000\000\000\000\000 */
+{ 76482 }, /* dtlb-prefetches-refs\000legacy cache\000Data TLB prefetch accesses\000legacy-cache-config=0x203\000\00010\000\000\000\000\000 */
+{ 73317 }, /* dtlb-read\000legacy cache\000Data TLB read accesses\000legacy-cache-config=3\000\00010\000\000\000\000\000 */
+{ 73640 }, /* dtlb-read-access\000legacy cache\000Data TLB read accesses\000legacy-cache-config=3\000\00010\000\000\000\000\000 */
+{ 73810 }, /* dtlb-read-miss\000legacy cache\000Data TLB read misses\000legacy-cache-config=0x10003\000\00010\000\000\000\000\000 */
+{ 73723 }, /* dtlb-read-misses\000legacy cache\000Data TLB read misses\000legacy-cache-config=0x10003\000\00010\000\000\000\000\000 */
+{ 73560 }, /* dtlb-read-ops\000legacy cache\000Data TLB read accesses\000legacy-cache-config=3\000\00010\000\000\000\000\000 */
+{ 73474 }, /* dtlb-read-reference\000legacy cache\000Data TLB read accesses\000legacy-cache-config=3\000\00010\000\000\000\000\000 */
+{ 73393 }, /* dtlb-read-refs\000legacy cache\000Data TLB read accesses\000legacy-cache-config=3\000\00010\000\000\000\000\000 */
+{ 78556 }, /* dtlb-reference\000legacy cache\000Data TLB read accesses\000legacy-cache-config=3\000\00010\000\000\000\000\000 */
+{ 78480 }, /* dtlb-refs\000legacy cache\000Data TLB read accesses\000legacy-cache-config=3\000\00010\000\000\000\000\000 */
+{ 77770 }, /* dtlb-speculative-load\000legacy cache\000Data TLB prefetch accesses\000legacy-cache-config=0x203\000\00010\000\000\000\000\000 */
+{ 78173 }, /* dtlb-speculative-load-access\000legacy cache\000Data TLB prefetch accesses\000legacy-cache-config=0x203\000\00010\000\000\000\000\000 */
+{ 78379 }, /* dtlb-speculative-load-miss\000legacy cache\000Data TLB prefetch misses\000legacy-cache-config=0x10203\000\00010\000\000\000\000\000 */
+{ 78276 }, /* dtlb-speculative-load-misses\000legacy cache\000Data TLB prefetch misses\000legacy-cache-config=0x10203\000\00010\000\000\000\000\000 */
+{ 78073 }, /* dtlb-speculative-load-ops\000legacy cache\000Data TLB prefetch accesses\000legacy-cache-config=0x203\000\00010\000\000\000\000\000 */
+{ 77967 }, /* dtlb-speculative-load-reference\000legacy cache\000Data TLB prefetch accesses\000legacy-cache-config=0x203\000\00010\000\000\000\000\000 */
+{ 77866 }, /* dtlb-speculative-load-refs\000legacy cache\000Data TLB prefetch accesses\000legacy-cache-config=0x203\000\00010\000\000\000\000\000 */
+{ 77060 }, /* dtlb-speculative-read\000legacy cache\000Data TLB prefetch accesses\000legacy-cache-config=0x203\000\00010\000\000\000\000\000 */
+{ 77463 }, /* dtlb-speculative-read-access\000legacy cache\000Data TLB prefetch accesses\000legacy-cache-config=0x203\000\00010\000\000\000\000\000 */
+{ 77669 }, /* dtlb-speculative-read-miss\000legacy cache\000Data TLB prefetch misses\000legacy-cache-config=0x10203\000\00010\000\000\000\000\000 */
+{ 77566 }, /* dtlb-speculative-read-misses\000legacy cache\000Data TLB prefetch misses\000legacy-cache-config=0x10203\000\00010\000\000\000\000\000 */
+{ 77363 }, /* dtlb-speculative-read-ops\000legacy cache\000Data TLB prefetch accesses\000legacy-cache-config=0x203\000\00010\000\000\000\000\000 */
+{ 77257 }, /* dtlb-speculative-read-reference\000legacy cache\000Data TLB prefetch accesses\000legacy-cache-config=0x203\000\00010\000\000\000\000\000 */
+{ 77156 }, /* dtlb-speculative-read-refs\000legacy cache\000Data TLB prefetch accesses\000legacy-cache-config=0x203\000\00010\000\000\000\000\000 */
+{ 73895 }, /* dtlb-store\000legacy cache\000Data TLB write accesses\000legacy-cache-config=0x103\000\00010\000\000\000\000\000 */
+{ 74242 }, /* dtlb-store-access\000legacy cache\000Data TLB write accesses\000legacy-cache-config=0x103\000\00010\000\000\000\000\000 */
+{ 74420 }, /* dtlb-store-miss\000legacy cache\000Data TLB write misses\000legacy-cache-config=0x10103\000\00010\000\000\000\000\000 */
+{ 74331 }, /* dtlb-store-misses\000legacy cache\000Data TLB write misses\000legacy-cache-config=0x10103\000\00000\000\000\000\000\000 */
+{ 74156 }, /* dtlb-store-ops\000legacy cache\000Data TLB write accesses\000legacy-cache-config=0x103\000\00010\000\000\000\000\000 */
+{ 74064 }, /* dtlb-store-reference\000legacy cache\000Data TLB write accesses\000legacy-cache-config=0x103\000\00010\000\000\000\000\000 */
+{ 73977 }, /* dtlb-store-refs\000legacy cache\000Data TLB write accesses\000legacy-cache-config=0x103\000\00010\000\000\000\000\000 */
+{ 74507 }, /* dtlb-stores\000legacy cache\000Data TLB write accesses\000legacy-cache-config=0x103\000\00000\000\000\000\000\000 */
+{ 74858 }, /* dtlb-stores-access\000legacy cache\000Data TLB write accesses\000legacy-cache-config=0x103\000\00010\000\000\000\000\000 */
+{ 75038 }, /* dtlb-stores-miss\000legacy cache\000Data TLB write misses\000legacy-cache-config=0x10103\000\00010\000\000\000\000\000 */
+{ 74948 }, /* dtlb-stores-misses\000legacy cache\000Data TLB write misses\000legacy-cache-config=0x10103\000\00010\000\000\000\000\000 */
+{ 74771 }, /* dtlb-stores-ops\000legacy cache\000Data TLB write accesses\000legacy-cache-config=0x103\000\00010\000\000\000\000\000 */
+{ 74678 }, /* dtlb-stores-reference\000legacy cache\000Data TLB write accesses\000legacy-cache-config=0x103\000\00010\000\000\000\000\000 */
+{ 74590 }, /* dtlb-stores-refs\000legacy cache\000Data TLB write accesses\000legacy-cache-config=0x103\000\00010\000\000\000\000\000 */
+{ 75126 }, /* dtlb-write\000legacy cache\000Data TLB write accesses\000legacy-cache-config=0x103\000\00010\000\000\000\000\000 */
+{ 75473 }, /* dtlb-write-access\000legacy cache\000Data TLB write accesses\000legacy-cache-config=0x103\000\00010\000\000\000\000\000 */
+{ 75651 }, /* dtlb-write-miss\000legacy cache\000Data TLB write misses\000legacy-cache-config=0x10103\000\00010\000\000\000\000\000 */
+{ 75562 }, /* dtlb-write-misses\000legacy cache\000Data TLB write misses\000legacy-cache-config=0x10103\000\00010\000\000\000\000\000 */
+{ 75387 }, /* dtlb-write-ops\000legacy cache\000Data TLB write accesses\000legacy-cache-config=0x103\000\00010\000\000\000\000\000 */
+{ 75295 }, /* dtlb-write-reference\000legacy cache\000Data TLB write accesses\000legacy-cache-config=0x103\000\00010\000\000\000\000\000 */
+{ 75208 }, /* dtlb-write-refs\000legacy cache\000Data TLB write accesses\000legacy-cache-config=0x103\000\00010\000\000\000\000\000 */
+{ 95555 }, /* i-tlb\000legacy cache\000Instruction TLB read accesses\000legacy-cache-config=4\000\00010\000\000\000\000\000 */
+{ 97799 }, /* i-tlb-access\000legacy cache\000Instruction TLB read accesses\000legacy-cache-config=4\000\00010\000\000\000\000\000 */
+{ 95634 }, /* i-tlb-load\000legacy cache\000Instruction TLB read accesses\000legacy-cache-config=4\000\00010\000\000\000\000\000 */
+{ 95989 }, /* i-tlb-load-access\000legacy cache\000Instruction TLB read accesses\000legacy-cache-config=4\000\00010\000\000\000\000\000 */
+{ 96175 }, /* i-tlb-load-miss\000legacy cache\000Instruction TLB read misses\000legacy-cache-config=0x10004\000\00010\000\000\000\000\000 */
+{ 96080 }, /* i-tlb-load-misses\000legacy cache\000Instruction TLB read misses\000legacy-cache-config=0x10004\000\00010\000\000\000\000\000 */
+{ 95901 }, /* i-tlb-load-ops\000legacy cache\000Instruction TLB read accesses\000legacy-cache-config=4\000\00010\000\000\000\000\000 */
+{ 95807 }, /* i-tlb-load-reference\000legacy cache\000Instruction TLB read accesses\000legacy-cache-config=4\000\00010\000\000\000\000\000 */
+{ 95718 }, /* i-tlb-load-refs\000legacy cache\000Instruction TLB read accesses\000legacy-cache-config=4\000\00010\000\000\000\000\000 */
+{ 96268 }, /* i-tlb-loads\000legacy cache\000Instruction TLB read accesses\000legacy-cache-config=4\000\00010\000\000\000\000\000 */
+{ 96627 }, /* i-tlb-loads-access\000legacy cache\000Instruction TLB read accesses\000legacy-cache-config=4\000\00010\000\000\000\000\000 */
+{ 96815 }, /* i-tlb-loads-miss\000legacy cache\000Instruction TLB read misses\000legacy-cache-config=0x10004\000\00010\000\000\000\000\000 */
+{ 96719 }, /* i-tlb-loads-misses\000legacy cache\000Instruction TLB read misses\000legacy-cache-config=0x10004\000\00010\000\000\000\000\000 */
+{ 96538 }, /* i-tlb-loads-ops\000legacy cache\000Instruction TLB read accesses\000legacy-cache-config=4\000\00010\000\000\000\000\000 */
+{ 96443 }, /* i-tlb-loads-reference\000legacy cache\000Instruction TLB read accesses\000legacy-cache-config=4\000\00010\000\000\000\000\000 */
+{ 96353 }, /* i-tlb-loads-refs\000legacy cache\000Instruction TLB read accesses\000legacy-cache-config=4\000\00010\000\000\000\000\000 */
+{ 97975 }, /* i-tlb-miss\000legacy cache\000Instruction TLB read misses\000legacy-cache-config=0x10004\000\00010\000\000\000\000\000 */
+{ 97885 }, /* i-tlb-misses\000legacy cache\000Instruction TLB read misses\000legacy-cache-config=0x10004\000\00010\000\000\000\000\000 */
+{ 97716 }, /* i-tlb-ops\000legacy cache\000Instruction TLB read accesses\000legacy-cache-config=4\000\00010\000\000\000\000\000 */
+{ 96909 }, /* i-tlb-read\000legacy cache\000Instruction TLB read accesses\000legacy-cache-config=4\000\00010\000\000\000\000\000 */
+{ 97264 }, /* i-tlb-read-access\000legacy cache\000Instruction TLB read accesses\000legacy-cache-config=4\000\00010\000\000\000\000\000 */
+{ 97450 }, /* i-tlb-read-miss\000legacy cache\000Instruction TLB read misses\000legacy-cache-config=0x10004\000\00010\000\000\000\000\000 */
+{ 97355 }, /* i-tlb-read-misses\000legacy cache\000Instruction TLB read misses\000legacy-cache-config=0x10004\000\00010\000\000\000\000\000 */
+{ 97176 }, /* i-tlb-read-ops\000legacy cache\000Instruction TLB read accesses\000legacy-cache-config=4\000\00010\000\000\000\000\000 */
+{ 97082 }, /* i-tlb-read-reference\000legacy cache\000Instruction TLB read accesses\000legacy-cache-config=4\000\00010\000\000\000\000\000 */
+{ 96993 }, /* i-tlb-read-refs\000legacy cache\000Instruction TLB read accesses\000legacy-cache-config=4\000\00010\000\000\000\000\000 */
+{ 97627 }, /* i-tlb-reference\000legacy cache\000Instruction TLB read accesses\000legacy-cache-config=4\000\00010\000\000\000\000\000 */
+{ 97543 }, /* i-tlb-refs\000legacy cache\000Instruction TLB read accesses\000legacy-cache-config=4\000\00010\000\000\000\000\000 */
+{ 123247 }, /* idle-cycles-backend\000legacy hardware\000Stalled cycles during retirement [This event is an alias of stalled-cycles-backend]\000legacy-hardware-config=8\000\00000\000\000\000\000\000 */
+{ 122945 }, /* idle-cycles-frontend\000legacy hardware\000Stalled cycles during issue [This event is an alias of stalled-cycles-fronted]\000legacy-hardware-config=7\000\00000\000\000\000\000\000 */
+{ 98063 }, /* instruction-tlb\000legacy cache\000Instruction TLB read accesses\000legacy-cache-config=4\000\00010\000\000\000\000\000 */
+{ 100557 }, /* instruction-tlb-access\000legacy cache\000Instruction TLB read accesses\000legacy-cache-config=4\000\00010\000\000\000\000\000 */
+{ 98152 }, /* instruction-tlb-load\000legacy cache\000Instruction TLB read accesses\000legacy-cache-config=4\000\00010\000\000\000\000\000 */
+{ 98547 }, /* instruction-tlb-load-access\000legacy cache\000Instruction TLB read accesses\000legacy-cache-config=4\000\00010\000\000\000\000\000 */
+{ 98753 }, /* instruction-tlb-load-miss\000legacy cache\000Instruction TLB read misses\000legacy-cache-config=0x10004\000\00010\000\000\000\000\000 */
+{ 98648 }, /* instruction-tlb-load-misses\000legacy cache\000Instruction TLB read misses\000legacy-cache-config=0x10004\000\00010\000\000\000\000\000 */
+{ 98449 }, /* instruction-tlb-load-ops\000legacy cache\000Instruction TLB read accesses\000legacy-cache-config=4\000\00010\000\000\000\000\000 */
+{ 98345 }, /* instruction-tlb-load-reference\000legacy cache\000Instruction TLB read accesses\000legacy-cache-config=4\000\00010\000\000\000\000\000 */
+{ 98246 }, /* instruction-tlb-load-refs\000legacy cache\000Instruction TLB read accesses\000legacy-cache-config=4\000\00010\000\000\000\000\000 */
+{ 98856 }, /* instruction-tlb-loads\000legacy cache\000Instruction TLB read accesses\000legacy-cache-config=4\000\00010\000\000\000\000\000 */
+{ 99255 }, /* instruction-tlb-loads-access\000legacy cache\000Instruction TLB read accesses\000legacy-cache-config=4\000\00010\000\000\000\000\000 */
+{ 99463 }, /* instruction-tlb-loads-miss\000legacy cache\000Instruction TLB read misses\000legacy-cache-config=0x10004\000\00010\000\000\000\000\000 */
+{ 99357 }, /* instruction-tlb-loads-misses\000legacy cache\000Instruction TLB read misses\000legacy-cache-config=0x10004\000\00010\000\000\000\000\000 */
+{ 99156 }, /* instruction-tlb-loads-ops\000legacy cache\000Instruction TLB read accesses\000legacy-cache-config=4\000\00010\000\000\000\000\000 */
+{ 99051 }, /* instruction-tlb-loads-reference\000legacy cache\000Instruction TLB read accesses\000legacy-cache-config=4\000\00010\000\000\000\000\000 */
+{ 98951 }, /* instruction-tlb-loads-refs\000legacy cache\000Instruction TLB read accesses\000legacy-cache-config=4\000\00010\000\000\000\000\000 */
+{ 100753 }, /* instruction-tlb-miss\000legacy cache\000Instruction TLB read misses\000legacy-cache-config=0x10004\000\00010\000\000\000\000\000 */
+{ 100653 }, /* instruction-tlb-misses\000legacy cache\000Instruction TLB read misses\000legacy-cache-config=0x10004\000\00010\000\000\000\000\000 */
+{ 100464 }, /* instruction-tlb-ops\000legacy cache\000Instruction TLB read accesses\000legacy-cache-config=4\000\00010\000\000\000\000\000 */
+{ 99567 }, /* instruction-tlb-read\000legacy cache\000Instruction TLB read accesses\000legacy-cache-config=4\000\00010\000\000\000\000\000 */
+{ 99962 }, /* instruction-tlb-read-access\000legacy cache\000Instruction TLB read accesses\000legacy-cache-config=4\000\00010\000\000\000\000\000 */
+{ 100168 }, /* instruction-tlb-read-miss\000legacy cache\000Instruction TLB read misses\000legacy-cache-config=0x10004\000\00010\000\000\000\000\000 */
+{ 100063 }, /* instruction-tlb-read-misses\000legacy cache\000Instruction TLB read misses\000legacy-cache-config=0x10004\000\00010\000\000\000\000\000 */
+{ 99864 }, /* instruction-tlb-read-ops\000legacy cache\000Instruction TLB read accesses\000legacy-cache-config=4\000\00010\000\000\000\000\000 */
+{ 99760 }, /* instruction-tlb-read-reference\000legacy cache\000Instruction TLB read accesses\000legacy-cache-config=4\000\00010\000\000\000\000\000 */
+{ 99661 }, /* instruction-tlb-read-refs\000legacy cache\000Instruction TLB read accesses\000legacy-cache-config=4\000\00010\000\000\000\000\000 */
+{ 100365 }, /* instruction-tlb-reference\000legacy cache\000Instruction TLB read accesses\000legacy-cache-config=4\000\00010\000\000\000\000\000 */
+{ 100271 }, /* instruction-tlb-refs\000legacy cache\000Instruction TLB read accesses\000legacy-cache-config=4\000\00010\000\000\000\000\000 */
+{ 121629 }, /* instructions\000legacy hardware\000Retired instructions. Be careful, these can be affected by various issues, most notably hardware interrupt counts\000legacy-hardware-config=1\000\00000\000\000\000\000\000 */
+{ 93075 }, /* itlb\000legacy cache\000Instruction TLB read accesses\000legacy-cache-config=4\000\00010\000\000\000\000\000 */
+{ 95294 }, /* itlb-access\000legacy cache\000Instruction TLB read accesses\000legacy-cache-config=4\000\00010\000\000\000\000\000 */
+{ 93153 }, /* itlb-load\000legacy cache\000Instruction TLB read accesses\000legacy-cache-config=4\000\00010\000\000\000\000\000 */
+{ 93504 }, /* itlb-load-access\000legacy cache\000Instruction TLB read accesses\000legacy-cache-config=4\000\00010\000\000\000\000\000 */
+{ 93688 }, /* itlb-load-miss\000legacy cache\000Instruction TLB read misses\000legacy-cache-config=0x10004\000\00010\000\000\000\000\000 */
+{ 93594 }, /* itlb-load-misses\000legacy cache\000Instruction TLB read misses\000legacy-cache-config=0x10004\000\00000\000\000\000\000\000 */
+{ 93417 }, /* itlb-load-ops\000legacy cache\000Instruction TLB read accesses\000legacy-cache-config=4\000\00010\000\000\000\000\000 */
+{ 93324 }, /* itlb-load-reference\000legacy cache\000Instruction TLB read accesses\000legacy-cache-config=4\000\00010\000\000\000\000\000 */
+{ 93236 }, /* itlb-load-refs\000legacy cache\000Instruction TLB read accesses\000legacy-cache-config=4\000\00010\000\000\000\000\000 */
+{ 93780 }, /* itlb-loads\000legacy cache\000Instruction TLB read accesses\000legacy-cache-config=4\000\00000\000\000\000\000\000 */
+{ 94135 }, /* itlb-loads-access\000legacy cache\000Instruction TLB read accesses\000legacy-cache-config=4\000\00010\000\000\000\000\000 */
+{ 94321 }, /* itlb-loads-miss\000legacy cache\000Instruction TLB read misses\000legacy-cache-config=0x10004\000\00010\000\000\000\000\000 */
+{ 94226 }, /* itlb-loads-misses\000legacy cache\000Instruction TLB read misses\000legacy-cache-config=0x10004\000\00010\000\000\000\000\000 */
+{ 94047 }, /* itlb-loads-ops\000legacy cache\000Instruction TLB read accesses\000legacy-cache-config=4\000\00010\000\000\000\000\000 */
+{ 93953 }, /* itlb-loads-reference\000legacy cache\000Instruction TLB read accesses\000legacy-cache-config=4\000\00010\000\000\000\000\000 */
+{ 93864 }, /* itlb-loads-refs\000legacy cache\000Instruction TLB read accesses\000legacy-cache-config=4\000\00010\000\000\000\000\000 */
+{ 95468 }, /* itlb-miss\000legacy cache\000Instruction TLB read misses\000legacy-cache-config=0x10004\000\00010\000\000\000\000\000 */
+{ 95379 }, /* itlb-misses\000legacy cache\000Instruction TLB read misses\000legacy-cache-config=0x10004\000\00010\000\000\000\000\000 */
+{ 95212 }, /* itlb-ops\000legacy cache\000Instruction TLB read accesses\000legacy-cache-config=4\000\00010\000\000\000\000\000 */
+{ 94414 }, /* itlb-read\000legacy cache\000Instruction TLB read accesses\000legacy-cache-config=4\000\00010\000\000\000\000\000 */
+{ 94765 }, /* itlb-read-access\000legacy cache\000Instruction TLB read accesses\000legacy-cache-config=4\000\00010\000\000\000\000\000 */
+{ 94949 }, /* itlb-read-miss\000legacy cache\000Instruction TLB read misses\000legacy-cache-config=0x10004\000\00010\000\000\000\000\000 */
+{ 94855 }, /* itlb-read-misses\000legacy cache\000Instruction TLB read misses\000legacy-cache-config=0x10004\000\00010\000\000\000\000\000 */
+{ 94678 }, /* itlb-read-ops\000legacy cache\000Instruction TLB read accesses\000legacy-cache-config=4\000\00010\000\000\000\000\000 */
+{ 94585 }, /* itlb-read-reference\000legacy cache\000Instruction TLB read accesses\000legacy-cache-config=4\000\00010\000\000\000\000\000 */
+{ 94497 }, /* itlb-read-refs\000legacy cache\000Instruction TLB read accesses\000legacy-cache-config=4\000\00010\000\000\000\000\000 */
+{ 95124 }, /* itlb-reference\000legacy cache\000Instruction TLB read accesses\000legacy-cache-config=4\000\00010\000\000\000\000\000 */
+{ 95041 }, /* itlb-refs\000legacy cache\000Instruction TLB read accesses\000legacy-cache-config=4\000\00010\000\000\000\000\000 */
+{ 8037 }, /* l1-d\000legacy cache\000Level 1 data cache read accesses\000legacy-cache-config=0\000\00010\000\000\000\000\000 */
+{ 15406 }, /* l1-d-access\000legacy cache\000Level 1 data cache read accesses\000legacy-cache-config=0\000\00010\000\000\000\000\000 */
+{ 8118 }, /* l1-d-load\000legacy cache\000Level 1 data cache read accesses\000legacy-cache-config=0\000\00010\000\000\000\000\000 */
+{ 8481 }, /* l1-d-load-access\000legacy cache\000Level 1 data cache read accesses\000legacy-cache-config=0\000\00010\000\000\000\000\000 */
+{ 8671 }, /* l1-d-load-miss\000legacy cache\000Level 1 data cache read misses\000legacy-cache-config=0x10000\000\00010\000\000\000\000\000 */
+{ 8574 }, /* l1-d-load-misses\000legacy cache\000Level 1 data cache read misses\000legacy-cache-config=0x10000\000\00010\000\000\000\000\000 */
+{ 8391 }, /* l1-d-load-ops\000legacy cache\000Level 1 data cache read accesses\000legacy-cache-config=0\000\00010\000\000\000\000\000 */
+{ 8295 }, /* l1-d-load-reference\000legacy cache\000Level 1 data cache read accesses\000legacy-cache-config=0\000\00010\000\000\000\000\000 */
+{ 8204 }, /* l1-d-load-refs\000legacy cache\000Level 1 data cache read accesses\000legacy-cache-config=0\000\00010\000\000\000\000\000 */
+{ 8766 }, /* l1-d-loads\000legacy cache\000Level 1 data cache read accesses\000legacy-cache-config=0\000\00010\000\000\000\000\000 */
+{ 9133 }, /* l1-d-loads-access\000legacy cache\000Level 1 data cache read accesses\000legacy-cache-config=0\000\00010\000\000\000\000\000 */
+{ 9325 }, /* l1-d-loads-miss\000legacy cache\000Level 1 data cache read misses\000legacy-cache-config=0x10000\000\00010\000\000\000\000\000 */
+{ 9227 }, /* l1-d-loads-misses\000legacy cache\000Level 1 data cache read misses\000legacy-cache-config=0x10000\000\00010\000\000\000\000\000 */
+{ 9042 }, /* l1-d-loads-ops\000legacy cache\000Level 1 data cache read accesses\000legacy-cache-config=0\000\00010\000\000\000\000\000 */
+{ 8945 }, /* l1-d-loads-reference\000legacy cache\000Level 1 data cache read accesses\000legacy-cache-config=0\000\00010\000\000\000\000\000 */
+{ 8853 }, /* l1-d-loads-refs\000legacy cache\000Level 1 data cache read accesses\000legacy-cache-config=0\000\00010\000\000\000\000\000 */
+{ 15586 }, /* l1-d-miss\000legacy cache\000Level 1 data cache read misses\000legacy-cache-config=0x10000\000\00010\000\000\000\000\000 */
+{ 15494 }, /* l1-d-misses\000legacy cache\000Level 1 data cache read misses\000legacy-cache-config=0x10000\000\00010\000\000\000\000\000 */
+{ 15321 }, /* l1-d-ops\000legacy cache\000Level 1 data cache read accesses\000legacy-cache-config=0\000\00010\000\000\000\000\000 */
+{ 12122 }, /* l1-d-prefetch\000legacy cache\000Level 1 data cache prefetch accesses\000legacy-cache-config=0x200\000\00010\000\000\000\000\000 */
+{ 12533 }, /* l1-d-prefetch-access\000legacy cache\000Level 1 data cache prefetch accesses\000legacy-cache-config=0x200\000\00010\000\000\000\000\000 */
+{ 12743 }, /* l1-d-prefetch-miss\000legacy cache\000Level 1 data cache prefetch misses\000legacy-cache-config=0x10200\000\00010\000\000\000\000\000 */
+{ 12638 }, /* l1-d-prefetch-misses\000legacy cache\000Level 1 data cache prefetch misses\000legacy-cache-config=0x10200\000\00010\000\000\000\000\000 */
+{ 12431 }, /* l1-d-prefetch-ops\000legacy cache\000Level 1 data cache prefetch accesses\000legacy-cache-config=0x200\000\00010\000\000\000\000\000 */
+{ 12323 }, /* l1-d-prefetch-reference\000legacy cache\000Level 1 data cache prefetch accesses\000legacy-cache-config=0x200\000\00010\000\000\000\000\000 */
+{ 12220 }, /* l1-d-prefetch-refs\000legacy cache\000Level 1 data cache prefetch accesses\000legacy-cache-config=0x200\000\00010\000\000\000\000\000 */
+{ 12846 }, /* l1-d-prefetches\000legacy cache\000Level 1 data cache prefetch accesses\000legacy-cache-config=0x200\000\00010\000\000\000\000\000 */
+{ 13265 }, /* l1-d-prefetches-access\000legacy cache\000Level 1 data cache prefetch accesses\000legacy-cache-config=0x200\000\00010\000\000\000\000\000 */
+{ 13479 }, /* l1-d-prefetches-miss\000legacy cache\000Level 1 data cache prefetch misses\000legacy-cache-config=0x10200\000\00010\000\000\000\000\000 */
+{ 13372 }, /* l1-d-prefetches-misses\000legacy cache\000Level 1 data cache prefetch misses\000legacy-cache-config=0x10200\000\00010\000\000\000\000\000 */
+{ 13161 }, /* l1-d-prefetches-ops\000legacy cache\000Level 1 data cache prefetch accesses\000legacy-cache-config=0x200\000\00010\000\000\000\000\000 */
+{ 13051 }, /* l1-d-prefetches-reference\000legacy cache\000Level 1 data cache prefetch accesses\000legacy-cache-config=0x200\000\00010\000\000\000\000\000 */
+{ 12946 }, /* l1-d-prefetches-refs\000legacy cache\000Level 1 data cache prefetch accesses\000legacy-cache-config=0x200\000\00010\000\000\000\000\000 */
+{ 9421 }, /* l1-d-read\000legacy cache\000Level 1 data cache read accesses\000legacy-cache-config=0\000\00010\000\000\000\000\000 */
+{ 9784 }, /* l1-d-read-access\000legacy cache\000Level 1 data cache read accesses\000legacy-cache-config=0\000\00010\000\000\000\000\000 */
+{ 9974 }, /* l1-d-read-miss\000legacy cache\000Level 1 data cache read misses\000legacy-cache-config=0x10000\000\00010\000\000\000\000\000 */
+{ 9877 }, /* l1-d-read-misses\000legacy cache\000Level 1 data cache read misses\000legacy-cache-config=0x10000\000\00010\000\000\000\000\000 */
+{ 9694 }, /* l1-d-read-ops\000legacy cache\000Level 1 data cache read accesses\000legacy-cache-config=0\000\00010\000\000\000\000\000 */
+{ 9598 }, /* l1-d-read-reference\000legacy cache\000Level 1 data cache read accesses\000legacy-cache-config=0\000\00010\000\000\000\000\000 */
+{ 9507 }, /* l1-d-read-refs\000legacy cache\000Level 1 data cache read accesses\000legacy-cache-config=0\000\00010\000\000\000\000\000 */
+{ 15230 }, /* l1-d-reference\000legacy cache\000Level 1 data cache read accesses\000legacy-cache-config=0\000\00010\000\000\000\000\000 */
+{ 15144 }, /* l1-d-refs\000legacy cache\000Level 1 data cache read accesses\000legacy-cache-config=0\000\00010\000\000\000\000\000 */
+{ 14364 }, /* l1-d-speculative-load\000legacy cache\000Level 1 data cache prefetch accesses\000legacy-cache-config=0x200\000\00010\000\000\000\000\000 */
+{ 14807 }, /* l1-d-speculative-load-access\000legacy cache\000Level 1 data cache prefetch accesses\000legacy-cache-config=0x200\000\00010\000\000\000\000\000 */
+{ 15033 }, /* l1-d-speculative-load-miss\000legacy cache\000Level 1 data cache prefetch misses\000legacy-cache-config=0x10200\000\00010\000\000\000\000\000 */
+{ 14920 }, /* l1-d-speculative-load-misses\000legacy cache\000Level 1 data cache prefetch misses\000legacy-cache-config=0x10200\000\00010\000\000\000\000\000 */
+{ 14697 }, /* l1-d-speculative-load-ops\000legacy cache\000Level 1 data cache prefetch accesses\000legacy-cache-config=0x200\000\00010\000\000\000\000\000 */
+{ 14581 }, /* l1-d-speculative-load-reference\000legacy cache\000Level 1 data cache prefetch accesses\000legacy-cache-config=0x200\000\00010\000\000\000\000\000 */
+{ 14470 }, /* l1-d-speculative-load-refs\000legacy cache\000Level 1 data cache prefetch accesses\000legacy-cache-config=0x200\000\00010\000\000\000\000\000 */
+{ 13584 }, /* l1-d-speculative-read\000legacy cache\000Level 1 data cache prefetch accesses\000legacy-cache-config=0x200\000\00010\000\000\000\000\000 */
+{ 14027 }, /* l1-d-speculative-read-access\000legacy cache\000Level 1 data cache prefetch accesses\000legacy-cache-config=0x200\000\00010\000\000\000\000\000 */
+{ 14253 }, /* l1-d-speculative-read-miss\000legacy cache\000Level 1 data cache prefetch misses\000legacy-cache-config=0x10200\000\00010\000\000\000\000\000 */
+{ 14140 }, /* l1-d-speculative-read-misses\000legacy cache\000Level 1 data cache prefetch misses\000legacy-cache-config=0x10200\000\00010\000\000\000\000\000 */
+{ 13917 }, /* l1-d-speculative-read-ops\000legacy cache\000Level 1 data cache prefetch accesses\000legacy-cache-config=0x200\000\00010\000\000\000\000\000 */
+{ 13801 }, /* l1-d-speculative-read-reference\000legacy cache\000Level 1 data cache prefetch accesses\000legacy-cache-config=0x200\000\00010\000\000\000\000\000 */
+{ 13690 }, /* l1-d-speculative-read-refs\000legacy cache\000Level 1 data cache prefetch accesses\000legacy-cache-config=0x200\000\00010\000\000\000\000\000 */
+{ 10069 }, /* l1-d-store\000legacy cache\000Level 1 data cache write accesses\000legacy-cache-config=0x100\000\00010\000\000\000\000\000 */
+{ 10456 }, /* l1-d-store-access\000legacy cache\000Level 1 data cache write accesses\000legacy-cache-config=0x100\000\00010\000\000\000\000\000 */
+{ 10654 }, /* l1-d-store-miss\000legacy cache\000Level 1 data cache write misses\000legacy-cache-config=0x10100\000\00010\000\000\000\000\000 */
+{ 10555 }, /* l1-d-store-misses\000legacy cache\000Level 1 data cache write misses\000legacy-cache-config=0x10100\000\00010\000\000\000\000\000 */
+{ 10360 }, /* l1-d-store-ops\000legacy cache\000Level 1 data cache write accesses\000legacy-cache-config=0x100\000\00010\000\000\000\000\000 */
+{ 10258 }, /* l1-d-store-reference\000legacy cache\000Level 1 data cache write accesses\000legacy-cache-config=0x100\000\00010\000\000\000\000\000 */
+{ 10161 }, /* l1-d-store-refs\000legacy cache\000Level 1 data cache write accesses\000legacy-cache-config=0x100\000\00010\000\000\000\000\000 */
+{ 10751 }, /* l1-d-stores\000legacy cache\000Level 1 data cache write accesses\000legacy-cache-config=0x100\000\00010\000\000\000\000\000 */
+{ 11142 }, /* l1-d-stores-access\000legacy cache\000Level 1 data cache write accesses\000legacy-cache-config=0x100\000\00010\000\000\000\000\000 */
+{ 11342 }, /* l1-d-stores-miss\000legacy cache\000Level 1 data cache write misses\000legacy-cache-config=0x10100\000\00010\000\000\000\000\000 */
+{ 11242 }, /* l1-d-stores-misses\000legacy cache\000Level 1 data cache write misses\000legacy-cache-config=0x10100\000\00010\000\000\000\000\000 */
+{ 11045 }, /* l1-d-stores-ops\000legacy cache\000Level 1 data cache write accesses\000legacy-cache-config=0x100\000\00010\000\000\000\000\000 */
+{ 10942 }, /* l1-d-stores-reference\000legacy cache\000Level 1 data cache write accesses\000legacy-cache-config=0x100\000\00010\000\000\000\000\000 */
+{ 10844 }, /* l1-d-stores-refs\000legacy cache\000Level 1 data cache write accesses\000legacy-cache-config=0x100\000\00010\000\000\000\000\000 */
+{ 11440 }, /* l1-d-write\000legacy cache\000Level 1 data cache write accesses\000legacy-cache-config=0x100\000\00010\000\000\000\000\000 */
+{ 11827 }, /* l1-d-write-access\000legacy cache\000Level 1 data cache write accesses\000legacy-cache-config=0x100\000\00010\000\000\000\000\000 */
+{ 12025 }, /* l1-d-write-miss\000legacy cache\000Level 1 data cache write misses\000legacy-cache-config=0x10100\000\00010\000\000\000\000\000 */
+{ 11926 }, /* l1-d-write-misses\000legacy cache\000Level 1 data cache write misses\000legacy-cache-config=0x10100\000\00010\000\000\000\000\000 */
+{ 11731 }, /* l1-d-write-ops\000legacy cache\000Level 1 data cache write accesses\000legacy-cache-config=0x100\000\00010\000\000\000\000\000 */
+{ 11629 }, /* l1-d-write-reference\000legacy cache\000Level 1 data cache write accesses\000legacy-cache-config=0x100\000\00010\000\000\000\000\000 */
+{ 11532 }, /* l1-d-write-refs\000legacy cache\000Level 1 data cache write accesses\000legacy-cache-config=0x100\000\00010\000\000\000\000\000 */
+{ 23238 }, /* l1-data\000legacy cache\000Level 1 data cache read accesses\000legacy-cache-config=0\000\00010\000\000\000\000\000 */
+{ 30829 }, /* l1-data-access\000legacy cache\000Level 1 data cache read accesses\000legacy-cache-config=0\000\00010\000\000\000\000\000 */
+{ 23322 }, /* l1-data-load\000legacy cache\000Level 1 data cache read accesses\000legacy-cache-config=0\000\00010\000\000\000\000\000 */
+{ 23697 }, /* l1-data-load-access\000legacy cache\000Level 1 data cache read accesses\000legacy-cache-config=0\000\00010\000\000\000\000\000 */
+{ 23893 }, /* l1-data-load-miss\000legacy cache\000Level 1 data cache read misses\000legacy-cache-config=0x10000\000\00010\000\000\000\000\000 */
+{ 23793 }, /* l1-data-load-misses\000legacy cache\000Level 1 data cache read misses\000legacy-cache-config=0x10000\000\00010\000\000\000\000\000 */
+{ 23604 }, /* l1-data-load-ops\000legacy cache\000Level 1 data cache read accesses\000legacy-cache-config=0\000\00010\000\000\000\000\000 */
+{ 23505 }, /* l1-data-load-reference\000legacy cache\000Level 1 data cache read accesses\000legacy-cache-config=0\000\00010\000\000\000\000\000 */
+{ 23411 }, /* l1-data-load-refs\000legacy cache\000Level 1 data cache read accesses\000legacy-cache-config=0\000\00010\000\000\000\000\000 */
+{ 23991 }, /* l1-data-loads\000legacy cache\000Level 1 data cache read accesses\000legacy-cache-config=0\000\00010\000\000\000\000\000 */
+{ 24370 }, /* l1-data-loads-access\000legacy cache\000Level 1 data cache read accesses\000legacy-cache-config=0\000\00010\000\000\000\000\000 */
+{ 24568 }, /* l1-data-loads-miss\000legacy cache\000Level 1 data cache read misses\000legacy-cache-config=0x10000\000\00010\000\000\000\000\000 */
+{ 24467 }, /* l1-data-loads-misses\000legacy cache\000Level 1 data cache read misses\000legacy-cache-config=0x10000\000\00010\000\000\000\000\000 */
+{ 24276 }, /* l1-data-loads-ops\000legacy cache\000Level 1 data cache read accesses\000legacy-cache-config=0\000\00010\000\000\000\000\000 */
+{ 24176 }, /* l1-data-loads-reference\000legacy cache\000Level 1 data cache read accesses\000legacy-cache-config=0\000\00010\000\000\000\000\000 */
+{ 24081 }, /* l1-data-loads-refs\000legacy cache\000Level 1 data cache read accesses\000legacy-cache-config=0\000\00010\000\000\000\000\000 */
+{ 31015 }, /* l1-data-miss\000legacy cache\000Level 1 data cache read misses\000legacy-cache-config=0x10000\000\00010\000\000\000\000\000 */
+{ 30920 }, /* l1-data-misses\000legacy cache\000Level 1 data cache read misses\000legacy-cache-config=0x10000\000\00010\000\000\000\000\000 */
+{ 30741 }, /* l1-data-ops\000legacy cache\000Level 1 data cache read accesses\000legacy-cache-config=0\000\00010\000\000\000\000\000 */
+{ 27452 }, /* l1-data-prefetch\000legacy cache\000Level 1 data cache prefetch accesses\000legacy-cache-config=0x200\000\00010\000\000\000\000\000 */
+{ 27875 }, /* l1-data-prefetch-access\000legacy cache\000Level 1 data cache prefetch accesses\000legacy-cache-config=0x200\000\00010\000\000\000\000\000 */
+{ 28091 }, /* l1-data-prefetch-miss\000legacy cache\000Level 1 data cache prefetch misses\000legacy-cache-config=0x10200\000\00010\000\000\000\000\000 */
+{ 27983 }, /* l1-data-prefetch-misses\000legacy cache\000Level 1 data cache prefetch misses\000legacy-cache-config=0x10200\000\00010\000\000\000\000\000 */
+{ 27770 }, /* l1-data-prefetch-ops\000legacy cache\000Level 1 data cache prefetch accesses\000legacy-cache-config=0x200\000\00010\000\000\000\000\000 */
+{ 27659 }, /* l1-data-prefetch-reference\000legacy cache\000Level 1 data cache prefetch accesses\000legacy-cache-config=0x200\000\00010\000\000\000\000\000 */
+{ 27553 }, /* l1-data-prefetch-refs\000legacy cache\000Level 1 data cache prefetch accesses\000legacy-cache-config=0x200\000\00010\000\000\000\000\000 */
+{ 28197 }, /* l1-data-prefetches\000legacy cache\000Level 1 data cache prefetch accesses\000legacy-cache-config=0x200\000\00010\000\000\000\000\000 */
+{ 28628 }, /* l1-data-prefetches-access\000legacy cache\000Level 1 data cache prefetch accesses\000legacy-cache-config=0x200\000\00010\000\000\000\000\000 */
+{ 28848 }, /* l1-data-prefetches-miss\000legacy cache\000Level 1 data cache prefetch misses\000legacy-cache-config=0x10200\000\00010\000\000\000\000\000 */
+{ 28738 }, /* l1-data-prefetches-misses\000legacy cache\000Level 1 data cache prefetch misses\000legacy-cache-config=0x10200\000\00010\000\000\000\000\000 */
+{ 28521 }, /* l1-data-prefetches-ops\000legacy cache\000Level 1 data cache prefetch accesses\000legacy-cache-config=0x200\000\00010\000\000\000\000\000 */
+{ 28408 }, /* l1-data-prefetches-reference\000legacy cache\000Level 1 data cache prefetch accesses\000legacy-cache-config=0x200\000\00010\000\000\000\000\000 */
+{ 28300 }, /* l1-data-prefetches-refs\000legacy cache\000Level 1 data cache prefetch accesses\000legacy-cache-config=0x200\000\00010\000\000\000\000\000 */
+{ 24667 }, /* l1-data-read\000legacy cache\000Level 1 data cache read accesses\000legacy-cache-config=0\000\00010\000\000\000\000\000 */
+{ 25042 }, /* l1-data-read-access\000legacy cache\000Level 1 data cache read accesses\000legacy-cache-config=0\000\00010\000\000\000\000\000 */
+{ 25238 }, /* l1-data-read-miss\000legacy cache\000Level 1 data cache read misses\000legacy-cache-config=0x10000\000\00010\000\000\000\000\000 */
+{ 25138 }, /* l1-data-read-misses\000legacy cache\000Level 1 data cache read misses\000legacy-cache-config=0x10000\000\00010\000\000\000\000\000 */
+{ 24949 }, /* l1-data-read-ops\000legacy cache\000Level 1 data cache read accesses\000legacy-cache-config=0\000\00010\000\000\000\000\000 */
+{ 24850 }, /* l1-data-read-reference\000legacy cache\000Level 1 data cache read accesses\000legacy-cache-config=0\000\00010\000\000\000\000\000 */
+{ 24756 }, /* l1-data-read-refs\000legacy cache\000Level 1 data cache read accesses\000legacy-cache-config=0\000\00010\000\000\000\000\000 */
+{ 30647 }, /* l1-data-reference\000legacy cache\000Level 1 data cache read accesses\000legacy-cache-config=0\000\00010\000\000\000\000\000 */
+{ 30558 }, /* l1-data-refs\000legacy cache\000Level 1 data cache read accesses\000legacy-cache-config=0\000\00010\000\000\000\000\000 */
+{ 29757 }, /* l1-data-speculative-load\000legacy cache\000Level 1 data cache prefetch accesses\000legacy-cache-config=0x200\000\00010\000\000\000\000\000 */
+{ 30212 }, /* l1-data-speculative-load-access\000legacy cache\000Level 1 data cache prefetch accesses\000legacy-cache-config=0x200\000\00010\000\000\000\000\000 */
+{ 30444 }, /* l1-data-speculative-load-miss\000legacy cache\000Level 1 data cache prefetch misses\000legacy-cache-config=0x10200\000\00010\000\000\000\000\000 */
+{ 30328 }, /* l1-data-speculative-load-misses\000legacy cache\000Level 1 data cache prefetch misses\000legacy-cache-config=0x10200\000\00010\000\000\000\000\000 */
+{ 30099 }, /* l1-data-speculative-load-ops\000legacy cache\000Level 1 data cache prefetch accesses\000legacy-cache-config=0x200\000\00010\000\000\000\000\000 */
+{ 29980 }, /* l1-data-speculative-load-reference\000legacy cache\000Level 1 data cache prefetch accesses\000legacy-cache-config=0x200\000\00010\000\000\000\000\000 */
+{ 29866 }, /* l1-data-speculative-load-refs\000legacy cache\000Level 1 data cache prefetch accesses\000legacy-cache-config=0x200\000\00010\000\000\000\000\000 */
+{ 28956 }, /* l1-data-speculative-read\000legacy cache\000Level 1 data cache prefetch accesses\000legacy-cache-config=0x200\000\00010\000\000\000\000\000 */
+{ 29411 }, /* l1-data-speculative-read-access\000legacy cache\000Level 1 data cache prefetch accesses\000legacy-cache-config=0x200\000\00010\000\000\000\000\000 */
+{ 29643 }, /* l1-data-speculative-read-miss\000legacy cache\000Level 1 data cache prefetch misses\000legacy-cache-config=0x10200\000\00010\000\000\000\000\000 */
+{ 29527 }, /* l1-data-speculative-read-misses\000legacy cache\000Level 1 data cache prefetch misses\000legacy-cache-config=0x10200\000\00010\000\000\000\000\000 */
+{ 29298 }, /* l1-data-speculative-read-ops\000legacy cache\000Level 1 data cache prefetch accesses\000legacy-cache-config=0x200\000\00010\000\000\000\000\000 */
+{ 29179 }, /* l1-data-speculative-read-reference\000legacy cache\000Level 1 data cache prefetch accesses\000legacy-cache-config=0x200\000\00010\000\000\000\000\000 */
+{ 29065 }, /* l1-data-speculative-read-refs\000legacy cache\000Level 1 data cache prefetch accesses\000legacy-cache-config=0x200\000\00010\000\000\000\000\000 */
+{ 25336 }, /* l1-data-store\000legacy cache\000Level 1 data cache write accesses\000legacy-cache-config=0x100\000\00010\000\000\000\000\000 */
+{ 25735 }, /* l1-data-store-access\000legacy cache\000Level 1 data cache write accesses\000legacy-cache-config=0x100\000\00010\000\000\000\000\000 */
+{ 25939 }, /* l1-data-store-miss\000legacy cache\000Level 1 data cache write misses\000legacy-cache-config=0x10100\000\00010\000\000\000\000\000 */
+{ 25837 }, /* l1-data-store-misses\000legacy cache\000Level 1 data cache write misses\000legacy-cache-config=0x10100\000\00010\000\000\000\000\000 */
+{ 25636 }, /* l1-data-store-ops\000legacy cache\000Level 1 data cache write accesses\000legacy-cache-config=0x100\000\00010\000\000\000\000\000 */
+{ 25531 }, /* l1-data-store-reference\000legacy cache\000Level 1 data cache write accesses\000legacy-cache-config=0x100\000\00010\000\000\000\000\000 */
+{ 25431 }, /* l1-data-store-refs\000legacy cache\000Level 1 data cache write accesses\000legacy-cache-config=0x100\000\00010\000\000\000\000\000 */
+{ 26039 }, /* l1-data-stores\000legacy cache\000Level 1 data cache write accesses\000legacy-cache-config=0x100\000\00010\000\000\000\000\000 */
+{ 26442 }, /* l1-data-stores-access\000legacy cache\000Level 1 data cache write accesses\000legacy-cache-config=0x100\000\00010\000\000\000\000\000 */
+{ 26648 }, /* l1-data-stores-miss\000legacy cache\000Level 1 data cache write misses\000legacy-cache-config=0x10100\000\00010\000\000\000\000\000 */
+{ 26545 }, /* l1-data-stores-misses\000legacy cache\000Level 1 data cache write misses\000legacy-cache-config=0x10100\000\00010\000\000\000\000\000 */
+{ 26342 }, /* l1-data-stores-ops\000legacy cache\000Level 1 data cache write accesses\000legacy-cache-config=0x100\000\00010\000\000\000\000\000 */
+{ 26236 }, /* l1-data-stores-reference\000legacy cache\000Level 1 data cache write accesses\000legacy-cache-config=0x100\000\00010\000\000\000\000\000 */
+{ 26135 }, /* l1-data-stores-refs\000legacy cache\000Level 1 data cache write accesses\000legacy-cache-config=0x100\000\00010\000\000\000\000\000 */
+{ 26749 }, /* l1-data-write\000legacy cache\000Level 1 data cache write accesses\000legacy-cache-config=0x100\000\00010\000\000\000\000\000 */
+{ 27148 }, /* l1-data-write-access\000legacy cache\000Level 1 data cache write accesses\000legacy-cache-config=0x100\000\00010\000\000\000\000\000 */
+{ 27352 }, /* l1-data-write-miss\000legacy cache\000Level 1 data cache write misses\000legacy-cache-config=0x10100\000\00010\000\000\000\000\000 */
+{ 27250 }, /* l1-data-write-misses\000legacy cache\000Level 1 data cache write misses\000legacy-cache-config=0x10100\000\00010\000\000\000\000\000 */
+{ 27049 }, /* l1-data-write-ops\000legacy cache\000Level 1 data cache write accesses\000legacy-cache-config=0x100\000\00010\000\000\000\000\000 */
+{ 26944 }, /* l1-data-write-reference\000legacy cache\000Level 1 data cache write accesses\000legacy-cache-config=0x100\000\00010\000\000\000\000\000 */
+{ 26844 }, /* l1-data-write-refs\000legacy cache\000Level 1 data cache write accesses\000legacy-cache-config=0x100\000\00010\000\000\000\000\000 */
+{ 13 }, /* l1-dcache\000legacy cache\000Level 1 data cache read accesses\000legacy-cache-config=0\000\00010\000\000\000\000\000 */
+{ 7752 }, /* l1-dcache-access\000legacy cache\000Level 1 data cache read accesses\000legacy-cache-config=0\000\00010\000\000\000\000\000 */
+{ 99 }, /* l1-dcache-load\000legacy cache\000Level 1 data cache read accesses\000legacy-cache-config=0\000\00010\000\000\000\000\000 */
+{ 482 }, /* l1-dcache-load-access\000legacy cache\000Level 1 data cache read accesses\000legacy-cache-config=0\000\00010\000\000\000\000\000 */
+{ 682 }, /* l1-dcache-load-miss\000legacy cache\000Level 1 data cache read misses\000legacy-cache-config=0x10000\000\00010\000\000\000\000\000 */
+{ 580 }, /* l1-dcache-load-misses\000legacy cache\000Level 1 data cache read misses\000legacy-cache-config=0x10000\000\00000\000\000\000\000\000 */
+{ 387 }, /* l1-dcache-load-ops\000legacy cache\000Level 1 data cache read accesses\000legacy-cache-config=0\000\00010\000\000\000\000\000 */
+{ 286 }, /* l1-dcache-load-reference\000legacy cache\000Level 1 data cache read accesses\000legacy-cache-config=0\000\00010\000\000\000\000\000 */
+{ 190 }, /* l1-dcache-load-refs\000legacy cache\000Level 1 data cache read accesses\000legacy-cache-config=0\000\00010\000\000\000\000\000 */
+{ 782 }, /* l1-dcache-loads\000legacy cache\000Level 1 data cache read accesses\000legacy-cache-config=0\000\00000\000\000\000\000\000 */
+{ 1169 }, /* l1-dcache-loads-access\000legacy cache\000Level 1 data cache read accesses\000legacy-cache-config=0\000\00010\000\000\000\000\000 */
+{ 1371 }, /* l1-dcache-loads-miss\000legacy cache\000Level 1 data cache read misses\000legacy-cache-config=0x10000\000\00010\000\000\000\000\000 */
+{ 1268 }, /* l1-dcache-loads-misses\000legacy cache\000Level 1 data cache read misses\000legacy-cache-config=0x10000\000\00010\000\000\000\000\000 */
+{ 1073 }, /* l1-dcache-loads-ops\000legacy cache\000Level 1 data cache read accesses\000legacy-cache-config=0\000\00010\000\000\000\000\000 */
+{ 971 }, /* l1-dcache-loads-reference\000legacy cache\000Level 1 data cache read accesses\000legacy-cache-config=0\000\00010\000\000\000\000\000 */
+{ 874 }, /* l1-dcache-loads-refs\000legacy cache\000Level 1 data cache read accesses\000legacy-cache-config=0\000\00010\000\000\000\000\000 */
+{ 7942 }, /* l1-dcache-miss\000legacy cache\000Level 1 data cache read misses\000legacy-cache-config=0x10000\000\00010\000\000\000\000\000 */
+{ 7845 }, /* l1-dcache-misses\000legacy cache\000Level 1 data cache read misses\000legacy-cache-config=0x10000\000\00010\000\000\000\000\000 */
+{ 7662 }, /* l1-dcache-ops\000legacy cache\000Level 1 data cache read accesses\000legacy-cache-config=0\000\00010\000\000\000\000\000 */
+{ 4313 }, /* l1-dcache-prefetch\000legacy cache\000Level 1 data cache prefetch accesses\000legacy-cache-config=0x200\000\00010\000\000\000\000\000 */
+{ 4744 }, /* l1-dcache-prefetch-access\000legacy cache\000Level 1 data cache prefetch accesses\000legacy-cache-config=0x200\000\00010\000\000\000\000\000 */
+{ 4964 }, /* l1-dcache-prefetch-miss\000legacy cache\000Level 1 data cache prefetch misses\000legacy-cache-config=0x10200\000\00010\000\000\000\000\000 */
+{ 4854 }, /* l1-dcache-prefetch-misses\000legacy cache\000Level 1 data cache prefetch misses\000legacy-cache-config=0x10200\000\00000\000\000\000\000\000 */
+{ 4637 }, /* l1-dcache-prefetch-ops\000legacy cache\000Level 1 data cache prefetch accesses\000legacy-cache-config=0x200\000\00010\000\000\000\000\000 */
+{ 4524 }, /* l1-dcache-prefetch-reference\000legacy cache\000Level 1 data cache prefetch accesses\000legacy-cache-config=0x200\000\00010\000\000\000\000\000 */
+{ 4416 }, /* l1-dcache-prefetch-refs\000legacy cache\000Level 1 data cache prefetch accesses\000legacy-cache-config=0x200\000\00010\000\000\000\000\000 */
+{ 5072 }, /* l1-dcache-prefetches\000legacy cache\000Level 1 data cache prefetch accesses\000legacy-cache-config=0x200\000\00000\000\000\000\000\000 */
+{ 5511 }, /* l1-dcache-prefetches-access\000legacy cache\000Level 1 data cache prefetch accesses\000legacy-cache-config=0x200\000\00010\000\000\000\000\000 */
+{ 5735 }, /* l1-dcache-prefetches-miss\000legacy cache\000Level 1 data cache prefetch misses\000legacy-cache-config=0x10200\000\00010\000\000\000\000\000 */
+{ 5623 }, /* l1-dcache-prefetches-misses\000legacy cache\000Level 1 data cache prefetch misses\000legacy-cache-config=0x10200\000\00010\000\000\000\000\000 */
+{ 5402 }, /* l1-dcache-prefetches-ops\000legacy cache\000Level 1 data cache prefetch accesses\000legacy-cache-config=0x200\000\00010\000\000\000\000\000 */
+{ 5287 }, /* l1-dcache-prefetches-reference\000legacy cache\000Level 1 data cache prefetch accesses\000legacy-cache-config=0x200\000\00010\000\000\000\000\000 */
+{ 5177 }, /* l1-dcache-prefetches-refs\000legacy cache\000Level 1 data cache prefetch accesses\000legacy-cache-config=0x200\000\00010\000\000\000\000\000 */
+{ 1472 }, /* l1-dcache-read\000legacy cache\000Level 1 data cache read accesses\000legacy-cache-config=0\000\00010\000\000\000\000\000 */
+{ 1855 }, /* l1-dcache-read-access\000legacy cache\000Level 1 data cache read accesses\000legacy-cache-config=0\000\00010\000\000\000\000\000 */
+{ 2055 }, /* l1-dcache-read-miss\000legacy cache\000Level 1 data cache read misses\000legacy-cache-config=0x10000\000\00010\000\000\000\000\000 */
+{ 1953 }, /* l1-dcache-read-misses\000legacy cache\000Level 1 data cache read misses\000legacy-cache-config=0x10000\000\00010\000\000\000\000\000 */
+{ 1760 }, /* l1-dcache-read-ops\000legacy cache\000Level 1 data cache read accesses\000legacy-cache-config=0\000\00010\000\000\000\000\000 */
+{ 1659 }, /* l1-dcache-read-reference\000legacy cache\000Level 1 data cache read accesses\000legacy-cache-config=0\000\00010\000\000\000\000\000 */
+{ 1563 }, /* l1-dcache-read-refs\000legacy cache\000Level 1 data cache read accesses\000legacy-cache-config=0\000\00010\000\000\000\000\000 */
+{ 7566 }, /* l1-dcache-reference\000legacy cache\000Level 1 data cache read accesses\000legacy-cache-config=0\000\00010\000\000\000\000\000 */
+{ 7475 }, /* l1-dcache-refs\000legacy cache\000Level 1 data cache read accesses\000legacy-cache-config=0\000\00010\000\000\000\000\000 */
+{ 6660 }, /* l1-dcache-speculative-load\000legacy cache\000Level 1 data cache prefetch accesses\000legacy-cache-config=0x200\000\00010\000\000\000\000\000 */
+{ 7123 }, /* l1-dcache-speculative-load-access\000legacy cache\000Level 1 data cache prefetch accesses\000legacy-cache-config=0x200\000\00010\000\000\000\000\000 */
+{ 7359 }, /* l1-dcache-speculative-load-miss\000legacy cache\000Level 1 data cache prefetch misses\000legacy-cache-config=0x10200\000\00010\000\000\000\000\000 */
+{ 7241 }, /* l1-dcache-speculative-load-misses\000legacy cache\000Level 1 data cache prefetch misses\000legacy-cache-config=0x10200\000\00010\000\000\000\000\000 */
+{ 7008 }, /* l1-dcache-speculative-load-ops\000legacy cache\000Level 1 data cache prefetch accesses\000legacy-cache-config=0x200\000\00010\000\000\000\000\000 */
+{ 6887 }, /* l1-dcache-speculative-load-reference\000legacy cache\000Level 1 data cache prefetch accesses\000legacy-cache-config=0x200\000\00010\000\000\000\000\000 */
+{ 6771 }, /* l1-dcache-speculative-load-refs\000legacy cache\000Level 1 data cache prefetch accesses\000legacy-cache-config=0x200\000\00010\000\000\000\000\000 */
+{ 5845 }, /* l1-dcache-speculative-read\000legacy cache\000Level 1 data cache prefetch accesses\000legacy-cache-config=0x200\000\00010\000\000\000\000\000 */
+{ 6308 }, /* l1-dcache-speculative-read-access\000legacy cache\000Level 1 data cache prefetch accesses\000legacy-cache-config=0x200\000\00010\000\000\000\000\000 */
+{ 6544 }, /* l1-dcache-speculative-read-miss\000legacy cache\000Level 1 data cache prefetch misses\000legacy-cache-config=0x10200\000\00010\000\000\000\000\000 */
+{ 6426 }, /* l1-dcache-speculative-read-misses\000legacy cache\000Level 1 data cache prefetch misses\000legacy-cache-config=0x10200\000\00010\000\000\000\000\000 */
+{ 6193 }, /* l1-dcache-speculative-read-ops\000legacy cache\000Level 1 data cache prefetch accesses\000legacy-cache-config=0x200\000\00010\000\000\000\000\000 */
+{ 6072 }, /* l1-dcache-speculative-read-reference\000legacy cache\000Level 1 data cache prefetch accesses\000legacy-cache-config=0x200\000\00010\000\000\000\000\000 */
+{ 5956 }, /* l1-dcache-speculative-read-refs\000legacy cache\000Level 1 data cache prefetch accesses\000legacy-cache-config=0x200\000\00010\000\000\000\000\000 */
+{ 2155 }, /* l1-dcache-store\000legacy cache\000Level 1 data cache write accesses\000legacy-cache-config=0x100\000\00010\000\000\000\000\000 */
+{ 2562 }, /* l1-dcache-store-access\000legacy cache\000Level 1 data cache write accesses\000legacy-cache-config=0x100\000\00010\000\000\000\000\000 */
+{ 2770 }, /* l1-dcache-store-miss\000legacy cache\000Level 1 data cache write misses\000legacy-cache-config=0x10100\000\00010\000\000\000\000\000 */
+{ 2666 }, /* l1-dcache-store-misses\000legacy cache\000Level 1 data cache write misses\000legacy-cache-config=0x10100\000\00000\000\000\000\000\000 */
+{ 2461 }, /* l1-dcache-store-ops\000legacy cache\000Level 1 data cache write accesses\000legacy-cache-config=0x100\000\00010\000\000\000\000\000 */
+{ 2354 }, /* l1-dcache-store-reference\000legacy cache\000Level 1 data cache write accesses\000legacy-cache-config=0x100\000\00010\000\000\000\000\000 */
+{ 2252 }, /* l1-dcache-store-refs\000legacy cache\000Level 1 data cache write accesses\000legacy-cache-config=0x100\000\00010\000\000\000\000\000 */
+{ 2872 }, /* l1-dcache-stores\000legacy cache\000Level 1 data cache write accesses\000legacy-cache-config=0x100\000\00000\000\000\000\000\000 */
+{ 3283 }, /* l1-dcache-stores-access\000legacy cache\000Level 1 data cache write accesses\000legacy-cache-config=0x100\000\00010\000\000\000\000\000 */
+{ 3493 }, /* l1-dcache-stores-miss\000legacy cache\000Level 1 data cache write misses\000legacy-cache-config=0x10100\000\00010\000\000\000\000\000 */
+{ 3388 }, /* l1-dcache-stores-misses\000legacy cache\000Level 1 data cache write misses\000legacy-cache-config=0x10100\000\00010\000\000\000\000\000 */
+{ 3181 }, /* l1-dcache-stores-ops\000legacy cache\000Level 1 data cache write accesses\000legacy-cache-config=0x100\000\00010\000\000\000\000\000 */
+{ 3073 }, /* l1-dcache-stores-reference\000legacy cache\000Level 1 data cache write accesses\000legacy-cache-config=0x100\000\00010\000\000\000\000\000 */
+{ 2970 }, /* l1-dcache-stores-refs\000legacy cache\000Level 1 data cache write accesses\000legacy-cache-config=0x100\000\00010\000\000\000\000\000 */
+{ 3596 }, /* l1-dcache-write\000legacy cache\000Level 1 data cache write accesses\000legacy-cache-config=0x100\000\00010\000\000\000\000\000 */
+{ 4003 }, /* l1-dcache-write-access\000legacy cache\000Level 1 data cache write accesses\000legacy-cache-config=0x100\000\00010\000\000\000\000\000 */
+{ 4211 }, /* l1-dcache-write-miss\000legacy cache\000Level 1 data cache write misses\000legacy-cache-config=0x10100\000\00010\000\000\000\000\000 */
+{ 4107 }, /* l1-dcache-write-misses\000legacy cache\000Level 1 data cache write misses\000legacy-cache-config=0x10100\000\00010\000\000\000\000\000 */
+{ 3902 }, /* l1-dcache-write-ops\000legacy cache\000Level 1 data cache write accesses\000legacy-cache-config=0x100\000\00010\000\000\000\000\000 */
+{ 3795 }, /* l1-dcache-write-reference\000legacy cache\000Level 1 data cache write accesses\000legacy-cache-config=0x100\000\00010\000\000\000\000\000 */
+{ 3693 }, /* l1-dcache-write-refs\000legacy cache\000Level 1 data cache write accesses\000legacy-cache-config=0x100\000\00010\000\000\000\000\000 */
+{ 37366 }, /* l1-i\000legacy cache\000Level 1 instruction cache read accesses\000legacy-cache-config=1\000\00010\000\000\000\000\000 */
+{ 43053 }, /* l1-i-access\000legacy cache\000Level 1 instruction cache read accesses\000legacy-cache-config=1\000\00010\000\000\000\000\000 */
+{ 37454 }, /* l1-i-load\000legacy cache\000Level 1 instruction cache read accesses\000legacy-cache-config=1\000\00010\000\000\000\000\000 */
+{ 37845 }, /* l1-i-load-access\000legacy cache\000Level 1 instruction cache read accesses\000legacy-cache-config=1\000\00010\000\000\000\000\000 */
+{ 38049 }, /* l1-i-load-miss\000legacy cache\000Level 1 instruction cache read misses\000legacy-cache-config=0x10001\000\00010\000\000\000\000\000 */
+{ 37945 }, /* l1-i-load-misses\000legacy cache\000Level 1 instruction cache read misses\000legacy-cache-config=0x10001\000\00010\000\000\000\000\000 */
+{ 37748 }, /* l1-i-load-ops\000legacy cache\000Level 1 instruction cache read accesses\000legacy-cache-config=1\000\00010\000\000\000\000\000 */
+{ 37645 }, /* l1-i-load-reference\000legacy cache\000Level 1 instruction cache read accesses\000legacy-cache-config=1\000\00010\000\000\000\000\000 */
+{ 37547 }, /* l1-i-load-refs\000legacy cache\000Level 1 instruction cache read accesses\000legacy-cache-config=1\000\00010\000\000\000\000\000 */
+{ 38151 }, /* l1-i-loads\000legacy cache\000Level 1 instruction cache read accesses\000legacy-cache-config=1\000\00010\000\000\000\000\000 */
+{ 38546 }, /* l1-i-loads-access\000legacy cache\000Level 1 instruction cache read accesses\000legacy-cache-config=1\000\00010\000\000\000\000\000 */
+{ 38752 }, /* l1-i-loads-miss\000legacy cache\000Level 1 instruction cache read misses\000legacy-cache-config=0x10001\000\00010\000\000\000\000\000 */
+{ 38647 }, /* l1-i-loads-misses\000legacy cache\000Level 1 instruction cache read misses\000legacy-cache-config=0x10001\000\00010\000\000\000\000\000 */
+{ 38448 }, /* l1-i-loads-ops\000legacy cache\000Level 1 instruction cache read accesses\000legacy-cache-config=1\000\00010\000\000\000\000\000 */
+{ 38344 }, /* l1-i-loads-reference\000legacy cache\000Level 1 instruction cache read accesses\000legacy-cache-config=1\000\00010\000\000\000\000\000 */
+{ 38245 }, /* l1-i-loads-refs\000legacy cache\000Level 1 instruction cache read accesses\000legacy-cache-config=1\000\00010\000\000\000\000\000 */
+{ 43247 }, /* l1-i-miss\000legacy cache\000Level 1 instruction cache read misses\000legacy-cache-config=0x10001\000\00010\000\000\000\000\000 */
+{ 43148 }, /* l1-i-misses\000legacy cache\000Level 1 instruction cache read misses\000legacy-cache-config=0x10001\000\00010\000\000\000\000\000 */
+{ 42961 }, /* l1-i-ops\000legacy cache\000Level 1 instruction cache read accesses\000legacy-cache-config=1\000\00010\000\000\000\000\000 */
+{ 39552 }, /* l1-i-prefetch\000legacy cache\000Level 1 instruction cache prefetch accesses\000legacy-cache-config=0x201\000\00010\000\000\000\000\000 */
+{ 39991 }, /* l1-i-prefetch-access\000legacy cache\000Level 1 instruction cache prefetch accesses\000legacy-cache-config=0x201\000\00010\000\000\000\000\000 */
+{ 40215 }, /* l1-i-prefetch-miss\000legacy cache\000Level 1 instruction cache prefetch misses\000legacy-cache-config=0x10201\000\00010\000\000\000\000\000 */
+{ 40103 }, /* l1-i-prefetch-misses\000legacy cache\000Level 1 instruction cache prefetch misses\000legacy-cache-config=0x10201\000\00010\000\000\000\000\000 */
+{ 39882 }, /* l1-i-prefetch-ops\000legacy cache\000Level 1 instruction cache prefetch accesses\000legacy-cache-config=0x201\000\00010\000\000\000\000\000 */
+{ 39767 }, /* l1-i-prefetch-reference\000legacy cache\000Level 1 instruction cache prefetch accesses\000legacy-cache-config=0x201\000\00010\000\000\000\000\000 */
+{ 39657 }, /* l1-i-prefetch-refs\000legacy cache\000Level 1 instruction cache prefetch accesses\000legacy-cache-config=0x201\000\00010\000\000\000\000\000 */
+{ 40325 }, /* l1-i-prefetches\000legacy cache\000Level 1 instruction cache prefetch accesses\000legacy-cache-config=0x201\000\00010\000\000\000\000\000 */
+{ 40772 }, /* l1-i-prefetches-access\000legacy cache\000Level 1 instruction cache prefetch accesses\000legacy-cache-config=0x201\000\00010\000\000\000\000\000 */
+{ 41000 }, /* l1-i-prefetches-miss\000legacy cache\000Level 1 instruction cache prefetch misses\000legacy-cache-config=0x10201\000\00010\000\000\000\000\000 */
+{ 40886 }, /* l1-i-prefetches-misses\000legacy cache\000Level 1 instruction cache prefetch misses\000legacy-cache-config=0x10201\000\00010\000\000\000\000\000 */
+{ 40661 }, /* l1-i-prefetches-ops\000legacy cache\000Level 1 instruction cache prefetch accesses\000legacy-cache-config=0x201\000\00010\000\000\000\000\000 */
+{ 40544 }, /* l1-i-prefetches-reference\000legacy cache\000Level 1 instruction cache prefetch accesses\000legacy-cache-config=0x201\000\00010\000\000\000\000\000 */
+{ 40432 }, /* l1-i-prefetches-refs\000legacy cache\000Level 1 instruction cache prefetch accesses\000legacy-cache-config=0x201\000\00010\000\000\000\000\000 */
+{ 38855 }, /* l1-i-read\000legacy cache\000Level 1 instruction cache read accesses\000legacy-cache-config=1\000\00010\000\000\000\000\000 */
+{ 39246 }, /* l1-i-read-access\000legacy cache\000Level 1 instruction cache read accesses\000legacy-cache-config=1\000\00010\000\000\000\000\000 */
+{ 39450 }, /* l1-i-read-miss\000legacy cache\000Level 1 instruction cache read misses\000legacy-cache-config=0x10001\000\00010\000\000\000\000\000 */
+{ 39346 }, /* l1-i-read-misses\000legacy cache\000Level 1 instruction cache read misses\000legacy-cache-config=0x10001\000\00010\000\000\000\000\000 */
+{ 39149 }, /* l1-i-read-ops\000legacy cache\000Level 1 instruction cache read accesses\000legacy-cache-config=1\000\00010\000\000\000\000\000 */
+{ 39046 }, /* l1-i-read-reference\000legacy cache\000Level 1 instruction cache read accesses\000legacy-cache-config=1\000\00010\000\000\000\000\000 */
+{ 38948 }, /* l1-i-read-refs\000legacy cache\000Level 1 instruction cache read accesses\000legacy-cache-config=1\000\00010\000\000\000\000\000 */
+{ 42863 }, /* l1-i-reference\000legacy cache\000Level 1 instruction cache read accesses\000legacy-cache-config=1\000\00010\000\000\000\000\000 */
+{ 42770 }, /* l1-i-refs\000legacy cache\000Level 1 instruction cache read accesses\000legacy-cache-config=1\000\00010\000\000\000\000\000 */
+{ 41941 }, /* l1-i-speculative-load\000legacy cache\000Level 1 instruction cache prefetch accesses\000legacy-cache-config=0x201\000\00010\000\000\000\000\000 */
+{ 42412 }, /* l1-i-speculative-load-access\000legacy cache\000Level 1 instruction cache prefetch accesses\000legacy-cache-config=0x201\000\00010\000\000\000\000\000 */
+{ 42652 }, /* l1-i-speculative-load-miss\000legacy cache\000Level 1 instruction cache prefetch misses\000legacy-cache-config=0x10201\000\00010\000\000\000\000\000 */
+{ 42532 }, /* l1-i-speculative-load-misses\000legacy cache\000Level 1 instruction cache prefetch misses\000legacy-cache-config=0x10201\000\00010\000\000\000\000\000 */
+{ 42295 }, /* l1-i-speculative-load-ops\000legacy cache\000Level 1 instruction cache prefetch accesses\000legacy-cache-config=0x201\000\00010\000\000\000\000\000 */
+{ 42172 }, /* l1-i-speculative-load-reference\000legacy cache\000Level 1 instruction cache prefetch accesses\000legacy-cache-config=0x201\000\00010\000\000\000\000\000 */
+{ 42054 }, /* l1-i-speculative-load-refs\000legacy cache\000Level 1 instruction cache prefetch accesses\000legacy-cache-config=0x201\000\00010\000\000\000\000\000 */
+{ 41112 }, /* l1-i-speculative-read\000legacy cache\000Level 1 instruction cache prefetch accesses\000legacy-cache-config=0x201\000\00010\000\000\000\000\000 */
+{ 41583 }, /* l1-i-speculative-read-access\000legacy cache\000Level 1 instruction cache prefetch accesses\000legacy-cache-config=0x201\000\00010\000\000\000\000\000 */
+{ 41823 }, /* l1-i-speculative-read-miss\000legacy cache\000Level 1 instruction cache prefetch misses\000legacy-cache-config=0x10201\000\00010\000\000\000\000\000 */
+{ 41703 }, /* l1-i-speculative-read-misses\000legacy cache\000Level 1 instruction cache prefetch misses\000legacy-cache-config=0x10201\000\00010\000\000\000\000\000 */
+{ 41466 }, /* l1-i-speculative-read-ops\000legacy cache\000Level 1 instruction cache prefetch accesses\000legacy-cache-config=0x201\000\00010\000\000\000\000\000 */
+{ 41343 }, /* l1-i-speculative-read-reference\000legacy cache\000Level 1 instruction cache prefetch accesses\000legacy-cache-config=0x201\000\00010\000\000\000\000\000 */
+{ 41225 }, /* l1-i-speculative-read-refs\000legacy cache\000Level 1 instruction cache prefetch accesses\000legacy-cache-config=0x201\000\00010\000\000\000\000\000 */
+{ 31108 }, /* l1-icache\000legacy cache\000Level 1 instruction cache read accesses\000legacy-cache-config=1\000\00010\000\000\000\000\000 */
+{ 37060 }, /* l1-icache-access\000legacy cache\000Level 1 instruction cache read accesses\000legacy-cache-config=1\000\00010\000\000\000\000\000 */
+{ 31201 }, /* l1-icache-load\000legacy cache\000Level 1 instruction cache read accesses\000legacy-cache-config=1\000\00010\000\000\000\000\000 */
+{ 31612 }, /* l1-icache-load-access\000legacy cache\000Level 1 instruction cache read accesses\000legacy-cache-config=1\000\00010\000\000\000\000\000 */
+{ 31826 }, /* l1-icache-load-miss\000legacy cache\000Level 1 instruction cache read misses\000legacy-cache-config=0x10001\000\00010\000\000\000\000\000 */
+{ 31717 }, /* l1-icache-load-misses\000legacy cache\000Level 1 instruction cache read misses\000legacy-cache-config=0x10001\000\00000\000\000\000\000\000 */
+{ 31510 }, /* l1-icache-load-ops\000legacy cache\000Level 1 instruction cache read accesses\000legacy-cache-config=1\000\00010\000\000\000\000\000 */
+{ 31402 }, /* l1-icache-load-reference\000legacy cache\000Level 1 instruction cache read accesses\000legacy-cache-config=1\000\00010\000\000\000\000\000 */
+{ 31299 }, /* l1-icache-load-refs\000legacy cache\000Level 1 instruction cache read accesses\000legacy-cache-config=1\000\00010\000\000\000\000\000 */
+{ 31933 }, /* l1-icache-loads\000legacy cache\000Level 1 instruction cache read accesses\000legacy-cache-config=1\000\00000\000\000\000\000\000 */
+{ 32348 }, /* l1-icache-loads-access\000legacy cache\000Level 1 instruction cache read accesses\000legacy-cache-config=1\000\00010\000\000\000\000\000 */
+{ 32564 }, /* l1-icache-loads-miss\000legacy cache\000Level 1 instruction cache read misses\000legacy-cache-config=0x10001\000\00010\000\000\000\000\000 */
+{ 32454 }, /* l1-icache-loads-misses\000legacy cache\000Level 1 instruction cache read misses\000legacy-cache-config=0x10001\000\00010\000\000\000\000\000 */
+{ 32245 }, /* l1-icache-loads-ops\000legacy cache\000Level 1 instruction cache read accesses\000legacy-cache-config=1\000\00010\000\000\000\000\000 */
+{ 32136 }, /* l1-icache-loads-reference\000legacy cache\000Level 1 instruction cache read accesses\000legacy-cache-config=1\000\00010\000\000\000\000\000 */
+{ 32032 }, /* l1-icache-loads-refs\000legacy cache\000Level 1 instruction cache read accesses\000legacy-cache-config=1\000\00010\000\000\000\000\000 */
+{ 37264 }, /* l1-icache-miss\000legacy cache\000Level 1 instruction cache read misses\000legacy-cache-config=0x10001\000\00010\000\000\000\000\000 */
+{ 37160 }, /* l1-icache-misses\000legacy cache\000Level 1 instruction cache read misses\000legacy-cache-config=0x10001\000\00010\000\000\000\000\000 */
+{ 36963 }, /* l1-icache-ops\000legacy cache\000Level 1 instruction cache read accesses\000legacy-cache-config=1\000\00010\000\000\000\000\000 */
+{ 33404 }, /* l1-icache-prefetch\000legacy cache\000Level 1 instruction cache prefetch accesses\000legacy-cache-config=0x201\000\00010\000\000\000\000\000 */
+{ 33863 }, /* l1-icache-prefetch-access\000legacy cache\000Level 1 instruction cache prefetch accesses\000legacy-cache-config=0x201\000\00010\000\000\000\000\000 */
+{ 34097 }, /* l1-icache-prefetch-miss\000legacy cache\000Level 1 instruction cache prefetch misses\000legacy-cache-config=0x10201\000\00010\000\000\000\000\000 */
+{ 33980 }, /* l1-icache-prefetch-misses\000legacy cache\000Level 1 instruction cache prefetch misses\000legacy-cache-config=0x10201\000\00000\000\000\000\000\000 */
+{ 33749 }, /* l1-icache-prefetch-ops\000legacy cache\000Level 1 instruction cache prefetch accesses\000legacy-cache-config=0x201\000\00010\000\000\000\000\000 */
+{ 33629 }, /* l1-icache-prefetch-reference\000legacy cache\000Level 1 instruction cache prefetch accesses\000legacy-cache-config=0x201\000\00010\000\000\000\000\000 */
+{ 33514 }, /* l1-icache-prefetch-refs\000legacy cache\000Level 1 instruction cache prefetch accesses\000legacy-cache-config=0x201\000\00010\000\000\000\000\000 */
+{ 34212 }, /* l1-icache-prefetches\000legacy cache\000Level 1 instruction cache prefetch accesses\000legacy-cache-config=0x201\000\00000\000\000\000\000\000 */
+{ 34679 }, /* l1-icache-prefetches-access\000legacy cache\000Level 1 instruction cache prefetch accesses\000legacy-cache-config=0x201\000\00010\000\000\000\000\000 */
+{ 34917 }, /* l1-icache-prefetches-miss\000legacy cache\000Level 1 instruction cache prefetch misses\000legacy-cache-config=0x10201\000\00010\000\000\000\000\000 */
+{ 34798 }, /* l1-icache-prefetches-misses\000legacy cache\000Level 1 instruction cache prefetch misses\000legacy-cache-config=0x10201\000\00010\000\000\000\000\000 */
+{ 34563 }, /* l1-icache-prefetches-ops\000legacy cache\000Level 1 instruction cache prefetch accesses\000legacy-cache-config=0x201\000\00010\000\000\000\000\000 */
+{ 34441 }, /* l1-icache-prefetches-reference\000legacy cache\000Level 1 instruction cache prefetch accesses\000legacy-cache-config=0x201\000\00010\000\000\000\000\000 */
+{ 34324 }, /* l1-icache-prefetches-refs\000legacy cache\000Level 1 instruction cache prefetch accesses\000legacy-cache-config=0x201\000\00010\000\000\000\000\000 */
+{ 32672 }, /* l1-icache-read\000legacy cache\000Level 1 instruction cache read accesses\000legacy-cache-config=1\000\00010\000\000\000\000\000 */
+{ 33083 }, /* l1-icache-read-access\000legacy cache\000Level 1 instruction cache read accesses\000legacy-cache-config=1\000\00010\000\000\000\000\000 */
+{ 33297 }, /* l1-icache-read-miss\000legacy cache\000Level 1 instruction cache read misses\000legacy-cache-config=0x10001\000\00010\000\000\000\000\000 */
+{ 33188 }, /* l1-icache-read-misses\000legacy cache\000Level 1 instruction cache read misses\000legacy-cache-config=0x10001\000\00010\000\000\000\000\000 */
+{ 32981 }, /* l1-icache-read-ops\000legacy cache\000Level 1 instruction cache read accesses\000legacy-cache-config=1\000\00010\000\000\000\000\000 */
+{ 32873 }, /* l1-icache-read-reference\000legacy cache\000Level 1 instruction cache read accesses\000legacy-cache-config=1\000\00010\000\000\000\000\000 */
+{ 32770 }, /* l1-icache-read-refs\000legacy cache\000Level 1 instruction cache read accesses\000legacy-cache-config=1\000\00010\000\000\000\000\000 */
+{ 36860 }, /* l1-icache-reference\000legacy cache\000Level 1 instruction cache read accesses\000legacy-cache-config=1\000\00010\000\000\000\000\000 */
+{ 36762 }, /* l1-icache-refs\000legacy cache\000Level 1 instruction cache read accesses\000legacy-cache-config=1\000\00010\000\000\000\000\000 */
+{ 35898 }, /* l1-icache-speculative-load\000legacy cache\000Level 1 instruction cache prefetch accesses\000legacy-cache-config=0x201\000\00010\000\000\000\000\000 */
+{ 36389 }, /* l1-icache-speculative-load-access\000legacy cache\000Level 1 instruction cache prefetch accesses\000legacy-cache-config=0x201\000\00010\000\000\000\000\000 */
+{ 36639 }, /* l1-icache-speculative-load-miss\000legacy cache\000Level 1 instruction cache prefetch misses\000legacy-cache-config=0x10201\000\00010\000\000\000\000\000 */
+{ 36514 }, /* l1-icache-speculative-load-misses\000legacy cache\000Level 1 instruction cache prefetch misses\000legacy-cache-config=0x10201\000\00010\000\000\000\000\000 */
+{ 36267 }, /* l1-icache-speculative-load-ops\000legacy cache\000Level 1 instruction cache prefetch accesses\000legacy-cache-config=0x201\000\00010\000\000\000\000\000 */
+{ 36139 }, /* l1-icache-speculative-load-reference\000legacy cache\000Level 1 instruction cache prefetch accesses\000legacy-cache-config=0x201\000\00010\000\000\000\000\000 */
+{ 36016 }, /* l1-icache-speculative-load-refs\000legacy cache\000Level 1 instruction cache prefetch accesses\000legacy-cache-config=0x201\000\00010\000\000\000\000\000 */
+{ 35034 }, /* l1-icache-speculative-read\000legacy cache\000Level 1 instruction cache prefetch accesses\000legacy-cache-config=0x201\000\00010\000\000\000\000\000 */
+{ 35525 }, /* l1-icache-speculative-read-access\000legacy cache\000Level 1 instruction cache prefetch accesses\000legacy-cache-config=0x201\000\00010\000\000\000\000\000 */
+{ 35775 }, /* l1-icache-speculative-read-miss\000legacy cache\000Level 1 instruction cache prefetch misses\000legacy-cache-config=0x10201\000\00010\000\000\000\000\000 */
+{ 35650 }, /* l1-icache-speculative-read-misses\000legacy cache\000Level 1 instruction cache prefetch misses\000legacy-cache-config=0x10201\000\00010\000\000\000\000\000 */
+{ 35403 }, /* l1-icache-speculative-read-ops\000legacy cache\000Level 1 instruction cache prefetch accesses\000legacy-cache-config=0x201\000\00010\000\000\000\000\000 */
+{ 35275 }, /* l1-icache-speculative-read-reference\000legacy cache\000Level 1 instruction cache prefetch accesses\000legacy-cache-config=0x201\000\00010\000\000\000\000\000 */
+{ 35152 }, /* l1-icache-speculative-read-refs\000legacy cache\000Level 1 instruction cache prefetch accesses\000legacy-cache-config=0x201\000\00010\000\000\000\000\000 */
+{ 49266 }, /* l1-instruction\000legacy cache\000Level 1 instruction cache read accesses\000legacy-cache-config=1\000\00010\000\000\000\000\000 */
+{ 55483 }, /* l1-instruction-access\000legacy cache\000Level 1 instruction cache read accesses\000legacy-cache-config=1\000\00010\000\000\000\000\000 */
+{ 49364 }, /* l1-instruction-load\000legacy cache\000Level 1 instruction cache read accesses\000legacy-cache-config=1\000\00010\000\000\000\000\000 */
+{ 49795 }, /* l1-instruction-load-access\000legacy cache\000Level 1 instruction cache read accesses\000legacy-cache-config=1\000\00010\000\000\000\000\000 */
+{ 50019 }, /* l1-instruction-load-miss\000legacy cache\000Level 1 instruction cache read misses\000legacy-cache-config=0x10001\000\00010\000\000\000\000\000 */
+{ 49905 }, /* l1-instruction-load-misses\000legacy cache\000Level 1 instruction cache read misses\000legacy-cache-config=0x10001\000\00010\000\000\000\000\000 */
+{ 49688 }, /* l1-instruction-load-ops\000legacy cache\000Level 1 instruction cache read accesses\000legacy-cache-config=1\000\00010\000\000\000\000\000 */
+{ 49575 }, /* l1-instruction-load-reference\000legacy cache\000Level 1 instruction cache read accesses\000legacy-cache-config=1\000\00010\000\000\000\000\000 */
+{ 49467 }, /* l1-instruction-load-refs\000legacy cache\000Level 1 instruction cache read accesses\000legacy-cache-config=1\000\00010\000\000\000\000\000 */
+{ 50131 }, /* l1-instruction-loads\000legacy cache\000Level 1 instruction cache read accesses\000legacy-cache-config=1\000\00010\000\000\000\000\000 */
+{ 50566 }, /* l1-instruction-loads-access\000legacy cache\000Level 1 instruction cache read accesses\000legacy-cache-config=1\000\00010\000\000\000\000\000 */
+{ 50792 }, /* l1-instruction-loads-miss\000legacy cache\000Level 1 instruction cache read misses\000legacy-cache-config=0x10001\000\00010\000\000\000\000\000 */
+{ 50677 }, /* l1-instruction-loads-misses\000legacy cache\000Level 1 instruction cache read misses\000legacy-cache-config=0x10001\000\00010\000\000\000\000\000 */
+{ 50458 }, /* l1-instruction-loads-ops\000legacy cache\000Level 1 instruction cache read accesses\000legacy-cache-config=1\000\00010\000\000\000\000\000 */
+{ 50344 }, /* l1-instruction-loads-reference\000legacy cache\000Level 1 instruction cache read accesses\000legacy-cache-config=1\000\00010\000\000\000\000\000 */
+{ 50235 }, /* l1-instruction-loads-refs\000legacy cache\000Level 1 instruction cache read accesses\000legacy-cache-config=1\000\00010\000\000\000\000\000 */
+{ 55697 }, /* l1-instruction-miss\000legacy cache\000Level 1 instruction cache read misses\000legacy-cache-config=0x10001\000\00010\000\000\000\000\000 */
+{ 55588 }, /* l1-instruction-misses\000legacy cache\000Level 1 instruction cache read misses\000legacy-cache-config=0x10001\000\00010\000\000\000\000\000 */
+{ 55381 }, /* l1-instruction-ops\000legacy cache\000Level 1 instruction cache read accesses\000legacy-cache-config=1\000\00010\000\000\000\000\000 */
+{ 51672 }, /* l1-instruction-prefetch\000legacy cache\000Level 1 instruction cache prefetch accesses\000legacy-cache-config=0x201\000\00010\000\000\000\000\000 */
+{ 52151 }, /* l1-instruction-prefetch-access\000legacy cache\000Level 1 instruction cache prefetch accesses\000legacy-cache-config=0x201\000\00010\000\000\000\000\000 */
+{ 52395 }, /* l1-instruction-prefetch-miss\000legacy cache\000Level 1 instruction cache prefetch misses\000legacy-cache-config=0x10201\000\00010\000\000\000\000\000 */
+{ 52273 }, /* l1-instruction-prefetch-misses\000legacy cache\000Level 1 instruction cache prefetch misses\000legacy-cache-config=0x10201\000\00010\000\000\000\000\000 */
+{ 52032 }, /* l1-instruction-prefetch-ops\000legacy cache\000Level 1 instruction cache prefetch accesses\000legacy-cache-config=0x201\000\00010\000\000\000\000\000 */
+{ 51907 }, /* l1-instruction-prefetch-reference\000legacy cache\000Level 1 instruction cache prefetch accesses\000legacy-cache-config=0x201\000\00010\000\000\000\000\000 */
+{ 51787 }, /* l1-instruction-prefetch-refs\000legacy cache\000Level 1 instruction cache prefetch accesses\000legacy-cache-config=0x201\000\00010\000\000\000\000\000 */
+{ 52515 }, /* l1-instruction-prefetches\000legacy cache\000Level 1 instruction cache prefetch accesses\000legacy-cache-config=0x201\000\00010\000\000\000\000\000 */
+{ 53002 }, /* l1-instruction-prefetches-access\000legacy cache\000Level 1 instruction cache prefetch accesses\000legacy-cache-config=0x201\000\00010\000\000\000\000\000 */
+{ 53250 }, /* l1-instruction-prefetches-miss\000legacy cache\000Level 1 instruction cache prefetch misses\000legacy-cache-config=0x10201\000\00010\000\000\000\000\000 */
+{ 53126 }, /* l1-instruction-prefetches-misses\000legacy cache\000Level 1 instruction cache prefetch misses\000legacy-cache-config=0x10201\000\00010\000\000\000\000\000 */
+{ 52881 }, /* l1-instruction-prefetches-ops\000legacy cache\000Level 1 instruction cache prefetch accesses\000legacy-cache-config=0x201\000\00010\000\000\000\000\000 */
+{ 52754 }, /* l1-instruction-prefetches-reference\000legacy cache\000Level 1 instruction cache prefetch accesses\000legacy-cache-config=0x201\000\00010\000\000\000\000\000 */
+{ 52632 }, /* l1-instruction-prefetches-refs\000legacy cache\000Level 1 instruction cache prefetch accesses\000legacy-cache-config=0x201\000\00010\000\000\000\000\000 */
+{ 50905 }, /* l1-instruction-read\000legacy cache\000Level 1 instruction cache read accesses\000legacy-cache-config=1\000\00010\000\000\000\000\000 */
+{ 51336 }, /* l1-instruction-read-access\000legacy cache\000Level 1 instruction cache read accesses\000legacy-cache-config=1\000\00010\000\000\000\000\000 */
+{ 51560 }, /* l1-instruction-read-miss\000legacy cache\000Level 1 instruction cache read misses\000legacy-cache-config=0x10001\000\00010\000\000\000\000\000 */
+{ 51446 }, /* l1-instruction-read-misses\000legacy cache\000Level 1 instruction cache read misses\000legacy-cache-config=0x10001\000\00010\000\000\000\000\000 */
+{ 51229 }, /* l1-instruction-read-ops\000legacy cache\000Level 1 instruction cache read accesses\000legacy-cache-config=1\000\00010\000\000\000\000\000 */
+{ 51116 }, /* l1-instruction-read-reference\000legacy cache\000Level 1 instruction cache read accesses\000legacy-cache-config=1\000\00010\000\000\000\000\000 */
+{ 51008 }, /* l1-instruction-read-refs\000legacy cache\000Level 1 instruction cache read accesses\000legacy-cache-config=1\000\00010\000\000\000\000\000 */
+{ 55273 }, /* l1-instruction-reference\000legacy cache\000Level 1 instruction cache read accesses\000legacy-cache-config=1\000\00010\000\000\000\000\000 */
+{ 55170 }, /* l1-instruction-refs\000legacy cache\000Level 1 instruction cache read accesses\000legacy-cache-config=1\000\00010\000\000\000\000\000 */
+{ 54271 }, /* l1-instruction-speculative-load\000legacy cache\000Level 1 instruction cache prefetch accesses\000legacy-cache-config=0x201\000\00010\000\000\000\000\000 */
+{ 54782 }, /* l1-instruction-speculative-load-access\000legacy cache\000Level 1 instruction cache prefetch accesses\000legacy-cache-config=0x201\000\00010\000\000\000\000\000 */
+{ 55042 }, /* l1-instruction-speculative-load-miss\000legacy cache\000Level 1 instruction cache prefetch misses\000legacy-cache-config=0x10201\000\00010\000\000\000\000\000 */
+{ 54912 }, /* l1-instruction-speculative-load-misses\000legacy cache\000Level 1 instruction cache prefetch misses\000legacy-cache-config=0x10201\000\00010\000\000\000\000\000 */
+{ 54655 }, /* l1-instruction-speculative-load-ops\000legacy cache\000Level 1 instruction cache prefetch accesses\000legacy-cache-config=0x201\000\00010\000\000\000\000\000 */
+{ 54522 }, /* l1-instruction-speculative-load-reference\000legacy cache\000Level 1 instruction cache prefetch accesses\000legacy-cache-config=0x201\000\00010\000\000\000\000\000 */
+{ 54394 }, /* l1-instruction-speculative-load-refs\000legacy cache\000Level 1 instruction cache prefetch accesses\000legacy-cache-config=0x201\000\00010\000\000\000\000\000 */
+{ 53372 }, /* l1-instruction-speculative-read\000legacy cache\000Level 1 instruction cache prefetch accesses\000legacy-cache-config=0x201\000\00010\000\000\000\000\000 */
+{ 53883 }, /* l1-instruction-speculative-read-access\000legacy cache\000Level 1 instruction cache prefetch accesses\000legacy-cache-config=0x201\000\00010\000\000\000\000\000 */
+{ 54143 }, /* l1-instruction-speculative-read-miss\000legacy cache\000Level 1 instruction cache prefetch misses\000legacy-cache-config=0x10201\000\00010\000\000\000\000\000 */
+{ 54013 }, /* l1-instruction-speculative-read-misses\000legacy cache\000Level 1 instruction cache prefetch misses\000legacy-cache-config=0x10201\000\00010\000\000\000\000\000 */
+{ 53756 }, /* l1-instruction-speculative-read-ops\000legacy cache\000Level 1 instruction cache prefetch accesses\000legacy-cache-config=0x201\000\00010\000\000\000\000\000 */
+{ 53623 }, /* l1-instruction-speculative-read-reference\000legacy cache\000Level 1 instruction cache prefetch accesses\000legacy-cache-config=0x201\000\00010\000\000\000\000\000 */
+{ 53495 }, /* l1-instruction-speculative-read-refs\000legacy cache\000Level 1 instruction cache prefetch accesses\000legacy-cache-config=0x201\000\00010\000\000\000\000\000 */
+{ 15676 }, /* l1d\000legacy cache\000Level 1 data cache read accesses\000legacy-cache-config=0\000\00010\000\000\000\000\000 */
+{ 22971 }, /* l1d-access\000legacy cache\000Level 1 data cache read accesses\000legacy-cache-config=0\000\00010\000\000\000\000\000 */
+{ 15756 }, /* l1d-load\000legacy cache\000Level 1 data cache read accesses\000legacy-cache-config=0\000\00010\000\000\000\000\000 */
+{ 16115 }, /* l1d-load-access\000legacy cache\000Level 1 data cache read accesses\000legacy-cache-config=0\000\00010\000\000\000\000\000 */
+{ 16303 }, /* l1d-load-miss\000legacy cache\000Level 1 data cache read misses\000legacy-cache-config=0x10000\000\00010\000\000\000\000\000 */
+{ 16207 }, /* l1d-load-misses\000legacy cache\000Level 1 data cache read misses\000legacy-cache-config=0x10000\000\00010\000\000\000\000\000 */
+{ 16026 }, /* l1d-load-ops\000legacy cache\000Level 1 data cache read accesses\000legacy-cache-config=0\000\00010\000\000\000\000\000 */
+{ 15931 }, /* l1d-load-reference\000legacy cache\000Level 1 data cache read accesses\000legacy-cache-config=0\000\00010\000\000\000\000\000 */
+{ 15841 }, /* l1d-load-refs\000legacy cache\000Level 1 data cache read accesses\000legacy-cache-config=0\000\00010\000\000\000\000\000 */
+{ 16397 }, /* l1d-loads\000legacy cache\000Level 1 data cache read accesses\000legacy-cache-config=0\000\00010\000\000\000\000\000 */
+{ 16760 }, /* l1d-loads-access\000legacy cache\000Level 1 data cache read accesses\000legacy-cache-config=0\000\00010\000\000\000\000\000 */
+{ 16950 }, /* l1d-loads-miss\000legacy cache\000Level 1 data cache read misses\000legacy-cache-config=0x10000\000\00010\000\000\000\000\000 */
+{ 16853 }, /* l1d-loads-misses\000legacy cache\000Level 1 data cache read misses\000legacy-cache-config=0x10000\000\00010\000\000\000\000\000 */
+{ 16670 }, /* l1d-loads-ops\000legacy cache\000Level 1 data cache read accesses\000legacy-cache-config=0\000\00010\000\000\000\000\000 */
+{ 16574 }, /* l1d-loads-reference\000legacy cache\000Level 1 data cache read accesses\000legacy-cache-config=0\000\00010\000\000\000\000\000 */
+{ 16483 }, /* l1d-loads-refs\000legacy cache\000Level 1 data cache read accesses\000legacy-cache-config=0\000\00010\000\000\000\000\000 */
+{ 23149 }, /* l1d-miss\000legacy cache\000Level 1 data cache read misses\000legacy-cache-config=0x10000\000\00010\000\000\000\000\000 */
+{ 23058 }, /* l1d-misses\000legacy cache\000Level 1 data cache read misses\000legacy-cache-config=0x10000\000\00010\000\000\000\000\000 */
+{ 22887 }, /* l1d-ops\000legacy cache\000Level 1 data cache read accesses\000legacy-cache-config=0\000\00010\000\000\000\000\000 */
+{ 19718 }, /* l1d-prefetch\000legacy cache\000Level 1 data cache prefetch accesses\000legacy-cache-config=0x200\000\00010\000\000\000\000\000 */
+{ 20125 }, /* l1d-prefetch-access\000legacy cache\000Level 1 data cache prefetch accesses\000legacy-cache-config=0x200\000\00010\000\000\000\000\000 */
+{ 20333 }, /* l1d-prefetch-miss\000legacy cache\000Level 1 data cache prefetch misses\000legacy-cache-config=0x10200\000\00010\000\000\000\000\000 */
+{ 20229 }, /* l1d-prefetch-misses\000legacy cache\000Level 1 data cache prefetch misses\000legacy-cache-config=0x10200\000\00010\000\000\000\000\000 */
+{ 20024 }, /* l1d-prefetch-ops\000legacy cache\000Level 1 data cache prefetch accesses\000legacy-cache-config=0x200\000\00010\000\000\000\000\000 */
+{ 19917 }, /* l1d-prefetch-reference\000legacy cache\000Level 1 data cache prefetch accesses\000legacy-cache-config=0x200\000\00010\000\000\000\000\000 */
+{ 19815 }, /* l1d-prefetch-refs\000legacy cache\000Level 1 data cache prefetch accesses\000legacy-cache-config=0x200\000\00010\000\000\000\000\000 */
+{ 20435 }, /* l1d-prefetches\000legacy cache\000Level 1 data cache prefetch accesses\000legacy-cache-config=0x200\000\00010\000\000\000\000\000 */
+{ 20850 }, /* l1d-prefetches-access\000legacy cache\000Level 1 data cache prefetch accesses\000legacy-cache-config=0x200\000\00010\000\000\000\000\000 */
+{ 21062 }, /* l1d-prefetches-miss\000legacy cache\000Level 1 data cache prefetch misses\000legacy-cache-config=0x10200\000\00010\000\000\000\000\000 */
+{ 20956 }, /* l1d-prefetches-misses\000legacy cache\000Level 1 data cache prefetch misses\000legacy-cache-config=0x10200\000\00010\000\000\000\000\000 */
+{ 20747 }, /* l1d-prefetches-ops\000legacy cache\000Level 1 data cache prefetch accesses\000legacy-cache-config=0x200\000\00010\000\000\000\000\000 */
+{ 20638 }, /* l1d-prefetches-reference\000legacy cache\000Level 1 data cache prefetch accesses\000legacy-cache-config=0x200\000\00010\000\000\000\000\000 */
+{ 20534 }, /* l1d-prefetches-refs\000legacy cache\000Level 1 data cache prefetch accesses\000legacy-cache-config=0x200\000\00010\000\000\000\000\000 */
+{ 17045 }, /* l1d-read\000legacy cache\000Level 1 data cache read accesses\000legacy-cache-config=0\000\00010\000\000\000\000\000 */
+{ 17404 }, /* l1d-read-access\000legacy cache\000Level 1 data cache read accesses\000legacy-cache-config=0\000\00010\000\000\000\000\000 */
+{ 17592 }, /* l1d-read-miss\000legacy cache\000Level 1 data cache read misses\000legacy-cache-config=0x10000\000\00010\000\000\000\000\000 */
+{ 17496 }, /* l1d-read-misses\000legacy cache\000Level 1 data cache read misses\000legacy-cache-config=0x10000\000\00010\000\000\000\000\000 */
+{ 17315 }, /* l1d-read-ops\000legacy cache\000Level 1 data cache read accesses\000legacy-cache-config=0\000\00010\000\000\000\000\000 */
+{ 17220 }, /* l1d-read-reference\000legacy cache\000Level 1 data cache read accesses\000legacy-cache-config=0\000\00010\000\000\000\000\000 */
+{ 17130 }, /* l1d-read-refs\000legacy cache\000Level 1 data cache read accesses\000legacy-cache-config=0\000\00010\000\000\000\000\000 */
+{ 22797 }, /* l1d-reference\000legacy cache\000Level 1 data cache read accesses\000legacy-cache-config=0\000\00010\000\000\000\000\000 */
+{ 22712 }, /* l1d-refs\000legacy cache\000Level 1 data cache read accesses\000legacy-cache-config=0\000\00010\000\000\000\000\000 */
+{ 21939 }, /* l1d-speculative-load\000legacy cache\000Level 1 data cache prefetch accesses\000legacy-cache-config=0x200\000\00010\000\000\000\000\000 */
+{ 22378 }, /* l1d-speculative-load-access\000legacy cache\000Level 1 data cache prefetch accesses\000legacy-cache-config=0x200\000\00010\000\000\000\000\000 */
+{ 22602 }, /* l1d-speculative-load-miss\000legacy cache\000Level 1 data cache prefetch misses\000legacy-cache-config=0x10200\000\00010\000\000\000\000\000 */
+{ 22490 }, /* l1d-speculative-load-misses\000legacy cache\000Level 1 data cache prefetch misses\000legacy-cache-config=0x10200\000\00010\000\000\000\000\000 */
+{ 22269 }, /* l1d-speculative-load-ops\000legacy cache\000Level 1 data cache prefetch accesses\000legacy-cache-config=0x200\000\00010\000\000\000\000\000 */
+{ 22154 }, /* l1d-speculative-load-reference\000legacy cache\000Level 1 data cache prefetch accesses\000legacy-cache-config=0x200\000\00010\000\000\000\000\000 */
+{ 22044 }, /* l1d-speculative-load-refs\000legacy cache\000Level 1 data cache prefetch accesses\000legacy-cache-config=0x200\000\00010\000\000\000\000\000 */
+{ 21166 }, /* l1d-speculative-read\000legacy cache\000Level 1 data cache prefetch accesses\000legacy-cache-config=0x200\000\00010\000\000\000\000\000 */
+{ 21605 }, /* l1d-speculative-read-access\000legacy cache\000Level 1 data cache prefetch accesses\000legacy-cache-config=0x200\000\00010\000\000\000\000\000 */
+{ 21829 }, /* l1d-speculative-read-miss\000legacy cache\000Level 1 data cache prefetch misses\000legacy-cache-config=0x10200\000\00010\000\000\000\000\000 */
+{ 21717 }, /* l1d-speculative-read-misses\000legacy cache\000Level 1 data cache prefetch misses\000legacy-cache-config=0x10200\000\00010\000\000\000\000\000 */
+{ 21496 }, /* l1d-speculative-read-ops\000legacy cache\000Level 1 data cache prefetch accesses\000legacy-cache-config=0x200\000\00010\000\000\000\000\000 */
+{ 21381 }, /* l1d-speculative-read-reference\000legacy cache\000Level 1 data cache prefetch accesses\000legacy-cache-config=0x200\000\00010\000\000\000\000\000 */
+{ 21271 }, /* l1d-speculative-read-refs\000legacy cache\000Level 1 data cache prefetch accesses\000legacy-cache-config=0x200\000\00010\000\000\000\000\000 */
+{ 17686 }, /* l1d-store\000legacy cache\000Level 1 data cache write accesses\000legacy-cache-config=0x100\000\00010\000\000\000\000\000 */
+{ 18069 }, /* l1d-store-access\000legacy cache\000Level 1 data cache write accesses\000legacy-cache-config=0x100\000\00010\000\000\000\000\000 */
+{ 18265 }, /* l1d-store-miss\000legacy cache\000Level 1 data cache write misses\000legacy-cache-config=0x10100\000\00010\000\000\000\000\000 */
+{ 18167 }, /* l1d-store-misses\000legacy cache\000Level 1 data cache write misses\000legacy-cache-config=0x10100\000\00010\000\000\000\000\000 */
+{ 17974 }, /* l1d-store-ops\000legacy cache\000Level 1 data cache write accesses\000legacy-cache-config=0x100\000\00010\000\000\000\000\000 */
+{ 17873 }, /* l1d-store-reference\000legacy cache\000Level 1 data cache write accesses\000legacy-cache-config=0x100\000\00010\000\000\000\000\000 */
+{ 17777 }, /* l1d-store-refs\000legacy cache\000Level 1 data cache write accesses\000legacy-cache-config=0x100\000\00010\000\000\000\000\000 */
+{ 18361 }, /* l1d-stores\000legacy cache\000Level 1 data cache write accesses\000legacy-cache-config=0x100\000\00010\000\000\000\000\000 */
+{ 18748 }, /* l1d-stores-access\000legacy cache\000Level 1 data cache write accesses\000legacy-cache-config=0x100\000\00010\000\000\000\000\000 */
+{ 18946 }, /* l1d-stores-miss\000legacy cache\000Level 1 data cache write misses\000legacy-cache-config=0x10100\000\00010\000\000\000\000\000 */
+{ 18847 }, /* l1d-stores-misses\000legacy cache\000Level 1 data cache write misses\000legacy-cache-config=0x10100\000\00010\000\000\000\000\000 */
+{ 18652 }, /* l1d-stores-ops\000legacy cache\000Level 1 data cache write accesses\000legacy-cache-config=0x100\000\00010\000\000\000\000\000 */
+{ 18550 }, /* l1d-stores-reference\000legacy cache\000Level 1 data cache write accesses\000legacy-cache-config=0x100\000\00010\000\000\000\000\000 */
+{ 18453 }, /* l1d-stores-refs\000legacy cache\000Level 1 data cache write accesses\000legacy-cache-config=0x100\000\00010\000\000\000\000\000 */
+{ 19043 }, /* l1d-write\000legacy cache\000Level 1 data cache write accesses\000legacy-cache-config=0x100\000\00010\000\000\000\000\000 */
+{ 19426 }, /* l1d-write-access\000legacy cache\000Level 1 data cache write accesses\000legacy-cache-config=0x100\000\00010\000\000\000\000\000 */
+{ 19622 }, /* l1d-write-miss\000legacy cache\000Level 1 data cache write misses\000legacy-cache-config=0x10100\000\00010\000\000\000\000\000 */
+{ 19524 }, /* l1d-write-misses\000legacy cache\000Level 1 data cache write misses\000legacy-cache-config=0x10100\000\00010\000\000\000\000\000 */
+{ 19331 }, /* l1d-write-ops\000legacy cache\000Level 1 data cache write accesses\000legacy-cache-config=0x100\000\00010\000\000\000\000\000 */
+{ 19230 }, /* l1d-write-reference\000legacy cache\000Level 1 data cache write accesses\000legacy-cache-config=0x100\000\00010\000\000\000\000\000 */
+{ 19134 }, /* l1d-write-refs\000legacy cache\000Level 1 data cache write accesses\000legacy-cache-config=0x100\000\00010\000\000\000\000\000 */
+{ 43344 }, /* l1i\000legacy cache\000Level 1 instruction cache read accesses\000legacy-cache-config=1\000\00010\000\000\000\000\000 */
+{ 48978 }, /* l1i-access\000legacy cache\000Level 1 instruction cache read accesses\000legacy-cache-config=1\000\00010\000\000\000\000\000 */
+{ 43431 }, /* l1i-load\000legacy cache\000Level 1 instruction cache read accesses\000legacy-cache-config=1\000\00010\000\000\000\000\000 */
+{ 43818 }, /* l1i-load-access\000legacy cache\000Level 1 instruction cache read accesses\000legacy-cache-config=1\000\00010\000\000\000\000\000 */
+{ 44020 }, /* l1i-load-miss\000legacy cache\000Level 1 instruction cache read misses\000legacy-cache-config=0x10001\000\00010\000\000\000\000\000 */
+{ 43917 }, /* l1i-load-misses\000legacy cache\000Level 1 instruction cache read misses\000legacy-cache-config=0x10001\000\00010\000\000\000\000\000 */
+{ 43722 }, /* l1i-load-ops\000legacy cache\000Level 1 instruction cache read accesses\000legacy-cache-config=1\000\00010\000\000\000\000\000 */
+{ 43620 }, /* l1i-load-reference\000legacy cache\000Level 1 instruction cache read accesses\000legacy-cache-config=1\000\00010\000\000\000\000\000 */
+{ 43523 }, /* l1i-load-refs\000legacy cache\000Level 1 instruction cache read accesses\000legacy-cache-config=1\000\00010\000\000\000\000\000 */
+{ 44121 }, /* l1i-loads\000legacy cache\000Level 1 instruction cache read accesses\000legacy-cache-config=1\000\00010\000\000\000\000\000 */
+{ 44512 }, /* l1i-loads-access\000legacy cache\000Level 1 instruction cache read accesses\000legacy-cache-config=1\000\00010\000\000\000\000\000 */
+{ 44716 }, /* l1i-loads-miss\000legacy cache\000Level 1 instruction cache read misses\000legacy-cache-config=0x10001\000\00010\000\000\000\000\000 */
+{ 44612 }, /* l1i-loads-misses\000legacy cache\000Level 1 instruction cache read misses\000legacy-cache-config=0x10001\000\00010\000\000\000\000\000 */
+{ 44415 }, /* l1i-loads-ops\000legacy cache\000Level 1 instruction cache read accesses\000legacy-cache-config=1\000\00010\000\000\000\000\000 */
+{ 44312 }, /* l1i-loads-reference\000legacy cache\000Level 1 instruction cache read accesses\000legacy-cache-config=1\000\00010\000\000\000\000\000 */
+{ 44214 }, /* l1i-loads-refs\000legacy cache\000Level 1 instruction cache read accesses\000legacy-cache-config=1\000\00010\000\000\000\000\000 */
+{ 49170 }, /* l1i-miss\000legacy cache\000Level 1 instruction cache read misses\000legacy-cache-config=0x10001\000\00010\000\000\000\000\000 */
+{ 49072 }, /* l1i-misses\000legacy cache\000Level 1 instruction cache read misses\000legacy-cache-config=0x10001\000\00010\000\000\000\000\000 */
+{ 48887 }, /* l1i-ops\000legacy cache\000Level 1 instruction cache read accesses\000legacy-cache-config=1\000\00010\000\000\000\000\000 */
+{ 45508 }, /* l1i-prefetch\000legacy cache\000Level 1 instruction cache prefetch accesses\000legacy-cache-config=0x201\000\00010\000\000\000\000\000 */
+{ 45943 }, /* l1i-prefetch-access\000legacy cache\000Level 1 instruction cache prefetch accesses\000legacy-cache-config=0x201\000\00010\000\000\000\000\000 */
+{ 46165 }, /* l1i-prefetch-miss\000legacy cache\000Level 1 instruction cache prefetch misses\000legacy-cache-config=0x10201\000\00010\000\000\000\000\000 */
+{ 46054 }, /* l1i-prefetch-misses\000legacy cache\000Level 1 instruction cache prefetch misses\000legacy-cache-config=0x10201\000\00010\000\000\000\000\000 */
+{ 45835 }, /* l1i-prefetch-ops\000legacy cache\000Level 1 instruction cache prefetch accesses\000legacy-cache-config=0x201\000\00010\000\000\000\000\000 */
+{ 45721 }, /* l1i-prefetch-reference\000legacy cache\000Level 1 instruction cache prefetch accesses\000legacy-cache-config=0x201\000\00010\000\000\000\000\000 */
+{ 45612 }, /* l1i-prefetch-refs\000legacy cache\000Level 1 instruction cache prefetch accesses\000legacy-cache-config=0x201\000\00010\000\000\000\000\000 */
+{ 46274 }, /* l1i-prefetches\000legacy cache\000Level 1 instruction cache prefetch accesses\000legacy-cache-config=0x201\000\00010\000\000\000\000\000 */
+{ 46717 }, /* l1i-prefetches-access\000legacy cache\000Level 1 instruction cache prefetch accesses\000legacy-cache-config=0x201\000\00010\000\000\000\000\000 */
+{ 46943 }, /* l1i-prefetches-miss\000legacy cache\000Level 1 instruction cache prefetch misses\000legacy-cache-config=0x10201\000\00010\000\000\000\000\000 */
+{ 46830 }, /* l1i-prefetches-misses\000legacy cache\000Level 1 instruction cache prefetch misses\000legacy-cache-config=0x10201\000\00010\000\000\000\000\000 */
+{ 46607 }, /* l1i-prefetches-ops\000legacy cache\000Level 1 instruction cache prefetch accesses\000legacy-cache-config=0x201\000\00010\000\000\000\000\000 */
+{ 46491 }, /* l1i-prefetches-reference\000legacy cache\000Level 1 instruction cache prefetch accesses\000legacy-cache-config=0x201\000\00010\000\000\000\000\000 */
+{ 46380 }, /* l1i-prefetches-refs\000legacy cache\000Level 1 instruction cache prefetch accesses\000legacy-cache-config=0x201\000\00010\000\000\000\000\000 */
+{ 44818 }, /* l1i-read\000legacy cache\000Level 1 instruction cache read accesses\000legacy-cache-config=1\000\00010\000\000\000\000\000 */
+{ 45205 }, /* l1i-read-access\000legacy cache\000Level 1 instruction cache read accesses\000legacy-cache-config=1\000\00010\000\000\000\000\000 */
+{ 45407 }, /* l1i-read-miss\000legacy cache\000Level 1 instruction cache read misses\000legacy-cache-config=0x10001\000\00010\000\000\000\000\000 */
+{ 45304 }, /* l1i-read-misses\000legacy cache\000Level 1 instruction cache read misses\000legacy-cache-config=0x10001\000\00010\000\000\000\000\000 */
+{ 45109 }, /* l1i-read-ops\000legacy cache\000Level 1 instruction cache read accesses\000legacy-cache-config=1\000\00010\000\000\000\000\000 */
+{ 45007 }, /* l1i-read-reference\000legacy cache\000Level 1 instruction cache read accesses\000legacy-cache-config=1\000\00010\000\000\000\000\000 */
+{ 44910 }, /* l1i-read-refs\000legacy cache\000Level 1 instruction cache read accesses\000legacy-cache-config=1\000\00010\000\000\000\000\000 */
+{ 48790 }, /* l1i-reference\000legacy cache\000Level 1 instruction cache read accesses\000legacy-cache-config=1\000\00010\000\000\000\000\000 */
+{ 48698 }, /* l1i-refs\000legacy cache\000Level 1 instruction cache read accesses\000legacy-cache-config=1\000\00010\000\000\000\000\000 */
+{ 47876 }, /* l1i-speculative-load\000legacy cache\000Level 1 instruction cache prefetch accesses\000legacy-cache-config=0x201\000\00010\000\000\000\000\000 */
+{ 48343 }, /* l1i-speculative-load-access\000legacy cache\000Level 1 instruction cache prefetch accesses\000legacy-cache-config=0x201\000\00010\000\000\000\000\000 */
+{ 48581 }, /* l1i-speculative-load-miss\000legacy cache\000Level 1 instruction cache prefetch misses\000legacy-cache-config=0x10201\000\00010\000\000\000\000\000 */
+{ 48462 }, /* l1i-speculative-load-misses\000legacy cache\000Level 1 instruction cache prefetch misses\000legacy-cache-config=0x10201\000\00010\000\000\000\000\000 */
+{ 48227 }, /* l1i-speculative-load-ops\000legacy cache\000Level 1 instruction cache prefetch accesses\000legacy-cache-config=0x201\000\00010\000\000\000\000\000 */
+{ 48105 }, /* l1i-speculative-load-reference\000legacy cache\000Level 1 instruction cache prefetch accesses\000legacy-cache-config=0x201\000\00010\000\000\000\000\000 */
+{ 47988 }, /* l1i-speculative-load-refs\000legacy cache\000Level 1 instruction cache prefetch accesses\000legacy-cache-config=0x201\000\00010\000\000\000\000\000 */
+{ 47054 }, /* l1i-speculative-read\000legacy cache\000Level 1 instruction cache prefetch accesses\000legacy-cache-config=0x201\000\00010\000\000\000\000\000 */
+{ 47521 }, /* l1i-speculative-read-access\000legacy cache\000Level 1 instruction cache prefetch accesses\000legacy-cache-config=0x201\000\00010\000\000\000\000\000 */
+{ 47759 }, /* l1i-speculative-read-miss\000legacy cache\000Level 1 instruction cache prefetch misses\000legacy-cache-config=0x10201\000\00010\000\000\000\000\000 */
+{ 47640 }, /* l1i-speculative-read-misses\000legacy cache\000Level 1 instruction cache prefetch misses\000legacy-cache-config=0x10201\000\00010\000\000\000\000\000 */
+{ 47405 }, /* l1i-speculative-read-ops\000legacy cache\000Level 1 instruction cache prefetch accesses\000legacy-cache-config=0x201\000\00010\000\000\000\000\000 */
+{ 47283 }, /* l1i-speculative-read-reference\000legacy cache\000Level 1 instruction cache prefetch accesses\000legacy-cache-config=0x201\000\00010\000\000\000\000\000 */
+{ 47166 }, /* l1i-speculative-read-refs\000legacy cache\000Level 1 instruction cache prefetch accesses\000legacy-cache-config=0x201\000\00010\000\000\000\000\000 */
+{ 63212 }, /* l2\000legacy cache\000Level 2 (or higher) last level cache read accesses\000legacy-cache-config=2\000\00010\000\000\000\000\000 */
+{ 71765 }, /* l2-access\000legacy cache\000Level 2 (or higher) last level cache read accesses\000legacy-cache-config=2\000\00010\000\000\000\000\000 */
+{ 63309 }, /* l2-load\000legacy cache\000Level 2 (or higher) last level cache read accesses\000legacy-cache-config=2\000\00010\000\000\000\000\000 */
+{ 63736 }, /* l2-load-access\000legacy cache\000Level 2 (or higher) last level cache read accesses\000legacy-cache-config=2\000\00010\000\000\000\000\000 */
+{ 63958 }, /* l2-load-miss\000legacy cache\000Level 2 (or higher) last level cache read misses\000legacy-cache-config=0x10002\000\00010\000\000\000\000\000 */
+{ 63845 }, /* l2-load-misses\000legacy cache\000Level 2 (or higher) last level cache read misses\000legacy-cache-config=0x10002\000\00010\000\000\000\000\000 */
+{ 63630 }, /* l2-load-ops\000legacy cache\000Level 2 (or higher) last level cache read accesses\000legacy-cache-config=2\000\00010\000\000\000\000\000 */
+{ 63518 }, /* l2-load-reference\000legacy cache\000Level 2 (or higher) last level cache read accesses\000legacy-cache-config=2\000\00010\000\000\000\000\000 */
+{ 63411 }, /* l2-load-refs\000legacy cache\000Level 2 (or higher) last level cache read accesses\000legacy-cache-config=2\000\00010\000\000\000\000\000 */
+{ 64069 }, /* l2-loads\000legacy cache\000Level 2 (or higher) last level cache read accesses\000legacy-cache-config=2\000\00010\000\000\000\000\000 */
+{ 64500 }, /* l2-loads-access\000legacy cache\000Level 2 (or higher) last level cache read accesses\000legacy-cache-config=2\000\00010\000\000\000\000\000 */
+{ 64724 }, /* l2-loads-miss\000legacy cache\000Level 2 (or higher) last level cache read misses\000legacy-cache-config=0x10002\000\00010\000\000\000\000\000 */
+{ 64610 }, /* l2-loads-misses\000legacy cache\000Level 2 (or higher) last level cache read misses\000legacy-cache-config=0x10002\000\00010\000\000\000\000\000 */
+{ 64393 }, /* l2-loads-ops\000legacy cache\000Level 2 (or higher) last level cache read accesses\000legacy-cache-config=2\000\00010\000\000\000\000\000 */
+{ 64280 }, /* l2-loads-reference\000legacy cache\000Level 2 (or higher) last level cache read accesses\000legacy-cache-config=2\000\00010\000\000\000\000\000 */
+{ 64172 }, /* l2-loads-refs\000legacy cache\000Level 2 (or higher) last level cache read accesses\000legacy-cache-config=2\000\00010\000\000\000\000\000 */
+{ 71977 }, /* l2-miss\000legacy cache\000Level 2 (or higher) last level cache read misses\000legacy-cache-config=0x10002\000\00010\000\000\000\000\000 */
+{ 71869 }, /* l2-misses\000legacy cache\000Level 2 (or higher) last level cache read misses\000legacy-cache-config=0x10002\000\00010\000\000\000\000\000 */
+{ 71664 }, /* l2-ops\000legacy cache\000Level 2 (or higher) last level cache read accesses\000legacy-cache-config=2\000\00010\000\000\000\000\000 */
+{ 67985 }, /* l2-prefetch\000legacy cache\000Level 2 (or higher) last level cache prefetch accesses\000legacy-cache-config=0x202\000\00010\000\000\000\000\000 */
+{ 68460 }, /* l2-prefetch-access\000legacy cache\000Level 2 (or higher) last level cache prefetch accesses\000legacy-cache-config=0x202\000\00010\000\000\000\000\000 */
+{ 68702 }, /* l2-prefetch-miss\000legacy cache\000Level 2 (or higher) last level cache prefetch misses\000legacy-cache-config=0x10202\000\00010\000\000\000\000\000 */
+{ 68581 }, /* l2-prefetch-misses\000legacy cache\000Level 2 (or higher) last level cache prefetch misses\000legacy-cache-config=0x10202\000\00010\000\000\000\000\000 */
+{ 68342 }, /* l2-prefetch-ops\000legacy cache\000Level 2 (or higher) last level cache prefetch accesses\000legacy-cache-config=0x202\000\00010\000\000\000\000\000 */
+{ 68218 }, /* l2-prefetch-reference\000legacy cache\000Level 2 (or higher) last level cache prefetch accesses\000legacy-cache-config=0x202\000\00010\000\000\000\000\000 */
+{ 68099 }, /* l2-prefetch-refs\000legacy cache\000Level 2 (or higher) last level cache prefetch accesses\000legacy-cache-config=0x202\000\00010\000\000\000\000\000 */
+{ 68821 }, /* l2-prefetches\000legacy cache\000Level 2 (or higher) last level cache prefetch accesses\000legacy-cache-config=0x202\000\00010\000\000\000\000\000 */
+{ 69304 }, /* l2-prefetches-access\000legacy cache\000Level 2 (or higher) last level cache prefetch accesses\000legacy-cache-config=0x202\000\00010\000\000\000\000\000 */
+{ 69550 }, /* l2-prefetches-miss\000legacy cache\000Level 2 (or higher) last level cache prefetch misses\000legacy-cache-config=0x10202\000\00010\000\000\000\000\000 */
+{ 69427 }, /* l2-prefetches-misses\000legacy cache\000Level 2 (or higher) last level cache prefetch misses\000legacy-cache-config=0x10202\000\00010\000\000\000\000\000 */
+{ 69184 }, /* l2-prefetches-ops\000legacy cache\000Level 2 (or higher) last level cache prefetch accesses\000legacy-cache-config=0x202\000\00010\000\000\000\000\000 */
+{ 69058 }, /* l2-prefetches-reference\000legacy cache\000Level 2 (or higher) last level cache prefetch accesses\000legacy-cache-config=0x202\000\00010\000\000\000\000\000 */
+{ 68937 }, /* l2-prefetches-refs\000legacy cache\000Level 2 (or higher) last level cache prefetch accesses\000legacy-cache-config=0x202\000\00010\000\000\000\000\000 */
+{ 64836 }, /* l2-read\000legacy cache\000Level 2 (or higher) last level cache read accesses\000legacy-cache-config=2\000\00010\000\000\000\000\000 */
+{ 65263 }, /* l2-read-access\000legacy cache\000Level 2 (or higher) last level cache read accesses\000legacy-cache-config=2\000\00010\000\000\000\000\000 */
+{ 65485 }, /* l2-read-miss\000legacy cache\000Level 2 (or higher) last level cache read misses\000legacy-cache-config=0x10002\000\00010\000\000\000\000\000 */
+{ 65372 }, /* l2-read-misses\000legacy cache\000Level 2 (or higher) last level cache read misses\000legacy-cache-config=0x10002\000\00010\000\000\000\000\000 */
+{ 65157 }, /* l2-read-ops\000legacy cache\000Level 2 (or higher) last level cache read accesses\000legacy-cache-config=2\000\00010\000\000\000\000\000 */
+{ 65045 }, /* l2-read-reference\000legacy cache\000Level 2 (or higher) last level cache read accesses\000legacy-cache-config=2\000\00010\000\000\000\000\000 */
+{ 64938 }, /* l2-read-refs\000legacy cache\000Level 2 (or higher) last level cache read accesses\000legacy-cache-config=2\000\00010\000\000\000\000\000 */
+{ 71557 }, /* l2-reference\000legacy cache\000Level 2 (or higher) last level cache read accesses\000legacy-cache-config=2\000\00010\000\000\000\000\000 */
+{ 71455 }, /* l2-refs\000legacy cache\000Level 2 (or higher) last level cache read accesses\000legacy-cache-config=2\000\00010\000\000\000\000\000 */
+{ 70563 }, /* l2-speculative-load\000legacy cache\000Level 2 (or higher) last level cache prefetch accesses\000legacy-cache-config=0x202\000\00010\000\000\000\000\000 */
+{ 71070 }, /* l2-speculative-load-access\000legacy cache\000Level 2 (or higher) last level cache prefetch accesses\000legacy-cache-config=0x202\000\00010\000\000\000\000\000 */
+{ 71328 }, /* l2-speculative-load-miss\000legacy cache\000Level 2 (or higher) last level cache prefetch misses\000legacy-cache-config=0x10202\000\00010\000\000\000\000\000 */
+{ 71199 }, /* l2-speculative-load-misses\000legacy cache\000Level 2 (or higher) last level cache prefetch misses\000legacy-cache-config=0x10202\000\00010\000\000\000\000\000 */
+{ 70944 }, /* l2-speculative-load-ops\000legacy cache\000Level 2 (or higher) last level cache prefetch accesses\000legacy-cache-config=0x202\000\00010\000\000\000\000\000 */
+{ 70812 }, /* l2-speculative-load-reference\000legacy cache\000Level 2 (or higher) last level cache prefetch accesses\000legacy-cache-config=0x202\000\00010\000\000\000\000\000 */
+{ 70685 }, /* l2-speculative-load-refs\000legacy cache\000Level 2 (or higher) last level cache prefetch accesses\000legacy-cache-config=0x202\000\00010\000\000\000\000\000 */
+{ 69671 }, /* l2-speculative-read\000legacy cache\000Level 2 (or higher) last level cache prefetch accesses\000legacy-cache-config=0x202\000\00010\000\000\000\000\000 */
+{ 70178 }, /* l2-speculative-read-access\000legacy cache\000Level 2 (or higher) last level cache prefetch accesses\000legacy-cache-config=0x202\000\00010\000\000\000\000\000 */
+{ 70436 }, /* l2-speculative-read-miss\000legacy cache\000Level 2 (or higher) last level cache prefetch misses\000legacy-cache-config=0x10202\000\00010\000\000\000\000\000 */
+{ 70307 }, /* l2-speculative-read-misses\000legacy cache\000Level 2 (or higher) last level cache prefetch misses\000legacy-cache-config=0x10202\000\00010\000\000\000\000\000 */
+{ 70052 }, /* l2-speculative-read-ops\000legacy cache\000Level 2 (or higher) last level cache prefetch accesses\000legacy-cache-config=0x202\000\00010\000\000\000\000\000 */
+{ 69920 }, /* l2-speculative-read-reference\000legacy cache\000Level 2 (or higher) last level cache prefetch accesses\000legacy-cache-config=0x202\000\00010\000\000\000\000\000 */
+{ 69793 }, /* l2-speculative-read-refs\000legacy cache\000Level 2 (or higher) last level cache prefetch accesses\000legacy-cache-config=0x202\000\00010\000\000\000\000\000 */
+{ 65596 }, /* l2-store\000legacy cache\000Level 2 (or higher) last level cache write accesses\000legacy-cache-config=0x102\000\00010\000\000\000\000\000 */
+{ 66047 }, /* l2-store-access\000legacy cache\000Level 2 (or higher) last level cache write accesses\000legacy-cache-config=0x102\000\00010\000\000\000\000\000 */
+{ 66277 }, /* l2-store-miss\000legacy cache\000Level 2 (or higher) last level cache write misses\000legacy-cache-config=0x10102\000\00010\000\000\000\000\000 */
+{ 66162 }, /* l2-store-misses\000legacy cache\000Level 2 (or higher) last level cache write misses\000legacy-cache-config=0x10102\000\00010\000\000\000\000\000 */
+{ 65935 }, /* l2-store-ops\000legacy cache\000Level 2 (or higher) last level cache write accesses\000legacy-cache-config=0x102\000\00010\000\000\000\000\000 */
+{ 65817 }, /* l2-store-reference\000legacy cache\000Level 2 (or higher) last level cache write accesses\000legacy-cache-config=0x102\000\00010\000\000\000\000\000 */
+{ 65704 }, /* l2-store-refs\000legacy cache\000Level 2 (or higher) last level cache write accesses\000legacy-cache-config=0x102\000\00010\000\000\000\000\000 */
+{ 66390 }, /* l2-stores\000legacy cache\000Level 2 (or higher) last level cache write accesses\000legacy-cache-config=0x102\000\00010\000\000\000\000\000 */
+{ 66845 }, /* l2-stores-access\000legacy cache\000Level 2 (or higher) last level cache write accesses\000legacy-cache-config=0x102\000\00010\000\000\000\000\000 */
+{ 67077 }, /* l2-stores-miss\000legacy cache\000Level 2 (or higher) last level cache write misses\000legacy-cache-config=0x10102\000\00010\000\000\000\000\000 */
+{ 66961 }, /* l2-stores-misses\000legacy cache\000Level 2 (or higher) last level cache write misses\000legacy-cache-config=0x10102\000\00010\000\000\000\000\000 */
+{ 66732 }, /* l2-stores-ops\000legacy cache\000Level 2 (or higher) last level cache write accesses\000legacy-cache-config=0x102\000\00010\000\000\000\000\000 */
+{ 66613 }, /* l2-stores-reference\000legacy cache\000Level 2 (or higher) last level cache write accesses\000legacy-cache-config=0x102\000\00010\000\000\000\000\000 */
+{ 66499 }, /* l2-stores-refs\000legacy cache\000Level 2 (or higher) last level cache write accesses\000legacy-cache-config=0x102\000\00010\000\000\000\000\000 */
+{ 67191 }, /* l2-write\000legacy cache\000Level 2 (or higher) last level cache write accesses\000legacy-cache-config=0x102\000\00010\000\000\000\000\000 */
+{ 67642 }, /* l2-write-access\000legacy cache\000Level 2 (or higher) last level cache write accesses\000legacy-cache-config=0x102\000\00010\000\000\000\000\000 */
+{ 67872 }, /* l2-write-miss\000legacy cache\000Level 2 (or higher) last level cache write misses\000legacy-cache-config=0x10102\000\00010\000\000\000\000\000 */
+{ 67757 }, /* l2-write-misses\000legacy cache\000Level 2 (or higher) last level cache write misses\000legacy-cache-config=0x10102\000\00010\000\000\000\000\000 */
+{ 67530 }, /* l2-write-ops\000legacy cache\000Level 2 (or higher) last level cache write accesses\000legacy-cache-config=0x102\000\00010\000\000\000\000\000 */
+{ 67412 }, /* l2-write-reference\000legacy cache\000Level 2 (or higher) last level cache write accesses\000legacy-cache-config=0x102\000\00010\000\000\000\000\000 */
+{ 67299 }, /* l2-write-refs\000legacy cache\000Level 2 (or higher) last level cache write accesses\000legacy-cache-config=0x102\000\00010\000\000\000\000\000 */
+{ 55804 }, /* llc\000legacy cache\000Last level cache read accesses\000legacy-cache-config=2\000\00010\000\000\000\000\000 */
+{ 62951 }, /* llc-access\000legacy cache\000Last level cache read accesses\000legacy-cache-config=2\000\00010\000\000\000\000\000 */
+{ 55882 }, /* llc-load\000legacy cache\000Last level cache read accesses\000legacy-cache-config=2\000\00010\000\000\000\000\000 */
+{ 56233 }, /* llc-load-access\000legacy cache\000Last level cache read accesses\000legacy-cache-config=2\000\00010\000\000\000\000\000 */
+{ 56417 }, /* llc-load-miss\000legacy cache\000Last level cache read misses\000legacy-cache-config=0x10002\000\00010\000\000\000\000\000 */
+{ 56323 }, /* llc-load-misses\000legacy cache\000Last level cache read misses\000legacy-cache-config=0x10002\000\00000\000\000\000\000\000 */
+{ 56146 }, /* llc-load-ops\000legacy cache\000Last level cache read accesses\000legacy-cache-config=2\000\00010\000\000\000\000\000 */
+{ 56053 }, /* llc-load-reference\000legacy cache\000Last level cache read accesses\000legacy-cache-config=2\000\00010\000\000\000\000\000 */
+{ 55965 }, /* llc-load-refs\000legacy cache\000Last level cache read accesses\000legacy-cache-config=2\000\00010\000\000\000\000\000 */
+{ 56509 }, /* llc-loads\000legacy cache\000Last level cache read accesses\000legacy-cache-config=2\000\00000\000\000\000\000\000 */
+{ 56864 }, /* llc-loads-access\000legacy cache\000Last level cache read accesses\000legacy-cache-config=2\000\00010\000\000\000\000\000 */
+{ 57050 }, /* llc-loads-miss\000legacy cache\000Last level cache read misses\000legacy-cache-config=0x10002\000\00010\000\000\000\000\000 */
+{ 56955 }, /* llc-loads-misses\000legacy cache\000Last level cache read misses\000legacy-cache-config=0x10002\000\00010\000\000\000\000\000 */
+{ 56776 }, /* llc-loads-ops\000legacy cache\000Last level cache read accesses\000legacy-cache-config=2\000\00010\000\000\000\000\000 */
+{ 56682 }, /* llc-loads-reference\000legacy cache\000Last level cache read accesses\000legacy-cache-config=2\000\00010\000\000\000\000\000 */
+{ 56593 }, /* llc-loads-refs\000legacy cache\000Last level cache read accesses\000legacy-cache-config=2\000\00010\000\000\000\000\000 */
+{ 63125 }, /* llc-miss\000legacy cache\000Last level cache read misses\000legacy-cache-config=0x10002\000\00010\000\000\000\000\000 */
+{ 63036 }, /* llc-misses\000legacy cache\000Last level cache read misses\000legacy-cache-config=0x10002\000\00010\000\000\000\000\000 */
+{ 62869 }, /* llc-ops\000legacy cache\000Last level cache read accesses\000legacy-cache-config=2\000\00010\000\000\000\000\000 */
+{ 59760 }, /* llc-prefetch\000legacy cache\000Last level cache prefetch accesses\000legacy-cache-config=0x202\000\00010\000\000\000\000\000 */
+{ 60159 }, /* llc-prefetch-access\000legacy cache\000Last level cache prefetch accesses\000legacy-cache-config=0x202\000\00010\000\000\000\000\000 */
+{ 60363 }, /* llc-prefetch-miss\000legacy cache\000Last level cache prefetch misses\000legacy-cache-config=0x10202\000\00010\000\000\000\000\000 */
+{ 60261 }, /* llc-prefetch-misses\000legacy cache\000Last level cache prefetch misses\000legacy-cache-config=0x10202\000\00000\000\000\000\000\000 */
+{ 60060 }, /* llc-prefetch-ops\000legacy cache\000Last level cache prefetch accesses\000legacy-cache-config=0x202\000\00010\000\000\000\000\000 */
+{ 59955 }, /* llc-prefetch-reference\000legacy cache\000Last level cache prefetch accesses\000legacy-cache-config=0x202\000\00010\000\000\000\000\000 */
+{ 59855 }, /* llc-prefetch-refs\000legacy cache\000Last level cache prefetch accesses\000legacy-cache-config=0x202\000\00010\000\000\000\000\000 */
+{ 60463 }, /* llc-prefetches\000legacy cache\000Last level cache prefetch accesses\000legacy-cache-config=0x202\000\00000\000\000\000\000\000 */
+{ 60870 }, /* llc-prefetches-access\000legacy cache\000Last level cache prefetch accesses\000legacy-cache-config=0x202\000\00010\000\000\000\000\000 */
+{ 61078 }, /* llc-prefetches-miss\000legacy cache\000Last level cache prefetch misses\000legacy-cache-config=0x10202\000\00010\000\000\000\000\000 */
+{ 60974 }, /* llc-prefetches-misses\000legacy cache\000Last level cache prefetch misses\000legacy-cache-config=0x10202\000\00010\000\000\000\000\000 */
+{ 60769 }, /* llc-prefetches-ops\000legacy cache\000Last level cache prefetch accesses\000legacy-cache-config=0x202\000\00010\000\000\000\000\000 */
+{ 60662 }, /* llc-prefetches-reference\000legacy cache\000Last level cache prefetch accesses\000legacy-cache-config=0x202\000\00010\000\000\000\000\000 */
+{ 60560 }, /* llc-prefetches-refs\000legacy cache\000Last level cache prefetch accesses\000legacy-cache-config=0x202\000\00010\000\000\000\000\000 */
+{ 57143 }, /* llc-read\000legacy cache\000Last level cache read accesses\000legacy-cache-config=2\000\00010\000\000\000\000\000 */
+{ 57494 }, /* llc-read-access\000legacy cache\000Last level cache read accesses\000legacy-cache-config=2\000\00010\000\000\000\000\000 */
+{ 57678 }, /* llc-read-miss\000legacy cache\000Last level cache read misses\000legacy-cache-config=0x10002\000\00010\000\000\000\000\000 */
+{ 57584 }, /* llc-read-misses\000legacy cache\000Last level cache read misses\000legacy-cache-config=0x10002\000\00010\000\000\000\000\000 */
+{ 57407 }, /* llc-read-ops\000legacy cache\000Last level cache read accesses\000legacy-cache-config=2\000\00010\000\000\000\000\000 */
+{ 57314 }, /* llc-read-reference\000legacy cache\000Last level cache read accesses\000legacy-cache-config=2\000\00010\000\000\000\000\000 */
+{ 57226 }, /* llc-read-refs\000legacy cache\000Last level cache read accesses\000legacy-cache-config=2\000\00010\000\000\000\000\000 */
+{ 62781 }, /* llc-reference\000legacy cache\000Last level cache read accesses\000legacy-cache-config=2\000\00010\000\000\000\000\000 */
+{ 62698 }, /* llc-refs\000legacy cache\000Last level cache read accesses\000legacy-cache-config=2\000\00010\000\000\000\000\000 */
+{ 61939 }, /* llc-speculative-load\000legacy cache\000Last level cache prefetch accesses\000legacy-cache-config=0x202\000\00010\000\000\000\000\000 */
+{ 62370 }, /* llc-speculative-load-access\000legacy cache\000Last level cache prefetch accesses\000legacy-cache-config=0x202\000\00010\000\000\000\000\000 */
+{ 62590 }, /* llc-speculative-load-miss\000legacy cache\000Last level cache prefetch misses\000legacy-cache-config=0x10202\000\00010\000\000\000\000\000 */
+{ 62480 }, /* llc-speculative-load-misses\000legacy cache\000Last level cache prefetch misses\000legacy-cache-config=0x10202\000\00010\000\000\000\000\000 */
+{ 62263 }, /* llc-speculative-load-ops\000legacy cache\000Last level cache prefetch accesses\000legacy-cache-config=0x202\000\00010\000\000\000\000\000 */
+{ 62150 }, /* llc-speculative-load-reference\000legacy cache\000Last level cache prefetch accesses\000legacy-cache-config=0x202\000\00010\000\000\000\000\000 */
+{ 62042 }, /* llc-speculative-load-refs\000legacy cache\000Last level cache prefetch accesses\000legacy-cache-config=0x202\000\00010\000\000\000\000\000 */
+{ 61180 }, /* llc-speculative-read\000legacy cache\000Last level cache prefetch accesses\000legacy-cache-config=0x202\000\00010\000\000\000\000\000 */
+{ 61611 }, /* llc-speculative-read-access\000legacy cache\000Last level cache prefetch accesses\000legacy-cache-config=0x202\000\00010\000\000\000\000\000 */
+{ 61831 }, /* llc-speculative-read-miss\000legacy cache\000Last level cache prefetch misses\000legacy-cache-config=0x10202\000\00010\000\000\000\000\000 */
+{ 61721 }, /* llc-speculative-read-misses\000legacy cache\000Last level cache prefetch misses\000legacy-cache-config=0x10202\000\00010\000\000\000\000\000 */
+{ 61504 }, /* llc-speculative-read-ops\000legacy cache\000Last level cache prefetch accesses\000legacy-cache-config=0x202\000\00010\000\000\000\000\000 */
+{ 61391 }, /* llc-speculative-read-reference\000legacy cache\000Last level cache prefetch accesses\000legacy-cache-config=0x202\000\00010\000\000\000\000\000 */
+{ 61283 }, /* llc-speculative-read-refs\000legacy cache\000Last level cache prefetch accesses\000legacy-cache-config=0x202\000\00010\000\000\000\000\000 */
+{ 57770 }, /* llc-store\000legacy cache\000Last level cache write accesses\000legacy-cache-config=0x102\000\00010\000\000\000\000\000 */
+{ 58145 }, /* llc-store-access\000legacy cache\000Last level cache write accesses\000legacy-cache-config=0x102\000\00010\000\000\000\000\000 */
+{ 58337 }, /* llc-store-miss\000legacy cache\000Last level cache write misses\000legacy-cache-config=0x10102\000\00010\000\000\000\000\000 */
+{ 58241 }, /* llc-store-misses\000legacy cache\000Last level cache write misses\000legacy-cache-config=0x10102\000\00000\000\000\000\000\000 */
+{ 58052 }, /* llc-store-ops\000legacy cache\000Last level cache write accesses\000legacy-cache-config=0x102\000\00010\000\000\000\000\000 */
+{ 57953 }, /* llc-store-reference\000legacy cache\000Last level cache write accesses\000legacy-cache-config=0x102\000\00010\000\000\000\000\000 */
+{ 57859 }, /* llc-store-refs\000legacy cache\000Last level cache write accesses\000legacy-cache-config=0x102\000\00010\000\000\000\000\000 */
+{ 58431 }, /* llc-stores\000legacy cache\000Last level cache write accesses\000legacy-cache-config=0x102\000\00000\000\000\000\000\000 */
+{ 58810 }, /* llc-stores-access\000legacy cache\000Last level cache write accesses\000legacy-cache-config=0x102\000\00010\000\000\000\000\000 */
+{ 59004 }, /* llc-stores-miss\000legacy cache\000Last level cache write misses\000legacy-cache-config=0x10102\000\00010\000\000\000\000\000 */
+{ 58907 }, /* llc-stores-misses\000legacy cache\000Last level cache write misses\000legacy-cache-config=0x10102\000\00010\000\000\000\000\000 */
+{ 58716 }, /* llc-stores-ops\000legacy cache\000Last level cache write accesses\000legacy-cache-config=0x102\000\00010\000\000\000\000\000 */
+{ 58616 }, /* llc-stores-reference\000legacy cache\000Last level cache write accesses\000legacy-cache-config=0x102\000\00010\000\000\000\000\000 */
+{ 58521 }, /* llc-stores-refs\000legacy cache\000Last level cache write accesses\000legacy-cache-config=0x102\000\00010\000\000\000\000\000 */
+{ 59099 }, /* llc-write\000legacy cache\000Last level cache write accesses\000legacy-cache-config=0x102\000\00010\000\000\000\000\000 */
+{ 59474 }, /* llc-write-access\000legacy cache\000Last level cache write accesses\000legacy-cache-config=0x102\000\00010\000\000\000\000\000 */
+{ 59666 }, /* llc-write-miss\000legacy cache\000Last level cache write misses\000legacy-cache-config=0x10102\000\00010\000\000\000\000\000 */
+{ 59570 }, /* llc-write-misses\000legacy cache\000Last level cache write misses\000legacy-cache-config=0x10102\000\00010\000\000\000\000\000 */
+{ 59381 }, /* llc-write-ops\000legacy cache\000Last level cache write accesses\000legacy-cache-config=0x102\000\00010\000\000\000\000\000 */
+{ 59282 }, /* llc-write-reference\000legacy cache\000Last level cache write accesses\000legacy-cache-config=0x102\000\00010\000\000\000\000\000 */
+{ 59188 }, /* llc-write-refs\000legacy cache\000Last level cache write accesses\000legacy-cache-config=0x102\000\00010\000\000\000\000\000 */
+{ 114128 }, /* node\000legacy cache\000Local memory read accesses\000legacy-cache-config=6\000\00010\000\000\000\000\000 */
+{ 121053 }, /* node-access\000legacy cache\000Local memory read accesses\000legacy-cache-config=6\000\00010\000\000\000\000\000 */
+{ 114203 }, /* node-load\000legacy cache\000Local memory read accesses\000legacy-cache-config=6\000\00010\000\000\000\000\000 */
+{ 114542 }, /* node-load-access\000legacy cache\000Local memory read accesses\000legacy-cache-config=6\000\00010\000\000\000\000\000 */
+{ 114720 }, /* node-load-miss\000legacy cache\000Local memory read misses\000legacy-cache-config=0x10006\000\00010\000\000\000\000\000 */
+{ 114629 }, /* node-load-misses\000legacy cache\000Local memory read misses\000legacy-cache-config=0x10006\000\00000\000\000\000\000\000 */
+{ 114458 }, /* node-load-ops\000legacy cache\000Local memory read accesses\000legacy-cache-config=6\000\00010\000\000\000\000\000 */
+{ 114368 }, /* node-load-reference\000legacy cache\000Local memory read accesses\000legacy-cache-config=6\000\00010\000\000\000\000\000 */
+{ 114283 }, /* node-load-refs\000legacy cache\000Local memory read accesses\000legacy-cache-config=6\000\00010\000\000\000\000\000 */
+{ 114809 }, /* node-loads\000legacy cache\000Local memory read accesses\000legacy-cache-config=6\000\00000\000\000\000\000\000 */
+{ 115152 }, /* node-loads-access\000legacy cache\000Local memory read accesses\000legacy-cache-config=6\000\00010\000\000\000\000\000 */
+{ 115332 }, /* node-loads-miss\000legacy cache\000Local memory read misses\000legacy-cache-config=0x10006\000\00010\000\000\000\000\000 */
+{ 115240 }, /* node-loads-misses\000legacy cache\000Local memory read misses\000legacy-cache-config=0x10006\000\00010\000\000\000\000\000 */
+{ 115067 }, /* node-loads-ops\000legacy cache\000Local memory read accesses\000legacy-cache-config=6\000\00010\000\000\000\000\000 */
+{ 114976 }, /* node-loads-reference\000legacy cache\000Local memory read accesses\000legacy-cache-config=6\000\00010\000\000\000\000\000 */
+{ 114890 }, /* node-loads-refs\000legacy cache\000Local memory read accesses\000legacy-cache-config=6\000\00010\000\000\000\000\000 */
+{ 121221 }, /* node-miss\000legacy cache\000Local memory read misses\000legacy-cache-config=0x10006\000\00010\000\000\000\000\000 */
+{ 121135 }, /* node-misses\000legacy cache\000Local memory read misses\000legacy-cache-config=0x10006\000\00010\000\000\000\000\000 */
+{ 120974 }, /* node-ops\000legacy cache\000Local memory read accesses\000legacy-cache-config=6\000\00010\000\000\000\000\000 */
+{ 117955 }, /* node-prefetch\000legacy cache\000Local memory prefetch accesses\000legacy-cache-config=0x206\000\00010\000\000\000\000\000 */
+{ 118342 }, /* node-prefetch-access\000legacy cache\000Local memory prefetch accesses\000legacy-cache-config=0x206\000\00010\000\000\000\000\000 */
+{ 118540 }, /* node-prefetch-miss\000legacy cache\000Local memory prefetch misses\000legacy-cache-config=0x10206\000\00010\000\000\000\000\000 */
+{ 118441 }, /* node-prefetch-misses\000legacy cache\000Local memory prefetch misses\000legacy-cache-config=0x10206\000\00000\000\000\000\000\000 */
+{ 118246 }, /* node-prefetch-ops\000legacy cache\000Local memory prefetch accesses\000legacy-cache-config=0x206\000\00010\000\000\000\000\000 */
+{ 118144 }, /* node-prefetch-reference\000legacy cache\000Local memory prefetch accesses\000legacy-cache-config=0x206\000\00010\000\000\000\000\000 */
+{ 118047 }, /* node-prefetch-refs\000legacy cache\000Local memory prefetch accesses\000legacy-cache-config=0x206\000\00010\000\000\000\000\000 */
+{ 118637 }, /* node-prefetches\000legacy cache\000Local memory prefetch accesses\000legacy-cache-config=0x206\000\00000\000\000\000\000\000 */
+{ 119032 }, /* node-prefetches-access\000legacy cache\000Local memory prefetch accesses\000legacy-cache-config=0x206\000\00010\000\000\000\000\000 */
+{ 119234 }, /* node-prefetches-miss\000legacy cache\000Local memory prefetch misses\000legacy-cache-config=0x10206\000\00010\000\000\000\000\000 */
+{ 119133 }, /* node-prefetches-misses\000legacy cache\000Local memory prefetch misses\000legacy-cache-config=0x10206\000\00010\000\000\000\000\000 */
+{ 118934 }, /* node-prefetches-ops\000legacy cache\000Local memory prefetch accesses\000legacy-cache-config=0x206\000\00010\000\000\000\000\000 */
+{ 118830 }, /* node-prefetches-reference\000legacy cache\000Local memory prefetch accesses\000legacy-cache-config=0x206\000\00010\000\000\000\000\000 */
+{ 118731 }, /* node-prefetches-refs\000legacy cache\000Local memory prefetch accesses\000legacy-cache-config=0x206\000\00010\000\000\000\000\000 */
+{ 115422 }, /* node-read\000legacy cache\000Local memory read accesses\000legacy-cache-config=6\000\00010\000\000\000\000\000 */
+{ 115761 }, /* node-read-access\000legacy cache\000Local memory read accesses\000legacy-cache-config=6\000\00010\000\000\000\000\000 */
+{ 115939 }, /* node-read-miss\000legacy cache\000Local memory read misses\000legacy-cache-config=0x10006\000\00010\000\000\000\000\000 */
+{ 115848 }, /* node-read-misses\000legacy cache\000Local memory read misses\000legacy-cache-config=0x10006\000\00010\000\000\000\000\000 */
+{ 115677 }, /* node-read-ops\000legacy cache\000Local memory read accesses\000legacy-cache-config=6\000\00010\000\000\000\000\000 */
+{ 115587 }, /* node-read-reference\000legacy cache\000Local memory read accesses\000legacy-cache-config=6\000\00010\000\000\000\000\000 */
+{ 115502 }, /* node-read-refs\000legacy cache\000Local memory read accesses\000legacy-cache-config=6\000\00010\000\000\000\000\000 */
+{ 120889 }, /* node-reference\000legacy cache\000Local memory read accesses\000legacy-cache-config=6\000\00010\000\000\000\000\000 */
+{ 120809 }, /* node-refs\000legacy cache\000Local memory read accesses\000legacy-cache-config=6\000\00010\000\000\000\000\000 */
+{ 120071 }, /* node-speculative-load\000legacy cache\000Local memory prefetch accesses\000legacy-cache-config=0x206\000\00010\000\000\000\000\000 */
+{ 120490 }, /* node-speculative-load-access\000legacy cache\000Local memory prefetch accesses\000legacy-cache-config=0x206\000\00010\000\000\000\000\000 */
+{ 120704 }, /* node-speculative-load-miss\000legacy cache\000Local memory prefetch misses\000legacy-cache-config=0x10206\000\00010\000\000\000\000\000 */
+{ 120597 }, /* node-speculative-load-misses\000legacy cache\000Local memory prefetch misses\000legacy-cache-config=0x10206\000\00010\000\000\000\000\000 */
+{ 120386 }, /* node-speculative-load-ops\000legacy cache\000Local memory prefetch accesses\000legacy-cache-config=0x206\000\00010\000\000\000\000\000 */
+{ 120276 }, /* node-speculative-load-reference\000legacy cache\000Local memory prefetch accesses\000legacy-cache-config=0x206\000\00010\000\000\000\000\000 */
+{ 120171 }, /* node-speculative-load-refs\000legacy cache\000Local memory prefetch accesses\000legacy-cache-config=0x206\000\00010\000\000\000\000\000 */
+{ 119333 }, /* node-speculative-read\000legacy cache\000Local memory prefetch accesses\000legacy-cache-config=0x206\000\00010\000\000\000\000\000 */
+{ 119752 }, /* node-speculative-read-access\000legacy cache\000Local memory prefetch accesses\000legacy-cache-config=0x206\000\00010\000\000\000\000\000 */
+{ 119966 }, /* node-speculative-read-miss\000legacy cache\000Local memory prefetch misses\000legacy-cache-config=0x10206\000\00010\000\000\000\000\000 */
+{ 119859 }, /* node-speculative-read-misses\000legacy cache\000Local memory prefetch misses\000legacy-cache-config=0x10206\000\00010\000\000\000\000\000 */
+{ 119648 }, /* node-speculative-read-ops\000legacy cache\000Local memory prefetch accesses\000legacy-cache-config=0x206\000\00010\000\000\000\000\000 */
+{ 119538 }, /* node-speculative-read-reference\000legacy cache\000Local memory prefetch accesses\000legacy-cache-config=0x206\000\00010\000\000\000\000\000 */
+{ 119433 }, /* node-speculative-read-refs\000legacy cache\000Local memory prefetch accesses\000legacy-cache-config=0x206\000\00010\000\000\000\000\000 */
+{ 116028 }, /* node-store\000legacy cache\000Local memory write accesses\000legacy-cache-config=0x106\000\00010\000\000\000\000\000 */
+{ 116391 }, /* node-store-access\000legacy cache\000Local memory write accesses\000legacy-cache-config=0x106\000\00010\000\000\000\000\000 */
+{ 116577 }, /* node-store-miss\000legacy cache\000Local memory write misses\000legacy-cache-config=0x10106\000\00010\000\000\000\000\000 */
+{ 116484 }, /* node-store-misses\000legacy cache\000Local memory write misses\000legacy-cache-config=0x10106\000\00000\000\000\000\000\000 */
+{ 116301 }, /* node-store-ops\000legacy cache\000Local memory write accesses\000legacy-cache-config=0x106\000\00010\000\000\000\000\000 */
+{ 116205 }, /* node-store-reference\000legacy cache\000Local memory write accesses\000legacy-cache-config=0x106\000\00010\000\000\000\000\000 */
+{ 116114 }, /* node-store-refs\000legacy cache\000Local memory write accesses\000legacy-cache-config=0x106\000\00010\000\000\000\000\000 */
+{ 116668 }, /* node-stores\000legacy cache\000Local memory write accesses\000legacy-cache-config=0x106\000\00000\000\000\000\000\000 */
+{ 117035 }, /* node-stores-access\000legacy cache\000Local memory write accesses\000legacy-cache-config=0x106\000\00010\000\000\000\000\000 */
+{ 117223 }, /* node-stores-miss\000legacy cache\000Local memory write misses\000legacy-cache-config=0x10106\000\00010\000\000\000\000\000 */
+{ 117129 }, /* node-stores-misses\000legacy cache\000Local memory write misses\000legacy-cache-config=0x10106\000\00010\000\000\000\000\000 */
+{ 116944 }, /* node-stores-ops\000legacy cache\000Local memory write accesses\000legacy-cache-config=0x106\000\00010\000\000\000\000\000 */
+{ 116847 }, /* node-stores-reference\000legacy cache\000Local memory write accesses\000legacy-cache-config=0x106\000\00010\000\000\000\000\000 */
+{ 116755 }, /* node-stores-refs\000legacy cache\000Local memory write accesses\000legacy-cache-config=0x106\000\00010\000\000\000\000\000 */
+{ 117315 }, /* node-write\000legacy cache\000Local memory write accesses\000legacy-cache-config=0x106\000\00010\000\000\000\000\000 */
+{ 117678 }, /* node-write-access\000legacy cache\000Local memory write accesses\000legacy-cache-config=0x106\000\00010\000\000\000\000\000 */
+{ 117864 }, /* node-write-miss\000legacy cache\000Local memory write misses\000legacy-cache-config=0x10106\000\00010\000\000\000\000\000 */
+{ 117771 }, /* node-write-misses\000legacy cache\000Local memory write misses\000legacy-cache-config=0x10106\000\00010\000\000\000\000\000 */
+{ 117588 }, /* node-write-ops\000legacy cache\000Local memory write accesses\000legacy-cache-config=0x106\000\00010\000\000\000\000\000 */
+{ 117492 }, /* node-write-reference\000legacy cache\000Local memory write accesses\000legacy-cache-config=0x106\000\00010\000\000\000\000\000 */
+{ 117401 }, /* node-write-refs\000legacy cache\000Local memory write accesses\000legacy-cache-config=0x106\000\00010\000\000\000\000\000 */
+{ 123400 }, /* ref-cycles\000legacy hardware\000Total cycles; not affected by CPU frequency scaling\000legacy-hardware-config=9\000\00000\000\000\000\000\000 */
+{ 123094 }, /* stalled-cycles-backend\000legacy hardware\000Stalled cycles during retirement [This event is an alias of idle-cycles-backend]\000legacy-hardware-config=8\000\00000\000\000\000\000\000 */
+{ 122795 }, /* stalled-cycles-frontend\000legacy hardware\000Stalled cycles during issue [This event is an alias of idle-cycles-frontend]\000legacy-hardware-config=7\000\00000\000\000\000\000\000 */
+};
static const struct compact_pmu_event pmu_events__common_software[] = {
-{ 1035 }, /* alignment-faults\000software\000Number of kernel handled memory alignment faults\000config=7\000\00000\000\000\000\000\000 */
-{ 1334 }, /* bpf-output\000software\000An event used by BPF programs to write to the perf ring buffer\000config=0xa\000\00000\000\000\000\000\000 */
-{ 1436 }, /* cgroup-switches\000software\000Number of context switches to a task in a different cgroup\000config=0xb\000\00000\000\000\000\000\000 */
-{ 357 }, /* context-switches\000software\000Number of context switches [This event is an alias of cs]\000config=3\000\00000\000\000\000\000\000 */
-{ 9 }, /* cpu-clock\000software\000Per-CPU high-resolution timer based event\000config=0\000\00000\000\000\000\000\000 */
-{ 559 }, /* cpu-migrations\000software\000Number of times a process has migrated to a new CPU [This event is an alias of migrations]\000config=4\000\00000\000\000\000\000\000 */
-{ 458 }, /* cs\000software\000Number of context switches [This event is an alias of context-switches]\000config=3\000\00000\000\000\000\000\000 */
-{ 1254 }, /* dummy\000software\000A placeholder event that doesn't count anything\000config=9\000\00000\000\000\000\000\000 */
-{ 1127 }, /* emulation-faults\000software\000Number of kernel handled unimplemented instruction faults handled through emulation\000config=8\000\00000\000\000\000\000\000 */
-{ 167 }, /* faults\000software\000Number of page faults [This event is an alias of page-faults]\000config=2\000\00000\000\000\000\000\000 */
-{ 932 }, /* major-faults\000software\000Number of major page faults. Major faults require I/O to handle\000config=6\000\00000\000\000\000\000\000 */
-{ 691 }, /* migrations\000software\000Number of times a process has migrated to a new CPU [This event is an alias of cpu-migrations]\000config=4\000\00000\000\000\000\000\000 */
-{ 823 }, /* minor-faults\000software\000Number of minor page faults. Minor faults don't require I/O to handle\000config=5\000\00000\000\000\000\000\000 */
-{ 262 }, /* page-faults\000software\000Number of page faults [This event is an alias of faults]\000config=2\000\00000\000\000\000\000\000 */
-{ 87 }, /* task-clock\000software\000Per-task high-resolution timer based event\000config=1\000\00000\000\000\000\000\000 */
+{ 124563 }, /* alignment-faults\000software\000Number of kernel handled memory alignment faults\000config=7\000\00000\000\000\000\000\000 */
+{ 124862 }, /* bpf-output\000software\000An event used by BPF programs to write to the perf ring buffer\000config=0xa\000\00000\000\000\000\000\000 */
+{ 124964 }, /* cgroup-switches\000software\000Number of context switches to a task in a different cgroup\000config=0xb\000\00000\000\000\000\000\000 */
+{ 123885 }, /* context-switches\000software\000Number of context switches [This event is an alias of cs]\000config=3\000\00000\000\000\000\000\000 */
+{ 123521 }, /* cpu-clock\000software\000Per-CPU high-resolution timer based event\000config=0\000\000001e-6msec\000\000\000\000\000 */
+{ 124087 }, /* cpu-migrations\000software\000Number of times a process has migrated to a new CPU [This event is an alias of migrations]\000config=4\000\00000\000\000\000\000\000 */
+{ 123986 }, /* cs\000software\000Number of context switches [This event is an alias of context-switches]\000config=3\000\00000\000\000\000\000\000 */
+{ 124782 }, /* dummy\000software\000A placeholder event that doesn't count anything\000config=9\000\00000\000\000\000\000\000 */
+{ 124655 }, /* emulation-faults\000software\000Number of kernel handled unimplemented instruction faults handled through emulation\000config=8\000\00000\000\000\000\000\000 */
+{ 123695 }, /* faults\000software\000Number of page faults [This event is an alias of page-faults]\000config=2\000\00000\000\000\000\000\000 */
+{ 124460 }, /* major-faults\000software\000Number of major page faults. Major faults require I/O to handle\000config=6\000\00000\000\000\000\000\000 */
+{ 124219 }, /* migrations\000software\000Number of times a process has migrated to a new CPU [This event is an alias of cpu-migrations]\000config=4\000\00000\000\000\000\000\000 */
+{ 124351 }, /* minor-faults\000software\000Number of minor page faults. Minor faults don't require I/O to handle\000config=5\000\00000\000\000\000\000\000 */
+{ 123790 }, /* page-faults\000software\000Number of page faults [This event is an alias of faults]\000config=2\000\00000\000\000\000\000\000 */
+{ 123607 }, /* task-clock\000software\000Per-task high-resolution timer based event\000config=1\000\000001e-6msec\000\000\000\000\000 */
};
static const struct compact_pmu_event pmu_events__common_tool[] = {
-{ 1544 }, /* duration_time\000tool\000Wall clock interval time in nanoseconds\000config=1\000\00000\000\000\000\000\000 */
-{ 1758 }, /* has_pmem\000tool\0001 if persistent memory installed otherwise 0\000config=4\000\00000\000\000\000\000\000 */
-{ 1834 }, /* num_cores\000tool\000Number of cores. A core consists of 1 or more thread, with each thread being associated with a logical Linux CPU\000config=5\000\00000\000\000\000\000\000 */
-{ 1979 }, /* num_cpus\000tool\000Number of logical Linux CPUs. There may be multiple such CPUs on a core\000config=6\000\00000\000\000\000\000\000 */
-{ 2082 }, /* num_cpus_online\000tool\000Number of online logical Linux CPUs. There may be multiple such CPUs on a core\000config=7\000\00000\000\000\000\000\000 */
-{ 2199 }, /* num_dies\000tool\000Number of dies. Each die has 1 or more cores\000config=8\000\00000\000\000\000\000\000 */
-{ 2275 }, /* num_packages\000tool\000Number of packages. Each package has 1 or more die\000config=9\000\00000\000\000\000\000\000 */
-{ 2361 }, /* slots\000tool\000Number of functional units that in parallel can execute parts of an instruction\000config=0xa\000\00000\000\000\000\000\000 */
-{ 2471 }, /* smt_on\000tool\0001 if simultaneous multithreading (aka hyperthreading) is enable otherwise 0\000config=0xb\000\00000\000\000\000\000\000 */
-{ 1690 }, /* system_time\000tool\000System/kernel time in nanoseconds\000config=3\000\00000\000\000\000\000\000 */
-{ 2578 }, /* system_tsc_freq\000tool\000The amount a Time Stamp Counter (TSC) increases per second\000config=0xc\000\00000\000\000\000\000\000 */
-{ 1620 }, /* user_time\000tool\000User (non-kernel) time in nanoseconds\000config=2\000\00000\000\000\000\000\000 */
+{ 126205 }, /* core_wide\000tool\0001 if not SMT, if SMT are events being gathered on all SMT threads 1 otherwise 0\000config=0xd\000\00000\000\000\000\000\000 */
+{ 125072 }, /* duration_time\000tool\000Wall clock interval time in nanoseconds\000config=1\000\00000\000\000\000\000\000 */
+{ 125286 }, /* has_pmem\000tool\0001 if persistent memory installed otherwise 0\000config=4\000\00000\000\000\000\000\000 */
+{ 125362 }, /* num_cores\000tool\000Number of cores. A core consists of 1 or more thread, with each thread being associated with a logical Linux CPU\000config=5\000\00000\000\000\000\000\000 */
+{ 125507 }, /* num_cpus\000tool\000Number of logical Linux CPUs. There may be multiple such CPUs on a core\000config=6\000\00000\000\000\000\000\000 */
+{ 125610 }, /* num_cpus_online\000tool\000Number of online logical Linux CPUs. There may be multiple such CPUs on a core\000config=7\000\00000\000\000\000\000\000 */
+{ 125727 }, /* num_dies\000tool\000Number of dies. Each die has 1 or more cores\000config=8\000\00000\000\000\000\000\000 */
+{ 125803 }, /* num_packages\000tool\000Number of packages. Each package has 1 or more die\000config=9\000\00000\000\000\000\000\000 */
+{ 125889 }, /* slots\000tool\000Number of functional units that in parallel can execute parts of an instruction\000config=0xa\000\00000\000\000\000\000\000 */
+{ 125999 }, /* smt_on\000tool\0001 if simultaneous multithreading (aka hyperthreading) is enable otherwise 0\000config=0xb\000\00000\000\000\000\000\000 */
+{ 125218 }, /* system_time\000tool\000System/kernel time in nanoseconds\000config=3\000\00000\000\000\000\000\000 */
+{ 126106 }, /* system_tsc_freq\000tool\000The amount a Time Stamp Counter (TSC) increases per second\000config=0xc\000\00000\000\000\000\000\000 */
+{ 126319 }, /* target_cpu\000tool\0001 if CPUs being analyzed, 0 if threads/processes\000config=0xe\000\00000\000\000\000\000\000 */
+{ 125148 }, /* user_time\000tool\000User (non-kernel) time in nanoseconds\000config=2\000\00000\000\000\000\000\000 */
};
-const struct pmu_table_entry pmu_events__common[] = {
+static const struct pmu_table_entry pmu_events__common[] = {
+{
+ .entries = pmu_events__common_default_core,
+ .num_entries = ARRAY_SIZE(pmu_events__common_default_core),
+ .pmu_name = { 0 /* default_core\000 */ },
+},
{
.entries = pmu_events__common_software,
.num_entries = ARRAY_SIZE(pmu_events__common_software),
- .pmu_name = { 0 /* software\000 */ },
+ .pmu_name = { 123512 /* software\000 */ },
},
{
.entries = pmu_events__common_tool,
.num_entries = ARRAY_SIZE(pmu_events__common_tool),
- .pmu_name = { 1539 /* tool\000 */ },
+ .pmu_name = { 125067 /* tool\000 */ },
+},
+};
+
+static const struct compact_pmu_event pmu_metrics__common_default_core[] = {
+{ 127956 }, /* CPUs_utilized\000Default\000(software@cpu\\-clock\\,name\\=cpu\\-clock@ if #target_cpu else software@task\\-clock\\,name\\=task\\-clock@) / (duration_time * 1e9)\000\000Average CPU utilization\000\0001CPUs\000\000\000\000011 */
+{ 129273 }, /* backend_cycles_idle\000Default\000stalled\\-cycles\\-backend / cpu\\-cycles\000backend_cycles_idle > 0.2\000Backend stalls per cycle\000\000\000\000\000\000001 */
+{ 129575 }, /* branch_frequency\000Default\000branches / (software@cpu\\-clock\\,name\\=cpu\\-clock@ if #target_cpu else software@task\\-clock\\,name\\=task\\-clock@)\000\000Branches per CPU second\000\0001000M/sec\000\000\000\000011 */
+{ 129755 }, /* branch_miss_rate\000Default\000branch\\-misses / branches\000branch_miss_rate > 0.05\000Branch miss rate\000\000100%\000\000\000\000001 */
+{ 128142 }, /* cs_per_second\000Default\000software@context\\-switches\\,name\\=context\\-switches@ * 1e9 / (software@cpu\\-clock\\,name\\=cpu\\-clock@ if #target_cpu else software@task\\-clock\\,name\\=task\\-clock@)\000\000Context switches per CPU second\000\0001cs/sec\000\000\000\000011 */
+{ 129399 }, /* cycles_frequency\000Default\000cpu\\-cycles / (software@cpu\\-clock\\,name\\=cpu\\-clock@ if #target_cpu else software@task\\-clock\\,name\\=task\\-clock@)\000\000Cycles per CPU second\000\0001GHz\000\000\000\000011 */
+{ 130191 }, /* dtlb_miss_rate\000Default3\000dTLB\\-load\\-misses / dTLB\\-loads\000dtlb_miss_rate > 0.05\000dTLB miss rate\000\000100%\000\000\000\000001 */
+{ 129143 }, /* frontend_cycles_idle\000Default\000stalled\\-cycles\\-frontend / cpu\\-cycles\000frontend_cycles_idle > 0.1\000Frontend stalls per cycle\000\000\000\000\000\000001 */
+{ 128866 }, /* insn_per_cycle\000Default\000instructions / cpu\\-cycles\000insn_per_cycle < 1\000Instructions Per Cycle\000\0001instructions\000\000\000\000001 */
+{ 130297 }, /* itlb_miss_rate\000Default3\000iTLB\\-load\\-misses / iTLB\\-loads\000itlb_miss_rate > 0.05\000iTLB miss rate\000\000100%\000\000\000\000001 */
+{ 130403 }, /* l1_prefetch_miss_rate\000Default4\000L1\\-dcache\\-prefetch\\-misses / L1\\-dcache\\-prefetches\000l1_prefetch_miss_rate > 0.05\000L1 prefetch miss rate\000\000100%\000\000\000\000001 */
+{ 129859 }, /* l1d_miss_rate\000Default2\000L1\\-dcache\\-load\\-misses / L1\\-dcache\\-loads\000l1d_miss_rate > 0.05\000L1D miss rate\000\000100%\000\000\000\000001 */
+{ 130076 }, /* l1i_miss_rate\000Default3\000L1\\-icache\\-load\\-misses / L1\\-icache\\-loads\000l1i_miss_rate > 0.05\000L1I miss rate\000\000100%\000\000\000\000001 */
+{ 129975 }, /* llc_miss_rate\000Default2\000LLC\\-load\\-misses / LLC\\-loads\000llc_miss_rate > 0.05\000LLC miss rate\000\000100%\000\000\000\000001 */
+{ 128375 }, /* migrations_per_second\000Default\000software@cpu\\-migrations\\,name\\=cpu\\-migrations@ * 1e9 / (software@cpu\\-clock\\,name\\=cpu\\-clock@ if #target_cpu else software@task\\-clock\\,name\\=task\\-clock@)\000\000Process migrations to a new CPU per CPU second\000\0001migrations/sec\000\000\000\000011 */
+{ 128635 }, /* page_faults_per_second\000Default\000software@page\\-faults\\,name\\=page\\-faults@ * 1e9 / (software@cpu\\-clock\\,name\\=cpu\\-clock@ if #target_cpu else software@task\\-clock\\,name\\=task\\-clock@)\000\000Page faults per CPU second\000\0001faults/sec\000\000\000\000011 */
+{ 128979 }, /* stalled_cycles_per_instruction\000Default\000max(stalled\\-cycles\\-frontend, stalled\\-cycles\\-backend) / instructions\000\000Max front or backend stalls per instruction\000\000\000\000\000\000001 */
+
+};
+
+static const struct pmu_table_entry pmu_metrics__common[] = {
+{
+ .entries = pmu_metrics__common_default_core,
+ .num_entries = ARRAY_SIZE(pmu_metrics__common_default_core),
+ .pmu_name = { 0 /* default_core\000 */ },
},
};
static const struct compact_pmu_event pmu_events__test_soc_cpu_default_core[] = {
-{ 2690 }, /* bp_l1_btb_correct\000branch\000L1 BTB Correction\000event=0x8a\000\00000\000\000\000\000\000 */
-{ 2752 }, /* bp_l2_btb_correct\000branch\000L2 BTB Correction\000event=0x8b\000\00000\000\000\000\000\000 */
-{ 3014 }, /* dispatch_blocked.any\000other\000Memory cluster signals to block micro-op dispatch for any reason\000event=9,period=200000,umask=0x20\000\00000\000\000\000\000\000 */
-{ 3147 }, /* eist_trans\000other\000Number of Enhanced Intel SpeedStep(R) Technology (EIST) transitions\000event=0x3a,period=200000\000\00000\000\000\000\000\000 */
-{ 2814 }, /* l3_cache_rd\000cache\000L3 cache access, read\000event=0x40\000\00000\000\000\000\000Attributable Level 3 cache access, read\000 */
-{ 2912 }, /* segment_reg_loads.any\000other\000Number of segment register loads\000event=6,period=200000,umask=0x80\000\00000\000\000\000\000\000 */
+{ 126403 }, /* bp_l1_btb_correct\000branch\000L1 BTB Correction\000event=0x8a\000\00000\000\000\000\000\000 */
+{ 126465 }, /* bp_l2_btb_correct\000branch\000L2 BTB Correction\000event=0x8b\000\00000\000\000\000\000\000 */
+{ 126727 }, /* dispatch_blocked.any\000other\000Memory cluster signals to block micro-op dispatch for any reason\000event=9,period=200000,umask=0x20\000\00000\000\000\000\000\000 */
+{ 126860 }, /* eist_trans\000other\000Number of Enhanced Intel SpeedStep(R) Technology (EIST) transitions\000event=0x3a,period=200000\000\00000\000\000\000\000\000 */
+{ 126527 }, /* l3_cache_rd\000cache\000L3 cache access, read\000event=0x40\000\00000\000\000\000\000Attributable Level 3 cache access, read\000 */
+{ 126625 }, /* segment_reg_loads.any\000other\000Number of segment register loads\000event=6,period=200000,umask=0x80\000\00000\000\000\000\000\000 */
};
static const struct compact_pmu_event pmu_events__test_soc_cpu_hisi_sccl_ddrc[] = {
-{ 3280 }, /* uncore_hisi_ddrc.flux_wcmd\000uncore\000DDRC write commands\000event=2\000\00000\000\000\000\000\000 */
+{ 126993 }, /* uncore_hisi_ddrc.flux_wcmd\000uncore\000DDRC write commands\000event=2\000\00000\000\000\000\000\000 */
};
static const struct compact_pmu_event pmu_events__test_soc_cpu_hisi_sccl_l3c[] = {
-{ 3642 }, /* uncore_hisi_l3c.rd_hit_cpipe\000uncore\000Total read hits\000event=7\000\00000\000\000\000\000\000 */
+{ 127355 }, /* uncore_hisi_l3c.rd_hit_cpipe\000uncore\000Total read hits\000event=7\000\00000\000\000\000\000\000 */
};
static const struct compact_pmu_event pmu_events__test_soc_cpu_uncore_cbox[] = {
-{ 3516 }, /* event-hyphen\000uncore\000UNC_CBO_HYPHEN\000event=0xe0\000\00000\000\000\000\000\000 */
-{ 3570 }, /* event-two-hyph\000uncore\000UNC_CBO_TWO_HYPH\000event=0xc0\000\00000\000\000\000\000\000 */
-{ 3362 }, /* unc_cbo_xsnp_response.miss_eviction\000uncore\000A cross-core snoop resulted from L3 Eviction which misses in some processor core\000event=0x22,umask=0x81\000\00000\000\000\000\000\000 */
+{ 127229 }, /* event-hyphen\000uncore\000UNC_CBO_HYPHEN\000event=0xe0\000\00000\000\000\000\000\000 */
+{ 127283 }, /* event-two-hyph\000uncore\000UNC_CBO_TWO_HYPH\000event=0xc0\000\00000\000\000\000\000\000 */
+{ 127075 }, /* unc_cbo_xsnp_response.miss_eviction\000uncore\000A cross-core snoop resulted from L3 Eviction which misses in some processor core\000event=0x22,umask=0x81\000\00000\000\000\000\000\000 */
};
static const struct compact_pmu_event pmu_events__test_soc_cpu_uncore_imc[] = {
-{ 3825 }, /* uncore_imc.cache_hits\000uncore\000Total cache hits\000event=0x34\000\00000\000\000\000\000\000 */
+{ 127538 }, /* uncore_imc.cache_hits\000uncore\000Total cache hits\000event=0x34\000\00000\000\000\000\000\000 */
};
static const struct compact_pmu_event pmu_events__test_soc_cpu_uncore_imc_free_running[] = {
-{ 3734 }, /* uncore_imc_free_running.cache_miss\000uncore\000Total cache misses\000event=0x12\000\00000\000\000\000\000\000 */
+{ 127447 }, /* uncore_imc_free_running.cache_miss\000uncore\000Total cache misses\000event=0x12\000\00000\000\000\000\000\000 */
};
-const struct pmu_table_entry pmu_events__test_soc_cpu[] = {
+static const struct pmu_table_entry pmu_events__test_soc_cpu[] = {
{
.entries = pmu_events__test_soc_cpu_default_core,
.num_entries = ARRAY_SIZE(pmu_events__test_soc_cpu_default_core),
- .pmu_name = { 2677 /* default_core\000 */ },
+ .pmu_name = { 0 /* default_core\000 */ },
},
{
.entries = pmu_events__test_soc_cpu_hisi_sccl_ddrc,
.num_entries = ARRAY_SIZE(pmu_events__test_soc_cpu_hisi_sccl_ddrc),
- .pmu_name = { 3265 /* hisi_sccl,ddrc\000 */ },
+ .pmu_name = { 126978 /* hisi_sccl,ddrc\000 */ },
},
{
.entries = pmu_events__test_soc_cpu_hisi_sccl_l3c,
.num_entries = ARRAY_SIZE(pmu_events__test_soc_cpu_hisi_sccl_l3c),
- .pmu_name = { 3628 /* hisi_sccl,l3c\000 */ },
+ .pmu_name = { 127341 /* hisi_sccl,l3c\000 */ },
},
{
.entries = pmu_events__test_soc_cpu_uncore_cbox,
.num_entries = ARRAY_SIZE(pmu_events__test_soc_cpu_uncore_cbox),
- .pmu_name = { 3350 /* uncore_cbox\000 */ },
+ .pmu_name = { 127063 /* uncore_cbox\000 */ },
},
{
.entries = pmu_events__test_soc_cpu_uncore_imc,
.num_entries = ARRAY_SIZE(pmu_events__test_soc_cpu_uncore_imc),
- .pmu_name = { 3814 /* uncore_imc\000 */ },
+ .pmu_name = { 127527 /* uncore_imc\000 */ },
},
{
.entries = pmu_events__test_soc_cpu_uncore_imc_free_running,
.num_entries = ARRAY_SIZE(pmu_events__test_soc_cpu_uncore_imc_free_running),
- .pmu_name = { 3710 /* uncore_imc_free_running\000 */ },
+ .pmu_name = { 127423 /* uncore_imc_free_running\000 */ },
},
};
static const struct compact_pmu_event pmu_metrics__test_soc_cpu_default_core[] = {
-{ 4243 }, /* CPI\000\0001 / IPC\000\000\000\000\000\000\000\00000 */
-{ 4924 }, /* DCache_L2_All\000\000DCache_L2_All_Hits + DCache_L2_All_Miss\000\000\000\000\000\000\000\00000 */
-{ 4696 }, /* DCache_L2_All_Hits\000\000l2_rqsts.demand_data_rd_hit + l2_rqsts.pf_hit + l2_rqsts.rfo_hit\000\000\000\000\000\000\000\00000 */
-{ 4790 }, /* DCache_L2_All_Miss\000\000max(l2_rqsts.all_demand_data_rd - l2_rqsts.demand_data_rd_hit, 0) + l2_rqsts.pf_miss + l2_rqsts.rfo_miss\000\000\000\000\000\000\000\00000 */
-{ 4988 }, /* DCache_L2_Hits\000\000d_ratio(DCache_L2_All_Hits, DCache_L2_All)\000\000\000\000\000\000\000\00000 */
-{ 5056 }, /* DCache_L2_Misses\000\000d_ratio(DCache_L2_All_Miss, DCache_L2_All)\000\000\000\000\000\000\000\00000 */
-{ 4328 }, /* Frontend_Bound_SMT\000\000idq_uops_not_delivered.core / (4 * (cpu_clk_unhalted.thread / 2 * (1 + cpu_clk_unhalted.one_thread_active / cpu_clk_unhalted.ref_xclk)))\000\000\000\000\000\000\000\00000 */
-{ 4265 }, /* IPC\000group1\000inst_retired.any / cpu_clk_unhalted.thread\000\000\000\000\000\000\000\00000 */
-{ 5190 }, /* L1D_Cache_Fill_BW\000\00064 * l1d.replacement / 1e9 / duration_time\000\000\000\000\000\000\000\00000 */
-{ 5126 }, /* M1\000\000ipc + M2\000\000\000\000\000\000\000\00000 */
-{ 5148 }, /* M2\000\000ipc + M1\000\000\000\000\000\000\000\00000 */
-{ 5170 }, /* M3\000\0001 / M3\000\000\000\000\000\000\000\00000 */
-{ 4625 }, /* cache_miss_cycles\000group1\000dcache_miss_cpi + icache_miss_cycles\000\000\000\000\000\000\000\00000 */
-{ 4494 }, /* dcache_miss_cpi\000\000l1d\\-loads\\-misses / inst_retired.any\000\000\000\000\000\000\000\00000 */
-{ 4558 }, /* icache_miss_cycles\000\000l1i\\-loads\\-misses / inst_retired.any\000\000\000\000\000\000\000\00000 */
+{ 130551 }, /* CPI\000\0001 / IPC\000\000\000\000\000\000\000\000000 */
+{ 131240 }, /* DCache_L2_All\000\000DCache_L2_All_Hits + DCache_L2_All_Miss\000\000\000\000\000\000\000\000000 */
+{ 131010 }, /* DCache_L2_All_Hits\000\000l2_rqsts.demand_data_rd_hit + l2_rqsts.pf_hit + l2_rqsts.rfo_hit\000\000\000\000\000\000\000\000000 */
+{ 131105 }, /* DCache_L2_All_Miss\000\000max(l2_rqsts.all_demand_data_rd - l2_rqsts.demand_data_rd_hit, 0) + l2_rqsts.pf_miss + l2_rqsts.rfo_miss\000\000\000\000\000\000\000\000000 */
+{ 131305 }, /* DCache_L2_Hits\000\000d_ratio(DCache_L2_All_Hits, DCache_L2_All)\000\000\000\000\000\000\000\000000 */
+{ 131374 }, /* DCache_L2_Misses\000\000d_ratio(DCache_L2_All_Miss, DCache_L2_All)\000\000\000\000\000\000\000\000000 */
+{ 130638 }, /* Frontend_Bound_SMT\000\000idq_uops_not_delivered.core / (4 * (cpu_clk_unhalted.thread / 2 * (1 + cpu_clk_unhalted.one_thread_active / cpu_clk_unhalted.ref_xclk)))\000\000\000\000\000\000\000\000000 */
+{ 130574 }, /* IPC\000group1\000inst_retired.any / cpu_clk_unhalted.thread\000\000\000\000\000\000\000\000000 */
+{ 131512 }, /* L1D_Cache_Fill_BW\000\00064 * l1d.replacement / 1e9 / duration_time\000\000\000\000\000\000\000\000000 */
+{ 131445 }, /* M1\000\000ipc + M2\000\000\000\000\000\000\000\000000 */
+{ 131468 }, /* M2\000\000ipc + M1\000\000\000\000\000\000\000\000000 */
+{ 131491 }, /* M3\000\0001 / M3\000\000\000\000\000\000\000\000000 */
+{ 130938 }, /* cache_miss_cycles\000group1\000dcache_miss_cpi + icache_miss_cycles\000\000\000\000\000\000\000\000000 */
+{ 130805 }, /* dcache_miss_cpi\000\000l1d\\-loads\\-misses / inst_retired.any\000\000\000\000\000\000\000\000000 */
+{ 130870 }, /* icache_miss_cycles\000\000l1i\\-loads\\-misses / inst_retired.any\000\000\000\000\000\000\000\000000 */
};
-const struct pmu_table_entry pmu_metrics__test_soc_cpu[] = {
+static const struct pmu_table_entry pmu_metrics__test_soc_cpu[] = {
{
.entries = pmu_metrics__test_soc_cpu_default_core,
.num_entries = ARRAY_SIZE(pmu_metrics__test_soc_cpu_default_core),
- .pmu_name = { 2677 /* default_core\000 */ },
+ .pmu_name = { 0 /* default_core\000 */ },
},
};
static const struct compact_pmu_event pmu_events__test_soc_sys_uncore_sys_ccn_pmu[] = {
-{ 4004 }, /* sys_ccn_pmu.read_cycles\000uncore\000ccn read-cycles event\000config=0x2c\0000x01\00000\000\000\000\000\000 */
+{ 127717 }, /* sys_ccn_pmu.read_cycles\000uncore\000ccn read-cycles event\000config=0x2c\0000x01\00000\000\000\000\000\000 */
};
static const struct compact_pmu_event pmu_events__test_soc_sys_uncore_sys_cmn_pmu[] = {
-{ 4100 }, /* sys_cmn_pmu.hnf_cache_miss\000uncore\000Counts total cache misses in first lookup result (high priority)\000eventid=1,type=5\000(434|436|43c|43a).*\00000\000\000\000\000\000 */
+{ 127813 }, /* sys_cmn_pmu.hnf_cache_miss\000uncore\000Counts total cache misses in first lookup result (high priority)\000eventid=1,type=5\000(434|436|43c|43a).*\00000\000\000\000\000\000 */
};
static const struct compact_pmu_event pmu_events__test_soc_sys_uncore_sys_ddr_pmu[] = {
-{ 3909 }, /* sys_ddr_pmu.write_cycles\000uncore\000ddr write-cycles event\000event=0x2b\000v8\00000\000\000\000\000\000 */
+{ 127622 }, /* sys_ddr_pmu.write_cycles\000uncore\000ddr write-cycles event\000event=0x2b\000v8\00000\000\000\000\000\000 */
};
-const struct pmu_table_entry pmu_events__test_soc_sys[] = {
+static const struct pmu_table_entry pmu_events__test_soc_sys[] = {
{
.entries = pmu_events__test_soc_sys_uncore_sys_ccn_pmu,
.num_entries = ARRAY_SIZE(pmu_events__test_soc_sys_uncore_sys_ccn_pmu),
- .pmu_name = { 3985 /* uncore_sys_ccn_pmu\000 */ },
+ .pmu_name = { 127698 /* uncore_sys_ccn_pmu\000 */ },
},
{
.entries = pmu_events__test_soc_sys_uncore_sys_cmn_pmu,
.num_entries = ARRAY_SIZE(pmu_events__test_soc_sys_uncore_sys_cmn_pmu),
- .pmu_name = { 4081 /* uncore_sys_cmn_pmu\000 */ },
+ .pmu_name = { 127794 /* uncore_sys_cmn_pmu\000 */ },
},
{
.entries = pmu_events__test_soc_sys_uncore_sys_ddr_pmu,
.num_entries = ARRAY_SIZE(pmu_events__test_soc_sys_uncore_sys_ddr_pmu),
- .pmu_name = { 3890 /* uncore_sys_ddr_pmu\000 */ },
+ .pmu_name = { 127603 /* uncore_sys_ddr_pmu\000 */ },
},
};
@@ -284,7 +2801,7 @@ struct pmu_events_map {
* Global table mapping each known CPU for the architecture to its
* table of PMU events.
*/
-const struct pmu_events_map pmu_events_map[] = {
+static const struct pmu_events_map pmu_events_map[] = {
{
.arch = "common",
.cpuid = "common",
@@ -292,7 +2809,10 @@ const struct pmu_events_map pmu_events_map[] = {
.pmus = pmu_events__common,
.num_pmus = ARRAY_SIZE(pmu_events__common),
},
- .metric_table = {},
+ .metric_table = {
+ .pmus = pmu_metrics__common,
+ .num_pmus = ARRAY_SIZE(pmu_metrics__common),
+ },
},
{
.arch = "testarch",
@@ -390,6 +2910,8 @@ static void decompress_metric(int offset, struct pmu_metric *pm)
pm->aggr_mode = *p - '0';
p++;
pm->event_grouping = *p - '0';
+ p++;
+ pm->default_show_events = *p - '0';
}
static int pmu_events_table__for_each_event_pmu(const struct pmu_events_table *table,
@@ -461,6 +2983,8 @@ int pmu_events_table__for_each_event(const struct pmu_events_table *table,
pmu_event_iter_fn fn,
void *data)
{
+ if (!table)
+ return 0;
for (size_t i = 0; i < table->num_pmus; i++) {
const struct pmu_table_entry *table_pmu = &table->pmus[i];
const char *pmu_name = &big_c_string[table_pmu->pmu_name.offset];
@@ -482,6 +3006,8 @@ int pmu_events_table__find_event(const struct pmu_events_table *table,
pmu_event_iter_fn fn,
void *data)
{
+ if (!table)
+ return PMU_EVENTS__NOT_FOUND;
for (size_t i = 0; i < table->num_pmus; i++) {
const struct pmu_table_entry *table_pmu = &table->pmus[i];
const char *pmu_name = &big_c_string[table_pmu->pmu_name.offset];
@@ -502,6 +3028,8 @@ size_t pmu_events_table__num_events(const struct pmu_events_table *table,
{
size_t count = 0;
+ if (!table)
+ return 0;
for (size_t i = 0; i < table->num_pmus; i++) {
const struct pmu_table_entry *table_pmu = &table->pmus[i];
const char *pmu_name = &big_c_string[table_pmu->pmu_name.offset];
@@ -580,6 +3108,8 @@ int pmu_metrics_table__for_each_metric(const struct pmu_metrics_table *table,
pmu_metric_iter_fn fn,
void *data)
{
+ if (!table)
+ return 0;
for (size_t i = 0; i < table->num_pmus; i++) {
int ret = pmu_metrics_table__for_each_metric_pmu(table, &table->pmus[i],
fn, data);
@@ -596,6 +3126,8 @@ int pmu_metrics_table__find_metric(const struct pmu_metrics_table *table,
pmu_metric_iter_fn fn,
void *data)
{
+ if (!table)
+ return 0;
for (size_t i = 0; i < table->num_pmus; i++) {
const struct pmu_table_entry *table_pmu = &table->pmus[i];
const char *pmu_name = &big_c_string[table_pmu->pmu_name.offset];
@@ -707,6 +3239,22 @@ const struct pmu_events_table *perf_pmu__find_events_table(struct perf_pmu *pmu)
return NULL;
}
+const struct pmu_events_table *perf_pmu__default_core_events_table(void)
+{
+ int i = 0;
+
+ for (;;) {
+ const struct pmu_events_map *map = &pmu_events_map[i++];
+
+ if (!map->arch)
+ break;
+
+ if (!strcmp(map->cpuid, "common"))
+ return &map->event_table;
+ }
+ return NULL;
+}
+
const struct pmu_metrics_table *pmu_metrics_table__find(void)
{
struct perf_cpu cpu = {-1};
@@ -715,6 +3263,22 @@ const struct pmu_metrics_table *pmu_metrics_table__find(void)
return map ? &map->metric_table : NULL;
}
+const struct pmu_metrics_table *pmu_metrics_table__default(void)
+{
+ int i = 0;
+
+ for (;;) {
+ const struct pmu_events_map *map = &pmu_events_map[i++];
+
+ if (!map->arch)
+ break;
+
+ if (!strcmp(map->cpuid, "common"))
+ return &map->metric_table;
+ }
+ return NULL;
+}
+
const struct pmu_events_table *find_core_events_table(const char *arch, const char *cpuid)
{
for (const struct pmu_events_map *tables = &pmu_events_map[0];
diff --git a/tools/perf/pmu-events/jevents.py b/tools/perf/pmu-events/jevents.py
index 168c044dd7cc..3a1bcdcdc685 100755
--- a/tools/perf/pmu-events/jevents.py
+++ b/tools/perf/pmu-events/jevents.py
@@ -58,10 +58,12 @@ _json_event_attributes = [
_json_metric_attributes = [
'metric_name', 'metric_group', 'metric_expr', 'metric_threshold',
'desc', 'long_desc', 'unit', 'compat', 'metricgroup_no_group',
- 'default_metricgroup_name', 'aggr_mode', 'event_grouping'
+ 'default_metricgroup_name', 'aggr_mode', 'event_grouping',
+ 'default_show_events'
]
# Attributes that are bools or enum int values, encoded as '0', '1',...
-_json_enum_attributes = ['aggr_mode', 'deprecated', 'event_grouping', 'perpkg']
+_json_enum_attributes = ['aggr_mode', 'deprecated', 'event_grouping', 'perpkg',
+ 'default_show_events']
def removesuffix(s: str, suffix: str) -> str:
"""Remove the suffix from a string
@@ -325,6 +327,8 @@ class JsonEvent:
eventcode |= int(jd['ExtSel']) << 8
configcode = int(jd['ConfigCode'], 0) if 'ConfigCode' in jd else None
eventidcode = int(jd['EventidCode'], 0) if 'EventidCode' in jd else None
+ legacy_hw_config = int(jd['LegacyConfigCode'], 0) if 'LegacyConfigCode' in jd else None
+ legacy_cache_config = int(jd['LegacyCacheCode'], 0) if 'LegacyCacheCode' in jd else None
self.name = jd['EventName'].lower() if 'EventName' in jd else None
self.topic = ''
self.compat = jd.get('Compat')
@@ -354,6 +358,7 @@ class JsonEvent:
self.metricgroup_no_group = jd.get('MetricgroupNoGroup')
self.default_metricgroup_name = jd.get('DefaultMetricgroupName')
self.event_grouping = convert_metric_constraint(jd.get('MetricConstraint'))
+ self.default_show_events = jd.get('DefaultShowEvents')
self.metric_expr = None
if 'MetricExpr' in jd:
self.metric_expr = metric.ParsePerfJson(jd['MetricExpr']).Simplify()
@@ -370,6 +375,10 @@ class JsonEvent:
event = f'config={llx(configcode)}'
elif eventidcode is not None:
event = f'eventid={llx(eventidcode)}'
+ elif legacy_hw_config is not None:
+ event = f'legacy-hardware-config={llx(legacy_hw_config)}'
+ elif legacy_cache_config is not None:
+ event = f'legacy-cache-config={llx(legacy_cache_config)}'
else:
event = f'event={llx(eventcode)}'
event_fields = [
@@ -492,7 +501,8 @@ def add_events_table_entries(item: os.DirEntry, topic: str) -> None:
for e in read_json_events(item.path, topic):
if e.name:
_pending_events.append(e)
- if e.metric_name:
+ if e.metric_name and not any(e.metric_name == x.metric_name and
+ e.pmu == x.pmu for x in _pending_metrics):
_pending_metrics.append(e)
@@ -544,7 +554,7 @@ def print_pending_events() -> None:
_args.output_file.write(f"""
}};
-const struct pmu_table_entry {_pending_events_tblname}[] = {{
+static const struct pmu_table_entry {_pending_events_tblname}[] = {{
""")
for (pmu, tbl_pmu) in sorted(pmus):
pmu_name = f"{pmu}\\000"
@@ -599,7 +609,7 @@ def print_pending_metrics() -> None:
_args.output_file.write(f"""
}};
-const struct pmu_table_entry {_pending_metrics_tblname}[] = {{
+static const struct pmu_table_entry {_pending_metrics_tblname}[] = {{
""")
for (pmu, tbl_pmu) in sorted(pmus):
pmu_name = f"{pmu}\\000"
@@ -631,7 +641,7 @@ def preprocess_one_file(parents: Sequence[str], item: os.DirEntry) -> None:
if not item.is_file() or not item.name.endswith('.json'):
return
- if item.name == 'metricgroups.json':
+ if item.name.endswith('metricgroups.json'):
metricgroup_descriptions = json.load(open(item.path))
for mgroup in metricgroup_descriptions:
assert len(mgroup) > 1, parents
@@ -684,7 +694,7 @@ def process_one_file(parents: Sequence[str], item: os.DirEntry) -> None:
# Ignore other directories. If the file name does not have a .json
# extension, ignore it. It could be a readme.txt for instance.
- if not item.is_file() or not item.name.endswith('.json') or item.name == 'metricgroups.json':
+ if not item.is_file() or not item.name.endswith('.json') or item.name.endswith('metricgroups.json'):
return
add_events_table_entries(item, get_topic(item.name))
@@ -724,7 +734,7 @@ struct pmu_events_map {
* Global table mapping each known CPU for the architecture to its
* table of PMU events.
*/
-const struct pmu_events_map pmu_events_map[] = {
+static const struct pmu_events_map pmu_events_map[] = {
""")
for arch in archs:
if arch == 'test':
@@ -749,7 +759,10 @@ const struct pmu_events_map pmu_events_map[] = {
\t\t.pmus = pmu_events__common,
\t\t.num_pmus = ARRAY_SIZE(pmu_events__common),
\t},
-\t.metric_table = {},
+\t.metric_table = {
+\t\t.pmus = pmu_metrics__common,
+\t\t.num_pmus = ARRAY_SIZE(pmu_metrics__common),
+\t},
},
""")
else:
@@ -951,6 +964,8 @@ int pmu_events_table__for_each_event(const struct pmu_events_table *table,
pmu_event_iter_fn fn,
void *data)
{
+ if (!table)
+ return 0;
for (size_t i = 0; i < table->num_pmus; i++) {
const struct pmu_table_entry *table_pmu = &table->pmus[i];
const char *pmu_name = &big_c_string[table_pmu->pmu_name.offset];
@@ -972,6 +987,8 @@ int pmu_events_table__find_event(const struct pmu_events_table *table,
pmu_event_iter_fn fn,
void *data)
{
+ if (!table)
+ return PMU_EVENTS__NOT_FOUND;
for (size_t i = 0; i < table->num_pmus; i++) {
const struct pmu_table_entry *table_pmu = &table->pmus[i];
const char *pmu_name = &big_c_string[table_pmu->pmu_name.offset];
@@ -992,6 +1009,8 @@ size_t pmu_events_table__num_events(const struct pmu_events_table *table,
{
size_t count = 0;
+ if (!table)
+ return 0;
for (size_t i = 0; i < table->num_pmus; i++) {
const struct pmu_table_entry *table_pmu = &table->pmus[i];
const char *pmu_name = &big_c_string[table_pmu->pmu_name.offset];
@@ -1070,6 +1089,8 @@ int pmu_metrics_table__for_each_metric(const struct pmu_metrics_table *table,
pmu_metric_iter_fn fn,
void *data)
{
+ if (!table)
+ return 0;
for (size_t i = 0; i < table->num_pmus; i++) {
int ret = pmu_metrics_table__for_each_metric_pmu(table, &table->pmus[i],
fn, data);
@@ -1086,6 +1107,8 @@ int pmu_metrics_table__find_metric(const struct pmu_metrics_table *table,
pmu_metric_iter_fn fn,
void *data)
{
+ if (!table)
+ return 0;
for (size_t i = 0; i < table->num_pmus; i++) {
const struct pmu_table_entry *table_pmu = &table->pmus[i];
const char *pmu_name = &big_c_string[table_pmu->pmu_name.offset];
@@ -1197,6 +1220,22 @@ const struct pmu_events_table *perf_pmu__find_events_table(struct perf_pmu *pmu)
return NULL;
}
+const struct pmu_events_table *perf_pmu__default_core_events_table(void)
+{
+ int i = 0;
+
+ for (;;) {
+ const struct pmu_events_map *map = &pmu_events_map[i++];
+
+ if (!map->arch)
+ break;
+
+ if (!strcmp(map->cpuid, "common"))
+ return &map->event_table;
+ }
+ return NULL;
+}
+
const struct pmu_metrics_table *pmu_metrics_table__find(void)
{
struct perf_cpu cpu = {-1};
@@ -1205,6 +1244,22 @@ const struct pmu_metrics_table *pmu_metrics_table__find(void)
return map ? &map->metric_table : NULL;
}
+const struct pmu_metrics_table *pmu_metrics_table__default(void)
+{
+ int i = 0;
+
+ for (;;) {
+ const struct pmu_events_map *map = &pmu_events_map[i++];
+
+ if (!map->arch)
+ break;
+
+ if (!strcmp(map->cpuid, "common"))
+ return &map->metric_table;
+ }
+ return NULL;
+}
+
const struct pmu_events_table *find_core_events_table(const char *arch, const char *cpuid)
{
for (const struct pmu_events_map *tables = &pmu_events_map[0];
diff --git a/tools/perf/pmu-events/make_legacy_cache.py b/tools/perf/pmu-events/make_legacy_cache.py
new file mode 100755
index 000000000000..28a1ff804f86
--- /dev/null
+++ b/tools/perf/pmu-events/make_legacy_cache.py
@@ -0,0 +1,129 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+import json
+
+hw_cache_id = [
+ (0, # PERF_COUNT_HW_CACHE_L1D
+ ["L1-dcache", "l1-d", "l1d", "L1-data",],
+ [0, 1, 2,], # read, write, prefetch
+ "Level 1 data cache",
+ ),
+ (1, # PERF_COUNT_HW_CACHE_L1I
+ ["L1-icache", "l1-i", "l1i", "L1-instruction",],
+ [0, 2,], # read, prefetch
+ "Level 1 instruction cache",
+ ),
+ (2, # PERF_COUNT_HW_CACHE_LL
+ ["LLC", "L2"],
+ [0, 1, 2,], # read, write, prefetch
+ "Last level cache",
+ ),
+ (3, # PERF_COUNT_HW_CACHE_DTLB
+ ["dTLB", "d-tlb", "Data-TLB",],
+ [0, 1, 2,], # read, write, prefetch
+ "Data TLB",
+ ),
+ (4, # PERF_COUNT_HW_CACHE_ITLB
+ ["iTLB", "i-tlb", "Instruction-TLB",],
+ [0,], # read
+ "Instruction TLB",
+ ),
+ (5, # PERF_COUNT_HW_CACHE_BPU
+ ["branch", "branches", "bpu", "btb", "bpc",],
+ [0,], # read
+ "Branch prediction unit",
+ ),
+ (6, # PERF_COUNT_HW_CACHE_NODE
+ ["node",],
+ [0, 1, 2,], # read, write, prefetch
+ "Local memory",
+ ),
+]
+
+hw_cache_op = [
+ (0, # PERF_COUNT_HW_CACHE_OP_READ
+ ["load", "loads", "read",],
+ "read"),
+ (1, # PERF_COUNT_HW_CACHE_OP_WRITE
+ ["store", "stores", "write",],
+ "write"),
+ (2, # PERF_COUNT_HW_CACHE_OP_PREFETCH
+ ["prefetch", "prefetches", "speculative-read", "speculative-load",],
+ "prefetch"),
+]
+
+hw_cache_result = [
+ (0, # PERF_COUNT_HW_CACHE_RESULT_ACCESS
+ ["refs", "Reference", "ops", "access",],
+ "accesses"),
+ (1, # PERF_COUNT_HW_CACHE_RESULT_MISS
+ ["misses", "miss",],
+ "misses"),
+]
+
+events = []
+def add_event(name: str,
+ cache_id: int, cache_op: int, cache_result: int,
+ desc: str,
+ deprecated: bool) -> None:
+ # Avoid conflicts with PERF_TYPE_HARDWARE events which are higher priority.
+ if name in ["branch-misses", "branches"]:
+ return
+
+ # Tweak and deprecate L2 named events.
+ if name.startswith("L2"):
+ desc = desc.replace("Last level cache", "Level 2 (or higher) last level cache")
+ deprecated = True
+
+ event = {
+ "EventName": name,
+ "BriefDescription": desc,
+ "LegacyCacheCode": f"0x{cache_id | (cache_op << 8) | (cache_result << 16):06x}",
+ }
+
+ # Deprecate events with the name starting L2 as it is actively
+ # confusing as on many machines it actually means the L3 cache.
+ if deprecated:
+ event["Deprecated"] = "1"
+ events.append(event)
+
+for (cache_id, names, ops, cache_desc) in hw_cache_id:
+ for name in names:
+ add_event(name,
+ cache_id,
+ 0, # PERF_COUNT_HW_CACHE_OP_READ
+ 0, # PERF_COUNT_HW_CACHE_RESULT_ACCESS
+ f"{cache_desc} read accesses.",
+ deprecated=True)
+
+ for (op, op_names, op_desc) in hw_cache_op:
+ if op not in ops:
+ continue
+ for op_name in op_names:
+ deprecated = (names[0] != name or op_names[1] != op_name)
+ add_event(f"{name}-{op_name}",
+ cache_id,
+ op,
+ 0, # PERF_COUNT_HW_CACHE_RESULT_ACCESS
+ f"{cache_desc} {op_desc} accesses.",
+ deprecated)
+
+ for (result, result_names, result_desc) in hw_cache_result:
+ for result_name in result_names:
+ deprecated = ((names[0] != name or op_names[0] != op_name) or
+ (result == 0) or (result_names[0] != result_name))
+ add_event(f"{name}-{op_name}-{result_name}",
+ cache_id, op, result,
+ f"{cache_desc} {op_desc} {result_desc}.",
+ deprecated)
+
+ for (result, result_names, result_desc) in hw_cache_result:
+ for result_name in result_names:
+ add_event(f"{name}-{result_name}",
+ cache_id,
+ 0, # PERF_COUNT_HW_CACHE_OP_READ
+ result,
+ f"{cache_desc} read {result_desc}.",
+ deprecated=True)
+
+print(json.dumps(events, indent=2))
diff --git a/tools/perf/pmu-events/metric.py b/tools/perf/pmu-events/metric.py
index 92acd89ed97a..dd8fd06940e6 100644
--- a/tools/perf/pmu-events/metric.py
+++ b/tools/perf/pmu-events/metric.py
@@ -4,8 +4,14 @@ import ast
import decimal
import json
import re
+from enum import Enum
from typing import Dict, List, Optional, Set, Tuple, Union
+class MetricConstraint(Enum):
+ GROUPED_EVENTS = 0
+ NO_GROUP_EVENTS = 1
+ NO_GROUP_EVENTS_NMI = 2
+ NO_GROUP_EVENTS_SMT = 3
class Expression:
"""Abstract base class of elements in a metric expression."""
@@ -423,14 +429,16 @@ class Metric:
groups: Set[str]
expr: Expression
scale_unit: str
- constraint: bool
+ constraint: MetricConstraint
+ threshold: Optional[Expression]
def __init__(self,
name: str,
description: str,
expr: Expression,
scale_unit: str,
- constraint: bool = False):
+ constraint: MetricConstraint = MetricConstraint.GROUPED_EVENTS,
+ threshold: Optional[Expression] = None):
self.name = name
self.description = description
self.expr = expr.Simplify()
@@ -441,6 +449,7 @@ class Metric:
else:
self.scale_unit = f'1{scale_unit}'
self.constraint = constraint
+ self.threshold = threshold
self.groups = set()
def __lt__(self, other):
@@ -449,7 +458,8 @@ class Metric:
def AddToMetricGroup(self, group):
"""Callback used when being added to a MetricGroup."""
- self.groups.add(group.name)
+ if group.name:
+ self.groups.add(group.name)
def Flatten(self) -> Set['Metric']:
"""Return a leaf metric."""
@@ -464,20 +474,15 @@ class Metric:
'MetricExpr': self.expr.ToPerfJson(),
'ScaleUnit': self.scale_unit
}
- if self.constraint:
- result['MetricConstraint'] = 'NO_NMI_WATCHDOG'
+ if self.constraint != MetricConstraint.GROUPED_EVENTS:
+ result['MetricConstraint'] = self.constraint.name
+ if self.threshold:
+ result['MetricThreshold'] = self.threshold.ToPerfJson()
return result
-
-class _MetricJsonEncoder(json.JSONEncoder):
- """Special handling for Metric objects."""
-
- def default(self, o):
- if isinstance(o, Metric):
- return o.ToPerfJson()
- return json.JSONEncoder.default(self, o)
-
+ def ToMetricGroupDescriptions(self, root: bool = True) -> Dict[str, str]:
+ return {}
class MetricGroup:
"""A group of metrics.
@@ -487,12 +492,16 @@ class MetricGroup:
which can facilitate arrangements similar to trees.
"""
- def __init__(self, name: str, metric_list: List[Union[Metric,
- 'MetricGroup']]):
+ def __init__(self, name: str,
+ metric_list: List[Union[Optional[Metric], Optional['MetricGroup']]],
+ description: Optional[str] = None):
self.name = name
- self.metric_list = metric_list
+ self.metric_list = []
+ self.description = description
for metric in metric_list:
- metric.AddToMetricGroup(self)
+ if metric:
+ self.metric_list.append(metric)
+ metric.AddToMetricGroup(self)
def AddToMetricGroup(self, group):
"""Callback used when a MetricGroup is added into another."""
@@ -507,11 +516,36 @@ class MetricGroup:
return result
- def ToPerfJson(self) -> str:
- return json.dumps(sorted(self.Flatten()), indent=2, cls=_MetricJsonEncoder)
+ def ToPerfJson(self) -> List[Dict[str, str]]:
+ result = []
+ for x in sorted(self.Flatten()):
+ result.append(x.ToPerfJson())
+ return result
+
+ def ToMetricGroupDescriptions(self, root: bool = True) -> Dict[str, str]:
+ result = {self.name: self.description} if self.description else {}
+ for x in self.metric_list:
+ result.update(x.ToMetricGroupDescriptions(False))
+ return result
def __str__(self) -> str:
- return self.ToPerfJson()
+ return str(self.ToPerfJson())
+
+
+def JsonEncodeMetric(x: MetricGroup):
+ class MetricJsonEncoder(json.JSONEncoder):
+ """Special handling for Metric objects."""
+
+ def default(self, o):
+ if isinstance(o, Metric) or isinstance(o, MetricGroup):
+ return o.ToPerfJson()
+ return json.JSONEncoder.default(self, o)
+
+ return json.dumps(x, indent=2, cls=MetricJsonEncoder)
+
+
+def JsonEncodeMetricGroupDescriptions(x: MetricGroup):
+ return json.dumps(x.ToMetricGroupDescriptions(), indent=2)
class _RewriteIfExpToSelect(ast.NodeTransformer):
@@ -551,12 +585,18 @@ def ParsePerfJson(orig: str) -> Expression:
r'Event(r"\1")', py)
# If it started with a # it should have been a literal, rather than an event name
py = re.sub(r'#Event\(r"([^"]*)"\)', r'Literal("#\1")', py)
+ # Fix events wrongly broken at a ','
+ while True:
+ prev_py = py
+ py = re.sub(r'Event\(r"([^"]*)"\),Event\(r"([^"]*)"\)', r'Event(r"\1,\2")', py)
+ if py == prev_py:
+ break
# Convert accidentally converted hex constants ("0Event(r"xDEADBEEF)"") back to a constant,
# but keep it wrapped in Event(), otherwise Python drops the 0x prefix and it gets interpreted as
# a double by the Bison parser
py = re.sub(r'0Event\(r"[xX]([0-9a-fA-F]*)"\)', r'Event("0x\1")', py)
# Convert accidentally converted scientific notation constants back
- py = re.sub(r'([0-9]+)Event\(r"(e[0-9]+)"\)', r'\1\2', py)
+ py = re.sub(r'([0-9]+)Event\(r"(e[0-9]*)"\)', r'\1\2', py)
# Convert all the known keywords back from events to just the keyword
keywords = ['if', 'else', 'min', 'max', 'd_ratio', 'source_count', 'has_event', 'strcmp_cpuid_str']
for kw in keywords:
@@ -569,7 +609,6 @@ def ParsePerfJson(orig: str) -> Expression:
parsed = ast.fix_missing_locations(parsed)
return _Constify(eval(compile(parsed, orig, 'eval')))
-
def RewriteMetricsInTermsOfOthers(metrics: List[Tuple[str, str, Expression]]
)-> Dict[Tuple[str, str], Expression]:
"""Shorten metrics by rewriting in terms of others.
diff --git a/tools/perf/pmu-events/metric_test.py b/tools/perf/pmu-events/metric_test.py
index ee22ff43ddd7..8acfe4652b55 100755
--- a/tools/perf/pmu-events/metric_test.py
+++ b/tools/perf/pmu-events/metric_test.py
@@ -61,6 +61,10 @@ class TestMetricExpressions(unittest.TestCase):
after = before
self.assertEqual(ParsePerfJson(before).ToPerfJson(), after)
+ before = r'a + 3e-12 + b'
+ after = before
+ self.assertEqual(ParsePerfJson(before).ToPerfJson(), after)
+
def test_IfElseTests(self):
# if-else needs rewriting to Select and back.
before = r'Event1 if #smt_on else Event2'
diff --git a/tools/perf/pmu-events/pmu-events.h b/tools/perf/pmu-events/pmu-events.h
index ea022ea55087..d3b24014c6ff 100644
--- a/tools/perf/pmu-events/pmu-events.h
+++ b/tools/perf/pmu-events/pmu-events.h
@@ -74,6 +74,7 @@ struct pmu_metric {
const char *default_metricgroup_name;
enum aggr_mode_class aggr_mode;
enum metric_event_groups event_grouping;
+ bool default_show_events;
};
struct pmu_events_table;
@@ -125,7 +126,9 @@ int pmu_metrics_table__find_metric(const struct pmu_metrics_table *table,
void *data);
const struct pmu_events_table *perf_pmu__find_events_table(struct perf_pmu *pmu);
+const struct pmu_events_table *perf_pmu__default_core_events_table(void);
const struct pmu_metrics_table *pmu_metrics_table__find(void);
+const struct pmu_metrics_table *pmu_metrics_table__default(void);
const struct pmu_events_table *find_core_events_table(const char *arch, const char *cpuid);
const struct pmu_metrics_table *find_core_metrics_table(const char *arch, const char *cpuid);
int pmu_for_each_core_event(pmu_event_iter_fn fn, void *data);
diff --git a/tools/perf/python/ilist.py b/tools/perf/python/ilist.py
new file mode 100755
index 000000000000..0d757ddb4795
--- /dev/null
+++ b/tools/perf/python/ilist.py
@@ -0,0 +1,515 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+"""Interactive perf list."""
+
+from abc import ABC, abstractmethod
+import argparse
+from dataclasses import dataclass
+import math
+from typing import Any, Dict, Optional, Tuple
+import perf
+from textual import on
+from textual.app import App, ComposeResult
+from textual.binding import Binding
+from textual.containers import Horizontal, HorizontalGroup, Vertical, VerticalScroll
+from textual.css.query import NoMatches
+from textual.command import SearchIcon
+from textual.screen import ModalScreen
+from textual.widgets import Button, Footer, Header, Input, Label, Sparkline, Static, Tree
+from textual.widgets.tree import TreeNode
+
+
+def get_info(info: Dict[str, str], key: str):
+ return (info[key] + "\n") if key in info else ""
+
+
+class TreeValue(ABC):
+ """Abstraction for the data of value in the tree."""
+
+ @abstractmethod
+ def name(self) -> str:
+ pass
+
+ @abstractmethod
+ def description(self) -> str:
+ pass
+
+ @abstractmethod
+ def matches(self, query: str) -> bool:
+ pass
+
+ @abstractmethod
+ def parse(self) -> perf.evlist:
+ pass
+
+ @abstractmethod
+ def value(self, evlist: perf.evlist, evsel: perf.evsel, cpu: int, thread: int) -> float:
+ pass
+
+
+@dataclass
+class Metric(TreeValue):
+ """A metric in the tree."""
+ metric_name: str
+ metric_pmu: str
+
+ def name(self) -> str:
+ return self.metric_name
+
+ def description(self) -> str:
+ """Find and format metric description."""
+ for metric in perf.metrics():
+ if metric["MetricName"] != self.metric_name:
+ continue
+ if self.metric_pmu and metric["PMU"] != self.metric_pmu:
+ continue
+ desc = get_info(metric, "BriefDescription")
+ desc += get_info(metric, "PublicDescription")
+ desc += get_info(metric, "MetricExpr")
+ desc += get_info(metric, "MetricThreshold")
+ return desc
+ return "description"
+
+ def matches(self, query: str) -> bool:
+ return query in self.metric_name
+
+ def parse(self) -> perf.evlist:
+ return perf.parse_metrics(self.metric_name, self.metric_pmu)
+
+ def value(self, evlist: perf.evlist, evsel: perf.evsel, cpu: int, thread: int) -> float:
+ try:
+ val = evlist.compute_metric(self.metric_name, cpu, thread)
+ return 0 if math.isnan(val) else val
+ except:
+ # Be tolerant of failures to compute metrics on particular CPUs/threads.
+ return 0
+
+
+@dataclass
+class PmuEvent(TreeValue):
+ """A PMU and event within the tree."""
+ pmu: str
+ event: str
+
+ def name(self) -> str:
+ if self.event.startswith(self.pmu) or ':' in self.event:
+ return self.event
+ else:
+ return f"{self.pmu}/{self.event}/"
+
+ def description(self) -> str:
+ """Find and format event description for {pmu}/{event}/."""
+ for p in perf.pmus():
+ if p.name() != self.pmu:
+ continue
+ for info in p.events():
+ if "name" not in info or info["name"] != self.event:
+ continue
+
+ desc = get_info(info, "topic")
+ desc += get_info(info, "event_type_desc")
+ desc += get_info(info, "desc")
+ desc += get_info(info, "long_desc")
+ desc += get_info(info, "encoding_desc")
+ return desc
+ return "description"
+
+ def matches(self, query: str) -> bool:
+ return query in self.pmu or query in self.event
+
+ def parse(self) -> perf.evlist:
+ return perf.parse_events(self.name())
+
+ def value(self, evlist: perf.evlist, evsel: perf.evsel, cpu: int, thread: int) -> float:
+ return evsel.read(cpu, thread).val
+
+
+class ErrorScreen(ModalScreen[bool]):
+ """Pop up dialog for errors."""
+
+ CSS = """
+ ErrorScreen {
+ align: center middle;
+ }
+ """
+
+ def __init__(self, error: str):
+ self.error = error
+ super().__init__()
+
+ def compose(self) -> ComposeResult:
+ yield Button(f"Error: {self.error}", variant="primary", id="error")
+
+ def on_button_pressed(self, event: Button.Pressed) -> None:
+ self.dismiss(True)
+
+
+class SearchScreen(ModalScreen[str]):
+ """Pop up dialog for search."""
+
+ CSS = """
+ SearchScreen Horizontal {
+ align: center middle;
+ margin-top: 1;
+ }
+ SearchScreen Input {
+ width: 1fr;
+ }
+ """
+
+ def compose(self) -> ComposeResult:
+ yield Horizontal(SearchIcon(), Input(placeholder="Event name"))
+
+ def on_input_submitted(self, event: Input.Submitted) -> None:
+ """Handle the user pressing Enter in the input field."""
+ self.dismiss(event.value)
+
+
+class Counter(HorizontalGroup):
+ """Two labels for a CPU and its counter value."""
+
+ CSS = """
+ Label {
+ gutter: 1;
+ }
+ """
+
+ def __init__(self, cpu: int) -> None:
+ self.cpu = cpu
+ super().__init__()
+
+ def compose(self) -> ComposeResult:
+ label = f"cpu{self.cpu}" if self.cpu >= 0 else "total"
+ yield Label(label + " ")
+ yield Label("0", id=f"counter_{label}")
+
+
+class CounterSparkline(HorizontalGroup):
+ """A Sparkline for a performance counter."""
+
+ def __init__(self, cpu: int) -> None:
+ self.cpu = cpu
+ super().__init__()
+
+ def compose(self) -> ComposeResult:
+ label = f"cpu{self.cpu}" if self.cpu >= 0 else "total"
+ yield Label(label)
+ yield Sparkline([], summary_function=max, id=f"sparkline_{label}")
+
+
+class IListApp(App):
+ TITLE = "Interactive Perf List"
+
+ BINDINGS = [
+ Binding(key="s", action="search", description="Search",
+ tooltip="Search events and PMUs"),
+ Binding(key="n", action="next", description="Next",
+ tooltip="Next search result or item"),
+ Binding(key="p", action="prev", description="Previous",
+ tooltip="Previous search result or item"),
+ Binding(key="c", action="collapse", description="Collapse",
+ tooltip="Collapse the current PMU"),
+ Binding(key="^q", action="quit", description="Quit",
+ tooltip="Quit the app"),
+ ]
+
+ CSS = """
+ /* Make the 'total' sparkline a different color. */
+ #sparkline_total > .sparkline--min-color {
+ color: $accent;
+ }
+ #sparkline_total > .sparkline--max-color {
+ color: $accent 30%;
+ }
+ /*
+ * Make the active_search initially not displayed with the text in
+ * the middle of the line.
+ */
+ #active_search {
+ display: none;
+ width: 100%;
+ text-align: center;
+ }
+ """
+
+ def __init__(self, interval: float) -> None:
+ self.interval = interval
+ self.evlist = None
+ self.selected: Optional[TreeValue] = None
+ self.search_results: list[TreeNode[TreeValue]] = []
+ self.cur_search_result: TreeNode[TreeValue] | None = None
+ super().__init__()
+
+ def expand_and_select(self, node: TreeNode[Any]) -> None:
+ """Expand select a node in the tree."""
+ if node.parent:
+ node.parent.expand()
+ if node.parent.parent:
+ node.parent.parent.expand()
+ node.expand()
+ node.tree.select_node(node)
+ node.tree.scroll_to_node(node)
+
+ def set_searched_tree_node(self, previous: bool) -> None:
+ """Set the cur_search_result node to either the next or previous."""
+ l = len(self.search_results)
+
+ if l < 1:
+ tree: Tree[TreeValue] = self.query_one("#root", Tree)
+ if previous:
+ tree.action_cursor_up()
+ else:
+ tree.action_cursor_down()
+ return
+
+ if self.cur_search_result:
+ idx = self.search_results.index(self.cur_search_result)
+ if previous:
+ idx = idx - 1 if idx > 0 else l - 1
+ else:
+ idx = idx + 1 if idx < l - 1 else 0
+ else:
+ idx = l - 1 if previous else 0
+
+ node = self.search_results[idx]
+ if node == self.cur_search_result:
+ return
+
+ self.cur_search_result = node
+ self.expand_and_select(node)
+
+ def action_search(self) -> None:
+ """Search was chosen."""
+ def set_initial_focus(event: str | None) -> None:
+ """Sets the focus after the SearchScreen is dismissed."""
+
+ search_label = self.query_one("#active_search", Label)
+ search_label.display = True if event else False
+ if not event:
+ return
+ event = event.lower()
+ search_label.update(f'Searching for events matching "{event}"')
+
+ tree: Tree[str] = self.query_one("#root", Tree)
+
+ def find_search_results(event: str, node: TreeNode[str],
+ cursor_seen: bool = False,
+ match_after_cursor: Optional[TreeNode[str]] = None
+ ) -> Tuple[bool, Optional[TreeNode[str]]]:
+ """Find nodes that match the search remembering the one after the cursor."""
+ if not cursor_seen and node == tree.cursor_node:
+ cursor_seen = True
+ if node.data and node.data.matches(event):
+ if cursor_seen and not match_after_cursor:
+ match_after_cursor = node
+ self.search_results.append(node)
+
+ if node.children:
+ for child in node.children:
+ (cursor_seen, match_after_cursor) = \
+ find_search_results(event, child, cursor_seen, match_after_cursor)
+ return (cursor_seen, match_after_cursor)
+
+ self.search_results.clear()
+ (_, self.cur_search_result) = find_search_results(event, tree.root)
+ if len(self.search_results) < 1:
+ self.push_screen(ErrorScreen(f"Failed to find pmu/event or metric {event}"))
+ search_label.display = False
+ elif self.cur_search_result:
+ self.expand_and_select(self.cur_search_result)
+ else:
+ self.set_searched_tree_node(previous=False)
+
+ self.push_screen(SearchScreen(), set_initial_focus)
+
+ def action_next(self) -> None:
+ """Next was chosen."""
+ self.set_searched_tree_node(previous=False)
+
+ def action_prev(self) -> None:
+ """Previous was chosen."""
+ self.set_searched_tree_node(previous=True)
+
+ def action_collapse(self) -> None:
+ """Collapse the part of the tree currently on."""
+ tree: Tree[str] = self.query_one("#root", Tree)
+ node = tree.cursor_node
+ if node and node.parent:
+ node.parent.collapse_all()
+ node.tree.scroll_to_node(node.parent)
+
+ def update_counts(self) -> None:
+ """Called every interval to update counts."""
+ if not self.selected or not self.evlist:
+ return
+
+ def update_count(cpu: int, count: int):
+ # Update the raw count display.
+ counter: Label = self.query(f"#counter_cpu{cpu}" if cpu >= 0 else "#counter_total")
+ if not counter:
+ return
+ counter = counter.first(Label)
+ counter.update(str(count))
+
+ # Update the sparkline.
+ line: Sparkline = self.query(f"#sparkline_cpu{cpu}" if cpu >= 0 else "#sparkline_total")
+ if not line:
+ return
+ line = line.first(Sparkline)
+ # If there are more events than the width, remove the front event.
+ if len(line.data) > line.size.width:
+ line.data.pop(0)
+ line.data.append(count)
+ line.mutate_reactive(Sparkline.data)
+
+ # Update the total and each CPU counts, assume there's just 1 evsel.
+ total = 0
+ self.evlist.disable()
+ for evsel in self.evlist:
+ for cpu in evsel.cpus():
+ aggr = 0
+ for thread in evsel.threads():
+ aggr += self.selected.value(self.evlist, evsel, cpu, thread)
+ update_count(cpu, aggr)
+ total += aggr
+ update_count(-1, total)
+ self.evlist.enable()
+
+ def on_mount(self) -> None:
+ """When App starts set up periodic event updating."""
+ self.update_counts()
+ self.set_interval(self.interval, self.update_counts)
+
+ def set_selected(self, value: TreeValue) -> None:
+ """Updates the event/description and starts the counters."""
+ try:
+ label_name = self.query_one("#event_name", Label)
+ event_description = self.query_one("#event_description", Static)
+ lines = self.query_one("#lines")
+ except NoMatches:
+ # A race with rendering, ignore the update as we can't
+ # mount the assumed output widgets.
+ return
+
+ self.selected = value
+
+ # Remove previous event information.
+ if self.evlist:
+ self.evlist.disable()
+ self.evlist.close()
+ old_lines = self.query(CounterSparkline)
+ for line in old_lines:
+ line.remove()
+ old_counters = self.query(Counter)
+ for counter in old_counters:
+ counter.remove()
+
+ # Update event/metric text and description.
+ label_name.update(value.name())
+ event_description.update(value.description())
+
+ # Open the event.
+ try:
+ self.evlist = value.parse()
+ if self.evlist:
+ self.evlist.open()
+ self.evlist.enable()
+ except:
+ self.evlist = None
+
+ if not self.evlist:
+ self.push_screen(ErrorScreen(f"Failed to open {value.name()}"))
+ return
+
+ # Add spark lines for all the CPUs. Note, must be done after
+ # open so that the evlist CPUs have been computed by propagate
+ # maps.
+ line = CounterSparkline(cpu=-1)
+ lines.mount(line)
+ for cpu in self.evlist.all_cpus():
+ line = CounterSparkline(cpu)
+ lines.mount(line)
+ line = Counter(cpu=-1)
+ lines.mount(line)
+ for cpu in self.evlist.all_cpus():
+ line = Counter(cpu)
+ lines.mount(line)
+
+ def compose(self) -> ComposeResult:
+ """Draws the app."""
+ def metric_event_tree() -> Tree:
+ """Create tree of PMUs and metricgroups with events or metrics under."""
+ tree: Tree[TreeValue] = Tree("Root", id="root")
+ pmus = tree.root.add("PMUs")
+ for pmu in perf.pmus():
+ pmu_name = pmu.name().lower()
+ pmu_node = pmus.add(pmu_name)
+ try:
+ for event in sorted(pmu.events(), key=lambda x: x["name"]):
+ if "deprecated" in event:
+ continue
+ if "name" in event:
+ e = event["name"].lower()
+ if "alias" in event:
+ pmu_node.add_leaf(f'{e} ({event["alias"]})',
+ data=PmuEvent(pmu_name, e))
+ else:
+ pmu_node.add_leaf(e, data=PmuEvent(pmu_name, e))
+ except:
+ # Reading events may fail with EPERM, ignore.
+ pass
+ metrics = tree.root.add("Metrics")
+ groups = set()
+ for metric in perf.metrics():
+ groups.update(metric["MetricGroup"])
+
+ def add_metrics_to_tree(node: TreeNode[TreeValue], parent: str, pmu: str = None):
+ for metric in sorted(perf.metrics(), key=lambda x: x["MetricName"]):
+ metric_pmu = metric.get('PMU')
+ if pmu and metric_pmu and metric_pmu != pmu:
+ continue
+ if parent in metric["MetricGroup"]:
+ name = metric["MetricName"]
+ display_name = name
+ if metric_pmu:
+ display_name += f" ({metric_pmu})"
+ node.add_leaf(display_name, data=Metric(name, metric_pmu))
+ child_group_name = f'{name}_group'
+ if child_group_name in groups:
+ display_child_group_name = child_group_name
+ if metric_pmu:
+ display_child_group_name += f" ({metric_pmu})"
+ add_metrics_to_tree(node.add(display_child_group_name),
+ child_group_name,
+ metric_pmu)
+
+ for group in sorted(groups):
+ if group.endswith('_group'):
+ continue
+ add_metrics_to_tree(metrics.add(group), group)
+
+ tree.root.expand()
+ return tree
+
+ yield Header(id="header")
+ yield Horizontal(Vertical(metric_event_tree(), id="events"),
+ Vertical(Label("event name", id="event_name"),
+ Static("description", markup=False, id="event_description"),
+ ))
+ yield Label(id="active_search")
+ yield VerticalScroll(id="lines")
+ yield Footer(id="footer")
+
+ @on(Tree.NodeSelected)
+ def on_tree_node_selected(self, event: Tree.NodeSelected[TreeValue]) -> None:
+ """Called when a tree node is selected, selecting the event."""
+ if event.node.data:
+ self.set_selected(event.node.data)
+
+
+if __name__ == "__main__":
+ ap = argparse.ArgumentParser()
+ ap.add_argument('-I', '--interval', help="Counter update interval in seconds", default=0.1)
+ args = ap.parse_args()
+ app = IListApp(float(args.interval))
+ app.run()
diff --git a/tools/perf/scripts/perl/Perf-Trace-Util/Build b/tools/perf/scripts/perl/Perf-Trace-Util/Build
index 9b0e5a8b5070..01a1a0ed51ae 100644
--- a/tools/perf/scripts/perl/Perf-Trace-Util/Build
+++ b/tools/perf/scripts/perl/Perf-Trace-Util/Build
@@ -2,7 +2,7 @@ perf-util-y += Context.o
CFLAGS_Context.o += $(PERL_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-bad-function-cast -Wno-declaration-after-statement -Wno-switch-enum
CFLAGS_Context.o += -Wno-unused-parameter -Wno-nested-externs -Wno-undef
-CFLAGS_Context.o += -Wno-switch-default -Wno-shadow
+CFLAGS_Context.o += -Wno-switch-default -Wno-shadow -Wno-thread-safety-analysis
ifeq ($(CC_NO_CLANG), 1)
CFLAGS_Context.o += -Wno-unused-command-line-argument
diff --git a/tools/perf/tests/Build b/tools/perf/tests/Build
index 3e8394be15ae..c2a67ce45941 100644
--- a/tools/perf/tests/Build
+++ b/tools/perf/tests/Build
@@ -20,7 +20,6 @@ perf-test-y += hists_link.o
perf-test-y += hists_filter.o
perf-test-y += hists_output.o
perf-test-y += hists_cumulate.o
-perf-test-y += python-use.o
perf-test-y += bp_signal.o
perf-test-y += bp_signal_overflow.o
perf-test-y += bp_account.o
@@ -70,12 +69,12 @@ perf-test-y += util.o
perf-test-y += hwmon_pmu.o
perf-test-y += tool_pmu.o
perf-test-y += subcmd-help.o
+perf-test-y += kallsyms-split.o
ifeq ($(SRCARCH),$(filter $(SRCARCH),x86 arm arm64 powerpc))
perf-test-$(CONFIG_DWARF_UNWIND) += dwarf-unwind.o
endif
-CFLAGS_python-use.o += -DPYTHONPATH="BUILD_STR($(OUTPUT)python)" -DPYTHON="BUILD_STR($(PYTHON_WORD))"
CFLAGS_dwarf-unwind.o += -fno-optimize-sibling-calls
perf-test-y += workloads/
diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c
index 85142dfb3e01..bd6ffa8e4578 100644
--- a/tools/perf/tests/builtin-test.c
+++ b/tools/perf/tests/builtin-test.c
@@ -84,7 +84,6 @@ static struct test_suite *generic_tests[] = {
&suite__syscall_openat_tp_fields,
#endif
&suite__hists_link,
- &suite__python_use,
&suite__bp_signal,
&suite__bp_signal_overflow,
&suite__bp_accounting,
@@ -127,7 +126,7 @@ static struct test_suite *generic_tests[] = {
&suite__jit_write_elf,
&suite__pfm,
&suite__api_io,
- &suite__maps__merge_in,
+ &suite__maps,
&suite__demangle_java,
&suite__demangle_ocaml,
&suite__demangle_rust,
@@ -141,6 +140,7 @@ static struct test_suite *generic_tests[] = {
&suite__symbols,
&suite__util,
&suite__subcmd_help,
+ &suite__kallsyms_split,
NULL,
};
@@ -152,6 +152,7 @@ static struct test_workload *workloads[] = {
&workload__brstack,
&workload__datasym,
&workload__landlock,
+ &workload__traploop,
};
#define workloads__for_each(workload) \
diff --git a/tools/perf/tests/code-reading.c b/tools/perf/tests/code-reading.c
index 9c2091310191..5927d1ea20e2 100644
--- a/tools/perf/tests/code-reading.c
+++ b/tools/perf/tests/code-reading.c
@@ -2,6 +2,7 @@
#include <errno.h>
#include <linux/kconfig.h>
#include <linux/kernel.h>
+#include <linux/rbtree.h>
#include <linux/types.h>
#include <inttypes.h>
#include <stdlib.h>
@@ -39,11 +40,69 @@
#define BUFSZ 1024
#define READLEN 128
-struct state {
- u64 done[1024];
- size_t done_cnt;
+struct tested_section {
+ struct rb_node rb_node;
+ u64 addr;
+ char *path;
};
+static bool tested_code_insert_or_exists(const char *path, u64 addr,
+ struct rb_root *tested_sections)
+{
+ struct rb_node **node = &tested_sections->rb_node;
+ struct rb_node *parent = NULL;
+ struct tested_section *data;
+
+ while (*node) {
+ int cmp;
+
+ parent = *node;
+ data = rb_entry(*node, struct tested_section, rb_node);
+ cmp = strcmp(path, data->path);
+ if (!cmp) {
+ if (addr < data->addr)
+ cmp = -1;
+ else if (addr > data->addr)
+ cmp = 1;
+ else
+ return true; /* already tested */
+ }
+
+ if (cmp < 0)
+ node = &(*node)->rb_left;
+ else
+ node = &(*node)->rb_right;
+ }
+
+ data = zalloc(sizeof(*data));
+ if (!data)
+ return true;
+
+ data->addr = addr;
+ data->path = strdup(path);
+ if (!data->path) {
+ free(data);
+ return true;
+ }
+ rb_link_node(&data->rb_node, parent, node);
+ rb_insert_color(&data->rb_node, tested_sections);
+ return false;
+}
+
+static void tested_sections__free(struct rb_root *root)
+{
+ while (!RB_EMPTY_ROOT(root)) {
+ struct rb_node *node = rb_first(root);
+ struct tested_section *ts = rb_entry(node,
+ struct tested_section,
+ rb_node);
+
+ rb_erase(node, root);
+ free(ts->path);
+ free(ts);
+ }
+}
+
static size_t read_objdump_chunk(const char **line, unsigned char **buf,
size_t *buf_len)
{
@@ -316,13 +375,15 @@ static void dump_buf(unsigned char *buf, size_t len)
}
static int read_object_code(u64 addr, size_t len, u8 cpumode,
- struct thread *thread, struct state *state)
+ struct thread *thread,
+ struct rb_root *tested_sections)
{
struct addr_location al;
unsigned char buf1[BUFSZ] = {0};
unsigned char buf2[BUFSZ] = {0};
size_t ret_len;
u64 objdump_addr;
+ u64 skip_addr;
const char *objdump_name;
char decomp_name[KMOD_DECOMP_LEN];
bool decomp = false;
@@ -350,6 +411,18 @@ static int read_object_code(u64 addr, size_t len, u8 cpumode,
goto out;
}
+ /*
+ * Don't retest the same addresses. objdump struggles with kcore - try
+ * each map only once even if the address is different.
+ */
+ skip_addr = dso__is_kcore(dso) ? map__start(al.map) : al.addr;
+ if (tested_code_insert_or_exists(dso__long_name(dso), skip_addr,
+ tested_sections)) {
+ pr_debug("Already tested %s @ %#"PRIx64" - skipping\n",
+ dso__long_name(dso), skip_addr);
+ goto out;
+ }
+
pr_debug("On file address is: %#"PRIx64"\n", al.addr);
if (len > BUFSZ)
@@ -387,24 +460,6 @@ static int read_object_code(u64 addr, size_t len, u8 cpumode,
goto out;
}
- /* objdump struggles with kcore - try each map only once */
- if (dso__is_kcore(dso)) {
- size_t d;
-
- for (d = 0; d < state->done_cnt; d++) {
- if (state->done[d] == map__start(al.map)) {
- pr_debug("kcore map tested already");
- pr_debug(" - skipping\n");
- goto out;
- }
- }
- if (state->done_cnt >= ARRAY_SIZE(state->done)) {
- pr_debug("Too many kcore maps - skipping\n");
- goto out;
- }
- state->done[state->done_cnt++] = map__start(al.map);
- }
-
objdump_name = dso__long_name(dso);
if (dso__needs_decompress(dso)) {
if (dso__decompress_kmodule_path(dso, objdump_name,
@@ -471,9 +526,9 @@ out:
return err;
}
-static int process_sample_event(struct machine *machine,
- struct evlist *evlist,
- union perf_event *event, struct state *state)
+static int process_sample_event(struct machine *machine, struct evlist *evlist,
+ union perf_event *event,
+ struct rb_root *tested_sections)
{
struct perf_sample sample;
struct thread *thread;
@@ -494,7 +549,8 @@ static int process_sample_event(struct machine *machine,
goto out;
}
- ret = read_object_code(sample.ip, READLEN, sample.cpumode, thread, state);
+ ret = read_object_code(sample.ip, READLEN, sample.cpumode, thread,
+ tested_sections);
thread__put(thread);
out:
perf_sample__exit(&sample);
@@ -502,10 +558,11 @@ out:
}
static int process_event(struct machine *machine, struct evlist *evlist,
- union perf_event *event, struct state *state)
+ union perf_event *event, struct rb_root *tested_sections)
{
if (event->header.type == PERF_RECORD_SAMPLE)
- return process_sample_event(machine, evlist, event, state);
+ return process_sample_event(machine, evlist, event,
+ tested_sections);
if (event->header.type == PERF_RECORD_THROTTLE ||
event->header.type == PERF_RECORD_UNTHROTTLE)
@@ -525,7 +582,7 @@ static int process_event(struct machine *machine, struct evlist *evlist,
}
static int process_events(struct machine *machine, struct evlist *evlist,
- struct state *state)
+ struct rb_root *tested_sections)
{
union perf_event *event;
struct mmap *md;
@@ -537,7 +594,7 @@ static int process_events(struct machine *machine, struct evlist *evlist,
continue;
while ((event = perf_mmap__read_event(&md->core)) != NULL) {
- ret = process_event(machine, evlist, event, state);
+ ret = process_event(machine, evlist, event, tested_sections);
perf_mmap__consume(&md->core);
if (ret < 0)
return ret;
@@ -637,9 +694,7 @@ static int do_test_code_reading(bool try_kcore)
.uses_mmap = true,
},
};
- struct state state = {
- .done_cnt = 0,
- };
+ struct rb_root tested_sections = RB_ROOT;
struct perf_thread_map *threads = NULL;
struct perf_cpu_map *cpus = NULL;
struct evlist *evlist = NULL;
@@ -649,7 +704,7 @@ static int do_test_code_reading(bool try_kcore)
struct map *map;
bool have_vmlinux, have_kcore;
struct dso *dso;
- const char *events[] = { "cycles", "cycles:u", "cpu-clock", "cpu-clock:u", NULL };
+ const char *events[] = { "cpu-cycles", "cpu-cycles:u", "cpu-clock", "cpu-clock:u", NULL };
int evidx = 0;
struct perf_env host_env;
@@ -773,7 +828,7 @@ static int do_test_code_reading(bool try_kcore)
evlist__disable(evlist);
- ret = process_events(machine, evlist, &state);
+ ret = process_events(machine, evlist, &tested_sections);
if (ret < 0)
goto out_put;
@@ -793,6 +848,7 @@ out_err:
perf_thread_map__put(threads);
machine__delete(machine);
perf_env__exit(&host_env);
+ tested_sections__free(&tested_sections);
return err;
}
diff --git a/tools/perf/tests/hwmon_pmu.c b/tools/perf/tests/hwmon_pmu.c
index 151f02701c8c..4aa4aac94f09 100644
--- a/tools/perf/tests/hwmon_pmu.c
+++ b/tools/perf/tests/hwmon_pmu.c
@@ -4,6 +4,7 @@
#include "hwmon_pmu.h"
#include "parse-events.h"
#include "tests.h"
+#include <errno.h>
#include <fcntl.h>
#include <sys/stat.h>
#include <linux/compiler.h>
diff --git a/tools/perf/tests/kallsyms-split.c b/tools/perf/tests/kallsyms-split.c
new file mode 100644
index 000000000000..bbbc66957e5d
--- /dev/null
+++ b/tools/perf/tests/kallsyms-split.c
@@ -0,0 +1,156 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/compiler.h>
+#include <fcntl.h>
+#include <signal.h>
+#include <unistd.h>
+#include <sys/stat.h>
+#include "util/dso.h"
+#include "util/map.h"
+#include "util/symbol.h"
+#include "util/debug.h"
+#include "util/machine.h"
+#include "tests.h"
+
+/*
+ * This test is to check whether a bad symbol in a module won't split kallsyms maps.
+ * The main_symbol[1-3] should belong to the main [kernel.kallsyms] map even if the
+ * bad_symbol from the module is found in the middle.
+ */
+static char root_template[] = "/tmp/perf-test.XXXXXX";
+static char *root_dir;
+
+static const char proc_version[] = "Linux version X.Y.Z (just for perf test)\n";
+static const char proc_modules[] = "module 4096 1 - Live 0xffffffffcd000000\n";
+static const char proc_kallsyms[] =
+ "ffffffffab200000 T _stext\n"
+ "ffffffffab200010 T good_symbol\n"
+ "ffffffffab200020 t bad_symbol\n"
+ "ffffffffab200030 t main_symbol1\n"
+ "ffffffffab200040 t main_symbol2\n"
+ "ffffffffab200050 t main_symbol3\n"
+ "ffffffffab200060 T _etext\n"
+ "ffffffffcd000000 T start_module\t[module]\n"
+ "ffffffffab200020 u bad_symbol\t[module]\n"
+ "ffffffffcd000040 T end_module\t[module]\n";
+
+static struct {
+ const char *name;
+ const char *contents;
+ long len;
+} proc_files[] = {
+ { "version", proc_version, sizeof(proc_version) - 1 },
+ { "modules", proc_modules, sizeof(proc_modules) - 1 },
+ { "kallsyms", proc_kallsyms, sizeof(proc_kallsyms) - 1 },
+};
+
+static void remove_proc_dir(int sig __maybe_unused)
+{
+ char buf[128];
+
+ if (root_dir == NULL)
+ return;
+
+ for (unsigned i = 0; i < ARRAY_SIZE(proc_files); i++) {
+ scnprintf(buf, sizeof(buf), "%s/proc/%s", root_dir, proc_files[i].name);
+ remove(buf);
+ }
+
+ scnprintf(buf, sizeof(buf), "%s/proc", root_dir);
+ rmdir(buf);
+
+ rmdir(root_dir);
+ root_dir = NULL;
+}
+
+static int create_proc_dir(void)
+{
+ char buf[128];
+
+ root_dir = mkdtemp(root_template);
+ if (root_dir == NULL)
+ return -1;
+
+ scnprintf(buf, sizeof(buf), "%s/proc", root_dir);
+ if (mkdir(buf, 0700) < 0)
+ goto err;
+
+ for (unsigned i = 0; i < ARRAY_SIZE(proc_files); i++) {
+ int fd, len;
+
+ scnprintf(buf, sizeof(buf), "%s/proc/%s", root_dir, proc_files[i].name);
+ fd = open(buf, O_RDWR | O_CREAT, 0600);
+ if (fd < 0)
+ goto err;
+
+ len = write(fd, proc_files[i].contents, proc_files[i].len);
+ close(fd);
+ if (len != proc_files[i].len)
+ goto err;
+ }
+ return 0;
+
+err:
+ remove_proc_dir(0);
+ return -1;
+}
+
+static int test__kallsyms_split(struct test_suite *test __maybe_unused,
+ int subtest __maybe_unused)
+{
+ struct machine m;
+ struct map *map = NULL;
+ int ret = TEST_FAIL;
+
+ pr_debug("try to create fake root directory\n");
+ if (create_proc_dir() < 0) {
+ pr_debug("SKIP: cannot create a fake root directory\n");
+ return TEST_SKIP;
+ }
+
+ signal(SIGINT, remove_proc_dir);
+ signal(SIGPIPE, remove_proc_dir);
+ signal(SIGSEGV, remove_proc_dir);
+ signal(SIGTERM, remove_proc_dir);
+
+ pr_debug("create kernel maps from the fake root directory\n");
+ machine__init(&m, root_dir, HOST_KERNEL_ID);
+ if (machine__create_kernel_maps(&m) < 0) {
+ pr_debug("FAIL: failed to create kernel maps\n");
+ goto out;
+ }
+
+ /* force to use /proc/kallsyms */
+ symbol_conf.ignore_vmlinux = true;
+ symbol_conf.ignore_vmlinux_buildid = true;
+ symbol_conf.allow_aliases = true;
+
+ if (map__load(machine__kernel_map(&m)) < 0) {
+ pr_debug("FAIL: failed to load kallsyms\n");
+ goto out;
+ }
+
+ pr_debug("kernel map loaded - check symbol and map\n");
+ if (maps__nr_maps(machine__kernel_maps(&m)) != 2) {
+ pr_debug("FAIL: it should have the kernel and a module, but has %u maps\n",
+ maps__nr_maps(machine__kernel_maps(&m)));
+ goto out;
+ }
+
+ if (machine__find_kernel_symbol_by_name(&m, "main_symbol3", &map) == NULL) {
+ pr_debug("FAIL: failed to find a symbol\n");
+ goto out;
+ }
+
+ if (!RC_CHK_EQUAL(map, machine__kernel_map(&m))) {
+ pr_debug("FAIL: the symbol is not in the kernel map\n");
+ goto out;
+ }
+ ret = TEST_OK;
+
+out:
+ remove_proc_dir(0);
+ machine__exit(&m);
+ return ret;
+}
+
+DEFINE_SUITE("split kallsyms", kallsyms_split);
diff --git a/tools/perf/tests/keep-tracking.c b/tools/perf/tests/keep-tracking.c
index eafb49eb0b56..729cc9cc1cb7 100644
--- a/tools/perf/tests/keep-tracking.c
+++ b/tools/perf/tests/keep-tracking.c
@@ -90,7 +90,7 @@ static int test__keep_tracking(struct test_suite *test __maybe_unused, int subte
perf_evlist__set_maps(&evlist->core, cpus, threads);
CHECK__(parse_event(evlist, "dummy:u"));
- CHECK__(parse_event(evlist, "cycles:u"));
+ CHECK__(parse_event(evlist, "cpu-cycles:u"));
evlist__config(evlist, &opts, NULL);
diff --git a/tools/perf/tests/make b/tools/perf/tests/make
index c574a678c28a..6641701e4828 100644
--- a/tools/perf/tests/make
+++ b/tools/perf/tests/make
@@ -73,9 +73,9 @@ make_extra_tests := EXTRA_TESTS=1
make_jevents_all := JEVENTS_ARCH=all
make_no_bpf_skel := BUILD_BPF_SKEL=0
make_gen_vmlinux_h := GEN_VMLINUX_H=1
-make_no_libperl := NO_LIBPERL=1
+make_libperl := LIBPERL=1
make_no_libpython := NO_LIBPYTHON=1
-make_no_scripts := NO_LIBPYTHON=1 NO_LIBPERL=1
+make_no_scripts := NO_LIBPYTHON=1
make_no_slang := NO_SLANG=1
make_no_gtk2 := NO_GTK2=1
make_no_ui := NO_SLANG=1 NO_GTK2=1
@@ -88,7 +88,6 @@ make_no_backtrace := NO_BACKTRACE=1
make_no_libcapstone := NO_CAPSTONE=1
make_no_libnuma := NO_LIBNUMA=1
make_no_libbionic := NO_LIBBIONIC=1
-make_no_auxtrace := NO_AUXTRACE=1
make_no_libbpf := NO_LIBBPF=1
make_libbpf_dynamic := LIBBPF_DYNAMIC=1
make_no_libbpf_DEBUG := NO_LIBBPF=1 DEBUG=1
@@ -118,10 +117,10 @@ make_install_prefix_slash := install prefix=/tmp/krava/
make_static := LDFLAGS=-static NO_PERF_READ_VDSO32=1 NO_PERF_READ_VDSOX32=1 NO_JVMTI=1 NO_LIBTRACEEVENT=1 NO_LIBELF=1
# all the NO_* variable combined
-make_minimal := NO_LIBPERL=1 NO_LIBPYTHON=1 NO_GTK2=1
+make_minimal := NO_LIBPYTHON=1 NO_GTK2=1
make_minimal += NO_DEMANGLE=1 NO_LIBELF=1 NO_BACKTRACE=1
make_minimal += NO_LIBNUMA=1 NO_LIBBIONIC=1 NO_LIBDW=1
-make_minimal += NO_LIBDW_DWARF_UNWIND=1 NO_AUXTRACE=1 NO_LIBBPF=1
+make_minimal += NO_LIBDW_DWARF_UNWIND=1 NO_LIBBPF=1
make_minimal += NO_SDT=1 NO_JVMTI=1 NO_LIBZSTD=1
make_minimal += NO_LIBCAP=1 NO_CAPSTONE=1
@@ -143,7 +142,7 @@ run += make_extra_tests
run += make_jevents_all
run += make_no_bpf_skel
run += make_gen_vmlinux_h
-run += make_no_libperl
+run += make_libperl
run += make_no_libpython
run += make_no_scripts
run += make_no_slang
@@ -158,7 +157,6 @@ run += make_no_backtrace
run += make_no_libcapstone
run += make_no_libnuma
run += make_no_libbionic
-run += make_no_auxtrace
run += make_no_libbpf
run += make_no_libbpf_DEBUG
run += make_no_libllvm
diff --git a/tools/perf/tests/maps.c b/tools/perf/tests/maps.c
index 4f1f9385ea9c..2c05d62f40dc 100644
--- a/tools/perf/tests/maps.c
+++ b/tools/perf/tests/maps.c
@@ -162,4 +162,84 @@ static int test__maps__merge_in(struct test_suite *t __maybe_unused, int subtest
return TEST_OK;
}
-DEFINE_SUITE("maps__merge_in", maps__merge_in);
+static int test__maps__fixup_overlap_and_insert(struct test_suite *t __maybe_unused,
+ int subtest __maybe_unused)
+{
+ struct map_def initial_maps[] = {
+ { "target_map", 1000, 2000 },
+ { "next_map", 3000, 4000 },
+ };
+ struct map_def insert_split = { "split_map", 1400, 1600 };
+ struct map_def expected_after_split[] = {
+ { "target_map", 1000, 1400 },
+ { "split_map", 1400, 1600 },
+ { "target_map", 1600, 2000 },
+ { "next_map", 3000, 4000 },
+ };
+
+ struct map_def insert_eclipse = { "eclipse_map", 2500, 4500 };
+ struct map_def expected_final[] = {
+ { "target_map", 1000, 1400 },
+ { "split_map", 1400, 1600 },
+ { "target_map", 1600, 2000 },
+ { "eclipse_map", 2500, 4500 },
+ /* "next_map" (3000-4000) is removed */
+ };
+
+ struct map *map_split, *map_eclipse;
+ int ret;
+ unsigned int i;
+ struct maps *maps = maps__new(NULL);
+
+ TEST_ASSERT_VAL("failed to create maps", maps);
+
+ for (i = 0; i < ARRAY_SIZE(initial_maps); i++) {
+ struct map *map = dso__new_map(initial_maps[i].name);
+
+ TEST_ASSERT_VAL("failed to create map", map);
+ map__set_start(map, initial_maps[i].start);
+ map__set_end(map, initial_maps[i].end);
+ TEST_ASSERT_VAL("failed to insert map", maps__insert(maps, map) == 0);
+ map__put(map);
+ }
+
+ // Check splitting.
+ map_split = dso__new_map(insert_split.name);
+ TEST_ASSERT_VAL("failed to create split map", map_split);
+ map__set_start(map_split, insert_split.start);
+ map__set_end(map_split, insert_split.end);
+
+ ret = maps__fixup_overlap_and_insert(maps, map_split);
+ TEST_ASSERT_VAL("failed to fixup and insert split map", !ret);
+
+ map__zput(map_split);
+ ret = check_maps(expected_after_split, ARRAY_SIZE(expected_after_split), maps);
+ TEST_ASSERT_VAL("split check failed", !ret);
+
+ // Check cover 1 map with another.
+ map_eclipse = dso__new_map(insert_eclipse.name);
+ TEST_ASSERT_VAL("failed to create eclipse map", map_eclipse);
+ map__set_start(map_eclipse, insert_eclipse.start);
+ map__set_end(map_eclipse, insert_eclipse.end);
+
+ ret = maps__fixup_overlap_and_insert(maps, map_eclipse);
+ TEST_ASSERT_VAL("failed to fixup and insert eclipse map", !ret);
+
+ map__zput(map_eclipse);
+ ret = check_maps(expected_final, ARRAY_SIZE(expected_final), maps);
+ TEST_ASSERT_VAL("eclipse check failed", !ret);
+
+ maps__zput(maps);
+ return TEST_OK;
+}
+
+static struct test_case tests__maps[] = {
+ TEST_CASE("Test merge_in interface", maps__merge_in),
+ TEST_CASE("Test fix up overlap interface", maps__fixup_overlap_and_insert),
+ { .name = NULL, }
+};
+
+struct test_suite suite__maps = {
+ .desc = "Maps - per process mmaps abstraction",
+ .test_cases = tests__maps,
+};
diff --git a/tools/perf/tests/mmap-basic.c b/tools/perf/tests/mmap-basic.c
index 3c89d3001887..3313c236104e 100644
--- a/tools/perf/tests/mmap-basic.c
+++ b/tools/perf/tests/mmap-basic.c
@@ -322,7 +322,7 @@ static int test_stat_user_read(u64 event, enum user_read_state enabled)
}
perf_evsel__read(evsel, 0, 0, &counts);
- if (counts.val == 0) {
+ if (rdpmc_supported && counts.val == 0) {
pr_err("User space counter reading for PMU %s [Failed read]\n", pmu->name);
ret = TEST_FAIL;
goto cleanup;
diff --git a/tools/perf/tests/parse-events.c b/tools/perf/tests/parse-events.c
index bb8004397650..128d21dc389f 100644
--- a/tools/perf/tests/parse-events.c
+++ b/tools/perf/tests/parse-events.c
@@ -1,12 +1,14 @@
// SPDX-License-Identifier: GPL-2.0
#include "parse-events.h"
#include "evsel.h"
+#include "evsel_fprintf.h"
#include "evlist.h"
#include <api/fs/fs.h>
#include "tests.h"
#include "debug.h"
#include "pmu.h"
#include "pmus.h"
+#include "strbuf.h"
#include <dirent.h>
#include <errno.h>
#include "fncache.h"
@@ -20,38 +22,61 @@
#define PERF_TP_SAMPLE_TYPE (PERF_SAMPLE_RAW | PERF_SAMPLE_TIME | \
PERF_SAMPLE_CPU | PERF_SAMPLE_PERIOD)
-static int num_core_entries(void)
+static bool check_evlist(const char *test, int line, bool cond, struct evlist *evlist)
{
- /*
- * If the kernel supports extended type, expect events to be
- * opened once for each core PMU type. Otherwise fall back to the legacy
- * behavior of opening only one event even though there are multiple
- * PMUs
- */
- if (perf_pmus__supports_extended_type())
- return perf_pmus__num_core_pmus();
+ struct strbuf sb = STRBUF_INIT;
- return 1;
+ if (cond)
+ return true;
+
+ evlist__format_evsels(evlist, &sb, 2048);
+ pr_debug("FAILED %s:%d: %s\nFor evlist: %s\n", __FILE__, line, test, sb.buf);
+ strbuf_release(&sb);
+ return false;
}
+#define TEST_ASSERT_EVLIST(test, cond, evlist) \
+ if (!check_evlist(test, __LINE__, cond, evlist)) \
+ return TEST_FAIL
-static bool test_config(const struct evsel *evsel, __u64 expected_config)
+static bool check_evsel(const char *test, int line, bool cond, struct evsel *evsel)
{
- __u32 type = evsel->core.attr.type;
- __u64 config = evsel->core.attr.config;
+ struct perf_attr_details details = { .verbose = true, };
- if (type == PERF_TYPE_HARDWARE || type == PERF_TYPE_HW_CACHE) {
- /*
- * HARDWARE and HW_CACHE events encode the PMU's extended type
- * in the top 32-bits. Mask in order to ignore.
- */
- config &= PERF_HW_EVENT_MASK;
+ if (cond)
+ return true;
+
+ pr_debug("FAILED %s:%d: %s\nFor evsel: ", __FILE__, line, test);
+ evsel__fprintf(evsel, &details, debug_file());
+ return false;
+}
+#define TEST_ASSERT_EVSEL(test, cond, evsel) \
+ if (!check_evsel(test, __LINE__, cond, evsel)) \
+ return TEST_FAIL
+
+static int num_core_entries(struct evlist *evlist)
+{
+ /*
+ * Returns number of core PMUs if the evlist has >1 core PMU, otherwise
+ * returns 1. The number of core PMUs is needed as wild carding can
+ * open an event for each core PMU. If the events were opened with a
+ * specified PMU then wild carding won't happen.
+ */
+ struct perf_pmu *core_pmu = NULL;
+ struct evsel *evsel;
+
+ evlist__for_each_entry(evlist, evsel) {
+ if (!evsel->pmu->is_core)
+ continue;
+ if (core_pmu != evsel->pmu && core_pmu != NULL)
+ return perf_pmus__num_core_pmus();
+ core_pmu = evsel->pmu;
}
- return config == expected_config;
+ return 1;
}
-static bool test_perf_config(const struct perf_evsel *evsel, __u64 expected_config)
+static bool test_hw_config(const struct evsel *evsel, __u64 expected_config)
{
- return (evsel->attr.config & PERF_HW_EVENT_MASK) == expected_config;
+ return (evsel->core.attr.config & PERF_HW_EVENT_MASK) == expected_config;
}
#if defined(__s390x__)
@@ -84,12 +109,12 @@ static int test__checkevent_tracepoint(struct evlist *evlist)
{
struct evsel *evsel = evlist__first(evlist);
- TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->core.nr_entries);
- TEST_ASSERT_VAL("wrong number of groups", 0 == evlist__nr_groups(evlist));
- TEST_ASSERT_VAL("wrong type", PERF_TYPE_TRACEPOINT == evsel->core.attr.type);
- TEST_ASSERT_VAL("wrong sample_type",
- PERF_TP_SAMPLE_TYPE == evsel->core.attr.sample_type);
- TEST_ASSERT_VAL("wrong sample_period", 1 == evsel->core.attr.sample_period);
+ TEST_ASSERT_EVLIST("wrong number of entries", 1 == evlist->core.nr_entries, evlist);
+ TEST_ASSERT_EVLIST("wrong number of groups", 0 == evlist__nr_groups(evlist), evlist);
+ TEST_ASSERT_EVSEL("wrong type", PERF_TYPE_TRACEPOINT == evsel->core.attr.type, evsel);
+ TEST_ASSERT_EVSEL("wrong sample_type",
+ PERF_TP_SAMPLE_TYPE == evsel->core.attr.sample_type, evsel);
+ TEST_ASSERT_EVSEL("wrong sample_period", 1 == evsel->core.attr.sample_period, evsel);
return TEST_OK;
}
@@ -97,34 +122,38 @@ static int test__checkevent_tracepoint_multi(struct evlist *evlist)
{
struct evsel *evsel;
- TEST_ASSERT_VAL("wrong number of entries", evlist->core.nr_entries > 1);
- TEST_ASSERT_VAL("wrong number of groups", 0 == evlist__nr_groups(evlist));
+ TEST_ASSERT_EVLIST("wrong number of entries", evlist->core.nr_entries > 1, evlist);
+ TEST_ASSERT_EVLIST("wrong number of groups", 0 == evlist__nr_groups(evlist), evlist);
evlist__for_each_entry(evlist, evsel) {
- TEST_ASSERT_VAL("wrong type",
- PERF_TYPE_TRACEPOINT == evsel->core.attr.type);
- TEST_ASSERT_VAL("wrong sample_type",
- PERF_TP_SAMPLE_TYPE == evsel->core.attr.sample_type);
- TEST_ASSERT_VAL("wrong sample_period",
- 1 == evsel->core.attr.sample_period);
+ TEST_ASSERT_EVSEL("wrong type",
+ PERF_TYPE_TRACEPOINT == evsel->core.attr.type,
+ evsel);
+ TEST_ASSERT_EVSEL("wrong sample_type",
+ PERF_TP_SAMPLE_TYPE == evsel->core.attr.sample_type,
+ evsel);
+ TEST_ASSERT_EVSEL("wrong sample_period",
+ 1 == evsel->core.attr.sample_period,
+ evsel);
}
return TEST_OK;
}
static int test__checkevent_raw(struct evlist *evlist)
{
- struct perf_evsel *evsel;
+ struct evsel *evsel;
bool raw_type_match = false;
- TEST_ASSERT_VAL("wrong number of entries", 0 != evlist->core.nr_entries);
+ TEST_ASSERT_EVLIST("wrong number of entries", 0 != evlist->core.nr_entries, evlist);
- perf_evlist__for_each_evsel(&evlist->core, evsel) {
+ evlist__for_each_entry(evlist, evsel) {
struct perf_pmu *pmu __maybe_unused = NULL;
bool type_matched = false;
- TEST_ASSERT_VAL("wrong config", test_perf_config(evsel, 0x1a));
- TEST_ASSERT_VAL("event not parsed as raw type",
- evsel->attr.type == PERF_TYPE_RAW);
+ TEST_ASSERT_EVSEL("wrong config", test_hw_config(evsel, 0x1a), evsel);
+ TEST_ASSERT_EVSEL("event not parsed as raw type",
+ evsel->core.attr.type == PERF_TYPE_RAW,
+ evsel);
#if defined(__aarch64__)
/*
* Arm doesn't have a real raw type PMU in sysfs, so raw events
@@ -135,15 +164,15 @@ static int test__checkevent_raw(struct evlist *evlist)
type_matched = raw_type_match = true;
#else
while ((pmu = perf_pmus__scan(pmu)) != NULL) {
- if (pmu->type == evsel->attr.type) {
- TEST_ASSERT_VAL("PMU type expected once", !type_matched);
+ if (pmu->type == evsel->core.attr.type) {
+ TEST_ASSERT_EVSEL("PMU type expected once", !type_matched, evsel);
type_matched = true;
if (pmu->type == PERF_TYPE_RAW)
raw_type_match = true;
}
}
#endif
- TEST_ASSERT_VAL("No PMU found for type", type_matched);
+ TEST_ASSERT_EVSEL("No PMU found for type", type_matched, evsel);
}
TEST_ASSERT_VAL("Raw PMU not matched", raw_type_match);
return TEST_OK;
@@ -153,62 +182,44 @@ static int test__checkevent_numeric(struct evlist *evlist)
{
struct evsel *evsel = evlist__first(evlist);
- TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->core.nr_entries);
- TEST_ASSERT_VAL("wrong type", 1 == evsel->core.attr.type);
- TEST_ASSERT_VAL("wrong config", test_config(evsel, 1));
+ TEST_ASSERT_EVLIST("wrong number of entries", 1 == evlist->core.nr_entries, evlist);
+ TEST_ASSERT_EVSEL("wrong type", 1 == evsel->core.attr.type, evsel);
+ TEST_ASSERT_EVSEL("wrong config", 1 == evsel->core.attr.config, evsel);
return TEST_OK;
}
-static int assert_hw(struct perf_evsel *evsel, enum perf_hw_id id, const char *name)
-{
- struct perf_pmu *pmu;
-
- if (evsel->attr.type == PERF_TYPE_HARDWARE) {
- TEST_ASSERT_VAL("wrong config", test_perf_config(evsel, id));
- return 0;
- }
- pmu = perf_pmus__find_by_type(evsel->attr.type);
-
- TEST_ASSERT_VAL("unexpected PMU type", pmu);
- TEST_ASSERT_VAL("PMU missing event", perf_pmu__have_event(pmu, name));
- return 0;
-}
-
static int test__checkevent_symbolic_name(struct evlist *evlist)
{
- struct perf_evsel *evsel;
-
- TEST_ASSERT_VAL("wrong number of entries", 0 != evlist->core.nr_entries);
+ struct evsel *evsel;
- perf_evlist__for_each_evsel(&evlist->core, evsel) {
- int ret = assert_hw(evsel, PERF_COUNT_HW_INSTRUCTIONS, "instructions");
+ TEST_ASSERT_EVLIST("wrong number of entries", 0 != evlist->core.nr_entries, evlist);
- if (ret)
- return ret;
+ evlist__for_each_entry(evlist, evsel) {
+ TEST_ASSERT_EVSEL("unexpected event",
+ evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS),
+ evsel);
}
-
return TEST_OK;
}
static int test__checkevent_symbolic_name_config(struct evlist *evlist)
{
- struct perf_evsel *evsel;
-
- TEST_ASSERT_VAL("wrong number of entries", 0 != evlist->core.nr_entries);
+ struct evsel *evsel;
- perf_evlist__for_each_evsel(&evlist->core, evsel) {
- int ret = assert_hw(evsel, PERF_COUNT_HW_CPU_CYCLES, "cycles");
+ TEST_ASSERT_EVLIST("wrong number of entries", 0 != evlist->core.nr_entries, evlist);
- if (ret)
- return ret;
+ evlist__for_each_entry(evlist, evsel) {
+ TEST_ASSERT_EVSEL("unexpected event",
+ evsel__match(evsel, HARDWARE, HW_CPU_CYCLES),
+ evsel);
/*
* The period value gets configured within evlist__config,
* while this test executes only parse events method.
*/
- TEST_ASSERT_VAL("wrong period", 0 == evsel->attr.sample_period);
- TEST_ASSERT_VAL("wrong config1", 0 == evsel->attr.config1);
- TEST_ASSERT_VAL("wrong config2", 1 == evsel->attr.config2);
+ TEST_ASSERT_EVSEL("wrong period", 0 == evsel->core.attr.sample_period, evsel);
+ TEST_ASSERT_EVSEL("wrong config1", 0 == evsel->core.attr.config1, evsel);
+ TEST_ASSERT_EVSEL("wrong config2", 1 == evsel->core.attr.config2, evsel);
}
return TEST_OK;
}
@@ -217,21 +228,21 @@ static int test__checkevent_symbolic_alias(struct evlist *evlist)
{
struct evsel *evsel = evlist__first(evlist);
- TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->core.nr_entries);
- TEST_ASSERT_VAL("wrong type", PERF_TYPE_SOFTWARE == evsel->core.attr.type);
- TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_SW_PAGE_FAULTS));
+ TEST_ASSERT_EVLIST("wrong number of entries", 1 == evlist->core.nr_entries, evlist);
+ TEST_ASSERT_EVSEL("wrong type/config", evsel__match(evsel, SOFTWARE, SW_PAGE_FAULTS),
+ evsel);
return TEST_OK;
}
static int test__checkevent_genhw(struct evlist *evlist)
{
- struct perf_evsel *evsel;
+ struct evsel *evsel;
- TEST_ASSERT_VAL("wrong number of entries", 0 != evlist->core.nr_entries);
+ TEST_ASSERT_EVLIST("wrong number of entries", 0 != evlist->core.nr_entries, evlist);
- perf_evlist__for_each_entry(&evlist->core, evsel) {
- TEST_ASSERT_VAL("wrong type", PERF_TYPE_HW_CACHE == evsel->attr.type);
- TEST_ASSERT_VAL("wrong config", test_perf_config(evsel, 1 << 16));
+ evlist__for_each_entry(evlist, evsel) {
+ TEST_ASSERT_EVSEL("wrong type", PERF_TYPE_HW_CACHE == evsel->core.attr.type, evsel);
+ TEST_ASSERT_EVSEL("wrong config", test_hw_config(evsel, 1 << 16), evsel);
}
return TEST_OK;
}
@@ -240,13 +251,13 @@ static int test__checkevent_breakpoint(struct evlist *evlist)
{
struct evsel *evsel = evlist__first(evlist);
- TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->core.nr_entries);
- TEST_ASSERT_VAL("wrong type", PERF_TYPE_BREAKPOINT == evsel->core.attr.type);
- TEST_ASSERT_VAL("wrong config", test_config(evsel, 0));
- TEST_ASSERT_VAL("wrong bp_type", (HW_BREAKPOINT_R | HW_BREAKPOINT_W) ==
- evsel->core.attr.bp_type);
- TEST_ASSERT_VAL("wrong bp_len", HW_BREAKPOINT_LEN_4 ==
- evsel->core.attr.bp_len);
+ TEST_ASSERT_EVLIST("wrong number of entries", 1 == evlist->core.nr_entries, evlist);
+ TEST_ASSERT_EVSEL("wrong type", PERF_TYPE_BREAKPOINT == evsel->core.attr.type, evsel);
+ TEST_ASSERT_EVSEL("wrong config", 0 == evsel->core.attr.config, evsel);
+ TEST_ASSERT_EVSEL("wrong bp_type",
+ (HW_BREAKPOINT_R | HW_BREAKPOINT_W) == evsel->core.attr.bp_type,
+ evsel);
+ TEST_ASSERT_EVSEL("wrong bp_len", HW_BREAKPOINT_LEN_4 == evsel->core.attr.bp_len, evsel);
return TEST_OK;
}
@@ -254,12 +265,12 @@ static int test__checkevent_breakpoint_x(struct evlist *evlist)
{
struct evsel *evsel = evlist__first(evlist);
- TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->core.nr_entries);
- TEST_ASSERT_VAL("wrong type", PERF_TYPE_BREAKPOINT == evsel->core.attr.type);
- TEST_ASSERT_VAL("wrong config", test_config(evsel, 0));
- TEST_ASSERT_VAL("wrong bp_type",
- HW_BREAKPOINT_X == evsel->core.attr.bp_type);
- TEST_ASSERT_VAL("wrong bp_len", default_breakpoint_len() == evsel->core.attr.bp_len);
+ TEST_ASSERT_EVLIST("wrong number of entries", 1 == evlist->core.nr_entries, evlist);
+ TEST_ASSERT_EVSEL("wrong type", PERF_TYPE_BREAKPOINT == evsel->core.attr.type, evsel);
+ TEST_ASSERT_EVSEL("wrong config", 0 == evsel->core.attr.config, evsel);
+ TEST_ASSERT_EVSEL("wrong bp_type", HW_BREAKPOINT_X == evsel->core.attr.bp_type, evsel);
+ TEST_ASSERT_EVSEL("wrong bp_len", default_breakpoint_len() == evsel->core.attr.bp_len,
+ evsel);
return TEST_OK;
}
@@ -267,14 +278,11 @@ static int test__checkevent_breakpoint_r(struct evlist *evlist)
{
struct evsel *evsel = evlist__first(evlist);
- TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->core.nr_entries);
- TEST_ASSERT_VAL("wrong type",
- PERF_TYPE_BREAKPOINT == evsel->core.attr.type);
- TEST_ASSERT_VAL("wrong config", test_config(evsel, 0));
- TEST_ASSERT_VAL("wrong bp_type",
- HW_BREAKPOINT_R == evsel->core.attr.bp_type);
- TEST_ASSERT_VAL("wrong bp_len",
- HW_BREAKPOINT_LEN_4 == evsel->core.attr.bp_len);
+ TEST_ASSERT_EVLIST("wrong number of entries", 1 == evlist->core.nr_entries, evlist);
+ TEST_ASSERT_EVSEL("wrong type", PERF_TYPE_BREAKPOINT == evsel->core.attr.type, evsel);
+ TEST_ASSERT_EVSEL("wrong config", 0 == evsel->core.attr.config, evsel);
+ TEST_ASSERT_EVSEL("wrong bp_type", HW_BREAKPOINT_R == evsel->core.attr.bp_type, evsel);
+ TEST_ASSERT_EVSEL("wrong bp_len", HW_BREAKPOINT_LEN_4 == evsel->core.attr.bp_len, evsel);
return TEST_OK;
}
@@ -282,14 +290,11 @@ static int test__checkevent_breakpoint_w(struct evlist *evlist)
{
struct evsel *evsel = evlist__first(evlist);
- TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->core.nr_entries);
- TEST_ASSERT_VAL("wrong type",
- PERF_TYPE_BREAKPOINT == evsel->core.attr.type);
- TEST_ASSERT_VAL("wrong config", test_config(evsel, 0));
- TEST_ASSERT_VAL("wrong bp_type",
- HW_BREAKPOINT_W == evsel->core.attr.bp_type);
- TEST_ASSERT_VAL("wrong bp_len",
- HW_BREAKPOINT_LEN_4 == evsel->core.attr.bp_len);
+ TEST_ASSERT_EVLIST("wrong number of entries", 1 == evlist->core.nr_entries, evlist);
+ TEST_ASSERT_EVSEL("wrong type", PERF_TYPE_BREAKPOINT == evsel->core.attr.type, evsel);
+ TEST_ASSERT_EVSEL("wrong config", 0 == evsel->core.attr.config, evsel);
+ TEST_ASSERT_EVSEL("wrong bp_type", HW_BREAKPOINT_W == evsel->core.attr.bp_type, evsel);
+ TEST_ASSERT_EVSEL("wrong bp_len", HW_BREAKPOINT_LEN_4 == evsel->core.attr.bp_len, evsel);
return TEST_OK;
}
@@ -297,14 +302,13 @@ static int test__checkevent_breakpoint_rw(struct evlist *evlist)
{
struct evsel *evsel = evlist__first(evlist);
- TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->core.nr_entries);
- TEST_ASSERT_VAL("wrong type",
- PERF_TYPE_BREAKPOINT == evsel->core.attr.type);
- TEST_ASSERT_VAL("wrong config", test_config(evsel, 0));
- TEST_ASSERT_VAL("wrong bp_type",
- (HW_BREAKPOINT_R|HW_BREAKPOINT_W) == evsel->core.attr.bp_type);
- TEST_ASSERT_VAL("wrong bp_len",
- HW_BREAKPOINT_LEN_4 == evsel->core.attr.bp_len);
+ TEST_ASSERT_EVLIST("wrong number of entries", 1 == evlist->core.nr_entries, evlist);
+ TEST_ASSERT_EVSEL("wrong type", PERF_TYPE_BREAKPOINT == evsel->core.attr.type, evsel);
+ TEST_ASSERT_EVSEL("wrong config", 0 == evsel->core.attr.config, evsel);
+ TEST_ASSERT_EVSEL("wrong bp_type",
+ (HW_BREAKPOINT_R|HW_BREAKPOINT_W) == evsel->core.attr.bp_type,
+ evsel);
+ TEST_ASSERT_EVSEL("wrong bp_len", HW_BREAKPOINT_LEN_4 == evsel->core.attr.bp_len, evsel);
return TEST_OK;
}
@@ -312,10 +316,11 @@ static int test__checkevent_tracepoint_modifier(struct evlist *evlist)
{
struct evsel *evsel = evlist__first(evlist);
- TEST_ASSERT_VAL("wrong exclude_user", evsel->core.attr.exclude_user);
- TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel);
- TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv);
- TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip);
+ TEST_ASSERT_EVLIST("wrong number of entries", 1 == evlist->core.nr_entries, evlist);
+ TEST_ASSERT_EVSEL("wrong exclude_user", evsel->core.attr.exclude_user, evsel);
+ TEST_ASSERT_EVSEL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel, evsel);
+ TEST_ASSERT_EVSEL("wrong exclude_hv", evsel->core.attr.exclude_hv, evsel);
+ TEST_ASSERT_EVSEL("wrong precise_ip", !evsel->core.attr.precise_ip, evsel);
return test__checkevent_tracepoint(evlist);
}
@@ -323,15 +328,15 @@ static int test__checkevent_tracepoint_modifier(struct evlist *evlist)
static int
test__checkevent_tracepoint_multi_modifier(struct evlist *evlist)
{
- struct perf_evsel *evsel;
+ struct evsel *evsel;
- TEST_ASSERT_VAL("wrong number of entries", evlist->core.nr_entries > 1);
+ TEST_ASSERT_EVLIST("wrong number of entries", evlist->core.nr_entries > 1, evlist);
- perf_evlist__for_each_entry(&evlist->core, evsel) {
- TEST_ASSERT_VAL("wrong exclude_user", !evsel->attr.exclude_user);
- TEST_ASSERT_VAL("wrong exclude_kernel", evsel->attr.exclude_kernel);
- TEST_ASSERT_VAL("wrong exclude_hv", evsel->attr.exclude_hv);
- TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip);
+ evlist__for_each_entry(evlist, evsel) {
+ TEST_ASSERT_EVSEL("wrong exclude_user", !evsel->core.attr.exclude_user, evsel);
+ TEST_ASSERT_EVSEL("wrong exclude_kernel", evsel->core.attr.exclude_kernel, evsel);
+ TEST_ASSERT_EVSEL("wrong exclude_hv", evsel->core.attr.exclude_hv, evsel);
+ TEST_ASSERT_EVSEL("wrong precise_ip", !evsel->core.attr.precise_ip, evsel);
}
return test__checkevent_tracepoint_multi(evlist);
@@ -339,64 +344,77 @@ test__checkevent_tracepoint_multi_modifier(struct evlist *evlist)
static int test__checkevent_raw_modifier(struct evlist *evlist)
{
- struct perf_evsel *evsel;
+ struct evsel *evsel;
- perf_evlist__for_each_entry(&evlist->core, evsel) {
- TEST_ASSERT_VAL("wrong exclude_user", evsel->attr.exclude_user);
- TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->attr.exclude_kernel);
- TEST_ASSERT_VAL("wrong exclude_hv", evsel->attr.exclude_hv);
- TEST_ASSERT_VAL("wrong precise_ip", evsel->attr.precise_ip);
+ TEST_ASSERT_EVLIST("wrong number of entries", 1 == evlist->core.nr_entries, evlist);
+
+ evlist__for_each_entry(evlist, evsel) {
+ TEST_ASSERT_EVSEL("wrong exclude_user", evsel->core.attr.exclude_user, evsel);
+ TEST_ASSERT_EVSEL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel, evsel);
+ TEST_ASSERT_EVSEL("wrong exclude_hv", evsel->core.attr.exclude_hv, evsel);
+ TEST_ASSERT_EVSEL("wrong precise_ip", evsel->core.attr.precise_ip, evsel);
}
return test__checkevent_raw(evlist);
}
static int test__checkevent_numeric_modifier(struct evlist *evlist)
{
- struct perf_evsel *evsel;
+ struct evsel *evsel;
- perf_evlist__for_each_entry(&evlist->core, evsel) {
- TEST_ASSERT_VAL("wrong exclude_user", evsel->attr.exclude_user);
- TEST_ASSERT_VAL("wrong exclude_kernel", evsel->attr.exclude_kernel);
- TEST_ASSERT_VAL("wrong exclude_hv", !evsel->attr.exclude_hv);
- TEST_ASSERT_VAL("wrong precise_ip", evsel->attr.precise_ip);
+ TEST_ASSERT_EVLIST("wrong number of entries", 1 == evlist->core.nr_entries, evlist);
+
+ evlist__for_each_entry(evlist, evsel) {
+ TEST_ASSERT_EVSEL("wrong exclude_user", evsel->core.attr.exclude_user, evsel);
+ TEST_ASSERT_EVSEL("wrong exclude_kernel", evsel->core.attr.exclude_kernel, evsel);
+ TEST_ASSERT_EVSEL("wrong exclude_hv", !evsel->core.attr.exclude_hv, evsel);
+ TEST_ASSERT_EVSEL("wrong precise_ip", evsel->core.attr.precise_ip, evsel);
}
return test__checkevent_numeric(evlist);
}
static int test__checkevent_symbolic_name_modifier(struct evlist *evlist)
{
- struct perf_evsel *evsel;
+ struct evsel *evsel;
- TEST_ASSERT_VAL("wrong number of entries",
- evlist->core.nr_entries == num_core_entries());
+ TEST_ASSERT_EVLIST("wrong number of entries",
+ evlist->core.nr_entries == num_core_entries(evlist),
+ evlist);
- perf_evlist__for_each_entry(&evlist->core, evsel) {
- TEST_ASSERT_VAL("wrong exclude_user", evsel->attr.exclude_user);
- TEST_ASSERT_VAL("wrong exclude_kernel", evsel->attr.exclude_kernel);
- TEST_ASSERT_VAL("wrong exclude_hv", !evsel->attr.exclude_hv);
- TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip);
+ evlist__for_each_entry(evlist, evsel) {
+ TEST_ASSERT_EVSEL("wrong exclude_user", evsel->core.attr.exclude_user, evsel);
+ TEST_ASSERT_EVSEL("wrong exclude_kernel", evsel->core.attr.exclude_kernel, evsel);
+ TEST_ASSERT_EVSEL("wrong exclude_hv", !evsel->core.attr.exclude_hv, evsel);
+ TEST_ASSERT_EVSEL("wrong precise_ip", !evsel->core.attr.precise_ip, evsel);
}
return test__checkevent_symbolic_name(evlist);
}
static int test__checkevent_exclude_host_modifier(struct evlist *evlist)
{
- struct perf_evsel *evsel;
+ struct evsel *evsel;
+
+ TEST_ASSERT_EVLIST("wrong number of entries",
+ evlist->core.nr_entries == num_core_entries(evlist),
+ evlist);
- perf_evlist__for_each_entry(&evlist->core, evsel) {
- TEST_ASSERT_VAL("wrong exclude guest", !evsel->attr.exclude_guest);
- TEST_ASSERT_VAL("wrong exclude host", evsel->attr.exclude_host);
+ evlist__for_each_entry(evlist, evsel) {
+ TEST_ASSERT_EVSEL("wrong exclude guest", !evsel->core.attr.exclude_guest, evsel);
+ TEST_ASSERT_EVSEL("wrong exclude host", evsel->core.attr.exclude_host, evsel);
}
return test__checkevent_symbolic_name(evlist);
}
static int test__checkevent_exclude_guest_modifier(struct evlist *evlist)
{
- struct perf_evsel *evsel;
+ struct evsel *evsel;
- perf_evlist__for_each_entry(&evlist->core, evsel) {
- TEST_ASSERT_VAL("wrong exclude guest", evsel->attr.exclude_guest);
- TEST_ASSERT_VAL("wrong exclude host", !evsel->attr.exclude_host);
+ TEST_ASSERT_EVLIST("wrong number of entries",
+ evlist->core.nr_entries == num_core_entries(evlist),
+ evlist);
+
+ evlist__for_each_entry(evlist, evsel) {
+ TEST_ASSERT_EVSEL("wrong exclude guest", evsel->core.attr.exclude_guest, evsel);
+ TEST_ASSERT_EVSEL("wrong exclude host", !evsel->core.attr.exclude_host, evsel);
}
return test__checkevent_symbolic_name(evlist);
}
@@ -405,23 +423,28 @@ static int test__checkevent_symbolic_alias_modifier(struct evlist *evlist)
{
struct evsel *evsel = evlist__first(evlist);
- TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user);
- TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel);
- TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv);
- TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip);
+ TEST_ASSERT_EVLIST("wrong number of entries", 1 == evlist->core.nr_entries, evlist);
+ TEST_ASSERT_EVSEL("wrong exclude_user", !evsel->core.attr.exclude_user, evsel);
+ TEST_ASSERT_EVSEL("wrong exclude_kernel", evsel->core.attr.exclude_kernel, evsel);
+ TEST_ASSERT_EVSEL("wrong exclude_hv", evsel->core.attr.exclude_hv, evsel);
+ TEST_ASSERT_EVSEL("wrong precise_ip", !evsel->core.attr.precise_ip, evsel);
return test__checkevent_symbolic_alias(evlist);
}
static int test__checkevent_genhw_modifier(struct evlist *evlist)
{
- struct perf_evsel *evsel;
+ struct evsel *evsel;
- perf_evlist__for_each_entry(&evlist->core, evsel) {
- TEST_ASSERT_VAL("wrong exclude_user", evsel->attr.exclude_user);
- TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->attr.exclude_kernel);
- TEST_ASSERT_VAL("wrong exclude_hv", evsel->attr.exclude_hv);
- TEST_ASSERT_VAL("wrong precise_ip", evsel->attr.precise_ip);
+ TEST_ASSERT_EVLIST("wrong number of entries",
+ evlist->core.nr_entries == num_core_entries(evlist),
+ evlist);
+
+ evlist__for_each_entry(evlist, evsel) {
+ TEST_ASSERT_EVSEL("wrong exclude_user", evsel->core.attr.exclude_user, evsel);
+ TEST_ASSERT_EVSEL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel, evsel);
+ TEST_ASSERT_EVSEL("wrong exclude_hv", evsel->core.attr.exclude_hv, evsel);
+ TEST_ASSERT_EVSEL("wrong precise_ip", evsel->core.attr.precise_ip, evsel);
}
return test__checkevent_genhw(evlist);
}
@@ -430,13 +453,17 @@ static int test__checkevent_exclude_idle_modifier(struct evlist *evlist)
{
struct evsel *evsel = evlist__first(evlist);
- TEST_ASSERT_VAL("wrong exclude idle", evsel->core.attr.exclude_idle);
- TEST_ASSERT_VAL("wrong exclude guest", !evsel->core.attr.exclude_guest);
- TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host);
- TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user);
- TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel);
- TEST_ASSERT_VAL("wrong exclude_hv", !evsel->core.attr.exclude_hv);
- TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip);
+ TEST_ASSERT_EVLIST("wrong number of entries",
+ evlist->core.nr_entries == num_core_entries(evlist),
+ evlist);
+
+ TEST_ASSERT_EVSEL("wrong exclude idle", evsel->core.attr.exclude_idle, evsel);
+ TEST_ASSERT_EVSEL("wrong exclude guest", !evsel->core.attr.exclude_guest, evsel);
+ TEST_ASSERT_EVSEL("wrong exclude host", !evsel->core.attr.exclude_host, evsel);
+ TEST_ASSERT_EVSEL("wrong exclude_user", !evsel->core.attr.exclude_user, evsel);
+ TEST_ASSERT_EVSEL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel, evsel);
+ TEST_ASSERT_EVSEL("wrong exclude_hv", !evsel->core.attr.exclude_hv, evsel);
+ TEST_ASSERT_EVSEL("wrong precise_ip", !evsel->core.attr.precise_ip, evsel);
return test__checkevent_symbolic_name(evlist);
}
@@ -445,13 +472,17 @@ static int test__checkevent_exclude_idle_modifier_1(struct evlist *evlist)
{
struct evsel *evsel = evlist__first(evlist);
- TEST_ASSERT_VAL("wrong exclude idle", evsel->core.attr.exclude_idle);
- TEST_ASSERT_VAL("wrong exclude guest", !evsel->core.attr.exclude_guest);
- TEST_ASSERT_VAL("wrong exclude host", evsel->core.attr.exclude_host);
- TEST_ASSERT_VAL("wrong exclude_user", evsel->core.attr.exclude_user);
- TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel);
- TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv);
- TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip);
+ TEST_ASSERT_EVLIST("wrong number of entries",
+ evlist->core.nr_entries == num_core_entries(evlist),
+ evlist);
+
+ TEST_ASSERT_EVSEL("wrong exclude idle", evsel->core.attr.exclude_idle, evsel);
+ TEST_ASSERT_EVSEL("wrong exclude guest", !evsel->core.attr.exclude_guest, evsel);
+ TEST_ASSERT_EVSEL("wrong exclude host", evsel->core.attr.exclude_host, evsel);
+ TEST_ASSERT_EVSEL("wrong exclude_user", evsel->core.attr.exclude_user, evsel);
+ TEST_ASSERT_EVSEL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel, evsel);
+ TEST_ASSERT_EVSEL("wrong exclude_hv", evsel->core.attr.exclude_hv, evsel);
+ TEST_ASSERT_EVSEL("wrong precise_ip", !evsel->core.attr.precise_ip, evsel);
return test__checkevent_symbolic_name(evlist);
}
@@ -461,11 +492,11 @@ static int test__checkevent_breakpoint_modifier(struct evlist *evlist)
struct evsel *evsel = evlist__first(evlist);
- TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user);
- TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel);
- TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv);
- TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip);
- TEST_ASSERT_VAL("wrong name", evsel__name_is(evsel, "mem:0:u"));
+ TEST_ASSERT_EVSEL("wrong exclude_user", !evsel->core.attr.exclude_user, evsel);
+ TEST_ASSERT_EVSEL("wrong exclude_kernel", evsel->core.attr.exclude_kernel, evsel);
+ TEST_ASSERT_EVSEL("wrong exclude_hv", evsel->core.attr.exclude_hv, evsel);
+ TEST_ASSERT_EVSEL("wrong precise_ip", !evsel->core.attr.precise_ip, evsel);
+ TEST_ASSERT_EVSEL("wrong name", evsel__name_is(evsel, "mem:0:u"), evsel);
return test__checkevent_breakpoint(evlist);
}
@@ -474,11 +505,11 @@ static int test__checkevent_breakpoint_x_modifier(struct evlist *evlist)
{
struct evsel *evsel = evlist__first(evlist);
- TEST_ASSERT_VAL("wrong exclude_user", evsel->core.attr.exclude_user);
- TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel);
- TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv);
- TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip);
- TEST_ASSERT_VAL("wrong name", evsel__name_is(evsel, "mem:0:x:k"));
+ TEST_ASSERT_EVSEL("wrong exclude_user", evsel->core.attr.exclude_user, evsel);
+ TEST_ASSERT_EVSEL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel, evsel);
+ TEST_ASSERT_EVSEL("wrong exclude_hv", evsel->core.attr.exclude_hv, evsel);
+ TEST_ASSERT_EVSEL("wrong precise_ip", !evsel->core.attr.precise_ip, evsel);
+ TEST_ASSERT_EVSEL("wrong name", evsel__name_is(evsel, "mem:0:x:k"), evsel);
return test__checkevent_breakpoint_x(evlist);
}
@@ -487,11 +518,11 @@ static int test__checkevent_breakpoint_r_modifier(struct evlist *evlist)
{
struct evsel *evsel = evlist__first(evlist);
- TEST_ASSERT_VAL("wrong exclude_user", evsel->core.attr.exclude_user);
- TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel);
- TEST_ASSERT_VAL("wrong exclude_hv", !evsel->core.attr.exclude_hv);
- TEST_ASSERT_VAL("wrong precise_ip", evsel->core.attr.precise_ip);
- TEST_ASSERT_VAL("wrong name", evsel__name_is(evsel, "mem:0:r:hp"));
+ TEST_ASSERT_EVSEL("wrong exclude_user", evsel->core.attr.exclude_user, evsel);
+ TEST_ASSERT_EVSEL("wrong exclude_kernel", evsel->core.attr.exclude_kernel, evsel);
+ TEST_ASSERT_EVSEL("wrong exclude_hv", !evsel->core.attr.exclude_hv, evsel);
+ TEST_ASSERT_EVSEL("wrong precise_ip", evsel->core.attr.precise_ip, evsel);
+ TEST_ASSERT_EVSEL("wrong name", evsel__name_is(evsel, "mem:0:r:hp"), evsel);
return test__checkevent_breakpoint_r(evlist);
}
@@ -500,11 +531,11 @@ static int test__checkevent_breakpoint_w_modifier(struct evlist *evlist)
{
struct evsel *evsel = evlist__first(evlist);
- TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user);
- TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel);
- TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv);
- TEST_ASSERT_VAL("wrong precise_ip", evsel->core.attr.precise_ip);
- TEST_ASSERT_VAL("wrong name", evsel__name_is(evsel, "mem:0:w:up"));
+ TEST_ASSERT_EVSEL("wrong exclude_user", !evsel->core.attr.exclude_user, evsel);
+ TEST_ASSERT_EVSEL("wrong exclude_kernel", evsel->core.attr.exclude_kernel, evsel);
+ TEST_ASSERT_EVSEL("wrong exclude_hv", evsel->core.attr.exclude_hv, evsel);
+ TEST_ASSERT_EVSEL("wrong precise_ip", evsel->core.attr.precise_ip, evsel);
+ TEST_ASSERT_EVSEL("wrong name", evsel__name_is(evsel, "mem:0:w:up"), evsel);
return test__checkevent_breakpoint_w(evlist);
}
@@ -513,11 +544,11 @@ static int test__checkevent_breakpoint_rw_modifier(struct evlist *evlist)
{
struct evsel *evsel = evlist__first(evlist);
- TEST_ASSERT_VAL("wrong exclude_user", evsel->core.attr.exclude_user);
- TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel);
- TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv);
- TEST_ASSERT_VAL("wrong precise_ip", evsel->core.attr.precise_ip);
- TEST_ASSERT_VAL("wrong name", evsel__name_is(evsel, "mem:0:rw:kp"));
+ TEST_ASSERT_EVSEL("wrong exclude_user", evsel->core.attr.exclude_user, evsel);
+ TEST_ASSERT_EVSEL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel, evsel);
+ TEST_ASSERT_EVSEL("wrong exclude_hv", evsel->core.attr.exclude_hv, evsel);
+ TEST_ASSERT_EVSEL("wrong precise_ip", evsel->core.attr.precise_ip, evsel);
+ TEST_ASSERT_EVSEL("wrong name", evsel__name_is(evsel, "mem:0:rw:kp"), evsel);
return test__checkevent_breakpoint_rw(evlist);
}
@@ -526,11 +557,11 @@ static int test__checkevent_breakpoint_modifier_name(struct evlist *evlist)
{
struct evsel *evsel = evlist__first(evlist);
- TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user);
- TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel);
- TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv);
- TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip);
- TEST_ASSERT_VAL("wrong name", evsel__name_is(evsel, "breakpoint"));
+ TEST_ASSERT_EVSEL("wrong exclude_user", !evsel->core.attr.exclude_user, evsel);
+ TEST_ASSERT_EVSEL("wrong exclude_kernel", evsel->core.attr.exclude_kernel, evsel);
+ TEST_ASSERT_EVSEL("wrong exclude_hv", evsel->core.attr.exclude_hv, evsel);
+ TEST_ASSERT_EVSEL("wrong precise_ip", !evsel->core.attr.precise_ip, evsel);
+ TEST_ASSERT_EVSEL("wrong name", evsel__name_is(evsel, "breakpoint"), evsel);
return test__checkevent_breakpoint(evlist);
}
@@ -539,11 +570,11 @@ static int test__checkevent_breakpoint_x_modifier_name(struct evlist *evlist)
{
struct evsel *evsel = evlist__first(evlist);
- TEST_ASSERT_VAL("wrong exclude_user", evsel->core.attr.exclude_user);
- TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel);
- TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv);
- TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip);
- TEST_ASSERT_VAL("wrong name", evsel__name_is(evsel, "breakpoint"));
+ TEST_ASSERT_EVSEL("wrong exclude_user", evsel->core.attr.exclude_user, evsel);
+ TEST_ASSERT_EVSEL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel, evsel);
+ TEST_ASSERT_EVSEL("wrong exclude_hv", evsel->core.attr.exclude_hv, evsel);
+ TEST_ASSERT_EVSEL("wrong precise_ip", !evsel->core.attr.precise_ip, evsel);
+ TEST_ASSERT_EVSEL("wrong name", evsel__name_is(evsel, "breakpoint"), evsel);
return test__checkevent_breakpoint_x(evlist);
}
@@ -552,11 +583,11 @@ static int test__checkevent_breakpoint_r_modifier_name(struct evlist *evlist)
{
struct evsel *evsel = evlist__first(evlist);
- TEST_ASSERT_VAL("wrong exclude_user", evsel->core.attr.exclude_user);
- TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel);
- TEST_ASSERT_VAL("wrong exclude_hv", !evsel->core.attr.exclude_hv);
- TEST_ASSERT_VAL("wrong precise_ip", evsel->core.attr.precise_ip);
- TEST_ASSERT_VAL("wrong name", evsel__name_is(evsel, "breakpoint"));
+ TEST_ASSERT_EVSEL("wrong exclude_user", evsel->core.attr.exclude_user, evsel);
+ TEST_ASSERT_EVSEL("wrong exclude_kernel", evsel->core.attr.exclude_kernel, evsel);
+ TEST_ASSERT_EVSEL("wrong exclude_hv", !evsel->core.attr.exclude_hv, evsel);
+ TEST_ASSERT_EVSEL("wrong precise_ip", evsel->core.attr.precise_ip, evsel);
+ TEST_ASSERT_EVSEL("wrong name", evsel__name_is(evsel, "breakpoint"), evsel);
return test__checkevent_breakpoint_r(evlist);
}
@@ -565,11 +596,11 @@ static int test__checkevent_breakpoint_w_modifier_name(struct evlist *evlist)
{
struct evsel *evsel = evlist__first(evlist);
- TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user);
- TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel);
- TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv);
- TEST_ASSERT_VAL("wrong precise_ip", evsel->core.attr.precise_ip);
- TEST_ASSERT_VAL("wrong name", evsel__name_is(evsel, "breakpoint"));
+ TEST_ASSERT_EVSEL("wrong exclude_user", !evsel->core.attr.exclude_user, evsel);
+ TEST_ASSERT_EVSEL("wrong exclude_kernel", evsel->core.attr.exclude_kernel, evsel);
+ TEST_ASSERT_EVSEL("wrong exclude_hv", evsel->core.attr.exclude_hv, evsel);
+ TEST_ASSERT_EVSEL("wrong precise_ip", evsel->core.attr.precise_ip, evsel);
+ TEST_ASSERT_EVSEL("wrong name", evsel__name_is(evsel, "breakpoint"), evsel);
return test__checkevent_breakpoint_w(evlist);
}
@@ -578,11 +609,11 @@ static int test__checkevent_breakpoint_rw_modifier_name(struct evlist *evlist)
{
struct evsel *evsel = evlist__first(evlist);
- TEST_ASSERT_VAL("wrong exclude_user", evsel->core.attr.exclude_user);
- TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel);
- TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv);
- TEST_ASSERT_VAL("wrong precise_ip", evsel->core.attr.precise_ip);
- TEST_ASSERT_VAL("wrong name", evsel__name_is(evsel, "breakpoint"));
+ TEST_ASSERT_EVSEL("wrong exclude_user", evsel->core.attr.exclude_user, evsel);
+ TEST_ASSERT_EVSEL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel, evsel);
+ TEST_ASSERT_EVSEL("wrong exclude_hv", evsel->core.attr.exclude_hv, evsel);
+ TEST_ASSERT_EVSEL("wrong precise_ip", evsel->core.attr.precise_ip, evsel);
+ TEST_ASSERT_EVSEL("wrong name", evsel__name_is(evsel, "breakpoint"), evsel);
return test__checkevent_breakpoint_rw(evlist);
}
@@ -591,15 +622,15 @@ static int test__checkevent_breakpoint_2_events(struct evlist *evlist)
{
struct evsel *evsel = evlist__first(evlist);
- TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->core.nr_entries);
+ TEST_ASSERT_EVSEL("wrong number of entries", 2 == evlist->core.nr_entries, evsel);
- TEST_ASSERT_VAL("wrong type", PERF_TYPE_BREAKPOINT == evsel->core.attr.type);
- TEST_ASSERT_VAL("wrong name", evsel__name_is(evsel, "breakpoint1"));
+ TEST_ASSERT_EVSEL("wrong type", PERF_TYPE_BREAKPOINT == evsel->core.attr.type, evsel);
+ TEST_ASSERT_EVSEL("wrong name", evsel__name_is(evsel, "breakpoint1"), evsel);
evsel = evsel__next(evsel);
- TEST_ASSERT_VAL("wrong type", PERF_TYPE_BREAKPOINT == evsel->core.attr.type);
- TEST_ASSERT_VAL("wrong name", evsel__name_is(evsel, "breakpoint2"));
+ TEST_ASSERT_EVSEL("wrong type", PERF_TYPE_BREAKPOINT == evsel->core.attr.type, evsel);
+ TEST_ASSERT_EVSEL("wrong name", evsel__name_is(evsel, "breakpoint2"), evsel);
return TEST_OK;
}
@@ -608,18 +639,20 @@ static int test__checkevent_pmu(struct evlist *evlist)
{
struct evsel *evsel = evlist__first(evlist);
-
- TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->core.nr_entries);
- TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->core.attr.type);
- TEST_ASSERT_VAL("wrong config", test_config(evsel, 10));
- TEST_ASSERT_VAL("wrong config1", 1 == evsel->core.attr.config1);
- TEST_ASSERT_VAL("wrong config2", 3 == evsel->core.attr.config2);
- TEST_ASSERT_VAL("wrong config3", 0 == evsel->core.attr.config3);
+ struct perf_pmu *core_pmu = perf_pmus__find_core_pmu();
+
+ TEST_ASSERT_EVSEL("wrong number of entries", 1 == evlist->core.nr_entries, evsel);
+ TEST_ASSERT_EVSEL("wrong type", core_pmu->type == evsel->core.attr.type, evsel);
+ TEST_ASSERT_EVSEL("wrong config", test_hw_config(evsel, 10), evsel);
+ TEST_ASSERT_EVSEL("wrong config1", 1 == evsel->core.attr.config1, evsel);
+ TEST_ASSERT_EVSEL("wrong config2", 3 == evsel->core.attr.config2, evsel);
+ TEST_ASSERT_EVSEL("wrong config3", 0 == evsel->core.attr.config3, evsel);
+ TEST_ASSERT_EVSEL("wrong config4", 0 == evsel->core.attr.config4, evsel);
/*
* The period value gets configured within evlist__config,
* while this test executes only parse events method.
*/
- TEST_ASSERT_VAL("wrong period", 0 == evsel->core.attr.sample_period);
+ TEST_ASSERT_EVSEL("wrong period", 0 == evsel->core.attr.sample_period, evsel);
return TEST_OK;
}
@@ -628,40 +661,41 @@ static int test__checkevent_list(struct evlist *evlist)
{
struct evsel *evsel = evlist__first(evlist);
- TEST_ASSERT_VAL("wrong number of entries", 3 <= evlist->core.nr_entries);
+ TEST_ASSERT_EVSEL("wrong number of entries", 3 <= evlist->core.nr_entries, evsel);
/* r1 */
- TEST_ASSERT_VAL("wrong type", PERF_TYPE_TRACEPOINT != evsel->core.attr.type);
+ TEST_ASSERT_EVSEL("wrong type", PERF_TYPE_TRACEPOINT != evsel->core.attr.type, evsel);
while (evsel->core.attr.type != PERF_TYPE_TRACEPOINT) {
- TEST_ASSERT_VAL("wrong config", test_config(evsel, 1));
- TEST_ASSERT_VAL("wrong config1", 0 == evsel->core.attr.config1);
- TEST_ASSERT_VAL("wrong config2", 0 == evsel->core.attr.config2);
- TEST_ASSERT_VAL("wrong config3", 0 == evsel->core.attr.config3);
- TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user);
- TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel);
- TEST_ASSERT_VAL("wrong exclude_hv", !evsel->core.attr.exclude_hv);
- TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip);
+ TEST_ASSERT_EVSEL("wrong config", 1 == evsel->core.attr.config, evsel);
+ TEST_ASSERT_EVSEL("wrong config1", 0 == evsel->core.attr.config1, evsel);
+ TEST_ASSERT_EVSEL("wrong config2", 0 == evsel->core.attr.config2, evsel);
+ TEST_ASSERT_EVSEL("wrong config3", 0 == evsel->core.attr.config3, evsel);
+ TEST_ASSERT_EVSEL("wrong config4", 0 == evsel->core.attr.config4, evsel);
+ TEST_ASSERT_EVSEL("wrong exclude_user", !evsel->core.attr.exclude_user, evsel);
+ TEST_ASSERT_EVSEL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel, evsel);
+ TEST_ASSERT_EVSEL("wrong exclude_hv", !evsel->core.attr.exclude_hv, evsel);
+ TEST_ASSERT_EVSEL("wrong precise_ip", !evsel->core.attr.precise_ip, evsel);
evsel = evsel__next(evsel);
}
/* syscalls:sys_enter_openat:k */
- TEST_ASSERT_VAL("wrong type", PERF_TYPE_TRACEPOINT == evsel->core.attr.type);
- TEST_ASSERT_VAL("wrong sample_type",
- PERF_TP_SAMPLE_TYPE == evsel->core.attr.sample_type);
- TEST_ASSERT_VAL("wrong sample_period", 1 == evsel->core.attr.sample_period);
- TEST_ASSERT_VAL("wrong exclude_user", evsel->core.attr.exclude_user);
- TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel);
- TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv);
- TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip);
+ TEST_ASSERT_EVSEL("wrong type", PERF_TYPE_TRACEPOINT == evsel->core.attr.type, evsel);
+ TEST_ASSERT_EVSEL("wrong sample_type", PERF_TP_SAMPLE_TYPE == evsel->core.attr.sample_type,
+ evsel);
+ TEST_ASSERT_EVSEL("wrong sample_period", 1 == evsel->core.attr.sample_period, evsel);
+ TEST_ASSERT_EVSEL("wrong exclude_user", evsel->core.attr.exclude_user, evsel);
+ TEST_ASSERT_EVSEL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel, evsel);
+ TEST_ASSERT_EVSEL("wrong exclude_hv", evsel->core.attr.exclude_hv, evsel);
+ TEST_ASSERT_EVSEL("wrong precise_ip", !evsel->core.attr.precise_ip, evsel);
/* 1:1:hp */
evsel = evsel__next(evsel);
- TEST_ASSERT_VAL("wrong type", 1 == evsel->core.attr.type);
- TEST_ASSERT_VAL("wrong config", test_config(evsel, 1));
- TEST_ASSERT_VAL("wrong exclude_user", evsel->core.attr.exclude_user);
- TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel);
- TEST_ASSERT_VAL("wrong exclude_hv", !evsel->core.attr.exclude_hv);
- TEST_ASSERT_VAL("wrong precise_ip", evsel->core.attr.precise_ip);
+ TEST_ASSERT_EVSEL("wrong type", 1 == evsel->core.attr.type, evsel);
+ TEST_ASSERT_EVSEL("wrong config", 1 == evsel->core.attr.config, evsel);
+ TEST_ASSERT_EVSEL("wrong exclude_user", evsel->core.attr.exclude_user, evsel);
+ TEST_ASSERT_EVSEL("wrong exclude_kernel", evsel->core.attr.exclude_kernel, evsel);
+ TEST_ASSERT_EVSEL("wrong exclude_hv", !evsel->core.attr.exclude_hv, evsel);
+ TEST_ASSERT_EVSEL("wrong precise_ip", evsel->core.attr.precise_ip, evsel);
return TEST_OK;
}
@@ -669,19 +703,22 @@ static int test__checkevent_list(struct evlist *evlist)
static int test__checkevent_pmu_name(struct evlist *evlist)
{
struct evsel *evsel = evlist__first(evlist);
+ struct perf_pmu *core_pmu = perf_pmus__find_core_pmu();
+ char buf[256];
- /* cpu/config=1,name=krava/u */
- TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->core.nr_entries);
- TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->core.attr.type);
- TEST_ASSERT_VAL("wrong config", test_config(evsel, 1));
- TEST_ASSERT_VAL("wrong name", evsel__name_is(evsel, "krava"));
+ /* default_core/config=1,name=krava/u */
+ TEST_ASSERT_EVLIST("wrong number of entries", 2 == evlist->core.nr_entries, evlist);
+ TEST_ASSERT_EVSEL("wrong type", core_pmu->type == evsel->core.attr.type, evsel);
+ TEST_ASSERT_EVSEL("wrong config", 1 == evsel->core.attr.config, evsel);
+ TEST_ASSERT_EVSEL("wrong name", evsel__name_is(evsel, "krava"), evsel);
- /* cpu/config=2/u" */
+ /* default_core/config=2/u" */
evsel = evsel__next(evsel);
- TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->core.nr_entries);
- TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->core.attr.type);
- TEST_ASSERT_VAL("wrong config", test_config(evsel, 2));
- TEST_ASSERT_VAL("wrong name", evsel__name_is(evsel, "cpu/config=2/u"));
+ TEST_ASSERT_EVSEL("wrong number of entries", 2 == evlist->core.nr_entries, evsel);
+ TEST_ASSERT_EVSEL("wrong type", core_pmu->type == evsel->core.attr.type, evsel);
+ TEST_ASSERT_EVSEL("wrong config", 2 == evsel->core.attr.config, evsel);
+ snprintf(buf, sizeof(buf), "%s/config=2/u", core_pmu->name);
+ TEST_ASSERT_EVSEL("wrong name", evsel__name_is(evsel, buf), evsel);
return TEST_OK;
}
@@ -689,30 +726,31 @@ static int test__checkevent_pmu_name(struct evlist *evlist)
static int test__checkevent_pmu_partial_time_callgraph(struct evlist *evlist)
{
struct evsel *evsel = evlist__first(evlist);
+ struct perf_pmu *core_pmu = perf_pmus__find_core_pmu();
- /* cpu/config=1,call-graph=fp,time,period=100000/ */
- TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->core.nr_entries);
- TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->core.attr.type);
- TEST_ASSERT_VAL("wrong config", test_config(evsel, 1));
+ /* default_core/config=1,call-graph=fp,time,period=100000/ */
+ TEST_ASSERT_EVLIST("wrong number of entries", 2 == evlist->core.nr_entries, evlist);
+ TEST_ASSERT_EVSEL("wrong type", core_pmu->type == evsel->core.attr.type, evsel);
+ TEST_ASSERT_EVSEL("wrong config", 1 == evsel->core.attr.config, evsel);
/*
* The period, time and callgraph value gets configured within evlist__config,
* while this test executes only parse events method.
*/
- TEST_ASSERT_VAL("wrong period", 0 == evsel->core.attr.sample_period);
- TEST_ASSERT_VAL("wrong callgraph", !evsel__has_callchain(evsel));
- TEST_ASSERT_VAL("wrong time", !(PERF_SAMPLE_TIME & evsel->core.attr.sample_type));
+ TEST_ASSERT_EVSEL("wrong period", 0 == evsel->core.attr.sample_period, evsel);
+ TEST_ASSERT_EVSEL("wrong callgraph", !evsel__has_callchain(evsel), evsel);
+ TEST_ASSERT_EVSEL("wrong time", !(PERF_SAMPLE_TIME & evsel->core.attr.sample_type), evsel);
- /* cpu/config=2,call-graph=no,time=0,period=2000/ */
+ /* default_core/config=2,call-graph=no,time=0,period=2000/ */
evsel = evsel__next(evsel);
- TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->core.attr.type);
- TEST_ASSERT_VAL("wrong config", test_config(evsel, 2));
+ TEST_ASSERT_EVSEL("wrong type", core_pmu->type == evsel->core.attr.type, evsel);
+ TEST_ASSERT_EVSEL("wrong config", 2 == evsel->core.attr.config, evsel);
/*
* The period, time and callgraph value gets configured within evlist__config,
* while this test executes only parse events method.
*/
- TEST_ASSERT_VAL("wrong period", 0 == evsel->core.attr.sample_period);
- TEST_ASSERT_VAL("wrong callgraph", !evsel__has_callchain(evsel));
- TEST_ASSERT_VAL("wrong time", !(PERF_SAMPLE_TIME & evsel->core.attr.sample_type));
+ TEST_ASSERT_EVSEL("wrong period", 0 == evsel->core.attr.sample_period, evsel);
+ TEST_ASSERT_EVSEL("wrong callgraph", !evsel__has_callchain(evsel), evsel);
+ TEST_ASSERT_EVSEL("wrong time", !(PERF_SAMPLE_TIME & evsel->core.attr.sample_type), evsel);
return TEST_OK;
}
@@ -720,18 +758,22 @@ static int test__checkevent_pmu_partial_time_callgraph(struct evlist *evlist)
static int test__checkevent_pmu_events(struct evlist *evlist)
{
struct evsel *evsel;
+ struct perf_pmu *core_pmu = perf_pmus__find_core_pmu();
- TEST_ASSERT_VAL("wrong number of entries", 1 <= evlist->core.nr_entries);
+ TEST_ASSERT_EVLIST("wrong number of entries", 1 <= evlist->core.nr_entries, evlist);
evlist__for_each_entry(evlist, evsel) {
- TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->core.attr.type ||
- strcmp(evsel->pmu->name, "cpu"));
- TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user);
- TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel);
- TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv);
- TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip);
- TEST_ASSERT_VAL("wrong pinned", !evsel->core.attr.pinned);
- TEST_ASSERT_VAL("wrong exclusive", !evsel->core.attr.exclusive);
+ TEST_ASSERT_EVSEL("wrong type",
+ core_pmu->type == evsel->core.attr.type ||
+ !strncmp(evsel__name(evsel), evsel->pmu->name,
+ strlen(evsel->pmu->name)),
+ evsel);
+ TEST_ASSERT_EVSEL("wrong exclude_user", !evsel->core.attr.exclude_user, evsel);
+ TEST_ASSERT_EVSEL("wrong exclude_kernel", evsel->core.attr.exclude_kernel, evsel);
+ TEST_ASSERT_EVSEL("wrong exclude_hv", evsel->core.attr.exclude_hv, evsel);
+ TEST_ASSERT_EVSEL("wrong precise_ip", !evsel->core.attr.precise_ip, evsel);
+ TEST_ASSERT_EVSEL("wrong pinned", !evsel->core.attr.pinned, evsel);
+ TEST_ASSERT_EVSEL("wrong exclusive", !evsel->core.attr.exclusive, evsel);
}
return TEST_OK;
}
@@ -745,30 +787,26 @@ static int test__checkevent_pmu_events_mix(struct evlist *evlist)
* The wild card event will be opened at least once, but it may be
* opened on each core PMU.
*/
- TEST_ASSERT_VAL("wrong number of entries", evlist->core.nr_entries >= 2);
+ TEST_ASSERT_EVLIST("wrong number of entries", evlist->core.nr_entries >= 2, evlist);
for (int i = 0; i < evlist->core.nr_entries - 1; i++) {
evsel = (i == 0 ? evlist__first(evlist) : evsel__next(evsel));
/* pmu-event:u */
- TEST_ASSERT_VAL("wrong exclude_user",
- !evsel->core.attr.exclude_user);
- TEST_ASSERT_VAL("wrong exclude_kernel",
- evsel->core.attr.exclude_kernel);
- TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv);
- TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip);
- TEST_ASSERT_VAL("wrong pinned", !evsel->core.attr.pinned);
- TEST_ASSERT_VAL("wrong exclusive", !evsel->core.attr.exclusive);
+ TEST_ASSERT_EVSEL("wrong exclude_user", !evsel->core.attr.exclude_user, evsel);
+ TEST_ASSERT_EVSEL("wrong exclude_kernel", evsel->core.attr.exclude_kernel, evsel);
+ TEST_ASSERT_EVSEL("wrong exclude_hv", evsel->core.attr.exclude_hv, evsel);
+ TEST_ASSERT_EVSEL("wrong precise_ip", !evsel->core.attr.precise_ip, evsel);
+ TEST_ASSERT_EVSEL("wrong pinned", !evsel->core.attr.pinned, evsel);
+ TEST_ASSERT_EVSEL("wrong exclusive", !evsel->core.attr.exclusive, evsel);
}
- /* cpu/pmu-event/u*/
+ /* default_core/pmu-event/u*/
evsel = evsel__next(evsel);
- TEST_ASSERT_VAL("wrong type", evsel__find_pmu(evsel)->is_core);
- TEST_ASSERT_VAL("wrong exclude_user",
- !evsel->core.attr.exclude_user);
- TEST_ASSERT_VAL("wrong exclude_kernel",
- evsel->core.attr.exclude_kernel);
- TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv);
- TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip);
- TEST_ASSERT_VAL("wrong pinned", !evsel->core.attr.pinned);
- TEST_ASSERT_VAL("wrong exclusive", !evsel->core.attr.pinned);
+ TEST_ASSERT_EVSEL("wrong type", evsel__find_pmu(evsel)->is_core, evsel);
+ TEST_ASSERT_EVSEL("wrong exclude_user", !evsel->core.attr.exclude_user, evsel);
+ TEST_ASSERT_EVSEL("wrong exclude_kernel", evsel->core.attr.exclude_kernel, evsel);
+ TEST_ASSERT_EVSEL("wrong exclude_hv", evsel->core.attr.exclude_hv, evsel);
+ TEST_ASSERT_EVSEL("wrong precise_ip", !evsel->core.attr.precise_ip, evsel);
+ TEST_ASSERT_EVSEL("wrong pinned", !evsel->core.attr.pinned, evsel);
+ TEST_ASSERT_EVSEL("wrong exclusive", !evsel->core.attr.pinned, evsel);
return TEST_OK;
}
@@ -813,6 +851,15 @@ static int test__checkterms_simple(struct parse_events_terms *terms)
TEST_ASSERT_VAL("wrong val", term->val.num == 4);
TEST_ASSERT_VAL("wrong config", !strcmp(term->config, "config3"));
+ /* config4=5 */
+ term = list_entry(term->list.next, struct parse_events_term, list);
+ TEST_ASSERT_VAL("wrong type term",
+ term->type_term == PARSE_EVENTS__TERM_TYPE_CONFIG4);
+ TEST_ASSERT_VAL("wrong type val",
+ term->type_val == PARSE_EVENTS__TERM_TYPE_NUM);
+ TEST_ASSERT_VAL("wrong val", term->val.num == 5);
+ TEST_ASSERT_VAL("wrong config", !strcmp(term->config, "config4"));
+
/* umask=1*/
term = list_entry(term->list.next, struct parse_events_term, list);
TEST_ASSERT_VAL("wrong type term",
@@ -855,48 +902,45 @@ static int test__checkterms_simple(struct parse_events_terms *terms)
static int test__group1(struct evlist *evlist)
{
- struct evsel *evsel, *leader;
-
- TEST_ASSERT_VAL("wrong number of entries",
- evlist->core.nr_entries == (num_core_entries() * 2));
- TEST_ASSERT_VAL("wrong number of groups",
- evlist__nr_groups(evlist) == num_core_entries());
+ struct evsel *evsel = NULL, *leader;
- for (int i = 0; i < num_core_entries(); i++) {
- int ret;
+ TEST_ASSERT_EVLIST("wrong number of entries",
+ evlist->core.nr_entries == (num_core_entries(evlist) * 2),
+ evlist);
+ TEST_ASSERT_EVLIST("wrong number of groups",
+ evlist__nr_groups(evlist) == num_core_entries(evlist),
+ evlist);
+ for (int i = 0; i < num_core_entries(evlist); i++) {
/* instructions:k */
evsel = leader = (i == 0 ? evlist__first(evlist) : evsel__next(evsel));
- ret = assert_hw(&evsel->core, PERF_COUNT_HW_INSTRUCTIONS, "instructions");
- if (ret)
- return ret;
-
- TEST_ASSERT_VAL("wrong exclude_user", evsel->core.attr.exclude_user);
- TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel);
- TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv);
- TEST_ASSERT_VAL("wrong exclude guest", !evsel->core.attr.exclude_guest);
- TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host);
- TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip);
- TEST_ASSERT_VAL("wrong leader", evsel__is_group_leader(evsel));
- TEST_ASSERT_VAL("wrong core.nr_members", evsel->core.nr_members == 2);
- TEST_ASSERT_VAL("wrong group_idx", evsel__group_idx(evsel) == 0);
- TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read);
+ TEST_ASSERT_EVSEL("unexpected event",
+ evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS),
+ evsel);
+ TEST_ASSERT_EVSEL("wrong exclude_user", evsel->core.attr.exclude_user, evsel);
+ TEST_ASSERT_EVSEL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel, evsel);
+ TEST_ASSERT_EVSEL("wrong exclude_hv", evsel->core.attr.exclude_hv, evsel);
+ TEST_ASSERT_EVSEL("wrong exclude guest", !evsel->core.attr.exclude_guest, evsel);
+ TEST_ASSERT_EVSEL("wrong exclude host", !evsel->core.attr.exclude_host, evsel);
+ TEST_ASSERT_EVSEL("wrong precise_ip", !evsel->core.attr.precise_ip, evsel);
+ TEST_ASSERT_EVSEL("wrong leader", evsel__is_group_leader(evsel), evsel);
+ TEST_ASSERT_EVSEL("wrong core.nr_members", evsel->core.nr_members == 2, evsel);
+ TEST_ASSERT_EVSEL("wrong group_idx", evsel__group_idx(evsel) == 0, evsel);
+ TEST_ASSERT_EVSEL("wrong sample_read", !evsel->sample_read, evsel);
/* cycles:upp */
evsel = evsel__next(evsel);
- ret = assert_hw(&evsel->core, PERF_COUNT_HW_CPU_CYCLES, "cycles");
- if (ret)
- return ret;
-
- TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user);
- TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel);
- TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv);
- TEST_ASSERT_VAL("wrong exclude guest", !evsel->core.attr.exclude_guest);
- TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host);
- TEST_ASSERT_VAL("wrong precise_ip", evsel->core.attr.precise_ip == 2);
- TEST_ASSERT_VAL("wrong leader", evsel__has_leader(evsel, leader));
- TEST_ASSERT_VAL("wrong group_idx", evsel__group_idx(evsel) == 1);
- TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read);
+ TEST_ASSERT_EVSEL("unexpected event", evsel__match(evsel, HARDWARE, HW_CPU_CYCLES),
+ evsel);
+ TEST_ASSERT_EVSEL("wrong exclude_user", !evsel->core.attr.exclude_user, evsel);
+ TEST_ASSERT_EVSEL("wrong exclude_kernel", evsel->core.attr.exclude_kernel, evsel);
+ TEST_ASSERT_EVSEL("wrong exclude_hv", evsel->core.attr.exclude_hv, evsel);
+ TEST_ASSERT_EVSEL("wrong exclude guest", !evsel->core.attr.exclude_guest, evsel);
+ TEST_ASSERT_EVSEL("wrong exclude host", !evsel->core.attr.exclude_host, evsel);
+ TEST_ASSERT_EVSEL("wrong precise_ip", evsel->core.attr.precise_ip == 2, evsel);
+ TEST_ASSERT_EVSEL("wrong leader", evsel__has_leader(evsel, leader), evsel);
+ TEST_ASSERT_EVSEL("wrong group_idx", evsel__group_idx(evsel) == 1, evsel);
+ TEST_ASSERT_EVSEL("wrong sample_read", !evsel->sample_read, evsel);
}
return TEST_OK;
}
@@ -905,61 +949,66 @@ static int test__group2(struct evlist *evlist)
{
struct evsel *evsel, *leader = NULL;
- TEST_ASSERT_VAL("wrong number of entries",
- evlist->core.nr_entries == (2 * num_core_entries() + 1));
+ TEST_ASSERT_EVLIST("wrong number of entries",
+ evlist->core.nr_entries == (2 * num_core_entries(evlist) + 1),
+ evlist);
/*
* TODO: Currently the software event won't be grouped with the hardware
* event except for 1 PMU.
*/
- TEST_ASSERT_VAL("wrong number of groups", 1 == evlist__nr_groups(evlist));
+ TEST_ASSERT_EVLIST("wrong number of groups", 1 == evlist__nr_groups(evlist), evlist);
evlist__for_each_entry(evlist, evsel) {
- int ret;
-
- if (evsel->core.attr.type == PERF_TYPE_SOFTWARE) {
+ if (evsel__match(evsel, SOFTWARE, SW_PAGE_FAULTS)) {
/* faults + :ku modifier */
leader = evsel;
- TEST_ASSERT_VAL("wrong config",
- test_config(evsel, PERF_COUNT_SW_PAGE_FAULTS));
- TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user);
- TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel);
- TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv);
- TEST_ASSERT_VAL("wrong exclude guest", !evsel->core.attr.exclude_guest);
- TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host);
- TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip);
- TEST_ASSERT_VAL("wrong leader", evsel__is_group_leader(evsel));
- TEST_ASSERT_VAL("wrong core.nr_members", evsel->core.nr_members == 2);
- TEST_ASSERT_VAL("wrong group_idx", evsel__group_idx(evsel) == 0);
- TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read);
+ TEST_ASSERT_EVSEL("wrong exclude_user", !evsel->core.attr.exclude_user,
+ evsel);
+ TEST_ASSERT_EVSEL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel,
+ evsel);
+ TEST_ASSERT_EVSEL("wrong exclude_hv", evsel->core.attr.exclude_hv, evsel);
+ TEST_ASSERT_EVSEL("wrong exclude guest", !evsel->core.attr.exclude_guest,
+ evsel);
+ TEST_ASSERT_EVSEL("wrong exclude host", !evsel->core.attr.exclude_host,
+ evsel);
+ TEST_ASSERT_EVSEL("wrong precise_ip", !evsel->core.attr.precise_ip, evsel);
+ TEST_ASSERT_EVSEL("wrong leader", evsel__is_group_leader(evsel), evsel);
+ TEST_ASSERT_EVSEL("wrong core.nr_members", evsel->core.nr_members == 2,
+ evsel);
+ TEST_ASSERT_EVSEL("wrong group_idx", evsel__group_idx(evsel) == 0, evsel);
+ TEST_ASSERT_EVSEL("wrong sample_read", !evsel->sample_read, evsel);
continue;
}
- if (evsel->core.attr.type == PERF_TYPE_HARDWARE &&
- test_config(evsel, PERF_COUNT_HW_BRANCH_INSTRUCTIONS)) {
+ if (evsel__match(evsel, HARDWARE, HW_BRANCH_INSTRUCTIONS)) {
/* branches + :u modifier */
- TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user);
- TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel);
- TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv);
- TEST_ASSERT_VAL("wrong exclude guest", !evsel->core.attr.exclude_guest);
- TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host);
- TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip);
- if (evsel__has_leader(evsel, leader))
- TEST_ASSERT_VAL("wrong group_idx", evsel__group_idx(evsel) == 1);
- TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read);
+ TEST_ASSERT_EVSEL("wrong exclude_user", !evsel->core.attr.exclude_user,
+ evsel);
+ TEST_ASSERT_EVSEL("wrong exclude_kernel", evsel->core.attr.exclude_kernel,
+ evsel);
+ TEST_ASSERT_EVSEL("wrong exclude_hv", evsel->core.attr.exclude_hv, evsel);
+ TEST_ASSERT_EVSEL("wrong exclude guest", !evsel->core.attr.exclude_guest,
+ evsel);
+ TEST_ASSERT_EVSEL("wrong exclude host", !evsel->core.attr.exclude_host,
+ evsel);
+ TEST_ASSERT_EVSEL("wrong precise_ip", !evsel->core.attr.precise_ip, evsel);
+ if (evsel__has_leader(evsel, leader)) {
+ TEST_ASSERT_EVSEL("wrong group_idx", evsel__group_idx(evsel) == 1,
+ evsel);
+ }
+ TEST_ASSERT_EVSEL("wrong sample_read", !evsel->sample_read, evsel);
continue;
}
/* cycles:k */
- ret = assert_hw(&evsel->core, PERF_COUNT_HW_CPU_CYCLES, "cycles");
- if (ret)
- return ret;
-
- TEST_ASSERT_VAL("wrong exclude_user", evsel->core.attr.exclude_user);
- TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel);
- TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv);
- TEST_ASSERT_VAL("wrong exclude guest", !evsel->core.attr.exclude_guest);
- TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host);
- TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip);
- TEST_ASSERT_VAL("wrong leader", evsel__is_group_leader(evsel));
- TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read);
+ TEST_ASSERT_EVSEL("unexpected event", evsel__match(evsel, HARDWARE, HW_CPU_CYCLES),
+ evsel);
+ TEST_ASSERT_EVSEL("wrong exclude_user", evsel->core.attr.exclude_user, evsel);
+ TEST_ASSERT_EVSEL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel, evsel);
+ TEST_ASSERT_EVSEL("wrong exclude_hv", evsel->core.attr.exclude_hv, evsel);
+ TEST_ASSERT_EVSEL("wrong exclude guest", !evsel->core.attr.exclude_guest, evsel);
+ TEST_ASSERT_EVSEL("wrong exclude host", !evsel->core.attr.exclude_host, evsel);
+ TEST_ASSERT_EVSEL("wrong precise_ip", !evsel->core.attr.precise_ip, evsel);
+ TEST_ASSERT_EVSEL("wrong leader", evsel__is_group_leader(evsel), evsel);
+ TEST_ASSERT_EVSEL("wrong sample_read", !evsel->sample_read, evsel);
}
return TEST_OK;
}
@@ -967,155 +1016,172 @@ static int test__group2(struct evlist *evlist)
static int test__group3(struct evlist *evlist __maybe_unused)
{
struct evsel *evsel, *group1_leader = NULL, *group2_leader = NULL;
- int ret;
- TEST_ASSERT_VAL("wrong number of entries",
- evlist->core.nr_entries == (3 * perf_pmus__num_core_pmus() + 2));
+ TEST_ASSERT_EVLIST("wrong number of entries",
+ evlist->core.nr_entries == (3 * perf_pmus__num_core_pmus() + 2),
+ evlist);
/*
* Currently the software event won't be grouped with the hardware event
* except for 1 PMU. This means there are always just 2 groups
* regardless of the number of core PMUs.
*/
- TEST_ASSERT_VAL("wrong number of groups", 2 == evlist__nr_groups(evlist));
+ TEST_ASSERT_EVLIST("wrong number of groups", 2 == evlist__nr_groups(evlist), evlist);
evlist__for_each_entry(evlist, evsel) {
if (evsel->core.attr.type == PERF_TYPE_TRACEPOINT) {
/* group1 syscalls:sys_enter_openat:H */
group1_leader = evsel;
- TEST_ASSERT_VAL("wrong sample_type",
- evsel->core.attr.sample_type == PERF_TP_SAMPLE_TYPE);
- TEST_ASSERT_VAL("wrong sample_period", 1 == evsel->core.attr.sample_period);
- TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user);
- TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel);
- TEST_ASSERT_VAL("wrong exclude_hv", !evsel->core.attr.exclude_hv);
- TEST_ASSERT_VAL("wrong exclude guest", evsel->core.attr.exclude_guest);
- TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host);
- TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip);
- TEST_ASSERT_VAL("wrong leader", evsel__is_group_leader(evsel));
- TEST_ASSERT_VAL("wrong group name", !strcmp(evsel->group_name, "group1"));
- TEST_ASSERT_VAL("wrong core.nr_members", evsel->core.nr_members == 2);
- TEST_ASSERT_VAL("wrong group_idx", evsel__group_idx(evsel) == 0);
- TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read);
+ TEST_ASSERT_EVSEL("wrong sample_type",
+ evsel->core.attr.sample_type == PERF_TP_SAMPLE_TYPE,
+ evsel);
+ TEST_ASSERT_EVSEL("wrong sample_period",
+ 1 == evsel->core.attr.sample_period,
+ evsel);
+ TEST_ASSERT_EVSEL("wrong exclude_user", !evsel->core.attr.exclude_user,
+ evsel);
+ TEST_ASSERT_EVSEL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel,
+ evsel);
+ TEST_ASSERT_EVSEL("wrong exclude_hv", !evsel->core.attr.exclude_hv, evsel);
+ TEST_ASSERT_EVSEL("wrong exclude guest", evsel->core.attr.exclude_guest,
+ evsel);
+ TEST_ASSERT_EVSEL("wrong exclude host", !evsel->core.attr.exclude_host,
+ evsel);
+ TEST_ASSERT_EVSEL("wrong precise_ip", !evsel->core.attr.precise_ip, evsel);
+ TEST_ASSERT_EVSEL("wrong leader", evsel__is_group_leader(evsel), evsel);
+ TEST_ASSERT_EVSEL("wrong group name", !strcmp(evsel->group_name, "group1"),
+ evsel);
+ TEST_ASSERT_EVSEL("wrong core.nr_members", evsel->core.nr_members == 2,
+ evsel);
+ TEST_ASSERT_EVSEL("wrong group_idx", evsel__group_idx(evsel) == 0, evsel);
+ TEST_ASSERT_EVSEL("wrong sample_read", !evsel->sample_read, evsel);
continue;
}
- if (evsel->core.attr.type == PERF_TYPE_HARDWARE &&
- test_config(evsel, PERF_COUNT_HW_CPU_CYCLES)) {
+ if (evsel__match(evsel, HARDWARE, HW_CPU_CYCLES)) {
if (evsel->core.attr.exclude_user) {
/* group1 cycles:kppp */
- TEST_ASSERT_VAL("wrong exclude_user",
- evsel->core.attr.exclude_user);
- TEST_ASSERT_VAL("wrong exclude_kernel",
- !evsel->core.attr.exclude_kernel);
- TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv);
- TEST_ASSERT_VAL("wrong exclude guest",
- !evsel->core.attr.exclude_guest);
- TEST_ASSERT_VAL("wrong exclude host",
- !evsel->core.attr.exclude_host);
- TEST_ASSERT_VAL("wrong precise_ip",
- evsel->core.attr.precise_ip == 3);
+ TEST_ASSERT_EVSEL("wrong exclude_user",
+ evsel->core.attr.exclude_user, evsel);
+ TEST_ASSERT_EVSEL("wrong exclude_kernel",
+ !evsel->core.attr.exclude_kernel, evsel);
+ TEST_ASSERT_EVSEL("wrong exclude_hv", evsel->core.attr.exclude_hv,
+ evsel);
+ TEST_ASSERT_EVSEL("wrong exclude guest",
+ !evsel->core.attr.exclude_guest, evsel);
+ TEST_ASSERT_EVSEL("wrong exclude host",
+ !evsel->core.attr.exclude_host, evsel);
+ TEST_ASSERT_EVSEL("wrong precise_ip",
+ evsel->core.attr.precise_ip == 3, evsel);
if (evsel__has_leader(evsel, group1_leader)) {
- TEST_ASSERT_VAL("wrong group name", !evsel->group_name);
- TEST_ASSERT_VAL("wrong group_idx",
- evsel__group_idx(evsel) == 1);
+ TEST_ASSERT_EVSEL("wrong group name", !evsel->group_name,
+ evsel);
+ TEST_ASSERT_EVSEL("wrong group_idx",
+ evsel__group_idx(evsel) == 1,
+ evsel);
}
- TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read);
+ TEST_ASSERT_EVSEL("wrong sample_read", !evsel->sample_read, evsel);
} else {
/* group2 cycles + G modifier */
group2_leader = evsel;
- TEST_ASSERT_VAL("wrong exclude_kernel",
- !evsel->core.attr.exclude_kernel);
- TEST_ASSERT_VAL("wrong exclude_hv",
- !evsel->core.attr.exclude_hv);
- TEST_ASSERT_VAL("wrong exclude guest",
- !evsel->core.attr.exclude_guest);
- TEST_ASSERT_VAL("wrong exclude host",
- evsel->core.attr.exclude_host);
- TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip);
- TEST_ASSERT_VAL("wrong leader", evsel__is_group_leader(evsel));
+ TEST_ASSERT_EVSEL("wrong exclude_kernel",
+ !evsel->core.attr.exclude_kernel, evsel);
+ TEST_ASSERT_EVSEL("wrong exclude_hv",
+ !evsel->core.attr.exclude_hv, evsel);
+ TEST_ASSERT_EVSEL("wrong exclude guest",
+ !evsel->core.attr.exclude_guest, evsel);
+ TEST_ASSERT_EVSEL("wrong exclude host",
+ evsel->core.attr.exclude_host, evsel);
+ TEST_ASSERT_EVSEL("wrong precise_ip", !evsel->core.attr.precise_ip,
+ evsel);
+ TEST_ASSERT_EVSEL("wrong leader", evsel__is_group_leader(evsel),
+ evsel);
if (evsel->core.nr_members == 2) {
- TEST_ASSERT_VAL("wrong group_idx",
- evsel__group_idx(evsel) == 0);
+ TEST_ASSERT_EVSEL("wrong group_idx",
+ evsel__group_idx(evsel) == 0,
+ evsel);
}
- TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read);
+ TEST_ASSERT_EVSEL("wrong sample_read", !evsel->sample_read, evsel);
}
continue;
}
if (evsel->core.attr.type == 1) {
/* group2 1:3 + G modifier */
- TEST_ASSERT_VAL("wrong config", test_config(evsel, 3));
- TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user);
- TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel);
- TEST_ASSERT_VAL("wrong exclude_hv", !evsel->core.attr.exclude_hv);
- TEST_ASSERT_VAL("wrong exclude guest", !evsel->core.attr.exclude_guest);
- TEST_ASSERT_VAL("wrong exclude host", evsel->core.attr.exclude_host);
- TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip);
- if (evsel__has_leader(evsel, group2_leader))
- TEST_ASSERT_VAL("wrong group_idx", evsel__group_idx(evsel) == 1);
- TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read);
+ TEST_ASSERT_EVSEL("wrong config", 3 == evsel->core.attr.config, evsel);
+ TEST_ASSERT_EVSEL("wrong exclude_user", !evsel->core.attr.exclude_user,
+ evsel);
+ TEST_ASSERT_EVSEL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel,
+ evsel);
+ TEST_ASSERT_EVSEL("wrong exclude_hv", !evsel->core.attr.exclude_hv, evsel);
+ TEST_ASSERT_EVSEL("wrong exclude guest", !evsel->core.attr.exclude_guest,
+ evsel);
+ TEST_ASSERT_EVSEL("wrong exclude host", evsel->core.attr.exclude_host,
+ evsel);
+ TEST_ASSERT_EVSEL("wrong precise_ip", !evsel->core.attr.precise_ip, evsel);
+ if (evsel__has_leader(evsel, group2_leader)) {
+ TEST_ASSERT_EVSEL("wrong group_idx", evsel__group_idx(evsel) == 1,
+ evsel);
+ }
+ TEST_ASSERT_EVSEL("wrong sample_read", !evsel->sample_read, evsel);
continue;
}
/* instructions:u */
- ret = assert_hw(&evsel->core, PERF_COUNT_HW_INSTRUCTIONS, "instructions");
- if (ret)
- return ret;
-
- TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user);
- TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel);
- TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv);
- TEST_ASSERT_VAL("wrong exclude guest", !evsel->core.attr.exclude_guest);
- TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host);
- TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip);
- TEST_ASSERT_VAL("wrong leader", evsel__is_group_leader(evsel));
- TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read);
+ TEST_ASSERT_EVSEL("unexpected event",
+ evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS),
+ evsel);
+ TEST_ASSERT_EVSEL("wrong exclude_user", !evsel->core.attr.exclude_user, evsel);
+ TEST_ASSERT_EVSEL("wrong exclude_kernel", evsel->core.attr.exclude_kernel, evsel);
+ TEST_ASSERT_EVSEL("wrong exclude_hv", evsel->core.attr.exclude_hv, evsel);
+ TEST_ASSERT_EVSEL("wrong exclude guest", !evsel->core.attr.exclude_guest, evsel);
+ TEST_ASSERT_EVSEL("wrong exclude host", !evsel->core.attr.exclude_host, evsel);
+ TEST_ASSERT_EVSEL("wrong precise_ip", !evsel->core.attr.precise_ip, evsel);
+ TEST_ASSERT_EVSEL("wrong leader", evsel__is_group_leader(evsel), evsel);
+ TEST_ASSERT_EVSEL("wrong sample_read", !evsel->sample_read, evsel);
}
return TEST_OK;
}
static int test__group4(struct evlist *evlist __maybe_unused)
{
- struct evsel *evsel, *leader;
-
- TEST_ASSERT_VAL("wrong number of entries",
- evlist->core.nr_entries == (num_core_entries() * 2));
- TEST_ASSERT_VAL("wrong number of groups",
- num_core_entries() == evlist__nr_groups(evlist));
+ struct evsel *evsel = NULL, *leader;
- for (int i = 0; i < num_core_entries(); i++) {
- int ret;
+ TEST_ASSERT_EVLIST("wrong number of entries",
+ evlist->core.nr_entries == (num_core_entries(evlist) * 2),
+ evlist);
+ TEST_ASSERT_EVLIST("wrong number of groups",
+ num_core_entries(evlist) == evlist__nr_groups(evlist),
+ evlist);
+ for (int i = 0; i < num_core_entries(evlist); i++) {
/* cycles:u + p */
evsel = leader = (i == 0 ? evlist__first(evlist) : evsel__next(evsel));
- ret = assert_hw(&evsel->core, PERF_COUNT_HW_CPU_CYCLES, "cycles");
- if (ret)
- return ret;
-
- TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user);
- TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel);
- TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv);
- TEST_ASSERT_VAL("wrong exclude guest", !evsel->core.attr.exclude_guest);
- TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host);
- TEST_ASSERT_VAL("wrong precise_ip", evsel->core.attr.precise_ip == 1);
- TEST_ASSERT_VAL("wrong group name", !evsel->group_name);
- TEST_ASSERT_VAL("wrong leader", evsel__is_group_leader(evsel));
- TEST_ASSERT_VAL("wrong core.nr_members", evsel->core.nr_members == 2);
- TEST_ASSERT_VAL("wrong group_idx", evsel__group_idx(evsel) == 0);
- TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read);
+ TEST_ASSERT_EVSEL("unexpected event",
+ evsel__match(evsel, HARDWARE, HW_CPU_CYCLES),
+ evsel);
+ TEST_ASSERT_EVSEL("wrong exclude_user", !evsel->core.attr.exclude_user, evsel);
+ TEST_ASSERT_EVSEL("wrong exclude_kernel", evsel->core.attr.exclude_kernel, evsel);
+ TEST_ASSERT_EVSEL("wrong exclude_hv", evsel->core.attr.exclude_hv, evsel);
+ TEST_ASSERT_EVSEL("wrong exclude guest", !evsel->core.attr.exclude_guest, evsel);
+ TEST_ASSERT_EVSEL("wrong exclude host", !evsel->core.attr.exclude_host, evsel);
+ TEST_ASSERT_EVSEL("wrong precise_ip", evsel->core.attr.precise_ip == 1, evsel);
+ TEST_ASSERT_EVSEL("wrong group name", !evsel->group_name, evsel);
+ TEST_ASSERT_EVSEL("wrong leader", evsel__is_group_leader(evsel), evsel);
+ TEST_ASSERT_EVSEL("wrong core.nr_members", evsel->core.nr_members == 2, evsel);
+ TEST_ASSERT_EVSEL("wrong group_idx", evsel__group_idx(evsel) == 0, evsel);
+ TEST_ASSERT_EVSEL("wrong sample_read", !evsel->sample_read, evsel);
/* instructions:kp + p */
evsel = evsel__next(evsel);
- ret = assert_hw(&evsel->core, PERF_COUNT_HW_INSTRUCTIONS, "instructions");
- if (ret)
- return ret;
-
- TEST_ASSERT_VAL("wrong exclude_user", evsel->core.attr.exclude_user);
- TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel);
- TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv);
- TEST_ASSERT_VAL("wrong exclude guest", !evsel->core.attr.exclude_guest);
- TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host);
- TEST_ASSERT_VAL("wrong precise_ip", evsel->core.attr.precise_ip == 2);
- TEST_ASSERT_VAL("wrong leader", evsel__has_leader(evsel, leader));
- TEST_ASSERT_VAL("wrong group_idx", evsel__group_idx(evsel) == 1);
- TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read);
+ TEST_ASSERT_EVSEL("unexpected event",
+ evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS),
+ evsel);
+ TEST_ASSERT_EVSEL("wrong exclude_user", evsel->core.attr.exclude_user, evsel);
+ TEST_ASSERT_EVSEL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel, evsel);
+ TEST_ASSERT_EVSEL("wrong exclude_hv", evsel->core.attr.exclude_hv, evsel);
+ TEST_ASSERT_EVSEL("wrong exclude guest", !evsel->core.attr.exclude_guest, evsel);
+ TEST_ASSERT_EVSEL("wrong exclude host", !evsel->core.attr.exclude_host, evsel);
+ TEST_ASSERT_EVSEL("wrong precise_ip", evsel->core.attr.precise_ip == 2, evsel);
+ TEST_ASSERT_EVSEL("wrong leader", evsel__has_leader(evsel, leader), evsel);
+ TEST_ASSERT_EVSEL("wrong group_idx", evsel__group_idx(evsel) == 1, evsel);
+ TEST_ASSERT_EVSEL("wrong sample_read", !evsel->sample_read, evsel);
}
return TEST_OK;
}
@@ -1123,96 +1189,92 @@ static int test__group4(struct evlist *evlist __maybe_unused)
static int test__group5(struct evlist *evlist __maybe_unused)
{
struct evsel *evsel = NULL, *leader;
- int ret;
- TEST_ASSERT_VAL("wrong number of entries",
- evlist->core.nr_entries == (5 * num_core_entries()));
- TEST_ASSERT_VAL("wrong number of groups",
- evlist__nr_groups(evlist) == (2 * num_core_entries()));
+ TEST_ASSERT_EVLIST("wrong number of entries",
+ evlist->core.nr_entries == (5 * num_core_entries(evlist)),
+ evlist);
+ TEST_ASSERT_EVLIST("wrong number of groups",
+ evlist__nr_groups(evlist) == (2 * num_core_entries(evlist)),
+ evlist);
- for (int i = 0; i < num_core_entries(); i++) {
+ for (int i = 0; i < num_core_entries(evlist); i++) {
/* cycles + G */
evsel = leader = (i == 0 ? evlist__first(evlist) : evsel__next(evsel));
- ret = assert_hw(&evsel->core, PERF_COUNT_HW_CPU_CYCLES, "cycles");
- if (ret)
- return ret;
-
- TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user);
- TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel);
- TEST_ASSERT_VAL("wrong exclude_hv", !evsel->core.attr.exclude_hv);
- TEST_ASSERT_VAL("wrong exclude guest", !evsel->core.attr.exclude_guest);
- TEST_ASSERT_VAL("wrong exclude host", evsel->core.attr.exclude_host);
- TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip);
- TEST_ASSERT_VAL("wrong group name", !evsel->group_name);
- TEST_ASSERT_VAL("wrong leader", evsel__is_group_leader(evsel));
- TEST_ASSERT_VAL("wrong core.nr_members", evsel->core.nr_members == 2);
- TEST_ASSERT_VAL("wrong group_idx", evsel__group_idx(evsel) == 0);
- TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read);
+ TEST_ASSERT_EVSEL("unexpected event",
+ evsel__match(evsel, HARDWARE, HW_CPU_CYCLES),
+ evsel);
+ TEST_ASSERT_EVSEL("wrong exclude_user", !evsel->core.attr.exclude_user, evsel);
+ TEST_ASSERT_EVSEL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel, evsel);
+ TEST_ASSERT_EVSEL("wrong exclude_hv", !evsel->core.attr.exclude_hv, evsel);
+ TEST_ASSERT_EVSEL("wrong exclude guest", !evsel->core.attr.exclude_guest, evsel);
+ TEST_ASSERT_EVSEL("wrong exclude host", evsel->core.attr.exclude_host, evsel);
+ TEST_ASSERT_EVSEL("wrong precise_ip", !evsel->core.attr.precise_ip, evsel);
+ TEST_ASSERT_EVSEL("wrong group name", !evsel->group_name, evsel);
+ TEST_ASSERT_EVSEL("wrong leader", evsel__is_group_leader(evsel), evsel);
+ TEST_ASSERT_EVSEL("wrong core.nr_members", evsel->core.nr_members == 2, evsel);
+ TEST_ASSERT_EVSEL("wrong group_idx", evsel__group_idx(evsel) == 0, evsel);
+ TEST_ASSERT_EVSEL("wrong sample_read", !evsel->sample_read, evsel);
/* instructions + G */
evsel = evsel__next(evsel);
- ret = assert_hw(&evsel->core, PERF_COUNT_HW_INSTRUCTIONS, "instructions");
- if (ret)
- return ret;
-
- TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user);
- TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel);
- TEST_ASSERT_VAL("wrong exclude_hv", !evsel->core.attr.exclude_hv);
- TEST_ASSERT_VAL("wrong exclude guest", !evsel->core.attr.exclude_guest);
- TEST_ASSERT_VAL("wrong exclude host", evsel->core.attr.exclude_host);
- TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip);
- TEST_ASSERT_VAL("wrong leader", evsel__has_leader(evsel, leader));
- TEST_ASSERT_VAL("wrong group_idx", evsel__group_idx(evsel) == 1);
- TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read);
+ TEST_ASSERT_EVSEL("unexpected event",
+ evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS),
+ evsel);
+ TEST_ASSERT_EVSEL("wrong exclude_user", !evsel->core.attr.exclude_user, evsel);
+ TEST_ASSERT_EVSEL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel, evsel);
+ TEST_ASSERT_EVSEL("wrong exclude_hv", !evsel->core.attr.exclude_hv, evsel);
+ TEST_ASSERT_EVSEL("wrong exclude guest", !evsel->core.attr.exclude_guest, evsel);
+ TEST_ASSERT_EVSEL("wrong exclude host", evsel->core.attr.exclude_host, evsel);
+ TEST_ASSERT_EVSEL("wrong precise_ip", !evsel->core.attr.precise_ip, evsel);
+ TEST_ASSERT_EVSEL("wrong leader", evsel__has_leader(evsel, leader), evsel);
+ TEST_ASSERT_EVSEL("wrong group_idx", evsel__group_idx(evsel) == 1, evsel);
+ TEST_ASSERT_EVSEL("wrong sample_read", !evsel->sample_read, evsel);
}
- for (int i = 0; i < num_core_entries(); i++) {
+ for (int i = 0; i < num_core_entries(evlist); i++) {
/* cycles:G */
evsel = leader = evsel__next(evsel);
- ret = assert_hw(&evsel->core, PERF_COUNT_HW_CPU_CYCLES, "cycles");
- if (ret)
- return ret;
-
- TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user);
- TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel);
- TEST_ASSERT_VAL("wrong exclude_hv", !evsel->core.attr.exclude_hv);
- TEST_ASSERT_VAL("wrong exclude guest", !evsel->core.attr.exclude_guest);
- TEST_ASSERT_VAL("wrong exclude host", evsel->core.attr.exclude_host);
- TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip);
- TEST_ASSERT_VAL("wrong group name", !evsel->group_name);
- TEST_ASSERT_VAL("wrong leader", evsel__is_group_leader(evsel));
- TEST_ASSERT_VAL("wrong core.nr_members", evsel->core.nr_members == 2);
- TEST_ASSERT_VAL("wrong group_idx", evsel__group_idx(evsel) == 0);
- TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read);
+ TEST_ASSERT_EVSEL("unexpected event",
+ evsel__match(evsel, HARDWARE, HW_CPU_CYCLES),
+ evsel);
+ TEST_ASSERT_EVSEL("wrong exclude_user", !evsel->core.attr.exclude_user, evsel);
+ TEST_ASSERT_EVSEL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel, evsel);
+ TEST_ASSERT_EVSEL("wrong exclude_hv", !evsel->core.attr.exclude_hv, evsel);
+ TEST_ASSERT_EVSEL("wrong exclude guest", !evsel->core.attr.exclude_guest, evsel);
+ TEST_ASSERT_EVSEL("wrong exclude host", evsel->core.attr.exclude_host, evsel);
+ TEST_ASSERT_EVSEL("wrong precise_ip", !evsel->core.attr.precise_ip, evsel);
+ TEST_ASSERT_EVSEL("wrong group name", !evsel->group_name, evsel);
+ TEST_ASSERT_EVSEL("wrong leader", evsel__is_group_leader(evsel), evsel);
+ TEST_ASSERT_EVSEL("wrong core.nr_members", evsel->core.nr_members == 2, evsel);
+ TEST_ASSERT_EVSEL("wrong group_idx", evsel__group_idx(evsel) == 0, evsel);
+ TEST_ASSERT_EVSEL("wrong sample_read", !evsel->sample_read, evsel);
/* instructions:G */
evsel = evsel__next(evsel);
- ret = assert_hw(&evsel->core, PERF_COUNT_HW_INSTRUCTIONS, "instructions");
- if (ret)
- return ret;
-
- TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user);
- TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel);
- TEST_ASSERT_VAL("wrong exclude_hv", !evsel->core.attr.exclude_hv);
- TEST_ASSERT_VAL("wrong exclude guest", !evsel->core.attr.exclude_guest);
- TEST_ASSERT_VAL("wrong exclude host", evsel->core.attr.exclude_host);
- TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip);
- TEST_ASSERT_VAL("wrong leader", evsel__has_leader(evsel, leader));
- TEST_ASSERT_VAL("wrong group_idx", evsel__group_idx(evsel) == 1);
+ TEST_ASSERT_EVSEL("unexpected event",
+ evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS),
+ evsel);
+ TEST_ASSERT_EVSEL("wrong exclude_user", !evsel->core.attr.exclude_user, evsel);
+ TEST_ASSERT_EVSEL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel, evsel);
+ TEST_ASSERT_EVSEL("wrong exclude_hv", !evsel->core.attr.exclude_hv, evsel);
+ TEST_ASSERT_EVSEL("wrong exclude guest", !evsel->core.attr.exclude_guest, evsel);
+ TEST_ASSERT_EVSEL("wrong exclude host", evsel->core.attr.exclude_host, evsel);
+ TEST_ASSERT_EVSEL("wrong precise_ip", !evsel->core.attr.precise_ip, evsel);
+ TEST_ASSERT_EVSEL("wrong leader", evsel__has_leader(evsel, leader), evsel);
+ TEST_ASSERT_EVSEL("wrong group_idx", evsel__group_idx(evsel) == 1, evsel);
}
- for (int i = 0; i < num_core_entries(); i++) {
+ for (int i = 0; i < num_core_entries(evlist); i++) {
/* cycles */
evsel = evsel__next(evsel);
- ret = assert_hw(&evsel->core, PERF_COUNT_HW_CPU_CYCLES, "cycles");
- if (ret)
- return ret;
-
- TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user);
- TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel);
- TEST_ASSERT_VAL("wrong exclude_hv", !evsel->core.attr.exclude_hv);
- TEST_ASSERT_VAL("wrong exclude guest", !evsel->core.attr.exclude_guest);
- TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host);
- TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip);
- TEST_ASSERT_VAL("wrong leader", evsel__is_group_leader(evsel));
+ TEST_ASSERT_EVSEL("unexpected event",
+ evsel__match(evsel, HARDWARE, HW_CPU_CYCLES),
+ evsel);
+ TEST_ASSERT_EVSEL("wrong exclude_user", !evsel->core.attr.exclude_user, evsel);
+ TEST_ASSERT_EVSEL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel, evsel);
+ TEST_ASSERT_EVSEL("wrong exclude_hv", !evsel->core.attr.exclude_hv, evsel);
+ TEST_ASSERT_EVSEL("wrong exclude guest", !evsel->core.attr.exclude_guest, evsel);
+ TEST_ASSERT_EVSEL("wrong exclude host", !evsel->core.attr.exclude_host, evsel);
+ TEST_ASSERT_EVSEL("wrong precise_ip", !evsel->core.attr.precise_ip, evsel);
+ TEST_ASSERT_EVSEL("wrong leader", evsel__is_group_leader(evsel), evsel);
}
return TEST_OK;
}
@@ -1221,45 +1283,43 @@ static int test__group_gh1(struct evlist *evlist)
{
struct evsel *evsel = NULL, *leader;
- TEST_ASSERT_VAL("wrong number of entries",
- evlist->core.nr_entries == (2 * num_core_entries()));
- TEST_ASSERT_VAL("wrong number of groups",
- evlist__nr_groups(evlist) == num_core_entries());
-
- for (int i = 0; i < num_core_entries(); i++) {
- int ret;
+ TEST_ASSERT_EVLIST("wrong number of entries",
+ evlist->core.nr_entries == (2 * num_core_entries(evlist)),
+ evlist);
+ TEST_ASSERT_EVLIST("wrong number of groups",
+ evlist__nr_groups(evlist) == num_core_entries(evlist),
+ evlist);
+ for (int i = 0; i < num_core_entries(evlist); i++) {
/* cycles + :H group modifier */
evsel = leader = (i == 0 ? evlist__first(evlist) : evsel__next(evsel));
- ret = assert_hw(&evsel->core, PERF_COUNT_HW_CPU_CYCLES, "cycles");
- if (ret)
- return ret;
-
- TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user);
- TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel);
- TEST_ASSERT_VAL("wrong exclude_hv", !evsel->core.attr.exclude_hv);
- TEST_ASSERT_VAL("wrong exclude guest", evsel->core.attr.exclude_guest);
- TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host);
- TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip);
- TEST_ASSERT_VAL("wrong group name", !evsel->group_name);
- TEST_ASSERT_VAL("wrong leader", evsel__is_group_leader(evsel));
- TEST_ASSERT_VAL("wrong core.nr_members", evsel->core.nr_members == 2);
- TEST_ASSERT_VAL("wrong group_idx", evsel__group_idx(evsel) == 0);
+ TEST_ASSERT_EVSEL("unexpected event",
+ evsel__match(evsel, HARDWARE, HW_CPU_CYCLES),
+ evsel);
+ TEST_ASSERT_EVSEL("wrong exclude_user", !evsel->core.attr.exclude_user, evsel);
+ TEST_ASSERT_EVSEL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel, evsel);
+ TEST_ASSERT_EVSEL("wrong exclude_hv", !evsel->core.attr.exclude_hv, evsel);
+ TEST_ASSERT_EVSEL("wrong exclude guest", evsel->core.attr.exclude_guest, evsel);
+ TEST_ASSERT_EVSEL("wrong exclude host", !evsel->core.attr.exclude_host, evsel);
+ TEST_ASSERT_EVSEL("wrong precise_ip", !evsel->core.attr.precise_ip, evsel);
+ TEST_ASSERT_EVSEL("wrong group name", !evsel->group_name, evsel);
+ TEST_ASSERT_EVSEL("wrong leader", evsel__is_group_leader(evsel), evsel);
+ TEST_ASSERT_EVSEL("wrong core.nr_members", evsel->core.nr_members == 2, evsel);
+ TEST_ASSERT_EVSEL("wrong group_idx", evsel__group_idx(evsel) == 0, evsel);
/* cache-misses:G + :H group modifier */
evsel = evsel__next(evsel);
- ret = assert_hw(&evsel->core, PERF_COUNT_HW_CACHE_MISSES, "cache-misses");
- if (ret)
- return ret;
-
- TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user);
- TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel);
- TEST_ASSERT_VAL("wrong exclude_hv", !evsel->core.attr.exclude_hv);
- TEST_ASSERT_VAL("wrong exclude guest", !evsel->core.attr.exclude_guest);
- TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host);
- TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip);
- TEST_ASSERT_VAL("wrong leader", evsel__has_leader(evsel, leader));
- TEST_ASSERT_VAL("wrong group_idx", evsel__group_idx(evsel) == 1);
+ TEST_ASSERT_EVSEL("unexpected event",
+ evsel__match(evsel, HARDWARE, HW_CACHE_MISSES),
+ evsel);
+ TEST_ASSERT_EVSEL("wrong exclude_user", !evsel->core.attr.exclude_user, evsel);
+ TEST_ASSERT_EVSEL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel, evsel);
+ TEST_ASSERT_EVSEL("wrong exclude_hv", !evsel->core.attr.exclude_hv, evsel);
+ TEST_ASSERT_EVSEL("wrong exclude guest", !evsel->core.attr.exclude_guest, evsel);
+ TEST_ASSERT_EVSEL("wrong exclude host", !evsel->core.attr.exclude_host, evsel);
+ TEST_ASSERT_EVSEL("wrong precise_ip", !evsel->core.attr.precise_ip, evsel);
+ TEST_ASSERT_EVSEL("wrong leader", evsel__has_leader(evsel, leader), evsel);
+ TEST_ASSERT_EVSEL("wrong group_idx", evsel__group_idx(evsel) == 1, evsel);
}
return TEST_OK;
}
@@ -1268,45 +1328,43 @@ static int test__group_gh2(struct evlist *evlist)
{
struct evsel *evsel = NULL, *leader;
- TEST_ASSERT_VAL("wrong number of entries",
- evlist->core.nr_entries == (2 * num_core_entries()));
- TEST_ASSERT_VAL("wrong number of groups",
- evlist__nr_groups(evlist) == num_core_entries());
-
- for (int i = 0; i < num_core_entries(); i++) {
- int ret;
+ TEST_ASSERT_EVLIST("wrong number of entries",
+ evlist->core.nr_entries == (2 * num_core_entries(evlist)),
+ evlist);
+ TEST_ASSERT_EVLIST("wrong number of groups",
+ evlist__nr_groups(evlist) == num_core_entries(evlist),
+ evlist);
+ for (int i = 0; i < num_core_entries(evlist); i++) {
/* cycles + :G group modifier */
evsel = leader = (i == 0 ? evlist__first(evlist) : evsel__next(evsel));
- ret = assert_hw(&evsel->core, PERF_COUNT_HW_CPU_CYCLES, "cycles");
- if (ret)
- return ret;
-
- TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user);
- TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel);
- TEST_ASSERT_VAL("wrong exclude_hv", !evsel->core.attr.exclude_hv);
- TEST_ASSERT_VAL("wrong exclude guest", !evsel->core.attr.exclude_guest);
- TEST_ASSERT_VAL("wrong exclude host", evsel->core.attr.exclude_host);
- TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip);
- TEST_ASSERT_VAL("wrong group name", !evsel->group_name);
- TEST_ASSERT_VAL("wrong leader", evsel__is_group_leader(evsel));
- TEST_ASSERT_VAL("wrong core.nr_members", evsel->core.nr_members == 2);
- TEST_ASSERT_VAL("wrong group_idx", evsel__group_idx(evsel) == 0);
+ TEST_ASSERT_EVSEL("unexpected event",
+ evsel__match(evsel, HARDWARE, HW_CPU_CYCLES),
+ evsel);
+ TEST_ASSERT_EVSEL("wrong exclude_user", !evsel->core.attr.exclude_user, evsel);
+ TEST_ASSERT_EVSEL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel, evsel);
+ TEST_ASSERT_EVSEL("wrong exclude_hv", !evsel->core.attr.exclude_hv, evsel);
+ TEST_ASSERT_EVSEL("wrong exclude guest", !evsel->core.attr.exclude_guest, evsel);
+ TEST_ASSERT_EVSEL("wrong exclude host", evsel->core.attr.exclude_host, evsel);
+ TEST_ASSERT_EVSEL("wrong precise_ip", !evsel->core.attr.precise_ip, evsel);
+ TEST_ASSERT_EVSEL("wrong group name", !evsel->group_name, evsel);
+ TEST_ASSERT_EVSEL("wrong leader", evsel__is_group_leader(evsel), evsel);
+ TEST_ASSERT_EVSEL("wrong core.nr_members", evsel->core.nr_members == 2, evsel);
+ TEST_ASSERT_EVSEL("wrong group_idx", evsel__group_idx(evsel) == 0, evsel);
/* cache-misses:H + :G group modifier */
evsel = evsel__next(evsel);
- ret = assert_hw(&evsel->core, PERF_COUNT_HW_CACHE_MISSES, "cache-misses");
- if (ret)
- return ret;
-
- TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user);
- TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel);
- TEST_ASSERT_VAL("wrong exclude_hv", !evsel->core.attr.exclude_hv);
- TEST_ASSERT_VAL("wrong exclude guest", !evsel->core.attr.exclude_guest);
- TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host);
- TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip);
- TEST_ASSERT_VAL("wrong leader", evsel__has_leader(evsel, leader));
- TEST_ASSERT_VAL("wrong group_idx", evsel__group_idx(evsel) == 1);
+ TEST_ASSERT_EVSEL("unexpected event",
+ evsel__match(evsel, HARDWARE, HW_CACHE_MISSES),
+ evsel);
+ TEST_ASSERT_EVSEL("wrong exclude_user", !evsel->core.attr.exclude_user, evsel);
+ TEST_ASSERT_EVSEL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel, evsel);
+ TEST_ASSERT_EVSEL("wrong exclude_hv", !evsel->core.attr.exclude_hv, evsel);
+ TEST_ASSERT_EVSEL("wrong exclude guest", !evsel->core.attr.exclude_guest, evsel);
+ TEST_ASSERT_EVSEL("wrong exclude host", !evsel->core.attr.exclude_host, evsel);
+ TEST_ASSERT_EVSEL("wrong precise_ip", !evsel->core.attr.precise_ip, evsel);
+ TEST_ASSERT_EVSEL("wrong leader", evsel__has_leader(evsel, leader), evsel);
+ TEST_ASSERT_EVSEL("wrong group_idx", evsel__group_idx(evsel) == 1, evsel);
}
return TEST_OK;
}
@@ -1315,45 +1373,43 @@ static int test__group_gh3(struct evlist *evlist)
{
struct evsel *evsel = NULL, *leader;
- TEST_ASSERT_VAL("wrong number of entries",
- evlist->core.nr_entries == (2 * num_core_entries()));
- TEST_ASSERT_VAL("wrong number of groups",
- evlist__nr_groups(evlist) == num_core_entries());
-
- for (int i = 0; i < num_core_entries(); i++) {
- int ret;
+ TEST_ASSERT_EVLIST("wrong number of entries",
+ evlist->core.nr_entries == (2 * num_core_entries(evlist)),
+ evlist);
+ TEST_ASSERT_EVLIST("wrong number of groups",
+ evlist__nr_groups(evlist) == num_core_entries(evlist),
+ evlist);
+ for (int i = 0; i < num_core_entries(evlist); i++) {
/* cycles:G + :u group modifier */
evsel = leader = (i == 0 ? evlist__first(evlist) : evsel__next(evsel));
- ret = assert_hw(&evsel->core, PERF_COUNT_HW_CPU_CYCLES, "cycles");
- if (ret)
- return ret;
-
- TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user);
- TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel);
- TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv);
- TEST_ASSERT_VAL("wrong exclude guest", !evsel->core.attr.exclude_guest);
- TEST_ASSERT_VAL("wrong exclude host", evsel->core.attr.exclude_host);
- TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip);
- TEST_ASSERT_VAL("wrong group name", !evsel->group_name);
- TEST_ASSERT_VAL("wrong leader", evsel__is_group_leader(evsel));
- TEST_ASSERT_VAL("wrong core.nr_members", evsel->core.nr_members == 2);
- TEST_ASSERT_VAL("wrong group_idx", evsel__group_idx(evsel) == 0);
+ TEST_ASSERT_EVSEL("unexpected event",
+ evsel__match(evsel, HARDWARE, HW_CPU_CYCLES),
+ evsel);
+ TEST_ASSERT_EVSEL("wrong exclude_user", !evsel->core.attr.exclude_user, evsel);
+ TEST_ASSERT_EVSEL("wrong exclude_kernel", evsel->core.attr.exclude_kernel, evsel);
+ TEST_ASSERT_EVSEL("wrong exclude_hv", evsel->core.attr.exclude_hv, evsel);
+ TEST_ASSERT_EVSEL("wrong exclude guest", !evsel->core.attr.exclude_guest, evsel);
+ TEST_ASSERT_EVSEL("wrong exclude host", evsel->core.attr.exclude_host, evsel);
+ TEST_ASSERT_EVSEL("wrong precise_ip", !evsel->core.attr.precise_ip, evsel);
+ TEST_ASSERT_EVSEL("wrong group name", !evsel->group_name, evsel);
+ TEST_ASSERT_EVSEL("wrong leader", evsel__is_group_leader(evsel), evsel);
+ TEST_ASSERT_EVSEL("wrong core.nr_members", evsel->core.nr_members == 2, evsel);
+ TEST_ASSERT_EVSEL("wrong group_idx", evsel__group_idx(evsel) == 0, evsel);
/* cache-misses:H + :u group modifier */
evsel = evsel__next(evsel);
- ret = assert_hw(&evsel->core, PERF_COUNT_HW_CACHE_MISSES, "cache-misses");
- if (ret)
- return ret;
-
- TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user);
- TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel);
- TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv);
- TEST_ASSERT_VAL("wrong exclude guest", evsel->core.attr.exclude_guest);
- TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host);
- TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip);
- TEST_ASSERT_VAL("wrong leader", evsel__has_leader(evsel, leader));
- TEST_ASSERT_VAL("wrong group_idx", evsel__group_idx(evsel) == 1);
+ TEST_ASSERT_EVSEL("unexpected event",
+ evsel__match(evsel, HARDWARE, HW_CACHE_MISSES),
+ evsel);
+ TEST_ASSERT_EVSEL("wrong exclude_user", !evsel->core.attr.exclude_user, evsel);
+ TEST_ASSERT_EVSEL("wrong exclude_kernel", evsel->core.attr.exclude_kernel, evsel);
+ TEST_ASSERT_EVSEL("wrong exclude_hv", evsel->core.attr.exclude_hv, evsel);
+ TEST_ASSERT_EVSEL("wrong exclude guest", evsel->core.attr.exclude_guest, evsel);
+ TEST_ASSERT_EVSEL("wrong exclude host", !evsel->core.attr.exclude_host, evsel);
+ TEST_ASSERT_EVSEL("wrong precise_ip", !evsel->core.attr.precise_ip, evsel);
+ TEST_ASSERT_EVSEL("wrong leader", evsel__has_leader(evsel, leader), evsel);
+ TEST_ASSERT_EVSEL("wrong group_idx", evsel__group_idx(evsel) == 1, evsel);
}
return TEST_OK;
}
@@ -1362,45 +1418,43 @@ static int test__group_gh4(struct evlist *evlist)
{
struct evsel *evsel = NULL, *leader;
- TEST_ASSERT_VAL("wrong number of entries",
- evlist->core.nr_entries == (2 * num_core_entries()));
- TEST_ASSERT_VAL("wrong number of groups",
- evlist__nr_groups(evlist) == num_core_entries());
-
- for (int i = 0; i < num_core_entries(); i++) {
- int ret;
+ TEST_ASSERT_EVLIST("wrong number of entries",
+ evlist->core.nr_entries == (2 * num_core_entries(evlist)),
+ evlist);
+ TEST_ASSERT_EVLIST("wrong number of groups",
+ evlist__nr_groups(evlist) == num_core_entries(evlist),
+ evlist);
+ for (int i = 0; i < num_core_entries(evlist); i++) {
/* cycles:G + :uG group modifier */
evsel = leader = (i == 0 ? evlist__first(evlist) : evsel__next(evsel));
- ret = assert_hw(&evsel->core, PERF_COUNT_HW_CPU_CYCLES, "cycles");
- if (ret)
- return ret;
-
- TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user);
- TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel);
- TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv);
- TEST_ASSERT_VAL("wrong exclude guest", !evsel->core.attr.exclude_guest);
- TEST_ASSERT_VAL("wrong exclude host", evsel->core.attr.exclude_host);
- TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip);
- TEST_ASSERT_VAL("wrong group name", !evsel->group_name);
- TEST_ASSERT_VAL("wrong leader", evsel__is_group_leader(evsel));
- TEST_ASSERT_VAL("wrong core.nr_members", evsel->core.nr_members == 2);
- TEST_ASSERT_VAL("wrong group_idx", evsel__group_idx(evsel) == 0);
+ TEST_ASSERT_EVSEL("unexpected event",
+ evsel__match(evsel, HARDWARE, HW_CPU_CYCLES),
+ evsel);
+ TEST_ASSERT_EVSEL("wrong exclude_user", !evsel->core.attr.exclude_user, evsel);
+ TEST_ASSERT_EVSEL("wrong exclude_kernel", evsel->core.attr.exclude_kernel, evsel);
+ TEST_ASSERT_EVSEL("wrong exclude_hv", evsel->core.attr.exclude_hv, evsel);
+ TEST_ASSERT_EVSEL("wrong exclude guest", !evsel->core.attr.exclude_guest, evsel);
+ TEST_ASSERT_EVSEL("wrong exclude host", evsel->core.attr.exclude_host, evsel);
+ TEST_ASSERT_EVSEL("wrong precise_ip", !evsel->core.attr.precise_ip, evsel);
+ TEST_ASSERT_EVSEL("wrong group name", !evsel->group_name, evsel);
+ TEST_ASSERT_EVSEL("wrong leader", evsel__is_group_leader(evsel), evsel);
+ TEST_ASSERT_EVSEL("wrong core.nr_members", evsel->core.nr_members == 2, evsel);
+ TEST_ASSERT_EVSEL("wrong group_idx", evsel__group_idx(evsel) == 0, evsel);
/* cache-misses:H + :uG group modifier */
evsel = evsel__next(evsel);
- ret = assert_hw(&evsel->core, PERF_COUNT_HW_CACHE_MISSES, "cache-misses");
- if (ret)
- return ret;
-
- TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user);
- TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel);
- TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv);
- TEST_ASSERT_VAL("wrong exclude guest", !evsel->core.attr.exclude_guest);
- TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host);
- TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip);
- TEST_ASSERT_VAL("wrong leader", evsel__has_leader(evsel, leader));
- TEST_ASSERT_VAL("wrong group_idx", evsel__group_idx(evsel) == 1);
+ TEST_ASSERT_EVSEL("unexpected event",
+ evsel__match(evsel, HARDWARE, HW_CACHE_MISSES),
+ evsel);
+ TEST_ASSERT_EVSEL("wrong exclude_user", !evsel->core.attr.exclude_user, evsel);
+ TEST_ASSERT_EVSEL("wrong exclude_kernel", evsel->core.attr.exclude_kernel, evsel);
+ TEST_ASSERT_EVSEL("wrong exclude_hv", evsel->core.attr.exclude_hv, evsel);
+ TEST_ASSERT_EVSEL("wrong exclude guest", !evsel->core.attr.exclude_guest, evsel);
+ TEST_ASSERT_EVSEL("wrong exclude host", !evsel->core.attr.exclude_host, evsel);
+ TEST_ASSERT_EVSEL("wrong precise_ip", !evsel->core.attr.precise_ip, evsel);
+ TEST_ASSERT_EVSEL("wrong leader", evsel__has_leader(evsel, leader), evsel);
+ TEST_ASSERT_EVSEL("wrong group_idx", evsel__group_idx(evsel) == 1, evsel);
}
return TEST_OK;
}
@@ -1409,58 +1463,54 @@ static int test__leader_sample1(struct evlist *evlist)
{
struct evsel *evsel = NULL, *leader;
- TEST_ASSERT_VAL("wrong number of entries",
- evlist->core.nr_entries == (3 * num_core_entries()));
-
- for (int i = 0; i < num_core_entries(); i++) {
- int ret;
+ TEST_ASSERT_EVLIST("wrong number of entries",
+ evlist->core.nr_entries == (3 * num_core_entries(evlist)),
+ evlist);
+ for (int i = 0; i < num_core_entries(evlist); i++) {
/* cycles - sampling group leader */
evsel = leader = (i == 0 ? evlist__first(evlist) : evsel__next(evsel));
- ret = assert_hw(&evsel->core, PERF_COUNT_HW_CPU_CYCLES, "cycles");
- if (ret)
- return ret;
-
- TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user);
- TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel);
- TEST_ASSERT_VAL("wrong exclude_hv", !evsel->core.attr.exclude_hv);
- TEST_ASSERT_VAL("wrong exclude guest", !evsel->core.attr.exclude_guest);
- TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host);
- TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip);
- TEST_ASSERT_VAL("wrong group name", !evsel->group_name);
- TEST_ASSERT_VAL("wrong leader", evsel__has_leader(evsel, leader));
- TEST_ASSERT_VAL("wrong sample_read", evsel->sample_read);
+ TEST_ASSERT_EVSEL("unexpected event",
+ evsel__match(evsel, HARDWARE, HW_CPU_CYCLES),
+ evsel);
+ TEST_ASSERT_EVSEL("wrong exclude_user", !evsel->core.attr.exclude_user, evsel);
+ TEST_ASSERT_EVSEL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel, evsel);
+ TEST_ASSERT_EVSEL("wrong exclude_hv", !evsel->core.attr.exclude_hv, evsel);
+ TEST_ASSERT_EVSEL("wrong exclude guest", !evsel->core.attr.exclude_guest, evsel);
+ TEST_ASSERT_EVSEL("wrong exclude host", !evsel->core.attr.exclude_host, evsel);
+ TEST_ASSERT_EVSEL("wrong precise_ip", !evsel->core.attr.precise_ip, evsel);
+ TEST_ASSERT_EVSEL("wrong group name", !evsel->group_name, evsel);
+ TEST_ASSERT_EVSEL("wrong leader", evsel__has_leader(evsel, leader), evsel);
+ TEST_ASSERT_EVSEL("wrong sample_read", evsel->sample_read, evsel);
/* cache-misses - not sampling */
evsel = evsel__next(evsel);
- ret = assert_hw(&evsel->core, PERF_COUNT_HW_CACHE_MISSES, "cache-misses");
- if (ret)
- return ret;
-
- TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user);
- TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel);
- TEST_ASSERT_VAL("wrong exclude_hv", !evsel->core.attr.exclude_hv);
- TEST_ASSERT_VAL("wrong exclude guest", !evsel->core.attr.exclude_guest);
- TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host);
- TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip);
- TEST_ASSERT_VAL("wrong leader", evsel__has_leader(evsel, leader));
- TEST_ASSERT_VAL("wrong sample_read", evsel->sample_read);
+ TEST_ASSERT_EVSEL("unexpected event",
+ evsel__match(evsel, HARDWARE, HW_CACHE_MISSES),
+ evsel);
+ TEST_ASSERT_EVSEL("wrong exclude_user", !evsel->core.attr.exclude_user, evsel);
+ TEST_ASSERT_EVSEL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel, evsel);
+ TEST_ASSERT_EVSEL("wrong exclude_hv", !evsel->core.attr.exclude_hv, evsel);
+ TEST_ASSERT_EVSEL("wrong exclude guest", !evsel->core.attr.exclude_guest, evsel);
+ TEST_ASSERT_EVSEL("wrong exclude host", !evsel->core.attr.exclude_host, evsel);
+ TEST_ASSERT_EVSEL("wrong precise_ip", !evsel->core.attr.precise_ip, evsel);
+ TEST_ASSERT_EVSEL("wrong leader", evsel__has_leader(evsel, leader), evsel);
+ TEST_ASSERT_EVSEL("wrong sample_read", evsel->sample_read, evsel);
/* branch-misses - not sampling */
evsel = evsel__next(evsel);
- ret = assert_hw(&evsel->core, PERF_COUNT_HW_BRANCH_MISSES, "branch-misses");
- if (ret)
- return ret;
-
- TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user);
- TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel);
- TEST_ASSERT_VAL("wrong exclude_hv", !evsel->core.attr.exclude_hv);
- TEST_ASSERT_VAL("wrong exclude guest", !evsel->core.attr.exclude_guest);
- TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host);
- TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip);
- TEST_ASSERT_VAL("wrong group name", !evsel->group_name);
- TEST_ASSERT_VAL("wrong leader", evsel__has_leader(evsel, leader));
- TEST_ASSERT_VAL("wrong sample_read", evsel->sample_read);
+ TEST_ASSERT_EVSEL("unexpected event",
+ evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES),
+ evsel);
+ TEST_ASSERT_EVSEL("wrong exclude_user", !evsel->core.attr.exclude_user, evsel);
+ TEST_ASSERT_EVSEL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel, evsel);
+ TEST_ASSERT_EVSEL("wrong exclude_hv", !evsel->core.attr.exclude_hv, evsel);
+ TEST_ASSERT_EVSEL("wrong exclude guest", !evsel->core.attr.exclude_guest, evsel);
+ TEST_ASSERT_EVSEL("wrong exclude host", !evsel->core.attr.exclude_host, evsel);
+ TEST_ASSERT_EVSEL("wrong precise_ip", !evsel->core.attr.precise_ip, evsel);
+ TEST_ASSERT_EVSEL("wrong group name", !evsel->group_name, evsel);
+ TEST_ASSERT_EVSEL("wrong leader", evsel__has_leader(evsel, leader), evsel);
+ TEST_ASSERT_EVSEL("wrong sample_read", evsel->sample_read, evsel);
}
return TEST_OK;
}
@@ -1469,43 +1519,40 @@ static int test__leader_sample2(struct evlist *evlist __maybe_unused)
{
struct evsel *evsel = NULL, *leader;
- TEST_ASSERT_VAL("wrong number of entries",
- evlist->core.nr_entries == (2 * num_core_entries()));
-
- for (int i = 0; i < num_core_entries(); i++) {
- int ret;
+ TEST_ASSERT_EVLIST("wrong number of entries",
+ evlist->core.nr_entries == (2 * num_core_entries(evlist)),
+ evlist);
+ for (int i = 0; i < num_core_entries(evlist); i++) {
/* instructions - sampling group leader */
evsel = leader = (i == 0 ? evlist__first(evlist) : evsel__next(evsel));
- ret = assert_hw(&evsel->core, PERF_COUNT_HW_INSTRUCTIONS, "instructions");
- if (ret)
- return ret;
-
- TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user);
- TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel);
- TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv);
- TEST_ASSERT_VAL("wrong exclude guest", !evsel->core.attr.exclude_guest);
- TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host);
- TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip);
- TEST_ASSERT_VAL("wrong group name", !evsel->group_name);
- TEST_ASSERT_VAL("wrong leader", evsel__has_leader(evsel, leader));
- TEST_ASSERT_VAL("wrong sample_read", evsel->sample_read);
+ TEST_ASSERT_EVSEL("unexpected event",
+ evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS),
+ evsel);
+ TEST_ASSERT_EVSEL("wrong exclude_user", !evsel->core.attr.exclude_user, evsel);
+ TEST_ASSERT_EVSEL("wrong exclude_kernel", evsel->core.attr.exclude_kernel, evsel);
+ TEST_ASSERT_EVSEL("wrong exclude_hv", evsel->core.attr.exclude_hv, evsel);
+ TEST_ASSERT_EVSEL("wrong exclude guest", !evsel->core.attr.exclude_guest, evsel);
+ TEST_ASSERT_EVSEL("wrong exclude host", !evsel->core.attr.exclude_host, evsel);
+ TEST_ASSERT_EVSEL("wrong precise_ip", !evsel->core.attr.precise_ip, evsel);
+ TEST_ASSERT_EVSEL("wrong group name", !evsel->group_name, evsel);
+ TEST_ASSERT_EVSEL("wrong leader", evsel__has_leader(evsel, leader), evsel);
+ TEST_ASSERT_EVSEL("wrong sample_read", evsel->sample_read, evsel);
/* branch-misses - not sampling */
evsel = evsel__next(evsel);
- ret = assert_hw(&evsel->core, PERF_COUNT_HW_BRANCH_MISSES, "branch-misses");
- if (ret)
- return ret;
-
- TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user);
- TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel);
- TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv);
- TEST_ASSERT_VAL("wrong exclude guest", !evsel->core.attr.exclude_guest);
- TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host);
- TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip);
- TEST_ASSERT_VAL("wrong group name", !evsel->group_name);
- TEST_ASSERT_VAL("wrong leader", evsel__has_leader(evsel, leader));
- TEST_ASSERT_VAL("wrong sample_read", evsel->sample_read);
+ TEST_ASSERT_EVSEL("unexpected event",
+ evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES),
+ evsel);
+ TEST_ASSERT_EVSEL("wrong exclude_user", !evsel->core.attr.exclude_user, evsel);
+ TEST_ASSERT_EVSEL("wrong exclude_kernel", evsel->core.attr.exclude_kernel, evsel);
+ TEST_ASSERT_EVSEL("wrong exclude_hv", evsel->core.attr.exclude_hv, evsel);
+ TEST_ASSERT_EVSEL("wrong exclude guest", !evsel->core.attr.exclude_guest, evsel);
+ TEST_ASSERT_EVSEL("wrong exclude host", !evsel->core.attr.exclude_host, evsel);
+ TEST_ASSERT_EVSEL("wrong precise_ip", !evsel->core.attr.precise_ip, evsel);
+ TEST_ASSERT_EVSEL("wrong group name", !evsel->group_name, evsel);
+ TEST_ASSERT_EVSEL("wrong leader", evsel__has_leader(evsel, leader), evsel);
+ TEST_ASSERT_EVSEL("wrong sample_read", evsel->sample_read, evsel);
}
return TEST_OK;
}
@@ -1514,16 +1561,17 @@ static int test__checkevent_pinned_modifier(struct evlist *evlist)
{
struct evsel *evsel = NULL;
- TEST_ASSERT_VAL("wrong number of entries",
- evlist->core.nr_entries == num_core_entries());
+ TEST_ASSERT_EVLIST("wrong number of entries",
+ evlist->core.nr_entries == num_core_entries(evlist),
+ evlist);
- for (int i = 0; i < num_core_entries(); i++) {
+ for (int i = 0; i < num_core_entries(evlist); i++) {
evsel = (i == 0 ? evlist__first(evlist) : evsel__next(evsel));
- TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user);
- TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel);
- TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv);
- TEST_ASSERT_VAL("wrong precise_ip", evsel->core.attr.precise_ip);
- TEST_ASSERT_VAL("wrong pinned", evsel->core.attr.pinned);
+ TEST_ASSERT_EVSEL("wrong exclude_user", !evsel->core.attr.exclude_user, evsel);
+ TEST_ASSERT_EVSEL("wrong exclude_kernel", evsel->core.attr.exclude_kernel, evsel);
+ TEST_ASSERT_EVSEL("wrong exclude_hv", evsel->core.attr.exclude_hv, evsel);
+ TEST_ASSERT_EVSEL("wrong precise_ip", evsel->core.attr.precise_ip, evsel);
+ TEST_ASSERT_EVSEL("wrong pinned", evsel->core.attr.pinned, evsel);
}
return test__checkevent_symbolic_name(evlist);
}
@@ -1532,39 +1580,35 @@ static int test__pinned_group(struct evlist *evlist)
{
struct evsel *evsel = NULL, *leader;
- TEST_ASSERT_VAL("wrong number of entries",
- evlist->core.nr_entries == (3 * num_core_entries()));
-
- for (int i = 0; i < num_core_entries(); i++) {
- int ret;
+ TEST_ASSERT_EVLIST("wrong number of entries",
+ evlist->core.nr_entries == (3 * num_core_entries(evlist)),
+ evlist);
+ for (int i = 0; i < num_core_entries(evlist); i++) {
/* cycles - group leader */
evsel = leader = (i == 0 ? evlist__first(evlist) : evsel__next(evsel));
- ret = assert_hw(&evsel->core, PERF_COUNT_HW_CPU_CYCLES, "cycles");
- if (ret)
- return ret;
-
- TEST_ASSERT_VAL("wrong group name", !evsel->group_name);
- TEST_ASSERT_VAL("wrong leader", evsel__has_leader(evsel, leader));
+ TEST_ASSERT_EVSEL("unexpected event",
+ evsel__match(evsel, HARDWARE, HW_CPU_CYCLES),
+ evsel);
+ TEST_ASSERT_EVSEL("wrong group name", !evsel->group_name, evsel);
+ TEST_ASSERT_EVSEL("wrong leader", evsel__has_leader(evsel, leader), evsel);
/* TODO: The group modifier is not copied to the split group leader. */
if (perf_pmus__num_core_pmus() == 1)
- TEST_ASSERT_VAL("wrong pinned", evsel->core.attr.pinned);
+ TEST_ASSERT_EVSEL("wrong pinned", evsel->core.attr.pinned, evsel);
/* cache-misses - can not be pinned, but will go on with the leader */
evsel = evsel__next(evsel);
- ret = assert_hw(&evsel->core, PERF_COUNT_HW_CACHE_MISSES, "cache-misses");
- if (ret)
- return ret;
-
- TEST_ASSERT_VAL("wrong pinned", !evsel->core.attr.pinned);
+ TEST_ASSERT_EVSEL("unexpected event",
+ evsel__match(evsel, HARDWARE, HW_CACHE_MISSES),
+ evsel);
+ TEST_ASSERT_EVSEL("wrong pinned", !evsel->core.attr.pinned, evsel);
/* branch-misses - ditto */
evsel = evsel__next(evsel);
- ret = assert_hw(&evsel->core, PERF_COUNT_HW_BRANCH_MISSES, "branch-misses");
- if (ret)
- return ret;
-
- TEST_ASSERT_VAL("wrong pinned", !evsel->core.attr.pinned);
+ TEST_ASSERT_EVSEL("unexpected event",
+ evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES),
+ evsel);
+ TEST_ASSERT_EVSEL("wrong pinned", !evsel->core.attr.pinned, evsel);
}
return TEST_OK;
}
@@ -1573,11 +1617,14 @@ static int test__checkevent_exclusive_modifier(struct evlist *evlist)
{
struct evsel *evsel = evlist__first(evlist);
- TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user);
- TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel);
- TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv);
- TEST_ASSERT_VAL("wrong precise_ip", evsel->core.attr.precise_ip);
- TEST_ASSERT_VAL("wrong exclusive", evsel->core.attr.exclusive);
+ TEST_ASSERT_EVLIST("wrong number of entries",
+ evlist->core.nr_entries == num_core_entries(evlist),
+ evlist);
+ TEST_ASSERT_EVSEL("wrong exclude_user", !evsel->core.attr.exclude_user, evsel);
+ TEST_ASSERT_EVSEL("wrong exclude_kernel", evsel->core.attr.exclude_kernel, evsel);
+ TEST_ASSERT_EVSEL("wrong exclude_hv", evsel->core.attr.exclude_hv, evsel);
+ TEST_ASSERT_EVSEL("wrong precise_ip", evsel->core.attr.precise_ip, evsel);
+ TEST_ASSERT_EVSEL("wrong exclusive", evsel->core.attr.exclusive, evsel);
return test__checkevent_symbolic_name(evlist);
}
@@ -1586,39 +1633,35 @@ static int test__exclusive_group(struct evlist *evlist)
{
struct evsel *evsel = NULL, *leader;
- TEST_ASSERT_VAL("wrong number of entries",
- evlist->core.nr_entries == 3 * num_core_entries());
-
- for (int i = 0; i < num_core_entries(); i++) {
- int ret;
+ TEST_ASSERT_EVLIST("wrong number of entries",
+ evlist->core.nr_entries == 3 * num_core_entries(evlist),
+ evlist);
+ for (int i = 0; i < num_core_entries(evlist); i++) {
/* cycles - group leader */
evsel = leader = (i == 0 ? evlist__first(evlist) : evsel__next(evsel));
- ret = assert_hw(&evsel->core, PERF_COUNT_HW_CPU_CYCLES, "cycles");
- if (ret)
- return ret;
-
- TEST_ASSERT_VAL("wrong group name", !evsel->group_name);
- TEST_ASSERT_VAL("wrong leader", evsel__has_leader(evsel, leader));
+ TEST_ASSERT_EVSEL("unexpected event",
+ evsel__match(evsel, HARDWARE, HW_CPU_CYCLES),
+ evsel);
+ TEST_ASSERT_EVSEL("wrong group name", !evsel->group_name, evsel);
+ TEST_ASSERT_EVSEL("wrong leader", evsel__has_leader(evsel, leader), evsel);
/* TODO: The group modifier is not copied to the split group leader. */
if (perf_pmus__num_core_pmus() == 1)
- TEST_ASSERT_VAL("wrong exclusive", evsel->core.attr.exclusive);
+ TEST_ASSERT_EVSEL("wrong exclusive", evsel->core.attr.exclusive, evsel);
/* cache-misses - can not be pinned, but will go on with the leader */
evsel = evsel__next(evsel);
- ret = assert_hw(&evsel->core, PERF_COUNT_HW_CACHE_MISSES, "cache-misses");
- if (ret)
- return ret;
-
- TEST_ASSERT_VAL("wrong exclusive", !evsel->core.attr.exclusive);
+ TEST_ASSERT_EVSEL("unexpected event",
+ evsel__match(evsel, HARDWARE, HW_CACHE_MISSES),
+ evsel);
+ TEST_ASSERT_EVSEL("wrong exclusive", !evsel->core.attr.exclusive, evsel);
/* branch-misses - ditto */
evsel = evsel__next(evsel);
- ret = assert_hw(&evsel->core, PERF_COUNT_HW_BRANCH_MISSES, "branch-misses");
- if (ret)
- return ret;
-
- TEST_ASSERT_VAL("wrong exclusive", !evsel->core.attr.exclusive);
+ TEST_ASSERT_EVSEL("unexpected event",
+ evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES),
+ evsel);
+ TEST_ASSERT_EVSEL("wrong exclusive", !evsel->core.attr.exclusive, evsel);
}
return TEST_OK;
}
@@ -1626,13 +1669,13 @@ static int test__checkevent_breakpoint_len(struct evlist *evlist)
{
struct evsel *evsel = evlist__first(evlist);
- TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->core.nr_entries);
- TEST_ASSERT_VAL("wrong type", PERF_TYPE_BREAKPOINT == evsel->core.attr.type);
- TEST_ASSERT_VAL("wrong config", test_config(evsel, 0));
- TEST_ASSERT_VAL("wrong bp_type", (HW_BREAKPOINT_R | HW_BREAKPOINT_W) ==
- evsel->core.attr.bp_type);
- TEST_ASSERT_VAL("wrong bp_len", HW_BREAKPOINT_LEN_1 ==
- evsel->core.attr.bp_len);
+ TEST_ASSERT_EVLIST("wrong number of entries", 1 == evlist->core.nr_entries, evlist);
+ TEST_ASSERT_EVSEL("wrong type", PERF_TYPE_BREAKPOINT == evsel->core.attr.type, evsel);
+ TEST_ASSERT_EVSEL("wrong config", 0 == evsel->core.attr.config, evsel);
+ TEST_ASSERT_EVSEL("wrong bp_type",
+ (HW_BREAKPOINT_R | HW_BREAKPOINT_W) == evsel->core.attr.bp_type,
+ evsel);
+ TEST_ASSERT_EVSEL("wrong bp_len", HW_BREAKPOINT_LEN_1 == evsel->core.attr.bp_len, evsel);
return TEST_OK;
}
@@ -1641,13 +1684,11 @@ static int test__checkevent_breakpoint_len_w(struct evlist *evlist)
{
struct evsel *evsel = evlist__first(evlist);
- TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->core.nr_entries);
- TEST_ASSERT_VAL("wrong type", PERF_TYPE_BREAKPOINT == evsel->core.attr.type);
- TEST_ASSERT_VAL("wrong config", test_config(evsel, 0));
- TEST_ASSERT_VAL("wrong bp_type", HW_BREAKPOINT_W ==
- evsel->core.attr.bp_type);
- TEST_ASSERT_VAL("wrong bp_len", HW_BREAKPOINT_LEN_2 ==
- evsel->core.attr.bp_len);
+ TEST_ASSERT_EVLIST("wrong number of entries", 1 == evlist->core.nr_entries, evlist);
+ TEST_ASSERT_EVSEL("wrong type", PERF_TYPE_BREAKPOINT == evsel->core.attr.type, evsel);
+ TEST_ASSERT_EVSEL("wrong config", 0 == evsel->core.attr.config, evsel);
+ TEST_ASSERT_EVSEL("wrong bp_type", HW_BREAKPOINT_W == evsel->core.attr.bp_type, evsel);
+ TEST_ASSERT_EVSEL("wrong bp_len", HW_BREAKPOINT_LEN_2 == evsel->core.attr.bp_len, evsel);
return TEST_OK;
}
@@ -1657,10 +1698,11 @@ test__checkevent_breakpoint_len_rw_modifier(struct evlist *evlist)
{
struct evsel *evsel = evlist__first(evlist);
- TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user);
- TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel);
- TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv);
- TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip);
+ TEST_ASSERT_EVLIST("wrong number of entries", 1 == evlist->core.nr_entries, evlist);
+ TEST_ASSERT_EVSEL("wrong exclude_user", !evsel->core.attr.exclude_user, evsel);
+ TEST_ASSERT_EVSEL("wrong exclude_kernel", evsel->core.attr.exclude_kernel, evsel);
+ TEST_ASSERT_EVSEL("wrong exclude_hv", evsel->core.attr.exclude_hv, evsel);
+ TEST_ASSERT_EVSEL("wrong precise_ip", !evsel->core.attr.precise_ip, evsel);
return test__checkevent_breakpoint_rw(evlist);
}
@@ -1669,10 +1711,10 @@ static int test__checkevent_precise_max_modifier(struct evlist *evlist)
{
struct evsel *evsel = evlist__first(evlist);
- TEST_ASSERT_VAL("wrong number of entries",
- evlist->core.nr_entries == 1 + num_core_entries());
- TEST_ASSERT_VAL("wrong type", PERF_TYPE_SOFTWARE == evsel->core.attr.type);
- TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_SW_TASK_CLOCK));
+ TEST_ASSERT_EVLIST("wrong number of entries",
+ evlist->core.nr_entries == 1 + num_core_entries(evlist),
+ evlist);
+ TEST_ASSERT_EVSEL("wrong type/config", evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK), evsel);
return TEST_OK;
}
@@ -1680,7 +1722,10 @@ static int test__checkevent_config_symbol(struct evlist *evlist)
{
struct evsel *evsel = evlist__first(evlist);
- TEST_ASSERT_VAL("wrong name setting", evsel__name_is(evsel, "insn"));
+ TEST_ASSERT_EVLIST("wrong number of entries",
+ evlist->core.nr_entries == num_core_entries(evlist),
+ evlist);
+ TEST_ASSERT_EVSEL("wrong name setting", evsel__name_is(evsel, "insn"), evsel);
return TEST_OK;
}
@@ -1688,7 +1733,8 @@ static int test__checkevent_config_raw(struct evlist *evlist)
{
struct evsel *evsel = evlist__first(evlist);
- TEST_ASSERT_VAL("wrong name setting", evsel__name_is(evsel, "rawpmu"));
+ TEST_ASSERT_EVLIST("wrong number of entries", 1 == evlist->core.nr_entries, evlist);
+ TEST_ASSERT_EVSEL("wrong name setting", evsel__name_is(evsel, "rawpmu"), evsel);
return TEST_OK;
}
@@ -1696,7 +1742,8 @@ static int test__checkevent_config_num(struct evlist *evlist)
{
struct evsel *evsel = evlist__first(evlist);
- TEST_ASSERT_VAL("wrong name setting", evsel__name_is(evsel, "numpmu"));
+ TEST_ASSERT_EVLIST("wrong number of entries", 1 == evlist->core.nr_entries, evlist);
+ TEST_ASSERT_EVSEL("wrong name setting", evsel__name_is(evsel, "numpmu"), evsel);
return TEST_OK;
}
@@ -1704,18 +1751,16 @@ static int test__checkevent_config_cache(struct evlist *evlist)
{
struct evsel *evsel = evlist__first(evlist);
- TEST_ASSERT_VAL("wrong name setting", evsel__name_is(evsel, "cachepmu"));
+ TEST_ASSERT_EVLIST("wrong number of entries",
+ evlist->core.nr_entries == num_core_entries(evlist),
+ evlist);
+ TEST_ASSERT_EVSEL("wrong name setting", evsel__name_is(evsel, "cachepmu"), evsel);
return test__checkevent_genhw(evlist);
}
-static bool test__pmu_cpu_valid(void)
+static bool test__pmu_default_core_event_valid(void)
{
- return !!perf_pmus__find("cpu");
-}
-
-static bool test__pmu_cpu_event_valid(void)
-{
- struct perf_pmu *pmu = perf_pmus__find("cpu");
+ struct perf_pmu *pmu = perf_pmus__find_core_pmu();
if (!pmu)
return false;
@@ -1732,7 +1777,56 @@ static int test__intel_pt(struct evlist *evlist)
{
struct evsel *evsel = evlist__first(evlist);
- TEST_ASSERT_VAL("wrong name setting", evsel__name_is(evsel, "intel_pt//u"));
+ TEST_ASSERT_EVLIST("wrong number of entries", 1 == evlist->core.nr_entries, evlist);
+ TEST_ASSERT_EVSEL("wrong name setting", evsel__name_is(evsel, "intel_pt//u"), evsel);
+ return TEST_OK;
+}
+
+static bool test__acr_valid(void)
+{
+ struct perf_pmu *pmu = NULL;
+
+ while ((pmu = perf_pmus__scan_core(pmu)) != NULL) {
+ if (perf_pmu__has_format(pmu, "acr_mask"))
+ return true;
+ }
+
+ return false;
+}
+
+static int test__ratio_to_prev(struct evlist *evlist)
+{
+ struct evsel *evsel;
+
+ TEST_ASSERT_VAL("wrong number of entries", 2 * perf_pmus__num_core_pmus() == evlist->core.nr_entries);
+
+ evlist__for_each_entry(evlist, evsel) {
+ if (!perf_pmu__has_format(evsel->pmu, "acr_mask"))
+ return TEST_OK;
+
+ if (evsel == evlist__first(evlist)) {
+ TEST_ASSERT_VAL("wrong config2", 0 == evsel->core.attr.config2);
+ TEST_ASSERT_VAL("wrong leader", evsel__is_group_leader(evsel));
+ TEST_ASSERT_VAL("wrong core.nr_members", evsel->core.nr_members == 2);
+ TEST_ASSERT_VAL("wrong group_idx", evsel__group_idx(evsel) == 0);
+ TEST_ASSERT_EVSEL("unexpected event",
+ evsel__match(evsel, HARDWARE, HW_CPU_CYCLES),
+ evsel);
+ } else {
+ TEST_ASSERT_VAL("wrong config2", 0 == evsel->core.attr.config2);
+ TEST_ASSERT_VAL("wrong leader", !evsel__is_group_leader(evsel));
+ TEST_ASSERT_VAL("wrong core.nr_members", evsel->core.nr_members == 0);
+ TEST_ASSERT_VAL("wrong group_idx", evsel__group_idx(evsel) == 1);
+ TEST_ASSERT_EVSEL("unexpected event",
+ evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS),
+ evsel);
+ }
+ /*
+ * The period value gets configured within evlist__config,
+ * while this test executes only parse events method.
+ */
+ TEST_ASSERT_VAL("wrong period", 0 == evsel->core.attr.sample_period);
+ }
return TEST_OK;
}
@@ -1740,9 +1834,13 @@ static int test__checkevent_complex_name(struct evlist *evlist)
{
struct evsel *evsel = evlist__first(evlist);
- TEST_ASSERT_VAL("wrong complex name parsing",
- evsel__name_is(evsel,
- "COMPLEX_CYCLES_NAME:orig=cycles,desc=chip-clock-ticks"));
+ TEST_ASSERT_EVLIST("wrong number of entries",
+ evlist->core.nr_entries == num_core_entries(evlist),
+ evlist);
+ TEST_ASSERT_EVSEL("wrong complex name parsing",
+ evsel__name_is(evsel,
+ "COMPLEX_CYCLES_NAME:orig=cpu-cycles,desc=chip-clock-ticks"),
+ evsel);
return TEST_OK;
}
@@ -1750,57 +1848,57 @@ static int test__checkevent_raw_pmu(struct evlist *evlist)
{
struct evsel *evsel = evlist__first(evlist);
- TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->core.nr_entries);
- TEST_ASSERT_VAL("wrong type", PERF_TYPE_SOFTWARE == evsel->core.attr.type);
- TEST_ASSERT_VAL("wrong config", test_config(evsel, 0x1a));
+ TEST_ASSERT_EVLIST("wrong number of entries", 1 == evlist->core.nr_entries, evlist);
+ TEST_ASSERT_EVSEL("wrong type", PERF_TYPE_SOFTWARE == evsel->core.attr.type, evsel);
+ TEST_ASSERT_EVSEL("wrong config", 0x1a == evsel->core.attr.config, evsel);
return TEST_OK;
}
static int test__sym_event_slash(struct evlist *evlist)
{
struct evsel *evsel = evlist__first(evlist);
- int ret = assert_hw(&evsel->core, PERF_COUNT_HW_CPU_CYCLES, "cycles");
- if (ret)
- return ret;
-
- TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel);
+ TEST_ASSERT_EVLIST("wrong number of entries",
+ evlist->core.nr_entries == num_core_entries(evlist),
+ evlist);
+ TEST_ASSERT_EVSEL("unexpected event", evsel__match(evsel, HARDWARE, HW_CPU_CYCLES), evsel);
+ TEST_ASSERT_EVSEL("wrong exclude_kernel", evsel->core.attr.exclude_kernel, evsel);
return TEST_OK;
}
static int test__sym_event_dc(struct evlist *evlist)
{
struct evsel *evsel = evlist__first(evlist);
- int ret = assert_hw(&evsel->core, PERF_COUNT_HW_CPU_CYCLES, "cycles");
- if (ret)
- return ret;
-
- TEST_ASSERT_VAL("wrong exclude_user", evsel->core.attr.exclude_user);
+ TEST_ASSERT_EVLIST("wrong number of entries",
+ evlist->core.nr_entries == num_core_entries(evlist),
+ evlist);
+ TEST_ASSERT_EVSEL("unexpected event", evsel__match(evsel, HARDWARE, HW_CPU_CYCLES), evsel);
+ TEST_ASSERT_EVSEL("wrong exclude_user", evsel->core.attr.exclude_user, evsel);
return TEST_OK;
}
static int test__term_equal_term(struct evlist *evlist)
{
struct evsel *evsel = evlist__first(evlist);
- int ret = assert_hw(&evsel->core, PERF_COUNT_HW_CPU_CYCLES, "cycles");
- if (ret)
- return ret;
-
- TEST_ASSERT_VAL("wrong name setting", strcmp(evsel->name, "name") == 0);
+ TEST_ASSERT_EVLIST("wrong number of entries",
+ evlist->core.nr_entries == num_core_entries(evlist),
+ evlist);
+ TEST_ASSERT_EVSEL("unexpected event", evsel__match(evsel, HARDWARE, HW_CPU_CYCLES), evsel);
+ TEST_ASSERT_EVSEL("wrong name setting", strcmp(evsel->name, "name") == 0, evsel);
return TEST_OK;
}
static int test__term_equal_legacy(struct evlist *evlist)
{
struct evsel *evsel = evlist__first(evlist);
- int ret = assert_hw(&evsel->core, PERF_COUNT_HW_CPU_CYCLES, "cycles");
-
- if (ret)
- return ret;
- TEST_ASSERT_VAL("wrong name setting", strcmp(evsel->name, "l1d") == 0);
+ TEST_ASSERT_EVLIST("wrong number of entries",
+ evlist->core.nr_entries == num_core_entries(evlist),
+ evlist);
+ TEST_ASSERT_EVSEL("unexpected event", evsel__match(evsel, HARDWARE, HW_CPU_CYCLES), evsel);
+ TEST_ASSERT_EVSEL("wrong name setting", strcmp(evsel->name, "l1d") == 0, evsel);
return TEST_OK;
}
@@ -1891,7 +1989,7 @@ static const struct evlist_test test__events[] = {
/* 4 */
},
{
- .name = "cycles/period=100000,config2/",
+ .name = "cpu-cycles/period=100000,config2/",
.check = test__checkevent_symbolic_name_config,
/* 5 */
},
@@ -2006,27 +2104,27 @@ static const struct evlist_test test__events[] = {
/* 7 */
},
{
- .name = "{instructions:k,cycles:upp}",
+ .name = "{instructions:k,cpu-cycles:upp}",
.check = test__group1,
/* 8 */
},
{
- .name = "{faults:k,branches}:u,cycles:k",
+ .name = "{faults:k,branches}:u,cpu-cycles:k",
.check = test__group2,
/* 9 */
},
{
- .name = "group1{syscalls:sys_enter_openat:H,cycles:kppp},group2{cycles,1:3}:G,instructions:u",
+ .name = "group1{syscalls:sys_enter_openat:H,cpu-cycles:kppp},group2{cpu-cycles,1:3}:G,instructions:u",
.check = test__group3,
/* 0 */
},
{
- .name = "{cycles:u,instructions:kp}:p",
+ .name = "{cpu-cycles:u,instructions:kp}:p",
.check = test__group4,
/* 1 */
},
{
- .name = "{cycles,instructions}:G,{cycles:G,instructions:G},cycles",
+ .name = "{cpu-cycles,instructions}:G,{cpu-cycles:G,instructions:G},cpu-cycles",
.check = test__group5,
/* 2 */
},
@@ -2036,27 +2134,27 @@ static const struct evlist_test test__events[] = {
/* 3 */
},
{
- .name = "{cycles,cache-misses:G}:H",
+ .name = "{cpu-cycles,cache-misses:G}:H",
.check = test__group_gh1,
/* 4 */
},
{
- .name = "{cycles,cache-misses:H}:G",
+ .name = "{cpu-cycles,cache-misses:H}:G",
.check = test__group_gh2,
/* 5 */
},
{
- .name = "{cycles:G,cache-misses:H}:u",
+ .name = "{cpu-cycles:G,cache-misses:H}:u",
.check = test__group_gh3,
/* 6 */
},
{
- .name = "{cycles:G,cache-misses:H}:uG",
+ .name = "{cpu-cycles:G,cache-misses:H}:uG",
.check = test__group_gh4,
/* 7 */
},
{
- .name = "{cycles,cache-misses,branch-misses}:S",
+ .name = "{cpu-cycles,cache-misses,branch-misses}:S",
.check = test__leader_sample1,
/* 8 */
},
@@ -2071,7 +2169,7 @@ static const struct evlist_test test__events[] = {
/* 0 */
},
{
- .name = "{cycles,cache-misses,branch-misses}:D",
+ .name = "{cpu-cycles,cache-misses,branch-misses}:D",
.check = test__pinned_group,
/* 1 */
},
@@ -2109,7 +2207,7 @@ static const struct evlist_test test__events[] = {
/* 6 */
},
{
- .name = "task-clock:P,cycles",
+ .name = "task-clock:P,cpu-cycles",
.check = test__checkevent_precise_max_modifier,
/* 7 */
},
@@ -2140,17 +2238,17 @@ static const struct evlist_test test__events[] = {
/* 2 */
},
{
- .name = "cycles/name='COMPLEX_CYCLES_NAME:orig=cycles,desc=chip-clock-ticks'/Duk",
+ .name = "cpu-cycles/name='COMPLEX_CYCLES_NAME:orig=cpu-cycles,desc=chip-clock-ticks'/Duk",
.check = test__checkevent_complex_name,
/* 3 */
},
{
- .name = "cycles//u",
+ .name = "cpu-cycles//u",
.check = test__sym_event_slash,
/* 4 */
},
{
- .name = "cycles:k",
+ .name = "cpu-cycles:k",
.check = test__sym_event_dc,
/* 5 */
},
@@ -2160,17 +2258,17 @@ static const struct evlist_test test__events[] = {
/* 6 */
},
{
- .name = "{cycles,cache-misses,branch-misses}:e",
+ .name = "{cpu-cycles,cache-misses,branch-misses}:e",
.check = test__exclusive_group,
/* 7 */
},
{
- .name = "cycles/name=name/",
+ .name = "cpu-cycles/name=name/",
.check = test__term_equal_term,
/* 8 */
},
{
- .name = "cycles/name=l1d/",
+ .name = "cpu-cycles/name=l1d/",
.check = test__term_equal_legacy,
/* 9 */
},
@@ -2249,30 +2347,34 @@ static const struct evlist_test test__events[] = {
.check = test__checkevent_tracepoint,
/* 4 */
},
+ {
+ .name = "{cycles,instructions/period=200000,ratio-to-prev=2.0/}",
+ .valid = test__acr_valid,
+ .check = test__ratio_to_prev,
+ /* 5 */
+ },
+
};
static const struct evlist_test test__events_pmu[] = {
{
- .name = "cpu/config=10,config1=1,config2=3,period=1000/u",
- .valid = test__pmu_cpu_valid,
+ .name = "default_core/config=10,config1=1,config2=3,period=1000/u",
.check = test__checkevent_pmu,
/* 0 */
},
{
- .name = "cpu/config=1,name=krava/u,cpu/config=2/u",
- .valid = test__pmu_cpu_valid,
+ .name = "default_core/config=1,name=krava/u,default_core/config=2/u",
.check = test__checkevent_pmu_name,
/* 1 */
},
{
- .name = "cpu/config=1,call-graph=fp,time,period=100000/,cpu/config=2,call-graph=no,time=0,period=2000/",
- .valid = test__pmu_cpu_valid,
+ .name = "default_core/config=1,call-graph=fp,time,period=100000/,default_core/config=2,call-graph=no,time=0,period=2000/",
.check = test__checkevent_pmu_partial_time_callgraph,
/* 2 */
},
{
- .name = "cpu/name='COMPLEX_CYCLES_NAME:orig=cycles,desc=chip-clock-ticks',period=0x1,event=0x2/ukp",
- .valid = test__pmu_cpu_event_valid,
+ .name = "default_core/name='COMPLEX_CYCLES_NAME:orig=cpu-cycles,desc=chip-clock-ticks',period=0x1,event=0x2/ukp",
+ .valid = test__pmu_default_core_event_valid,
.check = test__checkevent_complex_name,
/* 3 */
},
@@ -2287,158 +2389,132 @@ static const struct evlist_test test__events_pmu[] = {
/* 5 */
},
{
- .name = "cpu/L1-dcache-load-miss/",
- .valid = test__pmu_cpu_valid,
+ .name = "default_core/L1-dcache-load-miss/",
.check = test__checkevent_genhw,
/* 6 */
},
{
- .name = "cpu/L1-dcache-load-miss/kp",
- .valid = test__pmu_cpu_valid,
+ .name = "default_core/L1-dcache-load-miss/kp",
.check = test__checkevent_genhw_modifier,
/* 7 */
},
{
- .name = "cpu/L1-dcache-misses,name=cachepmu/",
- .valid = test__pmu_cpu_valid,
+ .name = "default_core/L1-dcache-misses,name=cachepmu/",
.check = test__checkevent_config_cache,
/* 8 */
},
{
- .name = "cpu/instructions/",
- .valid = test__pmu_cpu_valid,
+ .name = "default_core/instructions/",
.check = test__checkevent_symbolic_name,
/* 9 */
},
{
- .name = "cpu/cycles,period=100000,config2/",
- .valid = test__pmu_cpu_valid,
+ .name = "default_core/cycles,period=100000,config2/",
.check = test__checkevent_symbolic_name_config,
/* 0 */
},
{
- .name = "cpu/instructions/h",
- .valid = test__pmu_cpu_valid,
+ .name = "default_core/instructions/h",
.check = test__checkevent_symbolic_name_modifier,
/* 1 */
},
{
- .name = "cpu/instructions/G",
- .valid = test__pmu_cpu_valid,
+ .name = "default_core/instructions/G",
.check = test__checkevent_exclude_host_modifier,
/* 2 */
},
{
- .name = "cpu/instructions/H",
- .valid = test__pmu_cpu_valid,
+ .name = "default_core/instructions/H",
.check = test__checkevent_exclude_guest_modifier,
/* 3 */
},
{
- .name = "{cpu/instructions/k,cpu/cycles/upp}",
- .valid = test__pmu_cpu_valid,
+ .name = "{default_core/instructions/k,default_core/cycles/upp}",
.check = test__group1,
/* 4 */
},
{
- .name = "{cpu/cycles/u,cpu/instructions/kp}:p",
- .valid = test__pmu_cpu_valid,
+ .name = "{default_core/cycles/u,default_core/instructions/kp}:p",
.check = test__group4,
/* 5 */
},
{
- .name = "{cpu/cycles/,cpu/cache-misses/G}:H",
- .valid = test__pmu_cpu_valid,
+ .name = "{default_core/cycles/,default_core/cache-misses/G}:H",
.check = test__group_gh1,
/* 6 */
},
{
- .name = "{cpu/cycles/,cpu/cache-misses/H}:G",
- .valid = test__pmu_cpu_valid,
+ .name = "{default_core/cycles/,default_core/cache-misses/H}:G",
.check = test__group_gh2,
/* 7 */
},
{
- .name = "{cpu/cycles/G,cpu/cache-misses/H}:u",
- .valid = test__pmu_cpu_valid,
+ .name = "{default_core/cycles/G,default_core/cache-misses/H}:u",
.check = test__group_gh3,
/* 8 */
},
{
- .name = "{cpu/cycles/G,cpu/cache-misses/H}:uG",
- .valid = test__pmu_cpu_valid,
+ .name = "{default_core/cycles/G,default_core/cache-misses/H}:uG",
.check = test__group_gh4,
/* 9 */
},
{
- .name = "{cpu/cycles/,cpu/cache-misses/,cpu/branch-misses/}:S",
- .valid = test__pmu_cpu_valid,
+ .name = "{default_core/cycles/,default_core/cache-misses/,default_core/branch-misses/}:S",
.check = test__leader_sample1,
/* 0 */
},
{
- .name = "{cpu/instructions/,cpu/branch-misses/}:Su",
- .valid = test__pmu_cpu_valid,
+ .name = "{default_core/instructions/,default_core/branch-misses/}:Su",
.check = test__leader_sample2,
/* 1 */
},
{
- .name = "cpu/instructions/uDp",
- .valid = test__pmu_cpu_valid,
+ .name = "default_core/instructions/uDp",
.check = test__checkevent_pinned_modifier,
/* 2 */
},
{
- .name = "{cpu/cycles/,cpu/cache-misses/,cpu/branch-misses/}:D",
- .valid = test__pmu_cpu_valid,
+ .name = "{default_core/cycles/,default_core/cache-misses/,default_core/branch-misses/}:D",
.check = test__pinned_group,
/* 3 */
},
{
- .name = "cpu/instructions/I",
- .valid = test__pmu_cpu_valid,
+ .name = "default_core/instructions/I",
.check = test__checkevent_exclude_idle_modifier,
/* 4 */
},
{
- .name = "cpu/instructions/kIG",
- .valid = test__pmu_cpu_valid,
+ .name = "default_core/instructions/kIG",
.check = test__checkevent_exclude_idle_modifier_1,
/* 5 */
},
{
- .name = "cpu/cycles/u",
- .valid = test__pmu_cpu_valid,
+ .name = "default_core/cycles/u",
.check = test__sym_event_slash,
/* 6 */
},
{
- .name = "cpu/cycles/k",
- .valid = test__pmu_cpu_valid,
+ .name = "default_core/cycles/k",
.check = test__sym_event_dc,
/* 7 */
},
{
- .name = "cpu/instructions/uep",
- .valid = test__pmu_cpu_valid,
+ .name = "default_core/instructions/uep",
.check = test__checkevent_exclusive_modifier,
/* 8 */
},
{
- .name = "{cpu/cycles/,cpu/cache-misses/,cpu/branch-misses/}:e",
- .valid = test__pmu_cpu_valid,
+ .name = "{default_core/cycles/,default_core/cache-misses/,default_core/branch-misses/}:e",
.check = test__exclusive_group,
/* 9 */
},
{
- .name = "cpu/cycles,name=name/",
- .valid = test__pmu_cpu_valid,
+ .name = "default_core/cycles,name=name/",
.check = test__term_equal_term,
/* 0 */
},
{
- .name = "cpu/cycles,name=l1d/",
- .valid = test__pmu_cpu_valid,
+ .name = "default_core/cycles,name=l1d/",
.check = test__term_equal_legacy,
/* 1 */
},
@@ -2451,7 +2527,7 @@ struct terms_test {
static const struct terms_test test__terms[] = {
[0] = {
- .str = "config=10,config1,config2=3,config3=4,umask=1,read,r0xead",
+ .str = "config=10,config1,config2=3,config3=4,config4=5,umask=1,read,r0xead",
.check = test__checkterms_simple,
},
};
@@ -2528,15 +2604,30 @@ static int combine_test_results(int existing, int latest)
static int test_events(const struct evlist_test *events, int cnt)
{
int ret = TEST_OK;
+ struct perf_pmu *core_pmu = perf_pmus__find_core_pmu();
for (int i = 0; i < cnt; i++) {
- const struct evlist_test *e = &events[i];
+ struct evlist_test e = events[i];
int test_ret;
+ const char *pos = e.name;
+ char buf[1024], *buf_pos = buf, *end;
+
+ while ((end = strstr(pos, "default_core"))) {
+ size_t len = end - pos;
+
+ strncpy(buf_pos, pos, len);
+ pos = end + 12;
+ buf_pos += len;
+ strcpy(buf_pos, core_pmu->name);
+ buf_pos += strlen(core_pmu->name);
+ }
+ strcpy(buf_pos, pos);
- pr_debug("running test %d '%s'\n", i, e->name);
- test_ret = test_event(e);
+ e.name = buf;
+ pr_debug("running test %d '%s'\n", i, e.name);
+ test_ret = test_event(&e);
if (test_ret != TEST_OK) {
- pr_debug("Event test failure: test %d '%s'", i, e->name);
+ pr_debug("Event test failure: test %d '%s'", i, e.name);
ret = combine_test_results(ret, test_ret);
}
}
@@ -2556,7 +2647,7 @@ static int test_term(const struct terms_test *t)
parse_events_terms__init(&terms);
- ret = parse_events_terms(&terms, t->str, /*input=*/ NULL);
+ ret = parse_events_terms(&terms, t->str);
if (ret) {
pr_debug("failed to parse terms '%s', err %d\n",
t->str , ret);
@@ -2777,8 +2868,9 @@ static int test__checkevent_pmu_events_alias(struct evlist *evlist)
struct evsel *evsel1 = evlist__first(evlist);
struct evsel *evsel2 = evlist__last(evlist);
- TEST_ASSERT_VAL("wrong type", evsel1->core.attr.type == evsel2->core.attr.type);
- TEST_ASSERT_VAL("wrong config", evsel1->core.attr.config == evsel2->core.attr.config);
+ TEST_ASSERT_EVSEL("wrong type", evsel1->core.attr.type == evsel2->core.attr.type, evsel1);
+ TEST_ASSERT_EVSEL("wrong config", evsel1->core.attr.config == evsel2->core.attr.config,
+ evsel1);
return TEST_OK;
}
diff --git a/tools/perf/tests/parse-metric.c b/tools/perf/tests/parse-metric.c
index 66a5275917e2..6bbc209a5c6a 100644
--- a/tools/perf/tests/parse-metric.c
+++ b/tools/perf/tests/parse-metric.c
@@ -1,5 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
#include <linux/compiler.h>
+#include <errno.h>
#include <string.h>
#include <perf/cpumap.h>
#include <perf/evlist.h>
@@ -40,8 +41,6 @@ static void load_runtime_stat(struct evlist *evlist, struct value *vals)
count = find_value(evsel->name, vals);
evsel->supported = true;
evsel->stats->aggr->counts.val = count;
- if (evsel__name_is(evsel, "duration_time"))
- update_stats(&walltime_nsecs_stats, count);
}
}
diff --git a/tools/perf/tests/perf-record.c b/tools/perf/tests/perf-record.c
index 0b3c37e66871..efbd9cd60c63 100644
--- a/tools/perf/tests/perf-record.c
+++ b/tools/perf/tests/perf-record.c
@@ -13,15 +13,19 @@
#include "tests.h"
#include "util/mmap.h"
#include "util/sample.h"
+#include "util/cpumap.h"
static int sched__get_first_possible_cpu(pid_t pid, cpu_set_t *maskp)
{
- int i, cpu = -1, nrcpus = 1024;
+ int i, cpu = -1;
+ int nrcpus = cpu__max_cpu().cpu;
+ size_t size = CPU_ALLOC_SIZE(nrcpus);
+
realloc:
- CPU_ZERO(maskp);
+ CPU_ZERO_S(size, maskp);
- if (sched_getaffinity(pid, sizeof(*maskp), maskp) == -1) {
- if (errno == EINVAL && nrcpus < (1024 << 8)) {
+ if (sched_getaffinity(pid, size, maskp) == -1) {
+ if (errno == EINVAL && nrcpus < (cpu__max_cpu().cpu << 8)) {
nrcpus = nrcpus << 2;
goto realloc;
}
@@ -30,11 +34,11 @@ realloc:
}
for (i = 0; i < nrcpus; i++) {
- if (CPU_ISSET(i, maskp)) {
+ if (CPU_ISSET_S(i, size, maskp)) {
if (cpu == -1)
cpu = i;
else
- CPU_CLR(i, maskp);
+ CPU_CLR_S(i, size, maskp);
}
}
@@ -50,8 +54,9 @@ static int test__PERF_RECORD(struct test_suite *test __maybe_unused, int subtest
.no_buffering = true,
.mmap_pages = 256,
};
- cpu_set_t cpu_mask;
- size_t cpu_mask_size = sizeof(cpu_mask);
+ int nrcpus = cpu__max_cpu().cpu;
+ cpu_set_t *cpu_mask;
+ size_t cpu_mask_size;
struct evlist *evlist = evlist__new_dummy();
struct evsel *evsel;
struct perf_sample sample;
@@ -69,12 +74,22 @@ static int test__PERF_RECORD(struct test_suite *test __maybe_unused, int subtest
int total_events = 0, nr_events[PERF_RECORD_MAX] = { 0, };
char sbuf[STRERR_BUFSIZE];
+ cpu_mask = CPU_ALLOC(nrcpus);
+ if (!cpu_mask) {
+ pr_debug("failed to create cpumask\n");
+ goto out;
+ }
+
+ cpu_mask_size = CPU_ALLOC_SIZE(nrcpus);
+ CPU_ZERO_S(cpu_mask_size, cpu_mask);
+
perf_sample__init(&sample, /*all=*/false);
if (evlist == NULL) /* Fallback for kernels lacking PERF_COUNT_SW_DUMMY */
evlist = evlist__new_default();
if (evlist == NULL) {
pr_debug("Not enough memory to create evlist\n");
+ CPU_FREE(cpu_mask);
goto out;
}
@@ -111,10 +126,11 @@ static int test__PERF_RECORD(struct test_suite *test __maybe_unused, int subtest
evsel__set_sample_bit(evsel, TIME);
evlist__config(evlist, &opts, NULL);
- err = sched__get_first_possible_cpu(evlist->workload.pid, &cpu_mask);
+ err = sched__get_first_possible_cpu(evlist->workload.pid, cpu_mask);
if (err < 0) {
pr_debug("sched__get_first_possible_cpu: %s\n",
str_error_r(errno, sbuf, sizeof(sbuf)));
+ evlist__cancel_workload(evlist);
goto out_delete_evlist;
}
@@ -123,9 +139,10 @@ static int test__PERF_RECORD(struct test_suite *test __maybe_unused, int subtest
/*
* So that we can check perf_sample.cpu on all the samples.
*/
- if (sched_setaffinity(evlist->workload.pid, cpu_mask_size, &cpu_mask) < 0) {
+ if (sched_setaffinity(evlist->workload.pid, cpu_mask_size, cpu_mask) < 0) {
pr_debug("sched_setaffinity: %s\n",
str_error_r(errno, sbuf, sizeof(sbuf)));
+ evlist__cancel_workload(evlist);
goto out_delete_evlist;
}
@@ -137,6 +154,7 @@ static int test__PERF_RECORD(struct test_suite *test __maybe_unused, int subtest
if (err < 0) {
pr_debug("perf_evlist__open: %s\n",
str_error_r(errno, sbuf, sizeof(sbuf)));
+ evlist__cancel_workload(evlist);
goto out_delete_evlist;
}
@@ -149,6 +167,7 @@ static int test__PERF_RECORD(struct test_suite *test __maybe_unused, int subtest
if (err < 0) {
pr_debug("evlist__mmap: %s\n",
str_error_r(errno, sbuf, sizeof(sbuf)));
+ evlist__cancel_workload(evlist);
goto out_delete_evlist;
}
@@ -328,6 +347,7 @@ found_exit:
++errs;
}
out_delete_evlist:
+ CPU_FREE(cpu_mask);
evlist__delete(evlist);
out:
perf_sample__exit(&sample);
diff --git a/tools/perf/tests/perf-time-to-tsc.c b/tools/perf/tests/perf-time-to-tsc.c
index d4437410c99f..cca41bd37ae3 100644
--- a/tools/perf/tests/perf-time-to-tsc.c
+++ b/tools/perf/tests/perf-time-to-tsc.c
@@ -101,11 +101,11 @@ static int test__perf_time_to_tsc(struct test_suite *test __maybe_unused, int su
perf_evlist__set_maps(&evlist->core, cpus, threads);
- CHECK__(parse_event(evlist, "cycles:u"));
+ CHECK__(parse_event(evlist, "cpu-cycles:u"));
evlist__config(evlist, &opts, NULL);
- /* For hybrid "cycles:u", it creates two events */
+ /* For hybrid "cpu-cycles:u", it creates two events */
evlist__for_each_entry(evlist, evsel) {
evsel->core.attr.comm = 1;
evsel->core.attr.disabled = 1;
diff --git a/tools/perf/tests/pfm.c b/tools/perf/tests/pfm.c
index 2e38dfa34b6c..fca4a86452df 100644
--- a/tools/perf/tests/pfm.c
+++ b/tools/perf/tests/pfm.c
@@ -4,6 +4,7 @@
*
* Copyright 2020 Google LLC.
*/
+#include <errno.h>
#include "tests.h"
#include "util/debug.h"
#include "util/evlist.h"
diff --git a/tools/perf/tests/pmu-events.c b/tools/perf/tests/pmu-events.c
index 95fd9f671a22..a99716862168 100644
--- a/tools/perf/tests/pmu-events.c
+++ b/tools/perf/tests/pmu-events.c
@@ -22,10 +22,6 @@ struct perf_pmu_test_event {
/* used for matching against events from generated pmu-events.c */
struct pmu_event event;
- /* used for matching against event aliases */
- /* extra events for aliases */
- const char *alias_str;
-
/*
* Note: For when PublicDescription does not exist in the JSON, we
* will have no long_desc in pmu_event.long_desc, but long_desc may
@@ -52,7 +48,6 @@ static const struct perf_pmu_test_event bp_l1_btb_correct = {
.desc = "L1 BTB Correction",
.topic = "branch",
},
- .alias_str = "event=0x8a",
};
static const struct perf_pmu_test_event bp_l2_btb_correct = {
@@ -63,7 +58,6 @@ static const struct perf_pmu_test_event bp_l2_btb_correct = {
.desc = "L2 BTB Correction",
.topic = "branch",
},
- .alias_str = "event=0x8b",
};
static const struct perf_pmu_test_event segment_reg_loads_any = {
@@ -74,7 +68,6 @@ static const struct perf_pmu_test_event segment_reg_loads_any = {
.desc = "Number of segment register loads",
.topic = "other",
},
- .alias_str = "event=0x6,period=0x30d40,umask=0x80",
};
static const struct perf_pmu_test_event dispatch_blocked_any = {
@@ -85,7 +78,6 @@ static const struct perf_pmu_test_event dispatch_blocked_any = {
.desc = "Memory cluster signals to block micro-op dispatch for any reason",
.topic = "other",
},
- .alias_str = "event=0x9,period=0x30d40,umask=0x20",
};
static const struct perf_pmu_test_event eist_trans = {
@@ -96,7 +88,6 @@ static const struct perf_pmu_test_event eist_trans = {
.desc = "Number of Enhanced Intel SpeedStep(R) Technology (EIST) transitions",
.topic = "other",
},
- .alias_str = "event=0x3a,period=0x30d40",
};
static const struct perf_pmu_test_event l3_cache_rd = {
@@ -108,7 +99,6 @@ static const struct perf_pmu_test_event l3_cache_rd = {
.long_desc = "Attributable Level 3 cache access, read",
.topic = "cache",
},
- .alias_str = "event=0x40",
.alias_long_desc = "Attributable Level 3 cache access, read",
};
@@ -130,7 +120,6 @@ static const struct perf_pmu_test_event uncore_hisi_ddrc_flux_wcmd = {
.topic = "uncore",
.pmu = "hisi_sccl,ddrc",
},
- .alias_str = "event=0x2",
.matching_pmu = "hisi_sccl1_ddrc2",
};
@@ -142,7 +131,6 @@ static const struct perf_pmu_test_event unc_cbo_xsnp_response_miss_eviction = {
.topic = "uncore",
.pmu = "uncore_cbox",
},
- .alias_str = "event=0x22,umask=0x81",
.matching_pmu = "uncore_cbox_0",
};
@@ -154,7 +142,6 @@ static const struct perf_pmu_test_event uncore_hyphen = {
.topic = "uncore",
.pmu = "uncore_cbox",
},
- .alias_str = "event=0xe0",
.matching_pmu = "uncore_cbox_0",
};
@@ -166,7 +153,6 @@ static const struct perf_pmu_test_event uncore_two_hyph = {
.topic = "uncore",
.pmu = "uncore_cbox",
},
- .alias_str = "event=0xc0",
.matching_pmu = "uncore_cbox_0",
};
@@ -178,7 +164,6 @@ static const struct perf_pmu_test_event uncore_hisi_l3c_rd_hit_cpipe = {
.topic = "uncore",
.pmu = "hisi_sccl,l3c",
},
- .alias_str = "event=0x7",
.matching_pmu = "hisi_sccl3_l3c7",
};
@@ -190,7 +175,6 @@ static const struct perf_pmu_test_event uncore_imc_free_running_cache_miss = {
.topic = "uncore",
.pmu = "uncore_imc_free_running",
},
- .alias_str = "event=0x12",
.matching_pmu = "uncore_imc_free_running_0",
};
@@ -202,7 +186,6 @@ static const struct perf_pmu_test_event uncore_imc_cache_hits = {
.topic = "uncore",
.pmu = "uncore_imc",
},
- .alias_str = "event=0x34",
.matching_pmu = "uncore_imc_0",
};
@@ -226,7 +209,6 @@ static const struct perf_pmu_test_event sys_ddr_pmu_write_cycles = {
.pmu = "uncore_sys_ddr_pmu",
.compat = "v8",
},
- .alias_str = "event=0x2b",
.matching_pmu = "uncore_sys_ddr_pmu0",
};
@@ -239,7 +221,6 @@ static const struct perf_pmu_test_event sys_ccn_pmu_read_cycles = {
.pmu = "uncore_sys_ccn_pmu",
.compat = "0x01",
},
- .alias_str = "config=0x2c",
.matching_pmu = "uncore_sys_ccn_pmu4",
};
@@ -252,7 +233,6 @@ static const struct perf_pmu_test_event sys_cmn_pmu_hnf_cache_miss = {
.pmu = "uncore_sys_cmn_pmu",
.compat = "(434|436|43c|43a).*",
},
- .alias_str = "eventid=0x1,type=0x5",
.matching_pmu = "uncore_sys_cmn_pmu0",
};
@@ -374,9 +354,9 @@ static int compare_alias_to_test_event(struct pmu_event_info *alias,
return -1;
}
- if (!is_same(alias->str, test_event->alias_str)) {
+ if (!is_same(alias->str, test_event->event.event)) {
pr_debug("testing aliases PMU %s: mismatched str, %s vs %s\n",
- pmu_name, alias->str, test_event->alias_str);
+ pmu_name, alias->str, test_event->event.event);
return -1;
}
@@ -892,8 +872,6 @@ static int test__parsing_callback(const struct pmu_metric *pm,
evlist__alloc_aggr_stats(evlist, 1);
evlist__for_each_entry(evlist, evsel) {
evsel->stats->aggr->counts.val = k;
- if (evsel__name_is(evsel, "duration_time"))
- update_stats(&walltime_nsecs_stats, k);
k++;
}
evlist__for_each_entry(evlist, evsel) {
diff --git a/tools/perf/tests/pmu.c b/tools/perf/tests/pmu.c
index 4a9f8e090cf4..cbded2c6faa4 100644
--- a/tools/perf/tests/pmu.c
+++ b/tools/perf/tests/pmu.c
@@ -169,8 +169,7 @@ static int test__pmu_format(struct test_suite *test __maybe_unused, int subtest
parse_events_terms__init(&terms);
if (parse_events_terms(&terms,
"krava01=15,krava02=170,krava03=1,krava11=27,krava12=1,"
- "krava13=2,krava21=119,krava22=11,krava23=2",
- NULL)) {
+ "krava13=2,krava21=119,krava22=11,krava23=2")) {
pr_err("Term parsing failed\n");
goto err_out;
}
diff --git a/tools/perf/tests/python-use.c b/tools/perf/tests/python-use.c
deleted file mode 100644
index 0ebc22ac8d5b..000000000000
--- a/tools/perf/tests/python-use.c
+++ /dev/null
@@ -1,27 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Just test if we can load the python binding.
- */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <linux/compiler.h>
-#include "tests.h"
-#include "util/debug.h"
-
-static int test__python_use(struct test_suite *test __maybe_unused, int subtest __maybe_unused)
-{
- char *cmd;
- int ret;
-
- if (asprintf(&cmd, "echo \"import sys ; sys.path.insert(0, '%s'); import perf\" | %s %s",
- PYTHONPATH, PYTHON, verbose > 0 ? "" : "2> /dev/null") < 0)
- return -1;
-
- pr_debug("python usage test: \"%s\"\n", cmd);
- ret = system(cmd) ? -1 : 0;
- free(cmd);
- return ret;
-}
-
-DEFINE_SUITE("'import perf' in python", python_use);
diff --git a/tools/perf/tests/shell/amd-ibs-swfilt.sh b/tools/perf/tests/shell/amd-ibs-swfilt.sh
index 7045ec72ba4c..e7f66df05c4b 100755
--- a/tools/perf/tests/shell/amd-ibs-swfilt.sh
+++ b/tools/perf/tests/shell/amd-ibs-swfilt.sh
@@ -1,6 +1,10 @@
#!/bin/bash
# AMD IBS software filtering
+ParanoidAndNotRoot() {
+ [ "$(id -u)" != 0 ] && [ "$(cat /proc/sys/kernel/perf_event_paranoid)" -gt $1 ]
+}
+
echo "check availability of IBS swfilt"
# check if IBS PMU is available
@@ -16,6 +20,7 @@ if [ ! -f /sys/bus/event_source/devices/ibs_op/format/swfilt ]; then
fi
echo "run perf record with modifier and swfilt"
+err=0
# setting any modifiers should fail
perf record -B -e ibs_op//u -o /dev/null true 2> /dev/null
@@ -31,11 +36,17 @@ if [ $? -ne 0 ]; then
exit 1
fi
-# setting it with swfilt=1 should be fine
-perf record -B -e ibs_op/swfilt=1/k -o /dev/null true
-if [ $? -ne 0 ]; then
- echo "[FAIL] IBS op PMU cannot handle swfilt for exclude_user"
- exit 1
+if ! ParanoidAndNotRoot 1
+then
+ # setting it with swfilt=1 should be fine
+ perf record -B -e ibs_op/swfilt=1/k -o /dev/null true
+ if [ $? -ne 0 ]; then
+ echo "[FAIL] IBS op PMU cannot handle swfilt for exclude_user"
+ exit 1
+ fi
+else
+ echo "[SKIP] not root and perf_event_paranoid too high for exclude_user"
+ err=2
fi
# check ibs_fetch PMU as well
@@ -46,10 +57,16 @@ if [ $? -ne 0 ]; then
fi
# check system wide recording
-perf record -aB --synth=no -e ibs_op/swfilt/k -o /dev/null true
-if [ $? -ne 0 ]; then
- echo "[FAIL] IBS op PMU cannot handle swfilt in system-wide mode"
- exit 1
+if ! ParanoidAndNotRoot 0
+then
+ perf record -aB --synth=no -e ibs_op/swfilt/k -o /dev/null true
+ if [ $? -ne 0 ]; then
+ echo "[FAIL] IBS op PMU cannot handle swfilt in system-wide mode"
+ exit 1
+ fi
+else
+ echo "[SKIP] not root and perf_event_paranoid too high for system-wide/exclude_user"
+ err=2
fi
echo "check number of samples with swfilt"
@@ -60,8 +77,16 @@ if [ ${kernel_sample} -ne 0 ]; then
exit 1
fi
-user_sample=$(perf record -e ibs_fetch/swfilt/k -o- true | perf script -i- -F misc | grep -c ^U)
-if [ ${user_sample} -ne 0 ]; then
- echo "[FAIL] unexpected user samples: " ${user_sample}
- exit 1
+if ! ParanoidAndNotRoot 1
+then
+ user_sample=$(perf record -e ibs_fetch/swfilt/k -o- true | perf script -i- -F misc | grep -c ^U)
+ if [ ${user_sample} -ne 0 ]; then
+ echo "[FAIL] unexpected user samples: " ${user_sample}
+ exit 1
+ fi
+else
+ echo "[SKIP] not root and perf_event_paranoid too high for exclude_user"
+ err=2
fi
+
+exit $err
diff --git a/tools/perf/tests/shell/attr/test-stat-default b/tools/perf/tests/shell/attr/test-stat-default
index e47fb4944679..8dd27c1fb661 100644
--- a/tools/perf/tests/shell/attr/test-stat-default
+++ b/tools/perf/tests/shell/attr/test-stat-default
@@ -227,3 +227,10 @@ fd=28
type=4
config=270
optional=1
+
+# PERF_TYPE_RAW / INT_MISC.UOP_DROPPING
+[event29:base-stat]
+fd=29
+type=4
+config=4269
+optional=1 \ No newline at end of file
diff --git a/tools/perf/tests/shell/attr/test-stat-detailed-1 b/tools/perf/tests/shell/attr/test-stat-detailed-1
index 3d500d3e0c5c..12a2ebf4e64a 100644
--- a/tools/perf/tests/shell/attr/test-stat-detailed-1
+++ b/tools/perf/tests/shell/attr/test-stat-detailed-1
@@ -269,3 +269,10 @@ fd=32
type=3
config=65538
optional=1
+
+# PERF_TYPE_RAW / INT_MISC.UOP_DROPPING
+[event33:base-stat]
+fd=33
+type=4
+config=4269
+optional=1 \ No newline at end of file
diff --git a/tools/perf/tests/shell/attr/test-stat-detailed-2 b/tools/perf/tests/shell/attr/test-stat-detailed-2
index 01777a63752f..66ea25b7d38f 100644
--- a/tools/perf/tests/shell/attr/test-stat-detailed-2
+++ b/tools/perf/tests/shell/attr/test-stat-detailed-2
@@ -329,3 +329,10 @@ fd=38
type=3
config=65540
optional=1
+
+# PERF_TYPE_RAW / INT_MISC.UOP_DROPPING
+[event39:base-stat]
+fd=39
+type=4
+config=4269
+optional=1 \ No newline at end of file
diff --git a/tools/perf/tests/shell/attr/test-stat-detailed-3 b/tools/perf/tests/shell/attr/test-stat-detailed-3
index 8400abd7e1e4..4a27bbfb9f87 100644
--- a/tools/perf/tests/shell/attr/test-stat-detailed-3
+++ b/tools/perf/tests/shell/attr/test-stat-detailed-3
@@ -349,3 +349,10 @@ fd=40
type=3
config=66048
optional=1
+
+# PERF_TYPE_RAW / INT_MISC.UOP_DROPPING
+[event41:base-stat]
+fd=41
+type=4
+config=4269
+optional=1 \ No newline at end of file
diff --git a/tools/perf/tests/shell/base_probe/test_adding_blacklisted.sh b/tools/perf/tests/shell/base_probe/test_adding_blacklisted.sh
index 8226449ac5c3..f74aab5c5d7f 100755
--- a/tools/perf/tests/shell/base_probe/test_adding_blacklisted.sh
+++ b/tools/perf/tests/shell/base_probe/test_adding_blacklisted.sh
@@ -13,11 +13,12 @@
# they must be skipped.
#
-# include working environment
-. ../common/init.sh
-
+DIR_PATH="$(dirname $0)"
TEST_RESULT=0
+# include working environment
+. "$DIR_PATH/../common/init.sh"
+
# skip if not supported
BLACKFUNC_LIST=`head -n 5 /sys/kernel/debug/kprobes/blacklist 2> /dev/null | cut -f2`
if [ -z "$BLACKFUNC_LIST" ]; then
@@ -53,7 +54,8 @@ for BLACKFUNC in $BLACKFUNC_LIST; do
PERF_EXIT_CODE=$?
# check for bad DWARF polluting the result
- ../common/check_all_patterns_found.pl "$REGEX_MISSING_DECL_LINE" >/dev/null < $LOGS_DIR/adding_blacklisted.err
+ "$DIR_PATH/../common/check_all_patterns_found.pl" \
+ "$REGEX_MISSING_DECL_LINE" >/dev/null < $LOGS_DIR/adding_blacklisted.err
if [ $? -eq 0 ]; then
SKIP_DWARF=1
@@ -73,7 +75,11 @@ for BLACKFUNC in $BLACKFUNC_LIST; do
fi
fi
else
- ../common/check_all_lines_matched.pl "$REGEX_SKIP_MESSAGE" "$REGEX_NOT_FOUND_MESSAGE" "$REGEX_ERROR_MESSAGE" "$REGEX_SCOPE_FAIL" "$REGEX_INVALID_ARGUMENT" "$REGEX_SYMBOL_FAIL" "$REGEX_OUT_SECTION" < $LOGS_DIR/adding_blacklisted.err
+ "$DIR_PATH/../common/check_all_lines_matched.pl" \
+ "$REGEX_SKIP_MESSAGE" "$REGEX_NOT_FOUND_MESSAGE" \
+ "$REGEX_ERROR_MESSAGE" "$REGEX_SCOPE_FAIL" \
+ "$REGEX_INVALID_ARGUMENT" "$REGEX_SYMBOL_FAIL" \
+ "$REGEX_OUT_SECTION" < $LOGS_DIR/adding_blacklisted.err
CHECK_EXIT_CODE=$?
SKIP_DWARF=0
@@ -94,7 +100,9 @@ fi
$CMD_PERF list probe:\* > $LOGS_DIR/adding_blacklisted_list.log
PERF_EXIT_CODE=$?
-../common/check_all_lines_matched.pl "$RE_LINE_EMPTY" "List of pre-defined events" "Metric Groups:" < $LOGS_DIR/adding_blacklisted_list.log
+"$DIR_PATH/../common/check_all_lines_matched.pl" \
+ "$RE_LINE_EMPTY" "List of pre-defined events" "Metric Groups:" \
+ < $LOGS_DIR/adding_blacklisted_list.log
CHECK_EXIT_CODE=$?
print_results $PERF_EXIT_CODE $CHECK_EXIT_CODE "listing blacklisted probe (should NOT be listed)"
diff --git a/tools/perf/tests/shell/base_probe/test_adding_kernel.sh b/tools/perf/tests/shell/base_probe/test_adding_kernel.sh
index df288cf90cd6..555a825d55f2 100755
--- a/tools/perf/tests/shell/base_probe/test_adding_kernel.sh
+++ b/tools/perf/tests/shell/base_probe/test_adding_kernel.sh
@@ -13,13 +13,14 @@
# and removing.
#
-# include working environment
-. ../common/init.sh
-
+DIR_PATH="$(dirname $0)"
TEST_RESULT=0
+# include working environment
+. "$DIR_PATH/../common/init.sh"
+
# shellcheck source=lib/probe_vfs_getname.sh
-. "$(dirname "$0")/../lib/probe_vfs_getname.sh"
+. "$DIR_PATH/../lib/probe_vfs_getname.sh"
TEST_PROBE=${TEST_PROBE:-"inode_permission"}
@@ -44,7 +45,9 @@ for opt in "" "-a" "--add"; do
$CMD_PERF probe $opt $TEST_PROBE 2> $LOGS_DIR/adding_kernel_add$opt.err
PERF_EXIT_CODE=$?
- ../common/check_all_patterns_found.pl "Added new events?:" "probe:$TEST_PROBE" "on $TEST_PROBE" < $LOGS_DIR/adding_kernel_add$opt.err
+ "$DIR_PATH/../common/check_all_patterns_found.pl" \
+ "Added new events?:" "probe:$TEST_PROBE" "on $TEST_PROBE" \
+ < $LOGS_DIR/adding_kernel_add$opt.err
CHECK_EXIT_CODE=$?
print_results $PERF_EXIT_CODE $CHECK_EXIT_CODE "adding probe $TEST_PROBE :: $opt"
@@ -58,7 +61,10 @@ done
$CMD_PERF list probe:\* > $LOGS_DIR/adding_kernel_list.log
PERF_EXIT_CODE=$?
-../common/check_all_lines_matched.pl "$RE_LINE_EMPTY" "List of pre-defined events" "probe:${TEST_PROBE}(?:_\d+)?\s+\[Tracepoint event\]" "Metric Groups:" < $LOGS_DIR/adding_kernel_list.log
+"$DIR_PATH/../common/check_all_lines_matched.pl" \
+ "$RE_LINE_EMPTY" "List of pre-defined events" \
+ "probe:${TEST_PROBE}(?:_\d+)?\s+\[Tracepoint event\]" \
+ "Metric Groups:" < $LOGS_DIR/adding_kernel_list.log
CHECK_EXIT_CODE=$?
print_results $PERF_EXIT_CODE $CHECK_EXIT_CODE "listing added probe :: perf list"
@@ -71,7 +77,9 @@ print_results $PERF_EXIT_CODE $CHECK_EXIT_CODE "listing added probe :: perf list
$CMD_PERF probe -l > $LOGS_DIR/adding_kernel_list-l.log
PERF_EXIT_CODE=$?
-../common/check_all_patterns_found.pl "\s*probe:${TEST_PROBE}(?:_\d+)?\s+\(on ${TEST_PROBE}(?:[:\+]$RE_NUMBER_HEX)?@.+\)" < $LOGS_DIR/adding_kernel_list-l.log
+"$DIR_PATH/../common/check_all_patterns_found.pl" \
+ "\s*probe:${TEST_PROBE}(?:_\d+)?\s+\(on ${TEST_PROBE}(?:[:\+]$RE_NUMBER_HEX)?@.+\)" \
+ < $LOGS_DIR/adding_kernel_list-l.log
CHECK_EXIT_CODE=$?
if [ $NO_DEBUGINFO ] ; then
@@ -93,9 +101,13 @@ REGEX_STAT_VALUES="\s*\d+\s+probe:$TEST_PROBE"
# the value should be greater than 1
REGEX_STAT_VALUE_NONZERO="\s*[1-9][0-9]*\s+probe:$TEST_PROBE"
REGEX_STAT_TIME="\s*$RE_NUMBER\s+seconds (?:time elapsed|user|sys)"
-../common/check_all_lines_matched.pl "$REGEX_STAT_HEADER" "$REGEX_STAT_VALUES" "$REGEX_STAT_TIME" "$RE_LINE_COMMENT" "$RE_LINE_EMPTY" < $LOGS_DIR/adding_kernel_using_probe.log
+"$DIR_PATH/../common/check_all_lines_matched.pl" \
+ "$REGEX_STAT_HEADER" "$REGEX_STAT_VALUES" "$REGEX_STAT_TIME" \
+ "$RE_LINE_COMMENT" "$RE_LINE_EMPTY" < $LOGS_DIR/adding_kernel_using_probe.log
CHECK_EXIT_CODE=$?
-../common/check_all_patterns_found.pl "$REGEX_STAT_HEADER" "$REGEX_STAT_VALUE_NONZERO" "$REGEX_STAT_TIME" < $LOGS_DIR/adding_kernel_using_probe.log
+"$DIR_PATH/../common/check_all_patterns_found.pl" \
+ "$REGEX_STAT_HEADER" "$REGEX_STAT_VALUE_NONZERO" "$REGEX_STAT_TIME" \
+ < $LOGS_DIR/adding_kernel_using_probe.log
(( CHECK_EXIT_CODE += $? ))
print_results $PERF_EXIT_CODE $CHECK_EXIT_CODE "using added probe"
@@ -108,7 +120,8 @@ print_results $PERF_EXIT_CODE $CHECK_EXIT_CODE "using added probe"
$CMD_PERF probe -d $TEST_PROBE\* 2> $LOGS_DIR/adding_kernel_removing.err
PERF_EXIT_CODE=$?
-../common/check_all_lines_matched.pl "Removed event: probe:$TEST_PROBE" < $LOGS_DIR/adding_kernel_removing.err
+"$DIR_PATH/../common/check_all_lines_matched.pl" \
+ "Removed event: probe:$TEST_PROBE" < $LOGS_DIR/adding_kernel_removing.err
CHECK_EXIT_CODE=$?
print_results $PERF_EXIT_CODE $CHECK_EXIT_CODE "deleting added probe"
@@ -121,7 +134,9 @@ print_results $PERF_EXIT_CODE $CHECK_EXIT_CODE "deleting added probe"
$CMD_PERF list probe:\* > $LOGS_DIR/adding_kernel_list_removed.log
PERF_EXIT_CODE=$?
-../common/check_all_lines_matched.pl "$RE_LINE_EMPTY" "List of pre-defined events" "Metric Groups:" < $LOGS_DIR/adding_kernel_list_removed.log
+"$DIR_PATH/../common/check_all_lines_matched.pl" \
+ "$RE_LINE_EMPTY" "List of pre-defined events" "Metric Groups:" \
+ < $LOGS_DIR/adding_kernel_list_removed.log
CHECK_EXIT_CODE=$?
print_results $PERF_EXIT_CODE $CHECK_EXIT_CODE "listing removed probe (should NOT be listed)"
@@ -135,7 +150,9 @@ $CMD_PERF probe -n --add $TEST_PROBE 2> $LOGS_DIR/adding_kernel_dryrun.err
PERF_EXIT_CODE=$?
# check for the output (should be the same as usual)
-../common/check_all_patterns_found.pl "Added new events?:" "probe:$TEST_PROBE" "on $TEST_PROBE" < $LOGS_DIR/adding_kernel_dryrun.err
+"$DIR_PATH/../common/check_all_patterns_found.pl" \
+ "Added new events?:" "probe:$TEST_PROBE" "on $TEST_PROBE" \
+ < $LOGS_DIR/adding_kernel_dryrun.err
CHECK_EXIT_CODE=$?
# check that no probe was added in real
@@ -152,7 +169,9 @@ print_results $PERF_EXIT_CODE $CHECK_EXIT_CODE "dry run :: adding probe"
$CMD_PERF probe --add $TEST_PROBE 2> $LOGS_DIR/adding_kernel_forceadd_01.err
PERF_EXIT_CODE=$?
-../common/check_all_patterns_found.pl "Added new events?:" "probe:$TEST_PROBE" "on $TEST_PROBE" < $LOGS_DIR/adding_kernel_forceadd_01.err
+"$DIR_PATH/../common/check_all_patterns_found.pl" \
+ "Added new events?:" "probe:$TEST_PROBE" "on $TEST_PROBE" \
+ < $LOGS_DIR/adding_kernel_forceadd_01.err
CHECK_EXIT_CODE=$?
print_results $PERF_EXIT_CODE $CHECK_EXIT_CODE "force-adding probes :: first probe adding"
@@ -162,7 +181,9 @@ print_results $PERF_EXIT_CODE $CHECK_EXIT_CODE "force-adding probes :: first pro
! $CMD_PERF probe --add $TEST_PROBE 2> $LOGS_DIR/adding_kernel_forceadd_02.err
PERF_EXIT_CODE=$?
-../common/check_all_patterns_found.pl "Error: event \"$TEST_PROBE\" already exists." "Error: Failed to add events." < $LOGS_DIR/adding_kernel_forceadd_02.err
+"$DIR_PATH/../common/check_all_patterns_found.pl" \
+ "Error: event \"$TEST_PROBE\" already exists." \
+ "Error: Failed to add events." < $LOGS_DIR/adding_kernel_forceadd_02.err
CHECK_EXIT_CODE=$?
print_results $PERF_EXIT_CODE $CHECK_EXIT_CODE "force-adding probes :: second probe adding (without force)"
@@ -173,7 +194,9 @@ NO_OF_PROBES=`$CMD_PERF probe -l $TEST_PROBE| wc -l`
$CMD_PERF probe --force --add $TEST_PROBE 2> $LOGS_DIR/adding_kernel_forceadd_03.err
PERF_EXIT_CODE=$?
-../common/check_all_patterns_found.pl "Added new events?:" "probe:${TEST_PROBE}_${NO_OF_PROBES}" "on $TEST_PROBE" < $LOGS_DIR/adding_kernel_forceadd_03.err
+"$DIR_PATH/../common/check_all_patterns_found.pl" \
+ "Added new events?:" "probe:${TEST_PROBE}_${NO_OF_PROBES}" \
+ "on $TEST_PROBE" < $LOGS_DIR/adding_kernel_forceadd_03.err
CHECK_EXIT_CODE=$?
print_results $PERF_EXIT_CODE $CHECK_EXIT_CODE "force-adding probes :: second probe adding (with force)"
@@ -187,7 +210,9 @@ $CMD_PERF stat -e probe:$TEST_PROBE -e probe:${TEST_PROBE}_${NO_OF_PROBES} -x';'
PERF_EXIT_CODE=$?
REGEX_LINE="$RE_NUMBER;+probe:${TEST_PROBE}_?(?:$NO_OF_PROBES)?;$RE_NUMBER;$RE_NUMBER"
-../common/check_all_lines_matched.pl "$REGEX_LINE" "$RE_LINE_EMPTY" "$RE_LINE_COMMENT" < $LOGS_DIR/adding_kernel_using_two.log
+"$DIR_PATH/../common/check_all_lines_matched.pl" \
+ "$REGEX_LINE" "$RE_LINE_EMPTY" "$RE_LINE_COMMENT" \
+ < $LOGS_DIR/adding_kernel_using_two.log
CHECK_EXIT_CODE=$?
VALUE_1=`grep "$TEST_PROBE;" $LOGS_DIR/adding_kernel_using_two.log | awk -F';' '{print $1}'`
@@ -205,7 +230,9 @@ print_results $PERF_EXIT_CODE $CHECK_EXIT_CODE "using doubled probe"
$CMD_PERF probe --del \* 2> $LOGS_DIR/adding_kernel_removing_wildcard.err
PERF_EXIT_CODE=$?
-../common/check_all_patterns_found.pl "Removed event: probe:$TEST_PROBE" "Removed event: probe:${TEST_PROBE}_1" < $LOGS_DIR/adding_kernel_removing_wildcard.err
+"$DIR_PATH/../common/check_all_lines_matched.pl" \
+ "Removed event: probe:$TEST_PROBE" \
+ "Removed event: probe:${TEST_PROBE}_1" < $LOGS_DIR/adding_kernel_removing_wildcard.err
CHECK_EXIT_CODE=$?
print_results $PERF_EXIT_CODE $CHECK_EXIT_CODE "removing multiple probes"
@@ -217,7 +244,9 @@ print_results $PERF_EXIT_CODE $CHECK_EXIT_CODE "removing multiple probes"
$CMD_PERF probe -nf --max-probes=512 -a 'vfs_* $params' 2> $LOGS_DIR/adding_kernel_adding_wildcard.err
PERF_EXIT_CODE=$?
-../common/check_all_patterns_found.pl "probe:vfs_mknod" "probe:vfs_create" "probe:vfs_rmdir" "probe:vfs_link" "probe:vfs_write" < $LOGS_DIR/adding_kernel_adding_wildcard.err
+"$DIR_PATH/../common/check_all_patterns_found.pl" \
+ "probe:vfs_mknod" "probe:vfs_create" "probe:vfs_rmdir" \
+ "probe:vfs_link" "probe:vfs_write" < $LOGS_DIR/adding_kernel_adding_wildcard.err
CHECK_EXIT_CODE=$?
if [ $NO_DEBUGINFO ] ; then
@@ -240,13 +269,22 @@ test $PERF_EXIT_CODE -ne 139 -a $PERF_EXIT_CODE -ne 0
PERF_EXIT_CODE=$?
# check that the error message is reasonable
-../common/check_all_patterns_found.pl "Failed to find" "somenonexistingrandomstuffwhichisalsoprettylongorevenlongertoexceed64" < $LOGS_DIR/adding_kernel_nonexisting.err
+"$DIR_PATH/../common/check_all_patterns_found.pl" \
+ "Failed to find" \
+ "somenonexistingrandomstuffwhichisalsoprettylongorevenlongertoexceed64" \
+ < $LOGS_DIR/adding_kernel_nonexisting.err
CHECK_EXIT_CODE=$?
-../common/check_all_patterns_found.pl "in this function|at this address" "Error" "Failed to add events" < $LOGS_DIR/adding_kernel_nonexisting.err
+"$DIR_PATH/../common/check_all_patterns_found.pl" \
+ "in this function|at this address" "Error" "Failed to add events" \
+ < $LOGS_DIR/adding_kernel_nonexisting.err
(( CHECK_EXIT_CODE += $? ))
-../common/check_all_lines_matched.pl "Failed to find" "Error" "Probe point .+ not found" "optimized out" "Use.+\-\-range option to show.+location range" < $LOGS_DIR/adding_kernel_nonexisting.err
+"$DIR_PATH/../common/check_all_lines_matched.pl" \
+ "Failed to find" "Error" "Probe point .+ not found" "optimized out" \
+ "Use.+\-\-range option to show.+location range" \
+ < $LOGS_DIR/adding_kernel_nonexisting.err
(( CHECK_EXIT_CODE += $? ))
-../common/check_no_patterns_found.pl "$RE_SEGFAULT" < $LOGS_DIR/adding_kernel_nonexisting.err
+"$DIR_PATH/../common/check_no_patterns_found.pl" \
+ "$RE_SEGFAULT" < $LOGS_DIR/adding_kernel_nonexisting.err
(( CHECK_EXIT_CODE += $? ))
if [ $NO_DEBUGINFO ]; then
@@ -264,7 +302,10 @@ fi
$CMD_PERF probe --add "$TEST_PROBE%return \$retval" 2> $LOGS_DIR/adding_kernel_func_retval_add.err
PERF_EXIT_CODE=$?
-../common/check_all_patterns_found.pl "Added new events?:" "probe:$TEST_PROBE" "on $TEST_PROBE%return with \\\$retval" < $LOGS_DIR/adding_kernel_func_retval_add.err
+"$DIR_PATH/../common/check_all_patterns_found.pl" \
+ "Added new events?:" "probe:$TEST_PROBE" \
+ "on $TEST_PROBE%return with \\\$retval" \
+ < $LOGS_DIR/adding_kernel_func_retval_add.err
CHECK_EXIT_CODE=$?
print_results $PERF_EXIT_CODE $CHECK_EXIT_CODE "function with retval :: add"
@@ -274,7 +315,9 @@ print_results $PERF_EXIT_CODE $CHECK_EXIT_CODE "function with retval :: add"
$CMD_PERF record -e probe:$TEST_PROBE\* -o $CURRENT_TEST_DIR/perf.data -- cat /proc/cpuinfo > /dev/null 2> $LOGS_DIR/adding_kernel_func_retval_record.err
PERF_EXIT_CODE=$?
-../common/check_all_patterns_found.pl "$RE_LINE_RECORD1" "$RE_LINE_RECORD2" < $LOGS_DIR/adding_kernel_func_retval_record.err
+"$DIR_PATH/../common/check_all_patterns_found.pl" \
+ "$RE_LINE_RECORD1" "$RE_LINE_RECORD2" \
+ < $LOGS_DIR/adding_kernel_func_retval_record.err
CHECK_EXIT_CODE=$?
print_results $PERF_EXIT_CODE $CHECK_EXIT_CODE "function with retval :: record"
@@ -285,9 +328,11 @@ $CMD_PERF script -i $CURRENT_TEST_DIR/perf.data > $LOGS_DIR/adding_kernel_func_r
PERF_EXIT_CODE=$?
REGEX_SCRIPT_LINE="\s*cat\s+$RE_NUMBER\s+\[$RE_NUMBER\]\s+$RE_NUMBER:\s+probe:$TEST_PROBE\w*:\s+\($RE_NUMBER_HEX\s+<\-\s+$RE_NUMBER_HEX\)\s+arg1=$RE_NUMBER_HEX"
-../common/check_all_lines_matched.pl "$REGEX_SCRIPT_LINE" < $LOGS_DIR/adding_kernel_func_retval_script.log
+"$DIR_PATH/../common/check_all_lines_matched.pl" \
+ "$REGEX_SCRIPT_LINE" < $LOGS_DIR/adding_kernel_func_retval_script.log
CHECK_EXIT_CODE=$?
-../common/check_all_patterns_found.pl "$REGEX_SCRIPT_LINE" < $LOGS_DIR/adding_kernel_func_retval_script.log
+"$DIR_PATH/../common/check_all_patterns_found.pl" \
+ "$REGEX_SCRIPT_LINE" < $LOGS_DIR/adding_kernel_func_retval_script.log
(( CHECK_EXIT_CODE += $? ))
print_results $PERF_EXIT_CODE $CHECK_EXIT_CODE "function argument probing :: script"
diff --git a/tools/perf/tests/shell/base_probe/test_basic.sh b/tools/perf/tests/shell/base_probe/test_basic.sh
index 9d8b5afbeddd..162838ddc974 100755
--- a/tools/perf/tests/shell/base_probe/test_basic.sh
+++ b/tools/perf/tests/shell/base_probe/test_basic.sh
@@ -12,11 +12,12 @@
# This test tests basic functionality of perf probe command.
#
-# include working environment
-. ../common/init.sh
-
+DIR_PATH="$(dirname $0)"
TEST_RESULT=0
+# include working environment
+. "$DIR_PATH/../common/init.sh"
+
if ! check_kprobes_available; then
print_overall_skipped
exit 2
@@ -30,15 +31,25 @@ if [ "$PARAM_GENERAL_HELP_TEXT_CHECK" = "y" ]; then
$CMD_PERF probe --help > $LOGS_DIR/basic_helpmsg.log 2> $LOGS_DIR/basic_helpmsg.err
PERF_EXIT_CODE=$?
- ../common/check_all_patterns_found.pl "PERF-PROBE" "NAME" "SYNOPSIS" "DESCRIPTION" "OPTIONS" "PROBE\s+SYNTAX" "PROBE\s+ARGUMENT" "LINE\s+SYNTAX" < $LOGS_DIR/basic_helpmsg.log
+ "$DIR_PATH/../common/check_all_patterns_found.pl" \
+ "PERF-PROBE" "NAME" "SYNOPSIS" "DESCRIPTION" "OPTIONS" \
+ "PROBE\s+SYNTAX" "PROBE\s+ARGUMENT" "LINE\s+SYNTAX" \
+ < $LOGS_DIR/basic_helpmsg.log
CHECK_EXIT_CODE=$?
- ../common/check_all_patterns_found.pl "LAZY\s+MATCHING" "FILTER\s+PATTERN" "EXAMPLES" "SEE\s+ALSO" < $LOGS_DIR/basic_helpmsg.log
+ "$DIR_PATH/../common/check_all_patterns_found.pl" \
+ "LAZY\s+MATCHING" "FILTER\s+PATTERN" "EXAMPLES" "SEE\s+ALSO" \
+ < $LOGS_DIR/basic_helpmsg.log
(( CHECK_EXIT_CODE += $? ))
- ../common/check_all_patterns_found.pl "vmlinux" "module=" "source=" "verbose" "quiet" "add=" "del=" "list.*EVENT" "line=" "vars=" "externs" < $LOGS_DIR/basic_helpmsg.log
+ "$DIR_PATH/../common/check_all_patterns_found.pl" \
+ "vmlinux" "module=" "source=" "verbose" "quiet" "add=" "del=" \
+ "list.*EVENT" "line=" "vars=" "externs" < $LOGS_DIR/basic_helpmsg.log
(( CHECK_EXIT_CODE += $? ))
- ../common/check_all_patterns_found.pl "no-inlines" "funcs.*FILTER" "filter=FILTER" "force" "dry-run" "max-probes" "exec=" "demangle-kernel" < $LOGS_DIR/basic_helpmsg.log
+ "$DIR_PATH/../common/check_all_patterns_found.pl" \
+ "no-inlines" "funcs.*FILTER" "filter=FILTER" "force" "dry-run" \
+ "max-probes" "exec=" "demangle-kernel" < $LOGS_DIR/basic_helpmsg.log
(( CHECK_EXIT_CODE += $? ))
- ../common/check_no_patterns_found.pl "No manual entry for" < $LOGS_DIR/basic_helpmsg.err
+ "$DIR_PATH/../common/check_no_patterns_found.pl" \
+ "No manual entry for" < $LOGS_DIR/basic_helpmsg.err
(( CHECK_EXIT_CODE += $? ))
print_results $PERF_EXIT_CODE $CHECK_EXIT_CODE "help message"
@@ -53,7 +64,9 @@ fi
# without any args perf-probe should print usage
$CMD_PERF probe 2> $LOGS_DIR/basic_usage.log > /dev/null
-../common/check_all_patterns_found.pl "[Uu]sage" "perf probe" "verbose" "quiet" "add" "del" "force" "line" "vars" "externs" "range" < $LOGS_DIR/basic_usage.log
+"$DIR_PATH/../common/check_all_patterns_found.pl" \
+ "[Uu]sage" "perf probe" "verbose" "quiet" "add" "del" "force" \
+ "line" "vars" "externs" "range" < $LOGS_DIR/basic_usage.log
CHECK_EXIT_CODE=$?
print_results 0 $CHECK_EXIT_CODE "usage message"
diff --git a/tools/perf/tests/shell/base_probe/test_invalid_options.sh b/tools/perf/tests/shell/base_probe/test_invalid_options.sh
index 92f7254eb32a..44a3ae014bfa 100755
--- a/tools/perf/tests/shell/base_probe/test_invalid_options.sh
+++ b/tools/perf/tests/shell/base_probe/test_invalid_options.sh
@@ -12,11 +12,12 @@
# This test checks whether the invalid and incompatible options are reported
#
-# include working environment
-. ../common/init.sh
-
+DIR_PATH="$(dirname $0)"
TEST_RESULT=0
+# include working environment
+. "$DIR_PATH/../common/init.sh"
+
if ! check_kprobes_available; then
print_overall_skipped
exit 2
@@ -33,7 +34,9 @@ for opt in '-a' '-d' '-L' '-V'; do
! $CMD_PERF probe $opt 2> $LOGS_DIR/invalid_options_missing_argument$opt.err
PERF_EXIT_CODE=$?
- ../common/check_all_patterns_found.pl "Error: switch .* requires a value" < $LOGS_DIR/invalid_options_missing_argument$opt.err
+ "$DIR_PATH/../common/check_all_patterns_found.pl" \
+ "Error: switch .* requires a value" \
+ < $LOGS_DIR/invalid_options_missing_argument$opt.err
CHECK_EXIT_CODE=$?
print_results $PERF_EXIT_CODE $CHECK_EXIT_CODE "missing argument for $opt"
@@ -66,7 +69,8 @@ for opt in '-a xxx -d xxx' '-a xxx -L foo' '-a xxx -V foo' '-a xxx -l' '-a xxx -
! $CMD_PERF probe $opt > /dev/null 2> $LOGS_DIR/aux.log
PERF_EXIT_CODE=$?
- ../common/check_all_patterns_found.pl "Error: switch .+ cannot be used with switch .+" < $LOGS_DIR/aux.log
+ "$DIR_PATH/../common/check_all_patterns_found.pl" \
+ "Error: switch .+ cannot be used with switch .+" < $LOGS_DIR/aux.log
CHECK_EXIT_CODE=$?
print_results $PERF_EXIT_CODE $CHECK_EXIT_CODE "mutually exclusive options :: $opt"
diff --git a/tools/perf/tests/shell/base_probe/test_line_semantics.sh b/tools/perf/tests/shell/base_probe/test_line_semantics.sh
index 20435b6bf6bc..576442d87a44 100755
--- a/tools/perf/tests/shell/base_probe/test_line_semantics.sh
+++ b/tools/perf/tests/shell/base_probe/test_line_semantics.sh
@@ -13,11 +13,12 @@
# arguments are properly reported.
#
-# include working environment
-. ../common/init.sh
-
+DIR_PATH="$(dirname $0)"
TEST_RESULT=0
+# include working environment
+. "$DIR_PATH/../common/init.sh"
+
if ! check_kprobes_available; then
print_overall_skipped
exit 2
diff --git a/tools/perf/tests/shell/base_report/setup.sh b/tools/perf/tests/shell/base_report/setup.sh
index 8634e7e0dda6..bb49b0fabb11 100755
--- a/tools/perf/tests/shell/base_report/setup.sh
+++ b/tools/perf/tests/shell/base_report/setup.sh
@@ -12,8 +12,10 @@
#
#
+DIR_PATH="$(dirname $0)"
+
# include working environment
-. ../common/init.sh
+. "$DIR_PATH/../common/init.sh"
TEST_RESULT=0
@@ -24,7 +26,8 @@ SW_EVENT="cpu-clock"
$CMD_PERF record -asdg -e $SW_EVENT -o $CURRENT_TEST_DIR/perf.data -- $CMD_LONGER_SLEEP 2> $LOGS_DIR/setup.log
PERF_EXIT_CODE=$?
-../common/check_all_patterns_found.pl "$RE_LINE_RECORD1" "$RE_LINE_RECORD2" < $LOGS_DIR/setup.log
+"$DIR_PATH/../common/check_all_patterns_found.pl" \
+ "$RE_LINE_RECORD1" "$RE_LINE_RECORD2" < $LOGS_DIR/setup.log
CHECK_EXIT_CODE=$?
print_results $PERF_EXIT_CODE $CHECK_EXIT_CODE "prepare the perf.data file"
@@ -38,7 +41,8 @@ echo ==================
cat $LOGS_DIR/setup-latency.log
echo ==================
-../common/check_all_patterns_found.pl "$RE_LINE_RECORD1" "$RE_LINE_RECORD2" < $LOGS_DIR/setup-latency.log
+"$DIR_PATH/../common/check_all_patterns_found.pl" \
+ "$RE_LINE_RECORD1" "$RE_LINE_RECORD2" < $LOGS_DIR/setup-latency.log
CHECK_EXIT_CODE=$?
print_results $PERF_EXIT_CODE $CHECK_EXIT_CODE "prepare the perf.data.1 file"
diff --git a/tools/perf/tests/shell/base_report/test_basic.sh b/tools/perf/tests/shell/base_report/test_basic.sh
index adfd8713b8f8..0dfe7e5fd1ca 100755
--- a/tools/perf/tests/shell/base_report/test_basic.sh
+++ b/tools/perf/tests/shell/base_report/test_basic.sh
@@ -12,11 +12,12 @@
#
#
-# include working environment
-. ../common/init.sh
-
+DIR_PATH="$(dirname $0)"
TEST_RESULT=0
+# include working environment
+. "$DIR_PATH/../common/init.sh"
+
### help message
@@ -25,19 +26,37 @@ if [ "$PARAM_GENERAL_HELP_TEXT_CHECK" = "y" ]; then
$CMD_PERF report --help > $LOGS_DIR/basic_helpmsg.log 2> $LOGS_DIR/basic_helpmsg.err
PERF_EXIT_CODE=$?
- ../common/check_all_patterns_found.pl "PERF-REPORT" "NAME" "SYNOPSIS" "DESCRIPTION" "OPTIONS" "OVERHEAD\s+CALCULATION" "SEE ALSO" < $LOGS_DIR/basic_helpmsg.log
+ "$DIR_PATH/../common/check_all_patterns_found.pl" \
+ "PERF-REPORT" "NAME" "SYNOPSIS" "DESCRIPTION" "OPTIONS" \
+ "OVERHEAD\s+CALCULATION" "SEE ALSO" < $LOGS_DIR/basic_helpmsg.log
CHECK_EXIT_CODE=$?
- ../common/check_all_patterns_found.pl "input" "verbose" "show-nr-samples" "show-cpu-utilization" "threads" "comms" "pid" "tid" "dsos" "symbols" "symbol-filter" < $LOGS_DIR/basic_helpmsg.log
+ "$DIR_PATH/../common/check_all_patterns_found.pl" \
+ "input" "verbose" "show-nr-samples" "show-cpu-utilization" \
+ "threads" "comms" "pid" "tid" "dsos" "symbols" "symbol-filter" \
+ < $LOGS_DIR/basic_helpmsg.log
(( CHECK_EXIT_CODE += $? ))
- ../common/check_all_patterns_found.pl "hide-unresolved" "sort" "fields" "parent" "exclude-other" "column-widths" "field-separator" "dump-raw-trace" "children" < $LOGS_DIR/basic_helpmsg.log
+ "$DIR_PATH/../common/check_all_patterns_found.pl" \
+ "hide-unresolved" "sort" "fields" "parent" "exclude-other" \
+ "column-widths" "field-separator" "dump-raw-trace" "children" \
+ < $LOGS_DIR/basic_helpmsg.log
(( CHECK_EXIT_CODE += $? ))
- ../common/check_all_patterns_found.pl "call-graph" "max-stack" "inverted" "ignore-callees" "pretty" "stdio" "tui" "gtk" "vmlinux" "kallsyms" "modules" < $LOGS_DIR/basic_helpmsg.log
+ "$DIR_PATH/../common/check_all_patterns_found.pl" \
+ "call-graph" "max-stack" "inverted" "ignore-callees" "pretty" \
+ "stdio" "tui" "gtk" "vmlinux" "kallsyms" "modules" \
+ < $LOGS_DIR/basic_helpmsg.log
(( CHECK_EXIT_CODE += $? ))
- ../common/check_all_patterns_found.pl "force" "symfs" "cpu" "disassembler-style" "source" "asm-raw" "show-total-period" "show-info" "branch-stack" "group" < $LOGS_DIR/basic_helpmsg.log
+ "$DIR_PATH/../common/check_all_patterns_found.pl" \
+ "force" "symfs" "cpu" "disassembler-style" "source" "asm-raw" \
+ "show-total-period" "show-info" "branch-stack" "group" \
+ < $LOGS_DIR/basic_helpmsg.log
(( CHECK_EXIT_CODE += $? ))
- ../common/check_all_patterns_found.pl "branch-history" "objdump" "demangle" "percent-limit" "percentage" "header" "itrace" "full-source-path" "show-ref-call-graph" < $LOGS_DIR/basic_helpmsg.log
+ "$DIR_PATH/../common/check_all_patterns_found.pl" \
+ "branch-history" "objdump" "demangle" "percent-limit" "percentage" \
+ "header" "itrace" "full-source-path" "show-ref-call-graph" \
+ < $LOGS_DIR/basic_helpmsg.log
(( CHECK_EXIT_CODE += $? ))
- ../common/check_no_patterns_found.pl "No manual entry for" < $LOGS_DIR/basic_helpmsg.err
+ "$DIR_PATH/../common/check_no_patterns_found.pl" \
+ "No manual entry for" < $LOGS_DIR/basic_helpmsg.err
(( CHECK_EXIT_CODE += $? ))
print_results $PERF_EXIT_CODE $CHECK_EXIT_CODE "help message"
@@ -57,9 +76,12 @@ REGEX_LOST_SAMPLES_INFO="#\s*Total Lost Samples:\s+$RE_NUMBER"
REGEX_SAMPLES_INFO="#\s*Samples:\s+(?:$RE_NUMBER)\w?\s+of\s+event\s+'$RE_EVENT_ANY'"
REGEX_LINES_HEADER="#\s*Children\s+Self\s+Command\s+Shared Object\s+Symbol"
REGEX_LINES="\s*$RE_NUMBER%\s+$RE_NUMBER%\s+\S+\s+\[kernel\.(?:vmlinux)|(?:kallsyms)\]\s+\[[k\.]\]\s+\w+"
-../common/check_all_patterns_found.pl "$REGEX_LOST_SAMPLES_INFO" "$REGEX_SAMPLES_INFO" "$REGEX_LINES_HEADER" "$REGEX_LINES" < $LOGS_DIR/basic_basic.log
+"$DIR_PATH/../common/check_all_patterns_found.pl" \
+ "$REGEX_LOST_SAMPLES_INFO" "$REGEX_SAMPLES_INFO" \
+ "$REGEX_LINES_HEADER" "$REGEX_LINES" < $LOGS_DIR/basic_basic.log
CHECK_EXIT_CODE=$?
-../common/check_errors_whitelisted.pl "stderr-whitelist.txt" < $LOGS_DIR/basic_basic.err
+"$DIR_PATH/../common/check_errors_whitelisted.pl" \
+ "$DIR_PATH/stderr-whitelist.txt" < $LOGS_DIR/basic_basic.err
(( CHECK_EXIT_CODE += $? ))
print_results $PERF_EXIT_CODE $CHECK_EXIT_CODE "basic execution"
@@ -74,9 +96,11 @@ PERF_EXIT_CODE=$?
REGEX_LINES_HEADER="#\s*Children\s+Self\s+Samples\s+Command\s+Shared Object\s+Symbol"
REGEX_LINES="\s*$RE_NUMBER%\s+$RE_NUMBER%\s+$RE_NUMBER\s+\S+\s+\[kernel\.(?:vmlinux)|(?:kallsyms)\]\s+\[[k\.]\]\s+\w+"
-../common/check_all_patterns_found.pl "$REGEX_LINES_HEADER" "$REGEX_LINES" < $LOGS_DIR/basic_nrsamples.log
+"$DIR_PATH/../common/check_all_patterns_found.pl" \
+ "$REGEX_LINES_HEADER" "$REGEX_LINES" < $LOGS_DIR/basic_nrsamples.log
CHECK_EXIT_CODE=$?
-../common/check_errors_whitelisted.pl "stderr-whitelist.txt" < $LOGS_DIR/basic_nrsamples.err
+"$DIR_PATH/../common/check_errors_whitelisted.pl" \
+ "$DIR_PATH/stderr-whitelist.txt" < $LOGS_DIR/basic_nrsamples.err
(( CHECK_EXIT_CODE += $? ))
print_results $PERF_EXIT_CODE $CHECK_EXIT_CODE "number of samples"
@@ -98,7 +122,10 @@ REGEX_LINE_CPUS_ONLINE="#\s+nrcpus online\s*:\s*$MY_CPUS_ONLINE"
REGEX_LINE_CPUS_AVAIL="#\s+nrcpus avail\s*:\s*$MY_CPUS_AVAILABLE"
# disable precise check for "nrcpus avail" in BASIC runmode
test $PERFTOOL_TESTSUITE_RUNMODE -lt $RUNMODE_STANDARD && REGEX_LINE_CPUS_AVAIL="#\s+nrcpus avail\s*:\s*$RE_NUMBER"
-../common/check_all_patterns_found.pl "$REGEX_LINE_TIMESTAMP" "$REGEX_LINE_HOSTNAME" "$REGEX_LINE_KERNEL" "$REGEX_LINE_PERF" "$REGEX_LINE_ARCH" "$REGEX_LINE_CPUS_ONLINE" "$REGEX_LINE_CPUS_AVAIL" < $LOGS_DIR/basic_header.log
+"$DIR_PATH/../common/check_all_patterns_found.pl" \
+ "$REGEX_LINE_TIMESTAMP" "$REGEX_LINE_HOSTNAME" "$REGEX_LINE_KERNEL" \
+ "$REGEX_LINE_PERF" "$REGEX_LINE_ARCH" "$REGEX_LINE_CPUS_ONLINE" \
+ "$REGEX_LINE_CPUS_AVAIL" < $LOGS_DIR/basic_header.log
CHECK_EXIT_CODE=$?
print_results $PERF_EXIT_CODE $CHECK_EXIT_CODE "header"
@@ -129,9 +156,11 @@ PERF_EXIT_CODE=$?
REGEX_LINES_HEADER="#\s*Children\s+Self\s+sys\s+usr\s+Command\s+Shared Object\s+Symbol"
REGEX_LINES="\s*$RE_NUMBER%\s+$RE_NUMBER%\s+$RE_NUMBER%\s+$RE_NUMBER%\s+\S+\s+\[kernel\.(?:vmlinux)|(?:kallsyms)\]\s+\[[k\.]\]\s+\w+"
-../common/check_all_patterns_found.pl "$REGEX_LINES_HEADER" "$REGEX_LINES" < $LOGS_DIR/basic_cpuut.log
+"$DIR_PATH/../common/check_all_patterns_found.pl" \
+ "$REGEX_LINES_HEADER" "$REGEX_LINES" < $LOGS_DIR/basic_cpuut.log
CHECK_EXIT_CODE=$?
-../common/check_errors_whitelisted.pl "stderr-whitelist.txt" < $LOGS_DIR/basic_cpuut.err
+"$DIR_PATH/../common/check_errors_whitelisted.pl" \
+ "$DIR_PATH/stderr-whitelist.txt" < $LOGS_DIR/basic_cpuut.err
(( CHECK_EXIT_CODE += $? ))
print_results $PERF_EXIT_CODE $CHECK_EXIT_CODE "show CPU utilization"
@@ -144,9 +173,11 @@ print_results $PERF_EXIT_CODE $CHECK_EXIT_CODE "show CPU utilization"
$CMD_PERF report --stdio -i $CURRENT_TEST_DIR/perf.data --pid=1 > $LOGS_DIR/basic_pid.log 2> $LOGS_DIR/basic_pid.err
PERF_EXIT_CODE=$?
-grep -P -v '^#' $LOGS_DIR/basic_pid.log | grep -P '\s+[\d\.]+%' | ../common/check_all_lines_matched.pl "systemd|init"
+grep -P -v '^#' $LOGS_DIR/basic_pid.log | grep -P '\s+[\d\.]+%' | \
+ "$DIR_PATH/../common/check_all_lines_matched.pl" "systemd|init"
CHECK_EXIT_CODE=$?
-../common/check_errors_whitelisted.pl "stderr-whitelist.txt" < $LOGS_DIR/basic_pid.err
+"$DIR_PATH/../common/check_errors_whitelisted.pl" \
+ "$DIR_PATH/stderr-whitelist.txt" < $LOGS_DIR/basic_pid.err
(( CHECK_EXIT_CODE += $? ))
print_results $PERF_EXIT_CODE $CHECK_EXIT_CODE "pid"
@@ -159,9 +190,11 @@ print_results $PERF_EXIT_CODE $CHECK_EXIT_CODE "pid"
$CMD_PERF report --stdio -i $CURRENT_TEST_DIR/perf.data --symbols=dummynonexistingsymbol > $LOGS_DIR/basic_symbols.log 2> $LOGS_DIR/basic_symbols.err
PERF_EXIT_CODE=$?
-../common/check_all_lines_matched.pl "$RE_LINE_EMPTY" "$RE_LINE_COMMENT" < $LOGS_DIR/basic_symbols.log
+"$DIR_PATH/../common/check_all_lines_matched.pl" \
+ "$RE_LINE_EMPTY" "$RE_LINE_COMMENT" < $LOGS_DIR/basic_symbols.log
CHECK_EXIT_CODE=$?
-../common/check_errors_whitelisted.pl "stderr-whitelist.txt" < $LOGS_DIR/basic_symbols.err
+"$DIR_PATH/../common/check_errors_whitelisted.pl" \
+ "$DIR_PATH/stderr-whitelist.txt" < $LOGS_DIR/basic_symbols.err
(( CHECK_EXIT_CODE += $? ))
print_results $PERF_EXIT_CODE $CHECK_EXIT_CODE "non-existing symbol"
@@ -174,9 +207,11 @@ print_results $PERF_EXIT_CODE $CHECK_EXIT_CODE "non-existing symbol"
$CMD_PERF report --stdio -i $CURRENT_TEST_DIR/perf.data --symbol-filter=map > $LOGS_DIR/basic_symbolfilter.log 2> $LOGS_DIR/basic_symbolfilter.err
PERF_EXIT_CODE=$?
-grep -P -v '^#' $LOGS_DIR/basic_symbolfilter.log | grep -P '\s+[\d\.]+%' | ../common/check_all_lines_matched.pl "\[[k\.]\]\s+.*map"
+grep -P -v '^#' $LOGS_DIR/basic_symbolfilter.log | grep -P '\s+[\d\.]+%' | \
+ "$DIR_PATH/../common/check_all_lines_matched.pl" "\[[k\.]\]\s+.*map"
CHECK_EXIT_CODE=$?
-../common/check_errors_whitelisted.pl "stderr-whitelist.txt" < $LOGS_DIR/basic_symbolfilter.err
+"$DIR_PATH/../common/check_errors_whitelisted.pl" \
+ "$DIR_PATH/stderr-whitelist.txt" < $LOGS_DIR/basic_symbolfilter.err
(( CHECK_EXIT_CODE += $? ))
print_results $PERF_EXIT_CODE $CHECK_EXIT_CODE "symbol filter"
@@ -189,7 +224,8 @@ print_results $PERF_EXIT_CODE $CHECK_EXIT_CODE "symbol filter"
$CMD_PERF report -i $CURRENT_TEST_DIR/perf.data.1 --stdio --header-only > $LOGS_DIR/latency_header.log
PERF_EXIT_CODE=$?
-../common/check_all_patterns_found.pl ", context_switch = 1, " < $LOGS_DIR/latency_header.log
+"$DIR_PATH/../common/check_all_patterns_found.pl" \
+ ", context_switch = 1, " < $LOGS_DIR/latency_header.log
CHECK_EXIT_CODE=$?
print_results $PERF_EXIT_CODE $CHECK_EXIT_CODE "latency header"
@@ -200,9 +236,11 @@ print_results $PERF_EXIT_CODE $CHECK_EXIT_CODE "latency header"
$CMD_PERF report --stdio -i $CURRENT_TEST_DIR/perf.data.1 > $LOGS_DIR/latency_default.log 2> $LOGS_DIR/latency_default.err
PERF_EXIT_CODE=$?
-../common/check_all_patterns_found.pl "# Overhead Latency Command" < $LOGS_DIR/latency_default.log
+"$DIR_PATH/../common/check_all_patterns_found.pl" \
+ "# Overhead Latency Command" < $LOGS_DIR/latency_default.log
CHECK_EXIT_CODE=$?
-../common/check_errors_whitelisted.pl "stderr-whitelist.txt" < $LOGS_DIR/latency_default.err
+"$DIR_PATH/../common/check_errors_whitelisted.pl" \
+ "stderr-whitelist.txt" < $LOGS_DIR/latency_default.err
(( CHECK_EXIT_CODE += $? ))
print_results $PERF_EXIT_CODE $CHECK_EXIT_CODE "default report for latency profile"
@@ -213,9 +251,11 @@ print_results $PERF_EXIT_CODE $CHECK_EXIT_CODE "default report for latency profi
$CMD_PERF report --latency --stdio -i $CURRENT_TEST_DIR/perf.data.1 > $LOGS_DIR/latency_latency.log 2> $LOGS_DIR/latency_latency.err
PERF_EXIT_CODE=$?
-../common/check_all_patterns_found.pl "# Latency Overhead Command" < $LOGS_DIR/latency_latency.log
+"$DIR_PATH/../common/check_all_patterns_found.pl" \
+ "# Latency Overhead Command" < $LOGS_DIR/latency_latency.log
CHECK_EXIT_CODE=$?
-../common/check_errors_whitelisted.pl "stderr-whitelist.txt" < $LOGS_DIR/latency_latency.err
+"$DIR_PATH/../common/check_errors_whitelisted.pl" \
+ "stderr-whitelist.txt" < $LOGS_DIR/latency_latency.err
(( CHECK_EXIT_CODE += $? ))
print_results $PERF_EXIT_CODE $CHECK_EXIT_CODE "latency report for latency profile"
@@ -226,9 +266,12 @@ print_results $PERF_EXIT_CODE $CHECK_EXIT_CODE "latency report for latency profi
$CMD_PERF report --hierarchy --sort latency,parallelism,comm,symbol --parallelism=1,2 --stdio -i $CURRENT_TEST_DIR/perf.data.1 > $LOGS_DIR/parallelism_hierarchy.log 2> $LOGS_DIR/parallelism_hierarchy.err
PERF_EXIT_CODE=$?
-../common/check_all_patterns_found.pl "# Latency Parallelism / Command / Symbol" < $LOGS_DIR/parallelism_hierarchy.log
+"$DIR_PATH/../common/check_all_patterns_found.pl" \
+ "# Latency Parallelism / Command / Symbol" \
+ < $LOGS_DIR/parallelism_hierarchy.log
CHECK_EXIT_CODE=$?
-../common/check_errors_whitelisted.pl "stderr-whitelist.txt" < $LOGS_DIR/parallelism_hierarchy.err
+"$DIR_PATH/../common/check_errors_whitelisted.pl" \
+ "stderr-whitelist.txt" < $LOGS_DIR/parallelism_hierarchy.err
(( CHECK_EXIT_CODE += $? ))
print_results $PERF_EXIT_CODE $CHECK_EXIT_CODE "parallelism histogram"
diff --git a/tools/perf/tests/shell/buildid.sh b/tools/perf/tests/shell/buildid.sh
index d2eb213da01d..102808cca9db 100755
--- a/tools/perf/tests/shell/buildid.sh
+++ b/tools/perf/tests/shell/buildid.sh
@@ -36,16 +36,69 @@ if [ ${run_pe} -eq 1 ]; then
unset WAYLAND_DISPLAY
fi
-ex_md5=$(mktemp /tmp/perf.ex.MD5.XXX)
-ex_sha1=$(mktemp /tmp/perf.ex.SHA1.XXX)
+build_id_dir=
+ex_source=$(mktemp /tmp/perf_buildid_test.ex.XXX.c)
+ex_md5=$(mktemp /tmp/perf_buildid_test.ex.MD5.XXX)
+ex_sha1=$(mktemp /tmp/perf_buildid_test.ex.SHA1.XXX)
ex_pe=$(dirname $0)/../pe-file.exe
+data=$(mktemp /tmp/perf_buildid_test.data.XXX)
+log_out=$(mktemp /tmp/perf_buildid_test.log.out.XXX)
+log_err=$(mktemp /tmp/perf_buildid_test.log.err.XXX)
-echo 'int main(void) { return 0; }' | cc -Wl,--build-id=sha1 -o ${ex_sha1} -x c -
-echo 'int main(void) { return 0; }' | cc -Wl,--build-id=md5 -o ${ex_md5} -x c -
+cleanup() {
+ rm -f ${ex_source} ${ex_md5} ${ex_sha1} ${data} ${log_out} ${log_err}
+ if [ ${run_pe} -eq 1 ]; then
+ rm -r ${wineprefix}
+ fi
+ if [ -d ${build_id_dir} ]; then
+ rm -rf ${build_id_dir}
+ fi
+ trap - EXIT TERM INT
+}
+
+trap_cleanup() {
+ echo "Unexpected signal in ${FUNCNAME[1]}"
+ cleanup
+ exit 1
+}
+trap trap_cleanup EXIT TERM INT
+
+# Test program based on the noploop workload.
+cat <<EOF > ${ex_source}
+#include <stdlib.h>
+#include <signal.h>
+#include <unistd.h>
+static volatile sig_atomic_t done;
+
+static void sighandler(int sig)
+{
+ (void)sig;
+ done = 1;
+}
+
+int main(int argc, const char **argv)
+{
+ int sec = 1;
+
+ if (argc > 1)
+ sec = atoi(argv[1]);
+
+ signal(SIGINT, sighandler);
+ signal(SIGALRM, sighandler);
+ alarm(sec);
+
+ while (!done)
+ continue;
+
+ return 0;
+}
+EOF
+cc -Wl,--build-id=sha1 ${ex_source} -o ${ex_sha1} -x c -
+cc -Wl,--build-id=md5 ${ex_source} -o ${ex_md5} -x c -
echo "test binaries: ${ex_sha1} ${ex_md5} ${ex_pe}"
-check()
+get_build_id()
{
case $1 in
*.exe)
@@ -64,6 +117,15 @@ check()
id=`readelf -n ${1} 2>/dev/null | grep 'Build ID' | awk '{print $3}'`
;;
esac
+ echo ${id}
+}
+
+check()
+{
+ file=$1
+ perf_data=$2
+
+ id=$(get_build_id $file)
echo "build id: ${id}"
id_file=${id#??}
@@ -76,45 +138,53 @@ check()
exit 1
fi
- file=${build_id_dir}/.build-id/$id_dir/`readlink ${link}`/elf
- echo "file: ${file}"
+ cached_file=${build_id_dir}/.build-id/$id_dir/`readlink ${link}`/elf
+ echo "file: ${cached_file}"
# Check for file permission of original file
# in case of pe-file.exe file
echo $1 | grep ".exe"
if [ $? -eq 0 ]; then
- if [ -x $1 ] && [ ! -x $file ]; then
- echo "failed: file ${file} executable does not exist"
+ if [ -x $1 ] && [ ! -x $cached_file ]; then
+ echo "failed: file ${cached_file} executable does not exist"
exit 1
fi
- if [ ! -x $file ] && [ ! -e $file ]; then
- echo "failed: file ${file} does not exist"
+ if [ ! -x $cached_file ] && [ ! -e $cached_file ]; then
+ echo "failed: file ${cached_file} does not exist"
exit 1
fi
- elif [ ! -x $file ]; then
- echo "failed: file ${file} does not exist"
+ elif [ ! -x $cached_file ]; then
+ echo "failed: file ${cached_file} does not exist"
exit 1
fi
- diff ${file} ${1}
+ diff ${cached_file} ${1}
if [ $? -ne 0 ]; then
- echo "failed: ${file} do not match"
+ echo "failed: ${cached_file} do not match"
exit 1
fi
- ${perf} buildid-cache -l | grep ${id}
+ ${perf} buildid-cache -l | grep -q ${id}
if [ $? -ne 0 ]; then
echo "failed: ${id} is not reported by \"perf buildid-cache -l\""
exit 1
fi
+ if [ -n "${perf_data}" ]; then
+ ${perf} buildid-list -i ${perf_data} | grep -q ${id}
+ if [ $? -ne 0 ]; then
+ echo "failed: ${id} is not reported by \"perf buildid-list -i ${perf_data}\""
+ exit 1
+ fi
+ fi
+
echo "OK for ${1}"
}
test_add()
{
- build_id_dir=$(mktemp -d /tmp/perf.debug.XXX)
+ build_id_dir=$(mktemp -d /tmp/perf_buildid_test.debug.XXX)
perf="perf --buildid-dir ${build_id_dir}"
${perf} buildid-cache -v -a ${1}
@@ -128,12 +198,88 @@ test_add()
rm -rf ${build_id_dir}
}
+test_remove()
+{
+ build_id_dir=$(mktemp -d /tmp/perf_buildid_test.debug.XXX)
+ perf="perf --buildid-dir ${build_id_dir}"
+
+ ${perf} buildid-cache -v -a ${1}
+ if [ $? -ne 0 ]; then
+ echo "failed: add ${1} to build id cache"
+ exit 1
+ fi
+
+ id=$(get_build_id ${1})
+ if ! ${perf} buildid-cache -l | grep -q ${id}; then
+ echo "failed: ${id} not in cache"
+ exit 1
+ fi
+
+ ${perf} buildid-cache -v -r ${1}
+ if [ $? -ne 0 ]; then
+ echo "failed: remove ${id} from build id cache"
+ exit 1
+ fi
+
+ if ${perf} buildid-cache -l | grep -q ${id}; then
+ echo "failed: ${id} still in cache after remove"
+ exit 1
+ fi
+
+ echo "remove: OK"
+ rm -rf ${build_id_dir}
+}
+
+test_purge()
+{
+ build_id_dir=$(mktemp -d /tmp/perf_buildid_test.debug.XXX)
+ perf="perf --buildid-dir ${build_id_dir}"
+
+ id1=$(get_build_id ${ex_sha1})
+ ${perf} buildid-cache -v -a ${ex_sha1}
+ if ! ${perf} buildid-cache -l | grep -q ${id1}; then
+ echo "failed: ${id1} not in cache"
+ exit 1
+ fi
+
+ id2=$(get_build_id ${ex_md5})
+ ${perf} buildid-cache -v -a ${ex_md5}
+ if ! ${perf} buildid-cache -l | grep -q ${id2}; then
+ echo "failed: ${id2} not in cache"
+ exit 1
+ fi
+
+ # Purge by path
+ ${perf} buildid-cache -v -p ${ex_sha1}
+ if [ $? -ne 0 ]; then
+ echo "failed: purge build id cache of ${ex_sha1}"
+ exit 1
+ fi
+
+ ${perf} buildid-cache -v -p ${ex_md5}
+ if [ $? -ne 0 ]; then
+ echo "failed: purge build id cache of ${ex_md5}"
+ exit 1
+ fi
+
+ # Verify both are gone
+ if ${perf} buildid-cache -l | grep -q ${id1}; then
+ echo "failed: ${id1} still in cache after purge"
+ exit 1
+ fi
+
+ if ${perf} buildid-cache -l | grep -q ${id2}; then
+ echo "failed: ${id2} still in cache after purge"
+ exit 1
+ fi
+
+ echo "purge: OK"
+ rm -rf ${build_id_dir}
+}
+
test_record()
{
- data=$(mktemp /tmp/perf.data.XXX)
- build_id_dir=$(mktemp -d /tmp/perf.debug.XXX)
- log_out=$(mktemp /tmp/perf.log.out.XXX)
- log_err=$(mktemp /tmp/perf.log.err.XXX)
+ build_id_dir=$(mktemp -d /tmp/perf_buildid_test.debug.XXX)
perf="perf --buildid-dir ${build_id_dir}"
echo "running: perf record $*"
@@ -145,7 +291,7 @@ test_record()
fi
args="$*"
- check ${args##* }
+ check ${args##* } ${data}
rm -f ${log_out} ${log_err}
rm -rf ${build_id_dir}
@@ -166,10 +312,15 @@ if [ ${run_pe} -eq 1 ]; then
test_record wine ${ex_pe}
fi
-# cleanup
-rm ${ex_sha1} ${ex_md5}
-if [ ${run_pe} -eq 1 ]; then
- rm -r ${wineprefix}
+# remove binaries manually via perf buildid-cache -r
+test_remove ${ex_sha1}
+test_remove ${ex_md5}
+if [ ${add_pe} -eq 1 ]; then
+ test_remove ${ex_pe}
fi
+# purge binaries manually via perf buildid-cache -p
+test_purge
+
+cleanup
exit 0
diff --git a/tools/perf/tests/shell/c2c.sh b/tools/perf/tests/shell/c2c.sh
new file mode 100755
index 000000000000..2471d44595c3
--- /dev/null
+++ b/tools/perf/tests/shell/c2c.sh
@@ -0,0 +1,62 @@
+#!/bin/bash
+# perf c2c tests
+# SPDX-License-Identifier: GPL-2.0
+
+set -e
+
+err=0
+perfdata=$(mktemp /tmp/__perf_c2c_test.perf.data.XXXXX)
+
+cleanup() {
+ rm -f "${perfdata}"
+ rm -f "${perfdata}".old
+ trap - EXIT TERM INT
+}
+
+trap_cleanup() {
+ echo "Unexpected signal in ${FUNCNAME[1]}"
+ cleanup
+ exit 1
+}
+trap trap_cleanup EXIT TERM INT
+
+check_c2c_support() {
+ # Check if perf c2c record works.
+ if ! perf c2c record -o "${perfdata}" -- true > /dev/null 2>&1 ; then
+ return 1
+ fi
+ return 0
+}
+
+test_c2c_record_report() {
+ echo "c2c record and report test"
+ if ! check_c2c_support ; then
+ echo "c2c record and report test [Skipped: perf c2c record failed (possibly missing hardware support)]"
+ err=2
+ return
+ fi
+
+ # Run a workload that does some memory operations.
+ if ! perf c2c record -o "${perfdata}" -- perf test -w datasym 1 > /dev/null 2>&1 ; then
+ echo "c2c record and report test [Skipped: perf c2c record failed during workload]"
+ return
+ fi
+
+ if ! perf c2c report -i "${perfdata}" --stdio > /dev/null 2>&1 ; then
+ echo "c2c record and report test [Failed: report failed]"
+ err=1
+ return
+ fi
+
+ if ! perf c2c report -i "${perfdata}" -N > /dev/null 2>&1 ; then
+ echo "c2c record and report test [Failed: report -N failed]"
+ err=1
+ return
+ fi
+
+ echo "c2c record and report test [Success]"
+}
+
+test_c2c_record_report
+cleanup
+exit $err
diff --git a/tools/perf/tests/shell/common/init.sh b/tools/perf/tests/shell/common/init.sh
index 26c7525651e0..cbfc78bec974 100644
--- a/tools/perf/tests/shell/common/init.sh
+++ b/tools/perf/tests/shell/common/init.sh
@@ -11,8 +11,8 @@
#
-. ../common/settings.sh
-. ../common/patterns.sh
+. "$(dirname $0)/../common/settings.sh"
+. "$(dirname $0)/../common/patterns.sh"
THIS_TEST_NAME=`basename $0 .sh`
diff --git a/tools/perf/tests/shell/coresight/memcpy_thread/memcpy_thread.c b/tools/perf/tests/shell/coresight/memcpy_thread/memcpy_thread.c
index 5f886cd09e6b..7e879217be30 100644
--- a/tools/perf/tests/shell/coresight/memcpy_thread/memcpy_thread.c
+++ b/tools/perf/tests/shell/coresight/memcpy_thread/memcpy_thread.c
@@ -27,6 +27,8 @@ static void *thrfn(void *arg)
}
for (i = 0; i < len; i++)
memcpy(dst, src, a->size * 1024);
+
+ return NULL;
}
static pthread_t new_thr(void *(*fn) (void *arg), void *arg)
diff --git a/tools/perf/tests/shell/coresight/thread_loop/thread_loop.c b/tools/perf/tests/shell/coresight/thread_loop/thread_loop.c
index e05a559253ca..86f3f548b006 100644
--- a/tools/perf/tests/shell/coresight/thread_loop/thread_loop.c
+++ b/tools/perf/tests/shell/coresight/thread_loop/thread_loop.c
@@ -34,8 +34,8 @@ static void *thrfn(void *arg)
}
asm volatile(
"loop:\n"
- "add %[i], %[i], #1\n"
- "cmp %[i], %[len]\n"
+ "add %w[i], %w[i], #1\n"
+ "cmp %w[i], %w[len]\n"
"blt loop\n"
: /* out */
: /* in */ [i] "r" (i), [len] "r" (len)
diff --git a/tools/perf/tests/shell/coresight/unroll_loop_thread/unroll_loop_thread.c b/tools/perf/tests/shell/coresight/unroll_loop_thread/unroll_loop_thread.c
index 0fc7bf1a25af..8f4e1c985ca3 100644
--- a/tools/perf/tests/shell/coresight/unroll_loop_thread/unroll_loop_thread.c
+++ b/tools/perf/tests/shell/coresight/unroll_loop_thread/unroll_loop_thread.c
@@ -20,7 +20,7 @@ static void *thrfn(void *arg)
for (i = 0; i < 10000; i++) {
asm volatile (
// force an unroll of thia add instruction so we can test long runs of code
-#define SNIP1 "add %[in], %[in], #1\n"
+#define SNIP1 "add %w[in], %w[in], #1\n"
// 10
#define SNIP2 SNIP1 SNIP1 SNIP1 SNIP1 SNIP1 SNIP1 SNIP1 SNIP1 SNIP1 SNIP1
// 100
@@ -36,6 +36,8 @@ static void *thrfn(void *arg)
: /* clobber */
);
}
+
+ return NULL;
}
static pthread_t new_thr(void *(*fn) (void *arg), void *arg)
diff --git a/tools/perf/tests/shell/evlist.sh b/tools/perf/tests/shell/evlist.sh
new file mode 100755
index 000000000000..140f099e75c1
--- /dev/null
+++ b/tools/perf/tests/shell/evlist.sh
@@ -0,0 +1,79 @@
+#!/bin/bash
+# perf evlist tests
+# SPDX-License-Identifier: GPL-2.0
+
+set -e
+
+err=0
+perfdata=$(mktemp /tmp/__perf_test.perf.data.XXXXX)
+
+cleanup() {
+ rm -f "${perfdata}"
+ trap - EXIT TERM INT
+}
+
+trap_cleanup() {
+ echo "Unexpected signal in ${FUNCNAME[1]}"
+ cleanup
+ exit 1
+}
+trap trap_cleanup EXIT TERM INT
+
+test_evlist_simple() {
+ echo "Simple evlist test"
+ if ! perf record -e cycles -o "${perfdata}" true 2> /dev/null
+ then
+ echo "Simple evlist [Failed record]"
+ err=1
+ return
+ fi
+ if ! perf evlist -i "${perfdata}" | grep -q "cycles"
+ then
+ echo "Simple evlist [Failed to list event]"
+ err=1
+ return
+ fi
+ echo "Simple evlist test [Success]"
+}
+
+test_evlist_group() {
+ echo "Group evlist test"
+ if ! perf record -e "{cycles,instructions}" -o "${perfdata}" true 2> /dev/null
+ then
+ echo "Group evlist [Skipped event group recording failed]"
+ return
+ fi
+
+ if ! perf evlist -i "${perfdata}" -g | grep -q "{.*cycles.*,.*instructions.*}"
+ then
+ echo "Group evlist [Failed to list event group]"
+ err=1
+ return
+ fi
+ echo "Group evlist test [Success]"
+}
+
+test_evlist_verbose() {
+ echo "Event configuration evlist test"
+ if ! perf record -e cycles -o "${perfdata}" true 2> /dev/null
+ then
+ echo "Event configuration evlist [Failed record]"
+ err=1
+ return
+ fi
+
+ if ! perf evlist -i "${perfdata}" -v | grep -q "config:"
+ then
+ echo "Event configuration evlist [Failed to list verbose info]"
+ err=1
+ return
+ fi
+ echo "Event configuration evlist test [Success]"
+}
+
+test_evlist_simple
+test_evlist_group
+test_evlist_verbose
+
+cleanup
+exit $err
diff --git a/tools/perf/tests/shell/jitdump-python.sh b/tools/perf/tests/shell/jitdump-python.sh
new file mode 100755
index 000000000000..ae86203b14a2
--- /dev/null
+++ b/tools/perf/tests/shell/jitdump-python.sh
@@ -0,0 +1,81 @@
+#!/bin/bash
+# python profiling with jitdump
+# SPDX-License-Identifier: GPL-2.0
+
+SHELLDIR=$(dirname $0)
+# shellcheck source=lib/setup_python.sh
+. "${SHELLDIR}"/lib/setup_python.sh
+
+OUTPUT=$(${PYTHON} -Xperf_jit -c 'import os, sys; print(os.getpid(), sys.is_stack_trampoline_active())' 2> /dev/null)
+PID=${OUTPUT% *}
+HAS_PERF_JIT=${OUTPUT#* }
+
+rm -f /tmp/jit-${PID}.dump 2> /dev/null
+if [ "${HAS_PERF_JIT}" != "True" ]; then
+ echo "SKIP: python JIT dump is not available"
+ exit 2
+fi
+
+PERF_DATA=$(mktemp /tmp/__perf_test.perf.data.XXXXXX)
+
+cleanup() {
+ echo "Cleaning up files..."
+ rm -f ${PERF_DATA} ${PERF_DATA}.jit /tmp/jit-${PID}.dump /tmp/jitted-${PID}-*.so 2> /dev/null
+
+ trap - EXIT TERM INT
+}
+
+trap_cleanup() {
+ echo "Unexpected termination"
+ cleanup
+ exit 1
+}
+
+trap trap_cleanup EXIT TERM INT
+
+echo "Run python with -Xperf_jit"
+cat <<EOF | perf record -k 1 -g --call-graph dwarf -o "${PERF_DATA}" \
+ -- ${PYTHON} -Xperf_jit
+def foo(n):
+ result = 0
+ for _ in range(n):
+ result += 1
+ return result
+
+def bar(n):
+ foo(n)
+
+def baz(n):
+ bar(n)
+
+if __name__ == "__main__":
+ baz(1000000)
+EOF
+
+# extract PID of the target process from the data
+_PID=$(perf report -i "${PERF_DATA}" -F pid -q -g none | cut -d: -f1 -s)
+PID=$(echo -n $_PID) # remove newlines
+
+echo "Generate JIT-ed DSOs using perf inject"
+DEBUGINFOD_URLS='' perf inject -i "${PERF_DATA}" -j -o "${PERF_DATA}.jit"
+
+echo "Add JIT-ed DSOs to the build-ID cache"
+for F in /tmp/jitted-${PID}-*.so; do
+ perf buildid-cache -a "${F}"
+done
+
+echo "Check the symbol containing the function/module name"
+NUM=$(perf report -i "${PERF_DATA}.jit" -s sym | grep -cE 'py::(foo|bar|baz):<stdin>')
+
+echo "Found ${NUM} matching lines"
+
+echo "Remove JIT-ed DSOs from the build-ID cache"
+for F in /tmp/jitted-${PID}-*.so; do
+ perf buildid-cache -r "${F}"
+done
+
+cleanup
+
+if [ "${NUM}" -eq 0 ]; then
+ exit 1
+fi
diff --git a/tools/perf/tests/shell/kallsyms.sh b/tools/perf/tests/shell/kallsyms.sh
new file mode 100755
index 000000000000..d0eb99753f47
--- /dev/null
+++ b/tools/perf/tests/shell/kallsyms.sh
@@ -0,0 +1,56 @@
+#!/bin/bash
+# perf kallsyms tests
+# SPDX-License-Identifier: GPL-2.0
+
+err=0
+
+test_kallsyms() {
+ echo "Basic perf kallsyms test"
+
+ # Check if /proc/kallsyms is readable
+ if [ ! -r /proc/kallsyms ]; then
+ echo "Basic perf kallsyms test [Skipped: /proc/kallsyms not readable]"
+ err=2
+ return
+ fi
+
+ # Use a symbol that is definitely a function and present in all kernels, e.g. schedule
+ symbol="schedule"
+
+ # Run perf kallsyms
+ # It prints "address symbol_name"
+ output=$(perf kallsyms $symbol 2>&1)
+ ret=$?
+
+ if [ $ret -ne 0 ] || [ -z "$output" ]; then
+ # If empty or failed, it might be due to permissions (kptr_restrict)
+ # Check if we can grep the symbol from /proc/kallsyms directly
+ if grep -q "$symbol" /proc/kallsyms 2>/dev/null; then
+ # If it's in /proc/kallsyms but perf kallsyms returned empty/error,
+ # it likely means perf couldn't parse it or access it correctly (e.g. kptr_restrict=2).
+ echo "Basic perf kallsyms test [Skipped: $symbol found in /proc/kallsyms but perf kallsyms failed (output: '$output')]"
+ err=2
+ return
+ else
+ echo "Basic perf kallsyms test [Skipped: $symbol not found in /proc/kallsyms]"
+ err=2
+ return
+ fi
+ fi
+
+ if echo "$output" | grep -q "not found"; then
+ echo "Basic perf kallsyms test [Failed: output '$output' does not contain $symbol]"
+ err=1
+ return
+ fi
+
+ if perf kallsyms ErlingHaaland | grep -vq "not found"; then
+ echo "Basic perf kallsyms test [Failed: ErlingHaaland found in the output]"
+ err=1
+ return
+ fi
+ echo "Basic perf kallsyms test [Success]"
+}
+
+test_kallsyms
+exit $err
diff --git a/tools/perf/tests/shell/kvm.sh b/tools/perf/tests/shell/kvm.sh
new file mode 100755
index 000000000000..2fafde1a29cc
--- /dev/null
+++ b/tools/perf/tests/shell/kvm.sh
@@ -0,0 +1,154 @@
+#!/bin/bash
+# perf kvm tests
+# SPDX-License-Identifier: GPL-2.0
+
+set -e
+
+err=0
+perfdata=$(mktemp /tmp/__perf_kvm_test.perf.data.XXXXX)
+qemu_pid_file=$(mktemp /tmp/__perf_kvm_test.qemu.pid.XXXXX)
+
+cleanup() {
+ rm -f "${perfdata}"
+ if [ -f "${qemu_pid_file}" ]; then
+ if [ -s "${qemu_pid_file}" ]; then
+ qemu_pid=$(cat "${qemu_pid_file}")
+ if [ -n "${qemu_pid}" ]; then
+ kill "${qemu_pid}" 2>/dev/null || true
+ fi
+ fi
+ rm -f "${qemu_pid_file}"
+ fi
+ trap - EXIT TERM INT
+}
+
+trap_cleanup() {
+ echo "Unexpected signal in ${FUNCNAME[1]}"
+ cleanup
+ exit 1
+}
+trap trap_cleanup EXIT TERM INT
+
+skip() {
+ echo "Skip: $1"
+ cleanup
+ exit 2
+}
+
+test_kvm_stat() {
+ echo "Testing perf kvm stat"
+
+ echo "Recording kvm events for pid ${qemu_pid}..."
+ if ! perf kvm stat record -p "${qemu_pid}" -o "${perfdata}" sleep 1; then
+ echo "Failed to record kvm events"
+ err=1
+ return
+ fi
+
+ echo "Reporting kvm events..."
+ if ! perf kvm -i "${perfdata}" stat report 2>&1 | grep -q "VM-EXIT"; then
+ echo "Failed to find VM-EXIT in report"
+ perf kvm -i "${perfdata}" stat report 2>&1
+ err=1
+ return
+ fi
+
+ echo "perf kvm stat test [Success]"
+}
+
+test_kvm_record_report() {
+ echo "Testing perf kvm record/report"
+
+ echo "Recording kvm profile for pid ${qemu_pid}..."
+ # Use --host to avoid needing guest symbols/mounts for this simple test
+ # We just want to verify the command runs and produces data
+ # We run in background and kill it because 'perf kvm record' appends options
+ # after the command, which breaks 'sleep' (e.g. it gets '-e cycles').
+ perf kvm --host record -p "${qemu_pid}" -o "${perfdata}" &
+ rec_pid=$!
+ sleep 1
+ kill -INT "${rec_pid}"
+ wait "${rec_pid}" || true
+
+ echo "Reporting kvm profile..."
+ # Check for some standard output from report
+ if ! perf kvm -i "${perfdata}" report --stdio 2>&1 | grep -q "Event count"; then
+ echo "Failed to report kvm profile"
+ perf kvm -i "${perfdata}" report --stdio 2>&1
+ err=1
+ return
+ fi
+
+ echo "perf kvm record/report test [Success]"
+}
+
+test_kvm_buildid_list() {
+ echo "Testing perf kvm buildid-list"
+
+ # We reuse the perf.data from the previous record test
+ if ! perf kvm --host -i "${perfdata}" buildid-list 2>&1 | grep -q "."; then
+ echo "Failed to list buildids"
+ perf kvm --host -i "${perfdata}" buildid-list 2>&1
+ err=1
+ return
+ fi
+
+ echo "perf kvm buildid-list test [Success]"
+}
+
+setup_qemu() {
+ # Find qemu
+ if [ "$(uname -m)" = "x86_64" ]; then
+ qemu="qemu-system-x86_64"
+ elif [ "$(uname -m)" = "aarch64" ]; then
+ qemu="qemu-system-aarch64"
+ elif [ "$(uname -m)" = "s390x" ]; then
+ qemu="qemu-system-s390x"
+ elif [ "$(uname -m)" = "ppc64le" ]; then
+ qemu="qemu-system-ppc64"
+ else
+ qemu="qemu-system-$(uname -m)"
+ fi
+
+ if ! which -s "$qemu"; then
+ skip "$qemu not found"
+ fi
+
+ if [ ! -r /dev/kvm ] || [ ! -w /dev/kvm ]; then
+ skip "/dev/kvm not accessible"
+ fi
+
+ if ! perf kvm stat record -a sleep 0.01 >/dev/null 2>&1; then
+ skip "No permission to record kvm events"
+ fi
+
+ echo "Starting $qemu..."
+ # Start qemu in background, detached, with pidfile
+ # We use -display none -daemonize and a monitor to keep it alive/controllable if needed
+ # We don't need a real kernel, just KVM active.
+ if ! $qemu -enable-kvm -display none -daemonize -pidfile "${qemu_pid_file}" -monitor none; then
+ echo "Failed to start qemu"
+ err=1
+ return
+ fi
+
+ # Wait a bit for qemu to start
+ sleep 1
+ qemu_pid=$(cat "${qemu_pid_file}")
+
+ if ! kill -0 "${qemu_pid}" 2>/dev/null; then
+ echo "Qemu process failed to stay alive"
+ err=1
+ return
+ fi
+}
+
+setup_qemu
+if [ $err -eq 0 ]; then
+ test_kvm_stat
+ test_kvm_record_report
+ test_kvm_buildid_list
+fi
+
+cleanup
+exit $err
diff --git a/tools/perf/tests/shell/lib/perf_json_output_lint.py b/tools/perf/tests/shell/lib/perf_json_output_lint.py
index c6750ef06c0f..dafbde56cc76 100644
--- a/tools/perf/tests/shell/lib/perf_json_output_lint.py
+++ b/tools/perf/tests/shell/lib/perf_json_output_lint.py
@@ -43,6 +43,9 @@ def isint(num):
def is_counter_value(num):
return isfloat(num) or num == '<not counted>' or num == '<not supported>'
+def is_metric_value(num):
+ return isfloat(num) or num == 'none'
+
def check_json_output(expected_items):
checks = {
'counters': lambda x: isfloat(x),
@@ -57,7 +60,7 @@ def check_json_output(expected_items):
'event-runtime': lambda x: isfloat(x),
'interval': lambda x: isfloat(x),
'metric-unit': lambda x: True,
- 'metric-value': lambda x: isfloat(x),
+ 'metric-value': lambda x: is_metric_value(x),
'metric-threshold': lambda x: x in ['unknown', 'good', 'less good', 'nearly bad', 'bad'],
'metricgroup': lambda x: True,
'node': lambda x: True,
@@ -65,8 +68,6 @@ def check_json_output(expected_items):
'socket': lambda x: True,
'thread': lambda x: True,
'unit': lambda x: True,
- 'insn per cycle': lambda x: isfloat(x),
- 'GHz': lambda x: True, # FIXME: it seems unintended for --metric-only
}
input = '[\n' + ','.join(Lines) + '\n]'
for item in json.loads(input):
@@ -88,6 +89,8 @@ def check_json_output(expected_items):
f' in \'{item}\'')
for key, value in item.items():
if key not in checks:
+ if args.metric_only:
+ continue
raise RuntimeError(f'Unexpected key: key={key} value={value}')
if not checks[key](value):
raise RuntimeError(f'Check failed for: key={key} value={value}')
diff --git a/tools/perf/tests/shell/lib/stat_output.sh b/tools/perf/tests/shell/lib/stat_output.sh
index c2ec7881ec1d..3c36e80fe422 100644
--- a/tools/perf/tests/shell/lib/stat_output.sh
+++ b/tools/perf/tests/shell/lib/stat_output.sh
@@ -156,7 +156,7 @@ check_metric_only()
echo "[Skip] CPU-measurement counter facility not installed"
return
fi
- perf stat --metric-only $2 -e instructions,cycles true
+ perf stat --metric-only $2 -M page_faults_per_second true
commachecker --metric-only
echo "[Success]"
}
diff --git a/tools/perf/tests/shell/lock_contention.sh b/tools/perf/tests/shell/lock_contention.sh
index d33d9e4392b0..6dd90519f45c 100755
--- a/tools/perf/tests/shell/lock_contention.sh
+++ b/tools/perf/tests/shell/lock_contention.sh
@@ -7,18 +7,24 @@ set -e
err=0
perfdata=$(mktemp /tmp/__perf_test.perf.data.XXXXX)
result=$(mktemp /tmp/__perf_test.result.XXXXX)
+errout=$(mktemp /tmp/__perf_test.errout.XXXXX)
cleanup() {
rm -f ${perfdata}
rm -f ${result}
- trap - EXIT TERM INT
+ rm -f ${errout}
+ trap - EXIT TERM INT ERR
}
trap_cleanup() {
+ if (( $? == 139 )); then #SIGSEGV
+ err=1
+ fi
+ echo "Unexpected signal in ${FUNCNAME[1]}"
cleanup
exit ${err}
}
-trap trap_cleanup EXIT TERM INT
+trap trap_cleanup EXIT TERM INT ERR
check() {
if [ "$(id -u)" != 0 ]; then
@@ -75,10 +81,12 @@ test_bpf()
test_record_concurrent()
{
echo "Testing perf lock record and perf lock contention at the same time"
- perf lock record -o- -- perf bench sched messaging -p 2> /dev/null | \
+ perf lock record -o- -- perf bench sched messaging -p 2> ${errout} | \
perf lock contention -i- -E 1 -q 2> ${result}
if [ "$(cat "${result}" | wc -l)" != "1" ]; then
echo "[Fail] Recorded result count is not 1:" "$(cat "${result}" | wc -l)"
+ cat ${errout}
+ cat ${result}
err=1
exit
fi
@@ -140,7 +148,7 @@ test_aggr_cgroup()
fi
# the perf lock contention output goes to the stderr
- perf lock con -a -b -g -E 1 -q -- perf bench sched messaging -p > /dev/null 2> ${result}
+ perf lock con -a -b --lock-cgroup -E 1 -q -- perf bench sched messaging -p > /dev/null 2> ${result}
if [ "$(cat "${result}" | wc -l)" != "1" ]; then
echo "[Fail] BPF result count is not 1:" "$(cat "${result}" | wc -l)"
err=1
@@ -266,7 +274,7 @@ test_cgroup_filter()
return
fi
- perf lock con -a -b -g -E 1 -F wait_total -q -- perf bench sched messaging -p > /dev/null 2> ${result}
+ perf lock con -a -b --lock-cgroup -E 1 -F wait_total -q -- perf bench sched messaging -p > /dev/null 2> ${result}
if [ "$(cat "${result}" | wc -l)" != "1" ]; then
echo "[Fail] BPF result should have a cgroup result:" "$(cat "${result}")"
err=1
@@ -274,7 +282,7 @@ test_cgroup_filter()
fi
cgroup=$(cat "${result}" | awk '{ print $3 }')
- perf lock con -a -b -g -E 1 -G "${cgroup}" -q -- perf bench sched messaging -p > /dev/null 2> ${result}
+ perf lock con -a -b --lock-cgroup -E 1 -G "${cgroup}" -q -- perf bench sched messaging -p > /dev/null 2> ${result}
if [ "$(cat "${result}" | wc -l)" != "1" ]; then
echo "[Fail] BPF result should have a result with cgroup filter:" "$(cat "${cgroup}")"
err=1
@@ -333,4 +341,5 @@ test_aggr_task_stack_filter
test_cgroup_filter
test_csv_output
+cleanup
exit ${err}
diff --git a/tools/perf/tests/shell/python-use.sh b/tools/perf/tests/shell/python-use.sh
new file mode 100755
index 000000000000..fd2ee5390060
--- /dev/null
+++ b/tools/perf/tests/shell/python-use.sh
@@ -0,0 +1,36 @@
+#!/bin/bash
+# 'import perf' in python
+# SPDX-License-Identifier: GPL-2.0
+# Just test if we can load the python binding.
+set -e
+
+shelldir=$(dirname "$0")
+# shellcheck source=lib/setup_python.sh
+. "${shelldir}"/lib/setup_python.sh
+
+MODULE_DIR=$(dirname "$(which perf)")/python
+
+if [ -d "$MODULE_DIR" ]
+then
+ CMD=$(cat <<EOF
+import sys
+sys.path.insert(0, '$MODULE_DIR')
+import perf
+print('success!')
+EOF
+ )
+else
+ CMD=$(cat <<EOF
+import perf
+print('success!')
+EOF
+ )
+fi
+
+echo -e "Testing 'import perf' with:\n$CMD"
+
+if ! echo "$CMD" | $PYTHON | grep -q "success!"
+then
+ exit 1
+fi
+exit 0
diff --git a/tools/perf/tests/shell/record.sh b/tools/perf/tests/shell/record.sh
index b1ad24fb3b33..0f5841c479e7 100755
--- a/tools/perf/tests/shell/record.sh
+++ b/tools/perf/tests/shell/record.sh
@@ -388,6 +388,45 @@ test_callgraph() {
echo "Callgraph test [Success]"
}
+test_ratio_to_prev() {
+ echo "ratio-to-prev test"
+ if ! perf record -o /dev/null -e "{instructions, cycles/period=100000,ratio-to-prev=0.5/}" \
+ true 2> /dev/null
+ then
+ echo "ratio-to-prev [Skipped not supported]"
+ return
+ fi
+ if ! perf record -o /dev/null -e "instructions, cycles/period=100000,ratio-to-prev=0.5/" \
+ true |& grep -q 'Invalid use of ratio-to-prev term without preceding element in group'
+ then
+ echo "ratio-to-prev test [Failed elements must be in same group]"
+ err=1
+ return
+ fi
+ if ! perf record -o /dev/null -e "{instructions,dummy,cycles/period=100000,ratio-to-prev=0.5/}" \
+ true |& grep -q 'must have same PMU'
+ then
+ echo "ratio-to-prev test [Failed elements must have same PMU]"
+ err=1
+ return
+ fi
+ if ! perf record -o /dev/null -e "{instructions,cycles/ratio-to-prev=0.5/}" \
+ true |& grep -q 'Event period term or count (-c) must be set when using ratio-to-prev term.'
+ then
+ echo "ratio-to-prev test [Failed period must be set]"
+ err=1
+ return
+ fi
+ if ! perf record -o /dev/null -e "{cycles/ratio-to-prev=0.5/}" \
+ true |& grep -q 'Invalid use of ratio-to-prev term without preceding element in group'
+ then
+ echo "ratio-to-prev test [Failed need 2+ events]"
+ err=1
+ return
+ fi
+ echo "Basic ratio-to-prev record test [Success]"
+}
+
# raise the limit of file descriptors to minimum
if [[ $default_fd_limit -lt $min_fd_limit ]]; then
ulimit -Sn $min_fd_limit
@@ -404,6 +443,7 @@ test_leader_sampling
test_topdown_leader_sampling
test_precise_max
test_callgraph
+test_ratio_to_prev
# restore the default value
ulimit -Sn $default_fd_limit
diff --git a/tools/perf/tests/shell/record_lbr.sh b/tools/perf/tests/shell/record_lbr.sh
index 6fcb5e52b9b4..78a02e90ece1 100755
--- a/tools/perf/tests/shell/record_lbr.sh
+++ b/tools/perf/tests/shell/record_lbr.sh
@@ -4,6 +4,10 @@
set -e
+ParanoidAndNotRoot() {
+ [ "$(id -u)" != 0 ] && [ "$(cat /proc/sys/kernel/perf_event_paranoid)" -gt $1 ]
+}
+
if [ ! -f /sys/bus/event_source/devices/cpu/caps/branches ] &&
[ ! -f /sys/bus/event_source/devices/cpu_core/caps/branches ]
then
@@ -23,6 +27,7 @@ cleanup() {
}
trap_cleanup() {
+ echo "Unexpected signal in ${FUNCNAME[1]}"
cleanup
exit 1
}
@@ -123,8 +128,11 @@ lbr_test "-j ind_call" "any indirect call" 2
lbr_test "-j ind_jmp" "any indirect jump" 100
lbr_test "-j call" "direct calls" 2
lbr_test "-j ind_call,u" "any indirect user call" 100
-lbr_test "-a -b" "system wide any branch" 2
-lbr_test "-a -j any_call" "system wide any call" 2
+if ! ParanoidAndNotRoot 1
+then
+ lbr_test "-a -b" "system wide any branch" 2
+ lbr_test "-a -j any_call" "system wide any call" 2
+fi
# Parallel
parallel_lbr_test "-b" "parallel any branch" 100 &
@@ -141,10 +149,16 @@ parallel_lbr_test "-j call" "parallel direct calls" 100 &
pid6=$!
parallel_lbr_test "-j ind_call,u" "parallel any indirect user call" 100 &
pid7=$!
-parallel_lbr_test "-a -b" "parallel system wide any branch" 100 &
-pid8=$!
-parallel_lbr_test "-a -j any_call" "parallel system wide any call" 100 &
-pid9=$!
+if ParanoidAndNotRoot 1
+then
+ pid8=
+ pid9=
+else
+ parallel_lbr_test "-a -b" "parallel system wide any branch" 100 &
+ pid8=$!
+ parallel_lbr_test "-a -j any_call" "parallel system wide any call" 100 &
+ pid9=$!
+fi
for pid in $pid1 $pid2 $pid3 $pid4 $pid5 $pid6 $pid7 $pid8 $pid9
do
diff --git a/tools/perf/tests/shell/record_weak_term.sh b/tools/perf/tests/shell/record_weak_term.sh
new file mode 100755
index 000000000000..811b00ffb47a
--- /dev/null
+++ b/tools/perf/tests/shell/record_weak_term.sh
@@ -0,0 +1,37 @@
+#!/bin/bash
+# record weak terms
+# SPDX-License-Identifier: GPL-2.0
+# Test that command line options override weak terms from sysfs or inbuilt json.
+set -e
+
+shelldir=$(dirname "$0")
+# shellcheck source=lib/setup_python.sh
+. "${shelldir}"/lib/setup_python.sh
+
+# Find the first event with a specified period, such as
+# "cpu_core/event=0x24,period=200003,umask=0xff/"
+event=$(perf list --json | $PYTHON -c '
+import json, sys
+for e in json.load(sys.stdin):
+ if "EventName" not in e or "/modifier" in e["EventName"]:
+ continue
+ if "Encoding" in e and "period=" in e["Encoding"]:
+ print(e["EventName"])
+ break
+')
+if [[ "$event" = "" ]]
+then
+ echo "Skip: No sysfs/json events with inbuilt period."
+ exit 2
+fi
+
+echo "Testing that for $event the period is overridden with 1000"
+perf list --detail "$event"
+if ! perf record -c 1000 -vv -e "$event" -o /dev/null true 2>&1 | \
+ grep -q -F '{ sample_period, sample_freq } 1000'
+then
+ echo "Fail: Unexpected verbose output and sample period"
+ exit 1
+fi
+echo "Success"
+exit 0
diff --git a/tools/perf/tests/shell/script_dlfilter.sh b/tools/perf/tests/shell/script_dlfilter.sh
new file mode 100755
index 000000000000..45c97d4a7d5f
--- /dev/null
+++ b/tools/perf/tests/shell/script_dlfilter.sh
@@ -0,0 +1,107 @@
+#!/bin/bash
+# perf script --dlfilter tests
+# SPDX-License-Identifier: GPL-2.0
+
+set -e
+
+shelldir=$(dirname "$0")
+# shellcheck source=lib/setup_python.sh
+. "${shelldir}"/lib/setup_python.sh
+
+# skip if there's no compiler
+if ! [ -x "$(command -v cc)" ]; then
+ echo "failed: no compiler, install gcc"
+ exit 2
+fi
+
+err=0
+perfdata=$(mktemp /tmp/__perf_test.perf.data.XXXXX)
+dlfilter_c=$(mktemp /tmp/__perf_test.dlfilter.test.c.XXXXX)
+dlfilter_so=$(mktemp /tmp/__perf_test.dlfilter.so.XXXXX)
+
+cleanup() {
+ rm -f "${perfdata}"
+ rm -f "${dlfilter_c}"
+ rm -f "${dlfilter_so}"
+ rm -f "${dlfilter_so}.o"
+
+ trap - EXIT TERM INT
+}
+
+trap_cleanup() {
+ echo "Unexpected signal in ${FUNCNAME[1]}"
+ cleanup
+ exit 1
+}
+trap trap_cleanup EXIT TERM INT
+
+cat <<EOF > "${dlfilter_c}"
+#include <perf/perf_dlfilter.h>
+#include <string.h>
+#include <stdio.h>
+
+struct perf_dlfilter_fns perf_dlfilter_fns;
+
+int filter_event(void *data, const struct perf_dlfilter_sample *sample, void *ctx)
+{
+ const struct perf_dlfilter_al *al;
+
+ if (!sample->ip)
+ return 0;
+
+ al = perf_dlfilter_fns.resolve_ip(ctx);
+ if (!al || !al->sym || strcmp(al->sym, "test_loop"))
+ return 1;
+
+ return 0;
+}
+EOF
+
+test_dlfilter() {
+ echo "Basic --dlfilter test"
+ # Generate perf.data file
+ if ! perf record -o "${perfdata}" perf test -w thloop 1 2> /dev/null
+ then
+ echo "Basic --dlfilter test [Failed record]"
+ err=1
+ return
+ fi
+
+ # Build the dlfilter
+ if ! cc -c -I tools/perf/include -fpic -x c "${dlfilter_c}" -o "${dlfilter_so}.o"
+ then
+ echo "Basic --dlfilter test [Failed to build dlfilter object]"
+ err=1
+ return
+ fi
+
+ if ! cc -shared -o "${dlfilter_so}" "${dlfilter_so}.o"
+ then
+ echo "Basic --dlfilter test [Failed to link dlfilter shared object]"
+ err=1
+ return
+ fi
+
+ # Check that the output contains "test_loop" and nothing else
+ if ! perf script -i "${perfdata}" --dlfilter "${dlfilter_so}" | grep -q "test_loop"
+ then
+ echo "Basic --dlfilter test [Failed missing output]"
+ err=1
+ return
+ fi
+
+ # The filter should filter out everything except test_loop, so ensure no other symbols are present
+ # This is a simple check; we could be more rigorous
+ if perf script -i "${perfdata}" --dlfilter "${dlfilter_so}" | grep -v "test_loop" | grep -q "perf"
+ then
+ echo "Basic --dlfilter test [Failed filtering]"
+ err=1
+ return
+ fi
+
+ echo "Basic --dlfilter test [Success]"
+}
+
+test_dlfilter
+cleanup
+exit $err
diff --git a/tools/perf/tests/shell/stat+csv_output.sh b/tools/perf/tests/shell/stat+csv_output.sh
index 7a6f6e177402..cd6fff597091 100755
--- a/tools/perf/tests/shell/stat+csv_output.sh
+++ b/tools/perf/tests/shell/stat+csv_output.sh
@@ -44,7 +44,7 @@ function commachecker()
;; "--per-die") exp=8
;; "--per-cluster") exp=8
;; "--per-cache") exp=8
- ;; "--metric-only") exp=2
+ ;; "--metric-only") exp=1
esac
while read line
diff --git a/tools/perf/tests/shell/stat+event_uniquifying.sh b/tools/perf/tests/shell/stat+event_uniquifying.sh
index bf54bd6c3e2e..b5dec6b6da36 100755
--- a/tools/perf/tests/shell/stat+event_uniquifying.sh
+++ b/tools/perf/tests/shell/stat+event_uniquifying.sh
@@ -4,74 +4,63 @@
set -e
-stat_output=$(mktemp /tmp/__perf_test.stat_output.XXXXX)
-perf_tool=perf
err=0
+stat_output=$(mktemp /tmp/__perf_test.stat_output.XXXXX)
-test_event_uniquifying() {
- # We use `clockticks` in `uncore_imc` to verify the uniquify behavior.
- pmu="uncore_imc"
- event="clockticks"
-
- # If the `-A` option is added, the event should be uniquified.
- #
- # $perf list -v clockticks
- #
- # List of pre-defined events (to be used in -e or -M):
- #
- # uncore_imc_0/clockticks/ [Kernel PMU event]
- # uncore_imc_1/clockticks/ [Kernel PMU event]
- # uncore_imc_2/clockticks/ [Kernel PMU event]
- # uncore_imc_3/clockticks/ [Kernel PMU event]
- # uncore_imc_4/clockticks/ [Kernel PMU event]
- # uncore_imc_5/clockticks/ [Kernel PMU event]
- #
- # ...
- #
- # $perf stat -e clockticks -A -- true
- #
- # Performance counter stats for 'system wide':
- #
- # CPU0 3,773,018 uncore_imc_0/clockticks/
- # CPU0 3,609,025 uncore_imc_1/clockticks/
- # CPU0 0 uncore_imc_2/clockticks/
- # CPU0 3,230,009 uncore_imc_3/clockticks/
- # CPU0 3,049,897 uncore_imc_4/clockticks/
- # CPU0 0 uncore_imc_5/clockticks/
- #
- # 0.002029828 seconds time elapsed
-
- echo "stat event uniquifying test"
- uniquified_event_array=()
+cleanup() {
+ rm -f "${stat_output}"
- # Skip if the machine does not have `uncore_imc` device.
- if ! ${perf_tool} list pmu | grep -q ${pmu}; then
- echo "Target does not support PMU ${pmu} [Skipped]"
- err=2
- return
- fi
+ trap - EXIT TERM INT
+}
- # Check how many uniquified events.
- while IFS= read -r line; do
- uniquified_event=$(echo "$line" | awk '{print $1}')
- uniquified_event_array+=("${uniquified_event}")
- done < <(${perf_tool} list -v ${event} | grep ${pmu})
+trap_cleanup() {
+ echo "Unexpected signal in ${FUNCNAME[1]}"
+ cleanup
+ exit 1
+}
+trap trap_cleanup EXIT TERM INT
- perf_command="${perf_tool} stat -e $event -A -o ${stat_output} -- true"
- $perf_command
+test_event_uniquifying() {
+ echo "Uniquification of PMU sysfs events test"
- # Check the output contains all uniquified events.
- for uniquified_event in "${uniquified_event_array[@]}"; do
- if ! cat "${stat_output}" | grep -q "${uniquified_event}"; then
- echo "Event is not uniquified [Failed]"
- echo "${perf_command}"
- cat "${stat_output}"
- err=1
- break
- fi
+ # Read events from perf list with and without -v. With -v the duplicate PMUs
+ # aren't deduplicated. Note, json events are listed by perf list without a
+ # PMU.
+ read -ra pmu_events <<< "$(perf list --raw pmu)"
+ read -ra pmu_v_events <<< "$(perf list -v --raw pmu)"
+ # For all non-deduplicated events.
+ for pmu_v_event in "${pmu_v_events[@]}"; do
+ # If the event matches an event in the deduplicated events then it musn't
+ # be an event with duplicate PMUs, continue the outer loop.
+ for pmu_event in "${pmu_events[@]}"; do
+ if [[ "$pmu_v_event" == "$pmu_event" ]]; then
+ continue 2
+ fi
+ done
+ # Strip the suffix from the non-deduplicated event's PMU.
+ event=$(echo "$pmu_v_event" | sed -E 's/_[0-9]+//')
+ for pmu_event in "${pmu_events[@]}"; do
+ if [[ "$event" == "$pmu_event" ]]; then
+ echo "Testing event ${event} is uniquified to ${pmu_v_event}"
+ if ! perf stat -e "$event" -A -o ${stat_output} -- true; then
+ echo "Error running perf stat for event '$event' [Skip]"
+ if [ $err = 0 ]; then
+ err=2
+ fi
+ continue
+ fi
+ # Ensure the non-deduplicated event appears in the output.
+ if ! grep -q "${pmu_v_event}" "${stat_output}"; then
+ echo "Uniquification of PMU sysfs events test [Failed]"
+ cat "${stat_output}"
+ err=1
+ fi
+ break
+ fi
+ done
done
}
test_event_uniquifying
-rm -f "${stat_output}"
+cleanup
exit $err
diff --git a/tools/perf/tests/shell/stat+json_output.sh b/tools/perf/tests/shell/stat+json_output.sh
index 98fb65274ac4..85d1ad7186c6 100755
--- a/tools/perf/tests/shell/stat+json_output.sh
+++ b/tools/perf/tests/shell/stat+json_output.sh
@@ -181,7 +181,7 @@ check_metric_only()
echo "[Skip] CPU-measurement counter facility not installed"
return
fi
- perf stat -j --metric-only -e instructions,cycles -o "${stat_output}" true
+ perf stat -j --metric-only -M page_faults_per_second -o "${stat_output}" true
$PYTHON $pythonchecker --metric-only --file "${stat_output}"
echo "[Success]"
}
diff --git a/tools/perf/tests/shell/stat+shadow_stat.sh b/tools/perf/tests/shell/stat+shadow_stat.sh
index 8824f445d343..cabbbf17c662 100755
--- a/tools/perf/tests/shell/stat+shadow_stat.sh
+++ b/tools/perf/tests/shell/stat+shadow_stat.sh
@@ -14,7 +14,7 @@ perf stat -a -e cycles sleep 1 2>&1 | grep -e cpu_core && exit 2
test_global_aggr()
{
- perf stat -a --no-big-num -e cycles,instructions sleep 1 2>&1 | \
+ perf stat -a --no-big-num -M insn_per_cycle sleep 1 2>&1 | \
grep -e cycles -e instructions | \
while read num evt _ ipc rest
do
@@ -53,7 +53,7 @@ test_global_aggr()
test_no_aggr()
{
- perf stat -a -A --no-big-num -e cycles,instructions sleep 1 2>&1 | \
+ perf stat -a -A --no-big-num -M insn_per_cycle sleep 1 2>&1 | \
grep ^CPU | \
while read cpu num evt _ ipc rest
do
diff --git a/tools/perf/tests/shell/stat+std_output.sh b/tools/perf/tests/shell/stat+std_output.sh
index 6fee67693ba7..9c4b92ecf448 100755
--- a/tools/perf/tests/shell/stat+std_output.sh
+++ b/tools/perf/tests/shell/stat+std_output.sh
@@ -12,8 +12,8 @@ set -e
stat_output=$(mktemp /tmp/__perf_test.stat_output.std.XXXXX)
event_name=(cpu-clock task-clock context-switches cpu-migrations page-faults stalled-cycles-frontend stalled-cycles-backend cycles instructions branches branch-misses)
-event_metric=("CPUs utilized" "CPUs utilized" "/sec" "/sec" "/sec" "frontend cycles idle" "backend cycles idle" "GHz" "insn per cycle" "/sec" "of all branches")
-skip_metric=("stalled cycles per insn" "tma_" "retiring" "frontend_bound" "bad_speculation" "backend_bound" "TopdownL1" "percent of slots")
+event_metric=("CPUs_utilized" "CPUs_utilized" "cs/sec" "migrations/sec" "faults/sec" "frontend_cycles_idle" "backend_cycles_idle" "GHz" "insn_per_cycle" "/sec" "branch_miss_rate")
+skip_metric=("tma_" "TopdownL1")
cleanup() {
rm -f "${stat_output}"
@@ -90,7 +90,11 @@ function commachecker()
}
done < "${stat_output}"
- [ $metric_only -eq 1 ] && exit 1
+ if [ $metric_only -ne 1 ]
+ then
+ echo "Missing metric only output in:"
+ cat "${stat_output}"
+ fi
return 0
}
diff --git a/tools/perf/tests/shell/stat.sh b/tools/perf/tests/shell/stat.sh
index 8a100a7f2dc1..0b2f0f88ca16 100755
--- a/tools/perf/tests/shell/stat.sh
+++ b/tools/perf/tests/shell/stat.sh
@@ -16,9 +16,46 @@ test_default_stat() {
echo "Basic stat command test [Success]"
}
+test_null_stat() {
+ echo "Null stat command test"
+ if ! perf stat --null true 2>&1 | grep -E -q "Performance counter stats for 'true':"
+ then
+ echo "Null stat command test [Failed]"
+ err=1
+ return
+ fi
+ echo "Null stat command test [Success]"
+}
+
+find_offline_cpu() {
+ for i in $(seq 1 4096)
+ do
+ if [[ ! -f /sys/devices/system/cpu/cpu$i/online || \
+ $(cat /sys/devices/system/cpu/cpu$i/online) == "0" ]]
+ then
+ echo $i
+ return
+ fi
+ done
+ echo "Failed to find offline CPU"
+ exit 1
+}
+
+test_offline_cpu_stat() {
+ cpu=$(find_offline_cpu)
+ echo "Offline CPU stat command test (cpu $cpu)"
+ if ! perf stat "-C$cpu" -e cycles true 2>&1 | grep -E -q "No supported events found."
+ then
+ echo "Offline CPU stat command test [Failed]"
+ err=1
+ return
+ fi
+ echo "Offline CPU stat command test [Success]"
+}
+
test_stat_record_report() {
echo "stat record and report test"
- if ! perf stat record -o - true | perf stat report -i - 2>&1 | \
+ if ! perf stat record -e task-clock -o - true | perf stat report -i - 2>&1 | \
grep -E -q "Performance counter stats for 'pipe':"
then
echo "stat record and report test [Failed]"
@@ -30,7 +67,7 @@ test_stat_record_report() {
test_stat_record_script() {
echo "stat record and script test"
- if ! perf stat record -o - true | perf script -i - 2>&1 | \
+ if ! perf stat record -e task-clock -o - true | perf script -i - 2>&1 | \
grep -E -q "CPU[[:space:]]+THREAD[[:space:]]+VAL[[:space:]]+ENA[[:space:]]+RUN[[:space:]]+TIME[[:space:]]+EVENT"
then
echo "stat record and script test [Failed]"
@@ -196,7 +233,7 @@ test_hybrid() {
fi
# Run default Perf stat
- cycles_events=$(perf stat -- true 2>&1 | grep -E "/cycles/[uH]*| cycles[:uH]* " -c)
+ cycles_events=$(perf stat -a -- sleep 0.1 2>&1 | grep -E "/cpu-cycles/[uH]*| cpu-cycles[:uH]* " -c)
# The expectation is that default output will have a cycles events on each
# hybrid PMU. In situations with no cycles PMU events, like virtualized, this
@@ -212,6 +249,8 @@ test_hybrid() {
}
test_default_stat
+test_null_stat
+test_offline_cpu_stat
test_stat_record_report
test_stat_record_script
test_stat_repeat_weak_groups
diff --git a/tools/perf/tests/shell/stat_all_metricgroups.sh b/tools/perf/tests/shell/stat_all_metricgroups.sh
index c6d61a4ac3e7..1400880ec01f 100755
--- a/tools/perf/tests/shell/stat_all_metricgroups.sh
+++ b/tools/perf/tests/shell/stat_all_metricgroups.sh
@@ -37,6 +37,9 @@ do
then
err=2 # Skip
fi
+ elif [[ "$m" == @(Default2|Default3|Default4) ]]
+ then
+ echo "Ignoring failures in $m that may contain unsupported legacy events"
else
echo "Metric group $m failed"
echo $result
diff --git a/tools/perf/tests/shell/stat_all_metrics.sh b/tools/perf/tests/shell/stat_all_metrics.sh
index 6fa585a1e34c..3dabb39c7cc8 100755
--- a/tools/perf/tests/shell/stat_all_metrics.sh
+++ b/tools/perf/tests/shell/stat_all_metrics.sh
@@ -25,16 +25,22 @@ for m in $(perf list --raw-dump metrics); do
# No error result and metric shown.
continue
fi
- if [[ "$result" =~ "Cannot resolve IDs for" ]]
+ if [[ "$result" =~ "Cannot resolve IDs for" || "$result" =~ "No supported events found" ]]
then
- echo "Metric contains missing events"
+ if [[ $(perf list --raw-dump $m) == "Default"* ]]
+ then
+ echo "[Ignored $m] failed but as a Default metric this can be expected"
+ echo $result
+ continue
+ fi
+ echo "[Failed $m] Metric contains missing events"
echo $result
err=1 # Fail
continue
elif [[ "$result" =~ \
"Access to performance monitoring and observability operations is limited" ]]
then
- echo "Permission failure"
+ echo "[Skipped $m] Permission failure"
echo $result
if [[ $err -eq 0 ]]
then
@@ -43,7 +49,7 @@ for m in $(perf list --raw-dump metrics); do
continue
elif [[ "$result" =~ "in per-thread mode, enable system wide" ]]
then
- echo "Permissions - need system wide mode"
+ echo "[Skipped $m] Permissions - need system wide mode"
echo $result
if [[ $err -eq 0 ]]
then
@@ -52,7 +58,13 @@ for m in $(perf list --raw-dump metrics); do
continue
elif [[ "$result" =~ "<not supported>" ]]
then
- echo "Not supported events"
+ if [[ $(perf list --raw-dump $m) == "Default"* ]]
+ then
+ echo "[Ignored $m] failed but as a Default metric this can be expected"
+ echo $result
+ continue
+ fi
+ echo "[Skipped $m] Not supported events"
echo $result
if [[ $err -eq 0 ]]
then
@@ -61,7 +73,7 @@ for m in $(perf list --raw-dump metrics); do
continue
elif [[ "$result" =~ "<not counted>" ]]
then
- echo "Not counted events"
+ echo "[Skipped $m] Not counted events"
echo $result
if [[ $err -eq 0 ]]
then
@@ -70,7 +82,7 @@ for m in $(perf list --raw-dump metrics); do
continue
elif [[ "$result" =~ "FP_ARITH" || "$result" =~ "AMX" ]]
then
- echo "FP issues"
+ echo "[Skipped $m] FP issues"
echo $result
if [[ $err -eq 0 ]]
then
@@ -79,7 +91,7 @@ for m in $(perf list --raw-dump metrics); do
continue
elif [[ "$result" =~ "PMM" ]]
then
- echo "Optane memory issues"
+ echo "[Skipped $m] Optane memory issues"
echo $result
if [[ $err -eq 0 ]]
then
@@ -96,7 +108,7 @@ for m in $(perf list --raw-dump metrics); do
# No error result and metric shown.
continue
fi
- echo "Metric '$m' has non-zero error '$result_err' or not printed in:"
+ echo "[Failed $m] has non-zero error '$result_err' or not printed in:"
echo "$result"
err=1
done
diff --git a/tools/perf/tests/shell/test_bpf_metadata.sh b/tools/perf/tests/shell/test_bpf_metadata.sh
index 69e3c2055134..be67d56e0f09 100755
--- a/tools/perf/tests/shell/test_bpf_metadata.sh
+++ b/tools/perf/tests/shell/test_bpf_metadata.sh
@@ -61,7 +61,7 @@ test_bpf_metadata() {
/perf_version/ {
if (entry) print $NF;
}
- ' | egrep "$VERS" > /dev/null
+ ' | grep -qF "$VERS"
then
echo "Basic BPF metadata test [Failed invalid output]"
err=1
diff --git a/tools/perf/tests/shell/test_brstack.sh b/tools/perf/tests/shell/test_brstack.sh
index 9138fa83bf36..85233d435be6 100755
--- a/tools/perf/tests/shell/test_brstack.sh
+++ b/tools/perf/tests/shell/test_brstack.sh
@@ -34,6 +34,17 @@ trap_cleanup() {
}
trap trap_cleanup EXIT TERM INT
+is_arm64() {
+ [ "$(uname -m)" = "aarch64" ];
+}
+
+check_branches() {
+ if ! tr -s ' ' '\n' < "$TMPDIR/perf.script" | grep -E -m1 -q "$1"; then
+ echo "Branches missing $1"
+ err=1
+ fi
+}
+
test_user_branches() {
echo "Testing user branch stack sampling"
@@ -55,14 +66,67 @@ test_user_branches() {
)
for x in "${expected[@]}"
do
- if ! tr -s ' ' '\n' < "$TMPDIR/perf.script" | grep -E -m1 -q "$x"
- then
- echo "Branches missing $x"
- err=1
- fi
+ check_branches "$x"
done
+
+ # Dump addresses only this time
+ perf script -i "$TMPDIR/perf.data" --fields brstack | \
+ tr ' ' '\n' > "$TMPDIR/perf.script"
+
+ # There should be no kernel addresses with the u option, in either
+ # source or target addresses.
+ if grep -E -m1 "0x[89a-f][0-9a-f]{15}" $TMPDIR/perf.script; then
+ echo "ERROR: Kernel address found in user mode"
+ err=1
+ fi
# some branch types are still not being tested:
- # IND COND_CALL COND_RET SYSCALL SYSRET IRQ SERROR NO_TX
+ # IND COND_CALL COND_RET SYSRET SERROR NO_TX
+}
+
+test_trap_eret_branches() {
+ echo "Testing trap & eret branches"
+ if ! is_arm64; then
+ echo "skip: not arm64"
+ else
+ perf record -o $TMPDIR/perf.data --branch-filter any,save_type,u,k -- \
+ perf test -w traploop 1000
+ perf script -i $TMPDIR/perf.data --fields brstacksym | \
+ tr ' ' '\n' > $TMPDIR/perf.script
+
+ # BRBINF<n>.TYPE == TRAP are mapped to PERF_BR_IRQ by the BRBE driver
+ check_branches "^trap_bench\+[^ ]+/[^ ]/IRQ/"
+ check_branches "^[^ ]+/trap_bench\+[^ ]+/ERET/"
+ fi
+}
+
+test_kernel_branches() {
+ echo "Testing that k option only includes kernel source addresses"
+
+ if ! perf record --branch-filter any,k -o- -- true > /dev/null; then
+ echo "skip: not enough privileges"
+ else
+ perf record -o $TMPDIR/perf.data --branch-filter any,k -- \
+ perf bench syscall basic --loop 1000
+ perf script -i $TMPDIR/perf.data --fields brstack | \
+ tr ' ' '\n' > $TMPDIR/perf.script
+
+ # Example of branch entries:
+ # "0xffffffff93bda241/0xffffffff93bda20f/M/-/-/..."
+ # Source addresses come first and target address can be either
+ # userspace or kernel even with k option, as long as the source
+ # is in kernel.
+
+ #Look for source addresses with top bit set
+ if ! grep -E -m1 "^0x[89a-f][0-9a-f]{15}" $TMPDIR/perf.script; then
+ echo "ERROR: Kernel branches missing"
+ err=1
+ fi
+ # Look for no source addresses without top bit set
+ if grep -E -m1 "^0x[0-7][0-9a-f]{0,15}" $TMPDIR/perf.script; then
+ echo "ERROR: User branches found with kernel filter"
+ err=1
+ fi
+ fi
}
# first argument <arg0> is the argument passed to "--branch-stack <arg0>,save_type,u"
@@ -97,18 +161,42 @@ test_filter() {
fi
}
+test_syscall() {
+ echo "Testing syscalls"
+ # skip if perf doesn't have enough privileges
+ if ! perf record --branch-filter any,k -o- -- true > /dev/null; then
+ echo "skip: not enough privileges"
+ else
+ perf record -o $TMPDIR/perf.data --branch-filter \
+ any_call,save_type,u,k -c 10000 -- \
+ perf bench syscall basic --loop 1000
+ perf script -i $TMPDIR/perf.data --fields brstacksym | \
+ tr ' ' '\n' > $TMPDIR/perf.script
+
+ check_branches "getppid[^ ]*/SYSCALL/"
+ fi
+}
set -e
test_user_branches
+test_syscall
+test_kernel_branches
+test_trap_eret_branches
+
+any_call="CALL|IND_CALL|COND_CALL|SYSCALL|IRQ"
+
+if is_arm64; then
+ any_call="$any_call|FAULT_DATA|FAULT_INST"
+fi
-test_filter "any_call" "CALL|IND_CALL|COND_CALL|SYSCALL|IRQ"
+test_filter "any_call" "$any_call"
test_filter "call" "CALL|SYSCALL"
test_filter "cond" "COND"
test_filter "any_ret" "RET|COND_RET|SYSRET|ERET"
test_filter "call,cond" "CALL|SYSCALL|COND"
-test_filter "any_call,cond" "CALL|IND_CALL|COND_CALL|IRQ|SYSCALL|COND"
-test_filter "cond,any_call,any_ret" "COND|CALL|IND_CALL|COND_CALL|SYSCALL|IRQ|RET|COND_RET|SYSRET|ERET"
+test_filter "any_call,cond" "$any_call|COND"
+test_filter "any_call,cond,any_ret" "$any_call|COND|RET|COND_RET"
cleanup
exit $err
diff --git a/tools/perf/tests/shell/test_event_open_fallback.sh b/tools/perf/tests/shell/test_event_open_fallback.sh
new file mode 100755
index 000000000000..9420a7557c13
--- /dev/null
+++ b/tools/perf/tests/shell/test_event_open_fallback.sh
@@ -0,0 +1,71 @@
+#!/bin/bash
+# Perf event open fallback test
+# SPDX-License-Identifier: GPL-2.0
+
+skip_cnt=0
+ok_cnt=0
+err_cnt=0
+
+perf_record()
+{
+ perf record -o /dev/null "$@" -- true 1>/dev/null 2>&1
+}
+
+test_decrease_precise_ip()
+{
+ echo "Decrease precise ip test"
+
+ perf list pmu | grep -q 'cycles' || return 2
+
+ if ! perf_record -e cycles; then
+ return 2
+ fi
+
+ # It should reduce precision level down to 0 if needed.
+ if ! perf_record -e cycles:P; then
+ return 1
+ fi
+ return 0
+}
+
+test_decrease_precise_ip_complicated()
+{
+ echo "Decrease precise ip test (complicated case)"
+
+ perf list pmu | grep -q 'mem-loads-aux' || return 2
+
+ if ! perf_record -e '{mem-loads-aux:S,mem-loads:PS}'; then
+ return 1
+ fi
+ return 0
+}
+
+count_result()
+{
+ if [ "$1" -eq 2 ] ; then
+ skip_cnt=$((skip_cnt + 1))
+ return
+ fi
+ if [ "$1" -eq 0 ] ; then
+ ok_cnt=$((ok_cnt + 1))
+ return
+ fi
+ err_cnt=$((err_cnt + 1))
+}
+
+ret=0
+test_decrease_precise_ip || ret=$? ; count_result $ret ; ret=0
+test_decrease_precise_ip_complicated || ret=$? ; count_result $ret ; ret=0
+
+cleanup
+
+if [ ${err_cnt} -gt 0 ] ; then
+ exit 1
+fi
+
+if [ ${ok_cnt} -gt 0 ] ; then
+ exit 0
+fi
+
+# Skip
+exit 2
diff --git a/tools/perf/tests/shell/timechart.sh b/tools/perf/tests/shell/timechart.sh
new file mode 100755
index 000000000000..b14b3472c284
--- /dev/null
+++ b/tools/perf/tests/shell/timechart.sh
@@ -0,0 +1,67 @@
+#!/bin/bash
+# perf timechart tests
+# SPDX-License-Identifier: GPL-2.0
+
+set -e
+
+err=0
+perfdata=$(mktemp /tmp/__perf_timechart_test.perf.data.XXXXX)
+output=$(mktemp /tmp/__perf_timechart_test.output.XXXXX.svg)
+
+cleanup() {
+ rm -f "${perfdata}"
+ rm -f "${output}"
+ trap - EXIT TERM INT
+}
+
+trap_cleanup() {
+ echo "Unexpected signal in ${FUNCNAME[1]}"
+ cleanup
+ exit 1
+}
+trap trap_cleanup EXIT TERM INT
+
+test_timechart() {
+ echo "Basic perf timechart test"
+
+ # Try to record timechart data.
+ # perf timechart record uses system-wide recording and specific tracepoints.
+ # If it fails (e.g. permissions, missing tracepoints), skip the test.
+ if ! perf timechart record -o "${perfdata}" true > /dev/null 2>&1; then
+ echo "Basic perf timechart test [Skipped: perf timechart record failed (permissions/events?)]"
+ return
+ fi
+
+ # Generate the timechart
+ if ! perf timechart -i "${perfdata}" -o "${output}" > /dev/null 2>&1; then
+ echo "Basic perf timechart test [Failed: perf timechart command failed]"
+ err=1
+ return
+ fi
+
+ # Check if output file exists and is not empty
+ if [ ! -s "${output}" ]; then
+ echo "Basic perf timechart test [Failed: output file is empty or missing]"
+ err=1
+ return
+ fi
+
+ # Check if it looks like an SVG
+ if ! grep -q "svg" "${output}"; then
+ echo "Basic perf timechart test [Failed: output doesn't look like SVG]"
+ err=1
+ return
+ fi
+
+ echo "Basic perf timechart test [Success]"
+}
+
+if ! perf check feature -q libtraceevent ; then
+ echo "perf timechart is not supported. Skip."
+ cleanup
+ exit 2
+fi
+
+test_timechart
+cleanup
+exit $err
diff --git a/tools/perf/tests/shell/top.sh b/tools/perf/tests/shell/top.sh
new file mode 100755
index 000000000000..768ebcf7a89c
--- /dev/null
+++ b/tools/perf/tests/shell/top.sh
@@ -0,0 +1,74 @@
+#!/bin/bash
+# perf top tests
+# SPDX-License-Identifier: GPL-2.0
+
+set -e
+
+err=0
+log_file=$(mktemp /tmp/perf.top.log.XXXXX)
+
+cleanup() {
+ rm -f "${log_file}"
+ trap - EXIT TERM INT
+}
+
+trap_cleanup() {
+ echo "Unexpected signal in ${FUNCNAME[1]}"
+ cleanup
+ exit 1
+}
+trap trap_cleanup EXIT TERM INT
+
+test_basic_perf_top() {
+ echo "Basic perf top test"
+
+ # Start a workload that spins to generate samples
+ # thloop runs for the specified number of seconds
+ perf test -w thloop 20 &
+ PID=$!
+
+ # Allow it to start
+ sleep 0.1
+
+ # Run perf top for 5 seconds, monitoring that PID
+ # Use --stdio to avoid TUI and redirect output
+ # Use -d 1 to avoid flooding output
+ # Use -e cpu-clock to ensure we get samples
+ # Use sleep to keep stdin open but silent, preventing EOF loop or interactive spam
+ if ! sleep 10 | timeout 5s perf top --stdio -d 1 -e cpu-clock -p $PID > "${log_file}" 2>&1; then
+ retval=$?
+ if [ $retval -ne 124 ] && [ $retval -ne 0 ]; then
+ echo "Basic perf top test [Failed: perf top failed to start or run (ret=$retval)]"
+ head -n 50 "${log_file}"
+ kill $PID
+ wait $PID 2>/dev/null || true
+ err=1
+ return
+ fi
+ fi
+
+ kill $PID
+ wait $PID 2>/dev/null || true
+
+ # Check for some sample data (percentage)
+ if ! grep -E -q "[0-9]+\.[0-9]+%" "${log_file}"; then
+ echo "Basic perf top test [Failed: no sample percentage found]"
+ head -n 50 "${log_file}"
+ err=1
+ return
+ fi
+
+ # Check for the symbol
+ if ! grep -q "test_loop" "${log_file}"; then
+ echo "Basic perf top test [Failed: test_loop symbol not found]"
+ head -n 50 "${log_file}"
+ err=1
+ return
+ fi
+
+ echo "Basic perf top test [Success]"
+}
+
+test_basic_perf_top
+cleanup
+exit $err
diff --git a/tools/perf/tests/shell/trace_btf_enum.sh b/tools/perf/tests/shell/trace_btf_enum.sh
index 572001d75d78..03e9f680a4a6 100755
--- a/tools/perf/tests/shell/trace_btf_enum.sh
+++ b/tools/perf/tests/shell/trace_btf_enum.sh
@@ -23,6 +23,14 @@ check_vmlinux() {
fi
}
+check_permissions() {
+ if perf trace -e $syscall $TESTPROG 2>&1 | grep -q "Operation not permitted"
+ then
+ echo "trace+enum test [Skipped permissions]"
+ err=2
+ fi
+}
+
trace_landlock() {
echo "Tracing syscall ${syscall}"
@@ -56,6 +64,9 @@ trace_non_syscall() {
}
check_vmlinux
+if [ $err = 0 ]; then
+ check_permissions
+fi
if [ $err = 0 ]; then
trace_landlock
diff --git a/tools/perf/tests/switch-tracking.c b/tools/perf/tests/switch-tracking.c
index 5be294014d3b..15791fcb76b2 100644
--- a/tools/perf/tests/switch-tracking.c
+++ b/tools/perf/tests/switch-tracking.c
@@ -332,7 +332,7 @@ out_free_nodes:
static int test__switch_tracking(struct test_suite *test __maybe_unused, int subtest __maybe_unused)
{
const char *sched_switch = "sched:sched_switch";
- const char *cycles = "cycles:u";
+ const char *cycles = "cpu-cycles:u";
struct switch_tracking switch_tracking = { .tids = NULL, };
struct record_opts opts = {
.mmap_pages = UINT_MAX,
diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h
index 97e62db8764a..cb67ddbd0375 100644
--- a/tools/perf/tests/tests.h
+++ b/tools/perf/tests/tests.h
@@ -120,7 +120,6 @@ DECLARE_SUITE(dso_data_cache);
DECLARE_SUITE(dso_data_reopen);
DECLARE_SUITE(parse_events);
DECLARE_SUITE(hists_link);
-DECLARE_SUITE(python_use);
DECLARE_SUITE(bp_signal);
DECLARE_SUITE(bp_signal_overflow);
DECLARE_SUITE(bp_accounting);
@@ -161,7 +160,7 @@ DECLARE_SUITE(bitmap_print);
DECLARE_SUITE(perf_hooks);
DECLARE_SUITE(unit_number__scnprint);
DECLARE_SUITE(mem2node);
-DECLARE_SUITE(maps__merge_in);
+DECLARE_SUITE(maps);
DECLARE_SUITE(time_utils);
DECLARE_SUITE(jit_write_elf);
DECLARE_SUITE(api_io);
@@ -179,6 +178,7 @@ DECLARE_SUITE(event_groups);
DECLARE_SUITE(symbols);
DECLARE_SUITE(util);
DECLARE_SUITE(subcmd_help);
+DECLARE_SUITE(kallsyms_split);
/*
* PowerPC and S390 do not support creation of instruction breakpoints using the
@@ -239,6 +239,7 @@ DECLARE_WORKLOAD(sqrtloop);
DECLARE_WORKLOAD(brstack);
DECLARE_WORKLOAD(datasym);
DECLARE_WORKLOAD(landlock);
+DECLARE_WORKLOAD(traploop);
extern const char *dso_to_test;
extern const char *test_objdump_path;
diff --git a/tools/perf/tests/workloads/Build b/tools/perf/tests/workloads/Build
index 5af17206f04d..fb1012cc4fc3 100644
--- a/tools/perf/tests/workloads/Build
+++ b/tools/perf/tests/workloads/Build
@@ -7,8 +7,10 @@ perf-test-y += sqrtloop.o
perf-test-y += brstack.o
perf-test-y += datasym.o
perf-test-y += landlock.o
+perf-test-y += traploop.o
CFLAGS_sqrtloop.o = -g -O0 -fno-inline -U_FORTIFY_SOURCE
CFLAGS_leafloop.o = -g -O0 -fno-inline -fno-omit-frame-pointer -U_FORTIFY_SOURCE
CFLAGS_brstack.o = -g -O0 -fno-inline -U_FORTIFY_SOURCE
CFLAGS_datasym.o = -g -O0 -fno-inline -U_FORTIFY_SOURCE
+CFLAGS_traploop.o = -g -O0 -fno-inline -U_FORTIFY_SOURCE
diff --git a/tools/perf/tests/workloads/thloop.c b/tools/perf/tests/workloads/thloop.c
index 457b29f91c3e..bd8168f883fb 100644
--- a/tools/perf/tests/workloads/thloop.c
+++ b/tools/perf/tests/workloads/thloop.c
@@ -31,21 +31,52 @@ static void *thfunc(void *arg)
static int thloop(int argc, const char **argv)
{
- int sec = 1;
- pthread_t th;
+ int nt = 2, sec = 1, err = 1;
+ pthread_t *thread_list = NULL;
if (argc > 0)
sec = atoi(argv[0]);
+ if (sec <= 0) {
+ fprintf(stderr, "Error: seconds (%d) must be >= 1\n", sec);
+ return 1;
+ }
+
+ if (argc > 1)
+ nt = atoi(argv[1]);
+
+ if (nt <= 0) {
+ fprintf(stderr, "Error: thread count (%d) must be >= 1\n", nt);
+ return 1;
+ }
+
signal(SIGINT, sighandler);
signal(SIGALRM, sighandler);
- alarm(sec);
- pthread_create(&th, NULL, thfunc, test_loop);
- test_loop();
- pthread_join(th, NULL);
+ thread_list = calloc(nt, sizeof(pthread_t));
+ if (thread_list == NULL) {
+ fprintf(stderr, "Error: malloc failed for %d threads\n", nt);
+ goto out;
+ }
+ for (int i = 1; i < nt; i++) {
+ int ret = pthread_create(&thread_list[i], NULL, thfunc, test_loop);
- return 0;
+ if (ret) {
+ fprintf(stderr, "Error: failed to create thread %d\n", i);
+ done = 1; // Ensure started threads terminate.
+ goto out;
+ }
+ }
+ alarm(sec);
+ test_loop();
+ err = 0;
+out:
+ for (int i = 1; i < nt; i++) {
+ if (thread_list && thread_list[i])
+ pthread_join(thread_list[i], /*retval=*/NULL);
+ }
+ free(thread_list);
+ return err;
}
DEFINE_WORKLOAD(thloop);
diff --git a/tools/perf/tests/workloads/traploop.c b/tools/perf/tests/workloads/traploop.c
new file mode 100644
index 000000000000..68dec399a735
--- /dev/null
+++ b/tools/perf/tests/workloads/traploop.c
@@ -0,0 +1,31 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdlib.h>
+#include "../tests.h"
+
+#define BENCH_RUNS 999999
+
+#ifdef __aarch64__
+static void trap_bench(void)
+{
+ unsigned long val;
+
+ asm("mrs %0, ID_AA64ISAR0_EL1" : "=r" (val)); /* TRAP + ERET */
+}
+#else
+static void trap_bench(void) { }
+#endif
+
+static int traploop(int argc, const char **argv)
+{
+ int num_loops = BENCH_RUNS;
+
+ if (argc > 0)
+ num_loops = atoi(argv[0]);
+
+ for (int i = 0; i < num_loops; i++)
+ trap_bench();
+
+ return 0;
+}
+
+DEFINE_WORKLOAD(traploop);
diff --git a/tools/perf/trace/beauty/include/linux/socket.h b/tools/perf/trace/beauty/include/linux/socket.h
index 3b262487ec06..77d7c59f5d8b 100644
--- a/tools/perf/trace/beauty/include/linux/socket.h
+++ b/tools/perf/trace/beauty/include/linux/socket.h
@@ -34,10 +34,7 @@ typedef __kernel_sa_family_t sa_family_t;
struct sockaddr {
sa_family_t sa_family; /* address family, AF_xxx */
- union {
- char sa_data_min[14]; /* Minimum 14 bytes of protocol address */
- DECLARE_FLEX_ARRAY(char, sa_data);
- };
+ char sa_data[14]; /* 14 bytes of protocol address */
};
struct linger {
diff --git a/tools/perf/trace/beauty/include/uapi/linux/fcntl.h b/tools/perf/trace/beauty/include/uapi/linux/fcntl.h
index a15ac2fa4b20..3741ea1b73d8 100644
--- a/tools/perf/trace/beauty/include/uapi/linux/fcntl.h
+++ b/tools/perf/trace/beauty/include/uapi/linux/fcntl.h
@@ -90,10 +90,29 @@
#define DN_ATTRIB 0x00000020 /* File changed attibutes */
#define DN_MULTISHOT 0x80000000 /* Don't remove notifier */
+/* Reserved kernel ranges [-100], [-10000, -40000]. */
#define AT_FDCWD -100 /* Special value for dirfd used to
indicate openat should use the
current working directory. */
+/*
+ * The concept of process and threads in userland and the kernel is a confusing
+ * one - within the kernel every thread is a 'task' with its own individual PID,
+ * however from userland's point of view threads are grouped by a single PID,
+ * which is that of the 'thread group leader', typically the first thread
+ * spawned.
+ *
+ * To cut the Gideon knot, for internal kernel usage, we refer to
+ * PIDFD_SELF_THREAD to refer to the current thread (or task from a kernel
+ * perspective), and PIDFD_SELF_THREAD_GROUP to refer to the current thread
+ * group leader...
+ */
+#define PIDFD_SELF_THREAD -10000 /* Current thread. */
+#define PIDFD_SELF_THREAD_GROUP -10001 /* Current thread group leader. */
+
+#define FD_PIDFS_ROOT -10002 /* Root of the pidfs filesystem */
+#define FD_NSFS_ROOT -10003 /* Root of the nsfs filesystem */
+#define FD_INVALID -10009 /* Invalid file descriptor: -10000 - EBADF = -10009 */
/* Generic flags for the *at(2) family of syscalls. */
diff --git a/tools/perf/trace/beauty/include/uapi/linux/fs.h b/tools/perf/trace/beauty/include/uapi/linux/fs.h
index 0098b0ce8ccb..beb4c2d1e41c 100644
--- a/tools/perf/trace/beauty/include/uapi/linux/fs.h
+++ b/tools/perf/trace/beauty/include/uapi/linux/fs.h
@@ -60,6 +60,17 @@
#define RENAME_EXCHANGE (1 << 1) /* Exchange source and dest */
#define RENAME_WHITEOUT (1 << 2) /* Whiteout source */
+/*
+ * The root inode of procfs is guaranteed to always have the same inode number.
+ * For programs that make heavy use of procfs, verifying that the root is a
+ * real procfs root and using openat2(RESOLVE_{NO_{XDEV,MAGICLINKS},BENEATH})
+ * will allow you to make sure you are never tricked into operating on the
+ * wrong procfs file.
+ */
+enum procfs_ino {
+ PROCFS_ROOT_INO = 1,
+};
+
struct file_clone_range {
__s64 src_fd;
__u64 src_offset;
@@ -91,6 +102,63 @@ struct fs_sysfs_path {
__u8 name[128];
};
+/* Protection info capability flags */
+#define LBMD_PI_CAP_INTEGRITY (1 << 0)
+#define LBMD_PI_CAP_REFTAG (1 << 1)
+
+/* Checksum types for Protection Information */
+#define LBMD_PI_CSUM_NONE 0
+#define LBMD_PI_CSUM_IP 1
+#define LBMD_PI_CSUM_CRC16_T10DIF 2
+#define LBMD_PI_CSUM_CRC64_NVME 4
+
+/* sizeof first published struct */
+#define LBMD_SIZE_VER0 16
+
+/*
+ * Logical block metadata capability descriptor
+ * If the device does not support metadata, all the fields will be zero.
+ * Applications must check lbmd_flags to determine whether metadata is
+ * supported or not.
+ */
+struct logical_block_metadata_cap {
+ /* Bitmask of logical block metadata capability flags */
+ __u32 lbmd_flags;
+ /*
+ * The amount of data described by each unit of logical block
+ * metadata
+ */
+ __u16 lbmd_interval;
+ /*
+ * Size in bytes of the logical block metadata associated with each
+ * interval
+ */
+ __u8 lbmd_size;
+ /*
+ * Size in bytes of the opaque block tag associated with each
+ * interval
+ */
+ __u8 lbmd_opaque_size;
+ /*
+ * Offset in bytes of the opaque block tag within the logical block
+ * metadata
+ */
+ __u8 lbmd_opaque_offset;
+ /* Size in bytes of the T10 PI tuple associated with each interval */
+ __u8 lbmd_pi_size;
+ /* Offset in bytes of T10 PI tuple within the logical block metadata */
+ __u8 lbmd_pi_offset;
+ /* T10 PI guard tag type */
+ __u8 lbmd_guard_tag_type;
+ /* Size in bytes of the T10 PI application tag */
+ __u8 lbmd_app_tag_size;
+ /* Size in bytes of the T10 PI reference tag */
+ __u8 lbmd_ref_tag_size;
+ /* Size in bytes of the T10 PI storage tag */
+ __u8 lbmd_storage_tag_size;
+ __u8 pad;
+};
+
/* extent-same (dedupe) ioctls; these MUST match the btrfs ioctl definitions */
#define FILE_DEDUPE_RANGE_SAME 0
#define FILE_DEDUPE_RANGE_DIFFERS 1
@@ -149,6 +217,24 @@ struct fsxattr {
};
/*
+ * Variable size structure for file_[sg]et_attr().
+ *
+ * Note. This is alternative to the structure 'struct file_kattr'/'struct fsxattr'.
+ * As this structure is passed to/from userspace with its size, this can
+ * be versioned based on the size.
+ */
+struct file_attr {
+ __u64 fa_xflags; /* xflags field value (get/set) */
+ __u32 fa_extsize; /* extsize field value (get/set)*/
+ __u32 fa_nextents; /* nextents field value (get) */
+ __u32 fa_projid; /* project identifier (get/set) */
+ __u32 fa_cowextsize; /* CoW extsize field value (get/set) */
+};
+
+#define FILE_ATTR_SIZE_VER0 24
+#define FILE_ATTR_SIZE_LATEST FILE_ATTR_SIZE_VER0
+
+/*
* Flags for the fsx_xflags field
*/
#define FS_XFLAG_REALTIME 0x00000001 /* data in realtime volume */
@@ -247,6 +333,8 @@ struct fsxattr {
* also /sys/kernel/debug/ for filesystems with debugfs exports
*/
#define FS_IOC_GETFSSYSFSPATH _IOR(0x15, 1, struct fs_sysfs_path)
+/* Get logical block metadata capability details */
+#define FS_IOC_GETLBMD_CAP _IOWR(0x15, 2, struct logical_block_metadata_cap)
/*
* Inode flags (FS_IOC_GETFLAGS / FS_IOC_SETFLAGS)
@@ -342,10 +430,13 @@ typedef int __bitwise __kernel_rwf_t;
/* buffered IO that drops the cache after reading or writing data */
#define RWF_DONTCACHE ((__force __kernel_rwf_t)0x00000080)
+/* prevent pipe and socket writes from raising SIGPIPE */
+#define RWF_NOSIGNAL ((__force __kernel_rwf_t)0x00000100)
+
/* mask of flags supported by the kernel */
#define RWF_SUPPORTED (RWF_HIPRI | RWF_DSYNC | RWF_SYNC | RWF_NOWAIT |\
RWF_APPEND | RWF_NOAPPEND | RWF_ATOMIC |\
- RWF_DONTCACHE)
+ RWF_DONTCACHE | RWF_NOSIGNAL)
#define PROCFS_IOCTL_MAGIC 'f'
diff --git a/tools/perf/trace/beauty/include/uapi/linux/prctl.h b/tools/perf/trace/beauty/include/uapi/linux/prctl.h
index 3b93fb906e3c..51c4e8c82b1e 100644
--- a/tools/perf/trace/beauty/include/uapi/linux/prctl.h
+++ b/tools/perf/trace/beauty/include/uapi/linux/prctl.h
@@ -177,7 +177,17 @@ struct prctl_mm_map {
#define PR_GET_TID_ADDRESS 40
+/*
+ * Flags for PR_SET_THP_DISABLE are only applicable when disabling. Bit 0
+ * is reserved, so PR_GET_THP_DISABLE can return "1 | flags", to effectively
+ * return "1" when no flags were specified for PR_SET_THP_DISABLE.
+ */
#define PR_SET_THP_DISABLE 41
+/*
+ * Don't disable THPs when explicitly advised (e.g., MADV_HUGEPAGE /
+ * VM_HUGEPAGE, MADV_COLLAPSE).
+ */
+# define PR_THP_DISABLE_EXCEPT_ADVISED (1 << 1)
#define PR_GET_THP_DISABLE 42
/*
@@ -244,6 +254,8 @@ struct prctl_mm_map {
# define PR_MTE_TAG_MASK (0xffffUL << PR_MTE_TAG_SHIFT)
/* Unused; kept only for source compatibility */
# define PR_MTE_TCF_SHIFT 1
+/* MTE tag check store only */
+# define PR_MTE_STORE_ONLY (1UL << 19)
/* RISC-V pointer masking tag length */
# define PR_PMLEN_SHIFT 24
# define PR_PMLEN_MASK (0x7fUL << PR_PMLEN_SHIFT)
@@ -255,7 +267,12 @@ struct prctl_mm_map {
/* Dispatch syscalls to a userspace handler */
#define PR_SET_SYSCALL_USER_DISPATCH 59
# define PR_SYS_DISPATCH_OFF 0
-# define PR_SYS_DISPATCH_ON 1
+/* Enable dispatch except for the specified range */
+# define PR_SYS_DISPATCH_EXCLUSIVE_ON 1
+/* Enable dispatch for the specified range */
+# define PR_SYS_DISPATCH_INCLUSIVE_ON 2
+/* Legacy name for backwards compatibility */
+# define PR_SYS_DISPATCH_ON PR_SYS_DISPATCH_EXCLUSIVE_ON
/* The control values for the user space selector when dispatch is enabled */
# define SYSCALL_DISPATCH_FILTER_ALLOW 0
# define SYSCALL_DISPATCH_FILTER_BLOCK 1
diff --git a/tools/perf/trace/beauty/include/uapi/linux/vhost.h b/tools/perf/trace/beauty/include/uapi/linux/vhost.h
index d4b3e2ae1314..c57674a6aa0d 100644
--- a/tools/perf/trace/beauty/include/uapi/linux/vhost.h
+++ b/tools/perf/trace/beauty/include/uapi/linux/vhost.h
@@ -235,4 +235,39 @@
*/
#define VHOST_VDPA_GET_VRING_SIZE _IOWR(VHOST_VIRTIO, 0x82, \
struct vhost_vring_state)
+
+/* Extended features manipulation */
+#define VHOST_GET_FEATURES_ARRAY _IOR(VHOST_VIRTIO, 0x83, \
+ struct vhost_features_array)
+#define VHOST_SET_FEATURES_ARRAY _IOW(VHOST_VIRTIO, 0x83, \
+ struct vhost_features_array)
+
+/* fork_owner values for vhost */
+#define VHOST_FORK_OWNER_KTHREAD 0
+#define VHOST_FORK_OWNER_TASK 1
+
+/**
+ * VHOST_SET_FORK_FROM_OWNER - Set the fork_owner flag for the vhost device,
+ * This ioctl must called before VHOST_SET_OWNER.
+ * Only available when CONFIG_VHOST_ENABLE_FORK_OWNER_CONTROL=y
+ *
+ * @param fork_owner: An 8-bit value that determines the vhost thread mode
+ *
+ * When fork_owner is set to VHOST_FORK_OWNER_TASK(default value):
+ * - Vhost will create vhost worker as tasks forked from the owner,
+ * inheriting all of the owner's attributes.
+ *
+ * When fork_owner is set to VHOST_FORK_OWNER_KTHREAD:
+ * - Vhost will create vhost workers as kernel threads.
+ */
+#define VHOST_SET_FORK_FROM_OWNER _IOW(VHOST_VIRTIO, 0x84, __u8)
+
+/**
+ * VHOST_GET_FORK_OWNER - Get the current fork_owner flag for the vhost device.
+ * Only available when CONFIG_VHOST_ENABLE_FORK_OWNER_CONTROL=y
+ *
+ * @return: An 8-bit value indicating the current thread mode.
+ */
+#define VHOST_GET_FORK_FROM_OWNER _IOR(VHOST_VIRTIO, 0x85, __u8)
+
#endif
diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c
index 183902dac042..36aca8d6d003 100644
--- a/tools/perf/ui/browsers/annotate.c
+++ b/tools/perf/ui/browsers/annotate.c
@@ -4,7 +4,9 @@
#include "../ui.h"
#include "../../util/annotate.h"
#include "../../util/debug.h"
+#include "../../util/debuginfo.h"
#include "../../util/dso.h"
+#include "../../util/hashmap.h"
#include "../../util/hist.h"
#include "../../util/sort.h"
#include "../../util/map.h"
@@ -12,7 +14,9 @@
#include "../../util/symbol.h"
#include "../../util/evsel.h"
#include "../../util/evlist.h"
+#include "../../util/thread.h"
#include <inttypes.h>
+#include <linux/err.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/zalloc.h>
@@ -27,10 +31,31 @@ struct annotate_browser {
struct rb_node *curr_hot;
struct annotation_line *selection;
struct arch *arch;
+ /*
+ * perf top can delete hist_entry anytime. Callers should make sure
+ * its lifetime.
+ */
+ struct hist_entry *he;
+ struct debuginfo *dbg;
+ struct evsel *evsel;
+ struct hashmap *type_hash;
bool searching_backwards;
char search_bf[128];
};
+/* A copy of target hist_entry for perf top. */
+static struct hist_entry annotate_he;
+
+static size_t type_hash(long key, void *ctx __maybe_unused)
+{
+ return key;
+}
+
+static bool type_equal(long key1, long key2, void *ctx __maybe_unused)
+{
+ return key1 == key2;
+}
+
static inline struct annotation *browser__annotation(struct ui_browser *browser)
{
struct map_symbol *ms = browser->priv;
@@ -107,12 +132,21 @@ static void annotate_browser__write(struct ui_browser *browser, void *entry, int
.printf = annotate_browser__printf,
.write_graph = annotate_browser__write_graph,
};
+ struct annotation_print_data apd = {
+ .he = ab->he,
+ .arch = ab->arch,
+ .evsel = ab->evsel,
+ .dbg = ab->dbg,
+ };
/* The scroll bar isn't being used */
if (!browser->navkeypressed)
ops.width += 1;
- annotation_line__write(al, notes, &ops);
+ if (!IS_ERR_OR_NULL(ab->type_hash))
+ apd.type_hash = ab->type_hash;
+
+ annotation_line__write(al, notes, &ops, &apd);
if (ops.current_entry)
ab->selection = al;
@@ -515,9 +549,24 @@ static void ui_browser__init_asm_mode(struct ui_browser *browser)
static int sym_title(struct symbol *sym, struct map *map, char *title,
size_t sz, int percent_type)
{
- return snprintf(title, sz, "%s %s [Percent: %s]", sym->name,
+ return snprintf(title, sz, "%s %s [Percent: %s] %s", sym->name,
dso__long_name(map__dso(map)),
- percent_type_str(percent_type));
+ percent_type_str(percent_type),
+ annotate_opts.code_with_type ? "[Type]" : "");
+}
+
+static void annotate_browser__show_function_title(struct annotate_browser *browser)
+{
+ struct ui_browser *b = &browser->b;
+ struct map_symbol *ms = b->priv;
+ struct symbol *sym = ms->sym;
+ char title[SYM_TITLE_MAX_SIZE];
+
+ sym_title(sym, ms->map, title, sizeof(title), annotate_opts.percent_type);
+
+ ui_browser__gotorc_title(b, 0, 0);
+ ui_browser__set_color(b, HE_COLORSET_ROOT);
+ ui_browser__write_nstring(b, title, b->width + 1);
}
/*
@@ -536,7 +585,6 @@ static bool annotate_browser__callq(struct annotate_browser *browser,
struct map_symbol *ms = browser->b.priv, target_ms;
struct disasm_line *dl = disasm_line(browser->selection);
struct annotation *notes;
- char title[SYM_TITLE_MAX_SIZE];
if (!dl->ops.target.sym) {
ui_helpline__puts("The called function was not found.");
@@ -557,9 +605,14 @@ static bool annotate_browser__callq(struct annotate_browser *browser,
target_ms.map = ms->map;
target_ms.sym = dl->ops.target.sym;
annotation__unlock(notes);
- symbol__tui_annotate(&target_ms, evsel, hbt);
- sym_title(ms->sym, ms->map, title, sizeof(title), annotate_opts.percent_type);
- ui_browser__show_title(&browser->b, title);
+ __hist_entry__tui_annotate(browser->he, &target_ms, evsel, hbt, NO_ADDR);
+
+ /*
+ * The annotate_browser above changed the title with the target function
+ * and now it's back to the original function. Refresh the header line
+ * for the original function again.
+ */
+ annotate_browser__show_function_title(browser);
return true;
}
@@ -731,20 +784,12 @@ bool annotate_browser__continue_search_reverse(struct annotate_browser *browser,
return __annotate_browser__search_reverse(browser);
}
-static int annotate_browser__show(struct ui_browser *browser, char *title, const char *help)
+static int annotate_browser__show(struct annotate_browser *browser, char *title, const char *help)
{
- struct map_symbol *ms = browser->priv;
- struct symbol *sym = ms->sym;
- char symbol_dso[SYM_TITLE_MAX_SIZE];
-
- if (ui_browser__show(browser, title, help) < 0)
+ if (ui_browser__show(&browser->b, title, help) < 0)
return -1;
- sym_title(sym, ms->map, symbol_dso, sizeof(symbol_dso), annotate_opts.percent_type);
-
- ui_browser__gotorc_title(browser, 0, 0);
- ui_browser__set_color(browser, HE_COLORSET_ROOT);
- ui_browser__write_nstring(browser, symbol_dso, browser->width + 1);
+ annotate_browser__show_function_title(browser);
return 0;
}
@@ -793,6 +838,42 @@ static int annotate__scnprintf_title(struct hists *hists, char *bf, size_t size)
return printed;
}
+static void annotate_browser__debuginfo_warning(struct annotate_browser *browser)
+{
+ struct map_symbol *ms = browser->b.priv;
+ struct dso *dso = map__dso(ms->map);
+
+ if (browser->dbg == NULL && annotate_opts.code_with_type &&
+ !dso__debuginfo_warned(dso)) {
+ ui__warning("DWARF debuginfo not found.\n\n"
+ "Data-type in this DSO will not be displayed.\n"
+ "Please make sure to have debug information.");
+ dso__set_debuginfo_warned(dso);
+ }
+}
+
+static s64 annotate_browser__curr_hot_offset(struct annotate_browser *browser)
+{
+ struct annotation_line *al = NULL;
+
+ if (browser->curr_hot)
+ al = rb_entry(browser->curr_hot, struct annotation_line, rb_node);
+
+ return al ? al->offset : 0;
+}
+
+static void annotate_browser__symbol_annotate_error(struct annotate_browser *browser, int err)
+{
+ struct map_symbol *ms = browser->b.priv;
+ struct symbol *sym = ms->sym;
+ struct dso *dso = map__dso(ms->map);
+ char msg[BUFSIZ];
+
+ dso__set_annotate_warned(dso);
+ symbol__strerror_disassemble(ms, err, msg, sizeof(msg));
+ ui__error("Couldn't annotate %s:\n%s", sym->name, msg);
+}
+
static int annotate_browser__run(struct annotate_browser *browser,
struct evsel *evsel,
struct hist_browser_timer *hbt)
@@ -809,11 +890,16 @@ static int annotate_browser__run(struct annotate_browser *browser,
int key;
annotate__scnprintf_title(hists, title, sizeof(title));
- if (annotate_browser__show(&browser->b, title, help) < 0)
+ if (annotate_browser__show(browser, title, help) < 0)
return -1;
annotate_browser__calc_percent(browser, evsel);
+ if (browser->selection != NULL) {
+ browser->curr_hot = &browser->selection->rb_node;
+ browser->b.use_navkeypressed = false;
+ }
+
if (browser->curr_hot) {
annotate_browser__set_rb_top(browser, browser->curr_hot);
browser->b.navkeypressed = false;
@@ -823,6 +909,8 @@ static int annotate_browser__run(struct annotate_browser *browser,
annotation_br_cntr_abbr_list(&br_cntr_text, evsel, false);
+ annotate_browser__debuginfo_warning(browser);
+
while (1) {
key = ui_browser__run(&browser->b, delay_secs);
@@ -845,7 +933,7 @@ static int annotate_browser__run(struct annotate_browser *browser,
if (delay_secs != 0) {
symbol__annotate_decay_histogram(sym, evsel);
annotate__scnprintf_title(hists, title, sizeof(title));
- annotate_browser__show(&browser->b, title, help);
+ annotate_browser__show(browser, title, help);
}
continue;
case K_TAB:
@@ -891,11 +979,12 @@ static int annotate_browser__run(struct annotate_browser *browser,
"b Toggle percent base [period/hits]\n"
"B Branch counter abbr list (Optional)\n"
"? Search string backwards\n"
- "f Toggle showing offsets to full address\n");
+ "f Toggle showing offsets to full address\n"
+ "T Toggle data type display\n");
continue;
case 'r':
script_browse(NULL, NULL);
- annotate_browser__show(&browser->b, title, help);
+ annotate_browser__show(browser, title, help);
continue;
case 'k':
annotate_opts.show_linenr = !annotate_opts.show_linenr;
@@ -906,12 +995,24 @@ static int annotate_browser__run(struct annotate_browser *browser,
case 'H':
nd = browser->curr_hot;
break;
- case 's':
+ case 's': {
+ struct annotation_line *al = NULL;
+ s64 offset = annotate_browser__curr_hot_offset(browser);
+
if (annotate_browser__toggle_source(browser, evsel))
ui_helpline__puts(help);
+
+ /* Update the annotation browser's rb_tree, and reset the nd */
+ annotate_browser__calc_percent(browser, evsel);
+ /* Try to find the same asm line as before */
+ al = annotated_source__get_line(notes->src, offset);
+ browser->curr_hot = al ? &al->rb_node : NULL;
+ nd = browser->curr_hot;
+
annotate__scnprintf_title(hists, title, sizeof(title));
- annotate_browser__show(&browser->b, title, help);
+ annotate_browser__show(browser, title, help);
continue;
+ }
case 'o':
annotate_opts.use_offset = !annotate_opts.use_offset;
annotation__update_column_widths(notes);
@@ -975,7 +1076,7 @@ show_sup_ins:
continue;
}
case 'P':
- map_symbol__annotation_dump(ms, evsel);
+ map_symbol__annotation_dump(ms, evsel, browser->he);
continue;
case 't':
if (symbol_conf.show_total_period) {
@@ -998,7 +1099,7 @@ show_sup_ins:
case 'b':
switch_percent_type(&annotate_opts, key == 'b');
annotate__scnprintf_title(hists, title, sizeof(title));
- annotate_browser__show(&browser->b, title, help);
+ annotate_browser__show(browser, title, help);
continue;
case 'B':
if (br_cntr_text)
@@ -1011,6 +1112,17 @@ show_sup_ins:
case 'f':
annotation__toggle_full_addr(notes, ms);
continue;
+ case 'T':
+ annotate_opts.code_with_type ^= 1;
+ if (browser->dbg == NULL)
+ browser->dbg = dso__debuginfo(map__dso(ms->map));
+ if (browser->type_hash == NULL) {
+ browser->type_hash = hashmap__new(type_hash, type_equal,
+ /*ctx=*/NULL);
+ }
+ annotate_browser__show(browser, title, help);
+ annotate_browser__debuginfo_warning(browser);
+ continue;
case K_LEFT:
case '<':
case '>':
@@ -1032,25 +1144,20 @@ out:
return key;
}
-int map_symbol__tui_annotate(struct map_symbol *ms, struct evsel *evsel,
- struct hist_browser_timer *hbt)
-{
- return symbol__tui_annotate(ms, evsel, hbt);
-}
-
int hist_entry__tui_annotate(struct hist_entry *he, struct evsel *evsel,
- struct hist_browser_timer *hbt)
+ struct hist_browser_timer *hbt, u64 al_addr)
{
/* reset abort key so that it can get Ctrl-C as a key */
SLang_reset_tty();
SLang_init_tty(0, 0, 0);
SLtty_set_suspend_state(true);
- return map_symbol__tui_annotate(&he->ms, evsel, hbt);
+ return __hist_entry__tui_annotate(he, &he->ms, evsel, hbt, al_addr);
}
-int symbol__tui_annotate(struct map_symbol *ms, struct evsel *evsel,
- struct hist_browser_timer *hbt)
+int __hist_entry__tui_annotate(struct hist_entry *he, struct map_symbol *ms,
+ struct evsel *evsel,
+ struct hist_browser_timer *hbt, u64 al_addr)
{
struct symbol *sym = ms->sym;
struct annotation *notes = symbol__annotation(sym);
@@ -1064,6 +1171,8 @@ int symbol__tui_annotate(struct map_symbol *ms, struct evsel *evsel,
.priv = ms,
.use_navkeypressed = true,
},
+ .he = he,
+ .evsel = evsel,
};
struct dso *dso;
int ret = -1, err;
@@ -1079,10 +1188,7 @@ int symbol__tui_annotate(struct map_symbol *ms, struct evsel *evsel,
if (not_annotated || !sym->annotate2) {
err = symbol__annotate2(ms, evsel, &browser.arch);
if (err) {
- char msg[BUFSIZ];
- dso__set_annotate_warned(dso);
- symbol__strerror_disassemble(ms, err, msg, sizeof(msg));
- ui__error("Couldn't annotate %s:\n%s", sym->name, msg);
+ annotate_browser__symbol_annotate_error(&browser, err);
return -1;
}
@@ -1091,10 +1197,31 @@ int symbol__tui_annotate(struct map_symbol *ms, struct evsel *evsel,
if (!annotation__has_source(notes))
ui__warning("Annotation has no source code.");
}
+ } else {
+ err = evsel__get_arch(evsel, &browser.arch);
+ if (err) {
+ annotate_browser__symbol_annotate_error(&browser, err);
+ return -1;
+ }
+ }
+
+ /* Copy necessary information when it's called from perf top */
+ if (hbt != NULL && he != &annotate_he) {
+ annotate_he.hists = he->hists;
+ annotate_he.thread = thread__get(he->thread);
+ annotate_he.cpumode = he->cpumode;
+ map_symbol__copy(&annotate_he.ms, ms);
+
+ browser.he = &annotate_he;
}
ui_helpline__push("Press ESC to exit");
+ if (annotate_opts.code_with_type) {
+ browser.dbg = dso__debuginfo(dso);
+ browser.type_hash = hashmap__new(type_hash, type_equal, /*ctx=*/NULL);
+ }
+
browser.b.width = notes->src->widths.max_line_len;
browser.b.nr_entries = notes->src->nr_entries;
browser.b.entries = &notes->src->source;
@@ -1103,10 +1230,40 @@ int symbol__tui_annotate(struct map_symbol *ms, struct evsel *evsel,
if (annotate_opts.hide_src_code)
ui_browser__init_asm_mode(&browser.b);
+ /*
+ * If al_addr is set, it means that there should be a line
+ * intentionally selected, not based on the percentages
+ * which caculated by the event sampling. In this case, we
+ * convey this information into the browser selection, where
+ * the selection in other cases should be empty.
+ */
+ if (al_addr != NO_ADDR) {
+ struct annotation_line *al = annotated_source__get_line(notes->src,
+ al_addr - sym->start);
+
+ browser.selection = al;
+ }
+
ret = annotate_browser__run(&browser, evsel, hbt);
+ debuginfo__delete(browser.dbg);
+
+ if (!IS_ERR_OR_NULL(browser.type_hash)) {
+ struct hashmap_entry *cur;
+ size_t bkt;
+
+ hashmap__for_each_entry(browser.type_hash, cur, bkt)
+ zfree(&cur->pvalue);
+ hashmap__free(browser.type_hash);
+ }
+
if (not_annotated && !notes->src->tried_source)
annotated_source__purge(notes->src);
+ if (hbt != NULL && he != &annotate_he) {
+ thread__zput(annotate_he.thread);
+ map_symbol__exit(&annotate_he.ms);
+ }
+
return ret;
}
diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c
index d9d3fb44477a..08fecbe28a52 100644
--- a/tools/perf/ui/browsers/hists.c
+++ b/tools/perf/ui/browsers/hists.c
@@ -2484,8 +2484,8 @@ do_annotate(struct hist_browser *browser, struct popup_action *act)
else
evsel = hists_to_evsel(browser->hists);
- err = map_symbol__tui_annotate(&act->ms, evsel, browser->hbt);
he = hist_browser__selected_entry(browser);
+ err = __hist_entry__tui_annotate(he, &act->ms, evsel, browser->hbt, NO_ADDR);
/*
* offer option to annotate the other branch source or target
* (if they exists) when returning from annotate
diff --git a/tools/perf/ui/hist.c b/tools/perf/ui/hist.c
index b085eb0de849..e58327595d37 100644
--- a/tools/perf/ui/hist.c
+++ b/tools/perf/ui/hist.c
@@ -1,4 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
+#include <errno.h>
#include <inttypes.h>
#include <math.h>
#include <stdlib.h>
diff --git a/tools/perf/ui/libslang.h b/tools/perf/ui/libslang.h
index 1dff3020e9d5..6722561e0458 100644
--- a/tools/perf/ui/libslang.h
+++ b/tools/perf/ui/libslang.h
@@ -15,11 +15,7 @@
#define ENABLE_SLFUTURE_CONST 1
#define ENABLE_SLFUTURE_VOID 1
-#ifdef HAVE_SLANG_INCLUDE_SUBDIR
-#include <slang/slang.h>
-#else
#include <slang.h>
-#endif
#define SL_KEY_UNTAB 0x1000
diff --git a/tools/perf/util/Build b/tools/perf/util/Build
index 4959e7a990e4..1c2a43e1dc68 100644
--- a/tools/perf/util/Build
+++ b/tools/perf/util/Build
@@ -2,18 +2,19 @@ include $(srctree)/tools/scripts/Makefile.include
include $(srctree)/tools/scripts/utilities.mak
perf-util-y += arm64-frame-pointer-unwind-support.o
+perf-util-y += addr2line.o
perf-util-y += addr_location.o
perf-util-y += annotate.o
perf-util-y += block-info.o
perf-util-y += block-range.o
perf-util-y += build-id.o
perf-util-y += cacheline.o
+perf-util-y += capstone.o
perf-util-y += config.o
perf-util-y += copyfile.o
perf-util-y += ctype.o
perf-util-y += db-export.o
perf-util-y += disasm.o
-perf-util-y += disasm_bpf.o
perf-util-y += env.o
perf-util-y += event.o
perf-util-y += evlist.o
@@ -23,8 +24,9 @@ perf-util-y += evsel_fprintf.o
perf-util-y += perf_event_attr_fprintf.o
perf-util-y += evswitch.o
perf-util-y += find_bit.o
-perf-util-y += get_current_dir_name.o
perf-util-y += levenshtein.o
+perf-util-$(CONFIG_LIBBFD) += libbfd.o
+perf-util-y += llvm.o
perf-util-y += mmap.o
perf-util-y += memswap.o
perf-util-y += parse-events.o
@@ -127,21 +129,22 @@ perf-util-y += iostat.o
perf-util-y += stream.o
perf-util-y += kvm-stat.o
perf-util-y += lock-contention.o
-perf-util-$(CONFIG_AUXTRACE) += auxtrace.o
+perf-util-y += auxtrace.o
perf-util-y += intel-pt-decoder/
-perf-util-$(CONFIG_AUXTRACE) += intel-pt.o
-perf-util-$(CONFIG_AUXTRACE) += intel-bts.o
-perf-util-$(CONFIG_AUXTRACE) += arm-spe.o
-perf-util-$(CONFIG_AUXTRACE) += arm-spe-decoder/
-perf-util-$(CONFIG_AUXTRACE) += hisi-ptt.o
-perf-util-$(CONFIG_AUXTRACE) += hisi-ptt-decoder/
-perf-util-$(CONFIG_AUXTRACE) += s390-cpumsf.o
+perf-util-y += intel-pt.o
+perf-util-y += intel-bts.o
+perf-util-y += arm-spe.o
+perf-util-y += arm-spe-decoder/
+perf-util-y += hisi-ptt.o
+perf-util-y += hisi-ptt-decoder/
+perf-util-y += s390-cpumsf.o
+perf-util-y += powerpc-vpadtl.o
ifdef CONFIG_LIBOPENCSD
-perf-util-$(CONFIG_AUXTRACE) += cs-etm.o
-perf-util-$(CONFIG_AUXTRACE) += cs-etm-decoder/
+perf-util-y += cs-etm.o
+perf-util-y += cs-etm-decoder/
endif
-perf-util-$(CONFIG_AUXTRACE) += cs-etm-base.o
+perf-util-y += cs-etm-base.o
perf-util-y += parse-branch-options.o
perf-util-y += dump-insn.o
diff --git a/tools/perf/util/addr2line.c b/tools/perf/util/addr2line.c
new file mode 100644
index 000000000000..f2d94a3272d7
--- /dev/null
+++ b/tools/perf/util/addr2line.c
@@ -0,0 +1,439 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "addr2line.h"
+#include "debug.h"
+#include "dso.h"
+#include "string2.h"
+#include "srcline.h"
+#include "symbol.h"
+#include "symbol_conf.h"
+
+#include <api/io.h>
+#include <linux/zalloc.h>
+#include <subcmd/run-command.h>
+
+#include <inttypes.h>
+#include <signal.h>
+#include <stdlib.h>
+#include <string.h>
+
+#define MAX_INLINE_NEST 1024
+
+/* If addr2line doesn't return data for 1 second then timeout. */
+int addr2line_timeout_ms = 1 * 1000;
+
+static int filename_split(char *filename, unsigned int *line_nr)
+{
+ char *sep;
+
+ sep = strchr(filename, '\n');
+ if (sep)
+ *sep = '\0';
+
+ if (!strcmp(filename, "??:0"))
+ return 0;
+
+ sep = strchr(filename, ':');
+ if (sep) {
+ *sep++ = '\0';
+ *line_nr = strtoul(sep, NULL, 0);
+ return 1;
+ }
+ pr_debug("addr2line missing ':' in filename split\n");
+ return 0;
+}
+
+static void addr2line_subprocess_cleanup(struct child_process *a2l)
+{
+ if (a2l->pid != -1) {
+ kill(a2l->pid, SIGKILL);
+ finish_command(a2l); /* ignore result, we don't care */
+ a2l->pid = -1;
+ close(a2l->in);
+ close(a2l->out);
+ }
+
+ free(a2l);
+}
+
+static struct child_process *addr2line_subprocess_init(const char *addr2line_path,
+ const char *binary_path)
+{
+ const char *argv[] = {
+ addr2line_path ?: "addr2line",
+ "-e", binary_path,
+ "-a", "-i", "-f", NULL
+ };
+ struct child_process *a2l = zalloc(sizeof(*a2l));
+ int start_command_status = 0;
+
+ if (a2l == NULL) {
+ pr_err("Failed to allocate memory for addr2line");
+ return NULL;
+ }
+
+ a2l->pid = -1;
+ a2l->in = -1;
+ a2l->out = -1;
+ a2l->no_stderr = 1;
+
+ a2l->argv = argv;
+ start_command_status = start_command(a2l);
+ a2l->argv = NULL; /* it's not used after start_command; avoid dangling pointers */
+
+ if (start_command_status != 0) {
+ pr_warning("could not start addr2line (%s) for %s: start_command return code %d\n",
+ addr2line_path, binary_path, start_command_status);
+ addr2line_subprocess_cleanup(a2l);
+ return NULL;
+ }
+
+ return a2l;
+}
+
+enum a2l_style {
+ BROKEN,
+ GNU_BINUTILS,
+ LLVM,
+};
+
+static enum a2l_style addr2line_configure(struct child_process *a2l, const char *dso_name)
+{
+ static bool cached;
+ static enum a2l_style style;
+
+ if (!cached) {
+ char buf[128];
+ struct io io;
+ int ch;
+ int lines;
+
+ if (write(a2l->in, ",\n", 2) != 2)
+ return BROKEN;
+
+ io__init(&io, a2l->out, buf, sizeof(buf));
+ ch = io__get_char(&io);
+ if (ch == ',') {
+ style = LLVM;
+ cached = true;
+ lines = 1;
+ pr_debug3("Detected LLVM addr2line style\n");
+ } else if (ch == '0') {
+ style = GNU_BINUTILS;
+ cached = true;
+ lines = 3;
+ pr_debug3("Detected binutils addr2line style\n");
+ } else {
+ if (!symbol_conf.disable_add2line_warn) {
+ char *output = NULL;
+ size_t output_len;
+
+ io__getline(&io, &output, &output_len);
+ pr_warning("%s %s: addr2line configuration failed\n",
+ __func__, dso_name);
+ pr_warning("\t%c%s", ch, output);
+ }
+ pr_debug("Unknown/broken addr2line style\n");
+ return BROKEN;
+ }
+ while (lines) {
+ ch = io__get_char(&io);
+ if (ch <= 0)
+ break;
+ if (ch == '\n')
+ lines--;
+ }
+ /* Ignore SIGPIPE in the event addr2line exits. */
+ signal(SIGPIPE, SIG_IGN);
+ }
+ return style;
+}
+
+static int read_addr2line_record(struct io *io,
+ enum a2l_style style,
+ const char *dso_name,
+ u64 addr,
+ bool first,
+ char **function,
+ char **filename,
+ unsigned int *line_nr)
+{
+ /*
+ * Returns:
+ * -1 ==> error
+ * 0 ==> sentinel (or other ill-formed) record read
+ * 1 ==> a genuine record read
+ */
+ char *line = NULL;
+ size_t line_len = 0;
+ unsigned int dummy_line_nr = 0;
+ int ret = -1;
+
+ if (function != NULL)
+ zfree(function);
+
+ if (filename != NULL)
+ zfree(filename);
+
+ if (line_nr != NULL)
+ *line_nr = 0;
+
+ /*
+ * Read the first line. Without an error this will be:
+ * - for the first line an address like 0x1234,
+ * - the binutils sentinel 0x0000000000000000,
+ * - the llvm-addr2line the sentinel ',' character,
+ * - the function name line for an inlined function.
+ */
+ if (io__getline(io, &line, &line_len) < 0 || !line_len)
+ goto error;
+
+ pr_debug3("%s %s: addr2line read address for sentinel: %s", __func__, dso_name, line);
+ if (style == LLVM && line_len == 2 && line[0] == ',') {
+ /* Found the llvm-addr2line sentinel character. */
+ zfree(&line);
+ return 0;
+ } else if (style == GNU_BINUTILS && (!first || addr != 0)) {
+ int zero_count = 0, non_zero_count = 0;
+ /*
+ * Check for binutils sentinel ignoring it for the case the
+ * requested address is 0.
+ */
+
+ /* A given address should always start 0x. */
+ if (line_len >= 2 || line[0] != '0' || line[1] != 'x') {
+ for (size_t i = 2; i < line_len; i++) {
+ if (line[i] == '0')
+ zero_count++;
+ else if (line[i] != '\n')
+ non_zero_count++;
+ }
+ if (!non_zero_count) {
+ int ch;
+
+ if (first && !zero_count) {
+ /* Line was erroneous just '0x'. */
+ goto error;
+ }
+ /*
+ * Line was 0x0..0, the sentinel for binutils. Remove
+ * the function and filename lines.
+ */
+ zfree(&line);
+ do {
+ ch = io__get_char(io);
+ } while (ch > 0 && ch != '\n');
+ do {
+ ch = io__get_char(io);
+ } while (ch > 0 && ch != '\n');
+ return 0;
+ }
+ }
+ }
+ /* Read the second function name line (if inline data then this is the first line). */
+ if (first && (io__getline(io, &line, &line_len) < 0 || !line_len))
+ goto error;
+
+ pr_debug3("%s %s: addr2line read line: %s", __func__, dso_name, line);
+ if (function != NULL)
+ *function = strdup(strim(line));
+
+ zfree(&line);
+ line_len = 0;
+
+ /* Read the third filename and line number line. */
+ if (io__getline(io, &line, &line_len) < 0 || !line_len)
+ goto error;
+
+ pr_debug3("%s %s: addr2line filename:number : %s", __func__, dso_name, line);
+ if (filename_split(line, line_nr == NULL ? &dummy_line_nr : line_nr) == 0 &&
+ style == GNU_BINUTILS) {
+ ret = 0;
+ goto error;
+ }
+
+ if (filename != NULL)
+ *filename = strdup(line);
+
+ zfree(&line);
+ line_len = 0;
+
+ return 1;
+
+error:
+ free(line);
+ if (function != NULL)
+ zfree(function);
+ if (filename != NULL)
+ zfree(filename);
+ return ret;
+}
+
+static int inline_list__append_record(struct dso *dso,
+ struct inline_node *node,
+ struct symbol *sym,
+ const char *function,
+ const char *filename,
+ unsigned int line_nr)
+{
+ struct symbol *inline_sym = new_inline_sym(dso, sym, function);
+
+ return inline_list__append(inline_sym, srcline_from_fileline(filename, line_nr), node);
+}
+
+int cmd__addr2line(const char *dso_name, u64 addr,
+ char **file, unsigned int *line_nr,
+ struct dso *dso,
+ bool unwind_inlines,
+ struct inline_node *node,
+ struct symbol *sym __maybe_unused)
+{
+ struct child_process *a2l = dso__a2l(dso);
+ char *record_function = NULL;
+ char *record_filename = NULL;
+ unsigned int record_line_nr = 0;
+ int record_status = -1;
+ int ret = 0;
+ size_t inline_count = 0;
+ int len;
+ char buf[128];
+ ssize_t written;
+ struct io io = { .eof = false };
+ enum a2l_style a2l_style;
+
+ if (!a2l) {
+ if (!filename__has_section(dso_name, ".debug_line"))
+ goto out;
+
+ dso__set_a2l(dso,
+ addr2line_subprocess_init(symbol_conf.addr2line_path, dso_name));
+ a2l = dso__a2l(dso);
+ }
+
+ if (a2l == NULL) {
+ if (!symbol_conf.disable_add2line_warn)
+ pr_warning("%s %s: addr2line_subprocess_init failed\n", __func__, dso_name);
+ goto out;
+ }
+ a2l_style = addr2line_configure(a2l, dso_name);
+ if (a2l_style == BROKEN)
+ goto out;
+
+ /*
+ * Send our request and then *deliberately* send something that can't be
+ * interpreted as a valid address to ask addr2line about (namely,
+ * ","). This causes addr2line to first write out the answer to our
+ * request, in an unbounded/unknown number of records, and then to write
+ * out the lines "0x0...0", "??" and "??:0", for GNU binutils, or ","
+ * for llvm-addr2line, so that we can detect when it has finished giving
+ * us anything useful.
+ */
+ len = snprintf(buf, sizeof(buf), "%016"PRIx64"\n,\n", addr);
+ written = len > 0 ? write(a2l->in, buf, len) : -1;
+ if (written != len) {
+ if (!symbol_conf.disable_add2line_warn)
+ pr_warning("%s %s: could not send request\n", __func__, dso_name);
+ goto out;
+ }
+ io__init(&io, a2l->out, buf, sizeof(buf));
+ io.timeout_ms = addr2line_timeout_ms;
+ switch (read_addr2line_record(&io, a2l_style, dso_name, addr, /*first=*/true,
+ &record_function, &record_filename, &record_line_nr)) {
+ case -1:
+ if (!symbol_conf.disable_add2line_warn)
+ pr_warning("%s %s: could not read first record\n", __func__, dso_name);
+ goto out;
+ case 0:
+ /*
+ * The first record was invalid, so return failure, but first
+ * read another record, since we sent a sentinel ',' for the
+ * sake of detected the last inlined function. Treat this as the
+ * first of a record as the ',' generates a new start with GNU
+ * binutils, also force a non-zero address as we're no longer
+ * reading that record.
+ */
+ switch (read_addr2line_record(&io, a2l_style, dso_name,
+ /*addr=*/1, /*first=*/true,
+ NULL, NULL, NULL)) {
+ case -1:
+ if (!symbol_conf.disable_add2line_warn)
+ pr_warning("%s %s: could not read sentinel record\n",
+ __func__, dso_name);
+ break;
+ case 0:
+ /* The sentinel as expected. */
+ break;
+ default:
+ if (!symbol_conf.disable_add2line_warn)
+ pr_warning("%s %s: unexpected record instead of sentinel",
+ __func__, dso_name);
+ break;
+ }
+ goto out;
+ default:
+ /* First record as expected. */
+ break;
+ }
+
+ if (file) {
+ *file = strdup(record_filename);
+ ret = 1;
+ }
+ if (line_nr)
+ *line_nr = record_line_nr;
+
+ if (unwind_inlines) {
+ if (node && inline_list__append_record(dso, node, sym,
+ record_function,
+ record_filename,
+ record_line_nr)) {
+ ret = 0;
+ goto out;
+ }
+ }
+
+ /*
+ * We have to read the records even if we don't care about the inline
+ * info. This isn't the first record and force the address to non-zero
+ * as we're reading records beyond the first.
+ */
+ while ((record_status = read_addr2line_record(&io,
+ a2l_style,
+ dso_name,
+ /*addr=*/1,
+ /*first=*/false,
+ &record_function,
+ &record_filename,
+ &record_line_nr)) == 1) {
+ if (unwind_inlines && node && inline_count++ < MAX_INLINE_NEST) {
+ if (inline_list__append_record(dso, node, sym,
+ record_function,
+ record_filename,
+ record_line_nr)) {
+ ret = 0;
+ goto out;
+ }
+ ret = 1; /* found at least one inline frame */
+ }
+ }
+
+out:
+ free(record_function);
+ free(record_filename);
+ if (io.eof) {
+ dso__set_a2l(dso, NULL);
+ addr2line_subprocess_cleanup(a2l);
+ }
+ return ret;
+}
+
+void dso__free_a2l(struct dso *dso)
+{
+ struct child_process *a2l = dso__a2l(dso);
+
+ if (!a2l)
+ return;
+
+ addr2line_subprocess_cleanup(a2l);
+
+ dso__set_a2l(dso, NULL);
+}
diff --git a/tools/perf/util/addr2line.h b/tools/perf/util/addr2line.h
new file mode 100644
index 000000000000..d35a47ba8dab
--- /dev/null
+++ b/tools/perf/util/addr2line.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_ADDR2LINE_H
+#define __PERF_ADDR2LINE_H
+
+#include <linux/types.h>
+
+struct dso;
+struct inline_node;
+struct symbol;
+
+extern int addr2line_timeout_ms;
+
+int cmd__addr2line(const char *dso_name, u64 addr,
+ char **file, unsigned int *line_nr,
+ struct dso *dso,
+ bool unwind_inlines,
+ struct inline_node *node,
+ struct symbol *sym);
+
+#endif /* __PERF_ADDR2LINE_H */
diff --git a/tools/perf/util/annotate-data.c b/tools/perf/util/annotate-data.c
index 1ef2edbc71d9..07cf9c334be0 100644
--- a/tools/perf/util/annotate-data.c
+++ b/tools/perf/util/annotate-data.c
@@ -4,7 +4,7 @@
*
* Written by Namhyung Kim <namhyung@kernel.org>
*/
-
+#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <inttypes.h>
@@ -58,6 +58,10 @@ void pr_debug_type_name(Dwarf_Die *die, enum type_state_kind kind)
case TSR_KIND_CONST:
pr_info(" constant\n");
return;
+ case TSR_KIND_PERCPU_POINTER:
+ pr_info(" percpu pointer");
+ /* it also prints the type info */
+ break;
case TSR_KIND_POINTER:
pr_info(" pointer");
/* it also prints the type info */
@@ -573,25 +577,35 @@ struct type_state_stack *find_stack_state(struct type_state *state,
}
void set_stack_state(struct type_state_stack *stack, int offset, u8 kind,
- Dwarf_Die *type_die)
+ Dwarf_Die *type_die, int ptr_offset)
{
int tag;
Dwarf_Word size;
- if (dwarf_aggregate_size(type_die, &size) < 0)
+ if (kind == TSR_KIND_POINTER) {
+ /* TODO: arch-dependent pointer size */
+ size = sizeof(void *);
+ }
+ else if (dwarf_aggregate_size(type_die, &size) < 0)
size = 0;
- tag = dwarf_tag(type_die);
-
stack->type = *type_die;
stack->size = size;
stack->offset = offset;
+ stack->ptr_offset = ptr_offset;
stack->kind = kind;
+ if (kind == TSR_KIND_POINTER) {
+ stack->compound = false;
+ return;
+ }
+
+ tag = dwarf_tag(type_die);
+
switch (tag) {
case DW_TAG_structure_type:
case DW_TAG_union_type:
- stack->compound = (kind != TSR_KIND_POINTER);
+ stack->compound = (kind != TSR_KIND_PERCPU_POINTER);
break;
default:
stack->compound = false;
@@ -601,18 +615,19 @@ void set_stack_state(struct type_state_stack *stack, int offset, u8 kind,
struct type_state_stack *findnew_stack_state(struct type_state *state,
int offset, u8 kind,
- Dwarf_Die *type_die)
+ Dwarf_Die *type_die,
+ int ptr_offset)
{
struct type_state_stack *stack = find_stack_state(state, offset);
if (stack) {
- set_stack_state(stack, offset, kind, type_die);
+ set_stack_state(stack, offset, kind, type_die, ptr_offset);
return stack;
}
stack = malloc(sizeof(*stack));
if (stack) {
- set_stack_state(stack, offset, kind, type_die);
+ set_stack_state(stack, offset, kind, type_die, ptr_offset);
list_add(&stack->list, &state->stack_vars);
}
return stack;
@@ -868,6 +883,11 @@ static void update_var_state(struct type_state *state, struct data_loc_info *dlo
int offset = var->offset;
struct type_state_stack *stack;
+ /* If the reg location holds the pointer value, dereference the type */
+ if (!var->is_reg_var_addr && is_pointer_type(&mem_die) &&
+ __die_get_real_type(&mem_die, &mem_die) == NULL)
+ continue;
+
if (var->reg != DWARF_REG_FB)
offset -= fb_offset;
@@ -877,7 +897,7 @@ static void update_var_state(struct type_state *state, struct data_loc_info *dlo
continue;
findnew_stack_state(state, offset, TSR_KIND_TYPE,
- &mem_die);
+ &mem_die, /*ptr_offset=*/0);
if (var->reg == state->stack_reg) {
pr_debug_dtp("var [%"PRIx64"] %#x(reg%d)",
@@ -887,24 +907,45 @@ static void update_var_state(struct type_state *state, struct data_loc_info *dlo
insn_offset, -offset);
}
pr_debug_type_name(&mem_die, TSR_KIND_TYPE);
- } else if (has_reg_type(state, var->reg) && var->offset == 0) {
+ } else if (has_reg_type(state, var->reg)) {
struct type_state_reg *reg;
Dwarf_Die orig_type;
reg = &state->regs[var->reg];
if (reg->ok && reg->kind == TSR_KIND_TYPE &&
- !is_better_type(&reg->type, &mem_die))
+ (!is_better_type(&reg->type, &mem_die) || var->is_reg_var_addr))
continue;
- orig_type = reg->type;
+ /* Handle address registers with TSR_KIND_POINTER */
+ if (var->is_reg_var_addr) {
+ if (reg->ok && reg->kind == TSR_KIND_POINTER &&
+ !is_better_type(&reg->type, &mem_die))
+ continue;
+ reg->offset = -var->offset;
+ reg->type = mem_die;
+ reg->kind = TSR_KIND_POINTER;
+ reg->ok = true;
+
+ pr_debug_dtp("var [%"PRIx64"] reg%d addr offset %x",
+ insn_offset, var->reg, var->offset);
+ pr_debug_type_name(&mem_die, TSR_KIND_POINTER);
+ continue;
+ }
+
+ orig_type = reg->type;
+ /*
+ * var->offset + reg value is the beginning of the struct
+ * reg->offset is the offset the reg points
+ */
+ reg->offset = -var->offset;
reg->type = mem_die;
reg->kind = TSR_KIND_TYPE;
reg->ok = true;
- pr_debug_dtp("var [%"PRIx64"] reg%d",
- insn_offset, var->reg);
+ pr_debug_dtp("var [%"PRIx64"] reg%d offset %x",
+ insn_offset, var->reg, var->offset);
pr_debug_type_name(&mem_die, TSR_KIND_TYPE);
/*
@@ -1092,7 +1133,7 @@ again:
if (__die_get_real_type(&state->regs[reg].type, type_die) == NULL)
return PERF_TMR_NO_POINTER;
- dloc->type_offset = dloc->op->offset;
+ dloc->type_offset = dloc->op->offset + state->regs[reg].offset;
if (dwarf_tag(type_die) == DW_TAG_typedef)
die_get_real_type(type_die, &sized_type);
@@ -1108,6 +1149,30 @@ again:
}
if (state->regs[reg].kind == TSR_KIND_POINTER) {
+ struct strbuf sb;
+
+ strbuf_init(&sb, 32);
+ die_get_typename_from_type(&state->regs[reg].type, &sb);
+ pr_debug_dtp("(ptr->%s)", sb.buf);
+ strbuf_release(&sb);
+
+ /*
+ * Register holds a pointer (address) to the target variable.
+ * The type is the type of the variable it points to.
+ */
+ *type_die = state->regs[reg].type;
+
+ dloc->type_offset = dloc->op->offset + state->regs[reg].offset;
+
+ /* Get the size of the actual type */
+ if (dwarf_aggregate_size(type_die, &size) < 0 ||
+ (unsigned)dloc->type_offset >= size)
+ return PERF_TMR_BAD_OFFSET;
+
+ return PERF_TMR_OK;
+ }
+
+ if (state->regs[reg].kind == TSR_KIND_PERCPU_POINTER) {
pr_debug_dtp("percpu ptr");
/*
diff --git a/tools/perf/util/annotate-data.h b/tools/perf/util/annotate-data.h
index 541fee1a5f0a..869307c7f130 100644
--- a/tools/perf/util/annotate-data.h
+++ b/tools/perf/util/annotate-data.h
@@ -34,6 +34,7 @@ enum type_state_kind {
TSR_KIND_TYPE,
TSR_KIND_PERCPU_BASE,
TSR_KIND_CONST,
+ TSR_KIND_PERCPU_POINTER,
TSR_KIND_POINTER,
TSR_KIND_CANARY,
};
@@ -173,6 +174,12 @@ extern struct annotated_data_stat ann_data_stat;
struct type_state_reg {
Dwarf_Die type;
u32 imm_value;
+ /*
+ * The offset within the struct that the register points to.
+ * A value of 0 means the register points to the beginning.
+ * type_offset = op->offset + reg->offset
+ */
+ s32 offset;
bool ok;
bool caller_saved;
u8 kind;
@@ -184,17 +191,22 @@ struct type_state_stack {
struct list_head list;
Dwarf_Die type;
int offset;
+ /* pointer offset, saves tsr->offset on the stack state */
+ int ptr_offset;
int size;
bool compound;
u8 kind;
};
-/* FIXME: This should be arch-dependent */
-#ifdef __powerpc__
+/*
+ * Maximum number of registers tracked in type_state.
+ *
+ * This limit must cover all supported architectures, since perf
+ * may analyze perf.data files generated on systems with a different
+ * register set. Use 32 as a safe upper bound instead of relying on
+ * build-arch specific values.
+ */
#define TYPE_STATE_MAX_REGS 32
-#else
-#define TYPE_STATE_MAX_REGS 16
-#endif
/*
* State table to maintain type info in each register and stack location.
@@ -237,9 +249,10 @@ int annotated_data_type__get_member_name(struct annotated_data_type *adt,
bool has_reg_type(struct type_state *state, int reg);
struct type_state_stack *findnew_stack_state(struct type_state *state,
int offset, u8 kind,
- Dwarf_Die *type_die);
+ Dwarf_Die *type_die,
+ int ptr_offset);
void set_stack_state(struct type_state_stack *stack, int offset, u8 kind,
- Dwarf_Die *type_die);
+ Dwarf_Die *type_die, int ptr_offset);
struct type_state_stack *find_stack_state(struct type_state *state,
int offset);
bool get_global_var_type(Dwarf_Die *cu_die, struct data_loc_info *dloc,
diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
index 0dd475a744b6..cc7764455faf 100644
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -765,14 +765,16 @@ __hist_entry__get_data_type(struct hist_entry *he, struct arch *arch,
struct debuginfo *dbg, struct disasm_line *dl,
int *type_offset);
-struct annotation_print_data {
- struct hist_entry *he;
- struct evsel *evsel;
- struct arch *arch;
- struct debuginfo *dbg;
- u64 start;
- int addr_fmt_width;
-};
+static bool needs_type_info(struct annotated_data_type *data_type)
+{
+ if (data_type == NULL || data_type == NO_TYPE)
+ return false;
+
+ if (verbose)
+ return true;
+
+ return (data_type != &stackop_type) && (data_type != &canary_type);
+}
static int
annotation_line__print(struct annotation_line *al, struct annotation_print_data *apd,
@@ -845,7 +847,7 @@ annotation_line__print(struct annotation_line *al, struct annotation_print_data
printf(" : ");
- disasm_line__print(dl, apd->start, apd->addr_fmt_width);
+ disasm_line__print(dl, notes->src->start, apd->addr_fmt_width);
if (opts->code_with_type && apd->dbg) {
struct annotated_data_type *data_type;
@@ -853,7 +855,7 @@ annotation_line__print(struct annotation_line *al, struct annotation_print_data
data_type = __hist_entry__get_data_type(apd->he, apd->arch,
apd->dbg, dl, &offset);
- if (data_type && data_type != NO_TYPE) {
+ if (needs_type_info(data_type)) {
char buf[4096];
printf("\t\t# data-type: %s",
@@ -978,7 +980,7 @@ void symbol__calc_percent(struct symbol *sym, struct evsel *evsel)
annotation__calc_percent(notes, evsel, symbol__size(sym));
}
-static int evsel__get_arch(struct evsel *evsel, struct arch **parch)
+int evsel__get_arch(struct evsel *evsel, struct arch **parch)
{
struct perf_env *env = evsel__env(evsel);
const char *arch_name = perf_env__arch(env);
@@ -1013,14 +1015,13 @@ int symbol__annotate(struct map_symbol *ms, struct evsel *evsel,
struct symbol *sym = ms->sym;
struct annotation *notes = symbol__annotation(sym);
struct annotate_args args = {
- .evsel = evsel,
.options = &annotate_opts,
};
struct arch *arch = NULL;
int err, nr;
err = evsel__get_arch(evsel, &arch);
- if (err < 0)
+ if (err)
return err;
if (parch)
@@ -1230,7 +1231,6 @@ int hist_entry__annotate_printf(struct hist_entry *he, struct evsel *evsel)
struct annotation_print_data apd = {
.he = he,
.evsel = evsel,
- .start = map__rip_2objdump(map, sym->start),
};
int printed = 2, queue_len = 0;
int more = 0;
@@ -1267,9 +1267,9 @@ int hist_entry__annotate_printf(struct hist_entry *he, struct evsel *evsel)
symbol__annotate_hits(sym, evsel);
apd.addr_fmt_width = annotated_source__addr_fmt_width(&notes->src->source,
- apd.start);
+ notes->src->start);
evsel__get_arch(evsel, &apd.arch);
- apd.dbg = debuginfo__new(filename);
+ apd.dbg = dso__debuginfo(dso);
list_for_each_entry(pos, &notes->src->source, node) {
int err;
@@ -1357,7 +1357,8 @@ static void FILE__write_graph(void *fp, int graph)
fputs(s, fp);
}
-static int symbol__annotate_fprintf2(struct symbol *sym, FILE *fp)
+static int symbol__annotate_fprintf2(struct symbol *sym, FILE *fp,
+ struct annotation_print_data *apd)
{
struct annotation *notes = symbol__annotation(sym);
struct annotation_write_ops wops = {
@@ -1371,24 +1372,37 @@ static int symbol__annotate_fprintf2(struct symbol *sym, FILE *fp)
};
struct annotation_line *al;
+ if (annotate_opts.code_with_type) {
+ evsel__get_arch(apd->evsel, &apd->arch);
+ apd->dbg = dso__debuginfo(map__dso(apd->he->ms.map));
+ }
+
list_for_each_entry(al, &notes->src->source, node) {
if (annotation_line__filter(al))
continue;
- annotation_line__write(al, notes, &wops);
+ annotation_line__write(al, notes, &wops, apd);
fputc('\n', fp);
wops.first_line = false;
}
+ if (annotate_opts.code_with_type)
+ debuginfo__delete(apd->dbg);
+
return 0;
}
-int map_symbol__annotation_dump(struct map_symbol *ms, struct evsel *evsel)
+int map_symbol__annotation_dump(struct map_symbol *ms, struct evsel *evsel,
+ struct hist_entry *he)
{
const char *ev_name = evsel__name(evsel);
char buf[1024];
char *filename;
int err = -1;
FILE *fp;
+ struct annotation_print_data apd = {
+ .he = he,
+ .evsel = evsel,
+ };
if (asprintf(&filename, "%s.annotation", ms->sym->name) < 0)
return -1;
@@ -1404,7 +1418,7 @@ int map_symbol__annotation_dump(struct map_symbol *ms, struct evsel *evsel)
fprintf(fp, "%s() %s\nEvent: %s\n\n",
ms->sym->name, dso__long_name(map__dso(ms->map)), ev_name);
- symbol__annotate_fprintf2(ms->sym, fp);
+ symbol__annotate_fprintf2(ms->sym, fp, &apd);
fclose(fp);
err = 0;
@@ -1656,6 +1670,10 @@ int hist_entry__tty_annotate2(struct hist_entry *he, struct evsel *evsel)
struct symbol *sym = ms->sym;
struct rb_root source_line = RB_ROOT;
struct hists *hists = evsel__hists(evsel);
+ struct annotation_print_data apd = {
+ .he = he,
+ .evsel = evsel,
+ };
char buf[1024];
int err;
@@ -1678,7 +1696,7 @@ int hist_entry__tty_annotate2(struct hist_entry *he, struct evsel *evsel)
hists__scnprintf_title(hists, buf, sizeof(buf));
fprintf(stdout, "%s, [percent: %s]\n%s() %s\n",
buf, percent_type_str(annotate_opts.percent_type), sym->name, dso__long_name(dso));
- symbol__annotate_fprintf2(sym, stdout);
+ symbol__annotate_fprintf2(sym, stdout, &apd);
annotated_source__purge(symbol__annotation(sym)->src);
@@ -1743,7 +1761,7 @@ static double annotation_line__max_percent(struct annotation_line *al,
return percent_max;
}
-static void disasm_line__write(struct disasm_line *dl, struct annotation *notes,
+static int disasm_line__write(struct disasm_line *dl, struct annotation *notes,
void *obj, char *bf, size_t size,
void (*obj__printf)(void *obj, const char *fmt, ...),
void (*obj__write_graph)(void *obj, int graph))
@@ -1771,8 +1789,8 @@ call_like:
obj__printf(obj, " ");
}
- disasm_line__scnprintf(dl, bf, size, !annotate_opts.use_offset,
- notes->src->widths.max_ins_name);
+ return disasm_line__scnprintf(dl, bf, size, !annotate_opts.use_offset,
+ notes->src->widths.max_ins_name) + 2;
}
static void ipc_coverage_string(char *bf, int size, struct annotation *notes)
@@ -1935,24 +1953,82 @@ err:
return -ENOMEM;
}
-static void __annotation_line__write(struct annotation_line *al, struct annotation *notes,
- bool first_line, bool current_entry, bool change_color, int width,
- void *obj, unsigned int percent_type,
- int (*obj__set_color)(void *obj, int color),
- void (*obj__set_percent_color)(void *obj, double percent, bool current),
- int (*obj__set_jumps_percent_color)(void *obj, int nr, bool current),
- void (*obj__printf)(void *obj, const char *fmt, ...),
- void (*obj__write_graph)(void *obj, int graph))
+struct type_hash_entry {
+ struct annotated_data_type *type;
+ int offset;
+};
+static int disasm_line__snprint_type_info(struct disasm_line *dl,
+ char *buf, int len,
+ struct annotation_print_data *apd)
{
- double percent_max = annotation_line__max_percent(al, percent_type);
- int pcnt_width = annotation__pcnt_width(notes),
- cycles_width = annotation__cycles_width(notes);
+ struct annotated_data_type *data_type = NULL;
+ struct type_hash_entry *entry = NULL;
+ char member[256];
+ int offset = 0;
+ int printed;
+
+ scnprintf(buf, len, " ");
+
+ if (!annotate_opts.code_with_type || apd->dbg == NULL)
+ return 1;
+
+ if (apd->type_hash) {
+ hashmap__find(apd->type_hash, dl->al.offset, &entry);
+ if (entry != NULL) {
+ data_type = entry->type;
+ offset = entry->offset;
+ }
+ }
+
+ if (data_type == NULL)
+ data_type = __hist_entry__get_data_type(apd->he, apd->arch, apd->dbg, dl, &offset);
+
+ if (apd->type_hash && entry == NULL) {
+ entry = malloc(sizeof(*entry));
+ if (entry != NULL) {
+ entry->type = data_type;
+ entry->offset = offset;
+ hashmap__add(apd->type_hash, dl->al.offset, entry);
+ }
+ }
+
+ if (!needs_type_info(data_type))
+ return 1;
+
+ printed = scnprintf(buf, len, "\t\t# data-type: %s", data_type->self.type_name);
+
+ if (data_type != &stackop_type && data_type != &canary_type && len > printed)
+ printed += scnprintf(buf + printed, len - printed, " +%#x", offset);
+
+ if (annotated_data_type__get_member_name(data_type, member, sizeof(member), offset) &&
+ len > printed) {
+ printed += scnprintf(buf + printed, len - printed, " (%s)", member);
+ }
+ return printed;
+}
+
+void annotation_line__write(struct annotation_line *al, struct annotation *notes,
+ const struct annotation_write_ops *wops,
+ struct annotation_print_data *apd)
+{
+ bool current_entry = wops->current_entry;
+ bool change_color = wops->change_color;
+ double percent_max = annotation_line__max_percent(al, annotate_opts.percent_type);
+ int width = wops->width;
+ int pcnt_width = annotation__pcnt_width(notes);
+ int cycles_width = annotation__cycles_width(notes);
bool show_title = false;
char bf[256];
int printed;
-
- if (first_line && (al->offset == -1 || percent_max == 0.0)) {
+ void *obj = wops->obj;
+ int (*obj__set_color)(void *obj, int color) = wops->set_color;
+ void (*obj__set_percent_color)(void *obj, double percent, bool current) = wops->set_percent_color;
+ int (*obj__set_jumps_percent_color)(void *obj, int nr, bool current) = wops->set_jumps_percent_color;
+ void (*obj__printf)(void *obj, const char *fmt, ...) = wops->printf;
+ void (*obj__write_graph)(void *obj, int graph) = wops->write_graph;
+
+ if (wops->first_line && (al->offset == -1 || percent_max == 0.0)) {
if (notes->branch && al->cycles) {
if (al->cycles->ipc == 0.0 && al->cycles->avg == 0)
show_title = true;
@@ -1966,7 +2042,8 @@ static void __annotation_line__write(struct annotation_line *al, struct annotati
for (i = 0; i < al->data_nr; i++) {
double percent;
- percent = annotation_data__percent(&al->data[i], percent_type);
+ percent = annotation_data__percent(&al->data[i],
+ annotate_opts.percent_type);
obj__set_percent_color(obj, percent, current_entry);
if (symbol_conf.show_total_period) {
@@ -1989,6 +2066,7 @@ static void __annotation_line__write(struct annotation_line *al, struct annotati
symbol_conf.show_nr_samples ? "Samples" : "Percent");
}
}
+ width -= pcnt_width;
if (notes->branch) {
if (al->cycles && al->cycles->ipc)
@@ -2052,11 +2130,13 @@ static void __annotation_line__write(struct annotation_line *al, struct annotati
obj__printf(obj, "%*s", ANNOTATION__AVG_IPC_WIDTH, bf);
}
}
+ width -= cycles_width;
obj__printf(obj, " ");
+ width -= 1;
if (!*al->line)
- obj__printf(obj, "%-*s", width - pcnt_width - cycles_width, " ");
+ obj__printf(obj, "%-*s", width, " ");
else if (al->offset == -1) {
if (al->line_nr && annotate_opts.show_linenr)
printed = scnprintf(bf, sizeof(bf), "%-*d ",
@@ -2065,7 +2145,8 @@ static void __annotation_line__write(struct annotation_line *al, struct annotati
printed = scnprintf(bf, sizeof(bf), "%-*s ",
notes->src->widths.addr, " ");
obj__printf(obj, bf);
- obj__printf(obj, "%-*s", width - printed - pcnt_width - cycles_width + 1, al->line);
+ width -= printed;
+ obj__printf(obj, "%-*s", width, al->line);
} else {
u64 addr = al->offset;
int color = -1;
@@ -2108,22 +2189,18 @@ print_addr:
if (change_color)
obj__set_color(obj, color);
- disasm_line__write(disasm_line(al), notes, obj, bf, sizeof(bf), obj__printf, obj__write_graph);
+ width -= printed;
- obj__printf(obj, "%-*s", width - pcnt_width - cycles_width - 3 - printed, bf);
- }
+ printed = disasm_line__write(disasm_line(al), notes, obj, bf, sizeof(bf),
+ obj__printf, obj__write_graph);
-}
+ obj__printf(obj, "%s", bf);
+ width -= printed;
+
+ disasm_line__snprint_type_info(disasm_line(al), bf, sizeof(bf), apd);
+ obj__printf(obj, "%-*s", width, bf);
+ }
-void annotation_line__write(struct annotation_line *al, struct annotation *notes,
- struct annotation_write_ops *wops)
-{
- __annotation_line__write(al, notes, wops->first_line, wops->current_entry,
- wops->change_color, wops->width, wops->obj,
- annotate_opts.percent_type,
- wops->set_color, wops->set_percent_color,
- wops->set_jumps_percent_color, wops->printf,
- wops->write_graph);
}
int symbol__annotate2(struct map_symbol *ms, struct evsel *evsel,
@@ -2621,6 +2698,20 @@ static bool is_stack_canary(struct arch *arch, struct annotated_op_loc *loc)
return false;
}
+/**
+ * Returns true if the instruction has a memory operand without
+ * performing a load/store
+ */
+static bool is_address_gen_insn(struct arch *arch, struct disasm_line *dl)
+{
+ if (arch__is(arch, "x86")) {
+ if (!strncmp(dl->ins.name, "lea", 3))
+ return true;
+ }
+
+ return false;
+}
+
static struct disasm_line *
annotation__prev_asm_line(struct annotation *notes, struct disasm_line *curr)
{
@@ -2729,6 +2820,12 @@ __hist_entry__get_data_type(struct hist_entry *he, struct arch *arch,
return &stackop_type;
}
+ if (is_address_gen_insn(arch, dl)) {
+ istat->bad++;
+ ann_data_stat.no_mem_ops++;
+ return NO_TYPE;
+ }
+
for_each_insn_op_loc(&loc, i, op_loc) {
struct data_loc_info dloc = {
.arch = arch,
@@ -2829,7 +2926,7 @@ struct annotated_data_type *hist_entry__get_data_type(struct hist_entry *he)
di_cache.dso = dso__get(map__dso(ms->map));
debuginfo__delete(di_cache.dbg);
- di_cache.dbg = debuginfo__new(dso__long_name(di_cache.dso));
+ di_cache.dbg = dso__debuginfo(di_cache.dso);
}
if (di_cache.dbg == NULL) {
diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h
index 8b5131d257b0..d4990bff29a7 100644
--- a/tools/perf/util/annotate.h
+++ b/tools/perf/util/annotate.h
@@ -199,8 +199,20 @@ struct annotation_write_ops {
void (*write_graph)(void *obj, int graph);
};
+struct annotation_print_data {
+ struct hist_entry *he;
+ struct evsel *evsel;
+ struct arch *arch;
+ struct debuginfo *dbg;
+ /* save data type info keyed by al->offset */
+ struct hashmap *type_hash;
+ /* It'll be set in hist_entry__annotate_printf() */
+ int addr_fmt_width;
+};
+
void annotation_line__write(struct annotation_line *al, struct annotation *notes,
- struct annotation_write_ops *ops);
+ const struct annotation_write_ops *ops,
+ struct annotation_print_data *apd);
int __annotation__scnprintf_samples_period(struct annotation *notes,
char *bf, size_t size,
@@ -463,7 +475,8 @@ void symbol__annotate_zero_histogram(struct symbol *sym, struct evsel *evsel);
void symbol__annotate_decay_histogram(struct symbol *sym, struct evsel *evsel);
void annotated_source__purge(struct annotated_source *as);
-int map_symbol__annotation_dump(struct map_symbol *ms, struct evsel *evsel);
+int map_symbol__annotation_dump(struct map_symbol *ms, struct evsel *evsel,
+ struct hist_entry *he);
bool ui__has_annotation(void);
@@ -471,18 +484,6 @@ int hist_entry__annotate_printf(struct hist_entry *he, struct evsel *evsel);
int hist_entry__tty_annotate(struct hist_entry *he, struct evsel *evsel);
int hist_entry__tty_annotate2(struct hist_entry *he, struct evsel *evsel);
-#ifdef HAVE_SLANG_SUPPORT
-int symbol__tui_annotate(struct map_symbol *ms, struct evsel *evsel,
- struct hist_browser_timer *hbt);
-#else
-static inline int symbol__tui_annotate(struct map_symbol *ms __maybe_unused,
- struct evsel *evsel __maybe_unused,
- struct hist_browser_timer *hbt __maybe_unused)
-{
- return 0;
-}
-#endif
-
void annotation_options__init(void);
void annotation_options__exit(void);
@@ -584,4 +585,6 @@ void debuginfo_cache__delete(void);
int annotation_br_cntr_entry(char **str, int br_cntr_nr, u64 *br_cntr,
int num_aggr, struct evsel *evsel);
int annotation_br_cntr_abbr_list(char **str, struct evsel *evsel, bool header);
+
+int evsel__get_arch(struct evsel *evsel, struct arch **parch);
#endif /* __PERF_ANNOTATE_H */
diff --git a/tools/perf/util/arm-spe-decoder/Build b/tools/perf/util/arm-spe-decoder/Build
index 960062b3cb9e..ab500e0efe24 100644
--- a/tools/perf/util/arm-spe-decoder/Build
+++ b/tools/perf/util/arm-spe-decoder/Build
@@ -1 +1 @@
-perf-util-$(CONFIG_AUXTRACE) += arm-spe-pkt-decoder.o arm-spe-decoder.o
+perf-util-y += arm-spe-pkt-decoder.o arm-spe-decoder.o
diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c
index 688fe6d75244..9e02b2bdd117 100644
--- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c
+++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c
@@ -200,13 +200,61 @@ static int arm_spe_read_record(struct arm_spe_decoder *decoder)
decoder->record.op |= ARM_SPE_OP_ST;
else
decoder->record.op |= ARM_SPE_OP_LD;
- if (SPE_OP_PKT_IS_LDST_SVE(payload))
- decoder->record.op |= ARM_SPE_OP_SVE_LDST;
+
+ if (SPE_OP_PKT_LDST_SUBCLASS_GP_REG(payload)) {
+ decoder->record.op |= ARM_SPE_OP_GP_REG;
+ } else if (SPE_OP_PKT_LDST_SUBCLASS_SIMD_FP(payload)) {
+ decoder->record.op |= ARM_SPE_OP_SIMD_FP;
+ } else if (SPE_OP_PKT_LDST_SUBCLASS_UNSPEC_REG(payload)) {
+ decoder->record.op |= ARM_SPE_OP_UNSPEC_REG;
+ } else if (SPE_OP_PKT_LDST_SUBCLASS_NV_SYSREG(payload)) {
+ decoder->record.op |= ARM_SPE_OP_NV_SYSREG;
+ } else if (SPE_OP_PKT_LDST_SUBCLASS_MTE_TAG(payload)) {
+ decoder->record.op |= ARM_SPE_OP_MTE_TAG;
+ } else if (SPE_OP_PKT_LDST_SUBCLASS_EXTENDED(payload)) {
+ if (payload & SPE_OP_PKT_AR)
+ decoder->record.op |= ARM_SPE_OP_AR;
+ if (payload & SPE_OP_PKT_EXCL)
+ decoder->record.op |= ARM_SPE_OP_EXCL;
+ if (payload & SPE_OP_PKT_AT)
+ decoder->record.op |= ARM_SPE_OP_ATOMIC;
+ } else if (SPE_OP_PKT_LDST_SUBCLASS_SVE_SME_REG(payload)) {
+ decoder->record.op |= ARM_SPE_OP_SVE;
+ if (payload & SPE_OP_PKT_SVE_PRED)
+ decoder->record.op |= ARM_SPE_OP_PRED;
+ if (payload & SPE_OP_PKT_SVE_SG)
+ decoder->record.op |= ARM_SPE_OP_SG;
+ } else if (SPE_OP_PKT_LDST_SUBCLASS_MEMCPY(payload)) {
+ decoder->record.op |= ARM_SPE_OP_MEMCPY;
+ } else if (SPE_OP_PKT_LDST_SUBCLASS_MEMSET(payload)) {
+ decoder->record.op |= ARM_SPE_OP_MEMSET;
+ } else if (SPE_OP_PKT_LDST_SUBCLASS_GCS(payload)) {
+ decoder->record.op |= ARM_SPE_OP_GCS;
+ if (payload & SPE_OP_PKT_GCS_COMM)
+ decoder->record.op |= ARM_SPE_OP_COMM;
+ }
+
break;
case SPE_OP_PKT_HDR_CLASS_OTHER:
decoder->record.op |= ARM_SPE_OP_OTHER;
- if (SPE_OP_PKT_IS_OTHER_SVE_OP(payload))
- decoder->record.op |= ARM_SPE_OP_SVE_OTHER;
+ if (SPE_OP_PKT_OTHER_SUBCLASS_SVE(payload)) {
+ decoder->record.op |= ARM_SPE_OP_SVE | ARM_SPE_OP_DP;
+ if (payload & SPE_OP_PKT_OTHER_FP)
+ decoder->record.op |= ARM_SPE_OP_FP;
+ if (payload & SPE_OP_PKT_SVE_PRED)
+ decoder->record.op |= ARM_SPE_OP_PRED;
+ } else if (SPE_OP_PKT_OTHER_SUBCLASS_SME(payload)) {
+ decoder->record.op |= ARM_SPE_OP_SME;
+ if (payload & SPE_OP_PKT_OTHER_FP)
+ decoder->record.op |= ARM_SPE_OP_FP;
+ } else if (SPE_OP_PKT_OTHER_SUBCLASS_OTHER(payload)) {
+ if (payload & SPE_OP_PKT_OTHER_ASE)
+ decoder->record.op |= ARM_SPE_OP_ASE;
+ if (payload & SPE_OP_PKT_OTHER_FP)
+ decoder->record.op |= ARM_SPE_OP_FP;
+ if (payload & SPE_OP_PKT_COND)
+ decoder->record.op |= ARM_SPE_OP_COND;
+ }
break;
case SPE_OP_PKT_HDR_CLASS_BR_ERET:
decoder->record.op |= ARM_SPE_OP_BRANCH_ERET;
@@ -229,42 +277,7 @@ static int arm_spe_read_record(struct arm_spe_decoder *decoder)
}
break;
case ARM_SPE_EVENTS:
- if (payload & BIT(EV_L1D_REFILL))
- decoder->record.type |= ARM_SPE_L1D_MISS;
-
- if (payload & BIT(EV_L1D_ACCESS))
- decoder->record.type |= ARM_SPE_L1D_ACCESS;
-
- if (payload & BIT(EV_TLB_WALK))
- decoder->record.type |= ARM_SPE_TLB_MISS;
-
- if (payload & BIT(EV_TLB_ACCESS))
- decoder->record.type |= ARM_SPE_TLB_ACCESS;
-
- if (payload & BIT(EV_LLC_MISS))
- decoder->record.type |= ARM_SPE_LLC_MISS;
-
- if (payload & BIT(EV_LLC_ACCESS))
- decoder->record.type |= ARM_SPE_LLC_ACCESS;
-
- if (payload & BIT(EV_REMOTE_ACCESS))
- decoder->record.type |= ARM_SPE_REMOTE_ACCESS;
-
- if (payload & BIT(EV_MISPRED))
- decoder->record.type |= ARM_SPE_BRANCH_MISS;
-
- if (payload & BIT(EV_NOT_TAKEN))
- decoder->record.type |= ARM_SPE_BRANCH_NOT_TAKEN;
-
- if (payload & BIT(EV_TRANSACTIONAL))
- decoder->record.type |= ARM_SPE_IN_TXN;
-
- if (payload & BIT(EV_PARTIAL_PREDICATE))
- decoder->record.type |= ARM_SPE_SVE_PARTIAL_PRED;
-
- if (payload & BIT(EV_EMPTY_PREDICATE))
- decoder->record.type |= ARM_SPE_SVE_EMPTY_PRED;
-
+ decoder->record.type = payload;
break;
case ARM_SPE_DATA_SOURCE:
decoder->record.source = payload;
diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
index 881d9f29c138..3310e05122f0 100644
--- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
+++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
@@ -13,53 +13,65 @@
#include "arm-spe-pkt-decoder.h"
-enum arm_spe_sample_type {
- ARM_SPE_L1D_ACCESS = 1 << 0,
- ARM_SPE_L1D_MISS = 1 << 1,
- ARM_SPE_LLC_ACCESS = 1 << 2,
- ARM_SPE_LLC_MISS = 1 << 3,
- ARM_SPE_TLB_ACCESS = 1 << 4,
- ARM_SPE_TLB_MISS = 1 << 5,
- ARM_SPE_BRANCH_MISS = 1 << 6,
- ARM_SPE_REMOTE_ACCESS = 1 << 7,
- ARM_SPE_SVE_PARTIAL_PRED = 1 << 8,
- ARM_SPE_SVE_EMPTY_PRED = 1 << 9,
- ARM_SPE_BRANCH_NOT_TAKEN = 1 << 10,
- ARM_SPE_IN_TXN = 1 << 11,
-};
+#define ARM_SPE_L1D_ACCESS BIT(EV_L1D_ACCESS)
+#define ARM_SPE_L1D_MISS BIT(EV_L1D_REFILL)
+#define ARM_SPE_LLC_ACCESS BIT(EV_LLC_ACCESS)
+#define ARM_SPE_LLC_MISS BIT(EV_LLC_MISS)
+#define ARM_SPE_TLB_ACCESS BIT(EV_TLB_ACCESS)
+#define ARM_SPE_TLB_MISS BIT(EV_TLB_WALK)
+#define ARM_SPE_BRANCH_MISS BIT(EV_MISPRED)
+#define ARM_SPE_BRANCH_NOT_TAKEN BIT(EV_NOT_TAKEN)
+#define ARM_SPE_REMOTE_ACCESS BIT(EV_REMOTE_ACCESS)
+#define ARM_SPE_SVE_PARTIAL_PRED BIT(EV_PARTIAL_PREDICATE)
+#define ARM_SPE_SVE_EMPTY_PRED BIT(EV_EMPTY_PREDICATE)
+#define ARM_SPE_IN_TXN BIT(EV_TRANSACTIONAL)
+#define ARM_SPE_L2D_ACCESS BIT(EV_L2D_ACCESS)
+#define ARM_SPE_L2D_MISS BIT(EV_L2D_MISS)
+#define ARM_SPE_RECENTLY_FETCHED BIT(EV_RECENTLY_FETCHED)
+#define ARM_SPE_DATA_SNOOPED BIT(EV_DATA_SNOOPED)
+#define ARM_SPE_HITM BIT(EV_CACHE_DATA_MODIFIED)
enum arm_spe_op_type {
/* First level operation type */
ARM_SPE_OP_OTHER = 1 << 0,
ARM_SPE_OP_LDST = 1 << 1,
ARM_SPE_OP_BRANCH_ERET = 1 << 2,
+};
+
+enum arm_spe_2nd_op_ldst {
+ ARM_SPE_OP_GP_REG = 1 << 8,
+ ARM_SPE_OP_UNSPEC_REG = 1 << 9,
+ ARM_SPE_OP_NV_SYSREG = 1 << 10,
+ ARM_SPE_OP_SIMD_FP = 1 << 11,
+ ARM_SPE_OP_SVE = 1 << 12,
+ ARM_SPE_OP_MTE_TAG = 1 << 13,
+ ARM_SPE_OP_MEMCPY = 1 << 14,
+ ARM_SPE_OP_MEMSET = 1 << 15,
+ ARM_SPE_OP_GCS = 1 << 16,
+ ARM_SPE_OP_SME = 1 << 17,
+ ARM_SPE_OP_ASE = 1 << 18,
+
+ /* Assisted information for memory / SIMD */
+ ARM_SPE_OP_LD = 1 << 20,
+ ARM_SPE_OP_ST = 1 << 21,
+ ARM_SPE_OP_ATOMIC = 1 << 22,
+ ARM_SPE_OP_EXCL = 1 << 23,
+ ARM_SPE_OP_AR = 1 << 24,
+ ARM_SPE_OP_DP = 1 << 25, /* Data processing */
+ ARM_SPE_OP_PRED = 1 << 26, /* Predicated */
+ ARM_SPE_OP_SG = 1 << 27, /* Gather/Scatter */
+ ARM_SPE_OP_COMM = 1 << 28, /* Common */
+ ARM_SPE_OP_FP = 1 << 29, /* Floating-point */
+ ARM_SPE_OP_COND = 1 << 30, /* Conditional */
+};
- /* Second level operation type for OTHER */
- ARM_SPE_OP_SVE_OTHER = 1 << 16,
- ARM_SPE_OP_SVE_FP = 1 << 17,
- ARM_SPE_OP_SVE_PRED_OTHER = 1 << 18,
-
- /* Second level operation type for LDST */
- ARM_SPE_OP_LD = 1 << 16,
- ARM_SPE_OP_ST = 1 << 17,
- ARM_SPE_OP_ATOMIC = 1 << 18,
- ARM_SPE_OP_EXCL = 1 << 19,
- ARM_SPE_OP_AR = 1 << 20,
- ARM_SPE_OP_SIMD_FP = 1 << 21,
- ARM_SPE_OP_GP_REG = 1 << 22,
- ARM_SPE_OP_UNSPEC_REG = 1 << 23,
- ARM_SPE_OP_NV_SYSREG = 1 << 24,
- ARM_SPE_OP_SVE_LDST = 1 << 25,
- ARM_SPE_OP_SVE_PRED_LDST = 1 << 26,
- ARM_SPE_OP_SVE_SG = 1 << 27,
-
- /* Second level operation type for BRANCH_ERET */
- ARM_SPE_OP_BR_COND = 1 << 16,
- ARM_SPE_OP_BR_INDIRECT = 1 << 17,
- ARM_SPE_OP_BR_GCS = 1 << 18,
- ARM_SPE_OP_BR_CR_BL = 1 << 19,
- ARM_SPE_OP_BR_CR_RET = 1 << 20,
- ARM_SPE_OP_BR_CR_NON_BL_RET = 1 << 21,
+enum arm_spe_2nd_op_branch {
+ ARM_SPE_OP_BR_COND = 1 << 8,
+ ARM_SPE_OP_BR_INDIRECT = 1 << 9,
+ ARM_SPE_OP_BR_GCS = 1 << 10,
+ ARM_SPE_OP_BR_CR_BL = 1 << 11,
+ ARM_SPE_OP_BR_CR_RET = 1 << 12,
+ ARM_SPE_OP_BR_CR_NON_BL_RET = 1 << 13,
};
enum arm_spe_common_data_source {
@@ -100,7 +112,7 @@ enum arm_spe_hisi_hip_data_source {
};
struct arm_spe_record {
- enum arm_spe_sample_type type;
+ u64 type;
int err;
u32 op;
u32 latency;
diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c
index 13cadb2f1cea..5769ba2f4140 100644
--- a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c
+++ b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c
@@ -314,6 +314,20 @@ static int arm_spe_pkt_desc_event(const struct arm_spe_pkt *packet,
arm_spe_pkt_out_string(&err, &buf, &buf_len, " SVE-PARTIAL-PRED");
if (payload & BIT(EV_EMPTY_PREDICATE))
arm_spe_pkt_out_string(&err, &buf, &buf_len, " SVE-EMPTY-PRED");
+ if (payload & BIT(EV_L2D_ACCESS))
+ arm_spe_pkt_out_string(&err, &buf, &buf_len, " L2D-ACCESS");
+ if (payload & BIT(EV_L2D_MISS))
+ arm_spe_pkt_out_string(&err, &buf, &buf_len, " L2D-MISS");
+ if (payload & BIT(EV_CACHE_DATA_MODIFIED))
+ arm_spe_pkt_out_string(&err, &buf, &buf_len, " HITM");
+ if (payload & BIT(EV_RECENTLY_FETCHED))
+ arm_spe_pkt_out_string(&err, &buf, &buf_len, " LFB");
+ if (payload & BIT(EV_DATA_SNOOPED))
+ arm_spe_pkt_out_string(&err, &buf, &buf_len, " SNOOPED");
+ if (payload & BIT(EV_STREAMING_SVE_MODE))
+ arm_spe_pkt_out_string(&err, &buf, &buf_len, " STREAMING-SVE");
+ if (payload & BIT(EV_SMCU))
+ arm_spe_pkt_out_string(&err, &buf, &buf_len, " SMCU");
return err;
}
@@ -326,7 +340,7 @@ static int arm_spe_pkt_desc_op_type(const struct arm_spe_pkt *packet,
switch (packet->index) {
case SPE_OP_PKT_HDR_CLASS_OTHER:
- if (SPE_OP_PKT_IS_OTHER_SVE_OP(payload)) {
+ if (SPE_OP_PKT_OTHER_SUBCLASS_SVE(payload)) {
arm_spe_pkt_out_string(&err, &buf, &buf_len, "SVE-OTHER");
/* SVE effective vector length */
@@ -337,8 +351,21 @@ static int arm_spe_pkt_desc_op_type(const struct arm_spe_pkt *packet,
arm_spe_pkt_out_string(&err, &buf, &buf_len, " FP");
if (payload & SPE_OP_PKT_SVE_PRED)
arm_spe_pkt_out_string(&err, &buf, &buf_len, " PRED");
- } else {
+ } else if (SPE_OP_PKT_OTHER_SUBCLASS_SME(payload)) {
+ arm_spe_pkt_out_string(&err, &buf, &buf_len, "SME-OTHER");
+
+ /* SME effective vector length or tile size */
+ arm_spe_pkt_out_string(&err, &buf, &buf_len, " ETS %d",
+ SPE_OP_PKG_SME_ETS(payload));
+
+ if (payload & SPE_OP_PKT_OTHER_FP)
+ arm_spe_pkt_out_string(&err, &buf, &buf_len, " FP");
+ } else if (SPE_OP_PKT_OTHER_SUBCLASS_OTHER(payload)) {
arm_spe_pkt_out_string(&err, &buf, &buf_len, "OTHER");
+ if (payload & SPE_OP_PKT_OTHER_ASE)
+ arm_spe_pkt_out_string(&err, &buf, &buf_len, " ASE");
+ if (payload & SPE_OP_PKT_OTHER_FP)
+ arm_spe_pkt_out_string(&err, &buf, &buf_len, " FP");
arm_spe_pkt_out_string(&err, &buf, &buf_len, " %s",
payload & SPE_OP_PKT_COND ?
"COND-SELECT" : "INSN-OTHER");
@@ -348,42 +375,30 @@ static int arm_spe_pkt_desc_op_type(const struct arm_spe_pkt *packet,
arm_spe_pkt_out_string(&err, &buf, &buf_len,
payload & 0x1 ? "ST" : "LD");
- if (SPE_OP_PKT_IS_LDST_ATOMIC(payload)) {
+ if (SPE_OP_PKT_LDST_SUBCLASS_EXTENDED(payload)) {
if (payload & SPE_OP_PKT_AT)
arm_spe_pkt_out_string(&err, &buf, &buf_len, " AT");
if (payload & SPE_OP_PKT_EXCL)
arm_spe_pkt_out_string(&err, &buf, &buf_len, " EXCL");
if (payload & SPE_OP_PKT_AR)
arm_spe_pkt_out_string(&err, &buf, &buf_len, " AR");
- }
-
- switch (SPE_OP_PKT_LDST_SUBCLASS_GET(payload)) {
- case SPE_OP_PKT_LDST_SUBCLASS_SIMD_FP:
+ } else if (SPE_OP_PKT_LDST_SUBCLASS_SIMD_FP(payload)) {
arm_spe_pkt_out_string(&err, &buf, &buf_len, " SIMD-FP");
- break;
- case SPE_OP_PKT_LDST_SUBCLASS_GP_REG:
+ } else if (SPE_OP_PKT_LDST_SUBCLASS_GP_REG(payload)) {
arm_spe_pkt_out_string(&err, &buf, &buf_len, " GP-REG");
- break;
- case SPE_OP_PKT_LDST_SUBCLASS_UNSPEC_REG:
+ } else if (SPE_OP_PKT_LDST_SUBCLASS_UNSPEC_REG(payload)) {
arm_spe_pkt_out_string(&err, &buf, &buf_len, " UNSPEC-REG");
- break;
- case SPE_OP_PKT_LDST_SUBCLASS_NV_SYSREG:
+ } else if (SPE_OP_PKT_LDST_SUBCLASS_NV_SYSREG(payload)) {
arm_spe_pkt_out_string(&err, &buf, &buf_len, " NV-SYSREG");
- break;
- case SPE_OP_PKT_LDST_SUBCLASS_MTE_TAG:
+ } else if (SPE_OP_PKT_LDST_SUBCLASS_MTE_TAG(payload)) {
arm_spe_pkt_out_string(&err, &buf, &buf_len, " MTE-TAG");
- break;
- case SPE_OP_PKT_LDST_SUBCLASS_MEMCPY:
+ } else if (SPE_OP_PKT_LDST_SUBCLASS_MEMCPY(payload)) {
arm_spe_pkt_out_string(&err, &buf, &buf_len, " MEMCPY");
- break;
- case SPE_OP_PKT_LDST_SUBCLASS_MEMSET:
+ } else if (SPE_OP_PKT_LDST_SUBCLASS_MEMSET(payload)) {
arm_spe_pkt_out_string(&err, &buf, &buf_len, " MEMSET");
- break;
- default:
- break;
- }
+ } else if (SPE_OP_PKT_LDST_SUBCLASS_SVE_SME_REG(payload)) {
+ arm_spe_pkt_out_string(&err, &buf, &buf_len, " SVE-SME-REG");
- if (SPE_OP_PKT_IS_LDST_SVE(payload)) {
/* SVE effective vector length */
arm_spe_pkt_out_string(&err, &buf, &buf_len, " EVLEN %d",
SPE_OP_PKG_SVE_EVL(payload));
@@ -392,6 +407,10 @@ static int arm_spe_pkt_desc_op_type(const struct arm_spe_pkt *packet,
arm_spe_pkt_out_string(&err, &buf, &buf_len, " PRED");
if (payload & SPE_OP_PKT_SVE_SG)
arm_spe_pkt_out_string(&err, &buf, &buf_len, " SG");
+ } else if (SPE_OP_PKT_LDST_SUBCLASS_GCS(payload)) {
+ arm_spe_pkt_out_string(&err, &buf, &buf_len, " GCS");
+ if (payload & SPE_OP_PKT_GCS_COMM)
+ arm_spe_pkt_out_string(&err, &buf, &buf_len, " COMM");
}
break;
case SPE_OP_PKT_HDR_CLASS_BR_ERET:
diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h
index 2cdf9f6da268..adf4cde320aa 100644
--- a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h
+++ b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h
@@ -108,6 +108,13 @@ enum arm_spe_events {
EV_TRANSACTIONAL = 16,
EV_PARTIAL_PREDICATE = 17,
EV_EMPTY_PREDICATE = 18,
+ EV_L2D_ACCESS = 19,
+ EV_L2D_MISS = 20,
+ EV_CACHE_DATA_MODIFIED = 21,
+ EV_RECENTLY_FETCHED = 22,
+ EV_DATA_SNOOPED = 23,
+ EV_STREAMING_SVE_MODE = 24,
+ EV_SMCU = 25,
};
/* Operation packet header */
@@ -116,25 +123,39 @@ enum arm_spe_events {
#define SPE_OP_PKT_HDR_CLASS_LD_ST_ATOMIC 0x1
#define SPE_OP_PKT_HDR_CLASS_BR_ERET 0x2
-#define SPE_OP_PKT_IS_OTHER_SVE_OP(v) (((v) & (BIT(7) | BIT(3) | BIT(0))) == 0x8)
+#define SPE_OP_PKT_OTHER_SUBCLASS_OTHER(v) (((v) & GENMASK_ULL(7, 3)) == 0x0)
+#define SPE_OP_PKT_OTHER_SUBCLASS_SVE(v) (((v) & (BIT(7) | BIT(3) | BIT(0))) == 0x8)
+#define SPE_OP_PKT_OTHER_SUBCLASS_SME(v) (((v) & (BIT(7) | BIT(3) | BIT(0))) == 0x88)
-#define SPE_OP_PKT_LDST_SUBCLASS_GET(v) ((v) & GENMASK_ULL(7, 1))
-#define SPE_OP_PKT_LDST_SUBCLASS_GP_REG 0x0
-#define SPE_OP_PKT_LDST_SUBCLASS_SIMD_FP 0x4
-#define SPE_OP_PKT_LDST_SUBCLASS_UNSPEC_REG 0x10
-#define SPE_OP_PKT_LDST_SUBCLASS_NV_SYSREG 0x30
-#define SPE_OP_PKT_LDST_SUBCLASS_MTE_TAG 0x14
-#define SPE_OP_PKT_LDST_SUBCLASS_MEMCPY 0x20
-#define SPE_OP_PKT_LDST_SUBCLASS_MEMSET 0x25
+#define SPE_OP_PKT_OTHER_ASE BIT(2)
+#define SPE_OP_PKT_OTHER_FP BIT(1)
-#define SPE_OP_PKT_IS_LDST_ATOMIC(v) (((v) & (GENMASK_ULL(7, 5) | BIT(1))) == 0x2)
+/*
+ * SME effective vector length or tile size (ETS) is stored in byte 0
+ * bits [6:4,2]; the length is rounded up to a power of two and use 128
+ * as one step, so ETS calculation is:
+ *
+ * 128 * (2 ^ bits [6:4,2]) = 32 << (bits [6:4,2])
+ */
+#define SPE_OP_PKG_SME_ETS(v) (128 << (FIELD_GET(GENMASK_ULL(6, 4), (v)) << 1 | \
+ (FIELD_GET(BIT(2), (v)))))
+
+#define SPE_OP_PKT_LDST_SUBCLASS_GP_REG(v) (((v) & GENMASK_ULL(7, 1)) == 0x0)
+#define SPE_OP_PKT_LDST_SUBCLASS_SIMD_FP(v) (((v) & GENMASK_ULL(7, 1)) == 0x4)
+#define SPE_OP_PKT_LDST_SUBCLASS_UNSPEC_REG(v) (((v) & GENMASK_ULL(7, 1)) == 0x10)
+#define SPE_OP_PKT_LDST_SUBCLASS_NV_SYSREG(v) (((v) & GENMASK_ULL(7, 1)) == 0x30)
+#define SPE_OP_PKT_LDST_SUBCLASS_MTE_TAG(v) (((v) & GENMASK_ULL(7, 1)) == 0x14)
+#define SPE_OP_PKT_LDST_SUBCLASS_MEMCPY(v) (((v) & GENMASK_ULL(7, 1)) == 0x20)
+#define SPE_OP_PKT_LDST_SUBCLASS_MEMSET(v) (((v) & GENMASK_ULL(7, 0)) == 0x25)
+
+#define SPE_OP_PKT_LDST_SUBCLASS_EXTENDED(v) (((v) & (GENMASK_ULL(7, 5) | BIT(1))) == 0x2)
#define SPE_OP_PKT_AR BIT(4)
#define SPE_OP_PKT_EXCL BIT(3)
#define SPE_OP_PKT_AT BIT(2)
#define SPE_OP_PKT_ST BIT(0)
-#define SPE_OP_PKT_IS_LDST_SVE(v) (((v) & (BIT(3) | BIT(1))) == 0x8)
+#define SPE_OP_PKT_LDST_SUBCLASS_SVE_SME_REG(v) (((v) & (BIT(3) | BIT(1))) == 0x8)
#define SPE_OP_PKT_SVE_SG BIT(7)
/*
@@ -148,6 +169,10 @@ enum arm_spe_events {
#define SPE_OP_PKT_SVE_PRED BIT(2)
#define SPE_OP_PKT_SVE_FP BIT(1)
+#define SPE_OP_PKT_LDST_SUBCLASS_GCS(v) (((v) & (GENMASK_ULL(7, 3) | BIT(1))) == 0x40)
+
+#define SPE_OP_PKT_GCS_COMM BIT(2)
+
#define SPE_OP_PKT_CR_MASK GENMASK_ULL(4, 3)
#define SPE_OP_PKT_CR_BL(v) (FIELD_GET(SPE_OP_PKT_CR_MASK, (v)) == 1)
#define SPE_OP_PKT_CR_RET(v) (FIELD_GET(SPE_OP_PKT_CR_MASK, (v)) == 2)
diff --git a/tools/perf/util/arm-spe.c b/tools/perf/util/arm-spe.c
index 8942fa598a84..dc19e72258f3 100644
--- a/tools/perf/util/arm-spe.c
+++ b/tools/perf/util/arm-spe.c
@@ -39,6 +39,23 @@
#define is_ldst_op(op) (!!((op) & ARM_SPE_OP_LDST))
+#define is_simd_op(op) (!!((op) & (ARM_SPE_OP_SIMD_FP | ARM_SPE_OP_SVE | \
+ ARM_SPE_OP_SME | ARM_SPE_OP_ASE)))
+
+#define is_mem_op(op) (is_ldst_op(op) || is_simd_op(op))
+
+#define ARM_SPE_CACHE_EVENT(lvl) \
+ (ARM_SPE_##lvl##_ACCESS | ARM_SPE_##lvl##_MISS)
+
+#define arm_spe_is_cache_level(type, lvl) \
+ ((type) & ARM_SPE_CACHE_EVENT(lvl))
+
+#define arm_spe_is_cache_hit(type, lvl) \
+ (((type) & ARM_SPE_CACHE_EVENT(lvl)) == ARM_SPE_##lvl##_ACCESS)
+
+#define arm_spe_is_cache_miss(type, lvl) \
+ ((type) & ARM_SPE_##lvl##_MISS)
+
struct arm_spe {
struct auxtrace auxtrace;
struct auxtrace_queues queues;
@@ -62,7 +79,6 @@ struct arm_spe {
u8 sample_remote_access;
u8 sample_memory;
u8 sample_instructions;
- u64 instructions_sample_period;
u64 l1d_miss_id;
u64 l1d_access_id;
@@ -101,7 +117,7 @@ struct arm_spe_queue {
u64 time;
u64 timestamp;
struct thread *thread;
- u64 period_instructions;
+ u64 sample_count;
u32 flags;
struct branch_stack *last_branch;
};
@@ -228,7 +244,6 @@ static struct arm_spe_queue *arm_spe__alloc_queue(struct arm_spe *spe,
speq->pid = -1;
speq->tid = -1;
speq->cpu = -1;
- speq->period_instructions = 0;
/* params set */
params.get_trace = arm_spe_get_trace;
@@ -305,15 +320,28 @@ static int arm_spe_set_tid(struct arm_spe_queue *speq, pid_t tid)
return 0;
}
-static u64 *arm_spe__get_metadata_by_cpu(struct arm_spe *spe, u64 cpu)
+static u64 *arm_spe__get_metadata_by_cpu(struct arm_spe *spe, int cpu)
{
u64 i;
if (!spe->metadata)
return NULL;
+ /* CPU ID is -1 for per-thread mode */
+ if (cpu < 0) {
+ /*
+ * On the heterogeneous system, due to CPU ID is -1,
+ * cannot confirm the data source packet is supported.
+ */
+ if (!spe->is_homogeneous)
+ return NULL;
+
+ /* In homogeneous system, simply use CPU0's metadata */
+ return spe->metadata[0];
+ }
+
for (i = 0; i < spe->metadata_nr_cpu; i++)
- if (spe->metadata[i][ARM_SPE_CPU] == cpu)
+ if (spe->metadata[i][ARM_SPE_CPU] == (u64)cpu)
return spe->metadata[i];
return NULL;
@@ -323,10 +351,7 @@ static struct simd_flags arm_spe__synth_simd_flags(const struct arm_spe_record *
{
struct simd_flags simd_flags = {};
- if ((record->op & ARM_SPE_OP_LDST) && (record->op & ARM_SPE_OP_SVE_LDST))
- simd_flags.arch |= SIMD_OP_FLAGS_ARCH_SVE;
-
- if ((record->op & ARM_SPE_OP_OTHER) && (record->op & ARM_SPE_OP_SVE_OTHER))
+ if (record->op & ARM_SPE_OP_SVE)
simd_flags.arch |= SIMD_OP_FLAGS_ARCH_SVE;
if (record->type & ARM_SPE_SVE_PARTIAL_PRED)
@@ -352,7 +377,7 @@ static void arm_spe_prep_sample(struct arm_spe *spe,
sample->cpumode = arm_spe_cpumode(spe, sample->ip);
sample->pid = speq->pid;
sample->tid = speq->tid;
- sample->period = 1;
+ sample->period = spe->synth_opts.period;
sample->cpu = speq->cpu;
sample->simd_flags = arm_spe__synth_simd_flags(record);
@@ -471,7 +496,8 @@ arm_spe_deliver_synth_event(struct arm_spe *spe,
}
static int arm_spe__synth_mem_sample(struct arm_spe_queue *speq,
- u64 spe_events_id, u64 data_src)
+ u64 spe_events_id,
+ union perf_mem_data_src data_src)
{
struct arm_spe *spe = speq->spe;
struct arm_spe_record *record = &speq->decoder->record;
@@ -486,7 +512,7 @@ static int arm_spe__synth_mem_sample(struct arm_spe_queue *speq,
sample.stream_id = spe_events_id;
sample.addr = record->virt_addr;
sample.phys_addr = record->phys_addr;
- sample.data_src = data_src;
+ sample.data_src = data_src.val;
sample.weight = record->latency;
ret = arm_spe_deliver_synth_event(spe, speq, event, &sample);
@@ -519,7 +545,8 @@ static int arm_spe__synth_branch_sample(struct arm_spe_queue *speq,
}
static int arm_spe__synth_instruction_sample(struct arm_spe_queue *speq,
- u64 spe_events_id, u64 data_src)
+ u64 spe_events_id,
+ union perf_mem_data_src data_src)
{
struct arm_spe *spe = speq->spe;
struct arm_spe_record *record = &speq->decoder->record;
@@ -527,14 +554,6 @@ static int arm_spe__synth_instruction_sample(struct arm_spe_queue *speq,
struct perf_sample sample;
int ret;
- /*
- * Handles perf instruction sampling period.
- */
- speq->period_instructions++;
- if (speq->period_instructions < spe->instructions_sample_period)
- return 0;
- speq->period_instructions = 0;
-
perf_sample__init(&sample, /*all=*/true);
arm_spe_prep_sample(spe, speq, event, &sample);
@@ -542,8 +561,7 @@ static int arm_spe__synth_instruction_sample(struct arm_spe_queue *speq,
sample.stream_id = spe_events_id;
sample.addr = record->to_ip;
sample.phys_addr = record->phys_addr;
- sample.data_src = data_src;
- sample.period = spe->instructions_sample_period;
+ sample.data_src = data_src.val;
sample.weight = record->latency;
sample.flags = speq->flags;
sample.branch_stack = speq->last_branch;
@@ -554,15 +572,21 @@ static int arm_spe__synth_instruction_sample(struct arm_spe_queue *speq,
}
static const struct midr_range common_ds_encoding_cpus[] = {
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A715),
MIDR_ALL_VERSIONS(MIDR_CORTEX_A720),
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A720AE),
MIDR_ALL_VERSIONS(MIDR_CORTEX_A725),
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A78C),
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_X1),
MIDR_ALL_VERSIONS(MIDR_CORTEX_X1C),
MIDR_ALL_VERSIONS(MIDR_CORTEX_X3),
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_X4),
MIDR_ALL_VERSIONS(MIDR_CORTEX_X925),
MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N1),
MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N2),
MIDR_ALL_VERSIONS(MIDR_NEOVERSE_V1),
MIDR_ALL_VERSIONS(MIDR_NEOVERSE_V2),
+ MIDR_ALL_VERSIONS(MIDR_NEOVERSE_V3),
{},
};
@@ -670,8 +694,8 @@ static void arm_spe__synth_data_source_common(const struct arm_spe_record *recor
* socket
*/
case ARM_SPE_COMMON_DS_REMOTE:
- data_src->mem_lvl = PERF_MEM_LVL_REM_CCE1;
- data_src->mem_lvl_num = PERF_MEM_LVLNUM_ANY_CACHE;
+ data_src->mem_lvl = PERF_MEM_LVL_NA;
+ data_src->mem_lvl_num = PERF_MEM_LVLNUM_NA;
data_src->mem_remote = PERF_MEM_REMOTE_REMOTE;
data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
break;
@@ -819,30 +843,121 @@ static const struct data_source_handle data_source_handles[] = {
DS(hisi_hip_ds_encoding_cpus, data_source_hisi_hip),
};
-static void arm_spe__synth_memory_level(const struct arm_spe_record *record,
- union perf_mem_data_src *data_src)
+static void arm_spe__synth_ld_memory_level(const struct arm_spe_record *record,
+ union perf_mem_data_src *data_src)
{
- if (record->type & (ARM_SPE_LLC_ACCESS | ARM_SPE_LLC_MISS)) {
- data_src->mem_lvl = PERF_MEM_LVL_L3;
+ /*
+ * To find a cache hit, search in ascending order from the lower level
+ * caches to the higher level caches. This reflects the best scenario
+ * for a cache hit.
+ */
+ if (arm_spe_is_cache_hit(record->type, L1D)) {
+ data_src->mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_HIT;
+ data_src->mem_lvl_num = PERF_MEM_LVLNUM_L1;
+ } else if (record->type & ARM_SPE_RECENTLY_FETCHED) {
+ data_src->mem_lvl = PERF_MEM_LVL_LFB | PERF_MEM_LVL_HIT;
+ data_src->mem_lvl_num = PERF_MEM_LVLNUM_LFB;
+ } else if (arm_spe_is_cache_hit(record->type, L2D)) {
+ data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT;
+ data_src->mem_lvl_num = PERF_MEM_LVLNUM_L2;
+ } else if (arm_spe_is_cache_hit(record->type, LLC)) {
+ data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT;
+ data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
+ /*
+ * To find a cache miss, search in descending order from the higher
+ * level cache to the lower level cache. This represents the worst
+ * scenario for a cache miss.
+ */
+ } else if (arm_spe_is_cache_miss(record->type, LLC)) {
+ data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_MISS;
+ data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
+ } else if (arm_spe_is_cache_miss(record->type, L2D)) {
+ data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_MISS;
+ data_src->mem_lvl_num = PERF_MEM_LVLNUM_L2;
+ } else if (arm_spe_is_cache_miss(record->type, L1D)) {
+ data_src->mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_MISS;
+ data_src->mem_lvl_num = PERF_MEM_LVLNUM_L1;
+ }
+}
- if (record->type & ARM_SPE_LLC_MISS)
- data_src->mem_lvl |= PERF_MEM_LVL_MISS;
- else
- data_src->mem_lvl |= PERF_MEM_LVL_HIT;
- } else if (record->type & (ARM_SPE_L1D_ACCESS | ARM_SPE_L1D_MISS)) {
+static void arm_spe__synth_st_memory_level(const struct arm_spe_record *record,
+ union perf_mem_data_src *data_src)
+{
+ /* Record the greatest level info for a store operation. */
+ if (arm_spe_is_cache_level(record->type, LLC)) {
+ data_src->mem_lvl = PERF_MEM_LVL_L3;
+ data_src->mem_lvl |= arm_spe_is_cache_miss(record->type, LLC) ?
+ PERF_MEM_LVL_MISS : PERF_MEM_LVL_HIT;
+ data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
+ } else if (arm_spe_is_cache_level(record->type, L2D)) {
+ data_src->mem_lvl = PERF_MEM_LVL_L2;
+ data_src->mem_lvl |= arm_spe_is_cache_miss(record->type, L2D) ?
+ PERF_MEM_LVL_MISS : PERF_MEM_LVL_HIT;
+ data_src->mem_lvl_num = PERF_MEM_LVLNUM_L2;
+ } else if (arm_spe_is_cache_level(record->type, L1D)) {
data_src->mem_lvl = PERF_MEM_LVL_L1;
+ data_src->mem_lvl |= arm_spe_is_cache_miss(record->type, L1D) ?
+ PERF_MEM_LVL_MISS : PERF_MEM_LVL_HIT;
+ data_src->mem_lvl_num = PERF_MEM_LVLNUM_L1;
+ }
+}
- if (record->type & ARM_SPE_L1D_MISS)
- data_src->mem_lvl |= PERF_MEM_LVL_MISS;
- else
- data_src->mem_lvl |= PERF_MEM_LVL_HIT;
+static void arm_spe__synth_memory_level(struct arm_spe_queue *speq,
+ const struct arm_spe_record *record,
+ union perf_mem_data_src *data_src)
+{
+ struct arm_spe *spe = speq->spe;
+
+ /*
+ * The data source packet contains more info for cache levels for
+ * peer snooping. So respect the memory level if has been set by
+ * data source parsing.
+ */
+ if (!data_src->mem_lvl) {
+ if (data_src->mem_op == PERF_MEM_OP_LOAD)
+ arm_spe__synth_ld_memory_level(record, data_src);
+ if (data_src->mem_op == PERF_MEM_OP_STORE)
+ arm_spe__synth_st_memory_level(record, data_src);
+ }
+
+ if (!data_src->mem_lvl) {
+ data_src->mem_lvl = PERF_MEM_LVL_NA;
+ data_src->mem_lvl_num = PERF_MEM_LVLNUM_NA;
}
- if (record->type & ARM_SPE_REMOTE_ACCESS)
- data_src->mem_lvl |= PERF_MEM_LVL_REM_CCE1;
+ /*
+ * If 'mem_snoop' has been set by data source packet, skip to set
+ * it at here.
+ */
+ if (!data_src->mem_snoop) {
+ if (record->type & ARM_SPE_DATA_SNOOPED) {
+ if (record->type & ARM_SPE_HITM)
+ data_src->mem_snoop = PERF_MEM_SNOOP_HITM;
+ else
+ data_src->mem_snoop = PERF_MEM_SNOOP_HIT;
+ } else {
+ u64 *metadata =
+ arm_spe__get_metadata_by_cpu(spe, speq->cpu);
+
+ /*
+ * Set NA ("Not available") mode if no meta data or the
+ * SNOOPED event is not supported.
+ */
+ if (!metadata ||
+ !(metadata[ARM_SPE_CAP_EVENT_FILTER] & ARM_SPE_DATA_SNOOPED))
+ data_src->mem_snoop = PERF_MEM_SNOOP_NA;
+ else
+ data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
+ }
+ }
+
+ if (!data_src->mem_remote) {
+ if (record->type & ARM_SPE_REMOTE_ACCESS)
+ data_src->mem_remote = PERF_MEM_REMOTE_REMOTE;
+ }
}
-static bool arm_spe__synth_ds(struct arm_spe_queue *speq,
+static void arm_spe__synth_ds(struct arm_spe_queue *speq,
const struct arm_spe_record *record,
union perf_mem_data_src *data_src)
{
@@ -859,56 +974,40 @@ static bool arm_spe__synth_ds(struct arm_spe_queue *speq,
cpuid = perf_env__cpuid(perf_session__env(spe->session));
midr = strtol(cpuid, NULL, 16);
} else {
- /* CPU ID is -1 for per-thread mode */
- if (speq->cpu < 0) {
- /*
- * On the heterogeneous system, due to CPU ID is -1,
- * cannot confirm the data source packet is supported.
- */
- if (!spe->is_homogeneous)
- return false;
-
- /* In homogeneous system, simply use CPU0's metadata */
- if (spe->metadata)
- metadata = spe->metadata[0];
- } else {
- metadata = arm_spe__get_metadata_by_cpu(spe, speq->cpu);
- }
-
+ metadata = arm_spe__get_metadata_by_cpu(spe, speq->cpu);
if (!metadata)
- return false;
+ return;
midr = metadata[ARM_SPE_CPU_MIDR];
}
for (i = 0; i < ARRAY_SIZE(data_source_handles); i++) {
if (is_midr_in_range_list(midr, data_source_handles[i].midr_ranges)) {
- data_source_handles[i].ds_synth(record, data_src);
- return true;
+ return data_source_handles[i].ds_synth(record, data_src);
}
}
- return false;
+ return;
}
-static u64 arm_spe__synth_data_source(struct arm_spe_queue *speq,
- const struct arm_spe_record *record)
+static union perf_mem_data_src
+arm_spe__synth_data_source(struct arm_spe_queue *speq,
+ const struct arm_spe_record *record)
{
- union perf_mem_data_src data_src = { .mem_op = PERF_MEM_OP_NA };
+ union perf_mem_data_src data_src = {};
- /* Only synthesize data source for LDST operations */
- if (!is_ldst_op(record->op))
- return 0;
+ if (!is_mem_op(record->op))
+ return data_src;
if (record->op & ARM_SPE_OP_LD)
data_src.mem_op = PERF_MEM_OP_LOAD;
else if (record->op & ARM_SPE_OP_ST)
data_src.mem_op = PERF_MEM_OP_STORE;
else
- return 0;
+ data_src.mem_op = PERF_MEM_OP_NA;
- if (!arm_spe__synth_ds(speq, record, &data_src))
- arm_spe__synth_memory_level(record, &data_src);
+ arm_spe__synth_ds(speq, record, &data_src);
+ arm_spe__synth_memory_level(speq, record, &data_src);
if (record->type & (ARM_SPE_TLB_ACCESS | ARM_SPE_TLB_MISS)) {
data_src.mem_dtlb = PERF_MEM_TLB_WK;
@@ -919,16 +1018,24 @@ static u64 arm_spe__synth_data_source(struct arm_spe_queue *speq,
data_src.mem_dtlb |= PERF_MEM_TLB_HIT;
}
- return data_src.val;
+ return data_src;
}
static int arm_spe_sample(struct arm_spe_queue *speq)
{
const struct arm_spe_record *record = &speq->decoder->record;
struct arm_spe *spe = speq->spe;
- u64 data_src;
+ union perf_mem_data_src data_src;
int err;
+ /*
+ * Discard all samples until period is reached
+ */
+ speq->sample_count++;
+ if (speq->sample_count < spe->synth_opts.period)
+ return 0;
+ speq->sample_count = 0;
+
arm_spe__sample_flags(speq);
data_src = arm_spe__synth_data_source(speq, record);
@@ -998,11 +1105,7 @@ static int arm_spe_sample(struct arm_spe_queue *speq)
return err;
}
- /*
- * When data_src is zero it means the record is not a memory operation,
- * skip to synthesize memory sample for this case.
- */
- if (spe->sample_memory && is_ldst_op(record->op)) {
+ if (spe->sample_memory && is_mem_op(record->op)) {
err = arm_spe__synth_mem_sample(speq, spe->memory_id, data_src);
if (err)
return err;
@@ -1532,6 +1635,7 @@ static const char * const metadata_per_cpu_fmts[] = {
[ARM_SPE_CPU_MIDR] = " MIDR :0x%"PRIx64"\n",
[ARM_SPE_CPU_PMU_TYPE] = " PMU Type :%"PRId64"\n",
[ARM_SPE_CAP_MIN_IVAL] = " Min Interval :%"PRId64"\n",
+ [ARM_SPE_CAP_EVENT_FILTER] = " Event Filter :0x%"PRIx64"\n",
};
static void arm_spe_print_info(struct arm_spe *spe, __u64 *arr)
@@ -1628,12 +1732,10 @@ arm_spe_synth_events(struct arm_spe *spe, struct perf_session *session)
attr.exclude_guest = evsel->core.attr.exclude_guest;
attr.sample_id_all = evsel->core.attr.sample_id_all;
attr.read_format = evsel->core.attr.read_format;
+ attr.sample_period = spe->synth_opts.period;
/* create new id val to be a fixed offset from evsel id */
- id = evsel->core.id[0] + 1000000000;
-
- if (!id)
- id = 1;
+ id = auxtrace_synth_id_range_start(evsel);
if (spe->synth_opts.flc) {
spe->sample_flc = true;
@@ -1744,25 +1846,15 @@ arm_spe_synth_events(struct arm_spe *spe, struct perf_session *session)
}
if (spe->synth_opts.instructions) {
- if (spe->synth_opts.period_type != PERF_ITRACE_PERIOD_INSTRUCTIONS) {
- pr_warning("Only instruction-based sampling period is currently supported by Arm SPE.\n");
- goto synth_instructions_out;
- }
- if (spe->synth_opts.period > 1)
- pr_warning("Arm SPE has a hardware-based sample period.\n"
- "Additional instruction events will be discarded by --itrace\n");
-
spe->sample_instructions = true;
attr.config = PERF_COUNT_HW_INSTRUCTIONS;
- attr.sample_period = spe->synth_opts.period;
- spe->instructions_sample_period = attr.sample_period;
+
err = perf_session__deliver_synth_attr_event(session, &attr, id);
if (err)
return err;
spe->instructions_id = id;
arm_spe_set_event_name(evlist, id, "instructions");
}
-synth_instructions_out:
return 0;
}
@@ -1871,10 +1963,23 @@ int arm_spe_process_auxtrace_info(union perf_event *event,
if (dump_trace)
return 0;
- if (session->itrace_synth_opts && session->itrace_synth_opts->set)
+ if (session->itrace_synth_opts && session->itrace_synth_opts->set) {
spe->synth_opts = *session->itrace_synth_opts;
- else
+ } else {
itrace_synth_opts__set_default(&spe->synth_opts, false);
+ /* Default nanoseconds period not supported */
+ spe->synth_opts.period_type = PERF_ITRACE_PERIOD_INSTRUCTIONS;
+ spe->synth_opts.period = 1;
+ }
+
+ if (spe->synth_opts.period_type != PERF_ITRACE_PERIOD_INSTRUCTIONS) {
+ ui__error("You must only use i (instructions) --itrace period with Arm SPE. e.g --itrace=i1i\n");
+ err = -EINVAL;
+ goto err_free_queues;
+ }
+ if (spe->synth_opts.period > 1)
+ ui__warning("Arm SPE has a hardware-based sampling period.\n\n"
+ "--itrace periods > 1i downsample by an interval of n SPE samples rather than n instructions.\n");
err = arm_spe_synth_events(spe, session);
if (err)
diff --git a/tools/perf/util/arm-spe.h b/tools/perf/util/arm-spe.h
index 390679a4af2f..3966df1856d8 100644
--- a/tools/perf/util/arm-spe.h
+++ b/tools/perf/util/arm-spe.h
@@ -47,6 +47,8 @@ enum {
ARM_SPE_CPU_PMU_TYPE,
/* Minimal interval */
ARM_SPE_CAP_MIN_IVAL,
+ /* Event filter */
+ ARM_SPE_CAP_EVENT_FILTER,
ARM_SPE_CPU_PRIV_MAX,
};
diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c
index ebd32f1b8f12..a224687ffbc1 100644
--- a/tools/perf/util/auxtrace.c
+++ b/tools/perf/util/auxtrace.c
@@ -55,12 +55,29 @@
#include "hisi-ptt.h"
#include "s390-cpumsf.h"
#include "util/mmap.h"
+#include "powerpc-vpadtl.h"
#include <linux/ctype.h>
#include "symbol/kallsyms.h"
#include <internal/lib.h>
#include "util/sample.h"
+#define AUXTRACE_SYNTH_EVENT_ID_OFFSET 1000000000ULL
+
+/*
+ * Event IDs are allocated sequentially, so a big offset from any
+ * existing ID will reach a unused range.
+ */
+u64 auxtrace_synth_id_range_start(struct evsel *evsel)
+{
+ u64 id = evsel->core.id[0] + AUXTRACE_SYNTH_EVENT_ID_OFFSET;
+
+ if (!id)
+ id = 1;
+
+ return id;
+}
+
/*
* Make a group from 'leader' to 'last', requiring that the events were not
* already grouped to a different leader.
@@ -185,10 +202,7 @@ void auxtrace_mmap_params__set_idx(struct auxtrace_mmap_params *mp,
if (per_cpu) {
mp->cpu = perf_cpu_map__cpu(evlist->core.all_cpus, idx);
- if (evlist->core.threads)
- mp->tid = perf_thread_map__pid(evlist->core.threads, 0);
- else
- mp->tid = -1;
+ mp->tid = perf_thread_map__pid(evlist->core.threads, 0);
} else {
mp->cpu.cpu = -1;
mp->tid = perf_thread_map__pid(evlist->core.threads, idx);
@@ -1365,7 +1379,8 @@ static void unleader_auxtrace(struct perf_session *session)
}
}
-int perf_event__process_auxtrace_info(struct perf_session *session,
+int perf_event__process_auxtrace_info(const struct perf_tool *tool __maybe_unused,
+ struct perf_session *session,
union perf_event *event)
{
enum auxtrace_type type = event->auxtrace_info.type;
@@ -1393,6 +1408,9 @@ int perf_event__process_auxtrace_info(struct perf_session *session,
case PERF_AUXTRACE_HISI_PTT:
err = hisi_ptt_process_auxtrace_info(event, session);
break;
+ case PERF_AUXTRACE_VPA_DTL:
+ err = powerpc_vpadtl_process_auxtrace_info(event, session);
+ break;
case PERF_AUXTRACE_UNKNOWN:
default:
return -EINVAL;
@@ -1406,7 +1424,8 @@ int perf_event__process_auxtrace_info(struct perf_session *session,
return 0;
}
-s64 perf_event__process_auxtrace(struct perf_session *session,
+s64 perf_event__process_auxtrace(const struct perf_tool *tool __maybe_unused,
+ struct perf_session *session,
union perf_event *event)
{
s64 err;
@@ -1803,7 +1822,8 @@ void events_stats__auxtrace_error_warn(const struct events_stats *stats)
}
}
-int perf_event__process_auxtrace_error(struct perf_session *session,
+int perf_event__process_auxtrace_error(const struct perf_tool *tool __maybe_unused,
+ struct perf_session *session,
union perf_event *event)
{
if (auxtrace__dont_decode(session))
diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h
index f001cbb68f8e..6947f3f284c0 100644
--- a/tools/perf/util/auxtrace.h
+++ b/tools/perf/util/auxtrace.h
@@ -8,16 +8,11 @@
#define __PERF_AUXTRACE_H
#include <sys/types.h>
-#include <errno.h>
-#include <stdbool.h>
-#include <stddef.h>
#include <stdio.h> // FILE
-#include <linux/list.h>
#include <linux/perf_event.h>
#include <linux/types.h>
-#include <perf/cpumap.h>
-#include <asm/bitsperlong.h>
#include <asm/barrier.h>
+#include <perf/cpumap.h>
union perf_event;
struct perf_session;
@@ -50,6 +45,7 @@ enum auxtrace_type {
PERF_AUXTRACE_ARM_SPE,
PERF_AUXTRACE_S390_CPUMSF,
PERF_AUXTRACE_HISI_PTT,
+ PERF_AUXTRACE_VPA_DTL,
};
enum itrace_period_type {
@@ -458,8 +454,6 @@ struct addr_filters {
struct auxtrace_cache;
-#ifdef HAVE_AUXTRACE_SUPPORT
-
u64 compat_auxtrace_mmap__read_head(struct auxtrace_mmap *mm);
int compat_auxtrace_mmap__write_tail(struct auxtrace_mmap *mm, u64 tail);
@@ -614,11 +608,14 @@ void auxtrace_synth_error(struct perf_record_auxtrace_error *auxtrace_error, int
int code, int cpu, pid_t pid, pid_t tid, u64 ip,
const char *msg, u64 timestamp);
-int perf_event__process_auxtrace_info(struct perf_session *session,
+int perf_event__process_auxtrace_info(const struct perf_tool *tool,
+ struct perf_session *session,
union perf_event *event);
-s64 perf_event__process_auxtrace(struct perf_session *session,
+s64 perf_event__process_auxtrace(const struct perf_tool *tool,
+ struct perf_session *session,
union perf_event *event);
-int perf_event__process_auxtrace_error(struct perf_session *session,
+int perf_event__process_auxtrace_error(const struct perf_tool *tool,
+ struct perf_session *session,
union perf_event *event);
int itrace_do_parse_synth_opts(struct itrace_synth_opts *synth_opts,
const char *str, int unset);
@@ -647,6 +644,7 @@ void auxtrace__free_events(struct perf_session *session);
void auxtrace__free(struct perf_session *session);
bool auxtrace__evsel_is_auxtrace(struct perf_session *session,
struct evsel *evsel);
+u64 auxtrace_synth_id_range_start(struct evsel *evsel);
#define ITRACE_HELP \
" i[period]: synthesize instructions events\n" \
@@ -701,212 +699,4 @@ void itrace_synth_opts__clear_time_range(struct itrace_synth_opts *opts)
opts->range_num = 0;
}
-#else
-#include "debug.h"
-
-static inline struct auxtrace_record *
-auxtrace_record__init(struct evlist *evlist __maybe_unused,
- int *err)
-{
- *err = 0;
- return NULL;
-}
-
-static inline
-void auxtrace_record__free(struct auxtrace_record *itr __maybe_unused)
-{
-}
-
-static inline
-int auxtrace_record__options(struct auxtrace_record *itr __maybe_unused,
- struct evlist *evlist __maybe_unused,
- struct record_opts *opts __maybe_unused)
-{
- return 0;
-}
-
-static inline
-int perf_event__process_auxtrace_info(struct perf_session *session __maybe_unused,
- union perf_event *event __maybe_unused)
-{
- return 0;
-}
-
-static inline
-s64 perf_event__process_auxtrace(struct perf_session *session __maybe_unused,
- union perf_event *event __maybe_unused)
-{
- return 0;
-}
-
-static inline
-int perf_event__process_auxtrace_error(struct perf_session *session __maybe_unused,
- union perf_event *event __maybe_unused)
-{
- return 0;
-}
-
-static inline
-void perf_session__auxtrace_error_inc(struct perf_session *session
- __maybe_unused,
- union perf_event *event
- __maybe_unused)
-{
-}
-
-static inline
-void events_stats__auxtrace_error_warn(const struct events_stats *stats
- __maybe_unused)
-{
-}
-
-static inline
-int itrace_do_parse_synth_opts(struct itrace_synth_opts *synth_opts __maybe_unused,
- const char *str __maybe_unused, int unset __maybe_unused)
-{
- pr_err("AUX area tracing not supported\n");
- return -EINVAL;
-}
-
-static inline
-int itrace_parse_synth_opts(const struct option *opt __maybe_unused,
- const char *str __maybe_unused,
- int unset __maybe_unused)
-{
- pr_err("AUX area tracing not supported\n");
- return -EINVAL;
-}
-
-static inline
-int auxtrace_parse_snapshot_options(struct auxtrace_record *itr __maybe_unused,
- struct record_opts *opts __maybe_unused,
- const char *str)
-{
- if (!str)
- return 0;
- pr_err("AUX area tracing not supported\n");
- return -EINVAL;
-}
-
-static inline
-int auxtrace_parse_sample_options(struct auxtrace_record *itr __maybe_unused,
- struct evlist *evlist __maybe_unused,
- struct record_opts *opts __maybe_unused,
- const char *str)
-{
- if (!str)
- return 0;
- pr_err("AUX area tracing not supported\n");
- return -EINVAL;
-}
-
-static inline
-int auxtrace_parse_aux_action(struct evlist *evlist __maybe_unused)
-{
- pr_err("AUX area tracing not supported\n");
- return -EINVAL;
-}
-
-static inline
-int auxtrace__process_event(struct perf_session *session __maybe_unused,
- union perf_event *event __maybe_unused,
- struct perf_sample *sample __maybe_unused,
- const struct perf_tool *tool __maybe_unused)
-{
- return 0;
-}
-
-static inline
-void auxtrace__dump_auxtrace_sample(struct perf_session *session __maybe_unused,
- struct perf_sample *sample __maybe_unused)
-{
-}
-
-static inline
-int auxtrace__flush_events(struct perf_session *session __maybe_unused,
- const struct perf_tool *tool __maybe_unused)
-{
- return 0;
-}
-
-static inline
-void auxtrace__free_events(struct perf_session *session __maybe_unused)
-{
-}
-
-static inline
-void auxtrace_cache__free(struct auxtrace_cache *auxtrace_cache __maybe_unused)
-{
-}
-
-static inline
-void auxtrace__free(struct perf_session *session __maybe_unused)
-{
-}
-
-static inline
-int auxtrace_index__write(int fd __maybe_unused,
- struct list_head *head __maybe_unused)
-{
- return -EINVAL;
-}
-
-static inline
-int auxtrace_index__process(int fd __maybe_unused,
- u64 size __maybe_unused,
- struct perf_session *session __maybe_unused,
- bool needs_swap __maybe_unused)
-{
- return -EINVAL;
-}
-
-static inline
-void auxtrace_index__free(struct list_head *head __maybe_unused)
-{
-}
-
-static inline
-bool auxtrace__evsel_is_auxtrace(struct perf_session *session __maybe_unused,
- struct evsel *evsel __maybe_unused)
-{
- return false;
-}
-
-static inline
-int auxtrace_parse_filters(struct evlist *evlist __maybe_unused)
-{
- return 0;
-}
-
-int auxtrace_mmap__mmap(struct auxtrace_mmap *mm,
- struct auxtrace_mmap_params *mp,
- void *userpg, int fd);
-void auxtrace_mmap__munmap(struct auxtrace_mmap *mm);
-void auxtrace_mmap_params__init(struct auxtrace_mmap_params *mp,
- off_t auxtrace_offset,
- unsigned int auxtrace_pages,
- bool auxtrace_overwrite);
-void auxtrace_mmap_params__set_idx(struct auxtrace_mmap_params *mp,
- struct evlist *evlist,
- struct evsel *evsel, int idx);
-
-#define ITRACE_HELP ""
-
-static inline
-void itrace_synth_opts__set_time_range(struct itrace_synth_opts *opts
- __maybe_unused,
- struct perf_time_interval *ptime_range
- __maybe_unused,
- int range_num __maybe_unused)
-{
-}
-
-static inline
-void itrace_synth_opts__clear_time_range(struct itrace_synth_opts *opts
- __maybe_unused)
-{
-}
-
-#endif
-
#endif
diff --git a/tools/perf/util/bpf-event.c b/tools/perf/util/bpf-event.c
index 5b6d3e899e11..2298cd396c42 100644
--- a/tools/perf/util/bpf-event.c
+++ b/tools/perf/util/bpf-event.c
@@ -657,9 +657,15 @@ static int perf_event__synthesize_one_bpf_prog(struct perf_session *session,
info_node->info_linear = info_linear;
info_node->metadata = NULL;
if (!perf_env__insert_bpf_prog_info(env, info_node)) {
- free(info_linear);
+ /*
+ * Insert failed, likely because of a duplicate event
+ * made by the sideband thread. Ignore synthesizing the
+ * metadata.
+ */
free(info_node);
+ goto out;
}
+ /* info_linear is now owned by info_node and shouldn't be freed below. */
info_linear = NULL;
/*
@@ -827,18 +833,18 @@ int perf_event__synthesize_bpf_events(struct perf_session *session,
return err;
}
-static void perf_env__add_bpf_info(struct perf_env *env, u32 id)
+static int perf_env__add_bpf_info(struct perf_env *env, u32 id)
{
struct bpf_prog_info_node *info_node;
struct perf_bpil *info_linear;
struct btf *btf = NULL;
u64 arrays;
u32 btf_id;
- int fd;
+ int fd, err = 0;
fd = bpf_prog_get_fd_by_id(id);
if (fd < 0)
- return;
+ return -EINVAL;
arrays = 1UL << PERF_BPIL_JITED_KSYMS;
arrays |= 1UL << PERF_BPIL_JITED_FUNC_LENS;
@@ -852,6 +858,7 @@ static void perf_env__add_bpf_info(struct perf_env *env, u32 id)
info_linear = get_bpf_prog_info_linear(fd, arrays);
if (IS_ERR_OR_NULL(info_linear)) {
pr_debug("%s: failed to get BPF program info. aborting\n", __func__);
+ err = PTR_ERR(info_linear);
goto out;
}
@@ -862,38 +869,46 @@ static void perf_env__add_bpf_info(struct perf_env *env, u32 id)
info_node->info_linear = info_linear;
info_node->metadata = bpf_metadata_create(&info_linear->info);
if (!perf_env__insert_bpf_prog_info(env, info_node)) {
+ pr_debug("%s: duplicate add bpf info request for id %u\n",
+ __func__, btf_id);
free(info_linear);
free(info_node);
+ goto out;
}
- } else
+ } else {
free(info_linear);
+ err = -ENOMEM;
+ goto out;
+ }
if (btf_id == 0)
goto out;
btf = btf__load_from_kernel_by_id(btf_id);
- if (libbpf_get_error(btf)) {
- pr_debug("%s: failed to get BTF of id %u, aborting\n",
- __func__, btf_id);
- goto out;
+ if (!btf) {
+ err = -errno;
+ pr_debug("%s: failed to get BTF of id %u %d\n", __func__, btf_id, err);
+ } else {
+ perf_env__fetch_btf(env, btf_id, btf);
}
- perf_env__fetch_btf(env, btf_id, btf);
out:
btf__free(btf);
close(fd);
+ return err;
}
static int bpf_event__sb_cb(union perf_event *event, void *data)
{
struct perf_env *env = data;
+ int ret = 0;
if (event->header.type != PERF_RECORD_BPF_EVENT)
return -1;
switch (event->bpf.type) {
case PERF_BPF_EVENT_PROG_LOAD:
- perf_env__add_bpf_info(env, event->bpf.id);
+ ret = perf_env__add_bpf_info(env, event->bpf.id);
case PERF_BPF_EVENT_PROG_UNLOAD:
/*
@@ -907,7 +922,7 @@ static int bpf_event__sb_cb(union perf_event *event, void *data)
break;
}
- return 0;
+ return ret;
}
int evlist__add_bpf_sb_event(struct evlist *evlist, struct perf_env *env)
diff --git a/tools/perf/util/bpf-filter.c b/tools/perf/util/bpf-filter.c
index a0b11f35395f..1a2e7b388d57 100644
--- a/tools/perf/util/bpf-filter.c
+++ b/tools/perf/util/bpf-filter.c
@@ -56,6 +56,7 @@
#include "util/debug.h"
#include "util/evsel.h"
#include "util/target.h"
+#include "util/bpf-utils.h"
#include "util/bpf-filter.h"
#include <util/bpf-filter-flex.h>
@@ -451,8 +452,12 @@ int perf_bpf_filter__prepare(struct evsel *evsel, struct target *target)
struct bpf_link *link;
struct perf_bpf_filter_entry *entry;
bool needs_idx_hash = !target__has_cpu(target);
+#if LIBBPF_CURRENT_VERSION_GEQ(1, 7)
DECLARE_LIBBPF_OPTS(bpf_perf_event_opts, pe_opts,
.dont_enable = true);
+#else
+ DECLARE_LIBBPF_OPTS(bpf_perf_event_opts, pe_opts);
+#endif
entry = calloc(MAX_FILTERS, sizeof(*entry));
if (entry == NULL)
diff --git a/tools/perf/util/bpf-filter.h b/tools/perf/util/bpf-filter.h
index 122477f2de44..818c554b91b2 100644
--- a/tools/perf/util/bpf-filter.h
+++ b/tools/perf/util/bpf-filter.h
@@ -36,6 +36,8 @@ int perf_bpf_filter__unpin(void);
#else /* !HAVE_BPF_SKEL */
+#include <errno.h>
+
static inline int perf_bpf_filter__parse(struct list_head *expr_head __maybe_unused,
const char *str __maybe_unused)
{
diff --git a/tools/perf/util/bpf-trace-summary.c b/tools/perf/util/bpf-trace-summary.c
index 69fb165da206..cf6e1e4402d5 100644
--- a/tools/perf/util/bpf-trace-summary.c
+++ b/tools/perf/util/bpf-trace-summary.c
@@ -1,4 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
+#include <errno.h>
#include <inttypes.h>
#include <math.h>
#include <stdio.h>
@@ -138,11 +139,14 @@ static bool sc_node_equal(long key1, long key2, void *ctx __maybe_unused)
return key1 == key2;
}
-static int print_common_stats(struct syscall_data *data, FILE *fp)
+static int print_common_stats(struct syscall_data *data, int max_summary, FILE *fp)
{
int printed = 0;
- for (int i = 0; i < data->nr_nodes; i++) {
+ if (max_summary == 0 || max_summary > data->nr_nodes)
+ max_summary = data->nr_nodes;
+
+ for (int i = 0; i < max_summary; i++) {
struct syscall_node *node = &data->nodes[i];
struct syscall_stats *stat = &node->stats;
double total = (double)(stat->total_time) / NSEC_PER_MSEC;
@@ -200,7 +204,7 @@ static int update_thread_stats(struct hashmap *hash, struct syscall_key *map_key
return 0;
}
-static int print_thread_stat(struct syscall_data *data, FILE *fp)
+static int print_thread_stat(struct syscall_data *data, int max_summary, FILE *fp)
{
int printed = 0;
@@ -213,18 +217,18 @@ static int print_thread_stat(struct syscall_data *data, FILE *fp)
printed += fprintf(fp, " (msec) (msec) (msec) (msec) (%%)\n");
printed += fprintf(fp, " --------------- -------- ------ -------- --------- --------- --------- ------\n");
- printed += print_common_stats(data, fp);
+ printed += print_common_stats(data, max_summary, fp);
printed += fprintf(fp, "\n\n");
return printed;
}
-static int print_thread_stats(struct syscall_data **data, int nr_data, FILE *fp)
+static int print_thread_stats(struct syscall_data **data, int nr_data, int max_summary, FILE *fp)
{
int printed = 0;
for (int i = 0; i < nr_data; i++)
- printed += print_thread_stat(data[i], fp);
+ printed += print_thread_stat(data[i], max_summary, fp);
return printed;
}
@@ -277,7 +281,7 @@ static int update_total_stats(struct hashmap *hash, struct syscall_key *map_key,
return 0;
}
-static int print_total_stats(struct syscall_data **data, int nr_data, FILE *fp)
+static int print_total_stats(struct syscall_data **data, int nr_data, int max_summary, FILE *fp)
{
int printed = 0;
int nr_events = 0;
@@ -291,8 +295,11 @@ static int print_total_stats(struct syscall_data **data, int nr_data, FILE *fp)
printed += fprintf(fp, " (msec) (msec) (msec) (msec) (%%)\n");
printed += fprintf(fp, " --------------- -------- ------ -------- --------- --------- --------- ------\n");
- for (int i = 0; i < nr_data; i++)
- printed += print_common_stats(data[i], fp);
+ if (max_summary == 0 || max_summary > nr_data)
+ max_summary = nr_data;
+
+ for (int i = 0; i < max_summary; i++)
+ printed += print_common_stats(data[i], max_summary, fp);
printed += fprintf(fp, "\n\n");
return printed;
@@ -333,7 +340,7 @@ static int update_cgroup_stats(struct hashmap *hash, struct syscall_key *map_key
return 0;
}
-static int print_cgroup_stat(struct syscall_data *data, FILE *fp)
+static int print_cgroup_stat(struct syscall_data *data, int max_summary, FILE *fp)
{
int printed = 0;
struct cgroup *cgrp = __cgroup__find(&cgroups, data->key);
@@ -351,23 +358,23 @@ static int print_cgroup_stat(struct syscall_data *data, FILE *fp)
printed += fprintf(fp, " (msec) (msec) (msec) (msec) (%%)\n");
printed += fprintf(fp, " --------------- -------- ------ -------- --------- --------- --------- ------\n");
- printed += print_common_stats(data, fp);
+ printed += print_common_stats(data, max_summary, fp);
printed += fprintf(fp, "\n\n");
return printed;
}
-static int print_cgroup_stats(struct syscall_data **data, int nr_data, FILE *fp)
+static int print_cgroup_stats(struct syscall_data **data, int nr_data, int max_summary, FILE *fp)
{
int printed = 0;
for (int i = 0; i < nr_data; i++)
- printed += print_cgroup_stat(data[i], fp);
+ printed += print_cgroup_stat(data[i], max_summary, fp);
return printed;
}
-int trace_print_bpf_summary(FILE *fp)
+int trace_print_bpf_summary(FILE *fp, int max_summary)
{
struct bpf_map *map = skel->maps.syscall_stats_map;
struct syscall_key *prev_key, key;
@@ -420,13 +427,13 @@ int trace_print_bpf_summary(FILE *fp)
switch (skel->rodata->aggr_mode) {
case SYSCALL_AGGR_THREAD:
- printed += print_thread_stats(data, nr_data, fp);
+ printed += print_thread_stats(data, nr_data, max_summary, fp);
break;
case SYSCALL_AGGR_CPU:
- printed += print_total_stats(data, nr_data, fp);
+ printed += print_total_stats(data, nr_data, max_summary, fp);
break;
case SYSCALL_AGGR_CGROUP:
- printed += print_cgroup_stats(data, nr_data, fp);
+ printed += print_cgroup_stats(data, nr_data, max_summary, fp);
break;
default:
break;
diff --git a/tools/perf/util/bpf-utils.c b/tools/perf/util/bpf-utils.c
index 80b1d2b3729b..5a66dc8594aa 100644
--- a/tools/perf/util/bpf-utils.c
+++ b/tools/perf/util/bpf-utils.c
@@ -20,7 +20,7 @@ struct bpil_array_desc {
*/
};
-static struct bpil_array_desc bpil_array_desc[] = {
+static const struct bpil_array_desc bpil_array_desc[] = {
[PERF_BPIL_JITED_INSNS] = {
offsetof(struct bpf_prog_info, jited_prog_insns),
offsetof(struct bpf_prog_info, jited_prog_len),
@@ -115,7 +115,7 @@ get_bpf_prog_info_linear(int fd, __u64 arrays)
__u32 info_len = sizeof(info);
__u32 data_len = 0;
int i, err;
- void *ptr;
+ __u8 *ptr;
if (arrays >> PERF_BPIL_LAST_ARRAY)
return ERR_PTR(-EINVAL);
@@ -126,15 +126,15 @@ get_bpf_prog_info_linear(int fd, __u64 arrays)
pr_debug("can't get prog info: %s", strerror(errno));
return ERR_PTR(-EFAULT);
}
+ if (info.type >= __MAX_BPF_PROG_TYPE)
+ pr_debug("%s:%d: unexpected program type %u\n", __func__, __LINE__, info.type);
/* step 2: calculate total size of all arrays */
for (i = PERF_BPIL_FIRST_ARRAY; i < PERF_BPIL_LAST_ARRAY; ++i) {
+ const struct bpil_array_desc *desc = &bpil_array_desc[i];
bool include_array = (arrays & (1UL << i)) > 0;
- struct bpil_array_desc *desc;
__u32 count, size;
- desc = bpil_array_desc + i;
-
/* kernel is too old to support this field */
if (info_len < desc->array_offset + sizeof(__u32) ||
info_len < desc->count_offset + sizeof(__u32) ||
@@ -163,19 +163,20 @@ get_bpf_prog_info_linear(int fd, __u64 arrays)
ptr = info_linear->data;
for (i = PERF_BPIL_FIRST_ARRAY; i < PERF_BPIL_LAST_ARRAY; ++i) {
- struct bpil_array_desc *desc;
+ const struct bpil_array_desc *desc = &bpil_array_desc[i];
__u32 count, size;
if ((arrays & (1UL << i)) == 0)
continue;
- desc = bpil_array_desc + i;
count = bpf_prog_info_read_offset_u32(&info, desc->count_offset);
size = bpf_prog_info_read_offset_u32(&info, desc->size_offset);
bpf_prog_info_set_offset_u32(&info_linear->info,
desc->count_offset, count);
bpf_prog_info_set_offset_u32(&info_linear->info,
desc->size_offset, size);
+ assert(ptr >= info_linear->data);
+ assert(ptr < &info_linear->data[data_len]);
bpf_prog_info_set_offset_u64(&info_linear->info,
desc->array_offset,
ptr_to_u64(ptr));
@@ -189,27 +190,45 @@ get_bpf_prog_info_linear(int fd, __u64 arrays)
free(info_linear);
return ERR_PTR(-EFAULT);
}
+ if (info_linear->info.type >= __MAX_BPF_PROG_TYPE) {
+ pr_debug("%s:%d: unexpected program type %u\n",
+ __func__, __LINE__, info_linear->info.type);
+ }
/* step 6: verify the data */
+ ptr = info_linear->data;
for (i = PERF_BPIL_FIRST_ARRAY; i < PERF_BPIL_LAST_ARRAY; ++i) {
- struct bpil_array_desc *desc;
- __u32 v1, v2;
+ const struct bpil_array_desc *desc = &bpil_array_desc[i];
+ __u32 count1, count2, size1, size2;
+ __u64 ptr2;
if ((arrays & (1UL << i)) == 0)
continue;
- desc = bpil_array_desc + i;
- v1 = bpf_prog_info_read_offset_u32(&info, desc->count_offset);
- v2 = bpf_prog_info_read_offset_u32(&info_linear->info,
+ count1 = bpf_prog_info_read_offset_u32(&info, desc->count_offset);
+ count2 = bpf_prog_info_read_offset_u32(&info_linear->info,
desc->count_offset);
- if (v1 != v2)
- pr_warning("%s: mismatch in element count\n", __func__);
+ if (count1 != count2) {
+ pr_warning("%s: mismatch in element count %u vs %u\n", __func__, count1, count2);
+ free(info_linear);
+ return ERR_PTR(-ERANGE);
+ }
- v1 = bpf_prog_info_read_offset_u32(&info, desc->size_offset);
- v2 = bpf_prog_info_read_offset_u32(&info_linear->info,
+ size1 = bpf_prog_info_read_offset_u32(&info, desc->size_offset);
+ size2 = bpf_prog_info_read_offset_u32(&info_linear->info,
desc->size_offset);
- if (v1 != v2)
- pr_warning("%s: mismatch in rec size\n", __func__);
+ if (size1 != size2) {
+ pr_warning("%s: mismatch in rec size %u vs %u\n", __func__, size1, size2);
+ free(info_linear);
+ return ERR_PTR(-ERANGE);
+ }
+ ptr2 = bpf_prog_info_read_offset_u64(&info_linear->info, desc->array_offset);
+ if (ptr_to_u64(ptr) != ptr2) {
+ pr_warning("%s: mismatch in array %p vs %llx\n", __func__, ptr, ptr2);
+ free(info_linear);
+ return ERR_PTR(-ERANGE);
+ }
+ ptr += roundup(count1 * size1, sizeof(__u64));
}
/* step 7: update info_len and data_len */
@@ -224,13 +243,12 @@ void bpil_addr_to_offs(struct perf_bpil *info_linear)
int i;
for (i = PERF_BPIL_FIRST_ARRAY; i < PERF_BPIL_LAST_ARRAY; ++i) {
- struct bpil_array_desc *desc;
+ const struct bpil_array_desc *desc = &bpil_array_desc[i];
__u64 addr, offs;
if ((info_linear->arrays & (1UL << i)) == 0)
continue;
- desc = bpil_array_desc + i;
addr = bpf_prog_info_read_offset_u64(&info_linear->info,
desc->array_offset);
offs = addr - ptr_to_u64(info_linear->data);
@@ -244,13 +262,12 @@ void bpil_offs_to_addr(struct perf_bpil *info_linear)
int i;
for (i = PERF_BPIL_FIRST_ARRAY; i < PERF_BPIL_LAST_ARRAY; ++i) {
- struct bpil_array_desc *desc;
+ const struct bpil_array_desc *desc = &bpil_array_desc[i];
__u64 addr, offs;
if ((info_linear->arrays & (1UL << i)) == 0)
continue;
- desc = bpil_array_desc + i;
offs = bpf_prog_info_read_offset_u64(&info_linear->info,
desc->array_offset);
addr = offs + ptr_to_u64(info_linear->data);
diff --git a/tools/perf/util/bpf-utils.h b/tools/perf/util/bpf-utils.h
index 86a5055cdfad..a8bc1a232968 100644
--- a/tools/perf/util/bpf-utils.h
+++ b/tools/perf/util/bpf-utils.h
@@ -8,6 +8,16 @@
#ifdef HAVE_LIBBPF_SUPPORT
#include <bpf/libbpf.h>
+#include <bpf/libbpf_version.h>
+
+#define LIBBPF_CURRENT_VERSION_GEQ(major, minor) \
+ (LIBBPF_MAJOR_VERSION > (major) || \
+ (LIBBPF_MAJOR_VERSION == (major) && LIBBPF_MINOR_VERSION >= (minor)))
+
+#if LIBBPF_CURRENT_VERSION_GEQ(1, 7)
+// libbpf 1.7+ support the btf_dump_type_data_opts.emit_strings option.
+#define HAVE_LIBBPF_STRINGS_SUPPORT 1
+#endif
/*
* Get bpf_prog_info in continuous memory
diff --git a/tools/perf/util/bpf_counter.c b/tools/perf/util/bpf_counter.c
index 73fcafbffc6a..a5882b582205 100644
--- a/tools/perf/util/bpf_counter.c
+++ b/tools/perf/util/bpf_counter.c
@@ -6,10 +6,14 @@
#include <limits.h>
#include <unistd.h>
#include <sys/file.h>
+#include <sys/resource.h>
#include <sys/time.h>
#include <linux/err.h>
+#include <linux/list.h>
#include <linux/zalloc.h>
#include <api/fs/fs.h>
+#include <bpf/bpf.h>
+#include <bpf/btf.h>
#include <perf/bpf_perf.h>
#include "bpf_counter.h"
@@ -28,13 +32,67 @@
#include "bpf_skel/bperf_leader.skel.h"
#include "bpf_skel/bperf_follower.skel.h"
+struct bpf_counter {
+ void *skel;
+ struct list_head list;
+};
+
#define ATTR_MAP_SIZE 16
-static inline void *u64_to_ptr(__u64 ptr)
+static void *u64_to_ptr(__u64 ptr)
{
return (void *)(unsigned long)ptr;
}
+
+void set_max_rlimit(void)
+{
+ struct rlimit rinf = { RLIM_INFINITY, RLIM_INFINITY };
+
+ setrlimit(RLIMIT_MEMLOCK, &rinf);
+}
+
+static __u32 bpf_link_get_id(int fd)
+{
+ struct bpf_link_info link_info = { .id = 0, };
+ __u32 link_info_len = sizeof(link_info);
+
+ bpf_obj_get_info_by_fd(fd, &link_info, &link_info_len);
+ return link_info.id;
+}
+
+static __u32 bpf_link_get_prog_id(int fd)
+{
+ struct bpf_link_info link_info = { .id = 0, };
+ __u32 link_info_len = sizeof(link_info);
+
+ bpf_obj_get_info_by_fd(fd, &link_info, &link_info_len);
+ return link_info.prog_id;
+}
+
+static __u32 bpf_map_get_id(int fd)
+{
+ struct bpf_map_info map_info = { .id = 0, };
+ __u32 map_info_len = sizeof(map_info);
+
+ bpf_obj_get_info_by_fd(fd, &map_info, &map_info_len);
+ return map_info.id;
+}
+
+/* trigger the leader program on a cpu */
+int bperf_trigger_reading(int prog_fd, int cpu)
+{
+ DECLARE_LIBBPF_OPTS(bpf_test_run_opts, opts,
+ .ctx_in = NULL,
+ .ctx_size_in = 0,
+ .flags = BPF_F_TEST_RUN_ON_CPU,
+ .cpu = cpu,
+ .retval = 0,
+ );
+
+ return bpf_prog_test_run_opts(prog_fd, &opts);
+}
+
static struct bpf_counter *bpf_counter_alloc(void)
{
struct bpf_counter *counter;
@@ -278,6 +336,7 @@ static int bpf_program_profiler__install_pe(struct evsel *evsel, int cpu_map_idx
{
struct bpf_prog_profiler_bpf *skel;
struct bpf_counter *counter;
+ int cpu = perf_cpu_map__cpu(evsel->core.cpus, cpu_map_idx).cpu;
int ret;
list_for_each_entry(counter, &evsel->bpf_counter_list, list) {
@@ -285,7 +344,7 @@ static int bpf_program_profiler__install_pe(struct evsel *evsel, int cpu_map_idx
assert(skel != NULL);
ret = bpf_map_update_elem(bpf_map__fd(skel->maps.events),
- &cpu_map_idx, &fd, BPF_ANY);
+ &cpu, &fd, BPF_ANY);
if (ret)
return ret;
}
@@ -393,7 +452,6 @@ static int bperf_check_target(struct evsel *evsel,
return 0;
}
-static struct perf_cpu_map *all_cpu_map;
static __u32 filter_entry_cnt;
static int bperf_reload_leader_program(struct evsel *evsel, int attr_map_fd,
@@ -402,6 +460,7 @@ static int bperf_reload_leader_program(struct evsel *evsel, int attr_map_fd,
struct bperf_leader_bpf *skel = bperf_leader_bpf__open();
int link_fd, diff_map_fd, err;
struct bpf_link *link = NULL;
+ struct perf_thread_map *threads;
if (!skel) {
pr_err("Failed to open leader skeleton\n");
@@ -437,7 +496,11 @@ static int bperf_reload_leader_program(struct evsel *evsel, int attr_map_fd,
* following evsel__open_per_cpu call
*/
evsel->leader_skel = skel;
- evsel__open_per_cpu(evsel, all_cpu_map, -1);
+ assert(!perf_cpu_map__has_any_cpu_or_is_empty(evsel->core.cpus));
+ /* Always open system wide. */
+ threads = thread_map__new_by_tid(-1);
+ evsel__open(evsel, evsel->core.cpus, threads);
+ perf_thread_map__put(threads);
out:
bperf_leader_bpf__destroy(skel);
@@ -475,12 +538,6 @@ static int bperf__load(struct evsel *evsel, struct target *target)
if (bperf_check_target(evsel, target, &filter_type, &filter_entry_cnt))
return -1;
- if (!all_cpu_map) {
- all_cpu_map = perf_cpu_map__new_online_cpus();
- if (!all_cpu_map)
- return -1;
- }
-
evsel->bperf_leader_prog_fd = -1;
evsel->bperf_leader_link_fd = -1;
@@ -598,9 +655,10 @@ out:
static int bperf__install_pe(struct evsel *evsel, int cpu_map_idx, int fd)
{
struct bperf_leader_bpf *skel = evsel->leader_skel;
+ int cpu = perf_cpu_map__cpu(evsel->core.cpus, cpu_map_idx).cpu;
return bpf_map_update_elem(bpf_map__fd(skel->maps.events),
- &cpu_map_idx, &fd, BPF_ANY);
+ &cpu, &fd, BPF_ANY);
}
/*
@@ -609,13 +667,12 @@ static int bperf__install_pe(struct evsel *evsel, int cpu_map_idx, int fd)
*/
static int bperf_sync_counters(struct evsel *evsel)
{
- int num_cpu, i, cpu;
+ struct perf_cpu cpu;
+ int idx;
+
+ perf_cpu_map__for_each_cpu(cpu, idx, evsel->core.cpus)
+ bperf_trigger_reading(evsel->bperf_leader_prog_fd, cpu.cpu);
- num_cpu = perf_cpu_map__nr(all_cpu_map);
- for (i = 0; i < num_cpu; i++) {
- cpu = perf_cpu_map__cpu(all_cpu_map, i).cpu;
- bperf_trigger_reading(evsel->bperf_leader_prog_fd, cpu);
- }
return 0;
}
@@ -785,7 +842,7 @@ struct bpf_counter_ops bperf_ops = {
extern struct bpf_counter_ops bperf_cgrp_ops;
-static inline bool bpf_counter_skip(struct evsel *evsel)
+static bool bpf_counter_skip(struct evsel *evsel)
{
return evsel->bpf_counter_ops == NULL;
}
diff --git a/tools/perf/util/bpf_counter.h b/tools/perf/util/bpf_counter.h
index c6d21c07b14c..658d8e7d507e 100644
--- a/tools/perf/util/bpf_counter.h
+++ b/tools/perf/util/bpf_counter.h
@@ -2,18 +2,10 @@
#ifndef __PERF_BPF_COUNTER_H
#define __PERF_BPF_COUNTER_H 1
-#include <linux/list.h>
-#include <sys/resource.h>
-
-#ifdef HAVE_LIBBPF_SUPPORT
-#include <bpf/bpf.h>
-#include <bpf/btf.h>
-#include <bpf/libbpf.h>
-#endif
-
struct evsel;
struct target;
-struct bpf_counter;
+
+#ifdef HAVE_BPF_SKEL
typedef int (*bpf_counter_evsel_op)(struct evsel *evsel);
typedef int (*bpf_counter_evsel_target_op)(struct evsel *evsel,
@@ -22,6 +14,7 @@ typedef int (*bpf_counter_evsel_install_pe_op)(struct evsel *evsel,
int cpu_map_idx,
int fd);
+/* Shared ops between bpf_counter, bpf_counter_cgroup, etc. */
struct bpf_counter_ops {
bpf_counter_evsel_target_op load;
bpf_counter_evsel_op enable;
@@ -31,13 +24,6 @@ struct bpf_counter_ops {
bpf_counter_evsel_install_pe_op install_pe;
};
-struct bpf_counter {
- void *skel;
- struct list_head list;
-};
-
-#ifdef HAVE_BPF_SKEL
-
int bpf_counter__load(struct evsel *evsel, struct target *target);
int bpf_counter__enable(struct evsel *evsel);
int bpf_counter__disable(struct evsel *evsel);
@@ -45,6 +31,9 @@ int bpf_counter__read(struct evsel *evsel);
void bpf_counter__destroy(struct evsel *evsel);
int bpf_counter__install_pe(struct evsel *evsel, int cpu_map_idx, int fd);
+int bperf_trigger_reading(int prog_fd, int cpu);
+void set_max_rlimit(void);
+
#else /* HAVE_BPF_SKEL */
#include <linux/err.h>
@@ -83,55 +72,4 @@ static inline int bpf_counter__install_pe(struct evsel *evsel __maybe_unused,
#endif /* HAVE_BPF_SKEL */
-static inline void set_max_rlimit(void)
-{
- struct rlimit rinf = { RLIM_INFINITY, RLIM_INFINITY };
-
- setrlimit(RLIMIT_MEMLOCK, &rinf);
-}
-
-#ifdef HAVE_BPF_SKEL
-
-static inline __u32 bpf_link_get_id(int fd)
-{
- struct bpf_link_info link_info = { .id = 0, };
- __u32 link_info_len = sizeof(link_info);
-
- bpf_obj_get_info_by_fd(fd, &link_info, &link_info_len);
- return link_info.id;
-}
-
-static inline __u32 bpf_link_get_prog_id(int fd)
-{
- struct bpf_link_info link_info = { .id = 0, };
- __u32 link_info_len = sizeof(link_info);
-
- bpf_obj_get_info_by_fd(fd, &link_info, &link_info_len);
- return link_info.prog_id;
-}
-
-static inline __u32 bpf_map_get_id(int fd)
-{
- struct bpf_map_info map_info = { .id = 0, };
- __u32 map_info_len = sizeof(map_info);
-
- bpf_obj_get_info_by_fd(fd, &map_info, &map_info_len);
- return map_info.id;
-}
-
-/* trigger the leader program on a cpu */
-static inline int bperf_trigger_reading(int prog_fd, int cpu)
-{
- DECLARE_LIBBPF_OPTS(bpf_test_run_opts, opts,
- .ctx_in = NULL,
- .ctx_size_in = 0,
- .flags = BPF_F_TEST_RUN_ON_CPU,
- .cpu = cpu,
- .retval = 0,
- );
-
- return bpf_prog_test_run_opts(prog_fd, &opts);
-}
-#endif /* HAVE_BPF_SKEL */
-
#endif /* __PERF_BPF_COUNTER_H */
diff --git a/tools/perf/util/bpf_counter_cgroup.c b/tools/perf/util/bpf_counter_cgroup.c
index 6ff42619de12..17d7196c6589 100644
--- a/tools/perf/util/bpf_counter_cgroup.c
+++ b/tools/perf/util/bpf_counter_cgroup.c
@@ -4,6 +4,7 @@
/* Copyright (c) 2021 Google */
#include <assert.h>
+#include <errno.h>
#include <limits.h>
#include <unistd.h>
#include <sys/file.h>
@@ -13,6 +14,7 @@
#include <linux/zalloc.h>
#include <linux/perf_event.h>
#include <api/fs/fs.h>
+#include <bpf/bpf.h>
#include <perf/bpf_perf.h>
#include "affinity.h"
@@ -26,6 +28,7 @@
#include "cpumap.h"
#include "thread_map.h"
+#include "bpf_skel/bperf_cgroup.h"
#include "bpf_skel/bperf_cgroup.skel.h"
static struct perf_event_attr cgrp_switch_attr = {
@@ -41,6 +44,55 @@ static struct bperf_cgroup_bpf *skel;
#define FD(evt, cpu) (*(int *)xyarray__entry(evt->core.fd, cpu, 0))
+static void setup_rodata(struct bperf_cgroup_bpf *sk, int evlist_size)
+{
+ int map_size, total_cpus = cpu__max_cpu().cpu;
+
+ sk->rodata->num_cpus = total_cpus;
+ sk->rodata->num_events = evlist_size / nr_cgroups;
+
+ if (cgroup_is_v2("perf_event") > 0)
+ sk->rodata->use_cgroup_v2 = 1;
+
+ BUG_ON(evlist_size % nr_cgroups != 0);
+
+ /* we need one copy of events per cpu for reading */
+ map_size = total_cpus * evlist_size / nr_cgroups;
+ bpf_map__set_max_entries(sk->maps.events, map_size);
+ bpf_map__set_max_entries(sk->maps.cgrp_idx, nr_cgroups);
+ /* previous result is saved in a per-cpu array */
+ map_size = evlist_size / nr_cgroups;
+ bpf_map__set_max_entries(sk->maps.prev_readings, map_size);
+ /* cgroup result needs all events (per-cpu) */
+ map_size = evlist_size;
+ bpf_map__set_max_entries(sk->maps.cgrp_readings, map_size);
+}
+
+static void test_max_events_program_load(void)
+{
+#ifndef NDEBUG
+ /*
+ * Test that the program verifies with the maximum number of events. If
+ * this test fails unfortunately perf needs recompiling with a lower
+ * BPERF_CGROUP__MAX_EVENTS to avoid BPF verifier issues.
+ */
+ int err, max_events = BPERF_CGROUP__MAX_EVENTS * nr_cgroups;
+ struct bperf_cgroup_bpf *test_skel = bperf_cgroup_bpf__open();
+
+ if (!test_skel) {
+ pr_err("Failed to open cgroup skeleton\n");
+ return;
+ }
+ setup_rodata(test_skel, max_events);
+ err = bperf_cgroup_bpf__load(test_skel);
+ if (err) {
+ pr_err("Failed to load cgroup skeleton with max events %d.\n",
+ BPERF_CGROUP__MAX_EVENTS);
+ }
+ bperf_cgroup_bpf__destroy(test_skel);
+#endif
+}
+
static int bperf_load_program(struct evlist *evlist)
{
struct bpf_link *link;
@@ -49,35 +101,18 @@ static int bperf_load_program(struct evlist *evlist)
int i, j;
struct perf_cpu cpu;
int total_cpus = cpu__max_cpu().cpu;
- int map_size, map_fd;
- int prog_fd, err;
+ int map_fd, prog_fd, err;
+
+ set_max_rlimit();
+
+ test_max_events_program_load();
skel = bperf_cgroup_bpf__open();
if (!skel) {
pr_err("Failed to open cgroup skeleton\n");
return -1;
}
-
- skel->rodata->num_cpus = total_cpus;
- skel->rodata->num_events = evlist->core.nr_entries / nr_cgroups;
-
- if (cgroup_is_v2("perf_event") > 0)
- skel->rodata->use_cgroup_v2 = 1;
-
- BUG_ON(evlist->core.nr_entries % nr_cgroups != 0);
-
- /* we need one copy of events per cpu for reading */
- map_size = total_cpus * evlist->core.nr_entries / nr_cgroups;
- bpf_map__set_max_entries(skel->maps.events, map_size);
- bpf_map__set_max_entries(skel->maps.cgrp_idx, nr_cgroups);
- /* previous result is saved in a per-cpu array */
- map_size = evlist->core.nr_entries / nr_cgroups;
- bpf_map__set_max_entries(skel->maps.prev_readings, map_size);
- /* cgroup result needs all events (per-cpu) */
- map_size = evlist->core.nr_entries;
- bpf_map__set_max_entries(skel->maps.cgrp_readings, map_size);
-
- set_max_rlimit();
+ setup_rodata(skel, evlist->core.nr_entries);
err = bperf_cgroup_bpf__load(skel);
if (err) {
@@ -185,7 +220,8 @@ static int bperf_cgrp__load(struct evsel *evsel,
}
static int bperf_cgrp__install_pe(struct evsel *evsel __maybe_unused,
- int cpu __maybe_unused, int fd __maybe_unused)
+ int cpu_map_idx __maybe_unused,
+ int fd __maybe_unused)
{
/* nothing to do */
return 0;
diff --git a/tools/perf/util/bpf_ftrace.c b/tools/perf/util/bpf_ftrace.c
index 0cb02412043c..c456d24efa30 100644
--- a/tools/perf/util/bpf_ftrace.c
+++ b/tools/perf/util/bpf_ftrace.c
@@ -1,8 +1,10 @@
-#include <stdio.h>
+#include <errno.h>
#include <fcntl.h>
#include <stdint.h>
+#include <stdio.h>
#include <stdlib.h>
+#include <bpf/bpf.h>
#include <linux/err.h>
#include "util/ftrace.h"
diff --git a/tools/perf/util/bpf_lock_contention.c b/tools/perf/util/bpf_lock_contention.c
index 60b81d586323..7b5671f13c53 100644
--- a/tools/perf/util/bpf_lock_contention.c
+++ b/tools/perf/util/bpf_lock_contention.c
@@ -184,6 +184,9 @@ int lock_contention_prepare(struct lock_contention *con)
struct evlist *evlist = con->evlist;
struct target *target = con->target;
+ /* make sure it loads the kernel map before lookup */
+ map__load(machine__kernel_map(con->machine));
+
skel = lock_contention_bpf__open();
if (!skel) {
pr_err("Failed to open lock-contention BPF skeleton\n");
@@ -749,9 +752,6 @@ int lock_contention_read(struct lock_contention *con)
bpf_prog_test_run_opts(prog_fd, &opts);
}
- /* make sure it loads the kernel map */
- maps__load_first(machine->kmaps);
-
prev_key = NULL;
while (!bpf_map_get_next_key(fd, prev_key, &key)) {
s64 ls_key;
diff --git a/tools/perf/util/bpf_map.c b/tools/perf/util/bpf_map.c
index 578f27d2d6b4..442f91b4e8e1 100644
--- a/tools/perf/util/bpf_map.c
+++ b/tools/perf/util/bpf_map.c
@@ -5,6 +5,7 @@
#include <bpf/libbpf.h>
#include <linux/err.h>
#include <linux/kernel.h>
+#include <errno.h>
#include <stdbool.h>
#include <stdlib.h>
#include <unistd.h>
diff --git a/tools/perf/util/bpf_off_cpu.c b/tools/perf/util/bpf_off_cpu.c
index c367fefe6ecb..88e0660c4bff 100644
--- a/tools/perf/util/bpf_off_cpu.c
+++ b/tools/perf/util/bpf_off_cpu.c
@@ -13,6 +13,7 @@
#include "util/cgroup.h"
#include "util/strlist.h"
#include <bpf/bpf.h>
+#include <bpf/btf.h>
#include <internal/xyarray.h>
#include <linux/time64.h>
diff --git a/tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c b/tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c
index cb86e261b4de..2a6e61864ee0 100644
--- a/tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c
+++ b/tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c
@@ -45,7 +45,7 @@ struct syscalls_sys_enter {
__uint(type, BPF_MAP_TYPE_PROG_ARRAY);
__type(key, __u32);
__type(value, __u32);
- __uint(max_entries, 512);
+ __uint(max_entries, 1024);
} syscalls_sys_enter SEC(".maps");
/*
@@ -57,7 +57,7 @@ struct syscalls_sys_exit {
__uint(type, BPF_MAP_TYPE_PROG_ARRAY);
__type(key, __u32);
__type(value, __u32);
- __uint(max_entries, 512);
+ __uint(max_entries, 1024);
} syscalls_sys_exit SEC(".maps");
struct syscall_enter_args {
diff --git a/tools/perf/util/bpf_skel/bperf_cgroup.bpf.c b/tools/perf/util/bpf_skel/bperf_cgroup.bpf.c
index 57cab7647a9a..c2298a2decc9 100644
--- a/tools/perf/util/bpf_skel/bperf_cgroup.bpf.c
+++ b/tools/perf/util/bpf_skel/bperf_cgroup.bpf.c
@@ -1,14 +1,12 @@
// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
// Copyright (c) 2021 Facebook
// Copyright (c) 2021 Google
+#include "bperf_cgroup.h"
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
#include <bpf/bpf_core_read.h>
-#define MAX_LEVELS 10 // max cgroup hierarchy level: arbitrary
-#define MAX_EVENTS 32 // max events per cgroup: arbitrary
-
// NOTE: many of map and global data will be modified before loading
// from the userspace (perf tool) using the skeleton helpers.
@@ -97,7 +95,7 @@ static inline int get_cgroup_v1_idx(__u32 *cgrps, int size)
cgrp = BPF_CORE_READ(p, cgroups, subsys[perf_subsys_id], cgroup);
level = BPF_CORE_READ(cgrp, level);
- for (cnt = 0; i < MAX_LEVELS; i++) {
+ for (cnt = 0; i < BPERF_CGROUP__MAX_LEVELS; i++) {
__u64 cgrp_id;
if (i > level)
@@ -123,7 +121,7 @@ static inline int get_cgroup_v2_idx(__u32 *cgrps, int size)
__u32 *elem;
int cnt;
- for (cnt = 0; i < MAX_LEVELS; i++) {
+ for (cnt = 0; i < BPERF_CGROUP__MAX_LEVELS; i++) {
__u64 cgrp_id = bpf_get_current_ancestor_cgroup_id(i);
if (cgrp_id == 0)
@@ -148,17 +146,17 @@ static int bperf_cgroup_count(void)
register int c = 0;
struct bpf_perf_event_value val, delta, *prev_val, *cgrp_val;
__u32 cpu = bpf_get_smp_processor_id();
- __u32 cgrp_idx[MAX_LEVELS];
+ __u32 cgrp_idx[BPERF_CGROUP__MAX_LEVELS];
int cgrp_cnt;
__u32 key, cgrp;
long err;
if (use_cgroup_v2)
- cgrp_cnt = get_cgroup_v2_idx(cgrp_idx, MAX_LEVELS);
+ cgrp_cnt = get_cgroup_v2_idx(cgrp_idx, BPERF_CGROUP__MAX_LEVELS);
else
- cgrp_cnt = get_cgroup_v1_idx(cgrp_idx, MAX_LEVELS);
+ cgrp_cnt = get_cgroup_v1_idx(cgrp_idx, BPERF_CGROUP__MAX_LEVELS);
- for ( ; idx < MAX_EVENTS; idx++) {
+ for ( ; idx < BPERF_CGROUP__MAX_EVENTS; idx++) {
if (idx == num_events)
break;
@@ -186,7 +184,7 @@ static int bperf_cgroup_count(void)
delta.enabled = val.enabled - prev_val->enabled;
delta.running = val.running - prev_val->running;
- for (c = 0; c < MAX_LEVELS; c++) {
+ for (c = 0; c < BPERF_CGROUP__MAX_LEVELS; c++) {
if (c == cgrp_cnt)
break;
diff --git a/tools/perf/util/bpf_skel/bperf_cgroup.h b/tools/perf/util/bpf_skel/bperf_cgroup.h
new file mode 100644
index 000000000000..3fb84b19d39a
--- /dev/null
+++ b/tools/perf/util/bpf_skel/bperf_cgroup.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
+/* Data structures shared between BPF and tools. */
+#ifndef __BPERF_CGROUP_H
+#define __BPERF_CGROUP_H
+
+// These constants impact code size of bperf_cgroup.bpf.c that may result in BPF
+// verifier issues. They are exposed to control the size and also to disable BPF
+// counters when the number of user events is too large.
+
+// max cgroup hierarchy level: arbitrary
+#define BPERF_CGROUP__MAX_LEVELS 10
+// max events per cgroup: arbitrary
+#define BPERF_CGROUP__MAX_EVENTS 128
+
+#endif /* __BPERF_CGROUP_H */
diff --git a/tools/perf/util/bpf_skel/kwork_top.bpf.c b/tools/perf/util/bpf_skel/kwork_top.bpf.c
index 73e32e063030..6673386302e2 100644
--- a/tools/perf/util/bpf_skel/kwork_top.bpf.c
+++ b/tools/perf/util/bpf_skel/kwork_top.bpf.c
@@ -18,9 +18,7 @@ enum kwork_class_type {
};
#define MAX_ENTRIES 102400
-#ifndef MAX_NR_CPUS
#define MAX_NR_CPUS 4096
-#endif
#define PF_KTHREAD 0x00200000
#define MAX_COMMAND_LEN 16
diff --git a/tools/perf/util/bpf_skel/sample_filter.bpf.c b/tools/perf/util/bpf_skel/sample_filter.bpf.c
index b195e6efeb8b..e5666d4c1722 100644
--- a/tools/perf/util/bpf_skel/sample_filter.bpf.c
+++ b/tools/perf/util/bpf_skel/sample_filter.bpf.c
@@ -164,7 +164,7 @@ static inline __u64 perf_get_sample(struct bpf_perf_event_data_kern *kctx,
if (entry->part == 8) {
union perf_mem_data_src___new *data = (void *)&kctx->data->data_src;
- if (bpf_core_field_exists(data->mem_hops))
+ if (__builtin_preserve_field_info(data->mem_hops, BPF_FIELD_EXISTS))
return data->mem_hops;
return 0;
diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c
index a7018a3b0437..fdb35133fde4 100644
--- a/tools/perf/util/build-id.c
+++ b/tools/perf/util/build-id.c
@@ -86,6 +86,13 @@ int build_id__snprintf(const struct build_id *build_id, char *bf, size_t bf_size
{
size_t offs = 0;
+ if (build_id->size == 0) {
+ /* Ensure bf is always \0 terminated. */
+ if (bf_size > 0)
+ bf[0] = '\0';
+ return 0;
+ }
+
for (size_t i = 0; i < build_id->size && offs < bf_size; ++i)
offs += snprintf(bf + offs, bf_size - offs, "%02x", build_id->data[i]);
diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c
index d7b7eef740b9..428e5350d7a2 100644
--- a/tools/perf/util/callchain.c
+++ b/tools/perf/util/callchain.c
@@ -275,9 +275,13 @@ int parse_callchain_record(const char *arg, struct callchain_param *param)
if (tok) {
unsigned long size;
- size = strtoul(tok, &name, 0);
- if (size < (unsigned) sysctl__max_stack())
- param->max_stack = size;
+ if (!strncmp(tok, "defer", sizeof("defer"))) {
+ param->defer = true;
+ } else {
+ size = strtoul(tok, &name, 0);
+ if (size < (unsigned) sysctl__max_stack())
+ param->max_stack = size;
+ }
}
break;
@@ -314,6 +318,12 @@ int parse_callchain_record(const char *arg, struct callchain_param *param)
} while (0);
free(buf);
+
+ if (param->defer && param->record_mode != CALLCHAIN_FP) {
+ pr_err("callchain: deferred callchain only works with FP\n");
+ return -EINVAL;
+ }
+
return ret;
}
@@ -1828,3 +1838,38 @@ int sample__for_each_callchain_node(struct thread *thread, struct evsel *evsel,
}
return 0;
}
+
+/*
+ * This function merges earlier samples (@sample_orig) waiting for deferred
+ * user callchains with the matching callchain record (@sample_callchain)
+ * which is delivered now. The @sample_orig->callchain should be released
+ * after use if ->deferred_callchain is set.
+ */
+int sample__merge_deferred_callchain(struct perf_sample *sample_orig,
+ struct perf_sample *sample_callchain)
+{
+ u64 nr_orig = sample_orig->callchain->nr - 1;
+ u64 nr_deferred = sample_callchain->callchain->nr;
+ struct ip_callchain *callchain;
+
+ if (sample_orig->callchain->nr < 2) {
+ sample_orig->deferred_callchain = false;
+ return -EINVAL;
+ }
+
+ callchain = calloc(1 + nr_orig + nr_deferred, sizeof(u64));
+ if (callchain == NULL) {
+ sample_orig->deferred_callchain = false;
+ return -ENOMEM;
+ }
+
+ callchain->nr = nr_orig + nr_deferred;
+ /* copy original including PERF_CONTEXT_USER_DEFERRED (but the cookie) */
+ memcpy(callchain->ips, sample_orig->callchain->ips, nr_orig * sizeof(u64));
+ /* copy deferred user callchains */
+ memcpy(&callchain->ips[nr_orig], sample_callchain->callchain->ips,
+ nr_deferred * sizeof(u64));
+
+ sample_orig->callchain = callchain;
+ return 0;
+}
diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h
index 86ed9e4d04f9..2a52af8c80ac 100644
--- a/tools/perf/util/callchain.h
+++ b/tools/perf/util/callchain.h
@@ -98,6 +98,7 @@ extern bool dwarf_callchain_users;
struct callchain_param {
bool enabled;
+ bool defer;
enum perf_call_graph_mode record_mode;
u32 dump_size;
enum chain_mode mode;
@@ -317,4 +318,7 @@ int sample__for_each_callchain_node(struct thread *thread, struct evsel *evsel,
struct perf_sample *sample, int max_stack,
bool symbols, callchain_iter_fn cb, void *data);
+int sample__merge_deferred_callchain(struct perf_sample *sample_orig,
+ struct perf_sample *sample_callchain);
+
#endif /* __PERF_CALLCHAIN_H */
diff --git a/tools/perf/util/capstone.c b/tools/perf/util/capstone.c
new file mode 100644
index 000000000000..be5fd44b1f9d
--- /dev/null
+++ b/tools/perf/util/capstone.c
@@ -0,0 +1,471 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "capstone.h"
+#include "annotate.h"
+#include "addr_location.h"
+#include "debug.h"
+#include "disasm.h"
+#include "dso.h"
+#include "machine.h"
+#include "map.h"
+#include "namespaces.h"
+#include "print_insn.h"
+#include "symbol.h"
+#include "thread.h"
+#include <errno.h>
+#include <fcntl.h>
+#include <string.h>
+
+#ifdef HAVE_LIBCAPSTONE_SUPPORT
+#include <capstone/capstone.h>
+#endif
+
+#ifdef HAVE_LIBCAPSTONE_SUPPORT
+static int capstone_init(struct machine *machine, csh *cs_handle, bool is64,
+ bool disassembler_style)
+{
+ cs_arch arch;
+ cs_mode mode;
+
+ if (machine__is(machine, "x86_64") && is64) {
+ arch = CS_ARCH_X86;
+ mode = CS_MODE_64;
+ } else if (machine__normalized_is(machine, "x86")) {
+ arch = CS_ARCH_X86;
+ mode = CS_MODE_32;
+ } else if (machine__normalized_is(machine, "arm64")) {
+ arch = CS_ARCH_ARM64;
+ mode = CS_MODE_ARM;
+ } else if (machine__normalized_is(machine, "arm")) {
+ arch = CS_ARCH_ARM;
+ mode = CS_MODE_ARM + CS_MODE_V8;
+ } else if (machine__normalized_is(machine, "s390")) {
+ arch = CS_ARCH_SYSZ;
+ mode = CS_MODE_BIG_ENDIAN;
+ } else {
+ return -1;
+ }
+
+ if (cs_open(arch, mode, cs_handle) != CS_ERR_OK) {
+ pr_warning_once("cs_open failed\n");
+ return -1;
+ }
+
+ if (machine__normalized_is(machine, "x86")) {
+ /*
+ * In case of using capstone_init while symbol__disassemble
+ * setting CS_OPT_SYNTAX_ATT depends if disassembler_style opts
+ * is set via annotation args
+ */
+ if (disassembler_style)
+ cs_option(*cs_handle, CS_OPT_SYNTAX, CS_OPT_SYNTAX_ATT);
+ /*
+ * Resolving address operands to symbols is implemented
+ * on x86 by investigating instruction details.
+ */
+ cs_option(*cs_handle, CS_OPT_DETAIL, CS_OPT_ON);
+ }
+
+ return 0;
+}
+#endif
+
+#ifdef HAVE_LIBCAPSTONE_SUPPORT
+static size_t print_insn_x86(struct thread *thread, u8 cpumode, cs_insn *insn,
+ int print_opts, FILE *fp)
+{
+ struct addr_location al;
+ size_t printed = 0;
+
+ if (insn->detail && insn->detail->x86.op_count == 1) {
+ cs_x86_op *op = &insn->detail->x86.operands[0];
+
+ addr_location__init(&al);
+ if (op->type == X86_OP_IMM &&
+ thread__find_symbol(thread, cpumode, op->imm, &al)) {
+ printed += fprintf(fp, "%s ", insn[0].mnemonic);
+ printed += symbol__fprintf_symname_offs(al.sym, &al, fp);
+ if (print_opts & PRINT_INSN_IMM_HEX)
+ printed += fprintf(fp, " [%#" PRIx64 "]", op->imm);
+ addr_location__exit(&al);
+ return printed;
+ }
+ addr_location__exit(&al);
+ }
+
+ printed += fprintf(fp, "%s %s", insn[0].mnemonic, insn[0].op_str);
+ return printed;
+}
+#endif
+
+
+ssize_t capstone__fprintf_insn_asm(struct machine *machine __maybe_unused,
+ struct thread *thread __maybe_unused,
+ u8 cpumode __maybe_unused, bool is64bit __maybe_unused,
+ const uint8_t *code __maybe_unused,
+ size_t code_size __maybe_unused,
+ uint64_t ip __maybe_unused, int *lenp __maybe_unused,
+ int print_opts __maybe_unused, FILE *fp __maybe_unused)
+{
+#ifdef HAVE_LIBCAPSTONE_SUPPORT
+ size_t printed;
+ cs_insn *insn;
+ csh cs_handle;
+ size_t count;
+ int ret;
+
+ /* TODO: Try to initiate capstone only once but need a proper place. */
+ ret = capstone_init(machine, &cs_handle, is64bit, true);
+ if (ret < 0)
+ return ret;
+
+ count = cs_disasm(cs_handle, code, code_size, ip, 1, &insn);
+ if (count > 0) {
+ if (machine__normalized_is(machine, "x86"))
+ printed = print_insn_x86(thread, cpumode, &insn[0], print_opts, fp);
+ else
+ printed = fprintf(fp, "%s %s", insn[0].mnemonic, insn[0].op_str);
+ if (lenp)
+ *lenp = insn->size;
+ cs_free(insn, count);
+ } else {
+ printed = -1;
+ }
+
+ cs_close(&cs_handle);
+ return printed;
+#else
+ return -1;
+#endif
+}
+
+#ifdef HAVE_LIBCAPSTONE_SUPPORT
+static void print_capstone_detail(cs_insn *insn, char *buf, size_t len,
+ struct annotate_args *args, u64 addr)
+{
+ int i;
+ struct map *map = args->ms.map;
+ struct symbol *sym;
+
+ /* TODO: support more architectures */
+ if (!arch__is(args->arch, "x86"))
+ return;
+
+ if (insn->detail == NULL)
+ return;
+
+ for (i = 0; i < insn->detail->x86.op_count; i++) {
+ cs_x86_op *op = &insn->detail->x86.operands[i];
+ u64 orig_addr;
+
+ if (op->type != X86_OP_MEM)
+ continue;
+
+ /* only print RIP-based global symbols for now */
+ if (op->mem.base != X86_REG_RIP)
+ continue;
+
+ /* get the target address */
+ orig_addr = addr + insn->size + op->mem.disp;
+ addr = map__objdump_2mem(map, orig_addr);
+
+ if (dso__kernel(map__dso(map))) {
+ /*
+ * The kernel maps can be split into sections, let's
+ * find the map first and the search the symbol.
+ */
+ map = maps__find(map__kmaps(map), addr);
+ if (map == NULL)
+ continue;
+ }
+
+ /* convert it to map-relative address for search */
+ addr = map__map_ip(map, addr);
+
+ sym = map__find_symbol(map, addr);
+ if (sym == NULL)
+ continue;
+
+ if (addr == sym->start) {
+ scnprintf(buf, len, "\t# %"PRIx64" <%s>",
+ orig_addr, sym->name);
+ } else {
+ scnprintf(buf, len, "\t# %"PRIx64" <%s+%#"PRIx64">",
+ orig_addr, sym->name, addr - sym->start);
+ }
+ break;
+ }
+}
+#endif
+
+#ifdef HAVE_LIBCAPSTONE_SUPPORT
+struct find_file_offset_data {
+ u64 ip;
+ u64 offset;
+};
+
+/* This will be called for each PHDR in an ELF binary */
+static int find_file_offset(u64 start, u64 len, u64 pgoff, void *arg)
+{
+ struct find_file_offset_data *data = arg;
+
+ if (start <= data->ip && data->ip < start + len) {
+ data->offset = pgoff + data->ip - start;
+ return 1;
+ }
+ return 0;
+}
+#endif
+
+int symbol__disassemble_capstone(const char *filename __maybe_unused,
+ struct symbol *sym __maybe_unused,
+ struct annotate_args *args __maybe_unused)
+{
+#ifdef HAVE_LIBCAPSTONE_SUPPORT
+ struct annotation *notes = symbol__annotation(sym);
+ struct map *map = args->ms.map;
+ struct dso *dso = map__dso(map);
+ u64 start = map__rip_2objdump(map, sym->start);
+ u64 offset;
+ int i, count, free_count;
+ bool is_64bit = false;
+ bool needs_cs_close = false;
+ /* Malloc-ed buffer containing instructions read from disk. */
+ u8 *code_buf = NULL;
+ /* Pointer to code to be disassembled. */
+ const u8 *buf;
+ u64 buf_len;
+ csh handle;
+ cs_insn *insn = NULL;
+ char disasm_buf[512];
+ struct disasm_line *dl;
+ bool disassembler_style = false;
+
+ if (args->options->objdump_path)
+ return -1;
+
+ buf = dso__read_symbol(dso, filename, map, sym,
+ &code_buf, &buf_len, &is_64bit);
+ if (buf == NULL)
+ return errno;
+
+ /* add the function address and name */
+ scnprintf(disasm_buf, sizeof(disasm_buf), "%#"PRIx64" <%s>:",
+ start, sym->name);
+
+ args->offset = -1;
+ args->line = disasm_buf;
+ args->line_nr = 0;
+ args->fileloc = NULL;
+ args->ms.sym = sym;
+
+ dl = disasm_line__new(args);
+ if (dl == NULL)
+ goto err;
+
+ annotation_line__add(&dl->al, &notes->src->source);
+
+ if (!args->options->disassembler_style ||
+ !strcmp(args->options->disassembler_style, "att"))
+ disassembler_style = true;
+
+ if (capstone_init(maps__machine(args->ms.maps), &handle, is_64bit, disassembler_style) < 0)
+ goto err;
+
+ needs_cs_close = true;
+
+ free_count = count = cs_disasm(handle, buf, buf_len, start, buf_len, &insn);
+ for (i = 0, offset = 0; i < count; i++) {
+ int printed;
+
+ printed = scnprintf(disasm_buf, sizeof(disasm_buf),
+ " %-7s %s",
+ insn[i].mnemonic, insn[i].op_str);
+ print_capstone_detail(&insn[i], disasm_buf + printed,
+ sizeof(disasm_buf) - printed, args,
+ start + offset);
+
+ args->offset = offset;
+ args->line = disasm_buf;
+
+ dl = disasm_line__new(args);
+ if (dl == NULL)
+ goto err;
+
+ annotation_line__add(&dl->al, &notes->src->source);
+
+ offset += insn[i].size;
+ }
+
+ /* It failed in the middle: probably due to unknown instructions */
+ if (offset != buf_len) {
+ struct list_head *list = &notes->src->source;
+
+ /* Discard all lines and fallback to objdump */
+ while (!list_empty(list)) {
+ dl = list_first_entry(list, struct disasm_line, al.node);
+
+ list_del_init(&dl->al.node);
+ disasm_line__free(dl);
+ }
+ count = -1;
+ }
+
+out:
+ if (needs_cs_close) {
+ cs_close(&handle);
+ if (free_count > 0)
+ cs_free(insn, free_count);
+ }
+ free(code_buf);
+ return count < 0 ? count : 0;
+
+err:
+ if (needs_cs_close) {
+ struct disasm_line *tmp;
+
+ /*
+ * It probably failed in the middle of the above loop.
+ * Release any resources it might add.
+ */
+ list_for_each_entry_safe(dl, tmp, &notes->src->source, al.node) {
+ list_del(&dl->al.node);
+ disasm_line__free(dl);
+ }
+ }
+ count = -1;
+ goto out;
+#else
+ return -1;
+#endif
+}
+
+int symbol__disassemble_capstone_powerpc(const char *filename __maybe_unused,
+ struct symbol *sym __maybe_unused,
+ struct annotate_args *args __maybe_unused)
+{
+#ifdef HAVE_LIBCAPSTONE_SUPPORT
+ struct annotation *notes = symbol__annotation(sym);
+ struct map *map = args->ms.map;
+ struct dso *dso = map__dso(map);
+ struct nscookie nsc;
+ u64 start = map__rip_2objdump(map, sym->start);
+ u64 end = map__rip_2objdump(map, sym->end);
+ u64 len = end - start;
+ u64 offset;
+ int i, fd, count;
+ bool is_64bit = false;
+ bool needs_cs_close = false;
+ u8 *buf = NULL;
+ struct find_file_offset_data data = {
+ .ip = start,
+ };
+ csh handle;
+ char disasm_buf[512];
+ struct disasm_line *dl;
+ u32 *line;
+ bool disassembler_style = false;
+
+ if (args->options->objdump_path)
+ return -1;
+
+ nsinfo__mountns_enter(dso__nsinfo(dso), &nsc);
+ fd = open(filename, O_RDONLY);
+ nsinfo__mountns_exit(&nsc);
+ if (fd < 0)
+ return -1;
+
+ if (file__read_maps(fd, /*exe=*/true, find_file_offset, &data,
+ &is_64bit) == 0)
+ goto err;
+
+ if (!args->options->disassembler_style ||
+ !strcmp(args->options->disassembler_style, "att"))
+ disassembler_style = true;
+
+ if (capstone_init(maps__machine(args->ms.maps), &handle, is_64bit, disassembler_style) < 0)
+ goto err;
+
+ needs_cs_close = true;
+
+ buf = malloc(len);
+ if (buf == NULL)
+ goto err;
+
+ count = pread(fd, buf, len, data.offset);
+ close(fd);
+ fd = -1;
+
+ if ((u64)count != len)
+ goto err;
+
+ line = (u32 *)buf;
+
+ /* add the function address and name */
+ scnprintf(disasm_buf, sizeof(disasm_buf), "%#"PRIx64" <%s>:",
+ start, sym->name);
+
+ args->offset = -1;
+ args->line = disasm_buf;
+ args->line_nr = 0;
+ args->fileloc = NULL;
+ args->ms.sym = sym;
+
+ dl = disasm_line__new(args);
+ if (dl == NULL)
+ goto err;
+
+ annotation_line__add(&dl->al, &notes->src->source);
+
+ /*
+ * TODO: enable disassm for powerpc
+ * count = cs_disasm(handle, buf, len, start, len, &insn);
+ *
+ * For now, only binary code is saved in disassembled line
+ * to be used in "type" and "typeoff" sort keys. Each raw code
+ * is 32 bit instruction. So use "len/4" to get the number of
+ * entries.
+ */
+ count = len/4;
+
+ for (i = 0, offset = 0; i < count; i++) {
+ args->offset = offset;
+ sprintf(args->line, "%x", line[i]);
+
+ dl = disasm_line__new(args);
+ if (dl == NULL)
+ break;
+
+ annotation_line__add(&dl->al, &notes->src->source);
+
+ offset += 4;
+ }
+
+ /* It failed in the middle */
+ if (offset != len) {
+ struct list_head *list = &notes->src->source;
+
+ /* Discard all lines and fallback to objdump */
+ while (!list_empty(list)) {
+ dl = list_first_entry(list, struct disasm_line, al.node);
+
+ list_del_init(&dl->al.node);
+ disasm_line__free(dl);
+ }
+ count = -1;
+ }
+
+out:
+ if (needs_cs_close)
+ cs_close(&handle);
+ free(buf);
+ return count < 0 ? count : 0;
+
+err:
+ if (fd >= 0)
+ close(fd);
+ count = -1;
+ goto out;
+#else
+ return -1;
+#endif
+}
diff --git a/tools/perf/util/capstone.h b/tools/perf/util/capstone.h
new file mode 100644
index 000000000000..0f030ea034b6
--- /dev/null
+++ b/tools/perf/util/capstone.h
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_CAPSTONE_H
+#define __PERF_CAPSTONE_H
+
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <linux/types.h>
+
+struct annotate_args;
+struct machine;
+struct symbol;
+struct thread;
+
+ssize_t capstone__fprintf_insn_asm(struct machine *machine, struct thread *thread, u8 cpumode,
+ bool is64bit, const uint8_t *code, size_t code_size,
+ uint64_t ip, int *lenp, int print_opts, FILE *fp);
+int symbol__disassemble_capstone(const char *filename, struct symbol *sym,
+ struct annotate_args *args);
+int symbol__disassemble_capstone_powerpc(const char *filename, struct symbol *sym,
+ struct annotate_args *args);
+
+#endif /* __PERF_CAPSTONE_H */
diff --git a/tools/perf/util/cgroup.c b/tools/perf/util/cgroup.c
index 25e2769b5e74..040eb75f0804 100644
--- a/tools/perf/util/cgroup.c
+++ b/tools/perf/util/cgroup.c
@@ -10,6 +10,7 @@
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/statfs.h>
+#include <errno.h>
#include <fcntl.h>
#include <stdlib.h>
#include <string.h>
diff --git a/tools/perf/util/config.c b/tools/perf/util/config.c
index ae72b66b6ded..e0219bc6330a 100644
--- a/tools/perf/util/config.c
+++ b/tools/perf/util/config.c
@@ -19,7 +19,7 @@
#include "util/hist.h" /* perf_hist_config */
#include "util/stat.h" /* perf_stat__set_big_num */
#include "util/evsel.h" /* evsel__hw_names, evsel__use_bpf_counters */
-#include "util/srcline.h" /* addr2line_timeout_ms */
+#include "util/addr2line.h" /* addr2line_timeout_ms */
#include "build-id.h"
#include "debug.h"
#include "config.h"
@@ -37,6 +37,8 @@
#define METRIC_ONLY_LEN 20
+static struct stats walltime_nsecs_stats;
+
struct perf_stat_config stat_config = {
.aggr_mode = AGGR_GLOBAL,
.aggr_level = MAX_CACHE_LVL + 1,
@@ -45,7 +47,6 @@ struct perf_stat_config stat_config = {
.run_count = 1,
.metric_only_len = METRIC_ONLY_LEN,
.walltime_nsecs_stats = &walltime_nsecs_stats,
- .ru_stats = &ru_stats,
.big_num = true,
.ctl_fd = -1,
.ctl_fd_ack = -1,
diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c
index 89570397a4b3..a80845038a5e 100644
--- a/tools/perf/util/cpumap.c
+++ b/tools/perf/util/cpumap.c
@@ -684,16 +684,21 @@ size_t cpu_map__snprint_mask(struct perf_cpu_map *map, char *buf, size_t size)
unsigned char *bitmap;
struct perf_cpu c, last_cpu = perf_cpu_map__max(map);
- if (buf == NULL)
+ if (buf == NULL || size == 0)
return 0;
+ if (last_cpu.cpu < 0) {
+ buf[0] = '\0';
+ return 0;
+ }
+
bitmap = zalloc(last_cpu.cpu / 8 + 1);
if (bitmap == NULL) {
buf[0] = '\0';
return 0;
}
- perf_cpu_map__for_each_cpu(c, idx, map)
+ perf_cpu_map__for_each_cpu_skip_any(c, idx, map)
bitmap[c.cpu / 8] |= 1 << (c.cpu % 8);
for (int cpu = last_cpu.cpu / 4 * 4; cpu >= 0; cpu -= 4) {
diff --git a/tools/perf/util/cs-etm-decoder/Build b/tools/perf/util/cs-etm-decoder/Build
index 056d665f7f88..27550db2aa4c 100644
--- a/tools/perf/util/cs-etm-decoder/Build
+++ b/tools/perf/util/cs-etm-decoder/Build
@@ -1 +1 @@
-perf-util-$(CONFIG_AUXTRACE) += cs-etm-decoder.o
+perf-util-y += cs-etm-decoder.o
diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
index b85a8837bddc..3050fe212666 100644
--- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
+++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
@@ -588,6 +588,7 @@ static ocsd_datapath_resp_t cs_etm_decoder__gen_trace_elem_printer(
const ocsd_generic_trace_elem *elem)
{
ocsd_datapath_resp_t resp = OCSD_RESP_CONT;
+ ocsd_gen_trc_elem_t type;
struct cs_etm_decoder *decoder = (struct cs_etm_decoder *) context;
struct cs_etm_queue *etmq = decoder->data;
struct cs_etm_packet_queue *packet_queue;
@@ -597,52 +598,29 @@ static ocsd_datapath_resp_t cs_etm_decoder__gen_trace_elem_printer(
if (!packet_queue)
return OCSD_RESP_FATAL_SYS_ERR;
- switch (elem->elem_type) {
- case OCSD_GEN_TRC_ELEM_UNKNOWN:
- break;
- case OCSD_GEN_TRC_ELEM_EO_TRACE:
- case OCSD_GEN_TRC_ELEM_NO_SYNC:
- case OCSD_GEN_TRC_ELEM_TRACE_ON:
+ type = elem->elem_type;
+
+ if (type == OCSD_GEN_TRC_ELEM_EO_TRACE ||
+ type == OCSD_GEN_TRC_ELEM_NO_SYNC ||
+ type == OCSD_GEN_TRC_ELEM_TRACE_ON)
resp = cs_etm_decoder__buffer_discontinuity(etmq, packet_queue,
trace_chan_id);
- break;
- case OCSD_GEN_TRC_ELEM_INSTR_RANGE:
+ else if (type == OCSD_GEN_TRC_ELEM_INSTR_RANGE)
resp = cs_etm_decoder__buffer_range(etmq, packet_queue, elem,
trace_chan_id);
- break;
- case OCSD_GEN_TRC_ELEM_EXCEPTION:
+ else if (type == OCSD_GEN_TRC_ELEM_EXCEPTION)
resp = cs_etm_decoder__buffer_exception(etmq, packet_queue, elem,
trace_chan_id);
- break;
- case OCSD_GEN_TRC_ELEM_EXCEPTION_RET:
+ else if (type == OCSD_GEN_TRC_ELEM_EXCEPTION_RET)
resp = cs_etm_decoder__buffer_exception_ret(etmq, packet_queue,
trace_chan_id);
- break;
- case OCSD_GEN_TRC_ELEM_TIMESTAMP:
+ else if (type == OCSD_GEN_TRC_ELEM_TIMESTAMP)
resp = cs_etm_decoder__do_hard_timestamp(etmq, elem,
trace_chan_id,
indx);
- break;
- case OCSD_GEN_TRC_ELEM_PE_CONTEXT:
+ else if (type == OCSD_GEN_TRC_ELEM_PE_CONTEXT)
resp = cs_etm_decoder__set_tid(etmq, packet_queue,
elem, trace_chan_id);
- break;
- /* Unused packet types */
- case OCSD_GEN_TRC_ELEM_I_RANGE_NOPATH:
- case OCSD_GEN_TRC_ELEM_ADDR_NACC:
- case OCSD_GEN_TRC_ELEM_CYCLE_COUNT:
- case OCSD_GEN_TRC_ELEM_ADDR_UNKNOWN:
- case OCSD_GEN_TRC_ELEM_EVENT:
- case OCSD_GEN_TRC_ELEM_SWTRACE:
- case OCSD_GEN_TRC_ELEM_CUSTOM:
- case OCSD_GEN_TRC_ELEM_SYNC_MARKER:
- case OCSD_GEN_TRC_ELEM_MEMTRANS:
-#if (OCSD_VER_NUM >= 0x010400)
- case OCSD_GEN_TRC_ELEM_INSTRUMENTATION:
-#endif
- default:
- break;
- }
return resp;
}
diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c
index 30f4bb3e7fa3..25d56e0f1c07 100644
--- a/tools/perf/util/cs-etm.c
+++ b/tools/perf/util/cs-etm.c
@@ -777,7 +777,7 @@ static void cs_etm__packet_dump(const char *pkt_string, void *data)
char queue_nr[64];
if (verbose)
- snprintf(queue_nr, sizeof(queue_nr), "Qnr:%d; ", etmq->queue_nr);
+ snprintf(queue_nr, sizeof(queue_nr), "Qnr:%u; ", etmq->queue_nr);
else
queue_nr[0] = '\0';
@@ -1726,10 +1726,7 @@ static int cs_etm__synth_events(struct cs_etm_auxtrace *etm,
attr.read_format = evsel->core.attr.read_format;
/* create new id val to be a fixed offset from evsel id */
- id = evsel->core.id[0] + 1000000000;
-
- if (!id)
- id = 1;
+ id = auxtrace_synth_id_range_start(evsel);
if (etm->synth_opts.branches) {
attr.config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS;
diff --git a/tools/perf/util/debuginfo.c b/tools/perf/util/debuginfo.c
index a44c70f93156..4a559b3e8cdc 100644
--- a/tools/perf/util/debuginfo.c
+++ b/tools/perf/util/debuginfo.c
@@ -110,8 +110,12 @@ struct debuginfo *debuginfo__new(const char *path)
if (!dso)
goto out;
- /* Set the build id for DSO_BINARY_TYPE__BUILDID_DEBUGINFO */
- if (is_regular_file(path) && filename__read_build_id(path, &bid) > 0)
+ /*
+ * Set the build id for DSO_BINARY_TYPE__BUILDID_DEBUGINFO. Don't block
+ * incase the path isn't for a regular file.
+ */
+ assert(!dso__has_build_id(dso));
+ if (filename__read_build_id(path, &bid) > 0)
dso__set_build_id(dso, &bid);
for (type = distro_dwarf_types;
diff --git a/tools/perf/util/disasm.c b/tools/perf/util/disasm.c
index b1e4919d016f..50b9433f3f8e 100644
--- a/tools/perf/util/disasm.c
+++ b/tools/perf/util/disasm.c
@@ -14,13 +14,15 @@
#include "annotate.h"
#include "annotate-data.h"
#include "build-id.h"
+#include "capstone.h"
#include "debug.h"
#include "disasm.h"
-#include "disasm_bpf.h"
#include "dso.h"
#include "dwarf-regs.h"
#include "env.h"
#include "evsel.h"
+#include "libbfd.h"
+#include "llvm.h"
#include "map.h"
#include "maps.h"
#include "namespaces.h"
@@ -49,7 +51,6 @@ static int call__scnprintf(struct ins *ins, char *bf, size_t size,
static void ins__sort(struct arch *arch);
static int disasm_line__parse(char *line, const char **namep, char **rawp);
static int disasm_line__parse_powerpc(struct disasm_line *dl, struct annotate_args *args);
-static char *expand_tabs(char *line, char **storage, size_t *storage_len);
static __attribute__((constructor)) void symbol__init_regexpr(void)
{
@@ -246,8 +247,8 @@ static int ins__raw_scnprintf(struct ins *ins, char *bf, size_t size,
return scnprintf(bf, size, "%-*s %s", max_ins_name, ins->name, ops->raw);
}
-int ins__scnprintf(struct ins *ins, char *bf, size_t size,
- struct ins_operands *ops, int max_ins_name)
+static int ins__scnprintf(struct ins *ins, char *bf, size_t size,
+ struct ins_operands *ops, int max_ins_name)
{
if (ins->ops->scnprintf)
return ins->ops->scnprintf(ins, bf, size, ops, max_ins_name);
@@ -390,13 +391,16 @@ static int jump__parse(struct arch *arch, struct ins_operands *ops, struct map_s
* skip over possible up to 2 operands to get to address, e.g.:
* tbnz w0, #26, ffff0000083cd190 <security_file_permission+0xd0>
*/
- if (c++ != NULL) {
+ if (c != NULL) {
+ c++;
ops->target.addr = strtoull(c, NULL, 16);
if (!ops->target.addr) {
c = strchr(c, ',');
c = validate_comma(c, ops);
- if (c++ != NULL)
+ if (c != NULL) {
+ c++;
ops->target.addr = strtoull(c, NULL, 16);
+ }
}
} else {
ops->target.addr = strtoull(ops->raw, NULL, 16);
@@ -824,7 +828,7 @@ static struct ins_ops ret_ops = {
.scnprintf = ins__raw_scnprintf,
};
-bool ins__is_nop(const struct ins *ins)
+static bool ins__is_nop(const struct ins *ins)
{
return ins->ops == &nop_ops;
}
@@ -1330,420 +1334,6 @@ fallback:
return 0;
}
-#ifdef HAVE_LIBCAPSTONE_SUPPORT
-#include <capstone/capstone.h>
-
-int capstone_init(struct machine *machine, csh *cs_handle, bool is64, bool disassembler_style);
-
-static int open_capstone_handle(struct annotate_args *args, bool is_64bit,
- csh *handle)
-{
- struct annotation_options *opt = args->options;
- cs_mode mode = is_64bit ? CS_MODE_64 : CS_MODE_32;
-
- /* TODO: support more architectures */
- if (!arch__is(args->arch, "x86"))
- return -1;
-
- if (cs_open(CS_ARCH_X86, mode, handle) != CS_ERR_OK)
- return -1;
-
- if (!opt->disassembler_style ||
- !strcmp(opt->disassembler_style, "att"))
- cs_option(*handle, CS_OPT_SYNTAX, CS_OPT_SYNTAX_ATT);
-
- /*
- * Resolving address operands to symbols is implemented
- * on x86 by investigating instruction details.
- */
- cs_option(*handle, CS_OPT_DETAIL, CS_OPT_ON);
-
- return 0;
-}
-#endif
-
-#if defined(HAVE_LIBCAPSTONE_SUPPORT) || defined(HAVE_LIBLLVM_SUPPORT)
-struct find_file_offset_data {
- u64 ip;
- u64 offset;
-};
-
-/* This will be called for each PHDR in an ELF binary */
-static int find_file_offset(u64 start, u64 len, u64 pgoff, void *arg)
-{
- struct find_file_offset_data *data = arg;
-
- if (start <= data->ip && data->ip < start + len) {
- data->offset = pgoff + data->ip - start;
- return 1;
- }
- return 0;
-}
-
-static u8 *
-read_symbol(const char *filename, struct map *map, struct symbol *sym,
- u64 *len, bool *is_64bit)
-{
- struct dso *dso = map__dso(map);
- struct nscookie nsc;
- u64 start = map__rip_2objdump(map, sym->start);
- u64 end = map__rip_2objdump(map, sym->end);
- int fd, count;
- u8 *buf = NULL;
- struct find_file_offset_data data = {
- .ip = start,
- };
-
- *is_64bit = false;
-
- nsinfo__mountns_enter(dso__nsinfo(dso), &nsc);
- fd = open(filename, O_RDONLY);
- nsinfo__mountns_exit(&nsc);
- if (fd < 0)
- return NULL;
-
- if (file__read_maps(fd, /*exe=*/true, find_file_offset, &data,
- is_64bit) == 0)
- goto err;
-
- *len = end - start;
- buf = malloc(*len);
- if (buf == NULL)
- goto err;
-
- count = pread(fd, buf, *len, data.offset);
- close(fd);
- fd = -1;
-
- if ((u64)count != *len)
- goto err;
-
- return buf;
-
-err:
- if (fd >= 0)
- close(fd);
- free(buf);
- return NULL;
-}
-#endif
-
-#if !defined(HAVE_LIBCAPSTONE_SUPPORT) || !defined(HAVE_LIBLLVM_SUPPORT)
-static void symbol__disassembler_missing(const char *disassembler, const char *filename,
- struct symbol *sym)
-{
- pr_debug("The %s disassembler isn't linked in for %s in %s\n",
- disassembler, sym->name, filename);
-}
-#endif
-
-#ifdef HAVE_LIBCAPSTONE_SUPPORT
-static void print_capstone_detail(cs_insn *insn, char *buf, size_t len,
- struct annotate_args *args, u64 addr)
-{
- int i;
- struct map *map = args->ms.map;
- struct symbol *sym;
-
- /* TODO: support more architectures */
- if (!arch__is(args->arch, "x86"))
- return;
-
- if (insn->detail == NULL)
- return;
-
- for (i = 0; i < insn->detail->x86.op_count; i++) {
- cs_x86_op *op = &insn->detail->x86.operands[i];
- u64 orig_addr;
-
- if (op->type != X86_OP_MEM)
- continue;
-
- /* only print RIP-based global symbols for now */
- if (op->mem.base != X86_REG_RIP)
- continue;
-
- /* get the target address */
- orig_addr = addr + insn->size + op->mem.disp;
- addr = map__objdump_2mem(map, orig_addr);
-
- if (dso__kernel(map__dso(map))) {
- /*
- * The kernel maps can be splitted into sections,
- * let's find the map first and the search the symbol.
- */
- map = maps__find(map__kmaps(map), addr);
- if (map == NULL)
- continue;
- }
-
- /* convert it to map-relative address for search */
- addr = map__map_ip(map, addr);
-
- sym = map__find_symbol(map, addr);
- if (sym == NULL)
- continue;
-
- if (addr == sym->start) {
- scnprintf(buf, len, "\t# %"PRIx64" <%s>",
- orig_addr, sym->name);
- } else {
- scnprintf(buf, len, "\t# %"PRIx64" <%s+%#"PRIx64">",
- orig_addr, sym->name, addr - sym->start);
- }
- break;
- }
-}
-
-static int symbol__disassemble_capstone_powerpc(char *filename, struct symbol *sym,
- struct annotate_args *args)
-{
- struct annotation *notes = symbol__annotation(sym);
- struct map *map = args->ms.map;
- struct dso *dso = map__dso(map);
- struct nscookie nsc;
- u64 start = map__rip_2objdump(map, sym->start);
- u64 end = map__rip_2objdump(map, sym->end);
- u64 len = end - start;
- u64 offset;
- int i, fd, count;
- bool is_64bit = false;
- bool needs_cs_close = false;
- u8 *buf = NULL;
- struct find_file_offset_data data = {
- .ip = start,
- };
- csh handle;
- char disasm_buf[512];
- struct disasm_line *dl;
- u32 *line;
- bool disassembler_style = false;
-
- if (args->options->objdump_path)
- return -1;
-
- nsinfo__mountns_enter(dso__nsinfo(dso), &nsc);
- fd = open(filename, O_RDONLY);
- nsinfo__mountns_exit(&nsc);
- if (fd < 0)
- return -1;
-
- if (file__read_maps(fd, /*exe=*/true, find_file_offset, &data,
- &is_64bit) == 0)
- goto err;
-
- if (!args->options->disassembler_style ||
- !strcmp(args->options->disassembler_style, "att"))
- disassembler_style = true;
-
- if (capstone_init(maps__machine(args->ms.maps), &handle, is_64bit, disassembler_style) < 0)
- goto err;
-
- needs_cs_close = true;
-
- buf = malloc(len);
- if (buf == NULL)
- goto err;
-
- count = pread(fd, buf, len, data.offset);
- close(fd);
- fd = -1;
-
- if ((u64)count != len)
- goto err;
-
- line = (u32 *)buf;
-
- /* add the function address and name */
- scnprintf(disasm_buf, sizeof(disasm_buf), "%#"PRIx64" <%s>:",
- start, sym->name);
-
- args->offset = -1;
- args->line = disasm_buf;
- args->line_nr = 0;
- args->fileloc = NULL;
- args->ms.sym = sym;
-
- dl = disasm_line__new(args);
- if (dl == NULL)
- goto err;
-
- annotation_line__add(&dl->al, &notes->src->source);
-
- /*
- * TODO: enable disassm for powerpc
- * count = cs_disasm(handle, buf, len, start, len, &insn);
- *
- * For now, only binary code is saved in disassembled line
- * to be used in "type" and "typeoff" sort keys. Each raw code
- * is 32 bit instruction. So use "len/4" to get the number of
- * entries.
- */
- count = len/4;
-
- for (i = 0, offset = 0; i < count; i++) {
- args->offset = offset;
- sprintf(args->line, "%x", line[i]);
-
- dl = disasm_line__new(args);
- if (dl == NULL)
- break;
-
- annotation_line__add(&dl->al, &notes->src->source);
-
- offset += 4;
- }
-
- /* It failed in the middle */
- if (offset != len) {
- struct list_head *list = &notes->src->source;
-
- /* Discard all lines and fallback to objdump */
- while (!list_empty(list)) {
- dl = list_first_entry(list, struct disasm_line, al.node);
-
- list_del_init(&dl->al.node);
- disasm_line__free(dl);
- }
- count = -1;
- }
-
-out:
- if (needs_cs_close)
- cs_close(&handle);
- free(buf);
- return count < 0 ? count : 0;
-
-err:
- if (fd >= 0)
- close(fd);
- count = -1;
- goto out;
-}
-
-static int symbol__disassemble_capstone(char *filename, struct symbol *sym,
- struct annotate_args *args)
-{
- struct annotation *notes = symbol__annotation(sym);
- struct map *map = args->ms.map;
- u64 start = map__rip_2objdump(map, sym->start);
- u64 len;
- u64 offset;
- int i, count, free_count;
- bool is_64bit = false;
- bool needs_cs_close = false;
- u8 *buf = NULL;
- csh handle;
- cs_insn *insn = NULL;
- char disasm_buf[512];
- struct disasm_line *dl;
-
- if (args->options->objdump_path)
- return -1;
-
- buf = read_symbol(filename, map, sym, &len, &is_64bit);
- if (buf == NULL)
- return -1;
-
- /* add the function address and name */
- scnprintf(disasm_buf, sizeof(disasm_buf), "%#"PRIx64" <%s>:",
- start, sym->name);
-
- args->offset = -1;
- args->line = disasm_buf;
- args->line_nr = 0;
- args->fileloc = NULL;
- args->ms.sym = sym;
-
- dl = disasm_line__new(args);
- if (dl == NULL)
- goto err;
-
- annotation_line__add(&dl->al, &notes->src->source);
-
- if (open_capstone_handle(args, is_64bit, &handle) < 0)
- goto err;
-
- needs_cs_close = true;
-
- free_count = count = cs_disasm(handle, buf, len, start, len, &insn);
- for (i = 0, offset = 0; i < count; i++) {
- int printed;
-
- printed = scnprintf(disasm_buf, sizeof(disasm_buf),
- " %-7s %s",
- insn[i].mnemonic, insn[i].op_str);
- print_capstone_detail(&insn[i], disasm_buf + printed,
- sizeof(disasm_buf) - printed, args,
- start + offset);
-
- args->offset = offset;
- args->line = disasm_buf;
-
- dl = disasm_line__new(args);
- if (dl == NULL)
- goto err;
-
- annotation_line__add(&dl->al, &notes->src->source);
-
- offset += insn[i].size;
- }
-
- /* It failed in the middle: probably due to unknown instructions */
- if (offset != len) {
- struct list_head *list = &notes->src->source;
-
- /* Discard all lines and fallback to objdump */
- while (!list_empty(list)) {
- dl = list_first_entry(list, struct disasm_line, al.node);
-
- list_del_init(&dl->al.node);
- disasm_line__free(dl);
- }
- count = -1;
- }
-
-out:
- if (needs_cs_close) {
- cs_close(&handle);
- if (free_count > 0)
- cs_free(insn, free_count);
- }
- free(buf);
- return count < 0 ? count : 0;
-
-err:
- if (needs_cs_close) {
- struct disasm_line *tmp;
-
- /*
- * It probably failed in the middle of the above loop.
- * Release any resources it might add.
- */
- list_for_each_entry_safe(dl, tmp, &notes->src->source, al.node) {
- list_del(&dl->al.node);
- disasm_line__free(dl);
- }
- }
- count = -1;
- goto out;
-}
-#else // HAVE_LIBCAPSTONE_SUPPORT
-static int symbol__disassemble_capstone(char *filename, struct symbol *sym,
- struct annotate_args *args __maybe_unused)
-{
- symbol__disassembler_missing("capstone", filename, sym);
- return -1;
-}
-
-static int symbol__disassemble_capstone_powerpc(char *filename, struct symbol *sym,
- struct annotate_args *args __maybe_unused)
-{
- symbol__disassembler_missing("capstone powerpc", filename, sym);
- return -1;
-}
-#endif // HAVE_LIBCAPSTONE_SUPPORT
-
static int symbol__disassemble_raw(char *filename, struct symbol *sym,
struct annotate_args *args)
{
@@ -1830,201 +1420,12 @@ err:
goto out;
}
-#ifdef HAVE_LIBLLVM_SUPPORT
-#include <llvm-c/Disassembler.h>
-#include <llvm-c/Target.h>
-#include "util/llvm-c-helpers.h"
-
-struct symbol_lookup_storage {
- u64 branch_addr;
- u64 pcrel_load_addr;
-};
-
-/*
- * Whenever LLVM wants to resolve an address into a symbol, it calls this
- * callback. We don't ever actually _return_ anything (in particular, because
- * it puts quotation marks around what we return), but we use this as a hint
- * that there is a branch or PC-relative address in the expression that we
- * should add some textual annotation for after the instruction. The caller
- * will use this information to add the actual annotation.
- */
-static const char *
-symbol_lookup_callback(void *disinfo, uint64_t value,
- uint64_t *ref_type,
- uint64_t address __maybe_unused,
- const char **ref __maybe_unused)
-{
- struct symbol_lookup_storage *storage = disinfo;
-
- if (*ref_type == LLVMDisassembler_ReferenceType_In_Branch)
- storage->branch_addr = value;
- else if (*ref_type == LLVMDisassembler_ReferenceType_In_PCrel_Load)
- storage->pcrel_load_addr = value;
- *ref_type = LLVMDisassembler_ReferenceType_InOut_None;
- return NULL;
-}
-
-static int symbol__disassemble_llvm(char *filename, struct symbol *sym,
- struct annotate_args *args)
-{
- struct annotation *notes = symbol__annotation(sym);
- struct map *map = args->ms.map;
- struct dso *dso = map__dso(map);
- u64 start = map__rip_2objdump(map, sym->start);
- u8 *buf;
- u64 len;
- u64 pc;
- bool is_64bit;
- char triplet[64];
- char disasm_buf[2048];
- size_t disasm_len;
- struct disasm_line *dl;
- LLVMDisasmContextRef disasm = NULL;
- struct symbol_lookup_storage storage;
- char *line_storage = NULL;
- size_t line_storage_len = 0;
- int ret = -1;
-
- if (args->options->objdump_path)
- return -1;
-
- LLVMInitializeAllTargetInfos();
- LLVMInitializeAllTargetMCs();
- LLVMInitializeAllDisassemblers();
-
- buf = read_symbol(filename, map, sym, &len, &is_64bit);
- if (buf == NULL)
- return -1;
-
- if (arch__is(args->arch, "x86")) {
- if (is_64bit)
- scnprintf(triplet, sizeof(triplet), "x86_64-pc-linux");
- else
- scnprintf(triplet, sizeof(triplet), "i686-pc-linux");
- } else {
- scnprintf(triplet, sizeof(triplet), "%s-linux-gnu",
- args->arch->name);
- }
-
- disasm = LLVMCreateDisasm(triplet, &storage, 0, NULL,
- symbol_lookup_callback);
- if (disasm == NULL)
- goto err;
-
- if (args->options->disassembler_style &&
- !strcmp(args->options->disassembler_style, "intel"))
- LLVMSetDisasmOptions(disasm,
- LLVMDisassembler_Option_AsmPrinterVariant);
-
- /*
- * This needs to be set after AsmPrinterVariant, due to a bug in LLVM;
- * setting AsmPrinterVariant makes a new instruction printer, making it
- * forget about the PrintImmHex flag (which is applied before if both
- * are given to the same call).
- */
- LLVMSetDisasmOptions(disasm, LLVMDisassembler_Option_PrintImmHex);
-
- /* add the function address and name */
- scnprintf(disasm_buf, sizeof(disasm_buf), "%#"PRIx64" <%s>:",
- start, sym->name);
-
- args->offset = -1;
- args->line = disasm_buf;
- args->line_nr = 0;
- args->fileloc = NULL;
- args->ms.sym = sym;
-
- dl = disasm_line__new(args);
- if (dl == NULL)
- goto err;
-
- annotation_line__add(&dl->al, &notes->src->source);
-
- pc = start;
- for (u64 offset = 0; offset < len; ) {
- unsigned int ins_len;
-
- storage.branch_addr = 0;
- storage.pcrel_load_addr = 0;
-
- ins_len = LLVMDisasmInstruction(disasm, buf + offset,
- len - offset, pc,
- disasm_buf, sizeof(disasm_buf));
- if (ins_len == 0)
- goto err;
- disasm_len = strlen(disasm_buf);
-
- if (storage.branch_addr != 0) {
- char *name = llvm_name_for_code(dso, filename,
- storage.branch_addr);
- if (name != NULL) {
- disasm_len += scnprintf(disasm_buf + disasm_len,
- sizeof(disasm_buf) -
- disasm_len,
- " <%s>", name);
- free(name);
- }
- }
- if (storage.pcrel_load_addr != 0) {
- char *name = llvm_name_for_data(dso, filename,
- storage.pcrel_load_addr);
- disasm_len += scnprintf(disasm_buf + disasm_len,
- sizeof(disasm_buf) - disasm_len,
- " # %#"PRIx64,
- storage.pcrel_load_addr);
- if (name) {
- disasm_len += scnprintf(disasm_buf + disasm_len,
- sizeof(disasm_buf) -
- disasm_len,
- " <%s>", name);
- free(name);
- }
- }
-
- args->offset = offset;
- args->line = expand_tabs(disasm_buf, &line_storage,
- &line_storage_len);
- args->line_nr = 0;
- args->fileloc = NULL;
- args->ms.sym = sym;
-
- llvm_addr2line(filename, pc, &args->fileloc,
- (unsigned int *)&args->line_nr, false, NULL);
-
- dl = disasm_line__new(args);
- if (dl == NULL)
- goto err;
-
- annotation_line__add(&dl->al, &notes->src->source);
-
- free(args->fileloc);
- pc += ins_len;
- offset += ins_len;
- }
-
- ret = 0;
-
-err:
- LLVMDisasmDispose(disasm);
- free(buf);
- free(line_storage);
- return ret;
-}
-#else // HAVE_LIBLLVM_SUPPORT
-static int symbol__disassemble_llvm(char *filename, struct symbol *sym,
- struct annotate_args *args __maybe_unused)
-{
- symbol__disassembler_missing("LLVM", filename, sym);
- return -1;
-}
-#endif // HAVE_LIBLLVM_SUPPORT
-
/*
* Possibly create a new version of line with tabs expanded. Returns the
* existing or new line, storage is updated if a new line is allocated. If
* allocation fails then NULL is returned.
*/
-static char *expand_tabs(char *line, char **storage, size_t *storage_len)
+char *expand_tabs(char *line, char **storage, size_t *storage_len)
{
size_t i, src, dst, len, new_storage_len, num_tabs;
char *new_line;
@@ -2079,6 +1480,23 @@ static char *expand_tabs(char *line, char **storage, size_t *storage_len)
return new_line;
}
+static int symbol__disassemble_bpf_image(struct symbol *sym, struct annotate_args *args)
+{
+ struct annotation *notes = symbol__annotation(sym);
+ struct disasm_line *dl;
+
+ args->offset = -1;
+ args->line = strdup("to be implemented");
+ args->line_nr = 0;
+ args->fileloc = NULL;
+ dl = disasm_line__new(args);
+ if (dl)
+ annotation_line__add(&dl->al, &notes->src->source);
+
+ zfree(&args->line);
+ return 0;
+}
+
static int symbol__disassemble_objdump(const char *filename, struct symbol *sym,
struct annotate_args *args)
{
@@ -2103,6 +1521,12 @@ static int symbol__disassemble_objdump(const char *filename, struct symbol *sym,
struct child_process objdump_process;
int err;
+ if (dso__binary_type(dso) == DSO_BINARY_TYPE__BPF_PROG_INFO)
+ return symbol__disassemble_bpf_libbfd(sym, args);
+
+ if (dso__binary_type(dso) == DSO_BINARY_TYPE__BPF_IMAGE)
+ return symbol__disassemble_bpf_image(sym, args);
+
err = asprintf(&command,
"%s %s%s --start-address=0x%016" PRIx64
" --stop-address=0x%016" PRIx64
@@ -2237,11 +1661,7 @@ int symbol__disassemble(struct symbol *sym, struct annotate_args *args)
pr_debug("annotating [%p] %30s : [%p] %30s\n", dso, dso__long_name(dso), sym, sym->name);
- if (dso__binary_type(dso) == DSO_BINARY_TYPE__BPF_PROG_INFO) {
- return symbol__disassemble_bpf(sym, args);
- } else if (dso__binary_type(dso) == DSO_BINARY_TYPE__BPF_IMAGE) {
- return symbol__disassemble_bpf_image(sym, args);
- } else if (dso__binary_type(dso) == DSO_BINARY_TYPE__NOT_FOUND) {
+ if (dso__binary_type(dso) == DSO_BINARY_TYPE__NOT_FOUND) {
return SYMBOL_ANNOTATE_ERRNO__COULDNT_DETERMINE_FILE_TYPE;
} else if (dso__is_kcore(dso)) {
kce.addr = map__rip_2objdump(map, sym->start);
diff --git a/tools/perf/util/disasm.h b/tools/perf/util/disasm.h
index c135db2416b5..d2cb555e4a3b 100644
--- a/tools/perf/util/disasm.h
+++ b/tools/perf/util/disasm.h
@@ -98,7 +98,6 @@ struct ins_ops {
struct annotate_args {
struct arch *arch;
struct map_symbol ms;
- struct evsel *evsel;
struct annotation_options *options;
s64 offset;
char *line;
@@ -110,13 +109,10 @@ struct arch *arch__find(const char *name);
bool arch__is(struct arch *arch, const char *name);
struct ins_ops *ins__find(struct arch *arch, const char *name, struct disasm_line *dl);
-int ins__scnprintf(struct ins *ins, char *bf, size_t size,
- struct ins_operands *ops, int max_ins_name);
bool ins__is_call(const struct ins *ins);
bool ins__is_jump(const struct ins *ins);
bool ins__is_fused(struct arch *arch, const char *ins1, const char *ins2);
-bool ins__is_nop(const struct ins *ins);
bool ins__is_ret(const struct ins *ins);
bool ins__is_lock(const struct ins *ins);
@@ -128,4 +124,6 @@ int disasm_line__scnprintf(struct disasm_line *dl, char *bf, size_t size,
int symbol__disassemble(struct symbol *sym, struct annotate_args *args);
+char *expand_tabs(char *line, char **storage, size_t *storage_len);
+
#endif /* __PERF_UTIL_DISASM_H */
diff --git a/tools/perf/util/disasm_bpf.c b/tools/perf/util/disasm_bpf.c
deleted file mode 100644
index 1fee71c79b62..000000000000
--- a/tools/perf/util/disasm_bpf.c
+++ /dev/null
@@ -1,195 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-
-#include "util/annotate.h"
-#include "util/disasm_bpf.h"
-#include "util/symbol.h"
-#include <linux/zalloc.h>
-#include <string.h>
-
-#if defined(HAVE_LIBBFD_SUPPORT) && defined(HAVE_LIBBPF_SUPPORT)
-#define PACKAGE "perf"
-#include <bfd.h>
-#include <bpf/bpf.h>
-#include <bpf/btf.h>
-#include <bpf/libbpf.h>
-#include <dis-asm.h>
-#include <errno.h>
-#include <linux/btf.h>
-#include <tools/dis-asm-compat.h>
-
-#include "util/bpf-event.h"
-#include "util/bpf-utils.h"
-#include "util/debug.h"
-#include "util/dso.h"
-#include "util/map.h"
-#include "util/env.h"
-#include "util/util.h"
-
-int symbol__disassemble_bpf(struct symbol *sym, struct annotate_args *args)
-{
- struct annotation *notes = symbol__annotation(sym);
- struct bpf_prog_linfo *prog_linfo = NULL;
- struct bpf_prog_info_node *info_node;
- int len = sym->end - sym->start;
- disassembler_ftype disassemble;
- struct map *map = args->ms.map;
- struct perf_bpil *info_linear;
- struct disassemble_info info;
- struct dso *dso = map__dso(map);
- int pc = 0, count, sub_id;
- struct btf *btf = NULL;
- char tpath[PATH_MAX];
- size_t buf_size;
- int nr_skip = 0;
- char *buf;
- bfd *bfdf;
- int ret;
- FILE *s;
-
- if (dso__binary_type(dso) != DSO_BINARY_TYPE__BPF_PROG_INFO)
- return SYMBOL_ANNOTATE_ERRNO__BPF_INVALID_FILE;
-
- pr_debug("%s: handling sym %s addr %" PRIx64 " len %" PRIx64 "\n", __func__,
- sym->name, sym->start, sym->end - sym->start);
-
- memset(tpath, 0, sizeof(tpath));
- perf_exe(tpath, sizeof(tpath));
-
- bfdf = bfd_openr(tpath, NULL);
- if (bfdf == NULL)
- abort();
-
- if (!bfd_check_format(bfdf, bfd_object))
- abort();
-
- s = open_memstream(&buf, &buf_size);
- if (!s) {
- ret = errno;
- goto out;
- }
- init_disassemble_info_compat(&info, s,
- (fprintf_ftype) fprintf,
- fprintf_styled);
- info.arch = bfd_get_arch(bfdf);
- info.mach = bfd_get_mach(bfdf);
-
- info_node = perf_env__find_bpf_prog_info(dso__bpf_prog(dso)->env,
- dso__bpf_prog(dso)->id);
- if (!info_node) {
- ret = SYMBOL_ANNOTATE_ERRNO__BPF_MISSING_BTF;
- goto out;
- }
- info_linear = info_node->info_linear;
- sub_id = dso__bpf_prog(dso)->sub_id;
-
- info.buffer = (void *)(uintptr_t)(info_linear->info.jited_prog_insns);
- info.buffer_length = info_linear->info.jited_prog_len;
-
- if (info_linear->info.nr_line_info)
- prog_linfo = bpf_prog_linfo__new(&info_linear->info);
-
- if (info_linear->info.btf_id) {
- struct btf_node *node;
-
- node = perf_env__find_btf(dso__bpf_prog(dso)->env,
- info_linear->info.btf_id);
- if (node)
- btf = btf__new((__u8 *)(node->data),
- node->data_size);
- }
-
- disassemble_init_for_target(&info);
-
-#ifdef DISASM_FOUR_ARGS_SIGNATURE
- disassemble = disassembler(info.arch,
- bfd_big_endian(bfdf),
- info.mach,
- bfdf);
-#else
- disassemble = disassembler(bfdf);
-#endif
- if (disassemble == NULL)
- abort();
-
- fflush(s);
- do {
- const struct bpf_line_info *linfo = NULL;
- struct disasm_line *dl;
- size_t prev_buf_size;
- const char *srcline;
- u64 addr;
-
- addr = pc + ((u64 *)(uintptr_t)(info_linear->info.jited_ksyms))[sub_id];
- count = disassemble(pc, &info);
-
- if (prog_linfo)
- linfo = bpf_prog_linfo__lfind_addr_func(prog_linfo,
- addr, sub_id,
- nr_skip);
-
- if (linfo && btf) {
- srcline = btf__name_by_offset(btf, linfo->line_off);
- nr_skip++;
- } else
- srcline = NULL;
-
- fprintf(s, "\n");
- prev_buf_size = buf_size;
- fflush(s);
-
- if (!annotate_opts.hide_src_code && srcline) {
- args->offset = -1;
- args->line = strdup(srcline);
- args->line_nr = 0;
- args->fileloc = NULL;
- args->ms.sym = sym;
- dl = disasm_line__new(args);
- if (dl) {
- annotation_line__add(&dl->al,
- &notes->src->source);
- }
- }
-
- args->offset = pc;
- args->line = buf + prev_buf_size;
- args->line_nr = 0;
- args->fileloc = NULL;
- args->ms.sym = sym;
- dl = disasm_line__new(args);
- if (dl)
- annotation_line__add(&dl->al, &notes->src->source);
-
- pc += count;
- } while (count > 0 && pc < len);
-
- ret = 0;
-out:
- free(prog_linfo);
- btf__free(btf);
- fclose(s);
- bfd_close(bfdf);
- return ret;
-}
-#else // defined(HAVE_LIBBFD_SUPPORT) && defined(HAVE_LIBBPF_SUPPORT)
-int symbol__disassemble_bpf(struct symbol *sym __maybe_unused, struct annotate_args *args __maybe_unused)
-{
- return SYMBOL_ANNOTATE_ERRNO__NO_LIBOPCODES_FOR_BPF;
-}
-#endif // defined(HAVE_LIBBFD_SUPPORT) && defined(HAVE_LIBBPF_SUPPORT)
-
-int symbol__disassemble_bpf_image(struct symbol *sym, struct annotate_args *args)
-{
- struct annotation *notes = symbol__annotation(sym);
- struct disasm_line *dl;
-
- args->offset = -1;
- args->line = strdup("to be implemented");
- args->line_nr = 0;
- args->fileloc = NULL;
- dl = disasm_line__new(args);
- if (dl)
- annotation_line__add(&dl->al, &notes->src->source);
-
- zfree(&args->line);
- return 0;
-}
diff --git a/tools/perf/util/disasm_bpf.h b/tools/perf/util/disasm_bpf.h
deleted file mode 100644
index 2ecb19545388..000000000000
--- a/tools/perf/util/disasm_bpf.h
+++ /dev/null
@@ -1,12 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-
-#ifndef __PERF_DISASM_BPF_H
-#define __PERF_DISASM_BPF_H
-
-struct symbol;
-struct annotate_args;
-
-int symbol__disassemble_bpf(struct symbol *sym, struct annotate_args *args);
-int symbol__disassemble_bpf_image(struct symbol *sym, struct annotate_args *args);
-
-#endif /* __PERF_DISASM_BPF_H */
diff --git a/tools/perf/util/drm_pmu.c b/tools/perf/util/drm_pmu.c
index 988890f37ba7..b48a375e4584 100644
--- a/tools/perf/util/drm_pmu.c
+++ b/tools/perf/util/drm_pmu.c
@@ -10,6 +10,7 @@
#include <api/io.h>
#include <ctype.h>
#include <dirent.h>
+#include <errno.h>
#include <fcntl.h>
#include <unistd.h>
#include <linux/unistd.h>
@@ -119,7 +120,7 @@ static struct drm_pmu *add_drm_pmu(struct list_head *pmus, char *line, size_t li
return NULL;
}
- drm->pmu.cpus = perf_cpu_map__new("0");
+ drm->pmu.cpus = perf_cpu_map__new_int(0);
if (!drm->pmu.cpus) {
perf_pmu__delete(&drm->pmu);
return NULL;
@@ -458,8 +459,10 @@ static int for_each_drm_fdinfo_in_dir(int (*cb)(void *args, int fdinfo_dir_fd, c
}
ret = cb(args, fdinfo_dir_fd, fd_entry->d_name);
if (ret)
- return ret;
+ goto close_fdinfo;
}
+
+close_fdinfo:
if (fdinfo_dir_fd != -1)
close(fdinfo_dir_fd);
closedir(fd_dir);
diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c
index 282e3af85d5a..344e689567ee 100644
--- a/tools/perf/util/dso.c
+++ b/tools/perf/util/dso.c
@@ -1798,3 +1798,115 @@ bool is_perf_pid_map_name(const char *dso_name)
return perf_pid_map_tid(dso_name, &tid);
}
+
+struct find_file_offset_data {
+ u64 ip;
+ u64 offset;
+};
+
+/* This will be called for each PHDR in an ELF binary */
+static int find_file_offset(u64 start, u64 len, u64 pgoff, void *arg)
+{
+ struct find_file_offset_data *data = arg;
+
+ if (start <= data->ip && data->ip < start + len) {
+ data->offset = pgoff + data->ip - start;
+ return 1;
+ }
+ return 0;
+}
+
+static const u8 *__dso__read_symbol(struct dso *dso, const char *symfs_filename,
+ u64 start, size_t len,
+ u8 **out_buf, u64 *out_buf_len, bool *is_64bit)
+{
+ struct nscookie nsc;
+ int fd;
+ ssize_t count;
+ struct find_file_offset_data data = {
+ .ip = start,
+ };
+ u8 *code_buf = NULL;
+ int saved_errno;
+
+ nsinfo__mountns_enter(dso__nsinfo(dso), &nsc);
+ fd = open(symfs_filename, O_RDONLY);
+ saved_errno = errno;
+ nsinfo__mountns_exit(&nsc);
+ if (fd < 0) {
+ errno = saved_errno;
+ return NULL;
+ }
+ if (file__read_maps(fd, /*exe=*/true, find_file_offset, &data, is_64bit) <= 0) {
+ close(fd);
+ errno = ENOENT;
+ return NULL;
+ }
+ code_buf = malloc(len);
+ if (code_buf == NULL) {
+ close(fd);
+ errno = ENOMEM;
+ return NULL;
+ }
+ count = pread(fd, code_buf, len, data.offset);
+ saved_errno = errno;
+ close(fd);
+ if ((u64)count != len) {
+ free(code_buf);
+ errno = saved_errno;
+ return NULL;
+ }
+ *out_buf = code_buf;
+ *out_buf_len = len;
+ return code_buf;
+}
+
+/*
+ * Read a symbol into memory for disassembly by a library like capstone of
+ * libLLVM. If memory is allocated out_buf holds it.
+ */
+const u8 *dso__read_symbol(struct dso *dso, const char *symfs_filename,
+ const struct map *map, const struct symbol *sym,
+ u8 **out_buf, u64 *out_buf_len, bool *is_64bit)
+{
+ u64 start = map__rip_2objdump(map, sym->start);
+ u64 end = map__rip_2objdump(map, sym->end);
+ size_t len = end - start;
+
+ *out_buf = NULL;
+ *out_buf_len = 0;
+ *is_64bit = false;
+
+ if (dso__binary_type(dso) == DSO_BINARY_TYPE__BPF_IMAGE) {
+ /*
+ * Note, there is fallback BPF image disassembly in the objdump
+ * version but it currently does nothing.
+ */
+ errno = EOPNOTSUPP;
+ return NULL;
+ }
+ if (dso__binary_type(dso) == DSO_BINARY_TYPE__BPF_PROG_INFO) {
+#ifdef HAVE_LIBBPF_SUPPORT
+ struct bpf_prog_info_node *info_node;
+ struct perf_bpil *info_linear;
+
+ *is_64bit = sizeof(void *) == sizeof(u64);
+ info_node = perf_env__find_bpf_prog_info(dso__bpf_prog(dso)->env,
+ dso__bpf_prog(dso)->id);
+ if (!info_node) {
+ errno = SYMBOL_ANNOTATE_ERRNO__BPF_MISSING_BTF;
+ return NULL;
+ }
+ info_linear = info_node->info_linear;
+ assert(len <= info_linear->info.jited_prog_len);
+ *out_buf_len = len;
+ return (const u8 *)(uintptr_t)(info_linear->info.jited_prog_insns);
+#else
+ pr_debug("No BPF program disassembly support\n");
+ errno = EOPNOTSUPP;
+ return NULL;
+#endif
+ }
+ return __dso__read_symbol(dso, symfs_filename, start, len,
+ out_buf, out_buf_len, is_64bit);
+}
diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h
index 3457d713d3c5..f8ccb9816b89 100644
--- a/tools/perf/util/dso.h
+++ b/tools/perf/util/dso.h
@@ -10,6 +10,7 @@
#include <stdio.h>
#include <linux/bitops.h>
#include "build-id.h"
+#include "debuginfo.h"
#include "mutex.h"
#include <internal/rc_check.h>
@@ -299,6 +300,7 @@ DECLARE_RC_STRUCT(dso) {
u8 hit:1;
u8 annotate_warned:1;
u8 auxtrace_warned:1;
+ u8 debuginfo_warned:1;
u8 short_name_allocated:1;
u8 long_name_allocated:1;
u8 is_64_bit:1;
@@ -362,6 +364,16 @@ static inline void dso__set_annotate_warned(struct dso *dso)
RC_CHK_ACCESS(dso)->annotate_warned = 1;
}
+static inline bool dso__debuginfo_warned(const struct dso *dso)
+{
+ return RC_CHK_ACCESS(dso)->debuginfo_warned;
+}
+
+static inline void dso__set_debuginfo_warned(struct dso *dso)
+{
+ RC_CHK_ACCESS(dso)->debuginfo_warned = 1;
+}
+
static inline bool dso__auxtrace_warned(const struct dso *dso)
{
return RC_CHK_ACCESS(dso)->auxtrace_warned;
@@ -903,4 +915,17 @@ u64 dso__findnew_global_type(struct dso *dso, u64 addr, u64 offset);
bool perf_pid_map_tid(const char *dso_name, int *tid);
bool is_perf_pid_map_name(const char *dso_name);
+/*
+ * In the future, we may get debuginfo using build-ID (w/o path).
+ * Add this helper is for the smooth conversion.
+ */
+static inline struct debuginfo *dso__debuginfo(struct dso *dso)
+{
+ return debuginfo__new(dso__long_name(dso));
+}
+
+const u8 *dso__read_symbol(struct dso *dso, const char *symfs_filename,
+ const struct map *map, const struct symbol *sym,
+ u8 **out_buf, u64 *out_buf_len, bool *is_64bit);
+
#endif /* __PERF_DSO */
diff --git a/tools/perf/util/dwarf-aux.c b/tools/perf/util/dwarf-aux.c
index 559c953ca172..9267af204c7d 100644
--- a/tools/perf/util/dwarf-aux.c
+++ b/tools/perf/util/dwarf-aux.c
@@ -1388,18 +1388,19 @@ struct find_var_data {
#define DWARF_OP_DIRECT_REGS 32
static bool match_var_offset(Dwarf_Die *die_mem, struct find_var_data *data,
- u64 addr_offset, u64 addr_type, bool is_pointer)
+ s64 addr_offset, s64 addr_type, bool is_pointer)
{
Dwarf_Die type_die;
Dwarf_Word size;
+ s64 offset = addr_offset - addr_type;
- if (addr_offset == addr_type) {
+ if (offset == 0) {
/* Update offset relative to the start of the variable */
data->offset = 0;
return true;
}
- if (addr_offset < addr_type)
+ if (offset < 0)
return false;
if (die_get_real_type(die_mem, &type_die) == NULL)
@@ -1414,14 +1415,42 @@ static bool match_var_offset(Dwarf_Die *die_mem, struct find_var_data *data,
if (dwarf_aggregate_size(&type_die, &size) < 0)
return false;
- if (addr_offset >= addr_type + size)
+ if ((u64)offset >= size)
return false;
/* Update offset relative to the start of the variable */
- data->offset = addr_offset - addr_type;
+ data->offset = offset;
return true;
}
+/**
+ * is_breg_access_indirect - Check if breg based access implies type
+ * dereference
+ * @ops: DWARF operations array
+ * @nops: Number of operations in @ops
+ *
+ * Returns true if the DWARF expression evaluates to the variable's
+ * value, so the memory access on that register needs type dereference.
+ * Returns false if the expression evaluates to the variable's address.
+ * This is called after check_allowed_ops.
+ */
+static bool is_breg_access_indirect(Dwarf_Op *ops, size_t nops)
+{
+ /* only the base register */
+ if (nops == 1)
+ return false;
+
+ if (nops == 2 && ops[1].atom == DW_OP_stack_value)
+ return true;
+
+ if (nops == 3 && (ops[1].atom == DW_OP_deref ||
+ ops[1].atom == DW_OP_deref_size) &&
+ ops[2].atom == DW_OP_stack_value)
+ return false;
+ /* unreachable, OP not supported */
+ return false;
+}
+
/* Only checks direct child DIEs in the given scope. */
static int __die_find_var_reg_cb(Dwarf_Die *die_mem, void *arg)
{
@@ -1450,7 +1479,7 @@ static int __die_find_var_reg_cb(Dwarf_Die *die_mem, void *arg)
if (data->is_fbreg && ops->atom == DW_OP_fbreg &&
check_allowed_ops(ops, nops) &&
match_var_offset(die_mem, data, data->offset, ops->number,
- /*is_pointer=*/false))
+ is_breg_access_indirect(ops, nops)))
return DIE_FIND_CB_END;
/* Only match with a simple case */
@@ -1462,11 +1491,11 @@ static int __die_find_var_reg_cb(Dwarf_Die *die_mem, void *arg)
/*is_pointer=*/true))
return DIE_FIND_CB_END;
- /* Local variables accessed by a register + offset */
+ /* variables accessed by a register + offset */
if (ops->atom == (DW_OP_breg0 + data->reg) &&
check_allowed_ops(ops, nops) &&
match_var_offset(die_mem, data, data->offset, ops->number,
- /*is_pointer=*/false))
+ is_breg_access_indirect(ops, nops)))
return DIE_FIND_CB_END;
} else {
/* pointer variables saved in a register 32 or above */
@@ -1476,11 +1505,11 @@ static int __die_find_var_reg_cb(Dwarf_Die *die_mem, void *arg)
/*is_pointer=*/true))
return DIE_FIND_CB_END;
- /* Local variables accessed by a register + offset */
+ /* variables accessed by a register + offset */
if (ops->atom == DW_OP_bregx && data->reg == ops->number &&
check_allowed_ops(ops, nops) &&
match_var_offset(die_mem, data, data->offset, ops->number2,
- /*is_poitner=*/false))
+ is_breg_access_indirect(ops, nops)))
return DIE_FIND_CB_END;
}
}
@@ -1598,13 +1627,22 @@ static int __die_collect_vars_cb(Dwarf_Die *die_mem, void *arg)
if (!check_allowed_ops(ops, nops))
return DIE_FIND_CB_SIBLING;
- if (die_get_real_type(die_mem, &type_die) == NULL)
+ if (__die_get_real_type(die_mem, &type_die) == NULL)
return DIE_FIND_CB_SIBLING;
vt = malloc(sizeof(*vt));
if (vt == NULL)
return DIE_FIND_CB_END;
+ /* Usually a register holds the value of a variable */
+ vt->is_reg_var_addr = false;
+
+ if (((ops->atom >= DW_OP_breg0 && ops->atom <= DW_OP_breg31) ||
+ ops->atom == DW_OP_bregx || ops->atom == DW_OP_fbreg) &&
+ !is_breg_access_indirect(ops, nops))
+ /* The register contains an address of the variable. */
+ vt->is_reg_var_addr = true;
+
vt->die_off = dwarf_dieoffset(&type_die);
vt->addr = start;
vt->reg = reg_from_dwarf_op(ops);
@@ -1920,6 +1958,7 @@ struct find_scope_data {
static int __die_find_scope_cb(Dwarf_Die *die_mem, void *arg)
{
struct find_scope_data *data = arg;
+ int tag = dwarf_tag(die_mem);
if (dwarf_haspc(die_mem, data->pc)) {
Dwarf_Die *tmp;
@@ -1933,6 +1972,14 @@ static int __die_find_scope_cb(Dwarf_Die *die_mem, void *arg)
data->nr++;
return DIE_FIND_CB_CHILD;
}
+
+ /*
+ * If the DIE doesn't have the PC, we still need to check its children
+ * and siblings if it's a container like a namespace.
+ */
+ if (tag == DW_TAG_namespace)
+ return DIE_FIND_CB_CONTINUE;
+
return DIE_FIND_CB_SIBLING;
}
diff --git a/tools/perf/util/dwarf-aux.h b/tools/perf/util/dwarf-aux.h
index 892c8c5c23fc..cd481ec9c5a1 100644
--- a/tools/perf/util/dwarf-aux.h
+++ b/tools/perf/util/dwarf-aux.h
@@ -148,6 +148,8 @@ struct die_var_type {
u64 addr;
int reg;
int offset;
+ /* Whether the register holds a address to the type */
+ bool is_reg_var_addr;
};
/* Return type info of a member at offset */
diff --git a/tools/perf/util/env.c b/tools/perf/util/env.c
index c8c248754621..f1626d2032cd 100644
--- a/tools/perf/util/env.c
+++ b/tools/perf/util/env.c
@@ -802,3 +802,25 @@ bool x86__is_amd_cpu(void)
return is_amd;
}
+
+bool perf_env__is_x86_intel_cpu(struct perf_env *env)
+{
+ static int is_intel; /* 0: Uninitialized, 1: Yes, -1: No */
+
+ if (is_intel == 0)
+ is_intel = env->cpuid && strstarts(env->cpuid, "GenuineIntel") ? 1 : -1;
+
+ return is_intel >= 1 ? true : false;
+}
+
+bool x86__is_intel_cpu(void)
+{
+ struct perf_env env = { .total_mem = 0, };
+ bool is_intel;
+
+ perf_env__cpuid(&env);
+ is_intel = perf_env__is_x86_intel_cpu(&env);
+ perf_env__exit(&env);
+
+ return is_intel;
+}
diff --git a/tools/perf/util/env.h b/tools/perf/util/env.h
index e00179787a34..9977b85523a8 100644
--- a/tools/perf/util/env.h
+++ b/tools/perf/util/env.h
@@ -201,5 +201,7 @@ void perf_env__find_br_cntr_info(struct perf_env *env,
bool x86__is_amd_cpu(void);
bool perf_env__is_x86_amd_cpu(struct perf_env *env);
+bool x86__is_intel_cpu(void);
+bool perf_env__is_x86_intel_cpu(struct perf_env *env);
#endif /* __PERF_ENV_H */
diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
index fcf44149feb2..4c92cc1a952c 100644
--- a/tools/perf/util/event.c
+++ b/tools/perf/util/event.c
@@ -61,6 +61,7 @@ static const char *perf_event__names[] = {
[PERF_RECORD_CGROUP] = "CGROUP",
[PERF_RECORD_TEXT_POKE] = "TEXT_POKE",
[PERF_RECORD_AUX_OUTPUT_HW_ID] = "AUX_OUTPUT_HW_ID",
+ [PERF_RECORD_CALLCHAIN_DEFERRED] = "CALLCHAIN_DEFERRED",
[PERF_RECORD_HEADER_ATTR] = "ATTR",
[PERF_RECORD_HEADER_EVENT_TYPE] = "EVENT_TYPE",
[PERF_RECORD_HEADER_TRACING_DATA] = "TRACING_DATA",
diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h
index e40d16d3246c..64c63b59d617 100644
--- a/tools/perf/util/event.h
+++ b/tools/perf/util/event.h
@@ -117,6 +117,7 @@ enum perf_synth_id {
PERF_SYNTH_INTEL_PSB,
PERF_SYNTH_INTEL_EVT,
PERF_SYNTH_INTEL_IFLAG_CHG,
+ PERF_SYNTH_POWERPC_VPA_DTL,
};
/*
@@ -254,6 +255,25 @@ struct perf_synth_intel_iflag_chg {
u64 branch_ip; /* If via_branch */
};
+/*
+ * The powerpc VPA DTL entries are of below format
+ */
+struct powerpc_vpadtl_entry {
+ u8 dispatch_reason;
+ u8 preempt_reason;
+ u16 processor_id;
+ u32 enqueue_to_dispatch_time;
+ u32 ready_to_enqueue_time;
+ u32 waiting_to_ready_time;
+ u64 timebase;
+ u64 fault_addr;
+ u64 srr0;
+ u64 srr1;
+};
+
+extern const char *dispatch_reasons[11];
+extern const char *preempt_reasons[10];
+
static inline void *perf_synth__raw_data(void *p)
{
return p + 4;
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index 80d8387e6b97..03674d2cbd01 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -85,6 +85,7 @@ void evlist__init(struct evlist *evlist, struct perf_cpu_map *cpus,
evlist->ctl_fd.pos = -1;
evlist->nr_br_cntr = -1;
metricgroup__rblist_init(&evlist->metric_events);
+ INIT_LIST_HEAD(&evlist->deferred_samples);
}
struct evlist *evlist__new(void)
@@ -101,16 +102,24 @@ struct evlist *evlist__new_default(void)
{
struct evlist *evlist = evlist__new();
bool can_profile_kernel;
- int err;
+ struct perf_pmu *pmu = NULL;
if (!evlist)
return NULL;
can_profile_kernel = perf_event_paranoid_check(1);
- err = parse_event(evlist, can_profile_kernel ? "cycles:P" : "cycles:Pu");
- if (err) {
- evlist__delete(evlist);
- return NULL;
+
+ while ((pmu = perf_pmus__scan_core(pmu)) != NULL) {
+ char buf[256];
+ int err;
+
+ snprintf(buf, sizeof(buf), "%s/cycles/%s", pmu->name,
+ can_profile_kernel ? "P" : "Pu");
+ err = parse_event(evlist, buf);
+ if (err) {
+ evlist__delete(evlist);
+ return NULL;
+ }
}
if (evlist->core.nr_entries > 1) {
diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
index 5e71e3dc6042..911834ae7c2a 100644
--- a/tools/perf/util/evlist.h
+++ b/tools/perf/util/evlist.h
@@ -92,6 +92,8 @@ struct evlist {
* of struct metric_expr.
*/
struct rblist metric_events;
+ /* samples with deferred_callchain would wait here. */
+ struct list_head deferred_samples;
};
struct evsel_str_handler {
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index d264c143b592..9cd706f62793 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -402,11 +402,11 @@ void evsel__init(struct evsel *evsel,
evsel->sample_size = __evsel__sample_size(attr->sample_type);
evsel__calc_id_pos(evsel);
evsel->cmdline_group_boundary = false;
- evsel->metric_events = NULL;
evsel->per_pkg_mask = NULL;
evsel->collect_stat = false;
evsel->group_pmu_name = NULL;
evsel->skippable = false;
+ evsel->supported = true;
evsel->alternate_hw_config = PERF_COUNT_HW_MAX;
evsel->script_output_type = -1; // FIXME: OUTPUT_TYPE_UNSET, see builtin-script.c
}
@@ -538,6 +538,7 @@ struct evsel *evsel__clone(struct evsel *dest, struct evsel *orig)
#endif
evsel->handler = orig->handler;
evsel->core.leader = orig->core.leader;
+ evsel->metric_leader = orig->metric_leader;
evsel->max_events = orig->max_events;
zfree(&evsel->unit);
@@ -1065,6 +1066,9 @@ static void __evsel__config_callchain(struct evsel *evsel, struct record_opts *o
pr_info("Disabling user space callchains for function trace event.\n");
attr->exclude_callchain_user = 1;
}
+
+ if (param->defer && !attr->exclude_callchain_user)
+ attr->defer_callchain = 1;
}
void evsel__config_callchain(struct evsel *evsel, struct record_opts *opts,
@@ -1091,6 +1095,71 @@ static void evsel__reset_callgraph(struct evsel *evsel, struct callchain_param *
}
}
+static void evsel__apply_ratio_to_prev(struct evsel *evsel,
+ struct perf_event_attr *attr,
+ struct record_opts *opts,
+ const char *buf)
+{
+ struct perf_event_attr *prev_attr = NULL;
+ struct evsel *evsel_prev = NULL;
+ u64 type = evsel->core.attr.sample_type;
+ u64 prev_type = 0;
+ double rtp;
+
+ rtp = strtod(buf, NULL);
+ if (rtp <= 0) {
+ pr_err("Invalid ratio-to-prev value %lf\n", rtp);
+ return;
+ }
+ if (evsel == evsel__leader(evsel)) {
+ pr_err("Invalid use of ratio-to-prev term without preceding element in group\n");
+ return;
+ }
+ if (!evsel->pmu->is_core) {
+ pr_err("Event using ratio-to-prev term must have a core PMU\n");
+ return;
+ }
+
+ evsel_prev = evsel__prev(evsel);
+ if (!evsel_prev) {
+ pr_err("Previous event does not exist.\n");
+ return;
+ }
+
+ if (evsel_prev->pmu->type != evsel->pmu->type) {
+ pr_err("Compared events (\"%s\", \"%s\") must have same PMU\n",
+ evsel->name, evsel_prev->name);
+ return;
+ }
+
+ prev_attr = &evsel_prev->core.attr;
+ prev_type = evsel_prev->core.attr.sample_type;
+
+ if (!(prev_type & PERF_SAMPLE_PERIOD)) {
+ attr->sample_period = prev_attr->sample_period * rtp;
+ attr->freq = 0;
+ evsel__reset_sample_bit(evsel, PERIOD);
+ } else if (!(type & PERF_SAMPLE_PERIOD)) {
+ prev_attr->sample_period = attr->sample_period / rtp;
+ prev_attr->freq = 0;
+ evsel__reset_sample_bit(evsel_prev, PERIOD);
+ } else {
+ if (opts->user_interval != ULLONG_MAX) {
+ prev_attr->sample_period = opts->user_interval;
+ attr->sample_period = prev_attr->sample_period * rtp;
+ prev_attr->freq = 0;
+ attr->freq = 0;
+ evsel__reset_sample_bit(evsel_prev, PERIOD);
+ evsel__reset_sample_bit(evsel, PERIOD);
+ } else {
+ pr_err("Event period term or count (-c) must be set when using ratio-to-prev term.\n");
+ return;
+ }
+ }
+
+ arch_evsel__apply_ratio_to_prev(evsel, attr);
+}
+
static void evsel__apply_config_terms(struct evsel *evsel,
struct record_opts *opts, bool track)
{
@@ -1104,6 +1173,7 @@ static void evsel__apply_config_terms(struct evsel *evsel,
u32 dump_size = 0;
int max_stack = 0;
const char *callgraph_buf = NULL;
+ const char *rtp_buf = NULL;
list_for_each_entry(term, config_terms, list) {
switch (term->type) {
@@ -1174,6 +1244,9 @@ static void evsel__apply_config_terms(struct evsel *evsel,
break;
case EVSEL__CONFIG_TERM_CFG_CHG:
break;
+ case EVSEL__CONFIG_TERM_RATIO_TO_PREV:
+ rtp_buf = term->val.str;
+ break;
default:
break;
}
@@ -1225,6 +1298,8 @@ static void evsel__apply_config_terms(struct evsel *evsel,
evsel__config_callchain(evsel, opts, &param);
}
}
+ if (rtp_buf)
+ evsel__apply_ratio_to_prev(evsel, attr, opts, rtp_buf);
}
struct evsel_config_term *__evsel__get_config_term(struct evsel *evsel, enum evsel_term_type type)
@@ -1249,6 +1324,11 @@ void __weak arch__post_evsel_config(struct evsel *evsel __maybe_unused,
{
}
+void __weak arch_evsel__apply_ratio_to_prev(struct evsel *evsel __maybe_unused,
+ struct perf_event_attr *attr __maybe_unused)
+{
+}
+
static void evsel__set_default_freq_period(struct record_opts *opts,
struct perf_event_attr *attr)
{
@@ -1435,6 +1515,7 @@ void evsel__config(struct evsel *evsel, struct record_opts *opts,
attr->mmap2 = track && !perf_missing_features.mmap2;
attr->comm = track;
attr->build_id = track && opts->build_id;
+ attr->defer_output = track && callchain && callchain->defer;
/*
* ksymbol is tracked separately with text poke because it needs to be
@@ -1677,7 +1758,6 @@ void evsel__exit(struct evsel *evsel)
evsel__zero_per_pkg(evsel);
hashmap__free(evsel->per_pkg_mask);
evsel->per_pkg_mask = NULL;
- zfree(&evsel->metric_events);
if (evsel__priv_destructor)
evsel__priv_destructor(evsel->priv);
perf_evsel__object.fini(evsel);
@@ -1863,16 +1943,19 @@ bool __evsel__match(const struct evsel *evsel, u32 type, u64 config)
u32 e_type = evsel->core.attr.type;
u64 e_config = evsel->core.attr.config;
- if (e_type != type) {
- return type == PERF_TYPE_HARDWARE && evsel->pmu && evsel->pmu->is_core &&
- evsel->alternate_hw_config == config;
- }
-
- if ((type == PERF_TYPE_HARDWARE || type == PERF_TYPE_HW_CACHE) &&
- perf_pmus__supports_extended_type())
+ if (e_type == type && e_config == config)
+ return true;
+ if (type != PERF_TYPE_HARDWARE && type != PERF_TYPE_HW_CACHE)
+ return false;
+ if ((e_type == PERF_TYPE_HARDWARE || e_type == PERF_TYPE_HW_CACHE) &&
+ perf_pmus__supports_extended_type())
e_config &= PERF_HW_EVENT_MASK;
-
- return e_config == config;
+ if (e_type == type && e_config == config)
+ return true;
+ if (type == PERF_TYPE_HARDWARE && evsel->pmu && evsel->pmu->is_core &&
+ evsel->alternate_hw_config == config)
+ return true;
+ return false;
}
int evsel__read_counter(struct evsel *evsel, int cpu_map_idx, int thread)
@@ -1941,7 +2024,7 @@ static int get_group_fd(struct evsel *evsel, int cpu_map_idx, int thread)
struct evsel *leader = evsel__leader(evsel);
int fd;
- if (evsel__is_group_leader(evsel))
+ if (!evsel->supported || evsel__is_group_leader(evsel))
return -1;
/*
@@ -1955,7 +2038,7 @@ static int get_group_fd(struct evsel *evsel, int cpu_map_idx, int thread)
return -1;
fd = FD(leader, cpu_map_idx, thread);
- BUG_ON(fd == -1 && !leader->skippable);
+ BUG_ON(fd == -1 && leader->supported);
/*
* When the leader has been skipped, return -2 to distinguish from no
@@ -2121,6 +2204,10 @@ static int __evsel__prepare_open(struct evsel *evsel, struct perf_cpu_map *cpus,
static void evsel__disable_missing_features(struct evsel *evsel)
{
+ if (perf_missing_features.defer_callchain && evsel->core.attr.defer_callchain)
+ evsel->core.attr.defer_callchain = 0;
+ if (perf_missing_features.defer_callchain && evsel->core.attr.defer_output)
+ evsel->core.attr.defer_output = 0;
if (perf_missing_features.inherit_sample_read && evsel->core.attr.inherit &&
(evsel->core.attr.sample_type & PERF_SAMPLE_READ))
evsel->core.attr.inherit = 0;
@@ -2395,8 +2482,15 @@ static bool evsel__detect_missing_features(struct evsel *evsel, struct perf_cpu
/* Please add new feature detection here. */
+ attr.defer_callchain = true;
+ if (has_attr_feature(&attr, /*flags=*/0))
+ goto found;
+ perf_missing_features.defer_callchain = true;
+ pr_debug2("switching off deferred callchain support\n");
+ attr.defer_callchain = false;
+
attr.inherit = true;
- attr.sample_type = PERF_SAMPLE_READ;
+ attr.sample_type = PERF_SAMPLE_READ | PERF_SAMPLE_TID;
if (has_attr_feature(&attr, /*flags=*/0))
goto found;
perf_missing_features.inherit_sample_read = true;
@@ -2506,6 +2600,10 @@ found:
errno = old_errno;
check:
+ if ((evsel->core.attr.defer_callchain || evsel->core.attr.defer_output) &&
+ perf_missing_features.defer_callchain)
+ return true;
+
if (evsel->core.attr.inherit &&
(evsel->core.attr.sample_type & PERF_SAMPLE_READ) &&
perf_missing_features.inherit_sample_read)
@@ -2573,12 +2671,14 @@ static int evsel__open_cpu(struct evsel *evsel, struct perf_cpu_map *cpus,
enum rlimit_action set_rlimit = NO_CHANGE;
struct perf_cpu cpu;
- if (evsel__is_retire_lat(evsel))
- return evsel__tpebs_open(evsel);
+ if (evsel__is_retire_lat(evsel)) {
+ err = evsel__tpebs_open(evsel);
+ goto out;
+ }
err = __evsel__prepare_open(evsel, cpus, threads);
if (err)
- return err;
+ goto out;
if (cpus == NULL)
cpus = empty_cpu_map;
@@ -2598,19 +2698,22 @@ fallback_missing_features:
display_attr(&evsel->core.attr);
if (evsel__is_tool(evsel)) {
- return evsel__tool_pmu_open(evsel, threads,
- start_cpu_map_idx,
- end_cpu_map_idx);
+ err = evsel__tool_pmu_open(evsel, threads,
+ start_cpu_map_idx,
+ end_cpu_map_idx);
+ goto out;
}
if (evsel__is_hwmon(evsel)) {
- return evsel__hwmon_pmu_open(evsel, threads,
- start_cpu_map_idx,
- end_cpu_map_idx);
+ err = evsel__hwmon_pmu_open(evsel, threads,
+ start_cpu_map_idx,
+ end_cpu_map_idx);
+ goto out;
}
if (evsel__is_drm(evsel)) {
- return evsel__drm_pmu_open(evsel, threads,
- start_cpu_map_idx,
- end_cpu_map_idx);
+ err = evsel__drm_pmu_open(evsel, threads,
+ start_cpu_map_idx,
+ end_cpu_map_idx);
+ goto out;
}
for (idx = start_cpu_map_idx; idx < end_cpu_map_idx; idx++) {
@@ -2689,7 +2792,8 @@ retry_open:
}
}
- return 0;
+ err = 0;
+ goto out;
try_fallback:
if (evsel__ignore_missing_thread(evsel, perf_cpu_map__nr(cpus),
@@ -2728,6 +2832,9 @@ out_close:
thread = nthreads;
} while (--idx >= 0);
errno = old_errno;
+out:
+ if (err)
+ evsel->supported = false;
return err;
}
@@ -3002,6 +3109,20 @@ int evsel__parse_sample(struct evsel *evsel, union perf_event *event,
data->data_src = PERF_MEM_DATA_SRC_NONE;
data->vcpu = -1;
+ if (event->header.type == PERF_RECORD_CALLCHAIN_DEFERRED) {
+ const u64 max_callchain_nr = UINT64_MAX / sizeof(u64);
+
+ data->callchain = (struct ip_callchain *)&event->callchain_deferred.nr;
+ if (data->callchain->nr > max_callchain_nr)
+ return -EFAULT;
+
+ data->deferred_cookie = event->callchain_deferred.cookie;
+
+ if (evsel->core.attr.sample_id_all)
+ perf_evsel__parse_id_sample(evsel, event, data);
+ return 0;
+ }
+
if (event->header.type != PERF_RECORD_SAMPLE) {
if (!evsel->core.attr.sample_id_all)
return 0;
@@ -3126,12 +3247,25 @@ int evsel__parse_sample(struct evsel *evsel, union perf_event *event,
if (type & PERF_SAMPLE_CALLCHAIN) {
const u64 max_callchain_nr = UINT64_MAX / sizeof(u64);
+ u64 callchain_nr;
OVERFLOW_CHECK_u64(array);
data->callchain = (struct ip_callchain *)array++;
- if (data->callchain->nr > max_callchain_nr)
+ callchain_nr = data->callchain->nr;
+ if (callchain_nr > max_callchain_nr)
return -EFAULT;
- sz = data->callchain->nr * sizeof(u64);
+ sz = callchain_nr * sizeof(u64);
+ /*
+ * Save the cookie for the deferred user callchain. The last 2
+ * entries in the callchain should be the context marker and the
+ * cookie. The cookie will be used to match PERF_RECORD_
+ * CALLCHAIN_DEFERRED later.
+ */
+ if (evsel->core.attr.defer_callchain && callchain_nr >= 2 &&
+ data->callchain->ips[callchain_nr - 2] == PERF_CONTEXT_USER_DEFERRED) {
+ data->deferred_cookie = data->callchain->ips[callchain_nr - 1];
+ data->deferred_callchain = true;
+ }
OVERFLOW_CHECK(array, sz, max_size);
array = (void *)array + sz;
}
@@ -3562,7 +3696,7 @@ bool evsel__fallback(struct evsel *evsel, struct target *target, int err,
/* If event has exclude user then don't exclude kernel. */
if (evsel->core.attr.exclude_user)
- return false;
+ goto no_fallback;
/* Is there already the separator in the name. */
if (strchr(name, '/') ||
@@ -3570,7 +3704,7 @@ bool evsel__fallback(struct evsel *evsel, struct target *target, int err,
sep = "";
if (asprintf(&new_name, "%s%su", name, sep) < 0)
- return false;
+ goto no_fallback;
free(evsel->name);
evsel->name = new_name;
@@ -3593,17 +3727,19 @@ bool evsel__fallback(struct evsel *evsel, struct target *target, int err,
sep = "";
if (asprintf(&new_name, "%s%sH", name, sep) < 0)
- return false;
+ goto no_fallback;
free(evsel->name);
evsel->name = new_name;
/* Apple M1 requires exclude_guest */
- scnprintf(msg, msgsize, "trying to fall back to excluding guest samples");
+ scnprintf(msg, msgsize, "Trying to fall back to excluding guest samples");
evsel->core.attr.exclude_guest = 1;
return true;
}
-
+no_fallback:
+ scnprintf(msg, msgsize, "No fallback found for '%s' for error %d",
+ evsel__name(evsel), err);
return false;
}
@@ -3716,6 +3852,7 @@ static int dump_perf_event_processes(char *msg, size_t size)
}
int __weak arch_evsel__open_strerror(struct evsel *evsel __maybe_unused,
+ int err __maybe_unused,
char *msg __maybe_unused,
size_t size __maybe_unused)
{
@@ -3725,6 +3862,7 @@ int __weak arch_evsel__open_strerror(struct evsel *evsel __maybe_unused,
int evsel__open_strerror(struct evsel *evsel, struct target *target,
int err, char *msg, size_t size)
{
+ struct perf_pmu *pmu;
char sbuf[STRERR_BUFSIZE];
int printed = 0, enforced = 0;
int ret;
@@ -3840,7 +3978,8 @@ int evsel__open_strerror(struct evsel *evsel, struct target *target,
return scnprintf(msg, size, "The 'aux_action' feature is not supported, update the kernel.");
if (perf_missing_features.aux_output)
return scnprintf(msg, size, "The 'aux_output' feature is not supported, update the kernel.");
- if (!target__has_cpu(target))
+ pmu = evsel__find_pmu(evsel);
+ if (!pmu->is_core && !target__has_cpu(target))
return scnprintf(msg, size,
"Invalid event (%s) in per-thread mode, enable system wide with '-a'.",
evsel__name(evsel));
@@ -3853,7 +3992,7 @@ int evsel__open_strerror(struct evsel *evsel, struct target *target,
break;
}
- ret = arch_evsel__open_strerror(evsel, msg, size);
+ ret = arch_evsel__open_strerror(evsel, err, msg, size);
if (ret)
return ret;
@@ -3882,6 +4021,9 @@ static int store_evsel_ids(struct evsel *evsel, struct evlist *evlist)
if (evsel__is_retire_lat(evsel))
return 0;
+ if (perf_pmu__kind(evsel->pmu) != PERF_PMU_KIND_PE)
+ return 0;
+
for (cpu_map_idx = 0; cpu_map_idx < xyarray__max_x(evsel->core.fd); cpu_map_idx++) {
for (thread = 0; thread < xyarray__max_y(evsel->core.fd);
thread++) {
@@ -3935,6 +4077,8 @@ bool evsel__is_hybrid(const struct evsel *evsel)
struct evsel *evsel__leader(const struct evsel *evsel)
{
+ if (evsel->core.leader == NULL)
+ return NULL;
return container_of(evsel->core.leader, struct evsel, core);
}
@@ -4048,9 +4192,9 @@ bool evsel__set_needs_uniquify(struct evsel *counter, const struct perf_stat_con
void evsel__uniquify_counter(struct evsel *counter)
{
- const char *name, *pmu_name;
- char *new_name, *config;
- int ret;
+ const char *name, *pmu_name, *config;
+ char *new_name;
+ int len, ret;
/* No uniquification necessary. */
if (!counter->needs_uniquify)
@@ -4064,15 +4208,23 @@ void evsel__uniquify_counter(struct evsel *counter)
counter->uniquified_name = true;
name = evsel__name(counter);
+ config = strchr(name, '/');
pmu_name = counter->pmu->name;
- /* Already prefixed by the PMU name. */
- if (!strncmp(name, pmu_name, strlen(pmu_name)))
- return;
- config = strchr(name, '/');
- if (config) {
- int len = config - name;
+ /* Already prefixed by the PMU name? */
+ len = pmu_name_len_no_suffix(pmu_name);
+
+ if (!strncmp(name, pmu_name, len)) {
+ /*
+ * If the PMU name is there, then there is no sense in not
+ * having a slash. Do this for robustness.
+ */
+ if (config == NULL)
+ config = name - 1;
+ ret = asprintf(&new_name, "%s/%s", pmu_name, config + 1);
+ } else if (config) {
+ len = config - name;
if (config[1] == '/') {
/* case: event// */
ret = asprintf(&new_name, "%s/%.*s/%s", pmu_name, len, name, config + 2);
@@ -4084,7 +4236,7 @@ void evsel__uniquify_counter(struct evsel *counter)
config = strchr(name, ':');
if (config) {
/* case: event:.. */
- int len = config - name;
+ len = config - name;
ret = asprintf(&new_name, "%s/%.*s/%s", pmu_name, len, name, config + 1);
} else {
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index 5797a02e5d6a..a08130ff2e47 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -89,6 +89,7 @@ struct evsel {
bool use_config_name;
bool skippable;
bool retire_lat;
+ bool dont_regroup;
int bpf_fd;
struct bpf_object *bpf_obj;
struct list_head config_terms;
@@ -99,7 +100,6 @@ struct evsel {
* metric fields are similar, but needs more care as they can have
* references to other metric (evsel).
*/
- struct evsel **metric_events;
struct evsel *metric_leader;
void *handler;
@@ -120,9 +120,9 @@ struct evsel {
bool forced_leader;
bool cmdline_group_boundary;
bool reset_group;
- bool errored;
bool needs_auxtrace_mmap;
bool default_metricgroup; /* A member of the Default metricgroup */
+ bool default_show_events; /* If a default group member, show the event */
bool needs_uniquify;
struct hashmap *per_pkg_mask;
int err;
@@ -221,6 +221,7 @@ struct perf_missing_features {
bool branch_counters;
bool aux_action;
bool inherit_sample_read;
+ bool defer_callchain;
};
extern struct perf_missing_features perf_missing_features;
@@ -341,7 +342,8 @@ void evsel__set_sample_id(struct evsel *evsel, bool use_sample_identifier);
void arch_evsel__set_sample_weight(struct evsel *evsel);
void arch__post_evsel_config(struct evsel *evsel, struct perf_event_attr *attr);
-int arch_evsel__open_strerror(struct evsel *evsel, char *msg, size_t size);
+int arch_evsel__open_strerror(struct evsel *evsel, int err, char *msg, size_t size);
+void arch_evsel__apply_ratio_to_prev(struct evsel *evsel, struct perf_event_attr *attr);
int evsel__set_filter(struct evsel *evsel, const char *filter);
int evsel__append_tp_filter(struct evsel *evsel, const char *filter);
diff --git a/tools/perf/util/evsel_config.h b/tools/perf/util/evsel_config.h
index 94a1e9cf73d6..bcd3a978f0c4 100644
--- a/tools/perf/util/evsel_config.h
+++ b/tools/perf/util/evsel_config.h
@@ -28,6 +28,7 @@ enum evsel_term_type {
EVSEL__CONFIG_TERM_AUX_ACTION,
EVSEL__CONFIG_TERM_AUX_SAMPLE_SIZE,
EVSEL__CONFIG_TERM_CFG_CHG,
+ EVSEL__CONFIG_TERM_RATIO_TO_PREV,
};
struct evsel_config_term {
diff --git a/tools/perf/util/evsel_fprintf.c b/tools/perf/util/evsel_fprintf.c
index 103984b29b1e..10f1a03c2860 100644
--- a/tools/perf/util/evsel_fprintf.c
+++ b/tools/perf/util/evsel_fprintf.c
@@ -168,7 +168,10 @@ int sample__fprintf_callchain(struct perf_sample *sample, int left_alignment,
node_al.addr = addr;
node_al.map = map__get(map);
- if (print_symoffset) {
+ if (sample->deferred_callchain &&
+ sample->deferred_cookie == node->ip) {
+ printed += fprintf(fp, "(cookie)");
+ } else if (print_symoffset) {
printed += __symbol__fprintf_symname_offs(sym, &node_al,
print_unknown_as_addr,
true, fp);
diff --git a/tools/perf/util/evswitch.c b/tools/perf/util/evswitch.c
index 40cb56a9347d..d4c06a3f825a 100644
--- a/tools/perf/util/evswitch.c
+++ b/tools/perf/util/evswitch.c
@@ -1,6 +1,7 @@
// SPDX-License-Identifier: GPL-2.0-only
// Copyright (C) 2019, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
+#include <errno.h>
#include "evswitch.h"
#include "evlist.h"
diff --git a/tools/perf/util/expr.c b/tools/perf/util/expr.c
index 7fda0ff89c16..465fe2e9bbbe 100644
--- a/tools/perf/util/expr.c
+++ b/tools/perf/util/expr.c
@@ -401,14 +401,12 @@ double expr__get_literal(const char *literal, const struct expr_scanner_ctx *ctx
if (ev != TOOL_PMU__EVENT_NONE) {
u64 count;
- if (tool_pmu__read_event(ev, /*evsel=*/NULL, &count))
+ if (tool_pmu__read_event(ev, /*evsel=*/NULL,
+ ctx->system_wide, ctx->user_requested_cpu_list,
+ &count))
result = count;
else
pr_err("Failure to read '%s'", literal);
-
- } else if (!strcmp("#core_wide", literal)) {
- result = core_wide(ctx->system_wide, ctx->user_requested_cpu_list)
- ? 1.0 : 0.0;
} else {
pr_err("Unrecognized literal '%s'", literal);
}
diff --git a/tools/perf/util/genelf.c b/tools/perf/util/genelf.c
index 591548b10e34..a1cd5196f4ec 100644
--- a/tools/perf/util/genelf.c
+++ b/tools/perf/util/genelf.c
@@ -173,6 +173,8 @@ jit_write_elf(int fd, uint64_t load_addr __maybe_unused, const char *sym,
Elf_Shdr *shdr;
uint64_t eh_frame_base_offset;
char *strsym = NULL;
+ void *build_id_data = NULL, *tmp;
+ int build_id_data_len;
int symlen;
int retval = -1;
@@ -251,6 +253,14 @@ jit_write_elf(int fd, uint64_t load_addr __maybe_unused, const char *sym,
shdr->sh_flags = SHF_EXECINSTR | SHF_ALLOC;
shdr->sh_entsize = 0;
+ build_id_data = malloc(csize);
+ if (build_id_data == NULL) {
+ warnx("cannot allocate build-id data");
+ goto error;
+ }
+ memcpy(build_id_data, code, csize);
+ build_id_data_len = csize;
+
/*
* Setup .eh_frame_hdr and .eh_frame
*/
@@ -334,6 +344,15 @@ jit_write_elf(int fd, uint64_t load_addr __maybe_unused, const char *sym,
shdr->sh_entsize = sizeof(Elf_Sym);
shdr->sh_link = unwinding ? 6 : 4; /* index of .strtab section */
+ tmp = realloc(build_id_data, build_id_data_len + sizeof(symtab));
+ if (tmp == NULL) {
+ warnx("cannot allocate build-id data");
+ goto error;
+ }
+ memcpy(tmp + build_id_data_len, symtab, sizeof(symtab));
+ build_id_data = tmp;
+ build_id_data_len += sizeof(symtab);
+
/*
* setup symbols string table
* 2 = 1 for 0 in 1st entry, 1 for the 0 at end of symbol for 2nd entry
@@ -376,6 +395,15 @@ jit_write_elf(int fd, uint64_t load_addr __maybe_unused, const char *sym,
shdr->sh_flags = 0;
shdr->sh_entsize = 0;
+ tmp = realloc(build_id_data, build_id_data_len + symlen);
+ if (tmp == NULL) {
+ warnx("cannot allocate build-id data");
+ goto error;
+ }
+ memcpy(tmp + build_id_data_len, strsym, symlen);
+ build_id_data = tmp;
+ build_id_data_len += symlen;
+
/*
* setup build-id section
*/
@@ -394,7 +422,7 @@ jit_write_elf(int fd, uint64_t load_addr __maybe_unused, const char *sym,
/*
* build-id generation
*/
- sha1(code, csize, bnote.build_id);
+ sha1(build_id_data, build_id_data_len, bnote.build_id);
bnote.desc.namesz = sizeof(bnote.name); /* must include 0 termination */
bnote.desc.descsz = sizeof(bnote.build_id);
bnote.desc.type = NT_GNU_BUILD_ID;
@@ -439,7 +467,7 @@ error:
(void)elf_end(e);
free(strsym);
-
+ free(build_id_data);
return retval;
}
diff --git a/tools/perf/util/get_current_dir_name.c b/tools/perf/util/get_current_dir_name.c
deleted file mode 100644
index e68935e9ac8c..000000000000
--- a/tools/perf/util/get_current_dir_name.c
+++ /dev/null
@@ -1,18 +0,0 @@
-// SPDX-License-Identifier: LGPL-2.1
-// Copyright (C) 2018, 2019 Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
-//
-#ifndef HAVE_GET_CURRENT_DIR_NAME
-#include "get_current_dir_name.h"
-#include <limits.h>
-#include <string.h>
-#include <unistd.h>
-
-/* Android's 'bionic' library, for one, doesn't have this */
-
-char *get_current_dir_name(void)
-{
- char pwd[PATH_MAX];
-
- return getcwd(pwd, sizeof(pwd)) == NULL ? NULL : strdup(pwd);
-}
-#endif // HAVE_GET_CURRENT_DIR_NAME
diff --git a/tools/perf/util/get_current_dir_name.h b/tools/perf/util/get_current_dir_name.h
deleted file mode 100644
index 69f7d5537d32..000000000000
--- a/tools/perf/util/get_current_dir_name.h
+++ /dev/null
@@ -1,8 +0,0 @@
-// SPDX-License-Identifier: LGPL-2.1
-// Copyright (C) 2018, 2019 Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
-//
-#ifndef __PERF_GET_CURRENT_DIR_NAME_H
-#ifndef HAVE_GET_CURRENT_DIR_NAME
-char *get_current_dir_name(void);
-#endif // HAVE_GET_CURRENT_DIR_NAME
-#endif // __PERF_GET_CURRENT_DIR_NAME_H
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index 4f2a6e10ed5c..f5cad377c99e 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -335,7 +335,6 @@ static int write_build_id(struct feat_fd *ff,
pr_debug("failed to write buildid table\n");
return err;
}
- perf_session__cache_build_ids(session);
return 0;
}
@@ -1022,12 +1021,9 @@ static int write_bpf_prog_info(struct feat_fd *ff,
down_read(&env->bpf_progs.lock);
- if (env->bpf_progs.infos_cnt == 0)
- goto out;
-
ret = do_write(ff, &env->bpf_progs.infos_cnt,
sizeof(env->bpf_progs.infos_cnt));
- if (ret < 0)
+ if (ret < 0 || env->bpf_progs.infos_cnt == 0)
goto out;
root = &env->bpf_progs.infos;
@@ -1067,13 +1063,10 @@ static int write_bpf_btf(struct feat_fd *ff,
down_read(&env->bpf_progs.lock);
- if (env->bpf_progs.btfs_cnt == 0)
- goto out;
-
ret = do_write(ff, &env->bpf_progs.btfs_cnt,
sizeof(env->bpf_progs.btfs_cnt));
- if (ret < 0)
+ if (ret < 0 || env->bpf_progs.btfs_cnt == 0)
goto out;
root = &env->bpf_progs.btfs;
@@ -1561,7 +1554,7 @@ static int __write_pmu_caps(struct feat_fd *ff, struct perf_pmu *pmu,
static int write_cpu_pmu_caps(struct feat_fd *ff,
struct evlist *evlist __maybe_unused)
{
- struct perf_pmu *cpu_pmu = perf_pmus__find("cpu");
+ struct perf_pmu *cpu_pmu = perf_pmus__find_core_pmu();
int ret;
if (!cpu_pmu)
@@ -4541,7 +4534,8 @@ int perf_event__process_event_update(const struct perf_tool *tool __maybe_unused
}
#ifdef HAVE_LIBTRACEEVENT
-int perf_event__process_tracing_data(struct perf_session *session,
+int perf_event__process_tracing_data(const struct perf_tool *tool __maybe_unused,
+ struct perf_session *session,
union perf_event *event)
{
ssize_t size_read, padding, size = event->tracing_data.size;
@@ -4589,7 +4583,8 @@ int perf_event__process_tracing_data(struct perf_session *session,
}
#endif
-int perf_event__process_build_id(struct perf_session *session,
+int perf_event__process_build_id(const struct perf_tool *tool __maybe_unused,
+ struct perf_session *session,
union perf_event *event)
{
__event_process_build_id(&event->build_id,
diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h
index d16dfceccd74..c058021c3150 100644
--- a/tools/perf/util/header.h
+++ b/tools/perf/util/header.h
@@ -178,10 +178,12 @@ int perf_event__process_event_update(const struct perf_tool *tool,
size_t perf_event__fprintf_attr(union perf_event *event, FILE *fp);
size_t perf_event__fprintf_event_update(union perf_event *event, FILE *fp);
#ifdef HAVE_LIBTRACEEVENT
-int perf_event__process_tracing_data(struct perf_session *session,
+int perf_event__process_tracing_data(const struct perf_tool *tool,
+ struct perf_session *session,
union perf_event *event);
#endif
-int perf_event__process_build_id(struct perf_session *session,
+int perf_event__process_build_id(const struct perf_tool *tool,
+ struct perf_session *session,
union perf_event *event);
bool is_perf_magic(u64 magic);
diff --git a/tools/perf/util/hisi-ptt-decoder/Build b/tools/perf/util/hisi-ptt-decoder/Build
index 3298f7b7e308..2ee0eb731656 100644
--- a/tools/perf/util/hisi-ptt-decoder/Build
+++ b/tools/perf/util/hisi-ptt-decoder/Build
@@ -1 +1 @@
-perf-util-$(CONFIG_AUXTRACE) += hisi-ptt-pkt-decoder.o
+perf-util-y += hisi-ptt-pkt-decoder.o
diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
index 64ff427040c3..ef4b569f7df4 100644
--- a/tools/perf/util/hist.c
+++ b/tools/perf/util/hist.c
@@ -608,10 +608,8 @@ err_infos:
map_symbol__exit(&he->branch_info->to.ms);
zfree(&he->branch_info);
}
- if (he->mem_info) {
- map_symbol__exit(&mem_info__iaddr(he->mem_info)->ms);
- map_symbol__exit(&mem_info__daddr(he->mem_info)->ms);
- }
+ if (he->mem_info)
+ mem_info__zput(he->mem_info);
err:
map_symbol__exit(&he->ms);
zfree(&he->stat_acc);
diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
index 70438d03ca9c..1d5ea632ca4e 100644
--- a/tools/perf/util/hist.h
+++ b/tools/perf/util/hist.h
@@ -709,15 +709,18 @@ struct block_hist {
struct hist_entry he;
};
+#define NO_ADDR 0
+
#ifdef HAVE_SLANG_SUPPORT
#include "../ui/keysyms.h"
void attr_to_script(char *buf, struct perf_event_attr *attr);
-int map_symbol__tui_annotate(struct map_symbol *ms, struct evsel *evsel,
- struct hist_browser_timer *hbt);
+int __hist_entry__tui_annotate(struct hist_entry *he, struct map_symbol *ms,
+ struct evsel *evsel,
+ struct hist_browser_timer *hbt, u64 al_addr);
int hist_entry__tui_annotate(struct hist_entry *he, struct evsel *evsel,
- struct hist_browser_timer *hbt);
+ struct hist_browser_timer *hbt, u64 al_addr);
int evlist__tui_browse_hists(struct evlist *evlist, const char *help, struct hist_browser_timer *hbt,
float min_pcnt, struct perf_env *env, bool warn_lost_event);
@@ -742,16 +745,19 @@ int evlist__tui_browse_hists(struct evlist *evlist __maybe_unused,
{
return 0;
}
-static inline int map_symbol__tui_annotate(struct map_symbol *ms __maybe_unused,
- struct evsel *evsel __maybe_unused,
- struct hist_browser_timer *hbt __maybe_unused)
+static inline int __hist_entry__tui_annotate(struct hist_entry *he __maybe_unused,
+ struct map_symbol *ms __maybe_unused,
+ struct evsel *evsel __maybe_unused,
+ struct hist_browser_timer *hbt __maybe_unused,
+ u64 al_addr __maybe_unused)
{
return 0;
}
static inline int hist_entry__tui_annotate(struct hist_entry *he __maybe_unused,
struct evsel *evsel __maybe_unused,
- struct hist_browser_timer *hbt __maybe_unused)
+ struct hist_browser_timer *hbt __maybe_unused,
+ u64 al_addr __maybe_unused)
{
return 0;
}
diff --git a/tools/perf/util/hwmon_pmu.c b/tools/perf/util/hwmon_pmu.c
index 416dfea9ffff..279d6b1a47f0 100644
--- a/tools/perf/util/hwmon_pmu.c
+++ b/tools/perf/util/hwmon_pmu.c
@@ -376,7 +376,7 @@ struct perf_pmu *hwmon_pmu__new(struct list_head *pmus, const char *hwmon_dir,
perf_pmu__delete(&hwm->pmu);
return NULL;
}
- hwm->pmu.cpus = perf_cpu_map__new("0");
+ hwm->pmu.cpus = perf_cpu_map__new_int(0);
if (!hwm->pmu.cpus) {
perf_pmu__delete(&hwm->pmu);
return NULL;
@@ -742,8 +742,7 @@ int perf_pmus__read_hwmon_pmus(struct list_head *pmus)
continue;
}
io__init(&io, name_fd, buf2, sizeof(buf2));
- io__getline(&io, &line, &line_len);
- if (line_len > 0 && line[line_len - 1] == '\n')
+ if (io__getline(&io, &line, &line_len) > 0 && line[line_len - 1] == '\n')
line[line_len - 1] = '\0';
hwmon_pmu__new(pmus, buf, class_hwmon_ent->d_name, line);
close(name_fd);
diff --git a/tools/perf/util/hwmon_pmu.h b/tools/perf/util/hwmon_pmu.h
index dc711b289ff5..d1e403c8b70b 100644
--- a/tools/perf/util/hwmon_pmu.h
+++ b/tools/perf/util/hwmon_pmu.h
@@ -37,7 +37,7 @@ enum hwmon_type {
/**
* enum hwmon_item:
*
- * Similar to enum hwmon_type but describes the item part of a a sysfs filename.
+ * Similar to enum hwmon_type but describes the item part of a sysfs filename.
*
* This enum is exposed for testing.
*/
diff --git a/tools/perf/util/include/linux/linkage.h b/tools/perf/util/include/linux/linkage.h
index 89979ca23c3f..34e2fdfe7300 100644
--- a/tools/perf/util/include/linux/linkage.h
+++ b/tools/perf/util/include/linux/linkage.h
@@ -120,7 +120,7 @@
#endif
// In the kernel sources (include/linux/cfi_types.h), this has a different
-// definition when CONFIG_CFI_CLANG is used, for tools/ just use the !clang
+// definition when CONFIG_CFI is used, for tools/ just use the !cfi
// definition:
#ifndef SYM_TYPED_START
#define SYM_TYPED_START(name, linkage, align...) \
diff --git a/tools/perf/util/intel-bts.c b/tools/perf/util/intel-bts.c
index 3625c6224750..382255393fb3 100644
--- a/tools/perf/util/intel-bts.c
+++ b/tools/perf/util/intel-bts.c
@@ -777,9 +777,7 @@ static int intel_bts_synth_events(struct intel_bts *bts,
attr.sample_id_all = evsel->core.attr.sample_id_all;
attr.read_format = evsel->core.attr.read_format;
- id = evsel->core.id[0] + 1000000000;
- if (!id)
- id = 1;
+ id = auxtrace_synth_id_range_start(evsel);
if (bts->synth_opts.branches) {
attr.config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS;
diff --git a/tools/perf/util/intel-pt-decoder/Build b/tools/perf/util/intel-pt-decoder/Build
index 5b8f0149167d..8fd7e4330044 100644
--- a/tools/perf/util/intel-pt-decoder/Build
+++ b/tools/perf/util/intel-pt-decoder/Build
@@ -1,4 +1,4 @@
-perf-util-$(CONFIG_AUXTRACE) += intel-pt-pkt-decoder.o intel-pt-insn-decoder.o intel-pt-log.o intel-pt-decoder.o
+perf-util-y += intel-pt-pkt-decoder.o intel-pt-insn-decoder.o intel-pt-log.o intel-pt-decoder.o
inat_tables_script = $(srctree)/tools/arch/x86/tools/gen-insn-attr-x86.awk
inat_tables_maps = $(srctree)/tools/arch/x86/lib/x86-opcode-map.txt
@@ -7,11 +7,7 @@ $(OUTPUT)util/intel-pt-decoder/inat-tables.c: $(inat_tables_script) $(inat_table
$(call rule_mkdir)
@$(call echo-cmd,gen)$(AWK) -f $(inat_tables_script) $(inat_tables_maps) > $@ || rm -f $@
-ifeq ($(SRCARCH),x86)
- perf-util-y += inat.o insn.o
-else
- perf-util-$(CONFIG_AUXTRACE) += inat.o insn.o
-endif
+perf-util-y += inat.o insn.o
$(OUTPUT)util/intel-pt-decoder/inat.o: $(srctree)/tools/arch/x86/lib/inat.c $(OUTPUT)util/intel-pt-decoder/inat-tables.c
$(call rule_mkdir)
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c
index 8fabddc1c0da..72c7a4e15d61 100644
--- a/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c
@@ -32,7 +32,7 @@ static void intel_pt_insn_decoder(struct insn *insn,
intel_pt_insn->rel = 0;
intel_pt_insn->emulated_ptwrite = false;
- if (insn_is_avx(insn)) {
+ if (insn_is_avx_or_xop(insn)) {
intel_pt_insn->op = INTEL_PT_OP_OTHER;
intel_pt_insn->branch = INTEL_PT_BR_NO_BRANCH;
intel_pt_insn->length = insn->length;
diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c
index 9b1011fe4826..fc9eec8b54b8 100644
--- a/tools/perf/util/intel-pt.c
+++ b/tools/perf/util/intel-pt.c
@@ -3987,9 +3987,7 @@ static int intel_pt_synth_events(struct intel_pt *pt,
attr.sample_id_all = evsel->core.attr.sample_id_all;
attr.read_format = evsel->core.attr.read_format;
- id = evsel->core.id[0] + 1000000000;
- if (!id)
- id = 1;
+ id = auxtrace_synth_id_range_start(evsel);
if (pt->synth_opts.branches) {
attr.config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS;
diff --git a/tools/perf/util/intel-tpebs.c b/tools/perf/util/intel-tpebs.c
index 8c9aee157ec4..3c958d738ca6 100644
--- a/tools/perf/util/intel-tpebs.c
+++ b/tools/perf/util/intel-tpebs.c
@@ -25,6 +25,7 @@
#include "stat.h"
#include <sys/stat.h>
#include <sys/file.h>
+#include <errno.h>
#include <poll.h>
#include <math.h>
@@ -216,7 +217,8 @@ static int process_sample_event(const struct perf_tool *tool __maybe_unused,
return 0;
}
-static int process_feature_event(struct perf_session *session,
+static int process_feature_event(const struct perf_tool *tool __maybe_unused,
+ struct perf_session *session,
union perf_event *event)
{
if (event->feat.feat_id < HEADER_LAST_FEATURE)
diff --git a/tools/perf/util/jitdump.c b/tools/perf/util/jitdump.c
index b062b1f234b6..f00814e37de9 100644
--- a/tools/perf/util/jitdump.c
+++ b/tools/perf/util/jitdump.c
@@ -233,7 +233,8 @@ jit_open(struct jit_buf_desc *jd, const char *name)
/*
* keep dirname for generating files and mmap records
*/
- strcpy(jd->dir, name);
+ strncpy(jd->dir, name, PATH_MAX);
+ jd->dir[PATH_MAX - 1] = '\0';
dirname(jd->dir);
free(buf);
@@ -546,6 +547,8 @@ static int jit_repipe_code_load(struct jit_buf_desc *jd, union jr_entry *jr)
if (dso)
dso__set_hit(dso);
+
+ dso__put(dso);
}
out:
perf_sample__exit(&sample);
diff --git a/tools/perf/util/kvm-stat.h b/tools/perf/util/kvm-stat.h
index 4249542544bb..a356b839c2ee 100644
--- a/tools/perf/util/kvm-stat.h
+++ b/tools/perf/util/kvm-stat.h
@@ -10,6 +10,7 @@
#include "symbol.h"
#include "record.h"
+#include <errno.h>
#include <stdlib.h>
#include <linux/zalloc.h>
@@ -190,5 +191,15 @@ static inline struct kvm_info *kvm_info__new(void)
#define kvm_info__zput(ki) do { } while (0)
#endif /* HAVE_KVM_STAT_SUPPORT */
+#define STRDUP_FAIL_EXIT(s) \
+ ({ char *_p; \
+ _p = strdup(s); \
+ if (!_p) { \
+ ret = -ENOMEM; \
+ goto EXIT; \
+ } \
+ _p; \
+ })
+
extern int kvm_add_default_arch_event(int *argc, const char **argv);
#endif /* __PERF_KVM_STAT_H */
diff --git a/tools/perf/util/libbfd.c b/tools/perf/util/libbfd.c
new file mode 100644
index 000000000000..cc0c474cbfaa
--- /dev/null
+++ b/tools/perf/util/libbfd.c
@@ -0,0 +1,643 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "libbfd.h"
+#include "annotate.h"
+#include "bpf-event.h"
+#include "bpf-utils.h"
+#include "debug.h"
+#include "dso.h"
+#include "env.h"
+#include "map.h"
+#include "srcline.h"
+#include "symbol.h"
+#include "symbol_conf.h"
+#include "util.h"
+#include <tools/dis-asm-compat.h>
+#ifdef HAVE_LIBBPF_SUPPORT
+#include <bpf/bpf.h>
+#include <bpf/btf.h>
+#endif
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#define PACKAGE "perf"
+#include <bfd.h>
+
+/*
+ * Implement addr2line using libbfd.
+ */
+struct a2l_data {
+ const char *input;
+ u64 addr;
+
+ bool found;
+ const char *filename;
+ const char *funcname;
+ unsigned int line;
+
+ bfd *abfd;
+ asymbol **syms;
+};
+
+static bool perf_bfd_lock(void *bfd_mutex)
+{
+ mutex_lock(bfd_mutex);
+ return true;
+}
+
+static bool perf_bfd_unlock(void *bfd_mutex)
+{
+ mutex_unlock(bfd_mutex);
+ return true;
+}
+
+static void perf_bfd_init(void)
+{
+ static struct mutex bfd_mutex;
+
+ mutex_init_recursive(&bfd_mutex);
+
+ if (bfd_init() != BFD_INIT_MAGIC) {
+ pr_err("Error initializing libbfd\n");
+ return;
+ }
+ if (!bfd_thread_init(perf_bfd_lock, perf_bfd_unlock, &bfd_mutex))
+ pr_err("Error initializing libbfd threading\n");
+}
+
+static void ensure_bfd_init(void)
+{
+ static pthread_once_t bfd_init_once = PTHREAD_ONCE_INIT;
+
+ pthread_once(&bfd_init_once, perf_bfd_init);
+}
+
+static int bfd_error(const char *string)
+{
+ const char *errmsg;
+
+ errmsg = bfd_errmsg(bfd_get_error());
+ fflush(stdout);
+
+ if (string)
+ pr_debug("%s: %s\n", string, errmsg);
+ else
+ pr_debug("%s\n", errmsg);
+
+ return -1;
+}
+
+static int slurp_symtab(bfd *abfd, struct a2l_data *a2l)
+{
+ long storage;
+ long symcount;
+ asymbol **syms;
+ bfd_boolean dynamic = FALSE;
+
+ if ((bfd_get_file_flags(abfd) & HAS_SYMS) == 0)
+ return bfd_error(bfd_get_filename(abfd));
+
+ storage = bfd_get_symtab_upper_bound(abfd);
+ if (storage == 0L) {
+ storage = bfd_get_dynamic_symtab_upper_bound(abfd);
+ dynamic = TRUE;
+ }
+ if (storage < 0L)
+ return bfd_error(bfd_get_filename(abfd));
+
+ syms = malloc(storage);
+ if (dynamic)
+ symcount = bfd_canonicalize_dynamic_symtab(abfd, syms);
+ else
+ symcount = bfd_canonicalize_symtab(abfd, syms);
+
+ if (symcount < 0) {
+ free(syms);
+ return bfd_error(bfd_get_filename(abfd));
+ }
+
+ a2l->syms = syms;
+ return 0;
+}
+
+static void find_address_in_section(bfd *abfd, asection *section, void *data)
+{
+ bfd_vma pc, vma;
+ bfd_size_type size;
+ struct a2l_data *a2l = data;
+ flagword flags;
+
+ if (a2l->found)
+ return;
+
+#ifdef bfd_get_section_flags
+ flags = bfd_get_section_flags(abfd, section);
+#else
+ flags = bfd_section_flags(section);
+#endif
+ if ((flags & SEC_ALLOC) == 0)
+ return;
+
+ pc = a2l->addr;
+#ifdef bfd_get_section_vma
+ vma = bfd_get_section_vma(abfd, section);
+#else
+ vma = bfd_section_vma(section);
+#endif
+#ifdef bfd_get_section_size
+ size = bfd_get_section_size(section);
+#else
+ size = bfd_section_size(section);
+#endif
+
+ if (pc < vma || pc >= vma + size)
+ return;
+
+ a2l->found = bfd_find_nearest_line(abfd, section, a2l->syms, pc - vma,
+ &a2l->filename, &a2l->funcname,
+ &a2l->line);
+
+ if (a2l->filename && !strlen(a2l->filename))
+ a2l->filename = NULL;
+}
+
+static struct a2l_data *addr2line_init(const char *path)
+{
+ bfd *abfd;
+ struct a2l_data *a2l = NULL;
+
+ ensure_bfd_init();
+ abfd = bfd_openr(path, NULL);
+ if (abfd == NULL)
+ return NULL;
+
+ if (!bfd_check_format(abfd, bfd_object))
+ goto out;
+
+ a2l = zalloc(sizeof(*a2l));
+ if (a2l == NULL)
+ goto out;
+
+ a2l->abfd = abfd;
+ a2l->input = strdup(path);
+ if (a2l->input == NULL)
+ goto out;
+
+ if (slurp_symtab(abfd, a2l))
+ goto out;
+
+ return a2l;
+
+out:
+ if (a2l) {
+ zfree((char **)&a2l->input);
+ free(a2l);
+ }
+ bfd_close(abfd);
+ return NULL;
+}
+
+static void addr2line_cleanup(struct a2l_data *a2l)
+{
+ if (a2l->abfd)
+ bfd_close(a2l->abfd);
+ zfree((char **)&a2l->input);
+ zfree(&a2l->syms);
+ free(a2l);
+}
+
+static int inline_list__append_dso_a2l(struct dso *dso,
+ struct inline_node *node,
+ struct symbol *sym)
+{
+ struct a2l_data *a2l = dso__a2l(dso);
+ struct symbol *inline_sym = new_inline_sym(dso, sym, a2l->funcname);
+ char *srcline = NULL;
+
+ if (a2l->filename)
+ srcline = srcline_from_fileline(a2l->filename, a2l->line);
+
+ return inline_list__append(inline_sym, srcline, node);
+}
+
+int libbfd__addr2line(const char *dso_name, u64 addr,
+ char **file, unsigned int *line, struct dso *dso,
+ bool unwind_inlines, struct inline_node *node,
+ struct symbol *sym)
+{
+ int ret = 0;
+ struct a2l_data *a2l = dso__a2l(dso);
+
+ if (!a2l) {
+ a2l = addr2line_init(dso_name);
+ dso__set_a2l(dso, a2l);
+ }
+
+ if (a2l == NULL) {
+ if (!symbol_conf.disable_add2line_warn)
+ pr_warning("addr2line_init failed for %s\n", dso_name);
+ return 0;
+ }
+
+ a2l->addr = addr;
+ a2l->found = false;
+
+ bfd_map_over_sections(a2l->abfd, find_address_in_section, a2l);
+
+ if (!a2l->found)
+ return 0;
+
+ if (unwind_inlines) {
+ int cnt = 0;
+
+ if (node && inline_list__append_dso_a2l(dso, node, sym))
+ return 0;
+
+ while (bfd_find_inliner_info(a2l->abfd, &a2l->filename,
+ &a2l->funcname, &a2l->line) &&
+ cnt++ < MAX_INLINE_NEST) {
+
+ if (a2l->filename && !strlen(a2l->filename))
+ a2l->filename = NULL;
+
+ if (node != NULL) {
+ if (inline_list__append_dso_a2l(dso, node, sym))
+ return 0;
+ // found at least one inline frame
+ ret = 1;
+ }
+ }
+ }
+
+ if (file) {
+ *file = a2l->filename ? strdup(a2l->filename) : NULL;
+ ret = *file ? 1 : 0;
+ }
+
+ if (line)
+ *line = a2l->line;
+
+ return ret;
+}
+
+void dso__free_a2l_libbfd(struct dso *dso)
+{
+ struct a2l_data *a2l = dso__a2l(dso);
+
+ if (!a2l)
+ return;
+
+ addr2line_cleanup(a2l);
+
+ dso__set_a2l(dso, NULL);
+}
+
+static int bfd_symbols__cmpvalue(const void *a, const void *b)
+{
+ const asymbol *as = *(const asymbol **)a, *bs = *(const asymbol **)b;
+
+ if (bfd_asymbol_value(as) != bfd_asymbol_value(bs))
+ return bfd_asymbol_value(as) - bfd_asymbol_value(bs);
+
+ return bfd_asymbol_name(as)[0] - bfd_asymbol_name(bs)[0];
+}
+
+static int bfd2elf_binding(asymbol *symbol)
+{
+ if (symbol->flags & BSF_WEAK)
+ return STB_WEAK;
+ if (symbol->flags & BSF_GLOBAL)
+ return STB_GLOBAL;
+ if (symbol->flags & BSF_LOCAL)
+ return STB_LOCAL;
+ return -1;
+}
+
+int dso__load_bfd_symbols(struct dso *dso, const char *debugfile)
+{
+ int err = -1;
+ long symbols_size, symbols_count, i;
+ asection *section;
+ asymbol **symbols, *sym;
+ struct symbol *symbol;
+ bfd *abfd;
+ u64 start, len;
+
+ ensure_bfd_init();
+ abfd = bfd_openr(debugfile, NULL);
+ if (!abfd)
+ return -1;
+
+ if (!bfd_check_format(abfd, bfd_object)) {
+ pr_debug2("%s: cannot read %s bfd file.\n", __func__,
+ dso__long_name(dso));
+ goto out_close;
+ }
+
+ if (bfd_get_flavour(abfd) == bfd_target_elf_flavour)
+ goto out_close;
+
+ symbols_size = bfd_get_symtab_upper_bound(abfd);
+ if (symbols_size == 0) {
+ bfd_close(abfd);
+ return 0;
+ }
+
+ if (symbols_size < 0)
+ goto out_close;
+
+ symbols = malloc(symbols_size);
+ if (!symbols)
+ goto out_close;
+
+ symbols_count = bfd_canonicalize_symtab(abfd, symbols);
+ if (symbols_count < 0)
+ goto out_free;
+
+ section = bfd_get_section_by_name(abfd, ".text");
+ if (section) {
+ for (i = 0; i < symbols_count; ++i) {
+ if (!strcmp(bfd_asymbol_name(symbols[i]), "__ImageBase") ||
+ !strcmp(bfd_asymbol_name(symbols[i]), "__image_base__"))
+ break;
+ }
+ if (i < symbols_count) {
+ /* PE symbols can only have 4 bytes, so use .text high bits */
+ u64 text_offset = (section->vma - (u32)section->vma)
+ + (u32)bfd_asymbol_value(symbols[i]);
+ dso__set_text_offset(dso, text_offset);
+ dso__set_text_end(dso, (section->vma - text_offset) + section->size);
+ } else {
+ dso__set_text_offset(dso, section->vma - section->filepos);
+ dso__set_text_end(dso, section->filepos + section->size);
+ }
+ }
+
+ qsort(symbols, symbols_count, sizeof(asymbol *), bfd_symbols__cmpvalue);
+
+#ifdef bfd_get_section
+#define bfd_asymbol_section bfd_get_section
+#endif
+ for (i = 0; i < symbols_count; ++i) {
+ sym = symbols[i];
+ section = bfd_asymbol_section(sym);
+ if (bfd2elf_binding(sym) < 0)
+ continue;
+
+ while (i + 1 < symbols_count &&
+ bfd_asymbol_section(symbols[i + 1]) == section &&
+ bfd2elf_binding(symbols[i + 1]) < 0)
+ i++;
+
+ if (i + 1 < symbols_count &&
+ bfd_asymbol_section(symbols[i + 1]) == section)
+ len = symbols[i + 1]->value - sym->value;
+ else
+ len = section->size - sym->value;
+
+ start = bfd_asymbol_value(sym) - dso__text_offset(dso);
+ symbol = symbol__new(start, len, bfd2elf_binding(sym), STT_FUNC,
+ bfd_asymbol_name(sym));
+ if (!symbol)
+ goto out_free;
+
+ symbols__insert(dso__symbols(dso), symbol);
+ }
+#ifdef bfd_get_section
+#undef bfd_asymbol_section
+#endif
+
+ symbols__fixup_end(dso__symbols(dso), false);
+ symbols__fixup_duplicate(dso__symbols(dso));
+ dso__set_adjust_symbols(dso, true);
+
+ err = 0;
+out_free:
+ free(symbols);
+out_close:
+ bfd_close(abfd);
+ return err;
+}
+
+int libbfd__read_build_id(const char *filename, struct build_id *bid)
+{
+ size_t size = sizeof(bid->data);
+ int err = -1, fd;
+ bfd *abfd;
+
+ if (!filename)
+ return -EFAULT;
+ if (!is_regular_file(filename))
+ return -EWOULDBLOCK;
+
+ fd = open(filename, O_RDONLY);
+ if (fd < 0)
+ return -1;
+
+ ensure_bfd_init();
+ abfd = bfd_fdopenr(filename, /*target=*/NULL, fd);
+ if (!abfd)
+ return -1;
+
+ if (!bfd_check_format(abfd, bfd_object)) {
+ pr_debug2("%s: cannot read %s bfd file.\n", __func__, filename);
+ goto out_close;
+ }
+
+ if (!abfd->build_id || abfd->build_id->size > size)
+ goto out_close;
+
+ memcpy(bid->data, abfd->build_id->data, abfd->build_id->size);
+ memset(bid->data + abfd->build_id->size, 0, size - abfd->build_id->size);
+ err = bid->size = abfd->build_id->size;
+
+out_close:
+ bfd_close(abfd);
+ return err;
+}
+
+int libbfd_filename__read_debuglink(const char *filename, char *debuglink,
+ size_t size)
+{
+ int err = -1;
+ asection *section;
+ bfd *abfd;
+
+ ensure_bfd_init();
+ abfd = bfd_openr(filename, NULL);
+ if (!abfd)
+ return -1;
+
+ if (!bfd_check_format(abfd, bfd_object)) {
+ pr_debug2("%s: cannot read %s bfd file.\n", __func__, filename);
+ goto out_close;
+ }
+
+ section = bfd_get_section_by_name(abfd, ".gnu_debuglink");
+ if (!section)
+ goto out_close;
+
+ if (section->size > size)
+ goto out_close;
+
+ if (!bfd_get_section_contents(abfd, section, debuglink, 0,
+ section->size))
+ goto out_close;
+
+ err = 0;
+
+out_close:
+ bfd_close(abfd);
+ return err;
+}
+
+int symbol__disassemble_bpf_libbfd(struct symbol *sym __maybe_unused,
+ struct annotate_args *args __maybe_unused)
+{
+#ifdef HAVE_LIBBPF_SUPPORT
+ struct annotation *notes = symbol__annotation(sym);
+ struct bpf_prog_linfo *prog_linfo = NULL;
+ struct bpf_prog_info_node *info_node;
+ int len = sym->end - sym->start;
+ disassembler_ftype disassemble;
+ struct map *map = args->ms.map;
+ struct perf_bpil *info_linear;
+ struct disassemble_info info;
+ struct dso *dso = map__dso(map);
+ int pc = 0, count, sub_id;
+ struct btf *btf = NULL;
+ char tpath[PATH_MAX];
+ size_t buf_size;
+ int nr_skip = 0;
+ char *buf;
+ bfd *bfdf;
+ int ret;
+ FILE *s;
+
+ if (dso__binary_type(dso) != DSO_BINARY_TYPE__BPF_PROG_INFO)
+ return SYMBOL_ANNOTATE_ERRNO__BPF_INVALID_FILE;
+
+ pr_debug("%s: handling sym %s addr %" PRIx64 " len %" PRIx64 "\n", __func__,
+ sym->name, sym->start, sym->end - sym->start);
+
+ memset(tpath, 0, sizeof(tpath));
+ perf_exe(tpath, sizeof(tpath));
+
+ ensure_bfd_init();
+ bfdf = bfd_openr(tpath, NULL);
+ if (bfdf == NULL)
+ abort();
+
+ if (!bfd_check_format(bfdf, bfd_object))
+ abort();
+
+ s = open_memstream(&buf, &buf_size);
+ if (!s) {
+ ret = errno;
+ goto out;
+ }
+ init_disassemble_info_compat(&info, s,
+ (fprintf_ftype) fprintf,
+ fprintf_styled);
+ info.arch = bfd_get_arch(bfdf);
+ info.mach = bfd_get_mach(bfdf);
+
+ info_node = perf_env__find_bpf_prog_info(dso__bpf_prog(dso)->env,
+ dso__bpf_prog(dso)->id);
+ if (!info_node) {
+ ret = SYMBOL_ANNOTATE_ERRNO__BPF_MISSING_BTF;
+ goto out;
+ }
+ info_linear = info_node->info_linear;
+ sub_id = dso__bpf_prog(dso)->sub_id;
+
+ info.buffer = (void *)(uintptr_t)(info_linear->info.jited_prog_insns);
+ info.buffer_length = info_linear->info.jited_prog_len;
+
+ if (info_linear->info.nr_line_info)
+ prog_linfo = bpf_prog_linfo__new(&info_linear->info);
+
+ if (info_linear->info.btf_id) {
+ struct btf_node *node;
+
+ node = perf_env__find_btf(dso__bpf_prog(dso)->env,
+ info_linear->info.btf_id);
+ if (node)
+ btf = btf__new((__u8 *)(node->data),
+ node->data_size);
+ }
+
+ disassemble_init_for_target(&info);
+
+#ifdef DISASM_FOUR_ARGS_SIGNATURE
+ disassemble = disassembler(info.arch,
+ bfd_big_endian(bfdf),
+ info.mach,
+ bfdf);
+#else
+ disassemble = disassembler(bfdf);
+#endif
+ if (disassemble == NULL)
+ abort();
+
+ fflush(s);
+ do {
+ const struct bpf_line_info *linfo = NULL;
+ struct disasm_line *dl;
+ size_t prev_buf_size;
+ const char *srcline;
+ u64 addr;
+
+ addr = pc + ((u64 *)(uintptr_t)(info_linear->info.jited_ksyms))[sub_id];
+ count = disassemble(pc, &info);
+
+ if (prog_linfo)
+ linfo = bpf_prog_linfo__lfind_addr_func(prog_linfo,
+ addr, sub_id,
+ nr_skip);
+
+ if (linfo && btf) {
+ srcline = btf__name_by_offset(btf, linfo->line_off);
+ nr_skip++;
+ } else
+ srcline = NULL;
+
+ fprintf(s, "\n");
+ prev_buf_size = buf_size;
+ fflush(s);
+
+ if (!annotate_opts.hide_src_code && srcline) {
+ args->offset = -1;
+ args->line = strdup(srcline);
+ args->line_nr = 0;
+ args->fileloc = NULL;
+ args->ms.sym = sym;
+ dl = disasm_line__new(args);
+ if (dl) {
+ annotation_line__add(&dl->al,
+ &notes->src->source);
+ }
+ }
+
+ args->offset = pc;
+ args->line = buf + prev_buf_size;
+ args->line_nr = 0;
+ args->fileloc = NULL;
+ args->ms.sym = sym;
+ dl = disasm_line__new(args);
+ if (dl)
+ annotation_line__add(&dl->al, &notes->src->source);
+
+ pc += count;
+ } while (count > 0 && pc < len);
+
+ ret = 0;
+out:
+ free(prog_linfo);
+ btf__free(btf);
+ fclose(s);
+ bfd_close(bfdf);
+ return ret;
+#else
+ return SYMBOL_ANNOTATE_ERRNO__NO_LIBOPCODES_FOR_BPF;
+#endif
+}
diff --git a/tools/perf/util/libbfd.h b/tools/perf/util/libbfd.h
new file mode 100644
index 000000000000..953886f3d62f
--- /dev/null
+++ b/tools/perf/util/libbfd.h
@@ -0,0 +1,82 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_LIBBFD_H
+#define __PERF_LIBBFD_H
+
+#include <linux/compiler.h>
+#include <linux/types.h>
+#include <stdbool.h>
+#include <stddef.h>
+
+struct annotate_args;
+struct build_id;
+struct dso;
+struct inline_node;
+struct symbol;
+
+#ifdef HAVE_LIBBFD_SUPPORT
+int libbfd__addr2line(const char *dso_name, u64 addr,
+ char **file, unsigned int *line, struct dso *dso,
+ bool unwind_inlines, struct inline_node *node,
+ struct symbol *sym);
+
+
+void dso__free_a2l_libbfd(struct dso *dso);
+
+int symbol__disassemble_libbfd(const char *filename, struct symbol *sym,
+ struct annotate_args *args);
+
+int libbfd__read_build_id(const char *filename, struct build_id *bid);
+
+int libbfd_filename__read_debuglink(const char *filename, char *debuglink, size_t size);
+
+int symbol__disassemble_bpf_libbfd(struct symbol *sym, struct annotate_args *args);
+
+#else // !defined(HAVE_LIBBFD_SUPPORT)
+#include "annotate.h"
+
+static inline int libbfd__addr2line(const char *dso_name __always_unused,
+ u64 addr __always_unused,
+ char **file __always_unused,
+ unsigned int *line __always_unused,
+ struct dso *dso __always_unused,
+ bool unwind_inlines __always_unused,
+ struct inline_node *node __always_unused,
+ struct symbol *sym __always_unused)
+{
+ return -1;
+}
+
+
+static inline void dso__free_a2l_libbfd(struct dso *dso __always_unused)
+{
+}
+
+static inline int symbol__disassemble_libbfd(const char *filename __always_unused,
+ struct symbol *sym __always_unused,
+ struct annotate_args *args __always_unused)
+{
+ return -1;
+}
+
+static inline int libbfd__read_build_id(const char *filename __always_unused,
+ struct build_id *bid __always_unused)
+{
+ return -1;
+}
+
+static inline int libbfd_filename__read_debuglink(const char *filename __always_unused,
+ char *debuglink __always_unused,
+ size_t size __always_unused)
+{
+ return -1;
+}
+
+static inline int symbol__disassemble_bpf_libbfd(struct symbol *sym __always_unused,
+ struct annotate_args *args __always_unused)
+{
+ return SYMBOL_ANNOTATE_ERRNO__NO_LIBOPCODES_FOR_BPF;
+}
+
+#endif // defined(HAVE_LIBBFD_SUPPORT)
+
+#endif /* __PERF_LIBBFD_H */
diff --git a/tools/perf/util/llvm.c b/tools/perf/util/llvm.c
new file mode 100644
index 000000000000..2ebf1f5f65bf
--- /dev/null
+++ b/tools/perf/util/llvm.c
@@ -0,0 +1,273 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "llvm.h"
+#include "annotate.h"
+#include "debug.h"
+#include "dso.h"
+#include "map.h"
+#include "namespaces.h"
+#include "srcline.h"
+#include "symbol.h"
+#include <errno.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <linux/zalloc.h>
+
+#ifdef HAVE_LIBLLVM_SUPPORT
+#include "llvm-c-helpers.h"
+#include <llvm-c/Disassembler.h>
+#include <llvm-c/Target.h>
+#endif
+
+#ifdef HAVE_LIBLLVM_SUPPORT
+static void free_llvm_inline_frames(struct llvm_a2l_frame *inline_frames,
+ int num_frames)
+{
+ if (inline_frames != NULL) {
+ for (int i = 0; i < num_frames; ++i) {
+ zfree(&inline_frames[i].filename);
+ zfree(&inline_frames[i].funcname);
+ }
+ zfree(&inline_frames);
+ }
+}
+#endif
+
+int llvm__addr2line(const char *dso_name __maybe_unused, u64 addr __maybe_unused,
+ char **file __maybe_unused, unsigned int *line __maybe_unused,
+ struct dso *dso __maybe_unused, bool unwind_inlines __maybe_unused,
+ struct inline_node *node __maybe_unused, struct symbol *sym __maybe_unused)
+{
+#ifdef HAVE_LIBLLVM_SUPPORT
+ struct llvm_a2l_frame *inline_frames = NULL;
+ int num_frames = llvm_addr2line(dso_name, addr, file, line,
+ node && unwind_inlines, &inline_frames);
+
+ if (num_frames == 0 || !inline_frames) {
+ /* Error, or we didn't want inlines. */
+ return num_frames;
+ }
+
+ for (int i = 0; i < num_frames; ++i) {
+ struct symbol *inline_sym =
+ new_inline_sym(dso, sym, inline_frames[i].funcname);
+ char *srcline = NULL;
+
+ if (inline_frames[i].filename) {
+ srcline =
+ srcline_from_fileline(inline_frames[i].filename,
+ inline_frames[i].line);
+ }
+ if (inline_list__append(inline_sym, srcline, node) != 0) {
+ free_llvm_inline_frames(inline_frames, num_frames);
+ return 0;
+ }
+ }
+ free_llvm_inline_frames(inline_frames, num_frames);
+
+ return num_frames;
+#else
+ return -1;
+#endif
+}
+
+#ifdef HAVE_LIBLLVM_SUPPORT
+static void init_llvm(void)
+{
+ static bool init;
+
+ if (!init) {
+ LLVMInitializeAllTargetInfos();
+ LLVMInitializeAllTargetMCs();
+ LLVMInitializeAllDisassemblers();
+ init = true;
+ }
+}
+
+/*
+ * Whenever LLVM wants to resolve an address into a symbol, it calls this
+ * callback. We don't ever actually _return_ anything (in particular, because
+ * it puts quotation marks around what we return), but we use this as a hint
+ * that there is a branch or PC-relative address in the expression that we
+ * should add some textual annotation for after the instruction. The caller
+ * will use this information to add the actual annotation.
+ */
+struct symbol_lookup_storage {
+ u64 branch_addr;
+ u64 pcrel_load_addr;
+};
+
+static const char *
+symbol_lookup_callback(void *disinfo, uint64_t value,
+ uint64_t *ref_type,
+ uint64_t address __maybe_unused,
+ const char **ref __maybe_unused)
+{
+ struct symbol_lookup_storage *storage = disinfo;
+
+ if (*ref_type == LLVMDisassembler_ReferenceType_In_Branch)
+ storage->branch_addr = value;
+ else if (*ref_type == LLVMDisassembler_ReferenceType_In_PCrel_Load)
+ storage->pcrel_load_addr = value;
+ *ref_type = LLVMDisassembler_ReferenceType_InOut_None;
+ return NULL;
+}
+#endif
+
+int symbol__disassemble_llvm(const char *filename, struct symbol *sym,
+ struct annotate_args *args __maybe_unused)
+{
+#ifdef HAVE_LIBLLVM_SUPPORT
+ struct annotation *notes = symbol__annotation(sym);
+ struct map *map = args->ms.map;
+ struct dso *dso = map__dso(map);
+ u64 start = map__rip_2objdump(map, sym->start);
+ /* Malloc-ed buffer containing instructions read from disk. */
+ u8 *code_buf = NULL;
+ /* Pointer to code to be disassembled. */
+ const u8 *buf;
+ u64 buf_len;
+ u64 pc;
+ bool is_64bit;
+ char disasm_buf[2048];
+ size_t disasm_len;
+ struct disasm_line *dl;
+ LLVMDisasmContextRef disasm = NULL;
+ struct symbol_lookup_storage storage;
+ char *line_storage = NULL;
+ size_t line_storage_len = 0;
+ int ret = -1;
+
+ if (args->options->objdump_path)
+ return -1;
+
+ buf = dso__read_symbol(dso, filename, map, sym,
+ &code_buf, &buf_len, &is_64bit);
+ if (buf == NULL)
+ return errno;
+
+ init_llvm();
+ if (arch__is(args->arch, "x86")) {
+ const char *triplet = is_64bit ? "x86_64-pc-linux" : "i686-pc-linux";
+
+ disasm = LLVMCreateDisasm(triplet, &storage, /*tag_type=*/0,
+ /*get_op_info=*/NULL, symbol_lookup_callback);
+ } else {
+ char triplet[64];
+
+ scnprintf(triplet, sizeof(triplet), "%s-linux-gnu",
+ args->arch->name);
+ disasm = LLVMCreateDisasm(triplet, &storage, /*tag_type=*/0,
+ /*get_op_info=*/NULL, symbol_lookup_callback);
+ }
+
+ if (disasm == NULL)
+ goto err;
+
+ if (args->options->disassembler_style &&
+ !strcmp(args->options->disassembler_style, "intel"))
+ LLVMSetDisasmOptions(disasm,
+ LLVMDisassembler_Option_AsmPrinterVariant);
+
+ /*
+ * This needs to be set after AsmPrinterVariant, due to a bug in LLVM;
+ * setting AsmPrinterVariant makes a new instruction printer, making it
+ * forget about the PrintImmHex flag (which is applied before if both
+ * are given to the same call).
+ */
+ LLVMSetDisasmOptions(disasm, LLVMDisassembler_Option_PrintImmHex);
+
+ /* add the function address and name */
+ scnprintf(disasm_buf, sizeof(disasm_buf), "%#"PRIx64" <%s>:",
+ start, sym->name);
+
+ args->offset = -1;
+ args->line = disasm_buf;
+ args->line_nr = 0;
+ args->fileloc = NULL;
+ args->ms.sym = sym;
+
+ dl = disasm_line__new(args);
+ if (dl == NULL)
+ goto err;
+
+ annotation_line__add(&dl->al, &notes->src->source);
+
+ pc = start;
+ for (u64 offset = 0; offset < buf_len; ) {
+ unsigned int ins_len;
+
+ storage.branch_addr = 0;
+ storage.pcrel_load_addr = 0;
+
+ /*
+ * LLVM's API has the code be disassembled as non-const, cast
+ * here as we may be disassembling from mapped read-only memory.
+ */
+ ins_len = LLVMDisasmInstruction(disasm, (u8 *)(buf + offset),
+ buf_len - offset, pc,
+ disasm_buf, sizeof(disasm_buf));
+ if (ins_len == 0)
+ goto err;
+ disasm_len = strlen(disasm_buf);
+
+ if (storage.branch_addr != 0) {
+ char *name = llvm_name_for_code(dso, filename,
+ storage.branch_addr);
+ if (name != NULL) {
+ disasm_len += scnprintf(disasm_buf + disasm_len,
+ sizeof(disasm_buf) -
+ disasm_len,
+ " <%s>", name);
+ free(name);
+ }
+ }
+ if (storage.pcrel_load_addr != 0) {
+ char *name = llvm_name_for_data(dso, filename,
+ storage.pcrel_load_addr);
+ disasm_len += scnprintf(disasm_buf + disasm_len,
+ sizeof(disasm_buf) - disasm_len,
+ " # %#"PRIx64,
+ storage.pcrel_load_addr);
+ if (name) {
+ disasm_len += scnprintf(disasm_buf + disasm_len,
+ sizeof(disasm_buf) -
+ disasm_len,
+ " <%s>", name);
+ free(name);
+ }
+ }
+
+ args->offset = offset;
+ args->line = expand_tabs(disasm_buf, &line_storage,
+ &line_storage_len);
+ args->line_nr = 0;
+ args->fileloc = NULL;
+ args->ms.sym = sym;
+
+ llvm_addr2line(filename, pc, &args->fileloc,
+ (unsigned int *)&args->line_nr, false, NULL);
+
+ dl = disasm_line__new(args);
+ if (dl == NULL)
+ goto err;
+
+ annotation_line__add(&dl->al, &notes->src->source);
+
+ free(args->fileloc);
+ pc += ins_len;
+ offset += ins_len;
+ }
+
+ ret = 0;
+
+err:
+ LLVMDisasmDispose(disasm);
+ free(code_buf);
+ free(line_storage);
+ return ret;
+#else // HAVE_LIBLLVM_SUPPORT
+ pr_debug("The LLVM disassembler isn't linked in for %s in %s\n",
+ sym->name, filename);
+ return -1;
+#endif
+}
diff --git a/tools/perf/util/llvm.h b/tools/perf/util/llvm.h
new file mode 100644
index 000000000000..57f6bafb24bb
--- /dev/null
+++ b/tools/perf/util/llvm.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_LLVM_H
+#define __PERF_LLVM_H
+
+#include <stdbool.h>
+#include <linux/types.h>
+
+struct annotate_args;
+struct dso;
+struct inline_node;
+struct symbol;
+
+int llvm__addr2line(const char *dso_name, u64 addr,
+ char **file, unsigned int *line, struct dso *dso,
+ bool unwind_inlines, struct inline_node *node,
+ struct symbol *sym);
+
+int symbol__disassemble_llvm(const char *filename, struct symbol *sym,
+ struct annotate_args *args);
+
+#endif /* __PERF_LLVM_H */
diff --git a/tools/perf/util/lzma.c b/tools/perf/util/lzma.c
index bbcd2ffcf4bd..c355757ed391 100644
--- a/tools/perf/util/lzma.c
+++ b/tools/perf/util/lzma.c
@@ -120,7 +120,7 @@ bool lzma_is_compressed(const char *input)
ssize_t rc;
if (fd < 0)
- return -1;
+ return false;
rc = read(fd, buf, sizeof(buf));
close(fd);
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index b5dd42588c91..841b711d970e 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -2124,6 +2124,7 @@ static int add_callchain_ip(struct thread *thread,
*cpumode = PERF_RECORD_MISC_KERNEL;
break;
case PERF_CONTEXT_USER:
+ case PERF_CONTEXT_USER_DEFERRED:
*cpumode = PERF_RECORD_MISC_USER;
break;
default:
diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c
index b46c68c24d1c..41cdddc987ee 100644
--- a/tools/perf/util/map.c
+++ b/tools/perf/util/map.c
@@ -513,6 +513,8 @@ void srccode_state_free(struct srccode_state *state)
state->line = 0;
}
+static const struct kmap *__map__const_kmap(const struct map *map);
+
/**
* map__rip_2objdump - convert symbol start address to objdump address.
* @map: memory map
@@ -524,9 +526,9 @@ void srccode_state_free(struct srccode_state *state)
*
* Return: Address suitable for passing to "objdump --start-address="
*/
-u64 map__rip_2objdump(struct map *map, u64 rip)
+u64 map__rip_2objdump(const struct map *map, u64 rip)
{
- struct kmap *kmap = __map__kmap(map);
+ const struct kmap *kmap = __map__const_kmap(map);
const struct dso *dso = map__dso(map);
/*
@@ -569,7 +571,7 @@ u64 map__rip_2objdump(struct map *map, u64 rip)
*
* Return: Memory address.
*/
-u64 map__objdump_2mem(struct map *map, u64 ip)
+u64 map__objdump_2mem(const struct map *map, u64 ip)
{
const struct dso *dso = map__dso(map);
@@ -586,7 +588,7 @@ u64 map__objdump_2mem(struct map *map, u64 ip)
}
/* convert objdump address to relative address. (To be removed) */
-u64 map__objdump_2rip(struct map *map, u64 ip)
+u64 map__objdump_2rip(const struct map *map, u64 ip)
{
const struct dso *dso = map__dso(map);
@@ -618,6 +620,15 @@ struct kmap *__map__kmap(struct map *map)
return (struct kmap *)(&RC_CHK_ACCESS(map)[1]);
}
+static const struct kmap *__map__const_kmap(const struct map *map)
+{
+ const struct dso *dso = map__dso(map);
+
+ if (!dso || !dso__kernel(dso))
+ return NULL;
+ return (struct kmap *)(&RC_CHK_ACCESS(map)[1]);
+}
+
struct kmap *map__kmap(struct map *map)
{
struct kmap *kmap = __map__kmap(map);
diff --git a/tools/perf/util/map.h b/tools/perf/util/map.h
index 9cadf533a561..979b3e11b9bc 100644
--- a/tools/perf/util/map.h
+++ b/tools/perf/util/map.h
@@ -133,13 +133,13 @@ static inline u64 map__unmap_ip(const struct map *map, u64 ip_or_rip)
}
/* rip/ip <-> addr suitable for passing to `objdump --start-address=` */
-u64 map__rip_2objdump(struct map *map, u64 rip);
+u64 map__rip_2objdump(const struct map *map, u64 rip);
/* objdump address -> memory address */
-u64 map__objdump_2mem(struct map *map, u64 ip);
+u64 map__objdump_2mem(const struct map *map, u64 ip);
/* objdump address -> rip */
-u64 map__objdump_2rip(struct map *map, u64 ip);
+u64 map__objdump_2rip(const struct map *map, u64 ip);
struct symbol;
struct thread;
diff --git a/tools/perf/util/maps.c b/tools/perf/util/maps.c
index 85b2a93a59ac..c321d4f4d846 100644
--- a/tools/perf/util/maps.c
+++ b/tools/perf/util/maps.c
@@ -477,6 +477,7 @@ static int __maps__insert(struct maps *maps, struct map *new)
}
/* Insert the value at the end. */
maps_by_address[nr_maps] = map__get(new);
+ map__set_kmap_maps(new, maps);
if (maps_by_name)
maps_by_name[nr_maps] = map__get(new);
@@ -502,8 +503,6 @@ static int __maps__insert(struct maps *maps, struct map *new)
if (map__end(new) < map__start(new))
RC_CHK_ACCESS(maps)->ends_broken = true;
- map__set_kmap_maps(new, maps);
-
return 0;
}
@@ -891,6 +890,7 @@ static int __maps__fixup_overlap_and_insert(struct maps *maps, struct map *new)
if (before) {
map__put(maps_by_address[i]);
maps_by_address[i] = before;
+ map__set_kmap_maps(before, maps);
if (maps_by_name) {
map__put(maps_by_name[ni]);
@@ -918,6 +918,7 @@ static int __maps__fixup_overlap_and_insert(struct maps *maps, struct map *new)
*/
map__put(maps_by_address[i]);
maps_by_address[i] = map__get(new);
+ map__set_kmap_maps(new, maps);
if (maps_by_name) {
map__put(maps_by_name[ni]);
@@ -930,8 +931,9 @@ static int __maps__fixup_overlap_and_insert(struct maps *maps, struct map *new)
return err;
} else {
struct map *next = NULL;
+ unsigned int nr_maps = maps__nr_maps(maps);
- if (i + 1 < maps__nr_maps(maps))
+ if (i + 1 < nr_maps)
next = maps_by_address[i + 1];
if (!next || map__start(next) >= map__end(new)) {
@@ -942,18 +944,34 @@ static int __maps__fixup_overlap_and_insert(struct maps *maps, struct map *new)
*/
map__put(maps_by_address[i]);
maps_by_address[i] = map__get(new);
+ map__set_kmap_maps(new, maps);
if (maps_by_name) {
map__put(maps_by_name[ni]);
maps_by_name[ni] = map__get(new);
}
- map__set_kmap_maps(new, maps);
-
check_invariants(maps);
return err;
}
- __maps__remove(maps, pos);
+ /*
+ * pos fully covers the previous mapping so remove
+ * it. The following is an inlined version of
+ * maps__remove that reuses the already computed
+ * indices.
+ */
+ map__put(maps_by_address[i]);
+ memmove(&maps_by_address[i],
+ &maps_by_address[i + 1],
+ (nr_maps - i - 1) * sizeof(*maps_by_address));
+
+ if (maps_by_name) {
+ map__put(maps_by_name[ni]);
+ memmove(&maps_by_name[ni],
+ &maps_by_name[ni + 1],
+ (nr_maps - ni - 1) * sizeof(*maps_by_name));
+ }
+ --RC_CHK_ACCESS(maps)->nr_maps;
check_invariants(maps);
/*
* Maps are ordered but no need to increase `i` as the
@@ -1019,6 +1037,7 @@ int maps__copy_from(struct maps *dest, struct maps *parent)
err = unwind__prepare_access(dest, new, NULL);
if (!err) {
dest_maps_by_address[i] = new;
+ map__set_kmap_maps(new, dest);
if (dest_maps_by_name)
dest_maps_by_name[i] = map__get(new);
RC_CHK_ACCESS(dest)->nr_maps = i + 1;
diff --git a/tools/perf/util/mem-events.c b/tools/perf/util/mem-events.c
index 80b3069427bc..0b49fce251fc 100644
--- a/tools/perf/util/mem-events.c
+++ b/tools/perf/util/mem-events.c
@@ -303,12 +303,15 @@ int perf_mem_events__record_args(const char **rec_argv, int *argv_nr, char **eve
}
if (cpu_map) {
- if (!perf_cpu_map__equal(cpu_map, cpu_map__online())) {
+ struct perf_cpu_map *online = cpu_map__online();
+
+ if (!perf_cpu_map__equal(cpu_map, online)) {
char buf[200];
cpu_map__snprint(cpu_map, buf, sizeof(buf));
pr_warning("Memory events are enabled on a subset of CPUs: %s\n", buf);
}
+ perf_cpu_map__put(online);
perf_cpu_map__put(cpu_map);
}
diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c
index 595b83142d2c..25c75fdbfc52 100644
--- a/tools/perf/util/metricgroup.c
+++ b/tools/perf/util/metricgroup.c
@@ -152,6 +152,8 @@ struct metric {
* Should events of the metric be grouped?
*/
bool group_events;
+ /** Show events even if in the Default metric group. */
+ bool default_show_events;
/**
* Parsed events for the metric. Optional as events may be taken from a
* different metric whose group contains all the IDs necessary for this
@@ -255,6 +257,7 @@ static struct metric *metric__new(const struct pmu_metric *pm,
m->pctx->sctx.runtime = runtime;
m->pctx->sctx.system_wide = system_wide;
m->group_events = !metric_no_group && metric__group_events(pm, metric_no_threshold);
+ m->default_show_events = pm->default_show_events;
m->metric_refs = NULL;
m->evlist = NULL;
@@ -424,10 +427,18 @@ int metricgroup__for_each_metric(const struct pmu_metrics_table *table, pmu_metr
.fn = fn,
.data = data,
};
+ const struct pmu_metrics_table *tables[2] = {
+ table,
+ pmu_metrics_table__default(),
+ };
+
+ for (size_t i = 0; i < ARRAY_SIZE(tables); i++) {
+ int ret;
- if (table) {
- int ret = pmu_metrics_table__for_each_metric(table, fn, data);
+ if (!tables[i])
+ continue;
+ ret = pmu_metrics_table__for_each_metric(tables[i], fn, data);
if (ret)
return ret;
}
@@ -1323,6 +1334,51 @@ err_out:
return ret;
}
+/* How many times will a given evsel be used in a set of metrics? */
+static int count_uses(struct list_head *metric_list, struct evsel *evsel)
+{
+ const char *metric_id = evsel__metric_id(evsel);
+ struct metric *m;
+ int uses = 0;
+
+ list_for_each_entry(m, metric_list, nd) {
+ if (hashmap__find(m->pctx->ids, metric_id, NULL))
+ uses++;
+ }
+ return uses;
+}
+
+/*
+ * Select the evsel that stat-display will use to trigger shadow/metric
+ * printing. Pick the least shared non-tool evsel, encouraging metrics to be
+ * with a hardware counter that is specific to them.
+ */
+static struct evsel *pick_display_evsel(struct list_head *metric_list,
+ struct evsel **metric_events)
+{
+ struct evsel *selected = metric_events[0];
+ size_t selected_uses;
+ bool selected_is_tool;
+
+ if (!selected)
+ return NULL;
+
+ selected_uses = count_uses(metric_list, selected);
+ selected_is_tool = evsel__is_tool(selected);
+ for (int i = 1; metric_events[i]; i++) {
+ struct evsel *candidate = metric_events[i];
+ size_t candidate_uses = count_uses(metric_list, candidate);
+
+ if ((selected_is_tool && !evsel__is_tool(candidate)) ||
+ (candidate_uses < selected_uses)) {
+ selected = candidate;
+ selected_uses = candidate_uses;
+ selected_is_tool = evsel__is_tool(selected);
+ }
+ }
+ return selected;
+}
+
static int parse_groups(struct evlist *perf_evlist,
const char *pmu, const char *str,
bool metric_no_group,
@@ -1430,7 +1486,8 @@ static int parse_groups(struct evlist *perf_evlist,
goto out;
}
- me = metricgroup__lookup(&perf_evlist->metric_events, metric_events[0],
+ me = metricgroup__lookup(&perf_evlist->metric_events,
+ pick_display_evsel(&metric_list, metric_events),
/*create=*/true);
expr = malloc(sizeof(struct metric_expr));
@@ -1455,9 +1512,20 @@ static int parse_groups(struct evlist *perf_evlist,
if (!expr->metric_name) {
ret = -ENOMEM;
+ free(expr);
free(metric_events);
goto out;
}
+ if (m->default_show_events) {
+ struct evsel *pos;
+
+ for (int i = 0; metric_events[i]; i++)
+ metric_events[i]->default_show_events = true;
+ evlist__for_each_entry(metric_evlist, pos) {
+ if (pos->metric_leader && pos->metric_leader->default_show_events)
+ pos->default_show_events = true;
+ }
+ }
expr->metric_threshold = m->metric_threshold;
expr->metric_unit = m->metric_unit;
expr->metric_events = metric_events;
@@ -1534,19 +1602,22 @@ static int metricgroup__has_metric_or_groups_callback(const struct pmu_metric *p
bool metricgroup__has_metric_or_groups(const char *pmu, const char *metric_or_groups)
{
- const struct pmu_metrics_table *table = pmu_metrics_table__find();
+ const struct pmu_metrics_table *tables[2] = {
+ pmu_metrics_table__find(),
+ pmu_metrics_table__default(),
+ };
struct metricgroup__has_metric_data data = {
.pmu = pmu,
.metric_or_groups = metric_or_groups,
};
- if (!table)
- return false;
-
- return pmu_metrics_table__for_each_metric(table,
- metricgroup__has_metric_or_groups_callback,
- &data)
- ? true : false;
+ for (size_t i = 0; i < ARRAY_SIZE(tables); i++) {
+ if (pmu_metrics_table__for_each_metric(tables[i],
+ metricgroup__has_metric_or_groups_callback,
+ &data))
+ return true;
+ }
+ return false;
}
static int metricgroup__topdown_max_level_callback(const struct pmu_metric *pm,
@@ -1607,6 +1678,7 @@ int metricgroup__copy_metric_events(struct evlist *evlist, struct cgroup *cgrp,
pr_debug("copying metric event for cgroup '%s': %s (idx=%d)\n",
cgrp ? cgrp->name : "root", evsel->name, evsel->core.idx);
+ new_me->is_default = old_me->is_default;
list_for_each_entry(old_expr, &old_me->head, nd) {
new_expr = malloc(sizeof(*new_expr));
if (!new_expr)
@@ -1620,6 +1692,7 @@ int metricgroup__copy_metric_events(struct evlist *evlist, struct cgroup *cgrp,
new_expr->metric_unit = old_expr->metric_unit;
new_expr->runtime = old_expr->runtime;
+ new_expr->default_metricgroup_name = old_expr->default_metricgroup_name;
if (old_expr->metric_refs) {
/* calculate number of metric_events */
diff --git a/tools/perf/util/metricgroup.h b/tools/perf/util/metricgroup.h
index 324880b2ed8f..4be6bfc13c46 100644
--- a/tools/perf/util/metricgroup.h
+++ b/tools/perf/util/metricgroup.h
@@ -16,7 +16,7 @@ struct cgroup;
/**
* A node in a rblist keyed by the evsel. The global rblist of metric events
- * generally exists in perf_stat_config. The evsel is looked up in the rblist
+ * generally exists in evlist. The evsel is looked up in the rblist
* yielding a list of metric_expr.
*/
struct metric_event {
diff --git a/tools/perf/util/mmap.c b/tools/perf/util/mmap.c
index a34726219af3..b69f926d314b 100644
--- a/tools/perf/util/mmap.c
+++ b/tools/perf/util/mmap.c
@@ -7,6 +7,7 @@
*/
#include <sys/mman.h>
+#include <errno.h>
#include <inttypes.h>
#include <asm/bug.h>
#include <linux/zalloc.h>
diff --git a/tools/perf/util/mutex.c b/tools/perf/util/mutex.c
index bca7f0717f35..7aa1f3f55a7d 100644
--- a/tools/perf/util/mutex.c
+++ b/tools/perf/util/mutex.c
@@ -17,7 +17,7 @@ static void check_err(const char *fn, int err)
#define CHECK_ERR(err) check_err(__func__, err)
-static void __mutex_init(struct mutex *mtx, bool pshared)
+static void __mutex_init(struct mutex *mtx, bool pshared, bool recursive)
{
pthread_mutexattr_t attr;
@@ -27,21 +27,27 @@ static void __mutex_init(struct mutex *mtx, bool pshared)
/* In normal builds enable error checking, such as recursive usage. */
CHECK_ERR(pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_ERRORCHECK));
#endif
+ if (recursive)
+ CHECK_ERR(pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_RECURSIVE));
if (pshared)
CHECK_ERR(pthread_mutexattr_setpshared(&attr, PTHREAD_PROCESS_SHARED));
-
CHECK_ERR(pthread_mutex_init(&mtx->lock, &attr));
CHECK_ERR(pthread_mutexattr_destroy(&attr));
}
void mutex_init(struct mutex *mtx)
{
- __mutex_init(mtx, /*pshared=*/false);
+ __mutex_init(mtx, /*pshared=*/false, /*recursive=*/false);
}
void mutex_init_pshared(struct mutex *mtx)
{
- __mutex_init(mtx, /*pshared=*/true);
+ __mutex_init(mtx, /*pshared=*/true, /*recursive=*/false);
+}
+
+void mutex_init_recursive(struct mutex *mtx)
+{
+ __mutex_init(mtx, /*pshared=*/false, /*recursive=*/true);
}
void mutex_destroy(struct mutex *mtx)
diff --git a/tools/perf/util/mutex.h b/tools/perf/util/mutex.h
index 38458f00846f..70232d8d094f 100644
--- a/tools/perf/util/mutex.h
+++ b/tools/perf/util/mutex.h
@@ -104,6 +104,8 @@ void mutex_init(struct mutex *mtx);
* process-private attribute.
*/
void mutex_init_pshared(struct mutex *mtx);
+/* Initializes a mutex that may be recursively held on the same thread. */
+void mutex_init_recursive(struct mutex *mtx);
void mutex_destroy(struct mutex *mtx);
void mutex_lock(struct mutex *mtx) EXCLUSIVE_LOCK_FUNCTION(*mtx);
diff --git a/tools/perf/util/namespaces.c b/tools/perf/util/namespaces.c
index 68f5de2d79c7..01502570b32d 100644
--- a/tools/perf/util/namespaces.c
+++ b/tools/perf/util/namespaces.c
@@ -6,7 +6,6 @@
#include "namespaces.h"
#include "event.h"
-#include "get_current_dir_name.h"
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
@@ -293,14 +292,14 @@ void nsinfo__mountns_enter(struct nsinfo *nsi,
if (!nsi || !nsinfo__need_setns(nsi))
return;
- if (snprintf(curpath, PATH_MAX, "/proc/self/ns/mnt") >= PATH_MAX)
+ if (!getcwd(curpath, sizeof(curpath)))
return;
- oldcwd = get_current_dir_name();
+ oldcwd = strdup(curpath);
if (!oldcwd)
return;
- oldns = open(curpath, O_RDONLY);
+ oldns = open("/proc/self/ns/mnt", O_RDONLY);
if (oldns < 0)
goto errout;
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 8282ddf68b98..17c1c36a7bf9 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -30,6 +30,7 @@
#include "util/event.h"
#include "util/bpf-filter.h"
#include "util/stat.h"
+#include "util/tool_pmu.h"
#include "util/util.h"
#include "tracepoint.h"
#include <api/fs/tracing_path.h>
@@ -40,49 +41,7 @@ static int get_config_terms(const struct parse_events_terms *head_config,
struct list_head *head_terms);
static int parse_events_terms__copy(const struct parse_events_terms *src,
struct parse_events_terms *dest);
-
-const struct event_symbol event_symbols_hw[PERF_COUNT_HW_MAX] = {
- [PERF_COUNT_HW_CPU_CYCLES] = {
- .symbol = "cpu-cycles",
- .alias = "cycles",
- },
- [PERF_COUNT_HW_INSTRUCTIONS] = {
- .symbol = "instructions",
- .alias = "",
- },
- [PERF_COUNT_HW_CACHE_REFERENCES] = {
- .symbol = "cache-references",
- .alias = "",
- },
- [PERF_COUNT_HW_CACHE_MISSES] = {
- .symbol = "cache-misses",
- .alias = "",
- },
- [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = {
- .symbol = "branch-instructions",
- .alias = "branches",
- },
- [PERF_COUNT_HW_BRANCH_MISSES] = {
- .symbol = "branch-misses",
- .alias = "",
- },
- [PERF_COUNT_HW_BUS_CYCLES] = {
- .symbol = "bus-cycles",
- .alias = "",
- },
- [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = {
- .symbol = "stalled-cycles-frontend",
- .alias = "idle-cycles-frontend",
- },
- [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = {
- .symbol = "stalled-cycles-backend",
- .alias = "idle-cycles-backend",
- },
- [PERF_COUNT_HW_REF_CPU_CYCLES] = {
- .symbol = "ref-cycles",
- .alias = "",
- },
-};
+static int parse_events_terms__to_strbuf(const struct parse_events_terms *terms, struct strbuf *sb);
static const char *const event_types[] = {
[PERF_TYPE_HARDWARE] = "hardware",
@@ -126,7 +85,8 @@ static char *get_config_name(const struct parse_events_terms *head_terms)
return get_config_str(head_terms, PARSE_EVENTS__TERM_TYPE_NAME);
}
-static struct perf_cpu_map *get_config_cpu(const struct parse_events_terms *head_terms)
+static struct perf_cpu_map *get_config_cpu(const struct parse_events_terms *head_terms,
+ bool fake_pmu)
{
struct parse_events_term *term;
struct perf_cpu_map *cpus = NULL;
@@ -135,24 +95,33 @@ static struct perf_cpu_map *get_config_cpu(const struct parse_events_terms *head
return NULL;
list_for_each_entry(term, &head_terms->terms, list) {
- if (term->type_term == PARSE_EVENTS__TERM_TYPE_CPU) {
- struct perf_cpu_map *term_cpus;
+ struct perf_cpu_map *term_cpus;
+
+ if (term->type_term != PARSE_EVENTS__TERM_TYPE_CPU)
+ continue;
+
+ if (term->type_val == PARSE_EVENTS__TERM_TYPE_NUM) {
+ term_cpus = perf_cpu_map__new_int(term->val.num);
+ } else {
+ struct perf_pmu *pmu = perf_pmus__find(term->val.str);
- if (term->type_val == PARSE_EVENTS__TERM_TYPE_NUM) {
- term_cpus = perf_cpu_map__new_int(term->val.num);
+ if (pmu) {
+ term_cpus = pmu->is_core && perf_cpu_map__is_empty(pmu->cpus)
+ ? cpu_map__online()
+ : perf_cpu_map__get(pmu->cpus);
} else {
- struct perf_pmu *pmu = perf_pmus__find(term->val.str);
-
- if (pmu && perf_cpu_map__is_empty(pmu->cpus))
- term_cpus = pmu->is_core ? cpu_map__online() : NULL;
- else if (pmu)
- term_cpus = perf_cpu_map__get(pmu->cpus);
- else
- term_cpus = perf_cpu_map__new(term->val.str);
+ term_cpus = perf_cpu_map__new(term->val.str);
+ if (!term_cpus && fake_pmu) {
+ /*
+ * Assume the PMU string makes sense on a different
+ * machine and fake a value with all online CPUs.
+ */
+ term_cpus = cpu_map__online();
+ }
}
- perf_cpu_map__merge(&cpus, term_cpus);
- perf_cpu_map__put(term_cpus);
}
+ perf_cpu_map__merge(&cpus, term_cpus);
+ perf_cpu_map__put(term_cpus);
}
return cpus;
@@ -247,6 +216,8 @@ __add_event(struct list_head *list, int *idx,
PERF_PMU_FORMAT_VALUE_CONFIG2, "config2");
perf_pmu__warn_invalid_config(pmu, attr->config3, name,
PERF_PMU_FORMAT_VALUE_CONFIG3, "config3");
+ perf_pmu__warn_invalid_config(pmu, attr->config4, name,
+ PERF_PMU_FORMAT_VALUE_CONFIG4, "config4");
}
}
/*
@@ -259,8 +230,12 @@ __add_event(struct list_head *list, int *idx,
if (pmu) {
is_pmu_core = pmu->is_core;
pmu_cpus = perf_cpu_map__get(pmu->cpus);
- if (perf_cpu_map__is_empty(pmu_cpus))
- pmu_cpus = cpu_map__online();
+ if (perf_cpu_map__is_empty(pmu_cpus)) {
+ if (perf_pmu__is_tool(pmu))
+ pmu_cpus = tool_pmu__cpus(attr);
+ else
+ pmu_cpus = cpu_map__online();
+ }
} else {
is_pmu_core = (attr->type == PERF_TYPE_HARDWARE ||
attr->type == PERF_TYPE_HW_CACHE);
@@ -369,13 +344,13 @@ static int parse_aliases(const char *str, const char *const names[][EVSEL__MAX_A
typedef int config_term_func_t(struct perf_event_attr *attr,
struct parse_events_term *term,
- struct parse_events_error *err);
+ struct parse_events_state *parse_state);
static int config_term_common(struct perf_event_attr *attr,
struct parse_events_term *term,
- struct parse_events_error *err);
+ struct parse_events_state *parse_state);
static int config_attr(struct perf_event_attr *attr,
const struct parse_events_terms *head,
- struct parse_events_error *err,
+ struct parse_events_state *parse_state,
config_term_func_t config_term);
/**
@@ -461,85 +436,7 @@ bool parse_events__filter_pmu(const struct parse_events_state *parse_state,
static int parse_events_add_pmu(struct parse_events_state *parse_state,
struct list_head *list, struct perf_pmu *pmu,
const struct parse_events_terms *const_parsed_terms,
- struct evsel *first_wildcard_match, u64 alternate_hw_config);
-
-int parse_events_add_cache(struct list_head *list, int *idx, const char *name,
- struct parse_events_state *parse_state,
- struct parse_events_terms *parsed_terms)
-{
- struct perf_pmu *pmu = NULL;
- bool found_supported = false;
- const char *config_name = get_config_name(parsed_terms);
- const char *metric_id = get_config_metric_id(parsed_terms);
- struct perf_cpu_map *cpus = get_config_cpu(parsed_terms);
- int ret = 0;
- struct evsel *first_wildcard_match = NULL;
-
- while ((pmu = perf_pmus__scan_for_event(pmu, name)) != NULL) {
- LIST_HEAD(config_terms);
- struct perf_event_attr attr;
-
- if (parse_events__filter_pmu(parse_state, pmu))
- continue;
-
- if (perf_pmu__have_event(pmu, name)) {
- /*
- * The PMU has the event so add as not a legacy cache
- * event.
- */
- ret = parse_events_add_pmu(parse_state, list, pmu,
- parsed_terms,
- first_wildcard_match,
- /*alternate_hw_config=*/PERF_COUNT_HW_MAX);
- if (ret)
- goto out_err;
- if (first_wildcard_match == NULL)
- first_wildcard_match =
- container_of(list->prev, struct evsel, core.node);
- continue;
- }
-
- if (!pmu->is_core) {
- /* Legacy cache events are only supported by core PMUs. */
- continue;
- }
-
- memset(&attr, 0, sizeof(attr));
- attr.type = PERF_TYPE_HW_CACHE;
-
- ret = parse_events__decode_legacy_cache(name, pmu->type, &attr.config);
- if (ret)
- return ret;
-
- found_supported = true;
-
- if (parsed_terms) {
- if (config_attr(&attr, parsed_terms, parse_state->error,
- config_term_common)) {
- ret = -EINVAL;
- goto out_err;
- }
- if (get_config_terms(parsed_terms, &config_terms)) {
- ret = -ENOMEM;
- goto out_err;
- }
- }
-
- if (__add_event(list, idx, &attr, /*init_attr*/true, config_name ?: name,
- metric_id, pmu, &config_terms, first_wildcard_match,
- cpus, /*alternate_hw_config=*/PERF_COUNT_HW_MAX) == NULL)
- ret = -ENOMEM;
-
- if (first_wildcard_match == NULL)
- first_wildcard_match = container_of(list->prev, struct evsel, core.node);
- free_config_terms(&config_terms);
- if (ret)
- goto out_err;
- }
-out_err:
- perf_cpu_map__put(cpus);
- return found_supported ? 0 : -EINVAL;
-}
+ struct evsel *first_wildcard_match);
static void tracepoint_error(struct parse_events_error *e, int err,
const char *sys, const char *name, int column)
@@ -767,8 +664,7 @@ int parse_events_add_breakpoint(struct parse_events_state *parse_state,
attr.sample_period = 1;
if (head_config) {
- if (config_attr(&attr, head_config, parse_state->error,
- config_term_common))
+ if (config_attr(&attr, head_config, parse_state, config_term_common))
return -EINVAL;
if (get_config_terms(head_config, &config_terms))
@@ -811,6 +707,7 @@ const char *parse_events__term_type_str(enum parse_events__term_type term_type)
[PARSE_EVENTS__TERM_TYPE_CONFIG1] = "config1",
[PARSE_EVENTS__TERM_TYPE_CONFIG2] = "config2",
[PARSE_EVENTS__TERM_TYPE_CONFIG3] = "config3",
+ [PARSE_EVENTS__TERM_TYPE_CONFIG4] = "config4",
[PARSE_EVENTS__TERM_TYPE_NAME] = "name",
[PARSE_EVENTS__TERM_TYPE_SAMPLE_PERIOD] = "period",
[PARSE_EVENTS__TERM_TYPE_SAMPLE_FREQ] = "freq",
@@ -831,9 +728,10 @@ const char *parse_events__term_type_str(enum parse_events__term_type term_type)
[PARSE_EVENTS__TERM_TYPE_AUX_SAMPLE_SIZE] = "aux-sample-size",
[PARSE_EVENTS__TERM_TYPE_METRIC_ID] = "metric-id",
[PARSE_EVENTS__TERM_TYPE_RAW] = "raw",
- [PARSE_EVENTS__TERM_TYPE_LEGACY_CACHE] = "legacy-cache",
- [PARSE_EVENTS__TERM_TYPE_HARDWARE] = "hardware",
+ [PARSE_EVENTS__TERM_TYPE_LEGACY_HARDWARE_CONFIG] = "legacy-hardware-config",
+ [PARSE_EVENTS__TERM_TYPE_LEGACY_CACHE_CONFIG] = "legacy-cache-config",
[PARSE_EVENTS__TERM_TYPE_CPU] = "cpu",
+ [PARSE_EVENTS__TERM_TYPE_RATIO_TO_PREV] = "ratio-to-prev",
};
if ((unsigned int)term_type >= __PARSE_EVENTS__TERM_TYPE_NR)
return "unknown term";
@@ -859,6 +757,7 @@ config_term_avail(enum parse_events__term_type term_type, struct parse_events_er
case PARSE_EVENTS__TERM_TYPE_CONFIG1:
case PARSE_EVENTS__TERM_TYPE_CONFIG2:
case PARSE_EVENTS__TERM_TYPE_CONFIG3:
+ case PARSE_EVENTS__TERM_TYPE_CONFIG4:
case PARSE_EVENTS__TERM_TYPE_NAME:
case PARSE_EVENTS__TERM_TYPE_METRIC_ID:
case PARSE_EVENTS__TERM_TYPE_SAMPLE_PERIOD:
@@ -882,8 +781,9 @@ config_term_avail(enum parse_events__term_type term_type, struct parse_events_er
case PARSE_EVENTS__TERM_TYPE_AUX_ACTION:
case PARSE_EVENTS__TERM_TYPE_AUX_SAMPLE_SIZE:
case PARSE_EVENTS__TERM_TYPE_RAW:
- case PARSE_EVENTS__TERM_TYPE_LEGACY_CACHE:
- case PARSE_EVENTS__TERM_TYPE_HARDWARE:
+ case PARSE_EVENTS__TERM_TYPE_RATIO_TO_PREV:
+ case PARSE_EVENTS__TERM_TYPE_LEGACY_HARDWARE_CONFIG:
+ case PARSE_EVENTS__TERM_TYPE_LEGACY_CACHE_CONFIG:
default:
if (!err)
return false;
@@ -903,12 +803,12 @@ void parse_events__shrink_config_terms(void)
static int config_term_common(struct perf_event_attr *attr,
struct parse_events_term *term,
- struct parse_events_error *err)
+ struct parse_events_state *parse_state)
{
-#define CHECK_TYPE_VAL(type) \
-do { \
- if (check_type_val(term, err, PARSE_EVENTS__TERM_TYPE_ ## type)) \
- return -EINVAL; \
+#define CHECK_TYPE_VAL(type) \
+do { \
+ if (check_type_val(term, parse_state->error, PARSE_EVENTS__TERM_TYPE_ ## type)) \
+ return -EINVAL; \
} while (0)
switch (term->type_term) {
@@ -928,6 +828,10 @@ do { \
CHECK_TYPE_VAL(NUM);
attr->config3 = term->val.num;
break;
+ case PARSE_EVENTS__TERM_TYPE_CONFIG4:
+ CHECK_TYPE_VAL(NUM);
+ attr->config4 = term->val.num;
+ break;
case PARSE_EVENTS__TERM_TYPE_SAMPLE_PERIOD:
CHECK_TYPE_VAL(NUM);
break;
@@ -939,7 +843,7 @@ do { \
if (strcmp(term->val.str, "no") &&
parse_branch_str(term->val.str,
&attr->branch_sample_type)) {
- parse_events_error__handle(err, term->err_val,
+ parse_events_error__handle(parse_state->error, term->err_val,
strdup("invalid branch sample type"),
NULL);
return -EINVAL;
@@ -948,7 +852,7 @@ do { \
case PARSE_EVENTS__TERM_TYPE_TIME:
CHECK_TYPE_VAL(NUM);
if (term->val.num > 1) {
- parse_events_error__handle(err, term->err_val,
+ parse_events_error__handle(parse_state->error, term->err_val,
strdup("expected 0 or 1"),
NULL);
return -EINVAL;
@@ -990,7 +894,7 @@ do { \
case PARSE_EVENTS__TERM_TYPE_PERCORE:
CHECK_TYPE_VAL(NUM);
if ((unsigned int)term->val.num > 1) {
- parse_events_error__handle(err, term->err_val,
+ parse_events_error__handle(parse_state->error, term->err_val,
strdup("expected 0 or 1"),
NULL);
return -EINVAL;
@@ -1005,7 +909,7 @@ do { \
case PARSE_EVENTS__TERM_TYPE_AUX_SAMPLE_SIZE:
CHECK_TYPE_VAL(NUM);
if (term->val.num > UINT_MAX) {
- parse_events_error__handle(err, term->err_val,
+ parse_events_error__handle(parse_state->error, term->err_val,
strdup("too big"),
NULL);
return -EINVAL;
@@ -1016,7 +920,7 @@ do { \
if (term->type_val == PARSE_EVENTS__TERM_TYPE_NUM) {
if (term->val.num >= (u64)cpu__max_present_cpu().cpu) {
- parse_events_error__handle(err, term->err_val,
+ parse_events_error__handle(parse_state->error, term->err_val,
strdup("too big"),
/*help=*/NULL);
return -EINVAL;
@@ -1028,8 +932,8 @@ do { \
break;
map = perf_cpu_map__new(term->val.str);
- if (!map) {
- parse_events_error__handle(err, term->err_val,
+ if (!map && !parse_state->fake_pmu) {
+ parse_events_error__handle(parse_state->error, term->err_val,
strdup("not a valid PMU or CPU number"),
/*help=*/NULL);
return -EINVAL;
@@ -1037,12 +941,27 @@ do { \
perf_cpu_map__put(map);
break;
}
+ case PARSE_EVENTS__TERM_TYPE_RATIO_TO_PREV:
+ CHECK_TYPE_VAL(STR);
+ if (strtod(term->val.str, NULL) <= 0) {
+ parse_events_error__handle(parse_state->error, term->err_val,
+ strdup("zero or negative"),
+ NULL);
+ return -EINVAL;
+ }
+ if (errno == ERANGE) {
+ parse_events_error__handle(parse_state->error, term->err_val,
+ strdup("too big"),
+ NULL);
+ return -EINVAL;
+ }
+ break;
case PARSE_EVENTS__TERM_TYPE_DRV_CFG:
case PARSE_EVENTS__TERM_TYPE_USER:
- case PARSE_EVENTS__TERM_TYPE_LEGACY_CACHE:
- case PARSE_EVENTS__TERM_TYPE_HARDWARE:
+ case PARSE_EVENTS__TERM_TYPE_LEGACY_HARDWARE_CONFIG:
+ case PARSE_EVENTS__TERM_TYPE_LEGACY_CACHE_CONFIG:
default:
- parse_events_error__handle(err, term->err_term,
+ parse_events_error__handle(parse_state->error, term->err_term,
strdup(parse_events__term_type_str(term->type_term)),
parse_events_formats_error_string(NULL));
return -EINVAL;
@@ -1057,67 +976,72 @@ do { \
* if an invalid config term is provided for legacy events
* (for example, instructions/badterm/...), which is confusing.
*/
- if (!config_term_avail(term->type_term, err))
+ if (!config_term_avail(term->type_term, parse_state->error))
return -EINVAL;
return 0;
#undef CHECK_TYPE_VAL
}
-static int config_term_pmu(struct perf_event_attr *attr,
- struct parse_events_term *term,
- struct parse_events_error *err)
+static bool check_pmu_is_core(__u32 type, const struct parse_events_term *term,
+ struct parse_events_error *err)
{
- if (term->type_term == PARSE_EVENTS__TERM_TYPE_LEGACY_CACHE) {
- struct perf_pmu *pmu = perf_pmus__find_by_type(attr->type);
+ struct perf_pmu *pmu = NULL;
- if (!pmu) {
- char *err_str;
+ /* Avoid loading all PMUs with perf_pmus__find_by_type, just scan the core ones. */
+ while ((pmu = perf_pmus__scan_core(pmu)) != NULL) {
+ if (pmu->type == type)
+ return true;
+ }
+ parse_events_error__handle(err, term->err_val,
+ strdup("needs a core PMU"),
+ NULL);
+ return false;
+}
- if (asprintf(&err_str, "Failed to find PMU for type %d", attr->type) >= 0)
- parse_events_error__handle(err, term->err_term,
- err_str, /*help=*/NULL);
+static int config_term_pmu(struct perf_event_attr *attr,
+ struct parse_events_term *term,
+ struct parse_events_state *parse_state)
+{
+ if (term->type_term == PARSE_EVENTS__TERM_TYPE_LEGACY_HARDWARE_CONFIG) {
+ if (check_type_val(term, parse_state->error, PARSE_EVENTS__TERM_TYPE_NUM))
+ return -EINVAL;
+ if (term->val.num >= PERF_COUNT_HW_MAX) {
+ parse_events_error__handle(parse_state->error, term->err_val,
+ strdup("too big"),
+ NULL);
return -EINVAL;
}
- /*
- * Rewrite the PMU event to a legacy cache one unless the PMU
- * doesn't support legacy cache events or the event is present
- * within the PMU.
- */
- if (perf_pmu__supports_legacy_cache(pmu) &&
- !perf_pmu__have_event(pmu, term->config)) {
- attr->type = PERF_TYPE_HW_CACHE;
- return parse_events__decode_legacy_cache(term->config, pmu->type,
- &attr->config);
- } else {
- term->type_term = PARSE_EVENTS__TERM_TYPE_USER;
- term->no_value = true;
- }
+ if (!check_pmu_is_core(attr->type, term, parse_state->error))
+ return -EINVAL;
+ attr->config = term->val.num;
+ if (perf_pmus__supports_extended_type())
+ attr->config |= (__u64)attr->type << PERF_PMU_TYPE_SHIFT;
+ attr->type = PERF_TYPE_HARDWARE;
+ return 0;
}
- if (term->type_term == PARSE_EVENTS__TERM_TYPE_HARDWARE) {
- struct perf_pmu *pmu = perf_pmus__find_by_type(attr->type);
-
- if (!pmu) {
- char *err_str;
+ if (term->type_term == PARSE_EVENTS__TERM_TYPE_LEGACY_CACHE_CONFIG) {
+ int cache_type, cache_op, cache_result;
- if (asprintf(&err_str, "Failed to find PMU for type %d", attr->type) >= 0)
- parse_events_error__handle(err, term->err_term,
- err_str, /*help=*/NULL);
+ if (check_type_val(term, parse_state->error, PARSE_EVENTS__TERM_TYPE_NUM))
+ return -EINVAL;
+ cache_type = term->val.num & 0xFF;
+ cache_op = (term->val.num >> 8) & 0xFF;
+ cache_result = (term->val.num >> 16) & 0xFF;
+ if ((term->val.num & ~0xFFFFFF) ||
+ cache_type >= PERF_COUNT_HW_CACHE_MAX ||
+ cache_op >= PERF_COUNT_HW_CACHE_OP_MAX ||
+ cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX) {
+ parse_events_error__handle(parse_state->error, term->err_val,
+ strdup("too big"),
+ NULL);
return -EINVAL;
}
- /*
- * If the PMU has a sysfs or json event prefer it over
- * legacy. ARM requires this.
- */
- if (perf_pmu__have_event(pmu, term->config)) {
- term->type_term = PARSE_EVENTS__TERM_TYPE_USER;
- term->no_value = true;
- term->alternate_hw_config = true;
- } else {
- attr->type = PERF_TYPE_HARDWARE;
- attr->config = term->val.num;
- if (perf_pmus__supports_extended_type())
- attr->config |= (__u64)pmu->type << PERF_PMU_TYPE_SHIFT;
- }
+ if (!check_pmu_is_core(attr->type, term, parse_state->error))
+ return -EINVAL;
+ attr->config = term->val.num;
+ if (perf_pmus__supports_extended_type())
+ attr->config |= (__u64)attr->type << PERF_PMU_TYPE_SHIFT;
+ attr->type = PERF_TYPE_HW_CACHE;
return 0;
}
if (term->type_term == PARSE_EVENTS__TERM_TYPE_USER ||
@@ -1128,12 +1052,12 @@ static int config_term_pmu(struct perf_event_attr *attr,
*/
return 0;
}
- return config_term_common(attr, term, err);
+ return config_term_common(attr, term, parse_state);
}
static int config_term_tracepoint(struct perf_event_attr *attr,
struct parse_events_term *term,
- struct parse_events_error *err)
+ struct parse_events_state *parse_state)
{
switch (term->type_term) {
case PARSE_EVENTS__TERM_TYPE_CALLGRAPH:
@@ -1147,12 +1071,15 @@ static int config_term_tracepoint(struct perf_event_attr *attr,
case PARSE_EVENTS__TERM_TYPE_AUX_OUTPUT:
case PARSE_EVENTS__TERM_TYPE_AUX_ACTION:
case PARSE_EVENTS__TERM_TYPE_AUX_SAMPLE_SIZE:
- return config_term_common(attr, term, err);
+ return config_term_common(attr, term, parse_state);
case PARSE_EVENTS__TERM_TYPE_USER:
case PARSE_EVENTS__TERM_TYPE_CONFIG:
case PARSE_EVENTS__TERM_TYPE_CONFIG1:
case PARSE_EVENTS__TERM_TYPE_CONFIG2:
case PARSE_EVENTS__TERM_TYPE_CONFIG3:
+ case PARSE_EVENTS__TERM_TYPE_CONFIG4:
+ case PARSE_EVENTS__TERM_TYPE_LEGACY_HARDWARE_CONFIG:
+ case PARSE_EVENTS__TERM_TYPE_LEGACY_CACHE_CONFIG:
case PARSE_EVENTS__TERM_TYPE_NAME:
case PARSE_EVENTS__TERM_TYPE_SAMPLE_PERIOD:
case PARSE_EVENTS__TERM_TYPE_SAMPLE_FREQ:
@@ -1162,16 +1089,13 @@ static int config_term_tracepoint(struct perf_event_attr *attr,
case PARSE_EVENTS__TERM_TYPE_PERCORE:
case PARSE_EVENTS__TERM_TYPE_METRIC_ID:
case PARSE_EVENTS__TERM_TYPE_RAW:
- case PARSE_EVENTS__TERM_TYPE_LEGACY_CACHE:
- case PARSE_EVENTS__TERM_TYPE_HARDWARE:
case PARSE_EVENTS__TERM_TYPE_CPU:
+ case PARSE_EVENTS__TERM_TYPE_RATIO_TO_PREV:
default:
- if (err) {
- parse_events_error__handle(err, term->err_term,
+ parse_events_error__handle(parse_state->error, term->err_term,
strdup(parse_events__term_type_str(term->type_term)),
strdup("valid terms: call-graph,stack-size\n")
);
- }
return -EINVAL;
}
@@ -1180,13 +1104,13 @@ static int config_term_tracepoint(struct perf_event_attr *attr,
static int config_attr(struct perf_event_attr *attr,
const struct parse_events_terms *head,
- struct parse_events_error *err,
+ struct parse_events_state *parse_state,
config_term_func_t config_term)
{
struct parse_events_term *term;
list_for_each_entry(term, &head->terms, list)
- if (config_term(attr, term, err))
+ if (config_term(attr, term, parse_state))
return -EINVAL;
return 0;
@@ -1289,16 +1213,20 @@ do { \
ADD_CONFIG_TERM_VAL(AUX_SAMPLE_SIZE, aux_sample_size,
term->val.num, term->weak);
break;
+ case PARSE_EVENTS__TERM_TYPE_RATIO_TO_PREV:
+ ADD_CONFIG_TERM_STR(RATIO_TO_PREV, term->val.str, term->weak);
+ break;
case PARSE_EVENTS__TERM_TYPE_USER:
case PARSE_EVENTS__TERM_TYPE_CONFIG:
case PARSE_EVENTS__TERM_TYPE_CONFIG1:
case PARSE_EVENTS__TERM_TYPE_CONFIG2:
case PARSE_EVENTS__TERM_TYPE_CONFIG3:
+ case PARSE_EVENTS__TERM_TYPE_CONFIG4:
+ case PARSE_EVENTS__TERM_TYPE_LEGACY_HARDWARE_CONFIG:
+ case PARSE_EVENTS__TERM_TYPE_LEGACY_CACHE_CONFIG:
case PARSE_EVENTS__TERM_TYPE_NAME:
case PARSE_EVENTS__TERM_TYPE_METRIC_ID:
case PARSE_EVENTS__TERM_TYPE_RAW:
- case PARSE_EVENTS__TERM_TYPE_LEGACY_CACHE:
- case PARSE_EVENTS__TERM_TYPE_HARDWARE:
case PARSE_EVENTS__TERM_TYPE_CPU:
default:
break;
@@ -1332,6 +1260,9 @@ static int get_config_chgs(struct perf_pmu *pmu, struct parse_events_terms *head
case PARSE_EVENTS__TERM_TYPE_CONFIG1:
case PARSE_EVENTS__TERM_TYPE_CONFIG2:
case PARSE_EVENTS__TERM_TYPE_CONFIG3:
+ case PARSE_EVENTS__TERM_TYPE_CONFIG4:
+ case PARSE_EVENTS__TERM_TYPE_LEGACY_HARDWARE_CONFIG:
+ case PARSE_EVENTS__TERM_TYPE_LEGACY_CACHE_CONFIG:
case PARSE_EVENTS__TERM_TYPE_NAME:
case PARSE_EVENTS__TERM_TYPE_SAMPLE_PERIOD:
case PARSE_EVENTS__TERM_TYPE_SAMPLE_FREQ:
@@ -1352,9 +1283,8 @@ static int get_config_chgs(struct perf_pmu *pmu, struct parse_events_terms *head
case PARSE_EVENTS__TERM_TYPE_AUX_SAMPLE_SIZE:
case PARSE_EVENTS__TERM_TYPE_METRIC_ID:
case PARSE_EVENTS__TERM_TYPE_RAW:
- case PARSE_EVENTS__TERM_TYPE_LEGACY_CACHE:
- case PARSE_EVENTS__TERM_TYPE_HARDWARE:
case PARSE_EVENTS__TERM_TYPE_CPU:
+ case PARSE_EVENTS__TERM_TYPE_RATIO_TO_PREV:
default:
break;
}
@@ -1378,8 +1308,7 @@ int parse_events_add_tracepoint(struct parse_events_state *parse_state,
if (head_config) {
struct perf_event_attr attr;
- if (config_attr(&attr, head_config, err,
- config_term_tracepoint))
+ if (config_attr(&attr, head_config, parse_state, config_term_tracepoint))
return -EINVAL;
}
@@ -1408,8 +1337,7 @@ static int __parse_events_add_numeric(struct parse_events_state *parse_state,
}
if (head_config) {
- if (config_attr(&attr, head_config, parse_state->error,
- config_term_common))
+ if (config_attr(&attr, head_config, parse_state, config_term_common))
return -EINVAL;
if (get_config_terms(head_config, &config_terms))
@@ -1418,7 +1346,7 @@ static int __parse_events_add_numeric(struct parse_events_state *parse_state,
name = get_config_name(head_config);
metric_id = get_config_metric_id(head_config);
- cpus = get_config_cpu(head_config);
+ cpus = get_config_cpu(head_config, parse_state->fake_pmu);
ret = __add_event(list, &parse_state->idx, &attr, /*init_attr*/true, name,
metric_id, pmu, &config_terms, first_wildcard_match,
cpus, /*alternate_hw_config=*/PERF_COUNT_HW_MAX) ? 0 : -ENOMEM;
@@ -1479,8 +1407,9 @@ static bool config_term_percore(struct list_head *config_terms)
static int parse_events_add_pmu(struct parse_events_state *parse_state,
struct list_head *list, struct perf_pmu *pmu,
const struct parse_events_terms *const_parsed_terms,
- struct evsel *first_wildcard_match, u64 alternate_hw_config)
+ struct evsel *first_wildcard_match)
{
+ u64 alternate_hw_config = PERF_COUNT_HW_MAX;
struct perf_event_attr attr;
struct perf_pmu_info info;
struct evsel *evsel;
@@ -1531,7 +1460,7 @@ static int parse_events_add_pmu(struct parse_events_state *parse_state,
fix_raw(&parsed_terms, pmu);
/* Configure attr/terms with a known PMU, this will set hardcoded terms. */
- if (config_attr(&attr, &parsed_terms, parse_state->error, config_term_pmu)) {
+ if (config_attr(&attr, &parsed_terms, parse_state, config_term_pmu)) {
parse_events_terms__exit(&parsed_terms);
return -EINVAL;
}
@@ -1555,7 +1484,7 @@ static int parse_events_add_pmu(struct parse_events_state *parse_state,
/* Configure attr/terms again if an alias was expanded. */
if (alias_rewrote_terms &&
- config_attr(&attr, &parsed_terms, parse_state->error, config_term_pmu)) {
+ config_attr(&attr, &parsed_terms, parse_state, config_term_pmu)) {
parse_events_terms__exit(&parsed_terms);
return -EINVAL;
}
@@ -1583,7 +1512,7 @@ static int parse_events_add_pmu(struct parse_events_state *parse_state,
return -EINVAL;
}
- term_cpu = get_config_cpu(&parsed_terms);
+ term_cpu = get_config_cpu(&parsed_terms, parse_state->fake_pmu);
evsel = __add_event(list, &parse_state->idx, &attr, /*init_attr=*/true,
get_config_name(&parsed_terms),
get_config_metric_id(&parsed_terms), pmu,
@@ -1613,7 +1542,7 @@ static int parse_events_add_pmu(struct parse_events_state *parse_state,
}
int parse_events_multi_pmu_add(struct parse_events_state *parse_state,
- const char *event_name, u64 hw_config,
+ const char *event_name,
const struct parse_events_terms *const_parsed_terms,
struct list_head **listp, void *loc_)
{
@@ -1665,7 +1594,7 @@ int parse_events_multi_pmu_add(struct parse_events_state *parse_state,
continue;
if (!parse_events_add_pmu(parse_state, list, pmu,
- &parsed_terms, first_wildcard_match, hw_config)) {
+ &parsed_terms, first_wildcard_match)) {
struct strbuf sb;
strbuf_init(&sb, /*hint=*/ 0);
@@ -1680,7 +1609,7 @@ int parse_events_multi_pmu_add(struct parse_events_state *parse_state,
if (parse_state->fake_pmu) {
if (!parse_events_add_pmu(parse_state, list, perf_pmus__fake_pmu(), &parsed_terms,
- first_wildcard_match, hw_config)) {
+ first_wildcard_match)) {
struct strbuf sb;
strbuf_init(&sb, /*hint=*/ 0);
@@ -1722,15 +1651,13 @@ int parse_events_multi_pmu_add_or_add_pmu(struct parse_events_state *parse_state
/* Attempt to add to list assuming event_or_pmu is a PMU name. */
pmu = perf_pmus__find(event_or_pmu);
if (pmu && !parse_events_add_pmu(parse_state, *listp, pmu, const_parsed_terms,
- first_wildcard_match,
- /*alternate_hw_config=*/PERF_COUNT_HW_MAX))
+ first_wildcard_match))
return 0;
if (parse_state->fake_pmu) {
if (!parse_events_add_pmu(parse_state, *listp, perf_pmus__fake_pmu(),
const_parsed_terms,
- first_wildcard_match,
- /*alternate_hw_config=*/PERF_COUNT_HW_MAX))
+ first_wildcard_match))
return 0;
}
@@ -1743,8 +1670,7 @@ int parse_events_multi_pmu_add_or_add_pmu(struct parse_events_state *parse_state
if (!parse_events_add_pmu(parse_state, *listp, pmu,
const_parsed_terms,
- first_wildcard_match,
- /*alternate_hw_config=*/PERF_COUNT_HW_MAX)) {
+ first_wildcard_match)) {
ok++;
parse_state->wild_card_pmus = true;
}
@@ -1758,7 +1684,7 @@ int parse_events_multi_pmu_add_or_add_pmu(struct parse_events_state *parse_state
/* Failure to add, assume event_or_pmu is an event name. */
zfree(listp);
- if (!parse_events_multi_pmu_add(parse_state, event_or_pmu, PERF_COUNT_HW_MAX,
+ if (!parse_events_multi_pmu_add(parse_state, event_or_pmu,
const_parsed_terms, listp, loc))
return 0;
@@ -1892,6 +1818,8 @@ static int parse_events__modifier_list(struct parse_events_state *parse_state,
evsel->bpf_counter = true;
if (mod.retire_lat)
evsel->retire_lat = true;
+ if (mod.dont_regroup)
+ evsel->dont_regroup = true;
}
return 0;
}
@@ -1929,7 +1857,6 @@ int parse_events__set_default_name(struct list_head *list, char *name)
}
static int parse_events__scanner(const char *str,
- FILE *input,
struct parse_events_state *parse_state)
{
YY_BUFFER_STATE buffer;
@@ -1940,10 +1867,7 @@ static int parse_events__scanner(const char *str,
if (ret)
return ret;
- if (str)
- buffer = parse_events__scan_string(str, scanner);
- else
- parse_events_set_in(input, scanner);
+ buffer = parse_events__scan_string(str, scanner);
#ifdef PARSER_DEBUG
parse_events_debug = 1;
@@ -1951,10 +1875,8 @@ static int parse_events__scanner(const char *str,
#endif
ret = parse_events_parse(parse_state, scanner);
- if (str) {
- parse_events__flush_buffer(buffer, scanner);
- parse_events__delete_buffer(buffer, scanner);
- }
+ parse_events__flush_buffer(buffer, scanner);
+ parse_events__delete_buffer(buffer, scanner);
parse_events_lex_destroy(scanner);
return ret;
}
@@ -1962,7 +1884,7 @@ static int parse_events__scanner(const char *str,
/*
* parse event config string, return a list of event terms.
*/
-int parse_events_terms(struct parse_events_terms *terms, const char *str, FILE *input)
+int parse_events_terms(struct parse_events_terms *terms, const char *str)
{
struct parse_events_state parse_state = {
.terms = NULL,
@@ -1970,7 +1892,7 @@ int parse_events_terms(struct parse_events_terms *terms, const char *str, FILE *
};
int ret;
- ret = parse_events__scanner(str, input, &parse_state);
+ ret = parse_events__scanner(str, &parse_state);
if (!ret)
list_splice(&parse_state.terms->terms, &terms->terms);
@@ -2067,14 +1989,18 @@ static int evlist__cmp(void *_fg_idx, const struct list_head *l, const struct li
* event's index is used. An index may be forced for events that
* must be in the same group, namely Intel topdown events.
*/
- if (*force_grouped_idx != -1 && arch_evsel__must_be_in_group(lhs)) {
+ if (lhs->dont_regroup) {
+ lhs_sort_idx = lhs_core->idx;
+ } else if (*force_grouped_idx != -1 && arch_evsel__must_be_in_group(lhs)) {
lhs_sort_idx = *force_grouped_idx;
} else {
bool lhs_has_group = lhs_core->leader != lhs_core || lhs_core->nr_members > 1;
lhs_sort_idx = lhs_has_group ? lhs_core->leader->idx : lhs_core->idx;
}
- if (*force_grouped_idx != -1 && arch_evsel__must_be_in_group(rhs)) {
+ if (rhs->dont_regroup) {
+ rhs_sort_idx = rhs_core->idx;
+ } else if (*force_grouped_idx != -1 && arch_evsel__must_be_in_group(rhs)) {
rhs_sort_idx = *force_grouped_idx;
} else {
bool rhs_has_group = rhs_core->leader != rhs_core || rhs_core->nr_members > 1;
@@ -2172,10 +2098,10 @@ static int parse_events__sort_events_and_fix_groups(struct list_head *list)
*/
idx = 0;
list_for_each_entry(pos, list, core.node) {
- const struct evsel *pos_leader = evsel__leader(pos);
+ struct evsel *pos_leader = evsel__leader(pos);
const char *pos_pmu_name = pos->group_pmu_name;
const char *cur_leader_pmu_name;
- bool pos_force_grouped = force_grouped_idx != -1 &&
+ bool pos_force_grouped = force_grouped_idx != -1 && !pos->dont_regroup &&
arch_evsel__must_be_in_group(pos);
/* Reset index and nr_members. */
@@ -2188,13 +2114,12 @@ static int parse_events__sort_events_and_fix_groups(struct list_head *list)
* Set the group leader respecting the given groupings and that
* groups can't span PMUs.
*/
- if (!cur_leader) {
- cur_leader = pos;
- cur_leaders_grp = &pos->core;
+ if (!cur_leader || pos->dont_regroup) {
+ cur_leader = pos->dont_regroup ? pos_leader : pos;
+ cur_leaders_grp = &cur_leader->core;
if (pos_force_grouped)
force_grouped_leader = pos;
}
-
cur_leader_pmu_name = cur_leader->group_pmu_name;
if (strcmp(cur_leader_pmu_name, pos_pmu_name)) {
/* PMU changed so the group/leader must change. */
@@ -2275,7 +2200,7 @@ int __parse_events(struct evlist *evlist, const char *str, const char *pmu_filte
};
int ret, ret2;
- ret = parse_events__scanner(str, /*input=*/ NULL, &parse_state);
+ ret = parse_events__scanner(str, &parse_state);
if (!ret && list_empty(&parse_state.list)) {
WARN_ONCE(true, "WARNING: event parser found nothing\n");
@@ -2327,6 +2252,8 @@ int parse_event(struct evlist *evlist, const char *str)
parse_events_error__init(&err);
ret = parse_events(evlist, str, &err);
+ if (ret && verbose > 0)
+ parse_events_error__print(&err, str);
parse_events_error__exit(&err);
return ret;
}
@@ -2823,7 +2750,7 @@ void parse_events_terms__delete(struct parse_events_terms *terms)
free(terms);
}
-int parse_events_terms__to_strbuf(const struct parse_events_terms *terms, struct strbuf *sb)
+static int parse_events_terms__to_strbuf(const struct parse_events_terms *terms, struct strbuf *sb)
{
struct parse_events_term *term;
bool first = true;
diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h
index 62dc7202e3ba..3577ab213730 100644
--- a/tools/perf/util/parse-events.h
+++ b/tools/perf/util/parse-events.h
@@ -9,7 +9,6 @@
#include <stdbool.h>
#include <linux/types.h>
#include <linux/perf_event.h>
-#include <stdio.h>
#include <string.h>
#include <sys/types.h>
@@ -60,6 +59,7 @@ enum parse_events__term_type {
PARSE_EVENTS__TERM_TYPE_CONFIG1,
PARSE_EVENTS__TERM_TYPE_CONFIG2,
PARSE_EVENTS__TERM_TYPE_CONFIG3,
+ PARSE_EVENTS__TERM_TYPE_CONFIG4,
PARSE_EVENTS__TERM_TYPE_NAME,
PARSE_EVENTS__TERM_TYPE_SAMPLE_PERIOD,
PARSE_EVENTS__TERM_TYPE_SAMPLE_FREQ,
@@ -80,10 +80,11 @@ enum parse_events__term_type {
PARSE_EVENTS__TERM_TYPE_AUX_SAMPLE_SIZE,
PARSE_EVENTS__TERM_TYPE_METRIC_ID,
PARSE_EVENTS__TERM_TYPE_RAW,
- PARSE_EVENTS__TERM_TYPE_LEGACY_CACHE,
- PARSE_EVENTS__TERM_TYPE_HARDWARE,
PARSE_EVENTS__TERM_TYPE_CPU,
-#define __PARSE_EVENTS__TERM_TYPE_NR (PARSE_EVENTS__TERM_TYPE_CPU + 1)
+ PARSE_EVENTS__TERM_TYPE_RATIO_TO_PREV,
+ PARSE_EVENTS__TERM_TYPE_LEGACY_HARDWARE_CONFIG,
+ PARSE_EVENTS__TERM_TYPE_LEGACY_CACHE_CONFIG,
+#define __PARSE_EVENTS__TERM_TYPE_NR (PARSE_EVENTS__TERM_TYPE_LEGACY_CACHE_CONFIG + 1)
};
struct parse_events_term {
@@ -131,12 +132,6 @@ struct parse_events_term {
* value is assumed to be 1. An event name also has no value.
*/
bool no_value;
- /**
- * @alternate_hw_config: config is the event name but num is an
- * alternate PERF_TYPE_HARDWARE config value which is often nice for the
- * sake of quick matching.
- */
- bool alternate_hw_config;
};
struct parse_events_error {
@@ -198,8 +193,7 @@ void parse_events_term__delete(struct parse_events_term *term);
void parse_events_terms__delete(struct parse_events_terms *terms);
void parse_events_terms__init(struct parse_events_terms *terms);
void parse_events_terms__exit(struct parse_events_terms *terms);
-int parse_events_terms(struct parse_events_terms *terms, const char *str, FILE *input);
-int parse_events_terms__to_strbuf(const struct parse_events_terms *terms, struct strbuf *sb);
+int parse_events_terms(struct parse_events_terms *terms, const char *str);
struct parse_events_modifier {
u8 precise; /* Number of repeated 'p' for precision. */
@@ -216,6 +210,7 @@ struct parse_events_modifier {
bool guest : 1; /* 'G' */
bool host : 1; /* 'H' */
bool retire_lat : 1; /* 'R' */
+ bool dont_regroup : 1; /* 'X' */
};
int parse_events__modifier_event(struct parse_events_state *parse_state, void *loc,
@@ -233,9 +228,6 @@ int parse_events_add_numeric(struct parse_events_state *parse_state,
u32 type, u64 config,
const struct parse_events_terms *head_config,
bool wildcard);
-int parse_events_add_cache(struct list_head *list, int *idx, const char *name,
- struct parse_events_state *parse_state,
- struct parse_events_terms *parsed_terms);
int parse_events__decode_legacy_cache(const char *name, int pmu_type, __u64 *config);
int parse_events_add_breakpoint(struct parse_events_state *parse_state,
struct list_head *list,
@@ -247,7 +239,7 @@ struct evsel *parse_events__add_event(int idx, struct perf_event_attr *attr,
struct perf_pmu *pmu);
int parse_events_multi_pmu_add(struct parse_events_state *parse_state,
- const char *event_name, u64 hw_config,
+ const char *event_name,
const struct parse_events_terms *const_parsed_terms,
struct list_head **listp, void *loc);
@@ -263,7 +255,6 @@ struct event_symbol {
const char *symbol;
const char *alias;
};
-extern const struct event_symbol event_symbols_hw[];
char *parse_events_formats_error_string(char *additional_terms);
diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l
index 2034590eb789..251ce4321878 100644
--- a/tools/perf/util/parse-events.l
+++ b/tools/perf/util/parse-events.l
@@ -5,16 +5,14 @@
%option stack
%option bison-locations
%option yylineno
-%option reject
+%option noyywrap
%{
#include <errno.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <unistd.h>
+#include <stdlib.h>
+#include <stdio.h>
#include "parse-events.h"
#include "parse-events-bison.h"
-#include "evsel.h"
char *parse_events_get_text(yyscan_t yyscanner);
YYSTYPE *parse_events_get_lval(yyscan_t yyscanner);
@@ -75,11 +73,6 @@ static int quoted_str(yyscan_t scanner, int token)
return token;
}
-static int lc_str(yyscan_t scanner, const struct parse_events_state *state)
-{
- return str(scanner, state->match_legacy_cache_terms ? PE_LEGACY_CACHE : PE_NAME);
-}
-
/*
* This function is called when the parser gets two kind of input:
*
@@ -117,14 +110,6 @@ do { \
yyless(0); \
} while (0)
-static int sym(yyscan_t scanner, int config)
-{
- YYSTYPE *yylval = parse_events_get_lval(scanner);
-
- yylval->num = config;
- return PE_VALUE_SYM_HW;
-}
-
static int term(yyscan_t scanner, enum parse_events__term_type type)
{
YYSTYPE *yylval = parse_events_get_lval(scanner);
@@ -133,16 +118,6 @@ static int term(yyscan_t scanner, enum parse_events__term_type type)
return PE_TERM;
}
-static int hw_term(yyscan_t scanner, int config)
-{
- YYSTYPE *yylval = parse_events_get_lval(scanner);
- char *text = parse_events_get_text(scanner);
-
- yylval->hardware_term.str = strdup(text);
- yylval->hardware_term.num = PERF_TYPE_HARDWARE + config;
- return PE_TERM_HW;
-}
-
static void modifiers_error(struct parse_events_state *parse_state, yyscan_t scanner,
int pos, char mod_char, const char *mod_name)
{
@@ -206,6 +181,7 @@ static int modifiers(struct parse_events_state *parse_state, yyscan_t scanner)
CASE('e', exclusive);
CASE('b', bpf);
CASE('R', retire_lat);
+ CASE('X', dont_regroup);
default:
return PE_ERROR;
}
@@ -222,10 +198,6 @@ do { \
yycolumn += yyleng; \
} while (0);
-#define USER_REJECT \
- yycolumn -= yyleng; \
- REJECT
-
%}
%x mem
@@ -251,13 +223,11 @@ term_name {name_start}[a-zA-Z0-9_*?.\[\]!\-:]*
quoted_name [\']{name_start}[a-zA-Z0-9_*?.\[\]!\-:,\.=]*[\']
drv_cfg_term [a-zA-Z0-9_\.]+(=[a-zA-Z0-9_*?\.:]+)?
/*
- * If you add a modifier you need to update check_modifier().
+ * If you add a modifier you need to update modifiers().
* Also, the letters in modifier_event must not be in modifier_bp.
*/
-modifier_event [ukhpPGHSDIWebR]{1,16}
+modifier_event [ukhpPGHSDIWebRX]{1,17}
modifier_bp [rwx]{1,3}
-lc_type (L1-dcache|l1-d|l1d|L1-data|L1-icache|l1-i|l1i|L1-instruction|LLC|L2|dTLB|d-tlb|Data-TLB|iTLB|i-tlb|Instruction-TLB|branch|branches|bpu|btb|bpc|node)
-lc_op_result (load|loads|read|store|stores|write|prefetch|prefetches|speculative-read|speculative-load|refs|Reference|ops|access|misses|miss)
digit [0-9]
non_digit [^0-9]
@@ -317,6 +287,7 @@ config { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_CONFIG); }
config1 { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_CONFIG1); }
config2 { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_CONFIG2); }
config3 { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_CONFIG3); }
+config4 { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_CONFIG4); }
name { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_NAME); }
period { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_SAMPLE_PERIOD); }
freq { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_SAMPLE_FREQ); }
@@ -336,23 +307,13 @@ aux-action { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_AUX_ACTION); }
aux-sample-size { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_AUX_SAMPLE_SIZE); }
metric-id { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_METRIC_ID); }
cpu { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_CPU); }
-cpu-cycles|cycles { return hw_term(yyscanner, PERF_COUNT_HW_CPU_CYCLES); }
-stalled-cycles-frontend|idle-cycles-frontend { return hw_term(yyscanner, PERF_COUNT_HW_STALLED_CYCLES_FRONTEND); }
-stalled-cycles-backend|idle-cycles-backend { return hw_term(yyscanner, PERF_COUNT_HW_STALLED_CYCLES_BACKEND); }
-instructions { return hw_term(yyscanner, PERF_COUNT_HW_INSTRUCTIONS); }
-cache-references { return hw_term(yyscanner, PERF_COUNT_HW_CACHE_REFERENCES); }
-cache-misses { return hw_term(yyscanner, PERF_COUNT_HW_CACHE_MISSES); }
-branch-instructions|branches { return hw_term(yyscanner, PERF_COUNT_HW_BRANCH_INSTRUCTIONS); }
-branch-misses { return hw_term(yyscanner, PERF_COUNT_HW_BRANCH_MISSES); }
-bus-cycles { return hw_term(yyscanner, PERF_COUNT_HW_BUS_CYCLES); }
-ref-cycles { return hw_term(yyscanner, PERF_COUNT_HW_REF_CPU_CYCLES); }
+ratio-to-prev { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_RATIO_TO_PREV); }
+legacy-hardware-config { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_LEGACY_HARDWARE_CONFIG); }
+legacy-cache-config { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_LEGACY_CACHE_CONFIG); }
r{num_raw_hex} { return str(yyscanner, PE_RAW); }
r0x{num_raw_hex} { return str(yyscanner, PE_RAW); }
, { return ','; }
"/" { BEGIN(INITIAL); return '/'; }
-{lc_type} { return lc_str(yyscanner, _parse_state); }
-{lc_type}-{lc_op_result} { return lc_str(yyscanner, _parse_state); }
-{lc_type}-{lc_op_result}-{lc_op_result} { return lc_str(yyscanner, _parse_state); }
{num_dec} { return value(_parse_state, yyscanner, 10); }
{num_hex} { return value(_parse_state, yyscanner, 16); }
{term_name} { return str(yyscanner, PE_NAME); }
@@ -391,20 +352,6 @@ r0x{num_raw_hex} { return str(yyscanner, PE_RAW); }
<<EOF>> { BEGIN(INITIAL); }
}
-cpu-cycles|cycles { return sym(yyscanner, PERF_COUNT_HW_CPU_CYCLES); }
-stalled-cycles-frontend|idle-cycles-frontend { return sym(yyscanner, PERF_COUNT_HW_STALLED_CYCLES_FRONTEND); }
-stalled-cycles-backend|idle-cycles-backend { return sym(yyscanner, PERF_COUNT_HW_STALLED_CYCLES_BACKEND); }
-instructions { return sym(yyscanner, PERF_COUNT_HW_INSTRUCTIONS); }
-cache-references { return sym(yyscanner, PERF_COUNT_HW_CACHE_REFERENCES); }
-cache-misses { return sym(yyscanner, PERF_COUNT_HW_CACHE_MISSES); }
-branch-instructions|branches { return sym(yyscanner, PERF_COUNT_HW_BRANCH_INSTRUCTIONS); }
-branch-misses { return sym(yyscanner, PERF_COUNT_HW_BRANCH_MISSES); }
-bus-cycles { return sym(yyscanner, PERF_COUNT_HW_BUS_CYCLES); }
-ref-cycles { return sym(yyscanner, PERF_COUNT_HW_REF_CPU_CYCLES); }
-
-{lc_type} { return str(yyscanner, PE_LEGACY_CACHE); }
-{lc_type}-{lc_op_result} { return str(yyscanner, PE_LEGACY_CACHE); }
-{lc_type}-{lc_op_result}-{lc_op_result} { return str(yyscanner, PE_LEGACY_CACHE); }
mem: { BEGIN(mem); return PE_PREFIX_MEM; }
r{num_raw_hex} { return str(yyscanner, PE_RAW); }
{num_dec} { return value(_parse_state, yyscanner, 10); }
@@ -423,8 +370,3 @@ r{num_raw_hex} { return str(yyscanner, PE_RAW); }
. { }
%%
-
-int parse_events_wrap(void *scanner __maybe_unused)
-{
- return 1;
-}
diff --git a/tools/perf/util/parse-events.y b/tools/perf/util/parse-events.y
index a2361c0040d7..c194de5ec1ec 100644
--- a/tools/perf/util/parse-events.y
+++ b/tools/perf/util/parse-events.y
@@ -55,22 +55,18 @@ static void free_list_evsel(struct list_head* list_evsel)
%}
%token PE_START_EVENTS PE_START_TERMS
-%token PE_VALUE PE_VALUE_SYM_HW PE_TERM
+%token PE_VALUE PE_TERM
%token PE_EVENT_NAME
%token PE_RAW PE_NAME
%token PE_MODIFIER_EVENT PE_MODIFIER_BP PE_BP_COLON PE_BP_SLASH
-%token PE_LEGACY_CACHE
%token PE_PREFIX_MEM
%token PE_ERROR
%token PE_DRV_CFG_TERM
-%token PE_TERM_HW
%type <num> PE_VALUE
-%type <num> PE_VALUE_SYM_HW
%type <mod> PE_MODIFIER_EVENT
%type <term_type> PE_TERM
%type <str> PE_RAW
%type <str> PE_NAME
-%type <str> PE_LEGACY_CACHE
%type <str> PE_MODIFIER_BP
%type <str> PE_EVENT_NAME
%type <str> PE_DRV_CFG_TERM
@@ -83,8 +79,6 @@ static void free_list_evsel(struct list_head* list_evsel)
%type <list_terms> opt_pmu_config
%destructor { parse_events_terms__delete ($$); } <list_terms>
%type <list_evsel> event_pmu
-%type <list_evsel> event_legacy_symbol
-%type <list_evsel> event_legacy_cache
%type <list_evsel> event_legacy_mem
%type <list_evsel> event_legacy_tracepoint
%type <list_evsel> event_legacy_numeric
@@ -100,8 +94,6 @@ static void free_list_evsel(struct list_head* list_evsel)
%destructor { free_list_evsel ($$); } <list_evsel>
%type <tracepoint_name> tracepoint_name
%destructor { free ($$.sys); free ($$.event); } <tracepoint_name>
-%type <hardware_term> PE_TERM_HW
-%destructor { free ($$.str); } <hardware_term>
%union
{
@@ -116,10 +108,6 @@ static void free_list_evsel(struct list_head* list_evsel)
char *sys;
char *event;
} tracepoint_name;
- struct hardware_term {
- char *str;
- u64 num;
- } hardware_term;
}
%%
@@ -262,8 +250,6 @@ PE_EVENT_NAME event_def
event_def
event_def: event_pmu |
- event_legacy_symbol |
- event_legacy_cache sep_dc |
event_legacy_mem sep_dc |
event_legacy_tracepoint sep_dc |
event_legacy_numeric sep_dc |
@@ -288,7 +274,7 @@ PE_NAME sep_dc
struct list_head *list;
int err;
- err = parse_events_multi_pmu_add(_parse_state, $1, PERF_COUNT_HW_MAX, NULL, &list, &@1);
+ err = parse_events_multi_pmu_add(_parse_state, $1, /*const_parsed_terms*/NULL, &list, &@1);
if (err < 0) {
struct parse_events_state *parse_state = _parse_state;
struct parse_events_error *error = parse_state->error;
@@ -304,66 +290,6 @@ PE_NAME sep_dc
$$ = list;
}
-event_legacy_symbol:
-PE_VALUE_SYM_HW '/' event_config '/'
-{
- struct list_head *list;
- int err;
-
- list = alloc_list();
- if (!list)
- YYNOMEM;
- err = parse_events_add_numeric(_parse_state, list,
- PERF_TYPE_HARDWARE, $1,
- $3,
- /*wildcard=*/true);
- parse_events_terms__delete($3);
- if (err) {
- free_list_evsel(list);
- PE_ABORT(err);
- }
- $$ = list;
-}
-|
-PE_VALUE_SYM_HW sep_slash_slash_dc
-{
- struct list_head *list;
- int err;
-
- list = alloc_list();
- if (!list)
- YYNOMEM;
- err = parse_events_add_numeric(_parse_state, list,
- PERF_TYPE_HARDWARE, $1,
- /*head_config=*/NULL,
- /*wildcard=*/true);
- if (err)
- PE_ABORT(err);
- $$ = list;
-}
-
-event_legacy_cache:
-PE_LEGACY_CACHE opt_event_config
-{
- struct parse_events_state *parse_state = _parse_state;
- struct list_head *list;
- int err;
-
- list = alloc_list();
- if (!list)
- YYNOMEM;
-
- err = parse_events_add_cache(list, &parse_state->idx, $1, parse_state, $2);
-
- parse_events_terms__delete($2);
- free($1);
- if (err) {
- free_list_evsel(list);
- PE_ABORT(err);
- }
- $$ = list;
-}
-
event_legacy_mem:
PE_PREFIX_MEM PE_VALUE PE_BP_SLASH PE_VALUE PE_BP_COLON PE_MODIFIER_BP opt_event_config
{
@@ -582,12 +508,7 @@ event_term
$$ = head;
}
-name_or_raw: PE_RAW | PE_NAME | PE_LEGACY_CACHE
-|
-PE_TERM_HW
-{
- $$ = $1.str;
-}
+name_or_raw: PE_RAW | PE_NAME
event_term:
PE_RAW
@@ -629,19 +550,6 @@ name_or_raw '=' PE_VALUE
$$ = term;
}
|
-PE_LEGACY_CACHE
-{
- struct parse_events_term *term;
- int err = parse_events_term__num(&term, PARSE_EVENTS__TERM_TYPE_LEGACY_CACHE,
- $1, /*num=*/1, /*novalue=*/true, &@1, /*loc_val=*/NULL);
-
- if (err) {
- free($1);
- PE_ABORT(err);
- }
- $$ = term;
-}
-|
PE_NAME
{
struct parse_events_term *term;
@@ -655,20 +563,6 @@ PE_NAME
$$ = term;
}
|
-PE_TERM_HW
-{
- struct parse_events_term *term;
- int err = parse_events_term__num(&term, PARSE_EVENTS__TERM_TYPE_HARDWARE,
- $1.str, $1.num & 255, /*novalue=*/false,
- &@1, /*loc_val=*/NULL);
-
- if (err) {
- free($1.str);
- PE_ABORT(err);
- }
- $$ = term;
-}
-|
PE_TERM '=' name_or_raw
{
struct parse_events_term *term;
@@ -737,8 +631,6 @@ PE_DRV_CFG_TERM
sep_dc: ':' |
-sep_slash_slash_dc: '/' '/' | ':' |
-
%%
void parse_events_error(YYLTYPE *loc, void *_parse_state,
diff --git a/tools/perf/util/perf_api_probe.c b/tools/perf/util/perf_api_probe.c
index 1de3b69cdf4a..6ecf38314f01 100644
--- a/tools/perf/util/perf_api_probe.c
+++ b/tools/perf/util/perf_api_probe.c
@@ -59,10 +59,10 @@ out_delete:
static bool perf_probe_api(setup_probe_fn_t fn)
{
- const char *try[] = {"cycles:u", "instructions:u", "cpu-clock:u", NULL};
+ struct perf_pmu *pmu;
struct perf_cpu_map *cpus;
struct perf_cpu cpu;
- int ret, i = 0;
+ int ret = 0;
cpus = perf_cpu_map__new_online_cpus();
if (!cpus)
@@ -70,12 +70,23 @@ static bool perf_probe_api(setup_probe_fn_t fn)
cpu = perf_cpu_map__cpu(cpus, 0);
perf_cpu_map__put(cpus);
- do {
- ret = perf_do_probe_api(fn, cpu, try[i++]);
- if (!ret)
- return true;
- } while (ret == -EAGAIN && try[i]);
-
+ ret = perf_do_probe_api(fn, cpu, "software/cpu-clock/u");
+ if (!ret)
+ return true;
+
+ pmu = perf_pmus__scan_core(/*pmu=*/NULL);
+ if (pmu) {
+ const char *try[] = {"cycles", "instructions", NULL};
+ char buf[256];
+ int i = 0;
+
+ while (ret == -EAGAIN && try[i]) {
+ snprintf(buf, sizeof(buf), "%s/%s/u", pmu->name, try[i++]);
+ ret = perf_do_probe_api(fn, cpu, buf);
+ if (!ret)
+ return true;
+ }
+ }
return false;
}
diff --git a/tools/perf/util/perf_event_attr_fprintf.c b/tools/perf/util/perf_event_attr_fprintf.c
index 66b666d9ce64..741c3d657a8b 100644
--- a/tools/perf/util/perf_event_attr_fprintf.c
+++ b/tools/perf/util/perf_event_attr_fprintf.c
@@ -343,6 +343,8 @@ int perf_event_attr__fprintf(FILE *fp, struct perf_event_attr *attr,
PRINT_ATTRf(inherit_thread, p_unsigned);
PRINT_ATTRf(remove_on_exec, p_unsigned);
PRINT_ATTRf(sigtrap, p_unsigned);
+ PRINT_ATTRf(defer_callchain, p_unsigned);
+ PRINT_ATTRf(defer_output, p_unsigned);
PRINT_ATTRn("{ wakeup_events, wakeup_watermark }", wakeup_events, p_unsigned, false);
PRINT_ATTRf(bp_type, p_unsigned);
diff --git a/tools/perf/util/pfm.c b/tools/perf/util/pfm.c
index e5b3a2a5ddef..d9043f4afbe7 100644
--- a/tools/perf/util/pfm.c
+++ b/tools/perf/util/pfm.c
@@ -15,6 +15,7 @@
#include "util/strbuf.h"
#include "util/thread_map.h"
+#include <errno.h>
#include <string.h>
#include <linux/kernel.h>
#include <perfmon/pfmlib_perf_event.h>
diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c
index 5a291f1380ed..956ea273c2c7 100644
--- a/tools/perf/util/pmu.c
+++ b/tools/perf/util/pmu.c
@@ -67,8 +67,13 @@ struct perf_pmu_alias {
* json events.
*/
char *topic;
- /** @terms: Owned list of the original parsed parameters. */
- struct parse_events_terms terms;
+ /** @terms: Owned copy of the event terms. */
+ char *terms;
+ /**
+ * @legacy_terms: If the event aliases a legacy event, holds a copy
+ * ofthe legacy event string.
+ */
+ char *legacy_terms;
/**
* @pmu_name: The name copied from the json struct pmu_event. This can
* differ from the PMU name as it won't have suffixes.
@@ -101,6 +106,12 @@ struct perf_pmu_alias {
* default.
*/
bool deprecated;
+ /**
+ * @legacy_deprecated_checked: Legacy events may not be supported by the
+ * PMU need to be checked. If they aren't supported they are marked
+ * deprecated.
+ */
+ bool legacy_deprecated_checked;
/** @from_sysfs: Was the alias from sysfs or a json event? */
bool from_sysfs;
/** @info_loaded: Have the scale, unit and other values been read from disk? */
@@ -429,7 +440,8 @@ static void perf_pmu_free_alias(struct perf_pmu_alias *alias)
zfree(&alias->long_desc);
zfree(&alias->topic);
zfree(&alias->pmu_name);
- parse_events_terms__exit(&alias->terms);
+ zfree(&alias->terms);
+ zfree(&alias->legacy_terms);
free(alias);
}
@@ -522,6 +534,7 @@ static void read_alias_info(struct perf_pmu *pmu, struct perf_pmu_alias *alias)
struct update_alias_data {
struct perf_pmu *pmu;
struct perf_pmu_alias *alias;
+ bool legacy;
};
static int update_alias(const struct pmu_event *pe,
@@ -537,8 +550,13 @@ static int update_alias(const struct pmu_event *pe,
assign_str(pe->name, "topic", &data->alias->topic, pe->topic);
data->alias->per_pkg = pe->perpkg;
if (pe->event) {
- parse_events_terms__exit(&data->alias->terms);
- ret = parse_events_terms(&data->alias->terms, pe->event, /*input=*/NULL);
+ if (data->legacy) {
+ zfree(&data->alias->legacy_terms);
+ data->alias->legacy_terms = strdup(pe->event);
+ } else {
+ zfree(&data->alias->terms);
+ data->alias->terms = strdup(pe->event);
+ }
}
if (!ret && pe->unit) {
char *unit;
@@ -563,7 +581,7 @@ static int update_alias(const struct pmu_event *pe,
}
static int perf_pmu__new_alias(struct perf_pmu *pmu, const char *name,
- const char *desc, const char *val, FILE *val_fd,
+ const char *desc, const char *val, int val_fd,
const struct pmu_event *pe, enum event_source src)
{
struct perf_pmu_alias *alias, *old_alias;
@@ -590,7 +608,6 @@ static int perf_pmu__new_alias(struct perf_pmu *pmu, const char *name,
if (!alias)
return -ENOMEM;
- parse_events_terms__init(&alias->terms);
alias->scale = 1.0;
alias->unit[0] = '\0';
alias->per_pkg = perpkg;
@@ -615,13 +632,22 @@ static int perf_pmu__new_alias(struct perf_pmu *pmu, const char *name,
if (ret)
return ret;
- ret = parse_events_terms(&alias->terms, val, val_fd);
- if (ret) {
- pr_err("Cannot parse alias %s: %d\n", val, ret);
- free(alias);
- return ret;
- }
+ if (val_fd < 0) {
+ alias->terms = strdup(val);
+ } else {
+ char buf[256];
+ struct io io;
+ size_t line_len;
+ io__init(&io, val_fd, buf, sizeof(buf));
+ ret = io__getline(&io, &alias->terms, &line_len) < 0 ? -errno : 0;
+ if (ret) {
+ pr_err("Failed to read alias %s\n", name);
+ return ret;
+ }
+ if (line_len >= 1 && alias->terms[line_len - 1] == '\n')
+ alias->terms[line_len - 1] = '\0';
+ }
alias->name = strdup(name);
alias->desc = desc ? strdup(desc) : NULL;
alias->long_desc = long_desc ? strdup(long_desc) : NULL;
@@ -638,15 +664,29 @@ static int perf_pmu__new_alias(struct perf_pmu *pmu, const char *name,
default:
case EVENT_SRC_SYSFS:
alias->from_sysfs = true;
- if (pmu->events_table) {
+ if (pmu->events_table || pmu->is_core) {
/* Update an event from sysfs with json data. */
struct update_alias_data data = {
.pmu = pmu,
.alias = alias,
+ .legacy = false,
};
- if (pmu_events_table__find_event(pmu->events_table, pmu, name,
- update_alias, &data) == 0)
+ if ((pmu_events_table__find_event(pmu->events_table, pmu, name,
+ update_alias, &data) == 0)) {
+ /*
+ * Override sysfs encodings with json encodings
+ * specific to the cpuid.
+ */
pmu->cpu_common_json_aliases++;
+ }
+ if (pmu->is_core) {
+ /* Add in legacy encodings. */
+ data.legacy = true;
+ if (pmu_events_table__find_event(
+ perf_pmu__default_core_events_table(),
+ pmu, name, update_alias, &data) == 0)
+ pmu->cpu_common_json_aliases++;
+ }
}
pmu->sysfs_aliases++;
break;
@@ -694,7 +734,6 @@ static int __pmu_aliases_parse(struct perf_pmu *pmu, int events_dir_fd)
while ((evt_ent = io_dir__readdir(&event_dir))) {
char *name = evt_ent->d_name;
int fd;
- FILE *file;
if (!strcmp(name, ".") || !strcmp(name, ".."))
continue;
@@ -710,17 +749,12 @@ static int __pmu_aliases_parse(struct perf_pmu *pmu, int events_dir_fd)
pr_debug("Cannot open %s\n", name);
continue;
}
- file = fdopen(fd, "r");
- if (!file) {
- close(fd);
- continue;
- }
if (perf_pmu__new_alias(pmu, name, /*desc=*/ NULL,
- /*val=*/ NULL, file, /*pe=*/ NULL,
+ /*val=*/ NULL, fd, /*pe=*/ NULL,
EVENT_SRC_SYSFS) < 0)
pr_debug("Cannot set up %s\n", name);
- fclose(file);
+ close(fd);
}
pmu->sysfs_aliases_loaded = true;
@@ -767,29 +801,29 @@ static int pmu_aliases_parse_eager(struct perf_pmu *pmu, int sysfs_fd)
return ret;
}
-static int pmu_alias_terms(struct perf_pmu_alias *alias, int err_loc, struct list_head *terms)
+static int pmu_alias_terms(struct perf_pmu_alias *alias, struct list_head *terms)
{
- struct parse_events_term *term, *cloned;
- struct parse_events_terms clone_terms;
-
- parse_events_terms__init(&clone_terms);
- list_for_each_entry(term, &alias->terms.terms, list) {
- int ret = parse_events_term__clone(&cloned, term);
+ struct parse_events_terms alias_terms;
+ struct parse_events_term *term;
+ int ret;
- if (ret) {
- parse_events_terms__exit(&clone_terms);
- return ret;
- }
+ parse_events_terms__init(&alias_terms);
+ ret = parse_events_terms(&alias_terms, alias->terms);
+ if (ret) {
+ pr_err("Cannot parse '%s' terms '%s': %d\n",
+ alias->name, alias->terms, ret);
+ parse_events_terms__exit(&alias_terms);
+ return ret;
+ }
+ list_for_each_entry(term, &alias_terms.terms, list) {
/*
* Weak terms don't override command line options,
* which we don't want for implicit terms in aliases.
*/
- cloned->weak = true;
- cloned->err_term = cloned->err_val = err_loc;
- list_add_tail(&cloned->list, &clone_terms.terms);
+ term->weak = true;
}
- list_splice_init(&clone_terms.terms, terms);
- parse_events_terms__exit(&clone_terms);
+ list_splice_init(&alias_terms.terms, terms);
+ parse_events_terms__exit(&alias_terms);
return 0;
}
@@ -1045,7 +1079,7 @@ static int pmu_add_cpu_aliases_map_callback(const struct pmu_event *pe,
{
struct perf_pmu *pmu = vdata;
- perf_pmu__new_alias(pmu, pe->name, pe->desc, pe->event, /*val_fd=*/ NULL,
+ perf_pmu__new_alias(pmu, pe->name, pe->desc, pe->event, /*val_fd=*/ -1,
pe, EVENT_SRC_CPU_JSON);
return 0;
}
@@ -1061,13 +1095,16 @@ void pmu_add_cpu_aliases_table(struct perf_pmu *pmu, const struct pmu_events_tab
static void pmu_add_cpu_aliases(struct perf_pmu *pmu)
{
- if (!pmu->events_table)
+ if (!pmu->events_table && !pmu->is_core)
return;
if (pmu->cpu_aliases_added)
return;
pmu_add_cpu_aliases_table(pmu, pmu->events_table);
+ if (pmu->is_core)
+ pmu_add_cpu_aliases_table(pmu, perf_pmu__default_core_events_table());
+
pmu->cpu_aliases_added = true;
}
@@ -1094,7 +1131,7 @@ static int pmu_add_sys_aliases_iter_fn(const struct pmu_event *pe,
pe->name,
pe->desc,
pe->event,
- /*val_fd=*/ NULL,
+ /*val_fd=*/ -1,
pe,
EVENT_SRC_SYS_JSON);
}
@@ -1539,9 +1576,41 @@ static int pmu_config_term(const struct perf_pmu *pmu,
assert(term->type_val == PARSE_EVENTS__TERM_TYPE_NUM);
pmu_format_value(bits, term->val.num, &attr->config3, zero);
break;
+ case PARSE_EVENTS__TERM_TYPE_CONFIG4:
+ assert(term->type_val == PARSE_EVENTS__TERM_TYPE_NUM);
+ pmu_format_value(bits, term->val.num, &attr->config4, zero);
+ break;
+ case PARSE_EVENTS__TERM_TYPE_LEGACY_HARDWARE_CONFIG:
+ assert(term->type_val == PARSE_EVENTS__TERM_TYPE_NUM);
+ assert(term->val.num < PERF_COUNT_HW_MAX);
+ assert(pmu->is_core);
+ attr->config = term->val.num;
+ if (perf_pmus__supports_extended_type())
+ attr->config |= (__u64)pmu->type << PERF_PMU_TYPE_SHIFT;
+ attr->type = PERF_TYPE_HARDWARE;
+ break;
+ case PARSE_EVENTS__TERM_TYPE_LEGACY_CACHE_CONFIG: {
+#ifndef NDEBUG
+ int cache_type = term->val.num & 0xFF;
+ int cache_op = (term->val.num >> 8) & 0xFF;
+ int cache_result = (term->val.num >> 16) & 0xFF;
+
+ assert(cache_type < PERF_COUNT_HW_CACHE_MAX);
+ assert(cache_op < PERF_COUNT_HW_CACHE_OP_MAX);
+ assert(cache_result < PERF_COUNT_HW_CACHE_RESULT_MAX);
+#endif
+ assert(term->type_val == PARSE_EVENTS__TERM_TYPE_NUM);
+ assert((term->val.num & ~0xFFFFFF) == 0);
+ assert(pmu->is_core);
+ attr->config = term->val.num;
+ if (perf_pmus__supports_extended_type())
+ attr->config |= (__u64)pmu->type << PERF_PMU_TYPE_SHIFT;
+ attr->type = PERF_TYPE_HW_CACHE;
+ break;
+ }
case PARSE_EVENTS__TERM_TYPE_USER: /* Not hardcoded. */
return -EINVAL;
- case PARSE_EVENTS__TERM_TYPE_NAME ... PARSE_EVENTS__TERM_TYPE_CPU:
+ case PARSE_EVENTS__TERM_TYPE_NAME ... PARSE_EVENTS__TERM_TYPE_RATIO_TO_PREV:
/* Skip non-config terms. */
break;
default:
@@ -1586,6 +1655,9 @@ static int pmu_config_term(const struct perf_pmu *pmu,
case PERF_PMU_FORMAT_VALUE_CONFIG3:
vp = &attr->config3;
break;
+ case PERF_PMU_FORMAT_VALUE_CONFIG4:
+ vp = &attr->config4;
+ break;
default:
return -EINVAL;
}
@@ -1717,10 +1789,14 @@ static struct perf_pmu_alias *pmu_find_alias(struct perf_pmu *pmu,
return alias;
/* Alias doesn't exist, try to get it from the json events. */
- if (pmu->events_table &&
- pmu_events_table__find_event(pmu->events_table, pmu, name,
- pmu_add_cpu_aliases_map_callback,
- pmu) == 0) {
+ if ((pmu_events_table__find_event(pmu->events_table, pmu, name,
+ pmu_add_cpu_aliases_map_callback,
+ pmu) == 0) ||
+ (pmu->is_core &&
+ pmu_events_table__find_event(perf_pmu__default_core_events_table(),
+ pmu, name,
+ pmu_add_cpu_aliases_map_callback,
+ pmu) == 0)) {
alias = perf_pmu__find_alias(pmu, name, /*load=*/ false);
}
return alias;
@@ -1770,6 +1846,24 @@ static int check_info_data(struct perf_pmu *pmu,
return 0;
}
+static int perf_pmu__parse_terms_to_attr(struct perf_pmu *pmu, const char *terms_str,
+ struct perf_event_attr *attr)
+{
+ struct parse_events_terms terms;
+ int ret;
+
+ parse_events_terms__init(&terms);
+ ret = parse_events_terms(&terms, terms_str);
+ if (ret) {
+ pr_debug("Failed to parse terms '%s': %d\n", terms_str, ret);
+ parse_events_terms__exit(&terms);
+ return ret;
+ }
+ ret = perf_pmu__config(pmu, attr, &terms, /*apply_hardcoded=*/true, /*err=*/NULL);
+ parse_events_terms__exit(&terms);
+ return ret;
+}
+
/*
* Find alias in the terms list and replace it with the terms
* defined for the alias
@@ -1813,10 +1907,10 @@ int perf_pmu__check_alias(struct perf_pmu *pmu, struct parse_events_terms *head_
alias = pmu_find_alias(pmu, term);
if (!alias)
continue;
- ret = pmu_alias_terms(alias, term->err_term, &term->list);
+ ret = pmu_alias_terms(alias, &term->list);
if (ret) {
parse_events_error__handle(err, term->err_term,
- strdup("Failure to duplicate terms"),
+ strdup("Failed to parse terms"),
NULL);
return ret;
}
@@ -1826,12 +1920,23 @@ int perf_pmu__check_alias(struct perf_pmu *pmu, struct parse_events_terms *head_
if (ret)
return ret;
+ if (alias->legacy_terms) {
+ struct perf_event_attr attr = {.config = 0,};
+
+ ret = perf_pmu__parse_terms_to_attr(pmu, alias->legacy_terms, &attr);
+ if (ret) {
+ parse_events_error__handle(err, term->err_term,
+ strdup("Error evaluating legacy terms"),
+ NULL);
+ return ret;
+ }
+ if (attr.type == PERF_TYPE_HARDWARE)
+ *alternate_hw_config = attr.config & PERF_HW_EVENT_MASK;
+ }
+
if (alias->per_pkg)
info->per_pkg = true;
- if (term->alternate_hw_config)
- *alternate_hw_config = term->val.num;
-
info->retirement_latency_mean = alias->retirement_latency_mean;
info->retirement_latency_min = alias->retirement_latency_min;
info->retirement_latency_max = alias->retirement_latency_max;
@@ -1912,6 +2017,9 @@ int perf_pmu__for_each_format(struct perf_pmu *pmu, void *state, pmu_format_call
"config1=0..0xffffffffffffffff",
"config2=0..0xffffffffffffffff",
"config3=0..0xffffffffffffffff",
+ "config4=0..0xffffffffffffffff",
+ "legacy-hardware-config=0..9,",
+ "legacy-cache-config=0..0xffffff,",
"name=string",
"period=number",
"freq=number",
@@ -1930,16 +2038,17 @@ int perf_pmu__for_each_format(struct perf_pmu *pmu, void *state, pmu_format_call
"aux-action=(pause|resume|start-paused)",
"aux-sample-size=number",
"cpu=number",
+ "ratio-to-prev=string",
};
struct perf_pmu_format *format;
int ret;
/*
* max-events and driver-config are missing above as are the internal
- * types user, metric-id, raw, legacy cache and hardware. Assert against
- * the enum parse_events__term_type so they are kept in sync.
+ * types user, metric-id, and raw. Assert against the enum
+ * parse_events__term_type so they are kept in sync.
*/
- _Static_assert(ARRAY_SIZE(terms) == __PARSE_EVENTS__TERM_TYPE_NR - 6,
+ _Static_assert(ARRAY_SIZE(terms) == __PARSE_EVENTS__TERM_TYPE_NR - 4,
"perf_pmu__for_each_format()'s terms must be kept in sync with enum parse_events__term_type");
list_for_each_entry(format, &pmu->format, list) {
perf_pmu_format__load(pmu, format);
@@ -1992,9 +2101,13 @@ bool perf_pmu__have_event(struct perf_pmu *pmu, const char *name)
return drm_pmu__have_event(pmu, name);
if (perf_pmu__find_alias(pmu, name, /*load=*/ true) != NULL)
return true;
- if (pmu->cpu_aliases_added || !pmu->events_table)
+ if (pmu->cpu_aliases_added || (!pmu->events_table && !pmu->is_core))
return false;
- return pmu_events_table__find_event(pmu->events_table, pmu, name, NULL, NULL) == 0;
+ if (pmu_events_table__find_event(pmu->events_table, pmu, name, NULL, NULL) == 0)
+ return true;
+ return pmu->is_core &&
+ pmu_events_table__find_event(perf_pmu__default_core_events_table(),
+ pmu, name, NULL, NULL) == 0;
}
size_t perf_pmu__num_events(struct perf_pmu *pmu)
@@ -2011,13 +2124,18 @@ size_t perf_pmu__num_events(struct perf_pmu *pmu)
pmu_aliases_parse(pmu);
nr = pmu->sysfs_aliases + pmu->sys_json_aliases;
- if (pmu->cpu_aliases_added)
- nr += pmu->cpu_json_aliases;
- else if (pmu->events_table)
- nr += pmu_events_table__num_events(pmu->events_table, pmu) -
- pmu->cpu_common_json_aliases;
- else
+ if (pmu->cpu_aliases_added) {
+ nr += pmu->cpu_json_aliases;
+ } else if (pmu->events_table || pmu->is_core) {
+ nr += pmu_events_table__num_events(pmu->events_table, pmu);
+ if (pmu->is_core) {
+ nr += pmu_events_table__num_events(
+ perf_pmu__default_core_events_table(), pmu);
+ }
+ nr -= pmu->cpu_common_json_aliases;
+ } else {
assert(pmu->cpu_json_aliases == 0 && pmu->cpu_common_json_aliases == 0);
+ }
if (perf_pmu__is_tool(pmu))
nr -= tool_pmu__num_skip_events();
@@ -2035,18 +2153,37 @@ static int sub_non_neg(int a, int b)
static char *format_alias(char *buf, int len, const struct perf_pmu *pmu,
const struct perf_pmu_alias *alias, bool skip_duplicate_pmus)
{
+ struct parse_events_terms terms;
struct parse_events_term *term;
+ int ret, used;
size_t pmu_name_len = pmu_deduped_name_len(pmu, pmu->name,
skip_duplicate_pmus);
- int used = snprintf(buf, len, "%.*s/%s", (int)pmu_name_len, pmu->name, alias->name);
- list_for_each_entry(term, &alias->terms.terms, list) {
+ /* Paramemterized events have the parameters shown. */
+ if (strstr(alias->terms, "=?")) {
+ /* No parameters. */
+ snprintf(buf, len, "%.*s/%s/", (int)pmu_name_len, pmu->name, alias->name);
+ return buf;
+ }
+
+ parse_events_terms__init(&terms);
+ ret = parse_events_terms(&terms, alias->terms);
+ if (ret) {
+ pr_err("Failure to parse '%s' terms '%s': %d\n",
+ alias->name, alias->terms, ret);
+ parse_events_terms__exit(&terms);
+ snprintf(buf, len, "%.*s/%s/", (int)pmu_name_len, pmu->name, alias->name);
+ return buf;
+ }
+ used = snprintf(buf, len, "%.*s/%s", (int)pmu_name_len, pmu->name, alias->name);
+
+ list_for_each_entry(term, &terms.terms, list) {
if (term->type_val == PARSE_EVENTS__TERM_TYPE_STR)
used += snprintf(buf + used, sub_non_neg(len, used),
",%s=%s", term->config,
term->val.str);
}
-
+ parse_events_terms__exit(&terms);
if (sub_non_neg(len, used) > 0) {
buf[used] = '/';
used++;
@@ -2060,6 +2197,42 @@ static char *format_alias(char *buf, int len, const struct perf_pmu *pmu,
return buf;
}
+static bool perf_pmu_alias__check_deprecated(struct perf_pmu *pmu, struct perf_pmu_alias *alias)
+{
+ struct perf_event_attr attr = {.config = 0,};
+ const char *check_terms;
+ bool has_legacy_config;
+
+ if (alias->legacy_deprecated_checked)
+ return alias->deprecated;
+
+ alias->legacy_deprecated_checked = true;
+ if (alias->deprecated)
+ return true;
+
+ check_terms = alias->terms;
+ has_legacy_config =
+ strstr(check_terms, "legacy-hardware-config=") != NULL ||
+ strstr(check_terms, "legacy-cache-config=") != NULL;
+ if (!has_legacy_config && alias->legacy_terms) {
+ check_terms = alias->legacy_terms;
+ has_legacy_config =
+ strstr(check_terms, "legacy-hardware-config=") != NULL ||
+ strstr(check_terms, "legacy-cache-config=") != NULL;
+ }
+ if (!has_legacy_config)
+ return false;
+
+ if (perf_pmu__parse_terms_to_attr(pmu, check_terms, &attr) != 0) {
+ /* Parsing failed, set as deprecated. */
+ alias->deprecated = true;
+ } else if (attr.type < PERF_TYPE_MAX) {
+ /* Flag unsupported legacy events as deprecated. */
+ alias->deprecated = !is_event_supported(attr.type, attr.config);
+ }
+ return alias->deprecated;
+}
+
int perf_pmu__for_each_event(struct perf_pmu *pmu, bool skip_duplicate_pmus,
void *state, pmu_event_callback cb)
{
@@ -2069,7 +2242,6 @@ int perf_pmu__for_each_event(struct perf_pmu *pmu, bool skip_duplicate_pmus,
.event_type_desc = "Kernel PMU event",
};
int ret = 0;
- struct strbuf sb;
struct hashmap_entry *entry;
size_t bkt;
@@ -2080,7 +2252,6 @@ int perf_pmu__for_each_event(struct perf_pmu *pmu, bool skip_duplicate_pmus,
if (perf_pmu__is_drm(pmu))
return drm_pmu__for_each_event(pmu, state, cb);
- strbuf_init(&sb, /*hint=*/ 0);
pmu_aliases_parse(pmu);
pmu_add_cpu_aliases(pmu);
hashmap__for_each_entry(pmu->aliases, entry, bkt) {
@@ -2115,16 +2286,14 @@ int perf_pmu__for_each_event(struct perf_pmu *pmu, bool skip_duplicate_pmus,
info.desc = event->desc;
info.long_desc = event->long_desc;
info.encoding_desc = buf + buf_used;
- parse_events_terms__to_strbuf(&event->terms, &sb);
buf_used += snprintf(buf + buf_used, sizeof(buf) - buf_used,
- "%.*s/%s/", (int)pmu_name_len, info.pmu_name, sb.buf) + 1;
+ "%.*s/%s/", (int)pmu_name_len, info.pmu_name, event->terms) + 1;
+ info.str = event->terms;
info.topic = event->topic;
- info.str = sb.buf;
- info.deprecated = event->deprecated;
+ info.deprecated = perf_pmu_alias__check_deprecated(pmu, event);
ret = cb(state, &info);
if (ret)
goto out;
- strbuf_setlen(&sb, /*len=*/ 0);
}
if (pmu->selectable) {
info.name = buf;
@@ -2140,7 +2309,6 @@ int perf_pmu__for_each_event(struct perf_pmu *pmu, bool skip_duplicate_pmus,
ret = cb(state, &info);
}
out:
- strbuf_release(&sb);
return ret;
}
@@ -2588,9 +2756,7 @@ const char *perf_pmu__name_from_config(struct perf_pmu *pmu, u64 config)
hashmap__for_each_entry(pmu->aliases, entry, bkt) {
struct perf_pmu_alias *event = entry->pvalue;
struct perf_event_attr attr = {.config = 0,};
-
- int ret = perf_pmu__config(pmu, &attr, &event->terms, /*apply_hardcoded=*/true,
- /*err=*/NULL);
+ int ret = perf_pmu__parse_terms_to_attr(pmu, event->terms, &attr);
if (ret == 0 && config == attr.config)
return event->name;
diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h
index 1ebcf0242af8..8f11bfe8ed6d 100644
--- a/tools/perf/util/pmu.h
+++ b/tools/perf/util/pmu.h
@@ -23,6 +23,7 @@ enum {
PERF_PMU_FORMAT_VALUE_CONFIG1,
PERF_PMU_FORMAT_VALUE_CONFIG2,
PERF_PMU_FORMAT_VALUE_CONFIG3,
+ PERF_PMU_FORMAT_VALUE_CONFIG4,
PERF_PMU_FORMAT_VALUE_CONFIG_END,
};
@@ -37,6 +38,19 @@ struct perf_pmu_caps {
struct list_head list;
};
+enum pmu_kind {
+ /* A perf event syscall PMU. */
+ PERF_PMU_KIND_PE,
+ /* A perf tool provided DRM PMU. */
+ PERF_PMU_KIND_DRM,
+ /* A perf tool provided HWMON PMU. */
+ PERF_PMU_KIND_HWMON,
+ /* Perf tool provided PMU for tool events like time. */
+ PERF_PMU_KIND_TOOL,
+ /* A testing PMU kind. */
+ PERF_PMU_KIND_FAKE
+};
+
enum {
PERF_PMU_TYPE_PE_START = 0,
PERF_PMU_TYPE_PE_END = 0xFFFDFFFF,
@@ -306,4 +320,23 @@ void perf_pmu__delete(struct perf_pmu *pmu);
const char *perf_pmu__name_from_config(struct perf_pmu *pmu, u64 config);
bool perf_pmu__is_fake(const struct perf_pmu *pmu);
+static inline enum pmu_kind perf_pmu__kind(const struct perf_pmu *pmu)
+{
+ __u32 type;
+
+ if (!pmu)
+ return PERF_PMU_KIND_PE;
+
+ type = pmu->type;
+ if (type <= PERF_PMU_TYPE_PE_END)
+ return PERF_PMU_KIND_PE;
+ if (type <= PERF_PMU_TYPE_DRM_END)
+ return PERF_PMU_KIND_DRM;
+ if (type <= PERF_PMU_TYPE_HWMON_END)
+ return PERF_PMU_KIND_HWMON;
+ if (type == PERF_PMU_TYPE_TOOL)
+ return PERF_PMU_KIND_TOOL;
+ return PERF_PMU_KIND_FAKE;
+}
+
#endif /* __PMU_H */
diff --git a/tools/perf/util/powerpc-vpadtl.c b/tools/perf/util/powerpc-vpadtl.c
new file mode 100644
index 000000000000..d1c3396f182f
--- /dev/null
+++ b/tools/perf/util/powerpc-vpadtl.c
@@ -0,0 +1,733 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * VPA DTL PMU support
+ */
+
+#include <linux/string.h>
+#include <errno.h>
+#include <inttypes.h>
+#include "color.h"
+#include "evlist.h"
+#include "session.h"
+#include "auxtrace.h"
+#include "data.h"
+#include "machine.h"
+#include "debug.h"
+#include "powerpc-vpadtl.h"
+#include "sample.h"
+#include "tool.h"
+
+/*
+ * Structure to save the auxtrace queue
+ */
+struct powerpc_vpadtl {
+ struct auxtrace auxtrace;
+ struct auxtrace_queues queues;
+ struct auxtrace_heap heap;
+ u32 auxtrace_type;
+ struct perf_session *session;
+ struct machine *machine;
+ u32 pmu_type;
+ u64 sample_id;
+};
+
+struct boottb_freq {
+ u64 boot_tb;
+ u64 tb_freq;
+ u64 timebase;
+ u64 padded[3];
+};
+
+struct powerpc_vpadtl_queue {
+ struct powerpc_vpadtl *vpa;
+ unsigned int queue_nr;
+ struct auxtrace_buffer *buffer;
+ struct thread *thread;
+ bool on_heap;
+ struct powerpc_vpadtl_entry *dtl;
+ u64 timestamp;
+ unsigned long pkt_len;
+ unsigned long buf_len;
+ u64 boot_tb;
+ u64 tb_freq;
+ unsigned int tb_buffer;
+ unsigned int size;
+ bool done;
+ pid_t pid;
+ pid_t tid;
+ int cpu;
+};
+
+const char *dispatch_reasons[11] = {
+ "external_interrupt",
+ "firmware_internal_event",
+ "H_PROD",
+ "decrementer_interrupt",
+ "system_reset",
+ "firmware_internal_event",
+ "conferred_cycles",
+ "time_slice",
+ "virtual_memory_page_fault",
+ "expropriated_adjunct",
+ "priv_doorbell"};
+
+const char *preempt_reasons[10] = {
+ "unused",
+ "firmware_internal_event",
+ "H_CEDE",
+ "H_CONFER",
+ "time_slice",
+ "migration_hibernation_page_fault",
+ "virtual_memory_page_fault",
+ "H_CONFER_ADJUNCT",
+ "hcall_adjunct",
+ "HDEC_adjunct"};
+
+#define dtl_entry_size sizeof(struct powerpc_vpadtl_entry)
+
+/*
+ * Function to dump the dispatch trace data when perf report
+ * is invoked with -D
+ */
+static void powerpc_vpadtl_dump(struct powerpc_vpadtl *vpa __maybe_unused,
+ unsigned char *buf, size_t len)
+{
+ struct powerpc_vpadtl_entry *dtl;
+ int pkt_len, pos = 0;
+ const char *color = PERF_COLOR_BLUE;
+
+ color_fprintf(stdout, color,
+ ". ... VPA DTL PMU data: size %zu bytes, entries is %zu\n",
+ len, len/dtl_entry_size);
+
+ if (len % dtl_entry_size)
+ len = len - (len % dtl_entry_size);
+
+ while (len) {
+ pkt_len = dtl_entry_size;
+ printf(".");
+ color_fprintf(stdout, color, " %08x: ", pos);
+ dtl = (struct powerpc_vpadtl_entry *)buf;
+ if (dtl->timebase != 0) {
+ printf("dispatch_reason:%s, preempt_reason:%s, "
+ "enqueue_to_dispatch_time:%d, ready_to_enqueue_time:%d, "
+ "waiting_to_ready_time:%d\n",
+ dispatch_reasons[dtl->dispatch_reason],
+ preempt_reasons[dtl->preempt_reason],
+ be32_to_cpu(dtl->enqueue_to_dispatch_time),
+ be32_to_cpu(dtl->ready_to_enqueue_time),
+ be32_to_cpu(dtl->waiting_to_ready_time));
+ } else {
+ struct boottb_freq *boot_tb = (struct boottb_freq *)buf;
+
+ printf("boot_tb: %" PRIu64 ", tb_freq: %" PRIu64 "\n",
+ boot_tb->boot_tb, boot_tb->tb_freq);
+ }
+
+ pos += pkt_len;
+ buf += pkt_len;
+ len -= pkt_len;
+ }
+}
+
+static unsigned long long powerpc_vpadtl_timestamp(struct powerpc_vpadtl_queue *vpaq)
+{
+ struct powerpc_vpadtl_entry *record = vpaq->dtl;
+ unsigned long long timestamp = 0;
+ unsigned long long boot_tb;
+ unsigned long long diff;
+ double result, div;
+ double boot_freq;
+ /*
+ * Formula used to get timestamp that can be co-related with
+ * other perf events:
+ * ((timbase from DTL entry - boot time) / frequency) * 1000000000
+ */
+ if (record->timebase) {
+ boot_tb = vpaq->boot_tb;
+ boot_freq = vpaq->tb_freq;
+ diff = be64_to_cpu(record->timebase) - boot_tb;
+ div = diff / boot_freq;
+ result = div;
+ result = result * 1000000000;
+ timestamp = result;
+ }
+
+ return timestamp;
+}
+
+static struct powerpc_vpadtl *session_to_vpa(struct perf_session *session)
+{
+ return container_of(session->auxtrace, struct powerpc_vpadtl, auxtrace);
+}
+
+static void powerpc_vpadtl_dump_event(struct powerpc_vpadtl *vpa, unsigned char *buf,
+ size_t len)
+{
+ printf(".\n");
+ powerpc_vpadtl_dump(vpa, buf, len);
+}
+
+/*
+ * Generate perf sample for each entry in the dispatch trace log.
+ * - sample ip is picked from srr0 field of powerpc_vpadtl_entry
+ * - sample cpu is logical cpu.
+ * - cpumode is set to PERF_RECORD_MISC_KERNEL
+ * - Additionally save the details in raw_data of sample. This
+ * is to print the relevant fields in perf_sample__fprintf_synth()
+ * when called from builtin-script
+ */
+static int powerpc_vpadtl_sample(struct powerpc_vpadtl_entry *record,
+ struct powerpc_vpadtl *vpa, u64 save, int cpu)
+{
+ struct perf_sample sample;
+ union perf_event event;
+
+ sample.ip = be64_to_cpu(record->srr0);
+ sample.period = 1;
+ sample.cpu = cpu;
+ sample.id = vpa->sample_id;
+ sample.callchain = NULL;
+ sample.branch_stack = NULL;
+ memset(&event, 0, sizeof(event));
+ sample.cpumode = PERF_RECORD_MISC_KERNEL;
+ sample.time = save;
+ sample.raw_data = record;
+ sample.raw_size = sizeof(record);
+ event.sample.header.type = PERF_RECORD_SAMPLE;
+ event.sample.header.misc = sample.cpumode;
+ event.sample.header.size = sizeof(struct perf_event_header);
+
+ if (perf_session__deliver_synth_event(vpa->session, &event, &sample)) {
+ pr_debug("Failed to create sample for dtl entry\n");
+ return -1;
+ }
+
+ return 0;
+}
+
+static int powerpc_vpadtl_get_buffer(struct powerpc_vpadtl_queue *vpaq)
+{
+ struct auxtrace_buffer *buffer = vpaq->buffer;
+ struct auxtrace_queues *queues = &vpaq->vpa->queues;
+ struct auxtrace_queue *queue;
+
+ queue = &queues->queue_array[vpaq->queue_nr];
+ buffer = auxtrace_buffer__next(queue, buffer);
+
+ if (!buffer)
+ return 0;
+
+ vpaq->buffer = buffer;
+ vpaq->size = buffer->size;
+
+ /* If the aux_buffer doesn't have data associated, try to load it */
+ if (!buffer->data) {
+ /* get the file desc associated with the perf data file */
+ int fd = perf_data__fd(vpaq->vpa->session->data);
+
+ buffer->data = auxtrace_buffer__get_data(buffer, fd);
+ if (!buffer->data)
+ return -ENOMEM;
+ }
+
+ vpaq->buf_len = buffer->size;
+
+ if (buffer->size % dtl_entry_size)
+ vpaq->buf_len = buffer->size - (buffer->size % dtl_entry_size);
+
+ if (vpaq->tb_buffer != buffer->buffer_nr) {
+ vpaq->pkt_len = 0;
+ vpaq->tb_buffer = 0;
+ }
+
+ return 1;
+}
+
+/*
+ * The first entry in the queue for VPA DTL PMU has the boot timebase,
+ * frequency details which are needed to get timestamp which is required to
+ * correlate with other events. Save the boot_tb and tb_freq as part of
+ * powerpc_vpadtl_queue. The very next entry is the actual trace data to
+ * be returned.
+ */
+static int powerpc_vpadtl_decode(struct powerpc_vpadtl_queue *vpaq)
+{
+ int ret;
+ char *buf;
+ struct boottb_freq *boottb;
+
+ ret = powerpc_vpadtl_get_buffer(vpaq);
+ if (ret <= 0)
+ return ret;
+
+ boottb = (struct boottb_freq *)vpaq->buffer->data;
+ if (boottb->timebase == 0) {
+ vpaq->boot_tb = boottb->boot_tb;
+ vpaq->tb_freq = boottb->tb_freq;
+ vpaq->pkt_len += dtl_entry_size;
+ }
+
+ buf = vpaq->buffer->data;
+ buf += vpaq->pkt_len;
+ vpaq->dtl = (struct powerpc_vpadtl_entry *)buf;
+
+ vpaq->tb_buffer = vpaq->buffer->buffer_nr;
+ vpaq->buffer = NULL;
+ vpaq->buf_len = 0;
+
+ return 1;
+}
+
+static int powerpc_vpadtl_decode_all(struct powerpc_vpadtl_queue *vpaq)
+{
+ int ret;
+ unsigned char *buf;
+
+ if (!vpaq->buf_len || vpaq->pkt_len == vpaq->size) {
+ ret = powerpc_vpadtl_get_buffer(vpaq);
+ if (ret <= 0)
+ return ret;
+ }
+
+ if (vpaq->buffer) {
+ buf = vpaq->buffer->data;
+ buf += vpaq->pkt_len;
+ vpaq->dtl = (struct powerpc_vpadtl_entry *)buf;
+ if ((long long)be64_to_cpu(vpaq->dtl->timebase) <= 0) {
+ if (vpaq->pkt_len != dtl_entry_size && vpaq->buf_len) {
+ vpaq->pkt_len += dtl_entry_size;
+ vpaq->buf_len -= dtl_entry_size;
+ }
+ return -1;
+ }
+ vpaq->pkt_len += dtl_entry_size;
+ vpaq->buf_len -= dtl_entry_size;
+ } else {
+ return 0;
+ }
+
+ return 1;
+}
+
+static int powerpc_vpadtl_run_decoder(struct powerpc_vpadtl_queue *vpaq, u64 *timestamp)
+{
+ struct powerpc_vpadtl *vpa = vpaq->vpa;
+ struct powerpc_vpadtl_entry *record;
+ int ret;
+ unsigned long long vpaq_timestamp;
+
+ while (1) {
+ ret = powerpc_vpadtl_decode_all(vpaq);
+ if (!ret) {
+ pr_debug("All data in the queue has been processed.\n");
+ return 1;
+ }
+
+ /*
+ * Error is detected when decoding VPA PMU trace. Continue to
+ * the next trace data and find out more dtl entries.
+ */
+ if (ret < 0)
+ continue;
+
+ record = vpaq->dtl;
+
+ vpaq_timestamp = powerpc_vpadtl_timestamp(vpaq);
+
+ /* Update timestamp for the last record */
+ if (vpaq_timestamp > vpaq->timestamp)
+ vpaq->timestamp = vpaq_timestamp;
+
+ /*
+ * If the timestamp of the queue is later than timestamp of the
+ * coming perf event, bail out so can allow the perf event to
+ * be processed ahead.
+ */
+ if (vpaq->timestamp >= *timestamp) {
+ *timestamp = vpaq->timestamp;
+ vpaq->pkt_len -= dtl_entry_size;
+ vpaq->buf_len += dtl_entry_size;
+ return 0;
+ }
+
+ ret = powerpc_vpadtl_sample(record, vpa, vpaq_timestamp, vpaq->cpu);
+ if (ret)
+ continue;
+ }
+ return 0;
+}
+
+/*
+ * For each of the PERF_RECORD_XX record, compare the timestamp
+ * of perf record with timestamp of top element in the auxtrace heap.
+ * Process the auxtrace queue if the timestamp of element from heap is
+ * lower than timestamp from entry in perf record.
+ *
+ * Update the timestamp of the auxtrace heap with the timestamp
+ * of last processed entry from the auxtrace buffer.
+ */
+static int powerpc_vpadtl_process_queues(struct powerpc_vpadtl *vpa, u64 timestamp)
+{
+ unsigned int queue_nr;
+ u64 ts;
+ int ret;
+
+ while (1) {
+ struct auxtrace_queue *queue;
+ struct powerpc_vpadtl_queue *vpaq;
+
+ if (!vpa->heap.heap_cnt)
+ return 0;
+
+ if (vpa->heap.heap_array[0].ordinal >= timestamp)
+ return 0;
+
+ queue_nr = vpa->heap.heap_array[0].queue_nr;
+ queue = &vpa->queues.queue_array[queue_nr];
+ vpaq = queue->priv;
+
+ auxtrace_heap__pop(&vpa->heap);
+
+ if (vpa->heap.heap_cnt) {
+ ts = vpa->heap.heap_array[0].ordinal + 1;
+ if (ts > timestamp)
+ ts = timestamp;
+ } else {
+ ts = timestamp;
+ }
+
+ ret = powerpc_vpadtl_run_decoder(vpaq, &ts);
+ if (ret < 0) {
+ auxtrace_heap__add(&vpa->heap, queue_nr, ts);
+ return ret;
+ }
+
+ if (!ret) {
+ ret = auxtrace_heap__add(&vpa->heap, queue_nr, ts);
+ if (ret < 0)
+ return ret;
+ } else {
+ vpaq->on_heap = false;
+ }
+ }
+ return 0;
+}
+
+static struct powerpc_vpadtl_queue *powerpc_vpadtl__alloc_queue(struct powerpc_vpadtl *vpa,
+ unsigned int queue_nr)
+{
+ struct powerpc_vpadtl_queue *vpaq;
+
+ vpaq = zalloc(sizeof(*vpaq));
+ if (!vpaq)
+ return NULL;
+
+ vpaq->vpa = vpa;
+ vpaq->queue_nr = queue_nr;
+
+ return vpaq;
+}
+
+/*
+ * When the Dispatch Trace Log data is collected along with other events
+ * like sched tracepoint events, it needs to be correlated and present
+ * interleaved along with these events. Perf events can be collected
+ * parallely across the CPUs.
+ *
+ * An auxtrace_queue is created for each CPU. Data within each queue is in
+ * increasing order of timestamp. Allocate and setup auxtrace queues here.
+ * All auxtrace queues is maintained in auxtrace heap in the increasing order
+ * of timestamp. So always the lowest timestamp (entries to be processed first)
+ * is on top of the heap.
+ *
+ * To add to auxtrace heap, fetch the timestamp from first DTL entry
+ * for each of the queue.
+ */
+static int powerpc_vpadtl__setup_queue(struct powerpc_vpadtl *vpa,
+ struct auxtrace_queue *queue,
+ unsigned int queue_nr)
+{
+ struct powerpc_vpadtl_queue *vpaq = queue->priv;
+
+ if (list_empty(&queue->head) || vpaq)
+ return 0;
+
+ vpaq = powerpc_vpadtl__alloc_queue(vpa, queue_nr);
+ if (!vpaq)
+ return -ENOMEM;
+
+ queue->priv = vpaq;
+
+ if (queue->cpu != -1)
+ vpaq->cpu = queue->cpu;
+
+ if (!vpaq->on_heap) {
+ int ret;
+retry:
+ ret = powerpc_vpadtl_decode(vpaq);
+ if (!ret)
+ return 0;
+
+ if (ret < 0)
+ goto retry;
+
+ vpaq->timestamp = powerpc_vpadtl_timestamp(vpaq);
+
+ ret = auxtrace_heap__add(&vpa->heap, queue_nr, vpaq->timestamp);
+ if (ret)
+ return ret;
+ vpaq->on_heap = true;
+ }
+
+ return 0;
+}
+
+static int powerpc_vpadtl__setup_queues(struct powerpc_vpadtl *vpa)
+{
+ unsigned int i;
+ int ret;
+
+ for (i = 0; i < vpa->queues.nr_queues; i++) {
+ ret = powerpc_vpadtl__setup_queue(vpa, &vpa->queues.queue_array[i], i);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
+static int powerpc_vpadtl__update_queues(struct powerpc_vpadtl *vpa)
+{
+ if (vpa->queues.new_data) {
+ vpa->queues.new_data = false;
+ return powerpc_vpadtl__setup_queues(vpa);
+ }
+
+ return 0;
+}
+
+static int powerpc_vpadtl_process_event(struct perf_session *session,
+ union perf_event *event __maybe_unused,
+ struct perf_sample *sample,
+ const struct perf_tool *tool)
+{
+ struct powerpc_vpadtl *vpa = session_to_vpa(session);
+ int err = 0;
+
+ if (dump_trace)
+ return 0;
+
+ if (!tool->ordered_events) {
+ pr_err("VPA requires ordered events\n");
+ return -EINVAL;
+ }
+
+ if (sample->time) {
+ err = powerpc_vpadtl__update_queues(vpa);
+ if (err)
+ return err;
+
+ err = powerpc_vpadtl_process_queues(vpa, sample->time);
+ }
+
+ return err;
+}
+
+/*
+ * Process PERF_RECORD_AUXTRACE records
+ */
+static int powerpc_vpadtl_process_auxtrace_event(struct perf_session *session,
+ union perf_event *event,
+ const struct perf_tool *tool __maybe_unused)
+{
+ struct powerpc_vpadtl *vpa = session_to_vpa(session);
+ struct auxtrace_buffer *buffer;
+ int fd = perf_data__fd(session->data);
+ off_t data_offset;
+ int err;
+
+ if (!dump_trace)
+ return 0;
+
+ if (perf_data__is_pipe(session->data)) {
+ data_offset = 0;
+ } else {
+ data_offset = lseek(fd, 0, SEEK_CUR);
+ if (data_offset == -1)
+ return -errno;
+ }
+
+ err = auxtrace_queues__add_event(&vpa->queues, session, event,
+ data_offset, &buffer);
+
+ if (err)
+ return err;
+
+ /* Dump here now we have copied a piped trace out of the pipe */
+ if (auxtrace_buffer__get_data(buffer, fd)) {
+ powerpc_vpadtl_dump_event(vpa, buffer->data, buffer->size);
+ auxtrace_buffer__put_data(buffer);
+ }
+
+ return 0;
+}
+
+static int powerpc_vpadtl_flush(struct perf_session *session __maybe_unused,
+ const struct perf_tool *tool __maybe_unused)
+{
+ return 0;
+}
+
+static void powerpc_vpadtl_free_events(struct perf_session *session)
+{
+ struct powerpc_vpadtl *vpa = session_to_vpa(session);
+ struct auxtrace_queues *queues = &vpa->queues;
+
+ for (unsigned int i = 0; i < queues->nr_queues; i++)
+ zfree(&queues->queue_array[i].priv);
+
+ auxtrace_queues__free(queues);
+}
+
+static void powerpc_vpadtl_free(struct perf_session *session)
+{
+ struct powerpc_vpadtl *vpa = session_to_vpa(session);
+
+ auxtrace_heap__free(&vpa->heap);
+ powerpc_vpadtl_free_events(session);
+ session->auxtrace = NULL;
+ free(vpa);
+}
+
+static const char * const powerpc_vpadtl_info_fmts[] = {
+ [POWERPC_VPADTL_TYPE] = " PMU Type %"PRId64"\n",
+};
+
+static void powerpc_vpadtl_print_info(__u64 *arr)
+{
+ if (!dump_trace)
+ return;
+
+ fprintf(stdout, powerpc_vpadtl_info_fmts[POWERPC_VPADTL_TYPE], arr[POWERPC_VPADTL_TYPE]);
+}
+
+static void set_event_name(struct evlist *evlist, u64 id,
+ const char *name)
+{
+ struct evsel *evsel;
+
+ evlist__for_each_entry(evlist, evsel) {
+ if (evsel->core.id && evsel->core.id[0] == id) {
+ if (evsel->name)
+ zfree(&evsel->name);
+ evsel->name = strdup(name);
+ break;
+ }
+ }
+}
+
+static int
+powerpc_vpadtl_synth_events(struct powerpc_vpadtl *vpa, struct perf_session *session)
+{
+ struct evlist *evlist = session->evlist;
+ struct evsel *evsel;
+ struct perf_event_attr attr;
+ bool found = false;
+ u64 id;
+ int err;
+
+ evlist__for_each_entry(evlist, evsel) {
+ if (strstarts(evsel->name, "vpa_dtl")) {
+ found = true;
+ break;
+ }
+ }
+
+ if (!found) {
+ pr_debug("No selected events with VPA trace data\n");
+ return 0;
+ }
+
+ memset(&attr, 0, sizeof(struct perf_event_attr));
+ attr.size = sizeof(struct perf_event_attr);
+ attr.sample_type = evsel->core.attr.sample_type;
+ attr.sample_id_all = evsel->core.attr.sample_id_all;
+ attr.type = PERF_TYPE_SYNTH;
+ attr.config = PERF_SYNTH_POWERPC_VPA_DTL;
+
+ /* create new id val to be a fixed offset from evsel id */
+ id = auxtrace_synth_id_range_start(evsel);
+
+ err = perf_session__deliver_synth_attr_event(session, &attr, id);
+ if (err)
+ return err;
+
+ vpa->sample_id = id;
+ set_event_name(evlist, id, "vpa-dtl");
+
+ return 0;
+}
+
+/*
+ * Process the PERF_RECORD_AUXTRACE_INFO records and setup
+ * the infrastructure to process auxtrace events. PERF_RECORD_AUXTRACE_INFO
+ * is processed first since it is of type perf_user_event_type.
+ * Initialise the aux buffer queues using auxtrace_queues__init().
+ * auxtrace_queue is created for each CPU.
+ */
+int powerpc_vpadtl_process_auxtrace_info(union perf_event *event,
+ struct perf_session *session)
+{
+ struct perf_record_auxtrace_info *auxtrace_info = &event->auxtrace_info;
+ size_t min_sz = sizeof(u64) * POWERPC_VPADTL_TYPE;
+ struct powerpc_vpadtl *vpa;
+ int err;
+
+ if (auxtrace_info->header.size < sizeof(struct perf_record_auxtrace_info) +
+ min_sz)
+ return -EINVAL;
+
+ vpa = zalloc(sizeof(struct powerpc_vpadtl));
+ if (!vpa)
+ return -ENOMEM;
+
+ err = auxtrace_queues__init(&vpa->queues);
+ if (err)
+ goto err_free;
+
+ vpa->session = session;
+ vpa->machine = &session->machines.host; /* No kvm support */
+ vpa->auxtrace_type = auxtrace_info->type;
+ vpa->pmu_type = auxtrace_info->priv[POWERPC_VPADTL_TYPE];
+
+ vpa->auxtrace.process_event = powerpc_vpadtl_process_event;
+ vpa->auxtrace.process_auxtrace_event = powerpc_vpadtl_process_auxtrace_event;
+ vpa->auxtrace.flush_events = powerpc_vpadtl_flush;
+ vpa->auxtrace.free_events = powerpc_vpadtl_free_events;
+ vpa->auxtrace.free = powerpc_vpadtl_free;
+ session->auxtrace = &vpa->auxtrace;
+
+ powerpc_vpadtl_print_info(&auxtrace_info->priv[0]);
+
+ if (dump_trace)
+ return 0;
+
+ err = powerpc_vpadtl_synth_events(vpa, session);
+ if (err)
+ goto err_free_queues;
+
+ err = auxtrace_queues__process_index(&vpa->queues, session);
+ if (err)
+ goto err_free_queues;
+
+ return 0;
+
+err_free_queues:
+ auxtrace_queues__free(&vpa->queues);
+ session->auxtrace = NULL;
+
+err_free:
+ free(vpa);
+ return err;
+}
diff --git a/tools/perf/util/powerpc-vpadtl.h b/tools/perf/util/powerpc-vpadtl.h
new file mode 100644
index 000000000000..ca809660b9bb
--- /dev/null
+++ b/tools/perf/util/powerpc-vpadtl.h
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * VPA DTL PMU Support
+ */
+
+#ifndef INCLUDE__PERF_POWERPC_VPADTL_H__
+#define INCLUDE__PERF_POWERPC_VPADTL_H__
+
+enum {
+ POWERPC_VPADTL_TYPE,
+ VPADTL_AUXTRACE_PRIV_MAX,
+};
+
+#define VPADTL_AUXTRACE_PRIV_SIZE (VPADTL_AUXTRACE_PRIV_MAX * sizeof(u64))
+
+union perf_event;
+struct perf_session;
+struct perf_pmu;
+
+int powerpc_vpadtl_process_auxtrace_info(union perf_event *event,
+ struct perf_session *session);
+
+#endif
diff --git a/tools/perf/util/print-events.c b/tools/perf/util/print-events.c
index 4153124a9948..8f3ed83853a9 100644
--- a/tools/perf/util/print-events.c
+++ b/tools/perf/util/print-events.c
@@ -186,113 +186,6 @@ bool is_event_supported(u8 type, u64 config)
return ret;
}
-int print_hwcache_events(const struct print_callbacks *print_cb, void *print_state)
-{
- struct perf_pmu *pmu = NULL;
- const char *event_type_descriptor = event_type_descriptors[PERF_TYPE_HW_CACHE];
-
- /*
- * Only print core PMUs, skipping uncore for performance and
- * PERF_TYPE_SOFTWARE that can succeed in opening legacy cache evenst.
- */
- while ((pmu = perf_pmus__scan_core(pmu)) != NULL) {
- if (pmu->is_uncore || pmu->type == PERF_TYPE_SOFTWARE)
- continue;
-
- for (int type = 0; type < PERF_COUNT_HW_CACHE_MAX; type++) {
- for (int op = 0; op < PERF_COUNT_HW_CACHE_OP_MAX; op++) {
- /* skip invalid cache type */
- if (!evsel__is_cache_op_valid(type, op))
- continue;
-
- for (int res = 0; res < PERF_COUNT_HW_CACHE_RESULT_MAX; res++) {
- char name[64];
- char alias_name[128];
- __u64 config;
- int ret;
-
- __evsel__hw_cache_type_op_res_name(type, op, res,
- name, sizeof(name));
-
- ret = parse_events__decode_legacy_cache(name, pmu->type,
- &config);
- if (ret || !is_event_supported(PERF_TYPE_HW_CACHE, config))
- continue;
- snprintf(alias_name, sizeof(alias_name), "%s/%s/",
- pmu->name, name);
- print_cb->print_event(print_state,
- "cache",
- pmu->name,
- pmu->type,
- name,
- alias_name,
- /*scale_unit=*/NULL,
- /*deprecated=*/false,
- event_type_descriptor,
- /*desc=*/NULL,
- /*long_desc=*/NULL,
- /*encoding_desc=*/NULL);
- }
- }
- }
- }
- return 0;
-}
-
-void print_symbol_events(const struct print_callbacks *print_cb, void *print_state,
- unsigned int type, const struct event_symbol *syms,
- unsigned int max)
-{
- struct strlist *evt_name_list = strlist__new(NULL, NULL);
- struct str_node *nd;
-
- if (!evt_name_list) {
- pr_debug("Failed to allocate new strlist for symbol events\n");
- return;
- }
- for (unsigned int i = 0; i < max; i++) {
- /*
- * New attr.config still not supported here, the latest
- * example was PERF_COUNT_SW_CGROUP_SWITCHES
- */
- if (syms[i].symbol == NULL)
- continue;
-
- if (!is_event_supported(type, i))
- continue;
-
- if (strlen(syms[i].alias)) {
- char name[MAX_NAME_LEN];
-
- snprintf(name, MAX_NAME_LEN, "%s OR %s", syms[i].symbol, syms[i].alias);
- strlist__add(evt_name_list, name);
- } else
- strlist__add(evt_name_list, syms[i].symbol);
- }
-
- strlist__for_each_entry(nd, evt_name_list) {
- char *alias = strstr(nd->s, " OR ");
-
- if (alias) {
- *alias = '\0';
- alias += 4;
- }
- print_cb->print_event(print_state,
- /*topic=*/NULL,
- /*pmu_name=*/NULL,
- type,
- nd->s,
- alias,
- /*scale_unit=*/NULL,
- /*deprecated=*/false,
- event_type_descriptors[type],
- /*desc=*/NULL,
- /*long_desc=*/NULL,
- /*encoding_desc=*/NULL);
- }
- strlist__delete(evt_name_list);
-}
-
/** struct mep - RB-tree node for building printing information. */
struct mep {
/** nd - RB-tree element. */
@@ -431,11 +324,6 @@ void metricgroup__print(const struct print_callbacks *print_cb, void *print_stat
*/
void print_events(const struct print_callbacks *print_cb, void *print_state)
{
- print_symbol_events(print_cb, print_state, PERF_TYPE_HARDWARE,
- event_symbols_hw, PERF_COUNT_HW_MAX);
-
- print_hwcache_events(print_cb, print_state);
-
perf_pmus__print_pmu_events(print_cb, print_state);
print_cb->print_event(print_state,
diff --git a/tools/perf/util/print-events.h b/tools/perf/util/print-events.h
index d6ba384f0c66..eabba5d4a1fd 100644
--- a/tools/perf/util/print-events.h
+++ b/tools/perf/util/print-events.h
@@ -32,11 +32,7 @@ struct print_callbacks {
/** Print all events, the default when no options are specified. */
void print_events(const struct print_callbacks *print_cb, void *print_state);
-int print_hwcache_events(const struct print_callbacks *print_cb, void *print_state);
void print_sdt_events(const struct print_callbacks *print_cb, void *print_state);
-void print_symbol_events(const struct print_callbacks *print_cb, void *print_state,
- unsigned int type, const struct event_symbol *syms,
- unsigned int max);
void metricgroup__print(const struct print_callbacks *print_cb, void *print_state);
bool is_event_supported(u8 type, u64 config);
diff --git a/tools/perf/util/print_insn.c b/tools/perf/util/print_insn.c
index a33a7726422d..02e6fbb8ca04 100644
--- a/tools/perf/util/print_insn.c
+++ b/tools/perf/util/print_insn.c
@@ -7,6 +7,7 @@
#include <inttypes.h>
#include <string.h>
#include <stdbool.h>
+#include "capstone.h"
#include "debug.h"
#include "sample.h"
#include "symbol.h"
@@ -29,84 +30,6 @@ size_t sample__fprintf_insn_raw(struct perf_sample *sample, FILE *fp)
return printed;
}
-#ifdef HAVE_LIBCAPSTONE_SUPPORT
-#include <capstone/capstone.h>
-
-int capstone_init(struct machine *machine, csh *cs_handle, bool is64, bool disassembler_style);
-
-int capstone_init(struct machine *machine, csh *cs_handle, bool is64, bool disassembler_style)
-{
- cs_arch arch;
- cs_mode mode;
-
- if (machine__is(machine, "x86_64") && is64) {
- arch = CS_ARCH_X86;
- mode = CS_MODE_64;
- } else if (machine__normalized_is(machine, "x86")) {
- arch = CS_ARCH_X86;
- mode = CS_MODE_32;
- } else if (machine__normalized_is(machine, "arm64")) {
- arch = CS_ARCH_ARM64;
- mode = CS_MODE_ARM;
- } else if (machine__normalized_is(machine, "arm")) {
- arch = CS_ARCH_ARM;
- mode = CS_MODE_ARM + CS_MODE_V8;
- } else if (machine__normalized_is(machine, "s390")) {
- arch = CS_ARCH_SYSZ;
- mode = CS_MODE_BIG_ENDIAN;
- } else {
- return -1;
- }
-
- if (cs_open(arch, mode, cs_handle) != CS_ERR_OK) {
- pr_warning_once("cs_open failed\n");
- return -1;
- }
-
- if (machine__normalized_is(machine, "x86")) {
- /*
- * In case of using capstone_init while symbol__disassemble
- * setting CS_OPT_SYNTAX_ATT depends if disassembler_style opts
- * is set via annotation args
- */
- if (disassembler_style)
- cs_option(*cs_handle, CS_OPT_SYNTAX, CS_OPT_SYNTAX_ATT);
- /*
- * Resolving address operands to symbols is implemented
- * on x86 by investigating instruction details.
- */
- cs_option(*cs_handle, CS_OPT_DETAIL, CS_OPT_ON);
- }
-
- return 0;
-}
-
-static size_t print_insn_x86(struct thread *thread, u8 cpumode, cs_insn *insn,
- int print_opts, FILE *fp)
-{
- struct addr_location al;
- size_t printed = 0;
-
- if (insn->detail && insn->detail->x86.op_count == 1) {
- cs_x86_op *op = &insn->detail->x86.operands[0];
-
- addr_location__init(&al);
- if (op->type == X86_OP_IMM &&
- thread__find_symbol(thread, cpumode, op->imm, &al)) {
- printed += fprintf(fp, "%s ", insn[0].mnemonic);
- printed += symbol__fprintf_symname_offs(al.sym, &al, fp);
- if (print_opts & PRINT_INSN_IMM_HEX)
- printed += fprintf(fp, " [%#" PRIx64 "]", op->imm);
- addr_location__exit(&al);
- return printed;
- }
- addr_location__exit(&al);
- }
-
- printed += fprintf(fp, "%s %s", insn[0].mnemonic, insn[0].op_str);
- return printed;
-}
-
static bool is64bitip(struct machine *machine, struct addr_location *al)
{
const struct dso *dso = al->map ? map__dso(al->map) : NULL;
@@ -123,32 +46,8 @@ ssize_t fprintf_insn_asm(struct machine *machine, struct thread *thread, u8 cpum
bool is64bit, const uint8_t *code, size_t code_size,
uint64_t ip, int *lenp, int print_opts, FILE *fp)
{
- size_t printed;
- cs_insn *insn;
- csh cs_handle;
- size_t count;
- int ret;
-
- /* TODO: Try to initiate capstone only once but need a proper place. */
- ret = capstone_init(machine, &cs_handle, is64bit, true);
- if (ret < 0)
- return ret;
-
- count = cs_disasm(cs_handle, code, code_size, ip, 1, &insn);
- if (count > 0) {
- if (machine__normalized_is(machine, "x86"))
- printed = print_insn_x86(thread, cpumode, &insn[0], print_opts, fp);
- else
- printed = fprintf(fp, "%s %s", insn[0].mnemonic, insn[0].op_str);
- if (lenp)
- *lenp = insn->size;
- cs_free(insn, count);
- } else {
- printed = -1;
- }
-
- cs_close(&cs_handle);
- return printed;
+ return capstone__fprintf_insn_asm(machine, thread, cpumode, is64bit, code, code_size,
+ ip, lenp, print_opts, fp);
}
size_t sample__fprintf_insn_asm(struct perf_sample *sample, struct thread *thread,
@@ -166,13 +65,3 @@ size_t sample__fprintf_insn_asm(struct perf_sample *sample, struct thread *threa
return printed;
}
-#else
-size_t sample__fprintf_insn_asm(struct perf_sample *sample __maybe_unused,
- struct thread *thread __maybe_unused,
- struct machine *machine __maybe_unused,
- FILE *fp __maybe_unused,
- struct addr_location *al __maybe_unused)
-{
- return 0;
-}
-#endif
diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c
index 6ab2eb551b6c..710e4620923e 100644
--- a/tools/perf/util/probe-event.c
+++ b/tools/perf/util/probe-event.c
@@ -2419,6 +2419,7 @@ void clear_perf_probe_event(struct perf_probe_event *pev)
}
pev->nargs = 0;
zfree(&pev->args);
+ nsinfo__zput(pev->nsi);
}
#define strdup_or_goto(str, label) \
@@ -3767,12 +3768,11 @@ void cleanup_perf_probe_events(struct perf_probe_event *pevs, int npevs)
/* Loop 3: cleanup and free trace events */
for (i = 0; i < npevs; i++) {
pev = &pevs[i];
- for (j = 0; j < pevs[i].ntevs; j++)
- clear_probe_trace_event(&pevs[i].tevs[j]);
- zfree(&pevs[i].tevs);
- pevs[i].ntevs = 0;
- nsinfo__zput(pev->nsi);
- clear_perf_probe_event(&pevs[i]);
+ for (j = 0; j < pev->ntevs; j++)
+ clear_probe_trace_event(&pev->tevs[j]);
+ zfree(&pev->tevs);
+ pev->ntevs = 0;
+ clear_perf_probe_event(pev);
}
}
diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c
index ea77bea0306f..cc1019d29a5d 100644
--- a/tools/perf/util/python.c
+++ b/tools/perf/util/python.c
@@ -14,10 +14,12 @@
#include "evlist.h"
#include "evsel.h"
#include "event.h"
+#include "expr.h"
#include "print_binary.h"
#include "record.h"
#include "strbuf.h"
#include "thread_map.h"
+#include "tp_pmu.h"
#include "trace-event.h"
#include "metricgroup.h"
#include "mmap.h"
@@ -485,13 +487,19 @@ static PyObject *pyrf_event__new(const union perf_event *event)
if ((event->header.type < PERF_RECORD_MMAP ||
event->header.type > PERF_RECORD_SAMPLE) &&
!(event->header.type == PERF_RECORD_SWITCH ||
- event->header.type == PERF_RECORD_SWITCH_CPU_WIDE))
+ event->header.type == PERF_RECORD_SWITCH_CPU_WIDE)) {
+ PyErr_Format(PyExc_TypeError, "Unexpected header type %u",
+ event->header.type);
return NULL;
+ }
// FIXME this better be dynamic or we need to parse everything
// before calling perf_mmap__consume(), including tracepoint fields.
- if (sizeof(pevent->event) < event->header.size)
+ if (sizeof(pevent->event) < event->header.size) {
+ PyErr_Format(PyExc_TypeError, "Unexpected event size: %zd < %u",
+ sizeof(pevent->event), event->header.size);
return NULL;
+ }
ptype = pyrf_event__type[event->header.type];
pevent = PyObject_New(struct pyrf_event, ptype);
@@ -642,6 +650,209 @@ static int pyrf_thread_map__setup_types(void)
return PyType_Ready(&pyrf_thread_map__type);
}
+/**
+ * A python wrapper for perf_pmus that are globally owned by the pmus.c code.
+ */
+struct pyrf_pmu {
+ PyObject_HEAD
+
+ struct perf_pmu *pmu;
+};
+
+static void pyrf_pmu__delete(struct pyrf_pmu *ppmu)
+{
+ Py_TYPE(ppmu)->tp_free((PyObject *)ppmu);
+}
+
+static PyObject *pyrf_pmu__name(PyObject *self)
+{
+ struct pyrf_pmu *ppmu = (void *)self;
+
+ return PyUnicode_FromString(ppmu->pmu->name);
+}
+
+static bool add_to_dict(PyObject *dict, const char *key, const char *value)
+{
+ PyObject *pkey, *pvalue;
+ bool ret;
+
+ if (value == NULL)
+ return true;
+
+ pkey = PyUnicode_FromString(key);
+ pvalue = PyUnicode_FromString(value);
+
+ ret = pkey && pvalue && PyDict_SetItem(dict, pkey, pvalue) == 0;
+ Py_XDECREF(pkey);
+ Py_XDECREF(pvalue);
+ return ret;
+}
+
+static int pyrf_pmu__events_cb(void *state, struct pmu_event_info *info)
+{
+ PyObject *py_list = state;
+ PyObject *dict = PyDict_New();
+
+ if (!dict)
+ return -ENOMEM;
+
+ if (!add_to_dict(dict, "name", info->name) ||
+ !add_to_dict(dict, "alias", info->alias) ||
+ !add_to_dict(dict, "scale_unit", info->scale_unit) ||
+ !add_to_dict(dict, "desc", info->desc) ||
+ !add_to_dict(dict, "long_desc", info->long_desc) ||
+ !add_to_dict(dict, "encoding_desc", info->encoding_desc) ||
+ !add_to_dict(dict, "topic", info->topic) ||
+ !add_to_dict(dict, "event_type_desc", info->event_type_desc) ||
+ !add_to_dict(dict, "str", info->str) ||
+ !add_to_dict(dict, "deprecated", info->deprecated ? "deprecated" : NULL) ||
+ PyList_Append(py_list, dict) != 0) {
+ Py_DECREF(dict);
+ return -ENOMEM;
+ }
+ Py_DECREF(dict);
+ return 0;
+}
+
+static PyObject *pyrf_pmu__events(PyObject *self)
+{
+ struct pyrf_pmu *ppmu = (void *)self;
+ PyObject *py_list = PyList_New(0);
+ int ret;
+
+ if (!py_list)
+ return NULL;
+
+ ret = perf_pmu__for_each_event(ppmu->pmu,
+ /*skip_duplicate_pmus=*/false,
+ py_list,
+ pyrf_pmu__events_cb);
+ if (ret) {
+ Py_DECREF(py_list);
+ errno = -ret;
+ PyErr_SetFromErrno(PyExc_OSError);
+ return NULL;
+ }
+ return py_list;
+}
+
+static PyObject *pyrf_pmu__repr(PyObject *self)
+{
+ struct pyrf_pmu *ppmu = (void *)self;
+
+ return PyUnicode_FromFormat("pmu(%s)", ppmu->pmu->name);
+}
+
+static const char pyrf_pmu__doc[] = PyDoc_STR("perf Performance Monitoring Unit (PMU) object.");
+
+static PyMethodDef pyrf_pmu__methods[] = {
+ {
+ .ml_name = "events",
+ .ml_meth = (PyCFunction)pyrf_pmu__events,
+ .ml_flags = METH_NOARGS,
+ .ml_doc = PyDoc_STR("Returns a sequence of events encoded as a dictionaries.")
+ },
+ {
+ .ml_name = "name",
+ .ml_meth = (PyCFunction)pyrf_pmu__name,
+ .ml_flags = METH_NOARGS,
+ .ml_doc = PyDoc_STR("Name of the PMU including suffixes.")
+ },
+ { .ml_name = NULL, }
+};
+
+/** The python type for a perf.pmu. */
+static PyTypeObject pyrf_pmu__type = {
+ PyVarObject_HEAD_INIT(NULL, 0)
+ .tp_name = "perf.pmu",
+ .tp_basicsize = sizeof(struct pyrf_pmu),
+ .tp_dealloc = (destructor)pyrf_pmu__delete,
+ .tp_flags = Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE,
+ .tp_doc = pyrf_pmu__doc,
+ .tp_methods = pyrf_pmu__methods,
+ .tp_str = pyrf_pmu__name,
+ .tp_repr = pyrf_pmu__repr,
+};
+
+static int pyrf_pmu__setup_types(void)
+{
+ pyrf_pmu__type.tp_new = PyType_GenericNew;
+ return PyType_Ready(&pyrf_pmu__type);
+}
+
+
+/** A python iterator for pmus that has no equivalent in the C code. */
+struct pyrf_pmu_iterator {
+ PyObject_HEAD
+ struct perf_pmu *pmu;
+};
+
+static void pyrf_pmu_iterator__dealloc(struct pyrf_pmu_iterator *self)
+{
+ Py_TYPE(self)->tp_free((PyObject *) self);
+}
+
+static PyObject *pyrf_pmu_iterator__new(PyTypeObject *type, PyObject *args __maybe_unused,
+ PyObject *kwds __maybe_unused)
+{
+ struct pyrf_pmu_iterator *itr = (void *)type->tp_alloc(type, 0);
+
+ if (itr != NULL)
+ itr->pmu = perf_pmus__scan(/*pmu=*/NULL);
+
+ return (PyObject *) itr;
+}
+
+static PyObject *pyrf_pmu_iterator__iter(PyObject *self)
+{
+ Py_INCREF(self);
+ return self;
+}
+
+static PyObject *pyrf_pmu_iterator__iternext(PyObject *self)
+{
+ struct pyrf_pmu_iterator *itr = (void *)self;
+ struct pyrf_pmu *ppmu;
+
+ if (itr->pmu == NULL) {
+ PyErr_SetNone(PyExc_StopIteration);
+ return NULL;
+ }
+ // Create object to return.
+ ppmu = PyObject_New(struct pyrf_pmu, &pyrf_pmu__type);
+ if (ppmu) {
+ ppmu->pmu = itr->pmu;
+ // Advance iterator.
+ itr->pmu = perf_pmus__scan(itr->pmu);
+ }
+ return (PyObject *)ppmu;
+}
+
+/** The python type for the PMU iterator. */
+static PyTypeObject pyrf_pmu_iterator__type = {
+ PyVarObject_HEAD_INIT(NULL, 0)
+ .tp_name = "pmus.iterator",
+ .tp_doc = "Iterator for the pmus string sequence.",
+ .tp_basicsize = sizeof(struct pyrf_pmu_iterator),
+ .tp_itemsize = 0,
+ .tp_flags = Py_TPFLAGS_DEFAULT,
+ .tp_new = pyrf_pmu_iterator__new,
+ .tp_dealloc = (destructor) pyrf_pmu_iterator__dealloc,
+ .tp_iter = pyrf_pmu_iterator__iter,
+ .tp_iternext = pyrf_pmu_iterator__iternext,
+};
+
+static int pyrf_pmu_iterator__setup_types(void)
+{
+ return PyType_Ready(&pyrf_pmu_iterator__type);
+}
+
+static PyObject *pyrf__pmus(PyObject *self, PyObject *args)
+{
+ // Calling the class creates an instance of the iterator.
+ return PyObject_CallObject((PyObject *) &pyrf_pmu_iterator__type, /*args=*/NULL);
+}
+
struct pyrf_counts_values {
PyObject_HEAD
@@ -1093,6 +1304,178 @@ static PyObject *pyrf_evlist__all_cpus(struct pyrf_evlist *pevlist)
return (PyObject *)pcpu_map;
}
+static PyObject *pyrf_evlist__metrics(struct pyrf_evlist *pevlist)
+{
+ PyObject *list = PyList_New(/*len=*/0);
+ struct rb_node *node;
+
+ if (!list)
+ return NULL;
+
+ for (node = rb_first_cached(&pevlist->evlist.metric_events.entries); node;
+ node = rb_next(node)) {
+ struct metric_event *me = container_of(node, struct metric_event, nd);
+ struct list_head *pos;
+
+ list_for_each(pos, &me->head) {
+ struct metric_expr *expr = container_of(pos, struct metric_expr, nd);
+ PyObject *str = PyUnicode_FromString(expr->metric_name);
+
+ if (!str || PyList_Append(list, str) != 0) {
+ Py_DECREF(list);
+ return NULL;
+ }
+ Py_DECREF(str);
+ }
+ }
+ return list;
+}
+
+static int prepare_metric(const struct metric_expr *mexp,
+ const struct evsel *evsel,
+ struct expr_parse_ctx *pctx,
+ int cpu_idx, int thread_idx)
+{
+ struct evsel * const *metric_events = mexp->metric_events;
+ struct metric_ref *metric_refs = mexp->metric_refs;
+
+ for (int i = 0; metric_events[i]; i++) {
+ struct evsel *cur = metric_events[i];
+ double val, ena, run;
+ int ret, source_count = 0;
+ struct perf_counts_values *old_count, *new_count;
+ char *n = strdup(evsel__metric_id(cur));
+
+ if (!n)
+ return -ENOMEM;
+
+ /*
+ * If there are multiple uncore PMUs and we're not reading the
+ * leader's stats, determine the stats for the appropriate
+ * uncore PMU.
+ */
+ if (evsel && evsel->metric_leader &&
+ evsel->pmu != evsel->metric_leader->pmu &&
+ cur->pmu == evsel->metric_leader->pmu) {
+ struct evsel *pos;
+
+ evlist__for_each_entry(evsel->evlist, pos) {
+ if (pos->pmu != evsel->pmu)
+ continue;
+ if (pos->metric_leader != cur)
+ continue;
+ cur = pos;
+ source_count = 1;
+ break;
+ }
+ }
+
+ if (source_count == 0)
+ source_count = evsel__source_count(cur);
+
+ ret = evsel__ensure_counts(cur);
+ if (ret)
+ return ret;
+
+ /* Set up pointers to the old and newly read counter values. */
+ old_count = perf_counts(cur->prev_raw_counts, cpu_idx, thread_idx);
+ new_count = perf_counts(cur->counts, cpu_idx, thread_idx);
+ /* Update the value in cur->counts. */
+ evsel__read_counter(cur, cpu_idx, thread_idx);
+
+ val = new_count->val - old_count->val;
+ ena = new_count->ena - old_count->ena;
+ run = new_count->run - old_count->run;
+
+ if (ena != run && run != 0)
+ val = val * ena / run;
+ ret = expr__add_id_val_source_count(pctx, n, val, source_count);
+ if (ret)
+ return ret;
+ }
+
+ for (int i = 0; metric_refs && metric_refs[i].metric_name; i++) {
+ int ret = expr__add_ref(pctx, &metric_refs[i]);
+
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
+static PyObject *pyrf_evlist__compute_metric(struct pyrf_evlist *pevlist,
+ PyObject *args, PyObject *kwargs)
+{
+ int ret, cpu = 0, cpu_idx = 0, thread = 0, thread_idx = 0;
+ const char *metric;
+ struct rb_node *node;
+ struct metric_expr *mexp = NULL;
+ struct expr_parse_ctx *pctx;
+ double result = 0;
+ struct evsel *metric_evsel = NULL;
+
+ if (!PyArg_ParseTuple(args, "sii", &metric, &cpu, &thread))
+ return NULL;
+
+ for (node = rb_first_cached(&pevlist->evlist.metric_events.entries);
+ mexp == NULL && node;
+ node = rb_next(node)) {
+ struct metric_event *me = container_of(node, struct metric_event, nd);
+ struct list_head *pos;
+
+ list_for_each(pos, &me->head) {
+ struct metric_expr *e = container_of(pos, struct metric_expr, nd);
+ struct evsel *pos2;
+
+ if (strcmp(e->metric_name, metric))
+ continue;
+
+ if (e->metric_events[0] == NULL)
+ continue;
+
+ evlist__for_each_entry(&pevlist->evlist, pos2) {
+ if (pos2->metric_leader != e->metric_events[0])
+ continue;
+ cpu_idx = perf_cpu_map__idx(pos2->core.cpus,
+ (struct perf_cpu){.cpu = cpu});
+ if (cpu_idx < 0)
+ continue;
+
+ thread_idx = perf_thread_map__idx(pos2->core.threads, thread);
+ if (thread_idx < 0)
+ continue;
+ metric_evsel = pos2;
+ mexp = e;
+ goto done;
+ }
+ }
+ }
+done:
+ if (!mexp) {
+ PyErr_Format(PyExc_TypeError, "Unknown metric '%s' for CPU '%d' and thread '%d'",
+ metric, cpu, thread);
+ return NULL;
+ }
+
+ pctx = expr__ctx_new();
+ if (!pctx)
+ return PyErr_NoMemory();
+
+ ret = prepare_metric(mexp, metric_evsel, pctx, cpu_idx, thread_idx);
+ if (ret) {
+ expr__ctx_free(pctx);
+ errno = -ret;
+ PyErr_SetFromErrno(PyExc_OSError);
+ return NULL;
+ }
+ if (expr__parse(&result, pctx, mexp->metric_expr))
+ result = 0.0;
+
+ expr__ctx_free(pctx);
+ return PyFloat_FromDouble(result);
+}
+
static PyObject *pyrf_evlist__mmap(struct pyrf_evlist *pevlist,
PyObject *args, PyObject *kwargs)
{
@@ -1209,8 +1592,10 @@ static PyObject *pyrf_evlist__read_on_cpu(struct pyrf_evlist *pevlist,
return NULL;
md = get_md(evlist, cpu);
- if (!md)
+ if (!md) {
+ PyErr_Format(PyExc_TypeError, "Unknown CPU '%d'", cpu);
return NULL;
+ }
if (perf_mmap__read_init(&md->core) < 0)
goto end;
@@ -1320,6 +1705,18 @@ static PyMethodDef pyrf_evlist__methods[] = {
.ml_doc = PyDoc_STR("CPU map union of all evsel CPU maps.")
},
{
+ .ml_name = "metrics",
+ .ml_meth = (PyCFunction)pyrf_evlist__metrics,
+ .ml_flags = METH_NOARGS,
+ .ml_doc = PyDoc_STR("List of metric names within the evlist.")
+ },
+ {
+ .ml_name = "compute_metric",
+ .ml_meth = (PyCFunction)pyrf_evlist__compute_metric,
+ .ml_flags = METH_VARARGS | METH_KEYWORDS,
+ .ml_doc = PyDoc_STR("compute metric for given name, cpu and thread")
+ },
+ {
.ml_name = "mmap",
.ml_meth = (PyCFunction)pyrf_evlist__mmap,
.ml_flags = METH_VARARGS | METH_KEYWORDS,
@@ -1546,10 +1943,6 @@ static const struct perf_constant perf__constants[] = {
static PyObject *pyrf__tracepoint(struct pyrf_evsel *pevsel,
PyObject *args, PyObject *kwargs)
{
-#ifndef HAVE_LIBTRACEEVENT
- return NULL;
-#else
- struct tep_event *tp_format;
static char *kwlist[] = { "sys", "name", NULL };
char *sys = NULL;
char *name = NULL;
@@ -1558,12 +1951,7 @@ static PyObject *pyrf__tracepoint(struct pyrf_evsel *pevsel,
&sys, &name))
return NULL;
- tp_format = trace_event__tp_format(sys, name);
- if (IS_ERR(tp_format))
- return PyLong_FromLong(-1);
-
- return PyLong_FromLong(tp_format->id);
-#endif // HAVE_LIBTRACEEVENT
+ return PyLong_FromLong(tp_pmu__id(sys, name));
}
static PyObject *pyrf_evsel__from_evsel(struct evsel *evsel)
@@ -1635,6 +2023,17 @@ static PyObject *pyrf_evlist__from_evlist(struct evlist *evlist)
else if (leader == NULL)
evsel__set_leader(pos, pos);
}
+
+ leader = pos->metric_leader;
+
+ if (pos != leader) {
+ int idx = evlist__pos(evlist, leader);
+
+ if (idx >= 0)
+ pos->metric_leader = evlist__at(&pevlist->evlist, idx);
+ else if (leader == NULL)
+ pos->metric_leader = pos;
+ }
}
metricgroup__copy_metric_events(&pevlist->evlist, /*cgrp=*/NULL,
&pevlist->evlist.metric_events,
@@ -1688,8 +2087,128 @@ static PyObject *pyrf__parse_events(PyObject *self, PyObject *args)
return result;
}
+static PyObject *pyrf__parse_metrics(PyObject *self, PyObject *args)
+{
+ const char *input, *pmu = NULL;
+ struct evlist evlist = {};
+ PyObject *result;
+ PyObject *pcpus = NULL, *pthreads = NULL;
+ struct perf_cpu_map *cpus;
+ struct perf_thread_map *threads;
+ int ret;
+
+ if (!PyArg_ParseTuple(args, "s|sOO", &input, &pmu, &pcpus, &pthreads))
+ return NULL;
+
+ threads = pthreads ? ((struct pyrf_thread_map *)pthreads)->threads : NULL;
+ cpus = pcpus ? ((struct pyrf_cpu_map *)pcpus)->cpus : NULL;
+
+ evlist__init(&evlist, cpus, threads);
+ ret = metricgroup__parse_groups(&evlist, pmu ?: "all", input,
+ /*metric_no_group=*/ false,
+ /*metric_no_merge=*/ false,
+ /*metric_no_threshold=*/ true,
+ /*user_requested_cpu_list=*/ NULL,
+ /*system_wide=*/true,
+ /*hardware_aware_grouping=*/ false);
+ if (ret) {
+ errno = -ret;
+ PyErr_SetFromErrno(PyExc_OSError);
+ return NULL;
+ }
+ result = pyrf_evlist__from_evlist(&evlist);
+ evlist__exit(&evlist);
+ return result;
+}
+
+static PyObject *pyrf__metrics_groups(const struct pmu_metric *pm)
+{
+ PyObject *groups = PyList_New(/*len=*/0);
+ const char *mg = pm->metric_group;
+
+ if (!groups)
+ return NULL;
+
+ while (mg) {
+ PyObject *val = NULL;
+ const char *sep = strchr(mg, ';');
+ size_t len = sep ? (size_t)(sep - mg) : strlen(mg);
+
+ if (len > 0) {
+ val = PyUnicode_FromStringAndSize(mg, len);
+ if (val)
+ PyList_Append(groups, val);
+
+ Py_XDECREF(val);
+ }
+ mg = sep ? sep + 1 : NULL;
+ }
+ return groups;
+}
+
+static int pyrf__metrics_cb(const struct pmu_metric *pm,
+ const struct pmu_metrics_table *table __maybe_unused,
+ void *vdata)
+{
+ PyObject *py_list = vdata;
+ PyObject *dict = PyDict_New();
+ PyObject *key = dict ? PyUnicode_FromString("MetricGroup") : NULL;
+ PyObject *value = key ? pyrf__metrics_groups(pm) : NULL;
+
+ if (!value || PyDict_SetItem(dict, key, value) != 0) {
+ Py_XDECREF(key);
+ Py_XDECREF(value);
+ Py_XDECREF(dict);
+ return -ENOMEM;
+ }
+
+ if (!add_to_dict(dict, "MetricName", pm->metric_name) ||
+ !add_to_dict(dict, "PMU", pm->pmu) ||
+ !add_to_dict(dict, "MetricExpr", pm->metric_expr) ||
+ !add_to_dict(dict, "MetricThreshold", pm->metric_threshold) ||
+ !add_to_dict(dict, "ScaleUnit", pm->unit) ||
+ !add_to_dict(dict, "Compat", pm->compat) ||
+ !add_to_dict(dict, "BriefDescription", pm->desc) ||
+ !add_to_dict(dict, "PublicDescription", pm->long_desc) ||
+ PyList_Append(py_list, dict) != 0) {
+ Py_DECREF(dict);
+ return -ENOMEM;
+ }
+ Py_DECREF(dict);
+ return 0;
+}
+
+static PyObject *pyrf__metrics(PyObject *self, PyObject *args)
+{
+ const struct pmu_metrics_table *table = pmu_metrics_table__find();
+ PyObject *list = PyList_New(/*len=*/0);
+ int ret;
+
+ if (!list)
+ return NULL;
+
+ ret = pmu_metrics_table__for_each_metric(table, pyrf__metrics_cb, list);
+ if (!ret)
+ ret = pmu_for_each_sys_metric(pyrf__metrics_cb, list);
+
+ if (ret) {
+ Py_DECREF(list);
+ errno = -ret;
+ PyErr_SetFromErrno(PyExc_OSError);
+ return NULL;
+ }
+ return list;
+}
+
static PyMethodDef perf__methods[] = {
{
+ .ml_name = "metrics",
+ .ml_meth = (PyCFunction) pyrf__metrics,
+ .ml_flags = METH_NOARGS,
+ .ml_doc = PyDoc_STR(
+ "Returns a list of metrics represented as string values in dictionaries.")
+ },
+ {
.ml_name = "tracepoint",
.ml_meth = (PyCFunction) pyrf__tracepoint,
.ml_flags = METH_VARARGS | METH_KEYWORDS,
@@ -1701,6 +2220,19 @@ static PyMethodDef perf__methods[] = {
.ml_flags = METH_VARARGS,
.ml_doc = PyDoc_STR("Parse a string of events and return an evlist.")
},
+ {
+ .ml_name = "parse_metrics",
+ .ml_meth = (PyCFunction) pyrf__parse_metrics,
+ .ml_flags = METH_VARARGS,
+ .ml_doc = PyDoc_STR(
+ "Parse a string of metrics or metric groups and return an evlist.")
+ },
+ {
+ .ml_name = "pmus",
+ .ml_meth = (PyCFunction) pyrf__pmus,
+ .ml_flags = METH_NOARGS,
+ .ml_doc = PyDoc_STR("Returns a sequence of pmus.")
+ },
{ .ml_name = NULL, }
};
@@ -1728,6 +2260,8 @@ PyMODINIT_FUNC PyInit_perf(void)
pyrf_evsel__setup_types() < 0 ||
pyrf_thread_map__setup_types() < 0 ||
pyrf_cpu_map__setup_types() < 0 ||
+ pyrf_pmu_iterator__setup_types() < 0 ||
+ pyrf_pmu__setup_types() < 0 ||
pyrf_counts_values__setup_types() < 0)
return module;
diff --git a/tools/perf/util/s390-sample-raw.c b/tools/perf/util/s390-sample-raw.c
index 335217bb532b..c6ae0ae8d86a 100644
--- a/tools/perf/util/s390-sample-raw.c
+++ b/tools/perf/util/s390-sample-raw.c
@@ -19,12 +19,14 @@
#include <sys/stat.h>
#include <linux/compiler.h>
+#include <linux/err.h>
#include <asm/byteorder.h>
#include "debug.h"
#include "session.h"
#include "evlist.h"
#include "color.h"
+#include "hashmap.h"
#include "sample-raw.h"
#include "s390-cpumcf-kernel.h"
#include "util/pmu.h"
@@ -132,8 +134,8 @@ static int get_counterset_start(int setnr)
}
struct get_counter_name_data {
- int wanted;
- char *result;
+ long wanted;
+ const char *result;
};
static int get_counter_name_callback(void *vdata, struct pmu_event_info *info)
@@ -151,12 +153,22 @@ static int get_counter_name_callback(void *vdata, struct pmu_event_info *info)
rc = sscanf(event_str, "event=%x", &event_nr);
if (rc == 1 && event_nr == data->wanted) {
- data->result = strdup(info->name);
+ data->result = info->name;
return 1; /* Terminate the search. */
}
return 0;
}
+static size_t get_counter_name_hash_fn(long key, void *ctx __maybe_unused)
+{
+ return key;
+}
+
+static bool get_counter_name_hashmap_equal_fn(long key1, long key2, void *ctx __maybe_unused)
+{
+ return key1 == key2;
+}
+
/* Scan the PMU and extract the logical name of a counter from the event. Input
* is the counter set and counter number with in the set. Construct the event
* number and use this as key. If they match return the name of this counter.
@@ -164,17 +176,50 @@ static int get_counter_name_callback(void *vdata, struct pmu_event_info *info)
*/
static char *get_counter_name(int set, int nr, struct perf_pmu *pmu)
{
+ static struct hashmap *cache;
+ static struct perf_pmu *cache_pmu;
+ long cache_key = get_counterset_start(set) + nr;
struct get_counter_name_data data = {
- .wanted = get_counterset_start(set) + nr,
+ .wanted = cache_key,
.result = NULL,
};
+ char *result = NULL;
if (!pmu)
return NULL;
+ if (cache_pmu == pmu && hashmap__find(cache, cache_key, &result))
+ return strdup(result);
+
perf_pmu__for_each_event(pmu, /*skip_duplicate_pmus=*/ true,
&data, get_counter_name_callback);
- return data.result;
+
+ result = strdup(data.result ?: "<unknown>");
+
+ if (cache_pmu == NULL) {
+ struct hashmap *tmp = hashmap__new(get_counter_name_hash_fn,
+ get_counter_name_hashmap_equal_fn,
+ /*ctx=*/NULL);
+
+ if (!IS_ERR(tmp)) {
+ cache = tmp;
+ cache_pmu = pmu;
+ }
+ }
+
+ if (cache_pmu == pmu && result) {
+ char *old_value = NULL, *new_value = strdup(result);
+
+ if (new_value) {
+ hashmap__set(cache, cache_key, new_value, /*old_key=*/NULL, &old_value);
+ /*
+ * Free in case of a race, but resizing would be broken
+ * in that case.
+ */
+ free(old_value);
+ }
+ }
+ return result;
}
static void s390_cpumcfdg_dump(struct perf_pmu *pmu, struct perf_sample *sample)
diff --git a/tools/perf/util/sample.h b/tools/perf/util/sample.h
index fae834144ef4..a8307b20a9ea 100644
--- a/tools/perf/util/sample.h
+++ b/tools/perf/util/sample.h
@@ -107,6 +107,8 @@ struct perf_sample {
/** @weight3: On x86 holds retire_lat, on powerpc holds p_stage_cyc. */
u16 weight3;
bool no_hw_idx; /* No hw_idx collected in branch_stack */
+ bool deferred_callchain; /* Has deferred user callchains */
+ u64 deferred_cookie;
char insn[MAX_INSN];
void *raw_data;
struct ip_callchain *callchain;
diff --git a/tools/perf/util/scripting-engines/Build b/tools/perf/util/scripting-engines/Build
index 2282fe3772f3..24f087b0cd11 100644
--- a/tools/perf/util/scripting-engines/Build
+++ b/tools/perf/util/scripting-engines/Build
@@ -3,7 +3,7 @@ ifeq ($(CONFIG_LIBTRACEEVENT),y)
endif
perf-util-$(CONFIG_LIBPYTHON) += trace-event-python.o
-CFLAGS_trace-event-perl.o += $(PERL_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-shadow -Wno-nested-externs -Wno-undef -Wno-switch-default -Wno-bad-function-cast -Wno-declaration-after-statement -Wno-switch-enum
+CFLAGS_trace-event-perl.o += $(PERL_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-shadow -Wno-nested-externs -Wno-undef -Wno-switch-default -Wno-bad-function-cast -Wno-declaration-after-statement -Wno-switch-enum -Wno-thread-safety-analysis
# -Wno-declaration-after-statement: The python headers have mixed code with declarations (decls after asserts, for instance)
CFLAGS_trace-event-python.o += $(PYTHON_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-shadow -Wno-deprecated-declarations -Wno-switch-enum -Wno-declaration-after-statement
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 26ae078278cd..4236503c8f6c 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -720,6 +720,7 @@ static perf_event__swap_op perf_event__swap_ops[] = {
[PERF_RECORD_CGROUP] = perf_event__cgroup_swap,
[PERF_RECORD_TEXT_POKE] = perf_event__text_poke_swap,
[PERF_RECORD_AUX_OUTPUT_HW_ID] = perf_event__all64_swap,
+ [PERF_RECORD_CALLCHAIN_DEFERRED] = perf_event__all64_swap,
[PERF_RECORD_HEADER_ATTR] = perf_event__hdr_attr_swap,
[PERF_RECORD_HEADER_EVENT_TYPE] = perf_event__event_type_swap,
[PERF_RECORD_HEADER_TRACING_DATA] = perf_event__tracing_data_swap,
@@ -854,6 +855,9 @@ static void callchain__printf(struct evsel *evsel,
for (i = 0; i < callchain->nr; i++)
printf("..... %2d: %016" PRIx64 "\n",
i, callchain->ips[i]);
+
+ if (sample->deferred_callchain)
+ printf("...... (deferred)\n");
}
static void branch_stack__printf(struct perf_sample *sample,
@@ -1123,6 +1127,19 @@ static void dump_sample(struct evsel *evsel, union perf_event *event,
sample_read__printf(sample, evsel->core.attr.read_format);
}
+static void dump_deferred_callchain(struct evsel *evsel, union perf_event *event,
+ struct perf_sample *sample)
+{
+ if (!dump_trace)
+ return;
+
+ printf("(IP, 0x%x): %d/%d: %#" PRIx64 "\n",
+ event->header.misc, sample->pid, sample->tid, sample->deferred_cookie);
+
+ if (evsel__has_callchain(evsel))
+ callchain__printf(evsel, sample);
+}
+
static void dump_read(struct evsel *evsel, union perf_event *event)
{
struct perf_record_read *read_event = &event->read;
@@ -1268,6 +1285,106 @@ static int evlist__deliver_sample(struct evlist *evlist, const struct perf_tool
per_thread);
}
+/*
+ * Samples with deferred callchains should wait for the next matching
+ * PERF_RECORD_CALLCHAIN_RECORD entries. Keep the events in a list and
+ * deliver them once it finds the callchains.
+ */
+struct deferred_event {
+ struct list_head list;
+ union perf_event *event;
+};
+
+/*
+ * This is called when a deferred callchain record comes up. Find all matching
+ * samples, merge the callchains and process them.
+ */
+static int evlist__deliver_deferred_callchain(struct evlist *evlist,
+ const struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct machine *machine)
+{
+ struct deferred_event *de, *tmp;
+ struct evsel *evsel;
+ int ret = 0;
+
+ if (!tool->merge_deferred_callchains) {
+ evsel = evlist__id2evsel(evlist, sample->id);
+ return tool->callchain_deferred(tool, event, sample,
+ evsel, machine);
+ }
+
+ list_for_each_entry_safe(de, tmp, &evlist->deferred_samples, list) {
+ struct perf_sample orig_sample;
+
+ ret = evlist__parse_sample(evlist, de->event, &orig_sample);
+ if (ret < 0) {
+ pr_err("failed to parse original sample\n");
+ break;
+ }
+
+ if (sample->tid != orig_sample.tid)
+ continue;
+
+ if (event->callchain_deferred.cookie == orig_sample.deferred_cookie)
+ sample__merge_deferred_callchain(&orig_sample, sample);
+ else
+ orig_sample.deferred_callchain = false;
+
+ evsel = evlist__id2evsel(evlist, orig_sample.id);
+ ret = evlist__deliver_sample(evlist, tool, de->event,
+ &orig_sample, evsel, machine);
+
+ if (orig_sample.deferred_callchain)
+ free(orig_sample.callchain);
+
+ list_del(&de->list);
+ free(de->event);
+ free(de);
+
+ if (ret)
+ break;
+ }
+ return ret;
+}
+
+/*
+ * This is called at the end of the data processing for the session. Flush the
+ * remaining samples as there's no hope for matching deferred callchains.
+ */
+static int session__flush_deferred_samples(struct perf_session *session,
+ const struct perf_tool *tool)
+{
+ struct evlist *evlist = session->evlist;
+ struct machine *machine = &session->machines.host;
+ struct deferred_event *de, *tmp;
+ struct evsel *evsel;
+ int ret = 0;
+
+ list_for_each_entry_safe(de, tmp, &evlist->deferred_samples, list) {
+ struct perf_sample sample;
+
+ ret = evlist__parse_sample(evlist, de->event, &sample);
+ if (ret < 0) {
+ pr_err("failed to parse original sample\n");
+ break;
+ }
+
+ evsel = evlist__id2evsel(evlist, sample.id);
+ ret = evlist__deliver_sample(evlist, tool, de->event,
+ &sample, evsel, machine);
+
+ list_del(&de->list);
+ free(de->event);
+ free(de);
+
+ if (ret)
+ break;
+ }
+ return ret;
+}
+
static int machines__deliver_event(struct machines *machines,
struct evlist *evlist,
union perf_event *event,
@@ -1296,6 +1413,22 @@ static int machines__deliver_event(struct machines *machines,
return 0;
}
dump_sample(evsel, event, sample, perf_env__arch(machine->env));
+ if (sample->deferred_callchain && tool->merge_deferred_callchains) {
+ struct deferred_event *de = malloc(sizeof(*de));
+ size_t sz = event->header.size;
+
+ if (de == NULL)
+ return -ENOMEM;
+
+ de->event = malloc(sz);
+ if (de->event == NULL) {
+ free(de);
+ return -ENOMEM;
+ }
+ memcpy(de->event, event, sz);
+ list_add_tail(&de->list, &evlist->deferred_samples);
+ return 0;
+ }
return evlist__deliver_sample(evlist, tool, event, sample, evsel, machine);
case PERF_RECORD_MMAP:
return tool->mmap(tool, event, sample, machine);
@@ -1353,6 +1486,10 @@ static int machines__deliver_event(struct machines *machines,
return tool->text_poke(tool, event, sample, machine);
case PERF_RECORD_AUX_OUTPUT_HW_ID:
return tool->aux_output_hw_id(tool, event, sample, machine);
+ case PERF_RECORD_CALLCHAIN_DEFERRED:
+ dump_deferred_callchain(evsel, event, sample);
+ return evlist__deliver_deferred_callchain(evlist, tool, event,
+ sample, machine);
default:
++evlist->stats.nr_unknown_events;
return -1;
@@ -1402,7 +1539,7 @@ static s64 perf_session__process_user_event(struct perf_session *session,
const struct perf_tool *tool = session->tool;
struct perf_sample sample;
int fd = perf_data__fd(session->data);
- int err;
+ s64 err;
perf_sample__init(&sample, /*all=*/true);
if ((event->header.type != PERF_RECORD_COMPRESSED &&
@@ -1437,19 +1574,19 @@ static s64 perf_session__process_user_event(struct perf_session *session,
*/
if (!perf_data__is_pipe(session->data))
lseek(fd, file_offset, SEEK_SET);
- err = tool->tracing_data(session, event);
+ err = tool->tracing_data(tool, session, event);
break;
case PERF_RECORD_HEADER_BUILD_ID:
- err = tool->build_id(session, event);
+ err = tool->build_id(tool, session, event);
break;
case PERF_RECORD_FINISHED_ROUND:
err = tool->finished_round(tool, event, oe);
break;
case PERF_RECORD_ID_INDEX:
- err = tool->id_index(session, event);
+ err = tool->id_index(tool, session, event);
break;
case PERF_RECORD_AUXTRACE_INFO:
- err = tool->auxtrace_info(session, event);
+ err = tool->auxtrace_info(tool, session, event);
break;
case PERF_RECORD_AUXTRACE:
/*
@@ -1459,45 +1596,45 @@ static s64 perf_session__process_user_event(struct perf_session *session,
*/
if (!perf_data__is_pipe(session->data))
lseek(fd, file_offset + event->header.size, SEEK_SET);
- err = tool->auxtrace(session, event);
+ err = tool->auxtrace(tool, session, event);
break;
case PERF_RECORD_AUXTRACE_ERROR:
perf_session__auxtrace_error_inc(session, event);
- err = tool->auxtrace_error(session, event);
+ err = tool->auxtrace_error(tool, session, event);
break;
case PERF_RECORD_THREAD_MAP:
- err = tool->thread_map(session, event);
+ err = tool->thread_map(tool, session, event);
break;
case PERF_RECORD_CPU_MAP:
- err = tool->cpu_map(session, event);
+ err = tool->cpu_map(tool, session, event);
break;
case PERF_RECORD_STAT_CONFIG:
- err = tool->stat_config(session, event);
+ err = tool->stat_config(tool, session, event);
break;
case PERF_RECORD_STAT:
- err = tool->stat(session, event);
+ err = tool->stat(tool, session, event);
break;
case PERF_RECORD_STAT_ROUND:
- err = tool->stat_round(session, event);
+ err = tool->stat_round(tool, session, event);
break;
case PERF_RECORD_TIME_CONV:
session->time_conv = event->time_conv;
- err = tool->time_conv(session, event);
+ err = tool->time_conv(tool, session, event);
break;
case PERF_RECORD_HEADER_FEATURE:
- err = tool->feature(session, event);
+ err = tool->feature(tool, session, event);
break;
case PERF_RECORD_COMPRESSED:
case PERF_RECORD_COMPRESSED2:
- err = tool->compressed(session, event, file_offset, file_path);
+ err = tool->compressed(tool, session, event, file_offset, file_path);
if (err)
dump_event(session->evlist, event, file_offset, &sample, file_path);
break;
case PERF_RECORD_FINISHED_INIT:
- err = tool->finished_init(session, event);
+ err = tool->finished_init(tool, session, event);
break;
case PERF_RECORD_BPF_METADATA:
- err = tool->bpf_metadata(session, event);
+ err = tool->bpf_metadata(tool, session, event);
break;
default:
err = -EINVAL;
@@ -1943,6 +2080,9 @@ done:
err = ordered_events__flush(oe, OE_FLUSH__FINAL);
if (err)
goto out_err;
+ err = session__flush_deferred_samples(session, tool);
+ if (err)
+ goto out_err;
err = auxtrace__flush_events(session, tool);
if (err)
goto out_err;
@@ -2289,6 +2429,9 @@ static int __perf_session__process_events(struct perf_session *session)
err = auxtrace__flush_events(session, tool);
if (err)
goto out_err;
+ err = session__flush_deferred_samples(session, tool);
+ if (err)
+ goto out_err;
err = perf_session__flush_thread_stacks(session);
out_err:
ui_progress__finish();
@@ -2409,6 +2552,10 @@ static int __perf_session__process_dir_events(struct perf_session *session)
if (ret)
goto out_err;
+ ret = session__flush_deferred_samples(session, tool);
+ if (ret)
+ goto out_err;
+
ret = perf_session__flush_thread_stacks(session);
out_err:
ui_progress__finish();
@@ -2647,7 +2794,8 @@ static int perf_session__set_guest_cpu(struct perf_session *session, pid_t pid,
return 0;
}
-int perf_event__process_id_index(struct perf_session *session,
+int perf_event__process_id_index(const struct perf_tool *tool __maybe_unused,
+ struct perf_session *session,
union perf_event *event)
{
struct evlist *evlist = session->evlist;
diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h
index cf88d65a25cb..22d3ff877e83 100644
--- a/tools/perf/util/session.h
+++ b/tools/perf/util/session.h
@@ -202,7 +202,8 @@ int perf_session__deliver_synth_attr_event(struct perf_session *session,
int perf_session__dsos_hit_all(struct perf_session *session);
-int perf_event__process_id_index(struct perf_session *session,
+int perf_event__process_id_index(const struct perf_tool *tool,
+ struct perf_session *session,
union perf_event *event);
int perf_event__process_finished_round(const struct perf_tool *tool,
diff --git a/tools/perf/util/setup.py b/tools/perf/util/setup.py
index dd289d15acfd..b65b1792ca05 100644
--- a/tools/perf/util/setup.py
+++ b/tools/perf/util/setup.py
@@ -1,6 +1,7 @@
from os import getenv, path
from subprocess import Popen, PIPE
from re import sub
+import shlex
cc = getenv("CC")
assert cc, "Environment variable CC not set"
@@ -22,8 +23,17 @@ assert srctree, "Environment variable srctree, for the Linux sources, not set"
src_feature_tests = f'{srctree}/tools/build/feature'
def clang_has_option(option):
- cc_output = Popen([cc, cc_options + option, path.join(src_feature_tests, "test-hello.c") ], stderr=PIPE).stderr.readlines()
- return [o for o in cc_output if ((b"unknown argument" in o) or (b"is not supported" in o) or (b"unknown warning option" in o))] == [ ]
+ error_substrings = (
+ b"unknown argument",
+ b"is not supported",
+ b"unknown warning option"
+ )
+ cmd = shlex.split(f"{cc} {cc_options} {option}") + [
+ "-o", "/dev/null",
+ path.join(src_feature_tests, "test-hello.c")
+ ]
+ cc_output = Popen(cmd, stderr=PIPE).stderr.readlines()
+ return not any(any(error in line for error in error_substrings) for line in cc_output)
if cc_is_clang:
from sysconfig import get_config_vars
diff --git a/tools/perf/util/srcline.c b/tools/perf/util/srcline.c
index 3e3449e35dd4..27c0966611ab 100644
--- a/tools/perf/util/srcline.c
+++ b/tools/perf/util/srcline.c
@@ -1,32 +1,15 @@
// SPDX-License-Identifier: GPL-2.0
-#include <inttypes.h>
-#include <signal.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/types.h>
-
-#include <linux/compiler.h>
-#include <linux/kernel.h>
-#include <linux/string.h>
-#include <linux/zalloc.h>
-
-#include <api/io.h>
-
-#include "util/dso.h"
-#include "util/debug.h"
-#include "util/callchain.h"
-#include "util/symbol_conf.h"
-#ifdef HAVE_LIBLLVM_SUPPORT
-#include "util/llvm-c-helpers.h"
-#endif
#include "srcline.h"
-#include "string2.h"
+#include "addr2line.h"
+#include "dso.h"
+#include "callchain.h"
+#include "libbfd.h"
+#include "llvm.h"
#include "symbol.h"
-#include "subcmd/run-command.h"
-/* If addr2line doesn't return data for 1 second then timeout. */
-int addr2line_timeout_ms = 1 * 1000;
+#include <inttypes.h>
+#include <string.h>
+
bool srcline_full_filename;
char *srcline__unknown = (char *)"??:0";
@@ -49,8 +32,7 @@ static const char *srcline_dso_name(struct dso *dso)
return dso_name;
}
-static int inline_list__append(struct symbol *symbol, char *srcline,
- struct inline_node *node)
+int inline_list__append(struct symbol *symbol, char *srcline, struct inline_node *node)
{
struct inline_list *ilist;
@@ -77,7 +59,7 @@ static const char *gnu_basename(const char *path)
return base ? base + 1 : path;
}
-static char *srcline_from_fileline(const char *file, unsigned int line)
+char *srcline_from_fileline(const char *file, unsigned int line)
{
char *srcline;
@@ -93,9 +75,9 @@ static char *srcline_from_fileline(const char *file, unsigned int line)
return srcline;
}
-static struct symbol *new_inline_sym(struct dso *dso,
- struct symbol *base_sym,
- const char *funcname)
+struct symbol *new_inline_sym(struct dso *dso,
+ struct symbol *base_sym,
+ const char *funcname)
{
struct symbol *inline_sym;
char *demangled = NULL;
@@ -132,722 +114,23 @@ static struct symbol *new_inline_sym(struct dso *dso,
return inline_sym;
}
-#define MAX_INLINE_NEST 1024
-
-#ifdef HAVE_LIBLLVM_SUPPORT
-
-static void free_llvm_inline_frames(struct llvm_a2l_frame *inline_frames,
- int num_frames)
-{
- if (inline_frames != NULL) {
- for (int i = 0; i < num_frames; ++i) {
- zfree(&inline_frames[i].filename);
- zfree(&inline_frames[i].funcname);
- }
- zfree(&inline_frames);
- }
-}
-
-static int addr2line(const char *dso_name, u64 addr,
- char **file, unsigned int *line, struct dso *dso,
- bool unwind_inlines, struct inline_node *node,
+static int addr2line(const char *dso_name, u64 addr, char **file, unsigned int *line_nr,
+ struct dso *dso, bool unwind_inlines, struct inline_node *node,
struct symbol *sym)
{
- struct llvm_a2l_frame *inline_frames = NULL;
- int num_frames = llvm_addr2line(dso_name, addr, file, line,
- node && unwind_inlines, &inline_frames);
-
- if (num_frames == 0 || !inline_frames) {
- /* Error, or we didn't want inlines. */
- return num_frames;
- }
-
- for (int i = 0; i < num_frames; ++i) {
- struct symbol *inline_sym =
- new_inline_sym(dso, sym, inline_frames[i].funcname);
- char *srcline = NULL;
-
- if (inline_frames[i].filename) {
- srcline =
- srcline_from_fileline(inline_frames[i].filename,
- inline_frames[i].line);
- }
- if (inline_list__append(inline_sym, srcline, node) != 0) {
- free_llvm_inline_frames(inline_frames, num_frames);
- return 0;
- }
- }
- free_llvm_inline_frames(inline_frames, num_frames);
-
- return num_frames;
-}
-
-void dso__free_a2l(struct dso *dso __maybe_unused)
-{
- /* Nothing to free. */
-}
-
-#elif defined(HAVE_LIBBFD_SUPPORT)
-
-/*
- * Implement addr2line using libbfd.
- */
-#define PACKAGE "perf"
-#include <bfd.h>
-
-struct a2l_data {
- const char *input;
- u64 addr;
-
- bool found;
- const char *filename;
- const char *funcname;
- unsigned line;
-
- bfd *abfd;
- asymbol **syms;
-};
-
-static int bfd_error(const char *string)
-{
- const char *errmsg;
-
- errmsg = bfd_errmsg(bfd_get_error());
- fflush(stdout);
-
- if (string)
- pr_debug("%s: %s\n", string, errmsg);
- else
- pr_debug("%s\n", errmsg);
-
- return -1;
-}
-
-static int slurp_symtab(bfd *abfd, struct a2l_data *a2l)
-{
- long storage;
- long symcount;
- asymbol **syms;
- bfd_boolean dynamic = FALSE;
-
- if ((bfd_get_file_flags(abfd) & HAS_SYMS) == 0)
- return bfd_error(bfd_get_filename(abfd));
-
- storage = bfd_get_symtab_upper_bound(abfd);
- if (storage == 0L) {
- storage = bfd_get_dynamic_symtab_upper_bound(abfd);
- dynamic = TRUE;
- }
- if (storage < 0L)
- return bfd_error(bfd_get_filename(abfd));
-
- syms = malloc(storage);
- if (dynamic)
- symcount = bfd_canonicalize_dynamic_symtab(abfd, syms);
- else
- symcount = bfd_canonicalize_symtab(abfd, syms);
-
- if (symcount < 0) {
- free(syms);
- return bfd_error(bfd_get_filename(abfd));
- }
-
- a2l->syms = syms;
- return 0;
-}
-
-static void find_address_in_section(bfd *abfd, asection *section, void *data)
-{
- bfd_vma pc, vma;
- bfd_size_type size;
- struct a2l_data *a2l = data;
- flagword flags;
-
- if (a2l->found)
- return;
+ int ret;
-#ifdef bfd_get_section_flags
- flags = bfd_get_section_flags(abfd, section);
-#else
- flags = bfd_section_flags(section);
-#endif
- if ((flags & SEC_ALLOC) == 0)
- return;
-
- pc = a2l->addr;
-#ifdef bfd_get_section_vma
- vma = bfd_get_section_vma(abfd, section);
-#else
- vma = bfd_section_vma(section);
-#endif
-#ifdef bfd_get_section_size
- size = bfd_get_section_size(section);
-#else
- size = bfd_section_size(section);
-#endif
-
- if (pc < vma || pc >= vma + size)
- return;
+ ret = llvm__addr2line(dso_name, addr, file, line_nr, dso, unwind_inlines, node, sym);
+ if (ret > 0)
+ return ret;
- a2l->found = bfd_find_nearest_line(abfd, section, a2l->syms, pc - vma,
- &a2l->filename, &a2l->funcname,
- &a2l->line);
-
- if (a2l->filename && !strlen(a2l->filename))
- a2l->filename = NULL;
-}
-
-static struct a2l_data *addr2line_init(const char *path)
-{
- bfd *abfd;
- struct a2l_data *a2l = NULL;
-
- abfd = bfd_openr(path, NULL);
- if (abfd == NULL)
- return NULL;
+ ret = libbfd__addr2line(dso_name, addr, file, line_nr, dso, unwind_inlines, node, sym);
+ if (ret > 0)
+ return ret;
- if (!bfd_check_format(abfd, bfd_object))
- goto out;
-
- a2l = zalloc(sizeof(*a2l));
- if (a2l == NULL)
- goto out;
-
- a2l->abfd = abfd;
- a2l->input = strdup(path);
- if (a2l->input == NULL)
- goto out;
-
- if (slurp_symtab(abfd, a2l))
- goto out;
-
- return a2l;
-
-out:
- if (a2l) {
- zfree((char **)&a2l->input);
- free(a2l);
- }
- bfd_close(abfd);
- return NULL;
+ return cmd__addr2line(dso_name, addr, file, line_nr, dso, unwind_inlines, node, sym);
}
-static void addr2line_cleanup(struct a2l_data *a2l)
-{
- if (a2l->abfd)
- bfd_close(a2l->abfd);
- zfree((char **)&a2l->input);
- zfree(&a2l->syms);
- free(a2l);
-}
-
-static int inline_list__append_dso_a2l(struct dso *dso,
- struct inline_node *node,
- struct symbol *sym)
-{
- struct a2l_data *a2l = dso__a2l(dso);
- struct symbol *inline_sym = new_inline_sym(dso, sym, a2l->funcname);
- char *srcline = NULL;
-
- if (a2l->filename)
- srcline = srcline_from_fileline(a2l->filename, a2l->line);
-
- return inline_list__append(inline_sym, srcline, node);
-}
-
-static int addr2line(const char *dso_name, u64 addr,
- char **file, unsigned int *line, struct dso *dso,
- bool unwind_inlines, struct inline_node *node,
- struct symbol *sym)
-{
- int ret = 0;
- struct a2l_data *a2l = dso__a2l(dso);
-
- if (!a2l) {
- a2l = addr2line_init(dso_name);
- dso__set_a2l(dso, a2l);
- }
-
- if (a2l == NULL) {
- if (!symbol_conf.disable_add2line_warn)
- pr_warning("addr2line_init failed for %s\n", dso_name);
- return 0;
- }
-
- a2l->addr = addr;
- a2l->found = false;
-
- bfd_map_over_sections(a2l->abfd, find_address_in_section, a2l);
-
- if (!a2l->found)
- return 0;
-
- if (unwind_inlines) {
- int cnt = 0;
-
- if (node && inline_list__append_dso_a2l(dso, node, sym))
- return 0;
-
- while (bfd_find_inliner_info(a2l->abfd, &a2l->filename,
- &a2l->funcname, &a2l->line) &&
- cnt++ < MAX_INLINE_NEST) {
-
- if (a2l->filename && !strlen(a2l->filename))
- a2l->filename = NULL;
-
- if (node != NULL) {
- if (inline_list__append_dso_a2l(dso, node, sym))
- return 0;
- // found at least one inline frame
- ret = 1;
- }
- }
- }
-
- if (file) {
- *file = a2l->filename ? strdup(a2l->filename) : NULL;
- ret = *file ? 1 : 0;
- }
-
- if (line)
- *line = a2l->line;
-
- return ret;
-}
-
-void dso__free_a2l(struct dso *dso)
-{
- struct a2l_data *a2l = dso__a2l(dso);
-
- if (!a2l)
- return;
-
- addr2line_cleanup(a2l);
-
- dso__set_a2l(dso, NULL);
-}
-
-#else /* HAVE_LIBBFD_SUPPORT */
-
-static int filename_split(char *filename, unsigned int *line_nr)
-{
- char *sep;
-
- sep = strchr(filename, '\n');
- if (sep)
- *sep = '\0';
-
- if (!strcmp(filename, "??:0"))
- return 0;
-
- sep = strchr(filename, ':');
- if (sep) {
- *sep++ = '\0';
- *line_nr = strtoul(sep, NULL, 0);
- return 1;
- }
- pr_debug("addr2line missing ':' in filename split\n");
- return 0;
-}
-
-static void addr2line_subprocess_cleanup(struct child_process *a2l)
-{
- if (a2l->pid != -1) {
- kill(a2l->pid, SIGKILL);
- finish_command(a2l); /* ignore result, we don't care */
- a2l->pid = -1;
- close(a2l->in);
- close(a2l->out);
- }
-
- free(a2l);
-}
-
-static struct child_process *addr2line_subprocess_init(const char *addr2line_path,
- const char *binary_path)
-{
- const char *argv[] = {
- addr2line_path ?: "addr2line",
- "-e", binary_path,
- "-a", "-i", "-f", NULL
- };
- struct child_process *a2l = zalloc(sizeof(*a2l));
- int start_command_status = 0;
-
- if (a2l == NULL) {
- pr_err("Failed to allocate memory for addr2line");
- return NULL;
- }
-
- a2l->pid = -1;
- a2l->in = -1;
- a2l->out = -1;
- a2l->no_stderr = 1;
-
- a2l->argv = argv;
- start_command_status = start_command(a2l);
- a2l->argv = NULL; /* it's not used after start_command; avoid dangling pointers */
-
- if (start_command_status != 0) {
- pr_warning("could not start addr2line (%s) for %s: start_command return code %d\n",
- addr2line_path, binary_path, start_command_status);
- addr2line_subprocess_cleanup(a2l);
- return NULL;
- }
-
- return a2l;
-}
-
-enum a2l_style {
- BROKEN,
- GNU_BINUTILS,
- LLVM,
-};
-
-static enum a2l_style addr2line_configure(struct child_process *a2l, const char *dso_name)
-{
- static bool cached;
- static enum a2l_style style;
-
- if (!cached) {
- char buf[128];
- struct io io;
- int ch;
- int lines;
-
- if (write(a2l->in, ",\n", 2) != 2)
- return BROKEN;
-
- io__init(&io, a2l->out, buf, sizeof(buf));
- ch = io__get_char(&io);
- if (ch == ',') {
- style = LLVM;
- cached = true;
- lines = 1;
- pr_debug3("Detected LLVM addr2line style\n");
- } else if (ch == '0') {
- style = GNU_BINUTILS;
- cached = true;
- lines = 3;
- pr_debug3("Detected binutils addr2line style\n");
- } else {
- if (!symbol_conf.disable_add2line_warn) {
- char *output = NULL;
- size_t output_len;
-
- io__getline(&io, &output, &output_len);
- pr_warning("%s %s: addr2line configuration failed\n",
- __func__, dso_name);
- pr_warning("\t%c%s", ch, output);
- }
- pr_debug("Unknown/broken addr2line style\n");
- return BROKEN;
- }
- while (lines) {
- ch = io__get_char(&io);
- if (ch <= 0)
- break;
- if (ch == '\n')
- lines--;
- }
- /* Ignore SIGPIPE in the event addr2line exits. */
- signal(SIGPIPE, SIG_IGN);
- }
- return style;
-}
-
-static int read_addr2line_record(struct io *io,
- enum a2l_style style,
- const char *dso_name,
- u64 addr,
- bool first,
- char **function,
- char **filename,
- unsigned int *line_nr)
-{
- /*
- * Returns:
- * -1 ==> error
- * 0 ==> sentinel (or other ill-formed) record read
- * 1 ==> a genuine record read
- */
- char *line = NULL;
- size_t line_len = 0;
- unsigned int dummy_line_nr = 0;
- int ret = -1;
-
- if (function != NULL)
- zfree(function);
-
- if (filename != NULL)
- zfree(filename);
-
- if (line_nr != NULL)
- *line_nr = 0;
-
- /*
- * Read the first line. Without an error this will be:
- * - for the first line an address like 0x1234,
- * - the binutils sentinel 0x0000000000000000,
- * - the llvm-addr2line the sentinel ',' character,
- * - the function name line for an inlined function.
- */
- if (io__getline(io, &line, &line_len) < 0 || !line_len)
- goto error;
-
- pr_debug3("%s %s: addr2line read address for sentinel: %s", __func__, dso_name, line);
- if (style == LLVM && line_len == 2 && line[0] == ',') {
- /* Found the llvm-addr2line sentinel character. */
- zfree(&line);
- return 0;
- } else if (style == GNU_BINUTILS && (!first || addr != 0)) {
- int zero_count = 0, non_zero_count = 0;
- /*
- * Check for binutils sentinel ignoring it for the case the
- * requested address is 0.
- */
-
- /* A given address should always start 0x. */
- if (line_len >= 2 || line[0] != '0' || line[1] != 'x') {
- for (size_t i = 2; i < line_len; i++) {
- if (line[i] == '0')
- zero_count++;
- else if (line[i] != '\n')
- non_zero_count++;
- }
- if (!non_zero_count) {
- int ch;
-
- if (first && !zero_count) {
- /* Line was erroneous just '0x'. */
- goto error;
- }
- /*
- * Line was 0x0..0, the sentinel for binutils. Remove
- * the function and filename lines.
- */
- zfree(&line);
- do {
- ch = io__get_char(io);
- } while (ch > 0 && ch != '\n');
- do {
- ch = io__get_char(io);
- } while (ch > 0 && ch != '\n');
- return 0;
- }
- }
- }
- /* Read the second function name line (if inline data then this is the first line). */
- if (first && (io__getline(io, &line, &line_len) < 0 || !line_len))
- goto error;
-
- pr_debug3("%s %s: addr2line read line: %s", __func__, dso_name, line);
- if (function != NULL)
- *function = strdup(strim(line));
-
- zfree(&line);
- line_len = 0;
-
- /* Read the third filename and line number line. */
- if (io__getline(io, &line, &line_len) < 0 || !line_len)
- goto error;
-
- pr_debug3("%s %s: addr2line filename:number : %s", __func__, dso_name, line);
- if (filename_split(line, line_nr == NULL ? &dummy_line_nr : line_nr) == 0 &&
- style == GNU_BINUTILS) {
- ret = 0;
- goto error;
- }
-
- if (filename != NULL)
- *filename = strdup(line);
-
- zfree(&line);
- line_len = 0;
-
- return 1;
-
-error:
- free(line);
- if (function != NULL)
- zfree(function);
- if (filename != NULL)
- zfree(filename);
- return ret;
-}
-
-static int inline_list__append_record(struct dso *dso,
- struct inline_node *node,
- struct symbol *sym,
- const char *function,
- const char *filename,
- unsigned int line_nr)
-{
- struct symbol *inline_sym = new_inline_sym(dso, sym, function);
-
- return inline_list__append(inline_sym, srcline_from_fileline(filename, line_nr), node);
-}
-
-static int addr2line(const char *dso_name, u64 addr,
- char **file, unsigned int *line_nr,
- struct dso *dso,
- bool unwind_inlines,
- struct inline_node *node,
- struct symbol *sym __maybe_unused)
-{
- struct child_process *a2l = dso__a2l(dso);
- char *record_function = NULL;
- char *record_filename = NULL;
- unsigned int record_line_nr = 0;
- int record_status = -1;
- int ret = 0;
- size_t inline_count = 0;
- int len;
- char buf[128];
- ssize_t written;
- struct io io = { .eof = false };
- enum a2l_style a2l_style;
-
- if (!a2l) {
- if (!filename__has_section(dso_name, ".debug_line"))
- goto out;
-
- dso__set_a2l(dso,
- addr2line_subprocess_init(symbol_conf.addr2line_path, dso_name));
- a2l = dso__a2l(dso);
- }
-
- if (a2l == NULL) {
- if (!symbol_conf.disable_add2line_warn)
- pr_warning("%s %s: addr2line_subprocess_init failed\n", __func__, dso_name);
- goto out;
- }
- a2l_style = addr2line_configure(a2l, dso_name);
- if (a2l_style == BROKEN)
- goto out;
-
- /*
- * Send our request and then *deliberately* send something that can't be
- * interpreted as a valid address to ask addr2line about (namely,
- * ","). This causes addr2line to first write out the answer to our
- * request, in an unbounded/unknown number of records, and then to write
- * out the lines "0x0...0", "??" and "??:0", for GNU binutils, or ","
- * for llvm-addr2line, so that we can detect when it has finished giving
- * us anything useful.
- */
- len = snprintf(buf, sizeof(buf), "%016"PRIx64"\n,\n", addr);
- written = len > 0 ? write(a2l->in, buf, len) : -1;
- if (written != len) {
- if (!symbol_conf.disable_add2line_warn)
- pr_warning("%s %s: could not send request\n", __func__, dso_name);
- goto out;
- }
- io__init(&io, a2l->out, buf, sizeof(buf));
- io.timeout_ms = addr2line_timeout_ms;
- switch (read_addr2line_record(&io, a2l_style, dso_name, addr, /*first=*/true,
- &record_function, &record_filename, &record_line_nr)) {
- case -1:
- if (!symbol_conf.disable_add2line_warn)
- pr_warning("%s %s: could not read first record\n", __func__, dso_name);
- goto out;
- case 0:
- /*
- * The first record was invalid, so return failure, but first
- * read another record, since we sent a sentinel ',' for the
- * sake of detected the last inlined function. Treat this as the
- * first of a record as the ',' generates a new start with GNU
- * binutils, also force a non-zero address as we're no longer
- * reading that record.
- */
- switch (read_addr2line_record(&io, a2l_style, dso_name,
- /*addr=*/1, /*first=*/true,
- NULL, NULL, NULL)) {
- case -1:
- if (!symbol_conf.disable_add2line_warn)
- pr_warning("%s %s: could not read sentinel record\n",
- __func__, dso_name);
- break;
- case 0:
- /* The sentinel as expected. */
- break;
- default:
- if (!symbol_conf.disable_add2line_warn)
- pr_warning("%s %s: unexpected record instead of sentinel",
- __func__, dso_name);
- break;
- }
- goto out;
- default:
- /* First record as expected. */
- break;
- }
-
- if (file) {
- *file = strdup(record_filename);
- ret = 1;
- }
- if (line_nr)
- *line_nr = record_line_nr;
-
- if (unwind_inlines) {
- if (node && inline_list__append_record(dso, node, sym,
- record_function,
- record_filename,
- record_line_nr)) {
- ret = 0;
- goto out;
- }
- }
-
- /*
- * We have to read the records even if we don't care about the inline
- * info. This isn't the first record and force the address to non-zero
- * as we're reading records beyond the first.
- */
- while ((record_status = read_addr2line_record(&io,
- a2l_style,
- dso_name,
- /*addr=*/1,
- /*first=*/false,
- &record_function,
- &record_filename,
- &record_line_nr)) == 1) {
- if (unwind_inlines && node && inline_count++ < MAX_INLINE_NEST) {
- if (inline_list__append_record(dso, node, sym,
- record_function,
- record_filename,
- record_line_nr)) {
- ret = 0;
- goto out;
- }
- ret = 1; /* found at least one inline frame */
- }
- }
-
-out:
- free(record_function);
- free(record_filename);
- if (io.eof) {
- dso__set_a2l(dso, NULL);
- addr2line_subprocess_cleanup(a2l);
- }
- return ret;
-}
-
-void dso__free_a2l(struct dso *dso)
-{
- struct child_process *a2l = dso__a2l(dso);
-
- if (!a2l)
- return;
-
- addr2line_subprocess_cleanup(a2l);
-
- dso__set_a2l(dso, NULL);
-}
-
-#endif /* HAVE_LIBBFD_SUPPORT */
-
static struct inline_node *addr2inlines(const char *dso_name, u64 addr,
struct dso *dso, struct symbol *sym)
{
@@ -862,7 +145,9 @@ static struct inline_node *addr2inlines(const char *dso_name, u64 addr,
INIT_LIST_HEAD(&node->val);
node->addr = addr;
- addr2line(dso_name, addr, NULL, NULL, dso, true, node, sym);
+ addr2line(dso_name, addr, /*file=*/NULL, /*line_nr=*/NULL, dso,
+ /*unwind_inlines=*/true, node, sym);
+
return node;
}
@@ -889,7 +174,7 @@ char *__get_srcline(struct dso *dso, u64 addr, struct symbol *sym,
goto out_err;
if (!addr2line(dso_name, addr, &file, &line, dso,
- unwind_inlines, NULL, sym))
+ unwind_inlines, /*node=*/NULL, sym))
goto out_err;
srcline = srcline_from_fileline(file, line);
@@ -935,7 +220,8 @@ char *get_srcline_split(struct dso *dso, u64 addr, unsigned *line)
if (dso_name == NULL)
goto out_err;
- if (!addr2line(dso_name, addr, &file, line, dso, true, NULL, NULL))
+ if (!addr2line(dso_name, addr, &file, line, dso, /*unwind_inlines=*/true,
+ /*node=*/NULL, /*sym=*/NULL))
goto out_err;
dso__set_a2l_fails(dso, 0);
diff --git a/tools/perf/util/srcline.h b/tools/perf/util/srcline.h
index 75010d39ea28..c36f573cd339 100644
--- a/tools/perf/util/srcline.h
+++ b/tools/perf/util/srcline.h
@@ -9,7 +9,6 @@
struct dso;
struct symbol;
-extern int addr2line_timeout_ms;
extern bool srcline_full_filename;
char *get_srcline(struct dso *dso, u64 addr, struct symbol *sym,
bool show_sym, bool show_addr, u64 ip);
@@ -29,6 +28,8 @@ void srcline__tree_delete(struct rb_root_cached *tree);
extern char *srcline__unknown;
#define SRCLINE_UNKNOWN srcline__unknown
+#define MAX_INLINE_NEST 1024
+
struct inline_list {
struct symbol *symbol;
char *srcline;
@@ -55,4 +56,10 @@ struct inline_node *inlines__tree_find(struct rb_root_cached *tree, u64 addr);
/* delete all nodes within the tree of inline_node s */
void inlines__tree_delete(struct rb_root_cached *tree);
+int inline_list__append(struct symbol *symbol, char *srcline, struct inline_node *node);
+char *srcline_from_fileline(const char *file, unsigned int line);
+struct symbol *new_inline_sym(struct dso *dso,
+ struct symbol *base_sym,
+ const char *funcname);
+
#endif /* PERF_SRCLINE_H */
diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c
index a67b991f4e81..6d02f84c5691 100644
--- a/tools/perf/util/stat-display.c
+++ b/tools/perf/util/stat-display.c
@@ -439,9 +439,9 @@ static inline void __new_line_std_csv(struct perf_stat_config *config,
aggr_printout(config, os, os->evsel, os->id, os->aggr_nr);
}
-static inline void __new_line_std(struct outstate *os)
+static inline void __new_line_std(struct perf_stat_config *config, struct outstate *os)
{
- fprintf(os->fh, " ");
+ fprintf(os->fh, "%*s", COUNTS_LEN + EVNAME_LEN + config->unit_width + 2, "");
}
static void do_new_line_std(struct perf_stat_config *config,
@@ -450,7 +450,7 @@ static void do_new_line_std(struct perf_stat_config *config,
__new_line_std_csv(config, os);
if (config->aggr_mode == AGGR_NONE)
fprintf(os->fh, " ");
- __new_line_std(os);
+ __new_line_std(config, os);
}
static void print_metric_std(struct perf_stat_config *config,
@@ -583,36 +583,13 @@ static void print_metricgroup_header_std(struct perf_stat_config *config,
int n;
if (!metricgroup_name) {
- __new_line_std(os);
+ __new_line_std(config, os);
return;
}
n = fprintf(config->output, " %*s", EVNAME_LEN, metricgroup_name);
- fprintf(config->output, "%*s", MGROUP_LEN - n - 1, "");
-}
-
-/* Filter out some columns that don't work well in metrics only mode */
-
-static bool valid_only_metric(const char *unit)
-{
- if (!unit)
- return false;
- if (strstr(unit, "/sec") ||
- strstr(unit, "CPUs utilized"))
- return false;
- return true;
-}
-
-static const char *fixunit(char *buf, struct evsel *evsel,
- const char *unit)
-{
- if (!strncmp(unit, "of all", 6)) {
- snprintf(buf, 1024, "%s %s", evsel__name(evsel),
- unit);
- return buf;
- }
- return unit;
+ fprintf(config->output, "%*s", MGROUP_LEN + config->unit_width + 2 - n, "");
}
static void print_metric_only(struct perf_stat_config *config,
@@ -621,13 +598,12 @@ static void print_metric_only(struct perf_stat_config *config,
{
struct outstate *os = ctx;
FILE *out = os->fh;
- char buf[1024], str[1024];
+ char str[1024];
unsigned mlen = config->metric_only_len;
const char *color = metric_threshold_classify__color(thresh);
- if (!valid_only_metric(unit))
- return;
- unit = fixunit(buf, os->evsel, unit);
+ if (!unit)
+ unit = "";
if (mlen < strlen(unit))
mlen = strlen(unit) + 1;
@@ -643,16 +619,15 @@ static void print_metric_only_csv(struct perf_stat_config *config __maybe_unused
void *ctx,
enum metric_threshold_classify thresh __maybe_unused,
const char *fmt,
- const char *unit, double val)
+ const char *unit __maybe_unused, double val)
{
struct outstate *os = ctx;
FILE *out = os->fh;
char buf[64], *vals, *ends;
- char tbuf[1024];
- if (!valid_only_metric(unit))
+ if (!unit)
return;
- unit = fixunit(tbuf, os->evsel, unit);
+
snprintf(buf, sizeof(buf), fmt ?: "", val);
ends = vals = skip_spaces(buf);
while (isdigit(*ends) || *ends == '.')
@@ -670,13 +645,9 @@ static void print_metric_only_json(struct perf_stat_config *config __maybe_unuse
{
struct outstate *os = ctx;
char buf[64], *ends;
- char tbuf[1024];
const char *vals;
- if (!valid_only_metric(unit))
- return;
- unit = fixunit(tbuf, os->evsel, unit);
- if (!unit[0])
+ if (!unit || !unit[0])
return;
snprintf(buf, sizeof(buf), fmt ?: "", val);
vals = ends = skip_spaces(buf);
@@ -695,7 +666,6 @@ static void print_metric_header(struct perf_stat_config *config,
const char *unit, double val __maybe_unused)
{
struct outstate *os = ctx;
- char tbuf[1024];
/* In case of iostat, print metric header for first root port only */
if (config->iostat_run &&
@@ -705,9 +675,8 @@ static void print_metric_header(struct perf_stat_config *config,
if (os->evsel->cgrp != os->cgrp)
return;
- if (!valid_only_metric(unit))
+ if (!unit)
return;
- unit = fixunit(tbuf, os->evsel, unit);
if (config->json_output)
return;
@@ -872,7 +841,7 @@ static void printout(struct perf_stat_config *config, struct outstate *os,
out.ctx = os;
out.force_header = false;
- if (!config->metric_only && !counter->default_metricgroup) {
+ if (!config->metric_only && (!counter->default_metricgroup || counter->default_show_events)) {
abs_printout(config, os, os->id, os->aggr_nr, counter, uval, ok);
print_noise(config, os, counter, noise, /*before_metric=*/true);
@@ -880,7 +849,7 @@ static void printout(struct perf_stat_config *config, struct outstate *os,
}
if (ok) {
- if (!config->metric_only && counter->default_metricgroup) {
+ if (!config->metric_only && counter->default_metricgroup && !counter->default_show_events) {
void *from = NULL;
aggr_printout(config, os, os->evsel, os->id, os->aggr_nr);
@@ -902,7 +871,7 @@ static void printout(struct perf_stat_config *config, struct outstate *os,
&num, from, &out);
} while (from != NULL);
} else {
- perf_stat__print_shadow_stats(config, counter, uval, aggr_idx, &out);
+ perf_stat__print_shadow_stats(config, counter, aggr_idx, &out);
}
} else {
pm(config, os, METRIC_THRESHOLD_UNKNOWN, /*format=*/NULL, /*unit=*/NULL, /*val=*/0);
@@ -944,6 +913,9 @@ static bool should_skip_zero_counter(struct perf_stat_config *config,
if (verbose == 0 && counter->skippable && !counter->supported)
return true;
+ /* Metric only counts won't be displayed but the metric wants to be computed. */
+ if (config->metric_only)
+ return false;
/*
* Skip value 0 when enabling --per-thread globally,
* otherwise it will have too many 0 output.
@@ -1274,7 +1246,7 @@ static void print_metric_headers(struct perf_stat_config *config,
os.evsel = counter;
- perf_stat__print_shadow_stats(config, counter, 0, 0, &out);
+ perf_stat__print_shadow_stats(config, counter, /*aggr_idx=*/0, &out);
}
if (!config->json_output)
diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c
index abaf6b579bfc..9c83f7d96caa 100644
--- a/tools/perf/util/stat-shadow.c
+++ b/tools/perf/util/stat-shadow.c
@@ -1,4 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
+#include <errno.h>
#include <math.h>
#include <stdio.h>
#include "evsel.h"
@@ -17,361 +18,32 @@
#include "util/hashmap.h"
#include "tool_pmu.h"
-struct stats walltime_nsecs_stats;
-struct rusage_stats ru_stats;
-
-enum {
- CTX_BIT_USER = 1 << 0,
- CTX_BIT_KERNEL = 1 << 1,
- CTX_BIT_HV = 1 << 2,
- CTX_BIT_HOST = 1 << 3,
- CTX_BIT_IDLE = 1 << 4,
- CTX_BIT_MAX = 1 << 5,
-};
-
-enum stat_type {
- STAT_NONE = 0,
- STAT_NSECS,
- STAT_CYCLES,
- STAT_INSTRUCTIONS,
- STAT_STALLED_CYCLES_FRONT,
- STAT_STALLED_CYCLES_BACK,
- STAT_BRANCHES,
- STAT_BRANCH_MISS,
- STAT_CACHE_REFS,
- STAT_CACHE_MISSES,
- STAT_L1_DCACHE,
- STAT_L1_ICACHE,
- STAT_LL_CACHE,
- STAT_ITLB_CACHE,
- STAT_DTLB_CACHE,
- STAT_L1D_MISS,
- STAT_L1I_MISS,
- STAT_LL_MISS,
- STAT_DTLB_MISS,
- STAT_ITLB_MISS,
- STAT_MAX
-};
-
-static int evsel_context(const struct evsel *evsel)
+static bool tool_pmu__is_time_event(const struct perf_stat_config *config,
+ const struct evsel *evsel, int *tool_aggr_idx)
{
- int ctx = 0;
-
- if (evsel->core.attr.exclude_kernel)
- ctx |= CTX_BIT_KERNEL;
- if (evsel->core.attr.exclude_user)
- ctx |= CTX_BIT_USER;
- if (evsel->core.attr.exclude_hv)
- ctx |= CTX_BIT_HV;
- if (evsel->core.attr.exclude_host)
- ctx |= CTX_BIT_HOST;
- if (evsel->core.attr.exclude_idle)
- ctx |= CTX_BIT_IDLE;
-
- return ctx;
-}
-
-void perf_stat__reset_shadow_stats(void)
-{
- memset(&walltime_nsecs_stats, 0, sizeof(walltime_nsecs_stats));
- memset(&ru_stats, 0, sizeof(ru_stats));
-}
-
-static enum stat_type evsel__stat_type(struct evsel *evsel)
-{
- /* Fake perf_hw_cache_op_id values for use with evsel__match. */
- u64 PERF_COUNT_hw_cache_l1d_miss = PERF_COUNT_HW_CACHE_L1D |
- ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
- ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16);
- u64 PERF_COUNT_hw_cache_l1i_miss = PERF_COUNT_HW_CACHE_L1I |
- ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
- ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16);
- u64 PERF_COUNT_hw_cache_ll_miss = PERF_COUNT_HW_CACHE_LL |
- ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
- ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16);
- u64 PERF_COUNT_hw_cache_dtlb_miss = PERF_COUNT_HW_CACHE_DTLB |
- ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
- ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16);
- u64 PERF_COUNT_hw_cache_itlb_miss = PERF_COUNT_HW_CACHE_ITLB |
- ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
- ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16);
-
- if (evsel__is_clock(evsel))
- return STAT_NSECS;
- else if (evsel__match(evsel, HARDWARE, HW_CPU_CYCLES))
- return STAT_CYCLES;
- else if (evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS))
- return STAT_INSTRUCTIONS;
- else if (evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_FRONTEND))
- return STAT_STALLED_CYCLES_FRONT;
- else if (evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_BACKEND))
- return STAT_STALLED_CYCLES_BACK;
- else if (evsel__match(evsel, HARDWARE, HW_BRANCH_INSTRUCTIONS))
- return STAT_BRANCHES;
- else if (evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES))
- return STAT_BRANCH_MISS;
- else if (evsel__match(evsel, HARDWARE, HW_CACHE_REFERENCES))
- return STAT_CACHE_REFS;
- else if (evsel__match(evsel, HARDWARE, HW_CACHE_MISSES))
- return STAT_CACHE_MISSES;
- else if (evsel__match(evsel, HW_CACHE, HW_CACHE_L1D))
- return STAT_L1_DCACHE;
- else if (evsel__match(evsel, HW_CACHE, HW_CACHE_L1I))
- return STAT_L1_ICACHE;
- else if (evsel__match(evsel, HW_CACHE, HW_CACHE_LL))
- return STAT_LL_CACHE;
- else if (evsel__match(evsel, HW_CACHE, HW_CACHE_DTLB))
- return STAT_DTLB_CACHE;
- else if (evsel__match(evsel, HW_CACHE, HW_CACHE_ITLB))
- return STAT_ITLB_CACHE;
- else if (evsel__match(evsel, HW_CACHE, hw_cache_l1d_miss))
- return STAT_L1D_MISS;
- else if (evsel__match(evsel, HW_CACHE, hw_cache_l1i_miss))
- return STAT_L1I_MISS;
- else if (evsel__match(evsel, HW_CACHE, hw_cache_ll_miss))
- return STAT_LL_MISS;
- else if (evsel__match(evsel, HW_CACHE, hw_cache_dtlb_miss))
- return STAT_DTLB_MISS;
- else if (evsel__match(evsel, HW_CACHE, hw_cache_itlb_miss))
- return STAT_ITLB_MISS;
- return STAT_NONE;
-}
-
-static enum metric_threshold_classify get_ratio_thresh(const double ratios[3], double val)
-{
- assert(ratios[0] > ratios[1]);
- assert(ratios[1] > ratios[2]);
-
- return val > ratios[1]
- ? (val > ratios[0] ? METRIC_THRESHOLD_BAD : METRIC_THRESHOLD_NEARLY_BAD)
- : (val > ratios[2] ? METRIC_THRESHOLD_LESS_GOOD : METRIC_THRESHOLD_GOOD);
-}
-
-static double find_stat(const struct evsel *evsel, int aggr_idx, enum stat_type type)
-{
- struct evsel *cur;
- int evsel_ctx = evsel_context(evsel);
- struct perf_pmu *evsel_pmu = evsel__find_pmu(evsel);
-
- evlist__for_each_entry(evsel->evlist, cur) {
- struct perf_stat_aggr *aggr;
-
- /* Ignore the evsel that is being searched from. */
- if (evsel == cur)
- continue;
-
- /* Ignore evsels that are part of different groups. */
- if (evsel->core.leader->nr_members > 1 &&
- evsel->core.leader != cur->core.leader)
- continue;
- /* Ignore evsels with mismatched modifiers. */
- if (evsel_ctx != evsel_context(cur))
- continue;
- /* Ignore if not the cgroup we're looking for. */
- if (evsel->cgrp != cur->cgrp)
- continue;
- /* Ignore if not the stat we're looking for. */
- if (type != evsel__stat_type(cur))
- continue;
-
- /*
- * Except the SW CLOCK events,
- * ignore if not the PMU we're looking for.
- */
- if ((type != STAT_NSECS) && (evsel_pmu != evsel__find_pmu(cur)))
- continue;
-
- aggr = &cur->stats->aggr[aggr_idx];
- if (type == STAT_NSECS)
- return aggr->counts.val;
- return aggr->counts.val * cur->scale;
- }
- return 0.0;
-}
-
-static void print_ratio(struct perf_stat_config *config,
- const struct evsel *evsel, int aggr_idx,
- double numerator, struct perf_stat_output_ctx *out,
- enum stat_type denominator_type,
- const double thresh_ratios[3], const char *_unit)
-{
- double denominator = find_stat(evsel, aggr_idx, denominator_type);
- double ratio = 0;
- enum metric_threshold_classify thresh = METRIC_THRESHOLD_UNKNOWN;
- const char *fmt = NULL;
- const char *unit = NULL;
-
- if (numerator && denominator) {
- ratio = numerator / denominator * 100.0;
- thresh = get_ratio_thresh(thresh_ratios, ratio);
- fmt = "%7.2f%%";
- unit = _unit;
- }
- out->print_metric(config, out->ctx, thresh, fmt, unit, ratio);
-}
-
-static void print_stalled_cycles_front(struct perf_stat_config *config,
- const struct evsel *evsel,
- int aggr_idx, double stalled,
- struct perf_stat_output_ctx *out)
-{
- const double thresh_ratios[3] = {50.0, 30.0, 10.0};
-
- print_ratio(config, evsel, aggr_idx, stalled, out, STAT_CYCLES, thresh_ratios,
- "frontend cycles idle");
-}
-
-static void print_stalled_cycles_back(struct perf_stat_config *config,
- const struct evsel *evsel,
- int aggr_idx, double stalled,
- struct perf_stat_output_ctx *out)
-{
- const double thresh_ratios[3] = {75.0, 50.0, 20.0};
-
- print_ratio(config, evsel, aggr_idx, stalled, out, STAT_CYCLES, thresh_ratios,
- "backend cycles idle");
-}
-
-static void print_branch_miss(struct perf_stat_config *config,
- const struct evsel *evsel,
- int aggr_idx, double misses,
- struct perf_stat_output_ctx *out)
-{
- const double thresh_ratios[3] = {20.0, 10.0, 5.0};
-
- print_ratio(config, evsel, aggr_idx, misses, out, STAT_BRANCHES, thresh_ratios,
- "of all branches");
-}
-
-static void print_l1d_miss(struct perf_stat_config *config,
- const struct evsel *evsel,
- int aggr_idx, double misses,
- struct perf_stat_output_ctx *out)
-{
- const double thresh_ratios[3] = {20.0, 10.0, 5.0};
-
- print_ratio(config, evsel, aggr_idx, misses, out, STAT_L1_DCACHE, thresh_ratios,
- "of all L1-dcache accesses");
-}
-
-static void print_l1i_miss(struct perf_stat_config *config,
- const struct evsel *evsel,
- int aggr_idx, double misses,
- struct perf_stat_output_ctx *out)
-{
- const double thresh_ratios[3] = {20.0, 10.0, 5.0};
-
- print_ratio(config, evsel, aggr_idx, misses, out, STAT_L1_ICACHE, thresh_ratios,
- "of all L1-icache accesses");
-}
-
-static void print_ll_miss(struct perf_stat_config *config,
- const struct evsel *evsel,
- int aggr_idx, double misses,
- struct perf_stat_output_ctx *out)
-{
- const double thresh_ratios[3] = {20.0, 10.0, 5.0};
-
- print_ratio(config, evsel, aggr_idx, misses, out, STAT_LL_CACHE, thresh_ratios,
- "of all LL-cache accesses");
-}
-
-static void print_dtlb_miss(struct perf_stat_config *config,
- const struct evsel *evsel,
- int aggr_idx, double misses,
- struct perf_stat_output_ctx *out)
-{
- const double thresh_ratios[3] = {20.0, 10.0, 5.0};
-
- print_ratio(config, evsel, aggr_idx, misses, out, STAT_DTLB_CACHE, thresh_ratios,
- "of all dTLB cache accesses");
-}
+ enum tool_pmu_event event = evsel__tool_event(evsel);
+ int aggr_idx;
-static void print_itlb_miss(struct perf_stat_config *config,
- const struct evsel *evsel,
- int aggr_idx, double misses,
- struct perf_stat_output_ctx *out)
-{
- const double thresh_ratios[3] = {20.0, 10.0, 5.0};
-
- print_ratio(config, evsel, aggr_idx, misses, out, STAT_ITLB_CACHE, thresh_ratios,
- "of all iTLB cache accesses");
-}
-
-static void print_cache_miss(struct perf_stat_config *config,
- const struct evsel *evsel,
- int aggr_idx, double misses,
- struct perf_stat_output_ctx *out)
-{
- const double thresh_ratios[3] = {20.0, 10.0, 5.0};
-
- print_ratio(config, evsel, aggr_idx, misses, out, STAT_CACHE_REFS, thresh_ratios,
- "of all cache refs");
-}
-
-static void print_instructions(struct perf_stat_config *config,
- const struct evsel *evsel,
- int aggr_idx, double instructions,
- struct perf_stat_output_ctx *out)
-{
- print_metric_t print_metric = out->print_metric;
- void *ctxp = out->ctx;
- double cycles = find_stat(evsel, aggr_idx, STAT_CYCLES);
- double max_stalled = max(find_stat(evsel, aggr_idx, STAT_STALLED_CYCLES_FRONT),
- find_stat(evsel, aggr_idx, STAT_STALLED_CYCLES_BACK));
-
- if (cycles) {
- print_metric(config, ctxp, METRIC_THRESHOLD_UNKNOWN, "%7.2f ",
- "insn per cycle", instructions / cycles);
- } else {
- print_metric(config, ctxp, METRIC_THRESHOLD_UNKNOWN, /*fmt=*/NULL,
- "insn per cycle", 0);
- }
- if (max_stalled && instructions) {
- if (out->new_line)
- out->new_line(config, ctxp);
- print_metric(config, ctxp, METRIC_THRESHOLD_UNKNOWN, "%7.2f ",
- "stalled cycles per insn", max_stalled / instructions);
- }
-}
-
-static void print_cycles(struct perf_stat_config *config,
- const struct evsel *evsel,
- int aggr_idx, double cycles,
- struct perf_stat_output_ctx *out)
-{
- double nsecs = find_stat(evsel, aggr_idx, STAT_NSECS);
-
- if (cycles && nsecs) {
- double ratio = cycles / nsecs;
-
- out->print_metric(config, out->ctx, METRIC_THRESHOLD_UNKNOWN, "%8.3f",
- "GHz", ratio);
- } else {
- out->print_metric(config, out->ctx, METRIC_THRESHOLD_UNKNOWN, /*fmt=*/NULL,
- "GHz", 0);
- }
-}
-
-static void print_nsecs(struct perf_stat_config *config,
- const struct evsel *evsel,
- int aggr_idx __maybe_unused, double nsecs,
- struct perf_stat_output_ctx *out)
-{
- print_metric_t print_metric = out->print_metric;
- void *ctxp = out->ctx;
- double wall_time = avg_stats(&walltime_nsecs_stats);
+ if (event != TOOL_PMU__EVENT_DURATION_TIME &&
+ event != TOOL_PMU__EVENT_USER_TIME &&
+ event != TOOL_PMU__EVENT_SYSTEM_TIME)
+ return false;
- if (wall_time) {
- print_metric(config, ctxp, METRIC_THRESHOLD_UNKNOWN, "%8.3f", "CPUs utilized",
- nsecs / (wall_time * evsel->scale));
- } else {
- print_metric(config, ctxp, METRIC_THRESHOLD_UNKNOWN, /*fmt=*/NULL,
- "CPUs utilized", 0);
+ if (config) {
+ cpu_aggr_map__for_each_idx(aggr_idx, config->aggr_map) {
+ if (config->aggr_map->map[aggr_idx].cpu.cpu == 0) {
+ *tool_aggr_idx = aggr_idx;
+ return true;
+ }
+ }
+ pr_debug("Unexpected CPU0 missing in aggregation for tool event.\n");
}
+ *tool_aggr_idx = 0; /* Assume the first aggregation index works. */
+ return true;
}
-static int prepare_metric(const struct metric_expr *mexp,
+static int prepare_metric(struct perf_stat_config *config,
+ const struct metric_expr *mexp,
const struct evsel *evsel,
struct expr_parse_ctx *pctx,
int aggr_idx)
@@ -381,91 +53,51 @@ static int prepare_metric(const struct metric_expr *mexp,
int i;
for (i = 0; metric_events[i]; i++) {
+ int source_count = 0, tool_aggr_idx;
+ bool is_tool_time =
+ tool_pmu__is_time_event(config, metric_events[i], &tool_aggr_idx);
+ struct perf_stat_evsel *ps = metric_events[i]->stats;
+ struct perf_stat_aggr *aggr;
char *n;
double val;
- int source_count = 0;
-
- if (evsel__is_tool(metric_events[i])) {
- struct stats *stats;
- double scale;
- switch (evsel__tool_event(metric_events[i])) {
- case TOOL_PMU__EVENT_DURATION_TIME:
- stats = &walltime_nsecs_stats;
- scale = 1e-9;
- break;
- case TOOL_PMU__EVENT_USER_TIME:
- stats = &ru_stats.ru_utime_usec_stat;
- scale = 1e-6;
- break;
- case TOOL_PMU__EVENT_SYSTEM_TIME:
- stats = &ru_stats.ru_stime_usec_stat;
- scale = 1e-6;
+ /*
+ * If there are multiple uncore PMUs and we're not reading the
+ * leader's stats, determine the stats for the appropriate
+ * uncore PMU.
+ */
+ if (evsel && evsel->metric_leader &&
+ evsel->pmu != evsel->metric_leader->pmu &&
+ mexp->metric_events[i]->pmu == evsel->metric_leader->pmu) {
+ struct evsel *pos;
+
+ evlist__for_each_entry(evsel->evlist, pos) {
+ if (pos->pmu != evsel->pmu)
+ continue;
+ if (pos->metric_leader != mexp->metric_events[i])
+ continue;
+ ps = pos->stats;
+ source_count = 1;
break;
- case TOOL_PMU__EVENT_NONE:
- pr_err("Invalid tool event 'none'");
- abort();
- case TOOL_PMU__EVENT_MAX:
- pr_err("Invalid tool event 'max'");
- abort();
- case TOOL_PMU__EVENT_HAS_PMEM:
- case TOOL_PMU__EVENT_NUM_CORES:
- case TOOL_PMU__EVENT_NUM_CPUS:
- case TOOL_PMU__EVENT_NUM_CPUS_ONLINE:
- case TOOL_PMU__EVENT_NUM_DIES:
- case TOOL_PMU__EVENT_NUM_PACKAGES:
- case TOOL_PMU__EVENT_SLOTS:
- case TOOL_PMU__EVENT_SMT_ON:
- case TOOL_PMU__EVENT_SYSTEM_TSC_FREQ:
- default:
- pr_err("Unexpected tool event '%s'", evsel__name(metric_events[i]));
- abort();
}
- val = avg_stats(stats) * scale;
- source_count = 1;
- } else {
- struct perf_stat_evsel *ps = metric_events[i]->stats;
- struct perf_stat_aggr *aggr;
-
+ }
+ /* Time events are always on CPU0, the first aggregation index. */
+ aggr = &ps->aggr[is_tool_time ? tool_aggr_idx : aggr_idx];
+ if (!aggr || !metric_events[i]->supported) {
/*
- * If there are multiple uncore PMUs and we're not
- * reading the leader's stats, determine the stats for
- * the appropriate uncore PMU.
+ * Not supported events will have a count of 0, which
+ * can be confusing in a metric. Explicitly set the
+ * value to NAN. Not counted events (enable time of 0)
+ * are read as 0.
*/
- if (evsel && evsel->metric_leader &&
- evsel->pmu != evsel->metric_leader->pmu &&
- mexp->metric_events[i]->pmu == evsel->metric_leader->pmu) {
- struct evsel *pos;
-
- evlist__for_each_entry(evsel->evlist, pos) {
- if (pos->pmu != evsel->pmu)
- continue;
- if (pos->metric_leader != mexp->metric_events[i])
- continue;
- ps = pos->stats;
- source_count = 1;
- break;
- }
- }
- aggr = &ps->aggr[aggr_idx];
- if (!aggr)
- break;
-
- if (!metric_events[i]->supported) {
- /*
- * Not supported events will have a count of 0,
- * which can be confusing in a
- * metric. Explicitly set the value to NAN. Not
- * counted events (enable time of 0) are read as
- * 0.
- */
- val = NAN;
- source_count = 0;
- } else {
- val = aggr->counts.val;
- if (!source_count)
- source_count = evsel__source_count(metric_events[i]);
- }
+ val = NAN;
+ source_count = 0;
+ } else {
+ val = aggr->counts.val;
+ if (is_tool_time)
+ val *= 1e-9; /* Convert time event nanoseconds to seconds. */
+ if (!source_count)
+ source_count = evsel__source_count(metric_events[i]);
}
n = strdup(evsel__metric_id(metric_events[i]));
if (!n)
@@ -511,7 +143,7 @@ static void generic_metric(struct perf_stat_config *config,
pctx->sctx.user_requested_cpu_list = strdup(config->user_requested_cpu_list);
pctx->sctx.runtime = runtime;
pctx->sctx.system_wide = config->system_wide;
- i = prepare_metric(mexp, evsel, pctx, aggr_idx);
+ i = prepare_metric(config, mexp, evsel, pctx, aggr_idx);
if (i < 0) {
expr__ctx_free(pctx);
return;
@@ -572,7 +204,7 @@ double test_generic_metric(struct metric_expr *mexp, int aggr_idx)
if (!pctx)
return NAN;
- if (prepare_metric(mexp, /*evsel=*/NULL, pctx, aggr_idx) < 0)
+ if (prepare_metric(/*config=*/NULL, mexp, /*evsel=*/NULL, pctx, aggr_idx) < 0)
goto out;
if (expr__parse(&ratio, pctx, mexp->metric_expr))
@@ -601,11 +233,9 @@ static void perf_stat__print_metricgroup_header(struct perf_stat_config *config,
* event. Only align with other metics from
* different metric events.
*/
- if (last_name && !strcmp(last_name, name)) {
- if (!need_full_name || last_pmu != evsel->pmu) {
- out->print_metricgroup_header(config, ctxp, NULL);
- return;
- }
+ if (last_name && !strcmp(last_name, name) && last_pmu == evsel->pmu) {
+ out->print_metricgroup_header(config, ctxp, NULL);
+ return;
}
if (need_full_name && evsel->pmu)
@@ -665,7 +295,7 @@ void *perf_stat__print_shadow_stats_metricgroup(struct perf_stat_config *config,
if (strcmp(name, mexp->default_metricgroup_name))
return (void *)mexp;
/* Only print the name of the metricgroup once */
- if (!header_printed) {
+ if (!header_printed && !evsel->default_show_events) {
header_printed = true;
perf_stat__print_metricgroup_header(config, evsel, ctxp,
name, out);
@@ -682,56 +312,15 @@ void *perf_stat__print_shadow_stats_metricgroup(struct perf_stat_config *config,
void perf_stat__print_shadow_stats(struct perf_stat_config *config,
struct evsel *evsel,
- double avg, int aggr_idx,
+ int aggr_idx,
struct perf_stat_output_ctx *out)
{
- typedef void (*stat_print_function_t)(struct perf_stat_config *config,
- const struct evsel *evsel,
- int aggr_idx, double misses,
- struct perf_stat_output_ctx *out);
- static const stat_print_function_t stat_print_function[STAT_MAX] = {
- [STAT_INSTRUCTIONS] = print_instructions,
- [STAT_BRANCH_MISS] = print_branch_miss,
- [STAT_L1D_MISS] = print_l1d_miss,
- [STAT_L1I_MISS] = print_l1i_miss,
- [STAT_DTLB_MISS] = print_dtlb_miss,
- [STAT_ITLB_MISS] = print_itlb_miss,
- [STAT_LL_MISS] = print_ll_miss,
- [STAT_CACHE_MISSES] = print_cache_miss,
- [STAT_STALLED_CYCLES_FRONT] = print_stalled_cycles_front,
- [STAT_STALLED_CYCLES_BACK] = print_stalled_cycles_back,
- [STAT_CYCLES] = print_cycles,
- [STAT_NSECS] = print_nsecs,
- };
print_metric_t print_metric = out->print_metric;
void *ctxp = out->ctx;
- int num = 1;
+ int num = 0;
- if (config->iostat_run) {
+ if (config->iostat_run)
iostat_print_metric(config, evsel, out);
- } else {
- stat_print_function_t fn = stat_print_function[evsel__stat_type(evsel)];
-
- if (fn)
- fn(config, evsel, aggr_idx, avg, out);
- else {
- double nsecs = find_stat(evsel, aggr_idx, STAT_NSECS);
-
- if (nsecs) {
- char unit = ' ';
- char unit_buf[10] = "/sec";
- double ratio = convert_unit_double(1000000000.0 * avg / nsecs,
- &unit);
-
- if (unit != ' ')
- snprintf(unit_buf, sizeof(unit_buf), "%c/sec", unit);
- print_metric(config, ctxp, METRIC_THRESHOLD_UNKNOWN, "%8.3f",
- unit_buf, ratio);
- } else {
- num = 0;
- }
- }
- }
perf_stat__print_shadow_stats_metricgroup(config, evsel, aggr_idx,
&num, NULL, out);
diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c
index 50b1a92d16df..976a06e63252 100644
--- a/tools/perf/util/stat.c
+++ b/tools/perf/util/stat.c
@@ -645,7 +645,8 @@ void perf_stat_process_percore(struct perf_stat_config *config, struct evlist *e
evsel__process_percore(evsel);
}
-int perf_event__process_stat_event(struct perf_session *session,
+int perf_event__process_stat_event(const struct perf_tool *tool __maybe_unused,
+ struct perf_session *session,
union perf_event *event)
{
struct perf_counts_values count, *ptr;
@@ -716,59 +717,3 @@ size_t perf_event__fprintf_stat_config(union perf_event *event, FILE *fp)
return ret;
}
-
-int create_perf_stat_counter(struct evsel *evsel,
- struct perf_stat_config *config,
- struct target *target,
- int cpu_map_idx)
-{
- struct perf_event_attr *attr = &evsel->core.attr;
- struct evsel *leader = evsel__leader(evsel);
-
- attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED |
- PERF_FORMAT_TOTAL_TIME_RUNNING;
-
- /*
- * The event is part of non trivial group, let's enable
- * the group read (for leader) and ID retrieval for all
- * members.
- */
- if (leader->core.nr_members > 1)
- attr->read_format |= PERF_FORMAT_ID|PERF_FORMAT_GROUP;
-
- attr->inherit = !config->no_inherit && list_empty(&evsel->bpf_counter_list);
-
- /*
- * Some events get initialized with sample_(period/type) set,
- * like tracepoints. Clear it up for counting.
- */
- attr->sample_period = 0;
-
- if (config->identifier)
- attr->sample_type = PERF_SAMPLE_IDENTIFIER;
-
- if (config->all_user) {
- attr->exclude_kernel = 1;
- attr->exclude_user = 0;
- }
-
- if (config->all_kernel) {
- attr->exclude_kernel = 0;
- attr->exclude_user = 1;
- }
-
- /*
- * Disabling all counters initially, they will be enabled
- * either manually by us or by kernel via enable_on_exec
- * set later.
- */
- if (evsel__is_group_leader(evsel)) {
- attr->disabled = 1;
-
- if (target__enable_on_exec(target))
- attr->enable_on_exec = 1;
- }
-
- return evsel__open_per_cpu_and_thread(evsel, evsel__cpus(evsel), cpu_map_idx,
- evsel->core.threads);
-}
diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h
index 4b0f14ae4e5f..f986911c9296 100644
--- a/tools/perf/util/stat.h
+++ b/tools/perf/util/stat.h
@@ -56,11 +56,6 @@ enum aggr_mode {
AGGR_MAX
};
-struct rusage_stats {
- struct stats ru_utime_usec_stat;
- struct stats ru_stime_usec_stat;
-};
-
typedef struct aggr_cpu_id (*aggr_get_id_t)(struct perf_stat_config *config, struct perf_cpu cpu);
struct perf_stat_config {
@@ -102,7 +97,6 @@ struct perf_stat_config {
const char *csv_sep;
struct stats *walltime_nsecs_stats;
struct rusage ru_data;
- struct rusage_stats *ru_stats;
struct cpu_aggr_map *aggr_map;
aggr_get_id_t aggr_get_id;
struct cpu_aggr_map *cpus_aggr_map;
@@ -132,26 +126,9 @@ static inline void init_stats(struct stats *stats)
stats->max = 0;
}
-static inline void init_rusage_stats(struct rusage_stats *ru_stats) {
- init_stats(&ru_stats->ru_utime_usec_stat);
- init_stats(&ru_stats->ru_stime_usec_stat);
-}
-
-static inline void update_rusage_stats(struct rusage_stats *ru_stats, struct rusage* rusage) {
- const u64 us_to_ns = 1000;
- const u64 s_to_ns = 1000000000;
- update_stats(&ru_stats->ru_utime_usec_stat,
- (rusage->ru_utime.tv_usec * us_to_ns + rusage->ru_utime.tv_sec * s_to_ns));
- update_stats(&ru_stats->ru_stime_usec_stat,
- (rusage->ru_stime.tv_usec * us_to_ns + rusage->ru_stime.tv_sec * s_to_ns));
-}
-
struct evsel;
struct evlist;
-extern struct stats walltime_nsecs_stats;
-extern struct rusage_stats ru_stats;
-
enum metric_threshold_classify {
METRIC_THRESHOLD_UNKNOWN,
METRIC_THRESHOLD_BAD,
@@ -184,7 +161,7 @@ struct perf_stat_output_ctx {
void perf_stat__print_shadow_stats(struct perf_stat_config *config,
struct evsel *evsel,
- double avg, int aggr_idx,
+ int aggr_idx,
struct perf_stat_output_ctx *out);
bool perf_stat__skip_metric_event(struct evsel *evsel, u64 ena, u64 run);
void *perf_stat__print_shadow_stats_metricgroup(struct perf_stat_config *config,
@@ -216,17 +193,14 @@ union perf_event;
struct perf_session;
struct target;
-int perf_event__process_stat_event(struct perf_session *session,
+int perf_event__process_stat_event(const struct perf_tool *tool,
+ struct perf_session *session,
union perf_event *event);
size_t perf_event__fprintf_stat(union perf_event *event, FILE *fp);
size_t perf_event__fprintf_stat_round(union perf_event *event, FILE *fp);
size_t perf_event__fprintf_stat_config(union perf_event *event, FILE *fp);
-int create_perf_stat_counter(struct evsel *evsel,
- struct perf_stat_config *config,
- struct target *target,
- int cpu_map_idx);
void evlist__print_counters(struct evlist *evlist, struct perf_stat_config *config,
struct target *_target, struct timespec *ts, int argc, const char **argv);
diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c
index 6d2c280a1730..957143fbf8a0 100644
--- a/tools/perf/util/symbol-elf.c
+++ b/tools/perf/util/symbol-elf.c
@@ -9,6 +9,7 @@
#include "compress.h"
#include "dso.h"
+#include "libbfd.h"
#include "map.h"
#include "maps.h"
#include "symbol.h"
@@ -24,18 +25,6 @@
#include <symbol/kallsyms.h>
#include <internal/lib.h>
-#ifdef HAVE_LIBBFD_SUPPORT
-#define PACKAGE 'perf'
-#include <bfd.h>
-#endif
-
-#if defined(HAVE_LIBBFD_SUPPORT) || defined(HAVE_CPLUS_DEMANGLE_SUPPORT)
-#ifndef DMGL_PARAMS
-#define DMGL_PARAMS (1 << 0) /* Include function args */
-#define DMGL_ANSI (1 << 1) /* Include const, volatile, etc */
-#endif
-#endif
-
#ifndef EM_AARCH64
#define EM_AARCH64 183 /* ARM 64 bit */
#endif
@@ -871,43 +860,16 @@ out:
return err;
}
-#ifdef HAVE_LIBBFD_BUILDID_SUPPORT
-
static int read_build_id(const char *filename, struct build_id *bid)
{
size_t size = sizeof(bid->data);
- int err = -1;
- bfd *abfd;
-
- abfd = bfd_openr(filename, NULL);
- if (!abfd)
- return -1;
-
- if (!bfd_check_format(abfd, bfd_object)) {
- pr_debug2("%s: cannot read %s bfd file.\n", __func__, filename);
- goto out_close;
- }
-
- if (!abfd->build_id || abfd->build_id->size > size)
- goto out_close;
-
- memcpy(bid->data, abfd->build_id->data, abfd->build_id->size);
- memset(bid->data + abfd->build_id->size, 0, size - abfd->build_id->size);
- err = bid->size = abfd->build_id->size;
-
-out_close:
- bfd_close(abfd);
- return err;
-}
-
-#else // HAVE_LIBBFD_BUILDID_SUPPORT
-
-static int read_build_id(const char *filename, struct build_id *bid)
-{
- size_t size = sizeof(bid->data);
- int fd, err = -1;
+ int fd, err;
Elf *elf;
+ err = libbfd__read_build_id(filename, bid);
+ if (err >= 0)
+ goto out;
+
if (size < BUILD_ID_SIZE)
goto out;
@@ -932,8 +894,6 @@ out:
return err;
}
-#endif // HAVE_LIBBFD_BUILDID_SUPPORT
-
int filename__read_build_id(const char *filename, struct build_id *bid)
{
struct kmod_path m = { .name = NULL, };
@@ -942,6 +902,8 @@ int filename__read_build_id(const char *filename, struct build_id *bid)
if (!filename)
return -EFAULT;
+ if (!is_regular_file(filename))
+ return -EWOULDBLOCK;
err = kmod_path__parse(&m, filename);
if (err)
@@ -1017,44 +979,6 @@ out:
return err;
}
-#ifdef HAVE_LIBBFD_SUPPORT
-
-int filename__read_debuglink(const char *filename, char *debuglink,
- size_t size)
-{
- int err = -1;
- asection *section;
- bfd *abfd;
-
- abfd = bfd_openr(filename, NULL);
- if (!abfd)
- return -1;
-
- if (!bfd_check_format(abfd, bfd_object)) {
- pr_debug2("%s: cannot read %s bfd file.\n", __func__, filename);
- goto out_close;
- }
-
- section = bfd_get_section_by_name(abfd, ".gnu_debuglink");
- if (!section)
- goto out_close;
-
- if (section->size > size)
- goto out_close;
-
- if (!bfd_get_section_contents(abfd, section, debuglink, 0,
- section->size))
- goto out_close;
-
- err = 0;
-
-out_close:
- bfd_close(abfd);
- return err;
-}
-
-#else
-
int filename__read_debuglink(const char *filename, char *debuglink,
size_t size)
{
@@ -1066,6 +990,10 @@ int filename__read_debuglink(const char *filename, char *debuglink,
Elf_Scn *sec;
Elf_Kind ek;
+ err = libbfd_filename__read_debuglink(filename, debuglink, size);
+ if (err >= 0)
+ goto out;
+
fd = open(filename, O_RDONLY);
if (fd < 0)
goto out;
@@ -1107,8 +1035,6 @@ out:
return err;
}
-#endif
-
bool symsrc__possibly_runtime(struct symsrc *ss)
{
return ss->dynsym || ss->opdsec;
@@ -1521,8 +1447,11 @@ static int dso__process_kernel_symbol(struct dso *dso, struct map *map,
map__set_mapping_type(curr_map, MAPPING_TYPE__IDENTITY);
}
dso__set_symtab_type(curr_dso, dso__symtab_type(dso));
- if (maps__insert(kmaps, curr_map))
+ if (maps__insert(kmaps, curr_map)) {
+ dso__put(curr_dso);
+ map__put(curr_map);
return -1;
+ }
dsos__add(&maps__machine(kmaps)->dsos, curr_dso);
dso__set_loaded(curr_dso);
dso__put(*curr_dsop);
diff --git a/tools/perf/util/symbol-minimal.c b/tools/perf/util/symbol-minimal.c
index 7201494c5c20..c6b17c14a2e9 100644
--- a/tools/perf/util/symbol-minimal.c
+++ b/tools/perf/util/symbol-minimal.c
@@ -4,7 +4,6 @@
#include <errno.h>
#include <unistd.h>
-#include <stdio.h>
#include <fcntl.h>
#include <string.h>
#include <stdlib.h>
@@ -43,7 +42,7 @@ static int read_build_id(void *note_data, size_t note_len, struct build_id *bid,
void *ptr;
ptr = note_data;
- while (ptr < (note_data + note_len)) {
+ while ((ptr + sizeof(*nhdr)) < (note_data + note_len)) {
const char *name;
size_t namesz, descsz;
@@ -88,11 +87,8 @@ int filename__read_debuglink(const char *filename __maybe_unused,
*/
int filename__read_build_id(const char *filename, struct build_id *bid)
{
- FILE *fp;
- int ret = -1;
+ int fd, ret = -1;
bool need_swap = false, elf32;
- u8 e_ident[EI_NIDENT];
- int i;
union {
struct {
Elf32_Ehdr ehdr32;
@@ -103,28 +99,32 @@ int filename__read_build_id(const char *filename, struct build_id *bid)
Elf64_Phdr *phdr64;
};
} hdrs;
- void *phdr;
- size_t phdr_size;
- void *buf = NULL;
- size_t buf_size = 0;
+ void *phdr, *buf = NULL;
+ ssize_t phdr_size, ehdr_size, buf_size = 0;
+
+ if (!filename)
+ return -EFAULT;
+ if (!is_regular_file(filename))
+ return -EWOULDBLOCK;
- fp = fopen(filename, "r");
- if (fp == NULL)
+ fd = open(filename, O_RDONLY);
+ if (fd < 0)
return -1;
- if (fread(e_ident, sizeof(e_ident), 1, fp) != 1)
+ if (read(fd, hdrs.ehdr32.e_ident, EI_NIDENT) != EI_NIDENT)
goto out;
- if (memcmp(e_ident, ELFMAG, SELFMAG) ||
- e_ident[EI_VERSION] != EV_CURRENT)
+ if (memcmp(hdrs.ehdr32.e_ident, ELFMAG, SELFMAG) ||
+ hdrs.ehdr32.e_ident[EI_VERSION] != EV_CURRENT)
goto out;
- need_swap = check_need_swap(e_ident[EI_DATA]);
- elf32 = e_ident[EI_CLASS] == ELFCLASS32;
+ need_swap = check_need_swap(hdrs.ehdr32.e_ident[EI_DATA]);
+ elf32 = hdrs.ehdr32.e_ident[EI_CLASS] == ELFCLASS32;
+ ehdr_size = (elf32 ? sizeof(hdrs.ehdr32) : sizeof(hdrs.ehdr64)) - EI_NIDENT;
- if (fread(elf32 ? (void *)&hdrs.ehdr32 : (void *)&hdrs.ehdr64,
- elf32 ? sizeof(hdrs.ehdr32) : sizeof(hdrs.ehdr64),
- 1, fp) != 1)
+ if (read(fd,
+ (elf32 ? (void *)&hdrs.ehdr32 : (void *)&hdrs.ehdr64) + EI_NIDENT,
+ ehdr_size) != ehdr_size)
goto out;
if (need_swap) {
@@ -138,14 +138,18 @@ int filename__read_build_id(const char *filename, struct build_id *bid)
hdrs.ehdr64.e_phnum = bswap_16(hdrs.ehdr64.e_phnum);
}
}
- phdr_size = elf32 ? hdrs.ehdr32.e_phentsize * hdrs.ehdr32.e_phnum
- : hdrs.ehdr64.e_phentsize * hdrs.ehdr64.e_phnum;
+ if ((elf32 && hdrs.ehdr32.e_phentsize != sizeof(Elf32_Phdr)) ||
+ (!elf32 && hdrs.ehdr64.e_phentsize != sizeof(Elf64_Phdr)))
+ goto out;
+
+ phdr_size = elf32 ? sizeof(Elf32_Phdr) * hdrs.ehdr32.e_phnum
+ : sizeof(Elf64_Phdr) * hdrs.ehdr64.e_phnum;
phdr = malloc(phdr_size);
if (phdr == NULL)
goto out;
- fseek(fp, elf32 ? hdrs.ehdr32.e_phoff : hdrs.ehdr64.e_phoff, SEEK_SET);
- if (fread(phdr, phdr_size, 1, fp) != 1)
+ lseek(fd, elf32 ? hdrs.ehdr32.e_phoff : hdrs.ehdr64.e_phoff, SEEK_SET);
+ if (read(fd, phdr, phdr_size) != phdr_size)
goto out_free;
if (elf32)
@@ -153,8 +157,8 @@ int filename__read_build_id(const char *filename, struct build_id *bid)
else
hdrs.phdr64 = phdr;
- for (i = 0; i < elf32 ? hdrs.ehdr32.e_phnum : hdrs.ehdr64.e_phnum; i++) {
- size_t p_filesz;
+ for (int i = 0; i < (elf32 ? hdrs.ehdr32.e_phnum : hdrs.ehdr64.e_phnum); i++) {
+ ssize_t p_filesz;
if (need_swap) {
if (elf32) {
@@ -180,8 +184,8 @@ int filename__read_build_id(const char *filename, struct build_id *bid)
goto out_free;
buf = tmp;
}
- fseek(fp, elf32 ? hdrs.phdr32[i].p_offset : hdrs.phdr64[i].p_offset, SEEK_SET);
- if (fread(buf, p_filesz, 1, fp) != 1)
+ lseek(fd, elf32 ? hdrs.phdr32[i].p_offset : hdrs.phdr64[i].p_offset, SEEK_SET);
+ if (read(fd, buf, p_filesz) != p_filesz)
goto out_free;
ret = read_build_id(buf, p_filesz, bid, need_swap);
@@ -194,7 +198,7 @@ out_free:
free(buf);
free(phdr);
out:
- fclose(fp);
+ close(fd);
return ret;
}
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index e816e4220d33..814f960fa8f8 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -107,9 +107,18 @@ static enum dso_binary_type binary_type_symtab[] = {
static bool symbol_type__filter(char __symbol_type)
{
// Since 'U' == undefined and 'u' == unique global symbol, we can't use toupper there
+ // 'N' is for debugging symbols, 'n' is a non-data, non-code, non-debug read-only section.
+ // According to 'man nm'.
+ // 'N' first seen in:
+ // ffffffff9b35d130 N __pfx__RNCINvNtNtNtCsbDUBuN8AbD4_4core4iter8adapters3map12map_try_foldjNtCs6vVzKs5jPr6_12drm_panic_qr7VersionuINtNtNtBa_3ops12control_flow11ControlFlowB10_ENcB10_0NCINvNvNtNtNtB8_6traits8iterator8Iterator4find5checkB10_NCNvMB12_B10_13from_segments0E0E0B12_
+ // a seemingly Rust mangled name
+ // Ditto for '1':
+ // root@x1:~# grep ' 1 ' /proc/kallsyms
+ // ffffffffb098bc00 1 __pfx__RNCINvNtNtNtCsfwaGRd4cjqE_4core4iter8adapters3map12map_try_foldjNtCskFudTml27HW_12drm_panic_qr7VersionuINtNtNtBa_3ops12control_flow11ControlFlowB10_ENcB10_0NCINvNvNtNtNtB8_6traits8iterator8Iterator4find5checkB10_NCNvMB12_B10_13from_segments0E0E0B12_
+ // ffffffffb098bc10 1 _RNCINvNtNtNtCsfwaGRd4cjqE_4core4iter8adapters3map12map_try_foldjNtCskFudTml27HW_12drm_panic_qr7VersionuINtNtNtBa_3ops12control_flow11ControlFlowB10_ENcB10_0NCINvNvNtNtNtB8_6traits8iterator8Iterator4find5checkB10_NCNvMB12_B10_13from_segments0E0E0B12_
char symbol_type = toupper(__symbol_type);
return symbol_type == 'T' || symbol_type == 'W' || symbol_type == 'D' || symbol_type == 'B' ||
- __symbol_type == 'u' || __symbol_type == 'l';
+ __symbol_type == 'u' || __symbol_type == 'l' || __symbol_type == 'N' || __symbol_type == '1';
}
static int prefix_underscores_count(const char *str)
@@ -946,7 +955,8 @@ static int maps__split_kallsyms(struct maps *kmaps, struct dso *dso, u64 delta,
pos->end -= delta;
}
- if (count == 0) {
+ if (map__start(initial_map) <= (pos->start + delta) &&
+ (pos->start + delta) < map__end(initial_map)) {
map__zput(curr_map);
curr_map = map__get(initial_map);
goto add_symbol;
@@ -955,11 +965,11 @@ static int maps__split_kallsyms(struct maps *kmaps, struct dso *dso, u64 delta,
if (dso__kernel(dso) == DSO_SPACE__KERNEL_GUEST)
snprintf(dso_name, sizeof(dso_name),
"[guest.kernel].%d",
- kernel_range++);
+ kernel_range);
else
snprintf(dso_name, sizeof(dso_name),
"[kernel].%d",
- kernel_range++);
+ kernel_range);
ndso = dso__new(dso_name);
map__zput(curr_map);
@@ -967,6 +977,7 @@ static int maps__split_kallsyms(struct maps *kmaps, struct dso *dso, u64 delta,
return -1;
dso__set_kernel(ndso, dso__kernel(dso));
+ dso__set_loaded(ndso);
curr_map = map__new2(pos->start, ndso);
if (curr_map == NULL) {
@@ -980,6 +991,7 @@ static int maps__split_kallsyms(struct maps *kmaps, struct dso *dso, u64 delta,
dso__put(ndso);
return -1;
}
+ dso__put(ndso);
++kernel_range;
} else if (delta) {
/* Kernel was relocated at boot time */
@@ -1584,137 +1596,6 @@ out_failure:
return -1;
}
-#ifdef HAVE_LIBBFD_SUPPORT
-#define PACKAGE 'perf'
-#include <bfd.h>
-
-static int bfd_symbols__cmpvalue(const void *a, const void *b)
-{
- const asymbol *as = *(const asymbol **)a, *bs = *(const asymbol **)b;
-
- if (bfd_asymbol_value(as) != bfd_asymbol_value(bs))
- return bfd_asymbol_value(as) - bfd_asymbol_value(bs);
-
- return bfd_asymbol_name(as)[0] - bfd_asymbol_name(bs)[0];
-}
-
-static int bfd2elf_binding(asymbol *symbol)
-{
- if (symbol->flags & BSF_WEAK)
- return STB_WEAK;
- if (symbol->flags & BSF_GLOBAL)
- return STB_GLOBAL;
- if (symbol->flags & BSF_LOCAL)
- return STB_LOCAL;
- return -1;
-}
-
-int dso__load_bfd_symbols(struct dso *dso, const char *debugfile)
-{
- int err = -1;
- long symbols_size, symbols_count, i;
- asection *section;
- asymbol **symbols, *sym;
- struct symbol *symbol;
- bfd *abfd;
- u64 start, len;
-
- abfd = bfd_openr(debugfile, NULL);
- if (!abfd)
- return -1;
-
- if (!bfd_check_format(abfd, bfd_object)) {
- pr_debug2("%s: cannot read %s bfd file.\n", __func__,
- dso__long_name(dso));
- goto out_close;
- }
-
- if (bfd_get_flavour(abfd) == bfd_target_elf_flavour)
- goto out_close;
-
- symbols_size = bfd_get_symtab_upper_bound(abfd);
- if (symbols_size == 0) {
- bfd_close(abfd);
- return 0;
- }
-
- if (symbols_size < 0)
- goto out_close;
-
- symbols = malloc(symbols_size);
- if (!symbols)
- goto out_close;
-
- symbols_count = bfd_canonicalize_symtab(abfd, symbols);
- if (symbols_count < 0)
- goto out_free;
-
- section = bfd_get_section_by_name(abfd, ".text");
- if (section) {
- for (i = 0; i < symbols_count; ++i) {
- if (!strcmp(bfd_asymbol_name(symbols[i]), "__ImageBase") ||
- !strcmp(bfd_asymbol_name(symbols[i]), "__image_base__"))
- break;
- }
- if (i < symbols_count) {
- /* PE symbols can only have 4 bytes, so use .text high bits */
- u64 text_offset = (section->vma - (u32)section->vma)
- + (u32)bfd_asymbol_value(symbols[i]);
- dso__set_text_offset(dso, text_offset);
- dso__set_text_end(dso, (section->vma - text_offset) + section->size);
- } else {
- dso__set_text_offset(dso, section->vma - section->filepos);
- dso__set_text_end(dso, section->filepos + section->size);
- }
- }
-
- qsort(symbols, symbols_count, sizeof(asymbol *), bfd_symbols__cmpvalue);
-
-#ifdef bfd_get_section
-#define bfd_asymbol_section bfd_get_section
-#endif
- for (i = 0; i < symbols_count; ++i) {
- sym = symbols[i];
- section = bfd_asymbol_section(sym);
- if (bfd2elf_binding(sym) < 0)
- continue;
-
- while (i + 1 < symbols_count &&
- bfd_asymbol_section(symbols[i + 1]) == section &&
- bfd2elf_binding(symbols[i + 1]) < 0)
- i++;
-
- if (i + 1 < symbols_count &&
- bfd_asymbol_section(symbols[i + 1]) == section)
- len = symbols[i + 1]->value - sym->value;
- else
- len = section->size - sym->value;
-
- start = bfd_asymbol_value(sym) - dso__text_offset(dso);
- symbol = symbol__new(start, len, bfd2elf_binding(sym), STT_FUNC,
- bfd_asymbol_name(sym));
- if (!symbol)
- goto out_free;
-
- symbols__insert(dso__symbols(dso), symbol);
- }
-#ifdef bfd_get_section
-#undef bfd_asymbol_section
-#endif
-
- symbols__fixup_end(dso__symbols(dso), false);
- symbols__fixup_duplicate(dso__symbols(dso));
- dso__set_adjust_symbols(dso, true);
-
- err = 0;
-out_free:
- free(symbols);
-out_close:
- bfd_close(abfd);
- return err;
-}
-#endif
-
static bool dso__is_compatible_symtab_type(struct dso *dso, bool kmod,
enum dso_binary_type type)
{
@@ -1869,10 +1750,9 @@ int dso__load(struct dso *dso, struct map *map)
/*
* Read the build id if possible. This is required for
- * DSO_BINARY_TYPE__BUILDID_DEBUGINFO to work
+ * DSO_BINARY_TYPE__BUILDID_DEBUGINFO to work.
*/
- if (!dso__has_build_id(dso) &&
- is_regular_file(dso__long_name(dso))) {
+ if (!dso__has_build_id(dso)) {
struct build_id bid = { .size = 0, };
__symbol__join_symfs(name, PATH_MAX, dso__long_name(dso));
@@ -2127,6 +2007,7 @@ static char *dso__find_kallsyms(struct dso *dso, struct map *map)
char sbuild_id[SBUILD_ID_SIZE];
bool is_host = false;
char path[PATH_MAX];
+ struct maps *kmaps = map__kmaps(map);
if (!dso__has_build_id(dso)) {
/*
@@ -2163,8 +2044,13 @@ static char *dso__find_kallsyms(struct dso *dso, struct map *map)
return strdup(path);
/* Use current /proc/kallsyms if possible */
- if (is_host) {
proc_kallsyms:
+ if (kmaps) {
+ struct machine *machine = maps__machine(kmaps);
+
+ scnprintf(path, sizeof(path), "%s/proc/kallsyms", machine->root_dir);
+ return strdup(path);
+ } else if (is_host) {
return strdup("/proc/kallsyms");
}
diff --git a/tools/perf/util/synthetic-events.c b/tools/perf/util/synthetic-events.c
index cb2c1ace304a..2ba9fa25e00a 100644
--- a/tools/perf/util/synthetic-events.c
+++ b/tools/perf/util/synthetic-events.c
@@ -389,7 +389,7 @@ static void perf_record_mmap2__read_build_id(struct perf_record_mmap2 *event,
dso_id.ino_generation = event->ino_generation;
dso_id.mmap2_valid = true;
dso_id.mmap2_ino_generation_valid = true;
- };
+ }
dso = dsos__findnew_id(&machine->dsos, event->filename, &dso_id);
if (dso && dso__has_build_id(dso)) {
diff --git a/tools/perf/util/synthetic-events.h b/tools/perf/util/synthetic-events.h
index ee29615d68e5..f8588b6cf11a 100644
--- a/tools/perf/util/synthetic-events.h
+++ b/tools/perf/util/synthetic-events.h
@@ -107,24 +107,9 @@ int machine__synthesize_threads(struct machine *machine, struct target *target,
struct perf_thread_map *threads, bool needs_mmap, bool data_mmap,
unsigned int nr_threads_synthesize);
-#ifdef HAVE_AUXTRACE_SUPPORT
int perf_event__synthesize_auxtrace_info(struct auxtrace_record *itr, const struct perf_tool *tool,
struct perf_session *session, perf_event__handler_t process);
-#else // HAVE_AUXTRACE_SUPPORT
-
-#include <errno.h>
-
-static inline int
-perf_event__synthesize_auxtrace_info(struct auxtrace_record *itr __maybe_unused,
- const struct perf_tool *tool __maybe_unused,
- struct perf_session *session __maybe_unused,
- perf_event__handler_t process __maybe_unused)
-{
- return -EINVAL;
-}
-#endif // HAVE_AUXTRACE_SUPPORT
-
#ifdef HAVE_LIBBPF_SUPPORT
int perf_event__synthesize_bpf_events(struct perf_session *session, perf_event__handler_t process,
struct machine *machine, struct record_opts *opts);
diff --git a/tools/perf/util/tool.c b/tools/perf/util/tool.c
index e83c7ababc2a..27ba5849c74a 100644
--- a/tools/perf/util/tool.c
+++ b/tools/perf/util/tool.c
@@ -13,7 +13,8 @@
#include <unistd.h>
#ifdef HAVE_ZSTD_SUPPORT
-static int perf_session__process_compressed_event(struct perf_session *session,
+static int perf_session__process_compressed_event(const struct perf_tool *tool __maybe_unused,
+ struct perf_session *session,
union perf_event *event, u64 file_offset,
const char *file_path)
{
@@ -79,10 +80,9 @@ static int perf_session__process_compressed_event(struct perf_session *session,
}
#endif
-static int process_event_synth_tracing_data_stub(struct perf_session *session
- __maybe_unused,
- union perf_event *event
- __maybe_unused)
+static int process_event_synth_tracing_data_stub(const struct perf_tool *tool __maybe_unused,
+ struct perf_session *session __maybe_unused,
+ union perf_event *event __maybe_unused)
{
dump_printf(": unhandled!\n");
return 0;
@@ -90,8 +90,7 @@ static int process_event_synth_tracing_data_stub(struct perf_session *session
static int process_event_synth_attr_stub(const struct perf_tool *tool __maybe_unused,
union perf_event *event __maybe_unused,
- struct evlist **pevlist
- __maybe_unused)
+ struct evlist **pevlist __maybe_unused)
{
dump_printf(": unhandled!\n");
return 0;
@@ -99,8 +98,7 @@ static int process_event_synth_attr_stub(const struct perf_tool *tool __maybe_un
static int process_event_synth_event_update_stub(const struct perf_tool *tool __maybe_unused,
union perf_event *event __maybe_unused,
- struct evlist **pevlist
- __maybe_unused)
+ struct evlist **pevlist __maybe_unused)
{
if (dump_trace)
perf_event__fprintf_event_update(event, stdout);
@@ -151,7 +149,8 @@ static int skipn(int fd, off_t n)
return 0;
}
-static s64 process_event_auxtrace_stub(struct perf_session *session __maybe_unused,
+static s64 process_event_auxtrace_stub(const struct perf_tool *tool __maybe_unused,
+ struct perf_session *session __maybe_unused,
union perf_event *event)
{
dump_printf(": unhandled!\n");
@@ -160,7 +159,8 @@ static s64 process_event_auxtrace_stub(struct perf_session *session __maybe_unus
return event->auxtrace.size;
}
-static int process_event_op2_stub(struct perf_session *session __maybe_unused,
+static int process_event_op2_stub(const struct perf_tool *tool __maybe_unused,
+ struct perf_session *session __maybe_unused,
union perf_event *event __maybe_unused)
{
dump_printf(": unhandled!\n");
@@ -169,7 +169,8 @@ static int process_event_op2_stub(struct perf_session *session __maybe_unused,
static
-int process_event_thread_map_stub(struct perf_session *session __maybe_unused,
+int process_event_thread_map_stub(const struct perf_tool *tool __maybe_unused,
+ struct perf_session *session __maybe_unused,
union perf_event *event __maybe_unused)
{
if (dump_trace)
@@ -180,7 +181,8 @@ int process_event_thread_map_stub(struct perf_session *session __maybe_unused,
}
static
-int process_event_cpu_map_stub(struct perf_session *session __maybe_unused,
+int process_event_cpu_map_stub(const struct perf_tool *tool __maybe_unused,
+ struct perf_session *session __maybe_unused,
union perf_event *event __maybe_unused)
{
if (dump_trace)
@@ -191,7 +193,8 @@ int process_event_cpu_map_stub(struct perf_session *session __maybe_unused,
}
static
-int process_event_stat_config_stub(struct perf_session *session __maybe_unused,
+int process_event_stat_config_stub(const struct perf_tool *tool __maybe_unused,
+ struct perf_session *session __maybe_unused,
union perf_event *event __maybe_unused)
{
if (dump_trace)
@@ -201,7 +204,8 @@ int process_event_stat_config_stub(struct perf_session *session __maybe_unused,
return 0;
}
-static int process_stat_stub(struct perf_session *perf_session __maybe_unused,
+static int process_stat_stub(const struct perf_tool *tool __maybe_unused,
+ struct perf_session *perf_session __maybe_unused,
union perf_event *event)
{
if (dump_trace)
@@ -211,7 +215,8 @@ static int process_stat_stub(struct perf_session *perf_session __maybe_unused,
return 0;
}
-static int process_stat_round_stub(struct perf_session *perf_session __maybe_unused,
+static int process_stat_round_stub(const struct perf_tool *tool __maybe_unused,
+ struct perf_session *perf_session __maybe_unused,
union perf_event *event)
{
if (dump_trace)
@@ -221,7 +226,8 @@ static int process_stat_round_stub(struct perf_session *perf_session __maybe_unu
return 0;
}
-static int process_event_time_conv_stub(struct perf_session *perf_session __maybe_unused,
+static int process_event_time_conv_stub(const struct perf_tool *tool __maybe_unused,
+ struct perf_session *perf_session __maybe_unused,
union perf_event *event)
{
if (dump_trace)
@@ -231,7 +237,8 @@ static int process_event_time_conv_stub(struct perf_session *perf_session __mayb
return 0;
}
-static int perf_session__process_compressed_event_stub(struct perf_session *session __maybe_unused,
+static int perf_session__process_compressed_event_stub(const struct perf_tool *tool __maybe_unused,
+ struct perf_session *session __maybe_unused,
union perf_event *event __maybe_unused,
u64 file_offset __maybe_unused,
const char *file_path __maybe_unused)
@@ -240,7 +247,8 @@ static int perf_session__process_compressed_event_stub(struct perf_session *sess
return 0;
}
-static int perf_event__process_bpf_metadata_stub(struct perf_session *perf_session __maybe_unused,
+static int perf_event__process_bpf_metadata_stub(const struct perf_tool *tool __maybe_unused,
+ struct perf_session *perf_session __maybe_unused,
union perf_event *event)
{
if (dump_trace)
@@ -258,6 +266,7 @@ void perf_tool__init(struct perf_tool *tool, bool ordered_events)
tool->cgroup_events = false;
tool->no_warn = false;
tool->show_feat_hdr = SHOW_FEAT_NO_HEADER;
+ tool->merge_deferred_callchains = true;
tool->sample = process_event_sample_stub;
tool->mmap = process_event_stub;
@@ -279,6 +288,7 @@ void perf_tool__init(struct perf_tool *tool, bool ordered_events)
tool->read = process_event_sample_stub;
tool->throttle = process_event_stub;
tool->unthrottle = process_event_stub;
+ tool->callchain_deferred = process_event_sample_stub;
tool->attr = process_event_synth_attr_stub;
tool->event_update = process_event_synth_event_update_stub;
tool->tracing_data = process_event_synth_tracing_data_stub;
@@ -313,3 +323,177 @@ bool perf_tool__compressed_is_stub(const struct perf_tool *tool)
{
return tool->compressed == perf_session__process_compressed_event_stub;
}
+
+#define CREATE_DELEGATE_SAMPLE(name) \
+ static int delegate_ ## name(const struct perf_tool *tool, \
+ union perf_event *event, \
+ struct perf_sample *sample, \
+ struct evsel *evsel, \
+ struct machine *machine) \
+ { \
+ struct delegate_tool *del_tool = container_of(tool, struct delegate_tool, tool); \
+ struct perf_tool *delegate = del_tool->delegate; \
+ return delegate->name(delegate, event, sample, evsel, machine); \
+ }
+CREATE_DELEGATE_SAMPLE(read);
+CREATE_DELEGATE_SAMPLE(sample);
+CREATE_DELEGATE_SAMPLE(callchain_deferred);
+
+#define CREATE_DELEGATE_ATTR(name) \
+ static int delegate_ ## name(const struct perf_tool *tool, \
+ union perf_event *event, \
+ struct evlist **pevlist) \
+ { \
+ struct delegate_tool *del_tool = container_of(tool, struct delegate_tool, tool); \
+ struct perf_tool *delegate = del_tool->delegate; \
+ return delegate->name(delegate, event, pevlist); \
+ }
+CREATE_DELEGATE_ATTR(attr);
+CREATE_DELEGATE_ATTR(event_update);
+
+#define CREATE_DELEGATE_OE(name) \
+ static int delegate_ ## name(const struct perf_tool *tool, \
+ union perf_event *event, \
+ struct ordered_events *oe) \
+ { \
+ struct delegate_tool *del_tool = container_of(tool, struct delegate_tool, tool); \
+ struct perf_tool *delegate = del_tool->delegate; \
+ return delegate->name(delegate, event, oe); \
+ }
+CREATE_DELEGATE_OE(finished_round);
+
+#define CREATE_DELEGATE_OP(name) \
+ static int delegate_ ## name(const struct perf_tool *tool, \
+ union perf_event *event, \
+ struct perf_sample *sample, \
+ struct machine *machine) \
+ { \
+ struct delegate_tool *del_tool = container_of(tool, struct delegate_tool, tool); \
+ struct perf_tool *delegate = del_tool->delegate; \
+ return delegate->name(delegate, event, sample, machine); \
+ }
+CREATE_DELEGATE_OP(aux);
+CREATE_DELEGATE_OP(aux_output_hw_id);
+CREATE_DELEGATE_OP(bpf);
+CREATE_DELEGATE_OP(cgroup);
+CREATE_DELEGATE_OP(comm);
+CREATE_DELEGATE_OP(context_switch);
+CREATE_DELEGATE_OP(exit);
+CREATE_DELEGATE_OP(fork);
+CREATE_DELEGATE_OP(itrace_start);
+CREATE_DELEGATE_OP(ksymbol);
+CREATE_DELEGATE_OP(lost);
+CREATE_DELEGATE_OP(lost_samples);
+CREATE_DELEGATE_OP(mmap);
+CREATE_DELEGATE_OP(mmap2);
+CREATE_DELEGATE_OP(namespaces);
+CREATE_DELEGATE_OP(text_poke);
+CREATE_DELEGATE_OP(throttle);
+CREATE_DELEGATE_OP(unthrottle);
+
+#define CREATE_DELEGATE_OP2(name) \
+ static int delegate_ ## name(const struct perf_tool *tool, \
+ struct perf_session *session, \
+ union perf_event *event) \
+ { \
+ struct delegate_tool *del_tool = container_of(tool, struct delegate_tool, tool); \
+ struct perf_tool *delegate = del_tool->delegate; \
+ return delegate->name(delegate, session, event); \
+ }
+CREATE_DELEGATE_OP2(auxtrace_error);
+CREATE_DELEGATE_OP2(auxtrace_info);
+CREATE_DELEGATE_OP2(bpf_metadata);
+CREATE_DELEGATE_OP2(build_id);
+CREATE_DELEGATE_OP2(cpu_map);
+CREATE_DELEGATE_OP2(feature);
+CREATE_DELEGATE_OP2(finished_init);
+CREATE_DELEGATE_OP2(id_index);
+CREATE_DELEGATE_OP2(stat);
+CREATE_DELEGATE_OP2(stat_config);
+CREATE_DELEGATE_OP2(stat_round);
+CREATE_DELEGATE_OP2(thread_map);
+CREATE_DELEGATE_OP2(time_conv);
+CREATE_DELEGATE_OP2(tracing_data);
+
+#define CREATE_DELEGATE_OP3(name) \
+ static s64 delegate_ ## name(const struct perf_tool *tool, \
+ struct perf_session *session, \
+ union perf_event *event) \
+ { \
+ struct delegate_tool *del_tool = container_of(tool, struct delegate_tool, tool); \
+ struct perf_tool *delegate = del_tool->delegate; \
+ return delegate->name(delegate, session, event); \
+ }
+CREATE_DELEGATE_OP3(auxtrace);
+
+#define CREATE_DELEGATE_OP4(name) \
+ static int delegate_ ## name(const struct perf_tool *tool, \
+ struct perf_session *session, \
+ union perf_event *event, \
+ u64 data, \
+ const char *str) \
+ { \
+ struct delegate_tool *del_tool = container_of(tool, struct delegate_tool, tool); \
+ struct perf_tool *delegate = del_tool->delegate; \
+ return delegate->name(delegate, session, event, data, str); \
+ }
+CREATE_DELEGATE_OP4(compressed);
+
+void delegate_tool__init(struct delegate_tool *tool, struct perf_tool *delegate)
+{
+ tool->delegate = delegate;
+
+ tool->tool.ordered_events = delegate->ordered_events;
+ tool->tool.ordering_requires_timestamps = delegate->ordering_requires_timestamps;
+ tool->tool.namespace_events = delegate->namespace_events;
+ tool->tool.cgroup_events = delegate->cgroup_events;
+ tool->tool.no_warn = delegate->no_warn;
+ tool->tool.show_feat_hdr = delegate->show_feat_hdr;
+ tool->tool.merge_deferred_callchains = delegate->merge_deferred_callchains;
+
+ tool->tool.sample = delegate_sample;
+ tool->tool.read = delegate_read;
+
+ tool->tool.mmap = delegate_mmap;
+ tool->tool.mmap2 = delegate_mmap2;
+ tool->tool.comm = delegate_comm;
+ tool->tool.namespaces = delegate_namespaces;
+ tool->tool.cgroup = delegate_cgroup;
+ tool->tool.fork = delegate_fork;
+ tool->tool.exit = delegate_exit;
+ tool->tool.lost = delegate_lost;
+ tool->tool.lost_samples = delegate_lost_samples;
+ tool->tool.aux = delegate_aux;
+ tool->tool.itrace_start = delegate_itrace_start;
+ tool->tool.aux_output_hw_id = delegate_aux_output_hw_id;
+ tool->tool.context_switch = delegate_context_switch;
+ tool->tool.throttle = delegate_throttle;
+ tool->tool.unthrottle = delegate_unthrottle;
+ tool->tool.ksymbol = delegate_ksymbol;
+ tool->tool.bpf = delegate_bpf;
+ tool->tool.text_poke = delegate_text_poke;
+ tool->tool.callchain_deferred = delegate_callchain_deferred;
+
+ tool->tool.attr = delegate_attr;
+ tool->tool.event_update = delegate_event_update;
+
+ tool->tool.tracing_data = delegate_tracing_data;
+
+ tool->tool.finished_round = delegate_finished_round;
+
+ tool->tool.build_id = delegate_build_id;
+ tool->tool.id_index = delegate_id_index;
+ tool->tool.auxtrace_info = delegate_auxtrace_info;
+ tool->tool.auxtrace_error = delegate_auxtrace_error;
+ tool->tool.time_conv = delegate_time_conv;
+ tool->tool.thread_map = delegate_thread_map;
+ tool->tool.cpu_map = delegate_cpu_map;
+ tool->tool.stat_config = delegate_stat_config;
+ tool->tool.stat = delegate_stat;
+ tool->tool.stat_round = delegate_stat_round;
+ tool->tool.feature = delegate_feature;
+ tool->tool.finished_init = delegate_finished_init;
+ tool->tool.bpf_metadata = delegate_bpf_metadata;
+ tool->tool.compressed = delegate_compressed;
+ tool->tool.auxtrace = delegate_auxtrace;
+}
diff --git a/tools/perf/util/tool.h b/tools/perf/util/tool.h
index 18b76ff0f26a..e96b69d25a5b 100644
--- a/tools/perf/util/tool.h
+++ b/tools/perf/util/tool.h
@@ -26,10 +26,12 @@ typedef int (*event_attr_op)(const struct perf_tool *tool,
union perf_event *event,
struct evlist **pevlist);
-typedef int (*event_op2)(struct perf_session *session, union perf_event *event);
-typedef s64 (*event_op3)(struct perf_session *session, union perf_event *event);
-typedef int (*event_op4)(struct perf_session *session, union perf_event *event, u64 data,
- const char *str);
+typedef int (*event_op2)(const struct perf_tool *tool, struct perf_session *session,
+ union perf_event *event);
+typedef s64 (*event_op3)(const struct perf_tool *tool, struct perf_session *session,
+ union perf_event *event);
+typedef int (*event_op4)(const struct perf_tool *tool, struct perf_session *session,
+ union perf_event *event, u64 data, const char *str);
typedef int (*event_oe)(const struct perf_tool *tool, union perf_event *event,
struct ordered_events *oe);
@@ -42,7 +44,8 @@ enum show_feature_header {
struct perf_tool {
event_sample sample,
- read;
+ read,
+ callchain_deferred;
event_op mmap,
mmap2,
comm,
@@ -87,6 +90,7 @@ struct perf_tool {
bool cgroup_events;
bool no_warn;
bool dont_split_sample_group;
+ bool merge_deferred_callchains;
enum show_feature_header show_feat_hdr;
};
@@ -100,4 +104,13 @@ int process_event_sample_stub(const struct perf_tool *tool,
struct evsel *evsel,
struct machine *machine);
+struct delegate_tool {
+ /** @tool: The actual tool that calls the delegate. */
+ struct perf_tool tool;
+ /** @delegate: The tool that is delegated to. */
+ struct perf_tool *delegate;
+};
+
+void delegate_tool__init(struct delegate_tool *tool, struct perf_tool *delegate);
+
#endif /* __PERF_TOOL_H */
diff --git a/tools/perf/util/tool_pmu.c b/tools/perf/util/tool_pmu.c
index d99e699e646d..37c4eae0bef1 100644
--- a/tools/perf/util/tool_pmu.c
+++ b/tools/perf/util/tool_pmu.c
@@ -2,16 +2,19 @@
#include "cgroup.h"
#include "counts.h"
#include "cputopo.h"
+#include "debug.h"
#include "evsel.h"
#include "pmu.h"
#include "print-events.h"
#include "smt.h"
+#include "stat.h"
#include "time-utils.h"
#include "tool_pmu.h"
#include "tsc.h"
#include <api/fs/fs.h>
#include <api/io.h>
#include <internal/threadmap.h>
+#include <perf/cpumap.h>
#include <perf/threadmap.h>
#include <fcntl.h>
#include <strings.h>
@@ -30,6 +33,8 @@ static const char *const tool_pmu__event_names[TOOL_PMU__EVENT_MAX] = {
"slots",
"smt_on",
"system_tsc_freq",
+ "core_wide",
+ "target_cpu",
};
bool tool_pmu__skip_event(const char *name __maybe_unused)
@@ -106,6 +111,23 @@ const char *evsel__tool_pmu_event_name(const struct evsel *evsel)
return tool_pmu__event_to_str(evsel->core.attr.config);
}
+struct perf_cpu_map *tool_pmu__cpus(struct perf_event_attr *attr)
+{
+ static struct perf_cpu_map *cpu0_map;
+ enum tool_pmu_event event = (enum tool_pmu_event)attr->config;
+
+ if (event <= TOOL_PMU__EVENT_NONE || event >= TOOL_PMU__EVENT_MAX) {
+ pr_err("Invalid tool PMU event config %llx\n", attr->config);
+ return NULL;
+ }
+ if (event == TOOL_PMU__EVENT_USER_TIME || event == TOOL_PMU__EVENT_SYSTEM_TIME)
+ return cpu_map__online();
+
+ if (!cpu0_map)
+ cpu0_map = perf_cpu_map__new_int(0);
+ return perf_cpu_map__get(cpu0_map);
+}
+
static bool read_until_char(struct io *io, char e)
{
int c;
@@ -239,9 +261,6 @@ int evsel__tool_pmu_open(struct evsel *evsel,
nthreads = perf_thread_map__nr(threads);
for (idx = start_cpu_map_idx; idx < end_cpu_map_idx; idx++) {
for (thread = 0; thread < nthreads; thread++) {
- if (thread >= nthreads)
- break;
-
if (!evsel->cgrp && !evsel->core.system_wide)
pid = perf_thread_map__pid(threads, thread);
@@ -332,7 +351,11 @@ static bool has_pmem(void)
return has_pmem;
}
-bool tool_pmu__read_event(enum tool_pmu_event ev, struct evsel *evsel, u64 *result)
+bool tool_pmu__read_event(enum tool_pmu_event ev,
+ struct evsel *evsel,
+ bool system_wide,
+ const char *user_requested_cpu_list,
+ u64 *result)
{
const struct cpu_topology *topology;
@@ -424,6 +447,14 @@ bool tool_pmu__read_event(enum tool_pmu_event ev, struct evsel *evsel, u64 *resu
*result = arch_get_tsc_freq();
return true;
+ case TOOL_PMU__EVENT_CORE_WIDE:
+ *result = core_wide(system_wide, user_requested_cpu_list) ? 1 : 0;
+ return true;
+
+ case TOOL_PMU__EVENT_TARGET_CPU:
+ *result = system_wide || (user_requested_cpu_list != NULL) ? 1 : 0;
+ return true;
+
case TOOL_PMU__EVENT_NONE:
case TOOL_PMU__EVENT_DURATION_TIME:
case TOOL_PMU__EVENT_USER_TIME:
@@ -434,16 +465,39 @@ bool tool_pmu__read_event(enum tool_pmu_event ev, struct evsel *evsel, u64 *resu
}
}
+static void perf_counts__update(struct perf_counts_values *count,
+ const struct perf_counts_values *old_count,
+ bool raw, u64 val)
+{
+ /*
+ * The values of enabled and running must make a ratio of 100%. The
+ * exact values don't matter as long as they are non-zero to avoid
+ * issues with evsel__count_has_error.
+ */
+ if (old_count) {
+ count->val = raw ? val : old_count->val + val;
+ count->run = old_count->run + 1;
+ count->ena = old_count->ena + 1;
+ count->lost = old_count->lost;
+ } else {
+ count->val = val;
+ count->run++;
+ count->ena++;
+ count->lost = 0;
+ }
+}
+
int evsel__tool_pmu_read(struct evsel *evsel, int cpu_map_idx, int thread)
{
__u64 *start_time, cur_time, delta_start;
- u64 val;
- int fd, err = 0;
+ int err = 0;
struct perf_counts_values *count, *old_count = NULL;
bool adjust = false;
enum tool_pmu_event ev = evsel__tool_event(evsel);
count = perf_counts(evsel->counts, cpu_map_idx, thread);
+ if (evsel->prev_raw_counts)
+ old_count = perf_counts(evsel->prev_raw_counts, cpu_map_idx, thread);
switch (ev) {
case TOOL_PMU__EVENT_HAS_PMEM:
@@ -454,26 +508,23 @@ int evsel__tool_pmu_read(struct evsel *evsel, int cpu_map_idx, int thread)
case TOOL_PMU__EVENT_NUM_PACKAGES:
case TOOL_PMU__EVENT_SLOTS:
case TOOL_PMU__EVENT_SMT_ON:
- case TOOL_PMU__EVENT_SYSTEM_TSC_FREQ:
- if (evsel->prev_raw_counts)
- old_count = perf_counts(evsel->prev_raw_counts, cpu_map_idx, thread);
- val = 0;
+ case TOOL_PMU__EVENT_CORE_WIDE:
+ case TOOL_PMU__EVENT_TARGET_CPU:
+ case TOOL_PMU__EVENT_SYSTEM_TSC_FREQ: {
+ u64 val = 0;
+
if (cpu_map_idx == 0 && thread == 0) {
- if (!tool_pmu__read_event(ev, evsel, &val)) {
+ if (!tool_pmu__read_event(ev, evsel,
+ stat_config.system_wide,
+ stat_config.user_requested_cpu_list,
+ &val)) {
count->lost++;
val = 0;
}
}
- if (old_count) {
- count->val = old_count->val + val;
- count->run = old_count->run + 1;
- count->ena = old_count->ena + 1;
- } else {
- count->val = val;
- count->run++;
- count->ena++;
- }
+ perf_counts__update(count, old_count, /*raw=*/false, val);
return 0;
+ }
case TOOL_PMU__EVENT_DURATION_TIME:
/*
* Pretend duration_time is only on the first CPU and thread, or
@@ -489,9 +540,9 @@ int evsel__tool_pmu_read(struct evsel *evsel, int cpu_map_idx, int thread)
case TOOL_PMU__EVENT_USER_TIME:
case TOOL_PMU__EVENT_SYSTEM_TIME: {
bool system = evsel__tool_event(evsel) == TOOL_PMU__EVENT_SYSTEM_TIME;
+ int fd = FD(evsel, cpu_map_idx, thread);
start_time = xyarray__entry(evsel->start_times, cpu_map_idx, thread);
- fd = FD(evsel, cpu_map_idx, thread);
lseek(fd, SEEK_SET, 0);
if (evsel->pid_stat) {
/* The event exists solely on 1 CPU. */
@@ -525,17 +576,9 @@ int evsel__tool_pmu_read(struct evsel *evsel, int cpu_map_idx, int thread)
if (adjust) {
__u64 ticks_per_sec = sysconf(_SC_CLK_TCK);
- delta_start *= 1000000000 / ticks_per_sec;
+ delta_start *= 1e9 / ticks_per_sec;
}
- count->val = delta_start;
- count->lost = 0;
- /*
- * The values of enabled and running must make a ratio of 100%. The
- * exact values don't matter as long as they are non-zero to avoid
- * issues with evsel__count_has_error.
- */
- count->ena++;
- count->run++;
+ perf_counts__update(count, old_count, /*raw=*/true, delta_start);
return 0;
}
diff --git a/tools/perf/util/tool_pmu.h b/tools/perf/util/tool_pmu.h
index d642e7d73910..ea343d1983d3 100644
--- a/tools/perf/util/tool_pmu.h
+++ b/tools/perf/util/tool_pmu.h
@@ -22,6 +22,8 @@ enum tool_pmu_event {
TOOL_PMU__EVENT_SLOTS,
TOOL_PMU__EVENT_SMT_ON,
TOOL_PMU__EVENT_SYSTEM_TSC_FREQ,
+ TOOL_PMU__EVENT_CORE_WIDE,
+ TOOL_PMU__EVENT_TARGET_CPU,
TOOL_PMU__EVENT_MAX,
};
@@ -34,11 +36,17 @@ enum tool_pmu_event tool_pmu__str_to_event(const char *str);
bool tool_pmu__skip_event(const char *name);
int tool_pmu__num_skip_events(void);
-bool tool_pmu__read_event(enum tool_pmu_event ev, struct evsel *evsel, u64 *result);
+bool tool_pmu__read_event(enum tool_pmu_event ev,
+ struct evsel *evsel,
+ bool system_wide,
+ const char *user_requested_cpu_list,
+ u64 *result);
+
u64 tool_pmu__cpu_slots_per_cycle(void);
bool perf_pmu__is_tool(const struct perf_pmu *pmu);
+struct perf_cpu_map *tool_pmu__cpus(struct perf_event_attr *attr);
bool evsel__is_tool(const struct evsel *evsel);
enum tool_pmu_event evsel__tool_event(const struct evsel *evsel);
diff --git a/tools/perf/util/tp_pmu.c b/tools/perf/util/tp_pmu.c
index e7534a973247..eddb9807131a 100644
--- a/tools/perf/util/tp_pmu.c
+++ b/tools/perf/util/tp_pmu.c
@@ -88,8 +88,6 @@ int tp_pmu__for_each_tp_sys(void *state, tp_sys_callback cb)
continue;
ret = cb(state, events_ent->d_name);
- if (ret)
- break;
}
close(events_dir.dirfd);
return ret;
diff --git a/tools/perf/util/trace.h b/tools/perf/util/trace.h
index fa8d480527a2..fbbcfe6f44fe 100644
--- a/tools/perf/util/trace.h
+++ b/tools/perf/util/trace.h
@@ -16,7 +16,7 @@ enum trace_summary_mode {
int trace_prepare_bpf_summary(enum trace_summary_mode mode);
void trace_start_bpf_summary(void);
void trace_end_bpf_summary(void);
-int trace_print_bpf_summary(FILE *fp);
+int trace_print_bpf_summary(FILE *fp, int max_summary);
void trace_cleanup_bpf_summary(void);
#else /* !HAVE_BPF_SKEL */
@@ -27,7 +27,7 @@ static inline int trace_prepare_bpf_summary(enum trace_summary_mode mode __maybe
}
static inline void trace_start_bpf_summary(void) {}
static inline void trace_end_bpf_summary(void) {}
-static inline int trace_print_bpf_summary(FILE *fp __maybe_unused)
+static inline int trace_print_bpf_summary(FILE *fp __maybe_unused, int max_summary __maybe_unused)
{
return 0;
}
diff --git a/tools/perf/util/zlib.c b/tools/perf/util/zlib.c
index 78d2297c1b67..1f7c06523059 100644
--- a/tools/perf/util/zlib.c
+++ b/tools/perf/util/zlib.c
@@ -88,7 +88,7 @@ bool gzip_is_compressed(const char *input)
ssize_t rc;
if (fd < 0)
- return -1;
+ return false;
rc = read(fd, buf, sizeof(buf));
close(fd);
diff --git a/tools/power/acpi/os_specific/service_layers/oslinuxtbl.c b/tools/power/acpi/os_specific/service_layers/oslinuxtbl.c
index 9741e7503591..de93067a5da3 100644
--- a/tools/power/acpi/os_specific/service_layers/oslinuxtbl.c
+++ b/tools/power/acpi/os_specific/service_layers/oslinuxtbl.c
@@ -995,7 +995,7 @@ static acpi_status osl_list_customized_tables(char *directory)
{
void *table_dir;
u32 instance;
- char temp_name[ACPI_NAMESEG_SIZE];
+ char temp_name[ACPI_NAMESEG_SIZE] ACPI_NONSTRING;
char *filename;
acpi_status status = AE_OK;
@@ -1312,7 +1312,7 @@ osl_get_customized_table(char *pathname,
{
void *table_dir;
u32 current_instance = 0;
- char temp_name[ACPI_NAMESEG_SIZE];
+ char temp_name[ACPI_NAMESEG_SIZE] ACPI_NONSTRING;
char table_filename[PATH_MAX];
char *filename;
acpi_status status;
diff --git a/tools/power/acpi/tools/acpidump/apdump.c b/tools/power/acpi/tools/acpidump/apdump.c
index bf30143efbdc..7a6223aa703c 100644
--- a/tools/power/acpi/tools/acpidump/apdump.c
+++ b/tools/power/acpi/tools/acpidump/apdump.c
@@ -86,9 +86,10 @@ u8 ap_is_valid_checksum(struct acpi_table_header *table)
if (ACPI_FAILURE(status)) {
fprintf(stderr, "%4.4s: Warning: wrong checksum in table\n",
table->signature);
+ return (FALSE);
}
- return (AE_OK);
+ return (TRUE);
}
/******************************************************************************
diff --git a/tools/power/acpi/tools/acpidump/apfiles.c b/tools/power/acpi/tools/acpidump/apfiles.c
index 75db0091e275..d6b8a201480b 100644
--- a/tools/power/acpi/tools/acpidump/apfiles.c
+++ b/tools/power/acpi/tools/acpidump/apfiles.c
@@ -103,7 +103,7 @@ int ap_open_output_file(char *pathname)
int ap_write_to_binary_file(struct acpi_table_header *table, u32 instance)
{
- char filename[ACPI_NAMESEG_SIZE + 16] ACPI_NONSTRING;
+ char filename[ACPI_NAMESEG_SIZE + 16];
char instance_str[16];
ACPI_FILE file;
acpi_size actual;
diff --git a/tools/power/acpi/tools/pfrut/pfrut.c b/tools/power/acpi/tools/pfrut/pfrut.c
index 44a9ecbd91e8..4d9b0177c312 100644
--- a/tools/power/acpi/tools/pfrut/pfrut.c
+++ b/tools/power/acpi/tools/pfrut/pfrut.c
@@ -222,6 +222,7 @@ int main(int argc, char *argv[])
fd_update_log = open("/dev/acpi_pfr_telemetry0", O_RDWR);
if (fd_update_log < 0) {
printf("PFRT device not supported - Quit...\n");
+ close(fd_update);
return 1;
}
@@ -265,7 +266,8 @@ int main(int argc, char *argv[])
printf("chunk2_size:%d\n", data_info.chunk2_size);
printf("rollover_cnt:%d\n", data_info.rollover_cnt);
printf("reset_cnt:%d\n", data_info.reset_cnt);
-
+ close(fd_update);
+ close(fd_update_log);
return 0;
}
@@ -358,6 +360,7 @@ int main(int argc, char *argv[])
if (ret == -1) {
perror("Failed to load capsule file");
+ munmap(addr_map_capsule, st.st_size);
close(fd_capsule);
close(fd_update);
close(fd_update_log);
@@ -420,7 +423,7 @@ int main(int argc, char *argv[])
if (p_mmap == MAP_FAILED) {
perror("mmap error.");
close(fd_update_log);
-
+ free(log_buf);
return 1;
}
diff --git a/tools/power/cpupower/.gitignore b/tools/power/cpupower/.gitignore
index 5113d5a7aee0..7677329c42a6 100644
--- a/tools/power/cpupower/.gitignore
+++ b/tools/power/cpupower/.gitignore
@@ -27,6 +27,3 @@ debug/i386/intel_gsic
debug/i386/powernow-k8-decode
debug/x86_64/centrino-decode
debug/x86_64/powernow-k8-decode
-
-# Clang's compilation database file
-compile_commands.json
diff --git a/tools/power/cpupower/Makefile b/tools/power/cpupower/Makefile
index c43db1c41205..a1df9196dc45 100644
--- a/tools/power/cpupower/Makefile
+++ b/tools/power/cpupower/Makefile
@@ -37,9 +37,7 @@ NLS ?= true
# cpufreq-bench benchmarking tool
CPUFREQ_BENCH ?= true
-# Do not build libraries, but build the code in statically
-# Libraries are still built, otherwise the Makefile code would
-# be rather ugly.
+# Build the code, including libraries, statically.
export STATIC ?= false
# Prefix to the directories we're installing to
@@ -207,14 +205,25 @@ $(OUTPUT)lib/%.o: $(LIB_SRC) $(LIB_HEADERS)
$(ECHO) " CC " $@
$(QUIET) $(CC) $(CFLAGS) -fPIC -o $@ -c lib/$*.c
-$(OUTPUT)libcpupower.so.$(LIB_VER): $(LIB_OBJS)
+ifeq ($(strip $(STATIC)),true)
+LIBCPUPOWER := libcpupower.a
+else
+LIBCPUPOWER := libcpupower.so.$(LIB_VER)
+endif
+
+$(OUTPUT)$(LIBCPUPOWER): $(LIB_OBJS)
+ifeq ($(strip $(STATIC)),true)
+ $(ECHO) " AR " $@
+ $(QUIET) $(AR) rcs $@ $(LIB_OBJS)
+else
$(ECHO) " LD " $@
$(QUIET) $(CC) -shared $(CFLAGS) $(LDFLAGS) -o $@ \
-Wl,-soname,libcpupower.so.$(LIB_MAJ) $(LIB_OBJS)
@ln -sf $(@F) $(OUTPUT)libcpupower.so
@ln -sf $(@F) $(OUTPUT)libcpupower.so.$(LIB_MAJ)
+endif
-libcpupower: $(OUTPUT)libcpupower.so.$(LIB_VER)
+libcpupower: $(OUTPUT)$(LIBCPUPOWER)
# Let all .o files depend on its .c file and all headers
# Might be worth to put this into utils/Makefile at some point of time
@@ -224,7 +233,7 @@ $(OUTPUT)%.o: %.c
$(ECHO) " CC " $@
$(QUIET) $(CC) $(CFLAGS) -I./lib -I ./utils -o $@ -c $*.c
-$(OUTPUT)cpupower: $(UTIL_OBJS) $(OUTPUT)libcpupower.so.$(LIB_VER)
+$(OUTPUT)cpupower: $(UTIL_OBJS) $(OUTPUT)$(LIBCPUPOWER)
$(ECHO) " CC " $@
ifeq ($(strip $(STATIC)),true)
$(QUIET) $(CC) $(CFLAGS) $(LDFLAGS) $(UTIL_OBJS) -lrt -lpci -L$(OUTPUT) -o $@
@@ -269,7 +278,7 @@ update-po: $(OUTPUT)po/$(PACKAGE).pot
done;
endif
-compile-bench: $(OUTPUT)libcpupower.so.$(LIB_VER)
+compile-bench: $(OUTPUT)$(LIBCPUPOWER)
@V=$(V) confdir=$(confdir) $(MAKE) -C bench O=$(OUTPUT)
# we compile into subdirectories. if the target directory is not the
@@ -287,6 +296,7 @@ clean:
-find $(OUTPUT) \( -not -type d \) -and \( -name '*~' -o -name '*.[oas]' \) -type f -print \
| xargs rm -f
-rm -f $(OUTPUT)cpupower
+ -rm -f $(OUTPUT)libcpupower.a
-rm -f $(OUTPUT)libcpupower.so*
-rm -rf $(OUTPUT)po/*.gmo
-rm -rf $(OUTPUT)po/*.pot
@@ -295,7 +305,11 @@ clean:
install-lib: libcpupower
$(INSTALL) -d $(DESTDIR)${libdir}
+ifeq ($(strip $(STATIC)),true)
+ $(CP) $(OUTPUT)libcpupower.a $(DESTDIR)${libdir}/
+else
$(CP) $(OUTPUT)libcpupower.so* $(DESTDIR)${libdir}/
+endif
$(INSTALL) -d $(DESTDIR)${includedir}
$(INSTALL_DATA) lib/cpufreq.h $(DESTDIR)${includedir}/cpufreq.h
$(INSTALL_DATA) lib/cpuidle.h $(DESTDIR)${includedir}/cpuidle.h
@@ -336,11 +350,7 @@ install-bench: compile-bench
@#DESTDIR must be set from outside to survive
@sbindir=$(sbindir) bindir=$(bindir) docdir=$(docdir) confdir=$(confdir) $(MAKE) -C bench O=$(OUTPUT) install
-ifeq ($(strip $(STATIC)),true)
-install: all install-tools install-man $(INSTALL_NLS) $(INSTALL_BENCH)
-else
install: all install-lib install-tools install-man $(INSTALL_NLS) $(INSTALL_BENCH)
-endif
uninstall:
- rm -f $(DESTDIR)${libdir}/libcpupower.*
diff --git a/tools/power/cpupower/lib/cpuidle.c b/tools/power/cpupower/lib/cpuidle.c
index 0ecac009273c..f2c1139adf71 100644
--- a/tools/power/cpupower/lib/cpuidle.c
+++ b/tools/power/cpupower/lib/cpuidle.c
@@ -233,6 +233,7 @@ int cpuidle_state_disable(unsigned int cpu,
{
char value[SYSFS_PATH_MAX];
int bytes_written;
+ int len;
if (cpuidle_state_count(cpu) <= idlestate)
return -1;
@@ -241,10 +242,10 @@ int cpuidle_state_disable(unsigned int cpu,
idlestate_value_files[IDLESTATE_DISABLE]))
return -2;
- snprintf(value, SYSFS_PATH_MAX, "%u", disable);
+ len = snprintf(value, SYSFS_PATH_MAX, "%u", disable);
bytes_written = cpuidle_state_write_file(cpu, idlestate, "disable",
- value, sizeof(disable));
+ value, len);
if (bytes_written)
return 0;
return -3;
diff --git a/tools/power/cpupower/lib/cpupower.c b/tools/power/cpupower/lib/cpupower.c
index ce8dfb8e46ab..d7f7ec6f151c 100644
--- a/tools/power/cpupower/lib/cpupower.c
+++ b/tools/power/cpupower/lib/cpupower.c
@@ -56,7 +56,7 @@ unsigned int cpupower_write_sysfs(const char *path, char *buf, size_t buflen)
if (numwritten < 1) {
perror(path);
close(fd);
- return -1;
+ return 0;
}
close(fd);
diff --git a/tools/power/cpupower/man/cpupower-set.1 b/tools/power/cpupower/man/cpupower-set.1
index 500653ef98c7..8ac82b6f9189 100644
--- a/tools/power/cpupower/man/cpupower-set.1
+++ b/tools/power/cpupower/man/cpupower-set.1
@@ -81,10 +81,11 @@ Refer to the AMD P-State kernel documentation for further information.
.RE
.PP
-\-\-turbo\-boost, \-t
+\-\-turbo\-boost, \-\-boost, \-t
.RS 4
-This option is used to enable or disable the turbo boost feature on
-supported Intel and AMD processors.
+This option is used to enable or disable the boost feature on
+supported Intel and AMD processors, and other boost supported systems.
+(The --boost option is an alias for the --turbo-boost option)
This option takes as parameter either \fB1\fP to enable, or \fB0\fP to disable the feature.
diff --git a/tools/power/cpupower/utils/cpufreq-info.c b/tools/power/cpupower/utils/cpufreq-info.c
index fc750e127404..7d3732f5f2f6 100644
--- a/tools/power/cpupower/utils/cpufreq-info.c
+++ b/tools/power/cpupower/utils/cpufreq-info.c
@@ -128,7 +128,7 @@ static int get_boost_mode_x86(unsigned int cpu)
/* ToDo: Make this more global */
unsigned long pstates[MAX_HW_PSTATES] = {0,};
- ret = cpufreq_has_boost_support(cpu, &support, &active, &b_states);
+ ret = cpufreq_has_x86_boost_support(cpu, &support, &active, &b_states);
if (ret) {
printf(_("Error while evaluating Boost Capabilities"
" on CPU %d -- are you root?\n"), cpu);
@@ -204,6 +204,18 @@ static int get_boost_mode_x86(unsigned int cpu)
return 0;
}
+static int get_boost_mode_generic(unsigned int cpu)
+{
+ bool active;
+
+ if (!cpufreq_has_generic_boost_support(&active)) {
+ printf(_(" boost state support:\n"));
+ printf(_(" Active: %s\n"), active ? _("yes") : _("no"));
+ }
+
+ return 0;
+}
+
/* --boost / -b */
static int get_boost_mode(unsigned int cpu)
@@ -214,6 +226,8 @@ static int get_boost_mode(unsigned int cpu)
cpupower_cpu_info.vendor == X86_VENDOR_HYGON ||
cpupower_cpu_info.vendor == X86_VENDOR_INTEL)
return get_boost_mode_x86(cpu);
+ else
+ get_boost_mode_generic(cpu);
freqs = cpufreq_get_boost_frequencies(cpu);
if (freqs) {
diff --git a/tools/power/cpupower/utils/cpupower-set.c b/tools/power/cpupower/utils/cpupower-set.c
index 0677b58374ab..c2117e5650dd 100644
--- a/tools/power/cpupower/utils/cpupower-set.c
+++ b/tools/power/cpupower/utils/cpupower-set.c
@@ -21,6 +21,7 @@ static struct option set_opts[] = {
{"epp", required_argument, NULL, 'e'},
{"amd-pstate-mode", required_argument, NULL, 'm'},
{"turbo-boost", required_argument, NULL, 't'},
+ {"boost", required_argument, NULL, 't'},
{ },
};
@@ -62,8 +63,8 @@ int cmd_set(int argc, char **argv)
params.params = 0;
/* parameter parsing */
- while ((ret = getopt_long(argc, argv, "b:e:m:",
- set_opts, NULL)) != -1) {
+ while ((ret = getopt_long(argc, argv, "b:e:m:t:",
+ set_opts, NULL)) != -1) {
switch (ret) {
case 'b':
if (params.perf_bias)
diff --git a/tools/power/cpupower/utils/helpers/helpers.h b/tools/power/cpupower/utils/helpers/helpers.h
index 95749b8ee475..82ea62bdf5a2 100644
--- a/tools/power/cpupower/utils/helpers/helpers.h
+++ b/tools/power/cpupower/utils/helpers/helpers.h
@@ -103,6 +103,9 @@ extern struct cpupower_cpu_info cpupower_cpu_info;
/* cpuid and cpuinfo helpers **************************/
+int cpufreq_has_generic_boost_support(bool *active);
+int cpupower_set_turbo_boost(int turbo_boost);
+
/* X86 ONLY ****************************************/
#if defined(__i386__) || defined(__x86_64__)
@@ -118,7 +121,6 @@ extern unsigned long long msr_intel_get_turbo_ratio(unsigned int cpu);
extern int cpupower_set_epp(unsigned int cpu, char *epp);
extern int cpupower_set_amd_pstate_mode(char *mode);
-extern int cpupower_set_turbo_boost(int turbo_boost);
/* Read/Write msr ****************************/
@@ -139,8 +141,8 @@ extern int decode_pstates(unsigned int cpu, int boost_states,
/* AMD HW pstate decoding **************************/
-extern int cpufreq_has_boost_support(unsigned int cpu, int *support,
- int *active, int * states);
+int cpufreq_has_x86_boost_support(unsigned int cpu, int *support,
+ int *active, int *states);
/* AMD P-State stuff **************************/
bool cpupower_amd_pstate_enabled(void);
@@ -181,13 +183,11 @@ static inline int cpupower_set_epp(unsigned int cpu, char *epp)
{ return -1; };
static inline int cpupower_set_amd_pstate_mode(char *mode)
{ return -1; };
-static inline int cpupower_set_turbo_boost(int turbo_boost)
-{ return -1; };
/* Read/Write msr ****************************/
-static inline int cpufreq_has_boost_support(unsigned int cpu, int *support,
- int *active, int * states)
+static inline int cpufreq_has_x86_boost_support(unsigned int cpu, int *support,
+ int *active, int *states)
{ return -1; }
static inline bool cpupower_amd_pstate_enabled(void)
diff --git a/tools/power/cpupower/utils/helpers/misc.c b/tools/power/cpupower/utils/helpers/misc.c
index 76e461ff4f74..166dc1e470ea 100644
--- a/tools/power/cpupower/utils/helpers/misc.c
+++ b/tools/power/cpupower/utils/helpers/misc.c
@@ -8,15 +8,14 @@
#include "helpers/helpers.h"
#include "helpers/sysfs.h"
#include "cpufreq.h"
+#include "cpupower_intern.h"
#if defined(__i386__) || defined(__x86_64__)
-#include "cpupower_intern.h"
-
#define MSR_AMD_HWCR 0xc0010015
-int cpufreq_has_boost_support(unsigned int cpu, int *support, int *active,
- int *states)
+int cpufreq_has_x86_boost_support(unsigned int cpu, int *support, int *active,
+ int *states)
{
int ret;
unsigned long long val;
@@ -124,24 +123,6 @@ int cpupower_set_amd_pstate_mode(char *mode)
return 0;
}
-int cpupower_set_turbo_boost(int turbo_boost)
-{
- char path[SYSFS_PATH_MAX];
- char linebuf[2] = {};
-
- snprintf(path, sizeof(path), PATH_TO_CPU "cpufreq/boost");
-
- if (!is_valid_path(path))
- return -1;
-
- snprintf(linebuf, sizeof(linebuf), "%d", turbo_boost);
-
- if (cpupower_write_sysfs(path, linebuf, 2) <= 0)
- return -1;
-
- return 0;
-}
-
bool cpupower_amd_pstate_enabled(void)
{
char *driver = cpufreq_get_driver(0);
@@ -160,6 +141,39 @@ bool cpupower_amd_pstate_enabled(void)
#endif /* #if defined(__i386__) || defined(__x86_64__) */
+int cpufreq_has_generic_boost_support(bool *active)
+{
+ char path[SYSFS_PATH_MAX];
+ char linebuf[2] = {};
+ unsigned long val;
+ char *endp;
+
+ snprintf(path, sizeof(path), PATH_TO_CPU "cpufreq/boost");
+
+ if (!is_valid_path(path))
+ return -EACCES;
+
+ if (cpupower_read_sysfs(path, linebuf, 2) <= 0)
+ return -EINVAL;
+
+ val = strtoul(linebuf, &endp, 0);
+ if (endp == linebuf || errno == ERANGE)
+ return -EINVAL;
+
+ switch (val) {
+ case 0:
+ *active = false;
+ break;
+ case 1:
+ *active = true;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
/* get_cpustate
*
* Gather the information of all online CPUs into bitmask struct
@@ -259,3 +273,21 @@ void print_speed(unsigned long speed, int no_rounding)
}
}
}
+
+int cpupower_set_turbo_boost(int turbo_boost)
+{
+ char path[SYSFS_PATH_MAX];
+ char linebuf[2] = {};
+
+ snprintf(path, sizeof(path), PATH_TO_CPU "cpufreq/boost");
+
+ if (!is_valid_path(path))
+ return -1;
+
+ snprintf(linebuf, sizeof(linebuf), "%d", turbo_boost);
+
+ if (cpupower_write_sysfs(path, linebuf, 2) <= 0)
+ return -1;
+
+ return 0;
+}
diff --git a/tools/power/x86/amd_pstate_tracer/amd_pstate_trace.py b/tools/power/x86/amd_pstate_tracer/amd_pstate_trace.py
index feb9f9421c7b..875b086550d1 100755
--- a/tools/power/x86/amd_pstate_tracer/amd_pstate_trace.py
+++ b/tools/power/x86/amd_pstate_tracer/amd_pstate_trace.py
@@ -11,7 +11,7 @@ Prerequisites:
gnuplot 5.0 or higher
gnuplot-py 1.8 or higher
(Most of the distributions have these required packages. They may be called
- gnuplot-py, phython-gnuplot or phython3-gnuplot, gnuplot-nox, ... )
+ gnuplot-py, python-gnuplot or python3-gnuplot, gnuplot-nox, ... )
Kernel config for Linux trace is enabled
diff --git a/tools/power/x86/intel-speed-select/isst-config.c b/tools/power/x86/intel-speed-select/isst-config.c
index 0ce251b8d466..558138eea75e 100644
--- a/tools/power/x86/intel-speed-select/isst-config.c
+++ b/tools/power/x86/intel-speed-select/isst-config.c
@@ -16,7 +16,7 @@ struct process_cmd_struct {
int arg;
};
-static const char *version_str = "v1.23";
+static const char *version_str = "v1.24";
static const int supported_api_ver = 3;
static struct isst_if_platform_info isst_platform_info;
diff --git a/tools/power/x86/intel-speed-select/isst-core-tpmi.c b/tools/power/x86/intel-speed-select/isst-core-tpmi.c
index 4f389e1c0525..ebaad0dc8ca6 100644
--- a/tools/power/x86/intel-speed-select/isst-core-tpmi.c
+++ b/tools/power/x86/intel-speed-select/isst-core-tpmi.c
@@ -452,13 +452,16 @@ static int tpmi_get_pbf_info(struct isst_id *id, int level,
return _pbf_get_coremask_info(id, level, pbf_info);
}
+#define FEATURE_ENABLE_WAIT_US 1000
+#define FEATURE_ENABLE_RETRIES 5
+
static int tpmi_set_pbf_fact_status(struct isst_id *id, int pbf, int enable)
{
struct isst_pkg_ctdp pkg_dev;
struct isst_pkg_ctdp_level_info ctdp_level;
int current_level;
struct isst_perf_feature_control info;
- int ret;
+ int ret, i;
ret = isst_get_ctdp_levels(id, &pkg_dev);
if (ret)
@@ -503,6 +506,30 @@ static int tpmi_set_pbf_fact_status(struct isst_id *id, int pbf, int enable)
if (ret == -1)
return ret;
+ for (i = 0; i < FEATURE_ENABLE_RETRIES; ++i) {
+
+ usleep(FEATURE_ENABLE_WAIT_US);
+
+ /* Check status */
+ ret = isst_get_ctdp_control(id, current_level, &ctdp_level);
+ if (ret)
+ return ret;
+
+ debug_printf("pbf_enabled:%d fact_enabled:%d\n",
+ ctdp_level.pbf_enabled, ctdp_level.fact_enabled);
+
+ if (pbf) {
+ if (ctdp_level.pbf_enabled == enable)
+ break;
+ } else {
+ if (ctdp_level.fact_enabled == enable)
+ break;
+ }
+ }
+
+ if (i == FEATURE_ENABLE_RETRIES)
+ return -1;
+
return 0;
}
@@ -513,6 +540,7 @@ static int tpmi_get_fact_info(struct isst_id *id, int level, int fact_bucket,
int i, j;
int ret;
+ memset(&info, 0, sizeof(info));
info.socket_id = id->pkg;
info.power_domain_id = id->punit;
info.level = level;
@@ -659,7 +687,8 @@ static int tpmi_pm_qos_config(struct isst_id *id, int enable_clos,
int priority_type)
{
struct isst_core_power info;
- int i, ret, saved_punit;
+ int cp_state = 0, cp_cap = 0;
+ int i, j, ret, saved_punit;
info.get_set = 1;
info.socket_id = id->pkg;
@@ -679,6 +708,19 @@ static int tpmi_pm_qos_config(struct isst_id *id, int enable_clos,
id->punit = saved_punit;
return ret;
}
+ /* Get status */
+ for (j = 0; j < FEATURE_ENABLE_RETRIES; ++j) {
+ usleep(FEATURE_ENABLE_WAIT_US);
+ ret = tpmi_read_pm_config(id, &cp_state, &cp_cap);
+ debug_printf("ret:%d cp_state:%d enable_clos:%d\n", ret,
+ cp_state, enable_clos);
+ if (ret || cp_state == enable_clos)
+ break;
+ }
+ if (j == FEATURE_ENABLE_RETRIES) {
+ id->punit = saved_punit;
+ return -1;
+ }
}
}
diff --git a/tools/power/x86/turbostat/turbostat.8 b/tools/power/x86/turbostat/turbostat.8
index 3340def58d01..1551fcdbfd8a 100644
--- a/tools/power/x86/turbostat/turbostat.8
+++ b/tools/power/x86/turbostat/turbostat.8
@@ -101,7 +101,7 @@ The column name "all" can be used to enable all disabled-by-default built-in cou
.PP
\fB--show column\fP show only the specified built-in columns. May be invoked multiple times, or with a comma-separated list of column names.
.PP
-\fB--show CATEGORY --hide CATEGORY\fP Show and hide also accept a single CATEGORY of columns: "all", "topology", "idle", "frequency", "power", "cpuidle", "hwidle", "swidle", "other". "idle" (enabled by default), includes "hwidle" and "pct_idle". "cpuidle" (default disabled) includes cpuidle software invocation counters. "swidle" includes "cpuidle" plus "pct_idle". "hwidle" includes only hardware based idle residency counters. Older versions of turbostat used the term "sysfs" for what is now "swidle".
+\fB--show CATEGORY --hide CATEGORY\fP Show and hide also accept a comma-separated-list of CATEGORIES of columns: "all", "topology", "idle", "frequency", "power", "cpuidle", "hwidle", "swidle", "cache", "llc", "other". "idle" (enabled by default), includes "hwidle" and "pct_idle". "cpuidle" (default disabled) includes cpuidle software invocation counters. "swidle" includes "cpuidle" plus "pct_idle". "hwidle" includes only hardware based idle residency counters. Older versions of turbostat used the term "sysfs" for what is now "swidle".
.PP
\fB--Dump\fP displays the raw counter values.
.PP
@@ -159,6 +159,10 @@ The system configuration dump (if --quiet is not used) is followed by statistics
.PP
\fBSMI\fP The number of System Management Interrupts serviced CPU during the measurement interval. While this counter is actually per-CPU, SMI are triggered on all processors, so the number should be the same for all CPUs.
.PP
+\fBLLCkRPS\fP Last Level Cache Thousands of References Per Second. For CPUs with an L3 LLC, this is the number of references that CPU made to the L3 (and the number of misses that CPU made to it's L2). For CPUs with an L2 LLC, this is the number of references to the L2 (and the number of misses to the CPU's L1). The system summary row shows the sum for all CPUs. In both cases, the value displayed is the actual value divided by 1000 in the interest of usually fitting into 8 columns.
+.PP
+\fBLLC%hit\fP Last Level Cache Hit Rate %. Hit Rate Percent = 100.0 * (References - Misses)/References. The system summary row shows the weighted average for all CPUs (100.0 * (Sum_References - Sum_Misses)/Sum_References).
+.PP
\fBC1, C2, C3...\fP The number times Linux requested the C1, C2, C3 idle state during the measurement interval. The system summary line shows the sum for all CPUs. These are C-state names as exported in /sys/devices/system/cpu/cpu*/cpuidle/state*/name. While their names are generic, their attributes are processor specific. They the system description section of output shows what MWAIT sub-states they are mapped to on each system. These counters are in the "cpuidle" group, which is disabled, by default.
.PP
\fBC1+, C2+, C3+...\fP The idle governor idle state misprediction statistics. Inidcates the number times Linux requested the C1, C2, C3 idle state during the measurement interval, but should have requested a deeper idle state (if it exists and enabled). These statistics come from the /sys/devices/system/cpu/cpu*/cpuidle/state*/below file. These counters are in the "cpuidle" group, which is disabled, by default.
@@ -410,25 +414,24 @@ CPU pCPU%c1 CPU%c1
.fi
.SH ADD PERF COUNTER EXAMPLE #2 (using virtual cpu device)
-Here we run on hybrid, Raptor Lake platform.
-We limit turbostat to show output for just cpu0 (pcore) and cpu12 (ecore).
+Here we run on hybrid, Meteor Lake platform.
+We limit turbostat to show output for just cpu0 (pcore) and cpu4 (ecore).
We add a counter showing number of L3 cache misses, using virtual "cpu" device,
labeling it with the column header, "VCMISS".
We add a counter showing number of L3 cache misses, using virtual "cpu_core" device,
-labeling it with the column header, "PCMISS". This will fail on ecore cpu12.
+labeling it with the column header, "PCMISS". This will fail on ecore cpu4.
We add a counter showing number of L3 cache misses, using virtual "cpu_atom" device,
labeling it with the column header, "ECMISS". This will fail on pcore cpu0.
We display it only once, after the conclusion of 0.1 second sleep.
.nf
-sudo ./turbostat --quiet --cpu 0,12 --show CPU --add perf/cpu/cache-misses,cpu,delta,raw,VCMISS --add perf/cpu_core/cache-misses,cpu,delta,raw,PCMISS --add perf/cpu_atom/cache-misses,cpu,delta,raw,ECMISS sleep .1
+sudo ./turbostat --quiet --cpu 0,4 --show CPU --add perf/cpu/cache-misses,cpu,delta,VCMISS --add perf/cpu_core/cache-misses,cpu,delta,PCMISS --add perf/cpu_atom/cache-misses,cpu,delta,ECMISS sleep 5
turbostat: added_perf_counters_init_: perf/cpu_atom/cache-misses: failed to open counter on cpu0
-turbostat: added_perf_counters_init_: perf/cpu_core/cache-misses: failed to open counter on cpu12
-0.104630 sec
-CPU ECMISS PCMISS VCMISS
-- 0x0000000000000000 0x0000000000000000 0x0000000000000000
-0 0x0000000000000000 0x0000000000007951 0x0000000000007796
-12 0x000000000001137a 0x0000000000000000 0x0000000000011392
-
+turbostat: added_perf_counters_init_: perf/cpu_core/cache-misses: failed to open counter on cpu4
+5.001207 sec
+CPU ECMISS PCMISS VCMISS
+- 41586506 46291219 87877749
+4 83173012 0 83173040
+0 0 92582439 92582458
.fi
.SH ADD PMT COUNTER EXAMPLE
diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c
index 72a280e7a9d5..5ad45c2ac5bd 100644
--- a/tools/power/x86/turbostat/turbostat.c
+++ b/tools/power/x86/turbostat/turbostat.c
@@ -142,6 +142,7 @@ struct msr_counter {
#define FLAGS_SHOW (1 << 1)
#define SYSFS_PERCPU (1 << 1)
};
+static int use_android_msr_path;
struct msr_counter bic[] = {
{ 0x0, "usec", NULL, 0, 0, 0, NULL, 0 },
@@ -209,6 +210,8 @@ struct msr_counter bic[] = {
{ 0x0, "NMI", NULL, 0, 0, 0, NULL, 0 },
{ 0x0, "CPU%c1e", NULL, 0, 0, 0, NULL, 0 },
{ 0x0, "pct_idle", NULL, 0, 0, 0, NULL, 0 },
+ { 0x0, "LLCkRPS", NULL, 0, 0, 0, NULL, 0 },
+ { 0x0, "LLC%hit", NULL, 0, 0, 0, NULL, 0 },
};
/* n.b. bic_names must match the order in bic[], above */
@@ -278,6 +281,8 @@ enum bic_names {
BIC_NMI,
BIC_CPU_c1e,
BIC_pct_idle,
+ BIC_LLC_RPS,
+ BIC_LLC_HIT,
MAX_BIC
};
@@ -305,6 +310,7 @@ static cpu_set_t bic_group_frequency;
static cpu_set_t bic_group_hw_idle;
static cpu_set_t bic_group_sw_idle;
static cpu_set_t bic_group_idle;
+static cpu_set_t bic_group_cache;
static cpu_set_t bic_group_other;
static cpu_set_t bic_group_disabled_by_default;
static cpu_set_t bic_enabled;
@@ -413,9 +419,14 @@ static void bic_groups_init(void)
SET_BIC(BIC_pct_idle, &bic_group_sw_idle);
BIC_INIT(&bic_group_idle);
+
CPU_OR(&bic_group_idle, &bic_group_idle, &bic_group_hw_idle);
SET_BIC(BIC_pct_idle, &bic_group_idle);
+ BIC_INIT(&bic_group_cache);
+ SET_BIC(BIC_LLC_RPS, &bic_group_cache);
+ SET_BIC(BIC_LLC_HIT, &bic_group_cache);
+
BIC_INIT(&bic_group_other);
SET_BIC(BIC_IRQ, &bic_group_other);
SET_BIC(BIC_NMI, &bic_group_other);
@@ -466,12 +477,11 @@ static void bic_groups_init(void)
#define PCL_10 14 /* PC10 */
#define PCLUNL 15 /* Unlimited */
-struct amperf_group_fd;
-
char *proc_stat = "/proc/stat";
FILE *outf;
int *fd_percpu;
int *fd_instr_count_percpu;
+int *fd_llc_percpu;
struct timeval interval_tv = { 5, 0 };
struct timespec interval_ts = { 5, 0 };
@@ -482,11 +492,12 @@ unsigned int quiet;
unsigned int shown;
unsigned int sums_need_wide_columns;
unsigned int rapl_joules;
+unsigned int valid_rapl_msrs;
unsigned int summary_only;
unsigned int list_header_only;
unsigned int dump_only;
unsigned int force_load;
-unsigned int has_aperf;
+unsigned int cpuid_has_aperf_mperf;
unsigned int has_aperf_access;
unsigned int has_epb;
unsigned int has_turbo;
@@ -552,8 +563,7 @@ static struct gfx_sysfs_info gfx_info[GFX_MAX];
int get_msr(int cpu, off_t offset, unsigned long long *msr);
int add_counter(unsigned int msr_num, char *path, char *name,
- unsigned int width, enum counter_scope scope,
- enum counter_type type, enum counter_format format, int flags, int package_num);
+ unsigned int width, enum counter_scope scope, enum counter_type type, enum counter_format format, int flags, int package_num);
/* Model specific support Start */
@@ -578,7 +588,7 @@ struct platform_features {
bool has_cst_prewake_bit; /* Cstate prewake bit in MSR_IA32_POWER_CTL */
int trl_msrs; /* MSR_TURBO_RATIO_LIMIT/LIMIT1/LIMIT2/SECONDARY, Atom TRL MSRs */
int plr_msrs; /* MSR_CORE/GFX/RING_PERF_LIMIT_REASONS */
- int rapl_msrs; /* RAPL PKG/DRAM/CORE/GFX MSRs, AMD RAPL MSRs */
+ int plat_rapl_msrs; /* RAPL PKG/DRAM/CORE/GFX MSRs, AMD RAPL MSRs */
bool has_per_core_rapl; /* Indicates cores energy collection is per-core, not per-package. AMD specific for now */
bool has_rapl_divisor; /* Divisor for Energy unit raw value from MSR_RAPL_POWER_UNIT */
bool has_fixed_rapl_unit; /* Fixed Energy Unit used for DRAM RAPL Domain */
@@ -733,7 +743,7 @@ static const struct platform_features snb_features = {
.cst_limit = CST_LIMIT_SNB,
.has_irtl_msrs = 1,
.trl_msrs = TRL_BASE,
- .rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO,
+ .plat_rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO,
};
static const struct platform_features snx_features = {
@@ -745,7 +755,7 @@ static const struct platform_features snx_features = {
.cst_limit = CST_LIMIT_SNB,
.has_irtl_msrs = 1,
.trl_msrs = TRL_BASE,
- .rapl_msrs = RAPL_PKG_ALL | RAPL_CORE_ALL | RAPL_DRAM_ALL,
+ .plat_rapl_msrs = RAPL_PKG_ALL | RAPL_CORE_ALL | RAPL_DRAM_ALL,
};
static const struct platform_features ivb_features = {
@@ -758,7 +768,7 @@ static const struct platform_features ivb_features = {
.cst_limit = CST_LIMIT_SNB,
.has_irtl_msrs = 1,
.trl_msrs = TRL_BASE,
- .rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO,
+ .plat_rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO,
};
static const struct platform_features ivx_features = {
@@ -770,7 +780,7 @@ static const struct platform_features ivx_features = {
.cst_limit = CST_LIMIT_SNB,
.has_irtl_msrs = 1,
.trl_msrs = TRL_BASE | TRL_LIMIT1,
- .rapl_msrs = RAPL_PKG_ALL | RAPL_CORE_ALL | RAPL_DRAM_ALL,
+ .plat_rapl_msrs = RAPL_PKG_ALL | RAPL_CORE_ALL | RAPL_DRAM_ALL,
};
static const struct platform_features hsw_features = {
@@ -784,7 +794,7 @@ static const struct platform_features hsw_features = {
.has_irtl_msrs = 1,
.trl_msrs = TRL_BASE,
.plr_msrs = PLR_CORE | PLR_GFX | PLR_RING,
- .rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO,
+ .plat_rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO,
};
static const struct platform_features hsx_features = {
@@ -798,7 +808,7 @@ static const struct platform_features hsx_features = {
.has_irtl_msrs = 1,
.trl_msrs = TRL_BASE | TRL_LIMIT1 | TRL_LIMIT2,
.plr_msrs = PLR_CORE | PLR_RING,
- .rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL,
+ .plat_rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL,
.has_fixed_rapl_unit = 1,
};
@@ -813,7 +823,7 @@ static const struct platform_features hswl_features = {
.has_irtl_msrs = 1,
.trl_msrs = TRL_BASE,
.plr_msrs = PLR_CORE | PLR_GFX | PLR_RING,
- .rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO,
+ .plat_rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO,
};
static const struct platform_features hswg_features = {
@@ -827,7 +837,7 @@ static const struct platform_features hswg_features = {
.has_irtl_msrs = 1,
.trl_msrs = TRL_BASE,
.plr_msrs = PLR_CORE | PLR_GFX | PLR_RING,
- .rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO,
+ .plat_rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO,
};
static const struct platform_features bdw_features = {
@@ -840,7 +850,7 @@ static const struct platform_features bdw_features = {
.cst_limit = CST_LIMIT_HSW,
.has_irtl_msrs = 1,
.trl_msrs = TRL_BASE,
- .rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO,
+ .plat_rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO,
};
static const struct platform_features bdwg_features = {
@@ -853,7 +863,7 @@ static const struct platform_features bdwg_features = {
.cst_limit = CST_LIMIT_HSW,
.has_irtl_msrs = 1,
.trl_msrs = TRL_BASE,
- .rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO,
+ .plat_rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO,
};
static const struct platform_features bdx_features = {
@@ -867,7 +877,7 @@ static const struct platform_features bdx_features = {
.has_irtl_msrs = 1,
.has_cst_auto_convension = 1,
.trl_msrs = TRL_BASE,
- .rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL,
+ .plat_rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL,
.has_fixed_rapl_unit = 1,
};
@@ -884,7 +894,7 @@ static const struct platform_features skl_features = {
.has_ext_cst_msrs = 1,
.trl_msrs = TRL_BASE,
.tcc_offset_bits = 6,
- .rapl_msrs = RAPL_PKG_ALL | RAPL_CORE_ALL | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_GFX | RAPL_PSYS,
+ .plat_rapl_msrs = RAPL_PKG_ALL | RAPL_CORE_ALL | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_GFX | RAPL_PSYS,
.enable_tsc_tweak = 1,
};
@@ -901,7 +911,7 @@ static const struct platform_features cnl_features = {
.has_ext_cst_msrs = 1,
.trl_msrs = TRL_BASE,
.tcc_offset_bits = 6,
- .rapl_msrs = RAPL_PKG_ALL | RAPL_CORE_ALL | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_GFX | RAPL_PSYS,
+ .plat_rapl_msrs = RAPL_PKG_ALL | RAPL_CORE_ALL | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_GFX | RAPL_PSYS,
.enable_tsc_tweak = 1,
};
@@ -919,7 +929,7 @@ static const struct platform_features adl_features = {
.has_ext_cst_msrs = cnl_features.has_ext_cst_msrs,
.trl_msrs = cnl_features.trl_msrs,
.tcc_offset_bits = cnl_features.tcc_offset_bits,
- .rapl_msrs = cnl_features.rapl_msrs,
+ .plat_rapl_msrs = cnl_features.plat_rapl_msrs,
.enable_tsc_tweak = cnl_features.enable_tsc_tweak,
};
@@ -937,7 +947,7 @@ static const struct platform_features lnl_features = {
.has_ext_cst_msrs = adl_features.has_ext_cst_msrs,
.trl_msrs = adl_features.trl_msrs,
.tcc_offset_bits = adl_features.tcc_offset_bits,
- .rapl_msrs = adl_features.rapl_msrs,
+ .plat_rapl_msrs = adl_features.plat_rapl_msrs,
.enable_tsc_tweak = adl_features.enable_tsc_tweak,
};
@@ -952,7 +962,7 @@ static const struct platform_features skx_features = {
.has_irtl_msrs = 1,
.has_cst_auto_convension = 1,
.trl_msrs = TRL_BASE | TRL_CORECOUNT,
- .rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL,
+ .plat_rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL,
.has_fixed_rapl_unit = 1,
};
@@ -968,7 +978,7 @@ static const struct platform_features icx_features = {
.has_irtl_msrs = 1,
.has_cst_prewake_bit = 1,
.trl_msrs = TRL_BASE | TRL_CORECOUNT,
- .rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL | RAPL_PSYS,
+ .plat_rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL | RAPL_PSYS,
.has_fixed_rapl_unit = 1,
};
@@ -985,7 +995,7 @@ static const struct platform_features spr_features = {
.has_cst_prewake_bit = 1,
.has_fixed_rapl_psys_unit = 1,
.trl_msrs = TRL_BASE | TRL_CORECOUNT,
- .rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL | RAPL_PSYS,
+ .plat_rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL | RAPL_PSYS,
};
static const struct platform_features dmr_features = {
@@ -1000,7 +1010,7 @@ static const struct platform_features dmr_features = {
.has_fixed_rapl_psys_unit = spr_features.has_fixed_rapl_psys_unit,
.trl_msrs = spr_features.trl_msrs,
.has_msr_module_c6_res_ms = 1, /* DMR has Dual-Core-Module and MC6 MSR */
- .rapl_msrs = 0, /* DMR does not have RAPL MSRs */
+ .plat_rapl_msrs = 0, /* DMR does not have RAPL MSRs */
.plr_msrs = 0, /* DMR does not have PLR MSRs */
.has_irtl_msrs = 0, /* DMR does not have IRTL MSRs */
.has_config_tdp = 0, /* DMR does not have CTDP MSRs */
@@ -1019,7 +1029,7 @@ static const struct platform_features srf_features = {
.has_irtl_msrs = 1,
.has_cst_prewake_bit = 1,
.trl_msrs = TRL_BASE | TRL_CORECOUNT,
- .rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL | RAPL_PSYS,
+ .plat_rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL | RAPL_PSYS,
};
static const struct platform_features grr_features = {
@@ -1035,7 +1045,7 @@ static const struct platform_features grr_features = {
.has_irtl_msrs = 1,
.has_cst_prewake_bit = 1,
.trl_msrs = TRL_BASE | TRL_CORECOUNT,
- .rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL | RAPL_PSYS,
+ .plat_rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL | RAPL_PSYS,
};
static const struct platform_features slv_features = {
@@ -1048,7 +1058,7 @@ static const struct platform_features slv_features = {
.has_msr_c6_demotion_policy_config = 1,
.has_msr_atom_pkg_c6_residency = 1,
.trl_msrs = TRL_ATOM,
- .rapl_msrs = RAPL_PKG | RAPL_CORE,
+ .plat_rapl_msrs = RAPL_PKG | RAPL_CORE,
.has_rapl_divisor = 1,
.rapl_quirk_tdp = 30,
};
@@ -1061,7 +1071,7 @@ static const struct platform_features slvd_features = {
.cst_limit = CST_LIMIT_SLV,
.has_msr_atom_pkg_c6_residency = 1,
.trl_msrs = TRL_BASE,
- .rapl_msrs = RAPL_PKG | RAPL_CORE,
+ .plat_rapl_msrs = RAPL_PKG | RAPL_CORE,
.rapl_quirk_tdp = 30,
};
@@ -1082,7 +1092,7 @@ static const struct platform_features gmt_features = {
.cst_limit = CST_LIMIT_GMT,
.has_irtl_msrs = 1,
.trl_msrs = TRL_BASE | TRL_CORECOUNT,
- .rapl_msrs = RAPL_PKG | RAPL_PKG_POWER_INFO,
+ .plat_rapl_msrs = RAPL_PKG | RAPL_PKG_POWER_INFO,
};
static const struct platform_features gmtd_features = {
@@ -1095,7 +1105,7 @@ static const struct platform_features gmtd_features = {
.has_irtl_msrs = 1,
.has_msr_core_c1_res = 1,
.trl_msrs = TRL_BASE | TRL_CORECOUNT,
- .rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL | RAPL_CORE_ENERGY_STATUS,
+ .plat_rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL | RAPL_CORE_ENERGY_STATUS,
};
static const struct platform_features gmtp_features = {
@@ -1107,7 +1117,7 @@ static const struct platform_features gmtp_features = {
.cst_limit = CST_LIMIT_GMT,
.has_irtl_msrs = 1,
.trl_msrs = TRL_BASE,
- .rapl_msrs = RAPL_PKG | RAPL_PKG_POWER_INFO,
+ .plat_rapl_msrs = RAPL_PKG | RAPL_PKG_POWER_INFO,
};
static const struct platform_features tmt_features = {
@@ -1118,7 +1128,7 @@ static const struct platform_features tmt_features = {
.cst_limit = CST_LIMIT_GMT,
.has_irtl_msrs = 1,
.trl_msrs = TRL_BASE,
- .rapl_msrs = RAPL_PKG_ALL | RAPL_CORE_ALL | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_GFX,
+ .plat_rapl_msrs = RAPL_PKG_ALL | RAPL_CORE_ALL | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_GFX,
.enable_tsc_tweak = 1,
};
@@ -1130,7 +1140,7 @@ static const struct platform_features tmtd_features = {
.cst_limit = CST_LIMIT_GMT,
.has_irtl_msrs = 1,
.trl_msrs = TRL_BASE | TRL_CORECOUNT,
- .rapl_msrs = RAPL_PKG_ALL,
+ .plat_rapl_msrs = RAPL_PKG_ALL,
};
static const struct platform_features knl_features = {
@@ -1142,7 +1152,7 @@ static const struct platform_features knl_features = {
.cst_limit = CST_LIMIT_KNL,
.has_msr_knl_core_c6_residency = 1,
.trl_msrs = TRL_KNL,
- .rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL,
+ .plat_rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL,
.has_fixed_rapl_unit = 1,
.need_perf_multiplier = 1,
};
@@ -1151,7 +1161,7 @@ static const struct platform_features default_features = {
};
static const struct platform_features amd_features_with_rapl = {
- .rapl_msrs = RAPL_AMD_F17H,
+ .plat_rapl_msrs = RAPL_AMD_F17H,
.has_per_core_rapl = 1,
.rapl_quirk_tdp = 280, /* This is the max stock TDP of HEDT/Server Fam17h+ chips */
};
@@ -1195,7 +1205,7 @@ static const struct platform_data turbostat_pdata[] = {
{ INTEL_EMERALDRAPIDS_X, &spr_features },
{ INTEL_GRANITERAPIDS_X, &spr_features },
{ INTEL_GRANITERAPIDS_D, &spr_features },
- { INTEL_PANTHERCOVE_X, &dmr_features },
+ { INTEL_DIAMONDRAPIDS_X, &dmr_features },
{ INTEL_LAKEFIELD, &cnl_features },
{ INTEL_ALDERLAKE, &adl_features },
{ INTEL_ALDERLAKE_L, &adl_features },
@@ -1210,6 +1220,9 @@ static const struct platform_data turbostat_pdata[] = {
{ INTEL_ARROWLAKE, &adl_features },
{ INTEL_LUNARLAKE_M, &lnl_features },
{ INTEL_PANTHERLAKE_L, &lnl_features },
+ { INTEL_NOVALAKE, &lnl_features },
+ { INTEL_NOVALAKE_L, &lnl_features },
+ { INTEL_WILDCATLAKE_L, &lnl_features },
{ INTEL_ATOM_SILVERMONT, &slv_features },
{ INTEL_ATOM_SILVERMONT_D, &slvd_features },
{ INTEL_ATOM_AIRMONT, &amt_features },
@@ -1294,8 +1307,7 @@ char *progname;
#define CPU_SUBSET_MAXCPUS 8192 /* need to use before probe... */
cpu_set_t *cpu_present_set, *cpu_possible_set, *cpu_effective_set, *cpu_allowed_set, *cpu_affinity_set, *cpu_subset;
-size_t cpu_present_setsize, cpu_possible_setsize, cpu_effective_setsize, cpu_allowed_setsize, cpu_affinity_setsize,
- cpu_subset_size;
+size_t cpu_present_setsize, cpu_possible_setsize, cpu_effective_setsize, cpu_allowed_setsize, cpu_affinity_setsize, cpu_subset_size;
#define MAX_ADDED_THREAD_COUNTERS 24
#define MAX_ADDED_CORE_COUNTERS 8
#define MAX_ADDED_PACKAGE_COUNTERS 16
@@ -1890,7 +1902,7 @@ int pmt_telemdir_sort(const struct dirent **a, const struct dirent **b)
sscanf((*a)->d_name, "telem%u", &aidx);
sscanf((*b)->d_name, "telem%u", &bidx);
- return aidx >= bidx;
+ return (aidx > bidx) ? 1 : (aidx < bidx) ? -1 : 0;
}
const struct dirent *pmt_diriter_next(struct pmt_diriter_t *iter)
@@ -1991,6 +2003,10 @@ void pmt_counter_resize(struct pmt_counter *pcounter, unsigned int new_size)
pmt_counter_resize_(pcounter, new_size);
}
+struct llc_stats {
+ unsigned long long references;
+ unsigned long long misses;
+};
struct thread_data {
struct timeval tv_begin;
struct timeval tv_end;
@@ -2003,6 +2019,7 @@ struct thread_data {
unsigned long long irq_count;
unsigned long long nmi_count;
unsigned int smi_count;
+ struct llc_stats llc;
unsigned int cpu_id;
unsigned int apic_id;
unsigned int x2apic_id;
@@ -2118,7 +2135,7 @@ off_t idx_to_offset(int idx)
switch (idx) {
case IDX_PKG_ENERGY:
- if (platform->rapl_msrs & RAPL_AMD_F17H)
+ if (valid_rapl_msrs & RAPL_AMD_F17H)
offset = MSR_PKG_ENERGY_STAT;
else
offset = MSR_PKG_ENERGY_STATUS;
@@ -2184,19 +2201,19 @@ int idx_valid(int idx)
{
switch (idx) {
case IDX_PKG_ENERGY:
- return platform->rapl_msrs & (RAPL_PKG | RAPL_AMD_F17H);
+ return valid_rapl_msrs & (RAPL_PKG | RAPL_AMD_F17H);
case IDX_DRAM_ENERGY:
- return platform->rapl_msrs & RAPL_DRAM;
+ return valid_rapl_msrs & RAPL_DRAM;
case IDX_PP0_ENERGY:
- return platform->rapl_msrs & RAPL_CORE_ENERGY_STATUS;
+ return valid_rapl_msrs & RAPL_CORE_ENERGY_STATUS;
case IDX_PP1_ENERGY:
- return platform->rapl_msrs & RAPL_GFX;
+ return valid_rapl_msrs & RAPL_GFX;
case IDX_PKG_PERF:
- return platform->rapl_msrs & RAPL_PKG_PERF_STATUS;
+ return valid_rapl_msrs & RAPL_PKG_PERF_STATUS;
case IDX_DRAM_PERF:
- return platform->rapl_msrs & RAPL_DRAM_PERF_STATUS;
+ return valid_rapl_msrs & RAPL_DRAM_PERF_STATUS;
case IDX_PSYS_ENERGY:
- return platform->rapl_msrs & RAPL_PSYS;
+ return valid_rapl_msrs & RAPL_PSYS;
default:
return 0;
}
@@ -2362,23 +2379,19 @@ int for_all_cpus(int (func) (struct thread_data *, struct core_data *, struct pk
return retval;
}
-int is_cpu_first_thread_in_core(PER_THREAD_PARAMS)
+int is_cpu_first_thread_in_core(struct thread_data *t, struct core_data *c)
{
- UNUSED(p);
-
return ((int)t->cpu_id == c->base_cpu || c->base_cpu < 0);
}
-int is_cpu_first_core_in_package(PER_THREAD_PARAMS)
+int is_cpu_first_core_in_package(struct thread_data *t, struct pkg_data *p)
{
- UNUSED(c);
-
return ((int)t->cpu_id == p->base_cpu || p->base_cpu < 0);
}
-int is_cpu_first_thread_in_package(PER_THREAD_PARAMS)
+int is_cpu_first_thread_in_package(struct thread_data *t, struct core_data *c, struct pkg_data *p)
{
- return is_cpu_first_thread_in_core(t, c, p) && is_cpu_first_core_in_package(t, c, p);
+ return is_cpu_first_thread_in_core(t, c) && is_cpu_first_core_in_package(t, p);
}
int cpu_migrate(int cpu)
@@ -2400,20 +2413,11 @@ int get_msr_fd(int cpu)
if (fd)
return fd;
-#if defined(ANDROID)
- sprintf(pathname, "/dev/msr%d", cpu);
-#else
- sprintf(pathname, "/dev/cpu/%d/msr", cpu);
-#endif
+ sprintf(pathname, use_android_msr_path ? "/dev/msr%d" : "/dev/cpu/%d/msr", cpu);
fd = open(pathname, O_RDONLY);
if (fd < 0)
-#if defined(ANDROID)
- err(-1, "%s open failed, try chown or chmod +r /dev/msr*, "
- "or run with --no-msr, or run as root", pathname);
-#else
- err(-1, "%s open failed, try chown or chmod +r /dev/cpu/*/msr, "
- "or run with --no-msr, or run as root", pathname);
-#endif
+ err(-1, "%s open failed, try chown or chmod +r %s, "
+ "or run with --no-msr, or run as root", pathname, use_android_msr_path ? "/dev/msr*" : "/dev/cpu/*/msr");
fd_percpu[cpu] = fd;
return fd;
@@ -2432,6 +2436,13 @@ static void bic_disable_msr_access(void)
free_sys_msr_counters();
}
+static void bic_disable_perf_access(void)
+{
+ CLR_BIC(BIC_IPC, &bic_enabled);
+ CLR_BIC(BIC_LLC_RPS, &bic_enabled);
+ CLR_BIC(BIC_LLC_HIT, &bic_enabled);
+}
+
static long perf_event_open(struct perf_event_attr *hw_event, pid_t pid, int cpu, int group_fd, unsigned long flags)
{
assert(!no_perf);
@@ -2512,7 +2523,7 @@ int add_rapl_msr_counter(int cpu, const struct rapl_counter_arch_info *cai)
{
int ret;
- if (!(platform->rapl_msrs & cai->feature_mask))
+ if (!(valid_rapl_msrs & cai->feature_mask))
return -1;
ret = add_msr_counter(cpu, cai->msr);
@@ -2656,6 +2667,12 @@ void bic_lookup(cpu_set_t *ret_set, char *name_list, enum show_hide_mode mode)
} else if (!strcmp(name_list, "idle")) {
CPU_OR(ret_set, ret_set, &bic_group_idle);
break;
+ } else if (!strcmp(name_list, "cache")) {
+ CPU_OR(ret_set, ret_set, &bic_group_cache);
+ break;
+ } else if (!strcmp(name_list, "llc")) {
+ CPU_OR(ret_set, ret_set, &bic_group_cache);
+ break;
} else if (!strcmp(name_list, "swidle")) {
CPU_OR(ret_set, ret_set, &bic_group_sw_idle);
break;
@@ -2677,8 +2694,7 @@ void bic_lookup(cpu_set_t *ret_set, char *name_list, enum show_hide_mode mode)
if (mode == SHOW_LIST) {
deferred_add_names[deferred_add_index++] = name_list;
if (deferred_add_index >= MAX_DEFERRED) {
- fprintf(stderr, "More than max %d un-recognized --add options '%s'\n",
- MAX_DEFERRED, name_list);
+ fprintf(stderr, "More than max %d un-recognized --add options '%s'\n", MAX_DEFERRED, name_list);
help();
exit(1);
}
@@ -2687,8 +2703,7 @@ void bic_lookup(cpu_set_t *ret_set, char *name_list, enum show_hide_mode mode)
if (debug)
fprintf(stderr, "deferred \"%s\"\n", name_list);
if (deferred_skip_index >= MAX_DEFERRED) {
- fprintf(stderr, "More than max %d un-recognized --skip options '%s'\n",
- MAX_DEFERRED, name_list);
+ fprintf(stderr, "More than max %d un-recognized --skip options '%s'\n", MAX_DEFERRED, name_list);
help();
exit(1);
}
@@ -2702,6 +2717,47 @@ void bic_lookup(cpu_set_t *ret_set, char *name_list, enum show_hide_mode mode)
}
}
+/*
+ * print_name()
+ * Print column header name for raw 64-bit counter in 16 columns (at least 8-char plus a tab)
+ * Otherwise, allow the name + tab to fit within 8-coumn tab-stop.
+ * In both cases, left justififed, just like other turbostat columns,
+ * to allow the column values to consume the tab.
+ *
+ * Yes, 32-bit counters can overflow 8-columns, and
+ * 64-bit counters can overflow 16-columns, but that is uncommon.
+ */
+static inline int print_name(int width, int *printed, char *delim, char *name, enum counter_type type, enum counter_format format)
+{
+ UNUSED(type);
+
+ if (format == FORMAT_RAW && width >= 64)
+ return (sprintf(outp, "%s%-8s", (*printed++ ? delim : ""), name));
+ else
+ return (sprintf(outp, "%s%s", (*printed++ ? delim : ""), name));
+}
+
+static inline int print_hex_value(int width, int *printed, char *delim, unsigned long long value)
+{
+ if (width <= 32)
+ return (sprintf(outp, "%s%08x", (*printed++ ? delim : ""), (unsigned int)value));
+ else
+ return (sprintf(outp, "%s%016llx", (*printed++ ? delim : ""), value));
+}
+
+static inline int print_decimal_value(int width, int *printed, char *delim, unsigned long long value)
+{
+ if (width <= 32)
+ return (sprintf(outp, "%s%d", (*printed++ ? delim : ""), (unsigned int)value));
+ else
+ return (sprintf(outp, "%s%-8lld", (*printed++ ? delim : ""), value));
+}
+
+static inline int print_float_value(int *printed, char *delim, double value)
+{
+ return (sprintf(outp, "%s%0.2f", (*printed++ ? delim : ""), value));
+}
+
void print_header(char *delim)
{
struct msr_counter *mp;
@@ -2757,50 +2813,28 @@ void print_header(char *delim)
if (DO_BIC(BIC_SMI))
outp += sprintf(outp, "%sSMI", (printed++ ? delim : ""));
- for (mp = sys.tp; mp; mp = mp->next) {
+ if (DO_BIC(BIC_LLC_RPS))
+ outp += sprintf(outp, "%sLLCkRPS", (printed++ ? delim : ""));
- if (mp->format == FORMAT_RAW || mp->format == FORMAT_AVERAGE) {
- if (mp->width == 64)
- outp += sprintf(outp, "%s%18.18s", (printed++ ? delim : ""), mp->name);
- else
- outp += sprintf(outp, "%s%10.10s", (printed++ ? delim : ""), mp->name);
- } else {
- if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns)
- outp += sprintf(outp, "%s%8s", (printed++ ? delim : ""), mp->name);
- else
- outp += sprintf(outp, "%s%s", (printed++ ? delim : ""), mp->name);
- }
- }
+ if (DO_BIC(BIC_LLC_HIT))
+ outp += sprintf(outp, "%sLLC%%hit", (printed++ ? delim : ""));
- for (pp = sys.perf_tp; pp; pp = pp->next) {
+ for (mp = sys.tp; mp; mp = mp->next)
+ outp += print_name(mp->width, &printed, delim, mp->name, mp->type, mp->format);
- if (pp->format == FORMAT_RAW) {
- if (pp->width == 64)
- outp += sprintf(outp, "%s%18.18s", (printed++ ? delim : ""), pp->name);
- else
- outp += sprintf(outp, "%s%10.10s", (printed++ ? delim : ""), pp->name);
- } else {
- if ((pp->type == COUNTER_ITEMS) && sums_need_wide_columns)
- outp += sprintf(outp, "%s%8s", (printed++ ? delim : ""), pp->name);
- else
- outp += sprintf(outp, "%s%s", (printed++ ? delim : ""), pp->name);
- }
- }
+ for (pp = sys.perf_tp; pp; pp = pp->next)
+ outp += print_name(pp->width, &printed, delim, pp->name, pp->type, pp->format);
ppmt = sys.pmt_tp;
while (ppmt) {
switch (ppmt->type) {
case PMT_TYPE_RAW:
- if (pmt_counter_get_width(ppmt) <= 32)
- outp += sprintf(outp, "%s%10.10s", (printed++ ? delim : ""), ppmt->name);
- else
- outp += sprintf(outp, "%s%18.18s", (printed++ ? delim : ""), ppmt->name);
-
+ outp += print_name(pmt_counter_get_width(ppmt), &printed, delim, ppmt->name, COUNTER_ITEMS, ppmt->format);
break;
case PMT_TYPE_XTAL_TIME:
case PMT_TYPE_TCORE_CLOCK:
- outp += sprintf(outp, "%s%s", (printed++ ? delim : ""), ppmt->name);
+ outp += print_name(32, &printed, delim, ppmt->name, COUNTER_ITEMS, ppmt->format);
break;
}
@@ -2825,63 +2859,36 @@ void print_header(char *delim)
if (DO_BIC(BIC_CORE_THROT_CNT))
outp += sprintf(outp, "%sCoreThr", (printed++ ? delim : ""));
- if (platform->rapl_msrs && !rapl_joules) {
+ if (valid_rapl_msrs && !rapl_joules) {
if (DO_BIC(BIC_CorWatt) && platform->has_per_core_rapl)
outp += sprintf(outp, "%sCorWatt", (printed++ ? delim : ""));
- } else if (platform->rapl_msrs && rapl_joules) {
+ } else if (valid_rapl_msrs && rapl_joules) {
if (DO_BIC(BIC_Cor_J) && platform->has_per_core_rapl)
outp += sprintf(outp, "%sCor_J", (printed++ ? delim : ""));
}
- for (mp = sys.cp; mp; mp = mp->next) {
- if (mp->format == FORMAT_RAW || mp->format == FORMAT_AVERAGE) {
- if (mp->width == 64)
- outp += sprintf(outp, "%s%18.18s", delim, mp->name);
- else
- outp += sprintf(outp, "%s%10.10s", delim, mp->name);
- } else {
- if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns)
- outp += sprintf(outp, "%s%8s", delim, mp->name);
- else
- outp += sprintf(outp, "%s%s", delim, mp->name);
- }
- }
+ for (mp = sys.cp; mp; mp = mp->next)
+ outp += print_name(mp->width, &printed, delim, mp->name, mp->type, mp->format);
- for (pp = sys.perf_cp; pp; pp = pp->next) {
-
- if (pp->format == FORMAT_RAW) {
- if (pp->width == 64)
- outp += sprintf(outp, "%s%18.18s", (printed++ ? delim : ""), pp->name);
- else
- outp += sprintf(outp, "%s%10.10s", (printed++ ? delim : ""), pp->name);
- } else {
- if ((pp->type == COUNTER_ITEMS) && sums_need_wide_columns)
- outp += sprintf(outp, "%s%8s", (printed++ ? delim : ""), pp->name);
- else
- outp += sprintf(outp, "%s%s", (printed++ ? delim : ""), pp->name);
- }
- }
+ for (pp = sys.perf_cp; pp; pp = pp->next)
+ outp += print_name(pp->width, &printed, delim, pp->name, pp->type, pp->format);
ppmt = sys.pmt_cp;
while (ppmt) {
switch (ppmt->type) {
case PMT_TYPE_RAW:
- if (pmt_counter_get_width(ppmt) <= 32)
- outp += sprintf(outp, "%s%10.10s", (printed++ ? delim : ""), ppmt->name);
- else
- outp += sprintf(outp, "%s%18.18s", (printed++ ? delim : ""), ppmt->name);
+ outp += print_name(pmt_counter_get_width(ppmt), &printed, delim, ppmt->name, COUNTER_ITEMS, ppmt->format);
break;
case PMT_TYPE_XTAL_TIME:
case PMT_TYPE_TCORE_CLOCK:
- outp += sprintf(outp, "%s%s", (printed++ ? delim : ""), ppmt->name);
+ outp += print_name(32, &printed, delim, ppmt->name, COUNTER_ITEMS, ppmt->format);
break;
}
ppmt = ppmt->next;
}
-
if (DO_BIC(BIC_PkgTmp))
outp += sprintf(outp, "%sPkgTmp", (printed++ ? delim : ""));
@@ -2963,51 +2970,22 @@ void print_header(char *delim)
if (DO_BIC(BIC_UNCORE_MHZ))
outp += sprintf(outp, "%sUncMHz", (printed++ ? delim : ""));
- for (mp = sys.pp; mp; mp = mp->next) {
- if (mp->format == FORMAT_RAW || mp->format == FORMAT_AVERAGE) {
- if (mp->width == 64)
- outp += sprintf(outp, "%s%18.18s", delim, mp->name);
- else if (mp->width == 32)
- outp += sprintf(outp, "%s%10.10s", delim, mp->name);
- else
- outp += sprintf(outp, "%s%7.7s", delim, mp->name);
- } else {
- if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns)
- outp += sprintf(outp, "%s%8s", delim, mp->name);
- else
- outp += sprintf(outp, "%s%7.7s", delim, mp->name);
- }
- }
-
- for (pp = sys.perf_pp; pp; pp = pp->next) {
+ for (mp = sys.pp; mp; mp = mp->next)
+ outp += print_name(mp->width, &printed, delim, mp->name, mp->type, mp->format);
- if (pp->format == FORMAT_RAW) {
- if (pp->width == 64)
- outp += sprintf(outp, "%s%18.18s", (printed++ ? delim : ""), pp->name);
- else
- outp += sprintf(outp, "%s%10.10s", (printed++ ? delim : ""), pp->name);
- } else {
- if ((pp->type == COUNTER_ITEMS) && sums_need_wide_columns)
- outp += sprintf(outp, "%s%8s", (printed++ ? delim : ""), pp->name);
- else
- outp += sprintf(outp, "%s%s", (printed++ ? delim : ""), pp->name);
- }
- }
+ for (pp = sys.perf_pp; pp; pp = pp->next)
+ outp += print_name(pp->width, &printed, delim, pp->name, pp->type, pp->format);
ppmt = sys.pmt_pp;
while (ppmt) {
switch (ppmt->type) {
case PMT_TYPE_RAW:
- if (pmt_counter_get_width(ppmt) <= 32)
- outp += sprintf(outp, "%s%10.10s", (printed++ ? delim : ""), ppmt->name);
- else
- outp += sprintf(outp, "%s%18.18s", (printed++ ? delim : ""), ppmt->name);
-
+ outp += print_name(pmt_counter_get_width(ppmt), &printed, delim, ppmt->name, COUNTER_ITEMS, ppmt->format);
break;
case PMT_TYPE_XTAL_TIME:
case PMT_TYPE_TCORE_CLOCK:
- outp += sprintf(outp, "%s%s", (printed++ ? delim : ""), ppmt->name);
+ outp += print_name(32, &printed, delim, ppmt->name, COUNTER_ITEMS, ppmt->format);
break;
}
@@ -3022,6 +3000,25 @@ void print_header(char *delim)
outp += sprintf(outp, "\n");
}
+/*
+ * pct()
+ *
+ * If absolute value is < 1.1, return percentage
+ * otherwise, return nan
+ *
+ * return value is appropriate for printing percentages with %f
+ * while flagging some obvious erroneous values.
+ */
+double pct(double d)
+{
+
+ double abs = fabs(d);
+
+ if (abs < 1.10)
+ return (100.0 * d);
+ return nan("");
+}
+
int dump_counters(PER_THREAD_PARAMS)
{
int i;
@@ -3047,14 +3044,16 @@ int dump_counters(PER_THREAD_PARAMS)
if (DO_BIC(BIC_SMI))
outp += sprintf(outp, "SMI: %d\n", t->smi_count);
+ outp += sprintf(outp, "LLC refs: %lld", t->llc.references);
+ outp += sprintf(outp, "LLC miss: %lld", t->llc.misses);
+ outp += sprintf(outp, "LLC Hit%%: %.2f", pct((t->llc.references - t->llc.misses) / t->llc.references));
+
for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
- outp +=
- sprintf(outp, "tADDED [%d] %8s msr0x%x: %08llX %s\n", i, mp->name, mp->msr_num,
- t->counter[i], mp->sp->path);
+ outp += sprintf(outp, "tADDED [%d] %8s msr0x%x: %08llX %s\n", i, mp->name, mp->msr_num, t->counter[i], mp->sp->path);
}
}
- if (c && is_cpu_first_thread_in_core(t, c, p)) {
+ if (c && is_cpu_first_thread_in_core(t, c)) {
outp += sprintf(outp, "core: %d\n", c->core_id);
outp += sprintf(outp, "c3: %016llX\n", c->c3);
outp += sprintf(outp, "c6: %016llX\n", c->c6);
@@ -3069,14 +3068,12 @@ int dump_counters(PER_THREAD_PARAMS)
outp += sprintf(outp, "Joules: %0llX (scale: %lf)\n", energy_value, energy_scale);
for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
- outp +=
- sprintf(outp, "cADDED [%d] %8s msr0x%x: %08llX %s\n", i, mp->name, mp->msr_num,
- c->counter[i], mp->sp->path);
+ outp += sprintf(outp, "cADDED [%d] %8s msr0x%x: %08llX %s\n", i, mp->name, mp->msr_num, c->counter[i], mp->sp->path);
}
outp += sprintf(outp, "mc6_us: %016llX\n", c->mc6_us);
}
- if (p && is_cpu_first_core_in_package(t, c, p)) {
+ if (p && is_cpu_first_core_in_package(t, p)) {
outp += sprintf(outp, "package: %d\n", p->package_id);
outp += sprintf(outp, "Weighted cores: %016llX\n", p->pkg_wtd_core_c0);
@@ -3106,9 +3103,7 @@ int dump_counters(PER_THREAD_PARAMS)
outp += sprintf(outp, "PTM: %dC\n", p->pkg_temp_c);
for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
- outp +=
- sprintf(outp, "pADDED [%d] %8s msr0x%x: %08llX %s\n", i, mp->name, mp->msr_num,
- p->counter[i], mp->sp->path);
+ outp += sprintf(outp, "pADDED [%d] %8s msr0x%x: %08llX %s\n", i, mp->name, mp->msr_num, p->counter[i], mp->sp->path);
}
}
@@ -3134,6 +3129,26 @@ double rapl_counter_get_value(const struct rapl_counter *c, enum rapl_unit desir
return scaled;
}
+void get_perf_llc_stats(int cpu, struct llc_stats *llc)
+{
+ struct read_format {
+ unsigned long long num_read;
+ struct llc_stats llc;
+ } r;
+ const ssize_t expected_read_size = sizeof(r);
+ ssize_t actual_read_size;
+
+ actual_read_size = read(fd_llc_percpu[cpu], &r, expected_read_size);
+
+ if (actual_read_size == -1)
+ err(-1, "%s(cpu%d,) %d,,%ld\n", __func__, cpu, fd_llc_percpu[cpu], expected_read_size);
+
+ llc->references = r.llc.references;
+ llc->misses = r.llc.misses;
+ if (actual_read_size != expected_read_size)
+ warn("%s: failed to read perf_data (req %zu act %zu)", __func__, expected_read_size, actual_read_size);
+}
+
/*
* column formatting convention & formats
*/
@@ -3143,7 +3158,8 @@ int format_counters(PER_THREAD_PARAMS)
struct platform_counters *pplat_cnt = NULL;
double interval_float, tsc;
- char *fmt8;
+ char *fmt8 = "%s%.2f";
+
int i;
struct msr_counter *mp;
struct perf_counter_info *pp;
@@ -3157,11 +3173,11 @@ int format_counters(PER_THREAD_PARAMS)
}
/* if showing only 1st thread in core and this isn't one, bail out */
- if (show_core_only && !is_cpu_first_thread_in_core(t, c, p))
+ if (show_core_only && !is_cpu_first_thread_in_core(t, c))
return 0;
/* if showing only 1st thread in pkg and this isn't one, bail out */
- if (show_pkg_only && !is_cpu_first_core_in_package(t, c, p))
+ if (show_pkg_only && !is_cpu_first_core_in_package(t, p))
return 0;
/*if not summary line and --cpu is used */
@@ -3223,8 +3239,7 @@ int format_counters(PER_THREAD_PARAMS)
}
if (DO_BIC(BIC_Node)) {
if (t)
- outp += sprintf(outp, "%s%d",
- (printed++ ? delim : ""), cpus[t->cpu_id].physical_node_id);
+ outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), cpus[t->cpu_id].physical_node_id);
else
outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
}
@@ -3246,15 +3261,13 @@ int format_counters(PER_THREAD_PARAMS)
outp += sprintf(outp, "%s%.0f", (printed++ ? delim : ""), 1.0 / units * t->aperf / interval_float);
if (DO_BIC(BIC_Busy))
- outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * t->mperf / tsc);
+ outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), pct(t->mperf / tsc));
if (DO_BIC(BIC_Bzy_MHz)) {
if (has_base_hz)
- outp +=
- sprintf(outp, "%s%.0f", (printed++ ? delim : ""), base_hz / units * t->aperf / t->mperf);
+ outp += sprintf(outp, "%s%.0f", (printed++ ? delim : ""), base_hz / units * t->aperf / t->mperf);
else
- outp += sprintf(outp, "%s%.0f", (printed++ ? delim : ""),
- tsc / units * t->aperf / t->mperf / interval_float);
+ outp += sprintf(outp, "%s%.0f", (printed++ ? delim : ""), tsc / units * t->aperf / t->mperf / interval_float);
}
if (DO_BIC(BIC_TSC_MHz))
@@ -3283,96 +3296,81 @@ int format_counters(PER_THREAD_PARAMS)
if (DO_BIC(BIC_SMI))
outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), t->smi_count);
- /* Added counters */
+ /* LLC Stats */
+ if (DO_BIC(BIC_LLC_RPS) || DO_BIC(BIC_LLC_HIT)) {
+ if (DO_BIC(BIC_LLC_RPS))
+ outp += sprintf(outp, "%s%.0f", (printed++ ? delim : ""), t->llc.references / interval_float / 1000);
+
+ if (DO_BIC(BIC_LLC_HIT))
+ outp += sprintf(outp, fmt8, (printed++ ? delim : ""), pct((t->llc.references - t->llc.misses) / t->llc.references));
+ }
+
+ /* Added Thread Counters */
for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
- if (mp->format == FORMAT_RAW || mp->format == FORMAT_AVERAGE) {
- if (mp->width == 32)
- outp +=
- sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), (unsigned int)t->counter[i]);
- else
- outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), t->counter[i]);
- } else if (mp->format == FORMAT_DELTA) {
- if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns)
- outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), t->counter[i]);
- else
- outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), t->counter[i]);
- } else if (mp->format == FORMAT_PERCENT) {
+ if (mp->format == FORMAT_RAW)
+ outp += print_hex_value(mp->width, &printed, delim, t->counter[i]);
+ else if (mp->format == FORMAT_DELTA || mp->format == FORMAT_AVERAGE)
+ outp += print_decimal_value(mp->width, &printed, delim, t->counter[i]);
+ else if (mp->format == FORMAT_PERCENT) {
if (mp->type == COUNTER_USEC)
- outp +=
- sprintf(outp, "%s%.2f", (printed++ ? delim : ""),
- t->counter[i] / interval_float / 10000);
+ outp += print_float_value(&printed, delim, t->counter[i] / interval_float / 10000);
else
- outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * t->counter[i] / tsc);
+ outp += print_float_value(&printed, delim, pct(t->counter[i] / tsc));
}
}
- /* Added perf counters */
+ /* Added perf Thread Counters */
for (i = 0, pp = sys.perf_tp; pp; ++i, pp = pp->next) {
- if (pp->format == FORMAT_RAW) {
- if (pp->width == 32)
- outp +=
- sprintf(outp, "%s0x%08x", (printed++ ? delim : ""),
- (unsigned int)t->perf_counter[i]);
- else
- outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), t->perf_counter[i]);
- } else if (pp->format == FORMAT_DELTA) {
- if ((pp->type == COUNTER_ITEMS) && sums_need_wide_columns)
- outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), t->perf_counter[i]);
- else
- outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), t->perf_counter[i]);
- } else if (pp->format == FORMAT_PERCENT) {
+ if (pp->format == FORMAT_RAW)
+ outp += print_hex_value(pp->width, &printed, delim, t->perf_counter[i]);
+ else if (pp->format == FORMAT_DELTA || mp->format == FORMAT_AVERAGE)
+ outp += print_decimal_value(pp->width, &printed, delim, t->perf_counter[i]);
+ else if (pp->format == FORMAT_PERCENT) {
if (pp->type == COUNTER_USEC)
- outp +=
- sprintf(outp, "%s%.2f", (printed++ ? delim : ""),
- t->perf_counter[i] / interval_float / 10000);
+ outp += print_float_value(&printed, delim, t->perf_counter[i] / interval_float / 10000);
else
- outp +=
- sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * t->perf_counter[i] / tsc);
+ outp += print_float_value(&printed, delim, pct(t->perf_counter[i] / tsc));
}
}
+ /* Added PMT Thread Counters */
for (i = 0, ppmt = sys.pmt_tp; ppmt; i++, ppmt = ppmt->next) {
const unsigned long value_raw = t->pmt_counter[i];
double value_converted;
switch (ppmt->type) {
case PMT_TYPE_RAW:
- if (pmt_counter_get_width(ppmt) <= 32)
- outp += sprintf(outp, "%s0x%08x", (printed++ ? delim : ""),
- (unsigned int)t->pmt_counter[i]);
- else
- outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), t->pmt_counter[i]);
-
+ outp += print_hex_value(pmt_counter_get_width(ppmt), &printed, delim, t->pmt_counter[i]);
break;
case PMT_TYPE_XTAL_TIME:
- value_converted = 100.0 * value_raw / crystal_hz / interval_float;
+ value_converted = pct(value_raw / crystal_hz / interval_float);
outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), value_converted);
break;
case PMT_TYPE_TCORE_CLOCK:
- value_converted = 100.0 * value_raw / tcore_clock_freq_hz / interval_float;
+ value_converted = pct(value_raw / tcore_clock_freq_hz / interval_float);
outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), value_converted);
}
}
/* C1 */
if (DO_BIC(BIC_CPU_c1))
- outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * t->c1 / tsc);
+ outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), pct(t->c1 / tsc));
/* print per-core data only for 1st thread in core */
- if (!is_cpu_first_thread_in_core(t, c, p))
+ if (!is_cpu_first_thread_in_core(t, c))
goto done;
if (DO_BIC(BIC_CPU_c3))
- outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->c3 / tsc);
+ outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), pct(c->c3 / tsc));
if (DO_BIC(BIC_CPU_c6))
- outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->c6 / tsc);
+ outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), pct(c->c6 / tsc));
if (DO_BIC(BIC_CPU_c7))
- outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->c7 / tsc);
+ outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), pct(c->c7 / tsc));
/* Mod%c6 */
if (DO_BIC(BIC_Mod_c6))
- outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->mc6_us / tsc);
+ outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), pct(c->mc6_us / tsc));
if (DO_BIC(BIC_CoreTmp))
outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), c->core_temp_c);
@@ -3381,77 +3379,53 @@ int format_counters(PER_THREAD_PARAMS)
if (DO_BIC(BIC_CORE_THROT_CNT))
outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), c->core_throt_cnt);
+ /* Added Core Counters */
for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
- if (mp->format == FORMAT_RAW || mp->format == FORMAT_AVERAGE) {
- if (mp->width == 32)
- outp +=
- sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), (unsigned int)c->counter[i]);
- else
- outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), c->counter[i]);
- } else if (mp->format == FORMAT_DELTA) {
- if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns)
- outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), c->counter[i]);
- else
- outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), c->counter[i]);
- } else if (mp->format == FORMAT_PERCENT) {
- outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->counter[i] / tsc);
- }
+ if (mp->format == FORMAT_RAW)
+ outp += print_hex_value(mp->width, &printed, delim, c->counter[i]);
+ else if (mp->format == FORMAT_DELTA || mp->format == FORMAT_AVERAGE)
+ outp += print_decimal_value(mp->width, &printed, delim, c->counter[i]);
+ else if (mp->format == FORMAT_PERCENT)
+ outp += print_float_value(&printed, delim, pct(c->counter[i] / tsc));
}
+ /* Added perf Core counters */
for (i = 0, pp = sys.perf_cp; pp; i++, pp = pp->next) {
- if (pp->format == FORMAT_RAW) {
- if (pp->width == 32)
- outp +=
- sprintf(outp, "%s0x%08x", (printed++ ? delim : ""),
- (unsigned int)c->perf_counter[i]);
- else
- outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), c->perf_counter[i]);
- } else if (pp->format == FORMAT_DELTA) {
- if ((pp->type == COUNTER_ITEMS) && sums_need_wide_columns)
- outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), c->perf_counter[i]);
- else
- outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), c->perf_counter[i]);
- } else if (pp->format == FORMAT_PERCENT) {
- outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->perf_counter[i] / tsc);
- }
+ if (pp->format == FORMAT_RAW)
+ outp += print_hex_value(pp->width, &printed, delim, c->perf_counter[i]);
+ else if (pp->format == FORMAT_DELTA || mp->format == FORMAT_AVERAGE)
+ outp += print_decimal_value(pp->width, &printed, delim, c->perf_counter[i]);
+ else if (pp->format == FORMAT_PERCENT)
+ outp += print_float_value(&printed, delim, pct(c->perf_counter[i] / tsc));
}
+ /* Added PMT Core counters */
for (i = 0, ppmt = sys.pmt_cp; ppmt; i++, ppmt = ppmt->next) {
const unsigned long value_raw = c->pmt_counter[i];
double value_converted;
switch (ppmt->type) {
case PMT_TYPE_RAW:
- if (pmt_counter_get_width(ppmt) <= 32)
- outp += sprintf(outp, "%s0x%08x", (printed++ ? delim : ""),
- (unsigned int)c->pmt_counter[i]);
- else
- outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), c->pmt_counter[i]);
-
+ outp += print_hex_value(pmt_counter_get_width(ppmt), &printed, delim, c->pmt_counter[i]);
break;
case PMT_TYPE_XTAL_TIME:
- value_converted = 100.0 * value_raw / crystal_hz / interval_float;
- outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), value_converted);
+ value_converted = pct(value_raw / crystal_hz / interval_float);
+ outp += print_float_value(&printed, delim, value_converted);
break;
case PMT_TYPE_TCORE_CLOCK:
- value_converted = 100.0 * value_raw / tcore_clock_freq_hz / interval_float;
- outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), value_converted);
+ value_converted = pct(value_raw / tcore_clock_freq_hz / interval_float);
+ outp += print_float_value(&printed, delim, value_converted);
}
}
- fmt8 = "%s%.2f";
-
if (DO_BIC(BIC_CorWatt) && platform->has_per_core_rapl)
- outp +=
- sprintf(outp, fmt8, (printed++ ? delim : ""),
- rapl_counter_get_value(&c->core_energy, RAPL_UNIT_WATTS, interval_float));
+ outp += sprintf(outp, fmt8, (printed++ ? delim : ""), rapl_counter_get_value(&c->core_energy, RAPL_UNIT_WATTS, interval_float));
if (DO_BIC(BIC_Cor_J) && platform->has_per_core_rapl)
- outp += sprintf(outp, fmt8, (printed++ ? delim : ""),
- rapl_counter_get_value(&c->core_energy, RAPL_UNIT_JOULES, interval_float));
+ outp += sprintf(outp, fmt8, (printed++ ? delim : ""), rapl_counter_get_value(&c->core_energy, RAPL_UNIT_JOULES, interval_float));
/* print per-package data only for 1st core in package */
- if (!is_cpu_first_core_in_package(t, c, p))
+ if (!is_cpu_first_core_in_package(t, p))
goto done;
/* PkgTmp */
@@ -3463,8 +3437,7 @@ int format_counters(PER_THREAD_PARAMS)
if (p->gfx_rc6_ms == -1) { /* detect GFX counter reset */
outp += sprintf(outp, "%s**.**", (printed++ ? delim : ""));
} else {
- outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""),
- p->gfx_rc6_ms / 10.0 / interval_float);
+ outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), p->gfx_rc6_ms / 10.0 / interval_float);
}
}
@@ -3481,8 +3454,7 @@ int format_counters(PER_THREAD_PARAMS)
if (p->sam_mc6_ms == -1) { /* detect GFX counter reset */
outp += sprintf(outp, "%s**.**", (printed++ ? delim : ""));
} else {
- outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""),
- p->sam_mc6_ms / 10.0 / interval_float);
+ outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), p->sam_mc6_ms / 10.0 / interval_float);
}
}
@@ -3496,150 +3468,112 @@ int format_counters(PER_THREAD_PARAMS)
/* Totl%C0, Any%C0 GFX%C0 CPUGFX% */
if (DO_BIC(BIC_Totl_c0))
- outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_wtd_core_c0 / tsc);
+ outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100 * p->pkg_wtd_core_c0 / tsc); /* can exceed 100% */
if (DO_BIC(BIC_Any_c0))
- outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_any_core_c0 / tsc);
+ outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), pct(p->pkg_any_core_c0 / tsc));
if (DO_BIC(BIC_GFX_c0))
- outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_any_gfxe_c0 / tsc);
+ outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), pct(p->pkg_any_gfxe_c0 / tsc));
if (DO_BIC(BIC_CPUGFX))
- outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_both_core_gfxe_c0 / tsc);
+ outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), pct(p->pkg_both_core_gfxe_c0 / tsc));
if (DO_BIC(BIC_Pkgpc2))
- outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc2 / tsc);
+ outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), pct(p->pc2 / tsc));
if (DO_BIC(BIC_Pkgpc3))
- outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc3 / tsc);
+ outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), pct(p->pc3 / tsc));
if (DO_BIC(BIC_Pkgpc6))
- outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc6 / tsc);
+ outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), pct(p->pc6 / tsc));
if (DO_BIC(BIC_Pkgpc7))
- outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc7 / tsc);
+ outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), pct(p->pc7 / tsc));
if (DO_BIC(BIC_Pkgpc8))
- outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc8 / tsc);
+ outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), pct(p->pc8 / tsc));
if (DO_BIC(BIC_Pkgpc9))
- outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc9 / tsc);
+ outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), pct(p->pc9 / tsc));
if (DO_BIC(BIC_Pkgpc10))
- outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc10 / tsc);
+ outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), pct(p->pc10 / tsc));
if (DO_BIC(BIC_Diec6))
- outp +=
- sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->die_c6 / crystal_hz / interval_float);
+ outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), pct(p->die_c6 / crystal_hz / interval_float));
if (DO_BIC(BIC_CPU_LPI)) {
if (p->cpu_lpi >= 0)
- outp +=
- sprintf(outp, "%s%.2f", (printed++ ? delim : ""),
- 100.0 * p->cpu_lpi / 1000000.0 / interval_float);
+ outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), pct(p->cpu_lpi / 1000000.0 / interval_float));
else
outp += sprintf(outp, "%s(neg)", (printed++ ? delim : ""));
}
if (DO_BIC(BIC_SYS_LPI)) {
if (p->sys_lpi >= 0)
- outp +=
- sprintf(outp, "%s%.2f", (printed++ ? delim : ""),
- 100.0 * p->sys_lpi / 1000000.0 / interval_float);
+ outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), pct(p->sys_lpi / 1000000.0 / interval_float));
else
outp += sprintf(outp, "%s(neg)", (printed++ ? delim : ""));
}
if (DO_BIC(BIC_PkgWatt))
- outp +=
- sprintf(outp, fmt8, (printed++ ? delim : ""),
- rapl_counter_get_value(&p->energy_pkg, RAPL_UNIT_WATTS, interval_float));
+ outp += sprintf(outp, fmt8, (printed++ ? delim : ""), rapl_counter_get_value(&p->energy_pkg, RAPL_UNIT_WATTS, interval_float));
if (DO_BIC(BIC_CorWatt) && !platform->has_per_core_rapl)
- outp +=
- sprintf(outp, fmt8, (printed++ ? delim : ""),
- rapl_counter_get_value(&p->energy_cores, RAPL_UNIT_WATTS, interval_float));
+ outp += sprintf(outp, fmt8, (printed++ ? delim : ""), rapl_counter_get_value(&p->energy_cores, RAPL_UNIT_WATTS, interval_float));
if (DO_BIC(BIC_GFXWatt))
- outp +=
- sprintf(outp, fmt8, (printed++ ? delim : ""),
- rapl_counter_get_value(&p->energy_gfx, RAPL_UNIT_WATTS, interval_float));
+ outp += sprintf(outp, fmt8, (printed++ ? delim : ""), rapl_counter_get_value(&p->energy_gfx, RAPL_UNIT_WATTS, interval_float));
if (DO_BIC(BIC_RAMWatt))
- outp +=
- sprintf(outp, fmt8, (printed++ ? delim : ""),
- rapl_counter_get_value(&p->energy_dram, RAPL_UNIT_WATTS, interval_float));
+ outp += sprintf(outp, fmt8, (printed++ ? delim : ""), rapl_counter_get_value(&p->energy_dram, RAPL_UNIT_WATTS, interval_float));
if (DO_BIC(BIC_Pkg_J))
- outp += sprintf(outp, fmt8, (printed++ ? delim : ""),
- rapl_counter_get_value(&p->energy_pkg, RAPL_UNIT_JOULES, interval_float));
+ outp += sprintf(outp, fmt8, (printed++ ? delim : ""), rapl_counter_get_value(&p->energy_pkg, RAPL_UNIT_JOULES, interval_float));
if (DO_BIC(BIC_Cor_J) && !platform->has_per_core_rapl)
- outp += sprintf(outp, fmt8, (printed++ ? delim : ""),
- rapl_counter_get_value(&p->energy_cores, RAPL_UNIT_JOULES, interval_float));
+ outp += sprintf(outp, fmt8, (printed++ ? delim : ""), rapl_counter_get_value(&p->energy_cores, RAPL_UNIT_JOULES, interval_float));
if (DO_BIC(BIC_GFX_J))
- outp += sprintf(outp, fmt8, (printed++ ? delim : ""),
- rapl_counter_get_value(&p->energy_gfx, RAPL_UNIT_JOULES, interval_float));
+ outp += sprintf(outp, fmt8, (printed++ ? delim : ""), rapl_counter_get_value(&p->energy_gfx, RAPL_UNIT_JOULES, interval_float));
if (DO_BIC(BIC_RAM_J))
- outp += sprintf(outp, fmt8, (printed++ ? delim : ""),
- rapl_counter_get_value(&p->energy_dram, RAPL_UNIT_JOULES, interval_float));
+ outp += sprintf(outp, fmt8, (printed++ ? delim : ""), rapl_counter_get_value(&p->energy_dram, RAPL_UNIT_JOULES, interval_float));
if (DO_BIC(BIC_PKG__))
outp +=
- sprintf(outp, fmt8, (printed++ ? delim : ""),
- rapl_counter_get_value(&p->rapl_pkg_perf_status, RAPL_UNIT_WATTS, interval_float));
+ sprintf(outp, fmt8, (printed++ ? delim : ""), rapl_counter_get_value(&p->rapl_pkg_perf_status, RAPL_UNIT_WATTS, interval_float));
if (DO_BIC(BIC_RAM__))
outp +=
- sprintf(outp, fmt8, (printed++ ? delim : ""),
- rapl_counter_get_value(&p->rapl_dram_perf_status, RAPL_UNIT_WATTS, interval_float));
+ sprintf(outp, fmt8, (printed++ ? delim : ""), rapl_counter_get_value(&p->rapl_dram_perf_status, RAPL_UNIT_WATTS, interval_float));
/* UncMHz */
if (DO_BIC(BIC_UNCORE_MHZ))
outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->uncore_mhz);
+ /* Added Package Counters */
for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
- if (mp->format == FORMAT_RAW || mp->format == FORMAT_AVERAGE) {
- if (mp->width == 32)
- outp +=
- sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), (unsigned int)p->counter[i]);
- else
- outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), p->counter[i]);
- } else if (mp->format == FORMAT_DELTA) {
- if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns)
- outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), p->counter[i]);
- else
- outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), p->counter[i]);
- } else if (mp->format == FORMAT_PERCENT) {
- outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->counter[i] / tsc);
- } else if (mp->type == COUNTER_K2M)
+ if (mp->format == FORMAT_RAW)
+ outp += print_hex_value(mp->width, &printed, delim, p->counter[i]);
+ else if (mp->type == COUNTER_K2M)
outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), (unsigned int)p->counter[i] / 1000);
+ else if (mp->format == FORMAT_DELTA || mp->format == FORMAT_AVERAGE)
+ outp += print_decimal_value(mp->width, &printed, delim, p->counter[i]);
+ else if (mp->format == FORMAT_PERCENT)
+ outp += print_float_value(&printed, delim, pct(p->counter[i] / tsc));
}
+ /* Added perf Package Counters */
for (i = 0, pp = sys.perf_pp; pp; i++, pp = pp->next) {
- if (pp->format == FORMAT_RAW) {
- if (pp->width == 32)
- outp +=
- sprintf(outp, "%s0x%08x", (printed++ ? delim : ""),
- (unsigned int)p->perf_counter[i]);
- else
- outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), p->perf_counter[i]);
- } else if (pp->format == FORMAT_DELTA) {
- if ((pp->type == COUNTER_ITEMS) && sums_need_wide_columns)
- outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), p->perf_counter[i]);
- else
- outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), p->perf_counter[i]);
- } else if (pp->format == FORMAT_PERCENT) {
- outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->perf_counter[i] / tsc);
- } else if (pp->type == COUNTER_K2M) {
- outp +=
- sprintf(outp, "%s%d", (printed++ ? delim : ""), (unsigned int)p->perf_counter[i] / 1000);
- }
+ if (pp->format == FORMAT_RAW)
+ outp += print_hex_value(pp->width, &printed, delim, p->perf_counter[i]);
+ else if (pp->type == COUNTER_K2M)
+ outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), (unsigned int)p->perf_counter[i] / 1000);
+ else if (pp->format == FORMAT_DELTA || mp->format == FORMAT_AVERAGE)
+ outp += print_decimal_value(pp->width, &printed, delim, p->perf_counter[i]);
+ else if (pp->format == FORMAT_PERCENT)
+ outp += print_float_value(&printed, delim, pct(p->perf_counter[i] / tsc));
}
+ /* Added PMT Package Counters */
for (i = 0, ppmt = sys.pmt_pp; ppmt; i++, ppmt = ppmt->next) {
const unsigned long value_raw = p->pmt_counter[i];
double value_converted;
switch (ppmt->type) {
case PMT_TYPE_RAW:
- if (pmt_counter_get_width(ppmt) <= 32)
- outp += sprintf(outp, "%s0x%08x", (printed++ ? delim : ""),
- (unsigned int)p->pmt_counter[i]);
- else
- outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), p->pmt_counter[i]);
-
+ outp += print_hex_value(pmt_counter_get_width(ppmt), &printed, delim, p->pmt_counter[i]);
break;
case PMT_TYPE_XTAL_TIME:
- value_converted = 100.0 * value_raw / crystal_hz / interval_float;
- outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), value_converted);
+ value_converted = pct(value_raw / crystal_hz / interval_float);
+ outp += print_float_value(&printed, delim, value_converted);
break;
case PMT_TYPE_TCORE_CLOCK:
- value_converted = 100.0 * value_raw / tcore_clock_freq_hz / interval_float;
- outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), value_converted);
+ value_converted = pct(value_raw / tcore_clock_freq_hz / interval_float);
+ outp += print_float_value(&printed, delim, value_converted);
}
}
@@ -3754,11 +3688,10 @@ int delta_package(struct pkg_data *new, struct pkg_data *old)
old->energy_gfx.raw_value = new->energy_gfx.raw_value - old->energy_gfx.raw_value;
old->energy_dram.raw_value = new->energy_dram.raw_value - old->energy_dram.raw_value;
old->rapl_pkg_perf_status.raw_value = new->rapl_pkg_perf_status.raw_value - old->rapl_pkg_perf_status.raw_value;
- old->rapl_dram_perf_status.raw_value =
- new->rapl_dram_perf_status.raw_value - old->rapl_dram_perf_status.raw_value;
+ old->rapl_dram_perf_status.raw_value = new->rapl_dram_perf_status.raw_value - old->rapl_dram_perf_status.raw_value;
for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
- if (mp->format == FORMAT_RAW || mp->format == FORMAT_AVERAGE)
+ if (mp->format == FORMAT_RAW)
old->counter[i] = new->counter[i];
else if (mp->format == FORMAT_AVERAGE)
old->counter[i] = new->counter[i];
@@ -3862,8 +3795,7 @@ int delta_thread(struct thread_data *new, struct thread_data *old, struct core_d
/* check for TSC < 1 Mcycles over interval */
if (old->tsc < (1000 * 1000))
errx(-3, "Insanely slow TSC rate, TSC stops in idle?\n"
- "You can disable all c-states by booting with \"idle=poll\"\n"
- "or just the deep ones with \"processor.max_cstate=1\"");
+ "You can disable all c-states by booting with \"idle=poll\"\n" "or just the deep ones with \"processor.max_cstate=1\"");
old->c1 = new->c1 - old->c1;
@@ -3892,8 +3824,7 @@ int delta_thread(struct thread_data *new, struct thread_data *old, struct core_d
old->c1 = 0;
else {
/* normal case, derive c1 */
- old->c1 = (old->tsc * tsc_tweak) - old->mperf - core_delta->c3
- - core_delta->c6 - core_delta->c7;
+ old->c1 = (old->tsc * tsc_tweak) - old->mperf - core_delta->c3 - core_delta->c6 - core_delta->c7;
}
}
@@ -3915,6 +3846,12 @@ int delta_thread(struct thread_data *new, struct thread_data *old, struct core_d
if (DO_BIC(BIC_SMI))
old->smi_count = new->smi_count - old->smi_count;
+ if (DO_BIC(BIC_LLC_RPS))
+ old->llc.references = new->llc.references - old->llc.references;
+
+ if (DO_BIC(BIC_LLC_HIT))
+ old->llc.misses = new->llc.misses - old->llc.misses;
+
for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
if (mp->format == FORMAT_RAW || mp->format == FORMAT_AVERAGE)
old->counter[i] = new->counter[i];
@@ -3939,20 +3876,19 @@ int delta_thread(struct thread_data *new, struct thread_data *old, struct core_d
return 0;
}
-int delta_cpu(struct thread_data *t, struct core_data *c,
- struct pkg_data *p, struct thread_data *t2, struct core_data *c2, struct pkg_data *p2)
+int delta_cpu(struct thread_data *t, struct core_data *c, struct pkg_data *p, struct thread_data *t2, struct core_data *c2, struct pkg_data *p2)
{
int retval = 0;
/* calculate core delta only for 1st thread in core */
- if (is_cpu_first_thread_in_core(t, c, p))
+ if (is_cpu_first_thread_in_core(t, c))
delta_core(c, c2);
/* always calculate thread delta */
retval = delta_thread(t, t2, c2); /* c2 is core delta */
/* calculate package delta only for 1st core in package */
- if (is_cpu_first_core_in_package(t, c, p))
+ if (is_cpu_first_core_in_package(t, p))
retval |= delta_package(p, p2);
return retval;
@@ -3993,6 +3929,9 @@ void clear_counters(PER_THREAD_PARAMS)
t->nmi_count = 0;
t->smi_count = 0;
+ t->llc.references = 0;
+ t->llc.misses = 0;
+
c->c3 = 0;
c->c6 = 0;
c->c7 = 0;
@@ -4001,6 +3940,9 @@ void clear_counters(PER_THREAD_PARAMS)
rapl_counter_clear(&c->core_energy);
c->core_throt_cnt = 0;
+ t->llc.references = 0;
+ t->llc.misses = 0;
+
p->pkg_wtd_core_c0 = 0;
p->pkg_any_core_c0 = 0;
p->pkg_any_gfxe_c0 = 0;
@@ -4098,6 +4040,9 @@ int sum_counters(PER_THREAD_PARAMS)
average.threads.nmi_count += t->nmi_count;
average.threads.smi_count += t->smi_count;
+ average.threads.llc.references += t->llc.references;
+ average.threads.llc.misses += t->llc.misses;
+
for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
if (mp->format == FORMAT_RAW)
continue;
@@ -4115,7 +4060,7 @@ int sum_counters(PER_THREAD_PARAMS)
}
/* sum per-core values only for 1st thread in core */
- if (!is_cpu_first_thread_in_core(t, c, p))
+ if (!is_cpu_first_thread_in_core(t, c))
return 0;
average.cores.c3 += c->c3;
@@ -4145,7 +4090,7 @@ int sum_counters(PER_THREAD_PARAMS)
}
/* sum per-pkg values only for 1st core in pkg */
- if (!is_cpu_first_core_in_package(t, c, p))
+ if (!is_cpu_first_core_in_package(t, p))
return 0;
if (DO_BIC(BIC_Totl_c0))
@@ -4411,8 +4356,7 @@ unsigned long long get_legacy_uncore_mhz(int package)
*/
for (die = 0; die <= topo.max_die_id; ++die) {
- sprintf(path, "/sys/devices/system/cpu/intel_uncore_frequency/package_%02d_die_%02d/current_freq_khz",
- package, die);
+ sprintf(path, "/sys/devices/system/cpu/intel_uncore_frequency/package_%02d_die_%02d/current_freq_khz", package, die);
if (access(path, R_OK) == 0)
return (snapshot_sysfs_counter(path) / 1000);
@@ -4523,11 +4467,6 @@ int get_core_throt_cnt(int cpu, unsigned long long *cnt)
return 0;
}
-struct amperf_group_fd {
- int aperf; /* Also the group descriptor */
- int mperf;
-};
-
static int read_perf_counter_info(const char *const path, const char *const parse_format, void *value_ptr)
{
int fdmt;
@@ -4727,8 +4666,7 @@ int get_rapl_counters(int cpu, unsigned int domain, struct core_data *c, struct
const ssize_t actual_read_size = read(rci->fd_perf, &perf_data[0], sizeof(perf_data));
if (actual_read_size != expected_read_size)
- err(-1, "%s: failed to read perf_data (%zu %zu)", __func__, expected_read_size,
- actual_read_size);
+ err(-1, "%s: failed to read perf_data (%zu %zu)", __func__, expected_read_size, actual_read_size);
}
for (unsigned int i = 0, pi = 1; i < NUM_RAPL_COUNTERS; ++i) {
@@ -4966,8 +4904,7 @@ int get_smi_aperf_mperf(unsigned int cpu, struct thread_data *t)
const ssize_t actual_read_size = read(mci->fd_perf, &perf_data[0], sizeof(perf_data));
if (actual_read_size != expected_read_size)
- err(-1, "%s: failed to read perf_data (%zu %zu)", __func__, expected_read_size,
- actual_read_size);
+ err(-1, "%s: failed to read perf_data (%zu %zu)", __func__, expected_read_size, actual_read_size);
}
for (unsigned int i = 0, pi = 1; i < NUM_MSR_COUNTERS; ++i) {
@@ -5121,6 +5058,9 @@ int get_counters(PER_THREAD_PARAMS)
get_smi_aperf_mperf(cpu, t);
+ if (DO_BIC(BIC_LLC_RPS) || DO_BIC(BIC_LLC_HIT))
+ get_perf_llc_stats(cpu, &t->llc);
+
if (DO_BIC(BIC_IPC))
if (read(get_instr_count_fd(cpu), &t->instr_count, sizeof(long long)) != sizeof(long long))
return -4;
@@ -5144,7 +5084,7 @@ int get_counters(PER_THREAD_PARAMS)
t->pmt_counter[i] = pmt_read_counter(pp, t->cpu_id);
/* collect core counters only for 1st thread in core */
- if (!is_cpu_first_thread_in_core(t, c, p))
+ if (!is_cpu_first_thread_in_core(t, c))
goto done;
if (platform->has_per_core_rapl) {
@@ -5188,7 +5128,7 @@ int get_counters(PER_THREAD_PARAMS)
c->pmt_counter[i] = pmt_read_counter(pp, c->core_id);
/* collect package counters only for 1st core in package */
- if (!is_cpu_first_core_in_package(t, c, p))
+ if (!is_cpu_first_core_in_package(t, p))
goto done;
if (DO_BIC(BIC_Totl_c0)) {
@@ -5277,48 +5217,39 @@ char *pkg_cstate_limit_strings[] = { "unknown", "reserved", "pc0", "pc1", "pc2",
"pc3", "pc4", "pc6", "pc6n", "pc6r", "pc7", "pc7s", "pc8", "pc9", "pc10", "unlimited"
};
-int nhm_pkg_cstate_limits[16] =
- { PCL__0, PCL__1, PCL__3, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV,
+int nhm_pkg_cstate_limits[16] = { PCL__0, PCL__1, PCL__3, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV,
PCLRSV, PCLRSV
};
-int snb_pkg_cstate_limits[16] =
- { PCL__0, PCL__2, PCL_6N, PCL_6R, PCL__7, PCL_7S, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV,
+int snb_pkg_cstate_limits[16] = { PCL__0, PCL__2, PCL_6N, PCL_6R, PCL__7, PCL_7S, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV,
PCLRSV, PCLRSV
};
-int hsw_pkg_cstate_limits[16] =
- { PCL__0, PCL__2, PCL__3, PCL__6, PCL__7, PCL_7S, PCL__8, PCL__9, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV,
+int hsw_pkg_cstate_limits[16] = { PCL__0, PCL__2, PCL__3, PCL__6, PCL__7, PCL_7S, PCL__8, PCL__9, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV,
PCLRSV, PCLRSV
};
-int slv_pkg_cstate_limits[16] =
- { PCL__0, PCL__1, PCLRSV, PCLRSV, PCL__4, PCLRSV, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV,
+int slv_pkg_cstate_limits[16] = { PCL__0, PCL__1, PCLRSV, PCLRSV, PCL__4, PCLRSV, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV,
PCL__6, PCL__7
};
-int amt_pkg_cstate_limits[16] =
- { PCLUNL, PCL__1, PCL__2, PCLRSV, PCLRSV, PCLRSV, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV,
+int amt_pkg_cstate_limits[16] = { PCLUNL, PCL__1, PCL__2, PCLRSV, PCLRSV, PCLRSV, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV,
PCLRSV, PCLRSV
};
-int phi_pkg_cstate_limits[16] =
- { PCL__0, PCL__2, PCL_6N, PCL_6R, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV,
+int phi_pkg_cstate_limits[16] = { PCL__0, PCL__2, PCL_6N, PCL_6R, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV,
PCLRSV, PCLRSV
};
-int glm_pkg_cstate_limits[16] =
- { PCLUNL, PCL__1, PCL__3, PCL__6, PCL__7, PCL_7S, PCL__8, PCL__9, PCL_10, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV,
+int glm_pkg_cstate_limits[16] = { PCLUNL, PCL__1, PCL__3, PCL__6, PCL__7, PCL_7S, PCL__8, PCL__9, PCL_10, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV,
PCLRSV, PCLRSV
};
-int skx_pkg_cstate_limits[16] =
- { PCL__0, PCL__2, PCL_6N, PCL_6R, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV,
+int skx_pkg_cstate_limits[16] = { PCL__0, PCL__2, PCL_6N, PCL_6R, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV,
PCLRSV, PCLRSV
};
-int icx_pkg_cstate_limits[16] =
- { PCL__0, PCL__2, PCL__6, PCL__6, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV,
+int icx_pkg_cstate_limits[16] = { PCL__0, PCL__2, PCL__6, PCL__6, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV,
PCLRSV, PCLRSV
};
@@ -5393,8 +5324,7 @@ static void dump_power_ctl(void)
return;
get_msr(base_cpu, MSR_IA32_POWER_CTL, &msr);
- fprintf(outf, "cpu%d: MSR_IA32_POWER_CTL: 0x%08llx (C1E auto-promotion: %sabled)\n",
- base_cpu, msr, msr & 0x2 ? "EN" : "DIS");
+ fprintf(outf, "cpu%d: MSR_IA32_POWER_CTL: 0x%08llx (C1E auto-promotion: %sabled)\n", base_cpu, msr, msr & 0x2 ? "EN" : "DIS");
/* C-state Pre-wake Disable (CSTATE_PREWAKE_DISABLE) */
if (platform->has_cst_prewake_bit)
@@ -5487,8 +5417,7 @@ static void dump_turbo_ratio_limits(int trl_msr_offset)
ratio = (msr >> shift) & 0xFF;
group_size = (core_counts >> shift) & 0xFF;
if (ratio)
- fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n",
- ratio, bclk, ratio * bclk, group_size);
+ fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n", ratio, bclk, ratio * bclk, group_size);
}
return;
@@ -5586,9 +5515,7 @@ static void dump_knl_turbo_ratio_limits(void)
for (i = buckets_no - 1; i >= 0; i--)
if (i > 0 ? ratio[i] != ratio[i - 1] : 1)
- fprintf(outf,
- "%d * %.1f = %.1f MHz max turbo %d active cores\n",
- ratio[i], bclk, ratio[i] * bclk, cores[i]);
+ fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n", ratio[i], bclk, ratio[i] * bclk, cores[i]);
}
static void dump_cst_cfg(void)
@@ -5673,43 +5600,37 @@ void print_irtl(void)
if (platform->supported_cstates & PC3) {
get_msr(base_cpu, MSR_PKGC3_IRTL, &msr);
fprintf(outf, "cpu%d: MSR_PKGC3_IRTL: 0x%08llx (", base_cpu, msr);
- fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
- (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
+ fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT", (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
}
if (platform->supported_cstates & PC6) {
get_msr(base_cpu, MSR_PKGC6_IRTL, &msr);
fprintf(outf, "cpu%d: MSR_PKGC6_IRTL: 0x%08llx (", base_cpu, msr);
- fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
- (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
+ fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT", (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
}
if (platform->supported_cstates & PC7) {
get_msr(base_cpu, MSR_PKGC7_IRTL, &msr);
fprintf(outf, "cpu%d: MSR_PKGC7_IRTL: 0x%08llx (", base_cpu, msr);
- fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
- (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
+ fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT", (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
}
if (platform->supported_cstates & PC8) {
get_msr(base_cpu, MSR_PKGC8_IRTL, &msr);
fprintf(outf, "cpu%d: MSR_PKGC8_IRTL: 0x%08llx (", base_cpu, msr);
- fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
- (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
+ fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT", (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
}
if (platform->supported_cstates & PC9) {
get_msr(base_cpu, MSR_PKGC9_IRTL, &msr);
fprintf(outf, "cpu%d: MSR_PKGC9_IRTL: 0x%08llx (", base_cpu, msr);
- fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
- (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
+ fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT", (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
}
if (platform->supported_cstates & PC10) {
get_msr(base_cpu, MSR_PKGC10_IRTL, &msr);
fprintf(outf, "cpu%d: MSR_PKGC10_IRTL: 0x%08llx (", base_cpu, msr);
- fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
- (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
+ fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT", (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
}
}
@@ -5743,6 +5664,20 @@ void free_fd_instr_count_percpu(void)
fd_instr_count_percpu = NULL;
}
+void free_fd_llc_percpu(void)
+{
+ if (!fd_llc_percpu)
+ return;
+
+ for (int i = 0; i < topo.max_cpu_num + 1; ++i) {
+ if (fd_llc_percpu[i] != 0)
+ close(fd_llc_percpu[i]);
+ }
+
+ free(fd_llc_percpu);
+ fd_llc_percpu = NULL;
+}
+
void free_fd_cstate(void)
{
if (!ccstate_counter_info)
@@ -5867,6 +5802,7 @@ void free_all_buffers(void)
free_fd_percpu();
free_fd_instr_count_percpu();
+ free_fd_llc_percpu();
free_fd_msr();
free_fd_rapl_percpu();
free_fd_cstate();
@@ -6213,6 +6149,7 @@ void linux_perf_init(void);
void msr_perf_init(void);
void rapl_perf_init(void);
void cstate_perf_init(void);
+void perf_llc_init(void);
void added_perf_counters_init(void);
void pmt_init(void);
@@ -6224,10 +6161,10 @@ void re_initialize(void)
msr_perf_init();
rapl_perf_init();
cstate_perf_init();
+ perf_llc_init();
added_perf_counters_init();
pmt_init();
- fprintf(outf, "turbostat: re-initialized with num_cpus %d, allowed_cpus %d\n", topo.num_cpus,
- topo.allowed_cpus);
+ fprintf(outf, "turbostat: re-initialized with num_cpus %d, allowed_cpus %d\n", topo.num_cpus, topo.allowed_cpus);
}
void set_max_cpu_num(void)
@@ -6673,6 +6610,7 @@ release_timer:
timer_delete(timerid);
release_msr:
free(per_cpu_msr_sum);
+ per_cpu_msr_sum = NULL;
}
/*
@@ -6797,21 +6735,43 @@ restart:
}
}
-void check_dev_msr()
+int probe_dev_msr(void)
{
struct stat sb;
char pathname[32];
- if (no_msr)
- return;
-#if defined(ANDROID)
sprintf(pathname, "/dev/msr%d", base_cpu);
-#else
+ return !stat(pathname, &sb);
+}
+
+int probe_dev_cpu_msr(void)
+{
+ struct stat sb;
+ char pathname[32];
+
sprintf(pathname, "/dev/cpu/%d/msr", base_cpu);
-#endif
- if (stat(pathname, &sb))
- if (system("/sbin/modprobe msr > /dev/null 2>&1"))
- no_msr = 1;
+ return !stat(pathname, &sb);
+}
+
+int probe_msr_driver(void)
+{
+ if (probe_dev_msr()) {
+ use_android_msr_path = 1;
+ return 1;
+ }
+ return probe_dev_cpu_msr();
+}
+
+void check_msr_driver(void)
+{
+ if (probe_msr_driver())
+ return;
+
+ if (system("/sbin/modprobe msr > /dev/null 2>&1"))
+ no_msr = 1;
+
+ if (!probe_msr_driver())
+ no_msr = 1;
}
/*
@@ -6866,11 +6826,7 @@ void check_msr_permission(void)
failed += check_for_cap_sys_rawio();
/* test file permissions */
-#if defined(ANDROID)
- sprintf(pathname, "/dev/msr%d", base_cpu);
-#else
- sprintf(pathname, "/dev/cpu/%d/msr", base_cpu);
-#endif
+ sprintf(pathname, use_android_msr_path ? "/dev/msr%d" : "/dev/cpu/%d/msr", base_cpu);
if (euidaccess(pathname, R_OK)) {
failed++;
}
@@ -6999,8 +6955,7 @@ static void probe_intel_uncore_frequency_legacy(void)
int k, l;
char path_base[128];
- sprintf(path_base, "/sys/devices/system/cpu/intel_uncore_frequency/package_%02d_die_%02d", i,
- j);
+ sprintf(path_base, "/sys/devices/system/cpu/intel_uncore_frequency/package_%02d_die_%02d", i, j);
sprintf(path, "%s/current_freq_khz", path_base);
if (access(path, R_OK))
@@ -7083,8 +7038,7 @@ static void probe_intel_uncore_frequency_cluster(void)
*/
if BIC_IS_ENABLED
(BIC_UNCORE_MHZ)
- add_counter(0, path, name_buf, 0, SCOPE_PACKAGE, COUNTER_K2M, FORMAT_AVERAGE, 0,
- package_id);
+ add_counter(0, path, name_buf, 0, SCOPE_PACKAGE, COUNTER_K2M, FORMAT_AVERAGE, 0, package_id);
if (quiet)
continue;
@@ -7093,8 +7047,7 @@ static void probe_intel_uncore_frequency_cluster(void)
k = read_sysfs_int(path);
sprintf(path, "%s/max_freq_khz", path_base);
l = read_sysfs_int(path);
- fprintf(outf, "Uncore Frequency package%d domain%d cluster%d: %d - %d MHz ", package_id, domain_id,
- cluster_id, k / 1000, l / 1000);
+ fprintf(outf, "Uncore Frequency package%d domain%d cluster%d: %d - %d MHz ", package_id, domain_id, cluster_id, k / 1000, l / 1000);
sprintf(path, "%s/initial_min_freq_khz", path_base);
k = read_sysfs_int(path);
@@ -7156,21 +7109,17 @@ static void probe_graphics(void)
else
goto next;
- set_graphics_fp("/sys/class/drm/card0/device/tile0/gt0/gtidle/idle_residency_ms",
- gt0_is_gt ? GFX_rc6 : SAM_mc6);
+ set_graphics_fp("/sys/class/drm/card0/device/tile0/gt0/gtidle/idle_residency_ms", gt0_is_gt ? GFX_rc6 : SAM_mc6);
set_graphics_fp("/sys/class/drm/card0/device/tile0/gt0/freq0/cur_freq", gt0_is_gt ? GFX_MHz : SAM_MHz);
- set_graphics_fp("/sys/class/drm/card0/device/tile0/gt0/freq0/act_freq",
- gt0_is_gt ? GFX_ACTMHz : SAM_ACTMHz);
+ set_graphics_fp("/sys/class/drm/card0/device/tile0/gt0/freq0/act_freq", gt0_is_gt ? GFX_ACTMHz : SAM_ACTMHz);
- set_graphics_fp("/sys/class/drm/card0/device/tile0/gt1/gtidle/idle_residency_ms",
- gt0_is_gt ? SAM_mc6 : GFX_rc6);
+ set_graphics_fp("/sys/class/drm/card0/device/tile0/gt1/gtidle/idle_residency_ms", gt0_is_gt ? SAM_mc6 : GFX_rc6);
set_graphics_fp("/sys/class/drm/card0/device/tile0/gt1/freq0/cur_freq", gt0_is_gt ? SAM_MHz : GFX_MHz);
- set_graphics_fp("/sys/class/drm/card0/device/tile0/gt1/freq0/act_freq",
- gt0_is_gt ? SAM_ACTMHz : GFX_ACTMHz);
+ set_graphics_fp("/sys/class/drm/card0/device/tile0/gt1/freq0/act_freq", gt0_is_gt ? SAM_ACTMHz : GFX_ACTMHz);
goto end;
}
@@ -7425,8 +7374,7 @@ int print_hwp(PER_THREAD_PARAMS)
"(high %d guar %d eff %d low %d)\n",
cpu, msr,
(unsigned int)HWP_HIGHEST_PERF(msr),
- (unsigned int)HWP_GUARANTEED_PERF(msr),
- (unsigned int)HWP_MOSTEFFICIENT_PERF(msr), (unsigned int)HWP_LOWEST_PERF(msr));
+ (unsigned int)HWP_GUARANTEED_PERF(msr), (unsigned int)HWP_MOSTEFFICIENT_PERF(msr), (unsigned int)HWP_LOWEST_PERF(msr));
if (get_msr(cpu, MSR_HWP_REQUEST, &msr))
return 0;
@@ -7437,8 +7385,7 @@ int print_hwp(PER_THREAD_PARAMS)
(unsigned int)(((msr) >> 0) & 0xff),
(unsigned int)(((msr) >> 8) & 0xff),
(unsigned int)(((msr) >> 16) & 0xff),
- (unsigned int)(((msr) >> 24) & 0xff),
- (unsigned int)(((msr) >> 32) & 0xff3), (unsigned int)(((msr) >> 42) & 0x1));
+ (unsigned int)(((msr) >> 24) & 0xff), (unsigned int)(((msr) >> 32) & 0xff3), (unsigned int)(((msr) >> 42) & 0x1));
if (has_hwp_pkg) {
if (get_msr(cpu, MSR_HWP_REQUEST_PKG, &msr))
@@ -7449,23 +7396,20 @@ int print_hwp(PER_THREAD_PARAMS)
cpu, msr,
(unsigned int)(((msr) >> 0) & 0xff),
(unsigned int)(((msr) >> 8) & 0xff),
- (unsigned int)(((msr) >> 16) & 0xff),
- (unsigned int)(((msr) >> 24) & 0xff), (unsigned int)(((msr) >> 32) & 0xff3));
+ (unsigned int)(((msr) >> 16) & 0xff), (unsigned int)(((msr) >> 24) & 0xff), (unsigned int)(((msr) >> 32) & 0xff3));
}
if (has_hwp_notify) {
if (get_msr(cpu, MSR_HWP_INTERRUPT, &msr))
return 0;
fprintf(outf, "cpu%d: MSR_HWP_INTERRUPT: 0x%08llx "
- "(%s_Guaranteed_Perf_Change, %s_Excursion_Min)\n",
- cpu, msr, ((msr) & 0x1) ? "EN" : "Dis", ((msr) & 0x2) ? "EN" : "Dis");
+ "(%s_Guaranteed_Perf_Change, %s_Excursion_Min)\n", cpu, msr, ((msr) & 0x1) ? "EN" : "Dis", ((msr) & 0x2) ? "EN" : "Dis");
}
if (get_msr(cpu, MSR_HWP_STATUS, &msr))
return 0;
fprintf(outf, "cpu%d: MSR_HWP_STATUS: 0x%08llx "
- "(%sGuaranteed_Perf_Change, %sExcursion_Min)\n",
- cpu, msr, ((msr) & 0x1) ? "" : "No-", ((msr) & 0x4) ? "" : "No-");
+ "(%sGuaranteed_Perf_Change, %sExcursion_Min)\n", cpu, msr, ((msr) & 0x1) ? "" : "No-", ((msr) & 0x4) ? "" : "No-");
return 0;
}
@@ -7510,8 +7454,7 @@ int print_perf_limit(PER_THREAD_PARAMS)
(msr & 1 << 6) ? "VR-Therm, " : "",
(msr & 1 << 5) ? "Auto-HWP, " : "",
(msr & 1 << 4) ? "Graphics, " : "",
- (msr & 1 << 2) ? "bit2, " : "",
- (msr & 1 << 1) ? "ThermStatus, " : "", (msr & 1 << 0) ? "PROCHOT, " : "");
+ (msr & 1 << 2) ? "bit2, " : "", (msr & 1 << 1) ? "ThermStatus, " : "", (msr & 1 << 0) ? "PROCHOT, " : "");
fprintf(outf, " (Logged: %s%s%s%s%s%s%s%s%s%s%s%s%s%s)\n",
(msr & 1 << 31) ? "bit31, " : "",
(msr & 1 << 30) ? "bit30, " : "",
@@ -7524,8 +7467,7 @@ int print_perf_limit(PER_THREAD_PARAMS)
(msr & 1 << 22) ? "VR-Therm, " : "",
(msr & 1 << 21) ? "Auto-HWP, " : "",
(msr & 1 << 20) ? "Graphics, " : "",
- (msr & 1 << 18) ? "bit18, " : "",
- (msr & 1 << 17) ? "ThermStatus, " : "", (msr & 1 << 16) ? "PROCHOT, " : "");
+ (msr & 1 << 18) ? "bit18, " : "", (msr & 1 << 17) ? "ThermStatus, " : "", (msr & 1 << 16) ? "PROCHOT, " : "");
}
if (platform->plr_msrs & PLR_GFX) {
@@ -7537,16 +7479,14 @@ int print_perf_limit(PER_THREAD_PARAMS)
(msr & 1 << 4) ? "Graphics, " : "",
(msr & 1 << 6) ? "VR-Therm, " : "",
(msr & 1 << 8) ? "Amps, " : "",
- (msr & 1 << 9) ? "GFXPwr, " : "",
- (msr & 1 << 10) ? "PkgPwrL1, " : "", (msr & 1 << 11) ? "PkgPwrL2, " : "");
+ (msr & 1 << 9) ? "GFXPwr, " : "", (msr & 1 << 10) ? "PkgPwrL1, " : "", (msr & 1 << 11) ? "PkgPwrL2, " : "");
fprintf(outf, " (Logged: %s%s%s%s%s%s%s%s)\n",
(msr & 1 << 16) ? "PROCHOT, " : "",
(msr & 1 << 17) ? "ThermStatus, " : "",
(msr & 1 << 20) ? "Graphics, " : "",
(msr & 1 << 22) ? "VR-Therm, " : "",
(msr & 1 << 24) ? "Amps, " : "",
- (msr & 1 << 25) ? "GFXPwr, " : "",
- (msr & 1 << 26) ? "PkgPwrL1, " : "", (msr & 1 << 27) ? "PkgPwrL2, " : "");
+ (msr & 1 << 25) ? "GFXPwr, " : "", (msr & 1 << 26) ? "PkgPwrL1, " : "", (msr & 1 << 27) ? "PkgPwrL2, " : "");
}
if (platform->plr_msrs & PLR_RING) {
get_msr(cpu, MSR_RING_PERF_LIMIT_REASONS, &msr);
@@ -7555,14 +7495,12 @@ int print_perf_limit(PER_THREAD_PARAMS)
(msr & 1 << 0) ? "PROCHOT, " : "",
(msr & 1 << 1) ? "ThermStatus, " : "",
(msr & 1 << 6) ? "VR-Therm, " : "",
- (msr & 1 << 8) ? "Amps, " : "",
- (msr & 1 << 10) ? "PkgPwrL1, " : "", (msr & 1 << 11) ? "PkgPwrL2, " : "");
+ (msr & 1 << 8) ? "Amps, " : "", (msr & 1 << 10) ? "PkgPwrL1, " : "", (msr & 1 << 11) ? "PkgPwrL2, " : "");
fprintf(outf, " (Logged: %s%s%s%s%s%s)\n",
(msr & 1 << 16) ? "PROCHOT, " : "",
(msr & 1 << 17) ? "ThermStatus, " : "",
(msr & 1 << 22) ? "VR-Therm, " : "",
- (msr & 1 << 24) ? "Amps, " : "",
- (msr & 1 << 26) ? "PkgPwrL1, " : "", (msr & 1 << 27) ? "PkgPwrL2, " : "");
+ (msr & 1 << 24) ? "Amps, " : "", (msr & 1 << 26) ? "PkgPwrL1, " : "", (msr & 1 << 27) ? "PkgPwrL2, " : "");
}
return 0;
}
@@ -7582,7 +7520,7 @@ double get_tdp_intel(void)
{
unsigned long long msr;
- if (platform->rapl_msrs & RAPL_PKG_POWER_INFO)
+ if (valid_rapl_msrs & RAPL_PKG_POWER_INFO)
if (!get_msr(base_cpu, MSR_PKG_POWER_INFO, &msr))
return ((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units;
return get_quirk_tdp();
@@ -7613,12 +7551,12 @@ void rapl_probe_intel(void)
CLR_BIC(BIC_GFX_J, &bic_enabled);
}
- if (!platform->rapl_msrs || no_msr)
+ if (!valid_rapl_msrs || no_msr)
return;
- if (!(platform->rapl_msrs & RAPL_PKG_PERF_STATUS))
+ if (!(valid_rapl_msrs & RAPL_PKG_PERF_STATUS))
CLR_BIC(BIC_PKG__, &bic_enabled);
- if (!(platform->rapl_msrs & RAPL_DRAM_PERF_STATUS))
+ if (!(valid_rapl_msrs & RAPL_DRAM_PERF_STATUS))
CLR_BIC(BIC_RAM__, &bic_enabled);
/* units on package 0, verify later other packages match */
@@ -7667,7 +7605,7 @@ void rapl_probe_amd(void)
CLR_BIC(BIC_Cor_J, &bic_enabled);
}
- if (!platform->rapl_msrs || no_msr)
+ if (!valid_rapl_msrs || no_msr)
return;
if (get_msr(base_cpu, MSR_RAPL_PWR_UNIT, &msr))
@@ -7690,8 +7628,7 @@ void print_power_limit_msr(int cpu, unsigned long long msr, char *label)
cpu, label,
((msr >> 15) & 1) ? "EN" : "DIS",
((msr >> 0) & 0x7FFF) * rapl_power_units,
- (1.0 + (((msr >> 22) & 0x3) / 4.0)) * (1 << ((msr >> 17) & 0x1F)) * rapl_time_units,
- (((msr >> 16) & 1) ? "EN" : "DIS"));
+ (1.0 + (((msr >> 22) & 0x3) / 4.0)) * (1 << ((msr >> 17) & 0x1F)) * rapl_time_units, (((msr >> 16) & 1) ? "EN" : "DIS"));
return;
}
@@ -7857,7 +7794,7 @@ int print_rapl(PER_THREAD_PARAMS)
UNUSED(c);
UNUSED(p);
- if (!platform->rapl_msrs)
+ if (!valid_rapl_msrs)
return 0;
/* RAPL counters are per package, so print only for 1st thread/package */
@@ -7870,7 +7807,7 @@ int print_rapl(PER_THREAD_PARAMS)
return -1;
}
- if (platform->rapl_msrs & RAPL_AMD_F17H) {
+ if (valid_rapl_msrs & RAPL_AMD_F17H) {
msr_name = "MSR_RAPL_PWR_UNIT";
if (get_msr(cpu, MSR_RAPL_PWR_UNIT, &msr))
return -1;
@@ -7883,7 +7820,7 @@ int print_rapl(PER_THREAD_PARAMS)
fprintf(outf, "cpu%d: %s: 0x%08llx (%f Watts, %f Joules, %f sec.)\n", cpu, msr_name, msr,
rapl_power_units, rapl_energy_units, rapl_time_units);
- if (platform->rapl_msrs & RAPL_PKG_POWER_INFO) {
+ if (valid_rapl_msrs & RAPL_PKG_POWER_INFO) {
if (get_msr(cpu, MSR_PKG_POWER_INFO, &msr))
return -5;
@@ -7892,25 +7829,22 @@ int print_rapl(PER_THREAD_PARAMS)
cpu, msr,
((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units,
((msr >> 16) & RAPL_POWER_GRANULARITY) * rapl_power_units,
- ((msr >> 32) & RAPL_POWER_GRANULARITY) * rapl_power_units,
- ((msr >> 48) & RAPL_TIME_GRANULARITY) * rapl_time_units);
+ ((msr >> 32) & RAPL_POWER_GRANULARITY) * rapl_power_units, ((msr >> 48) & RAPL_TIME_GRANULARITY) * rapl_time_units);
}
- if (platform->rapl_msrs & RAPL_PKG) {
+ if (valid_rapl_msrs & RAPL_PKG) {
if (get_msr(cpu, MSR_PKG_POWER_LIMIT, &msr))
return -9;
- fprintf(outf, "cpu%d: MSR_PKG_POWER_LIMIT: 0x%08llx (%slocked)\n",
- cpu, msr, (msr >> 63) & 1 ? "" : "UN");
+ fprintf(outf, "cpu%d: MSR_PKG_POWER_LIMIT: 0x%08llx (%slocked)\n", cpu, msr, (msr >> 63) & 1 ? "" : "UN");
print_power_limit_msr(cpu, msr, "PKG Limit #1");
fprintf(outf, "cpu%d: PKG Limit #2: %sabled (%0.3f Watts, %f* sec, clamp %sabled)\n",
cpu,
((msr >> 47) & 1) ? "EN" : "DIS",
((msr >> 32) & 0x7FFF) * rapl_power_units,
- (1.0 + (((msr >> 54) & 0x3) / 4.0)) * (1 << ((msr >> 49) & 0x1F)) * rapl_time_units,
- ((msr >> 48) & 1) ? "EN" : "DIS");
+ (1.0 + (((msr >> 54) & 0x3) / 4.0)) * (1 << ((msr >> 49) & 0x1F)) * rapl_time_units, ((msr >> 48) & 1) ? "EN" : "DIS");
if (get_msr(cpu, MSR_VR_CURRENT_CONFIG, &msr))
return -9;
@@ -7920,7 +7854,7 @@ int print_rapl(PER_THREAD_PARAMS)
cpu, ((msr >> 0) & 0x1FFF) * rapl_power_units, (msr >> 31) & 1 ? "" : "UN");
}
- if (platform->rapl_msrs & RAPL_DRAM_POWER_INFO) {
+ if (valid_rapl_msrs & RAPL_DRAM_POWER_INFO) {
if (get_msr(cpu, MSR_DRAM_POWER_INFO, &msr))
return -6;
@@ -7928,31 +7862,28 @@ int print_rapl(PER_THREAD_PARAMS)
cpu, msr,
((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units,
((msr >> 16) & RAPL_POWER_GRANULARITY) * rapl_power_units,
- ((msr >> 32) & RAPL_POWER_GRANULARITY) * rapl_power_units,
- ((msr >> 48) & RAPL_TIME_GRANULARITY) * rapl_time_units);
+ ((msr >> 32) & RAPL_POWER_GRANULARITY) * rapl_power_units, ((msr >> 48) & RAPL_TIME_GRANULARITY) * rapl_time_units);
}
- if (platform->rapl_msrs & RAPL_DRAM) {
+ if (valid_rapl_msrs & RAPL_DRAM) {
if (get_msr(cpu, MSR_DRAM_POWER_LIMIT, &msr))
return -9;
- fprintf(outf, "cpu%d: MSR_DRAM_POWER_LIMIT: 0x%08llx (%slocked)\n",
- cpu, msr, (msr >> 31) & 1 ? "" : "UN");
+ fprintf(outf, "cpu%d: MSR_DRAM_POWER_LIMIT: 0x%08llx (%slocked)\n", cpu, msr, (msr >> 31) & 1 ? "" : "UN");
print_power_limit_msr(cpu, msr, "DRAM Limit");
}
- if (platform->rapl_msrs & RAPL_CORE_POLICY) {
+ if (valid_rapl_msrs & RAPL_CORE_POLICY) {
if (get_msr(cpu, MSR_PP0_POLICY, &msr))
return -7;
fprintf(outf, "cpu%d: MSR_PP0_POLICY: %lld\n", cpu, msr & 0xF);
}
- if (platform->rapl_msrs & RAPL_CORE_POWER_LIMIT) {
+ if (valid_rapl_msrs & RAPL_CORE_POWER_LIMIT) {
if (get_msr(cpu, MSR_PP0_POWER_LIMIT, &msr))
return -9;
- fprintf(outf, "cpu%d: MSR_PP0_POWER_LIMIT: 0x%08llx (%slocked)\n",
- cpu, msr, (msr >> 31) & 1 ? "" : "UN");
+ fprintf(outf, "cpu%d: MSR_PP0_POWER_LIMIT: 0x%08llx (%slocked)\n", cpu, msr, (msr >> 31) & 1 ? "" : "UN");
print_power_limit_msr(cpu, msr, "Cores Limit");
}
- if (platform->rapl_msrs & RAPL_GFX) {
+ if (valid_rapl_msrs & RAPL_GFX) {
if (get_msr(cpu, MSR_PP1_POLICY, &msr))
return -8;
@@ -7960,20 +7891,58 @@ int print_rapl(PER_THREAD_PARAMS)
if (get_msr(cpu, MSR_PP1_POWER_LIMIT, &msr))
return -9;
- fprintf(outf, "cpu%d: MSR_PP1_POWER_LIMIT: 0x%08llx (%slocked)\n",
- cpu, msr, (msr >> 31) & 1 ? "" : "UN");
+ fprintf(outf, "cpu%d: MSR_PP1_POWER_LIMIT: 0x%08llx (%slocked)\n", cpu, msr, (msr >> 31) & 1 ? "" : "UN");
print_power_limit_msr(cpu, msr, "GFX Limit");
}
return 0;
}
/*
+ * probe_rapl_msrs
+ *
+ * initialize global valid_rapl_msrs to platform->plat_rapl_msrs
+ * only if PKG_ENERGY counter is enumerated and reads non-zero
+ */
+void probe_rapl_msrs(void)
+{
+ int ret;
+ off_t offset;
+ unsigned long long msr_value;
+
+ if (no_msr)
+ return;
+
+ if ((platform->plat_rapl_msrs & (RAPL_PKG | RAPL_AMD_F17H)) == 0)
+ return;
+
+ offset = idx_to_offset(IDX_PKG_ENERGY);
+ if (offset < 0)
+ return;
+
+ ret = get_msr(base_cpu, offset, &msr_value);
+ if (ret) {
+ if (debug)
+ fprintf(outf, "Can not read RAPL_PKG_ENERGY MSR(0x%llx)\n", (unsigned long long)offset);
+ return;
+ }
+ if (msr_value == 0) {
+ if (debug)
+ fprintf(outf, "RAPL_PKG_ENERGY MSR(0x%llx) == ZERO: disabling all RAPL MSRs\n", (unsigned long long)offset);
+ return;
+ }
+
+ valid_rapl_msrs = platform->plat_rapl_msrs; /* success */
+}
+
+/*
* probe_rapl()
*
* sets rapl_power_units, rapl_energy_units, rapl_time_units
*/
void probe_rapl(void)
{
+ probe_rapl_msrs();
+
if (genuine_intel)
rapl_probe_intel();
if (authentic_amd || hygon_genuine)
@@ -7984,7 +7953,7 @@ void probe_rapl(void)
print_rapl_sysfs();
- if (!platform->rapl_msrs || no_msr)
+ if (!valid_rapl_msrs || no_msr)
return;
for_all_cpus(print_rapl, ODD_COUNTERS);
@@ -8088,7 +8057,7 @@ int print_thermal(PER_THREAD_PARAMS)
cpu = t->cpu_id;
/* DTS is per-core, no need to print for each thread */
- if (!is_cpu_first_thread_in_core(t, c, p))
+ if (!is_cpu_first_thread_in_core(t, c))
return 0;
if (cpu_migrate(cpu)) {
@@ -8096,7 +8065,7 @@ int print_thermal(PER_THREAD_PARAMS)
return -1;
}
- if (do_ptm && is_cpu_first_core_in_package(t, c, p)) {
+ if (do_ptm && is_cpu_first_core_in_package(t, p)) {
if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_STATUS, &msr))
return 0;
@@ -8108,8 +8077,7 @@ int print_thermal(PER_THREAD_PARAMS)
dts = (msr >> 16) & 0x7F;
dts2 = (msr >> 8) & 0x7F;
- fprintf(outf, "cpu%d: MSR_IA32_PACKAGE_THERM_INTERRUPT: 0x%08llx (%d C, %d C)\n",
- cpu, msr, tj_max - dts, tj_max - dts2);
+ fprintf(outf, "cpu%d: MSR_IA32_PACKAGE_THERM_INTERRUPT: 0x%08llx (%d C, %d C)\n", cpu, msr, tj_max - dts, tj_max - dts2);
}
if (do_dts && debug) {
@@ -8120,16 +8088,14 @@ int print_thermal(PER_THREAD_PARAMS)
dts = (msr >> 16) & 0x7F;
resolution = (msr >> 27) & 0xF;
- fprintf(outf, "cpu%d: MSR_IA32_THERM_STATUS: 0x%08llx (%d C +/- %d)\n",
- cpu, msr, tj_max - dts, resolution);
+ fprintf(outf, "cpu%d: MSR_IA32_THERM_STATUS: 0x%08llx (%d C +/- %d)\n", cpu, msr, tj_max - dts, resolution);
if (get_msr(cpu, MSR_IA32_THERM_INTERRUPT, &msr))
return 0;
dts = (msr >> 16) & 0x7F;
dts2 = (msr >> 8) & 0x7F;
- fprintf(outf, "cpu%d: MSR_IA32_THERM_INTERRUPT: 0x%08llx (%d C, %d C)\n",
- cpu, msr, tj_max - dts, tj_max - dts2);
+ fprintf(outf, "cpu%d: MSR_IA32_THERM_INTERRUPT: 0x%08llx (%d C, %d C)\n", cpu, msr, tj_max - dts, tj_max - dts2);
}
return 0;
@@ -8203,8 +8169,7 @@ void decode_misc_enable_msr(void)
msr & MSR_IA32_MISC_ENABLE_TM1 ? "" : "No-",
msr & MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP ? "" : "No-",
msr & MSR_IA32_MISC_ENABLE_MWAIT ? "" : "No-",
- msr & MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE ? "No-" : "",
- msr & MSR_IA32_MISC_ENABLE_TURBO_DISABLE ? "No-" : "");
+ msr & MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE ? "No-" : "", msr & MSR_IA32_MISC_ENABLE_TURBO_DISABLE ? "No-" : "");
}
void decode_misc_feature_control(void)
@@ -8243,8 +8208,7 @@ void decode_misc_pwr_mgmt_msr(void)
if (!get_msr(base_cpu, MSR_MISC_PWR_MGMT, &msr))
fprintf(outf, "cpu%d: MSR_MISC_PWR_MGMT: 0x%08llx (%sable-EIST_Coordination %sable-EPB %sable-OOB)\n",
- base_cpu, msr,
- msr & (1 << 0) ? "DIS" : "EN", msr & (1 << 1) ? "EN" : "DIS", msr & (1 << 8) ? "EN" : "DIS");
+ base_cpu, msr, msr & (1 << 0) ? "DIS" : "EN", msr & (1 << 1) ? "EN" : "DIS", msr & (1 << 8) ? "EN" : "DIS");
}
/*
@@ -8297,30 +8261,26 @@ void print_dev_latency(void)
close(fd);
}
-static int has_instr_count_access(void)
+static int has_perf_instr_count_access(void)
{
int fd;
- int has_access;
if (no_perf)
return 0;
fd = open_perf_counter(base_cpu, PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS, -1, 0);
- has_access = fd != -1;
-
if (fd != -1)
close(fd);
- if (!has_access)
+ if (fd == -1)
warnx("Failed to access %s. Some of the counters may not be available\n"
- "\tRun as root to enable them or use %s to disable the access explicitly",
- "instructions retired perf counter", "--no-perf");
+ "\tRun as root to enable them or use %s to disable the access explicitly", "perf instructions retired counter",
+ "'--hide IPC' or '--no-perf'");
- return has_access;
+ return (fd != -1);
}
-int add_rapl_perf_counter(int cpu, struct rapl_counter_info_t *rci, const struct rapl_counter_arch_info *cai,
- double *scale_, enum rapl_unit *unit_)
+int add_rapl_perf_counter(int cpu, struct rapl_counter_info_t *rci, const struct rapl_counter_arch_info *cai, double *scale_, enum rapl_unit *unit_)
{
int ret = -1;
@@ -8370,11 +8330,16 @@ void linux_perf_init(void)
if (access("/proc/sys/kernel/perf_event_paranoid", F_OK))
return;
- if (BIC_IS_ENABLED(BIC_IPC) && has_aperf) {
+ if (BIC_IS_ENABLED(BIC_IPC) && cpuid_has_aperf_mperf) {
fd_instr_count_percpu = calloc(topo.max_cpu_num + 1, sizeof(int));
if (fd_instr_count_percpu == NULL)
err(-1, "calloc fd_instr_count_percpu");
}
+ if (BIC_IS_ENABLED(BIC_LLC_RPS)) {
+ fd_llc_percpu = calloc(topo.max_cpu_num + 1, sizeof(int));
+ if (fd_llc_percpu == NULL)
+ err(-1, "calloc fd_llc_percpu");
+ }
}
void rapl_perf_init(void)
@@ -8485,7 +8450,7 @@ void rapl_perf_init(void)
/* Assumes msr_counter_info is populated */
static int has_amperf_access(void)
{
- return msr_counter_arch_infos[MSR_ARCH_INFO_APERF_INDEX].present &&
+ return cpuid_has_aperf_mperf && msr_counter_arch_infos[MSR_ARCH_INFO_APERF_INDEX].present &&
msr_counter_arch_infos[MSR_ARCH_INFO_MPERF_INDEX].present;
}
@@ -8708,8 +8673,7 @@ void cstate_perf_init_(bool soft_c1)
cci->source[cai->rci_index] = COUNTER_SOURCE_PERF;
/* User MSR for this counter */
- } else if (pkg_cstate_limit >= cai->pkg_cstate_limit
- && add_msr_counter(cpu, cai->msr) >= 0) {
+ } else if (pkg_cstate_limit >= cai->pkg_cstate_limit && add_msr_counter(cpu, cai->msr) >= 0) {
cci->source[cai->rci_index] = COUNTER_SOURCE_MSR;
cci->msr[cai->rci_index] = cai->msr;
}
@@ -8827,8 +8791,7 @@ void process_cpuid()
hygon_genuine = 1;
if (!quiet)
- fprintf(outf, "CPUID(0): %.4s%.4s%.4s 0x%x CPUID levels\n",
- (char *)&ebx, (char *)&edx, (char *)&ecx, max_level);
+ fprintf(outf, "CPUID(0): %.4s%.4s%.4s 0x%x CPUID levels\n", (char *)&ebx, (char *)&edx, (char *)&ecx, max_level);
__cpuid(1, fms, ebx, ecx, edx);
family = (fms >> 8) & 0xf;
@@ -8857,8 +8820,7 @@ void process_cpuid()
__cpuid(0x80000000, max_extended_level, ebx, ecx, edx);
if (!quiet) {
- fprintf(outf, "CPUID(1): family:model:stepping 0x%x:%x:%x (%d:%d:%d)",
- family, model, stepping, family, model, stepping);
+ fprintf(outf, "CPUID(1): family:model:stepping 0x%x:%x:%x (%d:%d:%d)", family, model, stepping, family, model, stepping);
if (ucode_patch_valid)
fprintf(outf, " microcode 0x%x", (unsigned int)((ucode_patch >> 32) & 0xFFFFFFFF));
fputc('\n', outf);
@@ -8872,8 +8834,7 @@ void process_cpuid()
ecx_flags & (1 << 8) ? "TM2" : "-",
edx_flags & (1 << 4) ? "TSC" : "-",
edx_flags & (1 << 5) ? "MSR" : "-",
- edx_flags & (1 << 22) ? "ACPI-TM" : "-",
- edx_flags & (1 << 28) ? "HT" : "-", edx_flags & (1 << 29) ? "TM" : "-");
+ edx_flags & (1 << 22) ? "ACPI-TM" : "-", edx_flags & (1 << 28) ? "HT" : "-", edx_flags & (1 << 29) ? "TM" : "-");
}
probe_platform_features(family, model);
@@ -8897,7 +8858,7 @@ void process_cpuid()
*/
__cpuid(0x6, eax, ebx, ecx, edx);
- has_aperf = ecx & (1 << 0);
+ cpuid_has_aperf_mperf = ecx & (1 << 0);
do_dts = eax & (1 << 0);
if (do_dts)
BIC_PRESENT(BIC_CoreTmp);
@@ -8915,14 +8876,13 @@ void process_cpuid()
if (!quiet)
fprintf(outf, "CPUID(6): %sAPERF, %sTURBO, %sDTS, %sPTM, %sHWP, "
"%sHWPnotify, %sHWPwindow, %sHWPepp, %sHWPpkg, %sEPB\n",
- has_aperf ? "" : "No-",
+ cpuid_has_aperf_mperf ? "" : "No-",
has_turbo ? "" : "No-",
do_dts ? "" : "No-",
do_ptm ? "" : "No-",
has_hwp ? "" : "No-",
has_hwp_notify ? "" : "No-",
- has_hwp_activity_window ? "" : "No-",
- has_hwp_epp ? "" : "No-", has_hwp_pkg ? "" : "No-", has_epb ? "" : "No-");
+ has_hwp_activity_window ? "" : "No-", has_hwp_epp ? "" : "No-", has_hwp_pkg ? "" : "No-", has_epb ? "" : "No-");
if (!quiet)
decode_misc_enable_msr();
@@ -8956,8 +8916,7 @@ void process_cpuid()
if (ebx_tsc != 0) {
if (!quiet && (ebx != 0))
- fprintf(outf, "CPUID(0x15): eax_crystal: %d ebx_tsc: %d ecx_crystal_hz: %d\n",
- eax_crystal, ebx_tsc, crystal_hz);
+ fprintf(outf, "CPUID(0x15): eax_crystal: %d ebx_tsc: %d ecx_crystal_hz: %d\n", eax_crystal, ebx_tsc, crystal_hz);
if (crystal_hz == 0)
crystal_hz = platform->crystal_freq;
@@ -8989,11 +8948,10 @@ void process_cpuid()
tsc_tweak = base_hz / tsc_hz;
if (!quiet)
- fprintf(outf, "CPUID(0x16): base_mhz: %d max_mhz: %d bus_mhz: %d\n",
- base_mhz, max_mhz, bus_mhz);
+ fprintf(outf, "CPUID(0x16): base_mhz: %d max_mhz: %d bus_mhz: %d\n", base_mhz, max_mhz, bus_mhz);
}
- if (has_aperf)
+ if (cpuid_has_aperf_mperf)
aperf_mperf_multiplier = platform->need_perf_multiplier ? 1024 : 1;
BIC_PRESENT(BIC_IRQ);
@@ -9045,6 +9003,62 @@ void probe_pm_features(void)
decode_misc_feature_control();
}
+/* perf_llc_probe
+ *
+ * return 1 on success, else 0
+ */
+int has_perf_llc_access(void)
+{
+ int fd;
+
+ if (no_perf)
+ return 0;
+
+ fd = open_perf_counter(base_cpu, PERF_TYPE_HARDWARE, PERF_COUNT_HW_CACHE_REFERENCES, -1, PERF_FORMAT_GROUP);
+ if (fd != -1)
+ close(fd);
+
+ if (fd == -1)
+ warnx("Failed to access %s. Some of the counters may not be available\n"
+ "\tRun as root to enable them or use %s to disable the access explicitly", "perf LLC counters", "'--hide LLC' or '--no-perf'");
+
+ return (fd != -1);
+}
+
+void perf_llc_init(void)
+{
+ int cpu;
+ int retval;
+
+ if (no_perf)
+ return;
+ if (!(BIC_IS_ENABLED(BIC_LLC_RPS) && BIC_IS_ENABLED(BIC_LLC_HIT)))
+ return;
+
+ for (cpu = 0; cpu <= topo.max_cpu_num; ++cpu) {
+
+ if (cpu_is_not_allowed(cpu))
+ continue;
+
+ assert(fd_llc_percpu != 0);
+ fd_llc_percpu[cpu] = open_perf_counter(cpu, PERF_TYPE_HARDWARE, PERF_COUNT_HW_CACHE_REFERENCES, -1, PERF_FORMAT_GROUP);
+ if (fd_llc_percpu[cpu] == -1) {
+ warnx("%s: perf REFS: failed to open counter on cpu%d", __func__, cpu);
+ free_fd_llc_percpu();
+ return;
+ }
+ assert(fd_llc_percpu != 0);
+ retval = open_perf_counter(cpu, PERF_TYPE_HARDWARE, PERF_COUNT_HW_CACHE_MISSES, fd_llc_percpu[cpu], PERF_FORMAT_GROUP);
+ if (retval == -1) {
+ warnx("%s: perf MISS: failed to open counter on cpu%d", __func__, cpu);
+ free_fd_llc_percpu();
+ return;
+ }
+ }
+ BIC_PRESENT(BIC_LLC_RPS);
+ BIC_PRESENT(BIC_LLC_HIT);
+}
+
/*
* in /dev/cpu/ return success for names that are numbers
* ie. filter out ".", "..", "microcode".
@@ -9351,6 +9365,7 @@ void init_counter(struct thread_data *thread_base, struct core_data *core_base,
t->cpu_id = cpu_id;
if (!cpu_is_not_allowed(cpu_id)) {
+
if (c->base_cpu < 0)
c->base_cpu = t->cpu_id;
if (pkg_base[pkg_id].base_cpu < 0)
@@ -9456,7 +9471,7 @@ bool has_added_counters(void)
void check_msr_access(void)
{
- check_dev_msr();
+ check_msr_driver();
check_msr_permission();
if (no_msr)
@@ -9465,8 +9480,16 @@ void check_msr_access(void)
void check_perf_access(void)
{
- if (no_perf || !BIC_IS_ENABLED(BIC_IPC) || !has_instr_count_access())
- CLR_BIC(BIC_IPC, &bic_enabled);
+ if (BIC_IS_ENABLED(BIC_IPC))
+ if (!has_perf_instr_count_access())
+ no_perf = 1;
+
+ if (BIC_IS_ENABLED(BIC_LLC_RPS) || BIC_IS_ENABLED(BIC_LLC_HIT))
+ if (!has_perf_llc_access())
+ no_perf = 1;
+
+ if (no_perf)
+ bic_disable_perf_access();
}
bool perf_has_hybrid_devices(void)
@@ -9589,8 +9612,7 @@ int added_perf_counters_init_(struct perf_counter_info *pinfo)
perf_config = read_perf_config(perf_device, pinfo->event);
if (perf_config == (unsigned int)-1) {
- warnx("%s: perf/%s/%s: failed to read %s",
- __func__, perf_device, pinfo->event, "config");
+ warnx("%s: perf/%s/%s: failed to read %s", __func__, perf_device, pinfo->event, "config");
continue;
}
@@ -9601,8 +9623,7 @@ int added_perf_counters_init_(struct perf_counter_info *pinfo)
fd_perf = open_perf_counter(cpu, perf_type, perf_config, -1, 0);
if (fd_perf == -1) {
- warnx("%s: perf/%s/%s: failed to open counter on cpu%d",
- __func__, perf_device, pinfo->event, cpu);
+ warnx("%s: perf/%s/%s: failed to open counter on cpu%d", __func__, perf_device, pinfo->event, cpu);
continue;
}
@@ -9611,8 +9632,7 @@ int added_perf_counters_init_(struct perf_counter_info *pinfo)
pinfo->scale = perf_scale;
if (debug)
- fprintf(stderr, "Add perf/%s/%s cpu%d: %d\n",
- perf_device, pinfo->event, cpu, pinfo->fd_perf_per_domain[next_domain]);
+ fprintf(stderr, "Add perf/%s/%s cpu%d: %d\n", perf_device, pinfo->event, cpu, pinfo->fd_perf_per_domain[next_domain]);
}
pinfo = pinfo->next;
@@ -9926,8 +9946,7 @@ int pmt_add_counter(unsigned int guid, unsigned int seq, const char *name, enum
}
if (conflict) {
- fprintf(stderr, "%s: conflicting parameters for the PMT counter with the same name %s\n",
- __func__, name);
+ fprintf(stderr, "%s: conflicting parameters for the PMT counter with the same name %s\n", __func__, name);
exit(1);
}
@@ -9970,8 +9989,7 @@ void pmt_init(void)
* CWF with newer firmware might require a PMT_TYPE_XTAL_TIME intead of PMT_TYPE_TCORE_CLOCK.
*/
pmt_add_counter(PMT_CWF_MC1E_GUID, seq, "CPU%c1e", PMT_TYPE_TCORE_CLOCK,
- PMT_COUNTER_CWF_MC1E_LSB, PMT_COUNTER_CWF_MC1E_MSB, offset, SCOPE_CPU,
- FORMAT_DELTA, cpu_num, PMT_OPEN_TRY);
+ PMT_COUNTER_CWF_MC1E_LSB, PMT_COUNTER_CWF_MC1E_MSB, offset, SCOPE_CPU, FORMAT_DELTA, cpu_num, PMT_OPEN_TRY);
/*
* Rather complex logic for each time we go to the next loop iteration,
@@ -10021,6 +10039,7 @@ void turbostat_init()
linux_perf_init();
rapl_perf_init();
cstate_perf_init();
+ perf_llc_init();
added_perf_counters_init();
pmt_init();
@@ -10126,7 +10145,7 @@ int get_and_dump_counters(void)
void print_version()
{
- fprintf(outf, "turbostat version 2025.09.09 - Len Brown <lenb@kernel.org>\n");
+ fprintf(outf, "turbostat version 2025.12.02 - Len Brown <lenb@kernel.org>\n");
}
#define COMMAND_LINE_SIZE 2048
@@ -10166,8 +10185,7 @@ struct msr_counter *find_msrp_by_name(struct msr_counter *head, char *name)
}
int add_counter(unsigned int msr_num, char *path, char *name,
- unsigned int width, enum counter_scope scope,
- enum counter_type type, enum counter_format format, int flags, int id)
+ unsigned int width, enum counter_scope scope, enum counter_type type, enum counter_format format, int flags, int id)
{
struct msr_counter *msrp;
@@ -10276,9 +10294,7 @@ int add_counter(unsigned int msr_num, char *path, char *name,
struct perf_counter_info *make_perf_counter_info(const char *perf_device,
const char *perf_event,
const char *name,
- unsigned int width,
- enum counter_scope scope,
- enum counter_type type, enum counter_format format)
+ unsigned int width, enum counter_scope scope, enum counter_type type, enum counter_format format)
{
struct perf_counter_info *pinfo;
@@ -10353,8 +10369,7 @@ int add_perf_counter(const char *perf_device, const char *perf_event, const char
// FIXME: we might not have debug here yet
if (debug)
- fprintf(stderr, "%s: %s/%s, name: %s, scope%d\n",
- __func__, pinfo->device, pinfo->event, pinfo->name, pinfo->scope);
+ fprintf(stderr, "%s: %s/%s, name: %s, scope%d\n", __func__, pinfo->device, pinfo->event, pinfo->name, pinfo->scope);
return 0;
}
@@ -10523,8 +10538,7 @@ int pmt_parse_from_path(const char *target_path, unsigned int *out_guid, unsigne
pmt_diriter_init(&pmt_iter);
- for (dirname = pmt_diriter_begin(&pmt_iter, SYSFS_TELEM_PATH); dirname != NULL;
- dirname = pmt_diriter_next(&pmt_iter)) {
+ for (dirname = pmt_diriter_begin(&pmt_iter, SYSFS_TELEM_PATH); dirname != NULL; dirname = pmt_diriter_next(&pmt_iter)) {
fd_telem_dir = openat(dirfd(pmt_iter.dir), dirname->d_name, O_RDONLY | O_DIRECTORY);
if (fd_telem_dir == -1)
@@ -10536,8 +10550,7 @@ int pmt_parse_from_path(const char *target_path, unsigned int *out_guid, unsigne
}
if (fstat(fd_telem_dir, &stat) == -1) {
- fprintf(stderr, "%s: Failed to stat %s directory: %s", __func__,
- dirname->d_name, strerror(errno));
+ fprintf(stderr, "%s: Failed to stat %s directory: %s", __func__, dirname->d_name, strerror(errno));
continue;
}
@@ -10633,8 +10646,7 @@ void parse_add_command_pmt(char *add_command)
}
if (!has_scope) {
- printf("%s: invalid value for scope. Expected cpu%%u, core%%u or package%%u.\n",
- __func__);
+ printf("%s: invalid value for scope. Expected cpu%%u, core%%u or package%%u.\n", __func__);
exit(1);
}
@@ -10710,8 +10722,7 @@ next:
}
if (!has_format) {
- fprintf(stderr, "%s: Invalid format %s. Expected raw, average or delta\n",
- __func__, format_name);
+ fprintf(stderr, "%s: Invalid format %s. Expected raw, average or delta\n", __func__, format_name);
exit(1);
}
}
@@ -10878,7 +10889,7 @@ void probe_cpuidle_residency(void)
if (is_deferred_skip(name_buf))
continue;
- add_counter(0, path, name_buf, 64, SCOPE_CPU, COUNTER_USEC, FORMAT_PERCENT, SYSFS_PERCPU, 0);
+ add_counter(0, path, name_buf, 32, SCOPE_CPU, COUNTER_USEC, FORMAT_PERCENT, SYSFS_PERCPU, 0);
if (state > max_state)
max_state = state;
diff --git a/tools/power/x86/x86_energy_perf_policy/Makefile b/tools/power/x86/x86_energy_perf_policy/Makefile
index 666b325a62a2..d18284667400 100644
--- a/tools/power/x86/x86_energy_perf_policy/Makefile
+++ b/tools/power/x86/x86_energy_perf_policy/Makefile
@@ -1,8 +1,12 @@
# SPDX-License-Identifier: GPL-2.0
CC = $(CROSS_COMPILE)gcc
-BUILD_OUTPUT := $(CURDIR)
+BUILD_OUTPUT := $(CURDIR)
PREFIX := /usr
DESTDIR :=
+DAY := $(shell date +%Y.%m.%d)
+SNAPSHOT = x86_energy_perf_policy-$(DAY)
+
+
ifeq ("$(origin O)", "command line")
BUILD_OUTPUT := $(O)
@@ -27,3 +31,26 @@ install : x86_energy_perf_policy
install -d $(DESTDIR)$(PREFIX)/share/man/man8
install -m 644 x86_energy_perf_policy.8 $(DESTDIR)$(PREFIX)/share/man/man8
+snapshot: x86_energy_perf_policy
+ @rm -rf $(SNAPSHOT)
+ @mkdir $(SNAPSHOT)
+ @cp x86_energy_perf_policy Makefile x86_energy_perf_policy.c x86_energy_perf_policy.8 $(SNAPSHOT)
+
+ @sed -e 's/^#include <linux\/bits.h>/#include "bits.h"/' -e 's/u64/unsigned long long/' ../../../../arch/x86/include/asm/msr-index.h > $(SNAPSHOT)/msr-index.h
+ @echo '#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))' >> $(SNAPSHOT)/msr-index.h
+ @echo "#define BIT(x) (1 << (x))" > $(SNAPSHOT)/bits.h
+ @echo "#define BIT_ULL(nr) (1ULL << (nr))" >> $(SNAPSHOT)/bits.h
+ @echo "#define GENMASK(h, l) (((~0UL) << (l)) & (~0UL >> (sizeof(long) * 8 - 1 - (h))))" >> $(SNAPSHOT)/bits.h
+ @echo "#define GENMASK_ULL(h, l) (((~0ULL) << (l)) & (~0ULL >> (sizeof(long long) * 8 - 1 - (h))))" >> $(SNAPSHOT)/bits.h
+
+ @echo '#define BUILD_BUG_ON(cond) do { enum { compile_time_check ## __COUNTER__ = 1/(!(cond)) }; } while (0)' > $(SNAPSHOT)/build_bug.h
+ @echo '#define __must_be_array(arr) 0' >> $(SNAPSHOT)/build_bug.h
+
+ @echo PWD=. > $(SNAPSHOT)/Makefile
+ @echo "CFLAGS += -DMSRHEADER='\"msr-index.h\"'" >> $(SNAPSHOT)/Makefile
+ @echo "CFLAGS += -DBUILD_BUG_HEADER='\"build_bug.h\"'" >> $(SNAPSHOT)/Makefile
+ @sed -e's/.*MSRHEADER.*//' Makefile >> $(SNAPSHOT)/Makefile
+
+ @rm -f $(SNAPSHOT).tar.gz
+ tar cvzf $(SNAPSHOT).tar.gz $(SNAPSHOT)
+
diff --git a/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.8 b/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.8
index 78c6361898b1..0aa981c18e56 100644
--- a/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.8
+++ b/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.8
@@ -2,7 +2,7 @@
.\" Distributed under the GPL, Copyleft 1994.
.TH X86_ENERGY_PERF_POLICY 8
.SH NAME
-x86_energy_perf_policy \- Manage Energy vs. Performance Policy via x86 Model Specific Registers
+x86_energy_perf_policy \- Manage Energy vs. Performance Policy
.SH SYNOPSIS
.B x86_energy_perf_policy
.RB "[ options ] [ scope ] [field \ value]"
@@ -19,9 +19,14 @@ x86_energy_perf_policy \- Manage Energy vs. Performance Policy via x86 Model Spe
.SH DESCRIPTION
\fBx86_energy_perf_policy\fP
displays and updates energy-performance policy settings specific to
-Intel Architecture Processors. Settings are accessed via Model Specific Register (MSR)
-updates, no matter if the Linux cpufreq sub-system is enabled or not.
+Intel Architecture Processors. It summarizes settings available
+in standard Linux interfaces (eg. cpufreq),
+and also decodes underlying Model Specific Register (MSRs).
+While \fBx86_energy_perf_policy\fP can manage energy-performance policy
+using only MSR access, it prefers standard
+Linux kernel interfaces, when they are available.
+.SH BACKGROUND
Policy in MSR_IA32_ENERGY_PERF_BIAS (EPB)
may affect a wide range of hardware decisions,
such as how aggressively the hardware enters and exits CPU idle states (C-states)
@@ -200,7 +205,9 @@ runs only as root.
.SH FILES
.ta
.nf
-/dev/cpu/*/msr
+EPB: /sys/devices/system/cpu/cpu*/power/energy_perf_bias
+EPP: /sys/devices/system/cpu/cpu*/cpufreq/energy_performance_preference
+MSR: /dev/cpu/*/msr
.fi
.SH "SEE ALSO"
.nf
diff --git a/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c b/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c
index ebda9c366b2b..ac37132207a4 100644
--- a/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c
+++ b/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c
@@ -4,7 +4,7 @@
* policy preference bias on recent X86 processors.
*/
/*
- * Copyright (c) 2010 - 2017 Intel Corporation.
+ * Copyright (c) 2010 - 2025 Intel Corporation.
* Len Brown <len.brown@intel.com>
*/
@@ -62,6 +62,7 @@ unsigned char turbo_update_value;
unsigned char update_hwp_epp;
unsigned char update_hwp_min;
unsigned char update_hwp_max;
+unsigned char hwp_limits_done_via_sysfs;
unsigned char update_hwp_desired;
unsigned char update_hwp_window;
unsigned char update_hwp_use_pkg;
@@ -94,6 +95,8 @@ unsigned int bdx_highest_ratio;
#define PATH_TO_CPU "/sys/devices/system/cpu/"
#define SYSFS_PATH_MAX 255
+static int use_android_msr_path;
+
/*
* maintain compatibility with original implementation, but don't document it:
*/
@@ -369,7 +372,7 @@ void validate_cpu_selected_set(void)
for (cpu = 0; cpu <= max_cpu_num; ++cpu) {
if (CPU_ISSET_S(cpu, cpu_setsize, cpu_selected_set))
if (!CPU_ISSET_S(cpu, cpu_setsize, cpu_present_set))
- errx(1, "Requested cpu% is not present", cpu);
+ errx(1, "Requested cpu%d is not present", cpu);
}
}
@@ -517,7 +520,7 @@ void for_packages(unsigned long long pkg_set, int (func)(int))
void print_version(void)
{
- printf("x86_energy_perf_policy 17.05.11 (C) Len Brown <len.brown@intel.com>\n");
+ printf("x86_energy_perf_policy 2025.11.22 Len Brown <lenb@kernel.org>\n");
}
void cmdline(int argc, char **argv)
@@ -630,7 +633,7 @@ void cmdline(int argc, char **argv)
*/
FILE *fopen_or_die(const char *path, const char *mode)
{
- FILE *filep = fopen(path, "r");
+ FILE *filep = fopen(path, mode);
if (!filep)
err(1, "%s: open failed", path);
@@ -644,7 +647,7 @@ void err_on_hypervisor(void)
char *buffer;
/* On VMs /proc/cpuinfo contains a "flags" entry for hypervisor */
- cpuinfo = fopen_or_die("/proc/cpuinfo", "ro");
+ cpuinfo = fopen_or_die("/proc/cpuinfo", "r");
buffer = malloc(4096);
if (!buffer) {
@@ -659,6 +662,11 @@ void err_on_hypervisor(void)
}
flags = strstr(buffer, "flags");
+ if (!flags) {
+ fclose(cpuinfo);
+ free(buffer);
+ err(1, "Failed to find 'flags' in /proc/cpuinfo");
+ }
rewind(cpuinfo);
fseek(cpuinfo, flags - buffer, SEEK_SET);
if (!fgets(buffer, 4096, cpuinfo)) {
@@ -683,10 +691,12 @@ int get_msr(int cpu, int offset, unsigned long long *msr)
char pathname[32];
int fd;
- sprintf(pathname, "/dev/cpu/%d/msr", cpu);
+ sprintf(pathname, use_android_msr_path ? "/dev/msr%d" : "/dev/cpu/%d/msr", cpu);
fd = open(pathname, O_RDONLY);
if (fd < 0)
- err(-1, "%s open failed, try chown or chmod +r /dev/cpu/*/msr, or run as root", pathname);
+ err(-1, "%s open failed, try chown or chmod +r %s, or run as root",
+ pathname, use_android_msr_path ? "/dev/msr*" : "/dev/cpu/*/msr");
+
retval = pread(fd, msr, sizeof(*msr), offset);
if (retval != sizeof(*msr)) {
@@ -707,10 +717,11 @@ int put_msr(int cpu, int offset, unsigned long long new_msr)
int retval;
int fd;
- sprintf(pathname, "/dev/cpu/%d/msr", cpu);
+ sprintf(pathname, use_android_msr_path ? "/dev/msr%d" : "/dev/cpu/%d/msr", cpu);
fd = open(pathname, O_RDWR);
if (fd < 0)
- err(-1, "%s open failed, try chown or chmod +r /dev/cpu/*/msr, or run as root", pathname);
+ err(-1, "%s open failed, try chown or chmod +r %s, or run as root",
+ pathname, use_android_msr_path ? "/dev/msr*" : "/dev/cpu/*/msr");
retval = pwrite(fd, &new_msr, sizeof(new_msr), offset);
if (retval != sizeof(new_msr))
@@ -809,7 +820,7 @@ void print_hwp_request_pkg(int pkg, struct msr_hwp_request *h, char *str)
h->hwp_min, h->hwp_max, h->hwp_desired, h->hwp_epp,
h->hwp_window, h->hwp_window & 0x7F, (h->hwp_window >> 7) & 0x7);
}
-void read_hwp_request(int cpu, struct msr_hwp_request *hwp_req, unsigned int msr_offset)
+void read_hwp_request_msr(int cpu, struct msr_hwp_request *hwp_req, unsigned int msr_offset)
{
unsigned long long msr;
@@ -823,7 +834,7 @@ void read_hwp_request(int cpu, struct msr_hwp_request *hwp_req, unsigned int msr
hwp_req->hwp_use_pkg = (((msr) >> 42) & 0x1);
}
-void write_hwp_request(int cpu, struct msr_hwp_request *hwp_req, unsigned int msr_offset)
+void write_hwp_request_msr(int cpu, struct msr_hwp_request *hwp_req, unsigned int msr_offset)
{
unsigned long long msr = 0;
@@ -843,7 +854,7 @@ void write_hwp_request(int cpu, struct msr_hwp_request *hwp_req, unsigned int ms
put_msr(cpu, msr_offset, msr);
}
-static int get_epb(int cpu)
+static int get_epb_sysfs(int cpu)
{
char path[SYSFS_PATH_MAX];
char linebuf[3];
@@ -865,7 +876,7 @@ static int get_epb(int cpu)
return (int)val;
}
-static int set_epb(int cpu, int val)
+static int set_epb_sysfs(int cpu, int val)
{
char path[SYSFS_PATH_MAX];
char linebuf[3];
@@ -895,14 +906,14 @@ int print_cpu_msrs(int cpu)
struct msr_hwp_cap cap;
int epb;
- epb = get_epb(cpu);
+ epb = get_epb_sysfs(cpu);
if (epb >= 0)
printf("cpu%d: EPB %u\n", cpu, (unsigned int) epb);
if (!has_hwp)
return 0;
- read_hwp_request(cpu, &req, MSR_HWP_REQUEST);
+ read_hwp_request_msr(cpu, &req, MSR_HWP_REQUEST);
print_hwp_request(cpu, &req, "");
read_hwp_cap(cpu, &cap, MSR_HWP_CAPABILITIES);
@@ -919,7 +930,7 @@ int print_pkg_msrs(int pkg)
if (!has_hwp)
return 0;
- read_hwp_request(first_cpu_in_pkg[pkg], &req, MSR_HWP_REQUEST_PKG);
+ read_hwp_request_msr(first_cpu_in_pkg[pkg], &req, MSR_HWP_REQUEST_PKG);
print_hwp_request_pkg(pkg, &req, "");
if (has_hwp_notify) {
@@ -951,8 +962,10 @@ int ratio_2_sysfs_khz(int ratio)
}
/*
* If HWP is enabled and cpufreq sysfs attribtes are present,
- * then update sysfs, so that it will not become
- * stale when we write to MSRs.
+ * then update via sysfs. The intel_pstate driver may modify (clip)
+ * this request, say, when HWP_CAP is outside of PLATFORM_INFO limits,
+ * and the driver-chosen value takes precidence.
+ *
* (intel_pstate's max_perf_pct and min_perf_pct will follow cpufreq,
* so we don't have to touch that.)
*/
@@ -1007,6 +1020,8 @@ int update_sysfs(int cpu)
if (update_hwp_max)
update_cpufreq_scaling_freq(1, cpu, req_update.hwp_max);
+ hwp_limits_done_via_sysfs = 1;
+
return 0;
}
@@ -1074,21 +1089,21 @@ int check_hwp_request_v_hwp_capabilities(int cpu, struct msr_hwp_request *req, s
return 0;
}
-int update_hwp_request(int cpu)
+int update_hwp_request_msr(int cpu)
{
struct msr_hwp_request req;
struct msr_hwp_cap cap;
int msr_offset = MSR_HWP_REQUEST;
- read_hwp_request(cpu, &req, msr_offset);
+ read_hwp_request_msr(cpu, &req, msr_offset);
if (debug)
print_hwp_request(cpu, &req, "old: ");
- if (update_hwp_min)
+ if (update_hwp_min && !hwp_limits_done_via_sysfs)
req.hwp_min = req_update.hwp_min;
- if (update_hwp_max)
+ if (update_hwp_max && !hwp_limits_done_via_sysfs)
req.hwp_max = req_update.hwp_max;
if (update_hwp_desired)
@@ -1111,15 +1126,15 @@ int update_hwp_request(int cpu)
verify_hwp_req_self_consistency(cpu, &req);
- write_hwp_request(cpu, &req, msr_offset);
+ write_hwp_request_msr(cpu, &req, msr_offset);
if (debug) {
- read_hwp_request(cpu, &req, msr_offset);
+ read_hwp_request_msr(cpu, &req, msr_offset);
print_hwp_request(cpu, &req, "new: ");
}
return 0;
}
-int update_hwp_request_pkg(int pkg)
+int update_hwp_request_pkg_msr(int pkg)
{
struct msr_hwp_request req;
struct msr_hwp_cap cap;
@@ -1127,7 +1142,7 @@ int update_hwp_request_pkg(int pkg)
int msr_offset = MSR_HWP_REQUEST_PKG;
- read_hwp_request(cpu, &req, msr_offset);
+ read_hwp_request_msr(cpu, &req, msr_offset);
if (debug)
print_hwp_request_pkg(pkg, &req, "old: ");
@@ -1155,10 +1170,10 @@ int update_hwp_request_pkg(int pkg)
verify_hwp_req_self_consistency(cpu, &req);
- write_hwp_request(cpu, &req, msr_offset);
+ write_hwp_request_msr(cpu, &req, msr_offset);
if (debug) {
- read_hwp_request(cpu, &req, msr_offset);
+ read_hwp_request_msr(cpu, &req, msr_offset);
print_hwp_request_pkg(pkg, &req, "new: ");
}
return 0;
@@ -1166,30 +1181,39 @@ int update_hwp_request_pkg(int pkg)
int enable_hwp_on_cpu(int cpu)
{
- unsigned long long msr;
+ unsigned long long old_msr, new_msr;
+
+ get_msr(cpu, MSR_PM_ENABLE, &old_msr);
+
+ if (old_msr & 1)
+ return 0; /* already enabled */
- get_msr(cpu, MSR_PM_ENABLE, &msr);
- put_msr(cpu, MSR_PM_ENABLE, 1);
+ new_msr = old_msr | 1;
+ put_msr(cpu, MSR_PM_ENABLE, new_msr);
if (verbose)
- printf("cpu%d: MSR_PM_ENABLE old: %d new: %d\n", cpu, (unsigned int) msr, 1);
+ printf("cpu%d: MSR_PM_ENABLE old: %llX new: %llX\n", cpu, old_msr, new_msr);
return 0;
}
-int update_cpu_msrs(int cpu)
+int update_cpu_epb_sysfs(int cpu)
{
- unsigned long long msr;
int epb;
- if (update_epb) {
- epb = get_epb(cpu);
- set_epb(cpu, new_epb);
+ epb = get_epb_sysfs(cpu);
+ set_epb_sysfs(cpu, new_epb);
- if (verbose)
- printf("cpu%d: ENERGY_PERF_BIAS old: %d new: %d\n",
- cpu, epb, (unsigned int) new_epb);
- }
+ if (verbose)
+ printf("cpu%d: ENERGY_PERF_BIAS old: %d new: %d\n",
+ cpu, epb, (unsigned int) new_epb);
+
+ return 0;
+}
+
+int update_cpu_msrs(int cpu)
+{
+ unsigned long long msr;
if (update_turbo) {
int turbo_is_present_and_disabled;
@@ -1224,7 +1248,7 @@ int update_cpu_msrs(int cpu)
if (!hwp_update_enabled())
return 0;
- update_hwp_request(cpu);
+ update_hwp_request_msr(cpu);
return 0;
}
@@ -1312,6 +1336,17 @@ void for_all_cpus_in_set(size_t set_size, cpu_set_t *cpu_set, int (func)(int))
if (CPU_ISSET_S(cpu_num, set_size, cpu_set))
func(cpu_num);
}
+int for_all_cpus_in_set_and(size_t set_size, cpu_set_t *cpu_set, int (func)(int))
+{
+ int cpu_num;
+ int retval = 1;
+
+ for (cpu_num = 0; cpu_num <= max_cpu_num; ++cpu_num)
+ if (CPU_ISSET_S(cpu_num, set_size, cpu_set))
+ retval &= func(cpu_num);
+
+ return retval;
+}
void init_data_structures(void)
{
@@ -1326,21 +1361,38 @@ void init_data_structures(void)
for_all_proc_cpus(mark_cpu_present);
}
-/* clear has_hwp if it is not enable (or being enabled) */
+int is_hwp_enabled_on_cpu(int cpu_num)
+{
+ unsigned long long msr;
+ int retval;
+
+ /* MSR_PM_ENABLE[1] == 1 if HWP is enabled and MSRs visible */
+ get_msr(cpu_num, MSR_PM_ENABLE, &msr);
+ retval = (msr & 1);
+
+ if (verbose)
+ fprintf(stderr, "cpu%d: %sHWP\n", cpu_num, retval ? "" : "No-");
+
+ return retval;
+}
+/*
+ * verify_hwp_is_enabled()
+ *
+ * Set (has_hwp=0) if no HWP feature or any of selected CPU set does not have HWP enabled
+ */
void verify_hwp_is_enabled(void)
{
- unsigned long long msr;
+ int retval;
if (!has_hwp) /* set in early_cpuid() */
return;
- /* MSR_PM_ENABLE[1] == 1 if HWP is enabled and MSRs visible */
- get_msr(base_cpu, MSR_PM_ENABLE, &msr);
- if ((msr & 1) == 0) {
+ retval = for_all_cpus_in_set_and(cpu_setsize, cpu_selected_set, is_hwp_enabled_on_cpu);
+
+ if (retval == 0) {
fprintf(stderr, "HWP can be enabled using '--hwp-enable'\n");
has_hwp = 0;
- return;
}
}
@@ -1379,16 +1431,32 @@ void set_base_cpu(void)
err(-ENODEV, "No valid cpus found");
}
+static void probe_android_msr_path(void)
+{
+ struct stat sb;
+ char test_path[32];
+
+ sprintf(test_path, "/dev/msr%d", base_cpu);
+ if (stat(test_path, &sb) == 0)
+ use_android_msr_path = 1;
+}
void probe_dev_msr(void)
{
struct stat sb;
char pathname[32];
- sprintf(pathname, "/dev/cpu/%d/msr", base_cpu);
- if (stat(pathname, &sb))
- if (system("/sbin/modprobe msr > /dev/null 2>&1"))
- err(-5, "no /dev/cpu/0/msr, Try \"# modprobe msr\" ");
+ probe_android_msr_path();
+
+ sprintf(pathname, use_android_msr_path ? "/dev/msr%d" : "/dev/cpu/%d/msr", base_cpu);
+ if (stat(pathname, &sb)) {
+ if (system("/sbin/modprobe msr > /dev/null 2>&1")) {
+ if (use_android_msr_path)
+ err(-5, "no /dev/msr0, Try \"# modprobe msr\" ");
+ else
+ err(-5, "no /dev/cpu/0/msr, Try \"# modprobe msr\" ");
+ }
+ }
}
static void get_cpuid_or_exit(unsigned int leaf,
@@ -1505,6 +1573,7 @@ void parse_cpuid(void)
int main(int argc, char **argv)
{
set_base_cpu();
+
probe_dev_msr();
init_data_structures();
@@ -1551,10 +1620,13 @@ int main(int argc, char **argv)
/* update CPU set */
if (cpu_selected_set) {
+ if (update_epb)
+ for_all_cpus_in_set(cpu_setsize, cpu_selected_set, update_cpu_epb_sysfs);
for_all_cpus_in_set(cpu_setsize, cpu_selected_set, update_sysfs);
for_all_cpus_in_set(cpu_setsize, cpu_selected_set, update_cpu_msrs);
+
} else if (pkg_selected_set)
- for_packages(pkg_selected_set, update_hwp_request_pkg);
+ for_packages(pkg_selected_set, update_hwp_request_pkg_msr);
return 0;
}
diff --git a/tools/sched_ext/Makefile b/tools/sched_ext/Makefile
index d68780e2e03d..e4bda2474060 100644
--- a/tools/sched_ext/Makefile
+++ b/tools/sched_ext/Makefile
@@ -133,6 +133,7 @@ $(MAKE_DIRS):
$(call msg,MKDIR,,$@)
$(Q)mkdir -p $@
+ifneq ($(CROSS_COMPILE),)
$(BPFOBJ): $(wildcard $(BPFDIR)/*.[ch] $(BPFDIR)/Makefile) \
$(APIDIR)/linux/bpf.h \
| $(OBJ_DIR)/libbpf
@@ -141,6 +142,7 @@ $(BPFOBJ): $(wildcard $(BPFDIR)/*.[ch] $(BPFDIR)/Makefile) \
EXTRA_CFLAGS='-g -O0 -fPIC' \
LDFLAGS="$(LDFLAGS)" \
DESTDIR=$(OUTPUT_DIR) prefix= all install_headers
+endif
$(HOST_BPFOBJ): $(wildcard $(BPFDIR)/*.[ch] $(BPFDIR)/Makefile) \
$(APIDIR)/linux/bpf.h \
@@ -187,7 +189,7 @@ $(INCLUDE_DIR)/%.bpf.skel.h: $(SCXOBJ_DIR)/%.bpf.o $(INCLUDE_DIR)/vmlinux.h $(BP
SCX_COMMON_DEPS := include/scx/common.h include/scx/user_exit_info.h | $(BINDIR)
-c-sched-targets = scx_simple scx_qmap scx_central scx_flatcg
+c-sched-targets = scx_simple scx_cpu0 scx_qmap scx_central scx_flatcg
$(addprefix $(BINDIR)/,$(c-sched-targets)): \
$(BINDIR)/%: \
diff --git a/tools/sched_ext/include/scx/bpf_arena_common.bpf.h b/tools/sched_ext/include/scx/bpf_arena_common.bpf.h
new file mode 100644
index 000000000000..4366fb3c91ce
--- /dev/null
+++ b/tools/sched_ext/include/scx/bpf_arena_common.bpf.h
@@ -0,0 +1,175 @@
+/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
+/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
+#pragma once
+
+#ifndef PAGE_SIZE
+#define PAGE_SIZE __PAGE_SIZE
+/*
+ * for older kernels try sizeof(struct genradix_node)
+ * or flexible:
+ * static inline long __bpf_page_size(void) {
+ * return bpf_core_enum_value(enum page_size_enum___l, __PAGE_SIZE___l) ?: sizeof(struct genradix_node);
+ * }
+ * but generated code is not great.
+ */
+#endif
+
+#if defined(__BPF_FEATURE_ADDR_SPACE_CAST) && !defined(BPF_ARENA_FORCE_ASM)
+#define __arena __attribute__((address_space(1)))
+#define __arena_global __attribute__((address_space(1)))
+#define cast_kern(ptr) /* nop for bpf prog. emitted by LLVM */
+#define cast_user(ptr) /* nop for bpf prog. emitted by LLVM */
+#else
+
+/* emit instruction:
+ * rX = rX .off = BPF_ADDR_SPACE_CAST .imm32 = (dst_as << 16) | src_as
+ *
+ * This is a workaround for LLVM compiler versions without
+ * __BPF_FEATURE_ADDR_SPACE_CAST that do not automatically cast between arena
+ * pointers and native kernel/userspace ones. In this case we explicitly do so
+ * with cast_kern() and cast_user(). E.g., in the Linux kernel tree,
+ * tools/testing/selftests/bpf includes tests that use these macros to implement
+ * linked lists and hashtables backed by arena memory. In sched_ext, we use
+ * cast_kern() and cast_user() for compatibility with older LLVM toolchains.
+ */
+#ifndef bpf_addr_space_cast
+#define bpf_addr_space_cast(var, dst_as, src_as)\
+ asm volatile(".byte 0xBF; \
+ .ifc %[reg], r0; \
+ .byte 0x00; \
+ .endif; \
+ .ifc %[reg], r1; \
+ .byte 0x11; \
+ .endif; \
+ .ifc %[reg], r2; \
+ .byte 0x22; \
+ .endif; \
+ .ifc %[reg], r3; \
+ .byte 0x33; \
+ .endif; \
+ .ifc %[reg], r4; \
+ .byte 0x44; \
+ .endif; \
+ .ifc %[reg], r5; \
+ .byte 0x55; \
+ .endif; \
+ .ifc %[reg], r6; \
+ .byte 0x66; \
+ .endif; \
+ .ifc %[reg], r7; \
+ .byte 0x77; \
+ .endif; \
+ .ifc %[reg], r8; \
+ .byte 0x88; \
+ .endif; \
+ .ifc %[reg], r9; \
+ .byte 0x99; \
+ .endif; \
+ .short %[off]; \
+ .long %[as]" \
+ : [reg]"+r"(var) \
+ : [off]"i"(BPF_ADDR_SPACE_CAST) \
+ , [as]"i"((dst_as << 16) | src_as));
+#endif
+
+#define __arena
+#define __arena_global SEC(".addr_space.1")
+#define cast_kern(ptr) bpf_addr_space_cast(ptr, 0, 1)
+#define cast_user(ptr) bpf_addr_space_cast(ptr, 1, 0)
+#endif
+
+void __arena* bpf_arena_alloc_pages(void *map, void __arena *addr, __u32 page_cnt,
+ int node_id, __u64 flags) __ksym __weak;
+void bpf_arena_free_pages(void *map, void __arena *ptr, __u32 page_cnt) __ksym __weak;
+
+/*
+ * Note that cond_break can only be portably used in the body of a breakable
+ * construct, whereas can_loop can be used anywhere.
+ */
+#ifdef TEST
+#define can_loop true
+#define __cond_break(expr) expr
+#else
+#ifdef __BPF_FEATURE_MAY_GOTO
+#define can_loop \
+ ({ __label__ l_break, l_continue; \
+ bool ret = true; \
+ asm volatile goto("may_goto %l[l_break]" \
+ :::: l_break); \
+ goto l_continue; \
+ l_break: ret = false; \
+ l_continue:; \
+ ret; \
+ })
+
+#define __cond_break(expr) \
+ ({ __label__ l_break, l_continue; \
+ asm volatile goto("may_goto %l[l_break]" \
+ :::: l_break); \
+ goto l_continue; \
+ l_break: expr; \
+ l_continue:; \
+ })
+#else
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+#define can_loop \
+ ({ __label__ l_break, l_continue; \
+ bool ret = true; \
+ asm volatile goto("1:.byte 0xe5; \
+ .byte 0; \
+ .long ((%l[l_break] - 1b - 8) / 8) & 0xffff; \
+ .short 0" \
+ :::: l_break); \
+ goto l_continue; \
+ l_break: ret = false; \
+ l_continue:; \
+ ret; \
+ })
+
+#define __cond_break(expr) \
+ ({ __label__ l_break, l_continue; \
+ asm volatile goto("1:.byte 0xe5; \
+ .byte 0; \
+ .long ((%l[l_break] - 1b - 8) / 8) & 0xffff; \
+ .short 0" \
+ :::: l_break); \
+ goto l_continue; \
+ l_break: expr; \
+ l_continue:; \
+ })
+#else
+#define can_loop \
+ ({ __label__ l_break, l_continue; \
+ bool ret = true; \
+ asm volatile goto("1:.byte 0xe5; \
+ .byte 0; \
+ .long (((%l[l_break] - 1b - 8) / 8) & 0xffff) << 16; \
+ .short 0" \
+ :::: l_break); \
+ goto l_continue; \
+ l_break: ret = false; \
+ l_continue:; \
+ ret; \
+ })
+
+#define __cond_break(expr) \
+ ({ __label__ l_break, l_continue; \
+ asm volatile goto("1:.byte 0xe5; \
+ .byte 0; \
+ .long (((%l[l_break] - 1b - 8) / 8) & 0xffff) << 16; \
+ .short 0" \
+ :::: l_break); \
+ goto l_continue; \
+ l_break: expr; \
+ l_continue:; \
+ })
+#endif /* __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ */
+#endif /* __BPF_FEATURE_MAY_GOTO */
+#endif /* TEST */
+
+#define cond_break __cond_break(break)
+#define cond_break_label(label) __cond_break(goto label)
+
+
+void bpf_preempt_disable(void) __weak __ksym;
+void bpf_preempt_enable(void) __weak __ksym;
diff --git a/tools/sched_ext/include/scx/bpf_arena_common.h b/tools/sched_ext/include/scx/bpf_arena_common.h
new file mode 100644
index 000000000000..10141db0b59d
--- /dev/null
+++ b/tools/sched_ext/include/scx/bpf_arena_common.h
@@ -0,0 +1,33 @@
+/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
+/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
+#pragma once
+
+#ifndef arena_container_of
+#define arena_container_of(ptr, type, member) \
+ ({ \
+ void __arena *__mptr = (void __arena *)(ptr); \
+ ((type *)(__mptr - offsetof(type, member))); \
+ })
+#endif
+
+/* Provide the definition of PAGE_SIZE. */
+#include <sys/user.h>
+
+#define __arena
+#define __arg_arena
+#define cast_kern(ptr) /* nop for user space */
+#define cast_user(ptr) /* nop for user space */
+char __attribute__((weak)) arena[1];
+
+#ifndef offsetof
+#define offsetof(type, member) ((unsigned long)&((type *)0)->member)
+#endif
+
+static inline void __arena* bpf_arena_alloc_pages(void *map, void *addr, __u32 page_cnt,
+ int node_id, __u64 flags)
+{
+ return NULL;
+}
+static inline void bpf_arena_free_pages(void *map, void __arena *ptr, __u32 page_cnt)
+{
+}
diff --git a/tools/sched_ext/include/scx/common.bpf.h b/tools/sched_ext/include/scx/common.bpf.h
index d4e21558e982..821d5791bd42 100644
--- a/tools/sched_ext/include/scx/common.bpf.h
+++ b/tools/sched_ext/include/scx/common.bpf.h
@@ -24,14 +24,26 @@
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
#include <asm-generic/errno.h>
-#include "user_exit_info.h"
+#include "user_exit_info.bpf.h"
#include "enum_defs.autogen.h"
+#define PF_IDLE 0x00000002 /* I am an IDLE thread */
+#define PF_IO_WORKER 0x00000010 /* Task is an IO worker */
#define PF_WQ_WORKER 0x00000020 /* I'm a workqueue worker */
+#define PF_KCOMPACTD 0x00010000 /* I am kcompactd */
+#define PF_KSWAPD 0x00020000 /* I am kswapd */
#define PF_KTHREAD 0x00200000 /* I am a kernel thread */
#define PF_EXITING 0x00000004
#define CLOCK_MONOTONIC 1
+#ifndef NR_CPUS
+#define NR_CPUS 1024
+#endif
+
+#ifndef NUMA_NO_NODE
+#define NUMA_NO_NODE (-1)
+#endif
+
extern int LINUX_KERNEL_VERSION __kconfig;
extern const char CONFIG_CC_VERSION_TEXT[64] __kconfig __weak;
extern const char CONFIG_LOCALVERSION[64] __kconfig __weak;
@@ -48,21 +60,15 @@ static inline void ___vmlinux_h_sanity_check___(void)
s32 scx_bpf_create_dsq(u64 dsq_id, s32 node) __ksym;
s32 scx_bpf_select_cpu_dfl(struct task_struct *p, s32 prev_cpu, u64 wake_flags, bool *is_idle) __ksym;
-s32 scx_bpf_select_cpu_and(struct task_struct *p, s32 prev_cpu, u64 wake_flags,
- const struct cpumask *cpus_allowed, u64 flags) __ksym __weak;
-void scx_bpf_dsq_insert(struct task_struct *p, u64 dsq_id, u64 slice, u64 enq_flags) __ksym __weak;
-void scx_bpf_dsq_insert_vtime(struct task_struct *p, u64 dsq_id, u64 slice, u64 vtime, u64 enq_flags) __ksym __weak;
+s32 __scx_bpf_select_cpu_and(struct task_struct *p, const struct cpumask *cpus_allowed,
+ struct scx_bpf_select_cpu_and_args *args) __ksym __weak;
+bool __scx_bpf_dsq_insert_vtime(struct task_struct *p, struct scx_bpf_dsq_insert_vtime_args *args) __ksym __weak;
u32 scx_bpf_dispatch_nr_slots(void) __ksym;
void scx_bpf_dispatch_cancel(void) __ksym;
-bool scx_bpf_dsq_move_to_local(u64 dsq_id) __ksym __weak;
-void scx_bpf_dsq_move_set_slice(struct bpf_iter_scx_dsq *it__iter, u64 slice) __ksym __weak;
-void scx_bpf_dsq_move_set_vtime(struct bpf_iter_scx_dsq *it__iter, u64 vtime) __ksym __weak;
-bool scx_bpf_dsq_move(struct bpf_iter_scx_dsq *it__iter, struct task_struct *p, u64 dsq_id, u64 enq_flags) __ksym __weak;
-bool scx_bpf_dsq_move_vtime(struct bpf_iter_scx_dsq *it__iter, struct task_struct *p, u64 dsq_id, u64 enq_flags) __ksym __weak;
-u32 scx_bpf_reenqueue_local(void) __ksym;
void scx_bpf_kick_cpu(s32 cpu, u64 flags) __ksym;
s32 scx_bpf_dsq_nr_queued(u64 dsq_id) __ksym;
void scx_bpf_destroy_dsq(u64 dsq_id) __ksym;
+struct task_struct *scx_bpf_dsq_peek(u64 dsq_id) __ksym __weak;
int bpf_iter_scx_dsq_new(struct bpf_iter_scx_dsq *it, u64 dsq_id, u64 flags) __ksym __weak;
struct task_struct *bpf_iter_scx_dsq_next(struct bpf_iter_scx_dsq *it) __ksym __weak;
void bpf_iter_scx_dsq_destroy(struct bpf_iter_scx_dsq *it) __ksym __weak;
@@ -91,7 +97,8 @@ s32 scx_bpf_pick_any_cpu(const cpumask_t *cpus_allowed, u64 flags) __ksym;
bool scx_bpf_task_running(const struct task_struct *p) __ksym;
s32 scx_bpf_task_cpu(const struct task_struct *p) __ksym;
struct rq *scx_bpf_cpu_rq(s32 cpu) __ksym;
-struct cgroup *scx_bpf_task_cgroup(struct task_struct *p) __ksym __weak;
+struct rq *scx_bpf_locked_rq(void) __ksym;
+struct task_struct *scx_bpf_cpu_curr(s32 cpu) __ksym __weak;
u64 scx_bpf_now(void) __ksym __weak;
void scx_bpf_events(struct scx_event_stats *events, size_t events__sz) __ksym __weak;
@@ -107,6 +114,9 @@ void scx_bpf_events(struct scx_event_stats *events, size_t events__sz) __ksym __
static inline __attribute__((format(printf, 1, 2)))
void ___scx_bpf_bstr_format_checker(const char *fmt, ...) {}
+#define SCX_STRINGIFY(x) #x
+#define SCX_TOSTRING(x) SCX_STRINGIFY(x)
+
/*
* Helper macro for initializing the fmt and variadic argument inputs to both
* bstr exit kfuncs. Callers to this function should use ___fmt and ___param to
@@ -141,13 +151,15 @@ void ___scx_bpf_bstr_format_checker(const char *fmt, ...) {}
* scx_bpf_error() wraps the scx_bpf_error_bstr() kfunc with variadic arguments
* instead of an array of u64. Invoking this macro will cause the scheduler to
* exit in an erroneous state, with diagnostic information being passed to the
- * user.
+ * user. It appends the file and line number to aid debugging.
*/
#define scx_bpf_error(fmt, args...) \
({ \
- scx_bpf_bstr_preamble(fmt, args) \
+ scx_bpf_bstr_preamble( \
+ __FILE__ ":" SCX_TOSTRING(__LINE__) ": " fmt, ##args) \
scx_bpf_error_bstr(___fmt, ___param, sizeof(___param)); \
- ___scx_bpf_bstr_format_checker(fmt, ##args); \
+ ___scx_bpf_bstr_format_checker( \
+ __FILE__ ":" SCX_TOSTRING(__LINE__) ": " fmt, ##args); \
})
/*
@@ -229,6 +241,7 @@ BPF_PROG(name, ##args)
* be a pointer to the area. Use `MEMBER_VPTR(*ptr, .member)` instead of
* `MEMBER_VPTR(ptr, ->member)`.
*/
+#ifndef MEMBER_VPTR
#define MEMBER_VPTR(base, member) (typeof((base) member) *) \
({ \
u64 __base = (u64)&(base); \
@@ -245,6 +258,7 @@ BPF_PROG(name, ##args)
[max]"i"(sizeof(base) - sizeof((base) member))); \
__addr; \
})
+#endif /* MEMBER_VPTR */
/**
* ARRAY_ELEM_PTR - Obtain the verified pointer to an array element
@@ -260,6 +274,7 @@ BPF_PROG(name, ##args)
* size of the array to compute the max, which will result in rejection by
* the verifier.
*/
+#ifndef ARRAY_ELEM_PTR
#define ARRAY_ELEM_PTR(arr, i, n) (typeof(arr[i]) *) \
({ \
u64 __base = (u64)arr; \
@@ -274,7 +289,7 @@ BPF_PROG(name, ##args)
[max]"r"(sizeof(arr[0]) * ((n) - 1))); \
__addr; \
})
-
+#endif /* ARRAY_ELEM_PTR */
/*
* BPF declarations and helpers
@@ -438,8 +453,27 @@ static __always_inline const struct cpumask *cast_mask(struct bpf_cpumask *mask)
*/
static inline bool is_migration_disabled(const struct task_struct *p)
{
- if (bpf_core_field_exists(p->migration_disabled))
- return p->migration_disabled;
+ /*
+ * Testing p->migration_disabled in a BPF code is tricky because the
+ * migration is _always_ disabled while running the BPF code.
+ * The prolog (__bpf_prog_enter) and epilog (__bpf_prog_exit) for BPF
+ * code execution disable and re-enable the migration of the current
+ * task, respectively. So, the _current_ task of the sched_ext ops is
+ * always migration-disabled. Moreover, p->migration_disabled could be
+ * two or greater when a sched_ext ops BPF code (e.g., ops.tick) is
+ * executed in the middle of the other BPF code execution.
+ *
+ * Therefore, we should decide that the _current_ task is
+ * migration-disabled only when its migration_disabled count is greater
+ * than one. In other words, when p->migration_disabled == 1, there is
+ * an ambiguity, so we should check if @p is the current task or not.
+ */
+ if (bpf_core_field_exists(p->migration_disabled)) {
+ if (p->migration_disabled == 1)
+ return bpf_get_current_task_btf() != p;
+ else
+ return p->migration_disabled;
+ }
return false;
}
@@ -476,7 +510,7 @@ static inline s64 time_delta(u64 after, u64 before)
*/
static inline bool time_after(u64 a, u64 b)
{
- return (s64)(b - a) < 0;
+ return (s64)(b - a) < 0;
}
/**
@@ -500,7 +534,7 @@ static inline bool time_before(u64 a, u64 b)
*/
static inline bool time_after_eq(u64 a, u64 b)
{
- return (s64)(a - b) >= 0;
+ return (s64)(a - b) >= 0;
}
/**
@@ -547,9 +581,15 @@ static inline bool time_in_range_open(u64 a, u64 b, u64 c)
*/
/* useful compiler attributes */
+#ifndef likely
#define likely(x) __builtin_expect(!!(x), 1)
+#endif
+#ifndef unlikely
#define unlikely(x) __builtin_expect(!!(x), 0)
+#endif
+#ifndef __maybe_unused
#define __maybe_unused __attribute__((__unused__))
+#endif
/*
* READ/WRITE_ONCE() are from kernel (include/asm-generic/rwonce.h). They
@@ -633,6 +673,26 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s
})
/*
+ * __calc_avg - Calculate exponential weighted moving average (EWMA) with
+ * @old and @new values. @decay represents how large the @old value remains.
+ * With a larger @decay value, the moving average changes slowly, exhibiting
+ * fewer fluctuations.
+ */
+#define __calc_avg(old, new, decay) ({ \
+ typeof(decay) thr = 1 << (decay); \
+ typeof(old) ret; \
+ if (((old) < thr) || ((new) < thr)) { \
+ if (((old) == 1) && ((new) == 0)) \
+ ret = 0; \
+ else \
+ ret = ((old) - ((old) >> 1)) + ((new) >> 1); \
+ } else { \
+ ret = ((old) - ((old) >> (decay))) + ((new) >> (decay)); \
+ } \
+ ret; \
+})
+
+/*
* log2_u32 - Compute the base 2 logarithm of a 32-bit exponential value.
* @v: The value for which we're computing the base 2 logarithm.
*/
@@ -663,6 +723,25 @@ static inline u32 log2_u64(u64 v)
}
/*
+ * sqrt_u64 - Calculate the square root of value @x using Newton's method.
+ */
+static inline u64 __sqrt_u64(u64 x)
+{
+ if (x == 0 || x == 1)
+ return x;
+
+ u64 r = ((1ULL << 32) > x) ? x : (1ULL << 32);
+
+ for (int i = 0; i < 8; ++i) {
+ u64 q = x / r;
+ if (r <= q)
+ break;
+ r = (r + q) >> 1;
+ }
+ return r;
+}
+
+/*
* Return a value proportionally scaled to the task's weight.
*/
static inline u64 scale_by_task_weight(const struct task_struct *p, u64 value)
diff --git a/tools/sched_ext/include/scx/common.h b/tools/sched_ext/include/scx/common.h
index 1dc76bd84296..b3c6372bcf81 100644
--- a/tools/sched_ext/include/scx/common.h
+++ b/tools/sched_ext/include/scx/common.h
@@ -75,8 +75,9 @@ typedef int64_t s64;
#include "enums.h"
/* not available when building kernel tools/sched_ext */
-#if __has_include(<lib/sdt_task.h>)
-#include <lib/sdt_task.h>
+#if __has_include(<lib/sdt_task_defs.h>)
+#include "bpf_arena_common.h"
+#include <lib/sdt_task_defs.h>
#endif
#endif /* __SCHED_EXT_COMMON_H */
diff --git a/tools/sched_ext/include/scx/compat.bpf.h b/tools/sched_ext/include/scx/compat.bpf.h
index 9252e1a00556..f2969c3061a7 100644
--- a/tools/sched_ext/include/scx/compat.bpf.h
+++ b/tools/sched_ext/include/scx/compat.bpf.h
@@ -16,114 +16,92 @@
})
/* v6.12: 819513666966 ("sched_ext: Add cgroup support") */
-#define __COMPAT_scx_bpf_task_cgroup(p) \
- (bpf_ksym_exists(scx_bpf_task_cgroup) ? \
- scx_bpf_task_cgroup((p)) : NULL)
+struct cgroup *scx_bpf_task_cgroup___new(struct task_struct *p) __ksym __weak;
+
+#define scx_bpf_task_cgroup(p) \
+ (bpf_ksym_exists(scx_bpf_task_cgroup___new) ? \
+ scx_bpf_task_cgroup___new((p)) : NULL)
/*
* v6.13: The verb `dispatch` was too overloaded and confusing. kfuncs are
* renamed to unload the verb.
*
- * Build error is triggered if old names are used. New binaries work with both
- * new and old names. The compat macros will be removed on v6.15 release.
- *
* scx_bpf_dispatch_from_dsq() and friends were added during v6.12 by
* 4c30f5ce4f7a ("sched_ext: Implement scx_bpf_dispatch[_vtime]_from_dsq()").
- * Preserve __COMPAT macros until v6.15.
*/
-void scx_bpf_dispatch___compat(struct task_struct *p, u64 dsq_id, u64 slice, u64 enq_flags) __ksym __weak;
-void scx_bpf_dispatch_vtime___compat(struct task_struct *p, u64 dsq_id, u64 slice, u64 vtime, u64 enq_flags) __ksym __weak;
-bool scx_bpf_consume___compat(u64 dsq_id) __ksym __weak;
-void scx_bpf_dispatch_from_dsq_set_slice___compat(struct bpf_iter_scx_dsq *it__iter, u64 slice) __ksym __weak;
-void scx_bpf_dispatch_from_dsq_set_vtime___compat(struct bpf_iter_scx_dsq *it__iter, u64 vtime) __ksym __weak;
-bool scx_bpf_dispatch_from_dsq___compat(struct bpf_iter_scx_dsq *it__iter, struct task_struct *p, u64 dsq_id, u64 enq_flags) __ksym __weak;
-bool scx_bpf_dispatch_vtime_from_dsq___compat(struct bpf_iter_scx_dsq *it__iter, struct task_struct *p, u64 dsq_id, u64 enq_flags) __ksym __weak;
-
-#define scx_bpf_dsq_insert(p, dsq_id, slice, enq_flags) \
- (bpf_ksym_exists(scx_bpf_dsq_insert) ? \
- scx_bpf_dsq_insert((p), (dsq_id), (slice), (enq_flags)) : \
- scx_bpf_dispatch___compat((p), (dsq_id), (slice), (enq_flags)))
-
-#define scx_bpf_dsq_insert_vtime(p, dsq_id, slice, vtime, enq_flags) \
- (bpf_ksym_exists(scx_bpf_dsq_insert_vtime) ? \
- scx_bpf_dsq_insert_vtime((p), (dsq_id), (slice), (vtime), (enq_flags)) : \
- scx_bpf_dispatch_vtime___compat((p), (dsq_id), (slice), (vtime), (enq_flags)))
+bool scx_bpf_dsq_move_to_local___new(u64 dsq_id) __ksym __weak;
+void scx_bpf_dsq_move_set_slice___new(struct bpf_iter_scx_dsq *it__iter, u64 slice) __ksym __weak;
+void scx_bpf_dsq_move_set_vtime___new(struct bpf_iter_scx_dsq *it__iter, u64 vtime) __ksym __weak;
+bool scx_bpf_dsq_move___new(struct bpf_iter_scx_dsq *it__iter, struct task_struct *p, u64 dsq_id, u64 enq_flags) __ksym __weak;
+bool scx_bpf_dsq_move_vtime___new(struct bpf_iter_scx_dsq *it__iter, struct task_struct *p, u64 dsq_id, u64 enq_flags) __ksym __weak;
+
+bool scx_bpf_consume___old(u64 dsq_id) __ksym __weak;
+void scx_bpf_dispatch_from_dsq_set_slice___old(struct bpf_iter_scx_dsq *it__iter, u64 slice) __ksym __weak;
+void scx_bpf_dispatch_from_dsq_set_vtime___old(struct bpf_iter_scx_dsq *it__iter, u64 vtime) __ksym __weak;
+bool scx_bpf_dispatch_from_dsq___old(struct bpf_iter_scx_dsq *it__iter, struct task_struct *p, u64 dsq_id, u64 enq_flags) __ksym __weak;
+bool scx_bpf_dispatch_vtime_from_dsq___old(struct bpf_iter_scx_dsq *it__iter, struct task_struct *p, u64 dsq_id, u64 enq_flags) __ksym __weak;
#define scx_bpf_dsq_move_to_local(dsq_id) \
- (bpf_ksym_exists(scx_bpf_dsq_move_to_local) ? \
- scx_bpf_dsq_move_to_local((dsq_id)) : \
- scx_bpf_consume___compat((dsq_id)))
-
-#define __COMPAT_scx_bpf_dsq_move_set_slice(it__iter, slice) \
- (bpf_ksym_exists(scx_bpf_dsq_move_set_slice) ? \
- scx_bpf_dsq_move_set_slice((it__iter), (slice)) : \
- (bpf_ksym_exists(scx_bpf_dispatch_from_dsq_set_slice___compat) ? \
- scx_bpf_dispatch_from_dsq_set_slice___compat((it__iter), (slice)) : \
+ (bpf_ksym_exists(scx_bpf_dsq_move_to_local___new) ? \
+ scx_bpf_dsq_move_to_local___new((dsq_id)) : \
+ scx_bpf_consume___old((dsq_id)))
+
+#define scx_bpf_dsq_move_set_slice(it__iter, slice) \
+ (bpf_ksym_exists(scx_bpf_dsq_move_set_slice___new) ? \
+ scx_bpf_dsq_move_set_slice___new((it__iter), (slice)) : \
+ (bpf_ksym_exists(scx_bpf_dispatch_from_dsq_set_slice___old) ? \
+ scx_bpf_dispatch_from_dsq_set_slice___old((it__iter), (slice)) : \
(void)0))
-#define __COMPAT_scx_bpf_dsq_move_set_vtime(it__iter, vtime) \
- (bpf_ksym_exists(scx_bpf_dsq_move_set_vtime) ? \
- scx_bpf_dsq_move_set_vtime((it__iter), (vtime)) : \
- (bpf_ksym_exists(scx_bpf_dispatch_from_dsq_set_vtime___compat) ? \
- scx_bpf_dispatch_from_dsq_set_vtime___compat((it__iter), (vtime)) : \
- (void) 0))
-
-#define __COMPAT_scx_bpf_dsq_move(it__iter, p, dsq_id, enq_flags) \
- (bpf_ksym_exists(scx_bpf_dsq_move) ? \
- scx_bpf_dsq_move((it__iter), (p), (dsq_id), (enq_flags)) : \
- (bpf_ksym_exists(scx_bpf_dispatch_from_dsq___compat) ? \
- scx_bpf_dispatch_from_dsq___compat((it__iter), (p), (dsq_id), (enq_flags)) : \
- false))
+#define scx_bpf_dsq_move_set_vtime(it__iter, vtime) \
+ (bpf_ksym_exists(scx_bpf_dsq_move_set_vtime___new) ? \
+ scx_bpf_dsq_move_set_vtime___new((it__iter), (vtime)) : \
+ (bpf_ksym_exists(scx_bpf_dispatch_from_dsq_set_vtime___old) ? \
+ scx_bpf_dispatch_from_dsq_set_vtime___old((it__iter), (vtime)) : \
+ (void)0))
-#define __COMPAT_scx_bpf_dsq_move_vtime(it__iter, p, dsq_id, enq_flags) \
- (bpf_ksym_exists(scx_bpf_dsq_move_vtime) ? \
- scx_bpf_dsq_move_vtime((it__iter), (p), (dsq_id), (enq_flags)) : \
- (bpf_ksym_exists(scx_bpf_dispatch_vtime_from_dsq___compat) ? \
- scx_bpf_dispatch_vtime_from_dsq___compat((it__iter), (p), (dsq_id), (enq_flags)) : \
+#define scx_bpf_dsq_move(it__iter, p, dsq_id, enq_flags) \
+ (bpf_ksym_exists(scx_bpf_dsq_move___new) ? \
+ scx_bpf_dsq_move___new((it__iter), (p), (dsq_id), (enq_flags)) : \
+ (bpf_ksym_exists(scx_bpf_dispatch_from_dsq___old) ? \
+ scx_bpf_dispatch_from_dsq___old((it__iter), (p), (dsq_id), (enq_flags)) : \
false))
-#define scx_bpf_dispatch(p, dsq_id, slice, enq_flags) \
- _Static_assert(false, "scx_bpf_dispatch() renamed to scx_bpf_dsq_insert()")
-
-#define scx_bpf_dispatch_vtime(p, dsq_id, slice, vtime, enq_flags) \
- _Static_assert(false, "scx_bpf_dispatch_vtime() renamed to scx_bpf_dsq_insert_vtime()")
-
-#define scx_bpf_consume(dsq_id) ({ \
- _Static_assert(false, "scx_bpf_consume() renamed to scx_bpf_dsq_move_to_local()"); \
- false; \
-})
-
-#define scx_bpf_dispatch_from_dsq_set_slice(it__iter, slice) \
- _Static_assert(false, "scx_bpf_dispatch_from_dsq_set_slice() renamed to scx_bpf_dsq_move_set_slice()")
-
-#define scx_bpf_dispatch_from_dsq_set_vtime(it__iter, vtime) \
- _Static_assert(false, "scx_bpf_dispatch_from_dsq_set_vtime() renamed to scx_bpf_dsq_move_set_vtime()")
-
-#define scx_bpf_dispatch_from_dsq(it__iter, p, dsq_id, enq_flags) ({ \
- _Static_assert(false, "scx_bpf_dispatch_from_dsq() renamed to scx_bpf_dsq_move()"); \
- false; \
-})
-
-#define scx_bpf_dispatch_vtime_from_dsq(it__iter, p, dsq_id, enq_flags) ({ \
- _Static_assert(false, "scx_bpf_dispatch_vtime_from_dsq() renamed to scx_bpf_dsq_move_vtime()"); \
- false; \
-})
-
-#define __COMPAT_scx_bpf_dispatch_from_dsq_set_slice(it__iter, slice) \
- _Static_assert(false, "__COMPAT_scx_bpf_dispatch_from_dsq_set_slice() renamed to __COMPAT_scx_bpf_dsq_move_set_slice()")
+#define scx_bpf_dsq_move_vtime(it__iter, p, dsq_id, enq_flags) \
+ (bpf_ksym_exists(scx_bpf_dsq_move_vtime___new) ? \
+ scx_bpf_dsq_move_vtime___new((it__iter), (p), (dsq_id), (enq_flags)) : \
+ (bpf_ksym_exists(scx_bpf_dispatch_vtime_from_dsq___old) ? \
+ scx_bpf_dispatch_vtime_from_dsq___old((it__iter), (p), (dsq_id), (enq_flags)) : \
+ false))
-#define __COMPAT_scx_bpf_dispatch_from_dsq_set_vtime(it__iter, vtime) \
- _Static_assert(false, "__COMPAT_scx_bpf_dispatch_from_dsq_set_vtime() renamed to __COMPAT_scx_bpf_dsq_move_set_vtime()")
+/*
+ * v6.15: 950ad93df2fc ("bpf: add kfunc for populating cpumask bits")
+ *
+ * Compat macro will be dropped on v6.19 release.
+ */
+int bpf_cpumask_populate(struct cpumask *dst, void *src, size_t src__sz) __ksym __weak;
-#define __COMPAT_scx_bpf_dispatch_from_dsq(it__iter, p, dsq_id, enq_flags) ({ \
- _Static_assert(false, "__COMPAT_scx_bpf_dispatch_from_dsq() renamed to __COMPAT_scx_bpf_dsq_move()"); \
- false; \
-})
+#define __COMPAT_bpf_cpumask_populate(cpumask, src, size__sz) \
+ (bpf_ksym_exists(bpf_cpumask_populate) ? \
+ (bpf_cpumask_populate(cpumask, src, size__sz)) : -EOPNOTSUPP)
-#define __COMPAT_scx_bpf_dispatch_vtime_from_dsq(it__iter, p, dsq_id, enq_flags) ({ \
- _Static_assert(false, "__COMPAT_scx_bpf_dispatch_vtime_from_dsq() renamed to __COMPAT_scx_bpf_dsq_move_vtime()"); \
- false; \
-})
+/*
+ * v6.19: Introduce lockless peek API for user DSQs.
+ *
+ * Preserve the following macro until v6.21.
+ */
+static inline struct task_struct *__COMPAT_scx_bpf_dsq_peek(u64 dsq_id)
+{
+ struct task_struct *p = NULL;
+ struct bpf_iter_scx_dsq it;
+
+ if (bpf_ksym_exists(scx_bpf_dsq_peek))
+ return scx_bpf_dsq_peek(dsq_id);
+ if (!bpf_iter_scx_dsq_new(&it, dsq_id, 0))
+ p = bpf_iter_scx_dsq_next(&it);
+ bpf_iter_scx_dsq_destroy(&it);
+ return p;
+}
/**
* __COMPAT_is_enq_cpu_selected - Test if SCX_ENQ_CPU_SELECTED is on
@@ -226,6 +204,178 @@ static inline bool __COMPAT_is_enq_cpu_selected(u64 enq_flags)
scx_bpf_pick_any_cpu(cpus_allowed, flags))
/*
+ * v6.18: Add a helper to retrieve the current task running on a CPU.
+ *
+ * Keep this helper available until v6.20 for compatibility.
+ */
+static inline struct task_struct *__COMPAT_scx_bpf_cpu_curr(int cpu)
+{
+ struct rq *rq;
+
+ if (bpf_ksym_exists(scx_bpf_cpu_curr))
+ return scx_bpf_cpu_curr(cpu);
+
+ rq = scx_bpf_cpu_rq(cpu);
+
+ return rq ? rq->curr : NULL;
+}
+
+/*
+ * v6.19: To work around BPF maximum parameter limit, the following kfuncs are
+ * replaced with variants that pack scalar arguments in a struct. Wrappers are
+ * provided to maintain source compatibility.
+ *
+ * v6.13: scx_bpf_dsq_insert_vtime() renaming is also handled here. See the
+ * block on dispatch renaming above for more details.
+ *
+ * The kernel will carry the compat variants until v6.23 to maintain binary
+ * compatibility. After v6.23 release, remove the compat handling and move the
+ * wrappers to common.bpf.h.
+ */
+s32 scx_bpf_select_cpu_and___compat(struct task_struct *p, s32 prev_cpu, u64 wake_flags,
+ const struct cpumask *cpus_allowed, u64 flags) __ksym __weak;
+void scx_bpf_dispatch_vtime___compat(struct task_struct *p, u64 dsq_id, u64 slice, u64 vtime, u64 enq_flags) __ksym __weak;
+void scx_bpf_dsq_insert_vtime___compat(struct task_struct *p, u64 dsq_id, u64 slice, u64 vtime, u64 enq_flags) __ksym __weak;
+
+/**
+ * scx_bpf_select_cpu_and - Pick an idle CPU usable by task @p
+ * @p: task_struct to select a CPU for
+ * @prev_cpu: CPU @p was on previously
+ * @wake_flags: %SCX_WAKE_* flags
+ * @cpus_allowed: cpumask of allowed CPUs
+ * @flags: %SCX_PICK_IDLE* flags
+ *
+ * Inline wrapper that packs scalar arguments into a struct and calls
+ * __scx_bpf_select_cpu_and(). See __scx_bpf_select_cpu_and() for details.
+ */
+static inline s32
+scx_bpf_select_cpu_and(struct task_struct *p, s32 prev_cpu, u64 wake_flags,
+ const struct cpumask *cpus_allowed, u64 flags)
+{
+ if (bpf_core_type_exists(struct scx_bpf_select_cpu_and_args)) {
+ struct scx_bpf_select_cpu_and_args args = {
+ .prev_cpu = prev_cpu,
+ .wake_flags = wake_flags,
+ .flags = flags,
+ };
+
+ return __scx_bpf_select_cpu_and(p, cpus_allowed, &args);
+ } else {
+ return scx_bpf_select_cpu_and___compat(p, prev_cpu, wake_flags,
+ cpus_allowed, flags);
+ }
+}
+
+/**
+ * scx_bpf_dsq_insert_vtime - Insert a task into the vtime priority queue of a DSQ
+ * @p: task_struct to insert
+ * @dsq_id: DSQ to insert into
+ * @slice: duration @p can run for in nsecs, 0 to keep the current value
+ * @vtime: @p's ordering inside the vtime-sorted queue of the target DSQ
+ * @enq_flags: SCX_ENQ_*
+ *
+ * Inline wrapper that packs scalar arguments into a struct and calls
+ * __scx_bpf_dsq_insert_vtime(). See __scx_bpf_dsq_insert_vtime() for details.
+ */
+static inline bool
+scx_bpf_dsq_insert_vtime(struct task_struct *p, u64 dsq_id, u64 slice, u64 vtime,
+ u64 enq_flags)
+{
+ if (bpf_core_type_exists(struct scx_bpf_dsq_insert_vtime_args)) {
+ struct scx_bpf_dsq_insert_vtime_args args = {
+ .dsq_id = dsq_id,
+ .slice = slice,
+ .vtime = vtime,
+ .enq_flags = enq_flags,
+ };
+
+ return __scx_bpf_dsq_insert_vtime(p, &args);
+ } else if (bpf_ksym_exists(scx_bpf_dsq_insert_vtime___compat)) {
+ scx_bpf_dsq_insert_vtime___compat(p, dsq_id, slice, vtime,
+ enq_flags);
+ return true;
+ } else {
+ scx_bpf_dispatch_vtime___compat(p, dsq_id, slice, vtime,
+ enq_flags);
+ return true;
+ }
+}
+
+/*
+ * v6.19: scx_bpf_dsq_insert() now returns bool instead of void. Move
+ * scx_bpf_dsq_insert() decl to common.bpf.h and drop compat helper after v6.22.
+ * The extra ___compat suffix is to work around libbpf not ignoring __SUFFIX on
+ * kernel side. The entire suffix can be dropped later.
+ *
+ * v6.13: scx_bpf_dsq_insert() renaming is also handled here. See the block on
+ * dispatch renaming above for more details.
+ */
+bool scx_bpf_dsq_insert___v2___compat(struct task_struct *p, u64 dsq_id, u64 slice, u64 enq_flags) __ksym __weak;
+void scx_bpf_dsq_insert___v1(struct task_struct *p, u64 dsq_id, u64 slice, u64 enq_flags) __ksym __weak;
+void scx_bpf_dispatch___compat(struct task_struct *p, u64 dsq_id, u64 slice, u64 enq_flags) __ksym __weak;
+
+static inline bool
+scx_bpf_dsq_insert(struct task_struct *p, u64 dsq_id, u64 slice, u64 enq_flags)
+{
+ if (bpf_ksym_exists(scx_bpf_dsq_insert___v2___compat)) {
+ return scx_bpf_dsq_insert___v2___compat(p, dsq_id, slice, enq_flags);
+ } else if (bpf_ksym_exists(scx_bpf_dsq_insert___v1)) {
+ scx_bpf_dsq_insert___v1(p, dsq_id, slice, enq_flags);
+ return true;
+ } else {
+ scx_bpf_dispatch___compat(p, dsq_id, slice, enq_flags);
+ return true;
+ }
+}
+
+/*
+ * v6.19: scx_bpf_task_set_slice() and scx_bpf_task_set_dsq_vtime() added to for
+ * sub-sched authority checks. Drop the wrappers and move the decls to
+ * common.bpf.h after v6.22.
+ */
+bool scx_bpf_task_set_slice___new(struct task_struct *p, u64 slice) __ksym __weak;
+bool scx_bpf_task_set_dsq_vtime___new(struct task_struct *p, u64 vtime) __ksym __weak;
+
+static inline void scx_bpf_task_set_slice(struct task_struct *p, u64 slice)
+{
+ if (bpf_ksym_exists(scx_bpf_task_set_slice___new))
+ scx_bpf_task_set_slice___new(p, slice);
+ else
+ p->scx.slice = slice;
+}
+
+static inline void scx_bpf_task_set_dsq_vtime(struct task_struct *p, u64 vtime)
+{
+ if (bpf_ksym_exists(scx_bpf_task_set_dsq_vtime___new))
+ scx_bpf_task_set_dsq_vtime___new(p, vtime);
+ else
+ p->scx.dsq_vtime = vtime;
+}
+
+/*
+ * v6.19: The new void variant can be called from anywhere while the older v1
+ * variant can only be called from ops.cpu_release(). The double ___ prefixes on
+ * the v2 variant need to be removed once libbpf is updated to ignore ___ prefix
+ * on kernel side. Drop the wrapper and move the decl to common.bpf.h after
+ * v6.22.
+ */
+u32 scx_bpf_reenqueue_local___v1(void) __ksym __weak;
+void scx_bpf_reenqueue_local___v2___compat(void) __ksym __weak;
+
+static inline bool __COMPAT_scx_bpf_reenqueue_local_from_anywhere(void)
+{
+ return bpf_ksym_exists(scx_bpf_reenqueue_local___v2___compat);
+}
+
+static inline void scx_bpf_reenqueue_local(void)
+{
+ if (__COMPAT_scx_bpf_reenqueue_local_from_anywhere())
+ scx_bpf_reenqueue_local___v2___compat();
+ else
+ scx_bpf_reenqueue_local___v1();
+}
+
+/*
* Define sched_ext_ops. This may be expanded to define multiple variants for
* backward compatibility. See compat.h::SCX_OPS_LOAD/ATTACH().
*/
diff --git a/tools/sched_ext/include/scx/compat.h b/tools/sched_ext/include/scx/compat.h
index 35c67c5174ac..8b4897fc8b99 100644
--- a/tools/sched_ext/include/scx/compat.h
+++ b/tools/sched_ext/include/scx/compat.h
@@ -151,6 +151,10 @@ static inline long scx_hotplug_seq(void)
*
* ec7e3b0463e1 ("implement-ops") in https://github.com/sched-ext/sched_ext is
* the current minimum required kernel version.
+ *
+ * COMPAT:
+ * - v6.17: ops.cgroup_set_bandwidth()
+ * - v6.19: ops.cgroup_set_idle()
*/
#define SCX_OPS_OPEN(__ops_name, __scx_name) ({ \
struct __scx_name *__skel; \
@@ -162,6 +166,16 @@ static inline long scx_hotplug_seq(void)
SCX_BUG_ON(!__skel, "Could not open " #__scx_name); \
__skel->struct_ops.__ops_name->hotplug_seq = scx_hotplug_seq(); \
SCX_ENUM_INIT(__skel); \
+ if (__skel->struct_ops.__ops_name->cgroup_set_bandwidth && \
+ !__COMPAT_struct_has_field("sched_ext_ops", "cgroup_set_bandwidth")) { \
+ fprintf(stderr, "WARNING: kernel doesn't support ops.cgroup_set_bandwidth()\n"); \
+ __skel->struct_ops.__ops_name->cgroup_set_bandwidth = NULL; \
+ } \
+ if (__skel->struct_ops.__ops_name->cgroup_set_idle && \
+ !__COMPAT_struct_has_field("sched_ext_ops", "cgroup_set_idle")) { \
+ fprintf(stderr, "WARNING: kernel doesn't support ops.cgroup_set_idle()\n"); \
+ __skel->struct_ops.__ops_name->cgroup_set_idle = NULL; \
+ } \
__skel; \
})
diff --git a/tools/sched_ext/include/scx/user_exit_info.bpf.h b/tools/sched_ext/include/scx/user_exit_info.bpf.h
new file mode 100644
index 000000000000..e7ac6611a990
--- /dev/null
+++ b/tools/sched_ext/include/scx/user_exit_info.bpf.h
@@ -0,0 +1,40 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Define struct user_exit_info which is shared between BPF and userspace parts
+ * to communicate exit status and other information.
+ *
+ * Copyright (c) 2022 Meta Platforms, Inc. and affiliates.
+ * Copyright (c) 2022 Tejun Heo <tj@kernel.org>
+ * Copyright (c) 2022 David Vernet <dvernet@meta.com>
+ */
+
+#ifndef __USER_EXIT_INFO_BPF_H
+#define __USER_EXIT_INFO_BPF_H
+
+#ifndef LSP
+#include "vmlinux.h"
+#endif
+#include <bpf/bpf_core_read.h>
+
+#include "user_exit_info_common.h"
+
+#define UEI_DEFINE(__name) \
+ char RESIZABLE_ARRAY(data, __name##_dump); \
+ const volatile u32 __name##_dump_len; \
+ struct user_exit_info __name SEC(".data")
+
+#define UEI_RECORD(__uei_name, __ei) ({ \
+ bpf_probe_read_kernel_str(__uei_name.reason, \
+ sizeof(__uei_name.reason), (__ei)->reason); \
+ bpf_probe_read_kernel_str(__uei_name.msg, \
+ sizeof(__uei_name.msg), (__ei)->msg); \
+ bpf_probe_read_kernel_str(__uei_name##_dump, \
+ __uei_name##_dump_len, (__ei)->dump); \
+ if (bpf_core_field_exists((__ei)->exit_code)) \
+ __uei_name.exit_code = (__ei)->exit_code; \
+ /* use __sync to force memory barrier */ \
+ __sync_val_compare_and_swap(&__uei_name.kind, __uei_name.kind, \
+ (__ei)->kind); \
+})
+
+#endif /* __USER_EXIT_INFO_BPF_H */
diff --git a/tools/sched_ext/include/scx/user_exit_info.h b/tools/sched_ext/include/scx/user_exit_info.h
index 66f856640ee7..399697fa372f 100644
--- a/tools/sched_ext/include/scx/user_exit_info.h
+++ b/tools/sched_ext/include/scx/user_exit_info.h
@@ -10,55 +10,11 @@
#ifndef __USER_EXIT_INFO_H
#define __USER_EXIT_INFO_H
-#ifdef LSP
-#define __bpf__
-#include "../vmlinux.h"
-#endif
-
-enum uei_sizes {
- UEI_REASON_LEN = 128,
- UEI_MSG_LEN = 1024,
- UEI_DUMP_DFL_LEN = 32768,
-};
-
-struct user_exit_info {
- int kind;
- s64 exit_code;
- char reason[UEI_REASON_LEN];
- char msg[UEI_MSG_LEN];
-};
-
-#ifdef __bpf__
-
-#ifndef LSP
-#include "vmlinux.h"
-#endif
-#include <bpf/bpf_core_read.h>
-
-#define UEI_DEFINE(__name) \
- char RESIZABLE_ARRAY(data, __name##_dump); \
- const volatile u32 __name##_dump_len; \
- struct user_exit_info __name SEC(".data")
-
-#define UEI_RECORD(__uei_name, __ei) ({ \
- bpf_probe_read_kernel_str(__uei_name.reason, \
- sizeof(__uei_name.reason), (__ei)->reason); \
- bpf_probe_read_kernel_str(__uei_name.msg, \
- sizeof(__uei_name.msg), (__ei)->msg); \
- bpf_probe_read_kernel_str(__uei_name##_dump, \
- __uei_name##_dump_len, (__ei)->dump); \
- if (bpf_core_field_exists((__ei)->exit_code)) \
- __uei_name.exit_code = (__ei)->exit_code; \
- /* use __sync to force memory barrier */ \
- __sync_val_compare_and_swap(&__uei_name.kind, __uei_name.kind, \
- (__ei)->kind); \
-})
-
-#else /* !__bpf__ */
-
#include <stdio.h>
#include <stdbool.h>
+#include "user_exit_info_common.h"
+
/* no need to call the following explicitly if SCX_OPS_LOAD() is used */
#define UEI_SET_SIZE(__skel, __ops_name, __uei_name) ({ \
u32 __len = (__skel)->struct_ops.__ops_name->exit_dump_len ?: UEI_DUMP_DFL_LEN; \
@@ -114,5 +70,4 @@ enum uei_ecode_mask {
#define UEI_ECODE_RESTART(__ecode) (UEI_ECODE_SYS_ACT((__ecode)) == SCX_ECODE_ACT_RESTART)
-#endif /* __bpf__ */
#endif /* __USER_EXIT_INFO_H */
diff --git a/tools/sched_ext/include/scx/user_exit_info_common.h b/tools/sched_ext/include/scx/user_exit_info_common.h
new file mode 100644
index 000000000000..2d0981aedd89
--- /dev/null
+++ b/tools/sched_ext/include/scx/user_exit_info_common.h
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Define struct user_exit_info which is shared between BPF and userspace parts
+ * to communicate exit status and other information.
+ *
+ * Copyright (c) 2022 Meta Platforms, Inc. and affiliates.
+ * Copyright (c) 2022 Tejun Heo <tj@kernel.org>
+ * Copyright (c) 2022 David Vernet <dvernet@meta.com>
+ */
+#ifndef __USER_EXIT_INFO_COMMON_H
+#define __USER_EXIT_INFO_COMMON_H
+
+#ifdef LSP
+#include "../vmlinux.h"
+#endif
+
+enum uei_sizes {
+ UEI_REASON_LEN = 128,
+ UEI_MSG_LEN = 1024,
+ UEI_DUMP_DFL_LEN = 32768,
+};
+
+struct user_exit_info {
+ int kind;
+ s64 exit_code;
+ char reason[UEI_REASON_LEN];
+ char msg[UEI_MSG_LEN];
+};
+
+#endif /* __USER_EXIT_INFO_COMMON_H */
diff --git a/tools/sched_ext/scx_central.bpf.c b/tools/sched_ext/scx_central.bpf.c
index 50bc1737c167..55df8b798865 100644
--- a/tools/sched_ext/scx_central.bpf.c
+++ b/tools/sched_ext/scx_central.bpf.c
@@ -1,6 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
- * A central FIFO sched_ext scheduler which demonstrates the followings:
+ * A central FIFO sched_ext scheduler which demonstrates the following:
*
* a. Making all scheduling decisions from one CPU:
*
diff --git a/tools/sched_ext/scx_central.c b/tools/sched_ext/scx_central.c
index 6ba6e610eeaa..55931a4cd71c 100644
--- a/tools/sched_ext/scx_central.c
+++ b/tools/sched_ext/scx_central.c
@@ -61,6 +61,7 @@ restart:
skel->rodata->nr_cpu_ids = libbpf_num_possible_cpus();
skel->rodata->slice_ns = __COMPAT_ENUM_OR_ZERO("scx_public_consts", "SCX_SLICE_DFL");
+ assert(skel->rodata->nr_cpu_ids > 0);
assert(skel->rodata->nr_cpu_ids <= INT32_MAX);
while ((opt = getopt(argc, argv, "s:c:pvh")) != -1) {
diff --git a/tools/sched_ext/scx_cpu0.bpf.c b/tools/sched_ext/scx_cpu0.bpf.c
new file mode 100644
index 000000000000..6326ce598c8e
--- /dev/null
+++ b/tools/sched_ext/scx_cpu0.bpf.c
@@ -0,0 +1,88 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * A CPU0 scheduler.
+ *
+ * This scheduler queues all tasks to a shared DSQ and only dispatches them on
+ * CPU0 in FIFO order. This is useful for testing bypass behavior when many
+ * tasks are concentrated on a single CPU. If the load balancer doesn't work,
+ * bypass mode can trigger task hangs or RCU stalls as the queue is long and
+ * there's only one CPU working on it.
+ *
+ * - Statistics tracking how many tasks are queued to local and CPU0 DSQs.
+ * - Termination notification for userspace.
+ *
+ * Copyright (c) 2025 Meta Platforms, Inc. and affiliates.
+ * Copyright (c) 2025 Tejun Heo <tj@kernel.org>
+ */
+#include <scx/common.bpf.h>
+
+char _license[] SEC("license") = "GPL";
+
+const volatile u32 nr_cpus = 32; /* !0 for veristat, set during init */
+
+UEI_DEFINE(uei);
+
+/*
+ * We create a custom DSQ with ID 0 that we dispatch to and consume from on
+ * CPU0.
+ */
+#define DSQ_CPU0 0
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+ __uint(key_size, sizeof(u32));
+ __uint(value_size, sizeof(u64));
+ __uint(max_entries, 2); /* [local, cpu0] */
+} stats SEC(".maps");
+
+static void stat_inc(u32 idx)
+{
+ u64 *cnt_p = bpf_map_lookup_elem(&stats, &idx);
+ if (cnt_p)
+ (*cnt_p)++;
+}
+
+s32 BPF_STRUCT_OPS(cpu0_select_cpu, struct task_struct *p, s32 prev_cpu, u64 wake_flags)
+{
+ return 0;
+}
+
+void BPF_STRUCT_OPS(cpu0_enqueue, struct task_struct *p, u64 enq_flags)
+{
+ /*
+ * select_cpu() always picks CPU0. If @p is not on CPU0, it can't run on
+ * CPU 0. Queue on whichever CPU it's currently only.
+ */
+ if (scx_bpf_task_cpu(p) != 0) {
+ stat_inc(0); /* count local queueing */
+ scx_bpf_dsq_insert(p, SCX_DSQ_LOCAL, SCX_SLICE_DFL, 0);
+ return;
+ }
+
+ stat_inc(1); /* count cpu0 queueing */
+ scx_bpf_dsq_insert(p, DSQ_CPU0, SCX_SLICE_DFL, enq_flags);
+}
+
+void BPF_STRUCT_OPS(cpu0_dispatch, s32 cpu, struct task_struct *prev)
+{
+ if (cpu == 0)
+ scx_bpf_dsq_move_to_local(DSQ_CPU0);
+}
+
+s32 BPF_STRUCT_OPS_SLEEPABLE(cpu0_init)
+{
+ return scx_bpf_create_dsq(DSQ_CPU0, -1);
+}
+
+void BPF_STRUCT_OPS(cpu0_exit, struct scx_exit_info *ei)
+{
+ UEI_RECORD(uei, ei);
+}
+
+SCX_OPS_DEFINE(cpu0_ops,
+ .select_cpu = (void *)cpu0_select_cpu,
+ .enqueue = (void *)cpu0_enqueue,
+ .dispatch = (void *)cpu0_dispatch,
+ .init = (void *)cpu0_init,
+ .exit = (void *)cpu0_exit,
+ .name = "cpu0");
diff --git a/tools/sched_ext/scx_cpu0.c b/tools/sched_ext/scx_cpu0.c
new file mode 100644
index 000000000000..1e4fa4ab8da9
--- /dev/null
+++ b/tools/sched_ext/scx_cpu0.c
@@ -0,0 +1,106 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2025 Meta Platforms, Inc. and affiliates.
+ * Copyright (c) 2025 Tejun Heo <tj@kernel.org>
+ */
+#include <stdio.h>
+#include <unistd.h>
+#include <signal.h>
+#include <assert.h>
+#include <libgen.h>
+#include <bpf/bpf.h>
+#include <scx/common.h>
+#include "scx_cpu0.bpf.skel.h"
+
+const char help_fmt[] =
+"A cpu0 sched_ext scheduler.\n"
+"\n"
+"See the top-level comment in .bpf.c for more details.\n"
+"\n"
+"Usage: %s [-v]\n"
+"\n"
+" -v Print libbpf debug messages\n"
+" -h Display this help and exit\n";
+
+static bool verbose;
+static volatile int exit_req;
+
+static int libbpf_print_fn(enum libbpf_print_level level, const char *format, va_list args)
+{
+ if (level == LIBBPF_DEBUG && !verbose)
+ return 0;
+ return vfprintf(stderr, format, args);
+}
+
+static void sigint_handler(int sig)
+{
+ exit_req = 1;
+}
+
+static void read_stats(struct scx_cpu0 *skel, __u64 *stats)
+{
+ int nr_cpus = libbpf_num_possible_cpus();
+ assert(nr_cpus > 0);
+ __u64 cnts[2][nr_cpus];
+ __u32 idx;
+
+ memset(stats, 0, sizeof(stats[0]) * 2);
+
+ for (idx = 0; idx < 2; idx++) {
+ int ret, cpu;
+
+ ret = bpf_map_lookup_elem(bpf_map__fd(skel->maps.stats),
+ &idx, cnts[idx]);
+ if (ret < 0)
+ continue;
+ for (cpu = 0; cpu < nr_cpus; cpu++)
+ stats[idx] += cnts[idx][cpu];
+ }
+}
+
+int main(int argc, char **argv)
+{
+ struct scx_cpu0 *skel;
+ struct bpf_link *link;
+ __u32 opt;
+ __u64 ecode;
+
+ libbpf_set_print(libbpf_print_fn);
+ signal(SIGINT, sigint_handler);
+ signal(SIGTERM, sigint_handler);
+restart:
+ skel = SCX_OPS_OPEN(cpu0_ops, scx_cpu0);
+
+ skel->rodata->nr_cpus = libbpf_num_possible_cpus();
+
+ while ((opt = getopt(argc, argv, "vh")) != -1) {
+ switch (opt) {
+ case 'v':
+ verbose = true;
+ break;
+ default:
+ fprintf(stderr, help_fmt, basename(argv[0]));
+ return opt != 'h';
+ }
+ }
+
+ SCX_OPS_LOAD(skel, cpu0_ops, scx_cpu0, uei);
+ link = SCX_OPS_ATTACH(skel, cpu0_ops, scx_cpu0);
+
+ while (!exit_req && !UEI_EXITED(skel, uei)) {
+ __u64 stats[2];
+
+ read_stats(skel, stats);
+ printf("local=%llu cpu0=%llu\n", stats[0], stats[1]);
+ fflush(stdout);
+ sleep(1);
+ }
+
+ bpf_link__destroy(link);
+ ecode = UEI_REPORT(skel, uei);
+ scx_cpu0__destroy(skel);
+
+ if (UEI_ECODE_RESTART(ecode))
+ goto restart;
+ return 0;
+}
diff --git a/tools/sched_ext/scx_flatcg.bpf.c b/tools/sched_ext/scx_flatcg.bpf.c
index fdc7170639e6..43126858b8e4 100644
--- a/tools/sched_ext/scx_flatcg.bpf.c
+++ b/tools/sched_ext/scx_flatcg.bpf.c
@@ -382,7 +382,7 @@ void BPF_STRUCT_OPS(fcg_enqueue, struct task_struct *p, u64 enq_flags)
return;
}
- cgrp = __COMPAT_scx_bpf_task_cgroup(p);
+ cgrp = scx_bpf_task_cgroup(p);
cgc = find_cgrp_ctx(cgrp);
if (!cgc)
goto out_release;
@@ -508,7 +508,7 @@ void BPF_STRUCT_OPS(fcg_runnable, struct task_struct *p, u64 enq_flags)
{
struct cgroup *cgrp;
- cgrp = __COMPAT_scx_bpf_task_cgroup(p);
+ cgrp = scx_bpf_task_cgroup(p);
update_active_weight_sums(cgrp, true);
bpf_cgroup_release(cgrp);
}
@@ -521,7 +521,7 @@ void BPF_STRUCT_OPS(fcg_running, struct task_struct *p)
if (fifo_sched)
return;
- cgrp = __COMPAT_scx_bpf_task_cgroup(p);
+ cgrp = scx_bpf_task_cgroup(p);
cgc = find_cgrp_ctx(cgrp);
if (cgc) {
/*
@@ -564,7 +564,7 @@ void BPF_STRUCT_OPS(fcg_stopping, struct task_struct *p, bool runnable)
if (!taskc->bypassed_at)
return;
- cgrp = __COMPAT_scx_bpf_task_cgroup(p);
+ cgrp = scx_bpf_task_cgroup(p);
cgc = find_cgrp_ctx(cgrp);
if (cgc) {
__sync_fetch_and_add(&cgc->cvtime_delta,
@@ -578,7 +578,7 @@ void BPF_STRUCT_OPS(fcg_quiescent, struct task_struct *p, u64 deq_flags)
{
struct cgroup *cgrp;
- cgrp = __COMPAT_scx_bpf_task_cgroup(p);
+ cgrp = scx_bpf_task_cgroup(p);
update_active_weight_sums(cgrp, false);
bpf_cgroup_release(cgrp);
}
@@ -950,5 +950,5 @@ SCX_OPS_DEFINE(flatcg_ops,
.cgroup_move = (void *)fcg_cgroup_move,
.init = (void *)fcg_init,
.exit = (void *)fcg_exit,
- .flags = SCX_OPS_ENQ_EXITING,
+ .flags = SCX_OPS_HAS_CGROUP_WEIGHT | SCX_OPS_ENQ_EXITING,
.name = "flatcg");
diff --git a/tools/sched_ext/scx_flatcg.c b/tools/sched_ext/scx_flatcg.c
index 6dd423eeb4ff..cd85eb401179 100644
--- a/tools/sched_ext/scx_flatcg.c
+++ b/tools/sched_ext/scx_flatcg.c
@@ -6,6 +6,7 @@
*/
#include <stdio.h>
#include <signal.h>
+#include <assert.h>
#include <unistd.h>
#include <libgen.h>
#include <limits.h>
@@ -137,6 +138,7 @@ restart:
skel = SCX_OPS_OPEN(flatcg_ops, scx_flatcg);
skel->rodata->nr_cpus = libbpf_num_possible_cpus();
+ assert(skel->rodata->nr_cpus > 0);
skel->rodata->cgrp_slice_ns = __COMPAT_ENUM_OR_ZERO("scx_public_consts", "SCX_SLICE_DFL");
while ((opt = getopt(argc, argv, "s:i:dfvh")) != -1) {
diff --git a/tools/sched_ext/scx_qmap.bpf.c b/tools/sched_ext/scx_qmap.bpf.c
index 69d877501cb7..df21fad0c438 100644
--- a/tools/sched_ext/scx_qmap.bpf.c
+++ b/tools/sched_ext/scx_qmap.bpf.c
@@ -39,7 +39,8 @@ const volatile u32 stall_kernel_nth;
const volatile u32 dsp_inf_loop_after;
const volatile u32 dsp_batch;
const volatile bool highpri_boosting;
-const volatile bool print_shared_dsq;
+const volatile bool print_dsqs_and_events;
+const volatile bool print_msgs;
const volatile s32 disallow_tgid;
const volatile bool suppress_dump;
@@ -56,7 +57,8 @@ struct qmap {
queue1 SEC(".maps"),
queue2 SEC(".maps"),
queue3 SEC(".maps"),
- queue4 SEC(".maps");
+ queue4 SEC(".maps"),
+ dump_store SEC(".maps");
struct {
__uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
@@ -200,6 +202,9 @@ void BPF_STRUCT_OPS(qmap_enqueue, struct task_struct *p, u64 enq_flags)
void *ring;
s32 cpu;
+ if (enq_flags & SCX_ENQ_REENQ)
+ __sync_fetch_and_add(&nr_reenqueued, 1);
+
if (p->flags & PF_KTHREAD) {
if (stall_kernel_nth && !(++kernel_cnt % stall_kernel_nth))
return;
@@ -318,12 +323,9 @@ static bool dispatch_highpri(bool from_timer)
if (tctx->highpri) {
/* exercise the set_*() and vtime interface too */
- __COMPAT_scx_bpf_dsq_move_set_slice(
- BPF_FOR_EACH_ITER, slice_ns * 2);
- __COMPAT_scx_bpf_dsq_move_set_vtime(
- BPF_FOR_EACH_ITER, highpri_seq++);
- __COMPAT_scx_bpf_dsq_move_vtime(
- BPF_FOR_EACH_ITER, p, HIGHPRI_DSQ, 0);
+ scx_bpf_dsq_move_set_slice(BPF_FOR_EACH_ITER, slice_ns * 2);
+ scx_bpf_dsq_move_set_vtime(BPF_FOR_EACH_ITER, highpri_seq++);
+ scx_bpf_dsq_move_vtime(BPF_FOR_EACH_ITER, p, HIGHPRI_DSQ, 0);
}
}
@@ -340,9 +342,8 @@ static bool dispatch_highpri(bool from_timer)
else
cpu = scx_bpf_pick_any_cpu(p->cpus_ptr, 0);
- if (__COMPAT_scx_bpf_dsq_move(BPF_FOR_EACH_ITER, p,
- SCX_DSQ_LOCAL_ON | cpu,
- SCX_ENQ_PREEMPT)) {
+ if (scx_bpf_dsq_move(BPF_FOR_EACH_ITER, p, SCX_DSQ_LOCAL_ON | cpu,
+ SCX_ENQ_PREEMPT)) {
if (cpu == this_cpu) {
dispatched = true;
__sync_fetch_and_add(&nr_expedited_local, 1);
@@ -531,20 +532,35 @@ bool BPF_STRUCT_OPS(qmap_core_sched_before,
return task_qdist(a) > task_qdist(b);
}
-void BPF_STRUCT_OPS(qmap_cpu_release, s32 cpu, struct scx_cpu_release_args *args)
+SEC("tp_btf/sched_switch")
+int BPF_PROG(qmap_sched_switch, bool preempt, struct task_struct *prev,
+ struct task_struct *next, unsigned long prev_state)
{
- u32 cnt;
+ if (!__COMPAT_scx_bpf_reenqueue_local_from_anywhere())
+ return 0;
/*
- * Called when @cpu is taken by a higher priority scheduling class. This
- * makes @cpu no longer available for executing sched_ext tasks. As we
- * don't want the tasks in @cpu's local dsq to sit there until @cpu
- * becomes available again, re-enqueue them into the global dsq. See
- * %SCX_ENQ_REENQ handling in qmap_enqueue().
+ * If @cpu is taken by a higher priority scheduling class, it is no
+ * longer available for executing sched_ext tasks. As we don't want the
+ * tasks in @cpu's local dsq to sit there until @cpu becomes available
+ * again, re-enqueue them into the global dsq. See %SCX_ENQ_REENQ
+ * handling in qmap_enqueue().
*/
- cnt = scx_bpf_reenqueue_local();
- if (cnt)
- __sync_fetch_and_add(&nr_reenqueued, cnt);
+ switch (next->policy) {
+ case 1: /* SCHED_FIFO */
+ case 2: /* SCHED_RR */
+ case 6: /* SCHED_DEADLINE */
+ scx_bpf_reenqueue_local();
+ }
+
+ return 0;
+}
+
+void BPF_STRUCT_OPS(qmap_cpu_release, s32 cpu, struct scx_cpu_release_args *args)
+{
+ /* see qmap_sched_switch() to learn how to do this on newer kernels */
+ if (!__COMPAT_scx_bpf_reenqueue_local_from_anywhere())
+ scx_bpf_reenqueue_local();
}
s32 BPF_STRUCT_OPS(qmap_init_task, struct task_struct *p,
@@ -578,11 +594,26 @@ void BPF_STRUCT_OPS(qmap_dump, struct scx_dump_ctx *dctx)
return;
scx_bpf_dump("QMAP FIFO[%d]:", i);
+
+ /*
+ * Dump can be invoked anytime and there is no way to iterate in
+ * a non-destructive way. Pop and store in dump_store and then
+ * restore afterwards. If racing against new enqueues, ordering
+ * can get mixed up.
+ */
bpf_repeat(4096) {
if (bpf_map_pop_elem(fifo, &pid))
break;
+ bpf_map_push_elem(&dump_store, &pid, 0);
scx_bpf_dump(" %d", pid);
}
+
+ bpf_repeat(4096) {
+ if (bpf_map_pop_elem(&dump_store, &pid))
+ break;
+ bpf_map_push_elem(fifo, &pid, 0);
+ }
+
scx_bpf_dump("\n");
}
}
@@ -617,22 +648,25 @@ void BPF_STRUCT_OPS(qmap_dump_task, struct scx_dump_ctx *dctx, struct task_struc
s32 BPF_STRUCT_OPS(qmap_cgroup_init, struct cgroup *cgrp, struct scx_cgroup_init_args *args)
{
- bpf_printk("CGRP INIT %llu weight=%u period=%lu quota=%ld burst=%lu",
- cgrp->kn->id, args->weight, args->bw_period_us,
- args->bw_quota_us, args->bw_burst_us);
+ if (print_msgs)
+ bpf_printk("CGRP INIT %llu weight=%u period=%lu quota=%ld burst=%lu",
+ cgrp->kn->id, args->weight, args->bw_period_us,
+ args->bw_quota_us, args->bw_burst_us);
return 0;
}
void BPF_STRUCT_OPS(qmap_cgroup_set_weight, struct cgroup *cgrp, u32 weight)
{
- bpf_printk("CGRP SET %llu weight=%u", cgrp->kn->id, weight);
+ if (print_msgs)
+ bpf_printk("CGRP SET %llu weight=%u", cgrp->kn->id, weight);
}
void BPF_STRUCT_OPS(qmap_cgroup_set_bandwidth, struct cgroup *cgrp,
u64 period_us, u64 quota_us, u64 burst_us)
{
- bpf_printk("CGRP SET %llu period=%lu quota=%ld burst=%lu", cgrp->kn->id,
- period_us, quota_us, burst_us);
+ if (print_msgs)
+ bpf_printk("CGRP SET %llu period=%lu quota=%ld burst=%lu",
+ cgrp->kn->id, period_us, quota_us, burst_us);
}
/*
@@ -676,16 +710,20 @@ static void print_cpus(void)
void BPF_STRUCT_OPS(qmap_cpu_online, s32 cpu)
{
- bpf_printk("CPU %d coming online", cpu);
- /* @cpu is already online at this point */
- print_cpus();
+ if (print_msgs) {
+ bpf_printk("CPU %d coming online", cpu);
+ /* @cpu is already online at this point */
+ print_cpus();
+ }
}
void BPF_STRUCT_OPS(qmap_cpu_offline, s32 cpu)
{
- bpf_printk("CPU %d going offline", cpu);
- /* @cpu is still online at this point */
- print_cpus();
+ if (print_msgs) {
+ bpf_printk("CPU %d going offline", cpu);
+ /* @cpu is still online at this point */
+ print_cpus();
+ }
}
struct monitor_timer {
@@ -783,35 +821,36 @@ static void dump_shared_dsq(void)
static int monitor_timerfn(void *map, int *key, struct bpf_timer *timer)
{
- struct scx_event_stats events;
-
bpf_rcu_read_lock();
dispatch_highpri(true);
bpf_rcu_read_unlock();
monitor_cpuperf();
- if (print_shared_dsq)
+ if (print_dsqs_and_events) {
+ struct scx_event_stats events;
+
dump_shared_dsq();
- __COMPAT_scx_bpf_events(&events, sizeof(events));
-
- bpf_printk("%35s: %lld", "SCX_EV_SELECT_CPU_FALLBACK",
- scx_read_event(&events, SCX_EV_SELECT_CPU_FALLBACK));
- bpf_printk("%35s: %lld", "SCX_EV_DISPATCH_LOCAL_DSQ_OFFLINE",
- scx_read_event(&events, SCX_EV_DISPATCH_LOCAL_DSQ_OFFLINE));
- bpf_printk("%35s: %lld", "SCX_EV_DISPATCH_KEEP_LAST",
- scx_read_event(&events, SCX_EV_DISPATCH_KEEP_LAST));
- bpf_printk("%35s: %lld", "SCX_EV_ENQ_SKIP_EXITING",
- scx_read_event(&events, SCX_EV_ENQ_SKIP_EXITING));
- bpf_printk("%35s: %lld", "SCX_EV_REFILL_SLICE_DFL",
- scx_read_event(&events, SCX_EV_REFILL_SLICE_DFL));
- bpf_printk("%35s: %lld", "SCX_EV_BYPASS_DURATION",
- scx_read_event(&events, SCX_EV_BYPASS_DURATION));
- bpf_printk("%35s: %lld", "SCX_EV_BYPASS_DISPATCH",
- scx_read_event(&events, SCX_EV_BYPASS_DISPATCH));
- bpf_printk("%35s: %lld", "SCX_EV_BYPASS_ACTIVATE",
- scx_read_event(&events, SCX_EV_BYPASS_ACTIVATE));
+ __COMPAT_scx_bpf_events(&events, sizeof(events));
+
+ bpf_printk("%35s: %lld", "SCX_EV_SELECT_CPU_FALLBACK",
+ scx_read_event(&events, SCX_EV_SELECT_CPU_FALLBACK));
+ bpf_printk("%35s: %lld", "SCX_EV_DISPATCH_LOCAL_DSQ_OFFLINE",
+ scx_read_event(&events, SCX_EV_DISPATCH_LOCAL_DSQ_OFFLINE));
+ bpf_printk("%35s: %lld", "SCX_EV_DISPATCH_KEEP_LAST",
+ scx_read_event(&events, SCX_EV_DISPATCH_KEEP_LAST));
+ bpf_printk("%35s: %lld", "SCX_EV_ENQ_SKIP_EXITING",
+ scx_read_event(&events, SCX_EV_ENQ_SKIP_EXITING));
+ bpf_printk("%35s: %lld", "SCX_EV_REFILL_SLICE_DFL",
+ scx_read_event(&events, SCX_EV_REFILL_SLICE_DFL));
+ bpf_printk("%35s: %lld", "SCX_EV_BYPASS_DURATION",
+ scx_read_event(&events, SCX_EV_BYPASS_DURATION));
+ bpf_printk("%35s: %lld", "SCX_EV_BYPASS_DISPATCH",
+ scx_read_event(&events, SCX_EV_BYPASS_DISPATCH));
+ bpf_printk("%35s: %lld", "SCX_EV_BYPASS_ACTIVATE",
+ scx_read_event(&events, SCX_EV_BYPASS_ACTIVATE));
+ }
bpf_timer_start(timer, ONE_SEC_IN_NS, 0);
return 0;
@@ -823,7 +862,8 @@ s32 BPF_STRUCT_OPS_SLEEPABLE(qmap_init)
struct bpf_timer *timer;
s32 ret;
- print_cpus();
+ if (print_msgs)
+ print_cpus();
ret = scx_bpf_create_dsq(SHARED_DSQ, -1);
if (ret)
diff --git a/tools/sched_ext/scx_qmap.c b/tools/sched_ext/scx_qmap.c
index c4912ab2e76f..ef701d45ba43 100644
--- a/tools/sched_ext/scx_qmap.c
+++ b/tools/sched_ext/scx_qmap.c
@@ -20,7 +20,7 @@ const char help_fmt[] =
"See the top-level comment in .bpf.c for more details.\n"
"\n"
"Usage: %s [-s SLICE_US] [-e COUNT] [-t COUNT] [-T COUNT] [-l COUNT] [-b COUNT]\n"
-" [-P] [-d PID] [-D LEN] [-p] [-v]\n"
+" [-P] [-M] [-d PID] [-D LEN] [-p] [-v]\n"
"\n"
" -s SLICE_US Override slice duration\n"
" -e COUNT Trigger scx_bpf_error() after COUNT enqueues\n"
@@ -28,7 +28,8 @@ const char help_fmt[] =
" -T COUNT Stall every COUNT'th kernel thread\n"
" -l COUNT Trigger dispatch infinite looping after COUNT dispatches\n"
" -b COUNT Dispatch upto COUNT tasks together\n"
-" -P Print out DSQ content to trace_pipe every second, use with -b\n"
+" -P Print out DSQ content and event counters to trace_pipe every second\n"
+" -M Print out debug messages to trace_pipe\n"
" -H Boost nice -20 tasks in SHARED_DSQ, use with -b\n"
" -d PID Disallow a process from switching into SCHED_EXT (-1 for self)\n"
" -D LEN Set scx_exit_info.dump buffer length\n"
@@ -66,7 +67,7 @@ int main(int argc, char **argv)
skel->rodata->slice_ns = __COMPAT_ENUM_OR_ZERO("scx_public_consts", "SCX_SLICE_DFL");
- while ((opt = getopt(argc, argv, "s:e:t:T:l:b:PHd:D:Spvh")) != -1) {
+ while ((opt = getopt(argc, argv, "s:e:t:T:l:b:PMHd:D:Spvh")) != -1) {
switch (opt) {
case 's':
skel->rodata->slice_ns = strtoull(optarg, NULL, 0) * 1000;
@@ -87,7 +88,10 @@ int main(int argc, char **argv)
skel->rodata->dsp_batch = strtoul(optarg, NULL, 0);
break;
case 'P':
- skel->rodata->print_shared_dsq = true;
+ skel->rodata->print_dsqs_and_events = true;
+ break;
+ case 'M':
+ skel->rodata->print_msgs = true;
break;
case 'H':
skel->rodata->highpri_boosting = true;
diff --git a/tools/sched_ext/scx_simple.c b/tools/sched_ext/scx_simple.c
index 76d83199545c..06d4b13bf76b 100644
--- a/tools/sched_ext/scx_simple.c
+++ b/tools/sched_ext/scx_simple.c
@@ -7,6 +7,7 @@
#include <stdio.h>
#include <unistd.h>
#include <signal.h>
+#include <assert.h>
#include <libgen.h>
#include <bpf/bpf.h>
#include <scx/common.h>
@@ -41,6 +42,7 @@ static void sigint_handler(int simple)
static void read_stats(struct scx_simple *skel, __u64 *stats)
{
int nr_cpus = libbpf_num_possible_cpus();
+ assert(nr_cpus > 0);
__u64 cnts[2][nr_cpus];
__u32 idx;
diff --git a/tools/scripts/syscall.tbl b/tools/scripts/syscall.tbl
index 580b4e246aec..d1ae5e92c615 100644
--- a/tools/scripts/syscall.tbl
+++ b/tools/scripts/syscall.tbl
@@ -408,3 +408,5 @@
465 common listxattrat sys_listxattrat
466 common removexattrat sys_removexattrat
467 common open_tree_attr sys_open_tree_attr
+468 common file_getattr sys_file_getattr
+469 common file_setattr sys_file_setattr
diff --git a/tools/testing/cxl/Kbuild b/tools/testing/cxl/Kbuild
index d07f14cb7aa4..0e151d0572d1 100644
--- a/tools/testing/cxl/Kbuild
+++ b/tools/testing/cxl/Kbuild
@@ -4,23 +4,19 @@ ldflags-y += --wrap=is_acpi_device_node
ldflags-y += --wrap=acpi_evaluate_integer
ldflags-y += --wrap=acpi_pci_find_root
ldflags-y += --wrap=nvdimm_bus_register
-ldflags-y += --wrap=devm_cxl_port_enumerate_dports
-ldflags-y += --wrap=devm_cxl_setup_hdm
-ldflags-y += --wrap=devm_cxl_add_passthrough_decoder
-ldflags-y += --wrap=devm_cxl_enumerate_decoders
ldflags-y += --wrap=cxl_await_media_ready
-ldflags-y += --wrap=cxl_hdm_decode_init
-ldflags-y += --wrap=cxl_dvsec_rr_decode
ldflags-y += --wrap=devm_cxl_add_rch_dport
-ldflags-y += --wrap=cxl_rcd_component_reg_phys
ldflags-y += --wrap=cxl_endpoint_parse_cdat
ldflags-y += --wrap=cxl_dport_init_ras_reporting
+ldflags-y += --wrap=devm_cxl_endpoint_decoders_setup
+ldflags-y += --wrap=hmat_get_extended_linear_cache_size
DRIVERS := ../../../drivers
CXL_SRC := $(DRIVERS)/cxl
CXL_CORE_SRC := $(DRIVERS)/cxl/core
ccflags-y := -I$(srctree)/drivers/cxl/
ccflags-y += -D__mock=__weak
+ccflags-y += -DCXL_TEST_ENABLE=1
ccflags-y += -DTRACE_INCLUDE_PATH=$(CXL_CORE_SRC) -I$(srctree)/drivers/cxl/core/
obj-m += cxl_acpi.o
diff --git a/tools/testing/cxl/cxl_core_exports.c b/tools/testing/cxl/cxl_core_exports.c
index f088792a8925..6754de35598d 100644
--- a/tools/testing/cxl/cxl_core_exports.c
+++ b/tools/testing/cxl/cxl_core_exports.c
@@ -2,6 +2,28 @@
/* Copyright(c) 2022 Intel Corporation. All rights reserved. */
#include "cxl.h"
+#include "exports.h"
/* Exporting of cxl_core symbols that are only used by cxl_test */
EXPORT_SYMBOL_NS_GPL(cxl_num_decoders_committed, "CXL");
+
+cxl_add_dport_by_dev_fn _devm_cxl_add_dport_by_dev =
+ __devm_cxl_add_dport_by_dev;
+EXPORT_SYMBOL_NS_GPL(_devm_cxl_add_dport_by_dev, "CXL");
+
+struct cxl_dport *devm_cxl_add_dport_by_dev(struct cxl_port *port,
+ struct device *dport_dev)
+{
+ return _devm_cxl_add_dport_by_dev(port, dport_dev);
+}
+EXPORT_SYMBOL_NS_GPL(devm_cxl_add_dport_by_dev, "CXL");
+
+cxl_switch_decoders_setup_fn _devm_cxl_switch_port_decoders_setup =
+ __devm_cxl_switch_port_decoders_setup;
+EXPORT_SYMBOL_NS_GPL(_devm_cxl_switch_port_decoders_setup, "CXL");
+
+int devm_cxl_switch_port_decoders_setup(struct cxl_port *port)
+{
+ return _devm_cxl_switch_port_decoders_setup(port);
+}
+EXPORT_SYMBOL_NS_GPL(devm_cxl_switch_port_decoders_setup, "CXL");
diff --git a/tools/testing/cxl/exports.h b/tools/testing/cxl/exports.h
new file mode 100644
index 000000000000..7ebee7c0bd67
--- /dev/null
+++ b/tools/testing/cxl/exports.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright(c) 2025 Intel Corporation */
+#ifndef __MOCK_CXL_EXPORTS_H_
+#define __MOCK_CXL_EXPORTS_H_
+
+typedef struct cxl_dport *(*cxl_add_dport_by_dev_fn)(struct cxl_port *port,
+ struct device *dport_dev);
+extern cxl_add_dport_by_dev_fn _devm_cxl_add_dport_by_dev;
+
+typedef int(*cxl_switch_decoders_setup_fn)(struct cxl_port *port);
+extern cxl_switch_decoders_setup_fn _devm_cxl_switch_port_decoders_setup;
+
+#endif
diff --git a/tools/testing/cxl/test/Kbuild b/tools/testing/cxl/test/Kbuild
index 6b1927897856..af50972c8b6d 100644
--- a/tools/testing/cxl/test/Kbuild
+++ b/tools/testing/cxl/test/Kbuild
@@ -4,6 +4,7 @@ ccflags-y := -I$(srctree)/drivers/cxl/ -I$(srctree)/drivers/cxl/core
obj-m += cxl_test.o
obj-m += cxl_mock.o
obj-m += cxl_mock_mem.o
+obj-m += cxl_translate.o
cxl_test-y := cxl.o
cxl_mock-y := mock.o
diff --git a/tools/testing/cxl/test/cxl.c b/tools/testing/cxl/test/cxl.c
index 6a25cca5636f..81e2aef3627a 100644
--- a/tools/testing/cxl/test/cxl.c
+++ b/tools/testing/cxl/test/cxl.c
@@ -15,6 +15,7 @@
#include "mock.h"
static int interleave_arithmetic;
+static bool extended_linear_cache;
#define FAKE_QTG_ID 42
@@ -26,6 +27,9 @@ static int interleave_arithmetic;
#define NR_CXL_PORT_DECODERS 8
#define NR_BRIDGES (NR_CXL_HOST_BRIDGES + NR_CXL_SINGLE_HOST + NR_CXL_RCH)
+#define MOCK_AUTO_REGION_SIZE_DEFAULT SZ_512M
+static int mock_auto_region_size = MOCK_AUTO_REGION_SIZE_DEFAULT;
+
static struct platform_device *cxl_acpi;
static struct platform_device *cxl_host_bridge[NR_CXL_HOST_BRIDGES];
#define NR_MULTI_ROOT (NR_CXL_HOST_BRIDGES * NR_CXL_ROOT_PORTS)
@@ -210,7 +214,7 @@ static struct {
},
.interleave_ways = 0,
.granularity = 4,
- .restrictions = ACPI_CEDT_CFMWS_RESTRICT_TYPE3 |
+ .restrictions = ACPI_CEDT_CFMWS_RESTRICT_HOSTONLYMEM |
ACPI_CEDT_CFMWS_RESTRICT_VOLATILE,
.qtg_id = FAKE_QTG_ID,
.window_size = SZ_256M * 4UL,
@@ -225,7 +229,7 @@ static struct {
},
.interleave_ways = 1,
.granularity = 4,
- .restrictions = ACPI_CEDT_CFMWS_RESTRICT_TYPE3 |
+ .restrictions = ACPI_CEDT_CFMWS_RESTRICT_HOSTONLYMEM |
ACPI_CEDT_CFMWS_RESTRICT_VOLATILE,
.qtg_id = FAKE_QTG_ID,
.window_size = SZ_256M * 8UL,
@@ -240,7 +244,7 @@ static struct {
},
.interleave_ways = 0,
.granularity = 4,
- .restrictions = ACPI_CEDT_CFMWS_RESTRICT_TYPE3 |
+ .restrictions = ACPI_CEDT_CFMWS_RESTRICT_HOSTONLYMEM |
ACPI_CEDT_CFMWS_RESTRICT_PMEM,
.qtg_id = FAKE_QTG_ID,
.window_size = SZ_256M * 4UL,
@@ -255,7 +259,7 @@ static struct {
},
.interleave_ways = 1,
.granularity = 4,
- .restrictions = ACPI_CEDT_CFMWS_RESTRICT_TYPE3 |
+ .restrictions = ACPI_CEDT_CFMWS_RESTRICT_HOSTONLYMEM |
ACPI_CEDT_CFMWS_RESTRICT_PMEM,
.qtg_id = FAKE_QTG_ID,
.window_size = SZ_256M * 8UL,
@@ -270,7 +274,7 @@ static struct {
},
.interleave_ways = 0,
.granularity = 4,
- .restrictions = ACPI_CEDT_CFMWS_RESTRICT_TYPE3 |
+ .restrictions = ACPI_CEDT_CFMWS_RESTRICT_HOSTONLYMEM |
ACPI_CEDT_CFMWS_RESTRICT_PMEM,
.qtg_id = FAKE_QTG_ID,
.window_size = SZ_256M * 4UL,
@@ -285,7 +289,7 @@ static struct {
},
.interleave_ways = 0,
.granularity = 4,
- .restrictions = ACPI_CEDT_CFMWS_RESTRICT_TYPE3 |
+ .restrictions = ACPI_CEDT_CFMWS_RESTRICT_HOSTONLYMEM |
ACPI_CEDT_CFMWS_RESTRICT_VOLATILE,
.qtg_id = FAKE_QTG_ID,
.window_size = SZ_256M,
@@ -302,7 +306,7 @@ static struct {
.interleave_arithmetic = ACPI_CEDT_CFMWS_ARITHMETIC_XOR,
.interleave_ways = 0,
.granularity = 4,
- .restrictions = ACPI_CEDT_CFMWS_RESTRICT_TYPE3 |
+ .restrictions = ACPI_CEDT_CFMWS_RESTRICT_HOSTONLYMEM |
ACPI_CEDT_CFMWS_RESTRICT_PMEM,
.qtg_id = FAKE_QTG_ID,
.window_size = SZ_256M * 8UL,
@@ -318,7 +322,7 @@ static struct {
.interleave_arithmetic = ACPI_CEDT_CFMWS_ARITHMETIC_XOR,
.interleave_ways = 1,
.granularity = 0,
- .restrictions = ACPI_CEDT_CFMWS_RESTRICT_TYPE3 |
+ .restrictions = ACPI_CEDT_CFMWS_RESTRICT_HOSTONLYMEM |
ACPI_CEDT_CFMWS_RESTRICT_PMEM,
.qtg_id = FAKE_QTG_ID,
.window_size = SZ_256M * 8UL,
@@ -334,7 +338,7 @@ static struct {
.interleave_arithmetic = ACPI_CEDT_CFMWS_ARITHMETIC_XOR,
.interleave_ways = 8,
.granularity = 1,
- .restrictions = ACPI_CEDT_CFMWS_RESTRICT_TYPE3 |
+ .restrictions = ACPI_CEDT_CFMWS_RESTRICT_HOSTONLYMEM |
ACPI_CEDT_CFMWS_RESTRICT_PMEM,
.qtg_id = FAKE_QTG_ID,
.window_size = SZ_512M * 6UL,
@@ -426,6 +430,22 @@ static struct cxl_mock_res *alloc_mock_res(resource_size_t size, int align)
return res;
}
+/* Only update CFMWS0 as this is used by the auto region. */
+static void cfmws_elc_update(struct acpi_cedt_cfmws *window, int index)
+{
+ if (!extended_linear_cache)
+ return;
+
+ if (index != 0)
+ return;
+
+ /*
+ * The window size should be 2x of the CXL region size where half is
+ * DRAM and half is CXL
+ */
+ window->window_size = mock_auto_region_size * 2;
+}
+
static int populate_cedt(void)
{
struct cxl_mock_res *res;
@@ -450,6 +470,7 @@ static int populate_cedt(void)
for (i = cfmws_start; i <= cfmws_end; i++) {
struct acpi_cedt_cfmws *window = mock_cfmws[i];
+ cfmws_elc_update(window, i);
res = alloc_mock_res(window->window_size, SZ_256M);
if (!res)
return -ENOMEM;
@@ -591,6 +612,25 @@ mock_acpi_evaluate_integer(acpi_handle handle, acpi_string pathname,
return AE_OK;
}
+static int
+mock_hmat_get_extended_linear_cache_size(struct resource *backing_res,
+ int nid, resource_size_t *cache_size)
+{
+ struct acpi_cedt_cfmws *window = mock_cfmws[0];
+ struct resource cfmws0_res =
+ DEFINE_RES_MEM(window->base_hpa, window->window_size);
+
+ if (!extended_linear_cache ||
+ !resource_contains(&cfmws0_res, backing_res)) {
+ return hmat_get_extended_linear_cache_size(backing_res,
+ nid, cache_size);
+ }
+
+ *cache_size = mock_auto_region_size;
+
+ return 0;
+}
+
static struct pci_bus mock_pci_bus[NR_BRIDGES];
static struct acpi_pci_root mock_pci_root[ARRAY_SIZE(mock_pci_bus)] = {
[0] = {
@@ -643,15 +683,8 @@ static struct cxl_hdm *mock_cxl_setup_hdm(struct cxl_port *port,
return cxlhdm;
}
-static int mock_cxl_add_passthrough_decoder(struct cxl_port *port)
-{
- dev_err(&port->dev, "unexpected passthrough decoder for cxl_test\n");
- return -EOPNOTSUPP;
-}
-
-
struct target_map_ctx {
- int *target_map;
+ u32 *target_map;
int index;
int target_count;
};
@@ -745,7 +778,6 @@ static void mock_init_hdm_decoder(struct cxl_decoder *cxld)
struct cxl_endpoint_decoder *cxled;
struct cxl_switch_decoder *cxlsd;
struct cxl_port *port, *iter;
- const int size = SZ_512M;
struct cxl_memdev *cxlmd;
struct cxl_dport *dport;
struct device *dev;
@@ -788,9 +820,11 @@ static void mock_init_hdm_decoder(struct cxl_decoder *cxld)
}
base = window->base_hpa;
+ if (extended_linear_cache)
+ base += mock_auto_region_size;
cxld->hpa_range = (struct range) {
.start = base,
- .end = base + size - 1,
+ .end = base + mock_auto_region_size - 1,
};
cxld->interleave_ways = 2;
@@ -799,7 +833,8 @@ static void mock_init_hdm_decoder(struct cxl_decoder *cxld)
cxld->flags = CXL_DECODER_F_ENABLE;
cxled->state = CXL_DECODER_STATE_AUTO;
port->commit_end = cxld->id;
- devm_cxl_dpa_reserve(cxled, 0, size / cxld->interleave_ways, 0);
+ devm_cxl_dpa_reserve(cxled, 0,
+ mock_auto_region_size / cxld->interleave_ways, 0);
cxld->commit = mock_decoder_commit;
cxld->reset = mock_decoder_reset;
@@ -818,15 +853,21 @@ static void mock_init_hdm_decoder(struct cxl_decoder *cxld)
*/
if (WARN_ON(!dev))
continue;
+
cxlsd = to_cxl_switch_decoder(dev);
if (i == 0) {
/* put cxl_mem.4 second in the decode order */
- if (pdev->id == 4)
+ if (pdev->id == 4) {
cxlsd->target[1] = dport;
- else
+ cxld->target_map[1] = dport->port_id;
+ } else {
cxlsd->target[0] = dport;
- } else
+ cxld->target_map[0] = dport->port_id;
+ }
+ } else {
cxlsd->target[0] = dport;
+ cxld->target_map[0] = dport->port_id;
+ }
cxld = &cxlsd->cxld;
cxld->target_type = CXL_DECODER_HOSTONLYMEM;
cxld->flags = CXL_DECODER_F_ENABLE;
@@ -842,7 +883,7 @@ static void mock_init_hdm_decoder(struct cxl_decoder *cxld)
cxld->interleave_granularity = 4096;
cxld->hpa_range = (struct range) {
.start = base,
- .end = base + size - 1,
+ .end = base + mock_auto_region_size - 1,
};
put_device(dev);
}
@@ -863,9 +904,7 @@ static int mock_cxl_enumerate_decoders(struct cxl_hdm *cxlhdm,
target_count = NR_CXL_SWITCH_PORTS;
for (i = 0; i < NR_CXL_PORT_DECODERS; i++) {
- int target_map[CXL_DECODER_MAX_INTERLEAVE] = { 0 };
struct target_map_ctx ctx = {
- .target_map = target_map,
.target_count = target_count,
};
struct cxl_decoder *cxld;
@@ -894,6 +933,8 @@ static int mock_cxl_enumerate_decoders(struct cxl_hdm *cxlhdm,
cxld = &cxled->cxld;
}
+ ctx.target_map = cxld->target_map;
+
mock_init_hdm_decoder(cxld);
if (target_count) {
@@ -905,7 +946,7 @@ static int mock_cxl_enumerate_decoders(struct cxl_hdm *cxlhdm,
}
}
- rc = cxl_decoder_add_locked(cxld, target_map);
+ rc = cxl_decoder_add_locked(cxld);
if (rc) {
put_device(&cxld->dev);
dev_err(&port->dev, "Failed to add decoder\n");
@@ -921,10 +962,42 @@ static int mock_cxl_enumerate_decoders(struct cxl_hdm *cxlhdm,
return 0;
}
-static int mock_cxl_port_enumerate_dports(struct cxl_port *port)
+static int __mock_cxl_decoders_setup(struct cxl_port *port)
+{
+ struct cxl_hdm *cxlhdm;
+
+ cxlhdm = mock_cxl_setup_hdm(port, NULL);
+ if (IS_ERR(cxlhdm)) {
+ if (PTR_ERR(cxlhdm) != -ENODEV)
+ dev_err(&port->dev, "Failed to map HDM decoder capability\n");
+ return PTR_ERR(cxlhdm);
+ }
+
+ return mock_cxl_enumerate_decoders(cxlhdm, NULL);
+}
+
+static int mock_cxl_switch_port_decoders_setup(struct cxl_port *port)
+{
+ if (is_cxl_root(port) || is_cxl_endpoint(port))
+ return -EOPNOTSUPP;
+
+ return __mock_cxl_decoders_setup(port);
+}
+
+static int mock_cxl_endpoint_decoders_setup(struct cxl_port *port)
+{
+ if (!is_cxl_endpoint(port))
+ return -EOPNOTSUPP;
+
+ return __mock_cxl_decoders_setup(port);
+}
+
+static int get_port_array(struct cxl_port *port,
+ struct platform_device ***port_array,
+ int *port_array_size)
{
struct platform_device **array;
- int i, array_size;
+ int array_size;
if (port->depth == 1) {
if (is_multi_bridge(port->uport_dev)) {
@@ -958,9 +1031,24 @@ static int mock_cxl_port_enumerate_dports(struct cxl_port *port)
return -ENXIO;
}
+ *port_array = array;
+ *port_array_size = array_size;
+
+ return 0;
+}
+
+static struct cxl_dport *mock_cxl_add_dport_by_dev(struct cxl_port *port,
+ struct device *dport_dev)
+{
+ struct platform_device **array;
+ int rc, i, array_size;
+
+ rc = get_port_array(port, &array, &array_size);
+ if (rc)
+ return ERR_PTR(rc);
+
for (i = 0; i < array_size; i++) {
struct platform_device *pdev = array[i];
- struct cxl_dport *dport;
if (pdev->dev.parent != port->uport_dev) {
dev_dbg(&port->dev, "%s: mismatch parent %s\n",
@@ -969,14 +1057,14 @@ static int mock_cxl_port_enumerate_dports(struct cxl_port *port)
continue;
}
- dport = devm_cxl_add_dport(port, &pdev->dev, pdev->id,
- CXL_RESOURCE_NONE);
+ if (&pdev->dev != dport_dev)
+ continue;
- if (IS_ERR(dport))
- return PTR_ERR(dport);
+ return devm_cxl_add_dport(port, &pdev->dev, pdev->id,
+ CXL_RESOURCE_NONE);
}
- return 0;
+ return ERR_PTR(-ENODEV);
}
/*
@@ -1035,11 +1123,12 @@ static struct cxl_mock_ops cxl_mock_ops = {
.acpi_table_parse_cedt = mock_acpi_table_parse_cedt,
.acpi_evaluate_integer = mock_acpi_evaluate_integer,
.acpi_pci_find_root = mock_acpi_pci_find_root,
- .devm_cxl_port_enumerate_dports = mock_cxl_port_enumerate_dports,
- .devm_cxl_setup_hdm = mock_cxl_setup_hdm,
- .devm_cxl_add_passthrough_decoder = mock_cxl_add_passthrough_decoder,
- .devm_cxl_enumerate_decoders = mock_cxl_enumerate_decoders,
+ .devm_cxl_switch_port_decoders_setup = mock_cxl_switch_port_decoders_setup,
+ .devm_cxl_endpoint_decoders_setup = mock_cxl_endpoint_decoders_setup,
.cxl_endpoint_parse_cdat = mock_cxl_endpoint_parse_cdat,
+ .devm_cxl_add_dport_by_dev = mock_cxl_add_dport_by_dev,
+ .hmat_get_extended_linear_cache_size =
+ mock_hmat_get_extended_linear_cache_size,
.list = LIST_HEAD_INIT(cxl_mock_ops.list),
};
@@ -1529,6 +1618,8 @@ static __exit void cxl_test_exit(void)
module_param(interleave_arithmetic, int, 0444);
MODULE_PARM_DESC(interleave_arithmetic, "Modulo:0, XOR:1");
+module_param(extended_linear_cache, bool, 0444);
+MODULE_PARM_DESC(extended_linear_cache, "Enable extended linear cache support");
module_init(cxl_test_init);
module_exit(cxl_test_exit);
MODULE_LICENSE("GPL v2");
diff --git a/tools/testing/cxl/test/cxl_translate.c b/tools/testing/cxl/test/cxl_translate.c
new file mode 100644
index 000000000000..2200ae21795c
--- /dev/null
+++ b/tools/testing/cxl/test/cxl_translate.c
@@ -0,0 +1,445 @@
+// SPDX-License-Identifier: GPL-2.0-only
+// Copyright(c) 2025 Intel Corporation. All rights reserved.
+
+/* Preface all log entries with "cxl_translate" */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/moduleparam.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/acpi.h>
+#include <cxlmem.h>
+#include <cxl.h>
+
+/* Maximum number of test vectors and entry length */
+#define MAX_TABLE_ENTRIES 128
+#define MAX_ENTRY_LEN 128
+
+/* Expected number of parameters in each test vector */
+#define EXPECTED_PARAMS 7
+
+/* Module parameters for test vectors */
+static char *table[MAX_TABLE_ENTRIES];
+static int table_num;
+
+/* Interleave Arithmetic */
+#define MODULO_MATH 0
+#define XOR_MATH 1
+
+/*
+ * XOR mapping configuration
+ * The test data sets all use the same set of xormaps. When additional
+ * data sets arrive for validation, this static setup will need to
+ * be changed to accept xormaps as additional parameters.
+ */
+struct cxl_cxims_data *cximsd;
+static u64 xormaps[] = {
+ 0x2020900,
+ 0x4041200,
+ 0x1010400,
+ 0x800,
+};
+
+static int nr_maps = ARRAY_SIZE(xormaps);
+
+#define HBIW_TO_NR_MAPS_SIZE (CXL_DECODER_MAX_INTERLEAVE + 1)
+static const int hbiw_to_nr_maps[HBIW_TO_NR_MAPS_SIZE] = {
+ [1] = 0, [2] = 1, [3] = 0, [4] = 2, [6] = 1, [8] = 3, [12] = 2, [16] = 4
+};
+
+/**
+ * to_hpa - calculate an HPA offset from a DPA offset and position
+ *
+ * dpa_offset: device physical address offset
+ * pos: devices position in interleave
+ * r_eiw: region encoded interleave ways
+ * r_eig: region encoded interleave granularity
+ * hb_ways: host bridge interleave ways
+ * math: interleave arithmetic (MODULO_MATH or XOR_MATH)
+ *
+ * Returns: host physical address offset
+ */
+static u64 to_hpa(u64 dpa_offset, int pos, u8 r_eiw, u16 r_eig, u8 hb_ways,
+ u8 math)
+{
+ u64 hpa_offset;
+
+ /* Calculate base HPA offset from DPA and position */
+ hpa_offset = cxl_calculate_hpa_offset(dpa_offset, pos, r_eiw, r_eig);
+
+ if (math == XOR_MATH) {
+ cximsd->nr_maps = hbiw_to_nr_maps[hb_ways];
+ if (cximsd->nr_maps)
+ return cxl_do_xormap_calc(cximsd, hpa_offset, hb_ways);
+ }
+ return hpa_offset;
+}
+
+/**
+ * to_dpa - translate an HPA offset to DPA offset
+ *
+ * hpa_offset: host physical address offset
+ * r_eiw: region encoded interleave ways
+ * r_eig: region encoded interleave granularity
+ * hb_ways: host bridge interleave ways
+ * math: interleave arithmetic (MODULO_MATH or XOR_MATH)
+ *
+ * Returns: device physical address offset
+ */
+static u64 to_dpa(u64 hpa_offset, u8 r_eiw, u16 r_eig, u8 hb_ways, u8 math)
+{
+ u64 offset = hpa_offset;
+
+ if (math == XOR_MATH) {
+ cximsd->nr_maps = hbiw_to_nr_maps[hb_ways];
+ if (cximsd->nr_maps)
+ offset =
+ cxl_do_xormap_calc(cximsd, hpa_offset, hb_ways);
+ }
+ return cxl_calculate_dpa_offset(offset, r_eiw, r_eig);
+}
+
+/**
+ * to_pos - extract an interleave position from an HPA offset
+ *
+ * hpa_offset: host physical address offset
+ * r_eiw: region encoded interleave ways
+ * r_eig: region encoded interleave granularity
+ * hb_ways: host bridge interleave ways
+ * math: interleave arithmetic (MODULO_MATH or XOR_MATH)
+ *
+ * Returns: devices position in region interleave
+ */
+static u64 to_pos(u64 hpa_offset, u8 r_eiw, u16 r_eig, u8 hb_ways, u8 math)
+{
+ u64 offset = hpa_offset;
+
+ /* Reverse XOR mapping if specified */
+ if (math == XOR_MATH)
+ offset = cxl_do_xormap_calc(cximsd, hpa_offset, hb_ways);
+
+ return cxl_calculate_position(offset, r_eiw, r_eig);
+}
+
+/**
+ * run_translation_test - execute forward and reverse translations
+ *
+ * @dpa: device physical address
+ * @pos: expected position in region interleave
+ * @r_eiw: region encoded interleave ways
+ * @r_eig: region encoded interleave granularity
+ * @hb_ways: host bridge interleave ways
+ * @math: interleave arithmetic (MODULO_MATH or XOR_MATH)
+ * @expect_spa: expected system physical address
+ *
+ * Returns: 0 on success, -1 on failure
+ */
+static int run_translation_test(u64 dpa, int pos, u8 r_eiw, u16 r_eig,
+ u8 hb_ways, int math, u64 expect_hpa)
+{
+ u64 translated_spa, reverse_dpa;
+ int reverse_pos;
+
+ /* Test Device to Host translation: DPA + POS -> SPA */
+ translated_spa = to_hpa(dpa, pos, r_eiw, r_eig, hb_ways, math);
+ if (translated_spa != expect_hpa) {
+ pr_err("Device to host failed: expected HPA %llu, got %llu\n",
+ expect_hpa, translated_spa);
+ return -1;
+ }
+
+ /* Test Host to Device DPA translation: SPA -> DPA */
+ reverse_dpa = to_dpa(translated_spa, r_eiw, r_eig, hb_ways, math);
+ if (reverse_dpa != dpa) {
+ pr_err("Host to Device DPA failed: expected %llu, got %llu\n",
+ dpa, reverse_dpa);
+ return -1;
+ }
+
+ /* Test Host to Device Position translation: SPA -> POS */
+ reverse_pos = to_pos(translated_spa, r_eiw, r_eig, hb_ways, math);
+ if (reverse_pos != pos) {
+ pr_err("Position lookup failed: expected %d, got %d\n", pos,
+ reverse_pos);
+ return -1;
+ }
+
+ return 0;
+}
+
+/**
+ * parse_test_vector - parse a single test vector string
+ *
+ * entry: test vector string to parse
+ * dpa: device physical address
+ * pos: expected position in region interleave
+ * r_eiw: region encoded interleave ways
+ * r_eig: region encoded interleave granularity
+ * hb_ways: host bridge interleave ways
+ * math: interleave arithmetic (MODULO_MATH or XOR_MATH)
+ * expect_spa: expected system physical address
+ *
+ * Returns: 0 on success, negative error code on failure
+ */
+static int parse_test_vector(const char *entry, u64 *dpa, int *pos, u8 *r_eiw,
+ u16 *r_eig, u8 *hb_ways, int *math,
+ u64 *expect_hpa)
+{
+ unsigned int tmp_r_eiw, tmp_r_eig, tmp_hb_ways;
+ int parsed;
+
+ parsed = sscanf(entry, "%llu %d %u %u %u %d %llu", dpa, pos, &tmp_r_eiw,
+ &tmp_r_eig, &tmp_hb_ways, math, expect_hpa);
+
+ if (parsed != EXPECTED_PARAMS) {
+ pr_err("Parse error: expected %d parameters, got %d in '%s'\n",
+ EXPECTED_PARAMS, parsed, entry);
+ return -EINVAL;
+ }
+ if (tmp_r_eiw > U8_MAX || tmp_r_eig > U16_MAX || tmp_hb_ways > U8_MAX) {
+ pr_err("Parameter overflow in entry: '%s'\n", entry);
+ return -ERANGE;
+ }
+ if (*math != MODULO_MATH && *math != XOR_MATH) {
+ pr_err("Invalid math type %d in entry: '%s'\n", *math, entry);
+ return -EINVAL;
+ }
+ *r_eiw = tmp_r_eiw;
+ *r_eig = tmp_r_eig;
+ *hb_ways = tmp_hb_ways;
+
+ return 0;
+}
+
+/*
+ * setup_xor_mapping - Initialize XOR mapping data structure
+ *
+ * The test data sets all use the same HBIG so we can use one set
+ * of xormaps, and set the number to apply based on HBIW before
+ * calling cxl_do_xormap_calc().
+ *
+ * When additional data sets arrive for validation with different
+ * HBIG's this static setup will need to be updated.
+ *
+ * Returns: 0 on success, negative error code on failure
+ */
+static int setup_xor_mapping(void)
+{
+ if (nr_maps <= 0)
+ return -EINVAL;
+
+ cximsd = kzalloc(struct_size(cximsd, xormaps, nr_maps), GFP_KERNEL);
+ if (!cximsd)
+ return -ENOMEM;
+
+ memcpy(cximsd->xormaps, xormaps, nr_maps * sizeof(*cximsd->xormaps));
+ cximsd->nr_maps = nr_maps;
+
+ return 0;
+}
+
+static int test_random_params(void)
+{
+ u8 valid_eiws[] = { 0, 1, 2, 3, 4, 8, 9, 10 };
+ u16 valid_eigs[] = { 0, 1, 2, 3, 4, 5, 6 };
+ int i, ways, pos, reverse_pos;
+ u64 dpa, hpa, reverse_dpa;
+ int iterations = 10000;
+ int failures = 0;
+
+ for (i = 0; i < iterations; i++) {
+ /* Generate valid random parameters for eiw, eig, pos, dpa */
+ u8 eiw = valid_eiws[get_random_u32() % ARRAY_SIZE(valid_eiws)];
+ u16 eig = valid_eigs[get_random_u32() % ARRAY_SIZE(valid_eigs)];
+
+ eiw_to_ways(eiw, &ways);
+ pos = get_random_u32() % ways;
+ dpa = get_random_u64() >> 12;
+
+ hpa = cxl_calculate_hpa_offset(dpa, pos, eiw, eig);
+ reverse_dpa = cxl_calculate_dpa_offset(hpa, eiw, eig);
+ reverse_pos = cxl_calculate_position(hpa, eiw, eig);
+
+ if (reverse_dpa != dpa || reverse_pos != pos) {
+ pr_err("test random iter %d FAIL hpa=%llu, dpa=%llu reverse_dpa=%llu, pos=%d reverse_pos=%d eiw=%u eig=%u\n",
+ i, hpa, dpa, reverse_dpa, pos, reverse_pos, eiw,
+ eig);
+
+ if (failures++ > 10) {
+ pr_err("test random too many failures, stop\n");
+ break;
+ }
+ }
+ }
+ pr_info("..... test random: PASS %d FAIL %d\n", i - failures, failures);
+
+ if (failures)
+ return -EINVAL;
+
+ return 0;
+}
+
+struct param_test {
+ u8 eiw;
+ u16 eig;
+ int pos;
+ bool expect; /* true: expect pass, false: expect fail */
+ const char *desc;
+};
+
+static struct param_test param_tests[] = {
+ { 0x0, 0, 0, true, "1-way, min eig=0, pos=0" },
+ { 0x0, 3, 0, true, "1-way, mid eig=3, pos=0" },
+ { 0x0, 6, 0, true, "1-way, max eig=6, pos=0" },
+ { 0x1, 0, 0, true, "2-way, eig=0, pos=0" },
+ { 0x1, 3, 1, true, "2-way, eig=3, max pos=1" },
+ { 0x1, 6, 1, true, "2-way, eig=6, max pos=1" },
+ { 0x2, 0, 0, true, "4-way, eig=0, pos=0" },
+ { 0x2, 3, 3, true, "4-way, eig=3, max pos=3" },
+ { 0x2, 6, 3, true, "4-way, eig=6, max pos=3" },
+ { 0x3, 0, 0, true, "8-way, eig=0, pos=0" },
+ { 0x3, 3, 7, true, "8-way, eig=3, max pos=7" },
+ { 0x3, 6, 7, true, "8-way, eig=6, max pos=7" },
+ { 0x4, 0, 0, true, "16-way, eig=0, pos=0" },
+ { 0x4, 3, 15, true, "16-way, eig=3, max pos=15" },
+ { 0x4, 6, 15, true, "16-way, eig=6, max pos=15" },
+ { 0x8, 0, 0, true, "3-way, eig=0, pos=0" },
+ { 0x8, 3, 2, true, "3-way, eig=3, max pos=2" },
+ { 0x8, 6, 2, true, "3-way, eig=6, max pos=2" },
+ { 0x9, 0, 0, true, "6-way, eig=0, pos=0" },
+ { 0x9, 3, 5, true, "6-way, eig=3, max pos=5" },
+ { 0x9, 6, 5, true, "6-way, eig=6, max pos=5" },
+ { 0xA, 0, 0, true, "12-way, eig=0, pos=0" },
+ { 0xA, 3, 11, true, "12-way, eig=3, max pos=11" },
+ { 0xA, 6, 11, true, "12-way, eig=6, max pos=11" },
+ { 0x5, 0, 0, false, "invalid eiw=5" },
+ { 0x7, 0, 0, false, "invalid eiw=7" },
+ { 0xB, 0, 0, false, "invalid eiw=0xB" },
+ { 0xFF, 0, 0, false, "invalid eiw=0xFF" },
+ { 0x1, 7, 0, false, "invalid eig=7 (out of range)" },
+ { 0x2, 0x10, 0, false, "invalid eig=0x10" },
+ { 0x3, 0xFFFF, 0, false, "invalid eig=0xFFFF" },
+ { 0x1, 0, -1, false, "pos < 0" },
+ { 0x1, 0, 2, false, "2-way, pos=2 (>= ways)" },
+ { 0x2, 0, 4, false, "4-way, pos=4 (>= ways)" },
+ { 0x3, 0, 8, false, "8-way, pos=8 (>= ways)" },
+ { 0x4, 0, 16, false, "16-way, pos=16 (>= ways)" },
+ { 0x8, 0, 3, false, "3-way, pos=3 (>= ways)" },
+ { 0x9, 0, 6, false, "6-way, pos=6 (>= ways)" },
+ { 0xA, 0, 12, false, "12-way, pos=12 (>= ways)" },
+};
+
+static int test_cxl_validate_translation_params(void)
+{
+ int i, rc, failures = 0;
+ bool valid;
+
+ for (i = 0; i < ARRAY_SIZE(param_tests); i++) {
+ struct param_test *t = &param_tests[i];
+
+ rc = cxl_validate_translation_params(t->eiw, t->eig, t->pos);
+ valid = (rc == 0);
+
+ if (valid != t->expect) {
+ pr_err("test params failed: %s\n", t->desc);
+ failures++;
+ }
+ }
+ pr_info("..... test params: PASS %d FAIL %d\n", i - failures, failures);
+
+ if (failures)
+ return -EINVAL;
+
+ return 0;
+}
+
+/*
+ * cxl_translate_init
+ *
+ * Run the internal validation tests when no params are passed.
+ * Otherwise, parse the parameters (test vectors), and kick off
+ * the translation test.
+ *
+ * Returns: 0 on success, negative error code on failure
+ */
+static int __init cxl_translate_init(void)
+{
+ int rc, i;
+
+ /* If no tables are passed, validate module params only */
+ if (table_num == 0) {
+ pr_info("Internal validation test start...\n");
+ rc = test_cxl_validate_translation_params();
+ if (rc)
+ return rc;
+
+ rc = test_random_params();
+ if (rc)
+ return rc;
+
+ pr_info("Internal validation test completed successfully\n");
+
+ return 0;
+ }
+
+ pr_info("CXL translate test module loaded with %d test vectors\n",
+ table_num);
+
+ rc = setup_xor_mapping();
+ if (rc)
+ return rc;
+
+ /* Process each test vector */
+ for (i = 0; i < table_num; i++) {
+ u64 dpa, expect_spa;
+ int pos, math;
+ u8 r_eiw, hb_ways;
+ u16 r_eig;
+
+ pr_debug("Processing test vector %d: '%s'\n", i, table[i]);
+
+ /* Parse the test vector */
+ rc = parse_test_vector(table[i], &dpa, &pos, &r_eiw, &r_eig,
+ &hb_ways, &math, &expect_spa);
+ if (rc) {
+ pr_err("CXL Translate Test %d: FAIL\n"
+ " Failed to parse test vector '%s'\n",
+ i, table[i]);
+ continue;
+ }
+ /* Run the translation test */
+ rc = run_translation_test(dpa, pos, r_eiw, r_eig, hb_ways, math,
+ expect_spa);
+ if (rc) {
+ pr_err("CXL Translate Test %d: FAIL\n"
+ " dpa=%llu pos=%d r_eiw=%u r_eig=%u hb_ways=%u math=%s expect_spa=%llu\n",
+ i, dpa, pos, r_eiw, r_eig, hb_ways,
+ (math == XOR_MATH) ? "XOR" : "MODULO",
+ expect_spa);
+ } else {
+ pr_info("CXL Translate Test %d: PASS\n", i);
+ }
+ }
+
+ kfree(cximsd);
+ pr_info("CXL translate test completed\n");
+
+ return 0;
+}
+
+static void __exit cxl_translate_exit(void)
+{
+ pr_info("CXL translate test module unloaded\n");
+}
+
+module_param_array(table, charp, &table_num, 0444);
+MODULE_PARM_DESC(table, "Test vectors as space-separated decimal strings");
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("cxl_test: cxl address translation test module");
+MODULE_IMPORT_NS("CXL");
+
+module_init(cxl_translate_init);
+module_exit(cxl_translate_exit);
diff --git a/tools/testing/cxl/test/mem.c b/tools/testing/cxl/test/mem.c
index d533481672b7..176dcde570cd 100644
--- a/tools/testing/cxl/test/mem.c
+++ b/tools/testing/cxl/test/mem.c
@@ -250,22 +250,21 @@ static void mes_add_event(struct mock_event_store *mes,
* Vary the number of events returned to simulate events occuring while the
* logs are being read.
*/
-static int ret_limit = 0;
+static atomic_t event_counter = ATOMIC_INIT(0);
static int mock_get_event(struct device *dev, struct cxl_mbox_cmd *cmd)
{
struct cxl_get_event_payload *pl;
struct mock_event_log *log;
- u16 nr_overflow;
+ int ret_limit;
u8 log_type;
int i;
if (cmd->size_in != sizeof(log_type))
return -EINVAL;
- ret_limit = (ret_limit + 1) % CXL_TEST_EVENT_RET_MAX;
- if (!ret_limit)
- ret_limit = 1;
+ /* Vary return limit from 1 to CXL_TEST_EVENT_RET_MAX */
+ ret_limit = (atomic_inc_return(&event_counter) % CXL_TEST_EVENT_RET_MAX) + 1;
if (cmd->size_out < struct_size(pl, records, ret_limit))
return -EINVAL;
@@ -299,7 +298,7 @@ static int mock_get_event(struct device *dev, struct cxl_mbox_cmd *cmd)
u64 ns;
pl->flags |= CXL_GET_EVENT_FLAG_OVERFLOW;
- pl->overflow_err_count = cpu_to_le16(nr_overflow);
+ pl->overflow_err_count = cpu_to_le16(log->nr_overflow);
ns = ktime_get_real_ns();
ns -= 5000000000; /* 5s ago */
pl->first_overflow_timestamp = cpu_to_le64(ns);
diff --git a/tools/testing/cxl/test/mock.c b/tools/testing/cxl/test/mock.c
index 1989ae020df3..44bce80ef3ff 100644
--- a/tools/testing/cxl/test/mock.c
+++ b/tools/testing/cxl/test/mock.c
@@ -10,12 +10,21 @@
#include <cxlmem.h>
#include <cxlpci.h>
#include "mock.h"
+#include "../exports.h"
static LIST_HEAD(mock);
+static struct cxl_dport *
+redirect_devm_cxl_add_dport_by_dev(struct cxl_port *port,
+ struct device *dport_dev);
+static int redirect_devm_cxl_switch_port_decoders_setup(struct cxl_port *port);
+
void register_cxl_mock_ops(struct cxl_mock_ops *ops)
{
list_add_rcu(&ops->list, &mock);
+ _devm_cxl_add_dport_by_dev = redirect_devm_cxl_add_dport_by_dev;
+ _devm_cxl_switch_port_decoders_setup =
+ redirect_devm_cxl_switch_port_decoders_setup;
}
EXPORT_SYMBOL_GPL(register_cxl_mock_ops);
@@ -23,6 +32,9 @@ DEFINE_STATIC_SRCU(cxl_mock_srcu);
void unregister_cxl_mock_ops(struct cxl_mock_ops *ops)
{
+ _devm_cxl_switch_port_decoders_setup =
+ __devm_cxl_switch_port_decoders_setup;
+ _devm_cxl_add_dport_by_dev = __devm_cxl_add_dport_by_dev;
list_del_rcu(&ops->list);
synchronize_srcu(&cxl_mock_srcu);
}
@@ -99,6 +111,26 @@ acpi_status __wrap_acpi_evaluate_integer(acpi_handle handle,
}
EXPORT_SYMBOL(__wrap_acpi_evaluate_integer);
+int __wrap_hmat_get_extended_linear_cache_size(struct resource *backing_res,
+ int nid,
+ resource_size_t *cache_size)
+{
+ int index, rc;
+ struct cxl_mock_ops *ops = get_cxl_mock_ops(&index);
+
+ if (ops)
+ rc = ops->hmat_get_extended_linear_cache_size(backing_res, nid,
+ cache_size);
+ else
+ rc = hmat_get_extended_linear_cache_size(backing_res, nid,
+ cache_size);
+
+ put_cxl_mock_ops(index);
+
+ return rc;
+}
+EXPORT_SYMBOL_GPL(__wrap_hmat_get_extended_linear_cache_size);
+
struct acpi_pci_root *__wrap_acpi_pci_find_root(acpi_handle handle)
{
int index;
@@ -131,70 +163,34 @@ __wrap_nvdimm_bus_register(struct device *dev,
}
EXPORT_SYMBOL_GPL(__wrap_nvdimm_bus_register);
-struct cxl_hdm *__wrap_devm_cxl_setup_hdm(struct cxl_port *port,
- struct cxl_endpoint_dvsec_info *info)
-
-{
- int index;
- struct cxl_hdm *cxlhdm;
- struct cxl_mock_ops *ops = get_cxl_mock_ops(&index);
-
- if (ops && ops->is_mock_port(port->uport_dev))
- cxlhdm = ops->devm_cxl_setup_hdm(port, info);
- else
- cxlhdm = devm_cxl_setup_hdm(port, info);
- put_cxl_mock_ops(index);
-
- return cxlhdm;
-}
-EXPORT_SYMBOL_NS_GPL(__wrap_devm_cxl_setup_hdm, "CXL");
-
-int __wrap_devm_cxl_add_passthrough_decoder(struct cxl_port *port)
+int redirect_devm_cxl_switch_port_decoders_setup(struct cxl_port *port)
{
int rc, index;
struct cxl_mock_ops *ops = get_cxl_mock_ops(&index);
if (ops && ops->is_mock_port(port->uport_dev))
- rc = ops->devm_cxl_add_passthrough_decoder(port);
+ rc = ops->devm_cxl_switch_port_decoders_setup(port);
else
- rc = devm_cxl_add_passthrough_decoder(port);
+ rc = __devm_cxl_switch_port_decoders_setup(port);
put_cxl_mock_ops(index);
return rc;
}
-EXPORT_SYMBOL_NS_GPL(__wrap_devm_cxl_add_passthrough_decoder, "CXL");
-int __wrap_devm_cxl_enumerate_decoders(struct cxl_hdm *cxlhdm,
- struct cxl_endpoint_dvsec_info *info)
+int __wrap_devm_cxl_endpoint_decoders_setup(struct cxl_port *port)
{
int rc, index;
- struct cxl_port *port = cxlhdm->port;
struct cxl_mock_ops *ops = get_cxl_mock_ops(&index);
if (ops && ops->is_mock_port(port->uport_dev))
- rc = ops->devm_cxl_enumerate_decoders(cxlhdm, info);
+ rc = ops->devm_cxl_endpoint_decoders_setup(port);
else
- rc = devm_cxl_enumerate_decoders(cxlhdm, info);
+ rc = devm_cxl_endpoint_decoders_setup(port);
put_cxl_mock_ops(index);
return rc;
}
-EXPORT_SYMBOL_NS_GPL(__wrap_devm_cxl_enumerate_decoders, "CXL");
-
-int __wrap_devm_cxl_port_enumerate_dports(struct cxl_port *port)
-{
- int rc, index;
- struct cxl_mock_ops *ops = get_cxl_mock_ops(&index);
-
- if (ops && ops->is_mock_port(port->uport_dev))
- rc = ops->devm_cxl_port_enumerate_dports(port);
- else
- rc = devm_cxl_port_enumerate_dports(port);
- put_cxl_mock_ops(index);
-
- return rc;
-}
-EXPORT_SYMBOL_NS_GPL(__wrap_devm_cxl_port_enumerate_dports, "CXL");
+EXPORT_SYMBOL_NS_GPL(__wrap_devm_cxl_endpoint_decoders_setup, "CXL");
int __wrap_cxl_await_media_ready(struct cxl_dev_state *cxlds)
{
@@ -211,39 +207,6 @@ int __wrap_cxl_await_media_ready(struct cxl_dev_state *cxlds)
}
EXPORT_SYMBOL_NS_GPL(__wrap_cxl_await_media_ready, "CXL");
-int __wrap_cxl_hdm_decode_init(struct cxl_dev_state *cxlds,
- struct cxl_hdm *cxlhdm,
- struct cxl_endpoint_dvsec_info *info)
-{
- int rc = 0, index;
- struct cxl_mock_ops *ops = get_cxl_mock_ops(&index);
-
- if (ops && ops->is_mock_dev(cxlds->dev))
- rc = 0;
- else
- rc = cxl_hdm_decode_init(cxlds, cxlhdm, info);
- put_cxl_mock_ops(index);
-
- return rc;
-}
-EXPORT_SYMBOL_NS_GPL(__wrap_cxl_hdm_decode_init, "CXL");
-
-int __wrap_cxl_dvsec_rr_decode(struct cxl_dev_state *cxlds,
- struct cxl_endpoint_dvsec_info *info)
-{
- int rc = 0, index;
- struct cxl_mock_ops *ops = get_cxl_mock_ops(&index);
-
- if (ops && ops->is_mock_dev(cxlds->dev))
- rc = 0;
- else
- rc = cxl_dvsec_rr_decode(cxlds, info);
- put_cxl_mock_ops(index);
-
- return rc;
-}
-EXPORT_SYMBOL_NS_GPL(__wrap_cxl_dvsec_rr_decode, "CXL");
-
struct cxl_dport *__wrap_devm_cxl_add_rch_dport(struct cxl_port *port,
struct device *dport_dev,
int port_id,
@@ -268,23 +231,6 @@ struct cxl_dport *__wrap_devm_cxl_add_rch_dport(struct cxl_port *port,
}
EXPORT_SYMBOL_NS_GPL(__wrap_devm_cxl_add_rch_dport, "CXL");
-resource_size_t __wrap_cxl_rcd_component_reg_phys(struct device *dev,
- struct cxl_dport *dport)
-{
- int index;
- resource_size_t component_reg_phys;
- struct cxl_mock_ops *ops = get_cxl_mock_ops(&index);
-
- if (ops && ops->is_mock_port(dev))
- component_reg_phys = CXL_RESOURCE_NONE;
- else
- component_reg_phys = cxl_rcd_component_reg_phys(dev, dport);
- put_cxl_mock_ops(index);
-
- return component_reg_phys;
-}
-EXPORT_SYMBOL_NS_GPL(__wrap_cxl_rcd_component_reg_phys, "CXL");
-
void __wrap_cxl_endpoint_parse_cdat(struct cxl_port *port)
{
int index;
@@ -311,6 +257,22 @@ void __wrap_cxl_dport_init_ras_reporting(struct cxl_dport *dport, struct device
}
EXPORT_SYMBOL_NS_GPL(__wrap_cxl_dport_init_ras_reporting, "CXL");
+struct cxl_dport *redirect_devm_cxl_add_dport_by_dev(struct cxl_port *port,
+ struct device *dport_dev)
+{
+ int index;
+ struct cxl_mock_ops *ops = get_cxl_mock_ops(&index);
+ struct cxl_dport *dport;
+
+ if (ops && ops->is_mock_port(port->uport_dev))
+ dport = ops->devm_cxl_add_dport_by_dev(port, dport_dev);
+ else
+ dport = __devm_cxl_add_dport_by_dev(port, dport_dev);
+ put_cxl_mock_ops(index);
+
+ return dport;
+}
+
MODULE_LICENSE("GPL v2");
MODULE_DESCRIPTION("cxl_test: emulation module");
MODULE_IMPORT_NS("ACPI");
diff --git a/tools/testing/cxl/test/mock.h b/tools/testing/cxl/test/mock.h
index d1b0271d2822..2684b89c8aa2 100644
--- a/tools/testing/cxl/test/mock.h
+++ b/tools/testing/cxl/test/mock.h
@@ -19,13 +19,14 @@ struct cxl_mock_ops {
bool (*is_mock_bus)(struct pci_bus *bus);
bool (*is_mock_port)(struct device *dev);
bool (*is_mock_dev)(struct device *dev);
- int (*devm_cxl_port_enumerate_dports)(struct cxl_port *port);
- struct cxl_hdm *(*devm_cxl_setup_hdm)(
- struct cxl_port *port, struct cxl_endpoint_dvsec_info *info);
- int (*devm_cxl_add_passthrough_decoder)(struct cxl_port *port);
- int (*devm_cxl_enumerate_decoders)(
- struct cxl_hdm *hdm, struct cxl_endpoint_dvsec_info *info);
+ int (*devm_cxl_switch_port_decoders_setup)(struct cxl_port *port);
+ int (*devm_cxl_endpoint_decoders_setup)(struct cxl_port *port);
void (*cxl_endpoint_parse_cdat)(struct cxl_port *port);
+ struct cxl_dport *(*devm_cxl_add_dport_by_dev)(struct cxl_port *port,
+ struct device *dport_dev);
+ int (*hmat_get_extended_linear_cache_size)(struct resource *backing_res,
+ int nid,
+ resource_size_t *cache_size);
};
void register_cxl_mock_ops(struct cxl_mock_ops *ops);
diff --git a/tools/testing/ktest/config-bisect.pl b/tools/testing/ktest/config-bisect.pl
index 6fd864935319..bee7cb34a289 100755
--- a/tools/testing/ktest/config-bisect.pl
+++ b/tools/testing/ktest/config-bisect.pl
@@ -741,9 +741,9 @@ if ($start) {
die "Can not find file $bad\n";
}
if ($val eq "good") {
- run_command "cp $output_config $good" or die "failed to copy $config to $good\n";
+ run_command "cp $output_config $good" or die "failed to copy $output_config to $good\n";
} elsif ($val eq "bad") {
- run_command "cp $output_config $bad" or die "failed to copy $config to $bad\n";
+ run_command "cp $output_config $bad" or die "failed to copy $output_config to $bad\n";
}
}
diff --git a/tools/testing/kunit/configs/arch_uml.config b/tools/testing/kunit/configs/arch_uml.config
index 54ad8972681a..28edf816aa70 100644
--- a/tools/testing/kunit/configs/arch_uml.config
+++ b/tools/testing/kunit/configs/arch_uml.config
@@ -1,8 +1,7 @@
# Config options which are added to UML builds by default
-# Enable virtio/pci, as a lot of tests require it.
-CONFIG_VIRTIO_UML=y
-CONFIG_UML_PCI_OVER_VIRTIO=y
+# Enable pci, as a lot of tests require it.
+CONFIG_KUNIT_UML_PCI=y
# Enable FORTIFY_SOURCE for wider checking.
CONFIG_FORTIFY_SOURCE=y
diff --git a/tools/testing/kunit/kunit.py b/tools/testing/kunit/kunit.py
index 7f9ae55fd6d5..cd99c1956331 100755
--- a/tools/testing/kunit/kunit.py
+++ b/tools/testing/kunit/kunit.py
@@ -228,7 +228,7 @@ def parse_tests(request: KunitParseRequest, metadata: kunit_json.Metadata, input
fake_test.counts.passed = 1
output: Iterable[str] = input_data
- if request.raw_output == 'all':
+ if request.raw_output == 'all' or request.raw_output == 'full':
pass
elif request.raw_output == 'kunit':
output = kunit_parser.extract_tap_lines(output)
@@ -425,7 +425,7 @@ def add_parse_opts(parser: argparse.ArgumentParser) -> None:
parser.add_argument('--raw_output', help='If set don\'t parse output from kernel. '
'By default, filters to just KUnit output. Use '
'--raw_output=all to show everything',
- type=str, nargs='?', const='all', default=None, choices=['all', 'kunit'])
+ type=str, nargs='?', const='all', default=None, choices=['all', 'full', 'kunit'])
parser.add_argument('--json',
nargs='?',
help='Prints parsed test results as JSON to stdout or a file if '
diff --git a/tools/testing/kunit/kunit_parser.py b/tools/testing/kunit/kunit_parser.py
index c176487356e6..333cd3a4a56b 100644
--- a/tools/testing/kunit/kunit_parser.py
+++ b/tools/testing/kunit/kunit_parser.py
@@ -352,9 +352,9 @@ def parse_test_plan(lines: LineStream, test: Test) -> bool:
lines.pop()
return True
-TEST_RESULT = re.compile(r'^\s*(ok|not ok) ([0-9]+) (- )?([^#]*)( # .*)?$')
+TEST_RESULT = re.compile(r'^\s*(ok|not ok) ([0-9]+) ?(- )?([^#]*)( # .*)?$')
-TEST_RESULT_SKIP = re.compile(r'^\s*(ok|not ok) ([0-9]+) (- )?(.*) # SKIP(.*)$')
+TEST_RESULT_SKIP = re.compile(r'^\s*(ok|not ok) ([0-9]+) ?(- )?(.*) # SKIP ?(.*)$')
def peek_test_name_match(lines: LineStream, test: Test) -> bool:
"""
@@ -379,6 +379,8 @@ def peek_test_name_match(lines: LineStream, test: Test) -> bool:
if not match:
return False
name = match.group(4)
+ if not name:
+ return False
return name == test.name
def parse_test_result(lines: LineStream, test: Test,
@@ -416,7 +418,7 @@ def parse_test_result(lines: LineStream, test: Test,
# Set name of test object
if skip_match:
- test.name = skip_match.group(4)
+ test.name = skip_match.group(4) or skip_match.group(5)
else:
test.name = match.group(4)
diff --git a/tools/testing/kunit/qemu_configs/mips.py b/tools/testing/kunit/qemu_configs/mips.py
new file mode 100644
index 000000000000..8899ac157b30
--- /dev/null
+++ b/tools/testing/kunit/qemu_configs/mips.py
@@ -0,0 +1,18 @@
+# SPDX-License-Identifier: GPL-2.0
+
+from ..qemu_config import QemuArchParams
+
+QEMU_ARCH = QemuArchParams(linux_arch='mips',
+ kconfig='''
+CONFIG_32BIT=y
+CONFIG_CPU_BIG_ENDIAN=y
+CONFIG_MIPS_MALTA=y
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_POWER_RESET=y
+CONFIG_POWER_RESET_SYSCON=y
+''',
+ qemu_arch='mips',
+ kernel_path='vmlinuz',
+ kernel_command_line='console=ttyS0',
+ extra_qemu_params=['-M', 'malta'])
diff --git a/tools/testing/kunit/qemu_configs/mips64.py b/tools/testing/kunit/qemu_configs/mips64.py
new file mode 100644
index 000000000000..1478aed05b94
--- /dev/null
+++ b/tools/testing/kunit/qemu_configs/mips64.py
@@ -0,0 +1,19 @@
+# SPDX-License-Identifier: GPL-2.0
+
+from ..qemu_config import QemuArchParams
+
+QEMU_ARCH = QemuArchParams(linux_arch='mips',
+ kconfig='''
+CONFIG_CPU_MIPS64_R2=y
+CONFIG_64BIT=y
+CONFIG_CPU_BIG_ENDIAN=y
+CONFIG_MIPS_MALTA=y
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_POWER_RESET=y
+CONFIG_POWER_RESET_SYSCON=y
+''',
+ qemu_arch='mips64',
+ kernel_path='vmlinuz',
+ kernel_command_line='console=ttyS0',
+ extra_qemu_params=['-M', 'malta', '-cpu', '5KEc'])
diff --git a/tools/testing/kunit/qemu_configs/mips64el.py b/tools/testing/kunit/qemu_configs/mips64el.py
new file mode 100644
index 000000000000..300c711d7a82
--- /dev/null
+++ b/tools/testing/kunit/qemu_configs/mips64el.py
@@ -0,0 +1,19 @@
+# SPDX-License-Identifier: GPL-2.0
+
+from ..qemu_config import QemuArchParams
+
+QEMU_ARCH = QemuArchParams(linux_arch='mips',
+ kconfig='''
+CONFIG_CPU_MIPS64_R2=y
+CONFIG_64BIT=y
+CONFIG_CPU_LITTLE_ENDIAN=y
+CONFIG_MIPS_MALTA=y
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_POWER_RESET=y
+CONFIG_POWER_RESET_SYSCON=y
+''',
+ qemu_arch='mips64el',
+ kernel_path='vmlinuz',
+ kernel_command_line='console=ttyS0',
+ extra_qemu_params=['-M', 'malta', '-cpu', '5KEc'])
diff --git a/tools/testing/kunit/qemu_configs/mipsel.py b/tools/testing/kunit/qemu_configs/mipsel.py
new file mode 100644
index 000000000000..3d3543315b45
--- /dev/null
+++ b/tools/testing/kunit/qemu_configs/mipsel.py
@@ -0,0 +1,18 @@
+# SPDX-License-Identifier: GPL-2.0
+
+from ..qemu_config import QemuArchParams
+
+QEMU_ARCH = QemuArchParams(linux_arch='mips',
+ kconfig='''
+CONFIG_32BIT=y
+CONFIG_CPU_LITTLE_ENDIAN=y
+CONFIG_MIPS_MALTA=y
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_POWER_RESET=y
+CONFIG_POWER_RESET_SYSCON=y
+''',
+ qemu_arch='mipsel',
+ kernel_path='vmlinuz',
+ kernel_command_line='console=ttyS0',
+ extra_qemu_params=['-M', 'malta'])
diff --git a/tools/testing/kunit/test_data/test_is_test_passed-kselftest.log b/tools/testing/kunit/test_data/test_is_test_passed-kselftest.log
index 65d3f27feaf2..30d9ef18bcec 100644
--- a/tools/testing/kunit/test_data/test_is_test_passed-kselftest.log
+++ b/tools/testing/kunit/test_data/test_is_test_passed-kselftest.log
@@ -1,5 +1,5 @@
TAP version 13
-1..2
+1..3
# selftests: membarrier: membarrier_test_single_thread
# TAP version 13
# 1..2
@@ -12,3 +12,4 @@ ok 1 selftests: membarrier: membarrier_test_single_thread
# ok 1 sys_membarrier available
# ok 2 sys membarrier invalid command test: command = -1, flags = 0, errno = 22. Failed as expected
ok 2 selftests: membarrier: membarrier_test_multi_thread
+ok 3 # SKIP selftests: membarrier: membarrier_test_multi_thread
diff --git a/tools/testing/nvdimm/test/ndtest.c b/tools/testing/nvdimm/test/ndtest.c
index 68a064ce598c..8e3b6be53839 100644
--- a/tools/testing/nvdimm/test/ndtest.c
+++ b/tools/testing/nvdimm/test/ndtest.c
@@ -850,11 +850,22 @@ static int ndtest_probe(struct platform_device *pdev)
p->dcr_dma = devm_kcalloc(&p->pdev.dev, NUM_DCR,
sizeof(dma_addr_t), GFP_KERNEL);
+ if (!p->dcr_dma) {
+ rc = -ENOMEM;
+ goto err;
+ }
p->label_dma = devm_kcalloc(&p->pdev.dev, NUM_DCR,
sizeof(dma_addr_t), GFP_KERNEL);
+ if (!p->label_dma) {
+ rc = -ENOMEM;
+ goto err;
+ }
p->dimm_dma = devm_kcalloc(&p->pdev.dev, NUM_DCR,
sizeof(dma_addr_t), GFP_KERNEL);
-
+ if (!p->dimm_dma) {
+ rc = -ENOMEM;
+ goto err;
+ }
rc = ndtest_nvdimm_init(p);
if (rc)
goto err;
diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c
index cfd4378e2129..f87e9f251d13 100644
--- a/tools/testing/nvdimm/test/nfit.c
+++ b/tools/testing/nvdimm/test/nfit.c
@@ -670,6 +670,7 @@ static int nfit_test_search_spa(struct nvdimm_bus *bus,
.addr = spa->spa,
.region = NULL,
};
+ struct nfit_mem *nfit_mem;
u64 dpa;
ret = device_for_each_child(&bus->dev, &ctx,
@@ -687,8 +688,12 @@ static int nfit_test_search_spa(struct nvdimm_bus *bus,
*/
nd_mapping = &nd_region->mapping[nd_region->ndr_mappings - 1];
nvdimm = nd_mapping->nvdimm;
+ nfit_mem = nvdimm_provider_data(nvdimm);
+ if (!nfit_mem)
+ return -EINVAL;
- spa->devices[0].nfit_device_handle = handle[nvdimm->id];
+ spa->devices[0].nfit_device_handle =
+ __to_nfit_memdev(nfit_mem)->device_handle;
spa->num_nvdimms = 1;
spa->devices[0].dpa = dpa;
diff --git a/tools/testing/radix-tree/idr-test.c b/tools/testing/radix-tree/idr-test.c
index 84b8c3c92c79..2f830ff8396c 100644
--- a/tools/testing/radix-tree/idr-test.c
+++ b/tools/testing/radix-tree/idr-test.c
@@ -499,19 +499,17 @@ void ida_check_random(void)
goto repeat;
}
-void ida_simple_get_remove_test(void)
+void ida_alloc_free_test(void)
{
DEFINE_IDA(ida);
unsigned long i;
- for (i = 0; i < 10000; i++) {
- assert(ida_simple_get(&ida, 0, 20000, GFP_KERNEL) == i);
- }
- assert(ida_simple_get(&ida, 5, 30, GFP_KERNEL) < 0);
+ for (i = 0; i < 10000; i++)
+ assert(ida_alloc_max(&ida, 20000, GFP_KERNEL) == i);
+ assert(ida_alloc_range(&ida, 5, 30, GFP_KERNEL) < 0);
- for (i = 0; i < 10000; i++) {
- ida_simple_remove(&ida, i);
- }
+ for (i = 0; i < 10000; i++)
+ ida_free(&ida, i);
assert(ida_is_empty(&ida));
ida_destroy(&ida);
@@ -524,7 +522,7 @@ void user_ida_checks(void)
ida_check_nomem();
ida_check_conv_user();
ida_check_random();
- ida_simple_get_remove_test();
+ ida_alloc_free_test();
radix_tree_cpu_dead(1);
}
diff --git a/tools/testing/radix-tree/maple.c b/tools/testing/radix-tree/maple.c
index 172700fb7784..5c1b18e3ed21 100644
--- a/tools/testing/radix-tree/maple.c
+++ b/tools/testing/radix-tree/maple.c
@@ -8,14 +8,6 @@
* difficult to handle in kernel tests.
*/
-#define CONFIG_DEBUG_MAPLE_TREE
-#define CONFIG_MAPLE_SEARCH
-#define MAPLE_32BIT (MAPLE_NODE_SLOTS > 31)
-#include "test.h"
-#include <stdlib.h>
-#include <time.h>
-#include <linux/init.h>
-
#define module_init(x)
#define module_exit(x)
#define MODULE_AUTHOR(x)
@@ -23,7 +15,9 @@
#define MODULE_LICENSE(x)
#define dump_stack() assert(0)
-#include "../../../lib/maple_tree.c"
+#include "test.h"
+
+#include "../shared/maple-shim.c"
#include "../../../lib/test_maple_tree.c"
#define RCU_RANGE_COUNT 1000
@@ -63,430 +57,6 @@ struct rcu_reader_struct {
struct rcu_test_struct2 *test;
};
-static int get_alloc_node_count(struct ma_state *mas)
-{
- int count = 1;
- struct maple_alloc *node = mas->alloc;
-
- if (!node || ((unsigned long)node & 0x1))
- return 0;
- while (node->node_count) {
- count += node->node_count;
- node = node->slot[0];
- }
- return count;
-}
-
-static void check_mas_alloc_node_count(struct ma_state *mas)
-{
- mas_node_count_gfp(mas, MAPLE_ALLOC_SLOTS + 1, GFP_KERNEL);
- mas_node_count_gfp(mas, MAPLE_ALLOC_SLOTS + 3, GFP_KERNEL);
- MT_BUG_ON(mas->tree, get_alloc_node_count(mas) != mas->alloc->total);
- mas_destroy(mas);
-}
-
-/*
- * check_new_node() - Check the creation of new nodes and error path
- * verification.
- */
-static noinline void __init check_new_node(struct maple_tree *mt)
-{
-
- struct maple_node *mn, *mn2, *mn3;
- struct maple_alloc *smn;
- struct maple_node *nodes[100];
- int i, j, total;
-
- MA_STATE(mas, mt, 0, 0);
-
- check_mas_alloc_node_count(&mas);
-
- /* Try allocating 3 nodes */
- mtree_lock(mt);
- mt_set_non_kernel(0);
- /* request 3 nodes to be allocated. */
- mas_node_count(&mas, 3);
- /* Allocation request of 3. */
- MT_BUG_ON(mt, mas_alloc_req(&mas) != 3);
- /* Allocate failed. */
- MT_BUG_ON(mt, mas.node != MA_ERROR(-ENOMEM));
- MT_BUG_ON(mt, !mas_nomem(&mas, GFP_KERNEL));
-
- MT_BUG_ON(mt, mas_allocated(&mas) != 3);
- mn = mas_pop_node(&mas);
- MT_BUG_ON(mt, not_empty(mn));
- MT_BUG_ON(mt, mn == NULL);
- MT_BUG_ON(mt, mas.alloc == NULL);
- MT_BUG_ON(mt, mas.alloc->slot[0] == NULL);
- mas_push_node(&mas, mn);
- mas_reset(&mas);
- mas_destroy(&mas);
- mtree_unlock(mt);
-
-
- /* Try allocating 1 node, then 2 more */
- mtree_lock(mt);
- /* Set allocation request to 1. */
- mas_set_alloc_req(&mas, 1);
- /* Check Allocation request of 1. */
- MT_BUG_ON(mt, mas_alloc_req(&mas) != 1);
- mas_set_err(&mas, -ENOMEM);
- /* Validate allocation request. */
- MT_BUG_ON(mt, !mas_nomem(&mas, GFP_KERNEL));
- /* Eat the requested node. */
- mn = mas_pop_node(&mas);
- MT_BUG_ON(mt, not_empty(mn));
- MT_BUG_ON(mt, mn == NULL);
- MT_BUG_ON(mt, mn->slot[0] != NULL);
- MT_BUG_ON(mt, mn->slot[1] != NULL);
- MT_BUG_ON(mt, mas_allocated(&mas) != 0);
-
- mn->parent = ma_parent_ptr(mn);
- ma_free_rcu(mn);
- mas.status = ma_start;
- mas_destroy(&mas);
- /* Allocate 3 nodes, will fail. */
- mas_node_count(&mas, 3);
- /* Drop the lock and allocate 3 nodes. */
- mas_nomem(&mas, GFP_KERNEL);
- /* Ensure 3 are allocated. */
- MT_BUG_ON(mt, mas_allocated(&mas) != 3);
- /* Allocation request of 0. */
- MT_BUG_ON(mt, mas_alloc_req(&mas) != 0);
-
- MT_BUG_ON(mt, mas.alloc == NULL);
- MT_BUG_ON(mt, mas.alloc->slot[0] == NULL);
- MT_BUG_ON(mt, mas.alloc->slot[1] == NULL);
- /* Ensure we counted 3. */
- MT_BUG_ON(mt, mas_allocated(&mas) != 3);
- /* Free. */
- mas_reset(&mas);
- mas_destroy(&mas);
-
- /* Set allocation request to 1. */
- mas_set_alloc_req(&mas, 1);
- MT_BUG_ON(mt, mas_alloc_req(&mas) != 1);
- mas_set_err(&mas, -ENOMEM);
- /* Validate allocation request. */
- MT_BUG_ON(mt, !mas_nomem(&mas, GFP_KERNEL));
- MT_BUG_ON(mt, mas_allocated(&mas) != 1);
- /* Check the node is only one node. */
- mn = mas_pop_node(&mas);
- MT_BUG_ON(mt, not_empty(mn));
- MT_BUG_ON(mt, mas_allocated(&mas) != 0);
- MT_BUG_ON(mt, mn == NULL);
- MT_BUG_ON(mt, mn->slot[0] != NULL);
- MT_BUG_ON(mt, mn->slot[1] != NULL);
- MT_BUG_ON(mt, mas_allocated(&mas) != 0);
- mas_push_node(&mas, mn);
- MT_BUG_ON(mt, mas_allocated(&mas) != 1);
- MT_BUG_ON(mt, mas.alloc->node_count);
-
- mas_set_alloc_req(&mas, 2); /* request 2 more. */
- MT_BUG_ON(mt, mas_alloc_req(&mas) != 2);
- mas_set_err(&mas, -ENOMEM);
- MT_BUG_ON(mt, !mas_nomem(&mas, GFP_KERNEL));
- MT_BUG_ON(mt, mas_allocated(&mas) != 3);
- MT_BUG_ON(mt, mas.alloc == NULL);
- MT_BUG_ON(mt, mas.alloc->slot[0] == NULL);
- MT_BUG_ON(mt, mas.alloc->slot[1] == NULL);
- for (i = 2; i >= 0; i--) {
- mn = mas_pop_node(&mas);
- MT_BUG_ON(mt, mas_allocated(&mas) != i);
- MT_BUG_ON(mt, !mn);
- MT_BUG_ON(mt, not_empty(mn));
- mn->parent = ma_parent_ptr(mn);
- ma_free_rcu(mn);
- }
-
- total = 64;
- mas_set_alloc_req(&mas, total); /* request 2 more. */
- MT_BUG_ON(mt, mas_alloc_req(&mas) != total);
- mas_set_err(&mas, -ENOMEM);
- MT_BUG_ON(mt, !mas_nomem(&mas, GFP_KERNEL));
- for (i = total; i > 0; i--) {
- unsigned int e = 0; /* expected node_count */
-
- if (!MAPLE_32BIT) {
- if (i >= 35)
- e = i - 34;
- else if (i >= 5)
- e = i - 4;
- else if (i >= 2)
- e = i - 1;
- } else {
- if (i >= 4)
- e = i - 3;
- else if (i >= 1)
- e = i - 1;
- else
- e = 0;
- }
-
- MT_BUG_ON(mt, mas.alloc->node_count != e);
- mn = mas_pop_node(&mas);
- MT_BUG_ON(mt, not_empty(mn));
- MT_BUG_ON(mt, mas_allocated(&mas) != i - 1);
- MT_BUG_ON(mt, !mn);
- mn->parent = ma_parent_ptr(mn);
- ma_free_rcu(mn);
- }
-
- total = 100;
- for (i = 1; i < total; i++) {
- mas_set_alloc_req(&mas, i);
- mas_set_err(&mas, -ENOMEM);
- MT_BUG_ON(mt, !mas_nomem(&mas, GFP_KERNEL));
- for (j = i; j > 0; j--) {
- mn = mas_pop_node(&mas);
- MT_BUG_ON(mt, mas_allocated(&mas) != j - 1);
- MT_BUG_ON(mt, !mn);
- MT_BUG_ON(mt, not_empty(mn));
- mas_push_node(&mas, mn);
- MT_BUG_ON(mt, mas_allocated(&mas) != j);
- mn = mas_pop_node(&mas);
- MT_BUG_ON(mt, not_empty(mn));
- MT_BUG_ON(mt, mas_allocated(&mas) != j - 1);
- mn->parent = ma_parent_ptr(mn);
- ma_free_rcu(mn);
- }
- MT_BUG_ON(mt, mas_allocated(&mas) != 0);
-
- mas_set_alloc_req(&mas, i);
- mas_set_err(&mas, -ENOMEM);
- MT_BUG_ON(mt, !mas_nomem(&mas, GFP_KERNEL));
- for (j = 0; j <= i/2; j++) {
- MT_BUG_ON(mt, mas_allocated(&mas) != i - j);
- nodes[j] = mas_pop_node(&mas);
- MT_BUG_ON(mt, mas_allocated(&mas) != i - j - 1);
- }
-
- while (j) {
- j--;
- mas_push_node(&mas, nodes[j]);
- MT_BUG_ON(mt, mas_allocated(&mas) != i - j);
- }
- MT_BUG_ON(mt, mas_allocated(&mas) != i);
- for (j = 0; j <= i/2; j++) {
- MT_BUG_ON(mt, mas_allocated(&mas) != i - j);
- mn = mas_pop_node(&mas);
- MT_BUG_ON(mt, not_empty(mn));
- mn->parent = ma_parent_ptr(mn);
- ma_free_rcu(mn);
- MT_BUG_ON(mt, mas_allocated(&mas) != i - j - 1);
- }
- mas_reset(&mas);
- MT_BUG_ON(mt, mas_nomem(&mas, GFP_KERNEL));
- mas_destroy(&mas);
-
- }
-
- /* Set allocation request. */
- total = 500;
- mas_node_count(&mas, total);
- /* Drop the lock and allocate the nodes. */
- mas_nomem(&mas, GFP_KERNEL);
- MT_BUG_ON(mt, !mas.alloc);
- i = 1;
- smn = mas.alloc;
- while (i < total) {
- for (j = 0; j < MAPLE_ALLOC_SLOTS; j++) {
- i++;
- MT_BUG_ON(mt, !smn->slot[j]);
- if (i == total)
- break;
- }
- smn = smn->slot[0]; /* next. */
- }
- MT_BUG_ON(mt, mas_allocated(&mas) != total);
- mas_reset(&mas);
- mas_destroy(&mas); /* Free. */
-
- MT_BUG_ON(mt, mas_allocated(&mas) != 0);
- for (i = 1; i < 128; i++) {
- mas_node_count(&mas, i); /* Request */
- mas_nomem(&mas, GFP_KERNEL); /* Fill request */
- MT_BUG_ON(mt, mas_allocated(&mas) != i); /* check request filled */
- for (j = i; j > 0; j--) { /*Free the requests */
- mn = mas_pop_node(&mas); /* get the next node. */
- MT_BUG_ON(mt, mn == NULL);
- MT_BUG_ON(mt, not_empty(mn));
- mn->parent = ma_parent_ptr(mn);
- ma_free_rcu(mn);
- }
- MT_BUG_ON(mt, mas_allocated(&mas) != 0);
- }
-
- for (i = 1; i < MAPLE_NODE_MASK + 1; i++) {
- MA_STATE(mas2, mt, 0, 0);
- mas_node_count(&mas, i); /* Request */
- mas_nomem(&mas, GFP_KERNEL); /* Fill request */
- MT_BUG_ON(mt, mas_allocated(&mas) != i); /* check request filled */
- for (j = 1; j <= i; j++) { /* Move the allocations to mas2 */
- mn = mas_pop_node(&mas); /* get the next node. */
- MT_BUG_ON(mt, mn == NULL);
- MT_BUG_ON(mt, not_empty(mn));
- mas_push_node(&mas2, mn);
- MT_BUG_ON(mt, mas_allocated(&mas2) != j);
- }
- MT_BUG_ON(mt, mas_allocated(&mas) != 0);
- MT_BUG_ON(mt, mas_allocated(&mas2) != i);
-
- for (j = i; j > 0; j--) { /*Free the requests */
- MT_BUG_ON(mt, mas_allocated(&mas2) != j);
- mn = mas_pop_node(&mas2); /* get the next node. */
- MT_BUG_ON(mt, mn == NULL);
- MT_BUG_ON(mt, not_empty(mn));
- mn->parent = ma_parent_ptr(mn);
- ma_free_rcu(mn);
- }
- MT_BUG_ON(mt, mas_allocated(&mas2) != 0);
- }
-
-
- MT_BUG_ON(mt, mas_allocated(&mas) != 0);
- mas_node_count(&mas, MAPLE_ALLOC_SLOTS + 1); /* Request */
- MT_BUG_ON(mt, mas.node != MA_ERROR(-ENOMEM));
- MT_BUG_ON(mt, !mas_nomem(&mas, GFP_KERNEL));
- MT_BUG_ON(mt, mas_allocated(&mas) != MAPLE_ALLOC_SLOTS + 1);
- MT_BUG_ON(mt, mas.alloc->node_count != MAPLE_ALLOC_SLOTS);
-
- mn = mas_pop_node(&mas); /* get the next node. */
- MT_BUG_ON(mt, mn == NULL);
- MT_BUG_ON(mt, not_empty(mn));
- MT_BUG_ON(mt, mas_allocated(&mas) != MAPLE_ALLOC_SLOTS);
- MT_BUG_ON(mt, mas.alloc->node_count != MAPLE_ALLOC_SLOTS - 1);
-
- mas_push_node(&mas, mn);
- MT_BUG_ON(mt, mas_allocated(&mas) != MAPLE_ALLOC_SLOTS + 1);
- MT_BUG_ON(mt, mas.alloc->node_count != MAPLE_ALLOC_SLOTS);
-
- /* Check the limit of pop/push/pop */
- mas_node_count(&mas, MAPLE_ALLOC_SLOTS + 2); /* Request */
- MT_BUG_ON(mt, mas_alloc_req(&mas) != 1);
- MT_BUG_ON(mt, mas.node != MA_ERROR(-ENOMEM));
- MT_BUG_ON(mt, !mas_nomem(&mas, GFP_KERNEL));
- MT_BUG_ON(mt, mas_alloc_req(&mas));
- MT_BUG_ON(mt, mas.alloc->node_count != 1);
- MT_BUG_ON(mt, mas_allocated(&mas) != MAPLE_ALLOC_SLOTS + 2);
- mn = mas_pop_node(&mas);
- MT_BUG_ON(mt, not_empty(mn));
- MT_BUG_ON(mt, mas_allocated(&mas) != MAPLE_ALLOC_SLOTS + 1);
- MT_BUG_ON(mt, mas.alloc->node_count != MAPLE_ALLOC_SLOTS);
- mas_push_node(&mas, mn);
- MT_BUG_ON(mt, mas.alloc->node_count != 1);
- MT_BUG_ON(mt, mas_allocated(&mas) != MAPLE_ALLOC_SLOTS + 2);
- mn = mas_pop_node(&mas);
- MT_BUG_ON(mt, not_empty(mn));
- mn->parent = ma_parent_ptr(mn);
- ma_free_rcu(mn);
- for (i = 1; i <= MAPLE_ALLOC_SLOTS + 1; i++) {
- mn = mas_pop_node(&mas);
- MT_BUG_ON(mt, not_empty(mn));
- mn->parent = ma_parent_ptr(mn);
- ma_free_rcu(mn);
- }
- MT_BUG_ON(mt, mas_allocated(&mas) != 0);
-
-
- for (i = 3; i < MAPLE_NODE_MASK * 3; i++) {
- mas.node = MA_ERROR(-ENOMEM);
- mas_node_count(&mas, i); /* Request */
- mas_nomem(&mas, GFP_KERNEL); /* Fill request */
- mn = mas_pop_node(&mas); /* get the next node. */
- mas_push_node(&mas, mn); /* put it back */
- mas_destroy(&mas);
-
- mas.node = MA_ERROR(-ENOMEM);
- mas_node_count(&mas, i); /* Request */
- mas_nomem(&mas, GFP_KERNEL); /* Fill request */
- mn = mas_pop_node(&mas); /* get the next node. */
- mn2 = mas_pop_node(&mas); /* get the next node. */
- mas_push_node(&mas, mn); /* put them back */
- mas_push_node(&mas, mn2);
- mas_destroy(&mas);
-
- mas.node = MA_ERROR(-ENOMEM);
- mas_node_count(&mas, i); /* Request */
- mas_nomem(&mas, GFP_KERNEL); /* Fill request */
- mn = mas_pop_node(&mas); /* get the next node. */
- mn2 = mas_pop_node(&mas); /* get the next node. */
- mn3 = mas_pop_node(&mas); /* get the next node. */
- mas_push_node(&mas, mn); /* put them back */
- mas_push_node(&mas, mn2);
- mas_push_node(&mas, mn3);
- mas_destroy(&mas);
-
- mas.node = MA_ERROR(-ENOMEM);
- mas_node_count(&mas, i); /* Request */
- mas_nomem(&mas, GFP_KERNEL); /* Fill request */
- mn = mas_pop_node(&mas); /* get the next node. */
- mn->parent = ma_parent_ptr(mn);
- ma_free_rcu(mn);
- mas_destroy(&mas);
-
- mas.node = MA_ERROR(-ENOMEM);
- mas_node_count(&mas, i); /* Request */
- mas_nomem(&mas, GFP_KERNEL); /* Fill request */
- mn = mas_pop_node(&mas); /* get the next node. */
- mn->parent = ma_parent_ptr(mn);
- ma_free_rcu(mn);
- mn = mas_pop_node(&mas); /* get the next node. */
- mn->parent = ma_parent_ptr(mn);
- ma_free_rcu(mn);
- mn = mas_pop_node(&mas); /* get the next node. */
- mn->parent = ma_parent_ptr(mn);
- ma_free_rcu(mn);
- mas_destroy(&mas);
- }
-
- mas.node = MA_ERROR(-ENOMEM);
- mas_node_count(&mas, 5); /* Request */
- mas_nomem(&mas, GFP_KERNEL); /* Fill request */
- MT_BUG_ON(mt, mas_allocated(&mas) != 5);
- mas.node = MA_ERROR(-ENOMEM);
- mas_node_count(&mas, 10); /* Request */
- mas_nomem(&mas, GFP_KERNEL); /* Fill request */
- mas.status = ma_start;
- MT_BUG_ON(mt, mas_allocated(&mas) != 10);
- mas_destroy(&mas);
-
- mas.node = MA_ERROR(-ENOMEM);
- mas_node_count(&mas, MAPLE_ALLOC_SLOTS - 1); /* Request */
- mas_nomem(&mas, GFP_KERNEL); /* Fill request */
- MT_BUG_ON(mt, mas_allocated(&mas) != MAPLE_ALLOC_SLOTS - 1);
- mas.node = MA_ERROR(-ENOMEM);
- mas_node_count(&mas, 10 + MAPLE_ALLOC_SLOTS - 1); /* Request */
- mas_nomem(&mas, GFP_KERNEL); /* Fill request */
- mas.status = ma_start;
- MT_BUG_ON(mt, mas_allocated(&mas) != 10 + MAPLE_ALLOC_SLOTS - 1);
- mas_destroy(&mas);
-
- mas.node = MA_ERROR(-ENOMEM);
- mas_node_count(&mas, MAPLE_ALLOC_SLOTS + 1); /* Request */
- mas_nomem(&mas, GFP_KERNEL); /* Fill request */
- MT_BUG_ON(mt, mas_allocated(&mas) != MAPLE_ALLOC_SLOTS + 1);
- mas.node = MA_ERROR(-ENOMEM);
- mas_node_count(&mas, MAPLE_ALLOC_SLOTS * 2 + 2); /* Request */
- mas_nomem(&mas, GFP_KERNEL); /* Fill request */
- mas.status = ma_start;
- MT_BUG_ON(mt, mas_allocated(&mas) != MAPLE_ALLOC_SLOTS * 2 + 2);
- mas_destroy(&mas);
-
- mas.node = MA_ERROR(-ENOMEM);
- mas_node_count(&mas, MAPLE_ALLOC_SLOTS * 2 + 1); /* Request */
- mas_nomem(&mas, GFP_KERNEL); /* Fill request */
- MT_BUG_ON(mt, mas_allocated(&mas) != MAPLE_ALLOC_SLOTS * 2 + 1);
- mas.node = MA_ERROR(-ENOMEM);
- mas_node_count(&mas, MAPLE_ALLOC_SLOTS * 3 + 2); /* Request */
- mas_nomem(&mas, GFP_KERNEL); /* Fill request */
- mas.status = ma_start;
- MT_BUG_ON(mt, mas_allocated(&mas) != MAPLE_ALLOC_SLOTS * 3 + 2);
- mas_destroy(&mas);
-
- mtree_unlock(mt);
-}
-
/*
* Check erasing including RCU.
*/
@@ -35455,17 +35025,6 @@ static void check_dfs_preorder(struct maple_tree *mt)
MT_BUG_ON(mt, count != e);
mtree_destroy(mt);
- mt_init_flags(mt, MT_FLAGS_ALLOC_RANGE);
- mas_reset(&mas);
- mt_zero_nr_tallocated();
- mt_set_non_kernel(200);
- mas_expected_entries(&mas, max);
- for (count = 0; count <= max; count++) {
- mas.index = mas.last = count;
- mas_store(&mas, xa_mk_value(count));
- MT_BUG_ON(mt, mas_is_err(&mas));
- }
- mas_destroy(&mas);
rcu_barrier();
/*
* pr_info(" ->seq test of 0-%lu %luK in %d active (%d total)\n",
@@ -35524,6 +35083,18 @@ static unsigned char get_vacant_height(struct ma_wr_state *wr_mas, void *entry)
return vacant_height;
}
+static int mas_allocated(struct ma_state *mas)
+{
+ int total = 0;
+
+ if (mas->alloc)
+ total++;
+
+ if (mas->sheaf)
+ total += kmem_cache_sheaf_size(mas->sheaf);
+
+ return total;
+}
/* Preallocation testing */
static noinline void __init check_prealloc(struct maple_tree *mt)
{
@@ -35542,7 +35113,10 @@ static noinline void __init check_prealloc(struct maple_tree *mt)
/* Spanning store */
mas_set_range(&mas, 470, 500);
- MT_BUG_ON(mt, mas_preallocate(&mas, ptr, GFP_KERNEL) != 0);
+
+ mas_wr_preallocate(&wr_mas, ptr);
+ MT_BUG_ON(mt, mas.store_type != wr_spanning_store);
+ MT_BUG_ON(mt, mas_is_err(&mas));
allocated = mas_allocated(&mas);
height = mas_mt_height(&mas);
vacant_height = get_vacant_height(&wr_mas, ptr);
@@ -35552,6 +35126,7 @@ static noinline void __init check_prealloc(struct maple_tree *mt)
allocated = mas_allocated(&mas);
MT_BUG_ON(mt, allocated != 0);
+ mas_wr_preallocate(&wr_mas, ptr);
MT_BUG_ON(mt, mas_preallocate(&mas, ptr, GFP_KERNEL) != 0);
allocated = mas_allocated(&mas);
height = mas_mt_height(&mas);
@@ -35597,20 +35172,6 @@ static noinline void __init check_prealloc(struct maple_tree *mt)
height = mas_mt_height(&mas);
vacant_height = get_vacant_height(&wr_mas, ptr);
MT_BUG_ON(mt, allocated != 1 + (height - vacant_height) * 3);
- mn = mas_pop_node(&mas);
- MT_BUG_ON(mt, mas_allocated(&mas) != allocated - 1);
- mas_push_node(&mas, mn);
- MT_BUG_ON(mt, mas_allocated(&mas) != allocated);
- MT_BUG_ON(mt, mas_preallocate(&mas, ptr, GFP_KERNEL) != 0);
- mas_destroy(&mas);
- allocated = mas_allocated(&mas);
- MT_BUG_ON(mt, allocated != 0);
-
- MT_BUG_ON(mt, mas_preallocate(&mas, ptr, GFP_KERNEL) != 0);
- allocated = mas_allocated(&mas);
- height = mas_mt_height(&mas);
- vacant_height = get_vacant_height(&wr_mas, ptr);
- MT_BUG_ON(mt, allocated != 1 + (height - vacant_height) * 3);
mas_store_prealloc(&mas, ptr);
MT_BUG_ON(mt, mas_allocated(&mas) != 0);
@@ -36327,13 +35888,18 @@ extern void test_kmem_cache_bulk(void);
static inline void check_spanning_store_height(struct maple_tree *mt)
{
int index = 0;
+ int last = 140;
MA_STATE(mas, mt, 0, 0);
mas_lock(&mas);
while (mt_height(mt) != 3) {
mas_store_gfp(&mas, xa_mk_value(index), GFP_KERNEL);
mas_set(&mas, ++index);
}
- mas_set_range(&mas, 90, 140);
+
+ if (MAPLE_32BIT)
+ last = 155; /* 32 bit higher branching factor. */
+
+ mas_set_range(&mas, 90, last);
mas_store_gfp(&mas, xa_mk_value(index), GFP_KERNEL);
MT_BUG_ON(mt, mas_mt_height(&mas) != 2);
mas_unlock(&mas);
@@ -36406,11 +35972,17 @@ static void check_nomem_writer_race(struct maple_tree *mt)
check_load(mt, 6, xa_mk_value(0xC));
mtree_unlock(mt);
+ mt_set_non_kernel(0);
/* test for the same race but with mas_store_gfp() */
mtree_store_range(mt, 0, 5, xa_mk_value(0xA), GFP_KERNEL);
mtree_store_range(mt, 6, 10, NULL, GFP_KERNEL);
mas_set_range(&mas, 0, 5);
+
+ /* setup writer 2 that will trigger the race condition */
+ mt_set_private(mt);
+ mt_set_callback(writer2);
+
mtree_lock(mt);
mas_store_gfp(&mas, NULL, GFP_KERNEL);
@@ -36428,6 +36000,7 @@ static void check_nomem_writer_race(struct maple_tree *mt)
*/
static inline int check_vma_modification(struct maple_tree *mt)
{
+#if defined(CONFIG_64BIT)
MA_STATE(mas, mt, 0, 0);
mtree_lock(mt);
@@ -36451,30 +36024,11 @@ static inline int check_vma_modification(struct maple_tree *mt)
mas_destroy(&mas);
mtree_unlock(mt);
+#endif
+
return 0;
}
-/*
- * test to check that bulk stores do not use wr_rebalance as the store
- * type.
- */
-static inline void check_bulk_rebalance(struct maple_tree *mt)
-{
- MA_STATE(mas, mt, ULONG_MAX, ULONG_MAX);
- int max = 10;
-
- build_full_tree(mt, 0, 2);
-
- /* erase every entry in the tree */
- do {
- /* set up bulk store mode */
- mas_expected_entries(&mas, max);
- mas_erase(&mas);
- MT_BUG_ON(mt, mas.store_type == wr_rebalance);
- } while (mas_prev(&mas, 0) != NULL);
-
- mas_destroy(&mas);
-}
void farmer_tests(void)
{
@@ -36487,10 +36041,6 @@ void farmer_tests(void)
check_vma_modification(&tree);
mtree_destroy(&tree);
- mt_init(&tree);
- check_bulk_rebalance(&tree);
- mtree_destroy(&tree);
-
tree.ma_root = xa_mk_value(0);
mt_dump(&tree, mt_dump_dec);
@@ -36550,10 +36100,6 @@ void farmer_tests(void)
check_erase_testset(&tree);
mtree_destroy(&tree);
- mt_init_flags(&tree, 0);
- check_new_node(&tree);
- mtree_destroy(&tree);
-
if (!MAPLE_32BIT) {
mt_init_flags(&tree, MT_FLAGS_ALLOC_RANGE);
check_rcu_simulated(&tree);
diff --git a/tools/testing/scatterlist/linux/mm.h b/tools/testing/scatterlist/linux/mm.h
index 5bd9e6e80625..121ae78d6e88 100644
--- a/tools/testing/scatterlist/linux/mm.h
+++ b/tools/testing/scatterlist/linux/mm.h
@@ -51,7 +51,6 @@ static inline unsigned long page_to_phys(struct page *page)
#define page_to_pfn(page) ((unsigned long)(page) / PAGE_SIZE)
#define pfn_to_page(pfn) (void *)((pfn) * PAGE_SIZE)
-#define nth_page(page,n) pfn_to_page(page_to_pfn((page)) + (n))
#define __min(t1, t2, min1, min2, x, y) ({ \
t1 min1 = (x); \
diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile
index 030da61dbff3..56e44a98d6a5 100644
--- a/tools/testing/selftests/Makefile
+++ b/tools/testing/selftests/Makefile
@@ -36,6 +36,7 @@ TARGETS += filesystems/fat
TARGETS += filesystems/overlayfs
TARGETS += filesystems/statmount
TARGETS += filesystems/mount-notify
+TARGETS += filesystems/fuse
TARGETS += firmware
TARGETS += fpu
TARGETS += ftrace
@@ -53,6 +54,7 @@ TARGETS += kvm
TARGETS += landlock
TARGETS += lib
TARGETS += livepatch
+TARGETS += liveupdate
TARGETS += lkdtm
TARGETS += lsm
TARGETS += membarrier
@@ -124,6 +126,7 @@ TARGETS += uevent
TARGETS += user_events
TARGETS += vDSO
TARGETS += mm
+TARGETS += vfio
TARGETS += x86
TARGETS += x86/bugs
TARGETS += zram
@@ -314,7 +317,7 @@ gen_tar: install
@echo "Created ${TAR_PATH}"
clean:
- @for TARGET in $(TARGETS); do \
+ @for TARGET in $(TARGETS) $(INSTALL_DEP_TARGETS); do \
BUILD_TARGET=$$BUILD/$$TARGET; \
$(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET clean;\
done;
diff --git a/tools/testing/selftests/acct/acct_syscall.c b/tools/testing/selftests/acct/acct_syscall.c
index 87c044fb9293..421adbdc299d 100644
--- a/tools/testing/selftests/acct/acct_syscall.c
+++ b/tools/testing/selftests/acct/acct_syscall.c
@@ -9,7 +9,7 @@
#include <string.h>
#include <sys/wait.h>
-#include "../kselftest.h"
+#include "kselftest.h"
int main(void)
{
diff --git a/tools/testing/selftests/alsa/conf.c b/tools/testing/selftests/alsa/conf.c
index e2b3a5810f47..317212078e36 100644
--- a/tools/testing/selftests/alsa/conf.c
+++ b/tools/testing/selftests/alsa/conf.c
@@ -14,7 +14,7 @@
#include <regex.h>
#include <sys/stat.h>
-#include "../kselftest.h"
+#include "kselftest.h"
#include "alsa-local.h"
#define SYSFS_ROOT "/sys"
@@ -448,7 +448,7 @@ int conf_get_bool(snd_config_t *root, const char *key1, const char *key2, int de
ksft_exit_fail_msg("key '%s'.'%s' search error: %s\n", key1, key2, snd_strerror(ret));
ret = snd_config_get_bool(cfg);
if (ret < 0)
- ksft_exit_fail_msg("key '%s'.'%s' is not an bool\n", key1, key2);
+ ksft_exit_fail_msg("key '%s'.'%s' is not a bool\n", key1, key2);
return !!ret;
}
diff --git a/tools/testing/selftests/alsa/mixer-test.c b/tools/testing/selftests/alsa/mixer-test.c
index 2a4b2662035e..d4f845c32804 100644
--- a/tools/testing/selftests/alsa/mixer-test.c
+++ b/tools/testing/selftests/alsa/mixer-test.c
@@ -25,7 +25,7 @@
#include <poll.h>
#include <stdint.h>
-#include "../kselftest.h"
+#include "kselftest.h"
#include "alsa-local.h"
#define TESTS_PER_CONTROL 7
@@ -53,10 +53,10 @@ struct ctl_data {
struct ctl_data *next;
};
-int num_cards = 0;
-int num_controls = 0;
-struct card_data *card_list = NULL;
-struct ctl_data *ctl_list = NULL;
+int num_cards;
+int num_controls;
+struct card_data *card_list;
+struct ctl_data *ctl_list;
static void find_controls(void)
{
diff --git a/tools/testing/selftests/alsa/pcm-test.c b/tools/testing/selftests/alsa/pcm-test.c
index dbd7c222ce93..ee04ccef7d7c 100644
--- a/tools/testing/selftests/alsa/pcm-test.c
+++ b/tools/testing/selftests/alsa/pcm-test.c
@@ -17,7 +17,7 @@
#include <assert.h>
#include <pthread.h>
-#include "../kselftest.h"
+#include "kselftest.h"
#include "alsa-local.h"
typedef struct timespec timestamp_t;
@@ -30,7 +30,7 @@ struct card_data {
struct card_data *next;
};
-struct card_data *card_list = NULL;
+struct card_data *card_list;
struct pcm_data {
snd_pcm_t *handle;
@@ -43,10 +43,10 @@ struct pcm_data {
struct pcm_data *next;
};
-struct pcm_data *pcm_list = NULL;
+struct pcm_data *pcm_list;
-int num_missing = 0;
-struct pcm_data *pcm_missing = NULL;
+int num_missing;
+struct pcm_data *pcm_missing;
snd_config_t *default_pcm_config;
diff --git a/tools/testing/selftests/alsa/test-pcmtest-driver.c b/tools/testing/selftests/alsa/test-pcmtest-driver.c
index ca81afa4ee90..95065ef3b441 100644
--- a/tools/testing/selftests/alsa/test-pcmtest-driver.c
+++ b/tools/testing/selftests/alsa/test-pcmtest-driver.c
@@ -7,7 +7,7 @@
*/
#include <string.h>
#include <alsa/asoundlib.h>
-#include "../kselftest_harness.h"
+#include "kselftest_harness.h"
#define CH_NUM 4
diff --git a/tools/testing/selftests/alsa/utimer-test.c b/tools/testing/selftests/alsa/utimer-test.c
index 37964f311a33..c45cb226bd8f 100644
--- a/tools/testing/selftests/alsa/utimer-test.c
+++ b/tools/testing/selftests/alsa/utimer-test.c
@@ -6,7 +6,7 @@
*
* Author: Ivan Orlov <ivan.orlov0322@gmail.com>
*/
-#include "../kselftest_harness.h"
+#include "kselftest_harness.h"
#include <sound/asound.h>
#include <unistd.h>
#include <fcntl.h>
diff --git a/tools/testing/selftests/arm64/abi/hwcap.c b/tools/testing/selftests/arm64/abi/hwcap.c
index 002ec38a8bbb..c41640f18e4e 100644
--- a/tools/testing/selftests/arm64/abi/hwcap.c
+++ b/tools/testing/selftests/arm64/abi/hwcap.c
@@ -17,7 +17,9 @@
#include <asm/sigcontext.h>
#include <asm/unistd.h>
-#include "../../kselftest.h"
+#include <linux/auxvec.h>
+
+#include "kselftest.h"
#define TESTS_PER_HWCAP 3
@@ -55,7 +57,6 @@ static void cmpbr_sigill(void)
/* Not implemented, too complicated and unreliable anyway */
}
-
static void crc32_sigill(void)
{
/* CRC32W W0, W0, W1 */
@@ -169,6 +170,18 @@ static void lse128_sigill(void)
: "cc", "memory");
}
+static void lsfe_sigill(void)
+{
+ float __attribute__ ((aligned (16))) mem;
+ register float *memp asm ("x0") = &mem;
+
+ /* STFADD H0, [X0] */
+ asm volatile(".inst 0x7c20801f"
+ : "+r" (memp)
+ :
+ : "memory");
+}
+
static void lut_sigill(void)
{
/* LUTI2 V0.16B, { V0.16B }, V[0] */
@@ -763,6 +776,13 @@ static const struct hwcap_data {
.sigill_fn = lse128_sigill,
},
{
+ .name = "LSFE",
+ .at_hwcap = AT_HWCAP3,
+ .hwcap_bit = HWCAP3_LSFE,
+ .cpuinfo = "lsfe",
+ .sigill_fn = lsfe_sigill,
+ },
+ {
.name = "LUT",
.at_hwcap = AT_HWCAP2,
.hwcap_bit = HWCAP2_LUT,
diff --git a/tools/testing/selftests/arm64/abi/ptrace.c b/tools/testing/selftests/arm64/abi/ptrace.c
index b51d21f78cf9..0e46ac21c81d 100644
--- a/tools/testing/selftests/arm64/abi/ptrace.c
+++ b/tools/testing/selftests/arm64/abi/ptrace.c
@@ -18,7 +18,7 @@
#include <asm/sigcontext.h>
#include <asm/ptrace.h>
-#include "../../kselftest.h"
+#include "kselftest.h"
#define EXPECTED_TESTS 11
diff --git a/tools/testing/selftests/arm64/abi/syscall-abi.c b/tools/testing/selftests/arm64/abi/syscall-abi.c
index 5ec9a18ec802..b67e3e26fa6d 100644
--- a/tools/testing/selftests/arm64/abi/syscall-abi.c
+++ b/tools/testing/selftests/arm64/abi/syscall-abi.c
@@ -16,7 +16,7 @@
#include <asm/sigcontext.h>
#include <asm/unistd.h>
-#include "../../kselftest.h"
+#include "kselftest.h"
#include "syscall-abi.h"
diff --git a/tools/testing/selftests/arm64/abi/tpidr2.c b/tools/testing/selftests/arm64/abi/tpidr2.c
index f58a9f89b952..1703543fb7c7 100644
--- a/tools/testing/selftests/arm64/abi/tpidr2.c
+++ b/tools/testing/selftests/arm64/abi/tpidr2.c
@@ -182,16 +182,16 @@ static int write_clone_read(void)
}
for (;;) {
- waiting = wait4(ret, &status, __WCLONE, NULL);
+ waiting = waitpid(ret, &status, __WCLONE);
if (waiting < 0) {
if (errno == EINTR)
continue;
- ksft_print_msg("wait4() failed: %d\n", errno);
+ ksft_print_msg("waitpid() failed: %d\n", errno);
return 0;
}
if (waiting != ret) {
- ksft_print_msg("wait4() returned wrong PID %d\n",
+ ksft_print_msg("waitpid() returned wrong PID %d\n",
waiting);
return 0;
}
@@ -227,10 +227,10 @@ int main(int argc, char **argv)
ret = open("/proc/sys/abi/sme_default_vector_length", O_RDONLY, 0);
if (ret >= 0) {
ksft_test_result(default_value(), "default_value\n");
- ksft_test_result(write_read, "write_read\n");
- ksft_test_result(write_sleep_read, "write_sleep_read\n");
- ksft_test_result(write_fork_read, "write_fork_read\n");
- ksft_test_result(write_clone_read, "write_clone_read\n");
+ ksft_test_result(write_read(), "write_read\n");
+ ksft_test_result(write_sleep_read(), "write_sleep_read\n");
+ ksft_test_result(write_fork_read(), "write_fork_read\n");
+ ksft_test_result(write_clone_read(), "write_clone_read\n");
} else {
ksft_print_msg("SME support not present\n");
diff --git a/tools/testing/selftests/arm64/bti/assembler.h b/tools/testing/selftests/arm64/bti/assembler.h
index 04e7b72880ef..141cdcbf0b8f 100644
--- a/tools/testing/selftests/arm64/bti/assembler.h
+++ b/tools/testing/selftests/arm64/bti/assembler.h
@@ -14,7 +14,6 @@
#define GNU_PROPERTY_AARCH64_FEATURE_1_BTI (1U << 0)
#define GNU_PROPERTY_AARCH64_FEATURE_1_PAC (1U << 1)
-
.macro startfn name:req
.globl \name
\name:
diff --git a/tools/testing/selftests/arm64/fp/fp-ptrace.c b/tools/testing/selftests/arm64/fp/fp-ptrace.c
index 124bc883365e..22c584b78be5 100644
--- a/tools/testing/selftests/arm64/fp/fp-ptrace.c
+++ b/tools/testing/selftests/arm64/fp/fp-ptrace.c
@@ -27,7 +27,7 @@
#include <asm/sve_context.h>
#include <asm/ptrace.h>
-#include "../../kselftest.h"
+#include "kselftest.h"
#include "fp-ptrace.h"
@@ -1071,7 +1071,7 @@ static bool sve_write_supported(struct test_config *config)
static bool sve_write_fpsimd_supported(struct test_config *config)
{
- if (!sve_supported())
+ if (!sve_supported() && !sme_supported())
return false;
if ((config->svcr_in & SVCR_ZA) != (config->svcr_expected & SVCR_ZA))
@@ -1187,7 +1187,7 @@ static void sve_write_sve(pid_t child, struct test_config *config)
if (!vl)
return;
- iov.iov_len = SVE_PT_SVE_OFFSET + SVE_PT_SVE_SIZE(vq, SVE_PT_REGS_SVE);
+ iov.iov_len = SVE_PT_SIZE(vq, SVE_PT_REGS_SVE);
iov.iov_base = malloc(iov.iov_len);
if (!iov.iov_base) {
ksft_print_msg("Failed allocating %lu byte SVE write buffer\n",
@@ -1231,11 +1231,7 @@ static void sve_write_fpsimd(pid_t child, struct test_config *config)
vl = vl_expected(config);
vq = __sve_vq_from_vl(vl);
- if (!vl)
- return;
-
- iov.iov_len = SVE_PT_SVE_OFFSET + SVE_PT_SVE_SIZE(vq,
- SVE_PT_REGS_FPSIMD);
+ iov.iov_len = SVE_PT_SIZE(vq, SVE_PT_REGS_FPSIMD);
iov.iov_base = malloc(iov.iov_len);
if (!iov.iov_base) {
ksft_print_msg("Failed allocating %lu byte SVE write buffer\n",
@@ -1569,7 +1565,6 @@ static void run_sve_tests(void)
&test_config);
}
}
-
}
static void run_sme_tests(void)
diff --git a/tools/testing/selftests/arm64/fp/fp-stress.c b/tools/testing/selftests/arm64/fp/fp-stress.c
index 74e23208b94c..65e01aba96ff 100644
--- a/tools/testing/selftests/arm64/fp/fp-stress.c
+++ b/tools/testing/selftests/arm64/fp/fp-stress.c
@@ -24,7 +24,7 @@
#include <sys/wait.h>
#include <asm/hwcap.h>
-#include "../../kselftest.h"
+#include "kselftest.h"
#define MAX_VLS 16
@@ -105,8 +105,8 @@ static void child_start(struct child_data *child, const char *program)
/*
* Read from the startup pipe, there should be no data
- * and we should block until it is closed. We just
- * carry on on error since this isn't super critical.
+ * and we should block until it is closed. We just
+ * carry-on on error since this isn't super critical.
*/
ret = read(3, &i, sizeof(i));
if (ret < 0)
@@ -549,7 +549,7 @@ int main(int argc, char **argv)
evs = calloc(tests, sizeof(*evs));
if (!evs)
- ksft_exit_fail_msg("Failed to allocated %d epoll events\n",
+ ksft_exit_fail_msg("Failed to allocate %d epoll events\n",
tests);
for (i = 0; i < cpus; i++) {
diff --git a/tools/testing/selftests/arm64/fp/kernel-test.c b/tools/testing/selftests/arm64/fp/kernel-test.c
index e3cec3723ffa..0c40007d1282 100644
--- a/tools/testing/selftests/arm64/fp/kernel-test.c
+++ b/tools/testing/selftests/arm64/fp/kernel-test.c
@@ -188,13 +188,13 @@ static bool create_socket(void)
ref = malloc(digest_len);
if (!ref) {
- printf("Failed to allocated %d byte reference\n", digest_len);
+ printf("Failed to allocate %d byte reference\n", digest_len);
return false;
}
digest = malloc(digest_len);
if (!digest) {
- printf("Failed to allocated %d byte digest\n", digest_len);
+ printf("Failed to allocate %d byte digest\n", digest_len);
return false;
}
diff --git a/tools/testing/selftests/arm64/fp/sve-probe-vls.c b/tools/testing/selftests/arm64/fp/sve-probe-vls.c
index a24eca7a4ecb..df0c1b6eb114 100644
--- a/tools/testing/selftests/arm64/fp/sve-probe-vls.c
+++ b/tools/testing/selftests/arm64/fp/sve-probe-vls.c
@@ -12,7 +12,7 @@
#include <sys/prctl.h>
#include <asm/sigcontext.h>
-#include "../../kselftest.h"
+#include "kselftest.h"
#include "rdvl.h"
int main(int argc, char **argv)
diff --git a/tools/testing/selftests/arm64/fp/sve-ptrace.c b/tools/testing/selftests/arm64/fp/sve-ptrace.c
index b22303778fb0..28f6b996c5e2 100644
--- a/tools/testing/selftests/arm64/fp/sve-ptrace.c
+++ b/tools/testing/selftests/arm64/fp/sve-ptrace.c
@@ -19,7 +19,7 @@
#include <asm/sigcontext.h>
#include <asm/ptrace.h>
-#include "../../kselftest.h"
+#include "kselftest.h"
/* <linux/elf.h> and <sys/auxv.h> don't like each other, so: */
#ifndef NT_ARM_SVE
@@ -66,7 +66,7 @@ static const struct vec_type vec_types[] = {
};
#define VL_TESTS (((TEST_VQ_MAX - SVE_VQ_MIN) + 1) * 4)
-#define FLAG_TESTS 2
+#define FLAG_TESTS 4
#define FPSIMD_TESTS 2
#define EXPECTED_TESTS ((VL_TESTS + FLAG_TESTS + FPSIMD_TESTS) * ARRAY_SIZE(vec_types))
@@ -95,19 +95,27 @@ static int do_child(void)
static int get_fpsimd(pid_t pid, struct user_fpsimd_state *fpsimd)
{
struct iovec iov;
+ int ret;
iov.iov_base = fpsimd;
iov.iov_len = sizeof(*fpsimd);
- return ptrace(PTRACE_GETREGSET, pid, NT_PRFPREG, &iov);
+ ret = ptrace(PTRACE_GETREGSET, pid, NT_PRFPREG, &iov);
+ if (ret == -1)
+ ksft_perror("ptrace(PTRACE_GETREGSET)");
+ return ret;
}
static int set_fpsimd(pid_t pid, struct user_fpsimd_state *fpsimd)
{
struct iovec iov;
+ int ret;
iov.iov_base = fpsimd;
iov.iov_len = sizeof(*fpsimd);
- return ptrace(PTRACE_SETREGSET, pid, NT_PRFPREG, &iov);
+ ret = ptrace(PTRACE_SETREGSET, pid, NT_PRFPREG, &iov);
+ if (ret == -1)
+ ksft_perror("ptrace(PTRACE_SETREGSET)");
+ return ret;
}
static struct user_sve_header *get_sve(pid_t pid, const struct vec_type *type,
@@ -115,8 +123,9 @@ static struct user_sve_header *get_sve(pid_t pid, const struct vec_type *type,
{
struct user_sve_header *sve;
void *p;
- size_t sz = sizeof *sve;
+ size_t sz = sizeof(*sve);
struct iovec iov;
+ int ret;
while (1) {
if (*size < sz) {
@@ -132,8 +141,11 @@ static struct user_sve_header *get_sve(pid_t pid, const struct vec_type *type,
iov.iov_base = *buf;
iov.iov_len = sz;
- if (ptrace(PTRACE_GETREGSET, pid, type->regset, &iov))
+ ret = ptrace(PTRACE_GETREGSET, pid, type->regset, &iov);
+ if (ret) {
+ ksft_perror("ptrace(PTRACE_GETREGSET)");
goto error;
+ }
sve = *buf;
if (sve->size <= sz)
@@ -152,10 +164,46 @@ static int set_sve(pid_t pid, const struct vec_type *type,
const struct user_sve_header *sve)
{
struct iovec iov;
+ int ret;
iov.iov_base = (void *)sve;
iov.iov_len = sve->size;
- return ptrace(PTRACE_SETREGSET, pid, type->regset, &iov);
+ ret = ptrace(PTRACE_SETREGSET, pid, type->regset, &iov);
+ if (ret == -1)
+ ksft_perror("ptrace(PTRACE_SETREGSET)");
+ return ret;
+}
+
+/* A read operation fails */
+static void read_fails(pid_t child, const struct vec_type *type)
+{
+ struct user_sve_header *new_sve = NULL;
+ size_t new_sve_size = 0;
+ void *ret;
+
+ ret = get_sve(child, type, (void **)&new_sve, &new_sve_size);
+
+ ksft_test_result(ret == NULL, "%s unsupported read fails\n",
+ type->name);
+
+ free(new_sve);
+}
+
+/* A write operation fails */
+static void write_fails(pid_t child, const struct vec_type *type)
+{
+ struct user_sve_header sve;
+ int ret;
+
+ /* Just the header, no data */
+ memset(&sve, 0, sizeof(sve));
+ sve.size = sizeof(sve);
+ sve.flags = SVE_PT_REGS_SVE;
+ sve.vl = SVE_VL_MIN;
+ ret = set_sve(child, type, &sve);
+
+ ksft_test_result(ret != 0, "%s unsupported write fails\n",
+ type->name);
}
/* Validate setting and getting the inherit flag */
@@ -270,6 +318,25 @@ static void check_u32(unsigned int vl, const char *reg,
}
}
+/* Set out of range VLs */
+static void ptrace_set_vl_ranges(pid_t child, const struct vec_type *type)
+{
+ struct user_sve_header sve;
+ int ret;
+
+ memset(&sve, 0, sizeof(sve));
+ sve.flags = SVE_PT_REGS_SVE;
+ sve.size = sizeof(sve);
+
+ ret = set_sve(child, type, &sve);
+ ksft_test_result(ret != 0, "%s Set invalid VL 0\n", type->name);
+
+ sve.vl = SVE_VL_MAX + SVE_VQ_BYTES;
+ ret = set_sve(child, type, &sve);
+ ksft_test_result(ret != 0, "%s Set invalid VL %d\n", type->name,
+ SVE_VL_MAX + SVE_VQ_BYTES);
+}
+
/* Access the FPSIMD registers via the SVE regset */
static void ptrace_sve_fpsimd(pid_t child, const struct vec_type *type)
{
@@ -327,6 +394,58 @@ out:
free(svebuf);
}
+/* Write the FPSIMD registers via the SVE regset when SVE is not supported */
+static void ptrace_sve_fpsimd_no_sve(pid_t child)
+{
+ void *svebuf;
+ struct user_sve_header *sve;
+ struct user_fpsimd_state *fpsimd, new_fpsimd;
+ unsigned int i, j;
+ unsigned char *p;
+ int ret;
+
+ svebuf = malloc(SVE_PT_SIZE(0, SVE_PT_REGS_FPSIMD));
+ if (!svebuf) {
+ ksft_test_result_fail("Failed to allocate FPSIMD buffer\n");
+ return;
+ }
+
+ /* On a system without SVE the VL should be set to 0 */
+ memset(svebuf, 0, SVE_PT_SIZE(0, SVE_PT_REGS_FPSIMD));
+ sve = svebuf;
+ sve->flags = SVE_PT_REGS_FPSIMD;
+ sve->size = SVE_PT_SIZE(0, SVE_PT_REGS_FPSIMD);
+ sve->vl = 0;
+
+ /* Try to set a known FPSIMD state via PT_REGS_SVE */
+ fpsimd = (struct user_fpsimd_state *)((char *)sve +
+ SVE_PT_FPSIMD_OFFSET);
+ for (i = 0; i < 32; ++i) {
+ p = (unsigned char *)&fpsimd->vregs[i];
+
+ for (j = 0; j < sizeof(fpsimd->vregs[i]); ++j)
+ p[j] = j;
+ }
+
+ ret = set_sve(child, &vec_types[0], sve);
+ ksft_test_result(ret == 0, "FPSIMD write via SVE\n");
+ if (ret) {
+ ksft_test_result_skip("Verify FPSIMD write via SVE\n");
+ goto out;
+ }
+
+ /* Verify via the FPSIMD regset */
+ if (get_fpsimd(child, &new_fpsimd)) {
+ ksft_test_result_skip("Verify FPSIMD write via SVE\n");
+ goto out;
+ }
+ ksft_test_result(memcmp(fpsimd, &new_fpsimd, sizeof(*fpsimd)) == 0,
+ "Verify FPSIMD write via SVE\n");
+
+out:
+ free(svebuf);
+}
+
/* Validate attempting to set SVE data and read SVE data */
static void ptrace_set_sve_get_sve_data(pid_t child,
const struct vec_type *type,
@@ -683,6 +802,20 @@ static int do_parent(pid_t child)
}
for (i = 0; i < ARRAY_SIZE(vec_types); i++) {
+ /*
+ * If the vector type isn't supported reads and writes
+ * should fail.
+ */
+ if (!(getauxval(vec_types[i].hwcap_type) & vec_types[i].hwcap)) {
+ read_fails(child, &vec_types[i]);
+ write_fails(child, &vec_types[i]);
+ } else {
+ ksft_test_result_skip("%s unsupported read fails\n",
+ vec_types[i].name);
+ ksft_test_result_skip("%s unsupported write fails\n",
+ vec_types[i].name);
+ }
+
/* FPSIMD via SVE regset */
if (getauxval(vec_types[i].hwcap_type) & vec_types[i].hwcap) {
ptrace_sve_fpsimd(child, &vec_types[i]);
@@ -703,6 +836,17 @@ static int do_parent(pid_t child)
vec_types[i].name);
}
+ /* Setting out of bounds VLs should fail */
+ if (getauxval(vec_types[i].hwcap_type) & vec_types[i].hwcap) {
+ ptrace_set_vl_ranges(child, &vec_types[i]);
+ } else {
+ ksft_test_result_skip("%s Set invalid VL 0\n",
+ vec_types[i].name);
+ ksft_test_result_skip("%s Set invalid VL %d\n",
+ vec_types[i].name,
+ SVE_VL_MAX + SVE_VQ_BYTES);
+ }
+
/* Step through every possible VQ */
for (vq = SVE_VQ_MIN; vq <= TEST_VQ_MAX; vq++) {
vl = sve_vl_from_vq(vq);
@@ -734,6 +878,15 @@ static int do_parent(pid_t child)
}
}
+ /* We support SVE writes of FPSMID format on SME only systems */
+ if (!(getauxval(AT_HWCAP) & HWCAP_SVE) &&
+ (getauxval(AT_HWCAP2) & HWCAP2_SME)) {
+ ptrace_sve_fpsimd_no_sve(child);
+ } else {
+ ksft_test_result_skip("FPSIMD write via SVE\n");
+ ksft_test_result_skip("Verify FPSIMD write via SVE\n");
+ }
+
ret = EXIT_SUCCESS;
error:
diff --git a/tools/testing/selftests/arm64/fp/vec-syscfg.c b/tools/testing/selftests/arm64/fp/vec-syscfg.c
index ea9c7d47790f..8dd932fdcdc4 100644
--- a/tools/testing/selftests/arm64/fp/vec-syscfg.c
+++ b/tools/testing/selftests/arm64/fp/vec-syscfg.c
@@ -19,7 +19,7 @@
#include <asm/sigcontext.h>
#include <asm/hwcap.h>
-#include "../../kselftest.h"
+#include "kselftest.h"
#include "rdvl.h"
#define ARCH_MIN_VL SVE_VL_MIN
@@ -690,7 +690,6 @@ static inline void smstop(void)
asm volatile("msr S0_3_C4_C6_3, xzr");
}
-
/*
* Verify we can change the SVE vector length while SME is active and
* continue to use SME afterwards.
diff --git a/tools/testing/selftests/arm64/fp/za-ptrace.c b/tools/testing/selftests/arm64/fp/za-ptrace.c
index 08c777f87ea2..787eed22d059 100644
--- a/tools/testing/selftests/arm64/fp/za-ptrace.c
+++ b/tools/testing/selftests/arm64/fp/za-ptrace.c
@@ -18,7 +18,7 @@
#include <asm/sigcontext.h>
#include <asm/ptrace.h>
-#include "../../kselftest.h"
+#include "kselftest.h"
/* <linux/elf.h> and <sys/auxv.h> don't like each other, so: */
#ifndef NT_ARM_ZA
diff --git a/tools/testing/selftests/arm64/fp/zt-ptrace.c b/tools/testing/selftests/arm64/fp/zt-ptrace.c
index 584b8d59b7ea..f3fa49fd0fbd 100644
--- a/tools/testing/selftests/arm64/fp/zt-ptrace.c
+++ b/tools/testing/selftests/arm64/fp/zt-ptrace.c
@@ -18,7 +18,7 @@
#include <asm/sigcontext.h>
#include <asm/ptrace.h>
-#include "../../kselftest.h"
+#include "kselftest.h"
/* <linux/elf.h> and <sys/auxv.h> don't like each other, so: */
#ifndef NT_ARM_ZA
@@ -108,7 +108,6 @@ static int get_zt(pid_t pid, char zt[ZT_SIG_REG_BYTES])
return ptrace(PTRACE_GETREGSET, pid, NT_ARM_ZT, &iov);
}
-
static int set_zt(pid_t pid, const char zt[ZT_SIG_REG_BYTES])
{
struct iovec iov;
diff --git a/tools/testing/selftests/arm64/fp/zt-test.S b/tools/testing/selftests/arm64/fp/zt-test.S
index 38080f3c3280..a8df05771670 100644
--- a/tools/testing/selftests/arm64/fp/zt-test.S
+++ b/tools/testing/selftests/arm64/fp/zt-test.S
@@ -276,7 +276,7 @@ function barf
bl putdec
puts ", iteration="
mov x0, x22
- bl putdec
+ bl putdecn
puts "\tExpected ["
mov x0, x10
mov x1, x12
diff --git a/tools/testing/selftests/arm64/gcs/Makefile b/tools/testing/selftests/arm64/gcs/Makefile
index d2f3497a9103..1fbbf0ca1f02 100644
--- a/tools/testing/selftests/arm64/gcs/Makefile
+++ b/tools/testing/selftests/arm64/gcs/Makefile
@@ -14,11 +14,11 @@ LDLIBS+=-lpthread
include ../../lib.mk
$(OUTPUT)/basic-gcs: basic-gcs.c
- $(CC) -g -fno-asynchronous-unwind-tables -fno-ident -s -Os -nostdlib \
- -static -include ../../../../include/nolibc/nolibc.h \
+ $(CC) $(CFLAGS) -fno-asynchronous-unwind-tables -fno-ident -s -nostdlib -nostdinc \
+ -static -I../../../../include/nolibc -include ../../../../include/nolibc/nolibc.h \
-I../../../../../usr/include \
-std=gnu99 -I../.. -g \
- -ffreestanding -Wall $^ -o $@ -lgcc
+ -ffreestanding $^ -o $@ -lgcc
$(OUTPUT)/gcs-stress-thread: gcs-stress-thread.S
$(CC) -nostdlib $^ -o $@
diff --git a/tools/testing/selftests/arm64/gcs/basic-gcs.c b/tools/testing/selftests/arm64/gcs/basic-gcs.c
index 54f9c888249d..250977abc398 100644
--- a/tools/testing/selftests/arm64/gcs/basic-gcs.c
+++ b/tools/testing/selftests/arm64/gcs/basic-gcs.c
@@ -10,6 +10,7 @@
#include <sys/mman.h>
#include <asm/mman.h>
+#include <asm/hwcap.h>
#include <linux/sched.h>
#include "kselftest.h"
@@ -386,14 +387,13 @@ int main(void)
ksft_print_header();
- /*
- * We don't have getauxval() with nolibc so treat a failure to
- * read GCS state as a lack of support and skip.
- */
+ if (!(getauxval(AT_HWCAP) & HWCAP_GCS))
+ ksft_exit_skip("SKIP GCS not supported\n");
+
ret = my_syscall5(__NR_prctl, PR_GET_SHADOW_STACK_STATUS,
&gcs_mode, 0, 0, 0);
if (ret != 0)
- ksft_exit_skip("Failed to read GCS state: %d\n", ret);
+ ksft_exit_fail_msg("Failed to read GCS state: %d\n", ret);
if (!(gcs_mode & PR_SHADOW_STACK_ENABLE)) {
gcs_mode = PR_SHADOW_STACK_ENABLE;
@@ -410,7 +410,7 @@ int main(void)
}
/* One last test: disable GCS, we can do this one time */
- my_syscall5(__NR_prctl, PR_SET_SHADOW_STACK_STATUS, 0, 0, 0, 0);
+ ret = my_syscall5(__NR_prctl, PR_SET_SHADOW_STACK_STATUS, 0, 0, 0, 0);
if (ret != 0)
ksft_print_msg("Failed to disable GCS: %d\n", ret);
diff --git a/tools/testing/selftests/arm64/gcs/gcs-locking.c b/tools/testing/selftests/arm64/gcs/gcs-locking.c
index 989f75a491b7..1e6abb136ffd 100644
--- a/tools/testing/selftests/arm64/gcs/gcs-locking.c
+++ b/tools/testing/selftests/arm64/gcs/gcs-locking.c
@@ -165,7 +165,6 @@ TEST_F(valid_modes, lock_enable_disable_others)
ASSERT_EQ(ret, 0);
ASSERT_EQ(mode, PR_SHADOW_STACK_ALL_MODES);
-
ret = my_syscall2(__NR_prctl, PR_SET_SHADOW_STACK_STATUS,
variant->mode);
ASSERT_EQ(ret, 0);
diff --git a/tools/testing/selftests/arm64/gcs/gcs-stress.c b/tools/testing/selftests/arm64/gcs/gcs-stress.c
index bbc7f4950c13..86d8cd42aee7 100644
--- a/tools/testing/selftests/arm64/gcs/gcs-stress.c
+++ b/tools/testing/selftests/arm64/gcs/gcs-stress.c
@@ -24,7 +24,7 @@
#include <sys/wait.h>
#include <asm/hwcap.h>
-#include "../../kselftest.h"
+#include "kselftest.h"
struct child_data {
char *name, *output;
@@ -433,7 +433,7 @@ int main(int argc, char **argv)
evs = calloc(tests, sizeof(*evs));
if (!evs)
- ksft_exit_fail_msg("Failed to allocated %d epoll events\n",
+ ksft_exit_fail_msg("Failed to allocate %d epoll events\n",
tests);
for (i = 0; i < gcs_threads; i++)
diff --git a/tools/testing/selftests/arm64/pauth/exec_target.c b/tools/testing/selftests/arm64/pauth/exec_target.c
index 4435600ca400..e597861b26d6 100644
--- a/tools/testing/selftests/arm64/pauth/exec_target.c
+++ b/tools/testing/selftests/arm64/pauth/exec_target.c
@@ -13,7 +13,12 @@ int main(void)
unsigned long hwcaps;
size_t val;
- fread(&val, sizeof(size_t), 1, stdin);
+ size_t size = fread(&val, sizeof(size_t), 1, stdin);
+
+ if (size != 1) {
+ fprintf(stderr, "Could not read input from stdin\n");
+ return EXIT_FAILURE;
+ }
/* don't try to execute illegal (unimplemented) instructions) caller
* should have checked this and keep worker simple
diff --git a/tools/testing/selftests/arm64/pauth/pac.c b/tools/testing/selftests/arm64/pauth/pac.c
index 6d21b2fc758d..67d138057707 100644
--- a/tools/testing/selftests/arm64/pauth/pac.c
+++ b/tools/testing/selftests/arm64/pauth/pac.c
@@ -10,7 +10,7 @@
#include <setjmp.h>
#include <sched.h>
-#include "../../kselftest_harness.h"
+#include "kselftest_harness.h"
#include "helper.h"
#define PAC_COLLISION_ATTEMPTS 1000
diff --git a/tools/testing/selftests/arm64/tags/tags_test.c b/tools/testing/selftests/arm64/tags/tags_test.c
index 8ae26e496c89..375ab47f0edb 100644
--- a/tools/testing/selftests/arm64/tags/tags_test.c
+++ b/tools/testing/selftests/arm64/tags/tags_test.c
@@ -6,7 +6,7 @@
#include <stdint.h>
#include <sys/prctl.h>
#include <sys/utsname.h>
-#include "../../kselftest.h"
+#include "kselftest.h"
#define SHIFT_TAG(tag) ((uint64_t)(tag) << 56)
#define SET_TAG(ptr, tag) (((uint64_t)(ptr) & ~SHIFT_TAG(0xff)) | \
diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore
index 3d8378972d26..19c1638e312a 100644
--- a/tools/testing/selftests/bpf/.gitignore
+++ b/tools/testing/selftests/bpf/.gitignore
@@ -23,6 +23,7 @@ test_tcpnotify_user
test_libbpf
xdping
test_cpp
+test_progs_verification_cert
*.d
*.subskel.h
*.skel.h
@@ -32,7 +33,6 @@ test_cpp
/cpuv4
/host-tools
/tools
-/runqslower
/bench
/veristat
/sign-file
@@ -44,3 +44,4 @@ xdp_redirect_multi
xdp_synproxy
xdp_hw_metadata
xdp_features
+verification_cert.h
diff --git a/tools/testing/selftests/bpf/DENYLIST.s390x b/tools/testing/selftests/bpf/DENYLIST.s390x
index 3ebd77206f98..a17baf8c6fd7 100644
--- a/tools/testing/selftests/bpf/DENYLIST.s390x
+++ b/tools/testing/selftests/bpf/DENYLIST.s390x
@@ -2,4 +2,3 @@
# Alphabetical order
get_stack_raw_tp # user_stack corrupted user stack (no backchain userspace)
stacktrace_build_id # compare_map_keys stackid_hmap vs. stackmap err -2 errno 2 (?)
-verifier_iterating_callbacks
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 4863106034df..b7030a6e2e76 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -46,6 +46,7 @@ endif
CFLAGS += -g $(OPT_FLAGS) -rdynamic -std=gnu11 \
-Wall -Werror -fno-omit-frame-pointer \
+ -Wno-unused-but-set-variable \
$(GENFLAGS) $(SAN_CFLAGS) $(LIBELF_CFLAGS) \
-I$(CURDIR) -I$(INCLUDE_DIR) -I$(GENDIR) -I$(LIBDIR) \
-I$(TOOLSINCDIR) -I$(TOOLSARCHINCDIR) -I$(APIDIR) -I$(OUTPUT)
@@ -98,14 +99,11 @@ TEST_GEN_PROGS += test_progs-cpuv4
TEST_INST_SUBDIRS += cpuv4
endif
-TEST_GEN_FILES = test_tc_edt.bpf.o
TEST_FILES = xsk_prereqs.sh $(wildcard progs/btf_dump_test_case_*.c)
# Order correspond to 'make run_tests' order
TEST_PROGS := test_kmod.sh \
test_lirc_mode2.sh \
- test_tc_tunnel.sh \
- test_tc_edt.sh \
test_xdping.sh \
test_bpftool_build.sh \
test_bpftool.sh \
@@ -120,14 +118,13 @@ TEST_PROGS_EXTENDED := \
test_bpftool.py
TEST_KMODS := bpf_testmod.ko bpf_test_no_cfi.ko bpf_test_modorder_x.ko \
- bpf_test_modorder_y.ko
+ bpf_test_modorder_y.ko bpf_test_rqspinlock.ko
TEST_KMOD_TARGETS = $(addprefix $(OUTPUT)/,$(TEST_KMODS))
# Compile but not part of 'make run_tests'
TEST_GEN_PROGS_EXTENDED = \
bench \
flow_dissector_load \
- runqslower \
test_cpp \
test_lirc_mode2_user \
veristat \
@@ -137,7 +134,7 @@ TEST_GEN_PROGS_EXTENDED = \
xdping \
xskxceiver
-TEST_GEN_FILES += liburandom_read.so urandom_read sign-file uprobe_multi
+TEST_GEN_FILES += $(TEST_KMODS) liburandom_read.so urandom_read sign-file uprobe_multi
ifneq ($(V),1)
submake_extras := feature_display=0
@@ -209,8 +206,6 @@ HOST_INCLUDE_DIR := $(INCLUDE_DIR)
endif
HOST_BPFOBJ := $(HOST_BUILD_DIR)/libbpf/libbpf.a
RESOLVE_BTFIDS := $(HOST_BUILD_DIR)/resolve_btfids/resolve_btfids
-RUNQSLOWER_OUTPUT := $(BUILD_DIR)/runqslower/
-
VMLINUX_BTF_PATHS ?= $(if $(O),$(O)/vmlinux) \
$(if $(KBUILD_OUTPUT),$(KBUILD_OUTPUT)/vmlinux) \
../../../../vmlinux \
@@ -232,7 +227,7 @@ $(notdir $(TEST_GEN_PROGS) $(TEST_KMODS) \
MAKE_DIRS := $(sort $(BUILD_DIR)/libbpf $(HOST_BUILD_DIR)/libbpf \
$(BUILD_DIR)/bpftool $(HOST_BUILD_DIR)/bpftool \
$(HOST_BUILD_DIR)/resolve_btfids \
- $(RUNQSLOWER_OUTPUT) $(INCLUDE_DIR))
+ $(INCLUDE_DIR))
$(MAKE_DIRS):
$(call msg,MKDIR,,$@)
$(Q)mkdir -p $@
@@ -304,17 +299,6 @@ TRUNNER_BPFTOOL := $(DEFAULT_BPFTOOL)
USE_BOOTSTRAP := "bootstrap/"
endif
-$(OUTPUT)/runqslower: $(BPFOBJ) | $(DEFAULT_BPFTOOL) $(RUNQSLOWER_OUTPUT)
- $(Q)$(MAKE) $(submake_extras) -C $(TOOLSDIR)/bpf/runqslower \
- OUTPUT=$(RUNQSLOWER_OUTPUT) VMLINUX_BTF=$(VMLINUX_BTF) \
- BPFTOOL_OUTPUT=$(HOST_BUILD_DIR)/bpftool/ \
- BPFOBJ_OUTPUT=$(BUILD_DIR)/libbpf/ \
- BPFOBJ=$(BPFOBJ) BPF_INCLUDE=$(INCLUDE_DIR) \
- BPF_TARGET_ENDIAN=$(BPF_TARGET_ENDIAN) \
- EXTRA_CFLAGS='-g $(OPT_FLAGS) $(SAN_CFLAGS) $(EXTRA_CFLAGS)' \
- EXTRA_LDFLAGS='$(SAN_LDFLAGS) $(EXTRA_LDFLAGS)' && \
- cp $(RUNQSLOWER_OUTPUT)runqslower $@
-
TEST_GEN_PROGS_EXTENDED += $(TRUNNER_BPFTOOL)
$(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED): $(BPFOBJ)
@@ -398,7 +382,7 @@ $(HOST_BPFOBJ): $(wildcard $(BPFDIR)/*.[ch] $(BPFDIR)/Makefile) \
DESTDIR=$(HOST_SCRATCH_DIR)/ prefix= all install_headers
endif
-# vmlinux.h is first dumped to a temprorary file and then compared to
+# vmlinux.h is first dumped to a temporary file and then compared to
# the previous version. This helps to avoid unnecessary re-builds of
# $(TRUNNER_BPF_OBJS)
$(INCLUDE_DIR)/vmlinux.h: $(VMLINUX_BTF) $(BPFTOOL) | $(INCLUDE_DIR)
@@ -453,7 +437,9 @@ BPF_CFLAGS = -g -Wall -Werror -D__TARGET_ARCH_$(SRCARCH) $(MENDIAN) \
-I$(abspath $(OUTPUT)/../usr/include) \
-std=gnu11 \
-fno-strict-aliasing \
- -Wno-compare-distinct-pointer-types
+ -Wno-compare-distinct-pointer-types \
+ -Wno-initializer-overrides \
+ #
# TODO: enable me -Wsign-compare
CLANG_CFLAGS = $(CLANG_SYS_INCLUDES)
@@ -496,15 +482,17 @@ LINKED_SKELS := test_static_linked.skel.h linked_funcs.skel.h \
test_subskeleton.skel.h test_subskeleton_lib.skel.h \
test_usdt.skel.h
-LSKELS := fentry_test.c fexit_test.c fexit_sleep.c atomics.c \
- trace_printk.c trace_vprintk.c map_ptr_kern.c \
+LSKELS := fexit_sleep.c trace_printk.c trace_vprintk.c map_ptr_kern.c \
core_kern.c core_kern_overflow.c test_ringbuf.c \
- test_ringbuf_n.c test_ringbuf_map_key.c test_ringbuf_write.c
+ test_ringbuf_n.c test_ringbuf_map_key.c test_ringbuf_write.c \
+ test_ringbuf_overwrite.c
+
+LSKELS_SIGNED := fentry_test.c fexit_test.c atomics.c
# Generate both light skeleton and libbpf skeleton for these
LSKELS_EXTRA := test_ksyms_module.c test_ksyms_weak.c kfunc_call_test.c \
kfunc_call_test_subprog.c
-SKEL_BLACKLIST += $$(LSKELS)
+SKEL_BLACKLIST += $$(LSKELS) $$(LSKELS_SIGNED)
test_static_linked.skel.h-deps := test_static_linked1.bpf.o test_static_linked2.bpf.o
linked_funcs.skel.h-deps := linked_funcs1.bpf.o linked_funcs2.bpf.o
@@ -535,12 +523,15 @@ HEADERS_FOR_BPF_OBJS := $(wildcard $(BPFDIR)/*.bpf.h) \
# $2 - test runner extra "flavor" (e.g., no_alu32, cpuv4, bpf_gcc, etc)
define DEFINE_TEST_RUNNER
+LSKEL_SIGN := -S -k $(PRIVATE_KEY) -i $(VERIFICATION_CERT)
TRUNNER_OUTPUT := $(OUTPUT)$(if $2,/)$2
TRUNNER_BINARY := $1$(if $2,-)$2
TRUNNER_TEST_OBJS := $$(patsubst %.c,$$(TRUNNER_OUTPUT)/%.test.o, \
$$(notdir $$(wildcard $(TRUNNER_TESTS_DIR)/*.c)))
TRUNNER_EXTRA_OBJS := $$(patsubst %.c,$$(TRUNNER_OUTPUT)/%.o, \
$$(filter %.c,$(TRUNNER_EXTRA_SOURCES)))
+TRUNNER_LIB_OBJS := $$(patsubst %.c,$$(TRUNNER_OUTPUT)/%.o, \
+ $$(filter %.c,$(TRUNNER_LIB_SOURCES)))
TRUNNER_EXTRA_HDRS := $$(filter %.h,$(TRUNNER_EXTRA_SOURCES))
TRUNNER_TESTS_HDR := $(TRUNNER_TESTS_DIR)/tests.h
TRUNNER_BPF_SRCS := $$(notdir $$(wildcard $(TRUNNER_BPF_PROGS_DIR)/*.c))
@@ -550,6 +541,7 @@ TRUNNER_BPF_SKELS := $$(patsubst %.c,$$(TRUNNER_OUTPUT)/%.skel.h, \
$$(TRUNNER_BPF_SRCS)))
TRUNNER_BPF_LSKELS := $$(patsubst %.c,$$(TRUNNER_OUTPUT)/%.lskel.h, $$(LSKELS) $$(LSKELS_EXTRA))
TRUNNER_BPF_SKELS_LINKED := $$(addprefix $$(TRUNNER_OUTPUT)/,$(LINKED_SKELS))
+TRUNNER_BPF_LSKELS_SIGNED := $$(patsubst %.c,$$(TRUNNER_OUTPUT)/%.lskel.h, $$(LSKELS_SIGNED))
TEST_GEN_FILES += $$(TRUNNER_BPF_OBJS)
# Evaluate rules now with extra TRUNNER_XXX variables above already defined
@@ -604,6 +596,15 @@ $(TRUNNER_BPF_LSKELS): %.lskel.h: %.bpf.o $(BPFTOOL) | $(TRUNNER_OUTPUT)
$(Q)$$(BPFTOOL) gen skeleton -L $$(<:.o=.llinked3.o) name $$(notdir $$(<:.bpf.o=_lskel)) > $$@
$(Q)rm -f $$(<:.o=.llinked1.o) $$(<:.o=.llinked2.o) $$(<:.o=.llinked3.o)
+$(TRUNNER_BPF_LSKELS_SIGNED): %.lskel.h: %.bpf.o $(BPFTOOL) | $(TRUNNER_OUTPUT)
+ $$(call msg,GEN-SKEL,$(TRUNNER_BINARY) (signed),$$@)
+ $(Q)$$(BPFTOOL) gen object $$(<:.o=.llinked1.o) $$<
+ $(Q)$$(BPFTOOL) gen object $$(<:.o=.llinked2.o) $$(<:.o=.llinked1.o)
+ $(Q)$$(BPFTOOL) gen object $$(<:.o=.llinked3.o) $$(<:.o=.llinked2.o)
+ $(Q)diff $$(<:.o=.llinked2.o) $$(<:.o=.llinked3.o)
+ $(Q)$$(BPFTOOL) gen skeleton $(LSKEL_SIGN) $$(<:.o=.llinked3.o) name $$(notdir $$(<:.bpf.o=_lskel)) > $$@
+ $(Q)rm -f $$(<:.o=.llinked1.o) $$(<:.o=.llinked2.o) $$(<:.o=.llinked3.o)
+
$(LINKED_BPF_OBJS): %: $(TRUNNER_OUTPUT)/%
# .SECONDEXPANSION here allows to correctly expand %-deps variables as prerequisites
@@ -653,6 +654,7 @@ $(TRUNNER_TEST_OBJS:.o=.d): $(TRUNNER_OUTPUT)/%.test.d: \
$(TRUNNER_EXTRA_HDRS) \
$(TRUNNER_BPF_SKELS) \
$(TRUNNER_BPF_LSKELS) \
+ $(TRUNNER_BPF_LSKELS_SIGNED) \
$(TRUNNER_BPF_SKELS_LINKED) \
$$(BPFOBJ) | $(TRUNNER_OUTPUT)
@@ -667,11 +669,16 @@ $(foreach N,$(patsubst $(TRUNNER_OUTPUT)/%.o,%,$(TRUNNER_EXTRA_OBJS)), \
$(TRUNNER_EXTRA_OBJS): $(TRUNNER_OUTPUT)/%.o: \
%.c \
$(TRUNNER_EXTRA_HDRS) \
+ $(VERIFY_SIG_HDR) \
$(TRUNNER_TESTS_HDR) \
$$(BPFOBJ) | $(TRUNNER_OUTPUT)
$$(call msg,EXT-OBJ,$(TRUNNER_BINARY),$$@)
$(Q)$$(CC) $$(CFLAGS) -c $$< $$(LDLIBS) -o $$@
+$(TRUNNER_LIB_OBJS): $(TRUNNER_OUTPUT)/%.o:$(TOOLSDIR)/lib/%.c
+ $$(call msg,LIB-OBJ,$(TRUNNER_BINARY),$$@)
+ $(Q)$$(CC) $$(CFLAGS) -c $$< $$(LDLIBS) -o $$@
+
# non-flavored in-srctree builds receive special treatment, in particular, we
# do not need to copy extra resources (see e.g. test_btf_dump_case())
$(TRUNNER_BINARY)-extras: $(TRUNNER_EXTRA_FILES) | $(TRUNNER_OUTPUT)
@@ -685,6 +692,7 @@ $(OUTPUT)/$(TRUNNER_BINARY): | $(TRUNNER_BPF_OBJS)
$(OUTPUT)/$(TRUNNER_BINARY): $(TRUNNER_TEST_OBJS) \
$(TRUNNER_EXTRA_OBJS) $$(BPFOBJ) \
+ $(TRUNNER_LIB_OBJS) \
$(RESOLVE_BTFIDS) \
$(TRUNNER_BPFTOOL) \
$(OUTPUT)/veristat \
@@ -697,6 +705,19 @@ $(OUTPUT)/$(TRUNNER_BINARY): $(TRUNNER_TEST_OBJS) \
endef
+VERIFY_SIG_SETUP := $(CURDIR)/verify_sig_setup.sh
+VERIFY_SIG_HDR := verification_cert.h
+VERIFICATION_CERT := $(BUILD_DIR)/signing_key.der
+PRIVATE_KEY := $(BUILD_DIR)/signing_key.pem
+
+$(VERIFICATION_CERT) $(PRIVATE_KEY): $(VERIFY_SIG_SETUP)
+ $(Q)mkdir -p $(BUILD_DIR)
+ $(Q)$(VERIFY_SIG_SETUP) genkey $(BUILD_DIR)
+
+$(VERIFY_SIG_HDR): $(VERIFICATION_CERT)
+ $(Q)ln -fs $< test_progs_verification_cert && \
+ xxd -i test_progs_verification_cert > $@
+
# Define test_progs test runner.
TRUNNER_TESTS_DIR := prog_tests
TRUNNER_BPF_PROGS_DIR := progs
@@ -716,8 +737,10 @@ TRUNNER_EXTRA_SOURCES := test_progs.c \
disasm.c \
disasm_helpers.c \
json_writer.c \
+ $(VERIFY_SIG_HDR) \
flow_dissector_load.h \
ip_check_defrag_frags.h
+TRUNNER_LIB_SOURCES := find_bit.c
TRUNNER_EXTRA_FILES := $(OUTPUT)/urandom_read \
$(OUTPUT)/liburandom_read.so \
$(OUTPUT)/xdp_synproxy \
@@ -725,7 +748,7 @@ TRUNNER_EXTRA_FILES := $(OUTPUT)/urandom_read \
$(OUTPUT)/uprobe_multi \
$(TEST_KMOD_TARGETS) \
ima_setup.sh \
- verify_sig_setup.sh \
+ $(VERIFY_SIG_SETUP) \
$(wildcard progs/btf_dump_test_case_*.c) \
$(wildcard progs/*.bpf.o)
TRUNNER_BPF_BUILD_RULE := CLANG_BPF_BUILD_RULE
@@ -755,6 +778,7 @@ endif
TRUNNER_TESTS_DIR := map_tests
TRUNNER_BPF_PROGS_DIR := progs
TRUNNER_EXTRA_SOURCES := test_maps.c
+TRUNNER_LIB_SOURCES :=
TRUNNER_EXTRA_FILES :=
TRUNNER_BPF_BUILD_RULE := $$(error no BPF objects should be built)
TRUNNER_BPF_CFLAGS :=
@@ -776,7 +800,7 @@ $(OUTPUT)/test_verifier: test_verifier.c verifier/tests.h $(BPFOBJ) | $(OUTPUT)
$(Q)$(CC) $(CFLAGS) $(filter %.a %.o %.c,$^) $(LDLIBS) -o $@
# Include find_bit.c to compile xskxceiver.
-EXTRA_SRC := $(TOOLSDIR)/lib/find_bit.c
+EXTRA_SRC := $(TOOLSDIR)/lib/find_bit.c prog_tests/test_xsk.c prog_tests/test_xsk.h
$(OUTPUT)/xskxceiver: $(EXTRA_SRC) xskxceiver.c xskxceiver.h $(OUTPUT)/network_helpers.o $(OUTPUT)/xsk.o $(OUTPUT)/xsk_xdp_progs.skel.h $(BPFOBJ) | $(OUTPUT)
$(call msg,BINARY,,$@)
$(Q)$(CC) $(CFLAGS) $(filter %.a %.o %.c,$^) $(LDLIBS) -o $@
@@ -816,6 +840,7 @@ $(OUTPUT)/bench_bpf_hashmap_lookup.o: $(OUTPUT)/bpf_hashmap_lookup.skel.h
$(OUTPUT)/bench_htab_mem.o: $(OUTPUT)/htab_mem_bench.skel.h
$(OUTPUT)/bench_bpf_crypto.o: $(OUTPUT)/crypto_bench.skel.h
$(OUTPUT)/bench_sockmap.o: $(OUTPUT)/bench_sockmap_prog.skel.h
+$(OUTPUT)/bench_lpm_trie_map.o: $(OUTPUT)/lpm_trie_bench.skel.h $(OUTPUT)/lpm_trie_map.skel.h
$(OUTPUT)/bench.o: bench.h testing_helpers.h $(BPFOBJ)
$(OUTPUT)/bench: LDLIBS += -lm
$(OUTPUT)/bench: $(OUTPUT)/bench.o \
@@ -837,6 +862,7 @@ $(OUTPUT)/bench: $(OUTPUT)/bench.o \
$(OUTPUT)/bench_htab_mem.o \
$(OUTPUT)/bench_bpf_crypto.o \
$(OUTPUT)/bench_sockmap.o \
+ $(OUTPUT)/bench_lpm_trie_map.o \
#
$(call msg,BINARY,,$@)
$(Q)$(CC) $(CFLAGS) $(LDFLAGS) $(filter %.a %.o,$^) $(LDLIBS) -o $@
@@ -864,7 +890,8 @@ EXTRA_CLEAN := $(SCRATCH_DIR) $(HOST_SCRATCH_DIR) \
$(addprefix $(OUTPUT)/,*.o *.d *.skel.h *.lskel.h *.subskel.h \
no_alu32 cpuv4 bpf_gcc \
liburandom_read.so) \
- $(OUTPUT)/FEATURE-DUMP.selftests
+ $(OUTPUT)/FEATURE-DUMP.selftests \
+ test_progs_verification_cert
.PHONY: docs docs-clean
diff --git a/tools/testing/selftests/bpf/bench.c b/tools/testing/selftests/bpf/bench.c
index ddd73d06a1eb..bd29bb2e6cb5 100644
--- a/tools/testing/selftests/bpf/bench.c
+++ b/tools/testing/selftests/bpf/bench.c
@@ -284,6 +284,7 @@ extern struct argp bench_htab_mem_argp;
extern struct argp bench_trigger_batch_argp;
extern struct argp bench_crypto_argp;
extern struct argp bench_sockmap_argp;
+extern struct argp bench_lpm_trie_map_argp;
static const struct argp_child bench_parsers[] = {
{ &bench_ringbufs_argp, 0, "Ring buffers benchmark", 0 },
@@ -299,6 +300,7 @@ static const struct argp_child bench_parsers[] = {
{ &bench_trigger_batch_argp, 0, "BPF triggering benchmark", 0 },
{ &bench_crypto_argp, 0, "bpf crypto benchmark", 0 },
{ &bench_sockmap_argp, 0, "bpf sockmap benchmark", 0 },
+ { &bench_lpm_trie_map_argp, 0, "LPM trie map benchmark", 0 },
{},
};
@@ -499,7 +501,7 @@ extern const struct bench bench_rename_rawtp;
extern const struct bench bench_rename_fentry;
extern const struct bench bench_rename_fexit;
-/* pure counting benchmarks to establish theoretical lmits */
+/* pure counting benchmarks to establish theoretical limits */
extern const struct bench bench_trig_usermode_count;
extern const struct bench bench_trig_syscall_count;
extern const struct bench bench_trig_kernel_count;
@@ -510,6 +512,8 @@ extern const struct bench bench_trig_kretprobe;
extern const struct bench bench_trig_kprobe_multi;
extern const struct bench bench_trig_kretprobe_multi;
extern const struct bench bench_trig_fentry;
+extern const struct bench bench_trig_kprobe_multi_all;
+extern const struct bench bench_trig_kretprobe_multi_all;
extern const struct bench bench_trig_fexit;
extern const struct bench bench_trig_fmodret;
extern const struct bench bench_trig_tp;
@@ -558,6 +562,13 @@ extern const struct bench bench_htab_mem;
extern const struct bench bench_crypto_encrypt;
extern const struct bench bench_crypto_decrypt;
extern const struct bench bench_sockmap;
+extern const struct bench bench_lpm_trie_noop;
+extern const struct bench bench_lpm_trie_baseline;
+extern const struct bench bench_lpm_trie_lookup;
+extern const struct bench bench_lpm_trie_insert;
+extern const struct bench bench_lpm_trie_update;
+extern const struct bench bench_lpm_trie_delete;
+extern const struct bench bench_lpm_trie_free;
static const struct bench *benchs[] = {
&bench_count_global,
@@ -578,6 +589,8 @@ static const struct bench *benchs[] = {
&bench_trig_kprobe_multi,
&bench_trig_kretprobe_multi,
&bench_trig_fentry,
+ &bench_trig_kprobe_multi_all,
+ &bench_trig_kretprobe_multi_all,
&bench_trig_fexit,
&bench_trig_fmodret,
&bench_trig_tp,
@@ -625,6 +638,13 @@ static const struct bench *benchs[] = {
&bench_crypto_encrypt,
&bench_crypto_decrypt,
&bench_sockmap,
+ &bench_lpm_trie_noop,
+ &bench_lpm_trie_baseline,
+ &bench_lpm_trie_lookup,
+ &bench_lpm_trie_insert,
+ &bench_lpm_trie_update,
+ &bench_lpm_trie_delete,
+ &bench_lpm_trie_free,
};
static void find_benchmark(void)
diff --git a/tools/testing/selftests/bpf/bench.h b/tools/testing/selftests/bpf/bench.h
index 005c401b3e22..bea323820ffb 100644
--- a/tools/testing/selftests/bpf/bench.h
+++ b/tools/testing/selftests/bpf/bench.h
@@ -46,6 +46,7 @@ struct bench_res {
unsigned long gp_ns;
unsigned long gp_ct;
unsigned int stime;
+ unsigned long duration_ns;
};
struct bench {
diff --git a/tools/testing/selftests/bpf/benchs/bench_lpm_trie_map.c b/tools/testing/selftests/bpf/benchs/bench_lpm_trie_map.c
new file mode 100644
index 000000000000..246f6cb3387d
--- /dev/null
+++ b/tools/testing/selftests/bpf/benchs/bench_lpm_trie_map.c
@@ -0,0 +1,555 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Cloudflare */
+
+/*
+ * All of these benchmarks operate on tries with keys in the range
+ * [0, args.nr_entries), i.e. there are no gaps or partially filled
+ * branches of the trie for any key < args.nr_entries.
+ *
+ * This gives an idea of worst-case behaviour.
+ */
+
+#include <argp.h>
+#include <linux/time64.h>
+#include <linux/if_ether.h>
+#include "lpm_trie_bench.skel.h"
+#include "lpm_trie_map.skel.h"
+#include "bench.h"
+#include "testing_helpers.h"
+#include "progs/lpm_trie.h"
+
+static struct ctx {
+ struct lpm_trie_bench *bench;
+} ctx;
+
+static struct {
+ __u32 nr_entries;
+ __u32 prefixlen;
+ bool random;
+} args = {
+ .nr_entries = 0,
+ .prefixlen = 32,
+ .random = false,
+};
+
+enum {
+ ARG_NR_ENTRIES = 9000,
+ ARG_PREFIX_LEN,
+ ARG_RANDOM,
+};
+
+static const struct argp_option opts[] = {
+ { "nr_entries", ARG_NR_ENTRIES, "NR_ENTRIES", 0,
+ "Number of unique entries in the LPM trie" },
+ { "prefix_len", ARG_PREFIX_LEN, "PREFIX_LEN", 0,
+ "Number of prefix bits to use in the LPM trie" },
+ { "random", ARG_RANDOM, NULL, 0, "Access random keys during op" },
+ {},
+};
+
+static error_t lpm_parse_arg(int key, char *arg, struct argp_state *state)
+{
+ long ret;
+
+ switch (key) {
+ case ARG_NR_ENTRIES:
+ ret = strtol(arg, NULL, 10);
+ if (ret < 1 || ret > UINT_MAX) {
+ fprintf(stderr, "Invalid nr_entries count.");
+ argp_usage(state);
+ }
+ args.nr_entries = ret;
+ break;
+ case ARG_PREFIX_LEN:
+ ret = strtol(arg, NULL, 10);
+ if (ret < 1 || ret > UINT_MAX) {
+ fprintf(stderr, "Invalid prefix_len value.");
+ argp_usage(state);
+ }
+ args.prefixlen = ret;
+ break;
+ case ARG_RANDOM:
+ args.random = true;
+ break;
+ default:
+ return ARGP_ERR_UNKNOWN;
+ }
+ return 0;
+}
+
+const struct argp bench_lpm_trie_map_argp = {
+ .options = opts,
+ .parser = lpm_parse_arg,
+};
+
+static void validate_common(void)
+{
+ if (env.consumer_cnt != 0) {
+ fprintf(stderr, "benchmark doesn't support consumer\n");
+ exit(1);
+ }
+
+ if (args.nr_entries == 0) {
+ fprintf(stderr, "Missing --nr_entries parameter\n");
+ exit(1);
+ }
+
+ if ((1UL << args.prefixlen) < args.nr_entries) {
+ fprintf(stderr, "prefix_len value too small for nr_entries\n");
+ exit(1);
+ }
+}
+
+static void lpm_insert_validate(void)
+{
+ validate_common();
+
+ if (env.producer_cnt != 1) {
+ fprintf(stderr, "lpm-trie-insert requires a single producer\n");
+ exit(1);
+ }
+
+ if (args.random) {
+ fprintf(stderr, "lpm-trie-insert does not support --random\n");
+ exit(1);
+ }
+}
+
+static void lpm_delete_validate(void)
+{
+ validate_common();
+
+ if (env.producer_cnt != 1) {
+ fprintf(stderr, "lpm-trie-delete requires a single producer\n");
+ exit(1);
+ }
+
+ if (args.random) {
+ fprintf(stderr, "lpm-trie-delete does not support --random\n");
+ exit(1);
+ }
+}
+
+static void lpm_free_validate(void)
+{
+ validate_common();
+
+ if (env.producer_cnt != 1) {
+ fprintf(stderr, "lpm-trie-free requires a single producer\n");
+ exit(1);
+ }
+
+ if (args.random) {
+ fprintf(stderr, "lpm-trie-free does not support --random\n");
+ exit(1);
+ }
+}
+
+static struct trie_key *keys;
+static __u32 *vals;
+
+static void fill_map(int map_fd)
+{
+ int err;
+
+ DECLARE_LIBBPF_OPTS(bpf_map_batch_opts, opts,
+ .elem_flags = 0,
+ .flags = 0,
+ );
+
+ err = bpf_map_update_batch(map_fd, keys, vals, &args.nr_entries, &opts);
+ if (err) {
+ fprintf(stderr, "failed to batch update keys to map: %d\n",
+ -err);
+ exit(1);
+ }
+}
+
+static void empty_map(int map_fd)
+{
+ int err;
+
+ DECLARE_LIBBPF_OPTS(bpf_map_batch_opts, opts,
+ .elem_flags = 0,
+ .flags = 0,
+ );
+
+ err = bpf_map_delete_batch(map_fd, keys, &args.nr_entries, &opts);
+ if (err) {
+ fprintf(stderr, "failed to batch delete keys for map: %d\n",
+ -err);
+ exit(1);
+ }
+}
+
+static void attach_prog(void)
+{
+ int i;
+
+ ctx.bench = lpm_trie_bench__open_and_load();
+ if (!ctx.bench) {
+ fprintf(stderr, "failed to open skeleton\n");
+ exit(1);
+ }
+
+ ctx.bench->bss->nr_entries = args.nr_entries;
+ ctx.bench->bss->prefixlen = args.prefixlen;
+ ctx.bench->bss->random = args.random;
+
+ if (lpm_trie_bench__attach(ctx.bench)) {
+ fprintf(stderr, "failed to attach skeleton\n");
+ exit(1);
+ }
+
+ keys = calloc(args.nr_entries, sizeof(*keys));
+ vals = calloc(args.nr_entries, sizeof(*vals));
+
+ for (i = 0; i < args.nr_entries; i++) {
+ struct trie_key *k = &keys[i];
+ __u32 *v = &vals[i];
+
+ k->prefixlen = args.prefixlen;
+ k->data = i;
+ *v = 1;
+ }
+}
+
+static void attach_prog_and_fill_map(void)
+{
+ int fd;
+
+ attach_prog();
+
+ fd = bpf_map__fd(ctx.bench->maps.trie_map);
+ fill_map(fd);
+}
+
+static void lpm_noop_setup(void)
+{
+ attach_prog();
+ ctx.bench->bss->op = LPM_OP_NOOP;
+}
+
+static void lpm_baseline_setup(void)
+{
+ attach_prog();
+ ctx.bench->bss->op = LPM_OP_BASELINE;
+}
+
+static void lpm_lookup_setup(void)
+{
+ attach_prog_and_fill_map();
+ ctx.bench->bss->op = LPM_OP_LOOKUP;
+}
+
+static void lpm_insert_setup(void)
+{
+ attach_prog();
+ ctx.bench->bss->op = LPM_OP_INSERT;
+}
+
+static void lpm_update_setup(void)
+{
+ attach_prog_and_fill_map();
+ ctx.bench->bss->op = LPM_OP_UPDATE;
+}
+
+static void lpm_delete_setup(void)
+{
+ attach_prog_and_fill_map();
+ ctx.bench->bss->op = LPM_OP_DELETE;
+}
+
+static void lpm_free_setup(void)
+{
+ attach_prog();
+ ctx.bench->bss->op = LPM_OP_FREE;
+}
+
+static void lpm_measure(struct bench_res *res)
+{
+ res->hits = atomic_swap(&ctx.bench->bss->hits, 0);
+ res->duration_ns = atomic_swap(&ctx.bench->bss->duration_ns, 0);
+}
+
+static void bench_reinit_map(void)
+{
+ int fd = bpf_map__fd(ctx.bench->maps.trie_map);
+
+ switch (ctx.bench->bss->op) {
+ case LPM_OP_INSERT:
+ /* trie_map needs to be emptied */
+ empty_map(fd);
+ break;
+ case LPM_OP_DELETE:
+ /* trie_map needs to be refilled */
+ fill_map(fd);
+ break;
+ default:
+ fprintf(stderr, "Unexpected REINIT return code for op %d\n",
+ ctx.bench->bss->op);
+ exit(1);
+ }
+}
+
+/* For NOOP, BASELINE, LOOKUP, INSERT, UPDATE, and DELETE */
+static void *lpm_producer(void *unused __always_unused)
+{
+ int err;
+ char in[ETH_HLEN]; /* unused */
+
+ LIBBPF_OPTS(bpf_test_run_opts, opts, .data_in = in,
+ .data_size_in = sizeof(in), .repeat = 1, );
+
+ while (true) {
+ int fd = bpf_program__fd(ctx.bench->progs.run_bench);
+ err = bpf_prog_test_run_opts(fd, &opts);
+ if (err) {
+ fprintf(stderr, "failed to run BPF prog: %d\n", err);
+ exit(1);
+ }
+
+ /* Check for kernel error code */
+ if ((int)opts.retval < 0) {
+ fprintf(stderr, "BPF prog returned error: %d\n",
+ opts.retval);
+ exit(1);
+ }
+
+ switch (opts.retval) {
+ case LPM_BENCH_SUCCESS:
+ break;
+ case LPM_BENCH_REINIT_MAP:
+ bench_reinit_map();
+ break;
+ default:
+ fprintf(stderr, "Unexpected BPF prog return code %d for op %d\n",
+ opts.retval, ctx.bench->bss->op);
+ exit(1);
+ }
+ }
+
+ return NULL;
+}
+
+static void *lpm_free_producer(void *unused __always_unused)
+{
+ while (true) {
+ struct lpm_trie_map *skel;
+
+ skel = lpm_trie_map__open_and_load();
+ if (!skel) {
+ fprintf(stderr, "failed to open skeleton\n");
+ exit(1);
+ }
+
+ fill_map(bpf_map__fd(skel->maps.trie_free_map));
+ lpm_trie_map__destroy(skel);
+ }
+
+ return NULL;
+}
+
+/*
+ * The standard bench op_report_*() functions assume measurements are
+ * taken over a 1-second interval but operations that modify the map
+ * (INSERT, DELETE, and FREE) cannot run indefinitely without
+ * "resetting" the map to the initial state. Depending on the size of
+ * the map, this likely needs to happen before the 1-second timer fires.
+ *
+ * Calculate the fraction of a second over which the op measurement was
+ * taken (to ignore any time spent doing the reset) and report the
+ * throughput results per second.
+ */
+static void frac_second_report_progress(int iter, struct bench_res *res,
+ long delta_ns, double rate_divisor,
+ char rate)
+{
+ double hits_per_sec, hits_per_prod;
+
+ hits_per_sec = res->hits / rate_divisor /
+ (res->duration_ns / (double)NSEC_PER_SEC);
+ hits_per_prod = hits_per_sec / env.producer_cnt;
+
+ printf("Iter %3d (%7.3lfus): ", iter,
+ (delta_ns - NSEC_PER_SEC) / 1000.0);
+ printf("hits %8.3lf%c/s (%7.3lf%c/prod)\n", hits_per_sec, rate,
+ hits_per_prod, rate);
+}
+
+static void frac_second_report_final(struct bench_res res[], int res_cnt,
+ double lat_divisor, double rate_divisor,
+ char rate, const char *unit)
+{
+ double hits_mean = 0.0, hits_stddev = 0.0;
+ double latency = 0.0;
+ int i;
+
+ for (i = 0; i < res_cnt; i++) {
+ double val = res[i].hits / rate_divisor /
+ (res[i].duration_ns / (double)NSEC_PER_SEC);
+ hits_mean += val / (0.0 + res_cnt);
+ latency += res[i].duration_ns / res[i].hits / (0.0 + res_cnt);
+ }
+
+ if (res_cnt > 1) {
+ for (i = 0; i < res_cnt; i++) {
+ double val =
+ res[i].hits / rate_divisor /
+ (res[i].duration_ns / (double)NSEC_PER_SEC);
+ hits_stddev += (hits_mean - val) * (hits_mean - val) /
+ (res_cnt - 1.0);
+ }
+
+ hits_stddev = sqrt(hits_stddev);
+ }
+ printf("Summary: throughput %8.3lf \u00B1 %5.3lf %c ops/s (%7.3lf%c ops/prod), ",
+ hits_mean, hits_stddev, rate, hits_mean / env.producer_cnt,
+ rate);
+ printf("latency %8.3lf %s/op\n",
+ latency / lat_divisor / env.producer_cnt, unit);
+}
+
+static void insert_ops_report_progress(int iter, struct bench_res *res,
+ long delta_ns)
+{
+ double rate_divisor = 1000000.0;
+ char rate = 'M';
+
+ frac_second_report_progress(iter, res, delta_ns, rate_divisor, rate);
+}
+
+static void delete_ops_report_progress(int iter, struct bench_res *res,
+ long delta_ns)
+{
+ double rate_divisor = 1000000.0;
+ char rate = 'M';
+
+ frac_second_report_progress(iter, res, delta_ns, rate_divisor, rate);
+}
+
+static void free_ops_report_progress(int iter, struct bench_res *res,
+ long delta_ns)
+{
+ double rate_divisor = 1000.0;
+ char rate = 'K';
+
+ frac_second_report_progress(iter, res, delta_ns, rate_divisor, rate);
+}
+
+static void insert_ops_report_final(struct bench_res res[], int res_cnt)
+{
+ double lat_divisor = 1.0;
+ double rate_divisor = 1000000.0;
+ const char *unit = "ns";
+ char rate = 'M';
+
+ frac_second_report_final(res, res_cnt, lat_divisor, rate_divisor, rate,
+ unit);
+}
+
+static void delete_ops_report_final(struct bench_res res[], int res_cnt)
+{
+ double lat_divisor = 1.0;
+ double rate_divisor = 1000000.0;
+ const char *unit = "ns";
+ char rate = 'M';
+
+ frac_second_report_final(res, res_cnt, lat_divisor, rate_divisor, rate,
+ unit);
+}
+
+static void free_ops_report_final(struct bench_res res[], int res_cnt)
+{
+ double lat_divisor = 1000000.0;
+ double rate_divisor = 1000.0;
+ const char *unit = "ms";
+ char rate = 'K';
+
+ frac_second_report_final(res, res_cnt, lat_divisor, rate_divisor, rate,
+ unit);
+}
+
+/* noop bench measures harness-overhead */
+const struct bench bench_lpm_trie_noop = {
+ .name = "lpm-trie-noop",
+ .argp = &bench_lpm_trie_map_argp,
+ .validate = validate_common,
+ .setup = lpm_noop_setup,
+ .producer_thread = lpm_producer,
+ .measure = lpm_measure,
+ .report_progress = ops_report_progress,
+ .report_final = ops_report_final,
+};
+
+/* baseline overhead for lookup and update */
+const struct bench bench_lpm_trie_baseline = {
+ .name = "lpm-trie-baseline",
+ .argp = &bench_lpm_trie_map_argp,
+ .validate = validate_common,
+ .setup = lpm_baseline_setup,
+ .producer_thread = lpm_producer,
+ .measure = lpm_measure,
+ .report_progress = ops_report_progress,
+ .report_final = ops_report_final,
+};
+
+/* measure cost of doing a lookup on existing entries in a full trie */
+const struct bench bench_lpm_trie_lookup = {
+ .name = "lpm-trie-lookup",
+ .argp = &bench_lpm_trie_map_argp,
+ .validate = validate_common,
+ .setup = lpm_lookup_setup,
+ .producer_thread = lpm_producer,
+ .measure = lpm_measure,
+ .report_progress = ops_report_progress,
+ .report_final = ops_report_final,
+};
+
+/* measure cost of inserting new entries into an empty trie */
+const struct bench bench_lpm_trie_insert = {
+ .name = "lpm-trie-insert",
+ .argp = &bench_lpm_trie_map_argp,
+ .validate = lpm_insert_validate,
+ .setup = lpm_insert_setup,
+ .producer_thread = lpm_producer,
+ .measure = lpm_measure,
+ .report_progress = insert_ops_report_progress,
+ .report_final = insert_ops_report_final,
+};
+
+/* measure cost of updating existing entries in a full trie */
+const struct bench bench_lpm_trie_update = {
+ .name = "lpm-trie-update",
+ .argp = &bench_lpm_trie_map_argp,
+ .validate = validate_common,
+ .setup = lpm_update_setup,
+ .producer_thread = lpm_producer,
+ .measure = lpm_measure,
+ .report_progress = ops_report_progress,
+ .report_final = ops_report_final,
+};
+
+/* measure cost of deleting existing entries from a full trie */
+const struct bench bench_lpm_trie_delete = {
+ .name = "lpm-trie-delete",
+ .argp = &bench_lpm_trie_map_argp,
+ .validate = lpm_delete_validate,
+ .setup = lpm_delete_setup,
+ .producer_thread = lpm_producer,
+ .measure = lpm_measure,
+ .report_progress = delete_ops_report_progress,
+ .report_final = delete_ops_report_final,
+};
+
+/* measure cost of freeing a full trie */
+const struct bench bench_lpm_trie_free = {
+ .name = "lpm-trie-free",
+ .argp = &bench_lpm_trie_map_argp,
+ .validate = lpm_free_validate,
+ .setup = lpm_free_setup,
+ .producer_thread = lpm_free_producer,
+ .measure = lpm_measure,
+ .report_progress = free_ops_report_progress,
+ .report_final = free_ops_report_final,
+};
diff --git a/tools/testing/selftests/bpf/benchs/bench_ringbufs.c b/tools/testing/selftests/bpf/benchs/bench_ringbufs.c
index e1ee979e6acc..01bdce692799 100644
--- a/tools/testing/selftests/bpf/benchs/bench_ringbufs.c
+++ b/tools/testing/selftests/bpf/benchs/bench_ringbufs.c
@@ -19,6 +19,8 @@ static struct {
int ringbuf_sz; /* per-ringbuf, in bytes */
bool ringbuf_use_output; /* use slower output API */
int perfbuf_sz; /* per-CPU size, in pages */
+ bool overwrite;
+ bool bench_producer;
} args = {
.back2back = false,
.batch_cnt = 500,
@@ -27,6 +29,8 @@ static struct {
.ringbuf_sz = 512 * 1024,
.ringbuf_use_output = false,
.perfbuf_sz = 128,
+ .overwrite = false,
+ .bench_producer = false,
};
enum {
@@ -35,6 +39,8 @@ enum {
ARG_RB_BATCH_CNT = 2002,
ARG_RB_SAMPLED = 2003,
ARG_RB_SAMPLE_RATE = 2004,
+ ARG_RB_OVERWRITE = 2005,
+ ARG_RB_BENCH_PRODUCER = 2006,
};
static const struct argp_option opts[] = {
@@ -43,6 +49,8 @@ static const struct argp_option opts[] = {
{ "rb-batch-cnt", ARG_RB_BATCH_CNT, "CNT", 0, "Set BPF-side record batch count"},
{ "rb-sampled", ARG_RB_SAMPLED, NULL, 0, "Notification sampling"},
{ "rb-sample-rate", ARG_RB_SAMPLE_RATE, "RATE", 0, "Notification sample rate"},
+ { "rb-overwrite", ARG_RB_OVERWRITE, NULL, 0, "Overwrite mode"},
+ { "rb-bench-producer", ARG_RB_BENCH_PRODUCER, NULL, 0, "Benchmark producer"},
{},
};
@@ -72,6 +80,12 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state)
argp_usage(state);
}
break;
+ case ARG_RB_OVERWRITE:
+ args.overwrite = true;
+ break;
+ case ARG_RB_BENCH_PRODUCER:
+ args.bench_producer = true;
+ break;
default:
return ARGP_ERR_UNKNOWN;
}
@@ -95,8 +109,33 @@ static inline void bufs_trigger_batch(void)
static void bufs_validate(void)
{
- if (env.consumer_cnt != 1) {
- fprintf(stderr, "rb-libbpf benchmark needs one consumer!\n");
+ if (args.bench_producer && strcmp(env.bench_name, "rb-libbpf")) {
+ fprintf(stderr, "--rb-bench-producer only works with rb-libbpf!\n");
+ exit(1);
+ }
+
+ if (args.overwrite && !args.bench_producer) {
+ fprintf(stderr, "overwrite mode only works with --rb-bench-producer for now!\n");
+ exit(1);
+ }
+
+ if (args.bench_producer && env.consumer_cnt != 0) {
+ fprintf(stderr, "no consumer is needed for --rb-bench-producer!\n");
+ exit(1);
+ }
+
+ if (args.bench_producer && args.back2back) {
+ fprintf(stderr, "back-to-back mode makes no sense for --rb-bench-producer!\n");
+ exit(1);
+ }
+
+ if (args.bench_producer && args.sampled) {
+ fprintf(stderr, "sampling mode makes no sense for --rb-bench-producer!\n");
+ exit(1);
+ }
+
+ if (!args.bench_producer && env.consumer_cnt != 1) {
+ fprintf(stderr, "benchmarks without --rb-bench-producer require exactly one consumer!\n");
exit(1);
}
@@ -128,12 +167,17 @@ static void ringbuf_libbpf_measure(struct bench_res *res)
{
struct ringbuf_libbpf_ctx *ctx = &ringbuf_libbpf_ctx;
- res->hits = atomic_swap(&buf_hits.value, 0);
+ if (args.bench_producer)
+ res->hits = atomic_swap(&ctx->skel->bss->hits, 0);
+ else
+ res->hits = atomic_swap(&buf_hits.value, 0);
res->drops = atomic_swap(&ctx->skel->bss->dropped, 0);
}
static struct ringbuf_bench *ringbuf_setup_skeleton(void)
{
+ __u32 flags;
+ struct bpf_map *ringbuf;
struct ringbuf_bench *skel;
setup_libbpf();
@@ -146,12 +190,19 @@ static struct ringbuf_bench *ringbuf_setup_skeleton(void)
skel->rodata->batch_cnt = args.batch_cnt;
skel->rodata->use_output = args.ringbuf_use_output ? 1 : 0;
+ skel->rodata->bench_producer = args.bench_producer;
if (args.sampled)
/* record data + header take 16 bytes */
skel->rodata->wakeup_data_size = args.sample_rate * 16;
- bpf_map__set_max_entries(skel->maps.ringbuf, args.ringbuf_sz);
+ ringbuf = skel->maps.ringbuf;
+ if (args.overwrite) {
+ flags = bpf_map__map_flags(ringbuf) | BPF_F_RB_OVERWRITE;
+ bpf_map__set_map_flags(ringbuf, flags);
+ }
+
+ bpf_map__set_max_entries(ringbuf, args.ringbuf_sz);
if (ringbuf_bench__load(skel)) {
fprintf(stderr, "failed to load skeleton\n");
@@ -171,10 +222,12 @@ static void ringbuf_libbpf_setup(void)
{
struct ringbuf_libbpf_ctx *ctx = &ringbuf_libbpf_ctx;
struct bpf_link *link;
+ int map_fd;
ctx->skel = ringbuf_setup_skeleton();
- ctx->ringbuf = ring_buffer__new(bpf_map__fd(ctx->skel->maps.ringbuf),
- buf_process_sample, NULL, NULL);
+
+ map_fd = bpf_map__fd(ctx->skel->maps.ringbuf);
+ ctx->ringbuf = ring_buffer__new(map_fd, buf_process_sample, NULL, NULL);
if (!ctx->ringbuf) {
fprintf(stderr, "failed to create ringbuf\n");
exit(1);
diff --git a/tools/testing/selftests/bpf/benchs/bench_sockmap.c b/tools/testing/selftests/bpf/benchs/bench_sockmap.c
index 8ebf563a67a2..cfc072aa7fff 100644
--- a/tools/testing/selftests/bpf/benchs/bench_sockmap.c
+++ b/tools/testing/selftests/bpf/benchs/bench_sockmap.c
@@ -10,6 +10,7 @@
#include <argp.h>
#include "bench.h"
#include "bench_sockmap_prog.skel.h"
+#include "bpf_util.h"
#define FILE_SIZE (128 * 1024)
#define DATA_REPEAT_SIZE 10
@@ -124,8 +125,8 @@ static void bench_sockmap_prog_destroy(void)
{
int i;
- for (i = 0; i < sizeof(ctx.fds); i++) {
- if (ctx.fds[0] > 0)
+ for (i = 0; i < ARRAY_SIZE(ctx.fds); i++) {
+ if (ctx.fds[i] > 0)
close(ctx.fds[i]);
}
diff --git a/tools/testing/selftests/bpf/benchs/bench_trigger.c b/tools/testing/selftests/bpf/benchs/bench_trigger.c
index 82327657846e..34018fc3927f 100644
--- a/tools/testing/selftests/bpf/benchs/bench_trigger.c
+++ b/tools/testing/selftests/bpf/benchs/bench_trigger.c
@@ -180,10 +180,10 @@ static void trigger_kernel_count_setup(void)
{
setup_ctx();
bpf_program__set_autoload(ctx.skel->progs.trigger_driver, false);
- bpf_program__set_autoload(ctx.skel->progs.trigger_count, true);
+ bpf_program__set_autoload(ctx.skel->progs.trigger_kernel_count, true);
load_ctx();
/* override driver program */
- ctx.driver_prog_fd = bpf_program__fd(ctx.skel->progs.trigger_count);
+ ctx.driver_prog_fd = bpf_program__fd(ctx.skel->progs.trigger_kernel_count);
}
static void trigger_kprobe_setup(void)
@@ -226,6 +226,65 @@ static void trigger_fentry_setup(void)
attach_bpf(ctx.skel->progs.bench_trigger_fentry);
}
+static void attach_ksyms_all(struct bpf_program *empty, bool kretprobe)
+{
+ LIBBPF_OPTS(bpf_kprobe_multi_opts, opts);
+ char **syms = NULL;
+ size_t cnt = 0;
+
+ /* Some recursive functions will be skipped in
+ * bpf_get_ksyms -> skip_entry, as they can introduce sufficient
+ * overhead. However, it's difficut to skip all the recursive
+ * functions for a debug kernel.
+ *
+ * So, don't run the kprobe-multi-all and kretprobe-multi-all on
+ * a debug kernel.
+ */
+ if (bpf_get_ksyms(&syms, &cnt, true)) {
+ fprintf(stderr, "failed to get ksyms\n");
+ exit(1);
+ }
+
+ opts.syms = (const char **) syms;
+ opts.cnt = cnt;
+ opts.retprobe = kretprobe;
+ /* attach empty to all the kernel functions except bpf_get_numa_node_id. */
+ if (!bpf_program__attach_kprobe_multi_opts(empty, NULL, &opts)) {
+ fprintf(stderr, "failed to attach bpf_program__attach_kprobe_multi_opts to all\n");
+ exit(1);
+ }
+}
+
+static void trigger_kprobe_multi_all_setup(void)
+{
+ struct bpf_program *prog, *empty;
+
+ setup_ctx();
+ empty = ctx.skel->progs.bench_kprobe_multi_empty;
+ prog = ctx.skel->progs.bench_trigger_kprobe_multi;
+ bpf_program__set_autoload(empty, true);
+ bpf_program__set_autoload(prog, true);
+ load_ctx();
+
+ attach_ksyms_all(empty, false);
+ attach_bpf(prog);
+}
+
+static void trigger_kretprobe_multi_all_setup(void)
+{
+ struct bpf_program *prog, *empty;
+
+ setup_ctx();
+ empty = ctx.skel->progs.bench_kretprobe_multi_empty;
+ prog = ctx.skel->progs.bench_trigger_kretprobe_multi;
+ bpf_program__set_autoload(empty, true);
+ bpf_program__set_autoload(prog, true);
+ load_ctx();
+
+ attach_ksyms_all(empty, true);
+ attach_bpf(prog);
+}
+
static void trigger_fexit_setup(void)
{
setup_ctx();
@@ -512,6 +571,8 @@ BENCH_TRIG_KERNEL(kretprobe, "kretprobe");
BENCH_TRIG_KERNEL(kprobe_multi, "kprobe-multi");
BENCH_TRIG_KERNEL(kretprobe_multi, "kretprobe-multi");
BENCH_TRIG_KERNEL(fentry, "fentry");
+BENCH_TRIG_KERNEL(kprobe_multi_all, "kprobe-multi-all");
+BENCH_TRIG_KERNEL(kretprobe_multi_all, "kretprobe-multi-all");
BENCH_TRIG_KERNEL(fexit, "fexit");
BENCH_TRIG_KERNEL(fmodret, "fmodret");
BENCH_TRIG_KERNEL(tp, "tp");
diff --git a/tools/testing/selftests/bpf/benchs/run_bench_ringbufs.sh b/tools/testing/selftests/bpf/benchs/run_bench_ringbufs.sh
index 91e3567962ff..83e05e837871 100755
--- a/tools/testing/selftests/bpf/benchs/run_bench_ringbufs.sh
+++ b/tools/testing/selftests/bpf/benchs/run_bench_ringbufs.sh
@@ -49,3 +49,7 @@ for b in 1 2 3 4 8 12 16 20 24 28 32 36 40 44 48 52; do
summarize "rb-libbpf nr_prod $b" "$($RUN_RB_BENCH -p$b --rb-batch-cnt 50 rb-libbpf)"
done
+header "Ringbuf, multi-producer contention in overwrite mode, no consumer"
+for b in 1 2 3 4 8 12 16 20 24 28 32 36 40 44 48 52; do
+ summarize "rb-prod nr_prod $b" "$($RUN_BENCH -p$b --rb-batch-cnt 50 --rb-overwrite --rb-bench-producer rb-libbpf)"
+done
diff --git a/tools/testing/selftests/bpf/benchs/run_bench_trigger.sh b/tools/testing/selftests/bpf/benchs/run_bench_trigger.sh
index a690f5a68b6b..f7573708a0c3 100755
--- a/tools/testing/selftests/bpf/benchs/run_bench_trigger.sh
+++ b/tools/testing/selftests/bpf/benchs/run_bench_trigger.sh
@@ -6,8 +6,8 @@ def_tests=( \
usermode-count kernel-count syscall-count \
fentry fexit fmodret \
rawtp tp \
- kprobe kprobe-multi \
- kretprobe kretprobe-multi \
+ kprobe kprobe-multi kprobe-multi-all \
+ kretprobe kretprobe-multi kretprobe-multi-all \
)
tests=("$@")
diff --git a/tools/testing/selftests/bpf/bpf_arena_list.h b/tools/testing/selftests/bpf/bpf_arena_list.h
index 85dbc3ea4da5..e16fa7d95fcf 100644
--- a/tools/testing/selftests/bpf/bpf_arena_list.h
+++ b/tools/testing/selftests/bpf/bpf_arena_list.h
@@ -64,14 +64,12 @@ static inline void list_add_head(arena_list_node_t *n, arena_list_head_t *h)
static inline void __list_del(arena_list_node_t *n)
{
- arena_list_node_t *next = n->next, *tmp;
+ arena_list_node_t *next = n->next;
arena_list_node_t * __arena *pprev = n->pprev;
cast_user(next);
cast_kern(pprev);
- tmp = *pprev;
- cast_kern(tmp);
- WRITE_ONCE(tmp, next);
+ WRITE_ONCE(*pprev, next);
if (next) {
cast_user(pprev);
cast_kern(next);
diff --git a/tools/testing/selftests/bpf/bpf_arena_strsearch.h b/tools/testing/selftests/bpf/bpf_arena_strsearch.h
new file mode 100644
index 000000000000..c1b6eaa905bb
--- /dev/null
+++ b/tools/testing/selftests/bpf/bpf_arena_strsearch.h
@@ -0,0 +1,128 @@
+/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+#pragma once
+#include "bpf_arena_common.h"
+
+__noinline int bpf_arena_strlen(const char __arena *s __arg_arena)
+{
+ const char __arena *sc;
+
+ for (sc = s; *sc != '\0'; ++sc)
+ cond_break;
+ return sc - s;
+}
+
+/**
+ * glob_match - Shell-style pattern matching, like !fnmatch(pat, str, 0)
+ * @pat: Shell-style pattern to match, e.g. "*.[ch]".
+ * @str: String to match. The pattern must match the entire string.
+ *
+ * Perform shell-style glob matching, returning true (1) if the match
+ * succeeds, or false (0) if it fails. Equivalent to !fnmatch(@pat, @str, 0).
+ *
+ * Pattern metacharacters are ?, *, [ and \.
+ * (And, inside character classes, !, - and ].)
+ *
+ * This is small and simple implementation intended for device blacklists
+ * where a string is matched against a number of patterns. Thus, it
+ * does not preprocess the patterns. It is non-recursive, and run-time
+ * is at most quadratic: strlen(@str)*strlen(@pat).
+ *
+ * An example of the worst case is glob_match("*aaaaa", "aaaaaaaaaa");
+ * it takes 6 passes over the pattern before matching the string.
+ *
+ * Like !fnmatch(@pat, @str, 0) and unlike the shell, this does NOT
+ * treat / or leading . specially; it isn't actually used for pathnames.
+ *
+ * Note that according to glob(7) (and unlike bash), character classes
+ * are complemented by a leading !; this does not support the regex-style
+ * [^a-z] syntax.
+ *
+ * An opening bracket without a matching close is matched literally.
+ */
+__noinline bool glob_match(char const __arena *pat __arg_arena, char const __arena *str __arg_arena)
+{
+ /*
+ * Backtrack to previous * on mismatch and retry starting one
+ * character later in the string. Because * matches all characters
+ * (no exception for /), it can be easily proved that there's
+ * never a need to backtrack multiple levels.
+ */
+ char const __arena *back_pat = NULL, *back_str;
+
+ /*
+ * Loop over each token (character or class) in pat, matching
+ * it against the remaining unmatched tail of str. Return false
+ * on mismatch, or true after matching the trailing nul bytes.
+ */
+ for (;;) {
+ unsigned char c = *str++;
+ unsigned char d = *pat++;
+
+ switch (d) {
+ case '?': /* Wildcard: anything but nul */
+ if (c == '\0')
+ return false;
+ break;
+ case '*': /* Any-length wildcard */
+ if (*pat == '\0') /* Optimize trailing * case */
+ return true;
+ back_pat = pat;
+ back_str = --str; /* Allow zero-length match */
+ break;
+ case '[': { /* Character class */
+ bool match = false, inverted = (*pat == '!');
+ char const __arena *class = pat + inverted;
+ unsigned char a = *class++;
+
+ /*
+ * Iterate over each span in the character class.
+ * A span is either a single character a, or a
+ * range a-b. The first span may begin with ']'.
+ */
+ do {
+ unsigned char b = a;
+
+ if (a == '\0') /* Malformed */
+ goto literal;
+
+ if (class[0] == '-' && class[1] != ']') {
+ b = class[1];
+
+ if (b == '\0')
+ goto literal;
+
+ class += 2;
+ /* Any special action if a > b? */
+ }
+ match |= (a <= c && c <= b);
+ cond_break;
+ } while ((a = *class++) != ']');
+
+ if (match == inverted)
+ goto backtrack;
+ pat = class;
+ }
+ break;
+ case '\\':
+ d = *pat++;
+ __attribute__((__fallthrough__));
+ default: /* Literal character */
+literal:
+ if (c == d) {
+ if (d == '\0')
+ return true;
+ break;
+ }
+backtrack:
+ if (c == '\0' || !back_pat)
+ return false; /* No point continuing */
+ /* Try again from last *, one character later in str. */
+ pat = back_pat;
+ str = ++back_str;
+ break;
+ }
+ cond_break;
+ }
+ return false;
+}
diff --git a/tools/testing/selftests/bpf/bpf_experimental.h b/tools/testing/selftests/bpf/bpf_experimental.h
index da7e230f2781..2cd9165c7348 100644
--- a/tools/testing/selftests/bpf/bpf_experimental.h
+++ b/tools/testing/selftests/bpf/bpf_experimental.h
@@ -219,7 +219,7 @@ extern void bpf_put_file(struct file *file) __ksym;
* including the NULL termination character, stored in the supplied
* buffer. On error, a negative integer is returned.
*/
-extern int bpf_path_d_path(struct path *path, char *buf, size_t buf__sz) __ksym;
+extern int bpf_path_d_path(const struct path *path, char *buf, size_t buf__sz) __ksym;
/* This macro must be used to mark the exception callback corresponding to the
* main program. For example:
@@ -599,4 +599,58 @@ extern void bpf_iter_dmabuf_destroy(struct bpf_iter_dmabuf *it) __weak __ksym;
extern int bpf_cgroup_read_xattr(struct cgroup *cgroup, const char *name__str,
struct bpf_dynptr *value_p) __weak __ksym;
+#define PREEMPT_BITS 8
+#define SOFTIRQ_BITS 8
+#define HARDIRQ_BITS 4
+#define NMI_BITS 4
+
+#define PREEMPT_SHIFT 0
+#define SOFTIRQ_SHIFT (PREEMPT_SHIFT + PREEMPT_BITS)
+#define HARDIRQ_SHIFT (SOFTIRQ_SHIFT + SOFTIRQ_BITS)
+#define NMI_SHIFT (HARDIRQ_SHIFT + HARDIRQ_BITS)
+
+#define __IRQ_MASK(x) ((1UL << (x))-1)
+
+#define SOFTIRQ_MASK (__IRQ_MASK(SOFTIRQ_BITS) << SOFTIRQ_SHIFT)
+#define HARDIRQ_MASK (__IRQ_MASK(HARDIRQ_BITS) << HARDIRQ_SHIFT)
+#define NMI_MASK (__IRQ_MASK(NMI_BITS) << NMI_SHIFT)
+
+extern bool CONFIG_PREEMPT_RT __kconfig __weak;
+#ifdef bpf_target_x86
+extern const int __preempt_count __ksym;
+#endif
+
+struct task_struct___preempt_rt {
+ int softirq_disable_cnt;
+} __attribute__((preserve_access_index));
+
+static inline int get_preempt_count(void)
+{
+#if defined(bpf_target_x86)
+ return *(int *) bpf_this_cpu_ptr(&__preempt_count);
+#elif defined(bpf_target_arm64)
+ return bpf_get_current_task_btf()->thread_info.preempt.count;
+#endif
+ return 0;
+}
+
+/* Description
+ * Report whether it is in interrupt context. Only works on the following archs:
+ * * x86
+ * * arm64
+ */
+static inline int bpf_in_interrupt(void)
+{
+ struct task_struct___preempt_rt *tsk;
+ int pcnt;
+
+ pcnt = get_preempt_count();
+ if (!CONFIG_PREEMPT_RT)
+ return pcnt & (NMI_MASK | HARDIRQ_MASK | SOFTIRQ_MASK);
+
+ tsk = (void *) bpf_get_current_task_btf();
+ return (pcnt & (NMI_MASK | HARDIRQ_MASK)) |
+ (tsk->softirq_disable_cnt & SOFTIRQ_MASK);
+}
+
#endif
diff --git a/tools/testing/selftests/bpf/bpf_kfuncs.h b/tools/testing/selftests/bpf/bpf_kfuncs.h
index 9386dfe8b884..e0189254bb6e 100644
--- a/tools/testing/selftests/bpf/bpf_kfuncs.h
+++ b/tools/testing/selftests/bpf/bpf_kfuncs.h
@@ -19,14 +19,17 @@ extern int bpf_dynptr_from_skb(struct __sk_buff *skb, __u64 flags,
extern int bpf_dynptr_from_xdp(struct xdp_md *xdp, __u64 flags,
struct bpf_dynptr *ptr__uninit) __ksym __weak;
+extern int bpf_dynptr_from_skb_meta(struct __sk_buff *skb, __u64 flags,
+ struct bpf_dynptr *ptr__uninit) __ksym __weak;
+
/* Description
* Obtain a read-only pointer to the dynptr's data
* Returns
* Either a direct pointer to the dynptr data or a pointer to the user-provided
* buffer if unable to obtain a direct pointer
*/
-extern void *bpf_dynptr_slice(const struct bpf_dynptr *ptr, __u32 offset,
- void *buffer, __u32 buffer__szk) __ksym __weak;
+extern void *bpf_dynptr_slice(const struct bpf_dynptr *ptr, __u64 offset,
+ void *buffer, __u64 buffer__szk) __ksym __weak;
/* Description
* Obtain a read-write pointer to the dynptr's data
@@ -34,13 +37,13 @@ extern void *bpf_dynptr_slice(const struct bpf_dynptr *ptr, __u32 offset,
* Either a direct pointer to the dynptr data or a pointer to the user-provided
* buffer if unable to obtain a direct pointer
*/
-extern void *bpf_dynptr_slice_rdwr(const struct bpf_dynptr *ptr, __u32 offset,
- void *buffer, __u32 buffer__szk) __ksym __weak;
+extern void *bpf_dynptr_slice_rdwr(const struct bpf_dynptr *ptr, __u64 offset, void *buffer,
+ __u64 buffer__szk) __ksym __weak;
-extern int bpf_dynptr_adjust(const struct bpf_dynptr *ptr, __u32 start, __u32 end) __ksym __weak;
+extern int bpf_dynptr_adjust(const struct bpf_dynptr *ptr, __u64 start, __u64 end) __ksym __weak;
extern bool bpf_dynptr_is_null(const struct bpf_dynptr *ptr) __ksym __weak;
extern bool bpf_dynptr_is_rdonly(const struct bpf_dynptr *ptr) __ksym __weak;
-extern __u32 bpf_dynptr_size(const struct bpf_dynptr *ptr) __ksym __weak;
+extern __u64 bpf_dynptr_size(const struct bpf_dynptr *ptr) __ksym __weak;
extern int bpf_dynptr_clone(const struct bpf_dynptr *ptr, struct bpf_dynptr *clone__init) __ksym __weak;
/* Description
diff --git a/tools/testing/selftests/bpf/bpf_util.h b/tools/testing/selftests/bpf/bpf_util.h
index 5f6963a320d7..4bc2d25f33e1 100644
--- a/tools/testing/selftests/bpf/bpf_util.h
+++ b/tools/testing/selftests/bpf/bpf_util.h
@@ -67,6 +67,9 @@ static inline void bpf_strlcpy(char *dst, const char *src, size_t sz)
#define sys_gettid() syscall(SYS_gettid)
#endif
+/* and poison usage to ensure it does not creep back in. */
+#pragma GCC poison gettid
+
#ifndef ENOTSUPP
#define ENOTSUPP 524
#endif
diff --git a/tools/testing/selftests/bpf/cgroup_helpers.c b/tools/testing/selftests/bpf/cgroup_helpers.c
index 15f626014872..20cede4db3ce 100644
--- a/tools/testing/selftests/bpf/cgroup_helpers.c
+++ b/tools/testing/selftests/bpf/cgroup_helpers.c
@@ -412,6 +412,26 @@ void remove_cgroup(const char *relative_path)
log_err("rmdiring cgroup %s .. %s", relative_path, cgroup_path);
}
+/*
+ * remove_cgroup_pid() - Remove a cgroup setup by process identified by PID
+ * @relative_path: The cgroup path, relative to the workdir, to remove
+ * @pid: PID to be used to find cgroup_path
+ *
+ * This function expects a cgroup to already be created, relative to the cgroup
+ * work dir. It also expects the cgroup doesn't have any children or live
+ * processes and it removes the cgroup.
+ *
+ * On failure, it will print an error to stderr.
+ */
+void remove_cgroup_pid(const char *relative_path, int pid)
+{
+ char cgroup_path[PATH_MAX + 1];
+
+ format_cgroup_path_pid(cgroup_path, relative_path, pid);
+ if (rmdir(cgroup_path))
+ log_err("rmdiring cgroup %s .. %s", relative_path, cgroup_path);
+}
+
/**
* create_and_get_cgroup() - Create a cgroup, relative to workdir, and get the FD
* @relative_path: The cgroup path, relative to the workdir, to join
diff --git a/tools/testing/selftests/bpf/cgroup_helpers.h b/tools/testing/selftests/bpf/cgroup_helpers.h
index 182e1ac36c95..3857304be874 100644
--- a/tools/testing/selftests/bpf/cgroup_helpers.h
+++ b/tools/testing/selftests/bpf/cgroup_helpers.h
@@ -19,6 +19,7 @@ int cgroup_setup_and_join(const char *relative_path);
int get_root_cgroup(void);
int create_and_get_cgroup(const char *relative_path);
void remove_cgroup(const char *relative_path);
+void remove_cgroup_pid(const char *relative_path, int pid);
unsigned long long get_cgroup_id(const char *relative_path);
int get_cgroup1_hierarchy_id(const char *subsys_name);
diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config
index 8916ab814a3e..558839e3c185 100644
--- a/tools/testing/selftests/bpf/config
+++ b/tools/testing/selftests/bpf/config
@@ -50,6 +50,7 @@ CONFIG_IPV6_SIT=y
CONFIG_IPV6_TUNNEL=y
CONFIG_KEYS=y
CONFIG_LIRC=y
+CONFIG_LIVEPATCH=y
CONFIG_LWTUNNEL=y
CONFIG_MODULE_SIG=y
CONFIG_MODULE_SRCVERSION_ALL=y
@@ -61,6 +62,7 @@ CONFIG_MPLS_IPTUNNEL=y
CONFIG_MPLS_ROUTING=y
CONFIG_MPTCP=y
CONFIG_NET_ACT_GACT=y
+CONFIG_NET_ACT_MIRRED=y
CONFIG_NET_ACT_SKBMOD=y
CONFIG_NET_CLS=y
CONFIG_NET_CLS_ACT=y
@@ -110,6 +112,8 @@ CONFIG_IP6_NF_FILTER=y
CONFIG_NF_NAT=y
CONFIG_PACKET=y
CONFIG_RC_CORE=y
+CONFIG_SAMPLES=y
+CONFIG_SAMPLE_LIVEPATCH=m
CONFIG_SECURITY=y
CONFIG_SECURITYFS=y
CONFIG_SYN_COOKIES=y
@@ -122,3 +126,8 @@ CONFIG_XDP_SOCKETS=y
CONFIG_XFRM_INTERFACE=y
CONFIG_TCP_CONG_DCTCP=y
CONFIG_TCP_CONG_BBR=y
+CONFIG_INFINIBAND=y
+CONFIG_SMC=y
+CONFIG_SMC_HS_CTRL_BPF=y
+CONFIG_DIBS=y
+CONFIG_DIBS_LO=y \ No newline at end of file
diff --git a/tools/testing/selftests/bpf/config.aarch64 b/tools/testing/selftests/bpf/config.aarch64
index e1495a4bbc99..7efad36ceb26 100644
--- a/tools/testing/selftests/bpf/config.aarch64
+++ b/tools/testing/selftests/bpf/config.aarch64
@@ -31,10 +31,7 @@ CONFIG_COMPAT=y
CONFIG_CPUSETS=y
CONFIG_CRASH_DUMP=y
CONFIG_CRYPTO_USER_API_RNG=y
-CONFIG_CRYPTO_USER_API_SKCIPHER=y
CONFIG_DEBUG_ATOMIC_SLEEP=y
-CONFIG_DEBUG_INFO_BTF=y
-CONFIG_DEBUG_INFO_DWARF4=y
CONFIG_DEBUG_INFO_REDUCED=n
CONFIG_DEBUG_LIST=y
CONFIG_DEBUG_LOCKDEP=y
@@ -46,7 +43,6 @@ CONFIG_DETECT_HUNG_TASK=y
CONFIG_DEVTMPFS_MOUNT=y
CONFIG_DEVTMPFS=y
CONFIG_DRM=y
-CONFIG_DUMMY=y
CONFIG_EXPERT=y
CONFIG_EXT4_FS_POSIX_ACL=y
CONFIG_EXT4_FS_SECURITY=y
@@ -70,13 +66,11 @@ CONFIG_HZ_100=y
CONFIG_IDLE_PAGE_TRACKING=y
CONFIG_IKHEADERS=y
CONFIG_INET6_ESP=y
-CONFIG_INET_ESP=y
CONFIG_INET=y
CONFIG_INPUT_EVDEV=y
CONFIG_IP_ADVANCED_ROUTER=y
CONFIG_IP_MULTICAST=y
CONFIG_IP_MULTIPLE_TABLES=y
-CONFIG_IP_NF_IPTABLES=y
CONFIG_IPV6_SEG6_LWTUNNEL=y
CONFIG_IPVLAN=y
CONFIG_JUMP_LABEL=y
@@ -97,22 +91,18 @@ CONFIG_MEMORY_HOTPLUG=y
CONFIG_MEMORY_HOTREMOVE=y
CONFIG_NAMESPACES=y
CONFIG_NET_ACT_BPF=y
-CONFIG_NET_ACT_GACT=y
CONFIG_NETDEVICES=y
CONFIG_NETFILTER_XT_MATCH_BPF=y
CONFIG_NETFILTER_XT_TARGET_MARK=y
CONFIG_NET_KEY=y
-CONFIG_NET_SCH_FQ=y
CONFIG_NET_VRF=y
CONFIG_NET=y
-CONFIG_NF_TABLES=y
CONFIG_NLMON=y
CONFIG_NO_HZ_IDLE=y
CONFIG_NR_CPUS=256
CONFIG_NUMA=y
CONFIG_OVERLAY_FS=y
CONFIG_PACKET_DIAG=y
-CONFIG_PACKET=y
CONFIG_PANIC_ON_OOPS=y
CONFIG_PARTITION_ADVANCED=y
CONFIG_PCI_HOST_GENERIC=y
@@ -149,7 +139,6 @@ CONFIG_TASK_XACCT=y
CONFIG_TCG_TIS=y
CONFIG_TCG_TPM=y
CONFIG_TCP_CONG_ADVANCED=y
-CONFIG_TCP_CONG_DCTCP=y
CONFIG_TLS=y
CONFIG_TMPFS_POSIX_ACL=y
CONFIG_TMPFS=y
@@ -161,6 +150,5 @@ CONFIG_UPROBES=y
CONFIG_USER_NS=y
CONFIG_VETH=y
CONFIG_VLAN_8021Q=y
-CONFIG_VSOCKETS=y
CONFIG_VSOCKETS_LOOPBACK=y
CONFIG_XFRM_USER=y
diff --git a/tools/testing/selftests/bpf/config.ppc64el b/tools/testing/selftests/bpf/config.ppc64el
index 9acf389dc4ce..b53afb5e0b71 100644
--- a/tools/testing/selftests/bpf/config.ppc64el
+++ b/tools/testing/selftests/bpf/config.ppc64el
@@ -54,7 +54,6 @@ CONFIG_NET=y
CONFIG_NO_HZ_IDLE=y
CONFIG_NONPORTABLE=y
CONFIG_NR_CPUS=256
-CONFIG_PACKET=y
CONFIG_PANIC_ON_OOPS=y
CONFIG_PARTITION_ADVANCED=y
CONFIG_PCI_HOST_GENERIC=y
diff --git a/tools/testing/selftests/bpf/config.riscv64 b/tools/testing/selftests/bpf/config.riscv64
index bb7043a80e1a..7bee24a79a71 100644
--- a/tools/testing/selftests/bpf/config.riscv64
+++ b/tools/testing/selftests/bpf/config.riscv64
@@ -48,7 +48,6 @@ CONFIG_NET_VRF=y
CONFIG_NONPORTABLE=y
CONFIG_NO_HZ_IDLE=y
CONFIG_NR_CPUS=256
-CONFIG_PACKET=y
CONFIG_PANIC_ON_OOPS=y
CONFIG_PARTITION_ADVANCED=y
CONFIG_PCI=y
diff --git a/tools/testing/selftests/bpf/config.s390x b/tools/testing/selftests/bpf/config.s390x
index 26c3bc2ce11d..db61878148e4 100644
--- a/tools/testing/selftests/bpf/config.s390x
+++ b/tools/testing/selftests/bpf/config.s390x
@@ -22,10 +22,7 @@ CONFIG_CHECKPOINT_RESTORE=y
CONFIG_CPUSETS=y
CONFIG_CRASH_DUMP=y
CONFIG_CRYPTO_USER_API_RNG=y
-CONFIG_CRYPTO_USER_API_SKCIPHER=y
CONFIG_DEBUG_ATOMIC_SLEEP=y
-CONFIG_DEBUG_INFO_BTF=y
-CONFIG_DEBUG_INFO_DWARF4=y
CONFIG_DEBUG_LIST=y
CONFIG_DEBUG_LOCKDEP=y
CONFIG_DEBUG_NOTIFIERS=y
@@ -56,11 +53,9 @@ CONFIG_IDLE_PAGE_TRACKING=y
CONFIG_IKHEADERS=y
CONFIG_INET6_ESP=y
CONFIG_INET=y
-CONFIG_INET_ESP=y
CONFIG_IP_ADVANCED_ROUTER=y
CONFIG_IP_MULTICAST=y
CONFIG_IP_MULTIPLE_TABLES=y
-CONFIG_IP_NF_IPTABLES=y
CONFIG_IPV6_SEG6_LWTUNNEL=y
CONFIG_IPVLAN=y
CONFIG_JUMP_LABEL=y
@@ -83,18 +78,14 @@ CONFIG_MEMORY_HOTREMOVE=y
CONFIG_NAMESPACES=y
CONFIG_NET=y
CONFIG_NET_ACT_BPF=y
-CONFIG_NET_ACT_GACT=y
CONFIG_NET_KEY=y
-CONFIG_NET_SCH_FQ=y
CONFIG_NET_VRF=y
CONFIG_NETDEVICES=y
CONFIG_NETFILTER_XT_MATCH_BPF=y
CONFIG_NETFILTER_XT_TARGET_MARK=y
-CONFIG_NF_TABLES=y
CONFIG_NO_HZ_IDLE=y
CONFIG_NR_CPUS=256
CONFIG_NUMA=y
-CONFIG_PACKET=y
CONFIG_PANIC_ON_OOPS=y
CONFIG_PARTITION_ADVANCED=y
CONFIG_PCI=y
@@ -119,7 +110,6 @@ CONFIG_TASK_IO_ACCOUNTING=y
CONFIG_TASK_XACCT=y
CONFIG_TASKSTATS=y
CONFIG_TCP_CONG_ADVANCED=y
-CONFIG_TCP_CONG_DCTCP=y
CONFIG_TLS=y
CONFIG_TMPFS=y
CONFIG_TMPFS_POSIX_ACL=y
@@ -131,6 +121,5 @@ CONFIG_UPROBES=y
CONFIG_USER_NS=y
CONFIG_VETH=y
CONFIG_VLAN_8021Q=y
-CONFIG_VSOCKETS=y
CONFIG_VSOCKETS_LOOPBACK=y
CONFIG_XFRM_USER=y
diff --git a/tools/testing/selftests/bpf/config.x86_64 b/tools/testing/selftests/bpf/config.x86_64
index 5e713ef7caa3..42ad817b00ae 100644
--- a/tools/testing/selftests/bpf/config.x86_64
+++ b/tools/testing/selftests/bpf/config.x86_64
@@ -44,7 +44,6 @@ CONFIG_CRYPTO_SEQIV=y
CONFIG_CRYPTO_XXHASH=y
CONFIG_DCB=y
CONFIG_DEBUG_ATOMIC_SLEEP=y
-CONFIG_DEBUG_INFO_BTF=y
CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y
CONFIG_DEBUG_MEMORY_INIT=y
CONFIG_DEFAULT_FQ_CODEL=y
@@ -104,12 +103,10 @@ CONFIG_HZ_1000=y
CONFIG_INET=y
CONFIG_INPUT_EVDEV=y
CONFIG_INTEL_POWERCLAMP=y
-CONFIG_IP6_NF_IPTABLES=y
CONFIG_IP_ADVANCED_ROUTER=y
CONFIG_IP_MROUTE=y
CONFIG_IP_MULTICAST=y
CONFIG_IP_MULTIPLE_TABLES=y
-CONFIG_IP_NF_IPTABLES=y
CONFIG_IP_PIMSM_V1=y
CONFIG_IP_PIMSM_V2=y
CONFIG_IP_ROUTE_MULTIPATH=y
@@ -162,7 +159,6 @@ CONFIG_NUMA=y
CONFIG_NUMA_BALANCING=y
CONFIG_NVMEM=y
CONFIG_OSF_PARTITION=y
-CONFIG_PACKET=y
CONFIG_PANIC_ON_OOPS=y
CONFIG_PARTITION_ADVANCED=y
CONFIG_PCI=y
@@ -220,7 +216,6 @@ CONFIG_VALIDATE_FS_PARSER=y
CONFIG_VETH=y
CONFIG_VIRT_DRIVERS=y
CONFIG_VLAN_8021Q=y
-CONFIG_VSOCKETS=y
CONFIG_VSOCKETS_LOOPBACK=y
CONFIG_X86_ACPI_CPUFREQ=y
CONFIG_X86_CPUID=y
diff --git a/tools/testing/selftests/bpf/network_helpers.c b/tools/testing/selftests/bpf/network_helpers.c
index 72b5c174ab3b..0a6a5561bed3 100644
--- a/tools/testing/selftests/bpf/network_helpers.c
+++ b/tools/testing/selftests/bpf/network_helpers.c
@@ -97,7 +97,7 @@ int settimeo(int fd, int timeout_ms)
int start_server_addr(int type, const struct sockaddr_storage *addr, socklen_t addrlen,
const struct network_helper_opts *opts)
{
- int fd;
+ int on = 1, fd;
if (!opts)
opts = &default_opts;
@@ -111,6 +111,12 @@ int start_server_addr(int type, const struct sockaddr_storage *addr, socklen_t a
if (settimeo(fd, opts->timeout_ms))
goto error_close;
+ if (type == SOCK_STREAM &&
+ setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on))) {
+ log_err("Failed to enable SO_REUSEADDR");
+ goto error_close;
+ }
+
if (opts->post_socket_cb &&
opts->post_socket_cb(fd, opts->cb_opts)) {
log_err("Failed to call post_socket_cb");
@@ -457,7 +463,7 @@ int append_tid(char *str, size_t sz)
if (end + 8 > sz)
return -1;
- sprintf(&str[end], "%07d", gettid());
+ sprintf(&str[end], "%07ld", sys_gettid());
str[end + 7] = '\0';
return 0;
@@ -766,6 +772,50 @@ int send_recv_data(int lfd, int fd, uint32_t total_bytes)
return err;
}
+int tc_prog_attach(const char *dev, int ingress_fd, int egress_fd)
+{
+ int ifindex, ret;
+
+ if (!ASSERT_TRUE(ingress_fd >= 0 || egress_fd >= 0,
+ "at least one program fd is valid"))
+ return -1;
+
+ ifindex = if_nametoindex(dev);
+ if (!ASSERT_NEQ(ifindex, 0, "get ifindex"))
+ return -1;
+
+ DECLARE_LIBBPF_OPTS(bpf_tc_hook, hook, .ifindex = ifindex,
+ .attach_point = BPF_TC_INGRESS | BPF_TC_EGRESS);
+ DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts1, .handle = 1,
+ .priority = 1, .prog_fd = ingress_fd);
+ DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts2, .handle = 1,
+ .priority = 1, .prog_fd = egress_fd);
+
+ ret = bpf_tc_hook_create(&hook);
+ if (!ASSERT_OK(ret, "create tc hook"))
+ return ret;
+
+ if (ingress_fd >= 0) {
+ hook.attach_point = BPF_TC_INGRESS;
+ ret = bpf_tc_attach(&hook, &opts1);
+ if (!ASSERT_OK(ret, "bpf_tc_attach")) {
+ bpf_tc_hook_destroy(&hook);
+ return ret;
+ }
+ }
+
+ if (egress_fd >= 0) {
+ hook.attach_point = BPF_TC_EGRESS;
+ ret = bpf_tc_attach(&hook, &opts2);
+ if (!ASSERT_OK(ret, "bpf_tc_attach")) {
+ bpf_tc_hook_destroy(&hook);
+ return ret;
+ }
+ }
+
+ return 0;
+}
+
#ifdef TRAFFIC_MONITOR
struct tmonitor_ctx {
pcap_t *pcap;
diff --git a/tools/testing/selftests/bpf/network_helpers.h b/tools/testing/selftests/bpf/network_helpers.h
index ef208eefd571..79a010c88e11 100644
--- a/tools/testing/selftests/bpf/network_helpers.h
+++ b/tools/testing/selftests/bpf/network_helpers.h
@@ -255,6 +255,22 @@ struct tmonitor_ctx;
typedef int (*tm_print_fn_t)(const char *format, va_list args);
+/**
+ * tc_prog_attach - attach BPF program(s) to an interface
+ *
+ * Takes file descriptors pointing to at least one, at most two BPF
+ * programs, and attach those programs to an interface ingress, egress or
+ * both.
+ *
+ * @dev: string containing the interface name
+ * @ingress_fd: file descriptor of the program to attach to interface ingress
+ * @egress_fd: file descriptor of the program to attach to interface egress
+ *
+ * Returns 0 on success, -1 if no valid file descriptor has been found, if
+ * the interface name is invalid or if an error ocurred during attach.
+ */
+int tc_prog_attach(const char *dev, int ingress_fd, int egress_fd);
+
#ifdef TRAFFIC_MONITOR
struct tmonitor_ctx *traffic_monitor_start(const char *netns, const char *test_name,
const char *subtest_name);
diff --git a/tools/testing/selftests/bpf/prog_tests/align.c b/tools/testing/selftests/bpf/prog_tests/align.c
index 1d53a8561ee2..24c509ce4e5b 100644
--- a/tools/testing/selftests/bpf/prog_tests/align.c
+++ b/tools/testing/selftests/bpf/prog_tests/align.c
@@ -42,11 +42,11 @@ static struct bpf_align_test tests[] = {
.matches = {
{0, "R1", "ctx()"},
{0, "R10", "fp0"},
- {0, "R3_w", "2"},
- {1, "R3_w", "4"},
- {2, "R3_w", "8"},
- {3, "R3_w", "16"},
- {4, "R3_w", "32"},
+ {0, "R3", "2"},
+ {1, "R3", "4"},
+ {2, "R3", "8"},
+ {3, "R3", "16"},
+ {4, "R3", "32"},
},
},
{
@@ -70,17 +70,17 @@ static struct bpf_align_test tests[] = {
.matches = {
{0, "R1", "ctx()"},
{0, "R10", "fp0"},
- {0, "R3_w", "1"},
- {1, "R3_w", "2"},
- {2, "R3_w", "4"},
- {3, "R3_w", "8"},
- {4, "R3_w", "16"},
- {5, "R3_w", "1"},
- {6, "R4_w", "32"},
- {7, "R4_w", "16"},
- {8, "R4_w", "8"},
- {9, "R4_w", "4"},
- {10, "R4_w", "2"},
+ {0, "R3", "1"},
+ {1, "R3", "2"},
+ {2, "R3", "4"},
+ {3, "R3", "8"},
+ {4, "R3", "16"},
+ {5, "R3", "1"},
+ {6, "R4", "32"},
+ {7, "R4", "16"},
+ {8, "R4", "8"},
+ {9, "R4", "4"},
+ {10, "R4", "2"},
},
},
{
@@ -99,12 +99,12 @@ static struct bpf_align_test tests[] = {
.matches = {
{0, "R1", "ctx()"},
{0, "R10", "fp0"},
- {0, "R3_w", "4"},
- {1, "R3_w", "8"},
- {2, "R3_w", "10"},
- {3, "R4_w", "8"},
- {4, "R4_w", "12"},
- {5, "R4_w", "14"},
+ {0, "R3", "4"},
+ {1, "R3", "8"},
+ {2, "R3", "10"},
+ {3, "R4", "8"},
+ {4, "R4", "12"},
+ {5, "R4", "14"},
},
},
{
@@ -121,10 +121,10 @@ static struct bpf_align_test tests[] = {
.matches = {
{0, "R1", "ctx()"},
{0, "R10", "fp0"},
- {0, "R3_w", "7"},
- {1, "R3_w", "7"},
- {2, "R3_w", "14"},
- {3, "R3_w", "56"},
+ {0, "R3", "7"},
+ {1, "R3", "7"},
+ {2, "R3", "14"},
+ {3, "R3", "56"},
},
},
@@ -162,19 +162,19 @@ static struct bpf_align_test tests[] = {
},
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
.matches = {
- {6, "R0_w", "pkt(off=8,r=8)"},
- {6, "R3_w", "var_off=(0x0; 0xff)"},
- {7, "R3_w", "var_off=(0x0; 0x1fe)"},
- {8, "R3_w", "var_off=(0x0; 0x3fc)"},
- {9, "R3_w", "var_off=(0x0; 0x7f8)"},
- {10, "R3_w", "var_off=(0x0; 0xff0)"},
- {12, "R3_w", "pkt_end()"},
- {17, "R4_w", "var_off=(0x0; 0xff)"},
- {18, "R4_w", "var_off=(0x0; 0x1fe0)"},
- {19, "R4_w", "var_off=(0x0; 0xff0)"},
- {20, "R4_w", "var_off=(0x0; 0x7f8)"},
- {21, "R4_w", "var_off=(0x0; 0x3fc)"},
- {22, "R4_w", "var_off=(0x0; 0x1fe)"},
+ {6, "R0", "pkt(off=8,r=8)"},
+ {6, "R3", "var_off=(0x0; 0xff)"},
+ {7, "R3", "var_off=(0x0; 0x1fe)"},
+ {8, "R3", "var_off=(0x0; 0x3fc)"},
+ {9, "R3", "var_off=(0x0; 0x7f8)"},
+ {10, "R3", "var_off=(0x0; 0xff0)"},
+ {12, "R3", "pkt_end()"},
+ {17, "R4", "var_off=(0x0; 0xff)"},
+ {18, "R4", "var_off=(0x0; 0x1fe0)"},
+ {19, "R4", "var_off=(0x0; 0xff0)"},
+ {20, "R4", "var_off=(0x0; 0x7f8)"},
+ {21, "R4", "var_off=(0x0; 0x3fc)"},
+ {22, "R4", "var_off=(0x0; 0x1fe)"},
},
},
{
@@ -195,16 +195,16 @@ static struct bpf_align_test tests[] = {
},
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
.matches = {
- {6, "R3_w", "var_off=(0x0; 0xff)"},
- {7, "R4_w", "var_off=(0x0; 0xff)"},
- {8, "R4_w", "var_off=(0x0; 0xff)"},
- {9, "R4_w", "var_off=(0x0; 0xff)"},
- {10, "R4_w", "var_off=(0x0; 0x1fe)"},
- {11, "R4_w", "var_off=(0x0; 0xff)"},
- {12, "R4_w", "var_off=(0x0; 0x3fc)"},
- {13, "R4_w", "var_off=(0x0; 0xff)"},
- {14, "R4_w", "var_off=(0x0; 0x7f8)"},
- {15, "R4_w", "var_off=(0x0; 0xff0)"},
+ {6, "R3", "var_off=(0x0; 0xff)"},
+ {7, "R4", "var_off=(0x0; 0xff)"},
+ {8, "R4", "var_off=(0x0; 0xff)"},
+ {9, "R4", "var_off=(0x0; 0xff)"},
+ {10, "R4", "var_off=(0x0; 0x1fe)"},
+ {11, "R4", "var_off=(0x0; 0xff)"},
+ {12, "R4", "var_off=(0x0; 0x3fc)"},
+ {13, "R4", "var_off=(0x0; 0xff)"},
+ {14, "R4", "var_off=(0x0; 0x7f8)"},
+ {15, "R4", "var_off=(0x0; 0xff0)"},
},
},
{
@@ -235,14 +235,14 @@ static struct bpf_align_test tests[] = {
},
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
.matches = {
- {2, "R5_w", "pkt(r=0)"},
- {4, "R5_w", "pkt(off=14,r=0)"},
- {5, "R4_w", "pkt(off=14,r=0)"},
+ {2, "R5", "pkt(r=0)"},
+ {4, "R5", "pkt(off=14,r=0)"},
+ {5, "R4", "pkt(off=14,r=0)"},
{9, "R2", "pkt(r=18)"},
{10, "R5", "pkt(off=14,r=18)"},
- {10, "R4_w", "var_off=(0x0; 0xff)"},
- {13, "R4_w", "var_off=(0x0; 0xffff)"},
- {14, "R4_w", "var_off=(0x0; 0xffff)"},
+ {10, "R4", "var_off=(0x0; 0xff)"},
+ {13, "R4", "var_off=(0x0; 0xffff)"},
+ {14, "R4", "var_off=(0x0; 0xffff)"},
},
},
{
@@ -299,12 +299,12 @@ static struct bpf_align_test tests[] = {
/* Calculated offset in R6 has unknown value, but known
* alignment of 4.
*/
- {6, "R2_w", "pkt(r=8)"},
- {7, "R6_w", "var_off=(0x0; 0x3fc)"},
+ {6, "R2", "pkt(r=8)"},
+ {7, "R6", "var_off=(0x0; 0x3fc)"},
/* Offset is added to packet pointer R5, resulting in
* known fixed offset, and variable offset from R6.
*/
- {11, "R5_w", "pkt(id=1,off=14,"},
+ {11, "R5", "pkt(id=1,off=14,"},
/* At the time the word size load is performed from R5,
* it's total offset is NET_IP_ALIGN + reg->off (0) +
* reg->aux_off (14) which is 16. Then the variable
@@ -320,12 +320,12 @@ static struct bpf_align_test tests[] = {
* instruction to validate R5 state. We also check
* that R4 is what it should be in such case.
*/
- {18, "R4_w", "var_off=(0x0; 0x3fc)"},
- {18, "R5_w", "var_off=(0x0; 0x3fc)"},
+ {18, "R4", "var_off=(0x0; 0x3fc)"},
+ {18, "R5", "var_off=(0x0; 0x3fc)"},
/* Constant offset is added to R5, resulting in
* reg->off of 14.
*/
- {19, "R5_w", "pkt(id=2,off=14,"},
+ {19, "R5", "pkt(id=2,off=14,"},
/* At the time the word size load is performed from R5,
* its total fixed offset is NET_IP_ALIGN + reg->off
* (14) which is 16. Then the variable offset is 4-byte
@@ -337,21 +337,21 @@ static struct bpf_align_test tests[] = {
/* Constant offset is added to R5 packet pointer,
* resulting in reg->off value of 14.
*/
- {26, "R5_w", "pkt(off=14,r=8)"},
+ {26, "R5", "pkt(off=14,r=8)"},
/* Variable offset is added to R5, resulting in a
* variable offset of (4n). See comment for insn #18
* for R4 = R5 trick.
*/
- {28, "R4_w", "var_off=(0x0; 0x3fc)"},
- {28, "R5_w", "var_off=(0x0; 0x3fc)"},
+ {28, "R4", "var_off=(0x0; 0x3fc)"},
+ {28, "R5", "var_off=(0x0; 0x3fc)"},
/* Constant is added to R5 again, setting reg->off to 18. */
- {29, "R5_w", "pkt(id=3,off=18,"},
+ {29, "R5", "pkt(id=3,off=18,"},
/* And once more we add a variable; resulting var_off
* is still (4n), fixed offset is not changed.
* Also, we create a new reg->id.
*/
- {31, "R4_w", "var_off=(0x0; 0x7fc)"},
- {31, "R5_w", "var_off=(0x0; 0x7fc)"},
+ {31, "R4", "var_off=(0x0; 0x7fc)"},
+ {31, "R5", "var_off=(0x0; 0x7fc)"},
/* At the time the word size load is performed from R5,
* its total fixed offset is NET_IP_ALIGN + reg->off (18)
* which is 20. Then the variable offset is (4n), so
@@ -397,12 +397,12 @@ static struct bpf_align_test tests[] = {
/* Calculated offset in R6 has unknown value, but known
* alignment of 4.
*/
- {6, "R2_w", "pkt(r=8)"},
- {7, "R6_w", "var_off=(0x0; 0x3fc)"},
+ {6, "R2", "pkt(r=8)"},
+ {7, "R6", "var_off=(0x0; 0x3fc)"},
/* Adding 14 makes R6 be (4n+2) */
- {8, "R6_w", "var_off=(0x2; 0x7fc)"},
+ {8, "R6", "var_off=(0x2; 0x7fc)"},
/* Packet pointer has (4n+2) offset */
- {11, "R5_w", "var_off=(0x2; 0x7fc)"},
+ {11, "R5", "var_off=(0x2; 0x7fc)"},
{12, "R4", "var_off=(0x2; 0x7fc)"},
/* At the time the word size load is performed from R5,
* its total fixed offset is NET_IP_ALIGN + reg->off (0)
@@ -414,11 +414,11 @@ static struct bpf_align_test tests[] = {
/* Newly read value in R6 was shifted left by 2, so has
* known alignment of 4.
*/
- {17, "R6_w", "var_off=(0x0; 0x3fc)"},
+ {17, "R6", "var_off=(0x0; 0x3fc)"},
/* Added (4n) to packet pointer's (4n+2) var_off, giving
* another (4n+2).
*/
- {19, "R5_w", "var_off=(0x2; 0xffc)"},
+ {19, "R5", "var_off=(0x2; 0xffc)"},
{20, "R4", "var_off=(0x2; 0xffc)"},
/* At the time the word size load is performed from R5,
* its total fixed offset is NET_IP_ALIGN + reg->off (0)
@@ -459,18 +459,18 @@ static struct bpf_align_test tests[] = {
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
.result = REJECT,
.matches = {
- {3, "R5_w", "pkt_end()"},
+ {3, "R5", "pkt_end()"},
/* (ptr - ptr) << 2 == unknown, (4n) */
- {5, "R5_w", "var_off=(0x0; 0xfffffffffffffffc)"},
+ {5, "R5", "var_off=(0x0; 0xfffffffffffffffc)"},
/* (4n) + 14 == (4n+2). We blow our bounds, because
* the add could overflow.
*/
- {6, "R5_w", "var_off=(0x2; 0xfffffffffffffffc)"},
+ {6, "R5", "var_off=(0x2; 0xfffffffffffffffc)"},
/* Checked s>=0 */
{9, "R5", "var_off=(0x2; 0x7ffffffffffffffc)"},
/* packet pointer + nonnegative (4n+2) */
- {11, "R6_w", "var_off=(0x2; 0x7ffffffffffffffc)"},
- {12, "R4_w", "var_off=(0x2; 0x7ffffffffffffffc)"},
+ {11, "R6", "var_off=(0x2; 0x7ffffffffffffffc)"},
+ {12, "R4", "var_off=(0x2; 0x7ffffffffffffffc)"},
/* NET_IP_ALIGN + (4n+2) == (4n), alignment is fine.
* We checked the bounds, but it might have been able
* to overflow if the packet pointer started in the
@@ -478,7 +478,7 @@ static struct bpf_align_test tests[] = {
* So we did not get a 'range' on R6, and the access
* attempt will fail.
*/
- {15, "R6_w", "var_off=(0x2; 0x7ffffffffffffffc)"},
+ {15, "R6", "var_off=(0x2; 0x7ffffffffffffffc)"},
}
},
{
@@ -513,12 +513,12 @@ static struct bpf_align_test tests[] = {
/* Calculated offset in R6 has unknown value, but known
* alignment of 4.
*/
- {6, "R2_w", "pkt(r=8)"},
- {8, "R6_w", "var_off=(0x0; 0x3fc)"},
+ {6, "R2", "pkt(r=8)"},
+ {8, "R6", "var_off=(0x0; 0x3fc)"},
/* Adding 14 makes R6 be (4n+2) */
- {9, "R6_w", "var_off=(0x2; 0x7fc)"},
+ {9, "R6", "var_off=(0x2; 0x7fc)"},
/* New unknown value in R7 is (4n) */
- {10, "R7_w", "var_off=(0x0; 0x3fc)"},
+ {10, "R7", "var_off=(0x0; 0x3fc)"},
/* Subtracting it from R6 blows our unsigned bounds */
{11, "R6", "var_off=(0x2; 0xfffffffffffffffc)"},
/* Checked s>= 0 */
@@ -566,16 +566,16 @@ static struct bpf_align_test tests[] = {
/* Calculated offset in R6 has unknown value, but known
* alignment of 4.
*/
- {6, "R2_w", "pkt(r=8)"},
- {9, "R6_w", "var_off=(0x0; 0x3c)"},
+ {6, "R2", "pkt(r=8)"},
+ {9, "R6", "var_off=(0x0; 0x3c)"},
/* Adding 14 makes R6 be (4n+2) */
- {10, "R6_w", "var_off=(0x2; 0x7c)"},
+ {10, "R6", "var_off=(0x2; 0x7c)"},
/* Subtracting from packet pointer overflows ubounds */
- {13, "R5_w", "var_off=(0xffffffffffffff82; 0x7c)"},
+ {13, "R5", "var_off=(0xffffffffffffff82; 0x7c)"},
/* New unknown value in R7 is (4n), >= 76 */
- {14, "R7_w", "var_off=(0x0; 0x7fc)"},
+ {14, "R7", "var_off=(0x0; 0x7fc)"},
/* Adding it to packet pointer gives nice bounds again */
- {16, "R5_w", "var_off=(0x2; 0x7fc)"},
+ {16, "R5", "var_off=(0x2; 0x7fc)"},
/* At the time the word size load is performed from R5,
* its total fixed offset is NET_IP_ALIGN + reg->off (0)
* which is 2. Then the variable offset is (4n+2), so
diff --git a/tools/testing/selftests/bpf/prog_tests/arena_spin_lock.c b/tools/testing/selftests/bpf/prog_tests/arena_spin_lock.c
index 0223fce4db2b..693fd86fbde6 100644
--- a/tools/testing/selftests/bpf/prog_tests/arena_spin_lock.c
+++ b/tools/testing/selftests/bpf/prog_tests/arena_spin_lock.c
@@ -40,8 +40,13 @@ static void *spin_lock_thread(void *arg)
err = bpf_prog_test_run_opts(prog_fd, &topts);
ASSERT_OK(err, "test_run err");
+
+ if (topts.retval == -EOPNOTSUPP)
+ goto end;
+
ASSERT_EQ((int)topts.retval, 0, "test_run retval");
+end:
pthread_exit(arg);
}
@@ -63,6 +68,7 @@ static void test_arena_spin_lock_size(int size)
skel = arena_spin_lock__open_and_load();
if (!ASSERT_OK_PTR(skel, "arena_spin_lock__open_and_load"))
return;
+
if (skel->data->test_skip == 2) {
test__skip();
goto end;
@@ -86,6 +92,13 @@ static void test_arena_spin_lock_size(int size)
goto end_barrier;
}
+ if (skel->data->test_skip == 3) {
+ printf("%s:SKIP: CONFIG_NR_CPUS exceed the maximum supported by arena spinlock\n",
+ __func__);
+ test__skip();
+ goto end_barrier;
+ }
+
ASSERT_EQ(skel->bss->counter, repeat * nthreads, "check counter value");
end_barrier:
diff --git a/tools/testing/selftests/bpf/prog_tests/arena_strsearch.c b/tools/testing/selftests/bpf/prog_tests/arena_strsearch.c
new file mode 100644
index 000000000000..f81a0c066505
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/arena_strsearch.c
@@ -0,0 +1,30 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+#include <test_progs.h>
+#include "arena_strsearch.skel.h"
+
+static void test_arena_str(void)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, opts);
+ struct arena_strsearch *skel;
+ int ret;
+
+ skel = arena_strsearch__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "arena_strsearch__open_and_load"))
+ return;
+
+ ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.arena_strsearch), &opts);
+ ASSERT_OK(ret, "ret_add");
+ ASSERT_OK(opts.retval, "retval");
+ if (skel->bss->skip) {
+ printf("%s:SKIP:compiler doesn't support arena_cast\n", __func__);
+ test__skip();
+ }
+ arena_strsearch__destroy(skel);
+}
+
+void test_arena_strsearch(void)
+{
+ if (test__start_subtest("arena_strsearch"))
+ test_arena_str();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/arg_parsing.c b/tools/testing/selftests/bpf/prog_tests/arg_parsing.c
index bb143de68875..e27d66b75fb1 100644
--- a/tools/testing/selftests/bpf/prog_tests/arg_parsing.c
+++ b/tools/testing/selftests/bpf/prog_tests/arg_parsing.c
@@ -144,11 +144,17 @@ static void test_parse_test_list_file(void)
if (!ASSERT_OK(ferror(fp), "prepare tmp"))
goto out_fclose;
+ if (!ASSERT_OK(fsync(fileno(fp)), "fsync tmp"))
+ goto out_fclose;
+
init_test_filter_set(&set);
- ASSERT_OK(parse_test_list_file(tmpfile, &set, true), "parse file");
+ if (!ASSERT_OK(parse_test_list_file(tmpfile, &set, true), "parse file"))
+ goto out_fclose;
+
+ if (!ASSERT_EQ(set.cnt, 4, "test count"))
+ goto out_free_set;
- ASSERT_EQ(set.cnt, 4, "test count");
ASSERT_OK(strcmp("test_with_spaces", set.tests[0].name), "test 0 name");
ASSERT_EQ(set.tests[0].subtest_cnt, 0, "test 0 subtest count");
ASSERT_OK(strcmp("testA", set.tests[1].name), "test 1 name");
@@ -158,8 +164,8 @@ static void test_parse_test_list_file(void)
ASSERT_OK(strcmp("testB", set.tests[2].name), "test 2 name");
ASSERT_OK(strcmp("testC_no_eof_newline", set.tests[3].name), "test 3 name");
+out_free_set:
free_test_filter_set(&set);
-
out_fclose:
fclose(fp);
out_remove:
diff --git a/tools/testing/selftests/bpf/prog_tests/atomics.c b/tools/testing/selftests/bpf/prog_tests/atomics.c
index 13e101f370a1..92b5f378bfb8 100644
--- a/tools/testing/selftests/bpf/prog_tests/atomics.c
+++ b/tools/testing/selftests/bpf/prog_tests/atomics.c
@@ -165,11 +165,17 @@ static void test_xchg(struct atomics_lskel *skel)
void test_atomics(void)
{
struct atomics_lskel *skel;
+ int err;
- skel = atomics_lskel__open_and_load();
- if (!ASSERT_OK_PTR(skel, "atomics skeleton load"))
+ skel = atomics_lskel__open();
+ if (!ASSERT_OK_PTR(skel, "atomics skeleton open"))
return;
+ skel->keyring_id = KEY_SPEC_SESSION_KEYRING;
+ err = atomics_lskel__load(skel);
+ if (!ASSERT_OK(err, "atomics skeleton load"))
+ goto cleanup;
+
if (skel->data->skip_tests) {
printf("%s:SKIP:no ENABLE_ATOMICS_TESTS (missing Clang BPF atomics support)",
__func__);
diff --git a/tools/testing/selftests/bpf/prog_tests/attach_probe.c b/tools/testing/selftests/bpf/prog_tests/attach_probe.c
index cabc51c2ca6b..9e77e5da7097 100644
--- a/tools/testing/selftests/bpf/prog_tests/attach_probe.c
+++ b/tools/testing/selftests/bpf/prog_tests/attach_probe.c
@@ -3,6 +3,7 @@
#include "test_attach_kprobe_sleepable.skel.h"
#include "test_attach_probe_manual.skel.h"
#include "test_attach_probe.skel.h"
+#include "kprobe_write_ctx.skel.h"
/* this is how USDT semaphore is actually defined, except volatile modifier */
volatile unsigned short uprobe_ref_ctr __attribute__((unused)) __attribute((section(".probes")));
@@ -201,6 +202,31 @@ cleanup:
test_attach_probe_manual__destroy(skel);
}
+#ifdef __x86_64__
+/* attach kprobe/kretprobe long event name testings */
+static void test_attach_kprobe_write_ctx(void)
+{
+ struct kprobe_write_ctx *skel = NULL;
+ struct bpf_link *link = NULL;
+
+ skel = kprobe_write_ctx__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "kprobe_write_ctx__open_and_load"))
+ return;
+
+ link = bpf_program__attach_kprobe_opts(skel->progs.kprobe_write_ctx,
+ "bpf_fentry_test1", NULL);
+ if (!ASSERT_ERR_PTR(link, "bpf_program__attach_kprobe_opts"))
+ bpf_link__destroy(link);
+
+ kprobe_write_ctx__destroy(skel);
+}
+#else
+static void test_attach_kprobe_write_ctx(void)
+{
+ test__skip();
+}
+#endif
+
static void test_attach_probe_auto(struct test_attach_probe *skel)
{
struct bpf_link *uprobe_err_link;
@@ -406,6 +432,8 @@ void test_attach_probe(void)
test_attach_uprobe_long_event_name();
if (test__start_subtest("kprobe-long_name"))
test_attach_kprobe_long_event_name();
+ if (test__start_subtest("kprobe-write-ctx"))
+ test_attach_kprobe_write_ctx();
cleanup:
test_attach_probe__destroy(skel);
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c b/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c
index 4a0670c056ba..75f4dff7d042 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c
@@ -450,8 +450,7 @@ static void pe_subtest(struct test_bpf_cookie *skel)
attr.size = sizeof(attr);
attr.type = PERF_TYPE_SOFTWARE;
attr.config = PERF_COUNT_SW_CPU_CLOCK;
- attr.freq = 1;
- attr.sample_freq = 10000;
+ attr.sample_period = 100000;
pfd = syscall(__NR_perf_event_open, &attr, -1, 0, -1, PERF_FLAG_FD_CLOEXEC);
if (!ASSERT_GE(pfd, 0, "perf_fd"))
goto cleanup;
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_gotox.c b/tools/testing/selftests/bpf/prog_tests/bpf_gotox.c
new file mode 100644
index 000000000000..d138cc7b1bda
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_gotox.c
@@ -0,0 +1,292 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <test_progs.h>
+
+#include <linux/if_ether.h>
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/in6.h>
+#include <linux/udp.h>
+#include <linux/tcp.h>
+
+#include <sys/syscall.h>
+#include <bpf/bpf.h>
+
+#include "bpf_gotox.skel.h"
+
+static void __test_run(struct bpf_program *prog, void *ctx_in, size_t ctx_size_in)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .ctx_in = ctx_in,
+ .ctx_size_in = ctx_size_in,
+ );
+ int err, prog_fd;
+
+ prog_fd = bpf_program__fd(prog);
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ ASSERT_OK(err, "test_run_opts err");
+}
+
+static void __subtest(struct bpf_gotox *skel, void (*check)(struct bpf_gotox *))
+{
+ if (skel->data->skip)
+ test__skip();
+ else
+ check(skel);
+}
+
+static void check_simple(struct bpf_gotox *skel,
+ struct bpf_program *prog,
+ __u64 ctx_in,
+ __u64 expected)
+{
+ skel->bss->ret_user = 0;
+
+ __test_run(prog, &ctx_in, sizeof(ctx_in));
+
+ if (!ASSERT_EQ(skel->bss->ret_user, expected, "skel->bss->ret_user"))
+ return;
+}
+
+static void check_simple_fentry(struct bpf_gotox *skel,
+ struct bpf_program *prog,
+ __u64 ctx_in,
+ __u64 expected)
+{
+ skel->bss->in_user = ctx_in;
+ skel->bss->ret_user = 0;
+
+ /* trigger */
+ usleep(1);
+
+ if (!ASSERT_EQ(skel->bss->ret_user, expected, "skel->bss->ret_user"))
+ return;
+}
+
+/* validate that for two loads of the same jump table libbpf generates only one map */
+static void check_one_map_two_jumps(struct bpf_gotox *skel)
+{
+ struct bpf_prog_info prog_info;
+ struct bpf_map_info map_info;
+ __u32 len;
+ __u32 map_ids[16];
+ int prog_fd, map_fd;
+ int ret;
+ int i;
+ bool seen = false;
+
+ memset(&prog_info, 0, sizeof(prog_info));
+ prog_info.map_ids = (long)map_ids;
+ prog_info.nr_map_ids = ARRAY_SIZE(map_ids);
+ prog_fd = bpf_program__fd(skel->progs.one_map_two_jumps);
+ if (!ASSERT_GE(prog_fd, 0, "bpf_program__fd(one_map_two_jumps)"))
+ return;
+
+ len = sizeof(prog_info);
+ ret = bpf_obj_get_info_by_fd(prog_fd, &prog_info, &len);
+ if (!ASSERT_OK(ret, "bpf_obj_get_info_by_fd(prog_fd)"))
+ return;
+
+ for (i = 0; i < prog_info.nr_map_ids; i++) {
+ map_fd = bpf_map_get_fd_by_id(map_ids[i]);
+ if (!ASSERT_GE(map_fd, 0, "bpf_map_get_fd_by_id"))
+ return;
+
+ len = sizeof(map_info);
+ memset(&map_info, 0, len);
+ ret = bpf_obj_get_info_by_fd(map_fd, &map_info, &len);
+ if (!ASSERT_OK(ret, "bpf_obj_get_info_by_fd(map_fd)")) {
+ close(map_fd);
+ return;
+ }
+
+ if (map_info.type == BPF_MAP_TYPE_INSN_ARRAY) {
+ if (!ASSERT_EQ(seen, false, "more than one INSN_ARRAY map")) {
+ close(map_fd);
+ return;
+ }
+ seen = true;
+ }
+ close(map_fd);
+ }
+
+ ASSERT_EQ(seen, true, "no INSN_ARRAY map");
+}
+
+static void check_one_switch(struct bpf_gotox *skel)
+{
+ __u64 in[] = {0, 1, 2, 3, 4, 5, 77};
+ __u64 out[] = {2, 3, 4, 5, 7, 19, 19};
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(in); i++)
+ check_simple(skel, skel->progs.one_switch, in[i], out[i]);
+}
+
+static void check_one_switch_non_zero_sec_off(struct bpf_gotox *skel)
+{
+ __u64 in[] = {0, 1, 2, 3, 4, 5, 77};
+ __u64 out[] = {2, 3, 4, 5, 7, 19, 19};
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(in); i++)
+ check_simple(skel, skel->progs.one_switch_non_zero_sec_off, in[i], out[i]);
+}
+
+static void check_two_switches(struct bpf_gotox *skel)
+{
+ __u64 in[] = {0, 1, 2, 3, 4, 5, 77};
+ __u64 out[] = {103, 104, 107, 205, 115, 1019, 1019};
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(in); i++)
+ check_simple(skel, skel->progs.two_switches, in[i], out[i]);
+}
+
+static void check_big_jump_table(struct bpf_gotox *skel)
+{
+ __u64 in[] = {0, 11, 27, 31, 22, 45, 99};
+ __u64 out[] = {2, 3, 4, 5, 19, 19, 19};
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(in); i++)
+ check_simple(skel, skel->progs.big_jump_table, in[i], out[i]);
+}
+
+static void check_one_jump_two_maps(struct bpf_gotox *skel)
+{
+ __u64 in[] = {0, 1, 2, 3, 4, 5, 77};
+ __u64 out[] = {12, 15, 7 , 15, 12, 15, 15};
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(in); i++)
+ check_simple(skel, skel->progs.one_jump_two_maps, in[i], out[i]);
+}
+
+static void check_static_global(struct bpf_gotox *skel)
+{
+ __u64 in[] = {0, 1, 2, 3, 4, 5, 77};
+ __u64 out[] = {2, 3, 4, 5, 7, 19, 19};
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(in); i++)
+ check_simple(skel, skel->progs.use_static_global1, in[i], out[i]);
+ for (i = 0; i < ARRAY_SIZE(in); i++)
+ check_simple(skel, skel->progs.use_static_global2, in[i], out[i]);
+}
+
+static void check_nonstatic_global(struct bpf_gotox *skel)
+{
+ __u64 in[] = {0, 1, 2, 3, 4, 5, 77};
+ __u64 out[] = {2, 3, 4, 5, 7, 19, 19};
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(in); i++)
+ check_simple(skel, skel->progs.use_nonstatic_global1, in[i], out[i]);
+
+ for (i = 0; i < ARRAY_SIZE(in); i++)
+ check_simple(skel, skel->progs.use_nonstatic_global2, in[i], out[i]);
+}
+
+static void check_other_sec(struct bpf_gotox *skel)
+{
+ struct bpf_link *link;
+ __u64 in[] = {0, 1, 2, 3, 4, 5, 77};
+ __u64 out[] = {2, 3, 4, 5, 7, 19, 19};
+ int i;
+
+ link = bpf_program__attach(skel->progs.simple_test_other_sec);
+ if (!ASSERT_OK_PTR(link, "link"))
+ return;
+
+ for (i = 0; i < ARRAY_SIZE(in); i++)
+ check_simple_fentry(skel, skel->progs.simple_test_other_sec, in[i], out[i]);
+
+ bpf_link__destroy(link);
+}
+
+static void check_static_global_other_sec(struct bpf_gotox *skel)
+{
+ struct bpf_link *link;
+ __u64 in[] = {0, 1, 2, 3, 4, 5, 77};
+ __u64 out[] = {2, 3, 4, 5, 7, 19, 19};
+ int i;
+
+ link = bpf_program__attach(skel->progs.use_static_global_other_sec);
+ if (!ASSERT_OK_PTR(link, "link"))
+ return;
+
+ for (i = 0; i < ARRAY_SIZE(in); i++)
+ check_simple_fentry(skel, skel->progs.use_static_global_other_sec, in[i], out[i]);
+
+ bpf_link__destroy(link);
+}
+
+static void check_nonstatic_global_other_sec(struct bpf_gotox *skel)
+{
+ struct bpf_link *link;
+ __u64 in[] = {0, 1, 2, 3, 4, 5, 77};
+ __u64 out[] = {2, 3, 4, 5, 7, 19, 19};
+ int i;
+
+ link = bpf_program__attach(skel->progs.use_nonstatic_global_other_sec);
+ if (!ASSERT_OK_PTR(link, "link"))
+ return;
+
+ for (i = 0; i < ARRAY_SIZE(in); i++)
+ check_simple_fentry(skel, skel->progs.use_nonstatic_global_other_sec, in[i], out[i]);
+
+ bpf_link__destroy(link);
+}
+
+void test_bpf_gotox(void)
+{
+ struct bpf_gotox *skel;
+ int ret;
+
+ skel = bpf_gotox__open();
+ if (!ASSERT_NEQ(skel, NULL, "bpf_gotox__open"))
+ return;
+
+ ret = bpf_gotox__load(skel);
+ if (!ASSERT_OK(ret, "bpf_gotox__load"))
+ return;
+
+ skel->bss->pid = getpid();
+
+ if (test__start_subtest("one-switch"))
+ __subtest(skel, check_one_switch);
+
+ if (test__start_subtest("one-switch-non-zero-sec-offset"))
+ __subtest(skel, check_one_switch_non_zero_sec_off);
+
+ if (test__start_subtest("two-switches"))
+ __subtest(skel, check_two_switches);
+
+ if (test__start_subtest("big-jump-table"))
+ __subtest(skel, check_big_jump_table);
+
+ if (test__start_subtest("static-global"))
+ __subtest(skel, check_static_global);
+
+ if (test__start_subtest("nonstatic-global"))
+ __subtest(skel, check_nonstatic_global);
+
+ if (test__start_subtest("other-sec"))
+ __subtest(skel, check_other_sec);
+
+ if (test__start_subtest("static-global-other-sec"))
+ __subtest(skel, check_static_global_other_sec);
+
+ if (test__start_subtest("nonstatic-global-other-sec"))
+ __subtest(skel, check_nonstatic_global_other_sec);
+
+ if (test__start_subtest("one-jump-two-maps"))
+ __subtest(skel, check_one_jump_two_maps);
+
+ if (test__start_subtest("one-map-two-jumps"))
+ __subtest(skel, check_one_map_two_jumps);
+
+ bpf_gotox__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_insn_array.c b/tools/testing/selftests/bpf/prog_tests/bpf_insn_array.c
new file mode 100644
index 000000000000..269870bec941
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_insn_array.c
@@ -0,0 +1,504 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <bpf/bpf.h>
+#include <test_progs.h>
+
+#ifdef __x86_64__
+static int map_create(__u32 map_type, __u32 max_entries)
+{
+ const char *map_name = "insn_array";
+ __u32 key_size = 4;
+ __u32 value_size = sizeof(struct bpf_insn_array_value);
+
+ return bpf_map_create(map_type, map_name, key_size, value_size, max_entries, NULL);
+}
+
+static int prog_load(struct bpf_insn *insns, __u32 insn_cnt, int *fd_array, __u32 fd_array_cnt)
+{
+ LIBBPF_OPTS(bpf_prog_load_opts, opts);
+
+ opts.fd_array = fd_array;
+ opts.fd_array_cnt = fd_array_cnt;
+
+ return bpf_prog_load(BPF_PROG_TYPE_XDP, NULL, "GPL", insns, insn_cnt, &opts);
+}
+
+static void __check_success(struct bpf_insn *insns, __u32 insn_cnt, __u32 *map_in, __u32 *map_out)
+{
+ struct bpf_insn_array_value val = {};
+ int prog_fd = -1, map_fd, i;
+
+ map_fd = map_create(BPF_MAP_TYPE_INSN_ARRAY, insn_cnt);
+ if (!ASSERT_GE(map_fd, 0, "map_create"))
+ return;
+
+ for (i = 0; i < insn_cnt; i++) {
+ val.orig_off = map_in[i];
+ if (!ASSERT_EQ(bpf_map_update_elem(map_fd, &i, &val, 0), 0, "bpf_map_update_elem"))
+ goto cleanup;
+ }
+
+ if (!ASSERT_EQ(bpf_map_freeze(map_fd), 0, "bpf_map_freeze"))
+ goto cleanup;
+
+ prog_fd = prog_load(insns, insn_cnt, &map_fd, 1);
+ if (!ASSERT_GE(prog_fd, 0, "bpf(BPF_PROG_LOAD)"))
+ goto cleanup;
+
+ for (i = 0; i < insn_cnt; i++) {
+ char buf[64];
+
+ if (!ASSERT_EQ(bpf_map_lookup_elem(map_fd, &i, &val), 0, "bpf_map_lookup_elem"))
+ goto cleanup;
+
+ snprintf(buf, sizeof(buf), "val.xlated_off should be equal map_out[%d]", i);
+ ASSERT_EQ(val.xlated_off, map_out[i], buf);
+ }
+
+cleanup:
+ close(prog_fd);
+ close(map_fd);
+}
+
+/*
+ * Load a program, which will not be anyhow mangled by the verifier. Add an
+ * insn_array map pointing to every instruction. Check that it hasn't changed
+ * after the program load.
+ */
+static void check_one_to_one_mapping(void)
+{
+ struct bpf_insn insns[] = {
+ BPF_MOV64_IMM(BPF_REG_0, 4),
+ BPF_MOV64_IMM(BPF_REG_0, 3),
+ BPF_MOV64_IMM(BPF_REG_0, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ };
+ __u32 map_in[] = {0, 1, 2, 3, 4, 5};
+ __u32 map_out[] = {0, 1, 2, 3, 4, 5};
+
+ __check_success(insns, ARRAY_SIZE(insns), map_in, map_out);
+}
+
+/*
+ * Load a program with two patches (get jiffies, for simplicity). Add an
+ * insn_array map pointing to every instruction. Check how it was changed
+ * after the program load.
+ */
+static void check_simple(void)
+{
+ struct bpf_insn insns[] = {
+ BPF_MOV64_IMM(BPF_REG_0, 2),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_jiffies64),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_jiffies64),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ };
+ __u32 map_in[] = {0, 1, 2, 3, 4, 5};
+ __u32 map_out[] = {0, 1, 4, 5, 8, 9};
+
+ __check_success(insns, ARRAY_SIZE(insns), map_in, map_out);
+}
+
+/*
+ * Verifier can delete code in two cases: nops & dead code. From insn
+ * array's point of view, the two cases are the same, so test using
+ * the simplest method: by loading some nops
+ */
+static void check_deletions(void)
+{
+ struct bpf_insn insns[] = {
+ BPF_MOV64_IMM(BPF_REG_0, 2),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 0), /* nop */
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 0), /* nop */
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ };
+ __u32 map_in[] = {0, 1, 2, 3, 4, 5};
+ __u32 map_out[] = {0, -1, 1, -1, 2, 3};
+
+ __check_success(insns, ARRAY_SIZE(insns), map_in, map_out);
+}
+
+/*
+ * Same test as check_deletions, but also add code which adds instructions
+ */
+static void check_deletions_with_functions(void)
+{
+ struct bpf_insn insns[] = {
+ BPF_JMP_IMM(BPF_JA, 0, 0, 0), /* nop */
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_jiffies64),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 0), /* nop */
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 0), /* nop */
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_jiffies64),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 0), /* nop */
+ BPF_MOV64_IMM(BPF_REG_0, 2),
+ BPF_EXIT_INSN(),
+ };
+ __u32 map_in[] = { 0, 1, 2, 3, 4, 5, /* func */ 6, 7, 8, 9, 10};
+ __u32 map_out[] = {-1, 0, -1, 3, 4, 5, /* func */ -1, 6, -1, 9, 10};
+
+ __check_success(insns, ARRAY_SIZE(insns), map_in, map_out);
+}
+
+/*
+ * Try to load a program with a map which points to outside of the program
+ */
+static void check_out_of_bounds_index(void)
+{
+ struct bpf_insn insns[] = {
+ BPF_MOV64_IMM(BPF_REG_0, 4),
+ BPF_MOV64_IMM(BPF_REG_0, 3),
+ BPF_MOV64_IMM(BPF_REG_0, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ };
+ int prog_fd, map_fd;
+ struct bpf_insn_array_value val = {};
+ int key;
+
+ map_fd = map_create(BPF_MAP_TYPE_INSN_ARRAY, 1);
+ if (!ASSERT_GE(map_fd, 0, "map_create"))
+ return;
+
+ key = 0;
+ val.orig_off = ARRAY_SIZE(insns); /* too big */
+ if (!ASSERT_EQ(bpf_map_update_elem(map_fd, &key, &val, 0), 0, "bpf_map_update_elem"))
+ goto cleanup;
+
+ if (!ASSERT_EQ(bpf_map_freeze(map_fd), 0, "bpf_map_freeze"))
+ goto cleanup;
+
+ prog_fd = prog_load(insns, ARRAY_SIZE(insns), &map_fd, 1);
+ if (!ASSERT_EQ(prog_fd, -EINVAL, "program should have been rejected (prog_fd != -EINVAL)")) {
+ close(prog_fd);
+ goto cleanup;
+ }
+
+cleanup:
+ close(map_fd);
+}
+
+/*
+ * Try to load a program with a map which points to the middle of 16-bit insn
+ */
+static void check_mid_insn_index(void)
+{
+ struct bpf_insn insns[] = {
+ BPF_LD_IMM64(BPF_REG_0, 0), /* 2 x 8 */
+ BPF_EXIT_INSN(),
+ };
+ int prog_fd, map_fd;
+ struct bpf_insn_array_value val = {};
+ int key;
+
+ map_fd = map_create(BPF_MAP_TYPE_INSN_ARRAY, 1);
+ if (!ASSERT_GE(map_fd, 0, "map_create"))
+ return;
+
+ key = 0;
+ val.orig_off = 1; /* middle of 16-byte instruction */
+ if (!ASSERT_EQ(bpf_map_update_elem(map_fd, &key, &val, 0), 0, "bpf_map_update_elem"))
+ goto cleanup;
+
+ if (!ASSERT_EQ(bpf_map_freeze(map_fd), 0, "bpf_map_freeze"))
+ goto cleanup;
+
+ prog_fd = prog_load(insns, ARRAY_SIZE(insns), &map_fd, 1);
+ if (!ASSERT_EQ(prog_fd, -EINVAL, "program should have been rejected (prog_fd != -EINVAL)")) {
+ close(prog_fd);
+ goto cleanup;
+ }
+
+cleanup:
+ close(map_fd);
+}
+
+static void check_incorrect_index(void)
+{
+ check_out_of_bounds_index();
+ check_mid_insn_index();
+}
+
+static int set_bpf_jit_harden(char *level)
+{
+ char old_level;
+ int err = -1;
+ int fd = -1;
+
+ fd = open("/proc/sys/net/core/bpf_jit_harden", O_RDWR | O_NONBLOCK);
+ if (fd < 0) {
+ ASSERT_FAIL("open .../bpf_jit_harden returned %d (errno=%d)", fd, errno);
+ return -1;
+ }
+
+ err = read(fd, &old_level, 1);
+ if (err != 1) {
+ ASSERT_FAIL("read from .../bpf_jit_harden returned %d (errno=%d)", err, errno);
+ err = -1;
+ goto end;
+ }
+
+ lseek(fd, 0, SEEK_SET);
+
+ err = write(fd, level, 1);
+ if (err != 1) {
+ ASSERT_FAIL("write to .../bpf_jit_harden returned %d (errno=%d)", err, errno);
+ err = -1;
+ goto end;
+ }
+
+ err = 0;
+ *level = old_level;
+end:
+ if (fd >= 0)
+ close(fd);
+ return err;
+}
+
+static void check_blindness(void)
+{
+ struct bpf_insn insns[] = {
+ BPF_MOV64_IMM(BPF_REG_0, 4),
+ BPF_MOV64_IMM(BPF_REG_0, 3),
+ BPF_MOV64_IMM(BPF_REG_0, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ };
+ int prog_fd = -1, map_fd;
+ struct bpf_insn_array_value val = {};
+ char bpf_jit_harden = '@'; /* non-exizsting value */
+ int i;
+
+ map_fd = map_create(BPF_MAP_TYPE_INSN_ARRAY, ARRAY_SIZE(insns));
+ if (!ASSERT_GE(map_fd, 0, "map_create"))
+ return;
+
+ for (i = 0; i < ARRAY_SIZE(insns); i++) {
+ val.orig_off = i;
+ if (!ASSERT_EQ(bpf_map_update_elem(map_fd, &i, &val, 0), 0, "bpf_map_update_elem"))
+ goto cleanup;
+ }
+
+ if (!ASSERT_EQ(bpf_map_freeze(map_fd), 0, "bpf_map_freeze"))
+ goto cleanup;
+
+ bpf_jit_harden = '2';
+ if (set_bpf_jit_harden(&bpf_jit_harden)) {
+ bpf_jit_harden = '@'; /* open, read or write failed => no write was done */
+ goto cleanup;
+ }
+
+ prog_fd = prog_load(insns, ARRAY_SIZE(insns), &map_fd, 1);
+ if (!ASSERT_GE(prog_fd, 0, "bpf(BPF_PROG_LOAD)"))
+ goto cleanup;
+
+ for (i = 0; i < ARRAY_SIZE(insns); i++) {
+ char fmt[32];
+
+ if (!ASSERT_EQ(bpf_map_lookup_elem(map_fd, &i, &val), 0, "bpf_map_lookup_elem"))
+ goto cleanup;
+
+ snprintf(fmt, sizeof(fmt), "val should be equal 3*%d", i);
+ ASSERT_EQ(val.xlated_off, i * 3, fmt);
+ }
+
+cleanup:
+ /* restore the old one */
+ if (bpf_jit_harden != '@')
+ set_bpf_jit_harden(&bpf_jit_harden);
+
+ close(prog_fd);
+ close(map_fd);
+}
+
+/* Once map was initialized, it should be frozen */
+static void check_load_unfrozen_map(void)
+{
+ struct bpf_insn insns[] = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ };
+ int prog_fd = -1, map_fd;
+ struct bpf_insn_array_value val = {};
+ int i;
+
+ map_fd = map_create(BPF_MAP_TYPE_INSN_ARRAY, ARRAY_SIZE(insns));
+ if (!ASSERT_GE(map_fd, 0, "map_create"))
+ return;
+
+ for (i = 0; i < ARRAY_SIZE(insns); i++) {
+ val.orig_off = i;
+ if (!ASSERT_EQ(bpf_map_update_elem(map_fd, &i, &val, 0), 0, "bpf_map_update_elem"))
+ goto cleanup;
+ }
+
+ prog_fd = prog_load(insns, ARRAY_SIZE(insns), &map_fd, 1);
+ if (!ASSERT_EQ(prog_fd, -EINVAL, "program should have been rejected (prog_fd != -EINVAL)"))
+ goto cleanup;
+
+ /* correctness: now freeze the map, the program should load fine */
+
+ if (!ASSERT_EQ(bpf_map_freeze(map_fd), 0, "bpf_map_freeze"))
+ goto cleanup;
+
+ prog_fd = prog_load(insns, ARRAY_SIZE(insns), &map_fd, 1);
+ if (!ASSERT_GE(prog_fd, 0, "bpf(BPF_PROG_LOAD)"))
+ goto cleanup;
+
+ for (i = 0; i < ARRAY_SIZE(insns); i++) {
+ if (!ASSERT_EQ(bpf_map_lookup_elem(map_fd, &i, &val), 0, "bpf_map_lookup_elem"))
+ goto cleanup;
+
+ ASSERT_EQ(val.xlated_off, i, "val should be equal i");
+ }
+
+cleanup:
+ close(prog_fd);
+ close(map_fd);
+}
+
+/* Map can be used only by one BPF program */
+static void check_no_map_reuse(void)
+{
+ struct bpf_insn insns[] = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ };
+ int prog_fd = -1, map_fd, extra_fd = -1;
+ struct bpf_insn_array_value val = {};
+ int i;
+
+ map_fd = map_create(BPF_MAP_TYPE_INSN_ARRAY, ARRAY_SIZE(insns));
+ if (!ASSERT_GE(map_fd, 0, "map_create"))
+ return;
+
+ for (i = 0; i < ARRAY_SIZE(insns); i++) {
+ val.orig_off = i;
+ if (!ASSERT_EQ(bpf_map_update_elem(map_fd, &i, &val, 0), 0, "bpf_map_update_elem"))
+ goto cleanup;
+ }
+
+ if (!ASSERT_EQ(bpf_map_freeze(map_fd), 0, "bpf_map_freeze"))
+ goto cleanup;
+
+ prog_fd = prog_load(insns, ARRAY_SIZE(insns), &map_fd, 1);
+ if (!ASSERT_GE(prog_fd, 0, "bpf(BPF_PROG_LOAD)"))
+ goto cleanup;
+
+ for (i = 0; i < ARRAY_SIZE(insns); i++) {
+ if (!ASSERT_EQ(bpf_map_lookup_elem(map_fd, &i, &val), 0, "bpf_map_lookup_elem"))
+ goto cleanup;
+
+ ASSERT_EQ(val.xlated_off, i, "val should be equal i");
+ }
+
+ extra_fd = prog_load(insns, ARRAY_SIZE(insns), &map_fd, 1);
+ if (!ASSERT_EQ(extra_fd, -EBUSY, "program should have been rejected (extra_fd != -EBUSY)"))
+ goto cleanup;
+
+ /* correctness: check that prog is still loadable without fd_array */
+ extra_fd = prog_load(insns, ARRAY_SIZE(insns), NULL, 0);
+ if (!ASSERT_GE(extra_fd, 0, "bpf(BPF_PROG_LOAD): expected no error"))
+ goto cleanup;
+
+cleanup:
+ close(extra_fd);
+ close(prog_fd);
+ close(map_fd);
+}
+
+static void check_bpf_no_lookup(void)
+{
+ struct bpf_insn insns[] = {
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_EXIT_INSN(),
+ };
+ int prog_fd = -1, map_fd;
+
+ map_fd = map_create(BPF_MAP_TYPE_INSN_ARRAY, 1);
+ if (!ASSERT_GE(map_fd, 0, "map_create"))
+ return;
+
+ insns[0].imm = map_fd;
+
+ if (!ASSERT_EQ(bpf_map_freeze(map_fd), 0, "bpf_map_freeze"))
+ goto cleanup;
+
+ prog_fd = prog_load(insns, ARRAY_SIZE(insns), NULL, 0);
+ if (!ASSERT_EQ(prog_fd, -EINVAL, "program should have been rejected (prog_fd != -EINVAL)"))
+ goto cleanup;
+
+ /* correctness: check that prog is still loadable with normal map */
+ close(map_fd);
+ map_fd = map_create(BPF_MAP_TYPE_ARRAY, 1);
+ insns[0].imm = map_fd;
+ prog_fd = prog_load(insns, ARRAY_SIZE(insns), NULL, 0);
+ if (!ASSERT_GE(prog_fd, 0, "bpf(BPF_PROG_LOAD)"))
+ goto cleanup;
+
+cleanup:
+ close(prog_fd);
+ close(map_fd);
+}
+
+static void check_bpf_side(void)
+{
+ check_bpf_no_lookup();
+}
+
+static void __test_bpf_insn_array(void)
+{
+ /* Test if offsets are adjusted properly */
+
+ if (test__start_subtest("one2one"))
+ check_one_to_one_mapping();
+
+ if (test__start_subtest("simple"))
+ check_simple();
+
+ if (test__start_subtest("deletions"))
+ check_deletions();
+
+ if (test__start_subtest("deletions-with-functions"))
+ check_deletions_with_functions();
+
+ if (test__start_subtest("blindness"))
+ check_blindness();
+
+ /* Check all kinds of operations and related restrictions */
+
+ if (test__start_subtest("incorrect-index"))
+ check_incorrect_index();
+
+ if (test__start_subtest("load-unfrozen-map"))
+ check_load_unfrozen_map();
+
+ if (test__start_subtest("no-map-reuse"))
+ check_no_map_reuse();
+
+ if (test__start_subtest("bpf-side-ops"))
+ check_bpf_side();
+}
+#else
+static void __test_bpf_insn_array(void)
+{
+ test__skip();
+}
+#endif
+
+void test_bpf_insn_array(void)
+{
+ __test_bpf_insn_array();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/btf.c b/tools/testing/selftests/bpf/prog_tests/btf.c
index 8a9ba4292109..054ecb6b1e9f 100644
--- a/tools/testing/selftests/bpf/prog_tests/btf.c
+++ b/tools/testing/selftests/bpf/prog_tests/btf.c
@@ -7496,6 +7496,71 @@ static struct btf_dedup_test dedup_tests[] = {
},
},
{
+ .descr = "dedup: recursive typedef",
+ /*
+ * This test simulates a recursive typedef, which in GO is defined as such:
+ *
+ * type Foo func() Foo
+ *
+ * In BTF terms, this is represented as a TYPEDEF referencing
+ * a FUNC_PROTO that returns the same TYPEDEF.
+ */
+ .input = {
+ .raw_types = {
+ /*
+ * [1] typedef Foo -> func() Foo
+ * [2] func_proto() -> Foo
+ * [3] typedef Foo -> func() Foo
+ * [4] func_proto() -> Foo
+ */
+ BTF_TYPEDEF_ENC(NAME_NTH(1), 2), /* [1] */
+ BTF_FUNC_PROTO_ENC(1, 0), /* [2] */
+ BTF_TYPEDEF_ENC(NAME_NTH(1), 4), /* [3] */
+ BTF_FUNC_PROTO_ENC(3, 0), /* [4] */
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0Foo"),
+ },
+ .expect = {
+ .raw_types = {
+ BTF_TYPEDEF_ENC(NAME_NTH(1), 2), /* [1] */
+ BTF_FUNC_PROTO_ENC(1, 0), /* [2] */
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0Foo"),
+ },
+},
+{
+ .descr = "dedup: typedef",
+ /*
+ * // CU 1:
+ * typedef int foo;
+ *
+ * // CU 2:
+ * typedef int foo;
+ */
+ .input = {
+ .raw_types = {
+ /* CU 1 */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ BTF_TYPEDEF_ENC(NAME_NTH(1), 1), /* [2] */
+ /* CU 2 */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [3] */
+ BTF_TYPEDEF_ENC(NAME_NTH(1), 3), /* [4] */
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0foo"),
+ },
+ .expect = {
+ .raw_types = {
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ BTF_TYPEDEF_ENC(NAME_NTH(1), 1), /* [2] */
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0foo"),
+ },
+},
+{
.descr = "dedup: typedef tags",
.input = {
.raw_types = {
diff --git a/tools/testing/selftests/bpf/prog_tests/btf_dump.c b/tools/testing/selftests/bpf/prog_tests/btf_dump.c
index 82903585c870..10cba526d3e6 100644
--- a/tools/testing/selftests/bpf/prog_tests/btf_dump.c
+++ b/tools/testing/selftests/bpf/prog_tests/btf_dump.c
@@ -63,7 +63,7 @@ static int test_btf_dump_case(int n, struct btf_dump_test_case *t)
/* tests with t->known_ptr_sz have no "long" or "unsigned long" type,
* so it's impossible to determine correct pointer size; but if they
- * do, it should be 8 regardless of host architecture, becaues BPF
+ * do, it should be 8 regardless of host architecture, because BPF
* target is always 64-bit
*/
if (!t->known_ptr_sz) {
diff --git a/tools/testing/selftests/bpf/prog_tests/btf_split.c b/tools/testing/selftests/bpf/prog_tests/btf_split.c
index 3696fb9a05ed..2d47cad50a51 100644
--- a/tools/testing/selftests/bpf/prog_tests/btf_split.c
+++ b/tools/testing/selftests/bpf/prog_tests/btf_split.c
@@ -12,11 +12,45 @@ static void btf_dump_printf(void *ctx, const char *fmt, va_list args)
vfprintf(ctx, fmt, args);
}
+/* Write raw BTF to file, return number of bytes written or negative errno */
+static ssize_t btf_raw_write(struct btf *btf, char *file)
+{
+ ssize_t written = 0;
+ const void *data;
+ __u32 size = 0;
+ int fd, ret;
+
+ fd = mkstemp(file);
+ if (!ASSERT_GE(fd, 0, "create_file"))
+ return -errno;
+
+ data = btf__raw_data(btf, &size);
+ if (!ASSERT_OK_PTR(data, "btf__raw_data")) {
+ close(fd);
+ return -EINVAL;
+ }
+ while (written < size) {
+ ret = write(fd, data + written, size - written);
+ if (!ASSERT_GE(ret, 0, "write succeeded")) {
+ close(fd);
+ return -errno;
+ }
+ written += ret;
+ }
+ close(fd);
+ return written;
+}
+
static void __test_btf_split(bool multi)
{
+ char multisplit_btf_file[] = "/tmp/test_btf_multisplit.XXXXXX";
+ char split_btf_file[] = "/tmp/test_btf_split.XXXXXX";
+ char base_btf_file[] = "/tmp/test_btf_base.XXXXXX";
+ ssize_t multisplit_btf_sz = 0, split_btf_sz = 0, base_btf_sz = 0;
struct btf_dump *d = NULL;
- const struct btf_type *t;
- struct btf *btf1, *btf2, *btf3 = NULL;
+ const struct btf_type *t, *ot;
+ struct btf *btf1 = NULL, *btf2 = NULL, *btf3 = NULL;
+ struct btf *btf4 = NULL, *btf5 = NULL, *btf6 = NULL;
int str_off, i, err;
btf1 = btf__new_empty();
@@ -123,6 +157,45 @@ static void __test_btf_split(bool multi)
" int uf2;\n"
"};\n\n", "c_dump");
+ /* write base, split BTFs to files and ensure parsing succeeds */
+ base_btf_sz = btf_raw_write(btf1, base_btf_file);
+ if (base_btf_sz < 0)
+ goto cleanup;
+ split_btf_sz = btf_raw_write(btf2, split_btf_file);
+ if (split_btf_sz < 0)
+ goto cleanup;
+ btf4 = btf__parse(base_btf_file, NULL);
+ if (!ASSERT_OK_PTR(btf4, "parse_base"))
+ goto cleanup;
+ btf5 = btf__parse_split(split_btf_file, btf4);
+ if (!ASSERT_OK_PTR(btf5, "parse_split"))
+ goto cleanup;
+ if (multi) {
+ multisplit_btf_sz = btf_raw_write(btf3, multisplit_btf_file);
+ if (multisplit_btf_sz < 0)
+ goto cleanup;
+ btf6 = btf__parse_split(multisplit_btf_file, btf5);
+ if (!ASSERT_OK_PTR(btf6, "parse_multisplit"))
+ goto cleanup;
+ } else {
+ btf6 = btf5;
+ }
+
+ if (!ASSERT_EQ(btf__type_cnt(btf3), btf__type_cnt(btf6), "cmp_type_cnt"))
+ goto cleanup;
+
+ /* compare parsed to original BTF */
+ for (i = 1; i < btf__type_cnt(btf6); i++) {
+ t = btf__type_by_id(btf6, i);
+ if (!ASSERT_OK_PTR(t, "type_in_parsed_btf"))
+ goto cleanup;
+ ot = btf__type_by_id(btf3, i);
+ if (!ASSERT_OK_PTR(ot, "type_in_orig_btf"))
+ goto cleanup;
+ if (!ASSERT_EQ(memcmp(t, ot, sizeof(*ot)), 0, "cmp_parsed_orig_btf"))
+ goto cleanup;
+ }
+
cleanup:
if (dump_buf_file)
fclose(dump_buf_file);
@@ -132,6 +205,16 @@ cleanup:
btf__free(btf2);
if (btf2 != btf3)
btf__free(btf3);
+ btf__free(btf4);
+ btf__free(btf5);
+ if (btf5 != btf6)
+ btf__free(btf6);
+ if (base_btf_sz > 0)
+ unlink(base_btf_file);
+ if (split_btf_sz > 0)
+ unlink(split_btf_file);
+ if (multisplit_btf_sz > 0)
+ unlink(multisplit_btf_file);
}
void test_btf_split(void)
diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_xattr.c b/tools/testing/selftests/bpf/prog_tests/cgroup_xattr.c
index e0dd966e4a3e..5ad904e9d15d 100644
--- a/tools/testing/selftests/bpf/prog_tests/cgroup_xattr.c
+++ b/tools/testing/selftests/bpf/prog_tests/cgroup_xattr.c
@@ -44,7 +44,7 @@ static void test_read_cgroup_xattr(void)
if (!ASSERT_OK_PTR(skel, "read_cgroupfs_xattr__open_and_load"))
goto out;
- skel->bss->target_pid = gettid();
+ skel->bss->target_pid = sys_gettid();
if (!ASSERT_OK(read_cgroupfs_xattr__attach(skel), "read_cgroupfs_xattr__attach"))
goto out;
diff --git a/tools/testing/selftests/bpf/prog_tests/cgrp_kfunc.c b/tools/testing/selftests/bpf/prog_tests/cgrp_kfunc.c
index adda85f97058..4b42fbc96efc 100644
--- a/tools/testing/selftests/bpf/prog_tests/cgrp_kfunc.c
+++ b/tools/testing/selftests/bpf/prog_tests/cgrp_kfunc.c
@@ -4,6 +4,8 @@
#define _GNU_SOURCE
#include <cgroup_helpers.h>
#include <test_progs.h>
+#include <sched.h>
+#include <sys/wait.h>
#include "cgrp_kfunc_failure.skel.h"
#include "cgrp_kfunc_success.skel.h"
@@ -87,6 +89,72 @@ static const char * const success_tests[] = {
"test_cgrp_from_id",
};
+static void test_cgrp_from_id_ns(void)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, opts);
+ struct cgrp_kfunc_success *skel;
+ struct bpf_program *prog;
+ int pid, pipe_fd[2];
+
+ skel = open_load_cgrp_kfunc_skel();
+ if (!ASSERT_OK_PTR(skel, "open_load_skel"))
+ return;
+
+ if (!ASSERT_OK(skel->bss->err, "pre_mkdir_err"))
+ goto cleanup;
+
+ prog = skel->progs.test_cgrp_from_id_ns;
+
+ if (!ASSERT_OK(pipe(pipe_fd), "pipe"))
+ goto cleanup;
+
+ pid = fork();
+ if (!ASSERT_GE(pid, 0, "fork result")) {
+ close(pipe_fd[0]);
+ close(pipe_fd[1]);
+ goto cleanup;
+ }
+
+ if (pid == 0) {
+ int ret = 0;
+
+ close(pipe_fd[0]);
+
+ if (!ASSERT_GE(cgroup_setup_and_join("cgrp_from_id_ns"), 0, "join cgroup"))
+ exit(1);
+
+ if (!ASSERT_OK(unshare(CLONE_NEWCGROUP), "unshare cgns"))
+ exit(1);
+
+ ret = bpf_prog_test_run_opts(bpf_program__fd(prog), &opts);
+ if (!ASSERT_OK(ret, "test run ret"))
+ exit(1);
+
+ if (!ASSERT_OK(opts.retval, "test run retval"))
+ exit(1);
+
+ if (!ASSERT_EQ(write(pipe_fd[1], &ret, sizeof(ret)), sizeof(ret), "write pipe"))
+ exit(1);
+
+ exit(0);
+ } else {
+ int res;
+
+ close(pipe_fd[1]);
+
+ ASSERT_EQ(read(pipe_fd[0], &res, sizeof(res)), sizeof(res), "read res");
+ ASSERT_EQ(waitpid(pid, NULL, 0), pid, "wait on child");
+
+ remove_cgroup_pid("cgrp_from_id_ns", pid);
+
+ ASSERT_OK(res, "result from run");
+ }
+
+ close(pipe_fd[0]);
+cleanup:
+ cgrp_kfunc_success__destroy(skel);
+}
+
void test_cgrp_kfunc(void)
{
int i, err;
@@ -102,6 +170,9 @@ void test_cgrp_kfunc(void)
run_success_test(success_tests[i]);
}
+ if (test__start_subtest("test_cgrp_from_id_ns"))
+ test_cgrp_from_id_ns();
+
RUN_TESTS(cgrp_kfunc_failure);
cleanup:
diff --git a/tools/testing/selftests/bpf/prog_tests/check_mtu.c b/tools/testing/selftests/bpf/prog_tests/check_mtu.c
index 2a9a30650350..65b4512967e7 100644
--- a/tools/testing/selftests/bpf/prog_tests/check_mtu.c
+++ b/tools/testing/selftests/bpf/prog_tests/check_mtu.c
@@ -153,6 +153,26 @@ static void test_check_mtu_run_tc(struct test_check_mtu *skel,
ASSERT_EQ(mtu_result, mtu_expect, "MTU-compare-user");
}
+static void test_chk_segs_flag(struct test_check_mtu *skel, __u32 mtu)
+{
+ int err, prog_fd = bpf_program__fd(skel->progs.tc_chk_segs_flag);
+ struct __sk_buff skb = {
+ .gso_size = 10,
+ };
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .ctx_in = &skb,
+ .ctx_size_in = sizeof(skb),
+ );
+
+ /* Lower the mtu to test the BPF_MTU_CHK_SEGS */
+ SYS_NOFAIL("ip link set dev lo mtu 10");
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ SYS_NOFAIL("ip link set dev lo mtu %u", mtu);
+ ASSERT_OK(err, "test_run");
+ ASSERT_EQ(topts.retval, BPF_OK, "retval");
+}
static void test_check_mtu_tc(__u32 mtu, __u32 ifindex)
{
@@ -177,11 +197,12 @@ static void test_check_mtu_tc(__u32 mtu, __u32 ifindex)
test_check_mtu_run_tc(skel, skel->progs.tc_minus_delta, mtu);
test_check_mtu_run_tc(skel, skel->progs.tc_input_len, mtu);
test_check_mtu_run_tc(skel, skel->progs.tc_input_len_exceed, mtu);
+ test_chk_segs_flag(skel, mtu);
cleanup:
test_check_mtu__destroy(skel);
}
-void serial_test_check_mtu(void)
+void test_ns_check_mtu(void)
{
int mtu_lo;
diff --git a/tools/testing/selftests/bpf/prog_tests/cls_redirect.c b/tools/testing/selftests/bpf/prog_tests/cls_redirect.c
index 34b59f6baca1..7488a7606e6a 100644
--- a/tools/testing/selftests/bpf/prog_tests/cls_redirect.c
+++ b/tools/testing/selftests/bpf/prog_tests/cls_redirect.c
@@ -22,79 +22,37 @@
static int duration = 0;
-struct addr_port {
- in_port_t port;
- union {
- struct in_addr in_addr;
- struct in6_addr in6_addr;
- };
-};
-
-struct tuple {
- int family;
- struct addr_port src;
- struct addr_port dst;
-};
-
-static bool fill_addr_port(const struct sockaddr *sa, struct addr_port *ap)
-{
- const struct sockaddr_in6 *in6;
- const struct sockaddr_in *in;
-
- switch (sa->sa_family) {
- case AF_INET:
- in = (const struct sockaddr_in *)sa;
- ap->in_addr = in->sin_addr;
- ap->port = in->sin_port;
- return true;
-
- case AF_INET6:
- in6 = (const struct sockaddr_in6 *)sa;
- ap->in6_addr = in6->sin6_addr;
- ap->port = in6->sin6_port;
- return true;
-
- default:
- return false;
- }
-}
-static bool set_up_conn(const struct sockaddr *addr, socklen_t len, int type,
- int *server, int *conn, struct tuple *tuple)
+static bool set_up_conn(const struct sockaddr_storage *addr, socklen_t len, int type,
+ int *server, int *conn,
+ struct sockaddr_storage *src,
+ struct sockaddr_storage *dst)
{
struct sockaddr_storage ss;
socklen_t slen = sizeof(ss);
- struct sockaddr *sa = (struct sockaddr *)&ss;
- *server = start_server_addr(type, (struct sockaddr_storage *)addr, len, NULL);
+ *server = start_server_addr(type, addr, len, NULL);
if (*server < 0)
return false;
- if (CHECK_FAIL(getsockname(*server, sa, &slen)))
+ if (CHECK_FAIL(getsockname(*server, (struct sockaddr *)&ss, &slen)))
goto close_server;
- *conn = connect_to_addr(type, (struct sockaddr_storage *)sa, slen, NULL);
+ *conn = connect_to_addr(type, &ss, slen, NULL);
if (*conn < 0)
goto close_server;
/* We want to simulate packets arriving at conn, so we have to
* swap src and dst.
*/
- slen = sizeof(ss);
- if (CHECK_FAIL(getsockname(*conn, sa, &slen)))
- goto close_conn;
-
- if (CHECK_FAIL(!fill_addr_port(sa, &tuple->dst)))
+ slen = sizeof(*dst);
+ if (CHECK_FAIL(getsockname(*conn, (struct sockaddr *)dst, &slen)))
goto close_conn;
- slen = sizeof(ss);
- if (CHECK_FAIL(getpeername(*conn, sa, &slen)))
+ slen = sizeof(*src);
+ if (CHECK_FAIL(getpeername(*conn, (struct sockaddr *)src, &slen)))
goto close_conn;
- if (CHECK_FAIL(!fill_addr_port(sa, &tuple->src)))
- goto close_conn;
-
- tuple->family = ss.ss_family;
return true;
close_conn:
@@ -110,17 +68,16 @@ static socklen_t prepare_addr(struct sockaddr_storage *addr, int family)
{
struct sockaddr_in *addr4;
struct sockaddr_in6 *addr6;
+ memset(addr, 0, sizeof(*addr));
switch (family) {
case AF_INET:
addr4 = (struct sockaddr_in *)addr;
- memset(addr4, 0, sizeof(*addr4));
addr4->sin_family = family;
addr4->sin_addr.s_addr = htonl(INADDR_LOOPBACK);
return sizeof(*addr4);
case AF_INET6:
addr6 = (struct sockaddr_in6 *)addr;
- memset(addr6, 0, sizeof(*addr6));
addr6->sin6_family = family;
addr6->sin6_addr = in6addr_loopback;
return sizeof(*addr6);
@@ -242,9 +199,15 @@ static void encap_init(encap_headers_t *encap, uint8_t hop_count, uint8_t proto)
}
static size_t build_input(const struct test_cfg *test, void *const buf,
- const struct tuple *tuple)
+ const struct sockaddr_storage *src,
+ const struct sockaddr_storage *dst)
{
- in_port_t sport = tuple->src.port;
+ struct sockaddr_in6 *src_in6 = (struct sockaddr_in6 *)src;
+ struct sockaddr_in6 *dst_in6 = (struct sockaddr_in6 *)dst;
+ struct sockaddr_in *src_in = (struct sockaddr_in *)src;
+ struct sockaddr_in *dst_in = (struct sockaddr_in *)dst;
+ sa_family_t family = src->ss_family;
+ in_port_t sport, dport;
encap_headers_t encap;
struct iphdr ip;
struct ipv6hdr ipv6;
@@ -254,8 +217,11 @@ static size_t build_input(const struct test_cfg *test, void *const buf,
uint8_t *p = buf;
int proto;
+ sport = (family == AF_INET) ? src_in->sin_port : src_in6->sin6_port;
+ dport = (family == AF_INET) ? dst_in->sin_port : dst_in6->sin6_port;
+
proto = IPPROTO_IPIP;
- if (tuple->family == AF_INET6)
+ if (family == AF_INET6)
proto = IPPROTO_IPV6;
encap_init(&encap, test->hops == ONE_HOP ? 1 : 0, proto);
@@ -270,15 +236,15 @@ static size_t build_input(const struct test_cfg *test, void *const buf,
if (test->type == UDP)
proto = IPPROTO_UDP;
- switch (tuple->family) {
+ switch (family) {
case AF_INET:
ip = (struct iphdr){
.ihl = 5,
.version = 4,
.ttl = IPDEFTTL,
.protocol = proto,
- .saddr = tuple->src.in_addr.s_addr,
- .daddr = tuple->dst.in_addr.s_addr,
+ .saddr = src_in->sin_addr.s_addr,
+ .daddr = dst_in->sin_addr.s_addr,
};
p = mempcpy(p, &ip, sizeof(ip));
break;
@@ -287,8 +253,8 @@ static size_t build_input(const struct test_cfg *test, void *const buf,
.version = 6,
.hop_limit = IPDEFTTL,
.nexthdr = proto,
- .saddr = tuple->src.in6_addr,
- .daddr = tuple->dst.in6_addr,
+ .saddr = src_in6->sin6_addr,
+ .daddr = dst_in6->sin6_addr,
};
p = mempcpy(p, &ipv6, sizeof(ipv6));
break;
@@ -303,18 +269,16 @@ static size_t build_input(const struct test_cfg *test, void *const buf,
case TCP:
tcp = (struct tcphdr){
.source = sport,
- .dest = tuple->dst.port,
+ .dest = dport,
+ .syn = (test->flags == SYN),
+ .ack = (test->flags == ACK),
};
- if (test->flags == SYN)
- tcp.syn = true;
- if (test->flags == ACK)
- tcp.ack = true;
p = mempcpy(p, &tcp, sizeof(tcp));
break;
case UDP:
udp = (struct udphdr){
.source = sport,
- .dest = tuple->dst.port,
+ .dest = dport,
};
p = mempcpy(p, &udp, sizeof(udp));
break;
@@ -339,27 +303,26 @@ static void test_cls_redirect_common(struct bpf_program *prog)
LIBBPF_OPTS(bpf_test_run_opts, tattr);
int families[] = { AF_INET, AF_INET6 };
struct sockaddr_storage ss;
- struct sockaddr *addr;
socklen_t slen;
int i, j, err, prog_fd;
int servers[__NR_KIND][ARRAY_SIZE(families)] = {};
int conns[__NR_KIND][ARRAY_SIZE(families)] = {};
- struct tuple tuples[__NR_KIND][ARRAY_SIZE(families)];
+ struct sockaddr_storage srcs[__NR_KIND][ARRAY_SIZE(families)];
+ struct sockaddr_storage dsts[__NR_KIND][ARRAY_SIZE(families)];
- addr = (struct sockaddr *)&ss;
for (i = 0; i < ARRAY_SIZE(families); i++) {
slen = prepare_addr(&ss, families[i]);
if (CHECK_FAIL(!slen))
goto cleanup;
- if (CHECK_FAIL(!set_up_conn(addr, slen, SOCK_DGRAM,
+ if (CHECK_FAIL(!set_up_conn(&ss, slen, SOCK_DGRAM,
&servers[UDP][i], &conns[UDP][i],
- &tuples[UDP][i])))
+ &srcs[UDP][i], &dsts[UDP][i])))
goto cleanup;
- if (CHECK_FAIL(!set_up_conn(addr, slen, SOCK_STREAM,
+ if (CHECK_FAIL(!set_up_conn(&ss, slen, SOCK_STREAM,
&servers[TCP][i], &conns[TCP][i],
- &tuples[TCP][i])))
+ &srcs[TCP][i], &dsts[TCP][i])))
goto cleanup;
}
@@ -368,11 +331,12 @@ static void test_cls_redirect_common(struct bpf_program *prog)
struct test_cfg *test = &tests[i];
for (j = 0; j < ARRAY_SIZE(families); j++) {
- struct tuple *tuple = &tuples[test->type][j];
+ struct sockaddr_storage *src = &srcs[test->type][j];
+ struct sockaddr_storage *dst = &dsts[test->type][j];
char input[256];
char tmp[256];
- test_str(tmp, sizeof(tmp), test, tuple->family);
+ test_str(tmp, sizeof(tmp), test, families[j]);
if (!test__start_subtest(tmp))
continue;
@@ -380,7 +344,7 @@ static void test_cls_redirect_common(struct bpf_program *prog)
tattr.data_size_out = sizeof(tmp);
tattr.data_in = input;
- tattr.data_size_in = build_input(test, input, tuple);
+ tattr.data_size_in = build_input(test, input, src, dst);
if (CHECK_FAIL(!tattr.data_size_in))
continue;
diff --git a/tools/testing/selftests/bpf/prog_tests/dynptr.c b/tools/testing/selftests/bpf/prog_tests/dynptr.c
index 9b2d9ceda210..b9f86cb91e81 100644
--- a/tools/testing/selftests/bpf/prog_tests/dynptr.c
+++ b/tools/testing/selftests/bpf/prog_tests/dynptr.c
@@ -32,6 +32,8 @@ static struct {
{"test_ringbuf", SETUP_SYSCALL_SLEEP},
{"test_skb_readonly", SETUP_SKB_PROG},
{"test_dynptr_skb_data", SETUP_SKB_PROG},
+ {"test_dynptr_skb_meta_data", SETUP_SKB_PROG},
+ {"test_dynptr_skb_meta_flags", SETUP_SKB_PROG},
{"test_adjust", SETUP_SYSCALL_SLEEP},
{"test_adjust_err", SETUP_SYSCALL_SLEEP},
{"test_zero_size_dynptr", SETUP_SYSCALL_SLEEP},
diff --git a/tools/testing/selftests/bpf/prog_tests/fd_array.c b/tools/testing/selftests/bpf/prog_tests/fd_array.c
index 241b2c8c6e0f..c534b4d5f9da 100644
--- a/tools/testing/selftests/bpf/prog_tests/fd_array.c
+++ b/tools/testing/selftests/bpf/prog_tests/fd_array.c
@@ -293,7 +293,7 @@ static int get_btf_id_by_fd(int btf_fd, __u32 *id)
* 1) Create a new btf, it's referenced only by a file descriptor, so refcnt=1
* 2) Load a BPF prog with fd_array[0] = btf_fd; now btf's refcnt=2
* 3) Close the btf_fd, now refcnt=1
- * Wait and check that BTF stil exists.
+ * Wait and check that BTF still exists.
*/
static void check_fd_array_cnt__referenced_btfs(void)
{
diff --git a/tools/testing/selftests/bpf/prog_tests/fentry_fexit.c b/tools/testing/selftests/bpf/prog_tests/fentry_fexit.c
index 130f5b82d2e6..5ef1804e44df 100644
--- a/tools/testing/selftests/bpf/prog_tests/fentry_fexit.c
+++ b/tools/testing/selftests/bpf/prog_tests/fentry_fexit.c
@@ -12,13 +12,24 @@ void test_fentry_fexit(void)
int err, prog_fd, i;
LIBBPF_OPTS(bpf_test_run_opts, topts);
- fentry_skel = fentry_test_lskel__open_and_load();
+ fentry_skel = fentry_test_lskel__open();
if (!ASSERT_OK_PTR(fentry_skel, "fentry_skel_load"))
goto close_prog;
- fexit_skel = fexit_test_lskel__open_and_load();
+
+ fentry_skel->keyring_id = KEY_SPEC_SESSION_KEYRING;
+ err = fentry_test_lskel__load(fentry_skel);
+ if (!ASSERT_OK(err, "fentry_skel_load"))
+ goto close_prog;
+
+ fexit_skel = fexit_test_lskel__open();
if (!ASSERT_OK_PTR(fexit_skel, "fexit_skel_load"))
goto close_prog;
+ fexit_skel->keyring_id = KEY_SPEC_SESSION_KEYRING;
+ err = fexit_test_lskel__load(fexit_skel);
+ if (!ASSERT_OK(err, "fexit_skel_load"))
+ goto close_prog;
+
err = fentry_test_lskel__attach(fentry_skel);
if (!ASSERT_OK(err, "fentry_attach"))
goto close_prog;
diff --git a/tools/testing/selftests/bpf/prog_tests/fentry_test.c b/tools/testing/selftests/bpf/prog_tests/fentry_test.c
index aee1bc77a17f..ec882328eb59 100644
--- a/tools/testing/selftests/bpf/prog_tests/fentry_test.c
+++ b/tools/testing/selftests/bpf/prog_tests/fentry_test.c
@@ -43,8 +43,13 @@ static void fentry_test(void)
struct fentry_test_lskel *fentry_skel = NULL;
int err;
- fentry_skel = fentry_test_lskel__open_and_load();
- if (!ASSERT_OK_PTR(fentry_skel, "fentry_skel_load"))
+ fentry_skel = fentry_test_lskel__open();
+ if (!ASSERT_OK_PTR(fentry_skel, "fentry_skel_open"))
+ goto cleanup;
+
+ fentry_skel->keyring_id = KEY_SPEC_SESSION_KEYRING;
+ err = fentry_test_lskel__load(fentry_skel);
+ if (!ASSERT_OK(err, "fentry_skel_load"))
goto cleanup;
err = fentry_test_common(fentry_skel);
diff --git a/tools/testing/selftests/bpf/prog_tests/fexit_test.c b/tools/testing/selftests/bpf/prog_tests/fexit_test.c
index 1c13007e37dd..94eed753560c 100644
--- a/tools/testing/selftests/bpf/prog_tests/fexit_test.c
+++ b/tools/testing/selftests/bpf/prog_tests/fexit_test.c
@@ -43,8 +43,13 @@ static void fexit_test(void)
struct fexit_test_lskel *fexit_skel = NULL;
int err;
- fexit_skel = fexit_test_lskel__open_and_load();
- if (!ASSERT_OK_PTR(fexit_skel, "fexit_skel_load"))
+ fexit_skel = fexit_test_lskel__open();
+ if (!ASSERT_OK_PTR(fexit_skel, "fexit_skel_open"))
+ goto cleanup;
+
+ fexit_skel->keyring_id = KEY_SPEC_SESSION_KEYRING;
+ err = fexit_test_lskel__load(fexit_skel);
+ if (!ASSERT_OK(err, "fexit_skel_load"))
goto cleanup;
err = fexit_test_common(fexit_skel);
diff --git a/tools/testing/selftests/bpf/prog_tests/file_reader.c b/tools/testing/selftests/bpf/prog_tests/file_reader.c
new file mode 100644
index 000000000000..5cde32b35da4
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/file_reader.c
@@ -0,0 +1,117 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+
+#include <test_progs.h>
+#include <network_helpers.h>
+#include "file_reader.skel.h"
+#include "file_reader_fail.skel.h"
+#include <dlfcn.h>
+#include <sys/mman.h>
+
+const char *user_ptr = "hello world";
+char file_contents[256000];
+
+void *get_executable_base_addr(void)
+{
+ Dl_info info;
+
+ if (!dladdr((void *)&get_executable_base_addr, &info)) {
+ fprintf(stderr, "dladdr failed\n");
+ return NULL;
+ }
+
+ return info.dli_fbase;
+}
+
+static int initialize_file_contents(void)
+{
+ int fd, page_sz = sysconf(_SC_PAGESIZE);
+ ssize_t n = 0, cur, off;
+ void *addr;
+
+ fd = open("/proc/self/exe", O_RDONLY);
+ if (!ASSERT_OK_FD(fd, "Open /proc/self/exe\n"))
+ return 1;
+
+ do {
+ cur = read(fd, file_contents + n, sizeof(file_contents) - n);
+ if (!ASSERT_GT(cur, 0, "read success"))
+ break;
+ n += cur;
+ } while (n < sizeof(file_contents));
+
+ close(fd);
+
+ if (!ASSERT_EQ(n, sizeof(file_contents), "Read /proc/self/exe\n"))
+ return 1;
+
+ addr = get_executable_base_addr();
+ if (!ASSERT_NEQ(addr, NULL, "get executable address"))
+ return 1;
+
+ /* page-align base file address */
+ addr = (void *)((unsigned long)addr & ~(page_sz - 1));
+
+ /*
+ * Page out range 0..512K, use 0..256K for positive tests and
+ * 256K..512K for negative tests expecting page faults
+ */
+ for (off = 0; off < sizeof(file_contents) * 2; off += page_sz) {
+ if (!ASSERT_OK(madvise(addr + off, page_sz, MADV_PAGEOUT),
+ "madvise pageout"))
+ return errno;
+ }
+
+ return 0;
+}
+
+static void run_test(const char *prog_name)
+{
+ struct file_reader *skel;
+ struct bpf_program *prog;
+ int err, fd;
+
+ err = initialize_file_contents();
+ if (!ASSERT_OK(err, "initialize file contents"))
+ return;
+
+ skel = file_reader__open();
+ if (!ASSERT_OK_PTR(skel, "file_reader__open"))
+ return;
+
+ bpf_object__for_each_program(prog, skel->obj) {
+ bpf_program__set_autoload(prog, strcmp(bpf_program__name(prog), prog_name) == 0);
+ }
+
+ memcpy(skel->bss->user_buf, file_contents, sizeof(file_contents));
+ skel->bss->pid = getpid();
+
+ err = file_reader__load(skel);
+ if (!ASSERT_OK(err, "file_reader__load"))
+ goto cleanup;
+
+ err = file_reader__attach(skel);
+ if (!ASSERT_OK(err, "file_reader__attach"))
+ goto cleanup;
+
+ fd = open("/proc/self/exe", O_RDONLY);
+ if (fd >= 0)
+ close(fd);
+
+ ASSERT_EQ(skel->bss->err, 0, "err");
+ ASSERT_EQ(skel->bss->run_success, 1, "run_success");
+cleanup:
+ file_reader__destroy(skel);
+}
+
+void test_file_reader(void)
+{
+ if (test__start_subtest("on_open_expect_fault"))
+ run_test("on_open_expect_fault");
+
+ if (test__start_subtest("on_open_validate_file_read"))
+ run_test("on_open_validate_file_read");
+
+ if (test__start_subtest("negative"))
+ RUN_TESTS(file_reader_fail);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/free_timer.c b/tools/testing/selftests/bpf/prog_tests/free_timer.c
index b7b77a6b2979..0de8facca4c5 100644
--- a/tools/testing/selftests/bpf/prog_tests/free_timer.c
+++ b/tools/testing/selftests/bpf/prog_tests/free_timer.c
@@ -124,6 +124,10 @@ void test_free_timer(void)
int err;
skel = free_timer__open_and_load();
+ if (!skel && errno == EOPNOTSUPP) {
+ test__skip();
+ return;
+ }
if (!ASSERT_OK_PTR(skel, "open_load"))
return;
diff --git a/tools/testing/selftests/bpf/prog_tests/htab_update.c b/tools/testing/selftests/bpf/prog_tests/htab_update.c
index 2bc85f4814f4..d0b405eb2966 100644
--- a/tools/testing/selftests/bpf/prog_tests/htab_update.c
+++ b/tools/testing/selftests/bpf/prog_tests/htab_update.c
@@ -15,17 +15,17 @@ struct htab_update_ctx {
static void test_reenter_update(void)
{
struct htab_update *skel;
- unsigned int key, value;
+ void *value = NULL;
+ unsigned int key, value_size;
int err;
skel = htab_update__open();
if (!ASSERT_OK_PTR(skel, "htab_update__open"))
return;
- /* lookup_elem_raw() may be inlined and find_kernel_btf_id() will return -ESRCH */
- bpf_program__set_autoload(skel->progs.lookup_elem_raw, true);
+ bpf_program__set_autoload(skel->progs.bpf_obj_free_fields, true);
err = htab_update__load(skel);
- if (!ASSERT_TRUE(!err || err == -ESRCH, "htab_update__load") || err)
+ if (!ASSERT_TRUE(!err, "htab_update__load") || err)
goto out;
skel->bss->pid = getpid();
@@ -33,14 +33,33 @@ static void test_reenter_update(void)
if (!ASSERT_OK(err, "htab_update__attach"))
goto out;
- /* Will trigger the reentrancy of bpf_map_update_elem() */
+ value_size = bpf_map__value_size(skel->maps.htab);
+
+ value = calloc(1, value_size);
+ if (!ASSERT_OK_PTR(value, "calloc value"))
+ goto out;
+ /*
+ * First update: plain insert. This should NOT trigger the re-entrancy
+ * path, because there is no old element to free yet.
+ */
key = 0;
- value = 0;
- err = bpf_map_update_elem(bpf_map__fd(skel->maps.htab), &key, &value, 0);
- if (!ASSERT_OK(err, "add element"))
+ err = bpf_map_update_elem(bpf_map__fd(skel->maps.htab), &key, value, BPF_ANY);
+ if (!ASSERT_OK(err, "first update (insert)"))
+ goto out;
+
+ /*
+ * Second update: replace existing element with same key and trigger
+ * the reentrancy of bpf_map_update_elem().
+ * check_and_free_fields() calls bpf_obj_free_fields() on the old
+ * value, which is where fentry program runs and performs a nested
+ * bpf_map_update_elem(), triggering -EDEADLK.
+ */
+ memset(value, 0, value_size);
+ err = bpf_map_update_elem(bpf_map__fd(skel->maps.htab), &key, value, BPF_ANY);
+ if (!ASSERT_OK(err, "second update (replace)"))
goto out;
- ASSERT_EQ(skel->bss->update_err, -EBUSY, "no reentrancy");
+ ASSERT_EQ(skel->bss->update_err, -EDEADLK, "no reentrancy");
out:
htab_update__destroy(skel);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/kernel_flag.c b/tools/testing/selftests/bpf/prog_tests/kernel_flag.c
index a133354ac9bc..97b00c7efe94 100644
--- a/tools/testing/selftests/bpf/prog_tests/kernel_flag.c
+++ b/tools/testing/selftests/bpf/prog_tests/kernel_flag.c
@@ -16,7 +16,7 @@ void test_kernel_flag(void)
if (!ASSERT_OK_PTR(lsm_skel, "lsm_skel"))
return;
- lsm_skel->bss->monitored_tid = gettid();
+ lsm_skel->bss->monitored_tid = sys_gettid();
ret = test_kernel_flag__attach(lsm_skel);
if (!ASSERT_OK(ret, "test_kernel_flag__attach"))
diff --git a/tools/testing/selftests/bpf/prog_tests/kmem_cache_iter.c b/tools/testing/selftests/bpf/prog_tests/kmem_cache_iter.c
index 1de14b111931..6e35e13c2022 100644
--- a/tools/testing/selftests/bpf/prog_tests/kmem_cache_iter.c
+++ b/tools/testing/selftests/bpf/prog_tests/kmem_cache_iter.c
@@ -57,7 +57,8 @@ static void subtest_kmem_cache_iter_check_slabinfo(struct kmem_cache_iter *skel)
if (!ASSERT_OK(ret, "kmem_cache_lookup"))
break;
- ASSERT_STREQ(r.name, name, "kmem_cache_name");
+ ASSERT_STRNEQ(r.name, name, sizeof(r.name) - 1,
+ "kmem_cache_name");
ASSERT_EQ(r.obj_size, objsize, "kmem_cache_objsize");
seen++;
diff --git a/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c b/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c
index e19ef509ebf8..6cfaa978bc9a 100644
--- a/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c
+++ b/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c
@@ -7,6 +7,7 @@
#include "kprobe_multi_session.skel.h"
#include "kprobe_multi_session_cookie.skel.h"
#include "kprobe_multi_verifier.skel.h"
+#include "kprobe_write_ctx.skel.h"
#include "bpf/libbpf_internal.h"
#include "bpf/hashmap.h"
@@ -422,220 +423,6 @@ static void test_unique_match(void)
kprobe_multi__destroy(skel);
}
-static size_t symbol_hash(long key, void *ctx __maybe_unused)
-{
- return str_hash((const char *) key);
-}
-
-static bool symbol_equal(long key1, long key2, void *ctx __maybe_unused)
-{
- return strcmp((const char *) key1, (const char *) key2) == 0;
-}
-
-static bool is_invalid_entry(char *buf, bool kernel)
-{
- if (kernel && strchr(buf, '['))
- return true;
- if (!kernel && !strchr(buf, '['))
- return true;
- return false;
-}
-
-static bool skip_entry(char *name)
-{
- /*
- * We attach to almost all kernel functions and some of them
- * will cause 'suspicious RCU usage' when fprobe is attached
- * to them. Filter out the current culprits - arch_cpu_idle
- * default_idle and rcu_* functions.
- */
- if (!strcmp(name, "arch_cpu_idle"))
- return true;
- if (!strcmp(name, "default_idle"))
- return true;
- if (!strncmp(name, "rcu_", 4))
- return true;
- if (!strcmp(name, "bpf_dispatcher_xdp_func"))
- return true;
- if (!strncmp(name, "__ftrace_invalid_address__",
- sizeof("__ftrace_invalid_address__") - 1))
- return true;
- return false;
-}
-
-/* Do comparision by ignoring '.llvm.<hash>' suffixes. */
-static int compare_name(const char *name1, const char *name2)
-{
- const char *res1, *res2;
- int len1, len2;
-
- res1 = strstr(name1, ".llvm.");
- res2 = strstr(name2, ".llvm.");
- len1 = res1 ? res1 - name1 : strlen(name1);
- len2 = res2 ? res2 - name2 : strlen(name2);
-
- if (len1 == len2)
- return strncmp(name1, name2, len1);
- if (len1 < len2)
- return strncmp(name1, name2, len1) <= 0 ? -1 : 1;
- return strncmp(name1, name2, len2) >= 0 ? 1 : -1;
-}
-
-static int load_kallsyms_compare(const void *p1, const void *p2)
-{
- return compare_name(((const struct ksym *)p1)->name, ((const struct ksym *)p2)->name);
-}
-
-static int search_kallsyms_compare(const void *p1, const struct ksym *p2)
-{
- return compare_name(p1, p2->name);
-}
-
-static int get_syms(char ***symsp, size_t *cntp, bool kernel)
-{
- size_t cap = 0, cnt = 0;
- char *name = NULL, *ksym_name, **syms = NULL;
- struct hashmap *map;
- struct ksyms *ksyms;
- struct ksym *ks;
- char buf[256];
- FILE *f;
- int err = 0;
-
- ksyms = load_kallsyms_custom_local(load_kallsyms_compare);
- if (!ASSERT_OK_PTR(ksyms, "load_kallsyms_custom_local"))
- return -EINVAL;
-
- /*
- * The available_filter_functions contains many duplicates,
- * but other than that all symbols are usable in kprobe multi
- * interface.
- * Filtering out duplicates by using hashmap__add, which won't
- * add existing entry.
- */
-
- if (access("/sys/kernel/tracing/trace", F_OK) == 0)
- f = fopen("/sys/kernel/tracing/available_filter_functions", "r");
- else
- f = fopen("/sys/kernel/debug/tracing/available_filter_functions", "r");
-
- if (!f)
- return -EINVAL;
-
- map = hashmap__new(symbol_hash, symbol_equal, NULL);
- if (IS_ERR(map)) {
- err = libbpf_get_error(map);
- goto error;
- }
-
- while (fgets(buf, sizeof(buf), f)) {
- if (is_invalid_entry(buf, kernel))
- continue;
-
- free(name);
- if (sscanf(buf, "%ms$*[^\n]\n", &name) != 1)
- continue;
- if (skip_entry(name))
- continue;
-
- ks = search_kallsyms_custom_local(ksyms, name, search_kallsyms_compare);
- if (!ks) {
- err = -EINVAL;
- goto error;
- }
-
- ksym_name = ks->name;
- err = hashmap__add(map, ksym_name, 0);
- if (err == -EEXIST) {
- err = 0;
- continue;
- }
- if (err)
- goto error;
-
- err = libbpf_ensure_mem((void **) &syms, &cap,
- sizeof(*syms), cnt + 1);
- if (err)
- goto error;
-
- syms[cnt++] = ksym_name;
- }
-
- *symsp = syms;
- *cntp = cnt;
-
-error:
- free(name);
- fclose(f);
- hashmap__free(map);
- if (err)
- free(syms);
- return err;
-}
-
-static int get_addrs(unsigned long **addrsp, size_t *cntp, bool kernel)
-{
- unsigned long *addr, *addrs, *tmp_addrs;
- int err = 0, max_cnt, inc_cnt;
- char *name = NULL;
- size_t cnt = 0;
- char buf[256];
- FILE *f;
-
- if (access("/sys/kernel/tracing/trace", F_OK) == 0)
- f = fopen("/sys/kernel/tracing/available_filter_functions_addrs", "r");
- else
- f = fopen("/sys/kernel/debug/tracing/available_filter_functions_addrs", "r");
-
- if (!f)
- return -ENOENT;
-
- /* In my local setup, the number of entries is 50k+ so Let us initially
- * allocate space to hold 64k entries. If 64k is not enough, incrementally
- * increase 1k each time.
- */
- max_cnt = 65536;
- inc_cnt = 1024;
- addrs = malloc(max_cnt * sizeof(long));
- if (addrs == NULL) {
- err = -ENOMEM;
- goto error;
- }
-
- while (fgets(buf, sizeof(buf), f)) {
- if (is_invalid_entry(buf, kernel))
- continue;
-
- free(name);
- if (sscanf(buf, "%p %ms$*[^\n]\n", &addr, &name) != 2)
- continue;
- if (skip_entry(name))
- continue;
-
- if (cnt == max_cnt) {
- max_cnt += inc_cnt;
- tmp_addrs = realloc(addrs, max_cnt);
- if (!tmp_addrs) {
- err = -ENOMEM;
- goto error;
- }
- addrs = tmp_addrs;
- }
-
- addrs[cnt++] = (unsigned long)addr;
- }
-
- *addrsp = addrs;
- *cntp = cnt;
-
-error:
- free(name);
- fclose(f);
- if (err)
- free(addrs);
- return err;
-}
-
static void do_bench_test(struct kprobe_multi_empty *skel, struct bpf_kprobe_multi_opts *opts)
{
long attach_start_ns, attach_end_ns;
@@ -670,7 +457,7 @@ static void test_kprobe_multi_bench_attach(bool kernel)
char **syms = NULL;
size_t cnt = 0;
- if (!ASSERT_OK(get_syms(&syms, &cnt, kernel), "get_syms"))
+ if (!ASSERT_OK(bpf_get_ksyms(&syms, &cnt, kernel), "bpf_get_ksyms"))
return;
skel = kprobe_multi_empty__open_and_load();
@@ -696,13 +483,13 @@ static void test_kprobe_multi_bench_attach_addr(bool kernel)
size_t cnt = 0;
int err;
- err = get_addrs(&addrs, &cnt, kernel);
+ err = bpf_get_addrs(&addrs, &cnt, kernel);
if (err == -ENOENT) {
test__skip();
return;
}
- if (!ASSERT_OK(err, "get_addrs"))
+ if (!ASSERT_OK(err, "bpf_get_addrs"))
return;
skel = kprobe_multi_empty__open_and_load();
@@ -753,6 +540,30 @@ cleanup:
kprobe_multi_override__destroy(skel);
}
+#ifdef __x86_64__
+static void test_attach_write_ctx(void)
+{
+ struct kprobe_write_ctx *skel = NULL;
+ struct bpf_link *link = NULL;
+
+ skel = kprobe_write_ctx__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "kprobe_write_ctx__open_and_load"))
+ return;
+
+ link = bpf_program__attach_kprobe_opts(skel->progs.kprobe_multi_write_ctx,
+ "bpf_fentry_test1", NULL);
+ if (!ASSERT_ERR_PTR(link, "bpf_program__attach_kprobe_opts"))
+ bpf_link__destroy(link);
+
+ kprobe_write_ctx__destroy(skel);
+}
+#else
+static void test_attach_write_ctx(void)
+{
+ test__skip();
+}
+#endif
+
void serial_test_kprobe_multi_bench_attach(void)
{
if (test__start_subtest("kernel"))
@@ -792,5 +603,7 @@ void test_kprobe_multi_test(void)
test_session_cookie_skel_api();
if (test__start_subtest("unique_match"))
test_unique_match();
+ if (test__start_subtest("attach_write_ctx"))
+ test_attach_write_ctx();
RUN_TESTS(kprobe_multi_verifier);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/livepatch_trampoline.c b/tools/testing/selftests/bpf/prog_tests/livepatch_trampoline.c
new file mode 100644
index 000000000000..72aa5376c30e
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/livepatch_trampoline.c
@@ -0,0 +1,107 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+
+#include <test_progs.h>
+#include "testing_helpers.h"
+#include "livepatch_trampoline.skel.h"
+
+static int load_livepatch(void)
+{
+ char path[4096];
+
+ /* CI will set KBUILD_OUTPUT */
+ snprintf(path, sizeof(path), "%s/samples/livepatch/livepatch-sample.ko",
+ getenv("KBUILD_OUTPUT") ? : "../../../..");
+
+ return load_module(path, env_verbosity > VERBOSE_NONE);
+}
+
+static void unload_livepatch(void)
+{
+ /* Disable the livepatch before unloading the module */
+ system("echo 0 > /sys/kernel/livepatch/livepatch_sample/enabled");
+
+ unload_module("livepatch_sample", env_verbosity > VERBOSE_NONE);
+}
+
+static void read_proc_cmdline(void)
+{
+ char buf[4096];
+ int fd, ret;
+
+ fd = open("/proc/cmdline", O_RDONLY);
+ if (!ASSERT_OK_FD(fd, "open /proc/cmdline"))
+ return;
+
+ ret = read(fd, buf, sizeof(buf));
+ if (!ASSERT_GT(ret, 0, "read /proc/cmdline"))
+ goto out;
+
+ ASSERT_OK(strncmp(buf, "this has been live patched", 26), "strncmp");
+
+out:
+ close(fd);
+}
+
+static void __test_livepatch_trampoline(bool fexit_first)
+{
+ struct livepatch_trampoline *skel = NULL;
+ int err;
+
+ skel = livepatch_trampoline__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_open_and_load"))
+ goto out;
+
+ skel->bss->my_pid = getpid();
+
+ if (!fexit_first) {
+ /* fentry program is loaded first by default */
+ err = livepatch_trampoline__attach(skel);
+ if (!ASSERT_OK(err, "skel_attach"))
+ goto out;
+ } else {
+ /* Manually load fexit program first. */
+ skel->links.fexit_cmdline = bpf_program__attach(skel->progs.fexit_cmdline);
+ if (!ASSERT_OK_PTR(skel->links.fexit_cmdline, "attach_fexit"))
+ goto out;
+
+ skel->links.fentry_cmdline = bpf_program__attach(skel->progs.fentry_cmdline);
+ if (!ASSERT_OK_PTR(skel->links.fentry_cmdline, "attach_fentry"))
+ goto out;
+ }
+
+ read_proc_cmdline();
+
+ ASSERT_EQ(skel->bss->fentry_hit, 1, "fentry_hit");
+ ASSERT_EQ(skel->bss->fexit_hit, 1, "fexit_hit");
+out:
+ livepatch_trampoline__destroy(skel);
+}
+
+void test_livepatch_trampoline(void)
+{
+ int retry_cnt = 0;
+
+retry:
+ if (load_livepatch()) {
+ if (retry_cnt) {
+ ASSERT_OK(1, "load_livepatch");
+ goto out;
+ }
+ /*
+ * Something else (previous run of the same test?) loaded
+ * the KLP module. Unload the KLP module and retry.
+ */
+ unload_livepatch();
+ retry_cnt++;
+ goto retry;
+ }
+
+ if (test__start_subtest("fentry_first"))
+ __test_livepatch_trampoline(false);
+
+ if (test__start_subtest("fexit_first"))
+ __test_livepatch_trampoline(true);
+out:
+ unload_livepatch();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/map_excl.c b/tools/testing/selftests/bpf/prog_tests/map_excl.c
new file mode 100644
index 000000000000..6bdc6d6de0da
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/map_excl.c
@@ -0,0 +1,54 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2025 Google LLC. */
+#define _GNU_SOURCE
+#include <unistd.h>
+#include <sys/syscall.h>
+#include <test_progs.h>
+#include <bpf/btf.h>
+
+#include "map_excl.skel.h"
+
+static void test_map_excl_allowed(void)
+{
+ struct map_excl *skel = map_excl__open();
+ int err;
+
+ err = bpf_map__set_exclusive_program(skel->maps.excl_map, skel->progs.should_have_access);
+ if (!ASSERT_OK(err, "bpf_map__set_exclusive_program"))
+ goto out;
+
+ bpf_program__set_autoload(skel->progs.should_have_access, true);
+ bpf_program__set_autoload(skel->progs.should_not_have_access, false);
+
+ err = map_excl__load(skel);
+ ASSERT_OK(err, "map_excl__load");
+out:
+ map_excl__destroy(skel);
+}
+
+static void test_map_excl_denied(void)
+{
+ struct map_excl *skel = map_excl__open();
+ int err;
+
+ err = bpf_map__set_exclusive_program(skel->maps.excl_map, skel->progs.should_have_access);
+ if (!ASSERT_OK(err, "bpf_map__make_exclusive"))
+ goto out;
+
+ bpf_program__set_autoload(skel->progs.should_have_access, false);
+ bpf_program__set_autoload(skel->progs.should_not_have_access, true);
+
+ err = map_excl__load(skel);
+ ASSERT_EQ(err, -EACCES, "exclusive map access not denied\n");
+out:
+ map_excl__destroy(skel);
+
+}
+
+void test_map_excl(void)
+{
+ if (test__start_subtest("map_excl_allowed"))
+ test_map_excl_allowed();
+ if (test__start_subtest("map_excl_denied"))
+ test_map_excl_denied();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/module_attach.c b/tools/testing/selftests/bpf/prog_tests/module_attach.c
index 6d391d95f96e..70fa7ae93173 100644
--- a/tools/testing/selftests/bpf/prog_tests/module_attach.c
+++ b/tools/testing/selftests/bpf/prog_tests/module_attach.c
@@ -90,7 +90,7 @@ void test_module_attach(void)
test_module_attach__detach(skel);
- /* attach fentry/fexit and make sure it get's module reference */
+ /* attach fentry/fexit and make sure it gets module reference */
link = bpf_program__attach(skel->progs.handle_fentry);
if (!ASSERT_OK_PTR(link, "attach_fentry"))
goto cleanup;
diff --git a/tools/testing/selftests/bpf/prog_tests/mptcp.c b/tools/testing/selftests/bpf/prog_tests/mptcp.c
index f8eb7f9d4fd2..8fade8bdc451 100644
--- a/tools/testing/selftests/bpf/prog_tests/mptcp.c
+++ b/tools/testing/selftests/bpf/prog_tests/mptcp.c
@@ -6,11 +6,13 @@
#include <netinet/in.h>
#include <test_progs.h>
#include <unistd.h>
+#include <errno.h>
#include "cgroup_helpers.h"
#include "network_helpers.h"
#include "mptcp_sock.skel.h"
#include "mptcpify.skel.h"
#include "mptcp_subflow.skel.h"
+#include "mptcp_sockmap.skel.h"
#define NS_TEST "mptcp_ns"
#define ADDR_1 "10.0.1.1"
@@ -436,6 +438,142 @@ close_cgroup:
close(cgroup_fd);
}
+/* Test sockmap on MPTCP server handling non-mp-capable clients. */
+static void test_sockmap_with_mptcp_fallback(struct mptcp_sockmap *skel)
+{
+ int listen_fd = -1, client_fd1 = -1, client_fd2 = -1;
+ int server_fd1 = -1, server_fd2 = -1, sent, recvd;
+ char snd[9] = "123456789";
+ char rcv[10];
+
+ /* start server with MPTCP enabled */
+ listen_fd = start_mptcp_server(AF_INET, NULL, 0, 0);
+ if (!ASSERT_OK_FD(listen_fd, "sockmap-fb:start_mptcp_server"))
+ return;
+
+ skel->bss->trace_port = ntohs(get_socket_local_port(listen_fd));
+ skel->bss->sk_index = 0;
+ /* create client without MPTCP enabled */
+ client_fd1 = connect_to_fd_opts(listen_fd, NULL);
+ if (!ASSERT_OK_FD(client_fd1, "sockmap-fb:connect_to_fd"))
+ goto end;
+
+ server_fd1 = accept(listen_fd, NULL, 0);
+ skel->bss->sk_index = 1;
+ client_fd2 = connect_to_fd_opts(listen_fd, NULL);
+ if (!ASSERT_OK_FD(client_fd2, "sockmap-fb:connect_to_fd"))
+ goto end;
+
+ server_fd2 = accept(listen_fd, NULL, 0);
+ /* test normal redirect behavior: data sent by client_fd1 can be
+ * received by client_fd2
+ */
+ skel->bss->redirect_idx = 1;
+ sent = send(client_fd1, snd, sizeof(snd), 0);
+ if (!ASSERT_EQ(sent, sizeof(snd), "sockmap-fb:send(client_fd1)"))
+ goto end;
+
+ /* try to recv more bytes to avoid truncation check */
+ recvd = recv(client_fd2, rcv, sizeof(rcv), 0);
+ if (!ASSERT_EQ(recvd, sizeof(snd), "sockmap-fb:recv(client_fd2)"))
+ goto end;
+
+end:
+ if (client_fd1 >= 0)
+ close(client_fd1);
+ if (client_fd2 >= 0)
+ close(client_fd2);
+ if (server_fd1 >= 0)
+ close(server_fd1);
+ if (server_fd2 >= 0)
+ close(server_fd2);
+ close(listen_fd);
+}
+
+/* Test sockmap rejection of MPTCP sockets - both server and client sides. */
+static void test_sockmap_reject_mptcp(struct mptcp_sockmap *skel)
+{
+ int listen_fd = -1, server_fd = -1, client_fd1 = -1;
+ int err, zero = 0;
+
+ /* start server with MPTCP enabled */
+ listen_fd = start_mptcp_server(AF_INET, NULL, 0, 0);
+ if (!ASSERT_OK_FD(listen_fd, "start_mptcp_server"))
+ return;
+
+ skel->bss->trace_port = ntohs(get_socket_local_port(listen_fd));
+ skel->bss->sk_index = 0;
+ /* create client with MPTCP enabled */
+ client_fd1 = connect_to_fd(listen_fd, 0);
+ if (!ASSERT_OK_FD(client_fd1, "connect_to_fd client_fd1"))
+ goto end;
+
+ /* bpf_sock_map_update() called from sockops should reject MPTCP sk */
+ if (!ASSERT_EQ(skel->bss->helper_ret, -EOPNOTSUPP, "should reject"))
+ goto end;
+
+ server_fd = accept(listen_fd, NULL, 0);
+ err = bpf_map_update_elem(bpf_map__fd(skel->maps.sock_map),
+ &zero, &server_fd, BPF_NOEXIST);
+ if (!ASSERT_EQ(err, -EOPNOTSUPP, "server should be disallowed"))
+ goto end;
+
+ /* MPTCP client should also be disallowed */
+ err = bpf_map_update_elem(bpf_map__fd(skel->maps.sock_map),
+ &zero, &client_fd1, BPF_NOEXIST);
+ if (!ASSERT_EQ(err, -EOPNOTSUPP, "client should be disallowed"))
+ goto end;
+end:
+ if (client_fd1 >= 0)
+ close(client_fd1);
+ if (server_fd >= 0)
+ close(server_fd);
+ close(listen_fd);
+}
+
+static void test_mptcp_sockmap(void)
+{
+ struct mptcp_sockmap *skel;
+ struct netns_obj *netns;
+ int cgroup_fd, err;
+
+ cgroup_fd = test__join_cgroup("/mptcp_sockmap");
+ if (!ASSERT_OK_FD(cgroup_fd, "join_cgroup: mptcp_sockmap"))
+ return;
+
+ skel = mptcp_sockmap__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_open_load: mptcp_sockmap"))
+ goto close_cgroup;
+
+ skel->links.mptcp_sockmap_inject =
+ bpf_program__attach_cgroup(skel->progs.mptcp_sockmap_inject, cgroup_fd);
+ if (!ASSERT_OK_PTR(skel->links.mptcp_sockmap_inject, "attach sockmap"))
+ goto skel_destroy;
+
+ err = bpf_prog_attach(bpf_program__fd(skel->progs.mptcp_sockmap_redirect),
+ bpf_map__fd(skel->maps.sock_map),
+ BPF_SK_SKB_STREAM_VERDICT, 0);
+ if (!ASSERT_OK(err, "bpf_prog_attach stream verdict"))
+ goto skel_destroy;
+
+ netns = netns_new(NS_TEST, true);
+ if (!ASSERT_OK_PTR(netns, "netns_new: mptcp_sockmap"))
+ goto skel_destroy;
+
+ if (endpoint_init("subflow") < 0)
+ goto close_netns;
+
+ test_sockmap_with_mptcp_fallback(skel);
+ test_sockmap_reject_mptcp(skel);
+
+close_netns:
+ netns_free(netns);
+skel_destroy:
+ mptcp_sockmap__destroy(skel);
+close_cgroup:
+ close(cgroup_fd);
+}
+
void test_mptcp(void)
{
if (test__start_subtest("base"))
@@ -444,4 +582,6 @@ void test_mptcp(void)
test_mptcpify();
if (test__start_subtest("subflow"))
test_subflow();
+ if (test__start_subtest("sockmap"))
+ test_mptcp_sockmap();
}
diff --git a/tools/testing/selftests/bpf/prog_tests/perf_branches.c b/tools/testing/selftests/bpf/prog_tests/perf_branches.c
index bc24f83339d6..0a7ef770c487 100644
--- a/tools/testing/selftests/bpf/prog_tests/perf_branches.c
+++ b/tools/testing/selftests/bpf/prog_tests/perf_branches.c
@@ -15,6 +15,10 @@ static void check_good_sample(struct test_perf_branches *skel)
int pbe_size = sizeof(struct perf_branch_entry);
int duration = 0;
+ if (CHECK(!skel->bss->run_cnt, "invalid run_cnt",
+ "checked sample validity before prog run"))
+ return;
+
if (CHECK(!skel->bss->valid, "output not valid",
"no valid sample from prog"))
return;
@@ -45,6 +49,10 @@ static void check_bad_sample(struct test_perf_branches *skel)
int written_stack = skel->bss->written_stack_out;
int duration = 0;
+ if (CHECK(!skel->bss->run_cnt, "invalid run_cnt",
+ "checked sample validity before prog run"))
+ return;
+
if (CHECK(!skel->bss->valid, "output not valid",
"no valid sample from prog"))
return;
@@ -83,8 +91,12 @@ static void test_perf_branches_common(int perf_fd,
err = pthread_setaffinity_np(pthread_self(), sizeof(cpu_set), &cpu_set);
if (CHECK(err, "set_affinity", "cpu #0, err %d\n", err))
goto out_destroy;
- /* spin the loop for a while (random high number) */
- for (i = 0; i < 1000000; ++i)
+
+ /* Spin the loop for a while by using a high iteration count, and by
+ * checking whether the specific run count marker has been explicitly
+ * incremented at least once by the backing perf_event BPF program.
+ */
+ for (i = 0; i < 100000000 && !*(volatile int *)&skel->bss->run_cnt; ++i)
++j;
test_perf_branches__detach(skel);
@@ -116,11 +128,11 @@ static void test_perf_branches_hw(void)
pfd = syscall(__NR_perf_event_open, &attr, -1, 0, -1, PERF_FLAG_FD_CLOEXEC);
/*
- * Some setups don't support branch records (virtual machines, !x86),
- * so skip test in this case.
+ * Some setups don't support LBR (virtual machines, !x86, AMD Milan Zen
+ * 3 which only supports BRS), so skip test in this case.
*/
if (pfd < 0) {
- if (errno == ENOENT || errno == EOPNOTSUPP) {
+ if (errno == ENOENT || errno == EOPNOTSUPP || errno == EINVAL) {
printf("%s:SKIP:no PERF_SAMPLE_BRANCH_STACK\n",
__func__);
test__skip();
diff --git a/tools/testing/selftests/bpf/prog_tests/pinning_devmap_reuse.c b/tools/testing/selftests/bpf/prog_tests/pinning_devmap_reuse.c
new file mode 100644
index 000000000000..9ae49b587f3e
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/pinning_devmap_reuse.c
@@ -0,0 +1,50 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <test_progs.h>
+
+
+#include "test_pinning_devmap.skel.h"
+
+void test_pinning_devmap_reuse(void)
+{
+ const char *pinpath1 = "/sys/fs/bpf/pinmap1";
+ const char *pinpath2 = "/sys/fs/bpf/pinmap2";
+ struct test_pinning_devmap *skel1 = NULL, *skel2 = NULL;
+ int err;
+ DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts);
+
+ /* load the object a first time */
+ skel1 = test_pinning_devmap__open_and_load();
+ if (!ASSERT_OK_PTR(skel1, "skel_load1"))
+ goto out;
+
+ /* load the object a second time, re-using the pinned map */
+ skel2 = test_pinning_devmap__open_and_load();
+ if (!ASSERT_OK_PTR(skel2, "skel_load2"))
+ goto out;
+
+ /* we can close the reference safely without
+ * the map's refcount falling to 0
+ */
+ test_pinning_devmap__destroy(skel1);
+ skel1 = NULL;
+
+ /* now, swap the pins */
+ err = renameat2(0, pinpath1, 0, pinpath2, RENAME_EXCHANGE);
+ if (!ASSERT_OK(err, "swap pins"))
+ goto out;
+
+ /* load the object again, this time the re-use should fail */
+ skel1 = test_pinning_devmap__open_and_load();
+ if (!ASSERT_ERR_PTR(skel1, "skel_load3"))
+ goto out;
+
+out:
+ unlink(pinpath1);
+ unlink(pinpath2);
+ test_pinning_devmap__destroy(skel1);
+ test_pinning_devmap__destroy(skel2);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/pinning_htab.c b/tools/testing/selftests/bpf/prog_tests/pinning_htab.c
new file mode 100644
index 000000000000..16bd74be3dbe
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/pinning_htab.c
@@ -0,0 +1,36 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <test_progs.h>
+#include "test_pinning_htab.skel.h"
+
+static void unpin_map(const char *map_name, const char *pin_path)
+{
+ struct test_pinning_htab *skel;
+ struct bpf_map *map;
+ int err;
+
+ skel = test_pinning_htab__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel open_and_load"))
+ return;
+
+ map = bpf_object__find_map_by_name(skel->obj, map_name);
+ if (!ASSERT_OK_PTR(map, "bpf_object__find_map_by_name"))
+ goto out;
+
+ err = bpf_map__pin(map, pin_path);
+ if (!ASSERT_OK(err, "bpf_map__pin"))
+ goto out;
+
+ err = bpf_map__unpin(map, pin_path);
+ ASSERT_OK(err, "bpf_map__unpin");
+out:
+ test_pinning_htab__destroy(skel);
+}
+
+void test_pinning_htab(void)
+{
+ if (test__start_subtest("timer_prealloc"))
+ unpin_map("timer_prealloc", "/sys/fs/bpf/timer_prealloc");
+ if (test__start_subtest("timer_no_prealloc"))
+ unpin_map("timer_no_prealloc", "/sys/fs/bpf/timer_no_prealloc");
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/prog_tests_framework.c b/tools/testing/selftests/bpf/prog_tests/prog_tests_framework.c
index 14f2796076e0..7607cfc2408c 100644
--- a/tools/testing/selftests/bpf/prog_tests/prog_tests_framework.c
+++ b/tools/testing/selftests/bpf/prog_tests/prog_tests_framework.c
@@ -54,3 +54,128 @@ void test_prog_tests_framework(void)
return;
clear_test_state(state);
}
+
+static void dummy_emit(const char *buf, bool force) {}
+
+void test_prog_tests_framework_expected_msgs(void)
+{
+ struct expected_msgs msgs;
+ int i, j, error_cnt;
+ const struct {
+ const char *name;
+ const char *log;
+ const char *expected;
+ struct expect_msg *pats;
+ } cases[] = {
+ {
+ .name = "simple-ok",
+ .log = "aaabbbccc",
+ .pats = (struct expect_msg[]) {
+ { .substr = "aaa" },
+ { .substr = "ccc" },
+ {}
+ }
+ },
+ {
+ .name = "simple-fail",
+ .log = "aaabbbddd",
+ .expected = "MATCHED SUBSTR: 'aaa'\n"
+ "EXPECTED SUBSTR: 'ccc'\n",
+ .pats = (struct expect_msg[]) {
+ { .substr = "aaa" },
+ { .substr = "ccc" },
+ {}
+ }
+ },
+ {
+ .name = "negative-ok-mid",
+ .log = "aaabbbccc",
+ .pats = (struct expect_msg[]) {
+ { .substr = "aaa" },
+ { .substr = "foo", .negative = true },
+ { .substr = "bar", .negative = true },
+ { .substr = "ccc" },
+ {}
+ }
+ },
+ {
+ .name = "negative-ok-tail",
+ .log = "aaabbbccc",
+ .pats = (struct expect_msg[]) {
+ { .substr = "aaa" },
+ { .substr = "foo", .negative = true },
+ {}
+ }
+ },
+ {
+ .name = "negative-ok-head",
+ .log = "aaabbbccc",
+ .pats = (struct expect_msg[]) {
+ { .substr = "foo", .negative = true },
+ { .substr = "ccc" },
+ {}
+ }
+ },
+ {
+ .name = "negative-fail-head",
+ .log = "aaabbbccc",
+ .expected = "UNEXPECTED SUBSTR: 'aaa'\n",
+ .pats = (struct expect_msg[]) {
+ { .substr = "aaa", .negative = true },
+ { .substr = "bbb" },
+ {}
+ }
+ },
+ {
+ .name = "negative-fail-tail",
+ .log = "aaabbbccc",
+ .expected = "UNEXPECTED SUBSTR: 'ccc'\n",
+ .pats = (struct expect_msg[]) {
+ { .substr = "bbb" },
+ { .substr = "ccc", .negative = true },
+ {}
+ }
+ },
+ {
+ .name = "negative-fail-mid-1",
+ .log = "aaabbbccc",
+ .expected = "UNEXPECTED SUBSTR: 'bbb'\n",
+ .pats = (struct expect_msg[]) {
+ { .substr = "aaa" },
+ { .substr = "bbb", .negative = true },
+ { .substr = "ccc" },
+ {}
+ }
+ },
+ {
+ .name = "negative-fail-mid-2",
+ .log = "aaabbb222ccc",
+ .expected = "UNEXPECTED SUBSTR: '222'\n",
+ .pats = (struct expect_msg[]) {
+ { .substr = "aaa" },
+ { .substr = "222", .negative = true },
+ { .substr = "bbb", .negative = true },
+ { .substr = "ccc" },
+ {}
+ }
+ }
+ };
+
+ for (i = 0; i < ARRAY_SIZE(cases); i++) {
+ if (test__start_subtest(cases[i].name)) {
+ error_cnt = env.subtest_state->error_cnt;
+ msgs.patterns = cases[i].pats;
+ msgs.cnt = 0;
+ for (j = 0; cases[i].pats[j].substr; j++)
+ msgs.cnt++;
+ validate_msgs(cases[i].log, &msgs, dummy_emit);
+ fflush(stderr);
+ env.subtest_state->error_cnt = error_cnt;
+ if (cases[i].expected)
+ ASSERT_HAS_SUBSTR(env.subtest_state->log_buf, cases[i].expected, "expected output");
+ else
+ ASSERT_STREQ(env.subtest_state->log_buf, "", "expected no output");
+ test__end_subtest();
+ }
+ }
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/rcu_read_lock.c b/tools/testing/selftests/bpf/prog_tests/rcu_read_lock.c
index c9f855e5da24..246eb259c08a 100644
--- a/tools/testing/selftests/bpf/prog_tests/rcu_read_lock.c
+++ b/tools/testing/selftests/bpf/prog_tests/rcu_read_lock.c
@@ -28,6 +28,7 @@ static void test_success(void)
bpf_program__set_autoload(skel->progs.two_regions, true);
bpf_program__set_autoload(skel->progs.non_sleepable_1, true);
bpf_program__set_autoload(skel->progs.non_sleepable_2, true);
+ bpf_program__set_autoload(skel->progs.nested_rcu_region, true);
bpf_program__set_autoload(skel->progs.task_trusted_non_rcuptr, true);
bpf_program__set_autoload(skel->progs.rcu_read_lock_subprog, true);
bpf_program__set_autoload(skel->progs.rcu_read_lock_global_subprog, true);
@@ -78,7 +79,8 @@ static const char * const inproper_region_tests[] = {
"non_sleepable_rcu_mismatch",
"inproper_sleepable_helper",
"inproper_sleepable_kfunc",
- "nested_rcu_region",
+ "nested_rcu_region_unbalanced_1",
+ "nested_rcu_region_unbalanced_2",
"rcu_read_lock_global_subprog_lock",
"rcu_read_lock_global_subprog_unlock",
"rcu_read_lock_sleepable_helper_global_subprog",
diff --git a/tools/testing/selftests/bpf/prog_tests/refcounted_kptr.c b/tools/testing/selftests/bpf/prog_tests/refcounted_kptr.c
index d6bd5e16e637..d2c0542716a8 100644
--- a/tools/testing/selftests/bpf/prog_tests/refcounted_kptr.c
+++ b/tools/testing/selftests/bpf/prog_tests/refcounted_kptr.c
@@ -44,3 +44,59 @@ void test_refcounted_kptr_wrong_owner(void)
ASSERT_OK(opts.retval, "rbtree_wrong_owner_remove_fail_a2 retval");
refcounted_kptr__destroy(skel);
}
+
+void test_percpu_hash_refcounted_kptr_refcount_leak(void)
+{
+ struct refcounted_kptr *skel;
+ int cpu_nr, fd, err, key = 0;
+ struct bpf_map *map;
+ size_t values_sz;
+ u64 *values;
+ LIBBPF_OPTS(bpf_test_run_opts, opts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .repeat = 1,
+ );
+
+ cpu_nr = libbpf_num_possible_cpus();
+ if (!ASSERT_GT(cpu_nr, 0, "libbpf_num_possible_cpus"))
+ return;
+
+ values = calloc(cpu_nr, sizeof(u64));
+ if (!ASSERT_OK_PTR(values, "calloc values"))
+ return;
+
+ skel = refcounted_kptr__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "refcounted_kptr__open_and_load")) {
+ free(values);
+ return;
+ }
+
+ values_sz = cpu_nr * sizeof(u64);
+ memset(values, 0, values_sz);
+
+ map = skel->maps.percpu_hash;
+ err = bpf_map__update_elem(map, &key, sizeof(key), values, values_sz, 0);
+ if (!ASSERT_OK(err, "bpf_map__update_elem"))
+ goto out;
+
+ fd = bpf_program__fd(skel->progs.percpu_hash_refcount_leak);
+ err = bpf_prog_test_run_opts(fd, &opts);
+ if (!ASSERT_OK(err, "bpf_prog_test_run_opts"))
+ goto out;
+ if (!ASSERT_EQ(opts.retval, 2, "opts.retval"))
+ goto out;
+
+ err = bpf_map__update_elem(map, &key, sizeof(key), values, values_sz, 0);
+ if (!ASSERT_OK(err, "bpf_map__update_elem"))
+ goto out;
+
+ fd = bpf_program__fd(skel->progs.check_percpu_hash_refcount);
+ err = bpf_prog_test_run_opts(fd, &opts);
+ ASSERT_OK(err, "bpf_prog_test_run_opts");
+ ASSERT_EQ(opts.retval, 1, "opts.retval");
+
+out:
+ refcounted_kptr__destroy(skel);
+ free(values);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/reg_bounds.c b/tools/testing/selftests/bpf/prog_tests/reg_bounds.c
index e261b0e872db..d93a0c7b1786 100644
--- a/tools/testing/selftests/bpf/prog_tests/reg_bounds.c
+++ b/tools/testing/selftests/bpf/prog_tests/reg_bounds.c
@@ -623,7 +623,7 @@ static void range_cond(enum num_t t, struct range x, struct range y,
*newx = range(t, x.a, x.b);
*newy = range(t, y.a + 1, y.b);
} else if (x.a == x.b && x.b == y.b) {
- /* X is a constant matching rigth side of Y */
+ /* X is a constant matching right side of Y */
*newx = range(t, x.a, x.b);
*newy = range(t, y.a, y.b - 1);
} else if (y.a == y.b && x.a == y.a) {
@@ -631,7 +631,7 @@ static void range_cond(enum num_t t, struct range x, struct range y,
*newx = range(t, x.a + 1, x.b);
*newy = range(t, y.a, y.b);
} else if (y.a == y.b && x.b == y.b) {
- /* Y is a constant matching rigth side of X */
+ /* Y is a constant matching right side of X */
*newx = range(t, x.a, x.b - 1);
*newy = range(t, y.a, y.b);
} else {
diff --git a/tools/testing/selftests/bpf/prog_tests/res_spin_lock.c b/tools/testing/selftests/bpf/prog_tests/res_spin_lock.c
index 0703e987df89..f0a8c828f8f1 100644
--- a/tools/testing/selftests/bpf/prog_tests/res_spin_lock.c
+++ b/tools/testing/selftests/bpf/prog_tests/res_spin_lock.c
@@ -99,3 +99,19 @@ end:
res_spin_lock__destroy(skel);
return;
}
+
+void serial_test_res_spin_lock_stress(void)
+{
+ if (libbpf_num_possible_cpus() < 3) {
+ test__skip();
+ return;
+ }
+
+ ASSERT_OK(load_module("bpf_test_rqspinlock.ko", false), "load module AA");
+ sleep(5);
+ unload_module("bpf_test_rqspinlock", false);
+ /*
+ * Insert bpf_test_rqspinlock.ko manually with test_mode=[1|2] to test
+ * other cases (ABBA, ABBCCA).
+ */
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/ringbuf.c b/tools/testing/selftests/bpf/prog_tests/ringbuf.c
index d1e4cb28a72c..64520684d2cb 100644
--- a/tools/testing/selftests/bpf/prog_tests/ringbuf.c
+++ b/tools/testing/selftests/bpf/prog_tests/ringbuf.c
@@ -17,6 +17,7 @@
#include "test_ringbuf_n.lskel.h"
#include "test_ringbuf_map_key.lskel.h"
#include "test_ringbuf_write.lskel.h"
+#include "test_ringbuf_overwrite.lskel.h"
#define EDONE 7777
@@ -497,6 +498,68 @@ cleanup:
test_ringbuf_map_key_lskel__destroy(skel_map_key);
}
+static void ringbuf_overwrite_mode_subtest(void)
+{
+ unsigned long size, len1, len2, len3, len4, len5;
+ unsigned long expect_avail_data, expect_prod_pos, expect_over_pos;
+ struct test_ringbuf_overwrite_lskel *skel;
+ int page_size = getpagesize();
+ int err;
+
+ skel = test_ringbuf_overwrite_lskel__open();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ return;
+
+ size = page_size;
+ len1 = page_size / 2;
+ len2 = page_size / 4;
+ len3 = size - len1 - len2 - BPF_RINGBUF_HDR_SZ * 3;
+ len4 = len3 - 8;
+ len5 = len3; /* retry with len3 */
+
+ skel->maps.ringbuf.max_entries = size;
+ skel->rodata->LEN1 = len1;
+ skel->rodata->LEN2 = len2;
+ skel->rodata->LEN3 = len3;
+ skel->rodata->LEN4 = len4;
+ skel->rodata->LEN5 = len5;
+
+ skel->bss->pid = getpid();
+
+ err = test_ringbuf_overwrite_lskel__load(skel);
+ if (!ASSERT_OK(err, "skel_load"))
+ goto cleanup;
+
+ err = test_ringbuf_overwrite_lskel__attach(skel);
+ if (!ASSERT_OK(err, "skel_attach"))
+ goto cleanup;
+
+ syscall(__NR_getpgid);
+
+ ASSERT_EQ(skel->bss->reserve1_fail, 0, "reserve 1");
+ ASSERT_EQ(skel->bss->reserve2_fail, 0, "reserve 2");
+ ASSERT_EQ(skel->bss->reserve3_fail, 1, "reserve 3");
+ ASSERT_EQ(skel->bss->reserve4_fail, 0, "reserve 4");
+ ASSERT_EQ(skel->bss->reserve5_fail, 0, "reserve 5");
+
+ ASSERT_EQ(skel->bss->ring_size, size, "check_ring_size");
+
+ expect_avail_data = len2 + len4 + len5 + 3 * BPF_RINGBUF_HDR_SZ;
+ ASSERT_EQ(skel->bss->avail_data, expect_avail_data, "check_avail_size");
+
+ ASSERT_EQ(skel->bss->cons_pos, 0, "check_cons_pos");
+
+ expect_prod_pos = len1 + len2 + len4 + len5 + 4 * BPF_RINGBUF_HDR_SZ;
+ ASSERT_EQ(skel->bss->prod_pos, expect_prod_pos, "check_prod_pos");
+
+ expect_over_pos = len1 + BPF_RINGBUF_HDR_SZ;
+ ASSERT_EQ(skel->bss->over_pos, expect_over_pos, "check_over_pos");
+
+ test_ringbuf_overwrite_lskel__detach(skel);
+cleanup:
+ test_ringbuf_overwrite_lskel__destroy(skel);
+}
+
void test_ringbuf(void)
{
if (test__start_subtest("ringbuf"))
@@ -507,4 +570,6 @@ void test_ringbuf(void)
ringbuf_map_key_subtest();
if (test__start_subtest("ringbuf_write"))
ringbuf_write_subtest();
+ if (test__start_subtest("ringbuf_overwrite_mode"))
+ ringbuf_overwrite_mode_subtest();
}
diff --git a/tools/testing/selftests/bpf/prog_tests/select_reuseport.c b/tools/testing/selftests/bpf/prog_tests/select_reuseport.c
index 036d4760d2c1..3dbcc091f16c 100644
--- a/tools/testing/selftests/bpf/prog_tests/select_reuseport.c
+++ b/tools/testing/selftests/bpf/prog_tests/select_reuseport.c
@@ -41,11 +41,7 @@ static struct bpf_object *obj;
static __u32 index_zero;
static int epfd;
-static union sa46 {
- struct sockaddr_in6 v6;
- struct sockaddr_in v4;
- sa_family_t family;
-} srv_sa;
+static struct sockaddr_storage srv_sa;
#define RET_IF(condition, tag, format...) ({ \
if (CHECK_FAIL(condition)) { \
@@ -135,24 +131,24 @@ static int prepare_bpf_obj(void)
return 0;
}
-static void sa46_init_loopback(union sa46 *sa, sa_family_t family)
+static void ss_init_loopback(struct sockaddr_storage *sa, sa_family_t family)
{
memset(sa, 0, sizeof(*sa));
- sa->family = family;
- if (sa->family == AF_INET6)
- sa->v6.sin6_addr = in6addr_loopback;
+ sa->ss_family = family;
+ if (sa->ss_family == AF_INET6)
+ ((struct sockaddr_in6 *)sa)->sin6_addr = in6addr_loopback;
else
- sa->v4.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
+ ((struct sockaddr_in *)sa)->sin_addr.s_addr = htonl(INADDR_LOOPBACK);
}
-static void sa46_init_inany(union sa46 *sa, sa_family_t family)
+static void ss_init_inany(struct sockaddr_storage *sa, sa_family_t family)
{
memset(sa, 0, sizeof(*sa));
- sa->family = family;
- if (sa->family == AF_INET6)
- sa->v6.sin6_addr = in6addr_any;
+ sa->ss_family = family;
+ if (sa->ss_family == AF_INET6)
+ ((struct sockaddr_in6 *)sa)->sin6_addr = in6addr_any;
else
- sa->v4.sin_addr.s_addr = INADDR_ANY;
+ ((struct sockaddr_in *)sa)->sin_addr.s_addr = INADDR_ANY;
}
static int read_int_sysctl(const char *sysctl)
@@ -228,7 +224,7 @@ static void check_data(int type, sa_family_t family, const struct cmd *cmd,
int cli_fd)
{
struct data_check expected = {}, result;
- union sa46 cli_sa;
+ struct sockaddr_storage cli_sa;
socklen_t addrlen;
int err;
@@ -251,26 +247,32 @@ static void check_data(int type, sa_family_t family, const struct cmd *cmd,
}
if (family == AF_INET6) {
+ struct sockaddr_in6 *srv_v6 = (struct sockaddr_in6 *)&srv_sa;
+ struct sockaddr_in6 *cli_v6 = (struct sockaddr_in6 *)&cli_sa;
+
expected.eth_protocol = htons(ETH_P_IPV6);
- expected.bind_inany = !srv_sa.v6.sin6_addr.s6_addr32[3] &&
- !srv_sa.v6.sin6_addr.s6_addr32[2] &&
- !srv_sa.v6.sin6_addr.s6_addr32[1] &&
- !srv_sa.v6.sin6_addr.s6_addr32[0];
+ expected.bind_inany = !srv_v6->sin6_addr.s6_addr32[3] &&
+ !srv_v6->sin6_addr.s6_addr32[2] &&
+ !srv_v6->sin6_addr.s6_addr32[1] &&
+ !srv_v6->sin6_addr.s6_addr32[0];
- memcpy(&expected.skb_addrs[0], cli_sa.v6.sin6_addr.s6_addr32,
- sizeof(cli_sa.v6.sin6_addr));
+ memcpy(&expected.skb_addrs[0], cli_v6->sin6_addr.s6_addr32,
+ sizeof(cli_v6->sin6_addr));
memcpy(&expected.skb_addrs[4], &in6addr_loopback,
sizeof(in6addr_loopback));
- expected.skb_ports[0] = cli_sa.v6.sin6_port;
- expected.skb_ports[1] = srv_sa.v6.sin6_port;
+ expected.skb_ports[0] = cli_v6->sin6_port;
+ expected.skb_ports[1] = srv_v6->sin6_port;
} else {
+ struct sockaddr_in *srv_v4 = (struct sockaddr_in *)&srv_sa;
+ struct sockaddr_in *cli_v4 = (struct sockaddr_in *)&cli_sa;
+
expected.eth_protocol = htons(ETH_P_IP);
- expected.bind_inany = !srv_sa.v4.sin_addr.s_addr;
+ expected.bind_inany = !srv_v4->sin_addr.s_addr;
- expected.skb_addrs[0] = cli_sa.v4.sin_addr.s_addr;
+ expected.skb_addrs[0] = cli_v4->sin_addr.s_addr;
expected.skb_addrs[1] = htonl(INADDR_LOOPBACK);
- expected.skb_ports[0] = cli_sa.v4.sin_port;
- expected.skb_ports[1] = srv_sa.v4.sin_port;
+ expected.skb_ports[0] = cli_v4->sin_port;
+ expected.skb_ports[1] = srv_v4->sin_port;
}
if (memcmp(&result, &expected, offsetof(struct data_check,
@@ -364,16 +366,15 @@ static void check_results(void)
static int send_data(int type, sa_family_t family, void *data, size_t len,
enum result expected)
{
- union sa46 cli_sa;
+ struct sockaddr_storage cli_sa;
int fd, err;
fd = socket(family, type, 0);
RET_ERR(fd == -1, "socket()", "fd:%d errno:%d\n", fd, errno);
- sa46_init_loopback(&cli_sa, family);
+ ss_init_loopback(&cli_sa, family);
err = bind(fd, (struct sockaddr *)&cli_sa, sizeof(cli_sa));
RET_ERR(fd == -1, "bind(cli_sa)", "err:%d errno:%d\n", err, errno);
-
err = sendto(fd, data, len, MSG_FASTOPEN, (struct sockaddr *)&srv_sa,
sizeof(srv_sa));
RET_ERR(err != len && expected >= PASS,
@@ -589,9 +590,9 @@ static void prepare_sk_fds(int type, sa_family_t family, bool inany)
socklen_t addrlen;
if (inany)
- sa46_init_inany(&srv_sa, family);
+ ss_init_inany(&srv_sa, family);
else
- sa46_init_loopback(&srv_sa, family);
+ ss_init_loopback(&srv_sa, family);
addrlen = sizeof(srv_sa);
/*
diff --git a/tools/testing/selftests/bpf/prog_tests/send_signal.c b/tools/testing/selftests/bpf/prog_tests/send_signal.c
index 1702aa592c2c..7ac4d5a488aa 100644
--- a/tools/testing/selftests/bpf/prog_tests/send_signal.c
+++ b/tools/testing/selftests/bpf/prog_tests/send_signal.c
@@ -206,6 +206,11 @@ destroy_skel:
skel_open_load_failure:
close(pipe_c2p[0]);
close(pipe_p2c[1]);
+ /*
+ * Child is either about to exit cleanly or stuck in case of errors.
+ * Nudge it to exit.
+ */
+ kill(pid, SIGKILL);
wait(NULL);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/sha256.c b/tools/testing/selftests/bpf/prog_tests/sha256.c
new file mode 100644
index 000000000000..604a0b1423d5
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/sha256.c
@@ -0,0 +1,52 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright 2025 Google LLC */
+
+#include <test_progs.h>
+#include "bpf/libbpf_internal.h"
+
+#define MAX_LEN 4096
+
+/* Test libbpf_sha256() for all lengths from 0 to MAX_LEN inclusively. */
+void test_sha256(void)
+{
+ /*
+ * The correctness of this value was verified by running this test with
+ * libbpf_sha256() replaced by OpenSSL's SHA256().
+ */
+ static const __u8 expected_digest_of_digests[SHA256_DIGEST_LENGTH] = {
+ 0x62, 0x30, 0x0e, 0x1d, 0xea, 0x7f, 0xc4, 0x74,
+ 0xfd, 0x8e, 0x64, 0x0b, 0xd8, 0x5f, 0xea, 0x04,
+ 0xf3, 0xef, 0x77, 0x42, 0xc2, 0x01, 0xb8, 0x90,
+ 0x6e, 0x19, 0x91, 0x1b, 0xca, 0xb3, 0x28, 0x42,
+ };
+ __u64 seed = 0;
+ __u8 *data = NULL, *digests = NULL;
+ __u8 digest_of_digests[SHA256_DIGEST_LENGTH];
+ size_t i;
+
+ data = malloc(MAX_LEN);
+ if (!ASSERT_OK_PTR(data, "malloc"))
+ goto out;
+ digests = malloc((MAX_LEN + 1) * SHA256_DIGEST_LENGTH);
+ if (!ASSERT_OK_PTR(digests, "malloc"))
+ goto out;
+
+ /* Generate MAX_LEN bytes of "random" data deterministically. */
+ for (i = 0; i < MAX_LEN; i++) {
+ seed = (seed * 25214903917 + 11) & ((1ULL << 48) - 1);
+ data[i] = (__u8)(seed >> 16);
+ }
+
+ /* Calculate a digest for each length 0 through MAX_LEN inclusively. */
+ for (i = 0; i <= MAX_LEN; i++)
+ libbpf_sha256(data, i, &digests[i * SHA256_DIGEST_LENGTH]);
+
+ /* Calculate and verify the digest of all the digests. */
+ libbpf_sha256(digests, (MAX_LEN + 1) * SHA256_DIGEST_LENGTH,
+ digest_of_digests);
+ ASSERT_MEMEQ(digest_of_digests, expected_digest_of_digests,
+ SHA256_DIGEST_LENGTH, "digest_of_digests");
+out:
+ free(data);
+ free(digests);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/sk_bypass_prot_mem.c b/tools/testing/selftests/bpf/prog_tests/sk_bypass_prot_mem.c
new file mode 100644
index 000000000000..e4940583924b
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/sk_bypass_prot_mem.c
@@ -0,0 +1,292 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright 2025 Google LLC */
+
+#include <test_progs.h>
+#include "sk_bypass_prot_mem.skel.h"
+#include "network_helpers.h"
+
+#define NR_PAGES 32
+#define NR_SOCKETS 2
+#define BUF_TOTAL (NR_PAGES * 4096 / NR_SOCKETS)
+#define BUF_SINGLE 1024
+#define NR_SEND (BUF_TOTAL / BUF_SINGLE)
+
+struct test_case {
+ char name[8];
+ int family;
+ int type;
+ int (*create_sockets)(struct test_case *test_case, int sk[], int len);
+ long (*get_memory_allocated)(struct test_case *test_case, struct sk_bypass_prot_mem *skel);
+};
+
+static int tcp_create_sockets(struct test_case *test_case, int sk[], int len)
+{
+ int server, i, err = 0;
+
+ server = start_server(test_case->family, test_case->type, NULL, 0, 0);
+ if (!ASSERT_GE(server, 0, "start_server_str"))
+ return server;
+
+ /* Keep for-loop so we can change NR_SOCKETS easily. */
+ for (i = 0; i < len; i += 2) {
+ sk[i] = connect_to_fd(server, 0);
+ if (sk[i] < 0) {
+ ASSERT_GE(sk[i], 0, "connect_to_fd");
+ err = sk[i];
+ break;
+ }
+
+ sk[i + 1] = accept(server, NULL, NULL);
+ if (sk[i + 1] < 0) {
+ ASSERT_GE(sk[i + 1], 0, "accept");
+ err = sk[i + 1];
+ break;
+ }
+ }
+
+ close(server);
+
+ return err;
+}
+
+static int udp_create_sockets(struct test_case *test_case, int sk[], int len)
+{
+ int i, j, err, rcvbuf = BUF_TOTAL;
+
+ /* Keep for-loop so we can change NR_SOCKETS easily. */
+ for (i = 0; i < len; i += 2) {
+ sk[i] = start_server(test_case->family, test_case->type, NULL, 0, 0);
+ if (sk[i] < 0) {
+ ASSERT_GE(sk[i], 0, "start_server");
+ return sk[i];
+ }
+
+ sk[i + 1] = connect_to_fd(sk[i], 0);
+ if (sk[i + 1] < 0) {
+ ASSERT_GE(sk[i + 1], 0, "connect_to_fd");
+ return sk[i + 1];
+ }
+
+ err = connect_fd_to_fd(sk[i], sk[i + 1], 0);
+ if (err) {
+ ASSERT_EQ(err, 0, "connect_fd_to_fd");
+ return err;
+ }
+
+ for (j = 0; j < 2; j++) {
+ err = setsockopt(sk[i + j], SOL_SOCKET, SO_RCVBUF, &rcvbuf, sizeof(int));
+ if (err) {
+ ASSERT_EQ(err, 0, "setsockopt(SO_RCVBUF)");
+ return err;
+ }
+ }
+ }
+
+ return 0;
+}
+
+static long get_memory_allocated(struct test_case *test_case,
+ bool *activated, long *memory_allocated)
+{
+ int sk;
+
+ *activated = true;
+
+ /* AF_INET and AF_INET6 share the same memory_allocated.
+ * tcp_init_sock() is called by AF_INET and AF_INET6,
+ * but udp_lib_init_sock() is inline.
+ */
+ sk = socket(AF_INET, test_case->type, 0);
+ if (!ASSERT_GE(sk, 0, "get_memory_allocated"))
+ return -1;
+
+ close(sk);
+
+ return *memory_allocated;
+}
+
+static long tcp_get_memory_allocated(struct test_case *test_case, struct sk_bypass_prot_mem *skel)
+{
+ return get_memory_allocated(test_case,
+ &skel->bss->tcp_activated,
+ &skel->bss->tcp_memory_allocated);
+}
+
+static long udp_get_memory_allocated(struct test_case *test_case, struct sk_bypass_prot_mem *skel)
+{
+ return get_memory_allocated(test_case,
+ &skel->bss->udp_activated,
+ &skel->bss->udp_memory_allocated);
+}
+
+static int check_bypass(struct test_case *test_case,
+ struct sk_bypass_prot_mem *skel, bool bypass)
+{
+ char buf[BUF_SINGLE] = {};
+ long memory_allocated[2];
+ int sk[NR_SOCKETS];
+ int err, i, j;
+
+ for (i = 0; i < ARRAY_SIZE(sk); i++)
+ sk[i] = -1;
+
+ err = test_case->create_sockets(test_case, sk, ARRAY_SIZE(sk));
+ if (err)
+ goto close;
+
+ memory_allocated[0] = test_case->get_memory_allocated(test_case, skel);
+
+ /* allocate pages >= NR_PAGES */
+ for (i = 0; i < ARRAY_SIZE(sk); i++) {
+ for (j = 0; j < NR_SEND; j++) {
+ int bytes = send(sk[i], buf, sizeof(buf), 0);
+
+ /* Avoid too noisy logs when something failed. */
+ if (bytes != sizeof(buf)) {
+ ASSERT_EQ(bytes, sizeof(buf), "send");
+ if (bytes < 0) {
+ err = bytes;
+ goto drain;
+ }
+ }
+ }
+ }
+
+ memory_allocated[1] = test_case->get_memory_allocated(test_case, skel);
+
+ if (bypass)
+ ASSERT_LE(memory_allocated[1], memory_allocated[0] + 10, "bypass");
+ else
+ ASSERT_GT(memory_allocated[1], memory_allocated[0] + NR_PAGES, "no bypass");
+
+drain:
+ if (test_case->type == SOCK_DGRAM) {
+ /* UDP starts purging sk->sk_receive_queue after one RCU
+ * grace period, then udp_memory_allocated goes down,
+ * so drain the queue before close().
+ */
+ for (i = 0; i < ARRAY_SIZE(sk); i++) {
+ for (j = 0; j < NR_SEND; j++) {
+ int bytes = recv(sk[i], buf, 1, MSG_DONTWAIT | MSG_TRUNC);
+
+ if (bytes == sizeof(buf))
+ continue;
+ if (bytes != -1 || errno != EAGAIN)
+ PRINT_FAIL("bytes: %d, errno: %s\n", bytes, strerror(errno));
+ break;
+ }
+ }
+ }
+
+close:
+ for (i = 0; i < ARRAY_SIZE(sk); i++) {
+ if (sk[i] < 0)
+ break;
+
+ close(sk[i]);
+ }
+
+ return err;
+}
+
+static void run_test(struct test_case *test_case)
+{
+ struct sk_bypass_prot_mem *skel;
+ struct nstoken *nstoken;
+ int cgroup, err;
+
+ skel = sk_bypass_prot_mem__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "open_and_load"))
+ return;
+
+ skel->bss->nr_cpus = libbpf_num_possible_cpus();
+
+ err = sk_bypass_prot_mem__attach(skel);
+ if (!ASSERT_OK(err, "attach"))
+ goto destroy_skel;
+
+ cgroup = test__join_cgroup("/sk_bypass_prot_mem");
+ if (!ASSERT_GE(cgroup, 0, "join_cgroup"))
+ goto destroy_skel;
+
+ err = make_netns("sk_bypass_prot_mem");
+ if (!ASSERT_EQ(err, 0, "make_netns"))
+ goto close_cgroup;
+
+ nstoken = open_netns("sk_bypass_prot_mem");
+ if (!ASSERT_OK_PTR(nstoken, "open_netns"))
+ goto remove_netns;
+
+ err = check_bypass(test_case, skel, false);
+ if (!ASSERT_EQ(err, 0, "test_bypass(false)"))
+ goto close_netns;
+
+ err = write_sysctl("/proc/sys/net/core/bypass_prot_mem", "1");
+ if (!ASSERT_EQ(err, 0, "write_sysctl(1)"))
+ goto close_netns;
+
+ err = check_bypass(test_case, skel, true);
+ if (!ASSERT_EQ(err, 0, "test_bypass(true by sysctl)"))
+ goto close_netns;
+
+ err = write_sysctl("/proc/sys/net/core/bypass_prot_mem", "0");
+ if (!ASSERT_EQ(err, 0, "write_sysctl(0)"))
+ goto close_netns;
+
+ skel->links.sock_create = bpf_program__attach_cgroup(skel->progs.sock_create, cgroup);
+ if (!ASSERT_OK_PTR(skel->links.sock_create, "attach_cgroup(sock_create)"))
+ goto close_netns;
+
+ err = check_bypass(test_case, skel, true);
+ ASSERT_EQ(err, 0, "test_bypass(true by bpf)");
+
+close_netns:
+ close_netns(nstoken);
+remove_netns:
+ remove_netns("sk_bypass_prot_mem");
+close_cgroup:
+ close(cgroup);
+destroy_skel:
+ sk_bypass_prot_mem__destroy(skel);
+}
+
+static struct test_case test_cases[] = {
+ {
+ .name = "TCP ",
+ .family = AF_INET,
+ .type = SOCK_STREAM,
+ .create_sockets = tcp_create_sockets,
+ .get_memory_allocated = tcp_get_memory_allocated,
+ },
+ {
+ .name = "UDP ",
+ .family = AF_INET,
+ .type = SOCK_DGRAM,
+ .create_sockets = udp_create_sockets,
+ .get_memory_allocated = udp_get_memory_allocated,
+ },
+ {
+ .name = "TCPv6",
+ .family = AF_INET6,
+ .type = SOCK_STREAM,
+ .create_sockets = tcp_create_sockets,
+ .get_memory_allocated = tcp_get_memory_allocated,
+ },
+ {
+ .name = "UDPv6",
+ .family = AF_INET6,
+ .type = SOCK_DGRAM,
+ .create_sockets = udp_create_sockets,
+ .get_memory_allocated = udp_get_memory_allocated,
+ },
+};
+
+void serial_test_sk_bypass_prot_mem(void)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(test_cases); i++) {
+ if (test__start_subtest(test_cases[i].name))
+ run_test(&test_cases[i]);
+ }
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/socket_helpers.h b/tools/testing/selftests/bpf/prog_tests/socket_helpers.h
index e02cabcc814e..0d59503a0c73 100644
--- a/tools/testing/selftests/bpf/prog_tests/socket_helpers.h
+++ b/tools/testing/selftests/bpf/prog_tests/socket_helpers.h
@@ -17,11 +17,16 @@
#define VMADDR_CID_LOCAL 1
#endif
+/* include/linux/compiler_types.h */
+#if __STDC_VERSION__ < 202311L && !defined(auto)
+# define auto __auto_type
+#endif
+
/* include/linux/cleanup.h */
#define __get_and_null(p, nullvalue) \
({ \
- __auto_type __ptr = &(p); \
- __auto_type __val = *__ptr; \
+ auto __ptr = &(p); \
+ auto __val = *__ptr; \
*__ptr = nullvalue; \
__val; \
})
diff --git a/tools/testing/selftests/bpf/prog_tests/spin_lock.c b/tools/testing/selftests/bpf/prog_tests/spin_lock.c
index e3ea5dc2f697..254fbfeab06a 100644
--- a/tools/testing/selftests/bpf/prog_tests/spin_lock.c
+++ b/tools/testing/selftests/bpf/prog_tests/spin_lock.c
@@ -13,22 +13,22 @@ static struct {
const char *err_msg;
} spin_lock_fail_tests[] = {
{ "lock_id_kptr_preserve",
- "5: (bf) r1 = r0 ; R0_w=ptr_foo(id=2,ref_obj_id=2) "
- "R1_w=ptr_foo(id=2,ref_obj_id=2) refs=2\n6: (85) call bpf_this_cpu_ptr#154\n"
+ "5: (bf) r1 = r0 ; R0=ptr_foo(id=2,ref_obj_id=2) "
+ "R1=ptr_foo(id=2,ref_obj_id=2) refs=2\n6: (85) call bpf_this_cpu_ptr#154\n"
"R1 type=ptr_ expected=percpu_ptr_" },
{ "lock_id_global_zero",
- "; R1_w=map_value(map=.data.A,ks=4,vs=4)\n2: (85) call bpf_this_cpu_ptr#154\n"
+ "; R1=map_value(map=.data.A,ks=4,vs=4)\n2: (85) call bpf_this_cpu_ptr#154\n"
"R1 type=map_value expected=percpu_ptr_" },
{ "lock_id_mapval_preserve",
"[0-9]\\+: (bf) r1 = r0 ;"
- " R0_w=map_value(id=1,map=array_map,ks=4,vs=8)"
- " R1_w=map_value(id=1,map=array_map,ks=4,vs=8)\n"
+ " R0=map_value(id=1,map=array_map,ks=4,vs=8)"
+ " R1=map_value(id=1,map=array_map,ks=4,vs=8)\n"
"[0-9]\\+: (85) call bpf_this_cpu_ptr#154\n"
"R1 type=map_value expected=percpu_ptr_" },
{ "lock_id_innermapval_preserve",
"[0-9]\\+: (bf) r1 = r0 ;"
" R0=map_value(id=2,ks=4,vs=8)"
- " R1_w=map_value(id=2,ks=4,vs=8)\n"
+ " R1=map_value(id=2,ks=4,vs=8)\n"
"[0-9]\\+: (85) call bpf_this_cpu_ptr#154\n"
"R1 type=map_value expected=percpu_ptr_" },
{ "lock_id_mismatch_kptr_kptr", "bpf_spin_unlock of different lock" },
diff --git a/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id.c b/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id.c
index b7ba5cd47d96..271b5cc9fc01 100644
--- a/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id.c
+++ b/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id.c
@@ -39,7 +39,7 @@ retry:
bpf_map_update_elem(control_map_fd, &key, &val, 0);
/* for every element in stackid_hmap, we can find a corresponding one
- * in stackmap, and vise versa.
+ * in stackmap, and vice versa.
*/
err = compare_map_keys(stackid_hmap_fd, stackmap_fd);
if (CHECK(err, "compare_map_keys stackid_hmap vs. stackmap",
diff --git a/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c b/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c
index 0832fd787457..b277dddd5af7 100644
--- a/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c
+++ b/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c
@@ -66,7 +66,7 @@ retry:
bpf_map_update_elem(control_map_fd, &key, &val, 0);
/* for every element in stackid_hmap, we can find a corresponding one
- * in stackmap, and vise versa.
+ * in stackmap, and vice versa.
*/
err = compare_map_keys(stackid_hmap_fd, stackmap_fd);
if (CHECK(err, "compare_map_keys stackid_hmap vs. stackmap",
diff --git a/tools/testing/selftests/bpf/prog_tests/stacktrace_ips.c b/tools/testing/selftests/bpf/prog_tests/stacktrace_ips.c
new file mode 100644
index 000000000000..c9efdd2a5b18
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/stacktrace_ips.c
@@ -0,0 +1,150 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
+#include "stacktrace_ips.skel.h"
+
+#ifdef __x86_64__
+static int check_stacktrace_ips(int fd, __u32 key, int cnt, ...)
+{
+ __u64 ips[PERF_MAX_STACK_DEPTH];
+ struct ksyms *ksyms = NULL;
+ int i, err = 0;
+ va_list args;
+
+ /* sorted by addr */
+ ksyms = load_kallsyms_local();
+ if (!ASSERT_OK_PTR(ksyms, "load_kallsyms_local"))
+ return -1;
+
+ /* unlikely, but... */
+ if (!ASSERT_LT(cnt, PERF_MAX_STACK_DEPTH, "check_max"))
+ return -1;
+
+ err = bpf_map_lookup_elem(fd, &key, ips);
+ if (err)
+ goto out;
+
+ /*
+ * Compare all symbols provided via arguments with stacktrace ips,
+ * and their related symbol addresses.t
+ */
+ va_start(args, cnt);
+
+ for (i = 0; i < cnt; i++) {
+ unsigned long val;
+ struct ksym *ksym;
+
+ val = va_arg(args, unsigned long);
+ ksym = ksym_search_local(ksyms, ips[i]);
+ if (!ASSERT_OK_PTR(ksym, "ksym_search_local"))
+ break;
+ ASSERT_EQ(ksym->addr, val, "stack_cmp");
+ }
+
+ va_end(args);
+
+out:
+ free_kallsyms_local(ksyms);
+ return err;
+}
+
+static void test_stacktrace_ips_kprobe_multi(bool retprobe)
+{
+ LIBBPF_OPTS(bpf_kprobe_multi_opts, opts,
+ .retprobe = retprobe
+ );
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
+ struct stacktrace_ips *skel;
+
+ skel = stacktrace_ips__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "stacktrace_ips__open_and_load"))
+ return;
+
+ if (!skel->kconfig->CONFIG_UNWINDER_ORC) {
+ test__skip();
+ goto cleanup;
+ }
+
+ skel->links.kprobe_multi_test = bpf_program__attach_kprobe_multi_opts(
+ skel->progs.kprobe_multi_test,
+ "bpf_testmod_stacktrace_test", &opts);
+ if (!ASSERT_OK_PTR(skel->links.kprobe_multi_test, "bpf_program__attach_kprobe_multi_opts"))
+ goto cleanup;
+
+ trigger_module_test_read(1);
+
+ load_kallsyms();
+
+ check_stacktrace_ips(bpf_map__fd(skel->maps.stackmap), skel->bss->stack_key, 4,
+ ksym_get_addr("bpf_testmod_stacktrace_test_3"),
+ ksym_get_addr("bpf_testmod_stacktrace_test_2"),
+ ksym_get_addr("bpf_testmod_stacktrace_test_1"),
+ ksym_get_addr("bpf_testmod_test_read"));
+
+cleanup:
+ stacktrace_ips__destroy(skel);
+}
+
+static void test_stacktrace_ips_raw_tp(void)
+{
+ __u32 info_len = sizeof(struct bpf_prog_info);
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
+ struct bpf_prog_info info = {};
+ struct stacktrace_ips *skel;
+ __u64 bpf_prog_ksym = 0;
+ int err;
+
+ skel = stacktrace_ips__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "stacktrace_ips__open_and_load"))
+ return;
+
+ if (!skel->kconfig->CONFIG_UNWINDER_ORC) {
+ test__skip();
+ goto cleanup;
+ }
+
+ skel->links.rawtp_test = bpf_program__attach_raw_tracepoint(
+ skel->progs.rawtp_test,
+ "bpf_testmod_test_read");
+ if (!ASSERT_OK_PTR(skel->links.rawtp_test, "bpf_program__attach_raw_tracepoint"))
+ goto cleanup;
+
+ /* get bpf program address */
+ info.jited_ksyms = ptr_to_u64(&bpf_prog_ksym);
+ info.nr_jited_ksyms = 1;
+ err = bpf_prog_get_info_by_fd(bpf_program__fd(skel->progs.rawtp_test),
+ &info, &info_len);
+ if (!ASSERT_OK(err, "bpf_prog_get_info_by_fd"))
+ goto cleanup;
+
+ trigger_module_test_read(1);
+
+ load_kallsyms();
+
+ check_stacktrace_ips(bpf_map__fd(skel->maps.stackmap), skel->bss->stack_key, 2,
+ bpf_prog_ksym,
+ ksym_get_addr("bpf_trace_run2"));
+
+cleanup:
+ stacktrace_ips__destroy(skel);
+}
+
+static void __test_stacktrace_ips(void)
+{
+ if (test__start_subtest("kprobe_multi"))
+ test_stacktrace_ips_kprobe_multi(false);
+ if (test__start_subtest("kretprobe_multi"))
+ test_stacktrace_ips_kprobe_multi(true);
+ if (test__start_subtest("raw_tp"))
+ test_stacktrace_ips_raw_tp();
+}
+#else
+static void __test_stacktrace_ips(void)
+{
+ test__skip();
+}
+#endif
+
+void test_stacktrace_ips(void)
+{
+ __test_stacktrace_ips();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/stacktrace_map.c b/tools/testing/selftests/bpf/prog_tests/stacktrace_map.c
index df59e4ae2951..c23b97414813 100644
--- a/tools/testing/selftests/bpf/prog_tests/stacktrace_map.c
+++ b/tools/testing/selftests/bpf/prog_tests/stacktrace_map.c
@@ -1,46 +1,27 @@
// SPDX-License-Identifier: GPL-2.0
#include <test_progs.h>
+#include "stacktrace_map.skel.h"
void test_stacktrace_map(void)
{
+ struct stacktrace_map *skel;
int control_map_fd, stackid_hmap_fd, stackmap_fd, stack_amap_fd;
- const char *prog_name = "oncpu";
- int err, prog_fd, stack_trace_len;
- const char *file = "./test_stacktrace_map.bpf.o";
- __u32 key, val, duration = 0;
- struct bpf_program *prog;
- struct bpf_object *obj;
- struct bpf_link *link;
+ int err, stack_trace_len;
+ __u32 key, val, stack_id, duration = 0;
+ __u64 stack[PERF_MAX_STACK_DEPTH];
- err = bpf_prog_test_load(file, BPF_PROG_TYPE_TRACEPOINT, &obj, &prog_fd);
- if (CHECK(err, "prog_load", "err %d errno %d\n", err, errno))
+ skel = stacktrace_map__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_open_and_load"))
return;
- prog = bpf_object__find_program_by_name(obj, prog_name);
- if (CHECK(!prog, "find_prog", "prog '%s' not found\n", prog_name))
- goto close_prog;
-
- link = bpf_program__attach_tracepoint(prog, "sched", "sched_switch");
- if (!ASSERT_OK_PTR(link, "attach_tp"))
- goto close_prog;
-
- /* find map fds */
- control_map_fd = bpf_find_map(__func__, obj, "control_map");
- if (CHECK_FAIL(control_map_fd < 0))
- goto disable_pmu;
-
- stackid_hmap_fd = bpf_find_map(__func__, obj, "stackid_hmap");
- if (CHECK_FAIL(stackid_hmap_fd < 0))
- goto disable_pmu;
-
- stackmap_fd = bpf_find_map(__func__, obj, "stackmap");
- if (CHECK_FAIL(stackmap_fd < 0))
- goto disable_pmu;
-
- stack_amap_fd = bpf_find_map(__func__, obj, "stack_amap");
- if (CHECK_FAIL(stack_amap_fd < 0))
- goto disable_pmu;
+ control_map_fd = bpf_map__fd(skel->maps.control_map);
+ stackid_hmap_fd = bpf_map__fd(skel->maps.stackid_hmap);
+ stackmap_fd = bpf_map__fd(skel->maps.stackmap);
+ stack_amap_fd = bpf_map__fd(skel->maps.stack_amap);
+ err = stacktrace_map__attach(skel);
+ if (!ASSERT_OK(err, "skel_attach"))
+ goto out;
/* give some time for bpf program run */
sleep(1);
@@ -50,26 +31,32 @@ void test_stacktrace_map(void)
bpf_map_update_elem(control_map_fd, &key, &val, 0);
/* for every element in stackid_hmap, we can find a corresponding one
- * in stackmap, and vise versa.
+ * in stackmap, and vice versa.
*/
err = compare_map_keys(stackid_hmap_fd, stackmap_fd);
if (CHECK(err, "compare_map_keys stackid_hmap vs. stackmap",
"err %d errno %d\n", err, errno))
- goto disable_pmu;
+ goto out;
err = compare_map_keys(stackmap_fd, stackid_hmap_fd);
if (CHECK(err, "compare_map_keys stackmap vs. stackid_hmap",
"err %d errno %d\n", err, errno))
- goto disable_pmu;
+ goto out;
stack_trace_len = PERF_MAX_STACK_DEPTH * sizeof(__u64);
err = compare_stack_ips(stackmap_fd, stack_amap_fd, stack_trace_len);
if (CHECK(err, "compare_stack_ips stackmap vs. stack_amap",
"err %d errno %d\n", err, errno))
- goto disable_pmu;
-
-disable_pmu:
- bpf_link__destroy(link);
-close_prog:
- bpf_object__close(obj);
+ goto out;
+
+ stack_id = skel->bss->stack_id;
+ err = bpf_map_lookup_and_delete_elem(stackmap_fd, &stack_id, stack);
+ if (!ASSERT_OK(err, "lookup and delete target stack_id"))
+ goto out;
+
+ err = bpf_map_lookup_elem(stackmap_fd, &stack_id, stack);
+ if (!ASSERT_EQ(err, -ENOENT, "lookup deleted stack_id"))
+ goto out;
+out:
+ stacktrace_map__destroy(skel);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/stacktrace_map_raw_tp.c b/tools/testing/selftests/bpf/prog_tests/stacktrace_map_raw_tp.c
index c6ef06f55cdb..e985d51d3d47 100644
--- a/tools/testing/selftests/bpf/prog_tests/stacktrace_map_raw_tp.c
+++ b/tools/testing/selftests/bpf/prog_tests/stacktrace_map_raw_tp.c
@@ -5,7 +5,7 @@ void test_stacktrace_map_raw_tp(void)
{
const char *prog_name = "oncpu";
int control_map_fd, stackid_hmap_fd, stackmap_fd;
- const char *file = "./test_stacktrace_map.bpf.o";
+ const char *file = "./stacktrace_map.bpf.o";
__u32 key, val, duration = 0;
int err, prog_fd;
struct bpf_program *prog;
@@ -46,7 +46,7 @@ void test_stacktrace_map_raw_tp(void)
bpf_map_update_elem(control_map_fd, &key, &val, 0);
/* for every element in stackid_hmap, we can find a corresponding one
- * in stackmap, and vise versa.
+ * in stackmap, and vice versa.
*/
err = compare_map_keys(stackid_hmap_fd, stackmap_fd);
if (CHECK(err, "compare_map_keys stackid_hmap vs. stackmap",
diff --git a/tools/testing/selftests/bpf/prog_tests/stacktrace_map_skip.c b/tools/testing/selftests/bpf/prog_tests/stacktrace_map_skip.c
index 1932b1e0685c..dc2ccf6a14d1 100644
--- a/tools/testing/selftests/bpf/prog_tests/stacktrace_map_skip.c
+++ b/tools/testing/selftests/bpf/prog_tests/stacktrace_map_skip.c
@@ -40,7 +40,7 @@ void test_stacktrace_map_skip(void)
skel->bss->control = 1;
/* for every element in stackid_hmap, we can find a corresponding one
- * in stackmap, and vise versa.
+ * in stackmap, and vice versa.
*/
err = compare_map_keys(stackid_hmap_fd, stackmap_fd);
if (!ASSERT_OK(err, "compare_map_keys stackid_hmap vs. stackmap"))
diff --git a/tools/testing/selftests/bpf/prog_tests/stream.c b/tools/testing/selftests/bpf/prog_tests/stream.c
index d9f0185dca61..c3cce5c292bd 100644
--- a/tools/testing/selftests/bpf/prog_tests/stream.c
+++ b/tools/testing/selftests/bpf/prog_tests/stream.c
@@ -2,7 +2,6 @@
/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
#include <test_progs.h>
#include <sys/mman.h>
-#include <regex.h>
#include "stream.skel.h"
#include "stream_fail.skel.h"
@@ -18,87 +17,6 @@ void test_stream_success(void)
return;
}
-struct {
- int prog_off;
- const char *errstr;
-} stream_error_arr[] = {
- {
- offsetof(struct stream, progs.stream_cond_break),
- "ERROR: Timeout detected for may_goto instruction\n"
- "CPU: [0-9]+ UID: 0 PID: [0-9]+ Comm: .*\n"
- "Call trace:\n"
- "([a-zA-Z_][a-zA-Z0-9_]*\\+0x[0-9a-fA-F]+/0x[0-9a-fA-F]+\n"
- "|[ \t]+[^\n]+\n)*",
- },
- {
- offsetof(struct stream, progs.stream_deadlock),
- "ERROR: AA or ABBA deadlock detected for bpf_res_spin_lock\n"
- "Attempted lock = (0x[0-9a-fA-F]+)\n"
- "Total held locks = 1\n"
- "Held lock\\[ 0\\] = \\1\n" // Lock address must match
- "CPU: [0-9]+ UID: 0 PID: [0-9]+ Comm: .*\n"
- "Call trace:\n"
- "([a-zA-Z_][a-zA-Z0-9_]*\\+0x[0-9a-fA-F]+/0x[0-9a-fA-F]+\n"
- "|[ \t]+[^\n]+\n)*",
- },
-};
-
-static int match_regex(const char *pattern, const char *string)
-{
- int err, rc;
- regex_t re;
-
- err = regcomp(&re, pattern, REG_EXTENDED | REG_NEWLINE);
- if (err)
- return -1;
- rc = regexec(&re, string, 0, NULL, 0);
- regfree(&re);
- return rc == 0 ? 1 : 0;
-}
-
-void test_stream_errors(void)
-{
- LIBBPF_OPTS(bpf_test_run_opts, opts);
- LIBBPF_OPTS(bpf_prog_stream_read_opts, ropts);
- struct stream *skel;
- int ret, prog_fd;
- char buf[1024];
-
- skel = stream__open_and_load();
- if (!ASSERT_OK_PTR(skel, "stream__open_and_load"))
- return;
-
- for (int i = 0; i < ARRAY_SIZE(stream_error_arr); i++) {
- struct bpf_program **prog;
-
- prog = (struct bpf_program **)(((char *)skel) + stream_error_arr[i].prog_off);
- prog_fd = bpf_program__fd(*prog);
- ret = bpf_prog_test_run_opts(prog_fd, &opts);
- ASSERT_OK(ret, "ret");
- ASSERT_OK(opts.retval, "retval");
-
-#if !defined(__x86_64__)
- ASSERT_TRUE(1, "Timed may_goto unsupported, skip.");
- if (i == 0) {
- ret = bpf_prog_stream_read(prog_fd, 2, buf, sizeof(buf), &ropts);
- ASSERT_EQ(ret, 0, "stream read");
- continue;
- }
-#endif
-
- ret = bpf_prog_stream_read(prog_fd, BPF_STREAM_STDERR, buf, sizeof(buf), &ropts);
- ASSERT_GT(ret, 0, "stream read");
- ASSERT_LE(ret, 1023, "len for buf");
- buf[ret] = '\0';
-
- ret = match_regex(stream_error_arr[i].errstr, buf);
- if (!ASSERT_TRUE(ret == 1, "regex match"))
- fprintf(stderr, "Output from stream:\n%s\n", buf);
- }
-
- stream__destroy(skel);
-}
-
void test_stream_syscall(void)
{
LIBBPF_OPTS(bpf_test_run_opts, opts);
@@ -139,3 +57,52 @@ void test_stream_syscall(void)
stream__destroy(skel);
}
+
+static void test_address(struct bpf_program *prog, unsigned long *fault_addr_p)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, opts);
+ LIBBPF_OPTS(bpf_prog_stream_read_opts, ropts);
+ int ret, prog_fd;
+ char fault_addr[64];
+ char buf[1024];
+
+ prog_fd = bpf_program__fd(prog);
+
+ ret = bpf_prog_test_run_opts(prog_fd, &opts);
+ ASSERT_OK(ret, "ret");
+ ASSERT_OK(opts.retval, "retval");
+
+ sprintf(fault_addr, "0x%lx", *fault_addr_p);
+
+ ret = bpf_prog_stream_read(prog_fd, BPF_STREAM_STDERR, buf, sizeof(buf), &ropts);
+ ASSERT_GT(ret, 0, "stream read");
+ ASSERT_LE(ret, 1023, "len for buf");
+ buf[ret] = '\0';
+
+ if (!ASSERT_HAS_SUBSTR(buf, fault_addr, "fault_addr")) {
+ fprintf(stderr, "Output from stream:\n%s\n", buf);
+ fprintf(stderr, "Fault Addr: %s\n", fault_addr);
+ }
+}
+
+void test_stream_arena_fault_address(void)
+{
+ struct stream *skel;
+
+#if !defined(__x86_64__) && !defined(__aarch64__)
+ printf("%s:SKIP: arena fault reporting not supported\n", __func__);
+ test__skip();
+ return;
+#endif
+
+ skel = stream__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "stream__open_and_load"))
+ return;
+
+ if (test__start_subtest("read_fault"))
+ test_address(skel->progs.stream_arena_read_fault, &skel->bss->fault_addr);
+ if (test__start_subtest("write_fault"))
+ test_address(skel->progs.stream_arena_write_fault, &skel->bss->fault_addr);
+
+ stream__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/string_kfuncs.c b/tools/testing/selftests/bpf/prog_tests/string_kfuncs.c